2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
88 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 struct data_backref {
102 struct extent_backref node;
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 return container_of(back, struct data_backref, node);
 * Much like data_backref, but with the undetermined members removed
 * and changed to use list_head.
142 * During extent scan, it is stored in root->orphan_data_extent.
143 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145 struct orphan_data_extent {
146 struct list_head list;
154 struct tree_backref {
155 struct extent_backref node;
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 return container_of(back, struct tree_backref, node);
167 /* Explicit initialization for extent_record::flag_block_full_backref */
168 enum { FLAG_UNSET = 2 };
170 struct extent_record {
171 struct list_head backrefs;
172 struct list_head dups;
173 struct list_head list;
174 struct cache_extent cache;
175 struct btrfs_disk_key parent_key;
180 u64 extent_item_refs;
182 u64 parent_generation;
186 unsigned int flag_block_full_backref:2;
187 unsigned int found_rec:1;
188 unsigned int content_checked:1;
189 unsigned int owner_ref_checked:1;
190 unsigned int is_root:1;
191 unsigned int metadata:1;
192 unsigned int bad_full_backref:1;
193 unsigned int crossing_stripes:1;
194 unsigned int wrong_chunk_type:1;
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 return container_of(entry, struct extent_record, list);
202 struct inode_backref {
203 struct list_head list;
204 unsigned int found_dir_item:1;
205 unsigned int found_dir_index:1;
206 unsigned int found_inode_ref:1;
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 return list_entry(entry, struct inode_backref, list);
221 struct root_item_record {
222 struct list_head list;
228 struct btrfs_key drop_key;
231 #define REF_ERR_NO_DIR_ITEM (1 << 0)
232 #define REF_ERR_NO_DIR_INDEX (1 << 1)
233 #define REF_ERR_NO_INODE_REF (1 << 2)
234 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
235 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
236 #define REF_ERR_DUP_INODE_REF (1 << 5)
237 #define REF_ERR_INDEX_UNMATCH (1 << 6)
238 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
239 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
240 #define REF_ERR_NO_ROOT_REF (1 << 9)
241 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
242 #define REF_ERR_DUP_ROOT_REF (1 << 11)
243 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
245 struct file_extent_hole {
251 struct inode_record {
252 struct list_head backrefs;
253 unsigned int checked:1;
254 unsigned int merging:1;
255 unsigned int found_inode_item:1;
256 unsigned int found_dir_item:1;
257 unsigned int found_file_extent:1;
258 unsigned int found_csum_item:1;
259 unsigned int some_csum_missing:1;
260 unsigned int nodatasum:1;
273 struct rb_root holes;
274 struct list_head orphan_extents;
279 #define I_ERR_NO_INODE_ITEM (1 << 0)
280 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
281 #define I_ERR_DUP_INODE_ITEM (1 << 2)
282 #define I_ERR_DUP_DIR_INDEX (1 << 3)
283 #define I_ERR_ODD_DIR_ITEM (1 << 4)
284 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
285 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
286 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
287 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
288 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
289 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
290 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
291 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
292 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
293 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
295 struct root_backref {
296 struct list_head list;
297 unsigned int found_dir_item:1;
298 unsigned int found_dir_index:1;
299 unsigned int found_back_ref:1;
300 unsigned int found_forward_ref:1;
301 unsigned int reachable:1;
310 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 return list_entry(entry, struct root_backref, list);
316 struct list_head backrefs;
317 struct cache_extent cache;
318 unsigned int found_root_item:1;
324 struct cache_extent cache;
329 struct cache_extent cache;
330 struct cache_tree root_cache;
331 struct cache_tree inode_cache;
332 struct inode_record *current;
341 struct walk_control {
342 struct cache_tree shared;
343 struct shared_node *nodes[BTRFS_MAX_LEVEL];
349 struct btrfs_key key;
351 struct list_head list;
354 struct extent_entry {
359 struct list_head list;
362 struct root_item_info {
363 /* level of the root */
365 /* number of nodes at this level, must be 1 for a root */
369 struct cache_extent cache_extent;
373 * Error bit for low memory mode check.
375 * Currently no caller cares about it yet. Just internal use for error
/*
 * Every flag below occupies its own bit, so several conditions can be OR-ed
 * into one error value and still be tested independently.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * CROSSING_STRIPE_BOUNDARY used to be (1 << 4) as well, which made it
 * indistinguishable from REFERENCER_MISMATCH in an error mask. It now uses
 * the first bit not taken by any other flag in this group.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
389 static void *print_status_check(void *p)
391 struct task_ctx *priv = p;
392 const char work_indicator[] = { '.', 'o', 'O', 'o' };
394 static char *task_position_string[] = {
396 "checking free space cache",
400 task_period_start(priv->info, 1000 /* 1s */);
402 if (priv->tp == TASK_NOTHING)
406 printf("%s [%c]\r", task_position_string[priv->tp],
407 work_indicator[count % 4]);
410 task_period_wait(priv->info);
415 static int print_status_return(void *p)
423 static enum btrfs_check_mode parse_check_mode(const char *str)
425 if (strcmp(str, "lowmem") == 0)
426 return CHECK_MODE_LOWMEM;
427 if (strcmp(str, "orig") == 0)
428 return CHECK_MODE_ORIGINAL;
429 if (strcmp(str, "original") == 0)
430 return CHECK_MODE_ORIGINAL;
432 return CHECK_MODE_UNKNOWN;
/* Compatibility helper to allow reuse of old code */
436 static u64 first_extent_gap(struct rb_root *holes)
438 struct file_extent_hole *hole;
440 if (RB_EMPTY_ROOT(holes))
443 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
447 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 struct file_extent_hole *hole1;
450 struct file_extent_hole *hole2;
452 hole1 = rb_entry(node1, struct file_extent_hole, node);
453 hole2 = rb_entry(node2, struct file_extent_hole, node);
455 if (hole1->start > hole2->start)
457 if (hole1->start < hole2->start)
459 /* Now hole1->start == hole2->start */
460 if (hole1->len >= hole2->len)
462 * Hole 1 will be merge center
463 * Same hole will be merged later
466 /* Hole 2 will be merge center */
471 * Add a hole to the record
473 * This will do hole merge for copy_file_extent_holes(),
474 * which will ensure there won't be continuous holes.
476 static int add_file_extent_hole(struct rb_root *holes,
479 struct file_extent_hole *hole;
480 struct file_extent_hole *prev = NULL;
481 struct file_extent_hole *next = NULL;
483 hole = malloc(sizeof(*hole));
488 /* Since compare will not return 0, no -EEXIST will happen */
489 rb_insert(holes, &hole->node, compare_hole);
491 /* simple merge with previous hole */
492 if (rb_prev(&hole->node))
493 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495 if (prev && prev->start + prev->len >= hole->start) {
496 hole->len = hole->start + hole->len - prev->start;
497 hole->start = prev->start;
498 rb_erase(&prev->node, holes);
503 /* iterate merge with next holes */
505 if (!rb_next(&hole->node))
507 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509 if (hole->start + hole->len >= next->start) {
510 if (hole->start + hole->len <= next->start + next->len)
511 hole->len = next->start + next->len -
513 rb_erase(&next->node, holes);
522 static int compare_hole_range(struct rb_node *node, void *data)
524 struct file_extent_hole *hole;
527 hole = (struct file_extent_hole *)data;
530 hole = rb_entry(node, struct file_extent_hole, node);
531 if (start < hole->start)
533 if (start >= hole->start && start < hole->start + hole->len)
539 * Delete a hole in the record
 * This splits the hole where needed and is much stricter than add.
543 static int del_file_extent_hole(struct rb_root *holes,
546 struct file_extent_hole *hole;
547 struct file_extent_hole tmp;
552 struct rb_node *node;
559 node = rb_search(holes, &tmp, compare_hole_range, NULL);
562 hole = rb_entry(node, struct file_extent_hole, node);
563 if (start + len > hole->start + hole->len)
567 * Now there will be no overlap, delete the hole and re-add the
568 * split(s) if they exists.
570 if (start > hole->start) {
571 prev_start = hole->start;
572 prev_len = start - hole->start;
575 if (hole->start + hole->len > start + len) {
576 next_start = start + len;
577 next_len = hole->start + hole->len - start - len;
580 rb_erase(node, holes);
583 ret = add_file_extent_hole(holes, prev_start, prev_len);
588 ret = add_file_extent_hole(holes, next_start, next_len);
595 static int copy_file_extent_holes(struct rb_root *dst,
598 struct file_extent_hole *hole;
599 struct rb_node *node;
602 node = rb_first(src);
604 hole = rb_entry(node, struct file_extent_hole, node);
605 ret = add_file_extent_hole(dst, hole->start, hole->len);
608 node = rb_next(node);
613 static void free_file_extent_holes(struct rb_root *holes)
615 struct rb_node *node;
616 struct file_extent_hole *hole;
618 node = rb_first(holes);
620 hole = rb_entry(node, struct file_extent_hole, node);
621 rb_erase(node, holes);
623 node = rb_first(holes);
627 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
629 static void record_root_in_trans(struct btrfs_trans_handle *trans,
630 struct btrfs_root *root)
632 if (root->last_trans != trans->transid) {
633 root->track_dirty = 1;
634 root->last_trans = trans->transid;
635 root->commit_root = root->node;
636 extent_buffer_get(root->node);
640 static u8 imode_to_type(u32 imode)
643 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
644 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
645 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
646 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
647 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
648 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
649 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
650 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
653 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
657 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 struct device_record *rec1;
660 struct device_record *rec2;
662 rec1 = rb_entry(node1, struct device_record, node);
663 rec2 = rb_entry(node2, struct device_record, node);
664 if (rec1->devid > rec2->devid)
666 else if (rec1->devid < rec2->devid)
672 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 struct inode_record *rec;
675 struct inode_backref *backref;
676 struct inode_backref *orig;
677 struct inode_backref *tmp;
678 struct orphan_data_extent *src_orphan;
679 struct orphan_data_extent *dst_orphan;
684 rec = malloc(sizeof(*rec));
686 return ERR_PTR(-ENOMEM);
687 memcpy(rec, orig_rec, sizeof(*rec));
689 INIT_LIST_HEAD(&rec->backrefs);
690 INIT_LIST_HEAD(&rec->orphan_extents);
691 rec->holes = RB_ROOT;
693 list_for_each_entry(orig, &orig_rec->backrefs, list) {
694 size = sizeof(*orig) + orig->namelen + 1;
695 backref = malloc(size);
700 memcpy(backref, orig, size);
701 list_add_tail(&backref->list, &rec->backrefs);
703 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
704 dst_orphan = malloc(sizeof(*dst_orphan));
709 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
710 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
719 rb = rb_first(&rec->holes);
721 struct file_extent_hole *hole;
723 hole = rb_entry(rb, struct file_extent_hole, node);
729 if (!list_empty(&rec->backrefs))
730 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
731 list_del(&orig->list);
735 if (!list_empty(&rec->orphan_extents))
736 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
737 list_del(&orig->list);
746 static void print_orphan_data_extents(struct list_head *orphan_extents,
749 struct orphan_data_extent *orphan;
751 if (list_empty(orphan_extents))
753 printf("The following data extent is lost in tree %llu:\n",
755 list_for_each_entry(orphan, orphan_extents, list) {
756 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
757 orphan->objectid, orphan->offset, orphan->disk_bytenr,
762 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 u64 root_objectid = root->root_key.objectid;
765 int errors = rec->errors;
769 /* reloc root errors, we print its corresponding fs root objectid*/
770 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
771 root_objectid = root->root_key.offset;
772 fprintf(stderr, "reloc");
774 fprintf(stderr, "root %llu inode %llu errors %x",
775 (unsigned long long) root_objectid,
776 (unsigned long long) rec->ino, rec->errors);
778 if (errors & I_ERR_NO_INODE_ITEM)
779 fprintf(stderr, ", no inode item");
780 if (errors & I_ERR_NO_ORPHAN_ITEM)
781 fprintf(stderr, ", no orphan item");
782 if (errors & I_ERR_DUP_INODE_ITEM)
783 fprintf(stderr, ", dup inode item");
784 if (errors & I_ERR_DUP_DIR_INDEX)
785 fprintf(stderr, ", dup dir index");
786 if (errors & I_ERR_ODD_DIR_ITEM)
787 fprintf(stderr, ", odd dir item");
788 if (errors & I_ERR_ODD_FILE_EXTENT)
789 fprintf(stderr, ", odd file extent");
790 if (errors & I_ERR_BAD_FILE_EXTENT)
791 fprintf(stderr, ", bad file extent");
792 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
793 fprintf(stderr, ", file extent overlap");
794 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
795 fprintf(stderr, ", file extent discount");
796 if (errors & I_ERR_DIR_ISIZE_WRONG)
797 fprintf(stderr, ", dir isize wrong");
798 if (errors & I_ERR_FILE_NBYTES_WRONG)
799 fprintf(stderr, ", nbytes wrong");
800 if (errors & I_ERR_ODD_CSUM_ITEM)
801 fprintf(stderr, ", odd csum item");
802 if (errors & I_ERR_SOME_CSUM_MISSING)
803 fprintf(stderr, ", some csum missing");
804 if (errors & I_ERR_LINK_COUNT_WRONG)
805 fprintf(stderr, ", link count wrong");
806 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
807 fprintf(stderr, ", orphan file extent");
808 fprintf(stderr, "\n");
809 /* Print the orphan extents if needed */
810 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
811 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813 /* Print the holes if needed */
814 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
815 struct file_extent_hole *hole;
816 struct rb_node *node;
819 node = rb_first(&rec->holes);
820 fprintf(stderr, "Found file extent holes:\n");
823 hole = rb_entry(node, struct file_extent_hole, node);
824 fprintf(stderr, "\tstart: %llu, len: %llu\n",
825 hole->start, hole->len);
826 node = rb_next(node);
829 fprintf(stderr, "\tstart: 0, len: %llu\n",
831 root->fs_info->sectorsize));
835 static void print_ref_error(int errors)
837 if (errors & REF_ERR_NO_DIR_ITEM)
838 fprintf(stderr, ", no dir item");
839 if (errors & REF_ERR_NO_DIR_INDEX)
840 fprintf(stderr, ", no dir index");
841 if (errors & REF_ERR_NO_INODE_REF)
842 fprintf(stderr, ", no inode ref");
843 if (errors & REF_ERR_DUP_DIR_ITEM)
844 fprintf(stderr, ", dup dir item");
845 if (errors & REF_ERR_DUP_DIR_INDEX)
846 fprintf(stderr, ", dup dir index");
847 if (errors & REF_ERR_DUP_INODE_REF)
848 fprintf(stderr, ", dup inode ref");
849 if (errors & REF_ERR_INDEX_UNMATCH)
850 fprintf(stderr, ", index mismatch");
851 if (errors & REF_ERR_FILETYPE_UNMATCH)
852 fprintf(stderr, ", filetype mismatch");
853 if (errors & REF_ERR_NAME_TOO_LONG)
854 fprintf(stderr, ", name too long");
855 if (errors & REF_ERR_NO_ROOT_REF)
856 fprintf(stderr, ", no root ref");
857 if (errors & REF_ERR_NO_ROOT_BACKREF)
858 fprintf(stderr, ", no root backref");
859 if (errors & REF_ERR_DUP_ROOT_REF)
860 fprintf(stderr, ", dup root ref");
861 if (errors & REF_ERR_DUP_ROOT_BACKREF)
862 fprintf(stderr, ", dup root backref");
863 fprintf(stderr, "\n");
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869 struct ptr_node *node;
870 struct cache_extent *cache;
871 struct inode_record *rec = NULL;
874 cache = lookup_cache_extent(inode_cache, ino, 1);
876 node = container_of(cache, struct ptr_node, cache);
878 if (mod && rec->refs > 1) {
879 node->data = clone_inode_rec(rec);
880 if (IS_ERR(node->data))
886 rec = calloc(1, sizeof(*rec));
888 return ERR_PTR(-ENOMEM);
890 rec->extent_start = (u64)-1;
892 INIT_LIST_HEAD(&rec->backrefs);
893 INIT_LIST_HEAD(&rec->orphan_extents);
894 rec->holes = RB_ROOT;
896 node = malloc(sizeof(*node));
899 return ERR_PTR(-ENOMEM);
901 node->cache.start = ino;
902 node->cache.size = 1;
905 if (ino == BTRFS_FREE_INO_OBJECTID)
908 ret = insert_cache_extent(inode_cache, &node->cache);
910 return ERR_PTR(-EEXIST);
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 struct orphan_data_extent *orphan;
919 while (!list_empty(orphan_extents)) {
920 orphan = list_entry(orphan_extents->next,
921 struct orphan_data_extent, list);
922 list_del(&orphan->list);
927 static void free_inode_rec(struct inode_record *rec)
929 struct inode_backref *backref;
934 while (!list_empty(&rec->backrefs)) {
935 backref = to_inode_backref(rec->backrefs.next);
936 list_del(&backref->list);
939 free_orphan_data_extents(&rec->orphan_extents);
940 free_file_extent_holes(&rec->holes);
944 static int can_free_inode_rec(struct inode_record *rec)
946 if (!rec->errors && rec->checked && rec->found_inode_item &&
947 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953 struct inode_record *rec)
955 struct cache_extent *cache;
956 struct inode_backref *tmp, *backref;
957 struct ptr_node *node;
960 if (!rec->found_inode_item)
963 filetype = imode_to_type(rec->imode);
964 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965 if (backref->found_dir_item && backref->found_dir_index) {
966 if (backref->filetype != filetype)
967 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968 if (!backref->errors && backref->found_inode_ref &&
969 rec->nlink == rec->found_link) {
970 list_del(&backref->list);
976 if (!rec->checked || rec->merging)
979 if (S_ISDIR(rec->imode)) {
980 if (rec->found_size != rec->isize)
981 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982 if (rec->found_file_extent)
983 rec->errors |= I_ERR_ODD_FILE_EXTENT;
984 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985 if (rec->found_dir_item)
986 rec->errors |= I_ERR_ODD_DIR_ITEM;
987 if (rec->found_size != rec->nbytes)
988 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989 if (rec->nlink > 0 && !no_holes &&
990 (rec->extent_end < rec->isize ||
991 first_extent_gap(&rec->holes) < rec->isize))
992 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996 if (rec->found_csum_item && rec->nodatasum)
997 rec->errors |= I_ERR_ODD_CSUM_ITEM;
998 if (rec->some_csum_missing && !rec->nodatasum)
999 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002 BUG_ON(rec->refs != 1);
1003 if (can_free_inode_rec(rec)) {
1004 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005 node = container_of(cache, struct ptr_node, cache);
1006 BUG_ON(node->data != rec);
1007 remove_cache_extent(inode_cache, &node->cache);
1009 free_inode_rec(rec);
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 struct btrfs_path path;
1016 struct btrfs_key key;
1019 key.objectid = BTRFS_ORPHAN_OBJECTID;
1020 key.type = BTRFS_ORPHAN_ITEM_KEY;
1023 btrfs_init_path(&path);
1024 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025 btrfs_release_path(&path);
1031 static int process_inode_item(struct extent_buffer *eb,
1032 int slot, struct btrfs_key *key,
1033 struct shared_node *active_node)
1035 struct inode_record *rec;
1036 struct btrfs_inode_item *item;
1038 rec = active_node->current;
1039 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040 if (rec->found_inode_item) {
1041 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045 rec->nlink = btrfs_inode_nlink(eb, item);
1046 rec->isize = btrfs_inode_size(eb, item);
1047 rec->nbytes = btrfs_inode_nbytes(eb, item);
1048 rec->imode = btrfs_inode_mode(eb, item);
1049 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051 rec->found_inode_item = 1;
1052 if (rec->nlink == 0)
1053 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054 maybe_free_inode_rec(&active_node->inode_cache, rec);
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060 int namelen, u64 dir)
1062 struct inode_backref *backref;
1064 list_for_each_entry(backref, &rec->backrefs, list) {
1065 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067 if (backref->dir != dir || backref->namelen != namelen)
1069 if (memcmp(name, backref->name, namelen))
1074 backref = malloc(sizeof(*backref) + namelen + 1);
1077 memset(backref, 0, sizeof(*backref));
1079 backref->namelen = namelen;
1080 memcpy(backref->name, name, namelen);
1081 backref->name[namelen] = '\0';
1082 list_add_tail(&backref->list, &rec->backrefs);
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087 u64 ino, u64 dir, u64 index,
1088 const char *name, int namelen,
1089 u8 filetype, u8 itemtype, int errors)
1091 struct inode_record *rec;
1092 struct inode_backref *backref;
1094 rec = get_inode_rec(inode_cache, ino, 1);
1095 BUG_ON(IS_ERR(rec));
1096 backref = get_inode_backref(rec, name, namelen, dir);
1099 backref->errors |= errors;
1100 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101 if (backref->found_dir_index)
1102 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103 if (backref->found_inode_ref && backref->index != index)
1104 backref->errors |= REF_ERR_INDEX_UNMATCH;
1105 if (backref->found_dir_item && backref->filetype != filetype)
1106 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108 backref->index = index;
1109 backref->filetype = filetype;
1110 backref->found_dir_index = 1;
1111 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113 if (backref->found_dir_item)
1114 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115 if (backref->found_dir_index && backref->filetype != filetype)
1116 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118 backref->filetype = filetype;
1119 backref->found_dir_item = 1;
1120 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122 if (backref->found_inode_ref)
1123 backref->errors |= REF_ERR_DUP_INODE_REF;
1124 if (backref->found_dir_index && backref->index != index)
1125 backref->errors |= REF_ERR_INDEX_UNMATCH;
1127 backref->index = index;
1129 backref->ref_type = itemtype;
1130 backref->found_inode_ref = 1;
1135 maybe_free_inode_rec(inode_cache, rec);
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140 struct cache_tree *dst_cache)
1142 struct inode_backref *backref;
1147 list_for_each_entry(backref, &src->backrefs, list) {
1148 if (backref->found_dir_index) {
1149 add_inode_backref(dst_cache, dst->ino, backref->dir,
1150 backref->index, backref->name,
1151 backref->namelen, backref->filetype,
1152 BTRFS_DIR_INDEX_KEY, backref->errors);
1154 if (backref->found_dir_item) {
1156 add_inode_backref(dst_cache, dst->ino,
1157 backref->dir, 0, backref->name,
1158 backref->namelen, backref->filetype,
1159 BTRFS_DIR_ITEM_KEY, backref->errors);
1161 if (backref->found_inode_ref) {
1162 add_inode_backref(dst_cache, dst->ino,
1163 backref->dir, backref->index,
1164 backref->name, backref->namelen, 0,
1165 backref->ref_type, backref->errors);
1169 if (src->found_dir_item)
1170 dst->found_dir_item = 1;
1171 if (src->found_file_extent)
1172 dst->found_file_extent = 1;
1173 if (src->found_csum_item)
1174 dst->found_csum_item = 1;
1175 if (src->some_csum_missing)
1176 dst->some_csum_missing = 1;
1177 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1183 BUG_ON(src->found_link < dir_count);
1184 dst->found_link += src->found_link - dir_count;
1185 dst->found_size += src->found_size;
1186 if (src->extent_start != (u64)-1) {
1187 if (dst->extent_start == (u64)-1) {
1188 dst->extent_start = src->extent_start;
1189 dst->extent_end = src->extent_end;
1191 if (dst->extent_end > src->extent_start)
1192 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193 else if (dst->extent_end < src->extent_start) {
1194 ret = add_file_extent_hole(&dst->holes,
1196 src->extent_start - dst->extent_end);
1198 if (dst->extent_end < src->extent_end)
1199 dst->extent_end = src->extent_end;
1203 dst->errors |= src->errors;
1204 if (src->found_inode_item) {
1205 if (!dst->found_inode_item) {
1206 dst->nlink = src->nlink;
1207 dst->isize = src->isize;
1208 dst->nbytes = src->nbytes;
1209 dst->imode = src->imode;
1210 dst->nodatasum = src->nodatasum;
1211 dst->found_inode_item = 1;
1213 dst->errors |= I_ERR_DUP_INODE_ITEM;
1221 static int splice_shared_node(struct shared_node *src_node,
1222 struct shared_node *dst_node)
1224 struct cache_extent *cache;
1225 struct ptr_node *node, *ins;
1226 struct cache_tree *src, *dst;
1227 struct inode_record *rec, *conflict;
1228 u64 current_ino = 0;
1232 if (--src_node->refs == 0)
1234 if (src_node->current)
1235 current_ino = src_node->current->ino;
1237 src = &src_node->root_cache;
1238 dst = &dst_node->root_cache;
1240 cache = search_cache_extent(src, 0);
1242 node = container_of(cache, struct ptr_node, cache);
1244 cache = next_cache_extent(cache);
1247 remove_cache_extent(src, &node->cache);
1250 ins = malloc(sizeof(*ins));
1252 ins->cache.start = node->cache.start;
1253 ins->cache.size = node->cache.size;
1257 ret = insert_cache_extent(dst, &ins->cache);
1258 if (ret == -EEXIST) {
1259 conflict = get_inode_rec(dst, rec->ino, 1);
1260 BUG_ON(IS_ERR(conflict));
1261 merge_inode_recs(rec, conflict, dst);
1263 conflict->checked = 1;
1264 if (dst_node->current == conflict)
1265 dst_node->current = NULL;
1267 maybe_free_inode_rec(dst, conflict);
1268 free_inode_rec(rec);
1275 if (src == &src_node->root_cache) {
1276 src = &src_node->inode_cache;
1277 dst = &dst_node->inode_cache;
1281 if (current_ino > 0 && (!dst_node->current ||
1282 current_ino > dst_node->current->ino)) {
1283 if (dst_node->current) {
1284 dst_node->current->checked = 1;
1285 maybe_free_inode_rec(dst, dst_node->current);
1287 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288 BUG_ON(IS_ERR(dst_node->current));
1293 static void free_inode_ptr(struct cache_extent *cache)
1295 struct ptr_node *node;
1296 struct inode_record *rec;
1298 node = container_of(cache, struct ptr_node, cache);
1300 free_inode_rec(rec);
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309 struct cache_extent *cache;
1310 struct shared_node *node;
1312 cache = lookup_cache_extent(shared, bytenr, 1);
1314 node = container_of(cache, struct shared_node, cache);
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 struct shared_node *node;
1325 node = calloc(1, sizeof(*node));
1328 node->cache.start = bytenr;
1329 node->cache.size = 1;
1330 cache_tree_init(&node->root_cache);
1331 cache_tree_init(&node->inode_cache);
1334 ret = insert_cache_extent(shared, &node->cache);
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340 struct walk_control *wc, int level)
1342 struct shared_node *node;
1343 struct shared_node *dest;
1346 if (level == wc->active_node)
1349 BUG_ON(wc->active_node <= level);
1350 node = find_shared_node(&wc->shared, bytenr);
1352 ret = add_shared_node(&wc->shared, bytenr, refs);
1354 node = find_shared_node(&wc->shared, bytenr);
1355 wc->nodes[level] = node;
1356 wc->active_node = level;
1360 if (wc->root_level == wc->active_node &&
1361 btrfs_root_refs(&root->root_item) == 0) {
1362 if (--node->refs == 0) {
1363 free_inode_recs_tree(&node->root_cache);
1364 free_inode_recs_tree(&node->inode_cache);
1365 remove_cache_extent(&wc->shared, &node->cache);
1371 dest = wc->nodes[wc->active_node];
1372 splice_shared_node(node, dest);
1373 if (node->refs == 0) {
1374 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): called from walk_up_tree() when the
 * walk leaves the active shared node on @level.
 *
 * Nothing is spliced when @level is the root level.  Otherwise a level above
 * @level is selected as the new active node (running off the end of
 * wc->nodes[] is treated as a bug), the old active slot is cleared, and the
 * old node's records are spliced into the new active node when that node is
 * below the root level or the root still has root refs.
 *
 * NOTE(review): the loop body that selects the new level and the tail of the
 * else branch are not visible in this listing -- confirm against the full
 * file.
 */
1380 static int leave_shared_node(struct btrfs_root *root,
1381 struct walk_control *wc, int level)
1383 struct shared_node *node;
1384 struct shared_node *dest;
1387 if (level == wc->root_level)
1390 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1394 BUG_ON(i >= BTRFS_MAX_LEVEL);
1396 node = wc->nodes[wc->active_node];
1397 wc->nodes[wc->active_node] = NULL;
1398 wc->active_node = i;
1400 dest = wc->nodes[wc->active_node];
1401 if (wc->active_node < wc->root_level ||
1402 btrfs_root_refs(&root->root_item) > 0) {
1403 BUG_ON(node->refs <= 1);
1404 splice_shared_node(node, dest);
1406 BUG_ON(node->refs < 2);
/*
 * Return values:
 * 1 - the root with id child_root_id is a child of the root parent_root_id
 * 0 - the root child_root_id is not a child of the root parent_root_id but
 *     has other root(s) as parent(s)
 * 2 - the root child_root_id does not have any parent roots
 */
/*
 * Determine how subvolume root child_root_id relates to @parent_root_id by
 * searching the tree root of @root's filesystem.
 *
 * First an item keyed (parent_root_id, ROOT_REF, child_root_id) is searched
 * for.  Afterwards the ROOT_BACKREF items of child_root_id are examined: one
 * whose key offset equals @parent_root_id identifies the parent, and an item
 * outside the child's ROOT_BACKREF range is tested for separately.  If no
 * matching backref was found the result is 0 when some other parent was seen
 * (has_parent) and 2 otherwise.
 *
 * NOTE(review): the return statements of the match and error paths are not
 * visible in this listing -- confirm against the full file.
 */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423 struct btrfs_path path;
1424 struct btrfs_key key;
1425 struct extent_buffer *leaf;
1429 btrfs_init_path(&path);
1431 key.objectid = parent_root_id;
1432 key.type = BTRFS_ROOT_REF_KEY;
1433 key.offset = child_root_id;
1434 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1438 btrfs_release_path(&path);
1442 key.objectid = child_root_id;
1443 key.type = BTRFS_ROOT_BACKREF_KEY;
1445 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1451 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: move on to the next leaf. */
1452 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456 leaf = path.nodes[0];
1459 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460 if (key.objectid != child_root_id ||
1461 key.type != BTRFS_ROOT_BACKREF_KEY)
1466 if (key.offset == parent_root_id) {
1467 btrfs_release_path(&path);
1474 btrfs_release_path(&path);
1477 return has_parent ? 0 : 2;
1480 static int process_dir_item(struct extent_buffer *eb,
1481 int slot, struct btrfs_key *key,
1482 struct shared_node *active_node)
1492 struct btrfs_dir_item *di;
1493 struct inode_record *rec;
1494 struct cache_tree *root_cache;
1495 struct cache_tree *inode_cache;
1496 struct btrfs_key location;
1497 char namebuf[BTRFS_NAME_LEN];
1499 root_cache = &active_node->root_cache;
1500 inode_cache = &active_node->inode_cache;
1501 rec = active_node->current;
1502 rec->found_dir_item = 1;
1504 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1505 total = btrfs_item_size_nr(eb, slot);
1506 while (cur < total) {
1508 btrfs_dir_item_key_to_cpu(eb, di, &location);
1509 name_len = btrfs_dir_name_len(eb, di);
1510 data_len = btrfs_dir_data_len(eb, di);
1511 filetype = btrfs_dir_type(eb, di);
1513 rec->found_size += name_len;
1514 if (cur + sizeof(*di) + name_len > total ||
1515 name_len > BTRFS_NAME_LEN) {
1516 error = REF_ERR_NAME_TOO_LONG;
1518 if (cur + sizeof(*di) > total)
1520 len = min_t(u32, total - cur - sizeof(*di),
1527 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1529 if (key->type == BTRFS_DIR_ITEM_KEY &&
1530 key->offset != btrfs_name_hash(namebuf, len)) {
1531 rec->errors |= I_ERR_ODD_DIR_ITEM;
1532 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1533 key->objectid, key->offset, namebuf, len, filetype,
1534 key->offset, btrfs_name_hash(namebuf, len));
1537 if (location.type == BTRFS_INODE_ITEM_KEY) {
1538 add_inode_backref(inode_cache, location.objectid,
1539 key->objectid, key->offset, namebuf,
1540 len, filetype, key->type, error);
1541 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1542 add_inode_backref(root_cache, location.objectid,
1543 key->objectid, key->offset,
1544 namebuf, len, filetype,
1547 fprintf(stderr, "invalid location in dir item %u\n",
1549 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1550 key->objectid, key->offset, namebuf,
1551 len, filetype, key->type, error);
1554 len = sizeof(*di) + name_len + data_len;
1555 di = (struct btrfs_dir_item *)((char *)di + len);
1558 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1559 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed into the item at @slot and record one
 * inode backref per entry in the active node's inode cache.
 *
 * An entry whose name would cross the item boundary, or whose name_len
 * exceeds BTRFS_NAME_LEN, is flagged REF_ERR_NAME_TOO_LONG and only the part
 * of the name that fits is read.  @key->objectid and @key->offset are passed
 * to add_inode_backref() together with the entry's index and name
 * (presumably as inode and parent directory -- TODO confirm).
 *
 * NOTE(review): the non-error branch and the cursor update are not visible in
 * this listing -- confirm against the full file.
 */
1564 static int process_inode_ref(struct extent_buffer *eb,
1565 int slot, struct btrfs_key *key,
1566 struct shared_node *active_node)
1574 struct cache_tree *inode_cache;
1575 struct btrfs_inode_ref *ref;
1576 char namebuf[BTRFS_NAME_LEN];
1578 inode_cache = &active_node->inode_cache;
1580 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1581 total = btrfs_item_size_nr(eb, slot);
1582 while (cur < total) {
1583 name_len = btrfs_inode_ref_name_len(eb, ref);
1584 index = btrfs_inode_ref_index(eb, ref);
1586 /* inode_ref + namelen should not cross item boundary */
1587 if (cur + sizeof(*ref) + name_len > total ||
1588 name_len > BTRFS_NAME_LEN) {
1589 if (total < cur + sizeof(*ref))
1592 /* Still try to read out the remaining part */
1593 len = min_t(u32, total - cur - sizeof(*ref),
1595 error = REF_ERR_NAME_TOO_LONG;
1601 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1602 add_inode_backref(inode_cache, key->objectid, key->offset,
1603 index, namebuf, len, 0, key->type, error);
/* Step over the fixed header plus the on-disk name to the next entry. */
1605 len = sizeof(*ref) + name_len;
1606 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Walk every btrfs_inode_extref packed into the item at @slot and record one
 * inode backref per entry in the active node's inode cache.
 *
 * For each entry the parent, index and name are read from the extref itself.
 * A name_len above BTRFS_NAME_LEN is clamped to BTRFS_NAME_LEN for the read
 * and the backref is flagged REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): the branch taken for a valid name_len and the cursor update
 * are not visible in this listing -- confirm against the full file.
 */
1612 static int process_inode_extref(struct extent_buffer *eb,
1613 int slot, struct btrfs_key *key,
1614 struct shared_node *active_node)
1623 struct cache_tree *inode_cache;
1624 struct btrfs_inode_extref *extref;
1625 char namebuf[BTRFS_NAME_LEN];
1627 inode_cache = &active_node->inode_cache;
1629 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1630 total = btrfs_item_size_nr(eb, slot);
1631 while (cur < total) {
1632 name_len = btrfs_inode_extref_name_len(eb, extref);
1633 index = btrfs_inode_extref_index(eb, extref);
1634 parent = btrfs_inode_extref_parent(eb, extref);
1635 if (name_len <= BTRFS_NAME_LEN) {
1639 len = BTRFS_NAME_LEN;
1640 error = REF_ERR_NAME_TOO_LONG;
1642 read_extent_buffer(eb, namebuf,
1643 (unsigned long)(extref + 1), len);
1644 add_inode_backref(inode_cache, key->objectid, parent,
1645 index, namebuf, len, 0, key->type, error);
/* Step over the fixed header plus the on-disk name to the next entry. */
1647 len = sizeof(*extref) + name_len;
1648 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the checksum tree for EXTENT_CSUM items that overlap the byte range
 * starting at @start and spanning @len bytes.
 *
 * The range covered by one item ends at
 *   key.offset + (item size / csum size) * sectorsize.
 * After the initial search the previous slot is examined when the search did
 * not hit exactly, so that an item beginning before @start is not missed.
 * Leaves are traversed with btrfs_next_leaf(), and items outside the
 * EXTENT_CSUM key space or beginning at or beyond @start + @len are tested
 * for separately.
 *
 * NOTE(review): the accumulation into @found, the advance of @start/@len and
 * the return paths are not visible in this listing -- confirm against the
 * full file.
 */
1655 static int count_csum_range(struct btrfs_root *root, u64 start,
1656 u64 len, u64 *found)
1658 struct btrfs_key key;
1659 struct btrfs_path path;
1660 struct extent_buffer *leaf;
1665 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1667 btrfs_init_path(&path);
1669 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1671 key.type = BTRFS_EXTENT_CSUM_KEY;
1673 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact hit: the preceding item may still cover the start of the range. */
1677 if (ret > 0 && path.slots[0] > 0) {
1678 leaf = path.nodes[0];
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1680 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1681 key.type == BTRFS_EXTENT_CSUM_KEY)
1686 leaf = path.nodes[0];
1687 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1688 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1693 leaf = path.nodes[0];
1696 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1697 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1698 key.type != BTRFS_EXTENT_CSUM_KEY)
1701 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1702 if (key.offset >= start + len)
1705 if (key.offset > start)
1708 size = btrfs_item_size_nr(leaf, path.slots[0]);
1709 csum_end = key.offset + (size / csum_size) *
1710 root->fs_info->sectorsize;
1711 if (csum_end > start) {
1712 size = min(csum_end - start, len);
1721 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the inode currently being processed and
 * update that inode's record.
 *
 * Visible checks and bookkeeping:
 *  - the record must belong to key->objectid and have at most one ref;
 *  - the first extent initialises extent_start/extent_end; afterwards an
 *    extent starting before extent_end sets I_ERR_FILE_EXTENT_OVERLAP and a
 *    gap is recorded through add_file_extent_hole();
 *  - inline extents add their length to found_size, and num_bytes is rounded
 *    up to the sector size;
 *  - regular/prealloc extents set I_ERR_BAD_FILE_EXTENT when num_bytes is
 *    zero or unaligned, when num_bytes + offset exceeds ram_bytes, or when a
 *    prealloc extent carries compression/encryption/other encoding; only
 *    extents with a non-zero disk_bytenr add to found_size;
 *  - extent_end becomes key->offset + num_bytes;
 *  - for extents with a disk_bytenr outside the data reloc tree the checksum
 *    coverage is counted with count_csum_range(); regular extents record
 *    found_csum_item/some_csum_missing, prealloc extents can be flagged
 *    I_ERR_ODD_CSUM_ITEM.
 *
 * NOTE(review): several conditions, else branches and the return paths are
 * not visible in this listing -- confirm against the full file.
 */
1727 static int process_file_extent(struct btrfs_root *root,
1728 struct extent_buffer *eb,
1729 int slot, struct btrfs_key *key,
1730 struct shared_node *active_node)
1732 struct inode_record *rec;
1733 struct btrfs_file_extent_item *fi;
1735 u64 disk_bytenr = 0;
1736 u64 extent_offset = 0;
1737 u64 mask = root->fs_info->sectorsize - 1;
1741 rec = active_node->current;
1742 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1743 rec->found_file_extent = 1;
1745 if (rec->extent_start == (u64)-1) {
1746 rec->extent_start = key->offset;
1747 rec->extent_end = key->offset;
1750 if (rec->extent_end > key->offset)
1751 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1752 else if (rec->extent_end < key->offset) {
1753 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1754 key->offset - rec->extent_end);
1759 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1760 extent_type = btrfs_file_extent_type(eb, fi);
1762 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1763 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1765 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766 rec->found_size += num_bytes;
/* Round the inline length up to a multiple of the sector size. */
1767 num_bytes = (num_bytes + mask) & ~mask;
1768 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1769 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1770 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1771 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1772 extent_offset = btrfs_file_extent_offset(eb, fi);
1773 if (num_bytes == 0 || (num_bytes & mask))
1774 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775 if (num_bytes + extent_offset >
1776 btrfs_file_extent_ram_bytes(eb, fi))
1777 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1778 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1779 (btrfs_file_extent_compression(eb, fi) ||
1780 btrfs_file_extent_encryption(eb, fi) ||
1781 btrfs_file_extent_other_encoding(eb, fi)))
1782 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1783 if (disk_bytenr > 0)
1784 rec->found_size += num_bytes;
1786 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1788 rec->extent_end = key->offset + num_bytes;
1791 * The data reloc tree will copy full extents into its inode and then
1792 * copy the corresponding csums. Because the extent it copied could be
1793 * a preallocated extent that hasn't been written to yet there may be no
1794 * csums to copy, ergo we won't have csums for our file extent. This is
1795 * ok so just don't bother checking csums if the inode belongs to the
1798 if (disk_bytenr > 0 &&
1799 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1801 if (btrfs_file_extent_compression(eb, fi))
1802 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1804 disk_bytenr += extent_offset;
1806 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1809 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1811 rec->found_csum_item = 1;
1812 if (found < num_bytes)
1813 rec->some_csum_missing = 1;
1814 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1816 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb to the per-type handlers, keeping
 * active_node->current pointed at the inode record that owns the item.
 *
 * A leaf reached while the walk is at the root level of a root with zero root
 * refs is tested for up front.  For each item, keys of the free space
 * objectid and ORPHAN_ITEM keys are special-cased.  When the objectid moves
 * past the current record, the previous record is marked checked and offered
 * to maybe_free_inode_rec() before the record for the new objectid is
 * fetched.  The item is then dispatched by key type to process_dir_item(),
 * process_inode_ref(), process_inode_extref(), process_inode_item() or
 * process_file_extent().
 *
 * NOTE(review): the actions taken for the special-cased keys, the switch
 * statement framing and the return paths are not visible in this listing --
 * confirm against the full file.
 */
1822 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1823 struct walk_control *wc)
1825 struct btrfs_key key;
1829 struct cache_tree *inode_cache;
1830 struct shared_node *active_node;
1832 if (wc->root_level == wc->active_node &&
1833 btrfs_root_refs(&root->root_item) == 0)
1836 active_node = wc->nodes[wc->active_node];
1837 inode_cache = &active_node->inode_cache;
1838 nritems = btrfs_header_nritems(eb);
1839 for (i = 0; i < nritems; i++) {
1840 btrfs_item_key_to_cpu(eb, &key, i);
1842 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1844 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Objectid advanced: finish the previous inode record and fetch the next. */
1847 if (active_node->current == NULL ||
1848 active_node->current->ino < key.objectid) {
1849 if (active_node->current) {
1850 active_node->current->checked = 1;
1851 maybe_free_inode_rec(inode_cache,
1852 active_node->current);
1854 active_node->current = get_inode_rec(inode_cache,
1856 BUG_ON(IS_ERR(active_node->current));
1859 case BTRFS_DIR_ITEM_KEY:
1860 case BTRFS_DIR_INDEX_KEY:
1861 ret = process_dir_item(eb, i, &key, active_node);
1863 case BTRFS_INODE_REF_KEY:
1864 ret = process_inode_ref(eb, i, &key, active_node);
1866 case BTRFS_INODE_EXTREF_KEY:
1867 ret = process_inode_extref(eb, i, &key, active_node);
1869 case BTRFS_INODE_ITEM_KEY:
1870 ret = process_inode_item(eb, i, &key, active_node);
1872 case BTRFS_EXTENT_DATA_KEY:
1873 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache used by the tree walkers (members of the node_refs
 * structure that the functions below take as 'nrefs'):
 *   bytenr[level]     - start of the tree block last looked up on that level
 *   refs[level]       - reference count found for that block
 *   need_check[level] - whether the block has to be checked from this root
 */
1884 u64 bytenr[BTRFS_MAX_LEVEL];
1885 u64 refs[BTRFS_MAX_LEVEL];
1886 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers that are defined later in this file. */
1889 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1890 struct node_refs *nrefs, u64 level);
1891 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1892 unsigned int ext_ref);
/*
 * Returns >0  Found error, not fatal, should continue
 * Returns <0  Fatal error, must exit the whole check
 * Returns 0   No errors found
 */
/*
 * Check the inodes of the leaf at path->nodes[0].
 *
 * The leaf is first scanned for its first INODE_ITEM or the first change of
 * inode number.  check_inode_item() is then called and its result is OR-ed
 * into the error bitmap; a LAST_ITEM bit in that bitmap is tested for.  The
 * leaf's start address is remembered so the function can tell whether
 * check_inode_item() moved the path to another leaf.  If it did, the path is
 * walked from the root level down, update_nodes_refs() is called for blocks
 * whose start differs from the cached nrefs->bytenr[], and the levels below
 * *level are released from the path.
 *
 * NOTE(review): loop framing, how *level is updated and the return paths are
 * not visible in this listing -- confirm against the full file.
 */
1899 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1900 struct node_refs *nrefs, int *level, int ext_ref)
1902 struct extent_buffer *cur = path->nodes[0];
1903 struct btrfs_key key;
1907 int root_level = btrfs_header_level(root->node);
1909 int ret = 0; /* Final return value */
1910 int err = 0; /* Positive error bitmap */
1912 cur_bytenr = cur->start;
1914 /* skip to first inode item or the first inode number change */
1915 nritems = btrfs_header_nritems(cur);
1916 for (i = 0; i < nritems; i++) {
1917 btrfs_item_key_to_cpu(cur, &key, i);
1919 first_ino = key.objectid;
1920 if (key.type == BTRFS_INODE_ITEM_KEY ||
1921 (first_ino && first_ino != key.objectid))
1925 path->slots[0] = nritems;
1931 err |= check_inode_item(root, path, ext_ref);
1933 if (err & LAST_ITEM)
1936 /* still have inode items in this leaf */
1937 if (cur->start == cur_bytenr)
1941 * we have switched to another leaf, above nodes may
1942 * have changed, here walk down the path, if a node
1943 * or leaf is shared, check whether we can skip this
1946 for (i = root_level; i >= 0; i--) {
1947 if (path->nodes[i]->start == nrefs->bytenr[i])
1950 ret = update_nodes_refs(root,
1951 path->nodes[i]->start,
1956 if (!nrefs->need_check[i]) {
1962 for (i = 0; i < *level; i++) {
1963 free_extent_buffer(path->nodes[i]);
1964 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at pointer @slot and
 * continuing to the last pointer in the node.
 *
 * NOTE(review): the level of @node is read, but the condition applied to it
 * is not visible in this listing -- confirm against the full file.
 */
1973 static void reada_walk_down(struct btrfs_root *root,
1974 struct extent_buffer *node, int slot)
1976 struct btrfs_fs_info *fs_info = root->fs_info;
1983 level = btrfs_header_level(node);
1987 nritems = btrfs_header_nritems(node);
1988 for (i = slot; i < nritems; i++) {
1989 bytenr = btrfs_node_blockptr(node, i);
1990 ptr_gen = btrfs_node_ptr_generation(node, i);
1991 readahead_tree_block(fs_info, bytenr, ptr_gen);
/*
 * Check the child node/leaf against the following conditions:
 * 1. the first key of the child node/leaf must be the same as the key stored
 *    for it in the parent.
 * 2. the block pointer in the parent node must match the child node/leaf.
 * 3. the generation in the parent's pointer and in the child's header must be
 *    consistent.
 *
 * Otherwise the child node/leaf pointed to by the key in the parent is not
 * valid.
 *
 * We would like to check the leaf owner too, but since subvolumes may share
 * leaves the owner check is not that strong; the key check should be
 * sufficient for that case.
 */
2008 static int check_child_node(struct extent_buffer *parent, int slot,
2009 struct extent_buffer *child)
2011 struct btrfs_key parent_key;
2012 struct btrfs_key child_key;
2015 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2016 if (btrfs_header_level(child) == 0)
2017 btrfs_item_key_to_cpu(child, &child_key, 0);
2019 btrfs_node_key_to_cpu(child, &child_key, 0);
2021 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2024 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2025 parent_key.objectid, parent_key.type, parent_key.offset,
2026 child_key.objectid, child_key.type, child_key.offset);
2028 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2030 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2031 btrfs_node_blockptr(parent, slot),
2032 btrfs_header_bytenr(child));
2034 if (btrfs_node_ptr_generation(parent, slot) !=
2035 btrfs_header_generation(child)) {
2037 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2038 btrfs_header_generation(child),
2039 btrfs_node_ptr_generation(parent, slot));
/*
 * If a tree node or leaf is shared, there is no need to iterate over it in
 * every fs or file tree check. Find all the root ids that reference it and
 * only check it in the fs or file tree that has the smallest root id.
 */
/*
 * Decide from @roots whether @root is the one that should check a tree block.
 * @roots is the ulist that update_nodes_refs() fills through
 * btrfs_find_all_roots().
 *
 * A list with exactly one entry is handled up front.  Otherwise the first
 * entry of the list's rb-tree is fetched and its value is compared with
 * root->objectid.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably non-zero means "check it here" -- confirm against the full file.
 */
2049 static int need_check(struct btrfs_root *root, struct ulist *roots)
2051 struct rb_node *node;
2052 struct ulist_node *u;
2054 if (roots->nnodes == 1)
2057 node = rb_first(&roots->root);
2058 u = rb_entry(node, struct ulist_node, rb_node);
2060 * current root id is not smallest, we skip it and let it be checked
2061 * in the fs or file tree who hash the smallest root id.
2063 if (root->objectid != u->val)
/*
 * Record the reference count of a tree node or leaf, so that if the same node
 * or leaf is processed again later its reference count does not have to be
 * computed again.
 */
/*
 * Refresh the cached information in @nrefs for the tree block at @bytenr on
 * @level.
 *
 * When the cached bytenr for @level differs from @bytenr, the extent's
 * reference count is looked up and stored together with @bytenr.  The set of
 * roots referencing the block is then obtained via btrfs_find_all_roots() and
 * need_check() decides the value stored in nrefs->need_check[level];
 * another visible path simply sets need_check[level] to 1.
 *
 * NOTE(review): the conditions selecting between those paths, the release of
 * the roots list and the return values are not visible in this listing --
 * confirm against the full file.
 */
2073 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2074 struct node_refs *nrefs, u64 level)
2078 struct ulist *roots;
2080 if (nrefs->bytenr[level] != bytenr) {
2081 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2082 level, 1, &refs, NULL);
2086 nrefs->bytenr[level] = bytenr;
2087 nrefs->refs[level] = refs;
2089 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2094 check = need_check(root, roots);
2096 nrefs->need_check[level] = check;
2098 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: walk from path->nodes[*level] down towards the
 * leaves, processing leaves with process_one_leaf().
 *
 * For the starting block and for every child pointer, the reference count is
 * taken from @nrefs when the cached bytenr matches, and is otherwise looked up
 * with btrfs_lookup_extent_info() and cached.  enter_shared_node() is called
 * with that count.  A child is taken from the block cache when it is present
 * and up to date; otherwise siblings are read ahead and the block is read
 * from disk.  A block that cannot be read is recorded with
 * btrfs_add_corrupt_extent_record().  Each child must pass
 * check_child_node() and btrfs_check_leaf()/btrfs_check_node() before the
 * path descends into it.  On the way out the slot of the current level is set
 * to its item count, i.e. past the last item.
 *
 * NOTE(review): the conditions guarding enter_shared_node(), the error exits
 * and the return value are not visible in this listing -- confirm against
 * the full file.
 */
2105 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2106 struct walk_control *wc, int *level,
2107 struct node_refs *nrefs)
2109 enum btrfs_tree_block_status status;
2112 struct btrfs_fs_info *fs_info = root->fs_info;
2113 struct extent_buffer *next;
2114 struct extent_buffer *cur;
2118 WARN_ON(*level < 0);
2119 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached reference count when it belongs to this very block. */
2121 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2122 refs = nrefs->refs[*level];
2125 ret = btrfs_lookup_extent_info(NULL, root,
2126 path->nodes[*level]->start,
2127 *level, 1, &refs, NULL);
2132 nrefs->bytenr[*level] = path->nodes[*level]->start;
2133 nrefs->refs[*level] = refs;
2137 ret = enter_shared_node(root, path->nodes[*level]->start,
2145 while (*level >= 0) {
2146 WARN_ON(*level < 0);
2147 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2148 cur = path->nodes[*level];
2150 if (btrfs_header_level(cur) != *level)
2153 if (path->slots[*level] >= btrfs_header_nritems(cur))
2156 ret = process_one_leaf(root, cur, wc);
2161 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2162 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2164 if (bytenr == nrefs->bytenr[*level - 1]) {
2165 refs = nrefs->refs[*level - 1];
2167 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2168 *level - 1, 1, &refs, NULL);
2172 nrefs->bytenr[*level - 1] = bytenr;
2173 nrefs->refs[*level - 1] = refs;
2178 ret = enter_shared_node(root, bytenr, refs,
2181 path->slots[*level]++;
/* Prefer a cached, up-to-date copy of the child; otherwise read it from disk. */
2186 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2187 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2188 free_extent_buffer(next);
2189 reada_walk_down(root, cur, path->slots[*level]);
2190 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2191 if (!extent_buffer_uptodate(next)) {
2192 struct btrfs_key node_key;
2194 btrfs_node_key_to_cpu(path->nodes[*level],
2196 path->slots[*level]);
2197 btrfs_add_corrupt_extent_record(root->fs_info,
2199 path->nodes[*level]->start,
2200 root->fs_info->nodesize,
2207 ret = check_child_node(cur, path->slots[*level], next);
2209 free_extent_buffer(next);
2214 if (btrfs_is_leaf(next))
2215 status = btrfs_check_leaf(root, NULL, next);
2217 status = btrfs_check_node(root, NULL, next);
2218 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2219 free_extent_buffer(next);
/* Descend: the child replaces whatever buffer the lower level held. */
2224 *level = *level - 1;
2225 free_extent_buffer(path->nodes[*level]);
2226 path->nodes[*level] = next;
2227 path->slots[*level] = 0;
2230 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Forward declaration of check_inode_item().  The same prototype is already
 * declared earlier in this file, next to update_nodes_refs().
 */
2234 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2235 unsigned int ext_ref);
/*
 * Returns >0  Found error, should continue
 * Returns <0  Fatal error, must exit the whole check
 * Returns 0   No errors found
 */
/*
 * Low-memory-mode descent: walk from path->nodes[*level] down towards the
 * leaves, checking leaves with process_one_leaf_v2().
 *
 * update_nodes_refs() is called for the starting block and for every child
 * pointer.  A child whose need_check[] entry is zero is stepped over by
 * advancing the slot.  The current block is validated with
 * btrfs_check_leaf() or btrfs_check_node() before use.  A child is taken from
 * the block cache when it is present and up to date; otherwise siblings are
 * read ahead and the block is read from disk.  A block that cannot be read is
 * recorded with btrfs_add_corrupt_extent_record().  Each child must pass
 * check_child_node() and the leaf/node checker before the path descends into
 * it.
 *
 * NOTE(review): the error exits and the way results are accumulated are not
 * visible in this listing -- confirm against the full file.
 */
2242 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2243 int *level, struct node_refs *nrefs, int ext_ref)
2245 enum btrfs_tree_block_status status;
2248 struct btrfs_fs_info *fs_info = root->fs_info;
2249 struct extent_buffer *next;
2250 struct extent_buffer *cur;
2253 WARN_ON(*level < 0);
2254 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2256 ret = update_nodes_refs(root, path->nodes[*level]->start,
2261 while (*level >= 0) {
2262 WARN_ON(*level < 0);
2263 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2264 cur = path->nodes[*level];
2266 if (btrfs_header_level(cur) != *level)
2269 if (path->slots[*level] >= btrfs_header_nritems(cur))
2271 /* Don't forget to check leaf/node validity */
2273 ret = btrfs_check_leaf(root, NULL, cur);
2274 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2278 ret = process_one_leaf_v2(root, path, nrefs,
2282 ret = btrfs_check_node(root, NULL, cur);
2283 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2288 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2289 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2291 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking from this root: step to the next pointer. */
2294 if (!nrefs->need_check[*level - 1]) {
2295 path->slots[*level]++;
2299 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2300 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2301 free_extent_buffer(next);
2302 reada_walk_down(root, cur, path->slots[*level]);
2303 next = read_tree_block(fs_info, bytenr, ptr_gen);
2304 if (!extent_buffer_uptodate(next)) {
2305 struct btrfs_key node_key;
2307 btrfs_node_key_to_cpu(path->nodes[*level],
2309 path->slots[*level]);
2310 btrfs_add_corrupt_extent_record(fs_info,
2312 path->nodes[*level]->start,
2320 ret = check_child_node(cur, path->slots[*level], next);
2324 if (btrfs_is_leaf(next))
2325 status = btrfs_check_leaf(root, NULL, next);
2327 status = btrfs_check_node(root, NULL, next);
2328 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2329 free_extent_buffer(next);
/* Descend: the child replaces whatever buffer the lower level held. */
2334 *level = *level - 1;
2335 free_extent_buffer(path->nodes[*level]);
2336 path->nodes[*level] = next;
2337 path->slots[*level] = 0;
/*
 * Original-mode ascent: starting at *level, climb the path looking for a
 * level that still has an unvisited slot.
 *
 * For every populated level below BTRFS_MAX_LEVEL - 1 the slot is compared
 * with the block's item count.  When the level is exhausted, the buffer at
 * *level is released and cleared from the path, and leave_shared_node() is
 * called if that level was the active shared node (a level above the active
 * node is treated as a bug).
 *
 * NOTE(review): the slot advance, the updates of *level and the return
 * values are not visible in this listing -- confirm against the full file.
 */
2342 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2343 struct walk_control *wc, int *level)
2346 struct extent_buffer *leaf;
2348 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2349 leaf = path->nodes[i];
2350 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2355 free_extent_buffer(path->nodes[*level]);
2356 path->nodes[*level] = NULL;
2357 BUG_ON(*level > wc->active_node);
2358 if (*level == wc->active_node)
2359 leave_shared_node(root, wc, *level);
/*
 * Low-memory-mode ascent: same climbing loop as walk_up_tree(), but without
 * any shared-node bookkeeping.  An exhausted level has its buffer released
 * and cleared from the path.
 *
 * NOTE(review): the slot advance, the updates of *level and the return
 * values are not visible in this listing -- confirm against the full file.
 */
2366 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2370 struct extent_buffer *leaf;
2372 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2373 leaf = path->nodes[i];
2374 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2379 free_extent_buffer(path->nodes[*level]);
2380 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The visible tests look at the following conditions:
 *  - whether the inode item was found and whether any error is recorded,
 *  - nlink == 1 and found_link == 0,
 *  - the presence of at least one backref,
 *  - the first backref: whether its inode ref was found, whether it has
 *    index 0 and the two-byte name "..", and whether a dir index or dir item
 *    was found for it.
 *
 * NOTE(review): the outcome of each test and the return value are not
 * visible in this listing -- confirm against the full file.
 */
2387 static int check_root_dir(struct inode_record *rec)
2389 struct inode_backref *backref;
2392 if (!rec->found_inode_item || rec->errors)
2394 if (rec->nlink != 1 || rec->found_link != 0)
2396 if (list_empty(&rec->backrefs))
2398 backref = to_inode_backref(rec->backrefs.next);
2399 if (!backref->found_inode_ref)
2401 if (backref->index != 0 || backref->namelen != 2 ||
2402 memcmp(backref->name, "..", 2))
2404 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size.
 *
 * The inode item is located by searching for (ino, INODE_ITEM, (u64)-1) and
 * inspecting the key at the resulting slot; a slot of zero and a key with a
 * different objectid are tested for.  On success the leaf is marked dirty,
 * I_ERR_DIR_ISIZE_WRONG is cleared from the record and a message is printed.
 * The path is released before returning.
 *
 * NOTE(review): the slot adjustment after the search, the error exits and
 * the return value are not visible in this listing -- confirm against the
 * full file.
 */
2411 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2412 struct btrfs_root *root, struct btrfs_path *path,
2413 struct inode_record *rec)
2415 struct btrfs_inode_item *ei;
2416 struct btrfs_key key;
2419 key.objectid = rec->ino;
2420 key.type = BTRFS_INODE_ITEM_KEY;
2421 key.offset = (u64)-1;
2423 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2427 if (!path->slots[0]) {
2434 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2435 if (key.objectid != rec->ino) {
2440 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2441 struct btrfs_inode_item);
2442 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2443 btrfs_mark_buffer_dirty(path->nodes[0]);
2444 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2445 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2446 root->root_key.objectid);
2448 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino to @root and clear I_ERR_NO_ORPHAN_ITEM
 * from the record.  The path is released right after the insertion attempt.
 *
 * NOTE(review): the condition guarding the flag clear and the return value
 * are not visible in this listing -- confirm against the full file.
 */
2452 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2453 struct btrfs_root *root,
2454 struct btrfs_path *path,
2455 struct inode_record *rec)
2459 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2460 btrfs_release_path(path);
2462 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size.
 *
 * The inode item is searched for by key; once found, the field is updated,
 * the leaf is marked dirty, I_ERR_FILE_NBYTES_WRONG is cleared from the
 * record and a message is printed.  The path is released before returning.
 *
 * NOTE(review): the key offset, the handling of a failed or inexact search
 * and the return value are not visible in this listing -- confirm against
 * the full file.
 */
2466 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2467 struct btrfs_root *root,
2468 struct btrfs_path *path,
2469 struct inode_record *rec)
2471 struct btrfs_inode_item *ei;
2472 struct btrfs_key key;
2475 key.objectid = rec->ino;
2476 key.type = BTRFS_INODE_ITEM_KEY;
2479 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2486 /* Since ret == 0, no need to check anything */
2487 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2488 struct btrfs_inode_item);
2489 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2490 btrfs_mark_buffer_dirty(path->nodes[0]);
2491 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2492 printf("reset nbytes for ino %llu root %llu\n",
2493 rec->ino, root->root_key.objectid);
2495 btrfs_release_path(path);
/*
 * Re-create the DIR_INDEX item described by @backref for inode @rec.
 *
 * A transaction is started and an empty item keyed
 * (backref->dir, DIR_INDEX, backref->index) is inserted, sized for a dir item
 * header plus the name.  The item is filled with a location key pointing at
 * the INODE_ITEM of rec->ino, the file type derived from rec->imode, a zero
 * data length and the backref's name.  The transaction is then committed.
 *
 * Afterwards the in-memory state is brought in line: the backref is marked as
 * having a dir index, and the parent directory's record has the name length
 * added to found_size and I_ERR_DIR_ISIZE_WRONG set or cleared depending on
 * whether found_size now equals isize.
 *
 * NOTE(review): error checks after the transaction start and the item
 * insertion, and the return value, are not visible in this listing --
 * confirm against the full file.
 */
2499 static int add_missing_dir_index(struct btrfs_root *root,
2500 struct cache_tree *inode_cache,
2501 struct inode_record *rec,
2502 struct inode_backref *backref)
2504 struct btrfs_path path;
2505 struct btrfs_trans_handle *trans;
2506 struct btrfs_dir_item *dir_item;
2507 struct extent_buffer *leaf;
2508 struct btrfs_key key;
2509 struct btrfs_disk_key disk_key;
2510 struct inode_record *dir_rec;
2511 unsigned long name_ptr;
2512 u32 data_size = sizeof(*dir_item) + backref->namelen;
2515 trans = btrfs_start_transaction(root, 1);
2517 return PTR_ERR(trans);
2519 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2520 (unsigned long long)rec->ino);
2522 btrfs_init_path(&path);
2523 key.objectid = backref->dir;
2524 key.type = BTRFS_DIR_INDEX_KEY;
2525 key.offset = backref->index;
2526 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2529 leaf = path.nodes[0];
2530 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The location key of the new entry points at the inode item of rec->ino. */
2532 disk_key.objectid = cpu_to_le64(rec->ino);
2533 disk_key.type = BTRFS_INODE_ITEM_KEY;
2534 disk_key.offset = 0;
2536 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2537 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2538 btrfs_set_dir_data_len(leaf, dir_item, 0);
2539 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored directly behind the fixed-size dir item header. */
2540 name_ptr = (unsigned long)(dir_item + 1);
2541 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2542 btrfs_mark_buffer_dirty(leaf);
2543 btrfs_release_path(&path);
2544 btrfs_commit_transaction(trans, root);
2546 backref->found_dir_index = 1;
2547 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2548 BUG_ON(IS_ERR(dir_rec));
2551 dir_rec->found_size += backref->namelen;
2552 if (dir_rec->found_size == dir_rec->isize &&
2553 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2554 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2555 if (dir_rec->found_size != dir_rec->isize)
2556 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root.
 *
 * A transaction is started, the removal is announced on stderr and the entry
 * is looked up by directory, name and index.  One visible path releases the
 * path and commits without deleting anything.  Otherwise either the item at
 * the path is deleted with btrfs_del_item() or just the named entry is
 * removed with btrfs_delete_one_dir_name(), after which the path is released
 * and the transaction committed.
 *
 * NOTE(review): the conditions selecting between these paths and the return
 * values are not visible in this listing -- confirm against the full file.
 */
2561 static int delete_dir_index(struct btrfs_root *root,
2562 struct inode_backref *backref)
2564 struct btrfs_trans_handle *trans;
2565 struct btrfs_dir_item *di;
2566 struct btrfs_path path;
2569 trans = btrfs_start_transaction(root, 1);
2571 return PTR_ERR(trans);
2573 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2574 (unsigned long long)backref->dir,
2575 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2576 (unsigned long long)root->objectid);
2578 btrfs_init_path(&path);
2579 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2580 backref->name, backref->namelen,
2581 backref->index, -1);
2584 btrfs_release_path(&path);
2585 btrfs_commit_transaction(trans, root);
2592 ret = btrfs_del_item(trans, root, &path);
2594 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2596 btrfs_release_path(&path);
2597 btrfs_commit_transaction(trans, root);
/*
 * Re-create a missing inode item for rec->ino from what the check has
 * collected in @rec.
 *
 * The new item gets the current transaction id as generation, nbytes from
 * rec->found_size, and the current time as atime, ctime and mtime with otime
 * left at zero.  Two nlink values are visible: 1 and rec->found_link.  If a
 * dir item was seen for the inode it becomes a directory (S_IFDIR | 0755)
 * with size found_size, and a warning is printed when file extents were
 * seen as well; otherwise it becomes a regular file (S_IFREG | 0755) with
 * size rec->extent_end.  The item is inserted and the transaction committed.
 *
 * NOTE(review): the third parameter, the condition choosing the nlink value
 * and the return paths are not visible in this listing -- confirm against
 * the full file.
 */
2601 static int create_inode_item(struct btrfs_root *root,
2602 struct inode_record *rec,
2605 struct btrfs_trans_handle *trans;
2606 struct btrfs_inode_item inode_item;
2607 time_t now = time(NULL);
2610 trans = btrfs_start_transaction(root, 1);
2611 if (IS_ERR(trans)) {
2612 ret = PTR_ERR(trans);
2616 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2617 "be incomplete, please check permissions and content after "
2618 "the fsck completes.\n", (unsigned long long)root->objectid,
2619 (unsigned long long)rec->ino);
2621 memset(&inode_item, 0, sizeof(inode_item));
2622 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2624 btrfs_set_stack_inode_nlink(&inode_item, 1);
2626 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2627 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2628 if (rec->found_dir_item) {
2629 if (rec->found_file_extent)
2630 fprintf(stderr, "root %llu inode %llu has both a dir "
2631 "item and extents, unsure if it is a dir or a "
2632 "regular file so setting it as a directory\n",
2633 (unsigned long long)root->objectid,
2634 (unsigned long long)rec->ino);
2635 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2636 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2637 } else if (!rec->found_dir_item) {
2638 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2639 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2641 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2642 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2643 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2644 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2645 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2646 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2647 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2648 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2650 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2652 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the backrefs recorded for inode @rec.  The visible conditions
 * test a 'delete' flag; most of the repairs below require it to be clear.
 *
 * Visible repairs, applied per backref:
 *  - for the root directory, a missing inode item is re-created;
 *  - index 0 of the root directory is special-cased;
 *  - a dir index without an inode ref, or one whose index does not match the
 *    inode ref (REF_ERR_INDEX_UNMATCH), is deleted and the backref is
 *    unlinked from the record's list;
 *  - a missing dir index is added when dir item and inode ref both exist;
 *    a backref that then has dir item, dir index and inode ref with no
 *    errors is unlinked from the list;
 *  - when only the inode ref exists, the name is checked for conflicts and a
 *    dir index/item pair is inserted in its own transaction;
 *  - when all three references exist and match but the inode item is
 *    missing, the inode item is re-created.
 *
 * Returns the last error if there was one, otherwise the 'repaired' counter.
 *
 * NOTE(review): the last parameter, the error exits inside the loop and the
 * updates of 'repaired' are not visible in this listing -- confirm against
 * the full file.
 */
2656 static int repair_inode_backrefs(struct btrfs_root *root,
2657 struct inode_record *rec,
2658 struct cache_tree *inode_cache,
2661 struct inode_backref *tmp, *backref;
2662 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2666 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2667 if (!delete && rec->ino == root_dirid) {
2668 if (!rec->found_inode_item) {
2669 ret = create_inode_item(root, rec, 1);
2676 /* Index 0 for root dir's are special, don't mess with it */
2677 if (rec->ino == root_dirid && backref->index == 0)
2681 ((backref->found_dir_index && !backref->found_inode_ref) ||
2682 (backref->found_dir_index && backref->found_inode_ref &&
2683 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2684 ret = delete_dir_index(root, backref);
2688 list_del(&backref->list);
2693 if (!delete && !backref->found_dir_index &&
2694 backref->found_dir_item && backref->found_inode_ref) {
2695 ret = add_missing_dir_index(root, inode_cache, rec,
2700 if (backref->found_dir_item &&
2701 backref->found_dir_index) {
2702 if (!backref->errors &&
2703 backref->found_inode_ref) {
2704 list_del(&backref->list);
2711 if (!delete && (!backref->found_dir_index &&
2712 !backref->found_dir_item &&
2713 backref->found_inode_ref)) {
2714 struct btrfs_trans_handle *trans;
2715 struct btrfs_key location;
2717 ret = check_dir_conflict(root, backref->name,
2723 * let nlink fixing routine to handle it,
2724 * which can do it better.
2729 location.objectid = rec->ino;
2730 location.type = BTRFS_INODE_ITEM_KEY;
2731 location.offset = 0;
2733 trans = btrfs_start_transaction(root, 1);
2734 if (IS_ERR(trans)) {
2735 ret = PTR_ERR(trans);
2738 fprintf(stderr, "adding missing dir index/item pair "
2740 (unsigned long long)rec->ino);
2741 ret = btrfs_insert_dir_item(trans, root, backref->name,
2743 backref->dir, &location,
2744 imode_to_type(rec->imode),
2747 btrfs_commit_transaction(trans, root);
2751 if (!delete && (backref->found_inode_ref &&
2752 backref->found_dir_index &&
2753 backref->found_dir_item &&
2754 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2755 !rec->found_inode_item)) {
2756 ret = create_inode_item(root, rec, 0);
2763 return ret ? ret : repaired;
2767 * To determine the file type for nlink/inode_item repair
2769 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2770 * Return -ENOENT if file type is not found.
/*
 * Choose a file type for @rec and store it through @type.
 *
 * @rec:  inode record being repaired
 * @type: receives the chosen type value
 *
 * When the inode item was found, its mode is converted with
 * imode_to_type().  Otherwise the backrefs are scanned and the filetype
 * recorded on one that is flagged found_dir_index or found_dir_item is used.
 */
2772 static int find_file_type(struct inode_record *rec, u8 *type)
2774 struct inode_backref *backref;
2776 /* For inode item recovered case */
2777 if (rec->found_inode_item) {
2778 *type = imode_to_type(rec->imode);
2782 list_for_each_entry(backref, &rec->backrefs, list) {
/* A backref that only has an inode_ref is not consulted for the type. */
2783 if (backref->found_dir_index || backref->found_dir_item) {
2784 *type = backref->filetype;
2792 * To determine the file name for nlink repair
2794 * Return 0 if file name is found, set name and namelen.
2795 * Return -ENOENT if file name is not found.
/*
 * Copy a name for @rec out of its backref list.
 *
 * @rec:     inode record whose backrefs are searched
 * @name:    destination buffer; receives backref->namelen raw bytes
 * @namelen: receives the number of bytes copied
 *
 * A backref flagged found_dir_index, found_dir_item or found_inode_ref is
 * accepted as the source.  The copy is a plain memcpy(): nothing here
 * checks the size of @name or appends a NUL terminator, so the caller has
 * to supply a buffer that is large enough (and pre-zeroed if it is later
 * used as a C string).
 */
2797 static int find_file_name(struct inode_record *rec,
2798 char *name, int *namelen)
2800 struct inode_backref *backref;
2802 list_for_each_entry(backref, &rec->backrefs, list) {
2803 if (backref->found_dir_index || backref->found_dir_item ||
2804 backref->found_inode_ref) {
2805 memcpy(name, backref->name, backref->namelen);
2806 *namelen = backref->namelen;
2813 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from its backrefs.
 *
 * @trans: running transaction
 * @root:  tree that holds the inode
 * @path:  scratch path, released again at the end
 * @rec:   inode record whose links are rebuilt
 *
 * Three phases are visible below:
 *   1. btrfs_unlink() is called for every backref; backrefs that do not
 *      have all of found_dir_index, found_dir_item and found_inode_ref
 *      are dropped from rec->backrefs.
 *   2. The inode item is looked up (with cow enabled) and its nlink is
 *      written as 0.
 *   3. btrfs_add_link() is called for each backref still on the list.
 */
2814 static int reset_nlink(struct btrfs_trans_handle *trans,
2815 struct btrfs_root *root,
2816 struct btrfs_path *path,
2817 struct inode_record *rec)
2819 struct inode_backref *backref;
2820 struct inode_backref *tmp;
2821 struct btrfs_key key;
2822 struct btrfs_inode_item *inode_item;
2825 /* We don't believe this either, reset it and iterate backref */
2826 rec->found_link = 0;
2828 /* Remove all backref including the valid ones */
/* The _safe iterator is needed because entries may be unlinked below. */
2829 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2830 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2831 backref->index, backref->name,
2832 backref->namelen, 0);
2836 /* remove invalid backref, so it won't be added back */
2837 if (!(backref->found_dir_index &&
2838 backref->found_dir_item &&
2839 backref->found_inode_ref)) {
2840 list_del(&backref->list);
2847 /* Set nlink to 0 */
2848 key.objectid = rec->ino;
2849 key.type = BTRFS_INODE_ITEM_KEY;
2851 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2858 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2859 struct btrfs_inode_item);
2860 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2861 btrfs_mark_buffer_dirty(path->nodes[0]);
2862 btrfs_release_path(path);
2865 * Add back valid inode_ref/dir_item/dir_index,
2866 * add_link() will handle the nlink inc, so new nlink must be correct
/* Only backrefs that survived the pruning above are re-linked. */
2868 list_for_each_entry(backref, &rec->backrefs, list) {
2869 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2870 backref->name, backref->namelen,
2871 backref->filetype, &backref->index, 1);
2876 btrfs_release_path(path);
/*
 * Look up an inode number close to the top of the objectid space of @root
 * and store it in *highest_ino.
 *
 * The search key uses objectid BTRFS_LAST_FREE_OBJECTID and type
 * BTRFS_INODE_ITEM_KEY; the objectid of the item in the slot just before
 * the returned position is reported.  @path is initialised on entry and
 * released at the end.
 *
 * NOTE(review): btrfs_search_slot() is called with ins_len == -1 and
 * cow == 1 although no deletion is visible in this function -- confirm
 * this is intentional.
 * NOTE(review): path->slots[0] - 1 presumes the returned slot is not 0 --
 * confirm against an empty or freshly created tree.
 */
2880 static int get_highest_inode(struct btrfs_trans_handle *trans,
2881 struct btrfs_root *root,
2882 struct btrfs_path *path,
2885 struct btrfs_key key, found_key;
2888 btrfs_init_path(path);
2889 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2891 key.type = BTRFS_INODE_ITEM_KEY;
2892 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2894 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2895 path->slots[0] - 1);
2896 *highest_ino = found_key.objectid;
/* A value at or above the last free objectid is treated specially. */
2899 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2901 btrfs_release_path(path);
/*
 * Repair a wrong link count on @rec.
 *
 * @trans: running transaction
 * @root:  tree that holds the inode
 * @path:  scratch path, released at the end
 * @rec:   inode record being repaired
 *
 * A file name and a file type are collected first, falling back to the
 * decimal inode number and BTRFS_FT_REG_FILE.  reset_nlink() then rebuilds
 * the links from the backrefs.  If no link is left (rec->found_link == 0),
 * a lost+found directory is created with btrfs_mkdir() under
 * BTRFS_FIRST_FREE_OBJECTID and the inode is linked into it; while the
 * link attempt returns -EEXIST the name is extended with an inode-number
 * suffix and the link is retried.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared from rec->errors at the end.
 */
2905 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 char *dir_name = "lost+found";
/* Zero-filled so a name copied in without a terminator still ends in NUL. */
2911 char namebuf[BTRFS_NAME_LEN] = {0};
2916 int name_recovered = 0;
2917 int type_recovered = 0;
2921 * Get file name and type first before these invalid inode ref
2922 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation. */
2924 name_recovered = !find_file_name(rec, namebuf, &namelen);
2925 type_recovered = !find_file_type(rec, &type);
2927 if (!name_recovered) {
2928 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2929 rec->ino, rec->ino);
/* The fallback name is the inode number printed in decimal. */
2930 namelen = count_digits(rec->ino);
2931 sprintf(namebuf, "%llu", rec->ino);
2934 if (!type_recovered) {
2935 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2937 type = BTRFS_FT_REG_FILE;
2941 ret = reset_nlink(trans, root, path, rec);
2944 "Failed to reset nlink for inode %llu: %s\n",
2945 rec->ino, strerror(-ret));
/* No link survived reset_nlink(): move the inode into lost+found. */
2949 if (rec->found_link == 0) {
2950 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2954 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2955 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2958 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2959 dir_name, strerror(-ret));
2962 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2963 namebuf, namelen, type, NULL, 1);
2965 * Add ".INO" suffix several times to handle case where
2966 * "FILENAME.INO" is already taken by another file.
2968 while (ret == -EEXIST) {
2970 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2972 if (namelen + count_digits(rec->ino) + 1 >
2977 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2979 namelen += count_digits(rec->ino) + 1;
2980 ret = btrfs_add_link(trans, root, rec->ino,
2981 lost_found_ino, namebuf,
2982 namelen, type, NULL, 1);
2986 "Failed to link the inode %llu to %s dir: %s\n",
2987 rec->ino, dir_name, strerror(-ret));
2991 * Just increase the found_link, don't actually add the
2992 * backref. This will make things easier and this inode
2993 * record will be freed after the repair is done.
2994 * So fsck will not report problem about this inode.
2997 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2998 namelen, namebuf, dir_name);
3000 printf("Fixed the nlink of inode %llu\n", rec->ino);
3003 * Clear the flag anyway, or we will loop forever for the same inode
3004 * as it will not be removed from the bad inode list and the dead loop
3007 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3008 btrfs_release_path(path);
3013 * Check if there is any normal(reg or prealloc) file extent for given
3015 * This is used to determine the file type when neither its dir_index/item nor
3016 * inode_item exists.
3018 * This will *NOT* report errors; if any error happens, the inode is simply
3019 * treated as not having any normal file extent.
/*
 * @root: tree to search
 * @ino:  inode number whose EXTENT_DATA items are examined
 *
 * Read-only lookup: btrfs_search_slot() is called without a transaction
 * and with ins_len and cow both 0, on a path that lives on this stack
 * frame and is released at the end.  An item is only considered while its
 * key still has objectid @ino and type BTRFS_EXTENT_DATA_KEY; its extent
 * type is then compared against BTRFS_FILE_EXTENT_INLINE.
 */
3021 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3023 struct btrfs_path path;
3024 struct btrfs_key key;
3025 struct btrfs_key found_key;
3026 struct btrfs_file_extent_item *fi;
3030 btrfs_init_path(&path);
3032 key.type = BTRFS_EXTENT_DATA_KEY;
3035 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the last item: step to the next leaf. */
3040 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3041 ret = btrfs_next_leaf(root, &path);
3048 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3050 if (found_key.objectid != ino ||
3051 found_key.type != BTRFS_EXTENT_DATA_KEY)
3053 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3054 struct btrfs_file_extent_item);
3055 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Any extent type other than inline takes this branch. */
3056 if (type != BTRFS_FILE_EXTENT_INLINE) {
3062 btrfs_release_path(&path);
/*
 * Map a BTRFS_FT_* directory entry type to the matching S_IF* mode bits
 * through a static lookup table built with designated initialisers.
 *
 * NOTE(review): @type indexes the table directly and no range check is
 * visible here -- confirm callers never pass a value beyond the last
 * initialised entry.
 */
3066 static u32 btrfs_type_to_imode(u8 type)
3068 static u32 imode_by_btrfs_type[] = {
3069 [BTRFS_FT_REG_FILE] = S_IFREG,
3070 [BTRFS_FT_DIR] = S_IFDIR,
3071 [BTRFS_FT_CHRDEV] = S_IFCHR,
3072 [BTRFS_FT_BLKDEV] = S_IFBLK,
3073 [BTRFS_FT_FIFO] = S_IFIFO,
3074 [BTRFS_FT_SOCK] = S_IFSOCK,
3075 [BTRFS_FT_SYMLINK] = S_IFLNK,
3078 return imode_by_btrfs_type[(type)];
/*
 * Recreate a missing inode item for @rec.
 *
 * @trans: running transaction
 * @root:  tree that should hold the inode item
 * @path:  scratch path supplied by the caller
 * @rec:   inode record lacking an inode item
 *
 * The file type comes from find_file_type() when possible.  Otherwise it
 * is guessed in this order: BTRFS_FT_REG_FILE if a normal file extent
 * exists, BTRFS_FT_DIR if rec->found_dir_item is set, BTRFS_FT_REG_FILE if
 * orphan extents are attached, and BTRFS_FT_REG_FILE as the last resort
 * (with a message).  The item itself is created by btrfs_new_inode().
 *
 * Afterwards rec is updated to match: imode is filled in,
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so
 * that the nlink repair runs later.
 *
 * NOTE(review): rec->found_dir_item is set to 1 after the rebuild for
 * every file type -- confirm this is the intended field.
 */
3081 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3082 struct btrfs_root *root,
3083 struct btrfs_path *path,
3084 struct inode_record *rec)
3088 int type_recovered = 0;
3091 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation. */
3093 type_recovered = !find_file_type(rec, &filetype);
3096 * Try to determine inode type if type not found.
3098 * For found regular file extent, it must be FILE.
3099 * For found dir_item/index, it must be DIR.
3101 * For undetermined one, use FILE as fallback.
3104 * 1. If found backref(inode_index/item is already handled) to it,
3106 * Need new inode-inode ref structure to allow search for that.
3108 if (!type_recovered) {
/* The tree lookup is only attempted when the record saw a file extent. */
3109 if (rec->found_file_extent &&
3110 find_normal_file_extent(root, rec->ino)) {
3112 filetype = BTRFS_FT_REG_FILE;
3113 } else if (rec->found_dir_item) {
3115 filetype = BTRFS_FT_DIR;
3116 } else if (!list_empty(&rec->orphan_extents)) {
3118 filetype = BTRFS_FT_REG_FILE;
3120 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3123 filetype = BTRFS_FT_REG_FILE;
3127 ret = btrfs_new_inode(trans, root, rec->ino,
3128 mode | btrfs_type_to_imode(filetype));
3133 * Here inode rebuild is done, we only rebuild the inode item,
3134 * don't repair the nlink(like move to lost+found).
3135 * That is the job of nlink repair.
3137 * We just fill the record and return
3139 rec->found_dir_item = 1;
3140 rec->imode = mode | btrfs_type_to_imode(filetype);
3142 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3143 /* Ensure the inode_nlinks repair function will be called */
3144 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Try to re-attach the orphan data extents recorded on @rec.
 *
 * @trans: running transaction
 * @root:  tree that holds the inode
 * @path:  scratch path, released after each conflict lookup
 * @rec:   inode record carrying the orphan_extents list
 *
 * For each entry on rec->orphan_extents:
 *   - btrfs_get_extent() is called with the objectid, offset and disk_len
 *     of the orphan to look for a conflicting file extent;
 *   - a conflicting orphan is reported and handed to btrfs_free_extent();
 *   - otherwise a file extent item is inserted with disk_len passed for
 *     both length arguments, rec->found_size grows by disk_len and the
 *     matching range is removed from rec->holes.
 * I_ERR_FILE_NBYTES_WRONG is cleared once found_size equals nbytes, and
 * I_ERR_FILE_EXTENT_DISCOUNT once the hole tree is empty.  Processed
 * entries are unlinked from the list and I_ERR_FILE_EXTENT_ORPHAN is
 * cleared as well.
 */
3149 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3150 struct btrfs_root *root,
3151 struct btrfs_path *path,
3152 struct inode_record *rec)
3154 struct orphan_data_extent *orphan;
3155 struct orphan_data_extent *tmp;
3158 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3160 * Check for conflicting file extents
3162 * Here we don't know whether the extents is compressed or not,
3163 * so we can only assume it not compressed nor data offset,
3164 * and use its disk_len as extent length.
3166 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3167 orphan->offset, orphan->disk_len, 0);
3168 btrfs_release_path(path);
3173 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3174 orphan->disk_bytenr, orphan->disk_len);
3175 ret = btrfs_free_extent(trans,
3176 root->fs_info->extent_root,
3177 orphan->disk_bytenr, orphan->disk_len,
3178 0, root->objectid, orphan->objectid,
3183 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3184 orphan->offset, orphan->disk_bytenr,
3185 orphan->disk_len, orphan->disk_len);
3189 /* Update file size info */
3190 rec->found_size += orphan->disk_len;
3191 if (rec->found_size == rec->nbytes)
3192 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3194 /* Update the file extent hole info too */
3195 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3199 if (RB_EMPTY_ROOT(&rec->holes))
3200 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3202 list_del(&orphan->list);
3205 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the file extent holes recorded on @rec.
 *
 * @trans: running transaction
 * @root:  tree that holds the inode
 * @path:  scratch path supplied by the caller
 * @rec:   inode record carrying the holes rb-tree
 *
 * Holes are taken one at a time from the front of rec->holes: each one is
 * punched with btrfs_punch_hole() and then removed from the tree with
 * del_file_extent_hole().  I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty.  A separate case punches one hole from offset 0 to
 * rec->isize rounded up to the sector size, for a file that lost all of
 * its file extents.
 */
3210 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3211 struct btrfs_root *root,
3212 struct btrfs_path *path,
3213 struct inode_record *rec)
3215 struct rb_node *node;
3216 struct file_extent_hole *hole;
3220 node = rb_first(&rec->holes);
3224 hole = rb_entry(node, struct file_extent_hole, node);
3225 ret = btrfs_punch_hole(trans, root, rec->ino,
3226 hole->start, hole->len);
3229 ret = del_file_extent_hole(&rec->holes, hole->start,
3233 if (RB_EMPTY_ROOT(&rec->holes))
3234 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree changed, so restart from its new first node. */
3235 node = rb_first(&rec->holes);
3237 /* special case for a file losing all its file extent */
3239 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3240 round_up(rec->isize,
3241 root->fs_info->sectorsize));
3245 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3246 rec->ino, root->objectid);
/*
 * Run the per-inode repair helpers for the repairable error bits set on
 * @rec.
 *
 * @root: tree that holds the inode
 * @rec:  inode record to repair
 *
 * Only the error bits listed in the mask below are handled here.  One
 * transaction, started with 7 reserved items, covers all helpers.  They
 * run in a fixed order, and every helper after the first is skipped once
 * an earlier one has returned non-zero.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * used on the line shown -- confirm a failed commit cannot go unnoticed.
 */
3251 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3253 struct btrfs_trans_handle *trans;
3254 struct btrfs_path path;
/* Mask of the error bits this function knows how to repair. */
3257 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3258 I_ERR_NO_ORPHAN_ITEM |
3259 I_ERR_LINK_COUNT_WRONG |
3260 I_ERR_NO_INODE_ITEM |
3261 I_ERR_FILE_EXTENT_ORPHAN |
3262 I_ERR_FILE_EXTENT_DISCOUNT|
3263 I_ERR_FILE_NBYTES_WRONG)))
3267 * For nlink repair, it may create a dir and add link, so
3268 * 2 for parent(256)'s dir_index and dir_item
3269 * 2 for lost+found dir's inode_item and inode_ref
3270 * 1 for the new inode_ref of the file
3271 * 2 for lost+found dir's dir_index and dir_item for the file
3273 trans = btrfs_start_transaction(root, 7);
3275 return PTR_ERR(trans);
3277 btrfs_init_path(&path);
3278 if (rec->errors & I_ERR_NO_INODE_ITEM)
3279 ret = repair_inode_no_item(trans, root, &path, rec);
3280 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3281 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3282 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3283 ret = repair_inode_discount_extent(trans, root, &path, rec);
3284 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3285 ret = repair_inode_isize(trans, root, &path, rec);
3286 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3287 ret = repair_inode_orphan_item(trans, root, &path, rec);
3288 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3289 ret = repair_inode_nlinks(trans, root, &path, rec);
3290 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3291 ret = repair_inode_nbytes(trans, root, &path, rec);
3292 btrfs_commit_transaction(trans, root);
3293 btrfs_release_path(&path);
/*
 * Validate, and in repair mode try to fix, the inode records collected
 * for @root.
 *
 * @root:        tree being checked
 * @inode_cache: cache of ptr_node entries that wrap inode_record objects;
 *               entries are removed from it as they are processed
 *
 * Steps visible below:
 *   - for a root with zero refs, a warning is printed if the cache is not
 *     empty;
 *   - while repair is enabled, the backrefs of the cached records are
 *     repaired in stages through repair_inode_backrefs();
 *   - the root directory record is checked with check_root_dir(), and
 *     btrfs_make_root_dir() is used to recreate a missing root directory;
 *   - each remaining record is removed from the cache, re-evaluated
 *     (orphan item, inode item, link count), passed to try_repair_inode()
 *     and, if it still cannot be freed, printed together with its
 *     unresolved backrefs.
 *
 * Returns -1 if the error counter is positive, 0 otherwise.
 */
3297 static int check_inode_recs(struct btrfs_root *root,
3298 struct cache_tree *inode_cache)
3300 struct cache_extent *cache;
3301 struct ptr_node *node;
3302 struct inode_record *rec;
3303 struct inode_backref *backref;
3308 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* Root with zero refs: warn if inode records were still collected for it. */
3310 if (btrfs_root_refs(&root->root_item) == 0) {
3311 if (!cache_tree_empty(inode_cache))
3312 fprintf(stderr, "warning line %d\n", __LINE__);
3317 * We need to repair backrefs first because we could change some of the
3318 * errors in the inode recs.
3320 * We also need to go through and delete invalid backrefs first and then
3321 * add the correct ones second. We do this because we may get EEXIST
3322 * when adding back the correct index because we hadn't yet deleted the
3325 * For example, if we were missing a dir index then the directories
3326 * isize would be wrong, so if we fixed the isize to what we thought it
3327 * would be and then fixed the backref we'd still have a invalid fs, so
3328 * we need to add back the dir index and then check to see if the isize
3333 if (stage == 3 && !err)
3336 cache = search_cache_extent(inode_cache, 0);
3337 while (repair && cache) {
3338 node = container_of(cache, struct ptr_node, cache);
3340 cache = next_cache_extent(cache);
3342 /* Need to free everything up and rescan */
3344 remove_cache_extent(inode_cache, &node->cache);
3346 free_inode_rec(rec);
3350 if (list_empty(&rec->backrefs))
3353 ret = repair_inode_backrefs(root, rec, inode_cache,
3367 rec = get_inode_rec(inode_cache, root_dirid, 0);
3368 BUG_ON(IS_ERR(rec));
3370 ret = check_root_dir(rec);
3372 fprintf(stderr, "root %llu root dir %llu error\n",
3373 (unsigned long long)root->root_key.objectid,
3374 (unsigned long long)root_dirid);
3375 print_inode_error(root, rec);
3380 struct btrfs_trans_handle *trans;
3382 trans = btrfs_start_transaction(root, 1);
3383 if (IS_ERR(trans)) {
3384 err = PTR_ERR(trans);
3389 "root %llu missing its root dir, recreating\n",
3390 (unsigned long long)root->objectid);
3392 ret = btrfs_make_root_dir(trans, root, root_dirid);
3395 btrfs_commit_transaction(trans, root);
3399 fprintf(stderr, "root %llu root dir %llu not found\n",
3400 (unsigned long long)root->root_key.objectid,
3401 (unsigned long long)root_dirid);
3405 cache = search_cache_extent(inode_cache, 0);
3408 node = container_of(cache, struct ptr_node, cache);
3410 remove_cache_extent(inode_cache, &node->cache);
3412 if (rec->ino == root_dirid ||
3413 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3414 free_inode_rec(rec);
3418 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3419 ret = check_orphan_item(root, rec->ino);
3421 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3422 if (can_free_inode_rec(rec)) {
3423 free_inode_rec(rec);
3428 if (!rec->found_inode_item)
3429 rec->errors |= I_ERR_NO_INODE_ITEM;
3430 if (rec->found_link != rec->nlink)
3431 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3433 ret = try_repair_inode(root, rec);
3434 if (ret == 0 && can_free_inode_rec(rec)) {
3435 free_inode_rec(rec);
3441 if (!(repair && ret == 0))
3443 print_inode_error(root, rec);
3444 list_for_each_entry(backref, &rec->backrefs, list) {
3445 if (!backref->found_dir_item)
3446 backref->errors |= REF_ERR_NO_DIR_ITEM;
3447 if (!backref->found_dir_index)
3448 backref->errors |= REF_ERR_NO_DIR_INDEX;
3449 if (!backref->found_inode_ref)
3450 backref->errors |= REF_ERR_NO_INODE_REF;
3451 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3452 " namelen %u name %s filetype %d errors %x",
3453 (unsigned long long)backref->dir,
3454 (unsigned long long)backref->index,
3455 backref->namelen, backref->name,
3456 backref->filetype, backref->errors);
3457 print_ref_error(backref->errors);
3459 free_inode_rec(rec);
3461 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for objectid, creating and inserting a
 * zero-initialised one when the cache has no entry yet.
 *
 * A new record is keyed in @root_cache at start == objectid with size 1
 * and gets an empty backrefs list.
 *
 * Returns ERR_PTR(-ENOMEM) when calloc() fails and ERR_PTR(-EEXIST) on
 * the insert_cache_extent() failure path.
 *
 * NOTE(review): no free(rec) is visible before the ERR_PTR(-EEXIST)
 * return -- confirm the freshly allocated record is not leaked there.
 */
3464 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3467 struct cache_extent *cache;
3468 struct root_record *rec = NULL;
3471 cache = lookup_cache_extent(root_cache, objectid, 1);
3473 rec = container_of(cache, struct root_record, cache);
3475 rec = calloc(1, sizeof(*rec));
3477 return ERR_PTR(-ENOMEM);
3478 rec->objectid = objectid;
3479 INIT_LIST_HEAD(&rec->backrefs);
3480 rec->cache.start = objectid;
3481 rec->cache.size = 1;
3483 ret = insert_cache_extent(root_cache, &rec->cache);
3485 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches @ref_root, @dir and @name, or
 * allocate a new one and append it to rec->backrefs.
 *
 * Matching compares ref_root, dir, namelen and the name bytes; @index is
 * not part of the comparison and is only stored on a newly created entry.
 * A new entry is allocated with namelen + 1 extra bytes so that the name
 * can be copied behind the struct and NUL-terminated.
 */
3490 static struct root_backref *get_root_backref(struct root_record *rec,
3491 u64 ref_root, u64 dir, u64 index,
3492 const char *name, int namelen)
3494 struct root_backref *backref;
3496 list_for_each_entry(backref, &rec->backrefs, list) {
3497 if (backref->ref_root != ref_root || backref->dir != dir ||
3498 backref->namelen != namelen)
3500 if (memcmp(name, backref->name, namelen))
/* zeroed allocation: struct plus room for the name and its terminator */
3505 backref = calloc(1, sizeof(*backref) + namelen + 1);
3508 backref->ref_root = ref_root;
3510 backref->index = index;
3511 backref->namelen = namelen;
3512 memcpy(backref->name, name, namelen);
3513 backref->name[namelen] = '\0';
3514 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear-down callback for one root_record stored in a cache tree.
 *
 * The record is recovered from @cache with container_of() and its
 * backrefs are detached one by one from the head of rec->backrefs.
 *
 * NOTE(review): confirm that each detached backref and the record itself
 * are freed as part of this callback.
 */
3518 static void free_root_record(struct cache_extent *cache)
3520 struct root_record *rec;
3521 struct root_backref *backref;
3523 rec = container_of(cache, struct root_record, cache);
3524 while (!list_empty(&rec->backrefs)) {
3525 backref = to_root_backref(rec->backrefs.next);
3526 list_del(&backref->list);
/*
 * Presumably expands to the free_root_recs_tree() helper called from
 * check_fs_roots(), with free_root_record() as the per-entry callback --
 * confirm against the macro definition.
 */
3533 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference item (DIR_ITEM, DIR_INDEX, ROOT_REF or
 * ROOT_BACKREF) that points at subvolume @root_id.
 *
 * @root_cache: root record cache to update
 * @root_id:    subvolume being referenced
 * @ref_root:   root that contains the reference
 * @dir/@index: directory inode and index of the reference
 * @name/@namelen: name of the reference
 * @item_type:  key type of the item being recorded
 * @errors:     REF_ERR_* bits already detected by the caller
 *
 * The matching root_backref is looked up or created and @errors is OR-ed
 * into it.  For item types other than DIR_ITEM the index is handled: when
 * an index-carrying item was seen before and the stored index differs,
 * REF_ERR_INDEX_UNMATCH is raised; the same block also assigns
 * backref->index from @index.  The found_* flag matching @item_type is
 * then set, with duplicate ROOT_REF / ROOT_BACKREF items raising
 * REF_ERR_DUP_ROOT_REF / REF_ERR_DUP_ROOT_BACKREF.  A backref that has
 * both a forward ref and a dir item is marked reachable.
 */
3535 static int add_root_backref(struct cache_tree *root_cache,
3536 u64 root_id, u64 ref_root, u64 dir, u64 index,
3537 const char *name, int namelen,
3538 int item_type, int errors)
3540 struct root_record *rec;
3541 struct root_backref *backref;
3543 rec = get_root_rec(root_cache, root_id);
3544 BUG_ON(IS_ERR(rec));
3545 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3548 backref->errors |= errors;
/* Every item type except DIR_ITEM goes through the index handling below. */
3550 if (item_type != BTRFS_DIR_ITEM_KEY) {
3551 if (backref->found_dir_index || backref->found_back_ref ||
3552 backref->found_forward_ref) {
3553 if (backref->index != index)
3554 backref->errors |= REF_ERR_INDEX_UNMATCH;
3556 backref->index = index;
3560 if (item_type == BTRFS_DIR_ITEM_KEY) {
3561 if (backref->found_forward_ref)
3563 backref->found_dir_item = 1;
3564 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3565 backref->found_dir_index = 1;
3566 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3567 if (backref->found_forward_ref)
3568 backref->errors |= REF_ERR_DUP_ROOT_REF;
3569 else if (backref->found_dir_item)
3571 backref->found_forward_ref = 1;
3572 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3573 if (backref->found_back_ref)
3574 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3575 backref->found_back_ref = 1;
/* Reachable means: a ROOT_REF exists and a dir item names the subvolume. */
3580 if (backref->found_forward_ref && backref->found_dir_item)
3581 backref->reachable = 1;
/*
 * Drain @src_cache and forward the directory entries that point at child
 * roots of @root into the root backref cache @dst_cache.
 *
 * For the tree reloc root the source cache is simply freed.  Otherwise
 * each entry is removed from @src_cache, is_child_root() is consulted for
 * rec->ino, and every backref with a dir item and/or a dir index is
 * passed to add_root_backref() with root->root_key.objectid as the
 * referencing root.  The inode record is freed afterwards.
 *
 * A backref with found_inode_ref set is treated as a fatal inconsistency
 * (BUG_ON).
 */
3585 static int merge_root_recs(struct btrfs_root *root,
3586 struct cache_tree *src_cache,
3587 struct cache_tree *dst_cache)
3589 struct cache_extent *cache;
3590 struct ptr_node *node;
3591 struct inode_record *rec;
3592 struct inode_backref *backref;
3595 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3596 free_inode_recs_tree(src_cache);
3601 cache = search_cache_extent(src_cache, 0);
3604 node = container_of(cache, struct ptr_node, cache);
3606 remove_cache_extent(src_cache, &node->cache);
3609 ret = is_child_root(root, root->objectid, rec->ino);
3615 list_for_each_entry(backref, &rec->backrefs, list) {
3616 BUG_ON(backref->found_inode_ref);
3617 if (backref->found_dir_item)
3618 add_root_backref(dst_cache, rec->ino,
3619 root->root_key.objectid, backref->dir,
3620 backref->index, backref->name,
3621 backref->namelen, BTRFS_DIR_ITEM_KEY,
3623 if (backref->found_dir_index)
3624 add_root_backref(dst_cache, rec->ino,
3625 root->root_key.objectid, backref->dir,
3626 backref->index, backref->name,
3627 backref->namelen, BTRFS_DIR_INDEX_KEY,
3631 free_inode_rec(rec);
/*
 * Verify the references between the subvolume roots recorded in
 * @root_cache and report the ones that do not resolve.
 *
 * @root:       any root of the filesystem; root->fs_info->tree_root is
 *              used for the orphan item lookup
 * @root_cache: root record cache built while checking the fs trees
 *
 * First scan: for records that have references, each reachable backref is
 * examined and the record of its referencing root is looked up; the lines
 * below appear to clear backref->reachable when that referencing root has
 * no found_ref of its own.
 *
 * Second scan: unreferenced records in the range
 * BTRFS_FIRST_FREE_OBJECTID..BTRFS_LAST_FREE_OBJECTID are checked for an
 * orphan item and reported as not referenced; then the missing-item
 * REF_ERR_NO_* bits are set on every backref and unresolved backrefs are
 * printed.
 *
 * Returns 1 if the error counter is positive, 0 otherwise.
 */
3638 static int check_root_refs(struct btrfs_root *root,
3639 struct cache_tree *root_cache)
3641 struct root_record *rec;
3642 struct root_record *ref_root;
3643 struct root_backref *backref;
3644 struct cache_extent *cache;
/* Look up (or create) the record of the top-level fs tree. */
3650 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3651 BUG_ON(IS_ERR(rec));
3654 /* fixme: this can not detect circular references */
3657 cache = search_cache_extent(root_cache, 0);
3661 rec = container_of(cache, struct root_record, cache);
3662 cache = next_cache_extent(cache);
3664 if (rec->found_ref == 0)
3667 list_for_each_entry(backref, &rec->backrefs, list) {
3668 if (!backref->reachable)
3671 ref_root = get_root_rec(root_cache,
3673 BUG_ON(IS_ERR(ref_root));
3674 if (ref_root->found_ref > 0)
3677 backref->reachable = 0;
3679 if (rec->found_ref == 0)
3685 cache = search_cache_extent(root_cache, 0);
3689 rec = container_of(cache, struct root_record, cache);
3690 cache = next_cache_extent(cache);
3692 if (rec->found_ref == 0 &&
3693 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3694 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3695 ret = check_orphan_item(root->fs_info->tree_root,
3701 * If we don't have a root item then we likely just have
3702 * a dir item in a snapshot for this root but no actual
3703 * ref key or anything so it's meaningless.
3705 if (!rec->found_root_item)
3708 fprintf(stderr, "fs tree %llu not referenced\n",
3709 (unsigned long long)rec->objectid);
3713 if (rec->found_ref > 0 && !rec->found_root_item)
3715 list_for_each_entry(backref, &rec->backrefs, list) {
3716 if (!backref->found_dir_item)
3717 backref->errors |= REF_ERR_NO_DIR_ITEM;
3718 if (!backref->found_dir_index)
3719 backref->errors |= REF_ERR_NO_DIR_INDEX;
3720 if (!backref->found_back_ref)
3721 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3722 if (!backref->found_forward_ref)
3723 backref->errors |= REF_ERR_NO_ROOT_REF;
3724 if (backref->reachable && backref->errors)
3731 fprintf(stderr, "fs tree %llu refs %u %s\n",
3732 (unsigned long long)rec->objectid, rec->found_ref,
3733 rec->found_root_item ? "" : "not found");
3735 list_for_each_entry(backref, &rec->backrefs, list) {
3736 if (!backref->reachable)
3738 if (!backref->errors && rec->found_root_item)
3740 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3741 " index %llu namelen %u name %s errors %x\n",
3742 (unsigned long long)backref->ref_root,
3743 (unsigned long long)backref->dir,
3744 (unsigned long long)backref->index,
3745 backref->namelen, backref->name,
3747 print_ref_error(backref->errors);
3750 return errors > 0 ? 1 : 0;
/*
 * Parse one ROOT_REF or ROOT_BACKREF item and feed it to
 * add_root_backref().
 *
 * @eb, @slot:  leaf and slot that hold the btrfs_root_ref item
 * @key:        key of the item; its type decides the argument order
 * @root_cache: root record cache to update
 *
 * dirid, sequence (used as the index) and name length are read from the
 * item.  A name longer than BTRFS_NAME_LEN is cut to BTRFS_NAME_LEN and
 * flagged with REF_ERR_NAME_TOO_LONG.  The name bytes stored right after
 * the struct are copied into a stack buffer.  For a ROOT_REF key the
 * referenced root is key->offset and the referencing root is
 * key->objectid; in the other call the two are swapped.
 */
3753 static int process_root_ref(struct extent_buffer *eb, int slot,
3754 struct btrfs_key *key,
3755 struct cache_tree *root_cache)
3761 struct btrfs_root_ref *ref;
3762 char namebuf[BTRFS_NAME_LEN];
3765 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3767 dirid = btrfs_root_ref_dirid(eb, ref);
3768 index = btrfs_root_ref_sequence(eb, ref);
3769 name_len = btrfs_root_ref_name_len(eb, ref);
3771 if (name_len <= BTRFS_NAME_LEN) {
3775 len = BTRFS_NAME_LEN;
3776 error = REF_ERR_NAME_TOO_LONG;
/* (ref + 1) is the offset of the first byte after the fixed-size struct. */
3778 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3780 if (key->type == BTRFS_ROOT_REF_KEY) {
3781 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3782 index, namebuf, len, key->type, error);
3784 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3785 index, namebuf, len, key->type, error);
/*
 * Tear-down callback for one btrfs_corrupt_block stored in a cache tree;
 * the containing struct is recovered from @cache with container_of().
 */
3790 static void free_corrupt_block(struct cache_extent *cache)
3792 struct btrfs_corrupt_block *corrupt;
3794 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Presumably expands to the free_corrupt_blocks_tree() helper called from
 * check_fs_root(), with free_corrupt_block() as the per-entry callback --
 * confirm against the macro definition.
 */
3798 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3801 * Repair the btree of the given root.
3803 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3804 * and then rebalance the tree.
3805 * After the fix, the btree should be writeable.
/*
 * @root:           tree to repair
 * @corrupt_blocks: cache of btrfs_corrupt_block entries, each carrying
 *                  the level and node key of a corrupted block
 *
 * One transaction is used for two passes over the cache:
 *   1. for each entry, search down to corrupt->level with the recorded
 *      key (ins_len 0, cow 1), remove the pointer found at that level
 *      with btrfs_del_ptr() and hand the block it pointed to, one
 *      nodesize long, to btrfs_free_extent();
 *   2. search for each key again with ins_len == -1 so that
 *      btrfs_search_slot() rebalances the tree.
 * The transaction is committed and the path released at the end.
 */
3807 static int repair_btree(struct btrfs_root *root,
3808 struct cache_tree *corrupt_blocks)
3810 struct btrfs_trans_handle *trans;
3811 struct btrfs_path path;
3812 struct btrfs_corrupt_block *corrupt;
3813 struct cache_extent *cache;
3814 struct btrfs_key key;
/* Empty cache: no corrupted block was recorded for this tree. */
3819 if (cache_tree_empty(corrupt_blocks))
3822 trans = btrfs_start_transaction(root, 1);
3823 if (IS_ERR(trans)) {
3824 ret = PTR_ERR(trans);
3825 fprintf(stderr, "Error starting transaction: %s\n",
3829 btrfs_init_path(&path);
3830 cache = first_cache_extent(corrupt_blocks);
3832 corrupt = container_of(cache, struct btrfs_corrupt_block,
3834 level = corrupt->level;
/* Stop the search at the level of the parent pointer to be removed. */
3835 path.lowest_level = level;
3836 key.objectid = corrupt->key.objectid;
3837 key.type = corrupt->key.type;
3838 key.offset = corrupt->key.offset;
3841 * Here we don't want to do any tree balance, since it may
3842 * cause a balance with corrupted brother leaf/node,
3843 * so ins_len set to 0 here.
3844 * Balance will be done after all corrupt node/leaf is deleted.
3846 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3849 offset = btrfs_node_blockptr(path.nodes[level],
3852 /* Remove the ptr */
3853 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3857 * Remove the corresponding extent
3858 * return value is not concerned.
3860 btrfs_release_path(&path);
3861 ret = btrfs_free_extent(trans, root, offset,
3862 root->fs_info->nodesize, 0,
3863 root->root_key.objectid, level - 1, 0);
3864 cache = next_cache_extent(cache);
3867 /* Balance the btree using btrfs_search_slot() */
3868 cache = first_cache_extent(corrupt_blocks);
3870 corrupt = container_of(cache, struct btrfs_corrupt_block,
3872 memcpy(&key, &corrupt->key, sizeof(key));
3873 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3876 /* return will always >0 since it won't find the item */
3878 btrfs_release_path(&path);
3879 cache = next_cache_extent(cache);
3882 btrfs_commit_transaction(trans, root);
3883 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * @root:       tree to check
 * @root_cache: global root record cache, updated through get_root_rec()
 *              and merge_root_recs()
 * @wc:         walk control shared between trees; its nodes array and
 *              level fields are reset here
 *
 * Outline of the steps visible below:
 *   - install a local corrupt_blocks cache in root->fs_info for the
 *     duration of the call;
 *   - except for the tree reloc root, mark the root record as having a
 *     root item when the root refs are positive;
 *   - move the orphan data extents of the root to the inode records that
 *     own them, flagging those records I_ERR_FILE_EXTENT_ORPHAN;
 *   - check the root block itself, then walk the tree with
 *     walk_down_tree() and walk_up_tree(), starting either at the root
 *     node or at the position stored in root_item->drop_progress;
 *   - print the corrupted tree blocks that were recorded and call
 *     repair_btree();
 *   - merge the child root records and run check_inode_recs();
 *   - free the local corrupt block cache and the remaining orphan data
 *     extents.
 */
3887 static int check_fs_root(struct btrfs_root *root,
3888 struct cache_tree *root_cache,
3889 struct walk_control *wc)
3895 struct btrfs_path path;
3896 struct shared_node root_node;
3897 struct root_record *rec;
3898 struct btrfs_root_item *root_item = &root->root_item;
3899 struct cache_tree corrupt_blocks;
3900 struct orphan_data_extent *orphan;
3901 struct orphan_data_extent *tmp;
3902 enum btrfs_tree_block_status status;
3903 struct node_refs nrefs;
3906 * Reuse the corrupt_block cache tree to record corrupted tree block
3908 * Unlike the usage in extent tree check, here we do it in a per
3909 * fs/subvol tree base.
3911 cache_tree_init(&corrupt_blocks);
3912 root->fs_info->corrupt_blocks = &corrupt_blocks;
3914 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3915 rec = get_root_rec(root_cache, root->root_key.objectid);
3916 BUG_ON(IS_ERR(rec));
3917 if (btrfs_root_refs(root_item) > 0)
3918 rec->found_root_item = 1;
3921 btrfs_init_path(&path);
3922 memset(&root_node, 0, sizeof(root_node));
3923 cache_tree_init(&root_node.root_cache);
3924 cache_tree_init(&root_node.inode_cache);
3925 memset(&nrefs, 0, sizeof(nrefs));
3927 /* Move the orphan extent record to corresponding inode_record */
3928 list_for_each_entry_safe(orphan, tmp,
3929 &root->orphan_data_extents, list) {
3930 struct inode_record *inode;
3932 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3934 BUG_ON(IS_ERR(inode));
3935 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3936 list_move(&orphan->list, &inode->orphan_extents);
/* Seed the walk control with the local shared_node at the root level. */
3939 level = btrfs_header_level(root->node);
3940 memset(wc->nodes, 0, sizeof(wc->nodes));
3941 wc->nodes[level] = &root_node;
3942 wc->active_node = level;
3943 wc->root_level = level;
3945 /* We may not have checked the root block, lets do that now */
3946 if (btrfs_is_leaf(root->node))
3947 status = btrfs_check_leaf(root, NULL, root->node);
3949 status = btrfs_check_node(root, NULL, root->node);
3950 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root, or no drop progress recorded: start the walk at the root node. */
3953 if (btrfs_root_refs(root_item) > 0 ||
3954 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3955 path.nodes[level] = root->node;
3956 extent_buffer_get(root->node);
3957 path.slots[level] = 0;
3959 struct btrfs_key key;
3960 struct btrfs_disk_key found_key;
3962 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3963 level = root_item->drop_level;
3964 path.lowest_level = level;
/* Reject a drop level outside the tree height or BTRFS_MAX_LEVEL. */
3965 if (level > btrfs_header_level(root->node) ||
3966 level >= BTRFS_MAX_LEVEL) {
3967 error("ignoring invalid drop level: %u", level);
3970 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3973 btrfs_node_key(path.nodes[level], &found_key,
3975 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3976 sizeof(found_key)));
3980 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3986 wret = walk_up_tree(root, &path, wc, &level);
3993 btrfs_release_path(&path);
3995 if (!cache_tree_empty(&corrupt_blocks)) {
3996 struct cache_extent *cache;
3997 struct btrfs_corrupt_block *corrupt;
3999 printf("The following tree block(s) is corrupted in tree %llu:\n",
4000 root->root_key.objectid);
4001 cache = first_cache_extent(&corrupt_blocks);
4003 corrupt = container_of(cache,
4004 struct btrfs_corrupt_block,
4006 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4007 cache->start, corrupt->level,
4008 corrupt->key.objectid, corrupt->key.type,
4009 corrupt->key.offset);
4010 cache = next_cache_extent(cache);
4013 printf("Try to repair the btree for root %llu\n",
4014 root->root_key.objectid);
4015 ret = repair_btree(root, &corrupt_blocks);
4017 fprintf(stderr, "Failed to repair btree: %s\n",
4020 printf("Btree for root %llu is fixed\n",
4021 root->root_key.objectid);
4025 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the record still in progress as checked before the final pass. */
4029 if (root_node.current) {
4030 root_node.current->checked = 1;
4031 maybe_free_inode_rec(&root_node.inode_cache,
4035 err = check_inode_recs(root, &root_node.inode_cache);
/* The cache lives on this stack frame, so unhook it from fs_info again. */
4039 free_corrupt_blocks_tree(&corrupt_blocks);
4040 root->fs_info->corrupt_blocks = NULL;
4041 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether @objectid names a tree that check_fs_roots() should
 * handle as an fs tree.  The tree reloc and data reloc tree objectids are
 * special-cased ahead of the generic is_fstree() test.
 */
4045 static int fs_root_objectid(u64 objectid)
4047 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4048 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4050 return is_fstree(objectid);
/*
 * Walk the root tree and check every fs tree found in it.
 *
 * @fs_info:    filesystem being checked
 * @root_cache: root record cache filled while walking
 *
 * ROOT_ITEM keys whose objectid passes fs_root_objectid() are loaded
 * (through btrfs_read_fs_root_no_cache() for the tree reloc root, through
 * btrfs_read_fs_root() otherwise) and handed to check_fs_root(); a tree
 * reloc root is freed again right after the check.  ROOT_REF and
 * ROOT_BACKREF keys go to process_root_ref().
 *
 * When check_fs_root() returns -EAGAIN, or when tree_root->node no longer
 * equals the node remembered earlier, the root records are freed and the
 * path is released -- presumably so that the scan can start over; confirm
 * against the control flow.
 */
4053 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4054 struct cache_tree *root_cache)
4056 struct btrfs_path path;
4057 struct btrfs_key key;
4058 struct walk_control wc;
4059 struct extent_buffer *leaf, *tree_node;
4060 struct btrfs_root *tmp_root;
4061 struct btrfs_root *tree_root = fs_info->tree_root;
/* Report progress for this phase when progress output is enabled. */
4065 if (ctx.progress_enabled) {
4066 ctx.tp = TASK_FS_ROOTS;
4067 task_start(ctx.info);
4071 * Just in case we made any changes to the extent tree that weren't
4072 * reflected into the free space cache yet.
4075 reset_cached_block_groups(fs_info);
4076 memset(&wc, 0, sizeof(wc));
4077 cache_tree_init(&wc.shared);
4078 btrfs_init_path(&path);
4083 key.type = BTRFS_ROOT_ITEM_KEY;
4084 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4089 tree_node = tree_root->node;
4091 if (tree_node != tree_root->node) {
4092 free_root_recs_tree(root_cache);
4093 btrfs_release_path(&path);
4096 leaf = path.nodes[0];
4097 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4098 ret = btrfs_next_leaf(tree_root, &path);
4104 leaf = path.nodes[0];
4106 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4107 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4108 fs_root_objectid(key.objectid)) {
4109 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4110 tmp_root = btrfs_read_fs_root_no_cache(
4113 key.offset = (u64)-1;
4114 tmp_root = btrfs_read_fs_root(
4117 if (IS_ERR(tmp_root)) {
4121 ret = check_fs_root(tmp_root, root_cache, &wc);
4122 if (ret == -EAGAIN) {
4123 free_root_recs_tree(root_cache);
4124 btrfs_release_path(&path);
4129 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4130 btrfs_free_fs_root(tmp_root);
4131 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4132 key.type == BTRFS_ROOT_BACKREF_KEY) {
4133 process_root_ref(leaf, path.slots[0], &key,
4140 btrfs_release_path(&path);
4142 free_extent_cache_tree(&wc.shared);
4143 if (!cache_tree_empty(&wc.shared))
4144 fprintf(stderr, "warning line %d\n", __LINE__);
4146 task_stop(ctx.info);
4152 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4153 * INODE_REF/INODE_EXTREF match.
4155 * @root: the root of the fs/file tree
4156 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4157 * @key: the key of the DIR_ITEM/DIR_INDEX
4158 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4159 * distinguish root_dir from normal dir/file
4160 * @name: the name in the INODE_REF/INODE_EXTREF
4161 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4162 * @mode: the st_mode of INODE_ITEM
4164 * Return 0 if no error occurred.
4165 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4166 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4168 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4169 * not match for normal dir/file.
4171 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4172 struct btrfs_key *key, u64 index, char *name,
4173 u32 namelen, u32 mode)
4175 struct btrfs_path path;
4176 struct extent_buffer *node;
4177 struct btrfs_dir_item *di;
4178 struct btrfs_key location;
4179 char namebuf[BTRFS_NAME_LEN] = {0};
/* Look up the item addressed by @key; no transaction handle is passed. */
4189 btrfs_init_path(&path);
4190 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4192 ret = DIR_ITEM_MISSING;
4196 /* Process root dir and goto out */
4199 ret = ROOT_DIR_ERROR;
4201 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4203 ref_key->type == BTRFS_INODE_REF_KEY ?
4205 ref_key->objectid, ref_key->offset,
4206 key->type == BTRFS_DIR_ITEM_KEY ?
4207 "DIR_ITEM" : "DIR_INDEX");
4215 /* Process normal file/dir */
4217 ret = DIR_ITEM_MISSING;
4219 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4221 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4222 ref_key->objectid, ref_key->offset,
4223 key->type == BTRFS_DIR_ITEM_KEY ?
4224 "DIR_ITEM" : "DIR_INDEX",
4225 key->objectid, key->offset, namelen, name,
4226 imode_to_type(mode));
4230 /* Check whether inode_id/filetype/name match */
4231 node = path.nodes[0];
4232 slot = path.slots[0];
4233 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4234 total = btrfs_item_size_nr(node, slot);
/* Walk every dir entry packed into this item; cur/total bound the walk. */
4235 while (cur < total) {
4236 ret = DIR_ITEM_MISMATCH;
4237 name_len = btrfs_dir_name_len(node, di);
4238 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM of the inode owning the ref. */
4240 btrfs_dir_item_key_to_cpu(node, di, &location);
4241 if (location.objectid != ref_key->objectid ||
4242 location.type != BTRFS_INODE_ITEM_KEY ||
4243 location.offset != 0)
/* The stored file type must agree with the type derived from @mode. */
4246 filetype = btrfs_dir_type(node, di);
4247 if (imode_to_type(mode) != filetype)
/* A name running past the item or BTRFS_NAME_LEN is reported and clamped. */
4250 if (cur + sizeof(*di) + name_len > total ||
4251 name_len > BTRFS_NAME_LEN) {
4252 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4254 key->type == BTRFS_DIR_ITEM_KEY ?
4255 "DIR_ITEM" : "DIR_INDEX",
4256 key->objectid, key->offset, name_len);
4258 if (cur + sizeof(*di) > total)
4260 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the dir item header directly. */
4266 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4267 if (len != namelen || strncmp(namebuf, name, len))
/* Step over header, name and data to reach the next packed entry. */
4273 len = sizeof(*di) + name_len + data_len;
4274 di = (struct btrfs_dir_item *)((char *)di + len);
/* ret is preset to DIR_ITEM_MISMATCH per entry; report if it is still set. */
4277 if (ret == DIR_ITEM_MISMATCH)
4279 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4281 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4282 ref_key->objectid, ref_key->offset,
4283 key->type == BTRFS_DIR_ITEM_KEY ?
4284 "DIR_ITEM" : "DIR_INDEX",
4285 key->objectid, key->offset, namelen, name,
4286 imode_to_type(mode));
4288 btrfs_release_path(&path);
4293 * Traverse the given INODE_REF and call find_dir_item() to find related
4294 * DIR_ITEM/DIR_INDEX.
4296 * @root: the root of the fs/file tree
4297 * @ref_key: the key of the INODE_REF
4298 * @refs: the count of INODE_REF
4299 * @mode: the st_mode of INODE_ITEM
4301 * Return 0 if no error occurred.
4303 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4304 struct extent_buffer *node, int slot, u64 *refs,
4307 struct btrfs_key key;
4308 struct btrfs_inode_ref *ref;
4309 char namebuf[BTRFS_NAME_LEN] = {0};
/* One INODE_REF item may pack several refs; total is the item size in bytes. */
4317 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4318 total = btrfs_item_size_nr(node, slot);
4321 /* Update inode ref count */
4324 index = btrfs_inode_ref_index(node, ref);
4325 name_len = btrfs_inode_ref_name_len(node, ref);
/* A name running past the item or BTRFS_NAME_LEN is reported and clamped. */
4326 if (cur + sizeof(*ref) + name_len > total ||
4327 name_len > BTRFS_NAME_LEN) {
4328 warning("root %llu INODE_REF[%llu %llu] name too long",
4329 root->objectid, ref_key->objectid, ref_key->offset);
4331 if (total < cur + sizeof(*ref))
4333 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes follow the ref header directly. */
4338 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
/*
 * NOTE(review): the comparison below is bounded by name_len although only
 * len bytes were copied into namebuf -- confirm this is intended.
 */
4340 /* Check root dir ref name */
4341 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4342 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4343 root->objectid, ref_key->objectid, ref_key->offset,
4345 err |= ROOT_DIR_ERROR;
4348 /* Find related DIR_INDEX */
4349 key.objectid = ref_key->offset;
4350 key.type = BTRFS_DIR_INDEX_KEY;
4352 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* DIR_ITEM keys are addressed by the hash of the name. */
4355 /* Find related dir_item */
4356 key.objectid = ref_key->offset;
4357 key.type = BTRFS_DIR_ITEM_KEY;
4358 key.offset = btrfs_name_hash(namebuf, len);
4359 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header and name to reach the next ref packed in this item. */
4362 len = sizeof(*ref) + name_len;
4363 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4373 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4374 * DIR_ITEM/DIR_INDEX.
4376 * @root: the root of the fs/file tree
4377 * @ref_key: the key of the INODE_EXTREF
4378 * @refs: the count of INODE_EXTREF
4379 * @mode: the st_mode of INODE_ITEM
4381 * Return 0 if no error occurred.
4383 static int check_inode_extref(struct btrfs_root *root,
4384 struct btrfs_key *ref_key,
4385 struct extent_buffer *node, int slot, u64 *refs,
4388 struct btrfs_key key;
4389 struct btrfs_inode_extref *extref;
4390 char namebuf[BTRFS_NAME_LEN] = {0};
/* One INODE_EXTREF item may pack several refs; total is the item size. */
4400 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4401 total = btrfs_item_size_nr(node, slot);
4404 /* update inode ref count */
4406 name_len = btrfs_inode_extref_name_len(node, extref);
4407 index = btrfs_inode_extref_index(node, extref);
4408 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Names longer than BTRFS_NAME_LEN are clamped with a warning.
 * NOTE(review): unlike check_inode_ref(), no bound against the item size
 * is visible here -- confirm.
 */
4409 if (name_len <= BTRFS_NAME_LEN) {
4412 len = BTRFS_NAME_LEN;
4413 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4414 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the extref header directly. */
4416 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
/*
 * NOTE(review): the comparison below is bounded by name_len although only
 * len bytes were copied into namebuf -- confirm this is intended.
 */
4418 /* Check root dir ref name */
4419 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4420 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4421 root->objectid, ref_key->objectid, ref_key->offset,
4423 err |= ROOT_DIR_ERROR;
/* Unlike INODE_REF, the parent directory comes from the extref itself. */
4426 /* find related dir_index */
4427 key.objectid = parent;
4428 key.type = BTRFS_DIR_INDEX_KEY;
4430 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* DIR_ITEM keys are addressed by the hash of the name. */
4433 /* find related dir_item */
4434 key.objectid = parent;
4435 key.type = BTRFS_DIR_ITEM_KEY;
4436 key.offset = btrfs_name_hash(namebuf, len);
4437 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header and name to reach the next extref packed in this item. */
4440 len = sizeof(*extref) + name_len;
4441 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4451 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4452 * DIR_ITEM/DIR_INDEX match.
4454 * @root: the root of the fs/file tree
4455 * @key: the key of the INODE_REF/INODE_EXTREF
4456 * @name: the name in the INODE_REF/INODE_EXTREF
4457 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4458 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4460 * @ext_ref: the EXTENDED_IREF feature
4462 * Return 0 if no error occurred.
4463 * Return >0 for error bitmap
/*
 * Note for callers: when the INODE_EXTREF lookup below is reached, @key is
 * modified in place (its type and offset are rewritten).
 */
4465 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4466 char *name, int namelen, u64 index,
4467 unsigned int ext_ref)
4469 struct btrfs_path path;
4470 struct btrfs_inode_ref *ref;
4471 struct btrfs_inode_extref *extref;
4472 struct extent_buffer *node;
4473 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First pass: look the name up in the INODE_REF item addressed by @key. */
4484 btrfs_init_path(&path);
4485 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4487 ret = INODE_REF_MISSING;
4491 node = path.nodes[0];
4492 slot = path.slots[0];
4494 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4495 total = btrfs_item_size_nr(node, slot);
4497 /* Iterate all entry of INODE_REF */
4498 while (cur < total) {
4499 ret = INODE_REF_MISSING;
4501 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4502 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 means the caller does not want the index compared. */
4503 if (index != (u64)-1 && index != ref_index)
/* A name running past the item or BTRFS_NAME_LEN is reported and clamped. */
4506 if (cur + sizeof(*ref) + ref_namelen > total ||
4507 ref_namelen > BTRFS_NAME_LEN) {
4508 warning("root %llu INODE %s[%llu %llu] name too long",
4510 key->type == BTRFS_INODE_REF_KEY ?
4512 key->objectid, key->offset);
4514 if (cur + sizeof(*ref) > total)
4516 len = min_t(u32, total - cur - sizeof(*ref),
4522 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4525 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header and name to reach the next ref packed in this item. */
4531 len = sizeof(*ref) + ref_namelen;
4532 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4537 /* Skip if not support EXTENDED_IREF feature */
4541 btrfs_release_path(&path);
4542 btrfs_init_path(&path);
/*
 * Second pass: turn @key into the INODE_EXTREF key. The old offset is the
 * parent directory id; the new offset is the extref hash of (dir, name).
 */
4544 dir_id = key->offset;
4545 key->type = BTRFS_INODE_EXTREF_KEY;
4546 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4548 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4550 ret = INODE_REF_MISSING;
4554 node = path.nodes[0];
4555 slot = path.slots[0];
4557 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4559 total = btrfs_item_size_nr(node, slot);
4561 /* Iterate all entry of INODE_EXTREF */
4562 while (cur < total) {
4563 ret = INODE_REF_MISSING;
4565 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4566 ref_index = btrfs_inode_extref_index(node, extref);
4567 parent = btrfs_inode_extref_parent(node, extref);
4568 if (index != (u64)-1 && index != ref_index)
/* Entries for a different parent directory are not the one wanted. */
4571 if (parent != dir_id)
4574 if (ref_namelen <= BTRFS_NAME_LEN) {
4577 len = BTRFS_NAME_LEN;
4578 warning("root %llu INODE %s[%llu %llu] name too long",
4580 key->type == BTRFS_INODE_REF_KEY ?
4582 key->objectid, key->offset);
4584 read_extent_buffer(node, ref_namebuf,
4585 (unsigned long)(extref + 1), len);
4587 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header and name to reach the next extref in this item. */
4594 len = sizeof(*extref) + ref_namelen;
4595 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4600 btrfs_release_path(&path);
4605 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4606 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4608 * @root: the root of the fs/file tree
4609 * @key: the key of the DIR_ITEM/DIR_INDEX
4610 * @size: the st_size of the INODE_ITEM
4611 * @ext_ref: the EXTENDED_IREF feature
4613 * Return 0 if no error occurred.
4615 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4616 struct extent_buffer *node, int slot, u64 *size,
4617 unsigned int ext_ref)
4619 struct btrfs_dir_item *di;
4620 struct btrfs_inode_item *ii;
4621 struct btrfs_path path;
4622 struct btrfs_key location;
4623 char namebuf[BTRFS_NAME_LEN] = {0};
4636 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4637 * ignore index check.
4639 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4641 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4642 total = btrfs_item_size_nr(node, slot);
/* Walk every dir entry packed into this item; cur/total bound the walk. */
4644 while (cur < total) {
4645 data_len = btrfs_dir_data_len(node, di);
4647 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4648 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4649 "DIR_ITEM" : "DIR_INDEX",
4650 key->objectid, key->offset, data_len);
4652 name_len = btrfs_dir_name_len(node, di);
/* A name running past the item or BTRFS_NAME_LEN is reported and clamped. */
4653 if (cur + sizeof(*di) + name_len > total ||
4654 name_len > BTRFS_NAME_LEN) {
4655 warning("root %llu %s[%llu %llu] name too long",
4657 key->type == BTRFS_DIR_ITEM_KEY ?
4658 "DIR_ITEM" : "DIR_INDEX",
4659 key->objectid, key->offset);
4661 if (cur + sizeof(*di) > total)
4663 len = min_t(u32, total - cur - sizeof(*di),
/* Name lengths are accumulated into *size for the caller. */
4668 (*size) += name_len;
4670 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4671 filetype = btrfs_dir_type(node, di);
/* For DIR_ITEM the key offset must equal the hash of the stored name. */
4673 if (key->type == BTRFS_DIR_ITEM_KEY &&
4674 key->offset != btrfs_name_hash(namebuf, len)) {
4676 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4677 root->objectid, key->objectid, key->offset,
4678 namebuf, len, filetype, key->offset,
4679 btrfs_name_hash(namebuf, len));
4682 btrfs_init_path(&path);
4683 btrfs_dir_item_key_to_cpu(node, di, &location);
4685 /* Ignore related ROOT_ITEM check */
4686 if (location.type == BTRFS_ROOT_ITEM_KEY)
4689 /* Check relative INODE_ITEM(existence/filetype) */
4690 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4692 err |= INODE_ITEM_MISSING;
4693 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4694 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4695 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4696 key->offset, location.objectid, name_len,
4701 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4702 struct btrfs_inode_item);
4703 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The file type recorded in the dir entry must match the inode's mode. */
4705 if (imode_to_type(mode) != filetype) {
4706 err |= INODE_ITEM_MISMATCH;
4707 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4708 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4709 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4710 key->offset, name_len, namebuf, filetype);
/* Reuse location as the INODE_REF key: (inode, INODE_REF, this directory). */
4713 /* Check relative INODE_REF/INODE_EXTREF */
4714 location.type = BTRFS_INODE_REF_KEY;
4715 location.offset = key->objectid;
4716 ret = find_inode_ref(root, &location, namebuf, len,
4719 if (ret & INODE_REF_MISSING)
4720 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4721 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4722 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4723 key->offset, name_len, namebuf, filetype);
/* Step over header, name and data to reach the next packed entry. */
4726 btrfs_release_path(&path);
4727 len = sizeof(*di) + name_len + data_len;
4728 di = (struct btrfs_dir_item *)((char *)di + len);
/* Bytes left over after a DIR_INDEX entry mean more than one entry. */
4731 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4732 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4733 root->objectid, key->objectid, key->offset);
4742 * Check file extent datasum/hole, update the size of the file extents,
4743 * check and update the last offset of the file extent.
4745 * @root: the root of fs/file tree.
4746 * @fkey: the key of the file extent.
4747 * @nodatasum: INODE_NODATASUM feature.
4748 * @size: the sum of all EXTENT_DATA items size for this inode.
4749 * @end: the offset of the last extent.
4751 * Return 0 if no error occurred.
4753 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4754 struct extent_buffer *node, int slot,
4755 unsigned int nodatasum, u64 *size, u64 *end)
4757 struct btrfs_file_extent_item *fi;
4760 u64 extent_num_bytes;
4762 u64 csum_found; /* In byte size, sectorsize aligned */
4763 u64 search_start; /* Logical range start we search for csum */
4764 u64 search_len; /* Logical range len we search for csum */
4765 unsigned int extent_type;
4766 unsigned int is_hole;
4771 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4773 /* Check inline extent */
4774 extent_type = btrfs_file_extent_type(node, fi);
4775 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4776 struct btrfs_item *e = btrfs_item_nr(slot);
4777 u32 item_inline_len;
4779 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4780 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4781 compressed = btrfs_file_extent_compression(node, fi);
4782 if (extent_num_bytes == 0) {
4784 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4785 root->objectid, fkey->objectid, fkey->offset);
4786 err |= FILE_EXTENT_ERROR;
/* Only uncompressed inline data is expected to match the item payload size. */
4788 if (!compressed && extent_num_bytes != item_inline_len) {
4790 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4791 root->objectid, fkey->objectid, fkey->offset,
4792 extent_num_bytes, item_inline_len);
4793 err |= FILE_EXTENT_ERROR;
/* Inline extents advance both the running end offset and the size total. */
4795 *end += extent_num_bytes;
4796 *size += extent_num_bytes;
4800 /* Check extent type */
4801 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4802 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4803 err |= FILE_EXTENT_ERROR;
4804 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4805 root->objectid, fkey->objectid, fkey->offset);
4809 /* Check REG_EXTENT/PREALLOC_EXTENT */
4810 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4811 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4812 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4813 extent_offset = btrfs_file_extent_offset(node, fi);
4814 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk address nor an on-disk length. */
4815 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4818 * Check EXTENT_DATA csum
4820 * For plain (uncompressed) extent, we should only check the range
4821 * we're referring to, as it's possible that part of prealloc extent
4822 * has been written, and has csum:
4824 * |<--- Original large preallocated extent A ---->|
4825 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4828 * For compressed extent, we should check the whole range.
4831 search_start = disk_bytenr + extent_offset;
4832 search_len = extent_num_bytes;
4834 search_start = disk_bytenr;
4835 search_len = disk_num_bytes;
/* Count how many bytes of the chosen range are covered by csum items. */
4837 ret = count_csum_range(root, search_start, search_len, &csum_found);
4838 if (csum_found > 0 && nodatasum) {
4839 err |= ODD_CSUM_ITEM;
4840 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4841 root->objectid, fkey->objectid, fkey->offset);
4842 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4843 !is_hole && (ret < 0 || csum_found < search_len)) {
4844 err |= CSUM_ITEM_MISSING;
4845 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4846 root->objectid, fkey->objectid, fkey->offset,
4847 csum_found, search_len);
4848 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4849 err |= ODD_CSUM_ITEM;
4850 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4851 root->objectid, fkey->objectid, fkey->offset, csum_found);
/*
 * Without the no_holes feature, each extent must start exactly where the
 * previous one ended (*end).
 */
4854 /* Check EXTENT_DATA hole */
4855 if (!no_holes && *end != fkey->offset) {
4856 err |= FILE_EXTENT_ERROR;
4857 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4858 root->objectid, fkey->objectid, fkey->offset);
4861 *end += extent_num_bytes;
4863 *size += extent_num_bytes;
4869 * Check INODE_ITEM and related ITEMs (the same inode number)
4870 * 1. check link count
4871 * 2. check inode ref/extref
4872 * 3. check dir item/index
4874 * @ext_ref: the EXTENDED_IREF feature
4876 * Return 0 if no error occurred.
4877 * Return >0 for error, or when the traversal is done (by error bitmap)
4879 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4880 unsigned int ext_ref)
4882 struct extent_buffer *node;
4883 struct btrfs_inode_item *ii;
4884 struct btrfs_key key;
4893 u64 extent_size = 0;
4895 unsigned int nodatasum;
4900 node = path->nodes[0];
4901 slot = path->slots[0];
4903 btrfs_item_key_to_cpu(node, &key, slot);
4904 inode_id = key.objectid;
/* Items under BTRFS_ORPHAN_OBJECTID are not inodes: just step past them. */
4906 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4907 ret = btrfs_next_item(root, path);
/* Cache the INODE_ITEM fields that are verified after the item walk. */
4913 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4914 isize = btrfs_inode_size(node, ii);
4915 nbytes = btrfs_inode_nbytes(node, ii);
4916 mode = btrfs_inode_mode(node, ii);
4917 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4918 nlink = btrfs_inode_nlink(node, ii);
4919 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4922 ret = btrfs_next_item(root, path);
4924 /* out will fill 'err' using current statistics */
4926 } else if (ret > 0) {
4931 node = path->nodes[0];
4932 slot = path->slots[0];
4933 btrfs_item_key_to_cpu(node, &key, slot);
/* Items belonging to this inode share its objectid. */
4934 if (key.objectid != inode_id)
/* Dispatch on item type; the helpers accumulate refs, size and extent_size. */
4938 case BTRFS_INODE_REF_KEY:
4939 ret = check_inode_ref(root, &key, node, slot, &refs,
4943 case BTRFS_INODE_EXTREF_KEY:
4944 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4945 warning("root %llu EXTREF[%llu %llu] isn't supported",
4946 root->objectid, key.objectid,
4948 ret = check_inode_extref(root, &key, node, slot, &refs,
4952 case BTRFS_DIR_ITEM_KEY:
4953 case BTRFS_DIR_INDEX_KEY:
4955 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4956 root->objectid, inode_id,
4957 imode_to_type(mode), key.objectid,
4960 ret = check_dir_item(root, &key, node, slot, &size,
4964 case BTRFS_EXTENT_DATA_KEY:
4966 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4967 root->objectid, inode_id, key.objectid,
4970 ret = check_file_extent(root, &key, node, slot,
4971 nodatasum, &extent_size,
4975 case BTRFS_XATTR_ITEM_KEY:
4978 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4979 key.objectid, key.type, key.offset);
4984 /* verify INODE_ITEM nlink/isize/nbytes */
4987 err |= LINK_COUNT_ERROR;
4988 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4989 root->objectid, inode_id, nlink);
4993 * Just a warning, as dir inode nbytes is just an
4994 * instructive value.
4996 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4997 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4998 root->objectid, inode_id,
4999 root->fs_info->nodesize);
/* For a directory, isize is compared with the total from check_dir_item(). */
5002 if (isize != size) {
5004 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5005 root->objectid, inode_id, isize, size);
/* nlink must equal the number of refs counted during the item walk. */
5008 if (nlink != refs) {
5009 err |= LINK_COUNT_ERROR;
5010 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5011 root->objectid, inode_id, nlink, refs);
5012 } else if (!nlink) {
5016 if (!nbytes && !no_holes && extent_end < isize) {
5017 err |= NBYTES_ERROR;
5018 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5019 root->objectid, inode_id, isize);
/* nbytes must equal the extent bytes summed by check_file_extent(). */
5022 if (nbytes != extent_size) {
5023 err |= NBYTES_ERROR;
5024 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5025 root->objectid, inode_id, nbytes, extent_size);
/*
 * Check the first inode of @root: search for the INODE_ITEM with objectid
 * BTRFS_FIRST_FREE_OBJECTID, report INODE_ITEM_MISSING when it is absent,
 * and otherwise run check_inode_item() on it.
 *
 * @root:    the fs/file tree to check
 * @ext_ref: the EXTENDED_IREF feature, passed through to check_inode_item()
 */
5032 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5034 struct btrfs_path path;
5035 struct btrfs_key key;
5039 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5040 key.type = BTRFS_INODE_ITEM_KEY;
5043 /* For root being dropped, we don't need to check first inode */
5044 if (btrfs_root_refs(&root->root_item) == 0 &&
5045 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5049 btrfs_init_path(&path);
5051 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5056 err |= INODE_ITEM_MISSING;
5057 error("first inode item of root %llu is missing",
/* The inode item exists: fold its check result into the error bitmap. */
5061 err |= check_inode_item(root, &path, ext_ref);
5066 btrfs_release_path(&path);
5071 * Iterate over all items in the tree and call check_inode_item() to check them.
5073 * @root: the root of the tree to be checked.
5074 * @ext_ref: the EXTENDED_IREF feature
5076 * Return 0 if no error found.
5077 * Return <0 for error.
5079 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5081 struct btrfs_path path;
5082 struct node_refs nrefs;
5083 struct btrfs_root_item *root_item = &root->root_item;
5089 * We need to manually check the first inode item(256)
5090 * As the following traversal function will only start from
5091 * the first inode item in the leaf, if inode item(256) is missing
5092 * we will just skip it forever.
5094 ret = check_fs_first_inode(root, ext_ref);
5098 memset(&nrefs, 0, sizeof(nrefs));
5099 level = btrfs_header_level(root->node);
5100 btrfs_init_path(&path);
/*
 * A root that still has refs, or has no recorded drop progress, is walked
 * from its root node; the path takes its own reference on that node.
 */
5102 if (btrfs_root_refs(root_item) > 0 ||
5103 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5104 path.nodes[level] = root->node;
5105 path.slots[level] = 0;
5106 extent_buffer_get(root->node);
5108 struct btrfs_key key;
/* Otherwise start at the recorded drop_progress key and drop_level. */
5110 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5111 level = root_item->drop_level;
5112 path.lowest_level = level;
5113 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Alternate walking down and back up the tree. */
5120 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5123 /* if ret is negative, walk shall stop */
5129 ret = walk_up_tree_v2(root, &path, &level);
5131 /* Normal exit, reset ret to err */
5138 btrfs_release_path(&path);
5143 * Find the relative ref for root_ref and root_backref.
5145 * @root: the root of the root tree.
5146 * @ref_key: the key of the root ref.
5148 * Return 0 if no error occurred.
5150 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5151 struct extent_buffer *node, int slot)
5153 struct btrfs_path path;
5154 struct btrfs_key key;
5155 struct btrfs_root_ref *ref;
5156 struct btrfs_root_ref *backref;
5157 char ref_name[BTRFS_NAME_LEN] = {0};
5158 char backref_name[BTRFS_NAME_LEN] = {0};
5164 u32 backref_namelen;
/* Read dirid, sequence and name length of the ref being checked. */
5169 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5170 ref_dirid = btrfs_root_ref_dirid(node, ref);
5171 ref_seq = btrfs_root_ref_sequence(node, ref);
5172 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are clamped with a warning. */
5174 if (ref_namelen <= BTRFS_NAME_LEN) {
5177 len = BTRFS_NAME_LEN;
5178 warning("%s[%llu %llu] ref_name too long",
5179 ref_key->type == BTRFS_ROOT_REF_KEY ?
5180 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5183 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
/*
 * The counterpart key swaps objectid and offset; the type arithmetic maps
 * ROOT_REF to ROOT_BACKREF and ROOT_BACKREF to ROOT_REF.
 */
5185 /* Find relative root_ref */
5186 key.objectid = ref_key->offset;
5187 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5188 key.offset = ref_key->objectid;
5190 btrfs_init_path(&path);
5191 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5193 err |= ROOT_REF_MISSING;
5194 error("%s[%llu %llu] couldn't find relative ref",
5195 ref_key->type == BTRFS_ROOT_REF_KEY ?
5196 "ROOT_REF" : "ROOT_BACKREF",
5197 ref_key->objectid, ref_key->offset);
/* Read the same three fields from the counterpart item. */
5201 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5202 struct btrfs_root_ref);
5203 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5204 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5205 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5207 if (backref_namelen <= BTRFS_NAME_LEN) {
5208 len = backref_namelen;
5210 len = BTRFS_NAME_LEN;
5211 warning("%s[%llu %llu] ref_name too long",
5212 key.type == BTRFS_ROOT_REF_KEY ?
5213 "ROOT_REF" : "ROOT_BACKREF",
5214 key.objectid, key.offset);
5216 read_extent_buffer(path.nodes[0], backref_name,
5217 (unsigned long)(backref + 1), len);
/* Both items must agree on dirid, sequence, name length and name bytes. */
5219 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5220 ref_namelen != backref_namelen ||
5221 strncmp(ref_name, backref_name, len)) {
5222 err |= ROOT_REF_MISMATCH;
5223 error("%s[%llu %llu] mismatch relative ref",
5224 ref_key->type == BTRFS_ROOT_REF_KEY ?
5225 "ROOT_REF" : "ROOT_BACKREF",
5226 ref_key->objectid, ref_key->offset);
5229 btrfs_release_path(&path);
5234 * Check all fs/file tree in low_memory mode.
5236 * 1. for fs tree root item, call check_fs_root_v2()
5237 * 2. for fs tree root ref/backref, call check_root_ref()
5239 * Return 0 if no error occurred.
5241 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5243 struct btrfs_root *tree_root = fs_info->tree_root;
5244 struct btrfs_root *cur_root = NULL;
5245 struct btrfs_path path;
5246 struct btrfs_key key;
5247 struct extent_buffer *node;
5248 unsigned int ext_ref;
/* Whether the filesystem has the EXTENDED_IREF incompat feature set. */
5253 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
/* Start the scan of the tree root at the default fs tree's ROOT_ITEM. */
5255 btrfs_init_path(&path);
5256 key.objectid = BTRFS_FS_TREE_OBJECTID;
5258 key.type = BTRFS_ROOT_ITEM_KEY;
5260 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5264 } else if (ret > 0) {
5270 node = path.nodes[0];
5271 slot = path.slots[0];
5272 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are outside the scanned range. */
5273 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5275 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5276 fs_root_objectid(key.objectid)) {
5277 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5278 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5281 key.offset = (u64)-1;
5282 cur_root = btrfs_read_fs_root(fs_info, &key);
5285 if (IS_ERR(cur_root)) {
5286 error("Fail to read fs/subvol tree: %lld",
5292 ret = check_fs_root_v2(cur_root, ext_ref);
/* Roots obtained via btrfs_read_fs_root_no_cache() are released here. */
5295 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5296 btrfs_free_fs_root(cur_root);
5297 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5298 key.type == BTRFS_ROOT_BACKREF_KEY) {
5299 ret = check_root_ref(tree_root, &key, node, slot);
/* Advance to the next item of the tree root. */
5303 ret = btrfs_next_item(tree_root, &path);
5313 btrfs_release_path(&path);
/*
 * Walk every backref attached to @rec and compare it with what was found:
 * presence in the extent tree, whether tree backrefs were referenced, data
 * backref counts, disk bytenr and byte length, and finally the sum of found
 * refs against rec->refs. Inconsistencies are described on stderr.
 * NOTE(review): @print_errs presumably gates the fprintf diagnostics, and a
 * zero return presumably means "all consistent" (maybe_free_extent_rec()
 * tests for !result) -- confirm.
 */
5317 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5319 struct list_head *cur = rec->backrefs.next;
5320 struct extent_backref *back;
5321 struct tree_backref *tback;
5322 struct data_backref *dback;
5326 while(cur != &rec->backrefs) {
5327 back = to_extent_backref(cur);
/* Backref was never seen in the extent tree. */
5329 if (!back->found_extent_tree) {
5333 if (back->is_data) {
5334 dback = to_data_backref(back);
5335 fprintf(stderr, "Backref %llu %s %llu"
5336 " owner %llu offset %llu num_refs %lu"
5337 " not found in extent tree\n",
5338 (unsigned long long)rec->start,
5339 back->full_backref ?
5341 back->full_backref ?
5342 (unsigned long long)dback->parent:
5343 (unsigned long long)dback->root,
5344 (unsigned long long)dback->owner,
5345 (unsigned long long)dback->offset,
5346 (unsigned long)dback->num_refs);
5348 tback = to_tree_backref(back);
5349 fprintf(stderr, "Backref %llu parent %llu"
5350 " root %llu not found in extent tree\n",
5351 (unsigned long long)rec->start,
5352 (unsigned long long)tback->parent,
5353 (unsigned long long)tback->root);
/* Tree backref recorded but no reference to it was found. */
5356 if (!back->is_data && !back->found_ref) {
5360 tback = to_tree_backref(back);
5361 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5362 (unsigned long long)rec->start,
5363 back->full_backref ? "parent" : "root",
5364 back->full_backref ?
5365 (unsigned long long)tback->parent :
5366 (unsigned long long)tback->root, back);
/* Data backref: check the found count, disk bytenr and length. */
5368 if (back->is_data) {
5369 dback = to_data_backref(back);
5370 if (dback->found_ref != dback->num_refs) {
5374 fprintf(stderr, "Incorrect local backref count"
5375 " on %llu %s %llu owner %llu"
5376 " offset %llu found %u wanted %u back %p\n",
5377 (unsigned long long)rec->start,
5378 back->full_backref ?
5380 back->full_backref ?
5381 (unsigned long long)dback->parent:
5382 (unsigned long long)dback->root,
5383 (unsigned long long)dback->owner,
5384 (unsigned long long)dback->offset,
5385 dback->found_ref, dback->num_refs, back);
5387 if (dback->disk_bytenr != rec->start) {
5391 fprintf(stderr, "Backref disk bytenr does not"
5392 " match extent record, bytenr=%llu, "
5393 "ref bytenr=%llu\n",
5394 (unsigned long long)rec->start,
5395 (unsigned long long)dback->disk_bytenr);
5398 if (dback->bytes != rec->nr) {
5402 fprintf(stderr, "Backref bytes do not match "
5403 "extent backref, bytenr=%llu, ref "
5404 "bytes=%llu, backref bytes=%llu\n",
5405 (unsigned long long)rec->start,
5406 (unsigned long long)rec->nr,
5407 (unsigned long long)dback->bytes);
/* Accumulate the number of references found across all backrefs. */
5410 if (!back->is_data) {
5413 dback = to_data_backref(back);
5414 found += dback->found_ref;
/* The accumulated total must match the ref count stored in the record. */
5417 if (found != rec->refs) {
5421 fprintf(stderr, "Incorrect global backref count "
5422 "on %llu found %llu wanted %llu\n",
5423 (unsigned long long)rec->start,
5424 (unsigned long long)found,
5425 (unsigned long long)rec->refs);
/*
 * Drain rec->backrefs: while the list is non-empty, take its first entry
 * and convert it to the containing extent_backref.
 * NOTE(review): the statements that unlink and release each entry, and the
 * return statement, are not visible in this listing -- confirm in the file.
 */
5431 static int free_all_extent_backrefs(struct extent_record *rec)
5433 struct extent_backref *back;
5434 struct list_head *cur;
5435 while (!list_empty(&rec->backrefs)) {
5436 cur = rec->backrefs.next;
5437 back = to_extent_backref(cur);
/*
 * Tear down the extent records held in @extent_cache: fetch the first cache
 * extent, map it to its extent_record, remove it from the tree and release
 * the record's backrefs.
 * NOTE(review): the surrounding loop, its termination test and the release
 * of the record itself are not visible in this listing.
 */
5444 static void free_extent_record_cache(struct cache_tree *extent_cache)
5446 struct cache_extent *cache;
5447 struct extent_record *rec;
5450 cache = first_cache_extent(extent_cache);
5453 rec = container_of(cache, struct extent_record, cache);
5454 remove_cache_extent(extent_cache, cache);
5455 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is removed only when all of the following hold:
 *  - content_checked and owner_ref_checked are set,
 *  - extent_item_refs equals refs, and refs is non-zero,
 *  - there are no duplicates,
 *  - all_backpointers_checked(rec, 0) returns 0,
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set.
 *
 * On removal the record leaves the cache tree, its backrefs are released
 * and it is unlinked from whatever list rec->list is on.
 * NOTE(review): the return value and any release of @rec itself are in
 * lines not visible here.
 */
5460 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5461 struct extent_record *rec)
5463 if (rec->content_checked && rec->owner_ref_checked &&
5464 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5465 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5466 !rec->bad_full_backref && !rec->crossing_stripes &&
5467 !rec->wrong_chunk_type) {
5468 remove_cache_extent(extent_cache, &rec->cache);
5469 free_all_extent_backrefs(rec);
5470 list_del_init(&rec->list);
/*
 * Check that tree block @buf is referenced by the tree named in its header
 * owner field.
 *
 * Two strategies are visible:
 *  1. Walk rec->backrefs looking at entries with found_ref set and
 *     full_backref clear, comparing the backref root with the header owner.
 *  2. Read the owner's fs root and search it down to level + 1 using the
 *     first key of @buf, then compare the parent's block pointer at the
 *     found slot with buf->start.
 *
 * The final statement returns 0 when the parent lookup matched, 1 otherwise.
 * NOTE(review): the outcomes of the early tests (continue/return) are in
 * lines not visible in this listing.
 */
5476 static int check_owner_ref(struct btrfs_root *root,
5477 struct extent_record *rec,
5478 struct extent_buffer *buf)
5480 struct extent_backref *node;
5481 struct tree_backref *back;
5482 struct btrfs_root *ref_root;
5483 struct btrfs_key key;
5484 struct btrfs_path path;
5485 struct extent_buffer *parent;
5490 list_for_each_entry(node, &rec->backrefs, list) {
5493 if (!node->found_ref)
5495 if (node->full_backref)
5497 back = to_tree_backref(node);
5498 if (btrfs_header_owner(buf) == back->root)
/* A root block reaching this point is treated as a fatal inconsistency. */
5501 BUG_ON(rec->is_root);
5503 /* try to find the block by search corresponding fs tree */
5504 key.objectid = btrfs_header_owner(buf);
5505 key.type = BTRFS_ROOT_ITEM_KEY;
5506 key.offset = (u64)-1;
5508 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5509 if (IS_ERR(ref_root))
5512 level = btrfs_header_level(buf);
/* Reuse key: it now becomes the first key stored in buf. */
5514 btrfs_item_key_to_cpu(buf, &key, 0);
5516 btrfs_node_key_to_cpu(buf, &key, 0);
5518 btrfs_init_path(&path);
/* Stop the search one level above buf so the parent node is in the path. */
5519 path.lowest_level = level + 1;
5520 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5524 parent = path.nodes[level + 1];
5525 if (parent && buf->start == btrfs_node_blockptr(parent,
5526 path.slots[level + 1]))
5529 btrfs_release_path(&path);
5530 return found ? 0 : 1;
/*
 * Walk rec->backrefs and inspect each entry as a tree backref, testing its
 * full_backref flag and whether its root is BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): presumably reports whether the record belongs to the extent
 * tree; the result variable, the branch outcomes and the list advance are in
 * lines not visible in this listing.
 */
5533 static int is_extent_tree_record(struct extent_record *rec)
5535 struct list_head *cur = rec->backrefs.next;
5536 struct extent_backref *node;
5537 struct tree_backref *back;
5540 while(cur != &rec->backrefs) {
5541 node = to_extent_backref(cur);
5545 back = to_tree_backref(node);
5546 if (node->full_backref)
5548 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, len) in @extent_cache and, when
 * is_extent_tree_record() accepts it, register a corrupt-extent entry via
 * btrfs_add_corrupt_extent_record() keyed by the record's parent key.
 * Returns that helper's result on the visible path.
 * NOTE(review): the remaining parameters and the early-exit results are in
 * lines not visible in this listing.
 */
5555 static int record_bad_block_io(struct btrfs_fs_info *info,
5556 struct cache_tree *extent_cache,
5559 struct extent_record *rec;
5560 struct cache_extent *cache;
5561 struct btrfs_key key;
5563 cache = lookup_cache_extent(extent_cache, start, len);
5567 rec = container_of(cache, struct extent_record, cache);
5568 if (!is_extent_tree_record(rec))
/* parent_key is stored in disk format; convert before handing it on. */
5571 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5572 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are read
 * out and written back in swapped positions, and the low keys above the
 * block are fixed up from the block's first key.
 *
 * For a leaf the two items' payloads are copied into temporary buffers and
 * written back, the item offsets and sizes are exchanged, and the two keys
 * are exchanged through btrfs_set_item_key_unsafe().
 * NOTE(review): allocation-failure handling, the release of the temporary
 * buffers and the return value are in lines not visible in this listing.
 */
5575 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5576 struct extent_buffer *buf, int slot)
5578 if (btrfs_header_level(buf)) {
5579 struct btrfs_key_ptr ptr1, ptr2;
5581 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5582 sizeof(struct btrfs_key_ptr));
5583 read_extent_buffer(buf, &ptr2,
5584 btrfs_node_key_ptr_offset(slot + 1),
5585 sizeof(struct btrfs_key_ptr));
5586 write_extent_buffer(buf, &ptr1,
5587 btrfs_node_key_ptr_offset(slot + 1),
5588 sizeof(struct btrfs_key_ptr));
5589 write_extent_buffer(buf, &ptr2,
5590 btrfs_node_key_ptr_offset(slot),
5591 sizeof(struct btrfs_key_ptr));
5593 struct btrfs_disk_key key;
/* Propagate the block's (possibly new) first key to the levels above. */
5594 btrfs_node_key(buf, &key, 0);
5595 btrfs_fixup_low_keys(root, path, &key,
5596 btrfs_header_level(buf) + 1);
5599 struct btrfs_item *item1, *item2;
5600 struct btrfs_key k1, k2;
5601 char *item1_data, *item2_data;
5602 u32 item1_offset, item2_offset, item1_size, item2_size;
5604 item1 = btrfs_item_nr(slot);
5605 item2 = btrfs_item_nr(slot + 1);
5606 btrfs_item_key_to_cpu(buf, &k1, slot);
5607 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5608 item1_offset = btrfs_item_offset(buf, item1);
5609 item2_offset = btrfs_item_offset(buf, item2);
5610 item1_size = btrfs_item_size(buf, item1);
5611 item2_size = btrfs_item_size(buf, item2);
/* Temporary copies of both payloads, each sized to its own item. */
5613 item1_data = malloc(item1_size);
5616 item2_data = malloc(item2_size);
5622 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5623 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write below uses the *other* item's size with this
 * item's buffer (item1_data with item2_size and vice versa). When the two
 * sizes differ this looks like it reads past the smaller buffer -- confirm.
 */
5625 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5626 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5630 btrfs_set_item_offset(buf, item1, item2_offset);
5631 btrfs_set_item_offset(buf, item2, item1_offset);
5632 btrfs_set_item_size(buf, item1, item2_size);
5633 btrfs_set_item_size(buf, item2, item1_size);
/* The key setter works on path->slots[0], so point it at each slot in turn. */
5635 path->slots[0] = slot;
5636 btrfs_set_item_key_unsafe(root, path, &k2);
5637 path->slots[0] = slot + 1;
5638 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys i and i + 1 are compared with btrfs_comp_cpu_keys(); pairs
 * for which the comparison is not negative are handed to swap_values() and
 * the buffer is marked dirty.
 * NOTE(review): the node/leaf selection test, the loop-control statements
 * after each branch and the return value are in lines not visible here.
 * NOTE(review): the loop bound is btrfs_header_nritems(buf) - 1; if that
 * helper returns an unsigned type, an empty block would make the bound wrap
 * -- confirm callers never pass one.
 */
5643 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5645 struct extent_buffer *buf;
5646 struct btrfs_key k1, k2;
5648 int level = path->lowest_level;
5651 buf = path->nodes[level];
5652 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5654 btrfs_node_key_to_cpu(buf, &k1, i);
5655 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5657 btrfs_item_key_to_cpu(buf, &k1, i);
5658 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5660 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5662 ret = swap_values(root, path, buf, i);
5665 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only items whose key type is one of DIR_INDEX, EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF or EXTENT_DATA_REF pass the type filter. For those, a
 * message is printed, the following item headers are moved down by one
 * position, nritems is decremented and the buffer is marked dirty. Only the
 * item header array is moved here; item payload bytes are not touched.
 * NOTE(review): the filter's failure result, the condition guarding the
 * low-key fixup and the return value are in lines not visible here.
 */
5671 static int delete_bogus_item(struct btrfs_root *root,
5672 struct btrfs_path *path,
5673 struct extent_buffer *buf, int slot)
5675 struct btrfs_key key;
5676 int nritems = btrfs_header_nritems(buf);
5678 btrfs_item_key_to_cpu(buf, &key, slot);
5680 /* These are all the keys we can deal with missing. */
5681 if (key.type != BTRFS_DIR_INDEX_KEY &&
5682 key.type != BTRFS_EXTENT_ITEM_KEY &&
5683 key.type != BTRFS_METADATA_ITEM_KEY &&
5684 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5685 key.type != BTRFS_EXTENT_DATA_REF_KEY)
/* NOTE(review): buf->start is passed to %llu without the cast used at other call sites. */
5688 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5689 (unsigned long long)key.objectid, key.type,
5690 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the removed slot. */
5691 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5692 btrfs_item_nr_offset(slot + 1),
5693 sizeof(struct btrfs_item) *
5694 (nritems - slot - 1));
5695 btrfs_set_header_nritems(buf, nritems - 1);
5697 struct btrfs_disk_key disk_key;
/* Push the leaf's current first key up into the parent levels. */
5699 btrfs_item_key(buf, &disk_key, 0);
5700 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5702 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Expected layout, as checked below: the first item's data must end at
 * BTRFS_LEAF_DATA_SIZE(root) and every later item's data must end where the
 * previous item's data starts.
 *  - If an item ends beyond its limit, delete_bogus_item() is tried and the
 *    "can't fix" message is printed on the visible fall-through path.
 *  - If an item ends before its limit, its data is moved up by the size of
 *    the gap and its offset field is updated.
 * NOTE(review): the restart/abort control flow after delete_bogus_item(),
 * the test guarding the shift and the function's tail are in lines not
 * visible in this listing.
 */
5706 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5708 struct extent_buffer *buf;
5712 /* We should only get this for leaves */
5713 BUG_ON(path->lowest_level);
5714 buf = path->nodes[0];
5716 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5717 unsigned int shift = 0, offset;
/* First item: its data must end exactly at the end of the leaf data area. */
5719 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5720 BTRFS_LEAF_DATA_SIZE(root)) {
5721 if (btrfs_item_end_nr(buf, i) >
5722 BTRFS_LEAF_DATA_SIZE(root)) {
5723 ret = delete_bogus_item(root, path, buf, i);
5726 fprintf(stderr, "item is off the end of the "
5727 "leaf, can't fix\n");
5731 shift = BTRFS_LEAF_DATA_SIZE(root) -
5732 btrfs_item_end_nr(buf, i);
/* Later items: data must end where the previous item's data begins. */
5733 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5734 btrfs_item_offset_nr(buf, i - 1)) {
5735 if (btrfs_item_end_nr(buf, i) >
5736 btrfs_item_offset_nr(buf, i - 1)) {
5737 ret = delete_bogus_item(root, path, buf, i);
5740 fprintf(stderr, "items overlap, can't fix\n");
5744 shift = btrfs_item_offset_nr(buf, i - 1) -
5745 btrfs_item_end_nr(buf, i);
5750 printf("Shifting item nr %d by %u bytes in block %llu\n",
5751 i, shift, (unsigned long long)buf->start);
5752 offset = btrfs_item_offset_nr(buf, i);
/* Move the payload towards the end of the leaf by shift bytes. */
5753 memmove_extent_buffer(buf,
5754 btrfs_leaf_data(buf) + offset + shift,
5755 btrfs_leaf_data(buf) + offset,
5756 btrfs_item_size_nr(buf, i));
5757 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5759 btrfs_mark_buffer_dirty(buf);
5763 * We may have moved things, in which case we want to exit so we don't
5764 * write those changes out. Once we have proper abort functionality in
5765 * progs this can be changed to something nicer.
5772 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5773 * then just return -EIO.
/*
 * Try to repair tree block @buf for the two statuses handled here:
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER (via fix_key_order()) and
 * BTRFS_TREE_BLOCK_INVALID_OFFSETS (via fix_item_offset()).
 *
 * All roots referencing buf->start are collected with
 * btrfs_find_all_roots(). For each one the fs root is read, a transaction is
 * started, the block is located with a search using the block's first key
 * (path.lowest_level set to the block's level, skip_check_block set), the
 * matching fixer is run, and the transaction is committed.
 * NOTE(review): the error tests between these steps, release of the roots
 * list and the return statements are in lines not visible in this listing.
 */
5775 static int try_to_fix_bad_block(struct btrfs_root *root,
5776 struct extent_buffer *buf,
5777 enum btrfs_tree_block_status status)
5779 struct btrfs_trans_handle *trans;
5780 struct ulist *roots;
5781 struct ulist_node *node;
5782 struct btrfs_root *search_root;
5783 struct btrfs_path path;
5784 struct ulist_iterator iter;
5785 struct btrfs_key root_key, key;
5788 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5789 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5792 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5796 btrfs_init_path(&path);
5797 ULIST_ITER_INIT(&iter);
5798 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root to repair through. */
5799 root_key.objectid = node->val;
5800 root_key.type = BTRFS_ROOT_ITEM_KEY;
5801 root_key.offset = (u64)-1;
5803 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5810 trans = btrfs_start_transaction(search_root, 0);
5811 if (IS_ERR(trans)) {
5812 ret = PTR_ERR(trans);
5816 path.lowest_level = btrfs_header_level(buf);
5817 path.skip_check_block = 1;
5818 if (path.lowest_level)
5819 btrfs_node_key_to_cpu(buf, &key, 0);
5821 btrfs_item_key_to_cpu(buf, &key, 0);
/* Search through the transaction handle with the last argument (cow) set to 1. */
5822 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5825 btrfs_commit_transaction(trans, search_root);
5828 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5829 ret = fix_key_order(search_root, &path);
5830 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5831 ret = fix_item_offset(search_root, &path);
5833 btrfs_commit_transaction(trans, search_root);
5836 btrfs_release_path(&path);
5837 btrfs_commit_transaction(trans, search_root);
5840 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record in @extent_cache.
 *
 * Visible steps:
 *  - look up the record for [buf->start, buf->len) and store the header
 *    generation, the first key's objectid (when the block has items) and
 *    the level in it;
 *  - run btrfs_check_leaf() or btrfs_check_node() against rec->parent_key;
 *  - on a non-clean status call try_to_fix_bad_block() and print
 *    "bad block" if the result is still not clean;
 *  - mark content_checked, mark owner_ref_checked either directly (when
 *    @flags has BTRFS_BLOCK_FLAG_FULL_BACKREF) or after check_owner_ref(),
 *    and finally let maybe_free_extent_rec() drop the record.
 * NOTE(review): the missing-record exit, the condition guarding the repair
 * attempt and the return statements are in lines not visible here.
 */
5844 static int check_block(struct btrfs_root *root,
5845 struct cache_tree *extent_cache,
5846 struct extent_buffer *buf, u64 flags)
5848 struct extent_record *rec;
5849 struct cache_extent *cache;
5850 struct btrfs_key key;
5851 enum btrfs_tree_block_status status;
5855 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5858 rec = container_of(cache, struct extent_record, cache);
5859 rec->generation = btrfs_header_generation(buf);
5861 level = btrfs_header_level(buf);
5862 if (btrfs_header_nritems(buf) > 0) {
5865 btrfs_item_key_to_cpu(buf, &key, 0);
5867 btrfs_node_key_to_cpu(buf, &key, 0);
5869 rec->info_objectid = key.objectid;
5871 rec->info_level = level;
5873 if (btrfs_is_leaf(buf))
5874 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5876 status = btrfs_check_node(root, &rec->parent_key, buf);
5878 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/*
 * NOTE(review): try_to_fix_bad_block() is declared to return int, yet its
 * result is stored in the status enum and compared with
 * BTRFS_TREE_BLOCK_CLEAN; that works only if that enumerator is 0 -- confirm.
 */
5880 status = try_to_fix_bad_block(root, buf, status);
5881 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5883 fprintf(stderr, "bad block %llu\n",
5884 (unsigned long long)buf->start);
5887 * Signal to callers we need to start the scan over
5888 * again since we'll have cowed blocks.
5893 rec->content_checked = 1;
5894 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5895 rec->owner_ref_checked = 1;
5897 ret = check_owner_ref(root, rec, buf);
5899 rec->owner_ref_checked = 1;
5903 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search rec->backrefs for a tree backref matching @parent or @root.
 *
 * Two matching modes are visible: one that considers only full_backref
 * entries and compares back->parent with @parent, and one that considers
 * only non-full_backref entries and compares back->root with @root.
 * NOTE(review): the test selecting between the two modes, the data-backref
 * filter, the list advance and the returned values are in lines not visible
 * in this listing.
 */
5907 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5908 u64 parent, u64 root)
5910 struct list_head *cur = rec->backrefs.next;
5911 struct extent_backref *node;
5912 struct tree_backref *back;
5914 while(cur != &rec->backrefs) {
5915 node = to_extent_backref(cur);
5919 back = to_tree_backref(node);
5921 if (!node->full_backref)
5923 if (parent == back->parent)
5926 if (node->full_backref)
5928 if (back->root == root)
/*
 * Allocate a tree_backref, zero its embedded node and append it to
 * rec->backrefs.
 *
 * Two initialisations are visible: one that stores @parent and sets
 * full_backref, and one that clears full_backref.
 * NOTE(review): the allocation-failure check, the test choosing between the
 * two initialisations, any store of @root and the return statement are in
 * lines not visible in this listing.
 */
5935 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5936 u64 parent, u64 root)
5938 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is zeroed here, not the whole structure. */
5942 memset(&ref->node, 0, sizeof(ref->node));
5944 ref->parent = parent;
5945 ref->node.full_backref = 1;
5948 ref->node.full_backref = 0;
5950 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search rec->backrefs for a data backref.
 *
 * Two matching modes are visible: full_backref entries compared by
 * back->parent, and non-full_backref entries compared by root, owner and
 * offset. In the second mode, when found_ref is set and the entry has
 * already been found, the entry's bytes and disk_bytenr are additionally
 * compared with @bytes and @disk_bytenr.
 * NOTE(review): one parameter line, the mode-selection test, the branch
 * outcomes and the return statements are in lines not visible here.
 */
5955 static struct data_backref *find_data_backref(struct extent_record *rec,
5956 u64 parent, u64 root,
5957 u64 owner, u64 offset,
5959 u64 disk_bytenr, u64 bytes)
5961 struct list_head *cur = rec->backrefs.next;
5962 struct extent_backref *node;
5963 struct data_backref *back;
5965 while(cur != &rec->backrefs) {
5966 node = to_extent_backref(cur);
5970 back = to_data_backref(node);
5972 if (!node->full_backref)
5974 if (parent == back->parent)
5977 if (node->full_backref)
5979 if (back->root == root && back->owner == owner &&
5980 back->offset == offset) {
5981 if (found_ref && node->found_ref &&
5982 (back->bytes != bytes ||
5983 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref, zero its embedded node, flag it as a data
 * backref and append it to rec->backrefs.
 *
 * Visible initialisations: one path stores @parent and sets full_backref,
 * the other stores @offset and clears full_backref. ref->bytes takes
 * max_size, and rec->max_size is raised to max_size when it is smaller.
 * NOTE(review): the last parameter line, the allocation-failure check, the
 * path-selection test, the remaining field stores and the return statement
 * are in lines not visible in this listing.
 */
5992 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5993 u64 parent, u64 root,
5994 u64 owner, u64 offset,
5997 struct data_backref *ref = malloc(sizeof(*ref));
6001 memset(&ref->node, 0, sizeof(ref->node));
6002 ref->node.is_data = 1;
6005 ref->parent = parent;
6008 ref->node.full_backref = 1;
6012 ref->offset = offset;
6013 ref->node.full_backref = 0;
6015 ref->bytes = max_size;
6018 list_add_tail(&ref->node.list, &rec->backrefs);
6019 if (max_size > rec->max_size)
6020 rec->max_size = max_size;
6024 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent's kind disagrees with the flags
 * of the block group found for rec->start (looked up through global_info).
 *
 *  - Data extent (rec->metadata clear): block group must have the DATA flag.
 *  - Metadata extent: block group must have SYSTEM or METADATA. If the
 *    record has backrefs, the first one decides the expected flag: SYSTEM
 *    when its root is the chunk tree, METADATA otherwise. A data backref on
 *    a metadata extent is flagged as wrong as well.
 * NOTE(review): the missing-block-group exit and the early returns after
 * each verdict are in lines not visible in this listing.
 */
6025 static void check_extent_type(struct extent_record *rec)
6027 struct btrfs_block_group_cache *bg_cache;
6029 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6033 /* data extent, check chunk directly*/
6034 if (!rec->metadata) {
6035 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6036 rec->wrong_chunk_type = 1;
6040 /* metadata extent, check the obvious case first */
6041 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6042 BTRFS_BLOCK_GROUP_METADATA))) {
6043 rec->wrong_chunk_type = 1;
6048 * Check SYSTEM extent, as it's also marked as metadata, we can only
6049 * make sure it's a SYSTEM extent by its backref
6051 if (!list_empty(&rec->backrefs)) {
6052 struct extent_backref *node;
6053 struct tree_backref *tback;
6056 node = to_extent_backref(rec->backrefs.next);
6057 if (node->is_data) {
6058 /* tree block shouldn't have data backref */
6059 rec->wrong_chunk_type = 1;
6062 tback = container_of(node, struct tree_backref, node);
6064 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6065 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6067 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6068 if (!(bg_cache->flags & bg_type))
6069 rec->wrong_chunk_type = 1;
6074 * Allocate a new extent record, fill default values from @tmpl and insert into
6075 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6076 * the cache, otherwise it fails.
/*
 * Build a fresh extent_record from @tmpl and insert it into @extent_cache.
 *
 * tmpl->max_size must be non-zero (BUG_ON). Fields copied from @tmpl:
 * start, max_size, found_rec, content_checked, owner_ref_checked, metadata,
 * is_root, refs, extent_item_refs, parent_generation and parent_key. The
 * duplicate count and the error flags start cleared, and the three list
 * heads are initialised empty.
 *
 * rec->nr is the larger of tmpl->nr and tmpl->max_size, whereas the cache
 * extent size is tmpl->nr itself. After insertion bytes_used grows by
 * rec->nr, the stripe-crossing check is evaluated with the nodesize and
 * check_extent_type() is run.
 * NOTE(review): the allocation and insertion failure handling, the guard
 * around the stripe check and the return value are in lines not visible
 * in this listing.
 */
6078 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6079 struct extent_record *tmpl)
6081 struct extent_record *rec;
6084 BUG_ON(tmpl->max_size == 0);
6085 rec = malloc(sizeof(*rec));
6088 rec->start = tmpl->start;
6089 rec->max_size = tmpl->max_size;
6090 rec->nr = max(tmpl->nr, tmpl->max_size);
6091 rec->found_rec = tmpl->found_rec;
6092 rec->content_checked = tmpl->content_checked;
6093 rec->owner_ref_checked = tmpl->owner_ref_checked;
6094 rec->num_duplicates = 0;
6095 rec->metadata = tmpl->metadata;
6096 rec->flag_block_full_backref = FLAG_UNSET;
6097 rec->bad_full_backref = 0;
6098 rec->crossing_stripes = 0;
6099 rec->wrong_chunk_type = 0;
6100 rec->is_root = tmpl->is_root;
6101 rec->refs = tmpl->refs;
6102 rec->extent_item_refs = tmpl->extent_item_refs;
6103 rec->parent_generation = tmpl->parent_generation;
6104 INIT_LIST_HEAD(&rec->backrefs);
6105 INIT_LIST_HEAD(&rec->dups);
6106 INIT_LIST_HEAD(&rec->list);
6107 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6108 rec->cache.start = tmpl->start;
6109 rec->cache.size = tmpl->nr;
6110 ret = insert_cache_extent(extent_cache, &rec->cache);
6115 bytes_used += rec->nr;
6118 rec->crossing_stripes = check_crossing_stripes(global_info,
6119 rec->start, global_info->nodesize);
6120 check_extent_type(rec);
6125 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6127 * - refs - if found, increase refs
6128 * - is_root - if found, set
6129 * - content_checked - if found, set
6130 * - owner_ref_checked - if found, set
6132 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup() when no record
 * covers that range.
 *
 * Visible handling for an existing record:
 *  - rec->nr may be replaced by max(tmpl->nr, tmpl->max_size);
 *  - if @tmpl comes from an extent item (found_rec) and either starts
 *    elsewhere or the record already had one, a stub record holding start,
 *    max_size, metadata and extent_item_refs is appended to rec->dups, the
 *    record is queued on duplicate_extents (once) and num_duplicates grows;
 *  - a non-duplicate extent_item_refs overwrites the stored value, with a
 *    message when one was already present;
 *  - content_checked / owner_ref_checked are set if set in @tmpl, parent_key
 *    is overwritten, parent_generation is taken when non-zero, and max_size
 *    only grows;
 *  - the stripe-crossing and chunk-type checks are refreshed and the record
 *    is offered to maybe_free_extent_rec().
 * NOTE(review): the refs increment, several guarding conditions, the
 * allocation-failure path and the return statements are in lines not
 * visible in this listing.
 */
6134 static int add_extent_rec(struct cache_tree *extent_cache,
6135 struct extent_record *tmpl)
6137 struct extent_record *rec;
6138 struct cache_extent *cache;
6142 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6144 rec = container_of(cache, struct extent_record, cache);
6148 rec->nr = max(tmpl->nr, tmpl->max_size);
6151 * We need to make sure to reset nr to whatever the extent
6152 * record says was the real size, this way we can compare it to
6155 if (tmpl->found_rec) {
6156 if (tmpl->start != rec->start || rec->found_rec) {
6157 struct extent_record *tmp;
6160 if (list_empty(&rec->list))
6161 list_add_tail(&rec->list,
6162 &duplicate_extents);
6165 * We have to do this song and dance in case we
6166 * find an extent record that falls inside of
6167 * our current extent record but does not have
6168 * the same objectid.
6170 tmp = malloc(sizeof(*tmp));
6173 tmp->start = tmpl->start;
6174 tmp->max_size = tmpl->max_size;
6177 tmp->metadata = tmpl->metadata;
6178 tmp->extent_item_refs = tmpl->extent_item_refs;
6179 INIT_LIST_HEAD(&tmp->list);
6180 list_add_tail(&tmp->list, &rec->dups);
6181 rec->num_duplicates++;
6188 if (tmpl->extent_item_refs && !dup) {
6189 if (rec->extent_item_refs) {
6190 fprintf(stderr, "block %llu rec "
6191 "extent_item_refs %llu, passed %llu\n",
6192 (unsigned long long)tmpl->start,
6193 (unsigned long long)
6194 rec->extent_item_refs,
6195 (unsigned long long)tmpl->extent_item_refs);
6197 rec->extent_item_refs = tmpl->extent_item_refs;
6201 if (tmpl->content_checked)
6202 rec->content_checked = 1;
6203 if (tmpl->owner_ref_checked)
6204 rec->owner_ref_checked = 1;
6205 memcpy(&rec->parent_key, &tmpl->parent_key,
6206 sizeof(tmpl->parent_key));
6207 if (tmpl->parent_generation)
6208 rec->parent_generation = tmpl->parent_generation;
6209 if (rec->max_size < tmpl->max_size)
6210 rec->max_size = tmpl->max_size;
6213 * A metadata extent can't cross stripe_len boundary, otherwise
6214 * kernel scrub won't be able to handle it.
6215 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6219 rec->crossing_stripes = check_crossing_stripes(
6220 global_info, rec->start,
6221 global_info->nodesize);
6222 check_extent_type(rec);
6223 maybe_free_extent_rec(extent_cache, rec);
6227 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * If no record covers @bytenr, a template record is inserted first and the
 * lookup is repeated. The backref is then found or allocated, and one of
 * two flags is set on it: found_ref or found_extent_tree. In both cases the
 * same "Extent back ref already exists" message is printed when the flag
 * was already set. Finally the chunk type is re-checked and the record is
 * offered to maybe_free_extent_rec().
 * NOTE(review): the template's remaining fields, the error exits, the test
 * on @found_ref that selects the flag and the return value are in lines not
 * visible in this listing.
 */
6232 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6233 u64 parent, u64 root, int found_ref)
6235 struct extent_record *rec;
6236 struct tree_backref *back;
6237 struct cache_extent *cache;
6240 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6242 struct extent_record tmpl;
6244 memset(&tmpl, 0, sizeof(tmpl));
6245 tmpl.start = bytenr;
6250 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6254 /* really a bug in the cache_extent implementation now */
6255 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6260 rec = container_of(cache, struct extent_record, cache);
6261 if (rec->start != bytenr) {
6263 * Several cause, from unaligned bytenr to over lapping extents
6268 back = find_tree_backref(rec, parent, root);
6270 back = alloc_tree_backref(rec, parent, root);
6276 if (back->node.found_ref) {
6277 fprintf(stderr, "Extent back ref already exists "
6278 "for %llu parent %llu root %llu \n",
6279 (unsigned long long)bytenr,
6280 (unsigned long long)parent,
6281 (unsigned long long)root);
6283 back->node.found_ref = 1;
6285 if (back->node.found_extent_tree) {
6286 fprintf(stderr, "Extent back ref already exists "
6287 "for %llu parent %llu root %llu \n",
6288 (unsigned long long)bytenr,
6289 (unsigned long long)parent,
6290 (unsigned long long)root);
6292 back->node.found_extent_tree = 1;
6294 check_extent_type(rec);
6295 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no record covers @bytenr, a template record (start and max_size
 * visible) is inserted first and the lookup is repeated. rec->max_size is
 * raised to @max_size when smaller. The backref is then found or allocated.
 *
 * Two update paths are visible:
 *  - a reference-found path: requires num_refs == 1 (BUG_ON), requires the
 *    stored bytes to equal @max_size if the backref was already found
 *    (BUG_ON), then sets found_ref, increments the found counter, stores
 *    bytes and disk_bytenr, and marks the record content/owner checked;
 *  - an extent-tree path: prints a message if the backref was already seen
 *    in the extent tree, then stores num_refs and sets found_extent_tree.
 * The record is finally offered to maybe_free_extent_rec().
 * NOTE(review): the error exits, the test on @found_ref selecting the path,
 * the trailing call arguments and the return value are in lines not visible
 * in this listing.
 */
6299 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6300 u64 parent, u64 root, u64 owner, u64 offset,
6301 u32 num_refs, int found_ref, u64 max_size)
6303 struct extent_record *rec;
6304 struct data_backref *back;
6305 struct cache_extent *cache;
6308 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6310 struct extent_record tmpl;
6312 memset(&tmpl, 0, sizeof(tmpl));
6313 tmpl.start = bytenr;
6315 tmpl.max_size = max_size;
6317 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6321 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6326 rec = container_of(cache, struct extent_record, cache);
6327 if (rec->max_size < max_size)
6328 rec->max_size = max_size;
6331 * If found_ref is set then max_size is the real size and must match the
6332 * existing refs. So if we have already found a ref then we need to
6333 * make sure that this ref matches the existing one, otherwise we need
6334 * to add a new backref so we can notice that the backrefs don't match
6335 * and we need to figure out who is telling the truth. This is to
6336 * account for that awful fsync bug I introduced where we'd end up with
6337 * a btrfs_file_extent_item that would have its length include multiple
6338 * prealloc extents or point inside of a prealloc extent.
6340 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6343 back = alloc_data_backref(rec, parent, root, owner, offset,
6349 BUG_ON(num_refs != 1);
6350 if (back->node.found_ref)
6351 BUG_ON(back->bytes != max_size);
6352 back->node.found_ref = 1;
6353 back->found_ref += 1;
6354 back->bytes = max_size;
6355 back->disk_bytenr = bytenr;
6357 rec->content_checked = 1;
6358 rec->owner_ref_checked = 1;
6360 if (back->node.found_extent_tree) {
6361 fprintf(stderr, "Extent back ref already exists "
6362 "for %llu parent %llu root %llu "
6363 "owner %llu offset %llu num_refs %lu\n",
6364 (unsigned long long)bytenr,
6365 (unsigned long long)parent,
6366 (unsigned long long)root,
6367 (unsigned long long)owner,
6368 (unsigned long long)offset,
6369 (unsigned long)num_refs);
6371 back->num_refs = num_refs;
6372 back->node.found_extent_tree = 1;
6374 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register [bytenr, size) in the @seen tree and add the same range to the
 * @pending tree.
 * NOTE(review): the test on the first insertion's result (presumably
 * skipping ranges already seen) and the return value are in lines not
 * visible in this listing. The result of the second insertion is ignored.
 */
6378 static int add_pending(struct cache_tree *pending,
6379 struct cache_tree *seen, u64 bytenr, u32 size)
6382 ret = add_cache_extent(seen, bytenr, size);
6385 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of block ranges to process and store them in @bits
 * (at most @bits_nr entries).
 *
 * Sources consulted, in the order they appear: the @reada tree (first entry
 * copied into bits[0]), the @nodes tree searched from a start position
 * 32768 below @last (falling back to a search from 0), and the @pending
 * tree. Consecutive cache extents are copied until the tree or @bits runs
 * out. When more than 8 slots remain free, further @pending entries lying
 * within 32768 bytes after the previous range are appended.
 * NOTE(review): the last parameter line, the tests between the sources, the
 * counter updates and the return statements are in lines not visible here.
 * NOTE(review): node_start is an unsigned long initialised from the u64
 * @last; where unsigned long is 32 bits this would truncate -- confirm.
 */
6389 static int pick_next_pending(struct cache_tree *pending,
6390 struct cache_tree *reada,
6391 struct cache_tree *nodes,
6392 u64 last, struct block_info *bits, int bits_nr,
6395 unsigned long node_start = last;
6396 struct cache_extent *cache;
6399 cache = search_cache_extent(reada, 0);
6401 bits[0].start = cache->start;
6402 bits[0].size = cache->size;
6407 if (node_start > 32768)
6408 node_start -= 32768;
6410 cache = search_cache_extent(nodes, node_start);
6412 cache = search_cache_extent(nodes, 0);
6415 cache = search_cache_extent(pending, 0);
6420 bits[ret].start = cache->start;
6421 bits[ret].size = cache->size;
6422 cache = next_cache_extent(cache);
6424 } while (cache && ret < bits_nr);
6430 bits[ret].start = cache->start;
6431 bits[ret].size = cache->size;
6432 cache = next_cache_extent(cache);
6434 } while (cache && ret < bits_nr);
6436 if (bits_nr - ret > 8) {
/* lookup tracks the end of the most recently selected range. */
6437 u64 lookup = bits[0].start + bits[0].size;
6438 struct cache_extent *next;
6439 next = search_cache_extent(pending, lookup);
6441 if (next->start - lookup > 32768)
6443 bits[ret].start = next->start;
6444 bits[ret].size = next->size;
6445 lookup = next->start + next->size;
6449 next = next_cache_extent(next);
/*
 * Per-extent release callback for the chunk cache: map @cache to its
 * chunk_record and unlink the record from its list and dextents lists.
 * NOTE(review): the release of the record itself is not visible here.
 */
6457 static void free_chunk_record(struct cache_extent *cache)
6459 struct chunk_record *rec;
6461 rec = container_of(cache, struct chunk_record, cache);
6462 list_del_init(&rec->list);
6463 list_del_init(&rec->dextents);
/* Release every entry of @chunk_cache, using free_chunk_record() per extent. */
6467 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6469 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node release callback for the device rb-tree: map @node to its
 * device_record.
 * NOTE(review): the release of the record itself is not visible here.
 */
6472 static void free_device_record(struct rb_node *node)
6474 struct device_record *rec;
6476 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation tying the name device_cache to free_device_record().
 * NOTE(review): presumably expands to a tree-freeing function for the device
 * cache; the macro definition is not part of this listing -- confirm.
 */
6480 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the block group cache tree and append it to the
 * tree's block_groups list.
 * NOTE(review): the test on the insertion result and the return value are
 * in lines not visible in this listing.
 */
6482 int insert_block_group_record(struct block_group_tree *tree,
6483 struct block_group_record *bg_rec)
6487 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6491 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-extent release callback for the block group tree: map @cache to its
 * block_group_record and unlink the record from its list.
 * NOTE(review): the release of the record itself is not visible here.
 */
6495 static void free_block_group_record(struct cache_extent *cache)
6497 struct block_group_record *rec;
6499 rec = container_of(cache, struct block_group_record, cache);
6500 list_del_init(&rec->list);
/* Release every entry of @tree, using free_block_group_record() per extent. */
6504 void free_block_group_tree(struct block_group_tree *tree)
6506 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree with insert_cache_extent2()
 * and queue it on both orphan lists of the tree (no_chunk_orphans and
 * no_device_orphans).
 * NOTE(review): the test on the insertion result and the return value are
 * in lines not visible in this listing.
 */
6509 int insert_device_extent_record(struct device_extent_tree *tree,
6510 struct device_extent_record *de_rec)
6515 * Device extent is a bit different from the other extents, because
6516 * the extents which belong to the different devices may have the
6517 * same start and size, so we need use the special extent cache
6518 * search/insert functions.
6520 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6524 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6525 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-extent release callback for the device extent tree: map @cache to its
 * device_extent_record and unlink it from chunk_list and device_list when
 * it is still linked on them.
 * NOTE(review): the release of the record itself is not visible here.
 */
6529 static void free_device_extent_record(struct cache_extent *cache)
6531 struct device_extent_record *rec;
6533 rec = container_of(cache, struct device_extent_record, cache);
6534 if (!list_empty(&rec->chunk_list))
6535 list_del_init(&rec->chunk_list);
6536 if (!list_empty(&rec->device_list))
6537 list_del_init(&rec->device_list);
/* Release every entry of @tree, using free_device_extent_record() per extent. */
6541 void free_device_extent_tree(struct device_extent_tree *tree)
6543 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6546 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0-format extent ref item at @slot of @leaf (compiled only with
 * BTRFS_COMPAT_EXTENT_TREE_V0).
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset). Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are
 * recorded as tree backrefs; the others are recorded as data backrefs with
 * the v0 ref count.
 * NOTE(review): the trailing arguments of the tree-backref call and the
 * return statement are in lines not visible in this listing.
 */
6547 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6548 struct extent_buffer *leaf, int slot)
6550 struct btrfs_extent_ref_v0 *ref0;
6551 struct btrfs_key key;
6554 btrfs_item_key_to_cpu(leaf, &key, slot);
6555 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6556 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6557 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6560 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6561 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for the item's stripe count. Its
 * cache extent spans [key->offset, chunk length). The key fields, the
 * leaf's header generation and the chunk's owner, stripe_len, type flags,
 * io_width, io_align, sector_size, stripe and sub-stripe counts are copied
 * in, followed by devid, offset and device UUID for every stripe.
 * "memory allocation failed" is printed when the allocation fails.
 * NOTE(review): the last parameter line, what happens after the failure
 * message, the UUID copy length and the return statement are in lines not
 * visible in this listing.
 */
6567 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6568 struct btrfs_key *key,
6571 struct btrfs_chunk *ptr;
6572 struct chunk_record *rec;
6575 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6576 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6578 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6580 fprintf(stderr, "memory allocation failed\n");
6584 INIT_LIST_HEAD(&rec->list);
6585 INIT_LIST_HEAD(&rec->dextents);
6588 rec->cache.start = key->offset;
6589 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6591 rec->generation = btrfs_header_generation(leaf);
6593 rec->objectid = key->objectid;
6594 rec->type = key->type;
6595 rec->offset = key->offset;
6597 rec->length = rec->cache.size;
6598 rec->owner = btrfs_chunk_owner(leaf, ptr);
6599 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6600 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6601 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6602 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6603 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6604 rec->num_stripes = num_stripes;
6605 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6607 for (i = 0; i < rec->num_stripes; ++i) {
6608 rec->stripes[i].devid =
6609 btrfs_stripe_devid_nr(leaf, ptr, i);
6610 rec->stripes[i].offset =
6611 btrfs_stripe_offset_nr(leaf, ptr, i);
6612 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6613 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported with "is not valid, ignore it". A chunk_record is then
 * built and inserted; an insertion failure is reported as "existed".
 * NOTE(review): the last parameter line, the final argument of the
 * validity check, the result tests, cleanup of a rejected record and the
 * return value are in lines not visible in this listing.
 */
6620 static int process_chunk_item(struct cache_tree *chunk_cache,
6621 struct btrfs_key *key, struct extent_buffer *eb,
6624 struct chunk_record *rec;
6625 struct btrfs_chunk *chunk;
6628 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6630 * Do extra check for this chunk item,
6632 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6633 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6634 * and owner<->key_type check.
6636 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6639 error("chunk(%llu, %llu) is not valid, ignore it",
6640 key->offset, btrfs_chunk_length(eb, chunk));
6643 rec = btrfs_new_chunk_record(eb, key, slot);
6644 ret = insert_cache_extent(chunk_cache, &rec->cache);
6646 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6647 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree, ordered by device_record_compare.
 *
 * Copied in: the key fields, the leaf's header generation and the item's
 * device id, total bytes and bytes used. rec->devid is first set from
 * key->offset and later overwritten with the id stored in the item.
 * "memory allocation failed" is printed when the allocation fails and
 * "Device[...] existed." when the insertion fails.
 * NOTE(review): the record comes from malloc(), so fields not assigned here
 * are not zeroed. The failure exits, cleanup of a rejected record and the
 * return value are in lines not visible in this listing.
 */
6654 static int process_device_item(struct rb_root *dev_cache,
6655 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6657 struct btrfs_dev_item *ptr;
6658 struct device_record *rec;
6661 ptr = btrfs_item_ptr(eb,
6662 slot, struct btrfs_dev_item);
6664 rec = malloc(sizeof(*rec));
6666 fprintf(stderr, "memory allocation failed\n");
6670 rec->devid = key->offset;
6671 rec->generation = btrfs_header_generation(eb);
6673 rec->objectid = key->objectid;
6674 rec->type = key->type;
6675 rec->offset = key->offset;
6677 rec->devid = btrfs_device_id(eb, ptr);
6678 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6679 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6681 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6683 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-allocated block_group_record from the block group item at
 * @slot of @leaf.
 *
 * The cache extent spans [key->objectid, key->offset). The key fields, the
 * leaf's header generation and the item's flags are copied in, and the list
 * head is initialised. "memory allocation failed" is printed when the
 * allocation fails.
 * NOTE(review): the last parameter line, what happens after the failure
 * message and the return statement are in lines not visible here.
 */
6690 struct block_group_record *
6691 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6694 struct btrfs_block_group_item *ptr;
6695 struct block_group_record *rec;
6697 rec = calloc(1, sizeof(*rec));
6699 fprintf(stderr, "memory allocation failed\n");
6703 rec->cache.start = key->objectid;
6704 rec->cache.size = key->offset;
6706 rec->generation = btrfs_header_generation(leaf);
6708 rec->objectid = key->objectid;
6709 rec->type = key->type;
6710 rec->offset = key->offset;
6712 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6713 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6715 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it
 * into @block_group_cache; an insertion failure is reported as
 * "Block Group[...] existed.".
 * NOTE(review): the result test, cleanup of a rejected record and the
 * return value are in lines not visible in this listing.
 */
6720 static int process_block_group_item(struct block_group_tree *block_group_cache,
6721 struct btrfs_key *key,
6722 struct extent_buffer *eb, int slot)
6724 struct block_group_record *rec;
6727 rec = btrfs_new_block_group_record(eb, key, slot);
6728 ret = insert_block_group_record(block_group_cache, rec);
6730 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6731 rec->objectid, rec->offset);
/*
 * Build a zero-allocated device_extent_record from the dev extent item at
 * @slot of @leaf.
 *
 * The cache extent is keyed by key->objectid and starts at key->offset; its
 * size is the extent length read from the item. The key fields, the leaf's
 * header generation and the chunk objectid are copied in, and both list
 * heads are initialised. "memory allocation failed" is printed when the
 * allocation fails.
 * NOTE(review): what happens after the failure message, the destination of
 * the chunk offset value and the return statement are in lines not visible
 * in this listing.
 */
6738 struct device_extent_record *
6739 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6740 struct btrfs_key *key, int slot)
6742 struct device_extent_record *rec;
6743 struct btrfs_dev_extent *ptr;
6745 rec = calloc(1, sizeof(*rec));
6747 fprintf(stderr, "memory allocation failed\n");
6751 rec->cache.objectid = key->objectid;
6752 rec->cache.start = key->offset;
6754 rec->generation = btrfs_header_generation(leaf);
6756 rec->objectid = key->objectid;
6757 rec->type = key->type;
6758 rec->offset = key->offset;
6760 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6761 rec->chunk_objecteid =
6762 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6764 btrfs_dev_extent_chunk_offset(leaf, ptr);
6765 rec->length = btrfs_dev_extent_length(leaf, ptr);
6766 rec->cache.size = rec->length;
6768 INIT_LIST_HEAD(&rec->chunk_list);
6769 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache.
 *
 * The "Device extent[...] existed." message, carrying the record's
 * objectid, offset and length, is the diagnostic for an
 * insert_device_extent_record() failure.
 */
6775 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6776 struct btrfs_key *key, struct extent_buffer *eb,
6779 struct device_extent_record *rec;
6782 rec = btrfs_new_device_extent_record(eb, key, slot);
6783 ret = insert_device_extent_record(dev_extent_cache, rec);
6786 "Device extent[%llu, %llu, %llu] existed.\n",
6787 rec->objectid, rec->offset, rec->length);
/*
 * Record the EXTENT_ITEM / METADATA_ITEM found at @slot of @eb in
 * @extent_cache, together with every inline backref stored in the item.
 *
 * The extent length is the filesystem nodesize for METADATA_ITEM keys and
 * key.offset otherwise.  Three sanity checks are reported through error():
 * a bytenr that is not sectorsize aligned, a metadata extent whose length
 * differs from nodesize, and a data extent whose length is not sectorsize
 * aligned.
 *
 * Items smaller than struct btrfs_extent_item are only understood as the
 * v0 layout, and only when BTRFS_COMPAT_EXTENT_TREE_V0 is defined; in that
 * path the result of add_extent_rec() is returned directly.
 *
 * Inline refs are dispatched by type: TREE_BLOCK_REF and SHARED_BLOCK_REF
 * go to add_tree_backref(), EXTENT_DATA_REF and SHARED_DATA_REF go to
 * add_data_backref(); any other type is reported as a corrupt extent
 * record.
 */
6794 static int process_extent_item(struct btrfs_root *root,
6795 struct cache_tree *extent_cache,
6796 struct extent_buffer *eb, int slot)
6798 struct btrfs_extent_item *ei;
6799 struct btrfs_extent_inline_ref *iref;
6800 struct btrfs_extent_data_ref *dref;
6801 struct btrfs_shared_data_ref *sref;
6802 struct btrfs_key key;
6803 struct extent_record tmpl;
6808 u32 item_size = btrfs_item_size_nr(eb, slot);
6814 btrfs_item_key_to_cpu(eb, &key, slot);
6816 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6818 num_bytes = root->fs_info->nodesize;
6820 num_bytes = key.offset;
6823 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6824 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6825 key.objectid, root->fs_info->sectorsize);
6828 if (item_size < sizeof(*ei)) {
6829 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6830 struct btrfs_extent_item_v0 *ei0;
6831 BUG_ON(item_size != sizeof(*ei0));
6832 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6833 refs = btrfs_extent_refs_v0(eb, ei0);
6837 memset(&tmpl, 0, sizeof(tmpl));
6838 tmpl.start = key.objectid;
6839 tmpl.nr = num_bytes;
6840 tmpl.extent_item_refs = refs;
6841 tmpl.metadata = metadata;
6843 tmpl.max_size = num_bytes;
6845 return add_extent_rec(extent_cache, &tmpl);
6848 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6849 refs = btrfs_extent_refs(eb, ei);
6850 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6854 if (metadata && num_bytes != root->fs_info->nodesize) {
6855 error("ignore invalid metadata extent, length %llu does not equal to %u",
6856 num_bytes, root->fs_info->nodesize);
6859 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6860 error("ignore invalid data extent, length %llu is not aligned to %u",
6861 num_bytes, root->fs_info->sectorsize);
6865 memset(&tmpl, 0, sizeof(tmpl));
6866 tmpl.start = key.objectid;
6867 tmpl.nr = num_bytes;
6868 tmpl.extent_item_refs = refs;
6869 tmpl.metadata = metadata;
6871 tmpl.max_size = num_bytes;
/*
 * NOTE(review): unlike the v0 path above, the return value of this
 * add_extent_rec() call is not looked at -- confirm that is intentional.
 */
6872 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item.  A tree block keyed as
 * EXTENT_ITEM additionally carries a btrfs_tree_block_info that has to be
 * skipped first.
 */
6874 ptr = (unsigned long)(ei + 1);
6875 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6876 key.type == BTRFS_EXTENT_ITEM_KEY)
6877 ptr += sizeof(struct btrfs_tree_block_info);
6879 end = (unsigned long)ei + item_size;
6881 iref = (struct btrfs_extent_inline_ref *)ptr;
6882 type = btrfs_extent_inline_ref_type(eb, iref);
6883 offset = btrfs_extent_inline_ref_offset(eb, iref);
6885 case BTRFS_TREE_BLOCK_REF_KEY:
6886 ret = add_tree_backref(extent_cache, key.objectid,
6890 "add_tree_backref failed (extent items tree block): %s",
6893 case BTRFS_SHARED_BLOCK_REF_KEY:
6894 ret = add_tree_backref(extent_cache, key.objectid,
6898 "add_tree_backref failed (extent items shared block): %s",
6901 case BTRFS_EXTENT_DATA_REF_KEY:
6902 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6903 add_data_backref(extent_cache, key.objectid, 0,
6904 btrfs_extent_data_ref_root(eb, dref),
6905 btrfs_extent_data_ref_objectid(eb,
6907 btrfs_extent_data_ref_offset(eb, dref),
6908 btrfs_extent_data_ref_count(eb, dref),
6911 case BTRFS_SHARED_DATA_REF_KEY:
6912 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6913 add_data_backref(extent_cache, key.objectid, offset,
6915 btrfs_shared_data_ref_count(eb, sref),
6919 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6920 key.objectid, key.type, num_bytes);
/* Step to the next inline ref; its size depends on the ref type. */
6923 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by one exactly matching free space entry.
 *
 * Before the lookup the range is trimmed against the logical addresses that
 * btrfs_rmap_block() returns for every super block mirror offset: stripes
 * that overlap the start or the end of the range shrink it, and a stripe
 * that lands in the middle splits it, in which case the left part is
 * checked by a recursive call and the loop carries on with the right part.
 *
 * A missing entry, or an entry whose offset or byte count differs from
 * what is wanted, is reported on stderr.  A matching entry is unlinked from
 * the block group's free space ctl.
 */
6930 static int check_cache_range(struct btrfs_root *root,
6931 struct btrfs_block_group_cache *cache,
6932 u64 offset, u64 bytes)
6934 struct btrfs_free_space *entry;
6940 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6941 bytenr = btrfs_sb_offset(i);
6942 ret = btrfs_rmap_block(root->fs_info,
6943 cache->key.objectid, bytenr, 0,
6944 &logical, &nr, &stripe_len);
6949 if (logical[nr] + stripe_len <= offset)
6951 if (offset + bytes <= logical[nr])
6953 if (logical[nr] == offset) {
6954 if (stripe_len >= bytes) {
6958 bytes -= stripe_len;
6959 offset += stripe_len;
6960 } else if (logical[nr] < offset) {
6961 if (logical[nr] + stripe_len >=
6966 bytes = (offset + bytes) -
6967 (logical[nr] + stripe_len);
6968 offset = logical[nr] + stripe_len;
6971 * Could be tricky, the super may land in the
6972 * middle of the area we're checking. First
6973 * check the easiest case, it's at the end.
6975 if (logical[nr] + stripe_len >=
6977 bytes = logical[nr] - offset;
6981 /* Check the left side */
6982 ret = check_cache_range(root, cache,
6984 logical[nr] - offset);
6990 /* Now we continue with the right side */
6991 bytes = (offset + bytes) -
6992 (logical[nr] + stripe_len);
6993 offset = logical[nr] + stripe_len;
/* Whatever is left of the range must match one free space entry exactly. */
7000 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7002 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7003 offset, offset+bytes);
7007 if (entry->offset != offset) {
7008 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7013 if (entry->bytes != bytes) {
7014 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7015 bytes, entry->bytes, offset);
7019 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the free space entries loaded for block group @cache against
 * the extent tree.
 *
 * The extent tree is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET).  Only EXTENT_ITEM and METADATA_ITEM keys that
 * start before the end of the block group are considered.  @last tracks the
 * end of the previous extent (key.offset bytes for EXTENT_ITEM, nodesize
 * for METADATA_ITEM); every gap between @last and the next extent is handed
 * to check_cache_range(), and so is the tail between @last and the end of
 * the block group.
 *
 * A message is printed when the free space offset tree is still non-empty
 * at the end.
 */
7024 static int verify_space_cache(struct btrfs_root *root,
7025 struct btrfs_block_group_cache *cache)
7027 struct btrfs_path path;
7028 struct extent_buffer *leaf;
7029 struct btrfs_key key;
/* All lookups below go through the extent tree, whatever root was passed. */
7033 root = root->fs_info->extent_root;
7035 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7037 btrfs_init_path(&path);
7038 key.objectid = last;
7040 key.type = BTRFS_EXTENT_ITEM_KEY;
7041 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7046 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7047 ret = btrfs_next_leaf(root, &path);
7055 leaf = path.nodes[0];
7056 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7057 if (key.objectid >= cache->key.offset + cache->key.objectid)
7059 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7060 key.type != BTRFS_METADATA_ITEM_KEY) {
7065 if (last == key.objectid) {
7066 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7067 last = key.objectid + key.offset;
7069 last = key.objectid + root->fs_info->nodesize;
7074 ret = check_cache_range(root, cache, last,
7075 key.objectid - last);
7078 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7079 last = key.objectid + key.offset;
7081 last = key.objectid + root->fs_info->nodesize;
/* Free space between the last extent and the end of the block group. */
7085 if (last < cache->key.objectid + cache->key.offset)
7086 ret = check_cache_range(root, cache, last,
7087 cache->key.objectid +
7088 cache->key.offset - last);
7091 btrfs_release_path(&path);
7094 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7095 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * A notice is printed when the super block's cache generation is neither
 * -1ULL nor equal to the super generation.  Block groups are then visited
 * in order, starting right after the primary super block.  For each one
 * the free space ctl is initialised if it does not exist yet, and existing
 * cached free space is dropped via btrfs_remove_free_space_cache().
 * Free space is loaded from the free space tree when the FREE_SPACE_TREE
 * compat_ro bit is set (super stripes are excluded around the load), and
 * via load_free_space_cache() otherwise.  The result is checked with
 * verify_space_cache().
 *
 * When progress reporting is enabled the task is switched to
 * TASK_FREE_SPACE for the duration of the check.
 *
 * Returns -EINVAL if any error was counted, 0 otherwise.
 */
7103 static int check_space_cache(struct btrfs_root *root)
7105 struct btrfs_block_group_cache *cache;
7106 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7110 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7111 btrfs_super_generation(root->fs_info->super_copy) !=
7112 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7113 printf("cache and super generation don't match, space cache "
7114 "will be invalidated\n");
7118 if (ctx.progress_enabled) {
7119 ctx.tp = TASK_FREE_SPACE;
7120 task_start(ctx.info);
7124 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup resumes at the end of the block group just found. */
7128 start = cache->key.objectid + cache->key.offset;
7129 if (!cache->free_space_ctl) {
7130 if (btrfs_init_free_space_ctl(cache,
7131 root->fs_info->sectorsize)) {
7136 btrfs_remove_free_space_cache(cache);
7139 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7140 ret = exclude_super_stripes(root, cache);
7142 fprintf(stderr, "could not exclude super stripes: %s\n",
7147 ret = load_free_space_tree(root->fs_info, cache);
7148 free_excluded_extents(root, cache);
7150 fprintf(stderr, "could not load free space tree: %s\n",
7157 ret = load_free_space_cache(root->fs_info, cache);
7162 ret = verify_space_cache(root, cache);
7164 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7165 cache->key.objectid);
7170 task_stop(ctx.info);
7172 return error ? -EINVAL : 0;
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare its
 * per-sector checksums with those stored in @eb starting at @leaf_offset.
 *
 * @num_bytes is tested up front for being a multiple of the sector size.
 * The data is read into a malloc()ed buffer of @num_bytes with
 * read_extent_data(), which may return less than requested (read_len is
 * passed by pointer).  For every sectorsize unit a checksum is computed
 * with btrfs_csum_data()/btrfs_csum_final() and compared with the expected
 * value found at @leaf_offset + (sector index * csum_size).
 *
 * A mismatch is printed with the mirror number, the byte offset and both
 * checksum values; the number of copies is then looked up and compared
 * with the current mirror (the comment in the code says "try another
 * mirror").
 */
7175 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7176 u64 num_bytes, unsigned long leaf_offset,
7177 struct extent_buffer *eb) {
7179 struct btrfs_fs_info *fs_info = root->fs_info;
7181 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7183 unsigned long csum_offset;
7187 u64 data_checked = 0;
7193 if (num_bytes % fs_info->sectorsize)
7196 data = malloc(num_bytes);
7200 while (offset < num_bytes) {
7203 read_len = num_bytes - offset;
7204 /* read as much space once a time */
7205 ret = read_extent_data(fs_info, data + offset,
7206 bytenr + offset, &read_len, mirror);
7210 /* verify every 4k data's checksum */
7211 while (data_checked < read_len) {
/* tmp is the byte offset of the current sector inside the whole range. */
7213 tmp = offset + data_checked;
7215 csum = btrfs_csum_data((char *)data + tmp,
7216 csum, fs_info->sectorsize);
7217 btrfs_csum_final(csum, (u8 *)&csum);
7219 csum_offset = leaf_offset +
7220 tmp / fs_info->sectorsize * csum_size;
7221 read_extent_buffer(eb, (char *)&csum_expected,
7222 csum_offset, csum_size);
7223 /* try another mirror */
7224 if (csum != csum_expected) {
7225 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7226 mirror, bytenr + tmp,
7227 csum, csum_expected);
7228 num_copies = btrfs_num_copies(root->fs_info,
7230 if (mirror < num_copies - 1) {
7235 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEM records in the extent tree.
 *
 * The search starts at (@bytenr, EXTENT_ITEM, -1) and steps backwards, also
 * past keys whose type sorts above EXTENT_ITEM, so that an extent starting
 * at or before @bytenr is found.  Walking forward from there, every
 * EXTENT_ITEM that overlaps the range shrinks it: an extent covering the
 * start advances @bytenr, an extent covering the end cuts @num_bytes, and
 * an extent in the middle makes the function recurse on the right-hand
 * remainder before continuing with the left-hand one.
 *
 * If part of the range is still uncovered at the end (and no other error
 * is pending) "There are no extents for csum range" is printed.
 */
7244 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7247 struct btrfs_path path;
7248 struct extent_buffer *leaf;
7249 struct btrfs_key key;
7252 btrfs_init_path(&path);
7253 key.objectid = bytenr;
7254 key.type = BTRFS_EXTENT_ITEM_KEY;
7255 key.offset = (u64)-1;
7258 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7261 fprintf(stderr, "Error looking up extent record %d\n", ret);
7262 btrfs_release_path(&path);
7265 if (path.slots[0] > 0) {
7268 ret = btrfs_prev_leaf(root, &path);
7271 } else if (ret > 0) {
7278 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7281 * Block group items come before extent items if they have the same
7282 * bytenr, so walk back one more just in case. Dear future traveller,
7283 * first congrats on mastering time travel. Now if it's not too much
7284 * trouble could you go back to 2006 and tell Chris to make the
7285 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7286 * EXTENT_ITEM_KEY please?
7288 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7289 if (path.slots[0] > 0) {
7292 ret = btrfs_prev_leaf(root, &path);
7295 } else if (ret > 0) {
7300 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7304 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7305 ret = btrfs_next_leaf(root, &path);
7307 fprintf(stderr, "Error going to next leaf "
7309 btrfs_release_path(&path);
7315 leaf = path.nodes[0];
7316 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7317 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7321 if (key.objectid + key.offset < bytenr) {
7325 if (key.objectid > bytenr + num_bytes)
7328 if (key.objectid == bytenr) {
7329 if (key.offset >= num_bytes) {
7333 num_bytes -= key.offset;
7334 bytenr += key.offset;
7335 } else if (key.objectid < bytenr) {
7336 if (key.objectid + key.offset >= bytenr + num_bytes) {
7340 num_bytes = (bytenr + num_bytes) -
7341 (key.objectid + key.offset);
7342 bytenr = key.objectid + key.offset;
7344 if (key.objectid + key.offset < bytenr + num_bytes) {
7345 u64 new_start = key.objectid + key.offset;
7346 u64 new_bytes = bytenr + num_bytes - new_start;
7349 * Weird case, the extent is in the middle of
7350 * our range, we'll have to search one side
7351 * and then the other. Not sure if this happens
7352 * in real life, but no harm in coding it up
7353 * anyway just in case.
7355 btrfs_release_path(&path);
7356 ret = check_extent_exists(root, new_start,
7359 fprintf(stderr, "Right section didn't "
7363 num_bytes = key.objectid - bytenr;
7366 num_bytes = key.objectid - bytenr;
7373 if (num_bytes && !ret) {
7374 fprintf(stderr, "There are no extents for csum range "
7375 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7379 btrfs_release_path(&path);
/*
 * Walk the csum tree and check that every checksummed range is backed by
 * an extent record.
 *
 * "No valid csum tree found" is printed when the csum root node is not
 * up to date.  Otherwise all EXTENT_CSUM items are visited in key order.
 * The number of data bytes an item covers is
 * (item size / csum_size) * sectorsize.  Unless check_data_csum is unset,
 * the data itself is verified with check_extent_csums().
 *
 * Consecutive items are merged into one run described by @offset and
 * @num_bytes; when an item does not start at offset + num_bytes the
 * finished run is handed to check_extent_exists() and a new run starts at
 * the item's key.offset.
 */
7383 static int check_csums(struct btrfs_root *root)
7385 struct btrfs_path path;
7386 struct extent_buffer *leaf;
7387 struct btrfs_key key;
7388 u64 offset = 0, num_bytes = 0;
7389 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7393 unsigned long leaf_offset;
7395 root = root->fs_info->csum_root;
7396 if (!extent_buffer_uptodate(root->node)) {
7397 fprintf(stderr, "No valid csum tree found\n");
7401 btrfs_init_path(&path);
7402 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7403 key.type = BTRFS_EXTENT_CSUM_KEY;
7405 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7407 fprintf(stderr, "Error searching csum tree %d\n", ret);
7408 btrfs_release_path(&path);
7412 if (ret > 0 && path.slots[0])
7417 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7418 ret = btrfs_next_leaf(root, &path);
7420 fprintf(stderr, "Error going to next leaf "
7427 leaf = path.nodes[0];
7429 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7430 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* One csum_size checksum in the item stands for one sector of data. */
7435 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7436 csum_size) * root->fs_info->sectorsize;
7437 if (!check_data_csum)
7438 goto skip_csum_check;
7439 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7440 ret = check_extent_csums(root, key.offset, data_len,
7446 offset = key.offset;
7447 } else if (key.offset != offset + num_bytes) {
7448 ret = check_extent_exists(root, offset, num_bytes);
7450 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7451 "there is no extent record\n",
7452 offset, offset+num_bytes);
7455 offset = key.offset;
7458 num_bytes += data_len;
7462 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in the order objectid, type,
 * offset: a later field is only looked at when all earlier fields are
 * equal, and each step tests whether @key's field is smaller.
 */
7466 static int is_dropped_key(struct btrfs_key *key,
7467 struct btrfs_key *drop_key) {
7468 if (key->objectid < drop_key->objectid)
7470 else if (key->objectid == drop_key->objectid) {
7471 if (key->type < drop_key->type)
7473 else if (key->type == drop_key->type) {
7474 if (key->offset < drop_key->offset)
7482 * Here are the rules for FULL_BACKREF.
7484 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7485 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7487 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7488 * if it happened after the relocation occurred since we'll have dropped the
7489 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7490 * have no real way to know for sure.
7492 * We process the blocks one root at a time, and we start from the lowest root
7493 * objectid and go to the highest. So we can just lookup the owner backref for
7494 * the record and if we don't find it then we know it doesn't exist and we have
7497 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7498 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7499 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf, reached while walking root @ri, should
 * carry BTRFS_BLOCK_FLAG_FULL_BACKREF, and store the verdict in *flags.
 *
 * The extent record for @buf is looked up in @extent_cache first.  The
 * decision then looks at, in order: whether the root objectid is below
 * BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root node itself
 * (buf->start == ri->bytenr), whether the header carries
 * BTRFS_HEADER_FLAG_RELOC, whether the header owner equals the root being
 * walked, and finally whether the record has a tree backref for the owner.
 *
 * In both outcomes the record's bad_full_backref is set when a previously
 * stored flag_block_full_backref (anything other than FLAG_UNSET)
 * disagrees with the new verdict.
 */
7501 static int calc_extent_flag(struct cache_tree *extent_cache,
7502 struct extent_buffer *buf,
7503 struct root_item_record *ri,
7506 struct extent_record *rec;
7507 struct cache_extent *cache;
7508 struct tree_backref *tback;
7511 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7512 /* we have added this extent before */
7516 rec = container_of(cache, struct extent_record, cache);
7519 * Except file/reloc tree, we can not have
7522 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7527 if (buf->start == ri->bytenr)
7530 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7533 owner = btrfs_header_owner(buf);
7534 if (owner == ri->objectid)
7537 tback = find_tree_backref(rec, 0, owner);
7542 if (rec->flag_block_full_backref != FLAG_UNSET &&
7543 rec->flag_block_full_backref != 0)
7544 rec->bad_full_backref = 1;
7547 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7548 if (rec->flag_block_full_backref != FLAG_UNSET &&
7549 rec->flag_block_full_backref != 1)
7550 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" on stderr, with
 * the key type rendered by print_key_type() and the root id rendered by
 * print_objectid().
 */
7554 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7556 fprintf(stderr, "Invalid key type(");
7557 print_key_type(stderr, 0, key_type);
7558 fprintf(stderr, ") found in root(");
7559 print_objectid(stderr, rootid, 0);
7560 fprintf(stderr, ")\n");
7564 * Check if the key is valid with its extent buffer.
7566 * This is an early check in case an invalid key exists in an extent buffer
7567 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of @key_type may legitimately live in tree @rootid.
 *
 * Visible rules:
 *   DEV_ITEM, CHUNK_ITEM                     -> chunk tree only
 *   EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP  -> extent tree only
 *   ROOT_ITEM                                -> root tree only
 *   DEV_EXTENT                               -> dev tree only
 * A violation is reported through report_mismatch_key_root().
 *
 * NOTE(review): the case under "valid in csum and log tree" is labelled
 * with BTRFS_CSUM_TREE_OBJECTID, a tree objectid, whereas every other
 * label in this switch is a *_KEY item type.  It looks like a key type
 * such as BTRFS_EXTENT_CSUM_KEY was intended -- confirm before relying on
 * this branch.
 */
7570 static int check_type_with_root(u64 rootid, u8 key_type)
7573 /* Only valid in chunk tree */
7574 case BTRFS_DEV_ITEM_KEY:
7575 case BTRFS_CHUNK_ITEM_KEY:
7576 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7579 /* valid in csum and log tree */
7580 case BTRFS_CSUM_TREE_OBJECTID:
7581 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7585 case BTRFS_EXTENT_ITEM_KEY:
7586 case BTRFS_METADATA_ITEM_KEY:
7587 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7588 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7591 case BTRFS_ROOT_ITEM_KEY:
7592 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7595 case BTRFS_DEV_EXTENT_KEY:
7596 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7602 report_mismatch_key_root(key_type, rootid);
/*
 * Process one pending tree block of the root described by @ri.
 *
 * Steps visible in this function:
 *  - pick_next_pending() selects a batch of blocks into @bits; the batch
 *    is added to @reada and handed to readahead_tree_block().
 *  - The first block of the batch is removed from @pending, @reada and
 *    @nodes, then read with read_tree_block(), using the parent generation
 *    from its extent record when one exists.  A block that is not up to
 *    date is passed to record_bad_block_io().
 *  - The FULL_BACKREF state is taken from btrfs_lookup_extent_info()
 *    unless init_extent_tree is set, with calc_extent_flag() as the other
 *    source; if calc_extent_flag() fails, FULL_BACKREF is assumed.  The
 *    result is cross-checked against the owner, the RELOC header flag and
 *    ri->last_snapshot, marking the record bad_full_backref on
 *    disagreement.
 *  - check_block() validates the buffer.
 *  - Leaves: every item is filtered through check_type_with_root() and
 *    dispatched by key type to the matching process_*() / add_*_backref()
 *    helper.  ORPHAN_ITEM keys are queued on delete_items, and regular
 *    (non-inline, non-hole) file extents update the data byte counters and
 *    add a data backref.
 *  - Nodes: every child pointer not excluded by the drop key is recorded
 *    with add_extent_rec() and add_tree_backref(), then queued on @nodes
 *    or @pending.
 *  - The global btree byte and space-waste counters are updated and the
 *    buffer is released.
 */
7606 static int run_next_block(struct btrfs_root *root,
7607 struct block_info *bits,
7610 struct cache_tree *pending,
7611 struct cache_tree *seen,
7612 struct cache_tree *reada,
7613 struct cache_tree *nodes,
7614 struct cache_tree *extent_cache,
7615 struct cache_tree *chunk_cache,
7616 struct rb_root *dev_cache,
7617 struct block_group_tree *block_group_cache,
7618 struct device_extent_tree *dev_extent_cache,
7619 struct root_item_record *ri)
7621 struct btrfs_fs_info *fs_info = root->fs_info;
7622 struct extent_buffer *buf;
7623 struct extent_record *rec = NULL;
7634 struct btrfs_key key;
7635 struct cache_extent *cache;
/* Select the next batch of blocks and start readahead on them. */
7638 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7639 bits_nr, &reada_bits);
7644 for(i = 0; i < nritems; i++) {
7645 ret = add_cache_extent(reada, bits[i].start,
7650 /* fixme, get the parent transid */
7651 readahead_tree_block(fs_info, bits[i].start, 0);
7654 *last = bits[0].start;
7655 bytenr = bits[0].start;
7656 size = bits[0].size;
/* The block handled now leaves the pending, reada and nodes trees. */
7658 cache = lookup_cache_extent(pending, bytenr, size);
7660 remove_cache_extent(pending, cache);
7663 cache = lookup_cache_extent(reada, bytenr, size);
7665 remove_cache_extent(reada, cache);
7668 cache = lookup_cache_extent(nodes, bytenr, size);
7670 remove_cache_extent(nodes, cache);
7673 cache = lookup_cache_extent(extent_cache, bytenr, size);
7675 rec = container_of(cache, struct extent_record, cache);
7676 gen = rec->parent_generation;
7679 /* fixme, get the real parent transid */
7680 buf = read_tree_block(root->fs_info, bytenr, gen);
7681 if (!extent_buffer_uptodate(buf)) {
7682 record_bad_block_io(root->fs_info,
7683 extent_cache, bytenr, size);
7687 nritems = btrfs_header_nritems(buf);
7690 if (!init_extent_tree) {
7691 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7692 btrfs_header_level(buf), 1, NULL,
7695 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7697 fprintf(stderr, "Couldn't calc extent flags\n");
7698 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7703 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7705 fprintf(stderr, "Couldn't calc extent flags\n");
7706 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7710 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7712 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7713 ri->objectid == btrfs_header_owner(buf)) {
7715 * Ok we got to this block from it's original owner and
7716 * we have FULL_BACKREF set. Relocation can leave
7717 * converted blocks over so this is altogether possible,
7718 * however it's not possible if the generation > the
7719 * last snapshot, so check for this case.
7721 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7722 btrfs_header_generation(buf) > ri->last_snapshot) {
7723 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7724 rec->bad_full_backref = 1;
7729 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7730 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7731 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7732 rec->bad_full_backref = 1;
7736 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7737 rec->flag_block_full_backref = 1;
7741 rec->flag_block_full_backref = 0;
7743 owner = btrfs_header_owner(buf);
7746 ret = check_block(root, extent_cache, buf, flags);
/* Leaves: dispatch every item by key type. */
7750 if (btrfs_is_leaf(buf)) {
7751 btree_space_waste += btrfs_leaf_free_space(root, buf);
7752 for (i = 0; i < nritems; i++) {
7753 struct btrfs_file_extent_item *fi;
7754 btrfs_item_key_to_cpu(buf, &key, i);
7756 * Check key type against the leaf owner.
7757 * Could filter quite a lot of early error if
7760 if (check_type_with_root(btrfs_header_owner(buf),
7762 fprintf(stderr, "ignoring invalid key\n");
7765 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7766 process_extent_item(root, extent_cache, buf,
7770 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7771 process_extent_item(root, extent_cache, buf,
7775 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7777 btrfs_item_size_nr(buf, i);
7780 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7781 process_chunk_item(chunk_cache, &key, buf, i);
7784 if (key.type == BTRFS_DEV_ITEM_KEY) {
7785 process_device_item(dev_cache, &key, buf, i);
7788 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7789 process_block_group_item(block_group_cache,
7793 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7794 process_device_extent_item(dev_extent_cache,
7799 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7800 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7801 process_extent_ref_v0(extent_cache, buf, i);
7808 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7809 ret = add_tree_backref(extent_cache,
7810 key.objectid, 0, key.offset, 0);
7813 "add_tree_backref failed (leaf tree block): %s",
7817 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7818 ret = add_tree_backref(extent_cache,
7819 key.objectid, key.offset, 0, 0);
7822 "add_tree_backref failed (leaf shared block): %s",
7826 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7827 struct btrfs_extent_data_ref *ref;
7828 ref = btrfs_item_ptr(buf, i,
7829 struct btrfs_extent_data_ref);
7830 add_data_backref(extent_cache,
7832 btrfs_extent_data_ref_root(buf, ref),
7833 btrfs_extent_data_ref_objectid(buf,
7835 btrfs_extent_data_ref_offset(buf, ref),
7836 btrfs_extent_data_ref_count(buf, ref),
7837 0, root->fs_info->sectorsize);
7840 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7841 struct btrfs_shared_data_ref *ref;
7842 ref = btrfs_item_ptr(buf, i,
7843 struct btrfs_shared_data_ref);
7844 add_data_backref(extent_cache,
7845 key.objectid, key.offset, 0, 0, 0,
7846 btrfs_shared_data_ref_count(buf, ref),
7847 0, root->fs_info->sectorsize);
7850 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7851 struct bad_item *bad;
7853 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7857 bad = malloc(sizeof(struct bad_item));
7860 INIT_LIST_HEAD(&bad->list);
7861 memcpy(&bad->key, &key,
7862 sizeof(struct btrfs_key));
7863 bad->root_id = owner;
7864 list_add_tail(&bad->list, &delete_items);
/* Everything below in this loop only applies to file extent items. */
7867 if (key.type != BTRFS_EXTENT_DATA_KEY)
7869 fi = btrfs_item_ptr(buf, i,
7870 struct btrfs_file_extent_item);
7871 if (btrfs_file_extent_type(buf, fi) ==
7872 BTRFS_FILE_EXTENT_INLINE)
7874 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7877 data_bytes_allocated +=
7878 btrfs_file_extent_disk_num_bytes(buf, fi);
7879 if (data_bytes_allocated < root->fs_info->sectorsize) {
7882 data_bytes_referenced +=
7883 btrfs_file_extent_num_bytes(buf, fi);
7884 add_data_backref(extent_cache,
7885 btrfs_file_extent_disk_bytenr(buf, fi),
7886 parent, owner, key.objectid, key.offset -
7887 btrfs_file_extent_offset(buf, fi), 1, 1,
7888 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Interior nodes: record and queue every child block pointer. */
7892 struct btrfs_key first_key;
7894 first_key.objectid = 0;
7897 btrfs_item_key_to_cpu(buf, &first_key, 0);
7898 level = btrfs_header_level(buf);
7899 for (i = 0; i < nritems; i++) {
7900 struct extent_record tmpl;
7902 ptr = btrfs_node_blockptr(buf, i);
7903 size = root->fs_info->nodesize;
7904 btrfs_node_key_to_cpu(buf, &key, i);
7906 if ((level == ri->drop_level)
7907 && is_dropped_key(&key, &ri->drop_key)) {
7912 memset(&tmpl, 0, sizeof(tmpl));
7913 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7914 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7919 tmpl.max_size = size;
7920 ret = add_extent_rec(extent_cache, &tmpl);
7924 ret = add_tree_backref(extent_cache, ptr, parent,
7928 "add_tree_backref failed (non-leaf block): %s",
7934 add_pending(nodes, seen, ptr, size);
7936 add_pending(pending, seen, ptr, size);
/* Unused key-pointer slots of a node count as wasted btree space. */
7939 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7940 nritems) * sizeof(struct btrfs_key_ptr);
7942 total_btree_bytes += buf->len;
7943 if (fs_root_objectid(btrfs_header_owner(buf)))
7944 total_fs_tree_bytes += buf->len;
7945 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7946 total_extent_tree_bytes += buf->len;
7948 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for processing.
 *
 * The block goes onto @nodes when its header level is above 0 and onto
 * @pending otherwise.  An extent record covering buf->start / buf->len is
 * added to @extent_cache, followed by a tree backref.  The backref uses
 * buf->start itself as the parent when the tree is the reloc tree or the
 * header's backref revision predates BTRFS_MIXED_BACKREF_REV; in all other
 * cases the parent is 0 and the backref is keyed by the root objectid.
 */
7952 static int add_root_to_pending(struct extent_buffer *buf,
7953 struct cache_tree *extent_cache,
7954 struct cache_tree *pending,
7955 struct cache_tree *seen,
7956 struct cache_tree *nodes,
7959 struct extent_record tmpl;
7962 if (btrfs_header_level(buf) > 0)
7963 add_pending(nodes, seen, buf->start, buf->len);
7965 add_pending(pending, seen, buf->start, buf->len);
7967 memset(&tmpl, 0, sizeof(tmpl));
7968 tmpl.start = buf->start;
7973 tmpl.max_size = buf->len;
7974 add_extent_rec(extent_cache, &tmpl);
7976 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7977 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7978 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7981 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7986 /* as we fix the tree, we might be deleting blocks that
7987 * we're tracking for repair. This hook makes sure we
7988 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called while an extent reference is being dropped during repair:
 * keep the in-memory extent records (fs_info->fsck_extent_cache) in step
 * with the on-disk change.
 *
 * The extent record covering @bytenr/@num_bytes is looked up.  An @owner
 * of BTRFS_FIRST_FREE_OBJECTID or above is treated as data.  For data, the
 * matching data backref has its found_ref / num_refs counters reduced, and
 * the record's refs / extent_item_refs are reduced alongside them.  For
 * tree blocks, the matching tree backref has its found_ref and
 * found_extent_tree bits cleared and extent_item_refs is decremented.
 * maybe_free_extent_rec() is called on the record at the end.
 *
 * NOTE(review): both branches unlink the backref when
 * "!found_extent_tree && found_ref".  In the tree branch found_ref has
 * just been cleared whenever it was set, so that condition looks
 * unsatisfiable there; "!found_ref" may have been intended -- confirm.
 */
7990 static int free_extent_hook(struct btrfs_trans_handle *trans,
7991 struct btrfs_root *root,
7992 u64 bytenr, u64 num_bytes, u64 parent,
7993 u64 root_objectid, u64 owner, u64 offset,
7996 struct extent_record *rec;
7997 struct cache_extent *cache;
7999 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8001 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8002 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8006 rec = container_of(cache, struct extent_record, cache);
8008 struct data_backref *back;
8009 back = find_data_backref(rec, parent, root_objectid, owner,
8010 offset, 1, bytenr, num_bytes);
8013 if (back->node.found_ref) {
8014 back->found_ref -= refs_to_drop;
8016 rec->refs -= refs_to_drop;
8018 if (back->node.found_extent_tree) {
8019 back->num_refs -= refs_to_drop;
8020 if (rec->extent_item_refs)
8021 rec->extent_item_refs -= refs_to_drop;
/* Once a counter reaches zero the matching "found" bit is cleared too. */
8023 if (back->found_ref == 0)
8024 back->node.found_ref = 0;
8025 if (back->num_refs == 0)
8026 back->node.found_extent_tree = 0;
8028 if (!back->node.found_extent_tree && back->node.found_ref) {
8029 list_del(&back->node.list);
8033 struct tree_backref *back;
8034 back = find_tree_backref(rec, parent, root_objectid);
8037 if (back->node.found_ref) {
8040 back->node.found_ref = 0;
8042 if (back->node.found_extent_tree) {
8043 if (rec->extent_item_refs)
8044 rec->extent_item_refs--;
8045 back->node.found_extent_tree = 0;
8047 if (!back->node.found_extent_tree && back->node.found_ref) {
8048 list_del(&back->node.list);
8052 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete every extent-tree item that belongs to the extent at @bytenr.
 *
 * The search starts from the highest possible offset for @bytenr and works
 * towards smaller keys.  Items whose objectid is not @bytenr end the scan.
 * Items of a type other than EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF,
 * EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or SHARED_DATA_REF are
 * not deleted; the search key is instead moved just below the found key
 * (offset - 1 when the type is 0, otherwise type - 1 with the maximum
 * offset).
 *
 * Each deletion is announced on stderr.  After an EXTENT_ITEM or
 * METADATA_ITEM has been deleted, btrfs_update_block_group() is called for
 * the extent's size (key offset, or nodesize for METADATA_ITEM).
 */
8057 static int delete_extent_records(struct btrfs_trans_handle *trans,
8058 struct btrfs_root *root,
8059 struct btrfs_path *path,
8062 struct btrfs_key key;
8063 struct btrfs_key found_key;
8064 struct extent_buffer *leaf;
8069 key.objectid = bytenr;
8071 key.offset = (u64)-1;
8074 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8081 if (path->slots[0] == 0)
8087 leaf = path->nodes[0];
8088 slot = path->slots[0];
8090 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8091 if (found_key.objectid != bytenr)
8094 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8095 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8096 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8097 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8098 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8099 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8100 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8101 btrfs_release_path(path);
8102 if (found_key.type == 0) {
8103 if (found_key.offset == 0)
8105 key.offset = found_key.offset - 1;
8106 key.type = found_key.type;
8108 key.type = found_key.type - 1;
8109 key.offset = (u64)-1;
8113 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8114 found_key.objectid, found_key.type, found_key.offset);
8116 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8119 btrfs_release_path(path);
8121 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8122 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8123 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8124 found_key.offset : root->fs_info->nodesize;
8126 ret = btrfs_update_block_group(trans, root, bytenr,
8133 btrfs_release_path(path);
8138 * for a single backref, this will allocate a new extent
8139 * and add the backref to it.
/*
 * Write one backref of extent record @rec into the extent tree, creating
 * the extent item first unless @allocated says it already exists.
 *
 * When the item has to be created, an EXTENT_ITEM keyed by
 * (rec->start, rec->max_size) is inserted with a refcount of 0 and the
 * record's generation.  Data extents get BTRFS_EXTENT_FLAG_DATA.  Tree
 * blocks get a zeroed btrfs_tree_block_info carrying rec->info_level and a
 * key built from rec->info_objectid, plus BTRFS_EXTENT_FLAG_TREE_BLOCK
 * or'ed with @flags.  The block group accounting is then updated.
 *
 * The backref itself is added with btrfs_inc_extent_ref(): once per found
 * reference for data backrefs, once for a tree backref.  For full backrefs
 * the backref's parent is used, otherwise its root.  A line describing the
 * new backref is printed to stderr.
 */
8141 static int record_extent(struct btrfs_trans_handle *trans,
8142 struct btrfs_fs_info *info,
8143 struct btrfs_path *path,
8144 struct extent_record *rec,
8145 struct extent_backref *back,
8146 int allocated, u64 flags)
8149 struct btrfs_root *extent_root = info->extent_root;
8150 struct extent_buffer *leaf;
8151 struct btrfs_key ins_key;
8152 struct btrfs_extent_item *ei;
8153 struct data_backref *dback;
8154 struct btrfs_tree_block_info *bi;
8157 rec->max_size = max_t(u64, rec->max_size,
8161 u32 item_size = sizeof(*ei);
8164 item_size += sizeof(*bi);
8166 ins_key.objectid = rec->start;
8167 ins_key.offset = rec->max_size;
8168 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8170 ret = btrfs_insert_empty_item(trans, extent_root, path,
8171 &ins_key, item_size);
8175 leaf = path->nodes[0];
8176 ei = btrfs_item_ptr(leaf, path->slots[0],
8177 struct btrfs_extent_item);
/* The refcount starts at 0; btrfs_inc_extent_ref() below raises it. */
8179 btrfs_set_extent_refs(leaf, ei, 0);
8180 btrfs_set_extent_generation(leaf, ei, rec->generation);
8182 if (back->is_data) {
8183 btrfs_set_extent_flags(leaf, ei,
8184 BTRFS_EXTENT_FLAG_DATA);
8186 struct btrfs_disk_key copy_key;
/* The tree block info sits right behind the extent item. */
8188 bi = (struct btrfs_tree_block_info *)(ei + 1);
8189 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8192 btrfs_set_disk_key_objectid(&copy_key,
8193 rec->info_objectid);
8194 btrfs_set_disk_key_type(&copy_key, 0);
8195 btrfs_set_disk_key_offset(&copy_key, 0);
8197 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8198 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8200 btrfs_set_extent_flags(leaf, ei,
8201 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8204 btrfs_mark_buffer_dirty(leaf);
8205 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8206 rec->max_size, 1, 0);
8209 btrfs_release_path(path);
8212 if (back->is_data) {
8216 dback = to_data_backref(back);
8217 if (back->full_backref)
8218 parent = dback->parent;
8222 for (i = 0; i < dback->found_ref; i++) {
8223 /* if parent != 0, we're doing a full backref
8224 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8225 * just makes the backref allocator create a data
8228 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8229 rec->start, rec->max_size,
8233 BTRFS_FIRST_FREE_OBJECTID :
8239 fprintf(stderr, "adding new data backref"
8240 " on %llu %s %llu owner %llu"
8241 " offset %llu found %d\n",
8242 (unsigned long long)rec->start,
8243 back->full_backref ?
8245 back->full_backref ?
8246 (unsigned long long)parent :
8247 (unsigned long long)dback->root,
8248 (unsigned long long)dback->owner,
8249 (unsigned long long)dback->offset,
8253 struct tree_backref *tback;
8255 tback = to_tree_backref(back);
8256 if (back->full_backref)
8257 parent = tback->parent;
8261 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8262 rec->start, rec->max_size,
8263 parent, tback->root, 0, 0);
8264 fprintf(stderr, "adding new tree backref on "
8265 "start %llu len %llu parent %llu root %llu\n",
8266 rec->start, rec->max_size, parent, tback->root);
8269 btrfs_release_path(path);
/*
 * Scan the list headed by @entries for an extent_entry whose bytenr and
 * bytes fields both equal the requested @bytenr and @bytes.
 *
 * NOTE(review): the statement executed on a match and the final return are
 * not visible in this copy; presumably the matching entry (or NULL when
 * nothing matches) is returned -- confirm against the full source.
 */
8273 static struct extent_entry *find_entry(struct list_head *entries,
8274 u64 bytenr, u64 bytes)
8276 struct extent_entry *entry = NULL;
8278 list_for_each_entry(entry, entries, list) {
/* Both the start and the length must agree for an entry to match. */
8279 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose, from the list headed by @entries, the entry that the backrefs
 * agree on most strongly, judged by each entry's count and broken fields.
 *
 * Entries whose broken count equals their total count are not trusted, and
 * two candidates with the same count mean the current choice is not yet
 * reliable.
 *
 * NOTE(review): the return statement and several branch bodies are not
 * visible in this copy. 'best' starts out as NULL, so presumably NULL is
 * returned when no clear winner exists -- confirm against the full source.
 */
8286 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8288 struct extent_entry *entry, *best = NULL, *prev = NULL;
8290 list_for_each_entry(entry, entries, list) {
8292 * If there are as many broken entries as entries then we know
8293 * not to trust this particular entry.
8295 if (entry->broken == entry->count)
8299 * Special case, when there are only two entries and 'best' is
8309 * If our current entry == best then we can't be sure our best
8310 * is really the best, so we need to keep searching.
8312 if (best && best->count == entry->count) {
8318 /* Prev == entry, not good enough, have to keep searching */
8319 if (!prev->broken && prev->count == entry->count)
/* With no 'best' yet, take whichever of prev/entry has the larger count. */
8323 best = (prev->count > entry->count) ? prev : entry;
8324 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that its disk bytenr / disk num bytes
 * agree with @entry, the extent range that was chosen as correct.
 *
 * @info:  filesystem the ref lives in
 * @path:  scratch path; released before the function returns
 * @dback: data backref describing the file extent (root, owner, offset and
 *         the disk_bytenr/bytes it was found with)
 * @entry: the bytenr/bytes pair the file extent should point to
 *
 * The work is done in two passes: a read-only search that locates the key of
 * the file extent item matching @dback, followed by a COW search inside a
 * transaction that modifies the item in place.
 *
 * Returns 'ret' if it is non-zero, otherwise the result of committing the
 * transaction.
 */
8332 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8333 struct data_backref *dback, struct extent_entry *entry)
8335 struct btrfs_trans_handle *trans;
8336 struct btrfs_root *root;
8337 struct btrfs_file_extent_item *fi;
8338 struct extent_buffer *leaf;
8339 struct btrfs_key key;
/* Look up the fs root that owns the referencing file extent. */
8343 key.objectid = dback->root;
8344 key.type = BTRFS_ROOT_ITEM_KEY;
8345 key.offset = (u64)-1;
8346 root = btrfs_read_fs_root(info, &key);
8348 fprintf(stderr, "Couldn't find root for our ref\n");
8353 * The backref points to the original offset of the extent if it was
8354 * split, so we need to search down to the offset we have and then walk
8355 * forward until we find the backref we're looking for.
8357 key.objectid = dback->owner;
8358 key.type = BTRFS_EXTENT_DATA_KEY;
8359 key.offset = dback->offset;
/* Read-only lookup (no transaction, no COW) just to find the item's key. */
8360 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8362 fprintf(stderr, "Error looking up ref %d\n", ret);
/* Ran off the end of this leaf: continue in the next one. */
8367 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8368 ret = btrfs_next_leaf(root, path);
8370 fprintf(stderr, "Couldn't find our ref, next\n");
8374 leaf = path->nodes[0];
8375 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Leaving the owner's EXTENT_DATA items means the ref does not exist. */
8376 if (key.objectid != dback->owner ||
8377 key.type != BTRFS_EXTENT_DATA_KEY) {
8378 fprintf(stderr, "Couldn't find our ref, search\n");
8381 fi = btrfs_item_ptr(leaf, path->slots[0],
8382 struct btrfs_file_extent_item);
8383 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8384 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* This is the file extent the backref was recorded from. */
8386 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8391 btrfs_release_path(path);
8393 trans = btrfs_start_transaction(root, 1);
8395 return PTR_ERR(trans);
8398 * Ok we have the key of the file extent we want to fix, now we can cow
8399 * down to the thing and fix it.
8401 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8403 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8404 key.objectid, key.type, key.offset, ret);
8408 fprintf(stderr, "Well that's odd, we just found this key "
8409 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8414 leaf = path->nodes[0];
8415 fi = btrfs_item_ptr(leaf, path->slots[0],
8416 struct btrfs_file_extent_item);
/* Compressed extents with a different start are reported, not repaired. */
8418 if (btrfs_file_extent_compression(leaf, fi) &&
8419 dback->disk_bytenr != entry->bytenr) {
8420 fprintf(stderr, "Ref doesn't match the record start and is "
8421 "compressed, please take a btrfs-image of this file "
8422 "system and send it to a btrfs developer so they can "
8423 "complete this functionality for bytenr %Lu\n",
8424 dback->disk_bytenr);
/*
 * Three cases follow: a backref flagged broken simply gets the new bytenr;
 * otherwise the ref starts either after or before the chosen entry and the
 * in-extent offset is range-checked before the item is rewritten.
 */
8429 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8430 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8431 } else if (dback->disk_bytenr > entry->bytenr) {
8432 u64 off_diff, offset;
8434 off_diff = dback->disk_bytenr - entry->bytenr;
8435 offset = btrfs_file_extent_offset(leaf, fi);
/* The referenced range must not extend past the end of the entry. */
8436 if (dback->disk_bytenr + offset +
8437 btrfs_file_extent_num_bytes(leaf, fi) >
8438 entry->bytenr + entry->bytes) {
8439 fprintf(stderr, "Ref is past the entry end, please "
8440 "take a btrfs-image of this file system and "
8441 "send it to a btrfs developer, ref %Lu\n",
8442 dback->disk_bytenr);
8447 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8448 btrfs_set_file_extent_offset(leaf, fi, offset);
8449 } else if (dback->disk_bytenr < entry->bytenr) {
8452 offset = btrfs_file_extent_offset(leaf, fi);
/* The referenced range must not begin before the start of the entry. */
8453 if (dback->disk_bytenr + offset < entry->bytenr) {
8454 fprintf(stderr, "Ref is before the entry start, please"
8455 " take a btrfs-image of this file system and "
8456 "send it to a btrfs developer, ref %Lu\n",
8457 dback->disk_bytenr);
/* Re-express the offset relative to the entry's start. */
8462 offset += dback->disk_bytenr;
8463 offset -= entry->bytenr;
8464 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8465 btrfs_set_file_extent_offset(leaf, fi, offset);
8468 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8471 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8472 * only do this if we aren't using compression, otherwise it's a
8475 if (!btrfs_file_extent_compression(leaf, fi))
8476 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8478 printf("ram bytes may be wrong?\n");
8479 btrfs_mark_buffer_dirty(leaf);
8481 err = btrfs_commit_transaction(trans, root);
8482 btrfs_release_path(path);
/* An earlier failure takes precedence over the commit result. */
8483 return ret ? ret : err;
/*
 * Check that the data backrefs attached to @rec agree on the extent's
 * bytenr and size, and repair the file extents that disagree.
 *
 * @info: filesystem being checked
 * @path: scratch path handed on to repair_ref()
 * @rec:  extent record whose backrefs are examined
 *
 * Outline, as far as visible here:
 *  1. Group the backrefs that have a real ref (found_ref != 0) into a local
 *     'entries' list keyed by (disk_bytenr, bytes).
 *  2. Pick the most agreed-upon entry with find_most_right_entry(), falling
 *     back to the extent record's own (start, nr) as an extra entry.
 *  3. Call repair_ref() for every backref that differs from the winner.
 *  4. Free the temporary entries list.
 *
 * NOTE(review): the return statements are not visible in this copy, so the
 * exact return-value contract cannot be stated from here.
 */
8486 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8487 struct extent_record *rec)
8489 struct extent_backref *back;
8490 struct data_backref *dback;
8491 struct extent_entry *entry, *best = NULL;
8494 int broken_entries = 0;
8499 * Metadata is easy and the backrefs should always agree on bytenr and
8500 * size, if not we've got bigger issues.
8505 list_for_each_entry(back, &rec->backrefs, list) {
/* Only non-full data backrefs take part in the vote. */
8506 if (back->full_backref || !back->is_data)
8509 dback = to_data_backref(back);
8512 * We only pay attention to backrefs that we found a real
8515 if (dback->found_ref == 0)
8519 * For now we only catch when the bytes don't match, not the
8520 * bytenr. We can easily do this at the same time, but I want
8521 * to have a fs image to test on before we just add repair
8522 * functionality willy-nilly so we know we won't screw up the
8526 entry = find_entry(&entries, dback->disk_bytenr,
8529 entry = malloc(sizeof(struct extent_entry));
8534 memset(entry, 0, sizeof(*entry));
8535 entry->bytenr = dback->disk_bytenr;
8536 entry->bytes = dback->bytes;
8537 list_add_tail(&entry->list, &entries);
8542 * If we only have on entry we may think the entries agree when
8543 * in reality they don't so we have to do some extra checking.
8545 if (dback->disk_bytenr != rec->start ||
8546 dback->bytes != rec->nr || back->broken)
8557 /* Yay all the backrefs agree, carry on good sir */
8558 if (nr_entries <= 1 && !mismatch)
8561 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8562 "%Lu\n", rec->start);
8565 * First we want to see if the backrefs can agree amongst themselves who
8566 * is right, so figure out which one of the entries has the highest
8569 best = find_most_right_entry(&entries);
8572 * Ok so we may have an even split between what the backrefs think, so
8573 * this is where we use the extent ref to see what it thinks.
8576 entry = find_entry(&entries, rec->start, rec->nr);
/*
 * No backref matches the extent record, and either no backref was broken
 * or no extent item was found for the record: nothing can break the tie.
 */
8577 if (!entry && (!broken_entries || !rec->found_rec)) {
8578 fprintf(stderr, "Backrefs don't agree with each other "
8579 "and extent record doesn't agree with anybody,"
8580 " so we can't fix bytenr %Lu bytes %Lu\n",
8581 rec->start, rec->nr);
8584 } else if (!entry) {
8586 * Ok our backrefs were broken, we'll assume this is the
8587 * correct value and add an entry for this range.
8589 entry = malloc(sizeof(struct extent_entry));
8594 memset(entry, 0, sizeof(*entry));
8595 entry->bytenr = rec->start;
8596 entry->bytes = rec->nr;
8597 list_add_tail(&entry->list, &entries);
/* Re-run the vote now that the extent record's view has been added. */
8601 best = find_most_right_entry(&entries);
8603 fprintf(stderr, "Backrefs and extent record evenly "
8604 "split on who is right, this is going to "
8605 "require user input to fix bytenr %Lu bytes "
8606 "%Lu\n", rec->start, rec->nr);
8613 * I don't think this can happen currently as we'll abort() if we catch
8614 * this case higher up, but in case somebody removes that we still can't
8615 * deal with it properly here yet, so just bail out of that's the case.
8617 if (best->bytenr != rec->start) {
8618 fprintf(stderr, "Extent start and backref starts don't match, "
8619 "please use btrfs-image on this file system and send "
8620 "it to a btrfs developer so they can make fsck fix "
8621 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8622 rec->start, rec->nr);
8628 * Ok great we all agreed on an extent record, let's go find the real
8629 * references and fix up the ones that don't match.
8631 list_for_each_entry(back, &rec->backrefs, list) {
8632 if (back->full_backref || !back->is_data)
8635 dback = to_data_backref(back);
8638 * Still ignoring backrefs that don't have a real ref attached
8641 if (dback->found_ref == 0)
/* Backrefs that already match the winning entry need no repair. */
8644 if (dback->bytes == best->bytes &&
8645 dback->disk_bytenr == best->bytenr)
8648 ret = repair_ref(info, path, dback, best);
8654 * Ok we messed with the actual refs, which means we need to drop our
8655 * entire cache and go back and rescan. I know this is a huge pain and
8656 * adds a lot of extra work, but it's the only way to be safe. Once all
8657 * the backrefs agree we may not need to do anything to the extent
/* Tear down the temporary vote list. */
8662 while (!list_empty(&entries)) {
8663 entry = list_entry(entries.next, struct extent_entry, list);
8664 list_del_init(&entry->list);
/*
 * Decide whether @rec, taken from the duplicate_extents list, really has
 * duplicate extent items or is just a backref that was seen before its
 * extent item.
 *
 * When @rec itself has no extent item and exactly one duplicate, the
 * duplicate (which does come from a real extent item) replaces @rec in
 * @extent_cache: it inherits @rec's refs and backrefs, and any other
 * overlapping records in the cache are folded into it.
 *
 * Returns 0 when the replacement record still has duplicates (so something
 * needs deleting) and 1 otherwise, per the final return statement.
 * NOTE(review): the early-return values for the checks near the top and for
 * an insert failure are not visible in this copy.
 */
8670 static int process_duplicates(struct cache_tree *extent_cache,
8671 struct extent_record *rec)
8673 struct extent_record *good, *tmp;
8674 struct cache_extent *cache;
8678 * If we found a extent record for this extent then return, or if we
8679 * have more than one duplicate we are likely going to need to delete
8682 if (rec->found_rec || rec->num_duplicates > 1)
8685 /* Shouldn't happen but just in case */
8686 BUG_ON(!rec->num_duplicates);
8689 * So this happens if we end up with a backref that doesn't match the
8690 * actual extent entry. So either the backref is bad or the extent
8691 * entry is bad. Either way we want to have the extent_record actually
8692 * reflect what we found in the extent_tree, so we need to take the
8693 * duplicate out and use that as the extent_record since the only way we
8694 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8696 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate to be the record and reset its state. */
8698 good = to_extent_record(rec->dups.next);
8699 list_del_init(&good->list);
8700 INIT_LIST_HEAD(&good->backrefs);
8701 INIT_LIST_HEAD(&good->dups);
8702 good->cache.start = good->start;
8703 good->cache.size = good->nr;
8704 good->content_checked = 0;
8705 good->owner_ref_checked = 0;
8706 good->num_duplicates = 0;
/* Carry the old record's reference count and backrefs over. */
8707 good->refs = rec->refs;
8708 list_splice_init(&rec->backrefs, &good->backrefs);
8710 cache = lookup_cache_extent(extent_cache, good->start,
8714 tmp = container_of(cache, struct extent_record, cache);
8717 * If we find another overlapping extent and it's found_rec is
8718 * set then it's a duplicate and we need to try and delete
8721 if (tmp->found_rec || tmp->num_duplicates > 0) {
/* Queue 'good' on the global duplicate list once, then absorb tmp. */
8722 if (list_empty(&good->list))
8723 list_add_tail(&good->list,
8724 &duplicate_extents);
8725 good->num_duplicates += tmp->num_duplicates + 1;
8726 list_splice_init(&tmp->dups, &good->dups);
8727 list_del_init(&tmp->list);
8728 list_add_tail(&tmp->list, &good->dups);
8729 remove_cache_extent(extent_cache, &tmp->cache);
8734 * Ok we have another non extent item backed extent rec, so lets
8735 * just add it to this extent and carry on like we did above.
8737 good->refs += tmp->refs;
8738 list_splice_init(&tmp->backrefs, &good->backrefs);
8739 remove_cache_extent(extent_cache, &tmp->cache);
8742 ret = insert_cache_extent(extent_cache, &good->cache);
8745 return good->num_duplicates ? 0 : 1;
/*
 * Delete the redundant EXTENT_ITEMs recorded as duplicates of @rec from the
 * extent tree, keeping one record that covers the others.
 *
 * @root: any root of the filesystem; the function switches to
 *        root->fs_info->extent_root before starting the transaction
 * @rec:  record whose 'dups' list holds the overlapping extent records
 *
 * Returns 'ret' when it is non-zero, otherwise nr_del (see the final
 * return). When both are zero, rec->num_duplicates is reset to 0.
 * NOTE(review): the initialisation of 'good' and the lines that update
 * nr_del are not visible in this copy.
 */
8748 static int delete_duplicate_records(struct btrfs_root *root,
8749 struct extent_record *rec)
8751 struct btrfs_trans_handle *trans;
8752 LIST_HEAD(delete_list);
8753 struct btrfs_path path;
8754 struct extent_record *tmp, *good, *n;
8757 struct btrfs_key key;
8759 btrfs_init_path(&path);
8762 /* Find the record that covers all of the duplicates. */
8763 list_for_each_entry(tmp, &rec->dups, list) {
8764 if (good->start < tmp->start)
8766 if (good->nr > tmp->nr)
/* Partial overlap (neither record covers the other) is not handled. */
8769 if (tmp->start + tmp->nr < good->start + good->nr) {
8770 fprintf(stderr, "Ok we have overlapping extents that "
8771 "aren't completely covered by each other, this "
8772 "is going to require more careful thought. "
8773 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8774 tmp->start, tmp->nr, good->start, good->nr);
8781 list_add_tail(&rec->list, &delete_list);
8783 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8786 list_move_tail(&tmp->list, &delete_list);
8789 root = root->fs_info->extent_root;
8790 trans = btrfs_start_transaction(root, 1);
8791 if (IS_ERR(trans)) {
8792 ret = PTR_ERR(trans);
8796 list_for_each_entry(tmp, &delete_list, list) {
/* Records without a real extent item have nothing on disk to delete. */
8797 if (tmp->found_rec == 0)
8799 key.objectid = tmp->start;
8800 key.type = BTRFS_EXTENT_ITEM_KEY;
8801 key.offset = tmp->nr;
8803 /* Shouldn't happen but just in case */
8804 if (tmp->metadata) {
8805 fprintf(stderr, "Well this shouldn't happen, extent "
8806 "record overlaps but is metadata? "
8807 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
/* ins_len of -1 with cow set: search in preparation for a deletion. */
8811 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8817 ret = btrfs_del_item(trans, root, &path);
8820 btrfs_release_path(&path);
8823 err = btrfs_commit_transaction(trans, root);
/* Unlink everything still queued on the local delete list. */
8827 while (!list_empty(&delete_list)) {
8828 tmp = to_extent_record(delete_list.next);
8829 list_del_init(&tmp->list);
/* Likewise drain whatever remains on the record's duplicate list. */
8835 while (!list_empty(&rec->dups)) {
8836 tmp = to_extent_record(rec->dups.next);
8837 list_del_init(&tmp->list);
8841 btrfs_release_path(&path);
8843 if (!ret && !nr_del)
8844 rec->num_duplicates = 0;
8846 return ret ? ret : nr_del;
/*
 * For every data backref of @rec that was not matched to a real file extent
 * (found_ref == 0), look the file extent up directly in its fs tree and, if
 * one exists, record what it actually points to in the backref.
 *
 * @info:         filesystem being checked
 * @path:         scratch path; released after each lookup
 * @extent_cache: cache of extent records, consulted to see whether the file
 *                extent's bytenr already belongs to another record
 * @rec:          extent record whose backrefs are examined
 *
 * A backref updated this way gets found_ref incremented and its
 * disk_bytenr/bytes overwritten with the values read from the file extent.
 *
 * Returns PTR_ERR(root) when reading an fs root fails with an error other
 * than -ENOENT.
 * NOTE(review): the remaining return statements are not visible in this
 * copy.
 */
8849 static int find_possible_backrefs(struct btrfs_fs_info *info,
8850 struct btrfs_path *path,
8851 struct cache_tree *extent_cache,
8852 struct extent_record *rec)
8854 struct btrfs_root *root;
8855 struct extent_backref *back;
8856 struct data_backref *dback;
8857 struct cache_extent *cache;
8858 struct btrfs_file_extent_item *fi;
8859 struct btrfs_key key;
8863 list_for_each_entry(back, &rec->backrefs, list) {
8864 /* Don't care about full backrefs (poor unloved backrefs) */
8865 if (back->full_backref || !back->is_data)
8868 dback = to_data_backref(back);
8870 /* We found this one, we don't need to do a lookup */
8871 if (dback->found_ref)
8874 key.objectid = dback->root;
8875 key.type = BTRFS_ROOT_ITEM_KEY;
8876 key.offset = (u64)-1;
8878 root = btrfs_read_fs_root(info, &key);
8880 /* No root, definitely a bad ref, skip */
8881 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8883 /* Other err, exit */
8885 return PTR_ERR(root);
/* Exact-key, read-only lookup of the file extent the backref names. */
8887 key.objectid = dback->owner;
8888 key.type = BTRFS_EXTENT_DATA_KEY;
8889 key.offset = dback->offset;
8890 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8892 btrfs_release_path(path);
8895 /* Didn't find it, we can carry on */
8900 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8901 struct btrfs_file_extent_item);
8902 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8903 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8904 btrfs_release_path(path);
/* Is the first byte the file extent points at covered by a cached record? */
8905 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8907 struct extent_record *tmp;
8908 tmp = container_of(cache, struct extent_record, cache);
8911 * If we found an extent record for the bytenr for this
8912 * particular backref then we can't add it to our
8913 * current extent record. We only want to add backrefs
8914 * that don't have a corresponding extent item in the
8915 * extent tree since they likely belong to this record
8916 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the values actually stored in the file extent item. */
8922 dback->found_ref += 1;
8923 dback->disk_bytenr = bytenr;
8924 dback->bytes = bytes;
8927 * Set this so the verify backref code knows not to trust the
8928 * values in this backref.
/*
 * Summary: for each data backref of @rec that exists in the extent tree but
 * has no file extent referencing it, allocate an orphan_data_extent that
 * describes the backref (root, owner, offset) and the extent range
 * (rec->cache.start, rec->cache.size) and attach it to the owning fs root.
 */
8937 * Record orphan data ref into corresponding root.
8939 * Return 0 if the extent item contains data ref and recorded.
8940 * Return 1 if the extent item contains no useful data ref
8941 * On that case, it may contains only shared_dataref or metadata backref
8942 * or the file extent exists(this should be handled by the extent bytenr
8944 * Return <0 if something goes wrong.
8946 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8947 struct extent_record *rec)
8949 struct btrfs_key key;
8950 struct btrfs_root *dest_root;
8951 struct extent_backref *back;
8952 struct data_backref *dback;
8953 struct orphan_data_extent *orphan;
8954 struct btrfs_path path;
8955 int recorded_data_ref = 0;
8960 btrfs_init_path(&path);
8961 list_for_each_entry(back, &rec->backrefs, list) {
/* Only non-full data backrefs that were seen in the extent tree qualify. */
8962 if (back->full_backref || !back->is_data ||
8963 !back->found_extent_tree)
8965 dback = to_data_backref(back);
/* A backref with a real file extent behind it is not an orphan. */
8966 if (dback->found_ref)
8968 key.objectid = dback->root;
8969 key.type = BTRFS_ROOT_ITEM_KEY;
8970 key.offset = (u64)-1;
8972 dest_root = btrfs_read_fs_root(fs_info, &key);
8974 /* For non-exist root we just skip it */
8975 if (IS_ERR(dest_root) || !dest_root)
8978 key.objectid = dback->owner;
8979 key.type = BTRFS_EXTENT_DATA_KEY;
8980 key.offset = dback->offset;
/* Only the search result matters; the path is dropped right away. */
8982 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8983 btrfs_release_path(&path);
8985 * For ret < 0, it's OK since the fs-tree may be corrupted,
8986 * we need to record it for inode/file extent rebuild.
8987 * For ret > 0, we record it only for file extent rebuild.
8988 * For ret == 0, the file extent exists but only bytenr
8989 * mismatch, let the original bytenr fix routine to handle,
8995 orphan = malloc(sizeof(*orphan));
9000 INIT_LIST_HEAD(&orphan->list);
9001 orphan->root = dback->root;
9002 orphan->objectid = dback->owner;
9003 orphan->offset = dback->offset;
9004 orphan->disk_bytenr = rec->cache.start;
9005 orphan->disk_len = rec->cache.size;
/*
 * NOTE(review): with the usual list_add(new, head) convention these
 * arguments look swapped -- the root's list head is passed as the new node
 * and the freshly initialised orphan->list as the head. If so, each call
 * would detach previously recorded orphans from the root's list. Confirm
 * against the list implementation before changing.
 */
9006 list_add(&dest_root->orphan_data_extents, &orphan->list);
9007 recorded_data_ref = 1;
9010 btrfs_release_path(&path);
/* 0 when at least one orphan was recorded, 1 when none was. */
9012 return !recorded_data_ref;
/*
 * Summary of the visible steps: optionally recover unmatched data backrefs
 * with find_possible_backrefs(), make the backrefs agree with
 * verify_backrefs(), then inside one transaction on the extent root delete
 * the existing extent records and re-create one per backref via
 * record_extent(), and finally commit.
 * NOTE(review): the error-handling branches and the return statement are
 * not visible in this copy.
 */
9018 * when an incorrect extent item is found, this will delete
9019 * all of the existing entries for it and recreate them
9020 * based on what the tree scan found.
9022 static int fixup_extent_refs(struct btrfs_fs_info *info,
9023 struct cache_tree *extent_cache,
9024 struct extent_record *rec)
9026 struct btrfs_trans_handle *trans = NULL;
9028 struct btrfs_path path;
9029 struct list_head *cur = rec->backrefs.next;
9030 struct cache_extent *cache;
9031 struct extent_backref *back;
/* Preserve the FULL_BACKREF flag when the extent item is re-created. */
9035 if (rec->flag_block_full_backref)
9036 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9038 btrfs_init_path(&path);
/* Data extents whose counted refs differ from the extent item's count. */
9039 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9041 * Sometimes the backrefs themselves are so broken they don't
9042 * get attached to any meaningful rec, so first go back and
9043 * check any of our backrefs that we couldn't find and throw
9044 * them into the list if we find the backref so that
9045 * verify_backrefs can figure out what to do.
9047 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9052 /* step one, make sure all of the backrefs agree */
9053 ret = verify_backrefs(info, &path, rec);
9057 trans = btrfs_start_transaction(info->extent_root, 1);
9058 if (IS_ERR(trans)) {
9059 ret = PTR_ERR(trans);
9063 /* step two, delete all the existing records */
9064 ret = delete_extent_records(trans, info->extent_root, &path,
9070 /* was this block corrupt? If so, don't add references to it */
9071 cache = lookup_cache_extent(info->corrupt_blocks,
9072 rec->start, rec->max_size);
9078 /* step three, recreate all the refs we did find */
9079 while(cur != &rec->backrefs) {
9080 back = to_extent_backref(cur);
9084 * if we didn't find any references, don't create a
9087 if (!back->found_ref)
9090 rec->bad_full_backref = 0;
9091 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9099 int err = btrfs_commit_transaction(trans, info->extent_root);
9105 fprintf(stderr, "Repaired extent references for %llu\n",
9106 (unsigned long long)rec->start);
9108 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the on-disk extent item of
 * @rec so that it matches rec->flag_block_full_backref.
 *
 * @fs_info: filesystem being repaired; the extent root is taken from it
 * @rec:     extent record describing the item to fix
 *
 * The item is looked up as a METADATA_ITEM keyed by level for metadata
 * records, or as an EXTENT_ITEM keyed by size otherwise, then rewritten
 * inside its own transaction.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started.
 * NOTE(review): the other return statements are not visible in this copy.
 */
9112 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9113 struct extent_record *rec)
9115 struct btrfs_trans_handle *trans;
9116 struct btrfs_root *root = fs_info->extent_root;
9117 struct btrfs_path path;
9118 struct btrfs_extent_item *ei;
9119 struct btrfs_key key;
9123 key.objectid = rec->start;
9124 if (rec->metadata) {
9125 key.type = BTRFS_METADATA_ITEM_KEY;
9126 key.offset = rec->info_level;
9128 key.type = BTRFS_EXTENT_ITEM_KEY;
9129 key.offset = rec->max_size;
9132 trans = btrfs_start_transaction(root, 0);
9134 return PTR_ERR(trans);
9136 btrfs_init_path(&path);
/* COW search: the leaf is about to be modified. */
9137 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Both failure paths below still commit the (unchanged) transaction. */
9139 btrfs_release_path(&path);
9140 btrfs_commit_transaction(trans, root);
9143 fprintf(stderr, "Didn't find extent for %llu\n",
9144 (unsigned long long)rec->start);
9145 btrfs_release_path(&path);
9146 btrfs_commit_transaction(trans, root);
9150 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9151 struct btrfs_extent_item);
/* Read-modify-write of the flags field only. */
9152 flags = btrfs_extent_flags(path.nodes[0], ei);
9153 if (rec->flag_block_full_backref) {
9154 fprintf(stderr, "setting full backref on %llu\n",
9155 (unsigned long long)key.objectid);
9156 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9158 fprintf(stderr, "clearing full backref on %llu\n",
9159 (unsigned long long)key.objectid);
9160 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9162 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9163 btrfs_mark_buffer_dirty(path.nodes[0]);
9164 btrfs_release_path(&path);
9165 ret = btrfs_commit_transaction(trans, root);
9167 fprintf(stderr, "Repaired extent flags for %llu\n",
9168 (unsigned long long)rec->start);
/*
 * Remove, from the extent tree, the parent node's pointer to one corrupt
 * block.
 *
 * @trans:   running transaction
 * @info:    filesystem being repaired
 * @corrupt: description of the bad block (key, level and cache.start)
 *
 * The search stops one level above the corrupt block. The slot the search
 * landed on is tried first; failing that, every slot of that node is
 * compared against corrupt->cache.start before the pointer is deleted with
 * btrfs_del_ptr().
 * NOTE(review): the return statements are not visible in this copy.
 */
9173 /* right now we only prune from the extent allocation tree */
9174 static int prune_one_block(struct btrfs_trans_handle *trans,
9175 struct btrfs_fs_info *info,
9176 struct btrfs_corrupt_block *corrupt)
9179 struct btrfs_path path;
9180 struct extent_buffer *eb;
9184 int level = corrupt->level + 1;
9186 btrfs_init_path(&path);
9188 /* we want to stop at the parent to our busted block */
9189 path.lowest_level = level;
9191 ret = btrfs_search_slot(trans, info->extent_root,
9192 &corrupt->key, &path, -1, 1);
9197 eb = path.nodes[level];
9204 * hopefully the search gave us the block we want to prune,
9205 * lets try that first
9207 slot = path.slots[level];
9208 found = btrfs_node_blockptr(eb, slot);
9209 if (found == corrupt->cache.start)
9212 nritems = btrfs_header_nritems(eb);
9214 /* the search failed, lets scan this node and hope we find it */
9215 for (slot = 0; slot < nritems; slot++) {
9216 found = btrfs_node_blockptr(eb, slot);
9217 if (found == corrupt->cache.start)
9221 * we couldn't find the bad block. TODO, search all the nodes for pointers
/* Special-cases the situation where the parent is the extent root node. */
9224 if (eb == info->extent_root->node) {
9229 btrfs_release_path(&path);
9234 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9235 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9238 btrfs_release_path(&path);
/*
 * Prune every block recorded in info->corrupt_blocks from the extent tree.
 *
 * Each cache entry is handed to prune_one_block() and then removed from the
 * corrupt_blocks cache. A transaction on the extent root is started for the
 * work and committed at the end.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started, otherwise
 * the result of btrfs_commit_transaction() on the visible final path.
 * NOTE(review): the loop structure and any "nothing to do" return are not
 * visible in this copy.
 */
9242 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9244 struct btrfs_trans_handle *trans = NULL;
9245 struct cache_extent *cache;
9246 struct btrfs_corrupt_block *corrupt;
9249 cache = search_cache_extent(info->corrupt_blocks, 0);
9253 trans = btrfs_start_transaction(info->extent_root, 1);
9255 return PTR_ERR(trans);
9257 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* The result of prune_one_block() is not checked here. */
9258 prune_one_block(trans, info, corrupt);
9259 remove_cache_extent(info->corrupt_blocks, cache);
9262 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear EXTENT_DIRTY ranges from fs_info->free_space_cache and walk the
 * block groups starting from a given offset.
 *
 * NOTE(review): most of this function (loop structure, what is reset on
 * each block group) is not visible in this copy; only the dirty-range
 * clearing and the advance to the end of each block group can be seen.
 */
9266 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9268 struct btrfs_block_group_cache *cache;
9273 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9274 &start, &end, EXTENT_DIRTY);
9277 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9282 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue right after this block group (objectid = start, offset = length). */
9287 start = cache->key.objectid + cache->key.offset;
/*
 * Walk every extent record in @extent_cache, report inconsistencies on
 * stderr and, in repair mode, try to fix them.
 *
 * @root:         any root of the filesystem (fs_info is taken from it)
 * @extent_cache: cache of extent records built by the earlier tree scan
 *
 * Visible phases:
 *  - mark all recorded extents and all corrupt blocks dirty in
 *    fs_info->excluded_extents, then prune the corrupt blocks;
 *  - in repair mode, resolve entries on the global duplicate_extents list
 *    with process_duplicates() / delete_duplicate_records();
 *  - per record: report duplicate extent items, ref count mismatches,
 *    backpointer mismatches, failed owner-ref checks, bad full backrefs,
 *    metadata crossing a stripe boundary and chunk type mismatches,
 *    invoking fixup_extent_refs() / fixup_extent_flags() where applicable;
 *  - finally, in a transaction on the extent root, run
 *    btrfs_fix_block_accounting() and commit.
 * NOTE(review): many conditionals, the error accounting and all return
 * statements are not visible in this copy.
 */
9291 static int check_extent_refs(struct btrfs_root *root,
9292 struct cache_tree *extent_cache)
9294 struct extent_record *rec;
9295 struct cache_extent *cache;
9301 * if we're doing a repair, we have to make sure
9302 * we don't allocate from the problem extents.
9303 * In the worst case, this will be all the
9306 cache = search_cache_extent(extent_cache, 0);
9308 rec = container_of(cache, struct extent_record, cache);
9309 set_extent_dirty(root->fs_info->excluded_extents,
9311 rec->start + rec->max_size - 1);
9312 cache = next_cache_extent(cache);
9315 /* pin down all the corrupted blocks too */
9316 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9318 set_extent_dirty(root->fs_info->excluded_extents,
9320 cache->start + cache->size - 1);
9321 cache = next_cache_extent(cache);
/* The result of prune_corrupt_blocks() is not checked here. */
9323 prune_corrupt_blocks(root->fs_info);
9324 reset_cached_block_groups(root->fs_info);
9327 reset_cached_block_groups(root->fs_info);
9330 * We need to delete any duplicate entries we find first otherwise we
9331 * could mess up the extent tree when we have backrefs that actually
9332 * belong to a different extent item and not the weird duplicate one.
9334 while (repair && !list_empty(&duplicate_extents)) {
9335 rec = to_extent_record(duplicate_extents.next);
9336 list_del_init(&rec->list);
9338 /* Sometimes we can find a backref before we find an actual
9339 * extent, so we need to process it a little bit to see if there
9340 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9341 * if this is a backref screwup. If we need to delete stuff
9342 * process_duplicates() will return 0, otherwise it will return
9345 if (process_duplicates(extent_cache, rec))
9347 ret = delete_duplicate_records(root, rec);
9351 * delete_duplicate_records will return the number of entries
9352 * deleted, so if it's greater than 0 then we know we actually
9353 * did something and we need to remove.
9366 cache = search_cache_extent(extent_cache, 0);
9369 rec = container_of(cache, struct extent_record, cache);
9370 if (rec->num_duplicates) {
9371 fprintf(stderr, "extent item %llu has multiple extent "
9372 "items\n", (unsigned long long)rec->start);
/* Refs counted by the tree scan vs. the count stored in the extent item. */
9376 if (rec->refs != rec->extent_item_refs) {
9377 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9378 (unsigned long long)rec->start,
9379 (unsigned long long)rec->nr);
9380 fprintf(stderr, "extent item %llu, found %llu\n",
9381 (unsigned long long)rec->extent_item_refs,
9382 (unsigned long long)rec->refs);
9383 ret = record_orphan_data_extents(root->fs_info, rec);
9389 if (all_backpointers_checked(rec, 1)) {
9390 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9391 (unsigned long long)rec->start,
9392 (unsigned long long)rec->nr);
9396 if (!rec->owner_ref_checked) {
9397 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9398 (unsigned long long)rec->start,
9399 (unsigned long long)rec->nr);
9404 if (repair && fix) {
9405 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9411 if (rec->bad_full_backref) {
9412 fprintf(stderr, "bad full backref, on [%llu]\n",
9413 (unsigned long long)rec->start);
9415 ret = fixup_extent_flags(root->fs_info, rec);
9423 * Although it's not a extent ref's problem, we reuse this
9424 * routine for error reporting.
9425 * No repair function yet.
9427 if (rec->crossing_stripes) {
9429 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9430 rec->start, rec->start + rec->max_size);
9434 if (rec->wrong_chunk_type) {
9436 "bad extent [%llu, %llu), type mismatch with chunk\n",
9437 rec->start, rec->start + rec->max_size);
/* The record is done with: drop it from the cache and free its backrefs. */
9441 remove_cache_extent(extent_cache, cache);
9442 free_all_extent_backrefs(rec);
/*
 * The exclusion is lifted only in repair mode without init_extent_tree,
 * and only when the record had no error or the error was fixed.
 */
9443 if (!init_extent_tree && repair && (!cur_err || fix))
9444 clear_extent_dirty(root->fs_info->excluded_extents,
9446 rec->start + rec->max_size - 1);
/* -EAGAIN is excluded from the hard-failure message below. */
9451 if (ret && ret != -EAGAIN) {
9452 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9455 struct btrfs_trans_handle *trans;
9457 root = root->fs_info->extent_root;
9458 trans = btrfs_start_transaction(root, 1);
9459 if (IS_ERR(trans)) {
9460 ret = PTR_ERR(trans);
9464 ret = btrfs_fix_block_accounting(trans, root);
9467 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute the per-device stripe size of a chunk from its profile bits.
 *
 * @type:        block group type flags; only the RAID0/RAID10/RAID5/RAID6
 *               bits are examined, in that order
 * @length:      logical length of the chunk
 * @num_stripes: number of stripes in the chunk
 *
 * RAID0:  length / num_stripes
 * RAID10: length * 2 / num_stripes
 * RAID5:  length / (num_stripes - 1)
 * RAID6:  length / (num_stripes - 2)
 * Any other type: length unchanged.
 *
 * NOTE(review): no guard on the divisor is visible, so num_stripes of 0
 * (RAID0/RAID10), 1 (RAID5) or 2 (RAID6) would divide by zero -- callers
 * presumably validate num_stripes beforehand; confirm.
 * NOTE(review): the declaration of stripe_size and the return statement
 * are not visible in this copy.
 */
9476 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9480 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9481 stripe_size = length;
9482 stripe_size /= num_stripes;
9483 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9484 stripe_size = length * 2;
9485 stripe_size /= num_stripes;
9486 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9487 stripe_size = length;
9488 stripe_size /= (num_stripes - 1);
9489 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9490 stripe_size = length;
9491 stripe_size /= (num_stripes - 2);
9493 stripe_size = length;
/*
 * Summary of the visible checks:
 *  - the chunk is looked up in @block_group_cache and its length, offset
 *    and type flags are compared with the block group's offset, objectid
 *    and flags; a matching block group is unlinked from its list and stored
 *    in chunk_rec->bg_rec;
 *  - for every stripe, a device extent is looked up by (devid, offset,
 *    stripe length) in @dev_extent_cache and compared field by field; a
 *    matching device extent is moved onto chunk_rec->dextents.
 * The expected stripe length comes from calc_stripe_length().
 */
9499 * Check the chunk with its block group/dev list ref:
9500 * Return 0 if all refs seems valid.
9501 * Return 1 if part of refs seems valid, need later check for rebuild ref
9502 * like missing block group and needs to search extent tree to rebuild them.
9503 * Return -1 if essential refs are missing and unable to rebuild.
9505 static int check_chunk_refs(struct chunk_record *chunk_rec,
9506 struct block_group_tree *block_group_cache,
9507 struct device_extent_tree *dev_extent_cache,
9510 struct cache_extent *block_group_item;
9511 struct block_group_record *block_group_rec;
9512 struct cache_extent *dev_extent_item;
9513 struct device_extent_record *dev_extent_rec;
9517 int metadump_v2 = 0;
9521 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9524 if (block_group_item) {
9525 block_group_rec = container_of(block_group_item,
9526 struct block_group_record,
/* Block group key: objectid holds the chunk offset, offset holds its length. */
9528 if (chunk_rec->length != block_group_rec->offset ||
9529 chunk_rec->offset != block_group_rec->objectid ||
9531 chunk_rec->type_flags != block_group_rec->flags)) {
9534 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9535 chunk_rec->objectid,
9540 chunk_rec->type_flags,
9541 block_group_rec->objectid,
9542 block_group_rec->type,
9543 block_group_rec->offset,
9544 block_group_rec->offset,
9545 block_group_rec->objectid,
9546 block_group_rec->flags);
/* Matched: claim the block group for this chunk. */
9549 list_del_init(&block_group_rec->list);
9550 chunk_rec->bg_rec = block_group_rec;
9555 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9556 chunk_rec->objectid,
9561 chunk_rec->type_flags);
/* Every stripe should be backed by a device extent of this length. */
9568 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9569 chunk_rec->num_stripes);
9570 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9571 devid = chunk_rec->stripes[i].devid;
9572 offset = chunk_rec->stripes[i].offset;
9573 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9574 devid, offset, length);
9575 if (dev_extent_item) {
9576 dev_extent_rec = container_of(dev_extent_item,
9577 struct device_extent_record,
9579 if (dev_extent_rec->objectid != devid ||
9580 dev_extent_rec->offset != offset ||
9581 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9582 dev_extent_rec->length != length) {
9585 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9586 chunk_rec->objectid,
9589 chunk_rec->stripes[i].devid,
9590 chunk_rec->stripes[i].offset,
9591 dev_extent_rec->objectid,
9592 dev_extent_rec->offset,
9593 dev_extent_rec->length);
/* Matched: attach the device extent to this chunk's list. */
9596 list_move(&dev_extent_rec->chunk_list,
9597 &chunk_rec->dextents);
9602 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9603 chunk_rec->objectid,
9606 chunk_rec->stripes[i].devid,
9607 chunk_rec->stripes[i].offset);
9614 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Cross-check every chunk record in @chunk_cache against the block group
 * and device extent caches by calling check_chunk_refs() on each one.
 *
 * @chunk_cache:       cache tree walked from first_cache_extent() onwards
 * @block_group_cache: block group records, forwarded to check_chunk_refs()
 * @dev_extent_cache:  device extent records, forwarded to check_chunk_refs()
 * @good:              optional list receiving chunk records with err == 0
 * @bad:               list receiving chunk records; NOTE(review): the
 *                     condition guarding this insertion is not visible
 *                     next to it - confirm before relying on it
 * @rebuild:           optional list receiving chunk records with err > 0
 * @silent:            forwarded unchanged to check_chunk_refs()
 *
 * After the chunk walk, the entries still linked on
 * block_group_cache->block_groups and dev_extent_cache->no_chunk_orphans
 * are iterated, each with a message saying that no chunk was found for it.
 */
9615 int check_chunks(struct cache_tree *chunk_cache,
9616 struct block_group_tree *block_group_cache,
9617 struct device_extent_tree *dev_extent_cache,
9618 struct list_head *good, struct list_head *bad,
9619 struct list_head *rebuild, int silent)
9621 struct cache_extent *chunk_item;
9622 struct chunk_record *chunk_rec;
9623 struct block_group_record *bg_rec;
9624 struct device_extent_record *dext_rec;
/* Visit the chunk records in cache order. */
9628 chunk_item = first_cache_extent(chunk_cache);
9629 while (chunk_item) {
9630 chunk_rec = container_of(chunk_item, struct chunk_record,
9632 err = check_chunk_refs(chunk_rec, block_group_cache,
9633 dev_extent_cache, silent);
/* File the record on the caller-supplied list that matches the result. */
9636 if (err == 0 && good)
9637 list_add_tail(&chunk_rec->list, good);
9638 if (err > 0 && rebuild)
9639 list_add_tail(&chunk_rec->list, rebuild);
9641 list_add_tail(&chunk_rec->list, bad);
9642 chunk_item = next_cache_extent(chunk_item);
/*
 * check_chunk_refs() unlinks a block group record from this list once a
 * chunk claims it, so whatever is still linked here has no chunk.
 */
9645 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9648 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Same idea for device extents left on the no_chunk_orphans list. */
9656 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9660 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Add up the length of the device extent records that belong to
 * dev_rec->devid and compare the sum with dev_rec->byte_used.
 *
 * The walk starts at the entry returned by search_cache_extent2() for
 * (devid, 0) and continues with next_cache_extent(). Each counted record
 * is unlinked from its device_list. A mismatch between the sum and
 * byte_used leads to the message formatted at the end of the function.
 */
9671 static int check_device_used(struct device_record *dev_rec,
9672 struct device_extent_tree *dext_cache)
9674 struct cache_extent *cache;
9675 struct device_extent_record *dev_extent_rec;
/* Position on the cached extents of this device, searching from offset 0. */
9678 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9680 dev_extent_rec = container_of(cache,
9681 struct device_extent_record,
/* Only records whose objectid equals this devid are of interest. */
9683 if (dev_extent_rec->objectid != dev_rec->devid)
/* Unlink the record from its device_list and account its length. */
9686 list_del_init(&dev_extent_rec->device_list);
9687 total_byte += dev_extent_rec->length;
9688 cache = next_cache_extent(cache);
/* The summed extent length must match the byte_used field of the device. */
9691 if (total_byte != dev_rec->byte_used) {
9693 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9694 total_byte, dev_rec->byte_used, dev_rec->objectid,
9695 dev_rec->type, dev_rec->offset);
9702 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record stored in the rb-tree
 * @dev_cache, then iterate dev_extent_cache->no_device_orphans, where a
 * message naming the device extent (objectid, offset, length) is
 * formatted for each entry.
 */
9703 static int check_devices(struct rb_root *dev_cache,
9704 struct device_extent_tree *dev_extent_cache)
9706 struct rb_node *dev_node;
9707 struct device_record *dev_rec;
9708 struct device_extent_record *dext_rec;
/* In-order walk of the device rb-tree via rb_first() / rb_next(). */
9712 dev_node = rb_first(dev_cache);
9714 dev_rec = container_of(dev_node, struct device_record, node);
9715 err = check_device_used(dev_rec, dev_extent_cache);
9719 dev_node = rb_next(dev_node);
/*
 * check_device_used() unlinks each extent it counts from device_list;
 * presumably that is the linkage of this orphan list - TODO confirm.
 */
9721 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9724 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9725 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it
 * to the tail of @head.
 *
 * @head:          list that receives the new record
 * @objectid:      stored in the record as objectid
 * @bytenr:        stored in the record as bytenr
 * @last_snapshot: stored in the record as last_snapshot
 * @level:         stored in the record as level
 * @drop_level:    stored in the record as drop_level
 * @drop_key:      copied into the record as drop_key
 *
 * NOTE(review): check_chunks_and_extents() passes NULL as @drop_key for
 * the tree root and the chunk root, so the memcpy() below needs a NULL
 * guard; that guard is not visible next to it - confirm it exists.
 */
9732 static int add_root_item_to_list(struct list_head *head,
9733 u64 objectid, u64 bytenr, u64 last_snapshot,
9734 u8 level, u8 drop_level,
9735 struct btrfs_key *drop_key)
9738 struct root_item_record *ri_rec;
9739 ri_rec = malloc(sizeof(*ri_rec));
/* Plain field-by-field copy of the scalar arguments. */
9742 ri_rec->bytenr = bytenr;
9743 ri_rec->objectid = objectid;
9744 ri_rec->level = level;
9745 ri_rec->drop_level = drop_level;
9746 ri_rec->last_snapshot = last_snapshot;
9748 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
/* Tail insertion keeps the records in the order they were added. */
9749 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list of root_item_record entries: the first entry is taken and
 * unlinked until the list is empty. The release of the record memory is
 * presumably done right after the unlink - not visible here, TODO confirm.
 */
9754 static void free_root_item_list(struct list_head *list)
9756 struct root_item_record *ri_rec;
9758 while (!list_empty(list)) {
9759 ri_rec = list_first_entry(list, struct root_item_record,
9761 list_del_init(&ri_rec->list);
/*
 * Consume the root_item_record entries queued on @list.
 *
 * For each record the tree block at rec->bytenr is read, handed to
 * add_root_to_pending() together with rec->objectid, and
 * run_next_block() is invoked with the record. The buffer reference is
 * then dropped and the record is unlinked from @list. A further
 * run_next_block() call with a NULL record follows the loop.
 *
 * All cache arguments (@pending, @seen, @reada, @nodes, @extent_cache,
 * @chunk_cache, @dev_cache, @block_group_cache, @dev_extent_cache) and
 * @bits are passed through to the helpers unchanged.
 */
9766 static int deal_root_from_list(struct list_head *list,
9767 struct btrfs_root *root,
9768 struct block_info *bits,
9770 struct cache_tree *pending,
9771 struct cache_tree *seen,
9772 struct cache_tree *reada,
9773 struct cache_tree *nodes,
9774 struct cache_tree *extent_cache,
9775 struct cache_tree *chunk_cache,
9776 struct rb_root *dev_cache,
9777 struct block_group_tree *block_group_cache,
9778 struct device_extent_tree *dev_extent_cache)
9783 while (!list_empty(list)) {
9784 struct root_item_record *rec;
9785 struct extent_buffer *buf;
/* Always work on the record at the head of the list. */
9786 rec = list_entry(list->next,
9787 struct root_item_record, list);
/* Read the root node of this tree; a buffer that is not uptodate is released. */
9789 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9790 if (!extent_buffer_uptodate(buf)) {
9791 free_extent_buffer(buf);
/* Queue the root block so that the block walker picks it up. */
9795 ret = add_root_to_pending(buf, extent_cache, pending,
9796 seen, nodes, rec->objectid);
9800 * To rebuild extent tree, we need deal with snapshot
9801 * one by one, otherwise we deal with node firstly which
9802 * can maximize readahead.
9805 ret = run_next_block(root, bits, bits_nr, &last,
9806 pending, seen, reada, nodes,
9807 extent_cache, chunk_cache,
9808 dev_cache, block_group_cache,
9809 dev_extent_cache, rec);
/* Finished with this root: drop the buffer reference and unlink the record. */
9813 free_extent_buffer(buf);
9814 list_del(&rec->list);
/* Block processing that is not tied to a particular root record (NULL). */
9820 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9821 reada, nodes, extent_cache, chunk_cache,
9822 dev_cache, block_group_cache,
9823 dev_extent_cache, NULL);
/*
 * Top-level driver of the chunk / extent / device cross checks.
 *
 * Visible sequence:
 *  1. initialize the local caches, trees and lists and publish some of
 *     them through @fs_info
 *  2. queue the tree root, the chunk root and every ROOT_ITEM found in
 *     the tree root on normal_trees or dropping_trees
 *  3. process both lists with deal_root_from_list()
 *  4. run check_chunks(), check_extent_refs() and check_devices() on the
 *     collected caches
 *  5. tear everything down again
 */
9833 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
9835 struct rb_root dev_cache;
9836 struct cache_tree chunk_cache;
9837 struct block_group_tree block_group_cache;
9838 struct device_extent_tree dev_extent_cache;
9839 struct cache_tree extent_cache;
9840 struct cache_tree seen;
9841 struct cache_tree pending;
9842 struct cache_tree reada;
9843 struct cache_tree nodes;
9844 struct extent_io_tree excluded_extents;
9845 struct cache_tree corrupt_blocks;
9846 struct btrfs_path path;
9847 struct btrfs_key key;
9848 struct btrfs_key found_key;
9850 struct block_info *bits;
9852 struct extent_buffer *leaf;
9854 struct btrfs_root_item ri;
9855 struct list_head dropping_trees;
9856 struct list_head normal_trees;
9857 struct btrfs_root *root1;
9858 struct btrfs_root *root;
9862 root = fs_info->fs_root;
/* Bring every cache, tree and list used below into a known empty state. */
9863 dev_cache = RB_ROOT;
9864 cache_tree_init(&chunk_cache);
9865 block_group_tree_init(&block_group_cache);
9866 device_extent_tree_init(&dev_extent_cache);
9868 cache_tree_init(&extent_cache);
9869 cache_tree_init(&seen);
9870 cache_tree_init(&pending);
9871 cache_tree_init(&nodes);
9872 cache_tree_init(&reada);
9873 cache_tree_init(&corrupt_blocks);
9874 extent_io_tree_init(&excluded_extents);
9875 INIT_LIST_HEAD(&dropping_trees);
9876 INIT_LIST_HEAD(&normal_trees);
/* Expose the local caches and the free-extent hook through fs_info. */
9879 fs_info->excluded_extents = &excluded_extents;
9880 fs_info->fsck_extent_cache = &extent_cache;
9881 fs_info->free_extent_hook = free_extent_hook;
9882 fs_info->corrupt_blocks = &corrupt_blocks;
/* Scratch array of bits_nr block_info entries used by the block walker. */
9886 bits = malloc(bits_nr * sizeof(struct block_info));
/* Start the progress task when progress reporting is enabled. */
9892 if (ctx.progress_enabled) {
9893 ctx.tp = TASK_EXTENTS;
9894 task_start(ctx.info);
/* The tree root and the chunk root are queued first, without a drop key. */
9898 root1 = fs_info->tree_root;
9899 level = btrfs_header_level(root1->node);
9900 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9901 root1->node->start, 0, level, 0, NULL);
9904 root1 = fs_info->chunk_root;
9905 level = btrfs_header_level(root1->node);
9906 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9907 root1->node->start, 0, level, 0, NULL);
/* Scan the tree root for ROOT_ITEM entries. */
9910 btrfs_init_path(&path);
9913 key.type = BTRFS_ROOT_ITEM_KEY;
9914 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
9918 leaf = path.nodes[0];
9919 slot = path.slots[0];
/*
 * Slot past the last item of the leaf: move on to the next leaf.
 * NOTE(review): the search above runs on fs_info->tree_root while
 * btrfs_next_leaf() is given root (fs_info->fs_root) - confirm that
 * this is intended.
 */
9920 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9921 ret = btrfs_next_leaf(root, &path);
9924 leaf = path.nodes[0];
9925 slot = path.slots[0];
9927 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9928 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9929 unsigned long offset;
9932 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9933 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9934 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A root item whose drop_progress objectid is zero goes on normal_trees;
 * the other visible insertion puts the root on dropping_trees together
 * with its drop_progress key and drop_level.
 */
9935 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9936 level = btrfs_root_level(&ri);
9937 ret = add_root_item_to_list(&normal_trees,
9939 btrfs_root_bytenr(&ri),
9940 last_snapshot, level,
9945 level = btrfs_root_level(&ri);
9946 objectid = found_key.objectid;
9947 btrfs_disk_key_to_cpu(&found_key,
9949 ret = add_root_item_to_list(&dropping_trees,
9951 btrfs_root_bytenr(&ri),
9952 last_snapshot, level,
9953 ri.drop_level, &found_key);
9960 btrfs_release_path(&path);
9963 * check_block can return -EAGAIN if it fixes something, please keep
9964 * this in mind when dealing with return values from these functions, if
9965 * we get -EAGAIN we want to fall through and restart the loop.
9967 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9968 &seen, &reada, &nodes, &extent_cache,
9969 &chunk_cache, &dev_cache, &block_group_cache,
/* Then the roots that were queued on dropping_trees. */
9976 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9977 &pending, &seen, &reada, &nodes,
9978 &extent_cache, &chunk_cache, &dev_cache,
9979 &block_group_cache, &dev_extent_cache);
/* Cross checks on the caches filled by the block walk. */
9986 ret = check_chunks(&chunk_cache, &block_group_cache,
9987 &dev_extent_cache, NULL, NULL, NULL, 0);
9994 ret = check_extent_refs(root, &extent_cache);
10001 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Teardown: stop the progress task, unhook fs_info and free the caches. */
10006 task_stop(ctx.info);
10008 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10009 extent_io_tree_cleanup(&excluded_extents);
10010 fs_info->fsck_extent_cache = NULL;
10011 fs_info->free_extent_hook = NULL;
10012 fs_info->corrupt_blocks = NULL;
10013 fs_info->excluded_extents = NULL;
10016 free_chunk_cache_tree(&chunk_cache);
10017 free_device_cache_tree(&dev_cache);
10018 free_block_group_tree(&block_group_cache);
10019 free_device_extent_tree(&dev_extent_cache);
10020 free_extent_cache_tree(&seen);
10021 free_extent_cache_tree(&pending);
10022 free_extent_cache_tree(&reada);
10023 free_extent_cache_tree(&nodes);
10024 free_root_item_list(&normal_trees);
10025 free_root_item_list(&dropping_trees);
/*
 * NOTE(review): a second, nearly identical teardown sequence follows.
 * Unlike the one above it also calls free_extent_record_cache() on
 * extent_cache and does not reset the fs_info pointers. The labels and
 * jumps that separate the two sequences are not visible here; presumably
 * this one serves the restart path mentioned earlier - TODO confirm.
 */
10028 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10029 free_extent_cache_tree(&seen);
10030 free_extent_cache_tree(&pending);
10031 free_extent_cache_tree(&reada);
10032 free_extent_cache_tree(&nodes);
10033 free_chunk_cache_tree(&chunk_cache);
10034 free_block_group_tree(&block_group_cache);
10035 free_device_cache_tree(&dev_cache);
10036 free_device_extent_tree(&dev_extent_cache);
10037 free_extent_record_cache(&extent_cache);
10038 free_root_item_list(&normal_trees);
10039 free_root_item_list(&dropping_trees);
10040 extent_io_tree_cleanup(&excluded_extents);
10045 * Check backrefs of a tree block given by @bytenr or @eb.
10047 * @root: the root containing the @bytenr or @eb
10048 * @eb: tree block extent buffer, can be NULL
10049 * @bytenr: bytenr of the tree block to search
10050 * @level: tree level of the tree block
10051 * @owner: owner of the tree block
10053 * Return >0 for any error found and output error message
10054 * Return 0 for no error found
10056 static int check_tree_block_ref(struct btrfs_root *root,
10057 struct extent_buffer *eb, u64 bytenr,
10058 int level, u64 owner)
/*
 * Flow, as far as it can be read here: find the extent item for @bytenr
 * in the extent tree, validate it (TREE_BLOCK flag, generation against
 * the header of @eb, level, reference count), then scan its inline refs
 * for a TREE_BLOCK_REF whose offset is root->objectid or @owner, or for
 * a SHARED_BLOCK_REF that resolves through a recursive call with a NULL
 * eb and level + 1. If the inline refs do not settle it, a keyed
 * TREE_BLOCK_REF item for root->objectid is searched. Problems are
 * recorded as BACKREF_MISSING / BACKREF_MISMATCH in err.
 */
10060 struct btrfs_key key;
10061 struct btrfs_root *extent_root = root->fs_info->extent_root;
10062 struct btrfs_path path;
10063 struct btrfs_extent_item *ei;
10064 struct btrfs_extent_inline_ref *iref;
10065 struct extent_buffer *leaf;
10071 u32 nodesize = root->fs_info->nodesize;
10074 int tree_reloc_root = 0;
/*
 * Remember whether @bytenr is the root node of a tree reloc root; the
 * shared backref handling further down treats that case specially.
 */
10079 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10080 btrfs_header_bytenr(root->node) == bytenr)
10081 tree_reloc_root = 1;
/*
 * Search key: METADATA_ITEM on skinny-metadata filesystems, EXTENT_ITEM
 * otherwise, with the largest possible offset so that the search lands
 * after the item and btrfs_previous_extent_item() can step back to it.
 */
10083 btrfs_init_path(&path);
10084 key.objectid = bytenr;
10085 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10086 key.type = BTRFS_METADATA_ITEM_KEY;
10088 key.type = BTRFS_EXTENT_ITEM_KEY;
10089 key.offset = (u64)-1;
10091 /* Search for the backref in extent tree */
10092 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10094 err |= BACKREF_MISSING;
10097 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10099 err |= BACKREF_MISSING;
10103 leaf = path.nodes[0];
10104 slot = path.slots[0];
10105 btrfs_item_key_to_cpu(leaf, &key, slot);
10107 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * A METADATA_ITEM carries the level in key.offset and its inline refs
 * start right after the extent item. The other layout stores a
 * btrfs_tree_block_info (holding the level) in front of the inline refs.
 */
10109 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10110 skinny_level = (int)key.offset;
10111 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10113 struct btrfs_tree_block_info *info;
10115 info = (struct btrfs_tree_block_info *)(ei + 1);
10116 skinny_level = btrfs_tree_block_level(leaf, info);
10117 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Consistency checks of the extent item itself. */
10124 if (!(btrfs_extent_flags(leaf, ei) &
10125 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10127 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10128 key.objectid, nodesize,
10129 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10130 err = BACKREF_MISMATCH;
10132 header_gen = btrfs_header_generation(eb);
10133 extent_gen = btrfs_extent_generation(leaf, ei);
10134 if (header_gen != extent_gen) {
10136 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10137 key.objectid, nodesize, header_gen,
10139 err = BACKREF_MISMATCH;
10141 if (level != skinny_level) {
10143 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10144 key.objectid, nodesize, level, skinny_level);
10145 err = BACKREF_MISMATCH;
/* A block whose owner is not a fs tree must have exactly one reference. */
10147 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10149 "extent[%llu %u] is referred by other roots than %llu",
10150 key.objectid, nodesize, root->objectid);
10151 err = BACKREF_MISMATCH;
10156 * Iterate the extent/metadata item to find the exact backref
10158 item_size = btrfs_item_size_nr(leaf, slot);
10159 ptr = (unsigned long)iref;
10160 end = (unsigned long)ei + item_size;
10161 while (ptr < end) {
10162 iref = (struct btrfs_extent_inline_ref *)ptr;
10163 type = btrfs_extent_inline_ref_type(leaf, iref);
10164 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10166 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10167 (offset == root->objectid || offset == owner)) {
10169 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10171 * Backref of tree reloc root points to itself, no need
10172 * to check backref any more.
10174 if (tree_reloc_root)
10177 /* Check if the backref points to valid referencer */
10178 found_ref = !check_tree_block_ref(root, NULL,
10179 offset, level + 1, owner);
10184 ptr += btrfs_extent_inline_ref_size(type);
10188 * Inlined extent item doesn't have what we need, check
10189 * TREE_BLOCK_REF_KEY
10192 btrfs_release_path(&path);
10193 key.objectid = bytenr;
10194 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10195 key.offset = root->objectid;
10197 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10202 err |= BACKREF_MISSING;
10204 btrfs_release_path(&path);
10205 if (eb && (err & BACKREF_MISSING))
10206 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10207 bytenr, nodesize, owner, level);
10212 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10214 * Return >0 for any error found and output error message
10215 * Return 0 for no error found
10217 static int check_extent_data_item(struct btrfs_root *root,
10218 struct extent_buffer *eb, int slot)
/*
 * Steps visible here:
 *  - inline extents and holes (disk_bytenr == 0) have nothing to check
 *  - unaligned disk_num_bytes / num_bytes set BYTES_UNALIGNED; the sizes
 *    are accounted in data_bytes_allocated / data_bytes_referenced
 *  - the EXTENT_ITEM keyed by (disk_bytenr, disk_num_bytes) is looked up
 *    and must carry BTRFS_EXTENT_FLAG_DATA
 *  - a matching backref is searched among the inline refs, then as a
 *    keyed EXTENT_DATA_REF item, then as a keyed SHARED_DATA_REF item
 *    whose offset is eb->start
 *  - BACKREF_MISSING is set when none of them matches
 *
 * @root: root the file extent item belongs to
 * @eb:   leaf holding the EXTENT_DATA item
 * @slot: slot of that item in @eb; reused below as the slot in the
 *        extent tree leaf
 */
10220 struct btrfs_file_extent_item *fi;
10221 struct btrfs_path path;
10222 struct btrfs_root *extent_root = root->fs_info->extent_root;
10223 struct btrfs_key fi_key;
10224 struct btrfs_key dbref_key;
10225 struct extent_buffer *leaf;
10226 struct btrfs_extent_item *ei;
10227 struct btrfs_extent_inline_ref *iref;
10228 struct btrfs_extent_data_ref *dref;
10231 u64 disk_num_bytes;
10232 u64 extent_num_bytes;
10239 int found_dbackref = 0;
10243 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10244 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10246 /* Nothing to check for hole and inline data extents */
10247 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10248 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10251 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10252 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10253 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10255 /* Check unaligned disk_num_bytes and num_bytes */
10256 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10258 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10259 fi_key.objectid, fi_key.offset, disk_num_bytes,
10260 root->fs_info->sectorsize);
10261 err |= BYTES_UNALIGNED;
10263 data_bytes_allocated += disk_num_bytes;
10265 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10267 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10268 fi_key.objectid, fi_key.offset, extent_num_bytes,
10269 root->fs_info->sectorsize);
10270 err |= BYTES_UNALIGNED;
10272 data_bytes_referenced += extent_num_bytes;
/* Owner stored in the leaf header; compared with data ref roots below. */
10274 owner = btrfs_header_owner(eb);
10276 /* Check the extent item of the file extent in extent tree */
10277 btrfs_init_path(&path);
10278 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10279 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10280 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10282 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10286 leaf = path.nodes[0];
10287 slot = path.slots[0];
10288 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10290 extent_flags = btrfs_extent_flags(leaf, ei);
10292 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10294 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10295 disk_bytenr, disk_num_bytes,
10296 BTRFS_EXTENT_FLAG_DATA);
10297 err |= BACKREF_MISMATCH;
10300 /* Check data backref inside that extent item */
10301 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10302 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10303 ptr = (unsigned long)iref;
10304 end = (unsigned long)ei + item_size;
10305 while (ptr < end) {
10306 iref = (struct btrfs_extent_inline_ref *)ptr;
10307 type = btrfs_extent_inline_ref_type(leaf, iref);
10308 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/*
 * A keyed data ref matches when its root is the leaf owner or this
 * root. A shared data ref is validated by checking the tree block it
 * names as parent with check_tree_block_ref().
 */
10310 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10311 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10312 if (ref_root == owner || ref_root == root->objectid)
10313 found_dbackref = 1;
10314 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10315 found_dbackref = !check_tree_block_ref(root, NULL,
10316 btrfs_extent_inline_ref_offset(leaf, iref),
10320 if (found_dbackref)
10322 ptr += btrfs_extent_inline_ref_size(type);
10325 if (!found_dbackref) {
10326 btrfs_release_path(&path);
10328 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10329 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10330 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10331 dbref_key.offset = hash_extent_data_ref(root->objectid,
10332 fi_key.objectid, fi_key.offset);
10334 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10335 &dbref_key, &path, 0, 0);
10337 found_dbackref = 1;
10341 btrfs_release_path(&path);
10344 * Neither inlined nor EXTENT_DATA_REF found, try
10345 * SHARED_DATA_REF as last chance.
10347 dbref_key.objectid = disk_bytenr;
10348 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10349 dbref_key.offset = eb->start;
10351 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10352 &dbref_key, &path, 0, 0);
10354 found_dbackref = 1;
/* None of the three lookups produced a backref. */
10360 if (!found_dbackref)
10361 err |= BACKREF_MISSING;
10362 btrfs_release_path(&path);
10363 if (err & BACKREF_MISSING) {
10364 error("data extent[%llu %llu] backref lost",
10365 disk_bytenr, disk_num_bytes);
10371 * Get real tree block level for the case like shared block
10372 * Return >= 0 as tree level
10373 * Return <0 for error
10375 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
/*
 * The level is obtained from two sources and both must agree:
 *  - the extent tree item for @bytenr (key.offset of a METADATA_ITEM, or
 *    the btrfs_tree_block_info stored after the extent item)
 *  - the header of the tree block itself, read with the generation
 *    recorded in the extent item
 * The extent item must carry BTRFS_EXTENT_FLAG_TREE_BLOCK. On agreement
 * the header level is returned.
 */
10377 struct extent_buffer *eb;
10378 struct btrfs_path path;
10379 struct btrfs_key key;
10380 struct btrfs_extent_item *ei;
10387 /* Search extent tree for extent generation and level */
10388 key.objectid = bytenr;
10389 key.type = BTRFS_METADATA_ITEM_KEY;
10390 key.offset = (u64)-1;
10392 btrfs_init_path(&path);
10393 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* The search key uses the maximum offset, so step back to the real item. */
10396 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10404 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10405 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10406 struct btrfs_extent_item);
10407 flags = btrfs_extent_flags(path.nodes[0], ei);
10408 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10413 /* Get transid for later read_tree_block() check */
10414 transid = btrfs_extent_generation(path.nodes[0], ei);
10416 /* Get backref level as one source */
10417 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10418 backref_level = key.offset;
10420 struct btrfs_tree_block_info *info;
10422 info = (struct btrfs_tree_block_info *)(ei + 1);
10423 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10425 btrfs_release_path(&path);
10427 /* Get level from tree block as an alternative source */
10428 eb = read_tree_block(fs_info, bytenr, transid);
10429 if (!extent_buffer_uptodate(eb)) {
10430 free_extent_buffer(eb);
10433 header_level = btrfs_header_level(eb);
10434 free_extent_buffer(eb);
/* Both sources have to report the same level. */
10436 if (header_level != backref_level)
10438 return header_level;
/* Path release for the exits taken while the path was still held. */
10441 btrfs_release_path(&path);
10446 * Check if a tree block backref is valid (points to a valid tree block)
10447 * if level == -1, level will be resolved
10448 * Return >0 for any error found and print error message
10450 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10451 u64 bytenr, int level)
/*
 * Verify that the tree block at @bytenr is reachable from root @root_id:
 * the first key of the block is read, searched in that root with
 * path.lowest_level set to @level, and the node found at that level must
 * have the same bytenr and level. Failures are recorded in err as
 * REFERENCER_MISSING or REFERENCER_MISMATCH.
 *
 * NOTE(review): nodesize is a u32 but the messages below print it with a
 * signed int conversion - harmless for realistic node sizes, confirm.
 */
10453 struct btrfs_root *root;
10454 struct btrfs_key key;
10455 struct btrfs_path path;
10456 struct extent_buffer *eb;
10457 struct extent_buffer *node;
10458 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10462 /* Query level for level == -1 special case */
10464 level = query_tree_block_level(fs_info, bytenr);
10466 err |= REFERENCER_MISSING;
/* Load the root named by @root_id through its ROOT_ITEM key. */
10470 key.objectid = root_id;
10471 key.type = BTRFS_ROOT_ITEM_KEY;
10472 key.offset = (u64)-1;
10474 root = btrfs_read_fs_root(fs_info, &key);
10475 if (IS_ERR(root)) {
10476 err |= REFERENCER_MISSING;
10480 /* Read out the tree block to get item/node key */
10481 eb = read_tree_block(fs_info, bytenr, 0);
10482 if (!extent_buffer_uptodate(eb)) {
10483 err |= REFERENCER_MISSING;
10484 free_extent_buffer(eb);
10488 /* Empty tree, no need to check key */
10489 if (!btrfs_header_nritems(eb) && !level) {
10490 free_extent_buffer(eb);
/* First key of the block: a node key or an item key, depending on level. */
10495 btrfs_node_key_to_cpu(eb, &key, 0);
10497 btrfs_item_key_to_cpu(eb, &key, 0);
10499 free_extent_buffer(eb);
10501 btrfs_init_path(&path);
10502 path.lowest_level = level;
10503 /* Search with the first key, to ensure we can reach it */
10504 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10506 err |= REFERENCER_MISSING;
/* The node the search reached at @level must be the block under test. */
10510 node = path.nodes[level];
10511 if (btrfs_header_bytenr(node) != bytenr) {
10513 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10514 bytenr, nodesize, bytenr,
10515 btrfs_header_bytenr(node));
10516 err |= REFERENCER_MISMATCH;
10518 if (btrfs_header_level(node) != level) {
10520 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10521 bytenr, nodesize, level,
10522 btrfs_header_level(node));
10523 err |= REFERENCER_MISMATCH;
10527 btrfs_release_path(&path);
/* Two message variants exist: one without and one with the level. */
10529 if (err & REFERENCER_MISSING) {
10531 error("extent [%llu %d] lost referencer (owner: %llu)",
10532 bytenr, nodesize, root_id);
10535 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10536 bytenr, nodesize, root_id, level);
10543 * Check if tree block @eb is a tree reloc root.
10544 * Return 0 if it's not or any problem happens
10545 * Return 1 if it's a tree reloc root
10547 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10548 struct extent_buffer *eb)
/*
 * The candidate reloc root is looked up with the key
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb), bypassing the
 * root cache. The bytenr of its root node is then compared with the
 * bytenr of @eb, and the root is released with btrfs_free_fs_root().
 */
10550 struct btrfs_root *tree_reloc_root;
10551 struct btrfs_key key;
10552 u64 bytenr = btrfs_header_bytenr(eb);
10553 u64 owner = btrfs_header_owner(eb);
/* The reloc root of a tree is keyed by the objectid of that tree. */
10556 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10557 key.offset = owner;
10558 key.type = BTRFS_ROOT_ITEM_KEY;
10560 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10561 if (IS_ERR(tree_reloc_root))
/* @eb is the reloc root exactly when it sits at the root node bytenr. */
10564 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10566 btrfs_free_fs_root(tree_reloc_root);
10571 * Check referencer for shared block backref
10572 * If level == -1, this function will resolve the level.
10574 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10575 u64 parent, u64 bytenr, int level)
/*
 * Verify that the tree block at @parent really references @bytenr.
 *
 * The parent block is read; the level of the child may be resolved with
 * query_tree_block_level(). A block that names itself as parent is
 * tested with is_tree_reloc_root(). Otherwise the parent is compared
 * against level + 1 and its block pointers are scanned for @bytenr.
 * Without a match the function reports the loss and returns
 * REFERENCER_MISSING.
 */
10577 struct extent_buffer *eb;
10579 int found_parent = 0;
10582 eb = read_tree_block(fs_info, parent, 0);
10583 if (!extent_buffer_uptodate(eb))
10587 level = query_tree_block_level(fs_info, bytenr);
10591 /* It's possible it's a tree reloc root */
10592 if (parent == bytenr) {
10593 if (is_tree_reloc_root(fs_info, eb))
/* A parent has to sit exactly one level above its child. */
10598 if (level + 1 != btrfs_header_level(eb))
/* Look for a block pointer in the parent that equals @bytenr. */
10601 nr = btrfs_header_nritems(eb);
10602 for (i = 0; i < nr; i++) {
10603 if (bytenr == btrfs_node_blockptr(eb, i)) {
10609 free_extent_buffer(eb);
10610 if (!found_parent) {
10612 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10613 bytenr, fs_info->nodesize, parent, level);
10614 return REFERENCER_MISSING;
10620 * Check referencer for normal (inlined) data ref
10621 * If len == 0, it will be resolved by searching in extent tree
10623 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10624 u64 root_id, u64 objectid, u64 offset,
10625 u64 bytenr, u64 len, u32 count)
/*
 * Count the file extent items of inode @objectid in root @root_id that
 * point at the data extent (@bytenr, @len) with a backref offset equal
 * to @offset, and compare that number with @count. A different number
 * is reported and REFERENCER_MISSING is returned.
 *
 * @root_id:  root expected to hold the referencing file extents
 * @objectid: inode number of the referencing file
 * @offset:   data backref offset (file offset - file extent offset)
 * @bytenr:   disk bytenr of the data extent
 * @len:      disk length of the data extent
 * @count:    reference count recorded in the data backref
 */
10627 struct btrfs_root *root;
10628 struct btrfs_root *extent_root = fs_info->extent_root;
10629 struct btrfs_key key;
10630 struct btrfs_path path;
10631 struct extent_buffer *leaf;
10632 struct btrfs_file_extent_item *fi;
10633 u32 found_count = 0;
/*
 * Extent tree lookup of the EXTENT_ITEM for @bytenr; presumably this is
 * the path that resolves @len when it is passed as 0 - TODO confirm.
 */
10638 key.objectid = bytenr;
10639 key.type = BTRFS_EXTENT_ITEM_KEY;
10640 key.offset = (u64)-1;
10642 btrfs_init_path(&path);
10643 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10646 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10649 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10650 if (key.objectid != bytenr ||
10651 key.type != BTRFS_EXTENT_ITEM_KEY)
10654 btrfs_release_path(&path);
/* Load the root that is expected to hold the referencing file extents. */
10656 key.objectid = root_id;
10657 key.type = BTRFS_ROOT_ITEM_KEY;
10658 key.offset = (u64)-1;
10659 btrfs_init_path(&path);
10661 root = btrfs_read_fs_root(fs_info, &key);
10665 key.objectid = objectid;
10666 key.type = BTRFS_EXTENT_DATA_KEY;
10668 * It can be nasty as data backref offset is
10669 * file offset - file extent offset, which is smaller or
10670 * equal to original backref offset. The only special case is
10671 * overflow. So we need to special check and do further search.
10673 key.offset = offset & (1ULL << 63) ? 0 : offset;
/* The line above parses as (offset & (1ULL << 63)) ? 0 : offset. */
10675 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10680 * Search afterwards to get correct one
10681 * NOTE: As we must do a comprehensive check on the data backref to
10682 * make sure the dref count also matches, we must iterate all file
10683 * extents for that inode.
10686 leaf = path.nodes[0];
10687 slot = path.slots[0];
10689 if (slot >= btrfs_header_nritems(leaf))
10691 btrfs_item_key_to_cpu(leaf, &key, slot);
10692 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10694 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10696 * Except normal disk bytenr and disk num bytes, we still
10697 * need to do extra check on dbackref offset as
10698 * dbackref offset = file_offset - file_extent_offset
10700 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10701 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10702 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10707 ret = btrfs_next_item(root, &path);
10712 btrfs_release_path(&path);
/* The number of matching file extents must equal the recorded count. */
10713 if (found_count != count) {
10715 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10716 bytenr, len, root_id, objectid, offset, count, found_count);
10717 return REFERENCER_MISSING;
10723 * Check if the referencer of a shared data backref exists
10725 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10726 u64 parent, u64 bytenr)
/*
 * Read the leaf at @parent and scan its items for an EXTENT_DATA item
 * that is not inline and whose disk bytenr equals @bytenr. When no such
 * item exists the loss is reported and REFERENCER_MISSING is returned.
 */
10728 struct extent_buffer *eb;
10729 struct btrfs_key key;
10730 struct btrfs_file_extent_item *fi;
10732 int found_parent = 0;
10735 eb = read_tree_block(fs_info, parent, 0);
10736 if (!extent_buffer_uptodate(eb))
/* Only regular (not inline) file extent items can reference @bytenr. */
10739 nr = btrfs_header_nritems(eb);
10740 for (i = 0; i < nr; i++) {
10741 btrfs_item_key_to_cpu(eb, &key, i);
10742 if (key.type != BTRFS_EXTENT_DATA_KEY)
10745 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10746 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10749 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10756 free_extent_buffer(eb);
10757 if (!found_parent) {
10758 error("shared extent %llu referencer lost (parent: %llu)",
10760 return REFERENCER_MISSING;
10766 * This function will check a given extent item, including its backref and
10767 * itself (like crossing stripe boundary and type)
10769 * Since we don't use extent_record anymore, introduce new error bit
10771 static int check_extent_item(struct btrfs_fs_info *fs_info,
10772 struct extent_buffer *eb, int slot)
/*
 * Check one EXTENT_ITEM / METADATA_ITEM located at @slot of leaf @eb.
 *
 * Visible steps:
 *  - account the extent in bytes_used (key.offset for EXTENT_ITEM keys,
 *    nodesize in the other visible addition)
 *  - reject items smaller than struct btrfs_extent_item
 *  - flag metadata extents that cross a stripe boundary with
 *    CROSSING_STRIPE_BOUNDARY
 *  - determine the tree level from the btrfs_tree_block_info (old
 *    EXTENT_ITEM metadata) or from key.offset (METADATA_ITEM)
 *  - walk the inline refs and dispatch on the ref type:
 *      TREE_BLOCK_REF   -> check_tree_block_backref()
 *      SHARED_BLOCK_REF -> check_shared_block_backref()
 *      EXTENT_DATA_REF  -> check_extent_data_backref()
 *      SHARED_DATA_REF  -> check_shared_data_backref()
 *    any other type sets UNKNOWN_TYPE
 *  - walking past the end of the item sets ITEM_SIZE_MISMATCH
 *
 * NOTE(review): check_crossing_stripes() is called with the file-scope
 * global_info rather than the @fs_info parameter - confirm that both
 * always refer to the same filesystem.
 */
10774 struct btrfs_extent_item *ei;
10775 struct btrfs_extent_inline_ref *iref;
10776 struct btrfs_extent_data_ref *dref;
10780 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10781 u32 item_size = btrfs_item_size_nr(eb, slot);
10786 struct btrfs_key key;
/* Space accounting comes first, before any validity check of the item. */
10790 btrfs_item_key_to_cpu(eb, &key, slot);
10791 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10792 bytes_used += key.offset;
10794 bytes_used += nodesize;
10796 if (item_size < sizeof(*ei)) {
10798 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10799 * old thing when on disk format is still un-determined.
10800 * No need to care about it anymore
10802 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10806 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10807 flags = btrfs_extent_flags(eb, ei);
10809 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10811 if (metadata && check_crossing_stripes(global_info, key.objectid,
10813 error("bad metadata [%llu, %llu) crossing stripe boundary",
10814 key.objectid, key.objectid + nodesize);
10815 err |= CROSSING_STRIPE_BOUNDARY;
10818 ptr = (unsigned long)(ei + 1);
10820 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10821 /* Old EXTENT_ITEM metadata */
10822 struct btrfs_tree_block_info *info;
10824 info = (struct btrfs_tree_block_info *)ptr;
10825 level = btrfs_tree_block_level(eb, info);
10826 ptr += sizeof(struct btrfs_tree_block_info);
10828 /* New METADATA_ITEM */
10829 level = key.offset;
10831 end = (unsigned long)ei + item_size;
10834 /* Reached extent item end normally */
10838 /* Beyond extent item end, wrong item size */
10840 err |= ITEM_SIZE_MISMATCH;
10841 error("extent item at bytenr %llu slot %d has wrong size",
10846 /* Now check every backref in this extent item */
10847 iref = (struct btrfs_extent_inline_ref *)ptr;
10848 type = btrfs_extent_inline_ref_type(eb, iref);
10849 offset = btrfs_extent_inline_ref_offset(eb, iref);
10851 case BTRFS_TREE_BLOCK_REF_KEY:
10852 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10856 case BTRFS_SHARED_BLOCK_REF_KEY:
10857 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10861 case BTRFS_EXTENT_DATA_REF_KEY:
10862 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10863 ret = check_extent_data_backref(fs_info,
10864 btrfs_extent_data_ref_root(eb, dref),
10865 btrfs_extent_data_ref_objectid(eb, dref),
10866 btrfs_extent_data_ref_offset(eb, dref),
10867 key.objectid, key.offset,
10868 btrfs_extent_data_ref_count(eb, dref));
10871 case BTRFS_SHARED_DATA_REF_KEY:
10872 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10876 error("extent[%llu %d %llu] has unknown ref type: %d",
10877 key.objectid, key.type, key.offset, type);
10878 err |= UNKNOWN_TYPE;
10882 ptr += btrfs_extent_inline_ref_size(type);
10890 * Check if a dev extent item is referred correctly by its chunk
10892 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10893 struct extent_buffer *eb, int slot)
/*
 * The dev extent at @slot of @eb names its chunk through chunk_objectid
 * and chunk_offset. That chunk item is looked up in the chunk tree,
 * validated with btrfs_check_chunk_valid(), its stripe length is
 * compared with the dev extent length, and its stripes are scanned for
 * one whose (devid, offset) equals the key of the dev extent. When no
 * such chunk stripe is found the function reports it and returns
 * REFERENCER_MISSING.
 */
10895 struct btrfs_root *chunk_root = fs_info->chunk_root;
10896 struct btrfs_dev_extent *ptr;
10897 struct btrfs_path path;
10898 struct btrfs_key chunk_key;
10899 struct btrfs_key devext_key;
10900 struct btrfs_chunk *chunk;
10901 struct extent_buffer *l;
10905 int found_chunk = 0;
10908 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10909 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10910 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to. */
10912 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10913 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10914 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10916 btrfs_init_path(&path);
10917 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10922 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10923 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The per-device stripe length of the chunk must equal the extent length. */
10928 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* One stripe of the chunk has to match the devid and offset of the extent. */
10931 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10932 for (i = 0; i < num_stripes; i++) {
10933 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10934 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10936 if (devid == devext_key.objectid &&
10937 offset == devext_key.offset) {
10943 btrfs_release_path(&path);
10944 if (!found_chunk) {
10946 "device extent[%llu, %llu, %llu] did not find the related chunk",
10947 devext_key.objectid, devext_key.offset, length);
10948 return REFERENCER_MISSING;
10954 * Check if the used space is correct with the dev item
/*
 * Check that bytes_used of a dev item equals the sum of its dev extents.
 *
 * @fs_info: filesystem; its dev_root is searched for dev extents
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item in @eb
 *
 * Every BTRFS_DEV_EXTENT_KEY item whose objectid is the device id is added
 * to a running total, which is then compared with the dev item's
 * bytes_used field.
 *
 * Returns REFERENCER_MISSING when no related dev extent can be looked up
 * and ACCOUNTING_MISMATCH when the totals differ.
 */
10956 static int check_dev_item(struct btrfs_fs_info *fs_info,
10957 struct extent_buffer *eb, int slot)
10959 struct btrfs_root *dev_root = fs_info->dev_root;
10960 struct btrfs_dev_item *dev_item;
10961 struct btrfs_path path;
10962 struct btrfs_key key;
10963 struct btrfs_dev_extent *ptr;
10969 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10970 dev_id = btrfs_device_id(eb, dev_item);
10971 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents of a device are keyed by the device id */
10973 key.objectid = dev_id;
10974 key.type = BTRFS_DEV_EXTENT_KEY;
10977 btrfs_init_path(&path);
10978 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Reload the dev item's own key so the message names the dev item */
10980 btrfs_item_key_to_cpu(eb, &key, slot);
10981 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10982 key.objectid, key.type, key.offset);
10983 btrfs_release_path(&path);
10984 return REFERENCER_MISSING;
10987 /* Iterate dev_extents to calculate the used space of a device */
10989 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10992 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10993 if (key.objectid > dev_id)
10995 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10998 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10999 struct btrfs_dev_extent);
11000 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11002 ret = btrfs_next_item(dev_root, &path);
11006 btrfs_release_path(&path);
11008 if (used != total) {
/*
 * Report the dev item's real key (reloaded just below), in the same way
 * as the "cannot find any related dev extent" message above, instead of
 * hard-coded constants that name a dev extent key type.
 */
11009 btrfs_item_key_to_cpu(eb, &key, slot);
11011 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11012 total, used, key.objectid,
11013 key.type, key.offset);
11014 return ACCOUNTING_MISMATCH;
11020 * Check a block group item with its referencer (chunk) and its used space
11021 * with extent/metadata item
/*
 * Verify one block group item.
 *
 * @fs_info: filesystem; its chunk_root and extent_root are searched
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item in @eb
 *
 * Two things are checked:
 *  - a chunk item keyed by the block group start exists in the chunk tree
 *    (REFERENCER_MISSING) and its length is compared with the block group
 *    (REFERENCER_MISMATCH);
 *  - the extent/metadata items found inside the block group range have a
 *    type compatible with the block group flags (CHUNK_TYPE_MISMATCH) and
 *    add up to the block group's recorded used bytes (ACCOUNTING_MISMATCH).
 *
 * Problems are accumulated as bits in err.
 */
11023 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11024 struct extent_buffer *eb, int slot)
11026 struct btrfs_root *extent_root = fs_info->extent_root;
11027 struct btrfs_root *chunk_root = fs_info->chunk_root;
11028 struct btrfs_block_group_item *bi;
11029 struct btrfs_block_group_item bg_item;
11030 struct btrfs_path path;
11031 struct btrfs_key bg_key;
11032 struct btrfs_key chunk_key;
11033 struct btrfs_key extent_key;
11034 struct btrfs_chunk *chunk;
11035 struct extent_buffer *leaf;
11036 struct btrfs_extent_item *ei;
11037 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the item out of the leaf so its fields can be read directly */
11045 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11046 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11047 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11048 used = btrfs_block_group_used(&bg_item);
11049 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item's key offset is the block group start (bg_key.objectid) */
11051 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11052 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11053 chunk_key.offset = bg_key.objectid;
11055 btrfs_init_path(&path);
11056 /* Search for the referencer chunk */
11057 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11060 "block group[%llu %llu] did not find the related chunk item",
11061 bg_key.objectid, bg_key.offset);
11062 err |= REFERENCER_MISSING;
11064 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11065 struct btrfs_chunk);
11066 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11069 "block group[%llu %llu] related chunk item length does not match",
11070 bg_key.objectid, bg_key.offset);
11071 err |= REFERENCER_MISMATCH;
11074 btrfs_release_path(&path);
11076 /* Search from the block group bytenr */
11077 extent_key.objectid = bg_key.objectid;
11078 extent_key.type = 0;
11079 extent_key.offset = 0;
11081 btrfs_init_path(&path);
11082 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11086 /* Iterate extent tree to account used space */
11088 leaf = path.nodes[0];
11090 /* Search slot can point to the last item beyond leaf nritems */
11091 if (path.slots[0] >= btrfs_header_nritems(leaf))
11094 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the first byte past the block group */
11095 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only extent and metadata items describe allocated space */
11098 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11099 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11101 if (extent_key.objectid < bg_key.objectid)
11104 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11107 total += extent_key.offset;
/* The extent's type must agree with the block group's flags */
11109 ei = btrfs_item_ptr(leaf, path.slots[0],
11110 struct btrfs_extent_item);
11111 flags = btrfs_extent_flags(leaf, ei);
11112 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11113 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11115 "bad extent[%llu, %llu) type mismatch with chunk",
11116 extent_key.objectid,
11117 extent_key.objectid + extent_key.offset);
11118 err |= CHUNK_TYPE_MISMATCH;
11120 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11121 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11122 BTRFS_BLOCK_GROUP_METADATA))) {
11124 "bad extent[%llu, %llu) type mismatch with chunk",
11125 extent_key.objectid,
11126 extent_key.objectid + nodesize);
11127 err |= CHUNK_TYPE_MISMATCH;
11131 ret = btrfs_next_item(extent_root, &path);
11137 btrfs_release_path(&path);
/* Space accounted from extent items must equal the recorded usage */
11139 if (total != used) {
11141 "block group[%llu %llu] used %llu but extent items used %llu",
11142 bg_key.objectid, bg_key.offset, used, total);
11143 err |= ACCOUNTING_MISMATCH;
11149 * Check a chunk item.
11150 * Including checking all referred dev_extents and block group
/*
 * Verify one chunk item against its block group item and dev extents.
 *
 * @fs_info: filesystem; its extent_root and dev_root are searched
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item in @eb
 *
 * The chunk is validated first.  Then the block group item keyed by
 * (chunk start, BLOCK_GROUP_ITEM, chunk length) is looked up and its flags
 * are compared with the chunk type.  Finally, for every stripe, the dev
 * extent at (devid, DEV_EXTENT, stripe offset) must point back at this chunk
 * and have the chunk's stripe length.
 *
 * Problems are accumulated as bits in err: REFERENCER_MISSING (no block
 * group item), REFERENCER_MISMATCH (block group flags differ),
 * BACKREF_MISSING (a stripe has no matching dev extent) and
 * BYTES_UNALIGNED | UNKNOWN_TYPE (invalid chunk).
 */
11152 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11153 struct extent_buffer *eb, int slot)
11155 struct btrfs_root *extent_root = fs_info->extent_root;
11156 struct btrfs_root *dev_root = fs_info->dev_root;
11157 struct btrfs_path path;
11158 struct btrfs_key chunk_key;
11159 struct btrfs_key bg_key;
11160 struct btrfs_key devext_key;
11161 struct btrfs_chunk *chunk;
11162 struct extent_buffer *leaf;
11163 struct btrfs_block_group_item *bi;
11164 struct btrfs_block_group_item bg_item;
11165 struct btrfs_dev_extent *ptr;
/* The chunk covers [chunk_key.offset, chunk_end) */
11177 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11178 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11179 length = btrfs_chunk_length(eb, chunk);
11180 chunk_end = chunk_key.offset + length;
11181 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11184 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11186 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11189 type = btrfs_chunk_type(eb, chunk);
/* The block group item is keyed by the chunk's start and length */
11191 bg_key.objectid = chunk_key.offset;
11192 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11193 bg_key.offset = length;
11195 btrfs_init_path(&path);
11196 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11199 "chunk[%llu %llu) did not find the related block group item",
11200 chunk_key.offset, chunk_end);
11201 err |= REFERENCER_MISSING;
11203 leaf = path.nodes[0];
11204 bi = btrfs_item_ptr(leaf, path.slots[0],
11205 struct btrfs_block_group_item);
11206 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11208 if (btrfs_block_group_flags(&bg_item) != type) {
11210 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11211 chunk_key.offset, chunk_end, type,
11212 btrfs_block_group_flags(&bg_item));
/*
 * The block group item exists but disagrees with the chunk, so this is a
 * mismatch rather than a missing referencer.
 */
11213 err |= REFERENCER_MISMATCH;
/* Every stripe must be backed by a dev extent pointing at this chunk */
11217 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11218 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11219 for (i = 0; i < num_stripes; i++) {
11220 btrfs_release_path(&path);
11221 btrfs_init_path(&path);
11222 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11223 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11224 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11226 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11229 goto not_match_dev;
11231 leaf = path.nodes[0];
11232 ptr = btrfs_item_ptr(leaf, path.slots[0],
11233 struct btrfs_dev_extent);
11234 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11235 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11236 if (objectid != chunk_key.objectid ||
11237 offset != chunk_key.offset ||
11238 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11239 goto not_match_dev;
11242 err |= BACKREF_MISSING;
11244 "chunk[%llu %llu) stripe %d did not find the related dev extent",
/* Print the chunk start (key offset), as the other messages here do */
11245 chunk_key.offset, chunk_end, i);
11248 btrfs_release_path(&path);
11254 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and dispatch each one, by key type, to the
 * matching checker.
 *
 * @root: tree the leaf belongs to; root->fs_info is passed to the checkers
 * @eb:   leaf to check
 *
 * BTRFS_EXTENT_CSUM_KEY items are not checked; their item size is only
 * added to total_csum_bytes.
 */
11256 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11258 struct btrfs_fs_info *fs_info = root->fs_info;
11259 struct btrfs_key key;
11262 struct btrfs_extent_data_ref *dref;
11267 btrfs_item_key_to_cpu(eb, &key, slot);
/* File extent: checked in the context of its owning root */
11271 case BTRFS_EXTENT_DATA_KEY:
11272 ret = check_extent_data_item(root, eb, slot);
11275 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11276 ret = check_block_group_item(fs_info, eb, slot);
11279 case BTRFS_DEV_ITEM_KEY:
11280 ret = check_dev_item(fs_info, eb, slot);
11283 case BTRFS_CHUNK_ITEM_KEY:
11284 ret = check_chunk_item(fs_info, eb, slot);
11287 case BTRFS_DEV_EXTENT_KEY:
11288 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
11291 case BTRFS_EXTENT_ITEM_KEY:
11292 case BTRFS_METADATA_ITEM_KEY:
11293 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items only contribute to the csum byte accounting */
11296 case BTRFS_EXTENT_CSUM_KEY:
11297 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items */
11299 case BTRFS_TREE_BLOCK_REF_KEY:
11300 ret = check_tree_block_backref(fs_info, key.offset,
11304 case BTRFS_EXTENT_DATA_REF_KEY:
11305 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11306 ret = check_extent_data_backref(fs_info,
11307 btrfs_extent_data_ref_root(eb, dref),
11308 btrfs_extent_data_ref_objectid(eb, dref),
11309 btrfs_extent_data_ref_offset(eb, dref),
11311 btrfs_extent_data_ref_count(eb, dref));
11314 case BTRFS_SHARED_BLOCK_REF_KEY:
11315 ret = check_shared_block_backref(fs_info, key.offset,
11319 case BTRFS_SHARED_DATA_REF_KEY:
11320 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while items remain in this leaf */
11328 if (++slot < btrfs_header_nritems(eb))
11335 * Helper function for later fs/subvol tree check. To determine if a tree
11336 * block should be checked.
11337 * This function ensures that only the direct referencer with the lowest
11338 * rootid checks a fs/subvolume tree block.
11340 * Backref check at extent tree would detect errors like missing subvolume
11341 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the one that should check tree block @eb.
 *
 * @root: root currently being traversed
 * @eb:   tree block under consideration
 *
 * The extent item for the block is located in the extent tree and its
 * inline refs are scanned.  A BTRFS_TREE_BLOCK_REF_KEY ref whose offset is
 * lower than root->objectid means a lower-numbered root also references
 * the block.  Keyed (non-inline) refs are not consulted.
 */
11343 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11345 struct btrfs_root *extent_root = root->fs_info->extent_root;
11346 struct btrfs_key key;
11347 struct btrfs_path path;
11348 struct extent_buffer *leaf;
11350 struct btrfs_extent_item *ei;
11356 struct btrfs_extent_inline_ref *iref;
/* Search past every possible item for this bytenr, then step back */
11359 btrfs_init_path(&path);
11360 key.objectid = btrfs_header_bytenr(eb);
11361 key.type = BTRFS_METADATA_ITEM_KEY;
11362 key.offset = (u64)-1;
11365 * Any failure in backref resolving means we can't determine
11366 * whom the tree block belongs to.
11367 * So in that case, we need to check that tree block
11369 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11373 ret = btrfs_previous_extent_item(extent_root, &path,
11374 btrfs_header_bytenr(eb));
11378 leaf = path.nodes[0];
11379 slot = path.slots[0];
11380 btrfs_item_key_to_cpu(leaf, &key, slot);
11381 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * In a METADATA_ITEM the inline refs follow the extent item directly;
 * otherwise a btrfs_tree_block_info sits in between.
 */
11383 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11384 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11386 struct btrfs_tree_block_info *info;
11388 info = (struct btrfs_tree_block_info *)(ei + 1);
11389 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk the inline refs up to the end of the item */
11392 item_size = btrfs_item_size_nr(leaf, slot);
11393 ptr = (unsigned long)iref;
11394 end = (unsigned long)ei + item_size;
11395 while (ptr < end) {
11396 iref = (struct btrfs_extent_inline_ref *)ptr;
11397 type = btrfs_extent_inline_ref_type(leaf, iref);
11398 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11401 * We only check the tree block if current root is
11402 * the lowest referencer of it.
11404 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11405 offset < root->objectid) {
11406 btrfs_release_path(&path);
11410 ptr += btrfs_extent_inline_ref_size(type);
11413 * Normally we should also check keyed tree block ref, but that may be
11414 * very time consuming. Inlined ref should already make us skip a lot
11415 * of refs now. So skip search keyed tree block ref.
11419 btrfs_release_path(&path);
11424 * Traversal function for tree block. We will do:
11425 * 1) Skip shared fs/subvolume tree blocks
11426 * 2) Update related bytes accounting
11427 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root and everything below it.
 *
 * @root: tree being traversed
 * @node: current tree block
 *
 * For fs/subvolume trees a block is skipped unless should_check() selects
 * this root for it.  Otherwise the byte counters are updated, the block's
 * own backref is checked, leaves go through check_leaf_items(), and for
 * nodes each child is read and traversed in turn (children covered by the
 * root item's drop progress are tested with is_dropped_key()).
 */
11429 static int traverse_tree_block(struct btrfs_root *root,
11430 struct extent_buffer *node)
11432 struct extent_buffer *eb;
11433 struct btrfs_key key;
11434 struct btrfs_key drop_key;
11442 * Skip shared fs/subvolume tree block, in that case they will
11443 * be checked by referencer with lowest rootid
11445 if (is_fstree(root->objectid) && !should_check(root, node))
11448 /* Update bytes accounting */
11449 total_btree_bytes += node->len;
11450 if (fs_root_objectid(btrfs_header_owner(node)))
11451 total_fs_tree_bytes += node->len;
11452 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11453 total_extent_tree_bytes += node->len;
11455 /* pre-order traversal, check itself first */
11456 level = btrfs_header_level(node);
11457 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11458 btrfs_header_level(node),
11459 btrfs_header_owner(node));
11463 "check %s failed root %llu bytenr %llu level %d, force continue check",
11464 level ? "node":"leaf", root->objectid,
11465 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: account unused space and check every item */
11468 btree_space_waste += btrfs_leaf_free_space(root, node);
11469 ret = check_leaf_items(root, node);
/* Node: unused key pointer slots count as wasted space */
11474 nr = btrfs_header_nritems(node);
11475 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11476 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11477 sizeof(struct btrfs_key_ptr);
11479 /* Then check all its children */
11480 for (i = 0; i < nr; i++) {
11481 u64 blocknr = btrfs_node_blockptr(node, i);
11483 btrfs_node_key_to_cpu(node, &key, i);
11484 if (level == root->root_item.drop_level &&
11485 is_dropped_key(&key, &drop_key))
11489 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11490 * to call the function itself.
11492 eb = read_tree_block(root->fs_info, blocknr, 0);
/* Only descend into children that were read successfully */
11493 if (extent_buffer_uptodate(eb)) {
11494 ret = traverse_tree_block(root, eb);
11497 free_extent_buffer(eb);
11504 * Low memory usage version check_chunks_and_extents.
11506 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11508 struct btrfs_path path;
11509 struct btrfs_key key;
11510 struct btrfs_root *root1;
11511 struct btrfs_root *root;
11512 struct btrfs_root *cur_root;
11516 root = fs_info->fs_root;
11518 root1 = root->fs_info->chunk_root;
11519 ret = traverse_tree_block(root1, root1->node);
11522 root1 = root->fs_info->tree_root;
11523 ret = traverse_tree_block(root1, root1->node);
11526 btrfs_init_path(&path);
11527 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11529 key.type = BTRFS_ROOT_ITEM_KEY;
11531 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11533 error("cannot find extent treet in tree_root");
11538 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11539 if (key.type != BTRFS_ROOT_ITEM_KEY)
11541 key.offset = (u64)-1;
11543 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11544 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11547 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11548 if (IS_ERR(cur_root) || !cur_root) {
11549 error("failed to read tree: %lld", key.objectid);
11553 ret = traverse_tree_block(cur_root, cur_root->node);
11556 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11557 btrfs_free_fs_root(cur_root);
11559 ret = btrfs_next_item(root1, &path);
11565 btrfs_release_path(&path);
/*
 * Run the chunk/extent check in the mode selected by check_mode.
 *
 * Prints "checking extents" to stderr unless progress reporting is
 * enabled, then calls the low-memory implementation for
 * CHECK_MODE_LOWMEM and check_chunks_and_extents() otherwise.
 */
11569 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11573 if (!ctx.progress_enabled)
11574 fprintf(stderr, "checking extents\n");
11575 if (check_mode == CHECK_MODE_LOWMEM)
11576 ret = check_chunks_and_extents_v2(fs_info);
11578 ret = check_chunks_and_extents(fs_info);
/*
 * Re-initialise the root node of @root as an empty tree block.
 *
 * @trans:     running transaction
 * @root:      tree whose root node is reset
 * @overwrite: NOTE(review): presumably selects reusing the current root
 *             block instead of allocating a new one -- the branch itself
 *             is not fully visible here, confirm against the full source
 *
 * The block header is zeroed and rebuilt (level, bytenr, generation,
 * backref revision, owner, fsid, chunk tree uuid) and the buffer is
 * marked dirty.  When the block lands on the same bytenr as the old root
 * node, the root item is updated in the tree root directly.
 */
11583 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11584 struct btrfs_root *root, int overwrite)
11586 struct extent_buffer *c;
11587 struct extent_buffer *old = root->node;
11590 struct btrfs_disk_key disk_key = {0,0,0};
11596 extent_buffer_get(c);
11599 c = btrfs_alloc_free_block(trans, root,
11600 root->fs_info->nodesize,
11601 root->root_key.objectid,
11602 &disk_key, level, 0, 0);
11605 extent_buffer_get(c);
/* Rebuild a pristine header for the new root block */
11609 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11610 btrfs_set_header_level(c, level);
11611 btrfs_set_header_bytenr(c, c->start);
11612 btrfs_set_header_generation(c, trans->transid);
11613 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11614 btrfs_set_header_owner(c, root->root_key.objectid);
11616 write_extent_buffer(c, root->fs_info->fsid,
11617 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11619 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11620 btrfs_header_chunk_tree_uuid(c),
11623 btrfs_mark_buffer_dirty(c);
11625 * this case can happen in the following case:
11627 * 1.overwrite previous root.
11629 * 2.reinit reloc data root, this is because we skip pin
11630 * down reloc data tree before which means we can allocate
11631 * same block bytenr here.
/* Same bytenr as before: update the root item in place */
11633 if (old->start == c->start) {
11634 btrfs_set_root_generation(&root->root_item,
11636 root->root_item.level = btrfs_header_level(root->node);
11637 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11638 &root->root_key, &root->root_item);
11640 free_extent_buffer(c);
/* Drop the reference on the previous root node and queue the root */
11644 free_extent_buffer(old);
11646 add_root_to_dirty_list(root);
/*
 * Pin tree block @eb and, recursively, the tree blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents set is updated
 * @eb:        tree block to start from
 * @tree_root: non-zero when @eb belongs to the tree root, in which case
 *             ROOT_ITEM entries are followed into the trees they point to
 *
 * A block already marked in pinned_extents is not processed again.  Root
 * items of the extent tree, reloc trees and the data reloc tree are
 * skipped.  For trees other than the tree root, the children of level-1
 * nodes are pinned by bytenr without being read.
 */
11650 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11651 struct extent_buffer *eb, int tree_root)
11653 struct extent_buffer *tmp;
11654 struct btrfs_root_item *ri;
11655 struct btrfs_key key;
11657 int level = btrfs_header_level(eb);
11663 * If we have pinned this block before, don't pin it again.
11664 * This can not only avoid forever loop with broken filesystem
11665 * but also give us some speedups.
11667 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11668 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11671 btrfs_pin_extent(fs_info, eb->start, eb->len);
11673 nritems = btrfs_header_nritems(eb);
11674 for (i = 0; i < nritems; i++) {
/* Item path: only ROOT_ITEM entries are followed */
11676 btrfs_item_key_to_cpu(eb, &key, i);
11677 if (key.type != BTRFS_ROOT_ITEM_KEY)
11679 /* Skip the extent root and reloc roots */
11680 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11681 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11682 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11684 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11685 bytenr = btrfs_disk_root_bytenr(eb, ri);
11688 * If at any point we start needing the real root we
11689 * will have to build a stump root for the root we are
11690 * in, but for now this doesn't actually use the root so
11691 * just pass in extent_root.
11693 tmp = read_tree_block(fs_info, bytenr, 0);
11694 if (!extent_buffer_uptodate(tmp)) {
11695 fprintf(stderr, "Error reading root block\n");
/* The referenced tree is not the tree root, hence tree_root = 0 */
11698 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11699 free_extent_buffer(tmp);
/* Node path: follow the block pointer of slot i */
11703 bytenr = btrfs_node_blockptr(eb, i);
11705 /* If we aren't the tree root don't read the block */
11706 if (level == 1 && !tree_root) {
11707 btrfs_pin_extent(fs_info, bytenr,
11708 fs_info->nodesize);
11712 tmp = read_tree_block(fs_info, bytenr, 0);
11713 if (!extent_buffer_uptodate(tmp)) {
11714 fprintf(stderr, "Error reading tree block\n");
11717 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11718 free_extent_buffer(tmp);
/*
 * Pin the metadata reachable from the chunk tree and from the tree root.
 *
 * The chunk tree is walked with tree_root = 0; the tree root is walked
 * with tree_root = 1 so that the trees named by its root items are
 * followed as well.
 */
11727 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11731 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11735 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * @fs_info: filesystem whose block group state is reset
 *
 * The avail_*_alloc_bits masks are cleared, then a block group is added
 * for every CHUNK_ITEM found in the chunk tree and the chunk's range is
 * marked dirty in fs_info->free_space_cache.  Afterwards the block groups
 * are iterated via btrfs_lookup_first_block_group().
 */
11738 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11740 struct btrfs_block_group_cache *cache;
11741 struct btrfs_path path;
11742 struct extent_buffer *leaf;
11743 struct btrfs_chunk *chunk;
11744 struct btrfs_key key;
11748 btrfs_init_path(&path);
11750 key.type = BTRFS_CHUNK_ITEM_KEY;
11752 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11754 btrfs_release_path(&path);
11759 * We do this in case the block groups were screwed up and had alloc
11760 * bits that aren't actually set on the chunks. This happens with
11761 * restored images every time and could happen in real life I guess.
11763 fs_info->avail_data_alloc_bits = 0;
11764 fs_info->avail_metadata_alloc_bits = 0;
11765 fs_info->avail_system_alloc_bits = 0;
11767 /* First we need to create the in-memory block groups */
/* Move on to the next leaf once the current one is exhausted */
11769 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11770 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11772 btrfs_release_path(&path);
11780 leaf = path.nodes[0];
11781 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11782 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One block group per chunk: start is key.offset, size is the chunk length */
11787 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11788 btrfs_add_block_group(fs_info, 0,
11789 btrfs_chunk_type(leaf, chunk),
11790 key.objectid, key.offset,
11791 btrfs_chunk_length(leaf, chunk));
/*
 * NOTE(review): the end passed here is start + length, while
 * test_range_bit() in pin_down_tree_blocks() is given start + len - 1;
 * confirm which end convention set_extent_dirty() expects.
 */
11792 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11793 key.offset + btrfs_chunk_length(leaf, chunk));
/* Step through the block groups in bytenr order */
11798 cache = btrfs_lookup_first_block_group(fs_info, start);
11802 start = cache->key.objectid + cache->key.offset;
11805 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance from the tree root.
 *
 * @trans:   running transaction
 * @fs_info: filesystem to clean up
 *
 * Three steps are visible here: the balance item
 * (BTRFS_BALANCE_OBJECTID / BTRFS_BALANCE_ITEM_KEY) is deleted, the root
 * items with objectid BTRFS_TREE_RELOC_OBJECTID are deleted in batches,
 * and the data reloc tree is re-initialised and given a fresh root
 * directory.
 */
11809 static int reset_balance(struct btrfs_trans_handle *trans,
11810 struct btrfs_fs_info *fs_info)
11812 struct btrfs_root *root = fs_info->tree_root;
11813 struct btrfs_path path;
11814 struct extent_buffer *leaf;
11815 struct btrfs_key key;
11816 int del_slot, del_nr = 0;
/* Step 1: drop the balance item */
11820 btrfs_init_path(&path);
11821 key.objectid = BTRFS_BALANCE_OBJECTID;
11822 key.type = BTRFS_BALANCE_ITEM_KEY;
11824 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11829 goto reinit_data_reloc;
11834 ret = btrfs_del_item(trans, root, &path);
11837 btrfs_release_path(&path);
/* Step 2: drop the reloc tree root items */
11839 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11840 key.type = BTRFS_ROOT_ITEM_KEY;
11842 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
/* End of leaf: delete what was collected, then search again */
11846 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11851 ret = btrfs_del_items(trans, root, &path,
11858 btrfs_release_path(&path);
11861 ret = btrfs_search_slot(trans, root, &key, &path,
11868 leaf = path.nodes[0];
11869 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11870 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11872 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11877 del_slot = path.slots[0];
11886 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11890 btrfs_release_path(&path);
/* Step 3: start over with an empty data reloc tree */
11893 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11894 key.type = BTRFS_ROOT_ITEM_KEY;
11895 key.offset = (u64)-1;
11896 root = btrfs_read_fs_root(fs_info, &key);
11897 if (IS_ERR(root)) {
11898 fprintf(stderr, "Error reading data reloc tree\n");
11899 ret = PTR_ERR(root);
11902 record_root_in_trans(trans, root);
11903 ret = btrfs_fsck_reinit_root(trans, root, 0);
11906 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11908 btrfs_release_path(&path);
/*
 * Throw away the extent tree and rebuild an empty one.
 *
 * @trans:   running transaction
 * @fs_info: filesystem to operate on
 *
 * Filesystems with the MIXED_GROUPS incompat flag are reported as
 * unsupported.  Otherwise: pin all metadata in use, free and recreate the
 * in-memory block groups from the chunk tree, re-initialise the extent
 * root, insert a block group item for every in-memory block group, and
 * finally clear any pending balance.
 */
11912 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11913 struct btrfs_fs_info *fs_info)
11919 * The only reason we don't do this is because right now we're just
11920 * walking the trees we find and pinning down their bytes, we don't look
11921 * at any of the leaves. In order to do mixed groups we'd have to check
11922 * the leaves of any fs roots and pin down the bytes for any file
11923 * extents we find. Not hard but why do it if we don't have to?
11925 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11926 fprintf(stderr, "We don't support re-initing the extent tree "
11927 "for mixed block groups yet, please notify a btrfs "
11928 "developer you want to do this so they can add this "
11929 "functionality.\n");
11934 * first we need to walk all of the trees except the extent tree and pin
11935 * down the bytes that are in use so we don't overwrite any existing
11938 ret = pin_metadata_blocks(fs_info);
11940 fprintf(stderr, "error pinning down used bytes\n");
11945 * Need to drop all the block groups since we're going to recreate all
11948 btrfs_free_block_groups(fs_info);
11949 ret = reset_block_groups(fs_info);
11951 fprintf(stderr, "error resetting the block groups\n");
11955 /* Ok we can allocate now, reinit the extent root */
11956 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11958 fprintf(stderr, "extent root initialization failed\n");
11960 * When the transaction code is updated we should end the
11961 * transaction, but for now progs only knows about commit so
11962 * just return an error.
11968 * Now we have all the in-memory block groups setup so we can make
11969 * allocations properly, and the metadata we care about is safe since we
11970 * pinned all of it above.
11973 struct btrfs_block_group_cache *cache;
/* Write one block group item per in-memory block group */
11975 cache = btrfs_lookup_first_block_group(fs_info, start);
11978 start = cache->key.objectid + cache->key.offset;
11979 ret = btrfs_insert_item(trans, fs_info->extent_root,
11980 &cache->key, &cache->item,
11981 sizeof(cache->item));
11983 fprintf(stderr, "Error adding block group\n");
11986 btrfs_extent_post_op(trans, fs_info->extent_root);
11989 ret = reset_balance(trans, fs_info);
11991 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force tree block @eb to be COWed by searching for its first key.
 *
 * @root: any root of the filesystem; replaced by the owner root of @eb
 * @eb:   tree block to recow
 *
 * The owner root is read from the block header, a transaction is started,
 * and btrfs_search_slot() is run with cow = 1 and lowest_level set to the
 * block's level, using the block's first key.  The transaction is then
 * committed.
 */
11996 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11998 struct btrfs_path path;
11999 struct btrfs_trans_handle *trans;
12000 struct btrfs_key key;
12003 printf("Recowing metadata block %llu\n", eb->start);
/* Look up the root that owns the block */
12004 key.objectid = btrfs_header_owner(eb);
12005 key.type = BTRFS_ROOT_ITEM_KEY;
12006 key.offset = (u64)-1;
12008 root = btrfs_read_fs_root(root->fs_info, &key);
12009 if (IS_ERR(root)) {
12010 fprintf(stderr, "Couldn't find owner root %llu\n",
12012 return PTR_ERR(root);
12015 trans = btrfs_start_transaction(root, 1);
12017 return PTR_ERR(trans);
/* Stop the search at the block's own level; nodes and leaves store keys differently */
12019 btrfs_init_path(&path);
12020 path.lowest_level = btrfs_header_level(eb);
12021 if (path.lowest_level)
12022 btrfs_node_key_to_cpu(eb, &key, 0);
12024 btrfs_item_key_to_cpu(eb, &key, 0);
12026 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the result of btrfs_commit_transaction() is not captured here */
12027 btrfs_commit_transaction(trans, root);
12028 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root it was found in.
 *
 * @root: any root of the filesystem; replaced by the root named by
 *        bad->root_id
 * @bad:  key and owning root id of the item to delete
 *
 * The item is searched for with bad->key inside a new transaction,
 * deleted with btrfs_del_item(), and the transaction is committed.
 */
12032 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12034 struct btrfs_path path;
12035 struct btrfs_trans_handle *trans;
12036 struct btrfs_key key;
12039 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12040 bad->key.type, bad->key.offset);
/* Look up the root that contains the bad item */
12041 key.objectid = bad->root_id;
12042 key.type = BTRFS_ROOT_ITEM_KEY;
12043 key.offset = (u64)-1;
12045 root = btrfs_read_fs_root(root->fs_info, &key);
12046 if (IS_ERR(root)) {
12047 fprintf(stderr, "Couldn't find owner root %llu\n",
12049 return PTR_ERR(root);
12052 trans = btrfs_start_transaction(root, 1);
12054 return PTR_ERR(trans);
12056 btrfs_init_path(&path);
12057 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12063 ret = btrfs_del_item(trans, root, &path);
/* NOTE(review): the result of btrfs_commit_transaction() is not captured here */
12065 btrfs_commit_transaction(trans, root);
12066 btrfs_release_path(&path);
/*
 * Clear the log tree pointer in the superblock.
 *
 * @root: root used to start and commit the transaction
 *
 * Sets log_root and log_root_level of the in-memory super copy to 0 and
 * commits a transaction; ret carries the result of the commit, or the
 * error from starting the transaction.
 */
12070 static int zero_log_tree(struct btrfs_root *root)
12072 struct btrfs_trans_handle *trans;
12075 trans = btrfs_start_transaction(root, 1);
12076 if (IS_ERR(trans)) {
12077 ret = PTR_ERR(trans);
12080 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12081 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12082 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for one data extent, sector by sector.
 *
 * @trans:     running transaction
 * @csum_root: csum tree receiving the checksums
 * @buf:       scratch buffer; the callers in this file allocate it with
 *             fs_info->sectorsize bytes
 * @start:     logical start of the extent
 *
 * Each iteration reads one sector at start + offset into @buf and hands
 * it to btrfs_csum_file_block(), then advances offset by sectorsize until
 * it reaches len.
 */
12086 static int populate_csum(struct btrfs_trans_handle *trans,
12087 struct btrfs_root *csum_root, char *buf, u64 start,
12090 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12095 while (offset < len) {
12096 sectorsize = fs_info->sectorsize;
12097 ret = read_extent_data(fs_info, buf, start + offset,
/* start + len (the extent end) is passed along with the sector's bytenr */
12101 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12102 start + offset, buf, sectorsize);
12105 offset += sectorsize;
/*
 * Fill the csum tree with checksums for the file extents of one fs root.
 *
 * @trans:     running transaction
 * @csum_root: csum tree receiving the checksums
 * @cur_root:  fs/subvolume tree whose file extents are scanned
 *
 * All items of @cur_root are iterated; for every EXTENT_DATA item of type
 * BTRFS_FILE_EXTENT_REG the on-disk extent (disk_bytenr, disk_num_bytes)
 * is passed to populate_csum().  An -EEXIST result from populate_csum()
 * is handled specially.
 */
12110 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12111 struct btrfs_root *csum_root,
12112 struct btrfs_root *cur_root)
12114 struct btrfs_path path;
12115 struct btrfs_key key;
12116 struct extent_buffer *node;
12117 struct btrfs_file_extent_item *fi;
/* One sector of scratch space for populate_csum() */
12124 buf = malloc(cur_root->fs_info->sectorsize);
12128 btrfs_init_path(&path);
12132 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12135 /* Iterate all regular file extents and fill its csum */
12137 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12139 if (key.type != BTRFS_EXTENT_DATA_KEY)
12141 node = path.nodes[0];
12142 slot = path.slots[0];
12143 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12144 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file range */
12146 start = btrfs_file_extent_disk_bytenr(node, fi);
12147 len = btrfs_file_extent_disk_num_bytes(node, fi);
12149 ret = populate_csum(trans, csum_root, buf, start, len);
12150 if (ret == -EEXIST)
12156 * TODO: if next leaf is corrupted, jump to nearest next valid
12159 ret = btrfs_next_item(cur_root, &path);
12169 btrfs_release_path(&path);
/*
 * Fill the csum tree by scanning every fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree receiving the checksums
 *
 * The ROOT_ITEM entries of the tree root are iterated starting at
 * BTRFS_FS_TREE_OBJECTID.  Each entry for which is_fstree() holds is read
 * and handed to fill_csum_tree_from_one_fs_root().
 */
12174 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12175 struct btrfs_root *csum_root)
12177 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12178 struct btrfs_path path;
12179 struct btrfs_root *tree_root = fs_info->tree_root;
12180 struct btrfs_root *cur_root;
12181 struct extent_buffer *node;
12182 struct btrfs_key key;
12186 btrfs_init_path(&path);
12187 key.objectid = BTRFS_FS_TREE_OBJECTID;
12189 key.type = BTRFS_ROOT_ITEM_KEY;
12190 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12199 node = path.nodes[0];
12200 slot = path.slots[0];
12201 btrfs_item_key_to_cpu(node, &key, slot);
/* Filter: objectid range, root items only, fs/subvolume trees only */
12202 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12204 if (key.type != BTRFS_ROOT_ITEM_KEY)
12206 if (!is_fstree(key.objectid))
12208 key.offset = (u64)-1;
12210 cur_root = btrfs_read_fs_root(fs_info, &key);
12211 if (IS_ERR(cur_root) || !cur_root) {
12212 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12216 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12221 ret = btrfs_next_item(tree_root, &path);
12231 btrfs_release_path(&path);
/*
 * Fill the csum tree by scanning the data extents of the extent tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree receiving the checksums
 *
 * Every EXTENT_ITEM whose flags include BTRFS_EXTENT_FLAG_DATA is passed
 * to populate_csum(), with key.objectid as the extent start.
 */
12235 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12236 struct btrfs_root *csum_root)
12238 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12239 struct btrfs_path path;
12240 struct btrfs_extent_item *ei;
12241 struct extent_buffer *leaf;
12243 struct btrfs_key key;
12246 btrfs_init_path(&path);
12248 key.type = BTRFS_EXTENT_ITEM_KEY;
12250 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12252 btrfs_release_path(&path);
/* One sector of scratch space for populate_csum() */
12256 buf = malloc(csum_root->fs_info->sectorsize);
12258 btrfs_release_path(&path);
/* Move on to the next leaf once the current one is exhausted */
12263 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12264 ret = btrfs_next_leaf(extent_root, &path);
12272 leaf = path.nodes[0];
12274 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12275 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Only data extents carry checksums */
12280 ei = btrfs_item_ptr(leaf, path.slots[0],
12281 struct btrfs_extent_item);
12282 if (!(btrfs_extent_flags(leaf, ei) &
12283 BTRFS_EXTENT_FLAG_DATA)) {
12288 ret = populate_csum(trans, csum_root, buf, key.objectid,
12295 btrfs_release_path(&path);
12301 * Recalculate the csum and put it into the csum tree.
12303 * Extent tree init will wipe out all the extent info, so in that case, we
12304 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12305 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher: a non-zero @search_fs_tree selects the fs/subvolume tree walk
 * (fill_csum_tree_from_fs), otherwise the extent tree walk
 * (fill_csum_tree_from_extent) is used. The chosen helper's result is
 * returned unchanged.
 */
12307 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12308 struct btrfs_root *csum_root,
12309 int search_fs_tree)
12311 if (search_fs_tree)
12312 return fill_csum_tree_from_fs(trans, csum_root);
12314 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the file-scope roots_info_cache.
 *
 * While the cache tree is not empty, the first cache_extent is removed from
 * it and mapped back to its enclosing struct root_item_info. Afterwards the
 * tree itself is freed and the global pointer is reset to NULL, so that
 * build_roots_info_cache() allocates a fresh one on its next call.
 */
12317 static void free_roots_info_cache(void)
/* Cache was never allocated (or already released) */
12319 if (!roots_info_cache)
12322 while (!cache_tree_empty(roots_info_cache)) {
12323 struct cache_extent *entry;
12324 struct root_item_info *rii;
12326 entry = first_cache_extent(roots_info_cache);
12329 remove_cache_extent(roots_info_cache, entry);
/* The cache_extent is embedded in root_item_info; recover the container */
12330 rii = container_of(entry, struct root_item_info, cache_extent);
12334 free(roots_info_cache);
12335 roots_info_cache = NULL;
/*
 * Fill roots_info_cache with one entry per root id found in the extent tree.
 *
 * The cache tree is allocated and initialised on first use. The extent tree
 * is then scanned; only tree block extents are considered (METADATA_ITEM
 * keys, or EXTENT_ITEM keys carrying BTRFS_EXTENT_FLAG_TREE_BLOCK). For each
 * of them the first inline ref is read, and if it is a TREE_BLOCK_REF its
 * offset is taken as the owning root id. The per-root entry records the
 * block with the highest level seen so far: its bytenr, generation and level.
 *
 * @info: filesystem whose extent root is scanned
 */
12338 static int build_roots_info_cache(struct btrfs_fs_info *info)
12341 struct btrfs_key key;
12342 struct extent_buffer *leaf;
12343 struct btrfs_path path;
/* Lazily allocate the global cache tree */
12345 if (!roots_info_cache) {
12346 roots_info_cache = malloc(sizeof(*roots_info_cache));
12347 if (!roots_info_cache)
12349 cache_tree_init(roots_info_cache);
12352 btrfs_init_path(&path);
12354 key.type = BTRFS_EXTENT_ITEM_KEY;
12356 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12359 leaf = path.nodes[0];
12362 struct btrfs_key found_key;
12363 struct btrfs_extent_item *ei;
12364 struct btrfs_extent_inline_ref *iref;
12365 int slot = path.slots[0];
12370 struct cache_extent *entry;
12371 struct root_item_info *rii;
/* Past the last item of this leaf: move on to the next leaf */
12373 if (slot >= btrfs_header_nritems(leaf)) {
12374 ret = btrfs_next_leaf(info->extent_root, &path);
12381 leaf = path.nodes[0];
12382 slot = path.slots[0];
12385 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only extent items and skinny metadata items are of interest */
12387 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12388 found_key.type != BTRFS_METADATA_ITEM_KEY)
12391 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12392 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is not a tree block */
12394 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12395 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is stored in the key offset and the inline ref
 * directly follows the extent item. Otherwise a btrfs_tree_block_info sits
 * between the two and holds the level.
 */
12398 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12399 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12400 level = found_key.offset;
12402 struct btrfs_tree_block_info *binfo;
12404 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12405 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12406 level = btrfs_tree_block_level(leaf, binfo);
12410 * For a root extent, it must be of the following type and the
12411 * first (and only one) iref in the item.
12413 type = btrfs_extent_inline_ref_type(leaf, iref);
12414 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* For a TREE_BLOCK_REF the ref offset is used as the owning root id */
12417 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12418 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New cache entry: keyed by root id, size 1, level (u8)-1 meaning unset */
12420 rii = malloc(sizeof(struct root_item_info));
12425 rii->cache_extent.start = root_id;
12426 rii->cache_extent.size = 1;
12427 rii->level = (u8)-1;
12428 entry = &rii->cache_extent;
12429 ret = insert_cache_extent(roots_info_cache, entry);
12432 rii = container_of(entry, struct root_item_info,
12436 ASSERT(rii->cache_extent.start == root_id);
12437 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first block seen) replaces the recorded candidate */
12439 if (level > rii->level || rii->level == (u8)-1) {
12440 rii->level = level;
12441 rii->bytenr = found_key.objectid;
12442 rii->gen = btrfs_extent_generation(leaf, ei);
12443 rii->node_count = 1;
12444 } else if (level == rii->level) {
12452 btrfs_release_path(&path);
/*
 * Compare one on-disk root item with the data gathered in roots_info_cache
 * and, when allowed, rewrite it.
 *
 * The cache entry for root_key->objectid is looked up; a missing entry or a
 * node_count other than 1 is reported as an error. The root item is copied
 * out of the leaf at @path and its bytenr, level and generation are compared
 * with the cached values. On a mismatch a message is printed (suppressed when
 * both read_only_mode and the global repair flag are set), a root item
 * generation newer than the found node is reported, and if read_only_mode is
 * zero the three fields are updated and written back into the leaf.
 *
 * @path:           points at the root item in the tree root leaf
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero to only detect, zero to also update the item
 */
12457 static int maybe_repair_root_item(struct btrfs_path *path,
12458 const struct btrfs_key *root_key,
12459 const int read_only_mode)
12461 const u64 root_id = root_key->objectid;
12462 struct cache_extent *entry;
12463 struct root_item_info *rii;
12464 struct btrfs_root_item ri;
12465 unsigned long offset;
12467 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12470 "Error: could not find extent items for root %llu\n",
12471 root_key->objectid);
12475 rii = container_of(entry, struct root_item_info, cache_extent);
12476 ASSERT(rii->cache_extent.start == root_id);
12477 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one candidate root node is reported as an error */
12479 if (rii->node_count != 1) {
12481 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf */
12486 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12487 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12489 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12490 btrfs_root_level(&ri) != rii->level ||
12491 btrfs_root_generation(&ri) != rii->gen) {
12494 * If we're in repair mode but our caller told us to not update
12495 * the root item, i.e. just check if it needs to be updated, don't
12496 * print this message, since the caller will call us again shortly
12497 * for the same root item without read only mode (the caller will
12498 * open a transaction first).
12500 if (!(read_only_mode && repair))
12502 "%sroot item for root %llu,"
12503 " current bytenr %llu, current gen %llu, current level %u,"
12504 " new bytenr %llu, new gen %llu, new level %u\n",
12505 (read_only_mode ? "" : "fixing "),
12507 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12508 btrfs_root_level(&ri),
12509 rii->bytenr, rii->gen, rii->level);
/* The root item claims a newer generation than the node found in the extent tree */
12511 if (btrfs_root_generation(&ri) > rii->gen) {
12513 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12514 root_id, btrfs_root_generation(&ri), rii->gen);
/* Update the local copy, then write it back over the item in the leaf */
12518 if (!read_only_mode) {
12519 btrfs_set_root_bytenr(&ri, rii->bytenr);
12520 btrfs_set_root_level(&ri, rii->level);
12521 btrfs_set_root_generation(&ri, rii->gen);
12522 write_extent_buffer(path->nodes[0], &ri,
12523 offset, sizeof(ri));
12533 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12534 * caused read-only snapshots to be corrupted if they were created at a moment
12535 * when the source subvolume/snapshot had orphan items. The issue was that the
12536 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12537 * node instead of the post orphan cleanup root node.
12538 * So this function, and its callees, just detects and fixes those cases. Even
12539 * though the regression was for read-only snapshots, this function applies to
12540 * any snapshot/subvolume root.
12541 * This must be run before any other repair code - not doing so makes other
12542 * repair code delete or modify backrefs in the extent tree for example, which
12543 * will result in an inconsistent fs after repairing the root items.
/*
 * Driver for the root item check/repair pass.
 *
 * Builds roots_info_cache from the extent tree, then walks the ROOT_ITEMs of
 * the tree root starting at BTRFS_FIRST_FREE_OBJECTID and calls
 * maybe_repair_root_item() for each of them, skipping
 * BTRFS_TREE_RELOC_OBJECTID. The helper runs in read-only mode whenever no
 * transaction is open. The cache is released before returning.
 *
 * @info: filesystem whose tree root is walked
 */
12545 static int repair_root_items(struct btrfs_fs_info *info)
12547 struct btrfs_path path;
12548 struct btrfs_key key;
12549 struct extent_buffer *leaf;
12550 struct btrfs_trans_handle *trans = NULL;
12553 int need_trans = 0;
12555 btrfs_init_path(&path);
12557 ret = build_roots_info_cache(info);
12561 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12562 key.type = BTRFS_ROOT_ITEM_KEY;
12567 * Avoid opening and committing transactions if a leaf doesn't have
12568 * any root items that need to be fixed, so that we avoid rotating
12569 * backup roots unnecessarily.
12572 trans = btrfs_start_transaction(info->tree_root, 1);
12573 if (IS_ERR(trans)) {
12574 ret = PTR_ERR(trans);
12579 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12583 leaf = path.nodes[0];
12586 struct btrfs_key found_key;
/* End of this leaf: find the key to resume from and drop the path */
12588 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12589 int no_more_keys = find_next_key(&path, &key);
12591 btrfs_release_path(&path);
12593 ret = btrfs_commit_transaction(trans,
12605 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12607 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12609 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Third argument is read_only_mode: 1 while no transaction is open */
12612 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/* NOTE(review): presumably the point where the leaf is revisited with a transaction open; confirm */
12616 if (!trans && repair) {
12619 btrfs_release_path(&path);
12629 free_roots_info_cache();
12630 btrfs_release_path(&path);
/* NOTE(review): the result of this final commit is not stored or checked */
12632 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache of every block group.
 *
 * Block groups are visited in address order: each lookup starts at the end
 * (objectid + offset) of the previously handled group, and
 * btrfs_clear_free_space_cache() is called for each one. Afterwards a
 * transaction is started on the tree root, the cache generation in the super
 * block copy is set to (u64)-1 and the transaction is committed.
 *
 * @fs_info: filesystem to operate on
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started.
 */
12639 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12641 struct btrfs_trans_handle *trans;
12642 struct btrfs_block_group_cache *bg_cache;
12646 /* Clear all free space cache inodes and their extent data */
12648 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12651 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the search right after the block group just handled */
12654 current = bg_cache->key.objectid + bg_cache->key.offset;
12657 /* Don't forget to set cache_generation to -1 */
12658 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12659 if (IS_ERR(trans)) {
12660 error("failed to update super block cache generation");
12661 return PTR_ERR(trans);
12663 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the commit result is not stored or checked */
12664 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Handle a request to clear the free space cache.
 *
 * clear_version 1: if the FREE_SPACE_TREE compat_ro bit is set, a message
 * pointing the user to "--clear-space-cache v2" is emitted; otherwise
 * clear_free_space_cache() is run and progress is printed.
 * clear_version 2: if the FREE_SPACE_TREE compat_ro bit is not set, there is
 * nothing to clear and that is reported; otherwise
 * btrfs_clear_free_space_tree() is run and progress is printed.
 *
 * @fs_info: filesystem to operate on
 */
12669 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12674 if (clear_version == 1) {
/* A v2 cache (free space tree) is present, so v1 clearing is the wrong request */
12675 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12677 "free space cache v2 detected, use --clear-space-cache v2");
12681 printf("Clearing free space cache\n");
12682 ret = clear_free_space_cache(fs_info);
12684 error("failed to clear free space cache");
12687 printf("Free space cache cleared\n");
12689 } else if (clear_version == 2) {
12690 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12691 printf("no free space cache v2 to clear\n");
12695 printf("Clear free space cache v2\n");
12696 ret = btrfs_clear_free_space_tree(fs_info);
12698 error("failed to clear free space cache v2: %d", ret);
12701 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check": synopsis, short and long description, then
 * one entry per command line option. cmd_check() passes this table to
 * usage().
 */
12708 const char * const cmd_check_usage[] = {
12709 "btrfs check [options] <device>",
12710 "Check structural integrity of a filesystem (unmounted).",
12711 "Check structural integrity of an unmounted filesystem. Verify internal",
12712 "trees' consistency and item connectivity. In the repair mode try to",
12713 "fix the problems found. ",
12714 "WARNING: the repair mode is considered dangerous",
12716 "-s|--super <superblock> use this superblock copy",
12717 "-b|--backup use the first valid backup root copy",
12718 "--repair try to repair the filesystem",
12719 "--readonly run in read-only mode (default)",
12720 "--init-csum-tree create a new CRC tree",
12721 "--init-extent-tree create a new extent tree",
12722 "--mode <MODE> allows choice of memory/IO trade-offs",
12723 " where MODE is one of:",
12724 " original - read inodes and extents to memory (requires",
12725 " more memory, does less IO)",
12726 " lowmem - try to use less memory but read blocks again",
12728 "--check-data-csum verify checksums of data blocks",
12729 "-Q|--qgroup-report print a report on qgroup consistency",
12730 "-E|--subvol-extents <subvolid>",
12731 " print subvolume extents and sharing state",
12732 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12733 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12734 "-p|--progress indicate progress",
12735 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, verifies that the device named by the single
 * positional argument is not mounted and opens it. Depending on the options
 * it then performs one of the special actions (clearing the space cache,
 * qgroup report, subvolume extent report, re-initialising the extent and/or
 * csum tree) or runs the checks in this order: chunks and extents, root
 * items, free space cache or tree, fs roots, csums, root refs, quota groups.
 * Summary counters are printed at the end and the caches are released.
 *
 * @argc, @argv: command line of the subcommand; see cmd_check_usage
 */
12739 int cmd_check(int argc, char **argv)
12741 struct cache_tree root_cache;
12742 struct btrfs_root *root;
12743 struct btrfs_fs_info *info;
12746 u64 tree_root_bytenr = 0;
12747 u64 chunk_root_bytenr = 0;
12748 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12752 int init_csum_tree = 0;
12754 int clear_space_cache = 0;
12755 int qgroup_report = 0;
12756 int qgroups_repaired = 0;
12757 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-only options get values above the char range so they cannot clash with short options */
12761 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12762 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12763 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12764 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12765 static const struct option long_options[] = {
12766 { "super", required_argument, NULL, 's' },
12767 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12768 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12769 { "init-csum-tree", no_argument, NULL,
12770 GETOPT_VAL_INIT_CSUM },
12771 { "init-extent-tree", no_argument, NULL,
12772 GETOPT_VAL_INIT_EXTENT },
12773 { "check-data-csum", no_argument, NULL,
12774 GETOPT_VAL_CHECK_CSUM },
12775 { "backup", no_argument, NULL, 'b' },
12776 { "subvol-extents", required_argument, NULL, 'E' },
12777 { "qgroup-report", no_argument, NULL, 'Q' },
12778 { "tree-root", required_argument, NULL, 'r' },
12779 { "chunk-root", required_argument, NULL,
12780 GETOPT_VAL_CHUNK_TREE },
12781 { "progress", no_argument, NULL, 'p' },
12782 { "mode", required_argument, NULL,
12784 { "clear-space-cache", required_argument, NULL,
12785 GETOPT_VAL_CLEAR_SPACE_CACHE},
12786 { NULL, 0, NULL, 0}
/*
 * 'E' must be followed by ':' in the short option string: the long form is
 * declared required_argument and the subvolume id is parsed from optarg.
 * Without the colon, "-E <subvolid>" leaves optarg NULL.
 */
12789 c = getopt_long(argc, argv, "as:br:pE:Q", long_options, NULL);
12793 case 'a': /* ignored */ break;
12795 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12798 num = arg_strtou64(optarg);
12799 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12801 "super mirror should be less than %d",
12802 BTRFS_SUPER_MIRROR_MAX);
12805 bytenr = btrfs_sb_offset(((int)num));
12806 printf("using SB copy %llu, bytenr %llu\n", num,
12807 (unsigned long long)bytenr);
12813 subvolid = arg_strtou64(optarg);
12816 tree_root_bytenr = arg_strtou64(optarg);
12818 case GETOPT_VAL_CHUNK_TREE:
12819 chunk_root_bytenr = arg_strtou64(optarg);
12822 ctx.progress_enabled = true;
12826 usage(cmd_check_usage);
12827 case GETOPT_VAL_REPAIR:
12828 printf("enabling repair mode\n");
12830 ctree_flags |= OPEN_CTREE_WRITES;
12832 case GETOPT_VAL_READONLY:
12835 case GETOPT_VAL_INIT_CSUM:
12836 printf("Creating a new CRC tree\n");
12837 init_csum_tree = 1;
12839 ctree_flags |= OPEN_CTREE_WRITES;
12841 case GETOPT_VAL_INIT_EXTENT:
12842 init_extent_tree = 1;
12843 ctree_flags |= (OPEN_CTREE_WRITES |
12844 OPEN_CTREE_NO_BLOCK_GROUPS);
12847 case GETOPT_VAL_CHECK_CSUM:
12848 check_data_csum = 1;
12850 case GETOPT_VAL_MODE:
12851 check_mode = parse_check_mode(optarg);
12852 if (check_mode == CHECK_MODE_UNKNOWN) {
12853 error("unknown mode: %s", optarg);
12857 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12858 if (strcmp(optarg, "v1") == 0) {
12859 clear_space_cache = 1;
12860 } else if (strcmp(optarg, "v2") == 0) {
12861 clear_space_cache = 2;
12862 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12865 "invalid argument to --clear-space-cache, must be v1 or v2");
12868 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) is accepted */
12873 if (check_argc_exact(argc - optind, 1))
12874 usage(cmd_check_usage);
12876 if (ctx.progress_enabled) {
12877 ctx.tp = TASK_NOTHING;
12878 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12881 /* This check is the only reason for --readonly to exist */
12882 if (readonly && repair) {
12883 error("repair options are not compatible with --readonly");
12888 * Not supported yet
12890 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12891 error("low memory mode doesn't support repair yet");
12896 cache_tree_init(&root_cache);
12898 if((ret = check_mounted(argv[optind])) < 0) {
12899 error("could not check mount status: %s", strerror(-ret));
12903 error("%s is currently mounted, aborting", argv[optind]);
12909 /* only allow partial opening under repair mode */
12911 ctree_flags |= OPEN_CTREE_PARTIAL;
12913 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12914 chunk_root_bytenr, ctree_flags);
12916 error("cannot open file system");
12922 global_info = info;
12923 root = info->fs_root;
12924 uuid_unparse(info->super_copy->fsid, uuidbuf);
12926 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12929 * Check the bare minimum before starting anything else that could rely
12930 * on it, namely the tree roots, any local consistency checks
12932 if (!extent_buffer_uptodate(info->tree_root->node) ||
12933 !extent_buffer_uptodate(info->dev_root->node) ||
12934 !extent_buffer_uptodate(info->chunk_root->node)) {
12935 error("critical roots corrupted, unable to check the filesystem");
12941 if (clear_space_cache) {
12942 ret = do_clear_free_space_cache(info, clear_space_cache);
12948 * repair mode will force us to commit transaction which
12949 * will make us fail to load log tree when mounting.
12951 if (repair && btrfs_super_log_root(info->super_copy)) {
12952 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12958 ret = zero_log_tree(root);
12961 error("failed to zero log tree: %d", ret);
12966 if (qgroup_report) {
12967 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12969 ret = qgroup_verify_all(info);
12976 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12977 subvolid, argv[optind], uuidbuf);
12978 ret = print_extent_state(info, subvolid);
/* Re-initialise the extent and/or csum tree inside one transaction */
12983 if (init_extent_tree || init_csum_tree) {
12984 struct btrfs_trans_handle *trans;
12986 trans = btrfs_start_transaction(info->extent_root, 0);
12987 if (IS_ERR(trans)) {
12988 error("error starting transaction");
12989 ret = PTR_ERR(trans);
12994 if (init_extent_tree) {
12995 printf("Creating a new extent tree\n");
12996 ret = reinit_extent_tree(trans, info);
13002 if (init_csum_tree) {
13003 printf("Reinitialize checksum tree\n");
13004 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13006 error("checksum tree initialization failed: %d",
13013 ret = fill_csum_tree(trans, info->csum_root,
13017 error("checksum tree refilling failed: %d", ret);
13022 * Ok now we commit and run the normal fsck, which will add
13023 * extent entries for all of the items it finds.
13025 ret = btrfs_commit_transaction(trans, info->extent_root);
13030 if (!extent_buffer_uptodate(info->extent_root->node)) {
13031 error("critical: extent_root, unable to check the filesystem");
13036 if (!extent_buffer_uptodate(info->csum_root->node)) {
13037 error("critical: csum_root, unable to check the filesystem");
13043 ret = do_check_chunks_and_extents(info);
13047 "errors found in extent allocation tree or chunk allocation");
/* A positive result of repair_root_items() is reported below as a count of roots */
13049 ret = repair_root_items(info);
13052 error("failed to repair root items: %s", strerror(-ret));
13056 fprintf(stderr, "Fixed %d roots.\n", ret);
13058 } else if (ret > 0) {
13060 "Found %d roots with an outdated root item.\n",
13063 "Please run a filesystem check with the option --repair to fix them.\n");
13069 if (!ctx.progress_enabled) {
13070 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13071 fprintf(stderr, "checking free space tree\n");
13073 fprintf(stderr, "checking free space cache\n");
13075 ret = check_space_cache(root);
13078 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13079 error("errors found in free space tree");
13081 error("errors found in free space cache");
13086 * We used to have to have these hole extents in between our real
13087 * extents so if we don't have this flag set we need to make sure there
13088 * are no gaps in the file extents for inodes, otherwise we can just
13089 * ignore it when this happens.
13091 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13092 if (!ctx.progress_enabled)
13093 fprintf(stderr, "checking fs roots\n");
13094 if (check_mode == CHECK_MODE_LOWMEM)
13095 ret = check_fs_roots_v2(root->fs_info);
13097 ret = check_fs_roots(info, &root_cache);
13100 error("errors found in fs roots");
13104 fprintf(stderr, "checking csums\n");
13105 ret = check_csums(root);
13108 error("errors found in csum tree");
13112 fprintf(stderr, "checking root refs\n");
13113 /* For low memory mode, check_fs_roots_v2 handles root refs */
13114 if (check_mode != CHECK_MODE_LOWMEM) {
13115 ret = check_root_refs(root, &root_cache);
13118 error("errors found in root refs");
/* In repair mode, drain the recow_ebs list through recow_extent_buffer() */
13123 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13124 struct extent_buffer *eb;
13126 eb = list_first_entry(&root->fs_info->recow_ebs,
13127 struct extent_buffer, recow);
13128 list_del_init(&eb->recow);
13129 ret = recow_extent_buffer(root, eb);
13132 error("fails to fix transid errors");
/* Drain the delete_items list, passing each entry to delete_bad_item() */
13137 while (!list_empty(&delete_items)) {
13138 struct bad_item *bad;
13140 bad = list_first_entry(&delete_items, struct bad_item, list);
13141 list_del_init(&bad->list);
13143 ret = delete_bad_item(root, bad);
13149 if (info->quota_enabled) {
13150 fprintf(stderr, "checking quota groups\n");
13151 ret = qgroup_verify_all(info);
13154 error("failed to check quota groups");
13158 ret = repair_qgroups(info, &qgroups_repaired);
13161 error("failed to repair quota groups");
/* Entries still on recow_ebs at this point are reported as transid errors */
13167 if (!list_empty(&root->fs_info->recow_ebs)) {
13168 error("transid errors in file system");
13173 printf("found %llu bytes used, ",
13174 (unsigned long long)bytes_used);
13176 printf("error(s) found\n");
13178 printf("no error found\n");
13179 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13180 printf("total tree bytes: %llu\n",
13181 (unsigned long long)total_btree_bytes);
13182 printf("total fs tree bytes: %llu\n",
13183 (unsigned long long)total_fs_tree_bytes);
13184 printf("total extent tree bytes: %llu\n",
13185 (unsigned long long)total_extent_tree_bytes);
13186 printf("btree space waste bytes: %llu\n",
13187 (unsigned long long)btree_space_waste);
13188 printf("file data blocks allocated: %llu\n referenced %llu\n",
13189 (unsigned long long)data_bytes_allocated,
13190 (unsigned long long)data_bytes_referenced);
13192 free_qgroup_counts();
13193 free_root_recs_tree(&root_cache);
13197 if (ctx.progress_enabled)
13198 task_deinit(ctx.info);