2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
/*
 * Error flag bits.  Each condition owns one bit so that several of them
 * can be OR-ed together into a single mask.  The visible group starts at
 * bit 1.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, just removed the undetermined members
142 * and change it to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
/*
 * Explicit initialization value for extent_record::flag_block_full_backref.
 * That member is a two-bit field, so it can hold the value 2.
 */
enum {
	FLAG_UNSET = 2,
};
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
229 struct btrfs_key drop_key;
/*
 * Per-backref error bits.  They are OR-ed into a backref's `errors` mask
 * and decoded for the user by print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246 struct file_extent_hole {
252 struct inode_record {
253 struct list_head backrefs;
254 unsigned int checked:1;
255 unsigned int merging:1;
256 unsigned int found_inode_item:1;
257 unsigned int found_dir_item:1;
258 unsigned int found_file_extent:1;
259 unsigned int found_csum_item:1;
260 unsigned int some_csum_missing:1;
261 unsigned int nodatasum:1;
274 struct rb_root holes;
275 struct list_head orphan_extents;
/*
 * Per-inode error bits.  They are OR-ed into inode_record::errors and
 * decoded for the user by print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296 struct root_backref {
297 struct list_head list;
298 unsigned int found_dir_item:1;
299 unsigned int found_dir_index:1;
300 unsigned int found_back_ref:1;
301 unsigned int found_forward_ref:1;
302 unsigned int reachable:1;
311 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 return list_entry(entry, struct root_backref, list);
317 struct list_head backrefs;
318 struct cache_extent cache;
319 unsigned int found_root_item:1;
325 struct cache_extent cache;
330 struct cache_extent cache;
331 struct cache_tree root_cache;
332 struct cache_tree inode_cache;
333 struct inode_record *current;
342 struct walk_control {
343 struct cache_tree shared;
344 struct shared_node *nodes[BTRFS_MAX_LEVEL];
350 struct btrfs_key key;
352 struct list_head list;
355 struct extent_entry {
360 struct list_head list;
363 struct root_item_info {
364 /* level of the root */
366 /* number of nodes at this level, must be 1 for a root */
370 struct cache_extent cache_extent;
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are for
 * internal use only.
 *
 * Every flag must own a distinct bit, otherwise two different errors
 * become indistinguishable once they are OR-ed into one mask.
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH;
 * it now uses the first free bit instead.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type mismatch */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390 static void *print_status_check(void *p)
392 struct task_ctx *priv = p;
393 const char work_indicator[] = { '.', 'o', 'O', 'o' };
395 static char *task_position_string[] = {
397 "checking free space cache",
401 task_period_start(priv->info, 1000 /* 1s */);
403 if (priv->tp == TASK_NOTHING)
407 printf("%s [%c]\r", task_position_string[priv->tp],
408 work_indicator[count % 4]);
411 task_period_wait(priv->info);
416 static int print_status_return(void *p)
424 static enum btrfs_check_mode parse_check_mode(const char *str)
426 if (strcmp(str, "lowmem") == 0)
427 return CHECK_MODE_LOWMEM;
428 if (strcmp(str, "orig") == 0)
429 return CHECK_MODE_ORIGINAL;
430 if (strcmp(str, "original") == 0)
431 return CHECK_MODE_ORIGINAL;
433 return CHECK_MODE_UNKNOWN;
436 /* Compatibility helper that allows reuse of old code */
437 static u64 first_extent_gap(struct rb_root *holes)
439 struct file_extent_hole *hole;
441 if (RB_EMPTY_ROOT(holes))
444 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
448 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 struct file_extent_hole *hole1;
451 struct file_extent_hole *hole2;
453 hole1 = rb_entry(node1, struct file_extent_hole, node);
454 hole2 = rb_entry(node2, struct file_extent_hole, node);
456 if (hole1->start > hole2->start)
458 if (hole1->start < hole2->start)
460 /* Now hole1->start == hole2->start */
461 if (hole1->len >= hole2->len)
463 * Hole 1 will be merge center
464 * Same hole will be merged later
467 /* Hole 2 will be merge center */
472 * Add a hole to the record
474 * This will do hole merge for copy_file_extent_holes(),
475 * which will ensure there won't be continuous holes.
477 static int add_file_extent_hole(struct rb_root *holes,
480 struct file_extent_hole *hole;
481 struct file_extent_hole *prev = NULL;
482 struct file_extent_hole *next = NULL;
484 hole = malloc(sizeof(*hole));
489 /* Since compare will not return 0, no -EEXIST will happen */
490 rb_insert(holes, &hole->node, compare_hole);
492 /* simple merge with previous hole */
493 if (rb_prev(&hole->node))
494 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496 if (prev && prev->start + prev->len >= hole->start) {
497 hole->len = hole->start + hole->len - prev->start;
498 hole->start = prev->start;
499 rb_erase(&prev->node, holes);
504 /* iterate merge with next holes */
506 if (!rb_next(&hole->node))
508 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510 if (hole->start + hole->len >= next->start) {
511 if (hole->start + hole->len <= next->start + next->len)
512 hole->len = next->start + next->len -
514 rb_erase(&next->node, holes);
523 static int compare_hole_range(struct rb_node *node, void *data)
525 struct file_extent_hole *hole;
528 hole = (struct file_extent_hole *)data;
531 hole = rb_entry(node, struct file_extent_hole, node);
532 if (start < hole->start)
534 if (start >= hole->start && start < hole->start + hole->len)
540 * Delete a hole in the record
542 * This will do the hole split and is much more restrictive than add.
544 static int del_file_extent_hole(struct rb_root *holes,
547 struct file_extent_hole *hole;
548 struct file_extent_hole tmp;
553 struct rb_node *node;
560 node = rb_search(holes, &tmp, compare_hole_range, NULL);
563 hole = rb_entry(node, struct file_extent_hole, node);
564 if (start + len > hole->start + hole->len)
568 * Now there will be no overlap, delete the hole and re-add the
569 * split(s) if they exists.
571 if (start > hole->start) {
572 prev_start = hole->start;
573 prev_len = start - hole->start;
576 if (hole->start + hole->len > start + len) {
577 next_start = start + len;
578 next_len = hole->start + hole->len - start - len;
581 rb_erase(node, holes);
584 ret = add_file_extent_hole(holes, prev_start, prev_len);
589 ret = add_file_extent_hole(holes, next_start, next_len);
596 static int copy_file_extent_holes(struct rb_root *dst,
599 struct file_extent_hole *hole;
600 struct rb_node *node;
603 node = rb_first(src);
605 hole = rb_entry(node, struct file_extent_hole, node);
606 ret = add_file_extent_hole(dst, hole->start, hole->len);
609 node = rb_next(node);
614 static void free_file_extent_holes(struct rb_root *holes)
616 struct rb_node *node;
617 struct file_extent_hole *hole;
619 node = rb_first(holes);
621 hole = rb_entry(node, struct file_extent_hole, node);
622 rb_erase(node, holes);
624 node = rb_first(holes);
628 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630 static void record_root_in_trans(struct btrfs_trans_handle *trans,
631 struct btrfs_root *root)
633 if (root->last_trans != trans->transid) {
634 root->track_dirty = 1;
635 root->last_trans = trans->transid;
636 root->commit_root = root->node;
637 extent_buffer_get(root->node);
641 static u8 imode_to_type(u32 imode)
644 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
645 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
646 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
647 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
648 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
649 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
650 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
651 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
654 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
658 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 struct device_record *rec1;
661 struct device_record *rec2;
663 rec1 = rb_entry(node1, struct device_record, node);
664 rec2 = rb_entry(node2, struct device_record, node);
665 if (rec1->devid > rec2->devid)
667 else if (rec1->devid < rec2->devid)
673 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 struct inode_record *rec;
676 struct inode_backref *backref;
677 struct inode_backref *orig;
678 struct inode_backref *tmp;
679 struct orphan_data_extent *src_orphan;
680 struct orphan_data_extent *dst_orphan;
685 rec = malloc(sizeof(*rec));
687 return ERR_PTR(-ENOMEM);
688 memcpy(rec, orig_rec, sizeof(*rec));
690 INIT_LIST_HEAD(&rec->backrefs);
691 INIT_LIST_HEAD(&rec->orphan_extents);
692 rec->holes = RB_ROOT;
694 list_for_each_entry(orig, &orig_rec->backrefs, list) {
695 size = sizeof(*orig) + orig->namelen + 1;
696 backref = malloc(size);
701 memcpy(backref, orig, size);
702 list_add_tail(&backref->list, &rec->backrefs);
704 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
705 dst_orphan = malloc(sizeof(*dst_orphan));
710 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
711 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
720 rb = rb_first(&rec->holes);
722 struct file_extent_hole *hole;
724 hole = rb_entry(rb, struct file_extent_hole, node);
730 if (!list_empty(&rec->backrefs))
731 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
732 list_del(&orig->list);
736 if (!list_empty(&rec->orphan_extents))
737 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
738 list_del(&orig->list);
747 static void print_orphan_data_extents(struct list_head *orphan_extents,
750 struct orphan_data_extent *orphan;
752 if (list_empty(orphan_extents))
754 printf("The following data extent is lost in tree %llu:\n",
756 list_for_each_entry(orphan, orphan_extents, list) {
757 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
758 orphan->objectid, orphan->offset, orphan->disk_bytenr,
763 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 u64 root_objectid = root->root_key.objectid;
766 int errors = rec->errors;
770 /* reloc root errors, we print its corresponding fs root objectid*/
771 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
772 root_objectid = root->root_key.offset;
773 fprintf(stderr, "reloc");
775 fprintf(stderr, "root %llu inode %llu errors %x",
776 (unsigned long long) root_objectid,
777 (unsigned long long) rec->ino, rec->errors);
779 if (errors & I_ERR_NO_INODE_ITEM)
780 fprintf(stderr, ", no inode item");
781 if (errors & I_ERR_NO_ORPHAN_ITEM)
782 fprintf(stderr, ", no orphan item");
783 if (errors & I_ERR_DUP_INODE_ITEM)
784 fprintf(stderr, ", dup inode item");
785 if (errors & I_ERR_DUP_DIR_INDEX)
786 fprintf(stderr, ", dup dir index");
787 if (errors & I_ERR_ODD_DIR_ITEM)
788 fprintf(stderr, ", odd dir item");
789 if (errors & I_ERR_ODD_FILE_EXTENT)
790 fprintf(stderr, ", odd file extent");
791 if (errors & I_ERR_BAD_FILE_EXTENT)
792 fprintf(stderr, ", bad file extent");
793 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
794 fprintf(stderr, ", file extent overlap");
795 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
796 fprintf(stderr, ", file extent discount");
797 if (errors & I_ERR_DIR_ISIZE_WRONG)
798 fprintf(stderr, ", dir isize wrong");
799 if (errors & I_ERR_FILE_NBYTES_WRONG)
800 fprintf(stderr, ", nbytes wrong");
801 if (errors & I_ERR_ODD_CSUM_ITEM)
802 fprintf(stderr, ", odd csum item");
803 if (errors & I_ERR_SOME_CSUM_MISSING)
804 fprintf(stderr, ", some csum missing");
805 if (errors & I_ERR_LINK_COUNT_WRONG)
806 fprintf(stderr, ", link count wrong");
807 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
808 fprintf(stderr, ", orphan file extent");
809 fprintf(stderr, "\n");
810 /* Print the orphan extents if needed */
811 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
812 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814 /* Print the holes if needed */
815 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
816 struct file_extent_hole *hole;
817 struct rb_node *node;
820 node = rb_first(&rec->holes);
821 fprintf(stderr, "Found file extent holes:\n");
824 hole = rb_entry(node, struct file_extent_hole, node);
825 fprintf(stderr, "\tstart: %llu, len: %llu\n",
826 hole->start, hole->len);
827 node = rb_next(node);
830 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 root->fs_info->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
877 node = container_of(cache, struct ptr_node, cache);
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
887 rec = calloc(1, sizeof(*rec));
889 return ERR_PTR(-ENOMEM);
891 rec->extent_start = (u64)-1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
909 ret = insert_cache_extent(inode_cache, &node->cache);
911 return ERR_PTR(-EEXIST);
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
961 if (!rec->found_inode_item)
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
977 if (!rec->checked || rec->merging)
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 if (backref->dir != dir || backref->namelen != namelen)
1070 if (memcmp(name, backref->name, namelen))
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1078 memset(backref, 0, sizeof(*backref));
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1136 maybe_free_inode_rec(inode_cache, rec);
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1233 if (--src_node->refs == 0)
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1241 cache = search_cache_extent(src, 0);
1243 node = container_of(cache, struct ptr_node, cache);
1245 cache = next_cache_extent(cache);
1248 remove_cache_extent(src, &node->cache);
1251 ins = malloc(sizeof(*ins));
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1301 free_inode_rec(rec);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1315 node = container_of(cache, struct shared_node, cache);
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1335 ret = insert_cache_extent(shared, &node->cache);
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1347 if (level == wc->active_node)
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1388 if (level == wc->root_level)
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1407 BUG_ON(node->refs < 2);
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1430 btrfs_init_path(&path);
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1439 btrfs_release_path(&path);
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1452 leaf = path.nodes[0];
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1475 btrfs_release_path(&path);
1478 return has_parent ? 0 : 2;
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
1515 if (cur + sizeof(*di) + name_len > total ||
1516 name_len > BTRFS_NAME_LEN) {
1517 error = REF_ERR_NAME_TOO_LONG;
1519 if (cur + sizeof(*di) > total)
1521 len = min_t(u32, total - cur - sizeof(*di),
1528 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1530 if (key->type == BTRFS_DIR_ITEM_KEY &&
1531 key->offset != btrfs_name_hash(namebuf, len)) {
1532 rec->errors |= I_ERR_ODD_DIR_ITEM;
1533 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1534 key->objectid, key->offset, namebuf, len, filetype,
1535 key->offset, btrfs_name_hash(namebuf, len));
1538 if (location.type == BTRFS_INODE_ITEM_KEY) {
1539 add_inode_backref(inode_cache, location.objectid,
1540 key->objectid, key->offset, namebuf,
1541 len, filetype, key->type, error);
1542 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1543 add_inode_backref(root_cache, location.objectid,
1544 key->objectid, key->offset,
1545 namebuf, len, filetype,
1548 fprintf(stderr, "invalid location in dir item %u\n",
1550 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1551 key->objectid, key->offset, namebuf,
1552 len, filetype, key->type, error);
1555 len = sizeof(*di) + name_len + data_len;
1556 di = (struct btrfs_dir_item *)((char *)di + len);
1559 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1560 rec->errors |= I_ERR_DUP_DIR_INDEX;
1565 static int process_inode_ref(struct extent_buffer *eb,
1566 int slot, struct btrfs_key *key,
1567 struct shared_node *active_node)
1575 struct cache_tree *inode_cache;
1576 struct btrfs_inode_ref *ref;
1577 char namebuf[BTRFS_NAME_LEN];
1579 inode_cache = &active_node->inode_cache;
1581 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1582 total = btrfs_item_size_nr(eb, slot);
1583 while (cur < total) {
1584 name_len = btrfs_inode_ref_name_len(eb, ref);
1585 index = btrfs_inode_ref_index(eb, ref);
1587 /* inode_ref + namelen should not cross item boundary */
1588 if (cur + sizeof(*ref) + name_len > total ||
1589 name_len > BTRFS_NAME_LEN) {
1590 if (total < cur + sizeof(*ref))
1593 /* Still try to read out the remaining part */
1594 len = min_t(u32, total - cur - sizeof(*ref),
1596 error = REF_ERR_NAME_TOO_LONG;
1602 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1603 add_inode_backref(inode_cache, key->objectid, key->offset,
1604 index, namebuf, len, 0, key->type, error);
1606 len = sizeof(*ref) + name_len;
1607 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1613 static int process_inode_extref(struct extent_buffer *eb,
1614 int slot, struct btrfs_key *key,
1615 struct shared_node *active_node)
1624 struct cache_tree *inode_cache;
1625 struct btrfs_inode_extref *extref;
1626 char namebuf[BTRFS_NAME_LEN];
1628 inode_cache = &active_node->inode_cache;
1630 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1631 total = btrfs_item_size_nr(eb, slot);
1632 while (cur < total) {
1633 name_len = btrfs_inode_extref_name_len(eb, extref);
1634 index = btrfs_inode_extref_index(eb, extref);
1635 parent = btrfs_inode_extref_parent(eb, extref);
1636 if (name_len <= BTRFS_NAME_LEN) {
1640 len = BTRFS_NAME_LEN;
1641 error = REF_ERR_NAME_TOO_LONG;
1643 read_extent_buffer(eb, namebuf,
1644 (unsigned long)(extref + 1), len);
1645 add_inode_backref(inode_cache, key->objectid, parent,
1646 index, namebuf, len, 0, key->type, error);
1648 len = sizeof(*extref) + name_len;
1649 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1656 static int count_csum_range(struct btrfs_root *root, u64 start,
1657 u64 len, u64 *found)
1659 struct btrfs_key key;
1660 struct btrfs_path path;
1661 struct extent_buffer *leaf;
1666 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1668 btrfs_init_path(&path);
1670 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1672 key.type = BTRFS_EXTENT_CSUM_KEY;
1674 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1678 if (ret > 0 && path.slots[0] > 0) {
1679 leaf = path.nodes[0];
1680 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1681 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1682 key.type == BTRFS_EXTENT_CSUM_KEY)
1687 leaf = path.nodes[0];
1688 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1689 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1694 leaf = path.nodes[0];
1697 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1698 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1699 key.type != BTRFS_EXTENT_CSUM_KEY)
1702 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1703 if (key.offset >= start + len)
1706 if (key.offset > start)
1709 size = btrfs_item_size_nr(leaf, path.slots[0]);
1710 csum_end = key.offset + (size / csum_size) *
1711 root->fs_info->sectorsize;
1712 if (csum_end > start) {
1713 size = min(csum_end - start, len);
1722 btrfs_release_path(&path);
1728 static int process_file_extent(struct btrfs_root *root,
1729 struct extent_buffer *eb,
1730 int slot, struct btrfs_key *key,
1731 struct shared_node *active_node)
1733 struct inode_record *rec;
1734 struct btrfs_file_extent_item *fi;
1736 u64 disk_bytenr = 0;
1737 u64 extent_offset = 0;
1738 u64 mask = root->fs_info->sectorsize - 1;
1742 rec = active_node->current;
1743 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1744 rec->found_file_extent = 1;
1746 if (rec->extent_start == (u64)-1) {
1747 rec->extent_start = key->offset;
1748 rec->extent_end = key->offset;
1751 if (rec->extent_end > key->offset)
1752 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1753 else if (rec->extent_end < key->offset) {
1754 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1755 key->offset - rec->extent_end);
1760 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1761 extent_type = btrfs_file_extent_type(eb, fi);
1763 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1764 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1766 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1767 rec->found_size += num_bytes;
1768 num_bytes = (num_bytes + mask) & ~mask;
1769 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1770 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1771 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1772 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1773 extent_offset = btrfs_file_extent_offset(eb, fi);
1774 if (num_bytes == 0 || (num_bytes & mask))
1775 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1776 if (num_bytes + extent_offset >
1777 btrfs_file_extent_ram_bytes(eb, fi))
1778 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1779 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1780 (btrfs_file_extent_compression(eb, fi) ||
1781 btrfs_file_extent_encryption(eb, fi) ||
1782 btrfs_file_extent_other_encoding(eb, fi)))
1783 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1784 if (disk_bytenr > 0)
1785 rec->found_size += num_bytes;
1787 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1789 rec->extent_end = key->offset + num_bytes;
1792 * The data reloc tree will copy full extents into its inode and then
1793 * copy the corresponding csums. Because the extent it copied could be
1794 * a preallocated extent that hasn't been written to yet there may be no
1795 * csums to copy, ergo we won't have csums for our file extent. This is
1796 * ok so just don't bother checking csums if the inode belongs to the
1799 if (disk_bytenr > 0 &&
1800 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1802 if (btrfs_file_extent_compression(eb, fi))
1803 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1805 disk_bytenr += extent_offset;
1807 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1810 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1812 rec->found_csum_item = 1;
1813 if (found < num_bytes)
1814 rec->some_csum_missing = 1;
1815 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1817 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1823 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1824 struct walk_control *wc)
1826 struct btrfs_key key;
1830 struct cache_tree *inode_cache;
1831 struct shared_node *active_node;
1833 if (wc->root_level == wc->active_node &&
1834 btrfs_root_refs(&root->root_item) == 0)
1837 active_node = wc->nodes[wc->active_node];
1838 inode_cache = &active_node->inode_cache;
1839 nritems = btrfs_header_nritems(eb);
1840 for (i = 0; i < nritems; i++) {
1841 btrfs_item_key_to_cpu(eb, &key, i);
1843 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1845 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1848 if (active_node->current == NULL ||
1849 active_node->current->ino < key.objectid) {
1850 if (active_node->current) {
1851 active_node->current->checked = 1;
1852 maybe_free_inode_rec(inode_cache,
1853 active_node->current);
1855 active_node->current = get_inode_rec(inode_cache,
1857 BUG_ON(IS_ERR(active_node->current));
1860 case BTRFS_DIR_ITEM_KEY:
1861 case BTRFS_DIR_INDEX_KEY:
1862 ret = process_dir_item(eb, i, &key, active_node);
1864 case BTRFS_INODE_REF_KEY:
1865 ret = process_inode_ref(eb, i, &key, active_node);
1867 case BTRFS_INODE_EXTREF_KEY:
1868 ret = process_inode_extref(eb, i, &key, active_node);
1870 case BTRFS_INODE_ITEM_KEY:
1871 ret = process_inode_item(eb, i, &key, active_node);
1873 case BTRFS_EXTENT_DATA_KEY:
1874 ret = process_file_extent(root, eb, i, &key,
1885 u64 bytenr[BTRFS_MAX_LEVEL];
1886 u64 refs[BTRFS_MAX_LEVEL];
1887 int need_check[BTRFS_MAX_LEVEL];
1890 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1891 struct node_refs *nrefs, u64 level);
1892 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1893 unsigned int ext_ref);
1896 * Returns >0 Found error, not fatal, should continue
1897 * Returns <0 Fatal error, must exit the whole check
1898 * Returns 0 No errors found
1900 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1901 struct node_refs *nrefs, int *level, int ext_ref)
1903 struct extent_buffer *cur = path->nodes[0];
1904 struct btrfs_key key;
1908 int root_level = btrfs_header_level(root->node);
1910 int ret = 0; /* Final return value */
1911 int err = 0; /* Positive error bitmap */
1913 cur_bytenr = cur->start;
1915 /* skip to first inode item or the first inode number change */
1916 nritems = btrfs_header_nritems(cur);
1917 for (i = 0; i < nritems; i++) {
1918 btrfs_item_key_to_cpu(cur, &key, i);
1920 first_ino = key.objectid;
1921 if (key.type == BTRFS_INODE_ITEM_KEY ||
1922 (first_ino && first_ino != key.objectid))
1926 path->slots[0] = nritems;
1932 err |= check_inode_item(root, path, ext_ref);
1934 if (err & LAST_ITEM)
1937 /* still have inode items in this leaf */
1938 if (cur->start == cur_bytenr)
1942 * we have switched to another leaf, above nodes may
1943 * have changed, here walk down the path, if a node
1944 * or leaf is shared, check whether we can skip this
1947 for (i = root_level; i >= 0; i--) {
1948 if (path->nodes[i]->start == nrefs->bytenr[i])
1951 ret = update_nodes_refs(root,
1952 path->nodes[i]->start,
1957 if (!nrefs->need_check[i]) {
1963 for (i = 0; i < *level; i++) {
1964 free_extent_buffer(path->nodes[i]);
1965 path->nodes[i] = NULL;
1974 static void reada_walk_down(struct btrfs_root *root,
1975 struct extent_buffer *node, int slot)
1977 struct btrfs_fs_info *fs_info = root->fs_info;
1984 level = btrfs_header_level(node);
1988 nritems = btrfs_header_nritems(node);
1989 for (i = slot; i < nritems; i++) {
1990 bytenr = btrfs_node_blockptr(node, i);
1991 ptr_gen = btrfs_node_ptr_generation(node, i);
1992 readahead_tree_block(fs_info, bytenr, ptr_gen);
1997 * Check the child node/leaf against the following conditions:
1998 * 1. the first item key of the node/leaf should be the same as the one
2000 * 2. block in parent node should match the child node/leaf.
2001 * 3. generation of parent node and child's header should be consistent.
2003 * Or the child node/leaf pointed by the key in parent is not valid.
2005 * We hope to check leaf owner too, but since subvol may share leaves,
2006 * which makes leaf owner check not so strong, key check should be
2007 * sufficient enough for that case.
 *
 * In every mismatch message "wanted" is the value recorded in the parent
 * (key, block pointer, pointer generation) and "have" is the value read
 * from the child itself.
2009 static int check_child_node(struct extent_buffer *parent, int slot,
2010 struct extent_buffer *child)
2012 struct btrfs_key parent_key;
2013 struct btrfs_key child_key;
2016 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2017 if (btrfs_header_level(child) == 0)
2018 btrfs_item_key_to_cpu(child, &child_key, 0);
2020 btrfs_node_key_to_cpu(child, &child_key, 0);
2022 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2025 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2026 parent_key.objectid, parent_key.type, parent_key.offset,
2027 child_key.objectid, child_key.type, child_key.offset);
2029 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2031 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2032 btrfs_node_blockptr(parent, slot),
2033 btrfs_header_bytenr(child));
2035 if (btrfs_node_ptr_generation(parent, slot) !=
2036 btrfs_header_generation(child)) {
2038 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2039 btrfs_node_ptr_generation(parent, slot),
2040 btrfs_header_generation(child));
2046 * If a tree node or leaf is shared, we don't need to iterate it in every
2047 * fs or file tree check. Here we find all of its root ids, and only check
2048 * it in the fs or file tree which has the smallest root id.
2050 static int need_check(struct btrfs_root *root, struct ulist *roots)
2052 struct rb_node *node;
2053 struct ulist_node *u;
2055 if (roots->nnodes == 1)
2058 node = rb_first(&roots->root);
2059 u = rb_entry(node, struct ulist_node, rb_node);
2061 * The current root id is not the smallest, so we skip it and let it be
2062 * checked in the fs or file tree which has the smallest root id.
2064 if (root->objectid != u->val)
2071 * For a tree node or leaf, we record its reference count, so that if we
2072 * process this node or leaf again later, we don't need to recompute it.
2074 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2075 struct node_refs *nrefs, u64 level)
2079 struct ulist *roots;
2081 if (nrefs->bytenr[level] != bytenr) {
2082 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2083 level, 1, &refs, NULL);
2087 nrefs->bytenr[level] = bytenr;
2088 nrefs->refs[level] = refs;
2090 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2095 check = need_check(root, roots);
2097 nrefs->need_check[level] = check;
2099 nrefs->need_check[level] = 1;
2106 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2107 struct walk_control *wc, int *level,
2108 struct node_refs *nrefs)
2110 enum btrfs_tree_block_status status;
2113 struct btrfs_fs_info *fs_info = root->fs_info;
2114 struct extent_buffer *next;
2115 struct extent_buffer *cur;
2119 WARN_ON(*level < 0);
2120 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2122 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2123 refs = nrefs->refs[*level];
2126 ret = btrfs_lookup_extent_info(NULL, root,
2127 path->nodes[*level]->start,
2128 *level, 1, &refs, NULL);
2133 nrefs->bytenr[*level] = path->nodes[*level]->start;
2134 nrefs->refs[*level] = refs;
2138 ret = enter_shared_node(root, path->nodes[*level]->start,
2146 while (*level >= 0) {
2147 WARN_ON(*level < 0);
2148 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2149 cur = path->nodes[*level];
2151 if (btrfs_header_level(cur) != *level)
2154 if (path->slots[*level] >= btrfs_header_nritems(cur))
2157 ret = process_one_leaf(root, cur, wc);
2162 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2163 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2165 if (bytenr == nrefs->bytenr[*level - 1]) {
2166 refs = nrefs->refs[*level - 1];
2168 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2169 *level - 1, 1, &refs, NULL);
2173 nrefs->bytenr[*level - 1] = bytenr;
2174 nrefs->refs[*level - 1] = refs;
2179 ret = enter_shared_node(root, bytenr, refs,
2182 path->slots[*level]++;
2187 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2188 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2189 free_extent_buffer(next);
2190 reada_walk_down(root, cur, path->slots[*level]);
2191 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2192 if (!extent_buffer_uptodate(next)) {
2193 struct btrfs_key node_key;
2195 btrfs_node_key_to_cpu(path->nodes[*level],
2197 path->slots[*level]);
2198 btrfs_add_corrupt_extent_record(root->fs_info,
2200 path->nodes[*level]->start,
2201 root->fs_info->nodesize,
2208 ret = check_child_node(cur, path->slots[*level], next);
2210 free_extent_buffer(next);
2215 if (btrfs_is_leaf(next))
2216 status = btrfs_check_leaf(root, NULL, next);
2218 status = btrfs_check_node(root, NULL, next);
2219 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2220 free_extent_buffer(next);
2225 *level = *level - 1;
2226 free_extent_buffer(path->nodes[*level]);
2227 path->nodes[*level] = next;
2228 path->slots[*level] = 0;
2231 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2235 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2236 unsigned int ext_ref);
2239 * Returns >0 Found error, should continue
2240 * Returns <0 Fatal error, must exit the whole check
2241 * Returns 0 No errors found
2243 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2244 int *level, struct node_refs *nrefs, int ext_ref)
2246 enum btrfs_tree_block_status status;
2249 struct btrfs_fs_info *fs_info = root->fs_info;
2250 struct extent_buffer *next;
2251 struct extent_buffer *cur;
2254 WARN_ON(*level < 0);
2255 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2257 ret = update_nodes_refs(root, path->nodes[*level]->start,
2262 while (*level >= 0) {
2263 WARN_ON(*level < 0);
2264 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2265 cur = path->nodes[*level];
2267 if (btrfs_header_level(cur) != *level)
2270 if (path->slots[*level] >= btrfs_header_nritems(cur))
2272 /* Don't forget to check leaf/node validity */
2274 ret = btrfs_check_leaf(root, NULL, cur);
2275 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2279 ret = process_one_leaf_v2(root, path, nrefs,
2283 ret = btrfs_check_node(root, NULL, cur);
2284 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2289 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2290 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2292 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2295 if (!nrefs->need_check[*level - 1]) {
2296 path->slots[*level]++;
2300 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2301 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2302 free_extent_buffer(next);
2303 reada_walk_down(root, cur, path->slots[*level]);
2304 next = read_tree_block(fs_info, bytenr, ptr_gen);
2305 if (!extent_buffer_uptodate(next)) {
2306 struct btrfs_key node_key;
2308 btrfs_node_key_to_cpu(path->nodes[*level],
2310 path->slots[*level]);
2311 btrfs_add_corrupt_extent_record(fs_info,
2313 path->nodes[*level]->start,
2321 ret = check_child_node(cur, path->slots[*level], next);
2325 if (btrfs_is_leaf(next))
2326 status = btrfs_check_leaf(root, NULL, next);
2328 status = btrfs_check_node(root, NULL, next);
2329 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2330 free_extent_buffer(next);
2335 *level = *level - 1;
2336 free_extent_buffer(path->nodes[*level]);
2337 path->nodes[*level] = next;
2338 path->slots[*level] = 0;
2343 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2344 struct walk_control *wc, int *level)
2347 struct extent_buffer *leaf;
2349 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2350 leaf = path->nodes[i];
2351 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2356 free_extent_buffer(path->nodes[*level]);
2357 path->nodes[*level] = NULL;
2358 BUG_ON(*level > wc->active_node);
2359 if (*level == wc->active_node)
2360 leave_shared_node(root, wc, *level);
2367 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2371 struct extent_buffer *leaf;
2373 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2374 leaf = path->nodes[i];
2375 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2380 free_extent_buffer(path->nodes[*level]);
2381 path->nodes[*level] = NULL;
2388 static int check_root_dir(struct inode_record *rec)
2390 struct inode_backref *backref;
2393 if (!rec->found_inode_item || rec->errors)
2395 if (rec->nlink != 1 || rec->found_link != 0)
2397 if (list_empty(&rec->backrefs))
2399 backref = to_inode_backref(rec->backrefs.next);
2400 if (!backref->found_inode_ref)
2402 if (backref->index != 0 || backref->namelen != 2 ||
2403 memcmp(backref->name, "..", 2))
2405 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a wrong directory isize: search for the key
 * (rec->ino, BTRFS_INODE_ITEM_KEY, (u64)-1), set the size of the inode item
 * at the resulting path position to rec->found_size, mark the leaf dirty and
 * clear I_ERR_DIR_ISIZE_WRONG in rec->errors. The path is released before
 * returning.
 */
2412 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2413 struct btrfs_root *root, struct btrfs_path *path,
2414 struct inode_record *rec)
2416 struct btrfs_inode_item *ei;
2417 struct btrfs_key key;
2420 key.objectid = rec->ino;
2421 key.type = BTRFS_INODE_ITEM_KEY;
2422 key.offset = (u64)-1;
2424 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2428 if (!path->slots[0]) {
2435 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2436 if (key.objectid != rec->ino) {
2441 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2442 struct btrfs_inode_item);
2443 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2444 btrfs_mark_buffer_dirty(path->nodes[0]);
2445 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
/* %llu, not %Lu: the 'L' length modifier is only defined for long double */
2446 printf("reset isize for dir %llu root %llu\n", rec->ino,
2447 root->root_key.objectid);
2449 btrfs_release_path(path);
2453 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2454 struct btrfs_root *root,
2455 struct btrfs_path *path,
2456 struct inode_record *rec)
2460 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2461 btrfs_release_path(path);
2463 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2467 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2468 struct btrfs_root *root,
2469 struct btrfs_path *path,
2470 struct inode_record *rec)
2472 struct btrfs_inode_item *ei;
2473 struct btrfs_key key;
2476 key.objectid = rec->ino;
2477 key.type = BTRFS_INODE_ITEM_KEY;
2480 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2487 /* Since ret == 0, no need to check anything */
2488 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2489 struct btrfs_inode_item);
2490 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2491 btrfs_mark_buffer_dirty(path->nodes[0]);
2492 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2493 printf("reset nbytes for ino %llu root %llu\n",
2494 rec->ino, root->root_key.objectid);
2496 btrfs_release_path(path);
2500 static int add_missing_dir_index(struct btrfs_root *root,
2501 struct cache_tree *inode_cache,
2502 struct inode_record *rec,
2503 struct inode_backref *backref)
2505 struct btrfs_path path;
2506 struct btrfs_trans_handle *trans;
2507 struct btrfs_dir_item *dir_item;
2508 struct extent_buffer *leaf;
2509 struct btrfs_key key;
2510 struct btrfs_disk_key disk_key;
2511 struct inode_record *dir_rec;
2512 unsigned long name_ptr;
2513 u32 data_size = sizeof(*dir_item) + backref->namelen;
2516 trans = btrfs_start_transaction(root, 1);
2518 return PTR_ERR(trans);
2520 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2521 (unsigned long long)rec->ino);
2523 btrfs_init_path(&path);
2524 key.objectid = backref->dir;
2525 key.type = BTRFS_DIR_INDEX_KEY;
2526 key.offset = backref->index;
2527 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2530 leaf = path.nodes[0];
2531 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2533 disk_key.objectid = cpu_to_le64(rec->ino);
2534 disk_key.type = BTRFS_INODE_ITEM_KEY;
2535 disk_key.offset = 0;
2537 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2538 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2539 btrfs_set_dir_data_len(leaf, dir_item, 0);
2540 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2541 name_ptr = (unsigned long)(dir_item + 1);
2542 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2543 btrfs_mark_buffer_dirty(leaf);
2544 btrfs_release_path(&path);
2545 btrfs_commit_transaction(trans, root);
2547 backref->found_dir_index = 1;
2548 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2549 BUG_ON(IS_ERR(dir_rec));
2552 dir_rec->found_size += backref->namelen;
2553 if (dir_rec->found_size == dir_rec->isize &&
2554 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2555 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2556 if (dir_rec->found_size != dir_rec->isize)
2557 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2562 static int delete_dir_index(struct btrfs_root *root,
2563 struct inode_backref *backref)
2565 struct btrfs_trans_handle *trans;
2566 struct btrfs_dir_item *di;
2567 struct btrfs_path path;
2570 trans = btrfs_start_transaction(root, 1);
2572 return PTR_ERR(trans);
2574 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2575 (unsigned long long)backref->dir,
2576 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2577 (unsigned long long)root->objectid);
2579 btrfs_init_path(&path);
2580 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2581 backref->name, backref->namelen,
2582 backref->index, -1);
2585 btrfs_release_path(&path);
2586 btrfs_commit_transaction(trans, root);
2593 ret = btrfs_del_item(trans, root, &path);
2595 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2597 btrfs_release_path(&path);
2598 btrfs_commit_transaction(trans, root);
2602 static int create_inode_item(struct btrfs_root *root,
2603 struct inode_record *rec,
2606 struct btrfs_trans_handle *trans;
2607 struct btrfs_inode_item inode_item;
2608 time_t now = time(NULL);
2611 trans = btrfs_start_transaction(root, 1);
2612 if (IS_ERR(trans)) {
2613 ret = PTR_ERR(trans);
2617 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2618 "be incomplete, please check permissions and content after "
2619 "the fsck completes.\n", (unsigned long long)root->objectid,
2620 (unsigned long long)rec->ino);
2622 memset(&inode_item, 0, sizeof(inode_item));
2623 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2625 btrfs_set_stack_inode_nlink(&inode_item, 1);
2627 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2628 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2629 if (rec->found_dir_item) {
2630 if (rec->found_file_extent)
2631 fprintf(stderr, "root %llu inode %llu has both a dir "
2632 "item and extents, unsure if it is a dir or a "
2633 "regular file so setting it as a directory\n",
2634 (unsigned long long)root->objectid,
2635 (unsigned long long)rec->ino);
2636 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2637 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2638 } else if (!rec->found_dir_item) {
2639 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2640 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2642 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2643 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2644 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2645 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2646 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2647 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2648 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2649 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2651 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2653 btrfs_commit_transaction(trans, root);
2657 static int repair_inode_backrefs(struct btrfs_root *root,
2658 struct inode_record *rec,
2659 struct cache_tree *inode_cache,
2662 struct inode_backref *tmp, *backref;
2663 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2667 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2668 if (!delete && rec->ino == root_dirid) {
2669 if (!rec->found_inode_item) {
2670 ret = create_inode_item(root, rec, 1);
2677 /* Index 0 of the root dir is special, don't mess with it */
2678 if (rec->ino == root_dirid && backref->index == 0)
2682 ((backref->found_dir_index && !backref->found_inode_ref) ||
2683 (backref->found_dir_index && backref->found_inode_ref &&
2684 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2685 ret = delete_dir_index(root, backref);
2689 list_del(&backref->list);
2694 if (!delete && !backref->found_dir_index &&
2695 backref->found_dir_item && backref->found_inode_ref) {
2696 ret = add_missing_dir_index(root, inode_cache, rec,
2701 if (backref->found_dir_item &&
2702 backref->found_dir_index) {
2703 if (!backref->errors &&
2704 backref->found_inode_ref) {
2705 list_del(&backref->list);
2712 if (!delete && (!backref->found_dir_index &&
2713 !backref->found_dir_item &&
2714 backref->found_inode_ref)) {
2715 struct btrfs_trans_handle *trans;
2716 struct btrfs_key location;
2718 ret = check_dir_conflict(root, backref->name,
2724 * let the nlink fixing routine handle it,
2725 * which can do it better.
2730 location.objectid = rec->ino;
2731 location.type = BTRFS_INODE_ITEM_KEY;
2732 location.offset = 0;
2734 trans = btrfs_start_transaction(root, 1);
2735 if (IS_ERR(trans)) {
2736 ret = PTR_ERR(trans);
2739 fprintf(stderr, "adding missing dir index/item pair "
2741 (unsigned long long)rec->ino);
2742 ret = btrfs_insert_dir_item(trans, root, backref->name,
2744 backref->dir, &location,
2745 imode_to_type(rec->imode),
2748 btrfs_commit_transaction(trans, root);
2752 if (!delete && (backref->found_inode_ref &&
2753 backref->found_dir_index &&
2754 backref->found_dir_item &&
2755 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2756 !rec->found_inode_item)) {
2757 ret = create_inode_item(root, rec, 0);
2764 return ret ? ret : repaired;
2768 * To determine the file type for nlink/inode_item repair
2770 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2771 * Return -ENOENT if file type is not found.
2773 static int find_file_type(struct inode_record *rec, u8 *type)
2775 struct inode_backref *backref;
2777 /* For inode item recovered case */
2778 if (rec->found_inode_item) {
2779 *type = imode_to_type(rec->imode);
/*
 * Otherwise take the filetype recorded in a backref that was seen as a
 * dir index or a dir item; a backref known only through an inode ref is
 * not used as a type source here.
 */
2783 list_for_each_entry(backref, &rec->backrefs, list) {
2784 if (backref->found_dir_index || backref->found_dir_item) {
2785 *type = backref->filetype;
2793 * To determine the file name for nlink repair
2795 * Return 0 if file name is found, set name and namelen.
2796 * Return -ENOENT if file name is not found.
2798 static int find_file_name(struct inode_record *rec,
2799 char *name, int *namelen)
2801 struct inode_backref *backref;
/*
 * Any backref seen as dir index, dir item or inode ref can supply the
 * name. Exactly backref->namelen bytes are copied into @name and no
 * terminator is written here, so the length is reported via @namelen.
 * NOTE(review): @name must be able to hold backref->namelen bytes; no
 * bound is checked in the visible code - confirm against callers.
 */
2803 list_for_each_entry(backref, &rec->backrefs, list) {
2804 if (backref->found_dir_index || backref->found_dir_item ||
2805 backref->found_inode_ref) {
2806 memcpy(name, backref->name, backref->namelen);
2807 *namelen = backref->namelen;
/*
 * Rebuild the link count of inode @rec->ino from its backref list.
 *
 * Steps visible below:
 *  1. rec->found_link is cleared.
 *  2. Every backref is passed to btrfs_unlink(); backrefs that are not
 *     backed by all of dir index, dir item and inode ref are removed
 *     from the list so they are not re-added later.
 *  3. The inode item is looked up and its on-disk nlink is set to 0.
 *  4. Each backref still on the list is re-created with
 *     btrfs_add_link(), which is relied upon to bump nlink.
 *
 * @path is released before the re-add loop and again at the end.
 */
2814 /* Reset the nlink of the inode to the correct one */
2815 static int reset_nlink(struct btrfs_trans_handle *trans,
2816 struct btrfs_root *root,
2817 struct btrfs_path *path,
2818 struct inode_record *rec)
2820 struct inode_backref *backref;
2821 struct inode_backref *tmp;
2822 struct btrfs_key key;
2823 struct btrfs_inode_item *inode_item;
2826 /* We don't believe this either, reset it and iterate backref */
2827 rec->found_link = 0;
2829 /* Remove all backref including the valid ones */
2830 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2831 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2832 backref->index, backref->name,
2833 backref->namelen, 0);
2837 /* remove invalid backref, so it won't be added back */
2838 if (!(backref->found_dir_index &&
2839 backref->found_dir_item &&
2840 backref->found_inode_ref)) {
2841 list_del(&backref->list);
2848 /* Set nlink to 0 */
2849 key.objectid = rec->ino;
2850 key.type = BTRFS_INODE_ITEM_KEY;
2852 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2859 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2860 struct btrfs_inode_item);
2861 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2862 btrfs_mark_buffer_dirty(path->nodes[0]);
2863 btrfs_release_path(path);
2866 * Add back valid inode_ref/dir_item/dir_index,
2867 * add_link() will handle the nlink inc, so new nlink must be correct
2869 list_for_each_entry(backref, &rec->backrefs, list) {
2870 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2871 backref->name, backref->namelen,
2872 backref->filetype, &backref->index, 1);
2877 btrfs_release_path(path);
/*
 * Store in *highest_ino the objectid of the item that sits just before
 * the position found when searching @root for a
 * (BTRFS_LAST_FREE_OBJECTID, BTRFS_INODE_ITEM_KEY) key.
 *
 * @path is (re)initialised on entry and released before returning.
 * The result is additionally compared against BTRFS_LAST_FREE_OBJECTID.
 * NOTE(review): that comparison presumably rejects an exhausted inode
 * number space - confirm the resulting error code.
 */
2881 static int get_highest_inode(struct btrfs_trans_handle *trans,
2882 struct btrfs_root *root,
2883 struct btrfs_path *path,
2886 struct btrfs_key key, found_key;
2889 btrfs_init_path(path);
2890 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2892 key.type = BTRFS_INODE_ITEM_KEY;
2893 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2895 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2896 path->slots[0] - 1);
2897 *highest_ino = found_key.objectid;
2900 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2902 btrfs_release_path(path);
/*
 * Repair a wrong link count for inode @rec->ino.
 *
 * Visible flow:
 *  - The file name and type are recovered first with find_file_name()
 *    and find_file_type(); when that fails the decimal inode number is
 *    used as the name and BTRFS_FT_REG_FILE as the type.
 *  - reset_nlink() rebuilds the link count from the backref list.
 *  - If rec->found_link is 0 afterwards, a "lost+found" directory is
 *    created with btrfs_mkdir() under BTRFS_FIRST_FREE_OBJECTID (its
 *    inode number is derived via get_highest_inode()) and the inode is
 *    linked into it with btrfs_add_link(). While that returns -EEXIST
 *    a suffix is appended to the name and the link is retried, bounded
 *    by a length check on namelen + count_digits(rec->ino) + 1.
 *  - I_ERR_LINK_COUNT_WRONG is cleared from rec->errors and @path is
 *    released; the existing comment near the end explains that the flag
 *    is cleared unconditionally to avoid looping on the same inode.
 *
 * namebuf is BTRFS_NAME_LEN bytes and zero-initialised.
 */
2906 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2907 struct btrfs_root *root,
2908 struct btrfs_path *path,
2909 struct inode_record *rec)
2911 char *dir_name = "lost+found";
2912 char namebuf[BTRFS_NAME_LEN] = {0};
2917 int name_recovered = 0;
2918 int type_recovered = 0;
2922 * Get file name and type first before these invalid inode ref
2923 * are deleted by remove_all_invalid_backref()
2925 name_recovered = !find_file_name(rec, namebuf, &namelen);
2926 type_recovered = !find_file_type(rec, &type);
2928 if (!name_recovered) {
2929 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2930 rec->ino, rec->ino);
2931 namelen = count_digits(rec->ino);
2932 sprintf(namebuf, "%llu", rec->ino);
2935 if (!type_recovered) {
2936 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2938 type = BTRFS_FT_REG_FILE;
2942 ret = reset_nlink(trans, root, path, rec);
2945 "Failed to reset nlink for inode %llu: %s\n",
2946 rec->ino, strerror(-ret));
2950 if (rec->found_link == 0) {
2951 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2955 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2956 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2959 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2960 dir_name, strerror(-ret));
2963 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2964 namebuf, namelen, type, NULL, 1);
2966 * Add ".INO" suffix several times to handle case where
2967 * "FILENAME.INO" is already taken by another file.
2969 while (ret == -EEXIST) {
2971 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2973 if (namelen + count_digits(rec->ino) + 1 >
2978 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2980 namelen += count_digits(rec->ino) + 1;
2981 ret = btrfs_add_link(trans, root, rec->ino,
2982 lost_found_ino, namebuf,
2983 namelen, type, NULL, 1);
2987 "Failed to link the inode %llu to %s dir: %s\n",
2988 rec->ino, dir_name, strerror(-ret));
2992 * Just increase the found_link, don't actually add the
2993 * backref. This will make things easier and this inode
2994 * record will be freed after the repair is done.
2995 * So fsck will not report problem about this inode.
2998 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2999 namelen, namebuf, dir_name);
3001 printf("Fixed the nlink of inode %llu\n", rec->ino);
3004 * Clear the flag anyway, or we will loop forever for the same inode
3005 * as it will not be removed from the bad inode list and the dead loop
3008 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3009 btrfs_release_path(path);
3014 * Check if there is any normal(reg or prealloc) file extent for given
3016 * This is used to determine the file type when neither its dir_index/item nor
3017 * inode_item exists.
3019 * This will *NOT* report error, if any error happens, just consider it does
3020 * not have any normal file extent.
3022 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3024 struct btrfs_path path;
3025 struct btrfs_key key;
3026 struct btrfs_key found_key;
3027 struct btrfs_file_extent_item *fi;
3031 btrfs_init_path(&path);
3033 key.type = BTRFS_EXTENT_DATA_KEY;
/* Read-only lookup: no transaction handle, no insert/delete, no COW. */
3036 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/*
 * The search can land one slot past the last item of the leaf; in that
 * case advance to the next leaf before reading the key.
 */
3041 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3042 ret = btrfs_next_leaf(root, &path);
3049 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Only an EXTENT_DATA item that belongs to @ino is of interest. */
3051 if (found_key.objectid != ino ||
3052 found_key.type != BTRFS_EXTENT_DATA_KEY)
3054 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3055 struct btrfs_file_extent_item);
3056 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Inline extents do not qualify; any other extent type takes this branch. */
3057 if (type != BTRFS_FILE_EXTENT_INLINE) {
3063 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF*
 * file type bits using a static lookup table.
 *
 * @type is used directly as the table index.
 * NOTE(review): no range check is visible here, so a value above the
 * largest BTRFS_FT_* listed in the table would index past it - confirm
 * that all callers pass a validated type.
 */
3067 static u32 btrfs_type_to_imode(u8 type)
3069 static u32 imode_by_btrfs_type[] = {
3070 [BTRFS_FT_REG_FILE] = S_IFREG,
3071 [BTRFS_FT_DIR] = S_IFDIR,
3072 [BTRFS_FT_CHRDEV] = S_IFCHR,
3073 [BTRFS_FT_BLKDEV] = S_IFBLK,
3074 [BTRFS_FT_FIFO] = S_IFIFO,
3075 [BTRFS_FT_SOCK] = S_IFSOCK,
3076 [BTRFS_FT_SYMLINK] = S_IFLNK,
3079 return imode_by_btrfs_type[(type)];
/*
 * Recreate a missing inode item for @rec->ino.
 *
 * The file type comes from find_file_type() when possible. Otherwise it
 * is guessed in this order: a normal file extent was found -> regular
 * file; rec->found_dir_item -> directory; orphan extents are queued on
 * the record -> regular file; nothing conclusive -> regular file, with a
 * message printed.
 *
 * The inode is created with btrfs_new_inode() using
 * mode | btrfs_type_to_imode(filetype). On the record, found_dir_item
 * is set, imode is updated, I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs
 * afterwards; link handling itself is not done here.
 */
3082 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3083 struct btrfs_root *root,
3084 struct btrfs_path *path,
3085 struct inode_record *rec)
3089 int type_recovered = 0;
3092 printf("Trying to rebuild inode:%llu\n", rec->ino);
3094 type_recovered = !find_file_type(rec, &filetype);
3097 * Try to determine inode type if type not found.
3099 * For found regular file extent, it must be FILE.
3100 * For found dir_item/index, it must be DIR.
3102 * For undetermined one, use FILE as fallback.
3105 * 1. If found backref(inode_index/item is already handled) to it,
3107 * Need new inode-inode ref structure to allow search for that.
3109 if (!type_recovered) {
3110 if (rec->found_file_extent &&
3111 find_normal_file_extent(root, rec->ino)) {
3113 filetype = BTRFS_FT_REG_FILE;
3114 } else if (rec->found_dir_item) {
3116 filetype = BTRFS_FT_DIR;
3117 } else if (!list_empty(&rec->orphan_extents)) {
3119 filetype = BTRFS_FT_REG_FILE;
3121 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3124 filetype = BTRFS_FT_REG_FILE;
3128 ret = btrfs_new_inode(trans, root, rec->ino,
3129 mode | btrfs_type_to_imode(filetype));
3134 * Here inode rebuild is done, we only rebuild the inode item,
3135 * don't repair the nlink(like move to lost+found).
3136 * That is the job of nlink repair.
3138 * We just fill the record and return
3140 rec->found_dir_item = 1;
3141 rec->imode = mode | btrfs_type_to_imode(filetype);
3143 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3144 /* Ensure the inode_nlinks repair function will be called */
3145 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Process every orphan data extent queued on @rec->orphan_extents.
 *
 * For each entry the range [offset, offset + disk_len) of the owning
 * inode is probed with btrfs_get_extent(). Two outcomes are visible:
 * the orphan is reported as conflicting and its extent is released with
 * btrfs_free_extent(), or a file extent item is inserted for it with
 * btrfs_insert_file_extent() using disk_len as both lengths.
 * NOTE(review): the conditions selecting between the two outcomes are
 * not shown in the visible lines - confirm.
 *
 * Record bookkeeping done here: found_size grows by disk_len,
 * I_ERR_FILE_NBYTES_WRONG is cleared once found_size equals nbytes, the
 * covered hole is removed with del_file_extent_hole(),
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared when no holes remain, and the
 * orphan is unlinked from the list. I_ERR_FILE_EXTENT_ORPHAN is cleared
 * at the end.
 */
3150 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3151 struct btrfs_root *root,
3152 struct btrfs_path *path,
3153 struct inode_record *rec)
3155 struct orphan_data_extent *orphan;
3156 struct orphan_data_extent *tmp;
3159 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3161 * Check for conflicting file extents
3163 * Here we don't know whether the extents is compressed or not,
3164 * so we can only assume it not compressed nor data offset,
3165 * and use its disk_len as extent length.
3167 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3168 orphan->offset, orphan->disk_len, 0);
3169 btrfs_release_path(path);
3174 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3175 orphan->disk_bytenr, orphan->disk_len);
3176 ret = btrfs_free_extent(trans,
3177 root->fs_info->extent_root,
3178 orphan->disk_bytenr, orphan->disk_len,
3179 0, root->objectid, orphan->objectid,
3184 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3185 orphan->offset, orphan->disk_bytenr,
3186 orphan->disk_len, orphan->disk_len);
3190 /* Update file size info */
3191 rec->found_size += orphan->disk_len;
3192 if (rec->found_size == rec->nbytes)
3193 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3195 /* Update the file extent hole info too */
3196 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3200 if (RB_EMPTY_ROOT(&rec->holes))
3201 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3203 list_del(&orphan->list);
3206 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in @rec->holes with explicit hole extents.
 *
 * The tree of holes is drained from its first node: each hole is
 * punched with btrfs_punch_hole() and then removed with
 * del_file_extent_hole(); I_ERR_FILE_EXTENT_DISCOUNT is cleared as soon
 * as the tree becomes empty.
 *
 * A separate case covers a file that lost all of its file extents: one
 * hole is punched from offset 0 up to isize rounded up to the sector
 * size. A message naming the inode and root is printed.
 */
3211 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3212 struct btrfs_root *root,
3213 struct btrfs_path *path,
3214 struct inode_record *rec)
3216 struct rb_node *node;
3217 struct file_extent_hole *hole;
3221 node = rb_first(&rec->holes);
3225 hole = rb_entry(node, struct file_extent_hole, node);
3226 ret = btrfs_punch_hole(trans, root, rec->ino,
3227 hole->start, hole->len);
3230 ret = del_file_extent_hole(&rec->holes, hole->start,
3234 if (RB_EMPTY_ROOT(&rec->holes))
3235 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3236 node = rb_first(&rec->holes);
3238 /* special case for a file losing all its file extent */
3240 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3241 round_up(rec->isize,
3242 root->fs_info->sectorsize));
3246 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3247 rec->ino, root->objectid);
/*
 * Run the per-inode repair routines for the error bits set in
 * @rec->errors.
 *
 * Only the seven error bits tested first are handled here; the branch
 * taken when none of them is set is not shown in the visible lines.
 * A single transaction reserving 7 items is started (the breakdown is
 * given in the existing comment) and the repairs run in a fixed order:
 * missing inode item, orphan extents, discount extents, directory
 * isize, orphan item, nlinks, nbytes. Each step after the first is
 * skipped once an earlier one has set ret to non-zero.
 *
 * The transaction is committed and the path released regardless of ret.
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * used in the visible code.
 */
3252 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3254 struct btrfs_trans_handle *trans;
3255 struct btrfs_path path;
3258 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3259 I_ERR_NO_ORPHAN_ITEM |
3260 I_ERR_LINK_COUNT_WRONG |
3261 I_ERR_NO_INODE_ITEM |
3262 I_ERR_FILE_EXTENT_ORPHAN |
3263 I_ERR_FILE_EXTENT_DISCOUNT|
3264 I_ERR_FILE_NBYTES_WRONG)))
3268 * For nlink repair, it may create a dir and add link, so
3269 * 2 for parent(256)'s dir_index and dir_item
3270 * 2 for lost+found dir's inode_item and inode_ref
3271 * 1 for the new inode_ref of the file
3272 * 2 for lost+found dir's dir_index and dir_item for the file
3274 trans = btrfs_start_transaction(root, 7);
3276 return PTR_ERR(trans);
3278 btrfs_init_path(&path);
3279 if (rec->errors & I_ERR_NO_INODE_ITEM)
3280 ret = repair_inode_no_item(trans, root, &path, rec);
3281 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3282 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3283 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3284 ret = repair_inode_discount_extent(trans, root, &path, rec);
3285 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3286 ret = repair_inode_isize(trans, root, &path, rec);
3287 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3288 ret = repair_inode_orphan_item(trans, root, &path, rec);
3289 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3290 ret = repair_inode_nlinks(trans, root, &path, rec);
3291 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3292 ret = repair_inode_nbytes(trans, root, &path, rec);
3293 btrfs_commit_transaction(trans, root);
3294 btrfs_release_path(&path);
/*
 * Final pass over all inode records collected for @root.
 *
 * Visible phases:
 *  1. A root with zero refs only triggers a warning if @inode_cache is
 *     not empty.
 *  2. In repair mode the cache is walked (in stages, see the existing
 *     comment) and repair_inode_backrefs() is applied to every record
 *     that still has backrefs; a rescan path removes and frees records.
 *  3. The root directory record is fetched and validated with
 *     check_root_dir(); a missing root dir can be recreated with
 *     btrfs_make_root_dir() in its own transaction, otherwise it is
 *     reported.
 *  4. Every remaining record is removed from the cache. Records for the
 *     root dir and BTRFS_ORPHAN_OBJECTID are freed directly. For the
 *     rest: I_ERR_NO_ORPHAN_ITEM is re-evaluated via check_orphan_item(),
 *     I_ERR_NO_INODE_ITEM and I_ERR_LINK_COUNT_WRONG are derived from
 *     the record, try_repair_inode() is attempted, and anything left is
 *     printed together with its unresolved backrefs before the record is
 *     freed.
 *
 * Returns -1 when the local error counter is positive, 0 otherwise.
 * get_inode_rec() failure for the root dir is treated as fatal (BUG_ON).
 */
3298 static int check_inode_recs(struct btrfs_root *root,
3299 struct cache_tree *inode_cache)
3301 struct cache_extent *cache;
3302 struct ptr_node *node;
3303 struct inode_record *rec;
3304 struct inode_backref *backref;
3309 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3311 if (btrfs_root_refs(&root->root_item) == 0) {
3312 if (!cache_tree_empty(inode_cache))
3313 fprintf(stderr, "warning line %d\n", __LINE__);
3318 * We need to repair backrefs first because we could change some of the
3319 * errors in the inode recs.
3321 * We also need to go through and delete invalid backrefs first and then
3322 * add the correct ones second. We do this because we may get EEXIST
3323 * when adding back the correct index because we hadn't yet deleted the
3326 * For example, if we were missing a dir index then the directories
3327 * isize would be wrong, so if we fixed the isize to what we thought it
3328 * would be and then fixed the backref we'd still have a invalid fs, so
3329 * we need to add back the dir index and then check to see if the isize
3334 if (stage == 3 && !err)
3337 cache = search_cache_extent(inode_cache, 0);
3338 while (repair && cache) {
3339 node = container_of(cache, struct ptr_node, cache);
3341 cache = next_cache_extent(cache);
3343 /* Need to free everything up and rescan */
3345 remove_cache_extent(inode_cache, &node->cache);
3347 free_inode_rec(rec);
3351 if (list_empty(&rec->backrefs))
3354 ret = repair_inode_backrefs(root, rec, inode_cache,
3368 rec = get_inode_rec(inode_cache, root_dirid, 0);
3369 BUG_ON(IS_ERR(rec));
3371 ret = check_root_dir(rec);
3373 fprintf(stderr, "root %llu root dir %llu error\n",
3374 (unsigned long long)root->root_key.objectid,
3375 (unsigned long long)root_dirid);
3376 print_inode_error(root, rec);
3381 struct btrfs_trans_handle *trans;
3383 trans = btrfs_start_transaction(root, 1);
3384 if (IS_ERR(trans)) {
3385 err = PTR_ERR(trans);
3390 "root %llu missing its root dir, recreating\n",
3391 (unsigned long long)root->objectid);
3393 ret = btrfs_make_root_dir(trans, root, root_dirid);
3396 btrfs_commit_transaction(trans, root);
3400 fprintf(stderr, "root %llu root dir %llu not found\n",
3401 (unsigned long long)root->root_key.objectid,
3402 (unsigned long long)root_dirid);
3406 cache = search_cache_extent(inode_cache, 0);
3409 node = container_of(cache, struct ptr_node, cache);
3411 remove_cache_extent(inode_cache, &node->cache);
3413 if (rec->ino == root_dirid ||
3414 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3415 free_inode_rec(rec);
3419 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3420 ret = check_orphan_item(root, rec->ino);
3422 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3423 if (can_free_inode_rec(rec)) {
3424 free_inode_rec(rec);
3429 if (!rec->found_inode_item)
3430 rec->errors |= I_ERR_NO_INODE_ITEM;
3431 if (rec->found_link != rec->nlink)
3432 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3434 ret = try_repair_inode(root, rec);
3435 if (ret == 0 && can_free_inode_rec(rec)) {
3436 free_inode_rec(rec);
3442 if (!(repair && ret == 0))
3444 print_inode_error(root, rec);
3445 list_for_each_entry(backref, &rec->backrefs, list) {
3446 if (!backref->found_dir_item)
3447 backref->errors |= REF_ERR_NO_DIR_ITEM;
3448 if (!backref->found_dir_index)
3449 backref->errors |= REF_ERR_NO_DIR_INDEX;
3450 if (!backref->found_inode_ref)
3451 backref->errors |= REF_ERR_NO_INODE_REF;
3452 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3453 " namelen %u name %s filetype %d errors %x",
3454 (unsigned long long)backref->dir,
3455 (unsigned long long)backref->index,
3456 backref->namelen, backref->name,
3457 backref->filetype, backref->errors);
3458 print_ref_error(backref->errors);
3460 free_inode_rec(rec);
3462 return (error > 0) ? -1 : 0;
/*
 * Look up the root record for @objectid in @root_cache, creating it on
 * demand.
 *
 * An existing cache entry covering [objectid, objectid + 1) is converted
 * back to its root_record. Otherwise a zeroed record is allocated, its
 * backref list initialised, and it is inserted into the cache with
 * start = objectid and size = 1.
 *
 * Error returns visible here are ERR_PTR(-ENOMEM) and ERR_PTR(-EEXIST);
 * the former follows the calloc(), the latter the cache insertion.
 * NOTE(review): whether the new record is freed on the -EEXIST path is
 * not shown in the visible lines - confirm.
 */
3465 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3468 struct cache_extent *cache;
3469 struct root_record *rec = NULL;
3472 cache = lookup_cache_extent(root_cache, objectid, 1);
3474 rec = container_of(cache, struct root_record, cache);
3476 rec = calloc(1, sizeof(*rec));
3478 return ERR_PTR(-ENOMEM);
3479 rec->objectid = objectid;
3480 INIT_LIST_HEAD(&rec->backrefs);
3481 rec->cache.start = objectid;
3482 rec->cache.size = 1;
3484 ret = insert_cache_extent(root_cache, &rec->cache);
3486 return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec that matches (@ref_root, @dir, @name), or
 * append a new one.
 *
 * The visible comparison uses ref_root, dir, namelen and the name bytes.
 * A new backref is allocated with room for the name plus a terminating
 * NUL, filled from the arguments (including @index) and added to the
 * tail of rec->backrefs.
 * NOTE(review): the allocation-failure path is not shown in the visible
 * lines - confirm what callers receive in that case.
 */
3491 static struct root_backref *get_root_backref(struct root_record *rec,
3492 u64 ref_root, u64 dir, u64 index,
3493 const char *name, int namelen)
3495 struct root_backref *backref;
3497 list_for_each_entry(backref, &rec->backrefs, list) {
3498 if (backref->ref_root != ref_root || backref->dir != dir ||
3499 backref->namelen != namelen)
3501 if (memcmp(name, backref->name, namelen))
3506 backref = calloc(1, sizeof(*backref) + namelen + 1);
3509 backref->ref_root = ref_root;
3511 backref->index = index;
3512 backref->namelen = namelen;
3513 memcpy(backref->name, name, namelen);
3514 backref->name[namelen] = '\0';
3515 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release one root record given its embedded cache_extent: every
 * backref still on rec->backrefs is unlinked from the list in turn.
 * Used as the per-entry callback of the tree-freeing helper below.
 */
3519 static void free_root_record(struct cache_extent *cache)
3521 struct root_record *rec;
3522 struct root_backref *backref;
3524 rec = container_of(cache, struct root_record, cache);
3525 while (!list_empty(&rec->backrefs)) {
3526 backref = to_root_backref(rec->backrefs.next);
3527 list_del(&backref->list);
/*
 * NOTE(review): presumably this macro defines free_root_recs_tree(),
 * which is called elsewhere in this file - confirm against the macro.
 */
3534 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing subvolume @root_id
 * from @ref_root was found.
 *
 * The root record and the matching backref are looked up or created;
 * failure to get the root record is fatal (BUG_ON). @errors is OR-ed
 * into the backref.
 *
 * For every item type except BTRFS_DIR_ITEM_KEY the index is checked:
 * if an index was already established by a dir index, back ref or
 * forward ref and differs from @index, REF_ERR_INDEX_UNMATCH is set;
 * the assignment backref->index = index is also part of this block.
 *
 * The found_* flag matching @item_type is then set. A second
 * ROOT_REF sets REF_ERR_DUP_ROOT_REF and a second ROOT_BACKREF sets
 * REF_ERR_DUP_ROOT_BACKREF. Once both the forward ref and the dir item
 * have been seen the backref is marked reachable.
 */
3536 static int add_root_backref(struct cache_tree *root_cache,
3537 u64 root_id, u64 ref_root, u64 dir, u64 index,
3538 const char *name, int namelen,
3539 int item_type, int errors)
3541 struct root_record *rec;
3542 struct root_backref *backref;
3544 rec = get_root_rec(root_cache, root_id);
3545 BUG_ON(IS_ERR(rec));
3546 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3549 backref->errors |= errors;
3551 if (item_type != BTRFS_DIR_ITEM_KEY) {
3552 if (backref->found_dir_index || backref->found_back_ref ||
3553 backref->found_forward_ref) {
3554 if (backref->index != index)
3555 backref->errors |= REF_ERR_INDEX_UNMATCH;
3557 backref->index = index;
3561 if (item_type == BTRFS_DIR_ITEM_KEY) {
3562 if (backref->found_forward_ref)
3564 backref->found_dir_item = 1;
3565 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3566 backref->found_dir_index = 1;
3567 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3568 if (backref->found_forward_ref)
3569 backref->errors |= REF_ERR_DUP_ROOT_REF;
3570 else if (backref->found_dir_item)
3572 backref->found_forward_ref = 1;
3573 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3574 if (backref->found_back_ref)
3575 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3576 backref->found_back_ref = 1;
3581 if (backref->found_forward_ref && backref->found_dir_item)
3582 backref->reachable = 1;
/*
 * Drain the per-root cache @src_cache into the global root cache
 * @dst_cache.
 *
 * For the tree reloc root the source cache is simply freed with
 * free_inode_recs_tree(). Otherwise each record is removed from
 * @src_cache, is_child_root() is consulted for rec->ino, and every
 * backref seen as a dir item and/or dir index is forwarded to
 * add_root_backref() with rec->ino as the referenced root and @root as
 * the referencing root. A backref carrying found_inode_ref is treated
 * as a fatal inconsistency (BUG_ON). Each record is freed afterwards.
 * NOTE(review): the return value of add_root_backref() is not used in
 * the visible code.
 */
3586 static int merge_root_recs(struct btrfs_root *root,
3587 struct cache_tree *src_cache,
3588 struct cache_tree *dst_cache)
3590 struct cache_extent *cache;
3591 struct ptr_node *node;
3592 struct inode_record *rec;
3593 struct inode_backref *backref;
3596 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3597 free_inode_recs_tree(src_cache);
3602 cache = search_cache_extent(src_cache, 0);
3605 node = container_of(cache, struct ptr_node, cache);
3607 remove_cache_extent(src_cache, &node->cache);
3610 ret = is_child_root(root, root->objectid, rec->ino);
3616 list_for_each_entry(backref, &rec->backrefs, list) {
3617 BUG_ON(backref->found_inode_ref);
3618 if (backref->found_dir_item)
3619 add_root_backref(dst_cache, rec->ino,
3620 root->root_key.objectid, backref->dir,
3621 backref->index, backref->name,
3622 backref->namelen, BTRFS_DIR_ITEM_KEY,
3624 if (backref->found_dir_index)
3625 add_root_backref(dst_cache, rec->ino,
3626 root->root_key.objectid, backref->dir,
3627 backref->index, backref->name,
3628 backref->namelen, BTRFS_DIR_INDEX_KEY,
3632 free_inode_rec(rec);
/*
 * Verify that every subvolume recorded in @root_cache is referenced and
 * that its root ref / backref / dir item / dir index agree.
 *
 * Phase 1 starts from the record of BTRFS_FS_TREE_OBJECTID and walks the
 * cache: for a record with a non-zero found_ref, a reachable backref
 * whose referencing root has no found_ref is marked unreachable. As the
 * existing fixme says, circular references are not detected.
 *
 * Phase 2 walks the cache again. A record with found_ref == 0 whose
 * objectid lies in [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID]
 * is checked for an orphan item in the tree root and, if it has a root
 * item, reported as "not referenced". For every backref the missing
 * pieces are turned into REF_ERR_NO_* bits, and records with problems
 * are printed together with their unresolved backrefs.
 *
 * Returns 1 when the local error counter is positive, 0 otherwise.
 */
3639 static int check_root_refs(struct btrfs_root *root,
3640 struct cache_tree *root_cache)
3642 struct root_record *rec;
3643 struct root_record *ref_root;
3644 struct root_backref *backref;
3645 struct cache_extent *cache;
3651 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3652 BUG_ON(IS_ERR(rec));
3655 /* fixme: this can not detect circular references */
3658 cache = search_cache_extent(root_cache, 0);
3662 rec = container_of(cache, struct root_record, cache);
3663 cache = next_cache_extent(cache);
3665 if (rec->found_ref == 0)
3668 list_for_each_entry(backref, &rec->backrefs, list) {
3669 if (!backref->reachable)
3672 ref_root = get_root_rec(root_cache,
3674 BUG_ON(IS_ERR(ref_root));
3675 if (ref_root->found_ref > 0)
3678 backref->reachable = 0;
3680 if (rec->found_ref == 0)
3686 cache = search_cache_extent(root_cache, 0);
3690 rec = container_of(cache, struct root_record, cache);
3691 cache = next_cache_extent(cache);
3693 if (rec->found_ref == 0 &&
3694 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3695 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3696 ret = check_orphan_item(root->fs_info->tree_root,
3702 * If we don't have a root item then we likely just have
3703 * a dir item in a snapshot for this root but no actual
3704 * ref key or anything so it's meaningless.
3706 if (!rec->found_root_item)
3709 fprintf(stderr, "fs tree %llu not referenced\n",
3710 (unsigned long long)rec->objectid);
3714 if (rec->found_ref > 0 && !rec->found_root_item)
3716 list_for_each_entry(backref, &rec->backrefs, list) {
3717 if (!backref->found_dir_item)
3718 backref->errors |= REF_ERR_NO_DIR_ITEM;
3719 if (!backref->found_dir_index)
3720 backref->errors |= REF_ERR_NO_DIR_INDEX;
3721 if (!backref->found_back_ref)
3722 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3723 if (!backref->found_forward_ref)
3724 backref->errors |= REF_ERR_NO_ROOT_REF;
3725 if (backref->reachable && backref->errors)
3732 fprintf(stderr, "fs tree %llu refs %u %s\n",
3733 (unsigned long long)rec->objectid, rec->found_ref,
3734 rec->found_root_item ? "" : "not found");
3736 list_for_each_entry(backref, &rec->backrefs, list) {
3737 if (!backref->reachable)
3739 if (!backref->errors && rec->found_root_item)
3741 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3742 " index %llu namelen %u name %s errors %x\n",
3743 (unsigned long long)backref->ref_root,
3744 (unsigned long long)backref->dir,
3745 (unsigned long long)backref->index,
3746 backref->namelen, backref->name,
3748 print_ref_error(backref->errors);
3751 return errors > 0 ? 1 : 0;
/*
 * Decode the ROOT_REF or ROOT_BACKREF item at @slot of @eb and feed it
 * to add_root_backref().
 *
 * dirid, sequence (used as the index) and name_len are read from the
 * item; the name bytes follow the btrfs_root_ref structure. A name
 * longer than BTRFS_NAME_LEN is clamped to BTRFS_NAME_LEN bytes and
 * flagged with REF_ERR_NAME_TOO_LONG so it fits in namebuf.
 *
 * For BTRFS_ROOT_REF_KEY the key offset is passed as the referenced
 * root and the key objectid as the referencing root; for the other key
 * type the two are swapped.
 */
3754 static int process_root_ref(struct extent_buffer *eb, int slot,
3755 struct btrfs_key *key,
3756 struct cache_tree *root_cache)
3762 struct btrfs_root_ref *ref;
3763 char namebuf[BTRFS_NAME_LEN];
3766 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3768 dirid = btrfs_root_ref_dirid(eb, ref);
3769 index = btrfs_root_ref_sequence(eb, ref);
3770 name_len = btrfs_root_ref_name_len(eb, ref);
3772 if (name_len <= BTRFS_NAME_LEN) {
3776 len = BTRFS_NAME_LEN;
3777 error = REF_ERR_NAME_TOO_LONG;
3779 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3781 if (key->type == BTRFS_ROOT_REF_KEY) {
3782 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3783 index, namebuf, len, key->type, error);
3785 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3786 index, namebuf, len, key->type, error);
/*
 * Per-entry release callback for a tree of btrfs_corrupt_block records:
 * recovers the record from its embedded cache_extent.
 */
3791 static void free_corrupt_block(struct cache_extent *cache)
3793 struct btrfs_corrupt_block *corrupt;
3795 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * NOTE(review): presumably this macro defines free_corrupt_blocks_tree(),
 * which is called elsewhere in this file - confirm against the macro.
 */
3799 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3802 * Repair the btree of the given root.
3804 * The fix is to remove the node key in corrupt_blocks cache_tree
3805 * and rebalance the tree.
3806 * After the fix, the btree should be writeable.
3808 static int repair_btree(struct btrfs_root *root,
3809 struct cache_tree *corrupt_blocks)
3811 struct btrfs_trans_handle *trans;
3812 struct btrfs_path path;
3813 struct btrfs_corrupt_block *corrupt;
3814 struct cache_extent *cache;
3815 struct btrfs_key key;
/* Checked before a transaction is started: are any blocks recorded at all? */
3820 if (cache_tree_empty(corrupt_blocks))
/* Both passes below run inside this single transaction. */
3823 trans = btrfs_start_transaction(root, 1);
3824 if (IS_ERR(trans)) {
3825 ret = PTR_ERR(trans);
3826 fprintf(stderr, "Error starting transaction: %s\n",
3830 btrfs_init_path(&path);
/*
 * First pass: for every recorded block, search down to its level using
 * the recorded key, drop the pointer to it from the parent and free
 * the extent that pointer referred to.
 */
3831 cache = first_cache_extent(corrupt_blocks);
3833 corrupt = container_of(cache, struct btrfs_corrupt_block,
3835 level = corrupt->level;
3836 path.lowest_level = level;
3837 key.objectid = corrupt->key.objectid;
3838 key.type = corrupt->key.type;
3839 key.offset = corrupt->key.offset;
3842 * Here we don't want to do any tree balance, since it may
3843 * cause a balance with corrupted brother leaf/node,
3844 * so ins_len set to 0 here.
3845 * Balance will be done after all corrupt node/leaf is deleted.
3847 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3850 offset = btrfs_node_blockptr(path.nodes[level],
3853 /* Remove the ptr */
3854 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3858 * Remove the corresponding extent
3859 * return value is not concerned.
3861 btrfs_release_path(&path);
3862 ret = btrfs_free_extent(trans, root, offset,
3863 root->fs_info->nodesize, 0,
3864 root->root_key.objectid, level - 1, 0);
3865 cache = next_cache_extent(cache);
3868 /* Balance the btree using btrfs_search_slot() */
3869 cache = first_cache_extent(corrupt_blocks);
3871 corrupt = container_of(cache, struct btrfs_corrupt_block,
3873 memcpy(&key, &corrupt->key, sizeof(key));
3874 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3877 /* return will always >0 since it won't find the item */
3879 btrfs_release_path(&path);
3880 cache = next_cache_extent(cache);
3883 btrfs_commit_transaction(trans, root);
3884 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree @root and fold its findings into
 * @root_cache.
 *
 * Visible flow:
 *  - A local corrupt_blocks cache tree is installed in
 *    root->fs_info->corrupt_blocks for the duration of the check and
 *    detached again at the end.
 *  - Unless @root is the tree reloc root, its root record is fetched
 *    and found_root_item is set when the root item has refs.
 *  - Orphan data extents queued on the root are moved onto the inode
 *    records they belong to, flagging I_ERR_FILE_EXTENT_ORPHAN.
 *  - The root block itself is validated with btrfs_check_leaf() or
 *    btrfs_check_node().
 *  - The walk starts at the root node, or - for a root with zero refs
 *    and a non-zero drop_progress - at the drop_progress key and
 *    drop_level (an out-of-range drop level is reported and ignored).
 *    walk_down_tree()/walk_up_tree() then traverse the tree.
 *  - Corrupted tree blocks collected during the walk are listed and
 *    repair_btree() is attempted.
 *  - merge_root_recs() and check_inode_recs() post-process the
 *    collected records; the corrupt block tree and the orphan data
 *    extent list are freed.
 */
3888 static int check_fs_root(struct btrfs_root *root,
3889 struct cache_tree *root_cache,
3890 struct walk_control *wc)
3896 struct btrfs_path path;
3897 struct shared_node root_node;
3898 struct root_record *rec;
3899 struct btrfs_root_item *root_item = &root->root_item;
3900 struct cache_tree corrupt_blocks;
3901 struct orphan_data_extent *orphan;
3902 struct orphan_data_extent *tmp;
3903 enum btrfs_tree_block_status status;
3904 struct node_refs nrefs;
3907 * Reuse the corrupt_block cache tree to record corrupted tree block
3909 * Unlike the usage in extent tree check, here we do it in a per
3910 * fs/subvol tree base.
3912 cache_tree_init(&corrupt_blocks);
3913 root->fs_info->corrupt_blocks = &corrupt_blocks;
3915 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3916 rec = get_root_rec(root_cache, root->root_key.objectid);
3917 BUG_ON(IS_ERR(rec));
3918 if (btrfs_root_refs(root_item) > 0)
3919 rec->found_root_item = 1;
3922 btrfs_init_path(&path);
3923 memset(&root_node, 0, sizeof(root_node));
3924 cache_tree_init(&root_node.root_cache);
3925 cache_tree_init(&root_node.inode_cache);
3926 memset(&nrefs, 0, sizeof(nrefs));
3928 /* Move the orphan extent record to corresponding inode_record */
3929 list_for_each_entry_safe(orphan, tmp,
3930 &root->orphan_data_extents, list) {
3931 struct inode_record *inode;
3933 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3935 BUG_ON(IS_ERR(inode));
3936 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3937 list_move(&orphan->list, &inode->orphan_extents);
3940 level = btrfs_header_level(root->node);
3941 memset(wc->nodes, 0, sizeof(wc->nodes));
3942 wc->nodes[level] = &root_node;
3943 wc->active_node = level;
3944 wc->root_level = level;
3946 /* We may not have checked the root block, lets do that now */
3947 if (btrfs_is_leaf(root->node))
3948 status = btrfs_check_leaf(root, NULL, root->node);
3950 status = btrfs_check_node(root, NULL, root->node);
3951 if (status != BTRFS_TREE_BLOCK_CLEAN)
3954 if (btrfs_root_refs(root_item) > 0 ||
3955 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3956 path.nodes[level] = root->node;
3957 extent_buffer_get(root->node);
3958 path.slots[level] = 0;
3960 struct btrfs_key key;
3961 struct btrfs_disk_key found_key;
3963 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3964 level = root_item->drop_level;
3965 path.lowest_level = level;
3966 if (level > btrfs_header_level(root->node) ||
3967 level >= BTRFS_MAX_LEVEL) {
3968 error("ignoring invalid drop level: %u", level);
3971 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3974 btrfs_node_key(path.nodes[level], &found_key,
3976 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3977 sizeof(found_key)));
3981 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3987 wret = walk_up_tree(root, &path, wc, &level);
3994 btrfs_release_path(&path);
3996 if (!cache_tree_empty(&corrupt_blocks)) {
3997 struct cache_extent *cache;
3998 struct btrfs_corrupt_block *corrupt;
4000 printf("The following tree block(s) is corrupted in tree %llu:\n",
4001 root->root_key.objectid);
4002 cache = first_cache_extent(&corrupt_blocks);
4004 corrupt = container_of(cache,
4005 struct btrfs_corrupt_block,
4007 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4008 cache->start, corrupt->level,
4009 corrupt->key.objectid, corrupt->key.type,
4010 corrupt->key.offset);
4011 cache = next_cache_extent(cache);
4014 printf("Try to repair the btree for root %llu\n",
4015 root->root_key.objectid);
4016 ret = repair_btree(root, &corrupt_blocks);
4018 fprintf(stderr, "Failed to repair btree: %s\n",
4021 printf("Btree for root %llu is fixed\n",
4022 root->root_key.objectid);
4026 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4030 if (root_node.current) {
4031 root_node.current->checked = 1;
4032 maybe_free_inode_rec(&root_node.inode_cache,
4036 err = check_inode_recs(root, &root_node.inode_cache);
4040 free_corrupt_blocks_tree(&corrupt_blocks);
4041 root->fs_info->corrupt_blocks = NULL;
4042 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether @objectid names a tree that check_fs_roots() should
 * walk as an fs root. The tree reloc and data reloc tree objectids are
 * tested explicitly; every other objectid is delegated to is_fstree().
 * NOTE(review): the value returned for the two reloc trees is not shown
 * in the visible lines - confirm.
 */
4046 static int fs_root_objectid(u64 objectid)
4048 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4049 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4051 return is_fstree(objectid);
/*
 * Iterate over the tree root and check every fs/subvolume tree found.
 *
 * Visible flow:
 *  - Optional progress reporting is started (TASK_FS_ROOTS) and stopped
 *    at the end.
 *  - Cached block groups are reset, and the shared walk_control and the
 *    path are initialised.
 *  - The tree root is scanned item by item. A ROOT_ITEM whose objectid
 *    passes fs_root_objectid() is loaded (uncached for the tree reloc
 *    root, via btrfs_read_fs_root() with offset (u64)-1 otherwise) and
 *    handed to check_fs_root(); an uncached reloc root is freed
 *    afterwards. ROOT_REF and ROOT_BACKREF items go to
 *    process_root_ref().
 *  - If the tree root node changed since the scan started, or
 *    check_fs_root() returned -EAGAIN, the collected root records are
 *    dropped and the path released.
 *    NOTE(review): presumably the scan is then restarted - confirm.
 *  - The shared cache is freed; a warning is printed if it is still
 *    non-empty afterwards.
 */
4054 static int check_fs_roots(struct btrfs_root *root,
4055 struct cache_tree *root_cache)
4057 struct btrfs_path path;
4058 struct btrfs_key key;
4059 struct walk_control wc;
4060 struct extent_buffer *leaf, *tree_node;
4061 struct btrfs_root *tmp_root;
4062 struct btrfs_root *tree_root = root->fs_info->tree_root;
4066 if (ctx.progress_enabled) {
4067 ctx.tp = TASK_FS_ROOTS;
4068 task_start(ctx.info);
4072 * Just in case we made any changes to the extent tree that weren't
4073 * reflected into the free space cache yet.
4076 reset_cached_block_groups(root->fs_info);
4077 memset(&wc, 0, sizeof(wc));
4078 cache_tree_init(&wc.shared);
4079 btrfs_init_path(&path);
4084 key.type = BTRFS_ROOT_ITEM_KEY;
4085 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4090 tree_node = tree_root->node;
4092 if (tree_node != tree_root->node) {
4093 free_root_recs_tree(root_cache);
4094 btrfs_release_path(&path);
4097 leaf = path.nodes[0];
4098 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4099 ret = btrfs_next_leaf(tree_root, &path);
4105 leaf = path.nodes[0];
4107 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4108 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4109 fs_root_objectid(key.objectid)) {
4110 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4111 tmp_root = btrfs_read_fs_root_no_cache(
4112 root->fs_info, &key);
4114 key.offset = (u64)-1;
4115 tmp_root = btrfs_read_fs_root(
4116 root->fs_info, &key);
4118 if (IS_ERR(tmp_root)) {
4122 ret = check_fs_root(tmp_root, root_cache, &wc);
4123 if (ret == -EAGAIN) {
4124 free_root_recs_tree(root_cache);
4125 btrfs_release_path(&path);
4130 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4131 btrfs_free_fs_root(tmp_root);
4132 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4133 key.type == BTRFS_ROOT_BACKREF_KEY) {
4134 process_root_ref(leaf, path.slots[0], &key,
4141 btrfs_release_path(&path);
4143 free_extent_cache_tree(&wc.shared);
4144 if (!cache_tree_empty(&wc.shared))
4145 fprintf(stderr, "warning line %d\n", __LINE__);
4147 task_stop(ctx.info);
4153 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4154 * INODE_REF/INODE_EXTREF match.
4156 * @root: the root of the fs/file tree
4157 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4158 * @key: the key of the DIR_ITEM/DIR_INDEX
4159 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4160 * distinguish root_dir from normal dir/file
4161 * @name: the name in the INODE_REF/INODE_EXTREF
4162 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4163 * @mode: the st_mode of INODE_ITEM
4165 * Return 0 if no error occurred.
4166 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4167 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4169 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4170 * do not match for normal dir/file.
4172 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4173 struct btrfs_key *key, u64 index, char *name,
4174 u32 namelen, u32 mode)
4176 struct btrfs_path path;
4177 struct extent_buffer *node;
4178 struct btrfs_dir_item *di;
4179 struct btrfs_key location;
4180 char namebuf[BTRFS_NAME_LEN] = {0};
/* Exact-key lookup of the DIR_ITEM/DIR_INDEX described by @key */
4190 btrfs_init_path(&path);
4191 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4193 ret = DIR_ITEM_MISSING;
4197 /* Process root dir and goto out */
4200 ret = ROOT_DIR_ERROR;
4202 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4204 ref_key->type == BTRFS_INODE_REF_KEY ?
4206 ref_key->objectid, ref_key->offset,
4207 key->type == BTRFS_DIR_ITEM_KEY ?
4208 "DIR_ITEM" : "DIR_INDEX");
4216 /* Process normal file/dir */
4218 ret = DIR_ITEM_MISSING;
4220 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4222 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4223 ref_key->objectid, ref_key->offset,
4224 key->type == BTRFS_DIR_ITEM_KEY ?
4225 "DIR_ITEM" : "DIR_INDEX",
4226 key->objectid, key->offset, namelen, name,
4227 imode_to_type(mode));
4231 /* Check whether inode_id/filetype/name match */
4232 node = path.nodes[0];
4233 slot = path.slots[0];
4234 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4235 total = btrfs_item_size_nr(node, slot);
/*
 * One item may pack several entries back to back; each entry is a
 * btrfs_dir_item header followed by name_len + data_len bytes.
 */
4236 while (cur < total) {
4237 ret = DIR_ITEM_MISMATCH;
4238 name_len = btrfs_dir_name_len(node, di);
4239 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM of the referencing inode */
4241 btrfs_dir_item_key_to_cpu(node, di, &location);
4242 if (location.objectid != ref_key->objectid ||
4243 location.type != BTRFS_INODE_ITEM_KEY ||
4244 location.offset != 0)
4247 filetype = btrfs_dir_type(node, di);
4248 if (imode_to_type(mode) != filetype)
/*
 * A name that overruns the item or BTRFS_NAME_LEN only triggers a
 * warning; the comparison below then uses a clamped length.
 */
4251 if (cur + sizeof(*di) + name_len > total ||
4252 name_len > BTRFS_NAME_LEN) {
4253 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4255 key->type == BTRFS_DIR_ITEM_KEY ?
4256 "DIR_ITEM" : "DIR_INDEX",
4257 key->objectid, key->offset, name_len);
4259 if (cur + sizeof(*di) > total)
4261 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes start right after the fixed-size header */
4267 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4268 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next entry packed in the same item */
4274 len = sizeof(*di) + name_len + data_len;
4275 di = (struct btrfs_dir_item *)((char *)di + len);
4278 if (ret == DIR_ITEM_MISMATCH)
4280 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4282 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4283 ref_key->objectid, ref_key->offset,
4284 key->type == BTRFS_DIR_ITEM_KEY ?
4285 "DIR_ITEM" : "DIR_INDEX",
4286 key->objectid, key->offset, namelen, name,
4287 imode_to_type(mode));
4289 btrfs_release_path(&path);
4294 * Traverse the given INODE_REF and call find_dir_item() to find related
4295 * DIR_ITEM/DIR_INDEX.
4297 * @root: the root of the fs/file tree
4298 * @ref_key: the key of the INODE_REF
4299 * @refs: the count of INODE_REF
4300 * @mode: the st_mode of INODE_ITEM
4302 * Return 0 if no error occurred.
4304 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4305 struct extent_buffer *node, int slot, u64 *refs,
4308 struct btrfs_key key;
4309 struct btrfs_inode_ref *ref;
4310 char namebuf[BTRFS_NAME_LEN] = {0};
4318 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4319 total = btrfs_item_size_nr(node, slot);
4322 /* Update inode ref count */
4325 index = btrfs_inode_ref_index(node, ref);
4326 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * An oversized name is only warned about; the bytes actually read are
 * clamped to what is left in the item and to BTRFS_NAME_LEN.
 */
4327 if (cur + sizeof(*ref) + name_len > total ||
4328 name_len > BTRFS_NAME_LEN) {
4329 warning("root %llu INODE_REF[%llu %llu] name too long",
4330 root->objectid, ref_key->objectid, ref_key->offset);
4332 if (total < cur + sizeof(*ref))
4334 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name is stored immediately after the btrfs_inode_ref header */
4339 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4341 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length here is name_len (the on-disk
 * value), not the clamped len used for the read above - confirm intent.
 */
4342 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4343 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4344 root->objectid, ref_key->objectid, ref_key->offset,
4346 err |= ROOT_DIR_ERROR;
4349 /* Find related DIR_INDEX */
/* The INODE_REF key offset is used as the objectid of the directory keys */
4350 key.objectid = ref_key->offset;
4351 key.type = BTRFS_DIR_INDEX_KEY;
4353 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4356 /* Find related dir_item */
4357 key.objectid = ref_key->offset;
4358 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys carry the hash of the name in their offset */
4359 key.offset = btrfs_name_hash(namebuf, len);
4360 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over this entry; the stride uses the on-disk name_len */
4363 len = sizeof(*ref) + name_len;
4364 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4374 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4375 * DIR_ITEM/DIR_INDEX.
4377 * @root: the root of the fs/file tree
4378 * @ref_key: the key of the INODE_EXTREF
4379 * @refs: the count of INODE_EXTREF
4380 * @mode: the st_mode of INODE_ITEM
4382 * Return 0 if no error occurred.
4384 static int check_inode_extref(struct btrfs_root *root,
4385 struct btrfs_key *ref_key,
4386 struct extent_buffer *node, int slot, u64 *refs,
4389 struct btrfs_key key;
4390 struct btrfs_inode_extref *extref;
4391 char namebuf[BTRFS_NAME_LEN] = {0};
4401 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4402 total = btrfs_item_size_nr(node, slot);
4405 /* update inode ref count */
/* The parent directory id is read from the extref entry itself */
4407 name_len = btrfs_inode_extref_name_len(node, extref);
4408 index = btrfs_inode_extref_index(node, extref);
4409 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Names longer than BTRFS_NAME_LEN are read only up to BTRFS_NAME_LEN.
 * NOTE(review): no bound against the item size (total) is visible here,
 * unlike the cur + sizeof(*ref) + name_len > total test in
 * check_inode_ref() - confirm.
 */
4410 if (name_len <= BTRFS_NAME_LEN) {
4413 len = BTRFS_NAME_LEN;
4414 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4415 root->objectid, ref_key->objectid, ref_key->offset);
/* The name is stored immediately after the btrfs_inode_extref header */
4417 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4419 /* Check root dir ref name */
4420 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4421 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4422 root->objectid, ref_key->objectid, ref_key->offset,
4424 err |= ROOT_DIR_ERROR;
4427 /* find related dir_index */
4428 key.objectid = parent;
4429 key.type = BTRFS_DIR_INDEX_KEY;
4431 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4434 /* find related dir_item */
4435 key.objectid = parent;
4436 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys carry the hash of the name in their offset */
4437 key.offset = btrfs_name_hash(namebuf, len);
4438 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over this entry; the stride uses the on-disk name_len */
4441 len = sizeof(*extref) + name_len;
4442 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4452 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4453 * DIR_ITEM/DIR_INDEX match.
4455 * @root: the root of the fs/file tree
4456 * @key: the key of the INODE_REF/INODE_EXTREF
4457 * @name: the name in the INODE_REF/INODE_EXTREF
4458 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4459 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4461 * @ext_ref: the EXTENDED_IREF feature
4463 * Return 0 if no error occurred.
4464 * Return >0 for error bitmap
4466 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4467 char *name, int namelen, u64 index,
4468 unsigned int ext_ref)
4470 struct btrfs_path path;
4471 struct btrfs_inode_ref *ref;
4472 struct btrfs_inode_extref *extref;
4473 struct extent_buffer *node;
4474 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First pass: look the name up in the INODE_REF item addressed by @key */
4485 btrfs_init_path(&path);
4486 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4488 ret = INODE_REF_MISSING;
4492 node = path.nodes[0];
4493 slot = path.slots[0];
4495 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4496 total = btrfs_item_size_nr(node, slot);
4498 /* Iterate over all entries of INODE_REF */
4499 while (cur < total) {
4500 ret = INODE_REF_MISSING;
4502 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4503 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 disables the index comparison */
4504 if (index != (u64)-1 && index != ref_index)
4507 if (cur + sizeof(*ref) + ref_namelen > total ||
4508 ref_namelen > BTRFS_NAME_LEN) {
4509 warning("root %llu INODE %s[%llu %llu] name too long",
4511 key->type == BTRFS_INODE_REF_KEY ?
4513 key->objectid, key->offset);
4515 if (cur + sizeof(*ref) > total)
4517 len = min_t(u32, total - cur - sizeof(*ref),
4523 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4526 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step to the next entry packed in the same item */
4532 len = sizeof(*ref) + ref_namelen;
4533 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4538 /* Skip if the EXTENDED_IREF feature is not supported */
4542 btrfs_release_path(&path);
4543 btrfs_init_path(&path);
/*
 * Second pass: the caller's key is rewritten in place to address the
 * INODE_EXTREF item. Its old offset is kept as dir_id and the new offset
 * is the extref hash of (dir_id, name).
 */
4545 dir_id = key->offset;
4546 key->type = BTRFS_INODE_EXTREF_KEY;
4547 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4549 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4551 ret = INODE_REF_MISSING;
4555 node = path.nodes[0];
4556 slot = path.slots[0];
4558 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4560 total = btrfs_item_size_nr(node, slot);
4562 /* Iterate over all entries of INODE_EXTREF */
4563 while (cur < total) {
4564 ret = INODE_REF_MISSING;
4566 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4567 ref_index = btrfs_inode_extref_index(node, extref);
4568 parent = btrfs_inode_extref_parent(node, extref);
4569 if (index != (u64)-1 && index != ref_index)
/* Entries that belong to a different parent directory do not count */
4572 if (parent != dir_id)
4575 if (ref_namelen <= BTRFS_NAME_LEN) {
4578 len = BTRFS_NAME_LEN;
/*
 * NOTE(review): key->type was set to BTRFS_INODE_EXTREF_KEY above, so the
 * first branch of the conditional in this warning cannot be taken here.
 */
4579 warning("root %llu INODE %s[%llu %llu] name too long",
4581 key->type == BTRFS_INODE_REF_KEY ?
4583 key->objectid, key->offset);
4585 read_extent_buffer(node, ref_namebuf,
4586 (unsigned long)(extref + 1), len);
4588 if (len != namelen || strncmp(ref_namebuf, name, len))
4595 len = sizeof(*extref) + ref_namelen;
4596 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4601 btrfs_release_path(&path);
4606 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4607 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4609 * @root: the root of the fs/file tree
4610 * @key: the key of the DIR_ITEM/DIR_INDEX
4611 * @size: sum of entry name lengths, checked against the st_size of the INODE_ITEM
4612 * @ext_ref: the EXTENDED_IREF feature
4614 * Return 0 if no error occurred.
4616 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4617 struct extent_buffer *node, int slot, u64 *size,
4618 unsigned int ext_ref)
4620 struct btrfs_dir_item *di;
4621 struct btrfs_inode_item *ii;
4622 struct btrfs_path path;
4623 struct btrfs_key location;
4624 char namebuf[BTRFS_NAME_LEN] = {0};
4637 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4638 * ignore index check.
4640 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4642 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4643 total = btrfs_item_size_nr(node, slot);
/* Each pass handles one entry: header, then name_len + data_len bytes */
4645 while (cur < total) {
4646 data_len = btrfs_dir_data_len(node, di);
4648 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4649 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650 "DIR_ITEM" : "DIR_INDEX",
4651 key->objectid, key->offset, data_len);
4653 name_len = btrfs_dir_name_len(node, di);
4654 if (cur + sizeof(*di) + name_len > total ||
4655 name_len > BTRFS_NAME_LEN) {
4656 warning("root %llu %s[%llu %llu] name too long",
4658 key->type == BTRFS_DIR_ITEM_KEY ?
4659 "DIR_ITEM" : "DIR_INDEX",
4660 key->objectid, key->offset);
4662 if (cur + sizeof(*di) > total)
4664 len = min_t(u32, total - cur - sizeof(*di),
/* The caller's running size grows by the on-disk name length of every entry */
4669 (*size) += name_len;
4671 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4672 filetype = btrfs_dir_type(node, di);
/* For a DIR_ITEM the key offset has to equal the hash of the entry name */
4674 if (key->type == BTRFS_DIR_ITEM_KEY &&
4675 key->offset != btrfs_name_hash(namebuf, len)) {
4677 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4678 root->objectid, key->objectid, key->offset,
4679 namebuf, len, filetype, key->offset,
4680 btrfs_name_hash(namebuf, len));
4683 btrfs_init_path(&path);
4684 btrfs_dir_item_key_to_cpu(node, di, &location);
4686 /* Ignore related ROOT_ITEM check */
4687 if (location.type == BTRFS_ROOT_ITEM_KEY)
4690 /* Check relative INODE_ITEM(existence/filetype) */
4691 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4693 err |= INODE_ITEM_MISSING;
4694 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4695 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4696 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4697 key->offset, location.objectid, name_len,
4702 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4703 struct btrfs_inode_item);
4704 mode = btrfs_inode_mode(path.nodes[0], ii);
4706 if (imode_to_type(mode) != filetype) {
4707 err |= INODE_ITEM_MISMATCH;
4708 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4709 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4710 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4711 key->offset, name_len, namebuf, filetype);
4714 /* Check relative INODE_REF/INODE_EXTREF */
/* Re-key location as (inode, INODE_REF, this directory) for the lookup */
4715 location.type = BTRFS_INODE_REF_KEY;
4716 location.offset = key->objectid;
4717 ret = find_inode_ref(root, &location, namebuf, len,
4720 if (ret & INODE_REF_MISSING)
4721 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4722 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4723 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4724 key->offset, name_len, namebuf, filetype);
4727 btrfs_release_path(&path);
/* Step to the next entry packed in the same item */
4728 len = sizeof(*di) + name_len + data_len;
4729 di = (struct btrfs_dir_item *)((char *)di + len);
/* Bytes left over after an entry of a DIR_INDEX item are reported */
4732 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4733 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4734 root->objectid, key->objectid, key->offset);
4743 * Check file extent datasum/hole, update the size of the file extents,
4744 * check and update the last offset of the file extent.
4746 * @root: the root of fs/file tree.
4747 * @fkey: the key of the file extent.
4748 * @nodatasum: INODE_NODATASUM feature.
4749 * @size: the sum of all EXTENT_DATA items size for this inode.
4750 * @end: the offset of the last extent.
4752 * Return 0 if no error occurred.
4754 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4755 struct extent_buffer *node, int slot,
4756 unsigned int nodatasum, u64 *size, u64 *end)
4758 struct btrfs_file_extent_item *fi;
4761 u64 extent_num_bytes;
4763 u64 csum_found; /* In byte size, sectorsize aligned */
4764 u64 search_start; /* Logical range start we search for csum */
4765 u64 search_len; /* Logical range len we search for csum */
4766 unsigned int extent_type;
4767 unsigned int is_hole;
4772 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4774 /* Check inline extent */
4775 extent_type = btrfs_file_extent_type(node, fi);
4776 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4777 struct btrfs_item *e = btrfs_item_nr(slot);
4778 u32 item_inline_len;
4780 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4781 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4782 compressed = btrfs_file_extent_compression(node, fi);
4783 if (extent_num_bytes == 0) {
4785 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4786 root->objectid, fkey->objectid, fkey->offset);
4787 err |= FILE_EXTENT_ERROR;
/* The two lengths are only required to agree for uncompressed inline data */
4789 if (!compressed && extent_num_bytes != item_inline_len) {
4791 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4792 root->objectid, fkey->objectid, fkey->offset,
4793 extent_num_bytes, item_inline_len);
4794 err |= FILE_EXTENT_ERROR;
4796 *end += extent_num_bytes;
4797 *size += extent_num_bytes;
4801 /* Check extent type */
4802 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4803 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4804 err |= FILE_EXTENT_ERROR;
4805 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4806 root->objectid, fkey->objectid, fkey->offset);
4810 /* Check REG_EXTENT/PREALLOC_EXTENT */
4811 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4812 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4813 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4814 extent_offset = btrfs_file_extent_offset(node, fi);
4815 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is recorded as disk_bytenr == 0 together with disk_num_bytes == 0 */
4816 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4819 * Check EXTENT_DATA csum
4821 * For plain (uncompressed) extent, we should only check the range
4822 * we're referring to, as it's possible that part of prealloc extent
4823 * has been written, and has csum:
4825 * |<--- Original large preallocated extent A ---->|
4826 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4829 * For compressed extent, we should check the whole range.
4832 search_start = disk_bytenr + extent_offset;
4833 search_len = extent_num_bytes;
4835 search_start = disk_bytenr;
4836 search_len = disk_num_bytes;
/*
 * NOTE(review): csum_found is declared without an initializer and is read
 * below even when ret < 0; whether count_csum_range() always stores to it
 * is not visible here - confirm.
 */
4838 ret = count_csum_range(root, search_start, search_len, &csum_found);
4839 if (csum_found > 0 && nodatasum) {
4840 err |= ODD_CSUM_ITEM;
4841 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4842 root->objectid, fkey->objectid, fkey->offset);
4843 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4844 !is_hole && (ret < 0 || csum_found < search_len)) {
4845 err |= CSUM_ITEM_MISSING;
4846 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4847 root->objectid, fkey->objectid, fkey->offset,
4848 csum_found, search_len);
4849 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4850 err |= ODD_CSUM_ITEM;
4851 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4852 root->objectid, fkey->objectid, fkey->offset, csum_found);
4855 /* Check EXTENT_DATA hole */
/* When no_holes is not set, an extent must start where the previous one ended */
4856 if (!no_holes && *end != fkey->offset) {
4857 err |= FILE_EXTENT_ERROR;
4858 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4859 root->objectid, fkey->objectid, fkey->offset);
4862 *end += extent_num_bytes;
4864 *size += extent_num_bytes;
4870 * Check INODE_ITEM and related ITEMs (the same inode number)
4871 * 1. check link count
4872 * 2. check inode ref/extref
4873 * 3. check dir item/index
4875 * @ext_ref: the EXTENDED_IREF feature
4877 * Return 0 if no error occurred.
4878 * Return >0 (an error bitmap) on error or when the traversal is done
4880 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4881 unsigned int ext_ref)
4883 struct extent_buffer *node;
4884 struct btrfs_inode_item *ii;
4885 struct btrfs_key key;
4894 u64 extent_size = 0;
4896 unsigned int nodatasum;
4901 node = path->nodes[0];
4902 slot = path->slots[0];
4904 btrfs_item_key_to_cpu(node, &key, slot);
4905 inode_id = key.objectid;
/* The orphan objectid gets separate handling: the path is simply advanced */
4907 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4908 ret = btrfs_next_item(root, path);
/*
 * Snapshot the INODE_ITEM fields now; they are verified further down,
 * after the items that follow the INODE_ITEM have been processed.
 */
4914 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4915 isize = btrfs_inode_size(node, ii);
4916 nbytes = btrfs_inode_nbytes(node, ii);
4917 mode = btrfs_inode_mode(node, ii);
4918 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4919 nlink = btrfs_inode_nlink(node, ii);
4920 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4923 ret = btrfs_next_item(root, path);
4925 /* out will fill 'err' using current statistics */
4927 } else if (ret > 0) {
4932 node = path->nodes[0];
4933 slot = path->slots[0];
4934 btrfs_item_key_to_cpu(node, &key, slot);
/* Only items whose objectid equals inode_id belong to this inode */
4935 if (key.objectid != inode_id)
4939 case BTRFS_INODE_REF_KEY:
4940 ret = check_inode_ref(root, &key, node, slot, &refs,
4944 case BTRFS_INODE_EXTREF_KEY:
/* NOTE(review): the key.type test is redundant inside its own case label */
4945 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4946 warning("root %llu EXTREF[%llu %llu] isn't supported",
4947 root->objectid, key.objectid,
4949 ret = check_inode_extref(root, &key, node, slot, &refs,
4953 case BTRFS_DIR_ITEM_KEY:
4954 case BTRFS_DIR_INDEX_KEY:
4956 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4957 root->objectid, inode_id,
4958 imode_to_type(mode), key.objectid,
4961 ret = check_dir_item(root, &key, node, slot, &size,
4965 case BTRFS_EXTENT_DATA_KEY:
4967 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4968 root->objectid, inode_id, key.objectid,
4971 ret = check_file_extent(root, &key, node, slot,
4972 nodatasum, &extent_size,
4976 case BTRFS_XATTR_ITEM_KEY:
4979 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4980 key.objectid, key.type, key.offset);
4985 /* verify INODE_ITEM nlink/isize/nbytes */
4988 err |= LINK_COUNT_ERROR;
4989 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4990 root->objectid, inode_id, nlink);
4994 * Just a warning, as dir inode nbytes is just an
4995 * instructive value.
4997 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4998 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4999 root->objectid, inode_id,
5000 root->fs_info->nodesize);
/* size was accumulated by check_dir_item() from the entry name lengths */
5003 if (isize != size) {
5005 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5006 root->objectid, inode_id, isize, size);
/* refs was accumulated by check_inode_ref()/check_inode_extref() */
5009 if (nlink != refs) {
5010 err |= LINK_COUNT_ERROR;
5011 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5012 root->objectid, inode_id, nlink, refs);
5013 } else if (!nlink) {
5017 if (!nbytes && !no_holes && extent_end < isize) {
5018 err |= NBYTES_ERROR;
5019 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5020 root->objectid, inode_id, isize);
/* extent_size was accumulated by check_file_extent() */
5023 if (nbytes != extent_size) {
5024 err |= NBYTES_ERROR;
5025 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5026 root->objectid, inode_id, nbytes, extent_size);
/*
 * Check the first inode of @root explicitly.
 *
 * Searches @root for the INODE_ITEM whose objectid is
 * BTRFS_FIRST_FREE_OBJECTID and runs check_inode_item() on it. A failed
 * lookup is recorded as INODE_ITEM_MISSING.
 *
 * @root:    the fs/file tree to check
 * @ext_ref: passed through to check_inode_item()
 *
 * NOTE(review): the exact conditions around the search result and the
 * return statement are not visible in this listing.
 */
5033 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5035 struct btrfs_path path;
5036 struct btrfs_key key;
5040 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5041 key.type = BTRFS_INODE_ITEM_KEY;
5044 /* For root being dropped, we don't need to check first inode */
5045 if (btrfs_root_refs(&root->root_item) == 0 &&
5046 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5050 btrfs_init_path(&path);
5052 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5057 err |= INODE_ITEM_MISSING;
5058 error("first inode item of root %llu is missing",
/* The item check result is OR-ed into the error bitmap */
5062 err |= check_inode_item(root, &path, ext_ref);
5067 btrfs_release_path(&path);
5072 * Iterate over all items in the tree and call check_inode_item() to check them.
5074 * @root: the root of the tree to be checked.
5075 * @ext_ref: the EXTENDED_IREF feature
5077 * Return 0 if no error found.
5078 * Return <0 for error.
5080 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5082 struct btrfs_path path;
5083 struct node_refs nrefs;
5084 struct btrfs_root_item *root_item = &root->root_item;
5090 * We need to manually check the first inode item(256)
5091 * As the following traversal function will only start from
5092 * the first inode item in the leaf, if inode item(256) is missing
5093 * we will just skip it forever.
5095 ret = check_fs_first_inode(root, ext_ref);
5099 memset(&nrefs, 0, sizeof(nrefs));
5100 level = btrfs_header_level(root->node);
5101 btrfs_init_path(&path);
/*
 * A root that is still referenced, or whose drop_progress objectid is 0,
 * is walked from its root node, slot 0.
 */
5103 if (btrfs_root_refs(root_item) > 0 ||
5104 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5105 path.nodes[level] = root->node;
5106 path.slots[level] = 0;
5107 extent_buffer_get(root->node);
/*
 * Otherwise the path is positioned by a search for the drop_progress key,
 * with drop_level as the lowest level of the path.
 */
5109 struct btrfs_key key;
5111 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5112 level = root_item->drop_level;
5113 path.lowest_level = level;
5114 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The tree is traversed by alternating walk-down and walk-up steps */
5121 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5124 /* if ret is negative, walk shall stop */
5130 ret = walk_up_tree_v2(root, &path, &level);
5132 /* Normal exit, reset ret to err */
5139 btrfs_release_path(&path);
5144 * Find the relative ref for root_ref and root_backref.
5146 * @root: the root of the root tree.
5147 * @ref_key: the key of the root ref.
5149 * Return 0 if no error occurred.
5151 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5152 struct extent_buffer *node, int slot)
5154 struct btrfs_path path;
5155 struct btrfs_key key;
5156 struct btrfs_root_ref *ref;
5157 struct btrfs_root_ref *backref;
5158 char ref_name[BTRFS_NAME_LEN] = {0};
5159 char backref_name[BTRFS_NAME_LEN] = {0};
5165 u32 backref_namelen;
/* Read dirid, sequence and name of the item that is being checked */
5170 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5171 ref_dirid = btrfs_root_ref_dirid(node, ref);
5172 ref_seq = btrfs_root_ref_sequence(node, ref);
5173 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are read only up to BTRFS_NAME_LEN bytes */
5175 if (ref_namelen <= BTRFS_NAME_LEN) {
5178 len = BTRFS_NAME_LEN;
5179 warning("%s[%llu %llu] ref_name too long",
5180 ref_key->type == BTRFS_ROOT_REF_KEY ?
5181 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5184 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5186 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset and flips the type:
 * BACKREF + REF - type maps ROOT_REF to ROOT_BACKREF and vice versa.
 */
5187 key.objectid = ref_key->offset;
5188 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5189 key.offset = ref_key->objectid;
5191 btrfs_init_path(&path);
5192 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5194 err |= ROOT_REF_MISSING;
5195 error("%s[%llu %llu] couldn't find relative ref",
5196 ref_key->type == BTRFS_ROOT_REF_KEY ?
5197 "ROOT_REF" : "ROOT_BACKREF",
5198 ref_key->objectid, ref_key->offset);
5202 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5203 struct btrfs_root_ref);
5204 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5205 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5206 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5208 if (backref_namelen <= BTRFS_NAME_LEN) {
5209 len = backref_namelen;
5211 len = BTRFS_NAME_LEN;
5212 warning("%s[%llu %llu] ref_name too long",
5213 key.type == BTRFS_ROOT_REF_KEY ?
5214 "ROOT_REF" : "ROOT_BACKREF",
5215 key.objectid, key.offset);
5217 read_extent_buffer(path.nodes[0], backref_name,
5218 (unsigned long)(backref + 1), len);
/* dirid, sequence, name length and name bytes must all agree */
5220 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5221 ref_namelen != backref_namelen ||
5222 strncmp(ref_name, backref_name, len)) {
5223 err |= ROOT_REF_MISMATCH;
5224 error("%s[%llu %llu] mismatch relative ref",
5225 ref_key->type == BTRFS_ROOT_REF_KEY ?
5226 "ROOT_REF" : "ROOT_BACKREF",
5227 ref_key->objectid, ref_key->offset);
5230 btrfs_release_path(&path);
5235 * Check all fs/file trees in low_memory mode.
5237 * 1. for fs tree root item, call check_fs_root_v2()
5238 * 2. for fs tree root ref/backref, call check_root_ref()
5240 * Return 0 if no error occurred.
5242 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5244 struct btrfs_root *tree_root = fs_info->tree_root;
5245 struct btrfs_root *cur_root = NULL;
5246 struct btrfs_path path;
5247 struct btrfs_key key;
5248 struct extent_buffer *node;
5249 unsigned int ext_ref;
/* Sampled once and handed to every per-root check */
5254 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
/* The scan of the tree root starts at the ROOT_ITEM of BTRFS_FS_TREE_OBJECTID */
5256 btrfs_init_path(&path);
5257 key.objectid = BTRFS_FS_TREE_OBJECTID;
5259 key.type = BTRFS_ROOT_ITEM_KEY;
5261 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5265 } else if (ret > 0) {
5271 node = path.nodes[0];
5272 slot = path.slots[0];
5273 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are outside the scanned range */
5274 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5276 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5277 fs_root_objectid(key.objectid)) {
/*
 * The reloc tree is read with the no_cache variant and is freed
 * explicitly once it has been checked (see below).
 */
5278 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5279 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5282 key.offset = (u64)-1;
5283 cur_root = btrfs_read_fs_root(fs_info, &key);
5286 if (IS_ERR(cur_root)) {
5287 error("Fail to read fs/subvol tree: %lld",
5293 ret = check_fs_root_v2(cur_root, ext_ref);
5296 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5297 btrfs_free_fs_root(cur_root);
5298 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5299 key.type == BTRFS_ROOT_BACKREF_KEY) {
5300 ret = check_root_ref(tree_root, &key, node, slot);
5304 ret = btrfs_next_item(tree_root, &path);
5314 btrfs_release_path(&path);
/*
 * Cross-check every backref on rec->backrefs against the extent record.
 *
 * Per backref the visible checks are: it was found in the extent tree; a
 * tree backref was referenced back; a data backref has found_ref equal to
 * num_refs, a disk_bytenr equal to rec->start and a byte count equal to
 * rec->nr. Finally the accumulated reference count is compared with
 * rec->refs. Diagnostics are written to stderr.
 *
 * NOTE(review): how @print_errs gates the messages and which value is
 * returned are not visible in this listing. maybe_free_extent_rec() negates
 * the result, so zero presumably means "nothing wrong" - confirm.
 */
5318 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5320 struct list_head *cur = rec->backrefs.next;
5321 struct extent_backref *back;
5322 struct tree_backref *tback;
5323 struct data_backref *dback;
5327 while(cur != &rec->backrefs) {
5328 back = to_extent_backref(cur);
5330 if (!back->found_extent_tree) {
5334 if (back->is_data) {
5335 dback = to_data_backref(back);
5336 fprintf(stderr, "Backref %llu %s %llu"
5337 " owner %llu offset %llu num_refs %lu"
5338 " not found in extent tree\n",
5339 (unsigned long long)rec->start,
5340 back->full_backref ?
5342 back->full_backref ?
5343 (unsigned long long)dback->parent:
5344 (unsigned long long)dback->root,
5345 (unsigned long long)dback->owner,
5346 (unsigned long long)dback->offset,
5347 (unsigned long)dback->num_refs);
5349 tback = to_tree_backref(back);
5350 fprintf(stderr, "Backref %llu parent %llu"
5351 " root %llu not found in extent tree\n",
5352 (unsigned long long)rec->start,
5353 (unsigned long long)tback->parent,
5354 (unsigned long long)tback->root);
5357 if (!back->is_data && !back->found_ref) {
5361 tback = to_tree_backref(back);
5362 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5363 (unsigned long long)rec->start,
5364 back->full_backref ? "parent" : "root",
5365 back->full_backref ?
5366 (unsigned long long)tback->parent :
5367 (unsigned long long)tback->root, back);
5369 if (back->is_data) {
5370 dback = to_data_backref(back);
5371 if (dback->found_ref != dback->num_refs) {
5375 fprintf(stderr, "Incorrect local backref count"
5376 " on %llu %s %llu owner %llu"
5377 " offset %llu found %u wanted %u back %p\n",
5378 (unsigned long long)rec->start,
5379 back->full_backref ?
5381 back->full_backref ?
5382 (unsigned long long)dback->parent:
5383 (unsigned long long)dback->root,
5384 (unsigned long long)dback->owner,
5385 (unsigned long long)dback->offset,
5386 dback->found_ref, dback->num_refs, back);
5388 if (dback->disk_bytenr != rec->start) {
5392 fprintf(stderr, "Backref disk bytenr does not"
5393 " match extent record, bytenr=%llu, "
5394 "ref bytenr=%llu\n",
5395 (unsigned long long)rec->start,
5396 (unsigned long long)dback->disk_bytenr);
5399 if (dback->bytes != rec->nr) {
5403 fprintf(stderr, "Backref bytes do not match "
5404 "extent backref, bytenr=%llu, ref "
5405 "bytes=%llu, backref bytes=%llu\n",
5406 (unsigned long long)rec->start,
5407 (unsigned long long)rec->nr,
5408 (unsigned long long)dback->bytes);
5411 if (!back->is_data) {
/* Data backrefs add their found_ref to the running total */
5414 dback = to_data_backref(back);
5415 found += dback->found_ref;
/* The running total over all backrefs has to equal rec->refs */
5418 if (found != rec->refs) {
5422 fprintf(stderr, "Incorrect global backref count "
5423 "on %llu found %llu wanted %llu\n",
5424 (unsigned long long)rec->start,
5425 (unsigned long long)found,
5426 (unsigned long long)rec->refs);
/*
 * Walk rec->backrefs from the head for as long as the list is non-empty,
 * resolving each list node to its enclosing extent_backref.
 *
 * NOTE(review): the unlink/free of each entry and the return value are not
 * visible in this excerpt -- confirm before relying on them.
 */
5432 static int free_all_extent_backrefs(struct extent_record *rec)
5434 struct extent_backref *back;
5435 struct list_head *cur;
5436 while (!list_empty(&rec->backrefs)) {
/* always take the current first entry of the list */
5437 cur = rec->backrefs.next;
5438 back = to_extent_backref(cur);
/*
 * Tear down extent records stored in @extent_cache: take the first cache
 * extent, map it back to its extent_record, remove it from the tree and
 * release the record's backrefs via free_all_extent_backrefs().
 *
 * NOTE(review): the loop structure and the final release of @rec itself are
 * not visible in this excerpt.
 */
5445 static void free_extent_record_cache(struct cache_tree *extent_cache)
5447 struct cache_extent *cache;
5448 struct extent_record *rec;
5451 cache = first_cache_extent(extent_cache);
5454 rec = container_of(cache, struct extent_record, cache);
5455 remove_cache_extent(extent_cache, cache);
5456 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is only removed when ALL of the following hold:
 *  - content_checked and owner_ref_checked are set
 *  - extent_item_refs equals refs, and refs is non-zero
 *  - there are no duplicates
 *  - all_backpointers_checked(rec, 0) returns 0
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set
 *
 * On removal the cache extent is taken out of the tree, the backrefs are
 * released and the record is unlinked from whatever list rec->list is on.
 */
5461 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5462 struct extent_record *rec)
5464 if (rec->content_checked && rec->owner_ref_checked &&
5465 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5466 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5467 !rec->bad_full_backref && !rec->crossing_stripes &&
5468 !rec->wrong_chunk_type) {
5469 remove_cache_extent(extent_cache, &rec->cache);
5470 free_all_extent_backrefs(rec);
5471 list_del_init(&rec->list);
/*
 * Check that tree block @buf is referenced from the tree named by its header
 * owner.
 *
 * Two strategies are visible:
 *  1. Scan rec->backrefs; the found_ref and full_backref flags of each entry
 *     are tested, and a tree backref whose root equals
 *     btrfs_header_owner(buf) is looked for.
 *  2. Otherwise read the owner's root, search it for the first key of @buf
 *     with path.lowest_level set to level + 1, and compare the block pointer
 *     in the parent slot against buf->start.
 *
 * The final return yields 0 when a reference was found and 1 otherwise.
 * NOTE(review): earlier return paths are not visible in this excerpt.
 */
5477 static int check_owner_ref(struct btrfs_root *root,
5478 struct extent_record *rec,
5479 struct extent_buffer *buf)
5481 struct extent_backref *node;
5482 struct tree_backref *back;
5483 struct btrfs_root *ref_root;
5484 struct btrfs_key key;
5485 struct btrfs_path path;
5486 struct extent_buffer *parent;
5491 list_for_each_entry(node, &rec->backrefs, list) {
5494 if (!node->found_ref)
5496 if (node->full_backref)
5498 back = to_tree_backref(node);
5499 if (btrfs_header_owner(buf) == back->root)
5502 BUG_ON(rec->is_root);
5504 /* try to find the block by searching the corresponding fs tree */
5505 key.objectid = btrfs_header_owner(buf);
5506 key.type = BTRFS_ROOT_ITEM_KEY;
5507 key.offset = (u64)-1;
5509 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5510 if (IS_ERR(ref_root))
5513 level = btrfs_header_level(buf);
/* the first key of @buf is read with the item or the node accessor */
5515 btrfs_item_key_to_cpu(buf, &key, 0);
5517 btrfs_node_key_to_cpu(buf, &key, 0);
5519 btrfs_init_path(&path);
/* stop the search one level above @buf so the parent node is in the path */
5520 path.lowest_level = level + 1;
5521 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5525 parent = path.nodes[level + 1];
5526 if (parent && buf->start == btrfs_node_blockptr(parent,
5527 path.slots[level + 1]))
5530 btrfs_release_path(&path);
5531 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec, viewing each as a tree backref, and test
 * whether one of them has root == BTRFS_EXTENT_TREE_OBJECTID. The
 * full_backref flag of each entry is also tested.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably non-zero means "this block belongs to the extent tree" --
 * confirm.
 */
5534 static int is_extent_tree_record(struct extent_record *rec)
5536 struct list_head *cur = rec->backrefs.next;
5537 struct extent_backref *node;
5538 struct tree_backref *back;
5541 while(cur != &rec->backrefs) {
5542 node = to_extent_backref(cur);
5546 back = to_tree_backref(node);
5547 if (node->full_backref)
5549 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, start + len) in @extent_cache
 * and test it with is_extent_tree_record(). On the path that reaches the
 * end, the record's parent key is converted to CPU byte order and passed to
 * btrfs_add_corrupt_extent_record(), whose result is returned.
 *
 * NOTE(review): the early-exit return values (no cache hit, not an extent
 * tree record) are not visible in this excerpt.
 */
5556 static int record_bad_block_io(struct btrfs_fs_info *info,
5557 struct cache_tree *extent_cache,
5560 struct extent_record *rec;
5561 struct cache_extent *cache;
5562 struct btrfs_key key;
5564 cache = lookup_cache_extent(extent_cache, start, len);
5568 rec = container_of(cache, struct extent_record, cache);
5569 if (!is_extent_tree_record(rec))
5572 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5573 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level != 0): the two btrfs_key_ptr structures are read out
 * and written back crosswise. The key of slot 0 is then read and passed to
 * btrfs_fixup_low_keys() for level + 1 (the guarding condition is not
 * visible here; presumably only when the first slot changed).
 *
 * Leaf: both item payloads are copied into temporary heap buffers, written
 * back crosswise, the item offsets and sizes are exchanged, and the two keys
 * are rewritten through btrfs_set_item_key_unsafe() with path->slots[0]
 * pointing at @slot and then @slot + 1.
 *
 * NOTE(review): allocation-failure handling, the release of the temporary
 * buffers and the return value are not visible in this excerpt.
 */
5576 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5577 struct extent_buffer *buf, int slot)
5579 if (btrfs_header_level(buf)) {
5580 struct btrfs_key_ptr ptr1, ptr2;
5582 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5583 sizeof(struct btrfs_key_ptr));
5584 read_extent_buffer(buf, &ptr2,
5585 btrfs_node_key_ptr_offset(slot + 1),
5586 sizeof(struct btrfs_key_ptr));
/* write each key pointer into the other one's slot */
5587 write_extent_buffer(buf, &ptr1,
5588 btrfs_node_key_ptr_offset(slot + 1),
5589 sizeof(struct btrfs_key_ptr));
5590 write_extent_buffer(buf, &ptr2,
5591 btrfs_node_key_ptr_offset(slot),
5592 sizeof(struct btrfs_key_ptr));
5594 struct btrfs_disk_key key;
5595 btrfs_node_key(buf, &key, 0);
5596 btrfs_fixup_low_keys(root, path, &key,
5597 btrfs_header_level(buf) + 1);
5600 struct btrfs_item *item1, *item2;
5601 struct btrfs_key k1, k2;
5602 char *item1_data, *item2_data;
5603 u32 item1_offset, item2_offset, item1_size, item2_size;
5605 item1 = btrfs_item_nr(slot);
5606 item2 = btrfs_item_nr(slot + 1);
5607 btrfs_item_key_to_cpu(buf, &k1, slot);
5608 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5609 item1_offset = btrfs_item_offset(buf, item1);
5610 item2_offset = btrfs_item_offset(buf, item2);
5611 item1_size = btrfs_item_size(buf, item1);
5612 item2_size = btrfs_item_size(buf, item2);
5614 item1_data = malloc(item1_size);
5617 item2_data = malloc(item2_size);
5623 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5624 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write uses the destination item's size, not the size
 * of the buffer being written -- confirm this is intended when the two
 * items differ in size.
 */
5626 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5627 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5631 btrfs_set_item_offset(buf, item1, item2_offset);
5632 btrfs_set_item_offset(buf, item2, item1_offset);
5633 btrfs_set_item_size(buf, item1, item2_size);
5634 btrfs_set_item_size(buf, item2, item1_size);
/* rewrite both keys; path->slots[0] selects which item gets the key */
5636 path->slots[0] = slot;
5637 btrfs_set_item_key_unsafe(root, path, &k2);
5638 path->slots[0] = slot + 1;
5639 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent key pairs (i, i + 1) are read with the node or the item accessor
 * and compared with btrfs_comp_cpu_keys(). The pair is tested for k1 < k2;
 * swap_values() is called for slot i and the buffer is marked dirty.
 *
 * NOTE(review): the loop bound is btrfs_header_nritems(buf) - 1; behaviour
 * for an empty block depends on the integer types involved -- confirm.
 * Error handling of swap_values() and the return value are not visible in
 * this excerpt.
 */
5644 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5646 struct extent_buffer *buf;
5647 struct btrfs_key k1, k2;
5649 int level = path->lowest_level;
5652 buf = path->nodes[level];
5653 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5655 btrfs_node_key_to_cpu(buf, &k1, i);
5656 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5658 btrfs_item_key_to_cpu(buf, &k1, i);
5659 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* presumably pairs already in ascending order are skipped */
5661 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5663 ret = swap_values(root, path, buf, i);
5666 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only a fixed set of key types is accepted (DIR_INDEX, EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF, EXTENT_DATA_REF); any other type takes the
 * early-exit branch. For an accepted key a message is printed, the item
 * headers after @slot are moved down by one, the header item count is
 * decremented, and the buffer is marked dirty. The key of item 0 is passed
 * to btrfs_fixup_low_keys() (guarding condition not visible here;
 * presumably when slot 0 was the one removed).
 *
 * Only the item header array is moved by the visible code; the item data
 * area is not touched here.
 *
 * NOTE(review): return values are not visible in this excerpt.
 */
5672 static int delete_bogus_item(struct btrfs_root *root,
5673 struct btrfs_path *path,
5674 struct extent_buffer *buf, int slot)
5676 struct btrfs_key key;
5677 int nritems = btrfs_header_nritems(buf);
5679 btrfs_item_key_to_cpu(buf, &key, slot);
5681 /* These are all the keys we can deal with missing. */
5682 if (key.type != BTRFS_DIR_INDEX_KEY &&
5683 key.type != BTRFS_EXTENT_ITEM_KEY &&
5684 key.type != BTRFS_METADATA_ITEM_KEY &&
5685 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5686 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5689 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5690 (unsigned long long)key.objectid, key.type,
5691 (unsigned long long)key.offset, slot, buf->start);
/* close the gap in the item header array */
5692 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5693 btrfs_item_nr_offset(slot + 1),
5694 sizeof(struct btrfs_item) *
5695 (nritems - slot - 1));
5696 btrfs_set_header_nritems(buf, nritems - 1);
5698 struct btrfs_disk_key disk_key;
5700 btrfs_item_key(buf, &disk_key, 0);
5701 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5703 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Expected layout, as checked by the visible conditions:
 *  - item 0 must end exactly at BTRFS_LEAF_DATA_SIZE(root)
 *  - item i > 0 must end exactly where item i - 1 starts
 *
 * If an item ends beyond the expected position, delete_bogus_item() is
 * tried and otherwise an error is printed. If it ends short of the expected
 * position, the gap is recorded as @shift, the item's data is moved up by
 * that amount, its offset is updated and the buffer is marked dirty.
 *
 * NOTE(review): the control flow after delete_bogus_item() and the final
 * return are not visible in this excerpt.
 */
5707 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5709 struct extent_buffer *buf;
5713 /* We should only get this for leaves */
5714 BUG_ON(path->lowest_level);
5715 buf = path->nodes[0];
5717 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5718 unsigned int shift = 0, offset;
/* first item: compare its end against the end of the leaf data area */
5720 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5721 BTRFS_LEAF_DATA_SIZE(root)) {
5722 if (btrfs_item_end_nr(buf, i) >
5723 BTRFS_LEAF_DATA_SIZE(root)) {
5724 ret = delete_bogus_item(root, path, buf, i);
5727 fprintf(stderr, "item is off the end of the "
5728 "leaf, can't fix\n");
5732 shift = BTRFS_LEAF_DATA_SIZE(root) -
5733 btrfs_item_end_nr(buf, i);
/* later items: compare the end against the start of the previous item */
5734 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5735 btrfs_item_offset_nr(buf, i - 1)) {
5736 if (btrfs_item_end_nr(buf, i) >
5737 btrfs_item_offset_nr(buf, i - 1)) {
5738 ret = delete_bogus_item(root, path, buf, i);
5741 fprintf(stderr, "items overlap, can't fix\n");
5745 shift = btrfs_item_offset_nr(buf, i - 1) -
5746 btrfs_item_end_nr(buf, i);
5751 printf("Shifting item nr %d by %u bytes in block %llu\n",
5752 i, shift, (unsigned long long)buf->start);
5753 offset = btrfs_item_offset_nr(buf, i);
/* move the item payload up by @shift bytes to close the gap */
5754 memmove_extent_buffer(buf,
5755 btrfs_leaf_data(buf) + offset + shift,
5756 btrfs_leaf_data(buf) + offset,
5757 btrfs_item_size_nr(buf, i));
5758 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5760 btrfs_mark_buffer_dirty(buf);
5764 * We may have moved things, in which case we want to exit so we don't
5765 * write those changes out. Once we have proper abort functionality in
5766 * progs this can be changed to something nicer.
5773 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5774 * then just return -EIO.
/*
 * Only two statuses are handled: BTRFS_TREE_BLOCK_BAD_KEY_ORDER and
 * BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 *
 * All roots referencing buf->start are collected with
 * btrfs_find_all_roots(). For each one the root is read, a transaction is
 * started, and the block is searched for by its first key with cow enabled,
 * path.lowest_level set to the level of @buf and skip_check_block set. Then
 * fix_key_order() or fix_item_offset() runs on the resulting path and the
 * transaction is committed.
 *
 * NOTE(review): error-path control flow and the release of @roots are not
 * visible in this excerpt.
 */
5776 static int try_to_fix_bad_block(struct btrfs_root *root,
5777 struct extent_buffer *buf,
5778 enum btrfs_tree_block_status status)
5780 struct btrfs_trans_handle *trans;
5781 struct ulist *roots;
5782 struct ulist_node *node;
5783 struct btrfs_root *search_root;
5784 struct btrfs_path path;
5785 struct ulist_iterator iter;
5786 struct btrfs_key root_key, key;
5789 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5790 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5793 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5797 btrfs_init_path(&path);
5798 ULIST_ITER_INIT(&iter);
5799 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root item key */
5800 root_key.objectid = node->val;
5801 root_key.type = BTRFS_ROOT_ITEM_KEY;
5802 root_key.offset = (u64)-1;
5804 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5811 trans = btrfs_start_transaction(search_root, 0);
5812 if (IS_ERR(trans)) {
5813 ret = PTR_ERR(trans);
5817 path.lowest_level = btrfs_header_level(buf);
5818 path.skip_check_block = 1;
5819 if (path.lowest_level)
5820 btrfs_node_key_to_cpu(buf, &key, 0);
5822 btrfs_item_key_to_cpu(buf, &key, 0);
/* last argument (cow) is 1 */
5823 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5826 btrfs_commit_transaction(trans, search_root);
5829 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5830 ret = fix_key_order(search_root, &path);
5831 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5832 ret = fix_item_offset(search_root, &path);
5834 btrfs_commit_transaction(trans, search_root);
5837 btrfs_release_path(&path);
5838 btrfs_commit_transaction(trans, search_root);
5841 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record covering [buf->start, buf->start + buf->len) is looked up in
 * @extent_cache; its generation, info_objectid (objectid of the first key,
 * when the block has items) and info_level are filled from the header.
 * The block is then checked with btrfs_check_leaf() or btrfs_check_node()
 * against the record's parent key. On a non-clean status
 * try_to_fix_bad_block() is called, and "bad block" is reported if the
 * status is still not clean.
 *
 * Otherwise content_checked is set; owner_ref_checked is set either
 * directly when @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF or after
 * check_owner_ref(). Finally maybe_free_extent_rec() is given the chance to
 * drop the record.
 *
 * NOTE(review): return values and the conditions guarding the repair
 * attempt are not visible in this excerpt.
 */
5845 static int check_block(struct btrfs_root *root,
5846 struct cache_tree *extent_cache,
5847 struct extent_buffer *buf, u64 flags)
5849 struct extent_record *rec;
5850 struct cache_extent *cache;
5851 struct btrfs_key key;
5852 enum btrfs_tree_block_status status;
5856 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5859 rec = container_of(cache, struct extent_record, cache);
5860 rec->generation = btrfs_header_generation(buf);
5862 level = btrfs_header_level(buf);
5863 if (btrfs_header_nritems(buf) > 0) {
5866 btrfs_item_key_to_cpu(buf, &key, 0);
5868 btrfs_node_key_to_cpu(buf, &key, 0);
5870 rec->info_objectid = key.objectid;
5872 rec->info_level = level;
5874 if (btrfs_is_leaf(buf))
5875 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5877 status = btrfs_check_node(root, &rec->parent_key, buf);
5879 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5881 status = try_to_fix_bad_block(root, buf, status);
5882 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5884 fprintf(stderr, "bad block %llu\n",
5885 (unsigned long long)buf->start);
5888 * Signal to callers we need to start the scan over
5889 * again since we'll have cowed blocks.
5894 rec->content_checked = 1;
5895 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5896 rec->owner_ref_checked = 1;
5898 ret = check_owner_ref(root, rec, buf);
5900 rec->owner_ref_checked = 1;
5904 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search rec->backrefs for a tree backref matching (@parent, @root).
 *
 * Two match modes are visible: one compares @parent against back->parent
 * and tests !full_backref first, the other compares @root against
 * back->root and tests full_backref first.
 *
 * NOTE(review): the selector between the two modes (presumably whether
 * @parent is non-zero), the skipping of data backrefs and the return
 * statements are not visible in this excerpt.
 */
5908 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5909 u64 parent, u64 root)
5911 struct list_head *cur = rec->backrefs.next;
5912 struct extent_backref *node;
5913 struct tree_backref *back;
5915 while(cur != &rec->backrefs) {
5916 node = to_extent_backref(cur);
5920 back = to_tree_backref(node);
5922 if (!node->full_backref)
5924 if (parent == back->parent)
5927 if (node->full_backref)
5929 if (back->root == root)
/*
 * Allocate a tree_backref, zero its embedded node, and append it to
 * rec->backrefs.
 *
 * One visible branch stores @parent and sets full_backref = 1; the other
 * clears full_backref (the matching store of @root is not visible here).
 *
 * NOTE(review): the allocation-failure check, the branch condition and the
 * return value are not visible in this excerpt.
 */
5936 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5937 u64 parent, u64 root)
5939 struct tree_backref *ref = malloc(sizeof(*ref));
/* only the embedded node is zeroed, not the whole structure */
5943 memset(&ref->node, 0, sizeof(ref->node));
5945 ref->parent = parent;
5946 ref->node.full_backref = 1;
5949 ref->node.full_backref = 0;
5951 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search rec->backrefs for a data backref.
 *
 * Two match modes are visible: by @parent (compared with back->parent), or
 * by the (@root, @owner, @offset) triple. In the triple mode an extra test
 * applies when both the caller's found_ref and node->found_ref are set: the
 * entry's bytes and disk_bytenr are compared against @bytes and
 * @disk_bytenr.
 *
 * NOTE(review): the result of a bytes/disk_bytenr mismatch (presumably the
 * entry is skipped), the mode selector and the return statements are not
 * visible in this excerpt.
 */
5956 static struct data_backref *find_data_backref(struct extent_record *rec,
5957 u64 parent, u64 root,
5958 u64 owner, u64 offset,
5960 u64 disk_bytenr, u64 bytes)
5962 struct list_head *cur = rec->backrefs.next;
5963 struct extent_backref *node;
5964 struct data_backref *back;
5966 while(cur != &rec->backrefs) {
5967 node = to_extent_backref(cur);
5971 back = to_data_backref(node);
5973 if (!node->full_backref)
5975 if (parent == back->parent)
5978 if (node->full_backref)
5980 if (back->root == root && back->owner == owner &&
5981 back->offset == offset) {
5982 if (found_ref && node->found_ref &&
5983 (back->bytes != bytes ||
5984 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref, zero its embedded node, mark it is_data, and
 * append it to rec->backrefs.
 *
 * One visible branch stores @parent and sets full_backref = 1; the other
 * stores @offset and clears full_backref. bytes is initialised from
 * max_size, and rec->max_size is raised when max_size exceeds it.
 *
 * NOTE(review): the allocation-failure check, the remaining field stores
 * and the return value are not visible in this excerpt.
 */
5993 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5994 u64 parent, u64 root,
5995 u64 owner, u64 offset,
5998 struct data_backref *ref = malloc(sizeof(*ref));
6002 memset(&ref->node, 0, sizeof(ref->node));
6003 ref->node.is_data = 1;
6006 ref->parent = parent;
6009 ref->node.full_backref = 1;
6013 ref->offset = offset;
6014 ref->node.full_backref = 0;
6016 ref->bytes = max_size;
6019 list_add_tail(&ref->node.list, &rec->backrefs);
/* keep the record's max_size as the largest size seen so far */
6020 if (max_size > rec->max_size)
6021 rec->max_size = max_size;
/*
 * Check that the type of extent @rec matches the type of the chunk (block
 * group) it lives in; sets rec->wrong_chunk_type on mismatch.
 *
 * The block group is looked up through the file-scope global_info:
 *  - a non-metadata record must be in a BTRFS_BLOCK_GROUP_DATA group
 *  - a metadata record must be in a SYSTEM or METADATA group; when a
 *    backref is available, the first one decides which: root ==
 *    BTRFS_CHUNK_TREE_OBJECTID requires SYSTEM, anything else METADATA
 *  - a metadata record whose first backref is a data backref is flagged
 */
6026 static void check_extent_type(struct extent_record *rec)
6028 struct btrfs_block_group_cache *bg_cache;
6030 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6034 /* data extent, check chunk directly */
6035 if (!rec->metadata) {
6036 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6037 rec->wrong_chunk_type = 1;
6041 /* metadata extent, check the obvious case first */
6042 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6043 BTRFS_BLOCK_GROUP_METADATA))) {
6044 rec->wrong_chunk_type = 1;
6049 * Check SYSTEM extent, as it's also marked as metadata, we can only
6050 * make sure it's a SYSTEM extent by its backref
6052 if (!list_empty(&rec->backrefs)) {
6053 struct extent_backref *node;
6054 struct tree_backref *tback;
/* only the first backref on the list is consulted */
6057 node = to_extent_backref(rec->backrefs.next);
6058 if (node->is_data) {
6059 /* tree block shouldn't have data backref */
6060 rec->wrong_chunk_type = 1;
6063 tback = container_of(node, struct tree_backref, node);
6065 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6066 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6068 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6069 if (!(bg_cache->flags & bg_type))
6070 rec->wrong_chunk_type = 1;
6075 * Allocate a new extent record, fill default values from @tmpl and insert into
6076 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6077 * the cache, otherwise it fails.
/*
 * Field handling visible below:
 *  - copied from @tmpl: start, max_size, found_rec, content_checked,
 *    owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 *    parent_generation, parent_key
 *  - nr is the larger of tmpl->nr and tmpl->max_size, while cache.size is
 *    taken from tmpl->nr alone
 *  - reset: num_duplicates, bad_full_backref, crossing_stripes,
 *    wrong_chunk_type are 0; flag_block_full_backref is FLAG_UNSET
 *
 * tmpl->max_size must be non-zero (BUG_ON). After a successful insert the
 * file-scope bytes_used counter grows by rec->nr, crossing_stripes is
 * evaluated with global_info->nodesize, and check_extent_type() runs.
 *
 * NOTE(review): the allocation-failure check, the condition guarding the
 * crossing-stripes check and the return value are not visible in this
 * excerpt.
 */
6079 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6080 struct extent_record *tmpl)
6082 struct extent_record *rec;
6085 BUG_ON(tmpl->max_size == 0);
6086 rec = malloc(sizeof(*rec));
6089 rec->start = tmpl->start;
6090 rec->max_size = tmpl->max_size;
6091 rec->nr = max(tmpl->nr, tmpl->max_size);
6092 rec->found_rec = tmpl->found_rec;
6093 rec->content_checked = tmpl->content_checked;
6094 rec->owner_ref_checked = tmpl->owner_ref_checked;
6095 rec->num_duplicates = 0;
6096 rec->metadata = tmpl->metadata;
6097 rec->flag_block_full_backref = FLAG_UNSET;
6098 rec->bad_full_backref = 0;
6099 rec->crossing_stripes = 0;
6100 rec->wrong_chunk_type = 0;
6101 rec->is_root = tmpl->is_root;
6102 rec->refs = tmpl->refs;
6103 rec->extent_item_refs = tmpl->extent_item_refs;
6104 rec->parent_generation = tmpl->parent_generation;
6105 INIT_LIST_HEAD(&rec->backrefs);
6106 INIT_LIST_HEAD(&rec->dups);
6107 INIT_LIST_HEAD(&rec->list);
6108 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6109 rec->cache.start = tmpl->start;
6110 rec->cache.size = tmpl->nr;
6111 ret = insert_cache_extent(extent_cache, &rec->cache);
6116 bytes_used += rec->nr;
6119 rec->crossing_stripes = check_crossing_stripes(global_info,
6120 rec->start, global_info->nodesize);
6121 check_extent_type(rec);
6126 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6128 * - refs - if found, increase refs
6129 * - is_root - if found, set
6130 * - content_checked - if found, set
6131 * - owner_ref_checked - if found, set
6133 * If not found, create a new one, initialize and insert.
/*
 * Behaviour visible below when [tmpl->start, tmpl->start + tmpl->nr) is
 * already cached:
 *  - rec->nr may be reset to max(tmpl->nr, tmpl->max_size)
 *  - if @tmpl comes from an extent item (found_rec) and either the start
 *    differs or the record already had one, the record is queued on
 *    duplicate_extents and a new extent_record carrying start, max_size,
 *    metadata and extent_item_refs is appended to rec->dups
 *  - extent_item_refs is copied from @tmpl when this is not a duplicate; a
 *    message is printed if the record already had a value
 *  - content_checked / owner_ref_checked are only ever set, never cleared
 *  - parent_key is overwritten; parent_generation only when non-zero
 *  - max_size only grows
 *  - crossing_stripes and the chunk type are re-evaluated, then
 *    maybe_free_extent_rec() may drop the record
 *
 * If nothing is cached, add_extent_rec_nolookup() creates the record.
 *
 * NOTE(review): several guarding conditions and the return statements are
 * not visible in this excerpt.
 */
6135 static int add_extent_rec(struct cache_tree *extent_cache,
6136 struct extent_record *tmpl)
6138 struct extent_record *rec;
6139 struct cache_extent *cache;
6143 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6145 rec = container_of(cache, struct extent_record, cache);
6149 rec->nr = max(tmpl->nr, tmpl->max_size);
6152 * We need to make sure to reset nr to whatever the extent
6153 * record says was the real size, this way we can compare it to
6156 if (tmpl->found_rec) {
6157 if (tmpl->start != rec->start || rec->found_rec) {
6158 struct extent_record *tmp;
/* queue the record on the duplicate list only once */
6161 if (list_empty(&rec->list))
6162 list_add_tail(&rec->list,
6163 &duplicate_extents);
6166 * We have to do this song and dance in case we
6167 * find an extent record that falls inside of
6168 * our current extent record but does not have
6169 * the same objectid.
6171 tmp = malloc(sizeof(*tmp));
6174 tmp->start = tmpl->start;
6175 tmp->max_size = tmpl->max_size;
6178 tmp->metadata = tmpl->metadata;
6179 tmp->extent_item_refs = tmpl->extent_item_refs;
6180 INIT_LIST_HEAD(&tmp->list);
6181 list_add_tail(&tmp->list, &rec->dups);
6182 rec->num_duplicates++;
6189 if (tmpl->extent_item_refs && !dup) {
6190 if (rec->extent_item_refs) {
6191 fprintf(stderr, "block %llu rec "
6192 "extent_item_refs %llu, passed %llu\n",
6193 (unsigned long long)tmpl->start,
6194 (unsigned long long)
6195 rec->extent_item_refs,
6196 (unsigned long long)tmpl->extent_item_refs);
6198 rec->extent_item_refs = tmpl->extent_item_refs;
6202 if (tmpl->content_checked)
6203 rec->content_checked = 1;
6204 if (tmpl->owner_ref_checked)
6205 rec->owner_ref_checked = 1;
6206 memcpy(&rec->parent_key, &tmpl->parent_key,
6207 sizeof(tmpl->parent_key));
6208 if (tmpl->parent_generation)
6209 rec->parent_generation = tmpl->parent_generation;
6210 if (rec->max_size < tmpl->max_size)
6211 rec->max_size = tmpl->max_size;
6214 * A metadata extent can't cross stripe_len boundary, otherwise
6215 * kernel scrub won't be able to handle it.
6216 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6220 rec->crossing_stripes = check_crossing_stripes(
6221 global_info, rec->start,
6222 global_info->nodesize);
6223 check_extent_type(rec);
6224 maybe_free_extent_rec(extent_cache, rec);
6228 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent, @root) for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache with a length of 1. When
 * a record has to be created first, a zeroed template with start = @bytenr
 * is passed to add_extent_rec_nolookup() and the lookup is repeated. The
 * record found must start exactly at @bytenr.
 *
 * An existing backref is reused via find_tree_backref(), otherwise one is
 * allocated. Two flags can be updated on it -- found_ref and
 * found_extent_tree -- and in either case a message is printed when the
 * flag was already set. Afterwards the chunk type is re-checked and the
 * record may be dropped by maybe_free_extent_rec().
 *
 * NOTE(review): the condition choosing between the two flags (presumably
 * @found_ref), the remaining template fields and the return values are not
 * visible in this excerpt.
 */
6233 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6234 u64 parent, u64 root, int found_ref)
6236 struct extent_record *rec;
6237 struct tree_backref *back;
6238 struct cache_extent *cache;
6241 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6243 struct extent_record tmpl;
6245 memset(&tmpl, 0, sizeof(tmpl));
6246 tmpl.start = bytenr;
6251 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6255 /* really a bug in cache_extent implement now */
6256 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6261 rec = container_of(cache, struct extent_record, cache);
6262 if (rec->start != bytenr) {
6264 * Several cause, from unaligned bytenr to over lapping extents
6269 back = find_tree_backref(rec, parent, root);
6271 back = alloc_tree_backref(rec, parent, root);
6277 if (back->node.found_ref) {
6278 fprintf(stderr, "Extent back ref already exists "
6279 "for %llu parent %llu root %llu \n",
6280 (unsigned long long)bytenr,
6281 (unsigned long long)parent,
6282 (unsigned long long)root);
6284 back->node.found_ref = 1;
6286 if (back->node.found_extent_tree) {
6287 fprintf(stderr, "Extent back ref already exists "
6288 "for %llu parent %llu root %llu \n",
6289 (unsigned long long)bytenr,
6290 (unsigned long long)parent,
6291 (unsigned long long)root);
6293 back->node.found_extent_tree = 1;
6295 check_extent_type(rec);
6296 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up with a length of 1; when one has to be
 * created, a zeroed template with start = @bytenr and max_size = @max_size
 * is used. rec->max_size is raised to @max_size when smaller.
 *
 * The backref is located with find_data_backref() or created with
 * alloc_data_backref(). Two update paths are visible:
 *  - reference seen in a file extent: @num_refs must be 1 (BUG_ON); if the
 *    backref was already found its bytes must equal @max_size (BUG_ON);
 *    found_ref is bumped, bytes/disk_bytenr are stored, and the record is
 *    marked content_checked and owner_ref_checked
 *  - reference seen in the extent tree: a message is printed if
 *    found_extent_tree was already set; num_refs and found_extent_tree are
 *    stored
 *
 * NOTE(review): the condition selecting between the two paths (presumably
 * @found_ref) and the return values are not visible in this excerpt.
 */
6300 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6301 u64 parent, u64 root, u64 owner, u64 offset,
6302 u32 num_refs, int found_ref, u64 max_size)
6304 struct extent_record *rec;
6305 struct data_backref *back;
6306 struct cache_extent *cache;
6309 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6311 struct extent_record tmpl;
6313 memset(&tmpl, 0, sizeof(tmpl));
6314 tmpl.start = bytenr;
6316 tmpl.max_size = max_size;
6318 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6322 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6327 rec = container_of(cache, struct extent_record, cache);
6328 if (rec->max_size < max_size)
6329 rec->max_size = max_size;
6332 * If found_ref is set then max_size is the real size and must match the
6333 * existing refs. So if we have already found a ref then we need to
6334 * make sure that this ref matches the existing one, otherwise we need
6335 * to add a new backref so we can notice that the backrefs don't match
6336 * and we need to figure out who is telling the truth. This is to
6337 * account for that awful fsync bug I introduced where we'd end up with
6338 * a btrfs_file_extent_item that would have its length include multiple
6339 * prealloc extents or point inside of a prealloc extent.
6341 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6344 back = alloc_data_backref(rec, parent, root, owner, offset,
6350 BUG_ON(num_refs != 1);
6351 if (back->node.found_ref)
6352 BUG_ON(back->bytes != max_size);
6353 back->node.found_ref = 1;
6354 back->found_ref += 1;
6355 back->bytes = max_size;
6356 back->disk_bytenr = bytenr;
6358 rec->content_checked = 1;
6359 rec->owner_ref_checked = 1;
6361 if (back->node.found_extent_tree) {
6362 fprintf(stderr, "Extent back ref already exists "
6363 "for %llu parent %llu root %llu "
6364 "owner %llu offset %llu num_refs %lu\n",
6365 (unsigned long long)bytenr,
6366 (unsigned long long)parent,
6367 (unsigned long long)root,
6368 (unsigned long long)owner,
6369 (unsigned long long)offset,
6370 (unsigned long)num_refs);
6372 back->num_refs = num_refs;
6373 back->node.found_extent_tree = 1;
6375 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register [bytenr, bytenr + size) in the @seen tree and in the @pending
 * tree. The result of the @seen insertion is captured in ret; the result of
 * the @pending insertion is ignored.
 *
 * NOTE(review): presumably a failed @seen insertion returns early so
 * already-seen blocks are not queued twice -- that check is not visible in
 * this excerpt.
 */
6379 static int add_pending(struct cache_tree *pending,
6380 struct cache_tree *seen, u64 bytenr, u32 size)
6383 ret = add_cache_extent(seen, bytenr, size);
6386 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in @bits
 * (at most @bits_nr entries).
 *
 * Sources visible below, in order of appearance:
 *  1. the first extent of @reada goes into bits[0]
 *  2. @nodes is searched starting 32768 bytes before @last (when @last is
 *     larger than that), then again from 0
 *  3. @pending is searched from 0
 * Extents are then copied into @bits by walking next_cache_extent(). When
 * more than 8 slots remain free, @pending is additionally scanned from the
 * end of bits[0], with a test on whether the next extent lies more than
 * 32768 bytes past the previous one.
 *
 * NOTE(review): most of the control flow linking these steps, and the
 * return value, are not visible in this excerpt. node_start is declared
 * unsigned long while @last is u64 -- confirm this is safe where unsigned
 * long is 32 bits wide.
 */
6390 static int pick_next_pending(struct cache_tree *pending,
6391 struct cache_tree *reada,
6392 struct cache_tree *nodes,
6393 u64 last, struct block_info *bits, int bits_nr,
6396 unsigned long node_start = last;
6397 struct cache_extent *cache;
6400 cache = search_cache_extent(reada, 0);
6402 bits[0].start = cache->start;
6403 bits[0].size = cache->size;
6408 if (node_start > 32768)
6409 node_start -= 32768;
6411 cache = search_cache_extent(nodes, node_start);
6413 cache = search_cache_extent(nodes, 0);
6416 cache = search_cache_extent(pending, 0);
6421 bits[ret].start = cache->start;
6422 bits[ret].size = cache->size;
6423 cache = next_cache_extent(cache);
6425 } while (cache && ret < bits_nr);
6431 bits[ret].start = cache->start;
6432 bits[ret].size = cache->size;
6433 cache = next_cache_extent(cache);
6435 } while (cache && ret < bits_nr);
6437 if (bits_nr - ret > 8) {
6438 u64 lookup = bits[0].start + bits[0].size;
6439 struct cache_extent *next;
6440 next = search_cache_extent(pending, lookup);
6442 if (next->start - lookup > 32768)
6444 bits[ret].start = next->start;
6445 bits[ret].size = next->size;
6446 lookup = next->start + next->size;
6450 next = next_cache_extent(next);
/*
 * Per-entry callback for tearing down the chunk cache: map @cache back to
 * its chunk_record and unlink the record's list and dextents list heads.
 *
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt.
 */
6458 static void free_chunk_record(struct cache_extent *cache)
6460 struct chunk_record *rec;
6462 rec = container_of(cache, struct chunk_record, cache);
6463 list_del_init(&rec->list);
6464 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by handing free_chunk_record() to
 * cache_tree_free_extents().
 */
6468 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6470 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for tearing down the device cache: map the rb-tree
 * @node back to its enclosing device_record.
 *
 * NOTE(review): the release of the record is not visible in this excerpt.
 */
6473 static void free_device_record(struct rb_node *node)
6475 struct device_record *rec;
6477 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation parameterised with the device_cache name and the
 * free_device_record() callback.
 * NOTE(review): presumably expands to the teardown function for the device
 * cache tree -- confirm against the macro definition.
 */
6481 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 *
 * NOTE(review): the handling of an insert_cache_extent() failure and the
 * return value are not visible in this excerpt.
 */
6483 int insert_block_group_record(struct block_group_tree *tree,
6484 struct block_group_record *bg_rec)
6488 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6492 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry callback for tearing down the block group tree: map @cache back
 * to its block_group_record and unlink it from its list.
 *
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt.
 */
6496 static void free_block_group_record(struct cache_extent *cache)
6498 struct block_group_record *rec;
6500 rec = container_of(cache, struct block_group_record, cache);
6501 list_del_init(&rec->list);
/*
 * Release every entry of @tree by handing free_block_group_record() to
 * cache_tree_free_extents().
 */
6505 void free_block_group_tree(struct block_group_tree *tree)
6507 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree using insert_cache_extent2(),
 * then queue it on both orphan lists (no_chunk_orphans and
 * no_device_orphans).
 *
 * NOTE(review): the handling of an insertion failure and the return value
 * are not visible in this excerpt.
 */
6510 int insert_device_extent_record(struct device_extent_tree *tree,
6511 struct device_extent_record *de_rec)
6516 * Device extent is a bit different from the other extents, because
6517 * the extents which belong to the different devices may have the
6518 * same start and size, so we need use the special extent cache
6519 * search/insert functions.
6521 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6525 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6526 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry callback for tearing down the device extent tree: map @cache
 * back to its device_extent_record and unlink it from the chunk and device
 * lists if it is still on them.
 *
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt.
 */
6530 static void free_device_extent_record(struct cache_extent *cache)
6532 struct device_extent_record *rec;
6534 rec = container_of(cache, struct device_extent_record, cache);
6535 if (!list_empty(&rec->chunk_list))
6536 list_del_init(&rec->chunk_list);
6537 if (!list_empty(&rec->device_list))
6538 list_del_init(&rec->device_list);
/*
 * Release every entry of @tree by handing free_device_extent_record() to
 * cache_tree_free_extents().
 */
6542 void free_device_extent_tree(struct device_extent_tree *tree)
6544 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * Turn a v0 extent ref item at @slot of @leaf into a backref: when the
 * ref's objectid is below BTRFS_FIRST_FREE_OBJECTID a tree backref is
 * added, otherwise a data backref carrying the ref's count. In both calls
 * key.objectid is the extent bytenr and key.offset is passed as the parent.
 *
 * NOTE(review): the return value and the closing #endif are not visible in
 * this excerpt.
 */
6547 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6548 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6549 struct extent_buffer *leaf, int slot)
6551 struct btrfs_extent_ref_v0 *ref0;
6552 struct btrfs_key key;
6555 btrfs_item_key_to_cpu(leaf, &key, slot);
6556 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6557 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6558 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6561 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6562 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for num_stripes stripes
 * (btrfs_chunk_record_size()). The cache extent covers
 * [key->offset, key->offset + chunk length); the key triple, the leaf
 * generation and the chunk's owner, stripe_len, type, io_width, io_align,
 * sector_size, num_stripes and sub_stripes are copied over. Each stripe's
 * devid, offset and device UUID are read from the leaf.
 *
 * NOTE(review): an allocation failure prints "memory allocation failed";
 * what follows, and the return statement, are not visible in this excerpt.
 */
6568 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6569 struct btrfs_key *key,
6572 struct btrfs_chunk *ptr;
6573 struct chunk_record *rec;
6576 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6577 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6579 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6581 fprintf(stderr, "memory allocation failed\n");
6585 INIT_LIST_HEAD(&rec->list);
6586 INIT_LIST_HEAD(&rec->dextents);
6589 rec->cache.start = key->offset;
6590 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6592 rec->generation = btrfs_header_generation(leaf);
6594 rec->objectid = key->objectid;
6595 rec->type = key->type;
6596 rec->offset = key->offset;
6598 rec->length = rec->cache.size;
6599 rec->owner = btrfs_chunk_owner(leaf, ptr);
6600 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6601 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6602 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6603 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6604 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6605 rec->num_stripes = num_stripes;
6606 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6608 for (i = 0; i < rec->num_stripes; ++i) {
6609 rec->stripes[i].devid =
6610 btrfs_stripe_devid_nr(leaf, ptr, i);
6611 rec->stripes[i].offset =
6612 btrfs_stripe_offset_nr(leaf, ptr, i);
6613 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6614 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb with btrfs_check_chunk_valid()
 * and, when usable, insert a chunk_record for it into @chunk_cache.
 *
 * An invalid chunk is reported with its offset and length. If the cache
 * already holds an overlapping entry, "Chunk[...] existed." is printed.
 *
 * NOTE(review): return values and the cleanup of the record after a failed
 * insertion are not visible in this excerpt.
 */
6621 static int process_chunk_item(struct cache_tree *chunk_cache,
6622 struct btrfs_key *key, struct extent_buffer *eb,
6625 struct chunk_record *rec;
6626 struct btrfs_chunk *chunk;
6629 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6631 * Do extra check for this chunk item,
6633 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6634 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6635 * and owner<->key_type check.
6637 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6640 error("chunk(%llu, %llu) is not valid, ignore it",
6641 key->offset, btrfs_chunk_length(eb, chunk));
6644 rec = btrfs_new_chunk_record(eb, key, slot);
6645 ret = insert_cache_extent(chunk_cache, &rec->cache);
6647 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6648 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree, ordered by device_record_compare.
 *
 * The record is allocated with malloc(); the visible code fills devid,
 * generation, the key triple, total_byte and byte_used. If the insertion
 * reports a clash, "Device[...] existed." is printed.
 *
 * NOTE(review): the allocation-failure path, return values and the cleanup
 * of the record after a failed insertion are not visible in this excerpt.
 */
6655 static int process_device_item(struct rb_root *dev_cache,
6656 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6658 struct btrfs_dev_item *ptr;
6659 struct device_record *rec;
6662 ptr = btrfs_item_ptr(eb,
6663 slot, struct btrfs_dev_item);
6665 rec = malloc(sizeof(*rec));
6667 fprintf(stderr, "memory allocation failed\n");
/* devid is first taken from the key; it is overwritten from the item below */
6671 rec->devid = key->offset;
6672 rec->generation = btrfs_header_generation(eb);
6674 rec->objectid = key->objectid;
6675 rec->type = key->type;
6676 rec->offset = key->offset;
6678 rec->devid = btrfs_device_id(eb, ptr);
6679 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6680 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6682 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6684 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-allocated block_group_record from the block group item at
 * @slot of @leaf.
 *
 * The cache extent covers [key->objectid, key->objectid + key->offset); the
 * key triple, the leaf generation and the item's flags are copied, and the
 * list head is initialised.
 *
 * NOTE(review): an allocation failure prints "memory allocation failed";
 * what follows, and the return statement, are not visible in this excerpt.
 */
6691 struct block_group_record *
6692 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6695 struct btrfs_block_group_item *ptr;
6696 struct block_group_record *rec;
6698 rec = calloc(1, sizeof(*rec));
6700 fprintf(stderr, "memory allocation failed\n");
6704 rec->cache.start = key->objectid;
6705 rec->cache.size = key->offset;
6707 rec->generation = btrfs_header_generation(leaf);
6709 rec->objectid = key->objectid;
6710 rec->type = key->type;
6711 rec->offset = key->offset;
6713 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6714 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6716 INIT_LIST_HEAD(&rec->list);
/*
 * Create a block_group_record for the item at @slot of @eb and insert it
 * into @block_group_cache. If the insertion reports a clash,
 * "Block Group[...] existed." is printed.
 *
 * NOTE(review): return values and the cleanup of the record after a failed
 * insertion are not visible in this excerpt.
 */
6721 static int process_block_group_item(struct block_group_tree *block_group_cache,
6722 struct btrfs_key *key,
6723 struct extent_buffer *eb, int slot)
6725 struct block_group_record *rec;
6728 rec = btrfs_new_block_group_record(eb, key, slot);
6729 ret = insert_block_group_record(block_group_cache, rec);
6731 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6732 rec->objectid, rec->offset);
/*
 * Build a zero-allocated device_extent_record from the dev extent item at
 * @slot of @leaf.
 *
 * The cache extent is keyed by objectid (key->objectid) and start
 * (key->offset), with its size set to the dev extent length. The key
 * triple, leaf generation, chunk objectid and length are copied, and both
 * list heads are initialised.
 *
 * NOTE(review): the destination of the chunk offset read is not visible in
 * this excerpt. An allocation failure prints "memory allocation failed";
 * what follows, and the return statement, are not visible either.
 */
6739 struct device_extent_record *
6740 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6741 struct btrfs_key *key, int slot)
6743 struct device_extent_record *rec;
6744 struct btrfs_dev_extent *ptr;
6746 rec = calloc(1, sizeof(*rec));
6748 fprintf(stderr, "memory allocation failed\n");
6752 rec->cache.objectid = key->objectid;
6753 rec->cache.start = key->offset;
6755 rec->generation = btrfs_header_generation(leaf);
6757 rec->objectid = key->objectid;
6758 rec->type = key->type;
6759 rec->offset = key->offset;
6761 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6762 rec->chunk_objecteid =
6763 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6765 btrfs_dev_extent_chunk_offset(leaf, ptr);
6766 rec->length = btrfs_dev_extent_length(leaf, ptr);
6767 rec->cache.size = rec->length;
6769 INIT_LIST_HEAD(&rec->chunk_list);
6770 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb with
 * btrfs_new_device_extent_record() and insert it into @dev_extent_cache.
 */
6776 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6777 struct btrfs_key *key, struct extent_buffer *eb,
6780 struct device_extent_record *rec;
6783 rec = btrfs_new_device_extent_record(eb, key, slot);
6784 ret = insert_device_extent_record(dev_extent_cache, rec);
/* NOTE(review): presumably printed only when the insert reports a duplicate - confirm. */
6787 "Device extent[%llu, %llu, %llu] existed.\n",
6788 rec->objectid, rec->offset, rec->length);
/*
 * Record the extent item found at @slot of @eb in @extent_cache and add a
 * backref for every inline reference stored after the item.
 *
 * Visible steps:
 *  - num_bytes is taken from fs_info->nodesize (METADATA_ITEM keys) or from
 *    key.offset;
 *  - an extent whose bytenr is not sectorsize aligned is reported as invalid;
 *  - an item smaller than struct btrfs_extent_item is handled through the
 *    BTRFS_COMPAT_EXTENT_TREE_V0 path, which only records the extent;
 *  - a metadata extent must be exactly nodesize long and a data extent must
 *    be sectorsize aligned, otherwise an error is reported;
 *  - the extent record is added, then the inline refs between the end of the
 *    item header and the end of the item are walked one by one.
 */
6795 static int process_extent_item(struct btrfs_root *root,
6796 struct cache_tree *extent_cache,
6797 struct extent_buffer *eb, int slot)
6799 struct btrfs_extent_item *ei;
6800 struct btrfs_extent_inline_ref *iref;
6801 struct btrfs_extent_data_ref *dref;
6802 struct btrfs_shared_data_ref *sref;
6803 struct btrfs_key key;
6804 struct extent_record tmpl;
6809 u32 item_size = btrfs_item_size_nr(eb, slot);
6815 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys do not carry a length; nodesize is used instead. */
6817 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6819 num_bytes = root->fs_info->nodesize;
6821 num_bytes = key.offset;
6824 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6825 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6826 key.objectid, root->fs_info->sectorsize);
/* Item too small for the current format: only the v0 layout is accepted. */
6829 if (item_size < sizeof(*ei)) {
6830 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6831 struct btrfs_extent_item_v0 *ei0;
6832 BUG_ON(item_size != sizeof(*ei0));
6833 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6834 refs = btrfs_extent_refs_v0(eb, ei0);
6838 memset(&tmpl, 0, sizeof(tmpl));
6839 tmpl.start = key.objectid;
6840 tmpl.nr = num_bytes;
6841 tmpl.extent_item_refs = refs;
6842 tmpl.metadata = metadata;
6844 tmpl.max_size = num_bytes;
6846 return add_extent_rec(extent_cache, &tmpl);
6849 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6850 refs = btrfs_extent_refs(eb, ei);
6851 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6855 if (metadata && num_bytes != root->fs_info->nodesize) {
6856 error("ignore invalid metadata extent, length %llu does not equal to %u",
6857 num_bytes, root->fs_info->nodesize);
6860 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6861 error("ignore invalid data extent, length %llu is not aligned to %u",
6862 num_bytes, root->fs_info->sectorsize);
6866 memset(&tmpl, 0, sizeof(tmpl));
6867 tmpl.start = key.objectid;
6868 tmpl.nr = num_bytes;
6869 tmpl.extent_item_refs = refs;
6870 tmpl.metadata = metadata;
6872 tmpl.max_size = num_bytes;
6873 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; a tree block stored under
 * an EXTENT_ITEM key additionally has a btrfs_tree_block_info in between.
 */
6875 ptr = (unsigned long)(ei + 1);
6876 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6877 key.type == BTRFS_EXTENT_ITEM_KEY)
6878 ptr += sizeof(struct btrfs_tree_block_info);
6880 end = (unsigned long)ei + item_size;
6882 iref = (struct btrfs_extent_inline_ref *)ptr;
6883 type = btrfs_extent_inline_ref_type(eb, iref);
6884 offset = btrfs_extent_inline_ref_offset(eb, iref);
6886 case BTRFS_TREE_BLOCK_REF_KEY:
6887 ret = add_tree_backref(extent_cache, key.objectid,
6891 "add_tree_backref failed (extent items tree block): %s",
6894 case BTRFS_SHARED_BLOCK_REF_KEY:
6895 ret = add_tree_backref(extent_cache, key.objectid,
6899 "add_tree_backref failed (extent items shared block): %s",
6902 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref body overlays the offset field of the inline ref. */
6903 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6904 add_data_backref(extent_cache, key.objectid, 0,
6905 btrfs_extent_data_ref_root(eb, dref),
6906 btrfs_extent_data_ref_objectid(eb,
6908 btrfs_extent_data_ref_offset(eb, dref),
6909 btrfs_extent_data_ref_count(eb, dref),
6912 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref count is stored right after the inline ref header. */
6913 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6914 add_data_backref(extent_cache, key.objectid, offset,
6916 btrfs_shared_data_ref_count(eb, sref),
6920 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6921 key.objectid, key.type, num_bytes);
/* Advance by the on-disk size of the ref type that was just handled. */
6924 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range starting at @offset and spanning @bytes inside block
 * group @cache is described by a free space entry with exactly that offset
 * and length.
 *
 * Before the lookup, every superblock mirror is mapped into the block group
 * with btrfs_rmap_block() and any stripe overlapping the range is carved out
 * by adjusting @offset and @bytes. When a stripe lands in the middle of the
 * range, the left part is checked through a recursive call and the loop
 * continues with the right part. A matching entry is unlinked from the free
 * space ctl of the block group.
 */
6931 static int check_cache_range(struct btrfs_root *root,
6932 struct btrfs_block_group_cache *cache,
6933 u64 offset, u64 bytes)
6935 struct btrfs_free_space *entry;
6941 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6942 bytenr = btrfs_sb_offset(i);
6943 ret = btrfs_rmap_block(root->fs_info,
6944 cache->key.objectid, bytenr, 0,
6945 &logical, &nr, &stripe_len);
/* Stripe entirely before or entirely after the range: nothing to carve. */
6950 if (logical[nr] + stripe_len <= offset)
6952 if (offset + bytes <= logical[nr])
/* Stripe begins exactly at the start of the range. */
6954 if (logical[nr] == offset) {
6955 if (stripe_len >= bytes) {
6959 bytes -= stripe_len;
6960 offset += stripe_len;
/* Stripe begins before the range and overlaps its head. */
6961 } else if (logical[nr] < offset) {
6962 if (logical[nr] + stripe_len >=
6967 bytes = (offset + bytes) -
6968 (logical[nr] + stripe_len);
6969 offset = logical[nr] + stripe_len;
6972 * Could be tricky, the super may land in the
6973 * middle of the area we're checking. First
6974 * check the easiest case, it's at the end.
6976 if (logical[nr] + stripe_len >=
6978 bytes = logical[nr] - offset;
6982 /* Check the left side */
6983 ret = check_cache_range(root, cache,
6985 logical[nr] - offset);
6991 /* Now we continue with the right side */
6992 bytes = (offset + bytes) -
6993 (logical[nr] + stripe_len);
6994 offset = logical[nr] + stripe_len;
/* What is left of the range must match one free space entry exactly. */
7001 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7003 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7004 offset, offset+bytes);
7008 if (entry->offset != offset) {
7009 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7014 if (entry->bytes != bytes) {
7015 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7016 bytes, entry->bytes, offset);
/* Remove the verified entry from the free space ctl. */
7020 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space entries of block group @cache against
 * the extent tree.
 *
 * The extent tree is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET) up to the end of the block group. EXTENT_ITEM and
 * METADATA_ITEM keys advance the cursor named last past the extent
 * (key.offset bytes for EXTENT_ITEM, nodesize otherwise), and
 * check_cache_range() is called on the gap between last and the next extent.
 * The tail of the block group after the last extent is checked the same way.
 * Finally a message is printed about entries still present in the free space
 * rb-tree of the block group.
 */
7025 static int verify_space_cache(struct btrfs_root *root,
7026 struct btrfs_block_group_cache *cache)
7028 struct btrfs_path path;
7029 struct extent_buffer *leaf;
7030 struct btrfs_key key;
/* All lookups below are done in the extent tree, whatever root was passed. */
7034 root = root->fs_info->extent_root;
7036 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7038 btrfs_init_path(&path);
7039 key.objectid = last;
7041 key.type = BTRFS_EXTENT_ITEM_KEY;
7042 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Slot past the end of the current leaf: move on to the next leaf. */
7047 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7048 ret = btrfs_next_leaf(root, &path);
7056 leaf = path.nodes[0];
7057 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Key at or beyond the end of this block group. */
7058 if (key.objectid >= cache->key.offset + cache->key.objectid)
7060 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7061 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly at the cursor: no gap, just advance the cursor. */
7066 if (last == key.objectid) {
7067 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7068 last = key.objectid + key.offset;
7070 last = key.objectid + root->fs_info->nodesize;
/* Gap between the cursor and this extent: check it against the cache. */
7075 ret = check_cache_range(root, cache, last,
7076 key.objectid - last);
7079 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7080 last = key.objectid + key.offset;
7082 last = key.objectid + root->fs_info->nodesize;
/* Space between the last extent and the end of the block group. */
7086 if (last < cache->key.objectid + cache->key.offset)
7087 ret = check_cache_range(root, cache, last,
7088 cache->key.objectid +
7089 cache->key.offset - last);
7092 btrfs_release_path(&path);
7095 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7096 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free space information of the block groups, looking
 * them up from the end of the primary superblock onwards.
 *
 * A notice is printed when the superblock cache generation is not -1 and
 * differs from the superblock generation. For each block group the free
 * space ctl is initialised if missing, or otherwise emptied, then filled
 * from the free space tree (when the FREE_SPACE_TREE compat_ro bit is set,
 * with super stripes excluded during the load) or from the free space cache,
 * and finally checked with verify_space_cache().
 *
 * Returns -EINVAL when the local error indicator is non-zero, 0 otherwise.
 */
7104 static int check_space_cache(struct btrfs_root *root)
7106 struct btrfs_block_group_cache *cache;
7107 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7111 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7112 btrfs_super_generation(root->fs_info->super_copy) !=
7113 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7114 printf("cache and super generation don't match, space cache "
7115 "will be invalidated\n");
/* Progress reporting is optional and driven by the global task context. */
7119 if (ctx.progress_enabled) {
7120 ctx.tp = TASK_FREE_SPACE;
7121 task_start(ctx.info);
7125 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup starts right after the block group found here. */
7129 start = cache->key.objectid + cache->key.offset;
7130 if (!cache->free_space_ctl) {
7131 if (btrfs_init_free_space_ctl(cache,
7132 root->fs_info->sectorsize)) {
7137 btrfs_remove_free_space_cache(cache);
7140 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7141 ret = exclude_super_stripes(root, cache);
7143 fprintf(stderr, "could not exclude super stripes: %s\n",
7148 ret = load_free_space_tree(root->fs_info, cache);
7149 free_excluded_extents(root, cache);
7151 fprintf(stderr, "could not load free space tree: %s\n",
7158 ret = load_free_space_cache(root->fs_info, cache);
7163 ret = verify_space_cache(root, cache);
7165 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7166 cache->key.objectid);
7171 task_stop(ctx.info);
7173 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of the @num_bytes of data starting at logical
 * address @bytenr.
 *
 * The data is read with read_extent_data() into a malloc()ed buffer of
 * @num_bytes. For every sectorsize block the checksum is computed with
 * btrfs_csum_data()/btrfs_csum_final() and compared with the expected value
 * read from @eb at @leaf_offset plus the block index times the checksum
 * size. A mismatch is reported on stderr together with the mirror number,
 * and the number of copies is queried so another mirror can be considered.
 *
 * @num_bytes is tested against the sector size before anything is read.
 */
7176 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7177 u64 num_bytes, unsigned long leaf_offset,
7178 struct extent_buffer *eb) {
7180 struct btrfs_fs_info *fs_info = root->fs_info;
7182 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7184 unsigned long csum_offset;
7188 u64 data_checked = 0;
7194 if (num_bytes % fs_info->sectorsize)
7197 data = malloc(num_bytes);
7201 while (offset < num_bytes) {
7204 read_len = num_bytes - offset;
7205 /* read as much space once a time */
7206 ret = read_extent_data(fs_info, data + offset,
7207 bytenr + offset, &read_len, mirror);
7211 /* verify every 4k data's checksum */
7212 while (data_checked < read_len) {
/* tmp is the byte position of the current block within the whole range. */
7214 tmp = offset + data_checked;
7216 csum = btrfs_csum_data((char *)data + tmp,
7217 csum, fs_info->sectorsize);
7218 btrfs_csum_final(csum, (u8 *)&csum);
/* One csum_size slot per sectorsize block, starting at leaf_offset. */
7220 csum_offset = leaf_offset +
7221 tmp / fs_info->sectorsize * csum_size;
7222 read_extent_buffer(eb, (char *)&csum_expected,
7223 csum_offset, csum_size);
7224 /* try another mirror */
7225 if (csum != csum_expected) {
7226 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7227 mirror, bytenr + tmp,
7228 csum, csum_expected);
7229 num_copies = btrfs_num_copies(root->fs_info,
7231 if (mirror < num_copies - 1) {
7236 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range starting at @bytenr and spanning num_bytes is
 * covered by EXTENT_ITEM records in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps backwards over
 * keys with a type greater than EXTENT_ITEM. The tree is then walked
 * forward; every extent overlapping the range trims the part it covers off
 * bytenr/num_bytes. An extent lying in the middle of the range splits it:
 * the right part is checked through a recursive call and the walk continues
 * with the left part. If bytes remain uncovered and no error was recorded, a
 * message naming the uncovered range is printed.
 */
7245 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7248 struct btrfs_path path;
7249 struct extent_buffer *leaf;
7250 struct btrfs_key key;
7253 btrfs_init_path(&path);
7254 key.objectid = bytenr;
7255 key.type = BTRFS_EXTENT_ITEM_KEY;
7256 key.offset = (u64)-1;
7259 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7262 fprintf(stderr, "Error looking up extent record %d\n", ret);
7263 btrfs_release_path(&path);
7266 if (path.slots[0] > 0) {
7269 ret = btrfs_prev_leaf(root, &path);
7272 } else if (ret > 0) {
7279 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7282 * Block group items come before extent items if they have the same
7283 * bytenr, so walk back one more just in case. Dear future traveller,
7284 * first congrats on mastering time travel. Now if it's not too much
7285 * trouble could you go back to 2006 and tell Chris to make the
7286 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7287 * EXTENT_ITEM_KEY please?
7289 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7290 if (path.slots[0] > 0) {
7293 ret = btrfs_prev_leaf(root, &path);
7296 } else if (ret > 0) {
7301 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7305 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7306 ret = btrfs_next_leaf(root, &path);
7308 fprintf(stderr, "Error going to next leaf "
7310 btrfs_release_path(&path);
7316 leaf = path.nodes[0];
7317 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7318 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
7322 if (key.objectid + key.offset < bytenr) {
/* Extent starts after the range ends. */
7326 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the beginning of the range. */
7329 if (key.objectid == bytenr) {
7330 if (key.offset >= num_bytes) {
7334 num_bytes -= key.offset;
7335 bytenr += key.offset;
/* Extent starts before the range and overlaps its head. */
7336 } else if (key.objectid < bytenr) {
7337 if (key.objectid + key.offset >= bytenr + num_bytes) {
7341 num_bytes = (bytenr + num_bytes) -
7342 (key.objectid + key.offset);
7343 bytenr = key.objectid + key.offset;
7345 if (key.objectid + key.offset < bytenr + num_bytes) {
7346 u64 new_start = key.objectid + key.offset;
7347 u64 new_bytes = bytenr + num_bytes - new_start;
7350 * Weird case, the extent is in the middle of
7351 * our range, we'll have to search one side
7352 * and then the other. Not sure if this happens
7353 * in real life, but no harm in coding it up
7354 * anyway just in case.
7356 btrfs_release_path(&path);
7357 ret = check_extent_exists(root, new_start,
7360 fprintf(stderr, "Right section didn't "
7364 num_bytes = key.objectid - bytenr;
7367 num_bytes = key.objectid - bytenr;
/* Anything still uncovered at this point has no extent record. */
7374 if (num_bytes && !ret) {
7375 fprintf(stderr, "There are no extents for csum range "
7376 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7380 btrfs_release_path(&path);
/*
 * Walk the checksum tree and check that checksummed ranges are backed by
 * extent records.
 *
 * The walk starts at the (BTRFS_EXTENT_CSUM_OBJECTID, BTRFS_EXTENT_CSUM_KEY)
 * key. For each EXTENT_CSUM item the covered data length is the item size
 * divided by the checksum size, times the sector size. Unless the global
 * check_data_csum is clear, the data itself is verified with
 * check_extent_csums(). Consecutive items are accumulated into one range
 * (offset, num_bytes); when an item does not start where the accumulated
 * range ends, the range is passed to check_extent_exists() and a new range
 * is started at the key offset of that item.
 */
7384 static int check_csums(struct btrfs_root *root)
7386 struct btrfs_path path;
7387 struct extent_buffer *leaf;
7388 struct btrfs_key key;
7389 u64 offset = 0, num_bytes = 0;
7390 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7394 unsigned long leaf_offset;
/* From here on root refers to the checksum tree. */
7396 root = root->fs_info->csum_root;
7397 if (!extent_buffer_uptodate(root->node)) {
7398 fprintf(stderr, "No valid csum tree found\n");
7402 btrfs_init_path(&path);
7403 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7404 key.type = BTRFS_EXTENT_CSUM_KEY;
7406 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7408 fprintf(stderr, "Error searching csum tree %d\n", ret);
7409 btrfs_release_path(&path);
7413 if (ret > 0 && path.slots[0])
7418 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7419 ret = btrfs_next_leaf(root, &path);
7421 fprintf(stderr, "Error going to next leaf "
7428 leaf = path.nodes[0];
7430 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7431 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Number of data bytes described by the checksums stored in this item. */
7436 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7437 csum_size) * root->fs_info->sectorsize;
7438 if (!check_data_csum)
7439 goto skip_csum_check;
7440 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7441 ret = check_extent_csums(root, key.offset, data_len,
7447 offset = key.offset;
/* Discontiguous item: flush the accumulated range and start a new one. */
7448 } else if (key.offset != offset + num_bytes) {
7449 ret = check_extent_exists(root, offset, num_bytes);
7451 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7452 "there is no extent record\n",
7453 offset, offset+num_bytes);
7456 offset = key.offset;
7459 num_bytes += data_len;
7463 btrfs_release_path(&path);
/*
 * Compare @key against @drop_key field by field, in the order objectid,
 * type, offset. Each level uses a strict less-than test, and the next field
 * is only examined when the previous one is equal.
 *
 * NOTE(review): presumably returns non-zero when @key sorts before
 * @drop_key - confirm against the return statements.
 */
7467 static int is_dropped_key(struct btrfs_key *key,
7468 struct btrfs_key *drop_key) {
7469 if (key->objectid < drop_key->objectid)
7471 else if (key->objectid == drop_key->objectid) {
7472 if (key->type < drop_key->type)
7474 else if (key->type == drop_key->type) {
7475 if (key->offset < drop_key->offset)
7483 * Here are the rules for FULL_BACKREF.
7485 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7486 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7488 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7489 * if it happened after the relocation occurred since we'll have dropped the
7490 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7491 * have no real way to know for sure.
7493 * We process the blocks one root at a time, and we start from the lowest root
7494 * objectid and go to the highest. So we can just lookup the owner backref for
7495 * the record and if we don't find it then we know it doesn't exist and we have
7498 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7499 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7500 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to tree block @buf
 * when it is reached from the root described by @ri, and store the result in
 * the flags output argument.
 *
 * The extent record of the block is looked up in @extent_cache. The visible
 * tests look at: the root objectid being below BTRFS_FIRST_FREE_OBJECTID,
 * the block being the root node itself (buf->start == ri->bytenr), the RELOC
 * header flag, the header owner matching the root, and the presence of a
 * tree backref for the owner. On either outcome, a previously recorded
 * flag_block_full_backref that disagrees with the new result marks the
 * record with bad_full_backref.
 */
7502 static int calc_extent_flag(struct cache_tree *extent_cache,
7503 struct extent_buffer *buf,
7504 struct root_item_record *ri,
7507 struct extent_record *rec;
7508 struct cache_extent *cache;
7509 struct tree_backref *tback;
7512 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7513 /* we have added this extent before */
7517 rec = container_of(cache, struct extent_record, cache);
7520 * Except file/reloc tree, we can not have
7523 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7528 if (buf->start == ri->bytenr)
7531 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7534 owner = btrfs_header_owner(buf);
7535 if (owner == ri->objectid)
7538 tback = find_tree_backref(rec, 0, owner);
/* Earlier result was set and non-zero: it conflicts with this outcome. */
7543 if (rec->flag_block_full_backref != FLAG_UNSET &&
7544 rec->flag_block_full_backref != 0)
7545 rec->bad_full_backref = 1;
/* Full backref outcome: an earlier result other than 1 is a conflict. */
7548 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7549 if (rec->flag_block_full_backref != FLAG_UNSET &&
7550 rec->flag_block_full_backref != 1)
7551 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic on stderr naming a key type that was found in
 * a root where it does not belong. The key type and the root id are rendered
 * by print_key_type() and print_objectid().
 */
7555 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7557 fprintf(stderr, "Invalid key type(");
7558 print_key_type(stderr, 0, key_type);
7559 fprintf(stderr, ") found in root(");
7560 print_objectid(stderr, rootid, 0);
7561 fprintf(stderr, ")\n");
7565 * Check if the key is valid with its extent buffer.
7567 * This is an early check in case an invalid key exists in an extent buffer
7568 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that @key_type is allowed in the tree identified by @rootid.
 *
 * Visible rules:
 *  - DEV_ITEM and CHUNK_ITEM belong to the chunk tree;
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM belong to the extent
 *    tree;
 *  - ROOT_ITEM belongs to the root tree;
 *  - DEV_EXTENT belongs to the device tree.
 * A mismatch is reported through report_mismatch_key_root().
 */
7571 static int check_type_with_root(u64 rootid, u8 key_type)
7574 /* Only valid in chunk tree */
7575 case BTRFS_DEV_ITEM_KEY:
7576 case BTRFS_CHUNK_ITEM_KEY:
7577 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7580 /* valid in csum and log tree */
/*
 * NOTE(review): every other label here is a *_KEY constant, while this one
 * is a tree objectid. It looks like BTRFS_EXTENT_CSUM_KEY was intended -
 * confirm.
 */
7581 case BTRFS_CSUM_TREE_OBJECTID:
7582 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7586 case BTRFS_EXTENT_ITEM_KEY:
7587 case BTRFS_METADATA_ITEM_KEY:
7588 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7589 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7592 case BTRFS_ROOT_ITEM_KEY:
7593 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7596 case BTRFS_DEV_EXTENT_KEY:
7597 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7603 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block of the scan.
 *
 * Visible steps:
 *  1. pick_next_pending() selects a batch of blocks into @bits; each one is
 *     added to @reada and read ahead.
 *  2. The first block of the batch is the one processed now; its entries are
 *     removed from @pending, @reada and @nodes, and the parent generation is
 *     taken from its extent record in @extent_cache.
 *  3. The block is read with read_tree_block(); a block that is not uptodate
 *     is recorded through record_bad_block_io().
 *  4. The extent flags are obtained from btrfs_lookup_extent_info() and/or
 *     calc_extent_flag(), then cross-checked against the owner, the RELOC
 *     header flag and ri->last_snapshot; inconsistencies set
 *     rec->bad_full_backref.
 *  5. check_block() validates the block.
 *  6. For a leaf, every item is dispatched on its key type to the matching
 *     process_*() or add_*_backref() helper; file extent items update the
 *     data byte counters and add a data backref.
 *  7. For a node, every child pointer gets an extent record and a tree
 *     backref and is queued on @nodes or @pending.
 *  8. The global btree statistics are updated and the buffer is released.
 */
7607 static int run_next_block(struct btrfs_root *root,
7608 struct block_info *bits,
7611 struct cache_tree *pending,
7612 struct cache_tree *seen,
7613 struct cache_tree *reada,
7614 struct cache_tree *nodes,
7615 struct cache_tree *extent_cache,
7616 struct cache_tree *chunk_cache,
7617 struct rb_root *dev_cache,
7618 struct block_group_tree *block_group_cache,
7619 struct device_extent_tree *dev_extent_cache,
7620 struct root_item_record *ri)
7622 struct btrfs_fs_info *fs_info = root->fs_info;
7623 struct extent_buffer *buf;
7624 struct extent_record *rec = NULL;
7635 struct btrfs_key key;
7636 struct cache_extent *cache;
7639 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7640 bits_nr, &reada_bits);
7645 for(i = 0; i < nritems; i++) {
7646 ret = add_cache_extent(reada, bits[i].start,
7651 /* fixme, get the parent transid */
7652 readahead_tree_block(fs_info, bits[i].start, 0);
/* The first block of the batch is the one processed in this call. */
7655 *last = bits[0].start;
7656 bytenr = bits[0].start;
7657 size = bits[0].size;
/* Drop the block from every queue it may still be sitting in. */
7659 cache = lookup_cache_extent(pending, bytenr, size);
7661 remove_cache_extent(pending, cache);
7664 cache = lookup_cache_extent(reada, bytenr, size);
7666 remove_cache_extent(reada, cache);
7669 cache = lookup_cache_extent(nodes, bytenr, size);
7671 remove_cache_extent(nodes, cache);
7674 cache = lookup_cache_extent(extent_cache, bytenr, size);
7676 rec = container_of(cache, struct extent_record, cache);
7677 gen = rec->parent_generation;
7680 /* fixme, get the real parent transid */
7681 buf = read_tree_block(root->fs_info, bytenr, gen);
7682 if (!extent_buffer_uptodate(buf)) {
7683 record_bad_block_io(root->fs_info,
7684 extent_cache, bytenr, size);
7688 nritems = btrfs_header_nritems(buf);
/* With init_extent_tree set, flags are only derived by calc_extent_flag(). */
7691 if (!init_extent_tree) {
7692 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7693 btrfs_header_level(buf), 1, NULL,
7696 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7698 fprintf(stderr, "Couldn't calc extent flags\n");
7699 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7704 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7706 fprintf(stderr, "Couldn't calc extent flags\n");
7707 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7711 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7713 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7714 ri->objectid == btrfs_header_owner(buf)) {
7716 * Ok we got to this block from it's original owner and
7717 * we have FULL_BACKREF set. Relocation can leave
7718 * converted blocks over so this is altogether possible,
7719 * however it's not possible if the generation > the
7720 * last snapshot, so check for this case.
7722 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7723 btrfs_header_generation(buf) > ri->last_snapshot) {
7724 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7725 rec->bad_full_backref = 1;
7730 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7731 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7732 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7733 rec->bad_full_backref = 1;
/* Remember the final decision on the extent record. */
7737 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7738 rec->flag_block_full_backref = 1;
7742 rec->flag_block_full_backref = 0;
7744 owner = btrfs_header_owner(buf);
7747 ret = check_block(root, extent_cache, buf, flags);
7751 if (btrfs_is_leaf(buf)) {
7752 btree_space_waste += btrfs_leaf_free_space(root, buf);
7753 for (i = 0; i < nritems; i++) {
7754 struct btrfs_file_extent_item *fi;
7755 btrfs_item_key_to_cpu(buf, &key, i);
7757 * Check key type against the leaf owner.
7758 * Could filter quite a lot of early error if
7761 if (check_type_with_root(btrfs_header_owner(buf),
7763 fprintf(stderr, "ignoring invalid key\n");
7766 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7767 process_extent_item(root, extent_cache, buf,
7771 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7772 process_extent_item(root, extent_cache, buf,
7776 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7778 btrfs_item_size_nr(buf, i);
7781 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7782 process_chunk_item(chunk_cache, &key, buf, i);
7785 if (key.type == BTRFS_DEV_ITEM_KEY) {
7786 process_device_item(dev_cache, &key, buf, i);
7789 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7790 process_block_group_item(block_group_cache,
7794 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7795 process_device_extent_item(dev_extent_cache,
7800 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7801 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7802 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree block ref: key.offset is passed in the root position. */
7809 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7810 ret = add_tree_backref(extent_cache,
7811 key.objectid, 0, key.offset, 0);
7814 "add_tree_backref failed (leaf tree block): %s",
/* Keyed shared block ref: key.offset is passed in the parent position. */
7818 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7819 ret = add_tree_backref(extent_cache,
7820 key.objectid, key.offset, 0, 0);
7823 "add_tree_backref failed (leaf shared block): %s",
7827 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7828 struct btrfs_extent_data_ref *ref;
7829 ref = btrfs_item_ptr(buf, i,
7830 struct btrfs_extent_data_ref);
7831 add_data_backref(extent_cache,
7833 btrfs_extent_data_ref_root(buf, ref),
7834 btrfs_extent_data_ref_objectid(buf,
7836 btrfs_extent_data_ref_offset(buf, ref),
7837 btrfs_extent_data_ref_count(buf, ref),
7838 0, root->fs_info->sectorsize);
7841 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7842 struct btrfs_shared_data_ref *ref;
7843 ref = btrfs_item_ptr(buf, i,
7844 struct btrfs_shared_data_ref);
7845 add_data_backref(extent_cache,
7846 key.objectid, key.offset, 0, 0, 0,
7847 btrfs_shared_data_ref_count(buf, ref),
7848 0, root->fs_info->sectorsize);
/* Orphan items can be queued on the global delete_items list. */
7851 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7852 struct bad_item *bad;
7854 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7858 bad = malloc(sizeof(struct bad_item));
7861 INIT_LIST_HEAD(&bad->list);
7862 memcpy(&bad->key, &key,
7863 sizeof(struct btrfs_key));
7864 bad->root_id = owner;
7865 list_add_tail(&bad->list, &delete_items);
/* From here on the item is inspected as a file extent (EXTENT_DATA). */
7868 if (key.type != BTRFS_EXTENT_DATA_KEY)
7870 fi = btrfs_item_ptr(buf, i,
7871 struct btrfs_file_extent_item);
7872 if (btrfs_file_extent_type(buf, fi) ==
7873 BTRFS_FILE_EXTENT_INLINE)
7875 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7878 data_bytes_allocated +=
7879 btrfs_file_extent_disk_num_bytes(buf, fi);
7880 if (data_bytes_allocated < root->fs_info->sectorsize) {
7883 data_bytes_referenced +=
7884 btrfs_file_extent_num_bytes(buf, fi);
7885 add_data_backref(extent_cache,
7886 btrfs_file_extent_disk_bytenr(buf, fi),
7887 parent, owner, key.objectid, key.offset -
7888 btrfs_file_extent_offset(buf, fi), 1, 1,
7889 btrfs_file_extent_disk_num_bytes(buf, fi));
7893 struct btrfs_key first_key;
7895 first_key.objectid = 0;
7898 btrfs_item_key_to_cpu(buf, &first_key, 0);
7899 level = btrfs_header_level(buf);
7900 for (i = 0; i < nritems; i++) {
7901 struct extent_record tmpl;
7903 ptr = btrfs_node_blockptr(buf, i);
7904 size = root->fs_info->nodesize;
7905 btrfs_node_key_to_cpu(buf, &key, i);
/* Children are tested against the drop key of the root at its drop level. */
7907 if ((level == ri->drop_level)
7908 && is_dropped_key(&key, &ri->drop_key)) {
7913 memset(&tmpl, 0, sizeof(tmpl));
7914 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7915 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7920 tmpl.max_size = size;
7921 ret = add_extent_rec(extent_cache, &tmpl);
7925 ret = add_tree_backref(extent_cache, ptr, parent,
7929 "add_tree_backref failed (non-leaf block): %s",
7935 add_pending(nodes, seen, ptr, size);
7937 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of the node count as wasted btree space. */
7940 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7941 nritems) * sizeof(struct btrfs_key_ptr);
7943 total_btree_bytes += buf->len;
7944 if (fs_root_objectid(btrfs_header_owner(buf)))
7945 total_fs_tree_bytes += buf->len;
7946 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7947 total_extent_tree_bytes += buf->len;
/* Reloc-owned, mixed-backref block without the RELOC flag: note it once. */
7948 if (!found_old_backref &&
7949 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7950 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7951 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7952 found_old_backref = 1;
7954 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for scanning and register it in
 * @extent_cache.
 *
 * A block with a header level above 0 goes to @nodes, a leaf goes to
 * @pending. An extent record covering the buffer is added, followed by a
 * tree backref: the block start is passed in the parent position when the
 * tree is the reloc tree or the header backref revision predates
 * BTRFS_MIXED_BACKREF_REV, otherwise the parent is 0 and the objectid is
 * passed as the root.
 */
7958 static int add_root_to_pending(struct extent_buffer *buf,
7959 struct cache_tree *extent_cache,
7960 struct cache_tree *pending,
7961 struct cache_tree *seen,
7962 struct cache_tree *nodes,
7965 struct extent_record tmpl;
7968 if (btrfs_header_level(buf) > 0)
7969 add_pending(nodes, seen, buf->start, buf->len);
7971 add_pending(pending, seen, buf->start, buf->len);
7973 memset(&tmpl, 0, sizeof(tmpl));
7974 tmpl.start = buf->start;
7979 tmpl.max_size = buf->len;
7980 add_extent_rec(extent_cache, &tmpl);
7982 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7983 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7984 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7987 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7992 /* as we fix the tree, we might be deleting blocks that
7993 * we're tracking for repair. This hook makes sure we
7994 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in sync when an extent is freed.
 *
 * The record covering @bytenr is looked up in fs_info->fsck_extent_cache. An
 * @owner at or above BTRFS_FIRST_FREE_OBJECTID is treated as data: the
 * matching data backref has refs_to_drop subtracted from its found_ref and
 * num_refs counters, as well as from rec->refs and rec->extent_item_refs.
 * Otherwise the matching tree backref has its found flags cleared and
 * rec->extent_item_refs is decremented. maybe_free_extent_rec() is called on
 * the record at the end.
 */
7996 static int free_extent_hook(struct btrfs_trans_handle *trans,
7997 struct btrfs_root *root,
7998 u64 bytenr, u64 num_bytes, u64 parent,
7999 u64 root_objectid, u64 owner, u64 offset,
8002 struct extent_record *rec;
8003 struct cache_extent *cache;
8005 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8007 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8008 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8012 rec = container_of(cache, struct extent_record, cache);
8014 struct data_backref *back;
8015 back = find_data_backref(rec, parent, root_objectid, owner,
8016 offset, 1, bytenr, num_bytes);
8019 if (back->node.found_ref) {
8020 back->found_ref -= refs_to_drop;
8022 rec->refs -= refs_to_drop;
8024 if (back->node.found_extent_tree) {
8025 back->num_refs -= refs_to_drop;
8026 if (rec->extent_item_refs)
8027 rec->extent_item_refs -= refs_to_drop;
/* Clear the found flags once the matching counter has reached zero. */
8029 if (back->found_ref == 0)
8030 back->node.found_ref = 0;
8031 if (back->num_refs == 0)
8032 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked while found_ref is still set; a
 * backref with both flags clear stays on the list - confirm this is intended.
 */
8034 if (!back->node.found_extent_tree && back->node.found_ref) {
8035 list_del(&back->node.list);
8039 struct tree_backref *back;
8040 back = find_tree_backref(rec, parent, root_objectid);
8043 if (back->node.found_ref) {
8046 back->node.found_ref = 0;
8048 if (back->node.found_extent_tree) {
8049 if (rec->extent_item_refs)
8050 rec->extent_item_refs--;
8051 back->node.found_extent_tree = 0;
8053 if (!back->node.found_extent_tree && back->node.found_ref) {
8054 list_del(&back->node.list);
8058 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that describe the extent at bytenr.
 *
 * The extent tree is searched from the highest key offset for bytenr
 * downwards. An item whose objectid differs from bytenr is not a candidate.
 * An item of another key type is skipped by moving the search key just below
 * it (previous type with offset -1, or previous offset when the type is 0).
 * Extent items, metadata items and all backref key types are deleted with
 * btrfs_del_item(), with a message printed for each one. After an
 * EXTENT_ITEM or METADATA_ITEM is removed, btrfs_update_block_group() is
 * called for its byte count (key offset, or nodesize for metadata items).
 */
8063 static int delete_extent_records(struct btrfs_trans_handle *trans,
8064 struct btrfs_root *root,
8065 struct btrfs_path *path,
8068 struct btrfs_key key;
8069 struct btrfs_key found_key;
8070 struct extent_buffer *leaf;
8075 key.objectid = bytenr;
8077 key.offset = (u64)-1;
8080 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8087 if (path->slots[0] == 0)
8093 leaf = path->nodes[0];
8094 slot = path->slots[0];
8096 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8097 if (found_key.objectid != bytenr)
/* Not an extent or backref item: step the search key below it. */
8100 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8101 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8102 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8103 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8104 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8105 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8106 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8107 btrfs_release_path(path);
8108 if (found_key.type == 0) {
8109 if (found_key.offset == 0)
8111 key.offset = found_key.offset - 1;
8112 key.type = found_key.type;
8114 key.type = found_key.type - 1;
8115 key.offset = (u64)-1;
8119 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8120 found_key.objectid, found_key.type, found_key.offset);
8122 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8125 btrfs_release_path(path);
/* Removing the extent item itself also adjusts block group accounting. */
8127 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8128 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8129 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8130 found_key.offset : root->fs_info->nodesize;
8132 ret = btrfs_update_block_group(trans, root, bytenr,
8139 btrfs_release_path(path);
8144 * for a single backref, this will allocate a new extent
8145 * and add the backref to it.
/*
 * Write the extent item for @rec into the extent tree when needed and add
 * the reference described by @back.
 *
 * Visible steps:
 *  - an EXTENT_ITEM keyed (rec->start, rec->max_size) is inserted with a ref
 *    count of 0 and the generation of the record; for a tree block the item
 *    is followed by a btrfs_tree_block_info carrying rec->info_level and a
 *    key built from rec->info_objectid, and @flags is OR-ed into the extent
 *    flags;
 *  - btrfs_update_block_group() is called for the extent range;
 *  - for a data backref, btrfs_inc_extent_ref() is called once per found
 *    reference; for a tree backref it is called once. A message describing
 *    the added backref is printed in both cases.
 */
8147 static int record_extent(struct btrfs_trans_handle *trans,
8148 struct btrfs_fs_info *info,
8149 struct btrfs_path *path,
8150 struct extent_record *rec,
8151 struct extent_backref *back,
8152 int allocated, u64 flags)
8155 struct btrfs_root *extent_root = info->extent_root;
8156 struct extent_buffer *leaf;
8157 struct btrfs_key ins_key;
8158 struct btrfs_extent_item *ei;
8159 struct data_backref *dback;
8160 struct btrfs_tree_block_info *bi;
8163 rec->max_size = max_t(u64, rec->max_size,
8167 u32 item_size = sizeof(*ei);
/* Extra room for the tree block info that follows the extent item. */
8170 item_size += sizeof(*bi);
8172 ins_key.objectid = rec->start;
8173 ins_key.offset = rec->max_size;
8174 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8176 ret = btrfs_insert_empty_item(trans, extent_root, path,
8177 &ins_key, item_size);
8181 leaf = path->nodes[0];
8182 ei = btrfs_item_ptr(leaf, path->slots[0],
8183 struct btrfs_extent_item);
/* The item is written with zero refs at this point. */
8185 btrfs_set_extent_refs(leaf, ei, 0);
8186 btrfs_set_extent_generation(leaf, ei, rec->generation);
8188 if (back->is_data) {
8189 btrfs_set_extent_flags(leaf, ei,
8190 BTRFS_EXTENT_FLAG_DATA);
8192 struct btrfs_disk_key copy_key;;
8194 bi = (struct btrfs_tree_block_info *)(ei + 1);
8195 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): the ©_key arguments below look like a mis-encoded &copy_key
 * (address of the local copy_key declared above) - confirm and restore.
 */
8198 btrfs_set_disk_key_objectid(©_key,
8199 rec->info_objectid);
8200 btrfs_set_disk_key_type(©_key, 0);
8201 btrfs_set_disk_key_offset(©_key, 0);
8203 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8204 btrfs_set_tree_block_key(leaf, bi, ©_key);
8206 btrfs_set_extent_flags(leaf, ei,
8207 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8210 btrfs_mark_buffer_dirty(leaf);
8211 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8212 rec->max_size, 1, 0);
8215 btrfs_release_path(path);
8218 if (back->is_data) {
8222 dback = to_data_backref(back);
8223 if (back->full_backref)
8224 parent = dback->parent;
/* One btrfs_inc_extent_ref() call per reference found for this backref. */
8228 for (i = 0; i < dback->found_ref; i++) {
8229 /* if parent != 0, we're doing a full backref
8230 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8231 * just makes the backref allocator create a data
8234 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8235 rec->start, rec->max_size,
8239 BTRFS_FIRST_FREE_OBJECTID :
8245 fprintf(stderr, "adding new data backref"
8246 " on %llu %s %llu owner %llu"
8247 " offset %llu found %d\n",
8248 (unsigned long long)rec->start,
8249 back->full_backref ?
8251 back->full_backref ?
8252 (unsigned long long)parent :
8253 (unsigned long long)dback->root,
8254 (unsigned long long)dback->owner,
8255 (unsigned long long)dback->offset,
8259 struct tree_backref *tback;
8261 tback = to_tree_backref(back);
8262 if (back->full_backref)
8263 parent = tback->parent;
8267 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8268 rec->start, rec->max_size,
8269 parent, tback->root, 0, 0);
8270 fprintf(stderr, "adding new tree backref on "
8271 "start %llu len %llu parent %llu root %llu\n",
8272 rec->start, rec->max_size, parent, tback->root);
8275 btrfs_release_path(path);
/*
 * find_entry - look up an extent_entry by exact range.
 *
 * @entries: list of struct extent_entry linked through their list member
 * @bytenr:  start of the range to match
 * @bytes:   length of the range to match
 *
 * An entry matches only when both bytenr and bytes are equal.  Presumably
 * the matching entry is returned, or NULL when there is none; the caller
 * verify_backrefs() tests the result with !entry - TODO confirm.
 */
8279 static struct extent_entry *find_entry(struct list_head *entries,
8280 u64 bytenr, u64 bytes)
8282 struct extent_entry *entry = NULL;
8284 list_for_each_entry(entry, entries, list) {
8285 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - pick the entry that most backrefs agree on.
 *
 * @entries: list of struct extent_entry built by verify_backrefs()
 *
 * Entries whose broken counter equals their count are not trusted.  Among
 * the remaining ones the entry with the larger count is preferred; when two
 * candidates have the same count neither can be preferred and the scan has
 * to keep going.  verify_backrefs() stores the result in its best pointer;
 * presumably NULL is returned when no clear winner exists - TODO confirm.
 */
8292 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8294 struct extent_entry *entry, *best = NULL, *prev = NULL;
8296 list_for_each_entry(entry, entries, list) {
8298 * If there are as many broken entries as entries then we know
8299 * not to trust this particular entry.
8301 if (entry->broken == entry->count)
8305 * Special case, when there are only two entries and 'best' is
8315 * If our current entry == best then we can't be sure our best
8316 * is really the best, so we need to keep searching.
8318 if (best && best->count == entry->count) {
8324 /* Prev == entry, not good enough, have to keep searching */
8325 if (!prev->broken && prev->count == entry->count)
8329 best = (prev->count > entry->count) ? prev : entry;
8330 else if (best->count < entry->count)
/*
 * repair_ref - rewrite one file extent item so it matches @entry.
 *
 * @info:  filesystem info used to read the fs root named by dback->root
 * @path:  scratch path, released before returning
 * @dback: data backref describing the file extent that disagrees
 * @entry: the (bytenr, bytes) range the file extent should point at
 *
 * The file extent is located by searching from (dback->owner,
 * BTRFS_EXTENT_DATA_KEY, dback->offset) and walking forward until an item
 * with disk_bytenr == dback->disk_bytenr and disk_num_bytes == dback->bytes
 * is found.  A transaction is then started, the item is looked up again with
 * COW enabled and its disk_bytenr, offset and disk_num_bytes are adjusted to
 * the range in @entry.  ram_bytes is only rewritten for uncompressed
 * extents.  Compressed extents whose start differs from entry->bytenr, and
 * refs that would fall outside the entry, are reported and left alone.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started; the final
 * statement returns ret if it is non-zero and otherwise the result of
 * btrfs_commit_transaction().
 */
8338 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8339 struct data_backref *dback, struct extent_entry *entry)
8341 struct btrfs_trans_handle *trans;
8342 struct btrfs_root *root;
8343 struct btrfs_file_extent_item *fi;
8344 struct extent_buffer *leaf;
8345 struct btrfs_key key;
/* Read the fs root that the backref names as its owner tree. */
8349 key.objectid = dback->root;
8350 key.type = BTRFS_ROOT_ITEM_KEY;
8351 key.offset = (u64)-1;
8352 root = btrfs_read_fs_root(info, &key);
8354 fprintf(stderr, "Couldn't find root for our ref\n");
8359 * The backref points to the original offset of the extent if it was
8360 * split, so we need to search down to the offset we have and then walk
8361 * forward until we find the backref we're looking for.
8363 key.objectid = dback->owner;
8364 key.type = BTRFS_EXTENT_DATA_KEY;
8365 key.offset = dback->offset;
8366 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8368 fprintf(stderr, "Error looking up ref %d\n", ret);
8373 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8374 ret = btrfs_next_leaf(root, path);
8376 fprintf(stderr, "Couldn't find our ref, next\n");
8380 leaf = path->nodes[0];
8381 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8382 if (key.objectid != dback->owner ||
8383 key.type != BTRFS_EXTENT_DATA_KEY) {
8384 fprintf(stderr, "Couldn't find our ref, search\n");
8387 fi = btrfs_item_ptr(leaf, path->slots[0],
8388 struct btrfs_file_extent_item);
8389 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8390 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8392 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8397 btrfs_release_path(path);
8399 trans = btrfs_start_transaction(root, 1);
8401 return PTR_ERR(trans);
8404 * Ok we have the key of the file extent we want to fix, now we can cow
8405 * down to the thing and fix it.
8407 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8409 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8410 key.objectid, key.type, key.offset, ret);
8414 fprintf(stderr, "Well that's odd, we just found this key "
8415 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8420 leaf = path->nodes[0];
8421 fi = btrfs_item_ptr(leaf, path->slots[0],
8422 struct btrfs_file_extent_item);
8424 if (btrfs_file_extent_compression(leaf, fi) &&
8425 dback->disk_bytenr != entry->bytenr) {
8426 fprintf(stderr, "Ref doesn't match the record start and is "
8427 "compressed, please take a btrfs-image of this file "
8428 "system and send it to a btrfs developer so they can "
8429 "complete this functionality for bytenr %Lu\n",
8430 dback->disk_bytenr);
8435 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8436 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8437 } else if (dback->disk_bytenr > entry->bytenr) {
8438 u64 off_diff, offset;
8440 off_diff = dback->disk_bytenr - entry->bytenr;
8441 offset = btrfs_file_extent_offset(leaf, fi);
8442 if (dback->disk_bytenr + offset +
8443 btrfs_file_extent_num_bytes(leaf, fi) >
8444 entry->bytenr + entry->bytes) {
8445 fprintf(stderr, "Ref is past the entry end, please "
8446 "take a btrfs-image of this file system and "
8447 "send it to a btrfs developer, ref %Lu\n",
8448 dback->disk_bytenr);
8453 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8454 btrfs_set_file_extent_offset(leaf, fi, offset);
8455 } else if (dback->disk_bytenr < entry->bytenr) {
8458 offset = btrfs_file_extent_offset(leaf, fi);
8459 if (dback->disk_bytenr + offset < entry->bytenr) {
8460 fprintf(stderr, "Ref is before the entry start, please"
8461 " take a btrfs-image of this file system and "
8462 "send it to a btrfs developer, ref %Lu\n",
8463 dback->disk_bytenr);
8468 offset += dback->disk_bytenr;
8469 offset -= entry->bytenr;
8470 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8471 btrfs_set_file_extent_offset(leaf, fi, offset);
8474 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8477 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8478 * only do this if we aren't using compression, otherwise it's a
8481 if (!btrfs_file_extent_compression(leaf, fi))
8482 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8484 printf("ram bytes may be wrong?\n");
8485 btrfs_mark_buffer_dirty(leaf);
8487 err = btrfs_commit_transaction(trans, root);
8488 btrfs_release_path(path);
8489 return ret ? ret : err;
/*
 * verify_backrefs - make the data backrefs of @rec agree on bytenr and size.
 *
 * @info: filesystem info, handed on to repair_ref()
 * @path: scratch path, handed on to repair_ref()
 * @rec:  extent record whose backrefs are checked
 *
 * Every non-full data backref with found_ref set is grouped into a local
 * list of extent_entry objects keyed by (disk_bytenr, bytes).  When there is
 * at most one entry and nothing mismatches rec->start / rec->nr, there is
 * nothing to repair.  Otherwise find_most_right_entry() selects the range to
 * trust, the extent record itself is added as one more entry when the
 * backrefs alone cannot decide, and repair_ref() is called for every
 * backref that disagrees with the chosen range.  The case where the chosen
 * range does not start at rec->start is reported and not repaired.  The
 * entry list is emptied before the function ends.
 *
 * NOTE(review): check_extent_refs() treats -EAGAIN from the repair path as
 * non-fatal; presumably this function returns it after modifying refs so
 * that the caller rescans - TODO confirm.
 */
8492 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8493 struct extent_record *rec)
8495 struct extent_backref *back;
8496 struct data_backref *dback;
8497 struct extent_entry *entry, *best = NULL;
8500 int broken_entries = 0;
8505 * Metadata is easy and the backrefs should always agree on bytenr and
8506 * size, if not we've got bigger issues.
8511 list_for_each_entry(back, &rec->backrefs, list) {
8512 if (back->full_backref || !back->is_data)
8515 dback = to_data_backref(back);
8518 * We only pay attention to backrefs that we found a real
8521 if (dback->found_ref == 0)
8525 * For now we only catch when the bytes don't match, not the
8526 * bytenr. We can easily do this at the same time, but I want
8527 * to have a fs image to test on before we just add repair
8528 * functionality willy-nilly so we know we won't screw up the
8532 entry = find_entry(&entries, dback->disk_bytenr,
8535 entry = malloc(sizeof(struct extent_entry));
8540 memset(entry, 0, sizeof(*entry));
8541 entry->bytenr = dback->disk_bytenr;
8542 entry->bytes = dback->bytes;
8543 list_add_tail(&entry->list, &entries);
8548 * If we only have on entry we may think the entries agree when
8549 * in reality they don't so we have to do some extra checking.
8551 if (dback->disk_bytenr != rec->start ||
8552 dback->bytes != rec->nr || back->broken)
8563 /* Yay all the backrefs agree, carry on good sir */
8564 if (nr_entries <= 1 && !mismatch)
8567 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8568 "%Lu\n", rec->start);
8571 * First we want to see if the backrefs can agree amongst themselves who
8572 * is right, so figure out which one of the entries has the highest
8575 best = find_most_right_entry(&entries);
8578 * Ok so we may have an even split between what the backrefs think, so
8579 * this is where we use the extent ref to see what it thinks.
8582 entry = find_entry(&entries, rec->start, rec->nr);
8583 if (!entry && (!broken_entries || !rec->found_rec)) {
8584 fprintf(stderr, "Backrefs don't agree with each other "
8585 "and extent record doesn't agree with anybody,"
8586 " so we can't fix bytenr %Lu bytes %Lu\n",
8587 rec->start, rec->nr);
8590 } else if (!entry) {
8592 * Ok our backrefs were broken, we'll assume this is the
8593 * correct value and add an entry for this range.
8595 entry = malloc(sizeof(struct extent_entry));
8600 memset(entry, 0, sizeof(*entry));
8601 entry->bytenr = rec->start;
8602 entry->bytes = rec->nr;
8603 list_add_tail(&entry->list, &entries);
8607 best = find_most_right_entry(&entries);
8609 fprintf(stderr, "Backrefs and extent record evenly "
8610 "split on who is right, this is going to "
8611 "require user input to fix bytenr %Lu bytes "
8612 "%Lu\n", rec->start, rec->nr);
8619 * I don't think this can happen currently as we'll abort() if we catch
8620 * this case higher up, but in case somebody removes that we still can't
8621 * deal with it properly here yet, so just bail out of that's the case.
8623 if (best->bytenr != rec->start) {
8624 fprintf(stderr, "Extent start and backref starts don't match, "
8625 "please use btrfs-image on this file system and send "
8626 "it to a btrfs developer so they can make fsck fix "
8627 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8628 rec->start, rec->nr);
8634 * Ok great we all agreed on an extent record, let's go find the real
8635 * references and fix up the ones that don't match.
8637 list_for_each_entry(back, &rec->backrefs, list) {
8638 if (back->full_backref || !back->is_data)
8641 dback = to_data_backref(back);
8644 * Still ignoring backrefs that don't have a real ref attached
8647 if (dback->found_ref == 0)
8650 if (dback->bytes == best->bytes &&
8651 dback->disk_bytenr == best->bytenr)
8654 ret = repair_ref(info, path, dback, best);
8660 * Ok we messed with the actual refs, which means we need to drop our
8661 * entire cache and go back and rescan. I know this is a huge pain and
8662 * adds a lot of extra work, but it's the only way to be safe. Once all
8663 * the backrefs agree we may not need to do anything to the extent
8668 while (!list_empty(&entries)) {
8669 entry = list_entry(entries.next, struct extent_entry, list);
8670 list_del_init(&entry->list);
/*
 * process_duplicates - decide whether @rec really has duplicate extent items.
 *
 * @extent_cache: cache tree holding the extent records
 * @rec:          record taken off the duplicate_extents list
 *
 * Nothing is reshuffled when @rec already has found_rec set or has more than
 * one duplicate.  Otherwise the single duplicate replaces @rec in the cache:
 * it takes over the refs and backrefs of @rec, and every other cached record
 * overlapping its range is either chained onto its dups list (when that
 * record has found_rec set or has duplicates of its own) or merged into it
 * by adding its refs and backrefs.
 *
 * The last statement returns 0 when the surviving record still has
 * duplicates that need deleting and 1 when none are left.
 */
8676 static int process_duplicates(struct cache_tree *extent_cache,
8677 struct extent_record *rec)
8679 struct extent_record *good, *tmp;
8680 struct cache_extent *cache;
8684 * If we found a extent record for this extent then return, or if we
8685 * have more than one duplicate we are likely going to need to delete
8688 if (rec->found_rec || rec->num_duplicates > 1)
8691 /* Shouldn't happen but just in case */
8692 BUG_ON(!rec->num_duplicates);
8695 * So this happens if we end up with a backref that doesn't match the
8696 * actual extent entry. So either the backref is bad or the extent
8697 * entry is bad. Either way we want to have the extent_record actually
8698 * reflect what we found in the extent_tree, so we need to take the
8699 * duplicate out and use that as the extent_record since the only way we
8700 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8702 remove_cache_extent(extent_cache, &rec->cache);
8704 good = to_extent_record(rec->dups.next);
8705 list_del_init(&good->list);
8706 INIT_LIST_HEAD(&good->backrefs);
8707 INIT_LIST_HEAD(&good->dups);
8708 good->cache.start = good->start;
8709 good->cache.size = good->nr;
8710 good->content_checked = 0;
8711 good->owner_ref_checked = 0;
8712 good->num_duplicates = 0;
8713 good->refs = rec->refs;
8714 list_splice_init(&rec->backrefs, &good->backrefs);
8716 cache = lookup_cache_extent(extent_cache, good->start,
8720 tmp = container_of(cache, struct extent_record, cache);
8723 * If we find another overlapping extent and it's found_rec is
8724 * set then it's a duplicate and we need to try and delete
8727 if (tmp->found_rec || tmp->num_duplicates > 0) {
8728 if (list_empty(&good->list))
8729 list_add_tail(&good->list,
8730 &duplicate_extents);
8731 good->num_duplicates += tmp->num_duplicates + 1;
8732 list_splice_init(&tmp->dups, &good->dups);
8733 list_del_init(&tmp->list);
8734 list_add_tail(&tmp->list, &good->dups);
8735 remove_cache_extent(extent_cache, &tmp->cache);
8740 * Ok we have another non extent item backed extent rec, so lets
8741 * just add it to this extent and carry on like we did above.
8743 good->refs += tmp->refs;
8744 list_splice_init(&tmp->backrefs, &good->backrefs);
8745 remove_cache_extent(extent_cache, &tmp->cache);
8748 ret = insert_cache_extent(extent_cache, &good->cache);
8751 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - remove the redundant extent items of @rec.
 *
 * @root: any root of the filesystem; the work is done on
 *        root->fs_info->extent_root
 * @rec:  extent record whose dups list holds the overlapping records
 *
 * The records on rec->dups are compared by start and length to find the one
 * that covers all the others; partially overlapping extents are reported
 * and not handled.  The remaining records are collected on a local delete
 * list and their BTRFS_EXTENT_ITEM_KEY items are deleted from the extent
 * tree inside one transaction.  Metadata records are not expected here and
 * are reported.  Both lists are emptied before returning.
 *
 * rec->num_duplicates is cleared when nothing failed and nothing was
 * deleted.  Returns ret when it is non-zero, otherwise nr_del, which
 * check_extent_refs() documents as the number of entries deleted.
 */
8754 static int delete_duplicate_records(struct btrfs_root *root,
8755 struct extent_record *rec)
8757 struct btrfs_trans_handle *trans;
8758 LIST_HEAD(delete_list);
8759 struct btrfs_path path;
8760 struct extent_record *tmp, *good, *n;
8763 struct btrfs_key key;
8765 btrfs_init_path(&path);
8768 /* Find the record that covers all of the duplicates. */
8769 list_for_each_entry(tmp, &rec->dups, list) {
8770 if (good->start < tmp->start)
8772 if (good->nr > tmp->nr)
8775 if (tmp->start + tmp->nr < good->start + good->nr) {
8776 fprintf(stderr, "Ok we have overlapping extents that "
8777 "aren't completely covered by each other, this "
8778 "is going to require more careful thought. "
8779 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8780 tmp->start, tmp->nr, good->start, good->nr);
8787 list_add_tail(&rec->list, &delete_list);
8789 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8792 list_move_tail(&tmp->list, &delete_list);
/* All deletions are done on the extent root. */
8795 root = root->fs_info->extent_root;
8796 trans = btrfs_start_transaction(root, 1);
8797 if (IS_ERR(trans)) {
8798 ret = PTR_ERR(trans);
8802 list_for_each_entry(tmp, &delete_list, list) {
8803 if (tmp->found_rec == 0)
8805 key.objectid = tmp->start;
8806 key.type = BTRFS_EXTENT_ITEM_KEY;
8807 key.offset = tmp->nr;
8809 /* Shouldn't happen but just in case */
8810 if (tmp->metadata) {
8811 fprintf(stderr, "Well this shouldn't happen, extent "
8812 "record overlaps but is metadata? "
8813 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8817 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8823 ret = btrfs_del_item(trans, root, &path);
8826 btrfs_release_path(&path);
8829 err = btrfs_commit_transaction(trans, root);
8833 while (!list_empty(&delete_list)) {
8834 tmp = to_extent_record(delete_list.next);
8835 list_del_init(&tmp->list);
8841 while (!list_empty(&rec->dups)) {
8842 tmp = to_extent_record(rec->dups.next);
8843 list_del_init(&tmp->list);
8847 btrfs_release_path(&path);
8849 if (!ret && !nr_del)
8850 rec->num_duplicates = 0;
8852 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - look for file extents behind unmatched backrefs.
 *
 * @info:         filesystem info used to read the fs roots
 * @path:         scratch path, released after every lookup
 * @extent_cache: cache tree of extent records, consulted by disk bytenr
 * @rec:          extent record whose backrefs are examined
 *
 * Only non-full data backrefs with found_ref == 0 are considered.  For each
 * one the root named by dback->root is read and the file extent item at
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is looked up.  When
 * it exists, its disk_bytenr and disk_num_bytes are copied into the backref
 * and found_ref is incremented so that verify_backrefs() can take it into
 * account.  Per the comments below, a backref whose bytenr already has its
 * own extent record is left alone, and adopted backrefs are flagged as not
 * trustworthy.
 *
 * A root that does not exist (-ENOENT) is skipped; any other error from
 * btrfs_read_fs_root() is returned.
 */
8855 static int find_possible_backrefs(struct btrfs_fs_info *info,
8856 struct btrfs_path *path,
8857 struct cache_tree *extent_cache,
8858 struct extent_record *rec)
8860 struct btrfs_root *root;
8861 struct extent_backref *back;
8862 struct data_backref *dback;
8863 struct cache_extent *cache;
8864 struct btrfs_file_extent_item *fi;
8865 struct btrfs_key key;
8869 list_for_each_entry(back, &rec->backrefs, list) {
8870 /* Don't care about full backrefs (poor unloved backrefs) */
8871 if (back->full_backref || !back->is_data)
8874 dback = to_data_backref(back);
8876 /* We found this one, we don't need to do a lookup */
8877 if (dback->found_ref)
8880 key.objectid = dback->root;
8881 key.type = BTRFS_ROOT_ITEM_KEY;
8882 key.offset = (u64)-1;
8884 root = btrfs_read_fs_root(info, &key);
8886 /* No root, definitely a bad ref, skip */
8887 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8889 /* Other err, exit */
8891 return PTR_ERR(root);
8893 key.objectid = dback->owner;
8894 key.type = BTRFS_EXTENT_DATA_KEY;
8895 key.offset = dback->offset;
8896 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8898 btrfs_release_path(path);
8901 /* Didn't find it, we can carry on */
8906 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8907 struct btrfs_file_extent_item);
8908 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8909 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8910 btrfs_release_path(path);
8911 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8913 struct extent_record *tmp;
8914 tmp = container_of(cache, struct extent_record, cache);
8917 * If we found an extent record for the bytenr for this
8918 * particular backref then we can't add it to our
8919 * current extent record. We only want to add backrefs
8920 * that don't have a corresponding extent item in the
8921 * extent tree since they likely belong to this record
8922 * and we need to fix it if it doesn't match bytenrs.
8928 dback->found_ref += 1;
8929 dback->disk_bytenr = bytenr;
8930 dback->bytes = bytes;
8933 * Set this so the verify backref code knows not to trust the
8934 * values in this backref.
8943 * Record orphan data ref into corresponding root.
8945 * Return 0 if the extent item contains data ref and recorded.
8946 * Return 1 if the extent item contains no useful data ref
8947 * On that case, it may contains only shared_dataref or metadata backref
8948 * or the file extent exists(this should be handled by the extent bytenr
8950 * Return <0 if something goes wrong.
8952 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8953 struct extent_record *rec)
8955 struct btrfs_key key;
8956 struct btrfs_root *dest_root;
8957 struct extent_backref *back;
8958 struct data_backref *dback;
8959 struct orphan_data_extent *orphan;
8960 struct btrfs_path path;
8961 int recorded_data_ref = 0;
8966 btrfs_init_path(&path);
8967 list_for_each_entry(back, &rec->backrefs, list) {
8968 if (back->full_backref || !back->is_data ||
8969 !back->found_extent_tree)
8971 dback = to_data_backref(back);
8972 if (dback->found_ref)
8974 key.objectid = dback->root;
8975 key.type = BTRFS_ROOT_ITEM_KEY;
8976 key.offset = (u64)-1;
8978 dest_root = btrfs_read_fs_root(fs_info, &key);
8980 /* For non-exist root we just skip it */
8981 if (IS_ERR(dest_root) || !dest_root)
8984 key.objectid = dback->owner;
8985 key.type = BTRFS_EXTENT_DATA_KEY;
8986 key.offset = dback->offset;
8988 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8989 btrfs_release_path(&path);
8991 * For ret < 0, it's OK since the fs-tree may be corrupted,
8992 * we need to record it for inode/file extent rebuild.
8993 * For ret > 0, we record it only for file extent rebuild.
8994 * For ret == 0, the file extent exists but only bytenr
8995 * mismatch, let the original bytenr fix routine to handle,
9001 orphan = malloc(sizeof(*orphan));
9006 INIT_LIST_HEAD(&orphan->list);
9007 orphan->root = dback->root;
9008 orphan->objectid = dback->owner;
9009 orphan->offset = dback->offset;
9010 orphan->disk_bytenr = rec->cache.start;
9011 orphan->disk_len = rec->cache.size;
9012 list_add(&dest_root->orphan_data_extents, &orphan->list);
9013 recorded_data_ref = 1;
9016 btrfs_release_path(&path);
9018 return !recorded_data_ref;
9024 * when an incorrect extent item is found, this will delete
9025 * all of the existing entries for it and recreate them
9026 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree entries of one extent record.
 *
 * @info:         filesystem info; all changes go to info->extent_root
 * @extent_cache: cache tree of extent records, handed to
 *                find_possible_backrefs()
 * @rec:          extent record to repair
 *
 * Steps, in order:
 *  - for data extents whose reference count differs from the extent item,
 *    run find_possible_backrefs() and then verify_backrefs() so that the
 *    backrefs agree with each other first;
 *  - start a transaction and delete the existing records with
 *    delete_extent_records();
 *  - look the range up in info->corrupt_blocks; per the comment below no
 *    references are added for a corrupt block;
 *  - call record_extent() for the backrefs that have found_ref set, passing
 *    BTRFS_BLOCK_FLAG_FULL_BACKREF when rec->flag_block_full_backref is set;
 *  - commit the transaction and report the repair on stderr.
 *
 * rec->bad_full_backref is cleared before each record_extent() call.
 */
9028 static int fixup_extent_refs(struct btrfs_fs_info *info,
9029 struct cache_tree *extent_cache,
9030 struct extent_record *rec)
9032 struct btrfs_trans_handle *trans = NULL;
9034 struct btrfs_path path;
9035 struct list_head *cur = rec->backrefs.next;
9036 struct cache_extent *cache;
9037 struct extent_backref *back;
9041 if (rec->flag_block_full_backref)
9042 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9044 btrfs_init_path(&path);
9045 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9047 * Sometimes the backrefs themselves are so broken they don't
9048 * get attached to any meaningful rec, so first go back and
9049 * check any of our backrefs that we couldn't find and throw
9050 * them into the list if we find the backref so that
9051 * verify_backrefs can figure out what to do.
9053 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9058 /* step one, make sure all of the backrefs agree */
9059 ret = verify_backrefs(info, &path, rec);
9063 trans = btrfs_start_transaction(info->extent_root, 1);
9064 if (IS_ERR(trans)) {
9065 ret = PTR_ERR(trans);
9069 /* step two, delete all the existing records */
9070 ret = delete_extent_records(trans, info->extent_root, &path,
9076 /* was this block corrupt? If so, don't add references to it */
9077 cache = lookup_cache_extent(info->corrupt_blocks,
9078 rec->start, rec->max_size);
9084 /* step three, recreate all the refs we did find */
9085 while(cur != &rec->backrefs) {
9086 back = to_extent_backref(cur);
9090 * if we didn't find any references, don't create a
9093 if (!back->found_ref)
9096 rec->bad_full_backref = 0;
9097 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9105 int err = btrfs_commit_transaction(trans, info->extent_root);
9111 fprintf(stderr, "Repaired extent references for %llu\n",
9112 (unsigned long long)rec->start);
9114 btrfs_release_path(&path);
/*
 * fixup_extent_flags - correct the full-backref flag of an extent item.
 *
 * @fs_info: filesystem info; the item is looked up in fs_info->extent_root
 * @rec:     extent record naming the item and the wanted flag state
 *
 * The item is searched with COW inside a transaction of its own.  When the
 * search fails or finds nothing, the path is released and the transaction
 * is committed without changes.  Otherwise BTRFS_BLOCK_FLAG_FULL_BACKREF is
 * set or cleared according to rec->flag_block_full_backref, the leaf is
 * marked dirty and the transaction is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 */
9118 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9119 struct extent_record *rec)
9121 struct btrfs_trans_handle *trans;
9122 struct btrfs_root *root = fs_info->extent_root;
9123 struct btrfs_path path;
9124 struct btrfs_extent_item *ei;
9125 struct btrfs_key key;
/* Metadata items are keyed by level, extent items by size. */
9129 key.objectid = rec->start;
9130 if (rec->metadata) {
9131 key.type = BTRFS_METADATA_ITEM_KEY;
9132 key.offset = rec->info_level;
9134 key.type = BTRFS_EXTENT_ITEM_KEY;
9135 key.offset = rec->max_size;
9138 trans = btrfs_start_transaction(root, 0);
9140 return PTR_ERR(trans);
9142 btrfs_init_path(&path);
9143 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9145 btrfs_release_path(&path);
9146 btrfs_commit_transaction(trans, root);
9149 fprintf(stderr, "Didn't find extent for %llu\n",
9150 (unsigned long long)rec->start);
9151 btrfs_release_path(&path);
9152 btrfs_commit_transaction(trans, root);
9156 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9157 struct btrfs_extent_item);
9158 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Make the stored flag match rec->flag_block_full_backref. */
9159 if (rec->flag_block_full_backref) {
9160 fprintf(stderr, "setting full backref on %llu\n",
9161 (unsigned long long)key.objectid);
9162 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9164 fprintf(stderr, "clearing full backref on %llu\n",
9165 (unsigned long long)key.objectid);
9166 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9168 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9169 btrfs_mark_buffer_dirty(path.nodes[0]);
9170 btrfs_release_path(&path);
9171 ret = btrfs_commit_transaction(trans, root);
9173 fprintf(stderr, "Repaired extent flags for %llu\n",
9174 (unsigned long long)rec->start);
9179 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - drop the parent pointer to one corrupt tree block.
 *
 * @trans:   running transaction
 * @info:    filesystem info; only info->extent_root is searched
 * @corrupt: corrupt block record giving key, level and block start
 *
 * The extent root is searched for corrupt->key with path.lowest_level set
 * to corrupt->level + 1, so the search stops at the parent of the corrupt
 * block.  The slot the search ended on is tried first; when its block
 * pointer is not corrupt->cache.start, all slots of that node are scanned.
 * Once the slot is known the pointer is removed with btrfs_del_ptr().
 */
9180 static int prune_one_block(struct btrfs_trans_handle *trans,
9181 struct btrfs_fs_info *info,
9182 struct btrfs_corrupt_block *corrupt)
9185 struct btrfs_path path;
9186 struct extent_buffer *eb;
9190 int level = corrupt->level + 1;
9192 btrfs_init_path(&path);
9194 /* we want to stop at the parent to our busted block */
9195 path.lowest_level = level;
9197 ret = btrfs_search_slot(trans, info->extent_root,
9198 &corrupt->key, &path, -1, 1);
9203 eb = path.nodes[level];
9210 * hopefully the search gave us the block we want to prune,
9211 * lets try that first
9213 slot = path.slots[level];
9214 found = btrfs_node_blockptr(eb, slot);
9215 if (found == corrupt->cache.start)
9218 nritems = btrfs_header_nritems(eb);
9220 /* the search failed, lets scan this node and hope we find it */
9221 for (slot = 0; slot < nritems; slot++) {
9222 found = btrfs_node_blockptr(eb, slot);
9223 if (found == corrupt->cache.start)
9227 * we couldn't find the bad block. TODO, search all the nodes for pointers
9230 if (eb == info->extent_root->node) {
9235 btrfs_release_path(&path);
9240 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9241 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9244 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - prune every block recorded in info->corrupt_blocks.
 *
 * @info: filesystem info holding the corrupt_blocks cache
 *
 * Each cached btrfs_corrupt_block is handed to prune_one_block() and then
 * removed from the cache.  The result of prune_one_block() is not checked.
 * All pruning runs in a transaction on the extent root.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started and the
 * result of btrfs_commit_transaction() after pruning.  Presumably the
 * transaction is started only when there is at least one block to prune
 * - TODO confirm.
 */
9248 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9250 struct btrfs_trans_handle *trans = NULL;
9251 struct cache_extent *cache;
9252 struct btrfs_corrupt_block *corrupt;
9255 cache = search_cache_extent(info->corrupt_blocks, 0);
9259 trans = btrfs_start_transaction(info->extent_root, 1);
9261 return PTR_ERR(trans);
9263 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9264 prune_one_block(trans, info, corrupt);
9265 remove_cache_extent(info->corrupt_blocks, cache);
9268 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - drop cached free space state.
 *
 * @fs_info: filesystem info owning the free space cache and block groups
 *
 * Ranges marked EXTENT_DIRTY in fs_info->free_space_cache are looked up and
 * cleared one after another.  The block groups are then visited in order,
 * advancing start to the end of each group (key.objectid + key.offset).
 * Presumably each visited group is marked as not cached so its free space
 * is recomputed later - TODO confirm.
 */
9272 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9274 struct btrfs_block_group_cache *cache;
9279 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9280 &start, &end, EXTENT_DIRTY);
9283 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9288 cache = btrfs_lookup_first_block_group(fs_info, start);
9293 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and optionally repair, extent reference errors.
 *
 * @root:         any root of the filesystem; root->fs_info is what is used
 * @extent_cache: cache tree of extent records built by the tree scan
 *
 * Preparation: every extent record and every corrupt block is marked dirty
 * in fs_info->excluded_extents, corrupt blocks are pruned and the cached
 * block groups are reset.  Per the comment below this is for repair mode,
 * so that nothing is allocated from the problem extents.
 *
 * Duplicates: while repair is set, records on duplicate_extents are run
 * through process_duplicates() and delete_duplicate_records().
 *
 * Main loop: each record in the cache is checked and the following problems
 * are printed to stderr: multiple extent items, ref count mismatch,
 * backpointer mismatch, failed owner ref check, bad full backref, metadata
 * crossing a stripe boundary and chunk type mismatch.  A ref count mismatch
 * also triggers record_orphan_data_extents().  When repair is set,
 * fixup_extent_refs() and fixup_extent_flags() are used to fix what can be
 * fixed.  The record is then removed from the cache and its backrefs freed.
 *
 * Finish: an error other than -EAGAIN is reported as a failed repair;
 * otherwise block accounting is fixed with btrfs_fix_block_accounting() in
 * a final transaction on the extent root.
 */
9297 static int check_extent_refs(struct btrfs_root *root,
9298 struct cache_tree *extent_cache)
9300 struct extent_record *rec;
9301 struct cache_extent *cache;
9307 * if we're doing a repair, we have to make sure
9308 * we don't allocate from the problem extents.
9309 * In the worst case, this will be all the
9312 cache = search_cache_extent(extent_cache, 0);
9314 rec = container_of(cache, struct extent_record, cache);
9315 set_extent_dirty(root->fs_info->excluded_extents,
9317 rec->start + rec->max_size - 1);
9318 cache = next_cache_extent(cache);
9321 /* pin down all the corrupted blocks too */
9322 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9324 set_extent_dirty(root->fs_info->excluded_extents,
9326 cache->start + cache->size - 1);
9327 cache = next_cache_extent(cache);
9329 prune_corrupt_blocks(root->fs_info);
9330 reset_cached_block_groups(root->fs_info);
9333 reset_cached_block_groups(root->fs_info);
9336 * We need to delete any duplicate entries we find first otherwise we
9337 * could mess up the extent tree when we have backrefs that actually
9338 * belong to a different extent item and not the weird duplicate one.
9340 while (repair && !list_empty(&duplicate_extents)) {
9341 rec = to_extent_record(duplicate_extents.next);
9342 list_del_init(&rec->list);
9344 /* Sometimes we can find a backref before we find an actual
9345 * extent, so we need to process it a little bit to see if there
9346 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9347 * if this is a backref screwup. If we need to delete stuff
9348 * process_duplicates() will return 0, otherwise it will return
9351 if (process_duplicates(extent_cache, rec))
9353 ret = delete_duplicate_records(root, rec);
9357 * delete_duplicate_records will return the number of entries
9358 * deleted, so if it's greater than 0 then we know we actually
9359 * did something and we need to remove.
9372 cache = search_cache_extent(extent_cache, 0);
9375 rec = container_of(cache, struct extent_record, cache);
9376 if (rec->num_duplicates) {
9377 fprintf(stderr, "extent item %llu has multiple extent "
9378 "items\n", (unsigned long long)rec->start);
9382 if (rec->refs != rec->extent_item_refs) {
9383 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9384 (unsigned long long)rec->start,
9385 (unsigned long long)rec->nr);
9386 fprintf(stderr, "extent item %llu, found %llu\n",
9387 (unsigned long long)rec->extent_item_refs,
9388 (unsigned long long)rec->refs);
9389 ret = record_orphan_data_extents(root->fs_info, rec);
9395 if (all_backpointers_checked(rec, 1)) {
9396 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9397 (unsigned long long)rec->start,
9398 (unsigned long long)rec->nr);
9402 if (!rec->owner_ref_checked) {
9403 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9404 (unsigned long long)rec->start,
9405 (unsigned long long)rec->nr);
9410 if (repair && fix) {
9411 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9417 if (rec->bad_full_backref) {
9418 fprintf(stderr, "bad full backref, on [%llu]\n",
9419 (unsigned long long)rec->start);
9421 ret = fixup_extent_flags(root->fs_info, rec);
9429 * Although it's not a extent ref's problem, we reuse this
9430 * routine for error reporting.
9431 * No repair function yet.
9433 if (rec->crossing_stripes) {
9435 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9436 rec->start, rec->start + rec->max_size);
9440 if (rec->wrong_chunk_type) {
9442 "bad extent [%llu, %llu), type mismatch with chunk\n",
9443 rec->start, rec->start + rec->max_size);
9447 remove_cache_extent(extent_cache, cache);
9448 free_all_extent_backrefs(rec);
9449 if (!init_extent_tree && repair && (!cur_err || fix))
9450 clear_extent_dirty(root->fs_info->excluded_extents,
9452 rec->start + rec->max_size - 1);
9457 if (ret && ret != -EAGAIN) {
9458 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9461 struct btrfs_trans_handle *trans;
9463 root = root->fs_info->extent_root;
9464 trans = btrfs_start_transaction(root, 1);
9465 if (IS_ERR(trans)) {
9466 ret = PTR_ERR(trans);
9470 btrfs_fix_block_accounting(trans, root);
9471 ret = btrfs_commit_transaction(trans, root);
/*
 * calc_stripe_length - length of one stripe of a chunk.
 *
 * @type:        chunk type flags; only the RAID profile bits are tested
 * @length:      logical length of the chunk
 * @num_stripes: number of stripes the chunk has
 *
 * stripe_size is computed per profile:
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   other:  length
 * Presumably stripe_size is the return value - TODO confirm.
 *
 * NOTE(review): no divisor is validated here.  num_stripes of 0 (RAID0 and
 * RAID10), 1 (RAID5) or 2 (RAID6) divides by zero, so the caller has to
 * reject such chunks first - verify against the chunk validation code.
 */
9480 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9484 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9485 stripe_size = length;
9486 stripe_size /= num_stripes;
9487 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9488 stripe_size = length * 2;
9489 stripe_size /= num_stripes;
9490 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9491 stripe_size = length;
9492 stripe_size /= (num_stripes - 1);
9493 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9494 stripe_size = length;
9495 stripe_size /= (num_stripes - 2);
9497 stripe_size = length;
9503 * Check the chunk with its block group/dev list ref:
9504 * Return 0 if all refs seem valid.
9505 * Return 1 if only part of the refs seem valid and a later check is needed to rebuild refs,
9506 * e.g. a missing block group that has to be rebuilt by searching the extent tree.
9507 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs - cross-check one chunk against the items that back it.
 *
 * @chunk_rec:         chunk record to verify
 * @block_group_cache: block group records, looked up by chunk offset
 * @dev_extent_cache:  device extent records, looked up by (devid, offset)
 *
 * Block group check: the block group found for the chunk must agree with
 * the chunk on length, offset and type flags.  A matching block group is
 * taken off its list and stored in chunk_rec->bg_rec.  A mismatching or
 * missing block group is reported.
 *
 * Device extent check: for every stripe a device extent is looked up with
 * the stripe length from calc_stripe_length().  Its objectid, offset,
 * chunk_offset and length must match the stripe; matching extents are moved
 * to chunk_rec->dextents.  Mismatching or missing extents are reported.
 */
9509 static int check_chunk_refs(struct chunk_record *chunk_rec,
9510 struct block_group_tree *block_group_cache,
9511 struct device_extent_tree *dev_extent_cache,
9514 struct cache_extent *block_group_item;
9515 struct block_group_record *block_group_rec;
9516 struct cache_extent *dev_extent_item;
9517 struct device_extent_record *dev_extent_rec;
9521 int metadump_v2 = 0;
9525 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9528 if (block_group_item) {
9529 block_group_rec = container_of(block_group_item,
9530 struct block_group_record,
9532 if (chunk_rec->length != block_group_rec->offset ||
9533 chunk_rec->offset != block_group_rec->objectid ||
9535 chunk_rec->type_flags != block_group_rec->flags)) {
9538 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9539 chunk_rec->objectid,
9544 chunk_rec->type_flags,
9545 block_group_rec->objectid,
9546 block_group_rec->type,
9547 block_group_rec->offset,
9548 block_group_rec->offset,
9549 block_group_rec->objectid,
9550 block_group_rec->flags);
9553 list_del_init(&block_group_rec->list);
9554 chunk_rec->bg_rec = block_group_rec;
9559 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9560 chunk_rec->objectid,
9565 chunk_rec->type_flags);
/* Every stripe has to be backed by a device extent of this length. */
9572 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9573 chunk_rec->num_stripes);
9574 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9575 devid = chunk_rec->stripes[i].devid;
9576 offset = chunk_rec->stripes[i].offset;
9577 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9578 devid, offset, length);
9579 if (dev_extent_item) {
9580 dev_extent_rec = container_of(dev_extent_item,
9581 struct device_extent_record,
9583 if (dev_extent_rec->objectid != devid ||
9584 dev_extent_rec->offset != offset ||
9585 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9586 dev_extent_rec->length != length) {
9589 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9590 chunk_rec->objectid,
9593 chunk_rec->stripes[i].devid,
9594 chunk_rec->stripes[i].offset,
9595 dev_extent_rec->objectid,
9596 dev_extent_rec->offset,
9597 dev_extent_rec->length);
9600 list_move(&dev_extent_rec->chunk_list,
9601 &chunk_rec->dextents);
9606 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9607 chunk_rec->objectid,
9610 chunk_rec->stripes[i].devid,
9611 chunk_rec->stripes[i].offset);
9618 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk in @chunk_cache and append each
 * chunk record to one of the caller's lists according to the result,
 * then walk the block groups and device extents that are still on the
 * block_groups / no_chunk_orphans lists and report them as having no
 * chunk.
 *
 * @good and @rebuild are tested before use, so they may be NULL.
 * NOTE(review): the condition guarding the @bad list is not shown in this
 * excerpt; check_chunks_and_extents() passes NULL for all three lists, so
 * confirm @bad is NULL-checked as well.
 */
9619 int check_chunks(struct cache_tree *chunk_cache,
9620 struct block_group_tree *block_group_cache,
9621 struct device_extent_tree *dev_extent_cache,
9622 struct list_head *good, struct list_head *bad,
9623 struct list_head *rebuild, int silent)
9625 struct cache_extent *chunk_item;
9626 struct chunk_record *chunk_rec;
9627 struct block_group_record *bg_rec;
9628 struct device_extent_record *dext_rec;
9632 chunk_item = first_cache_extent(chunk_cache);
9633 while (chunk_item) {
9634 chunk_rec = container_of(chunk_item, struct chunk_record,
9636 err = check_chunk_refs(chunk_rec, block_group_cache,
9637 dev_extent_cache, silent);
/* err == 0 goes to @good, err > 0 goes to @rebuild. */
9640 if (err == 0 && good)
9641 list_add_tail(&chunk_rec->list, good);
9642 if (err > 0 && rebuild)
9643 list_add_tail(&chunk_rec->list, rebuild);
9645 list_add_tail(&chunk_rec->list, bad);
9646 chunk_item = next_cache_extent(chunk_item);
/*
 * check_chunk_refs() unlinks matched block groups and moves matched
 * device extents to the chunk, so whatever is iterated here was not
 * claimed by any chunk.
 */
9649 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9652 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9660 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9664 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Add up the lengths of the device extents cached for @dev_rec->devid and
 * compare the sum with dev_rec->byte_used; a difference is reported with
 * the message below.
 */
9675 static int check_device_used(struct device_record *dev_rec,
9676 struct device_extent_tree *dext_cache)
9678 struct cache_extent *cache;
9679 struct device_extent_record *dev_extent_rec;
/* Start from the first cached extent at or after (devid, 0). */
9682 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9684 dev_extent_rec = container_of(cache,
9685 struct device_extent_record,
/*
 * NOTE(review): the action taken when the extent belongs to another
 * devid is not shown in this excerpt (presumably the scan stops).
 */
9687 if (dev_extent_rec->objectid != dev_rec->devid)
/* Unlink the extent from its device_list and account its length. */
9690 list_del_init(&dev_extent_rec->device_list);
9691 total_byte += dev_extent_rec->length;
9692 cache = next_cache_extent(cache);
9695 if (total_byte != dev_rec->byte_used) {
9697 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9698 total_byte, dev_rec->byte_used, dev_rec->objectid,
9699 dev_rec->type, dev_rec->offset);
9706 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Walk the device rb-tree in @dev_cache and run check_device_used() on
 * every device record, then report each device extent that is still on
 * the no_device_orphans list.
 */
9707 static int check_devices(struct rb_root *dev_cache,
9708 struct device_extent_tree *dev_extent_cache)
9710 struct rb_node *dev_node;
9711 struct device_record *dev_rec;
9712 struct device_extent_record *dext_rec;
9716 dev_node = rb_first(dev_cache);
9718 dev_rec = container_of(dev_node, struct device_record, node);
9719 err = check_device_used(dev_rec, dev_extent_cache);
9723 dev_node = rb_next(dev_node);
/*
 * check_device_used() unlinks every extent it accounts from device_list,
 * so entries found here were not matched to a device.
 */
9725 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9728 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9729 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it
 * to the tail of @head.
 *
 * @drop_key is copied into the record.
 * NOTE(review): callers in this file pass drop_key == NULL for the tree
 * root and chunk root, so the memcpy() below is presumably guarded by a
 * NULL check on a line not shown in this excerpt; likewise the malloc()
 * failure handling is not visible here. Confirm both.
 */
9736 static int add_root_item_to_list(struct list_head *head,
9737 u64 objectid, u64 bytenr, u64 last_snapshot,
9738 u8 level, u8 drop_level,
9739 struct btrfs_key *drop_key)
9742 struct root_item_record *ri_rec;
9743 ri_rec = malloc(sizeof(*ri_rec));
9746 ri_rec->bytenr = bytenr;
9747 ri_rec->objectid = objectid;
9748 ri_rec->level = level;
9749 ri_rec->drop_level = drop_level;
9750 ri_rec->last_snapshot = last_snapshot;
9752 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9753 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list: while it is not empty, take the first root_item_record
 * and unlink it.
 * NOTE(review): the record is presumably freed on a line not shown in
 * this excerpt (it is allocated with malloc() in add_root_item_to_list()).
 */
9758 static void free_root_item_list(struct list_head *list)
9760 struct root_item_record *ri_rec;
9762 while (!list_empty(list)) {
9763 ri_rec = list_first_entry(list, struct root_item_record,
9765 list_del_init(&ri_rec->list);
/*
 * Process every root_item_record queued on @list. For each record the
 * visible code reads the root's tree block, hands it to
 * add_root_to_pending(), calls run_next_block() with the record, then
 * drops the buffer reference and unlinks the record from the list.
 * After the list loop, run_next_block() is called with a NULL record.
 *
 * All cache/tree arguments are passed through unchanged to
 * add_root_to_pending() and run_next_block().
 */
9770 static int deal_root_from_list(struct list_head *list,
9771 struct btrfs_root *root,
9772 struct block_info *bits,
9774 struct cache_tree *pending,
9775 struct cache_tree *seen,
9776 struct cache_tree *reada,
9777 struct cache_tree *nodes,
9778 struct cache_tree *extent_cache,
9779 struct cache_tree *chunk_cache,
9780 struct rb_root *dev_cache,
9781 struct block_group_tree *block_group_cache,
9782 struct device_extent_tree *dev_extent_cache)
9787 while (!list_empty(list)) {
9788 struct root_item_record *rec;
9789 struct extent_buffer *buf;
/* Always work on the head of the list; it is removed at the end of the pass. */
9790 rec = list_entry(list->next,
9791 struct root_item_record, list);
9793 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
/* A block that could not be read up to date is released again. */
9794 if (!extent_buffer_uptodate(buf)) {
9795 free_extent_buffer(buf);
9799 ret = add_root_to_pending(buf, extent_cache, pending,
9800 seen, nodes, rec->objectid);
9804 * To rebuild extent tree, we need deal with snapshot
9805 * one by one, otherwise we deal with node firstly which
9806 * can maximize readahead.
9809 ret = run_next_block(root, bits, bits_nr, &last,
9810 pending, seen, reada, nodes,
9811 extent_cache, chunk_cache,
9812 dev_cache, block_group_cache,
9813 dev_extent_cache, rec);
9817 free_extent_buffer(buf);
9818 list_del(&rec->list);
/* Same call as above but without a specific root item record. */
9824 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9825 reada, nodes, extent_cache, chunk_cache,
9826 dev_cache, block_group_cache,
9827 dev_extent_cache, NULL);
/*
 * Driver for the chunk/extent checks. Visible phases:
 *  1. initialise the local caches, trees and lists and publish some of
 *     them through root->fs_info;
 *  2. queue the tree root and the chunk root, then walk the tree root and
 *     queue every ROOT_ITEM on normal_trees or dropping_trees;
 *  3. process both lists with deal_root_from_list();
 *  4. run check_chunks(), check_extent_refs() and check_devices();
 *  5. reset the fs_info pointers and free the caches.
 *
 * NOTE(review): the error-handling jumps and labels between these phases
 * are not shown in this excerpt.
 */
9837 static int check_chunks_and_extents(struct btrfs_root *root)
9839 struct rb_root dev_cache;
9840 struct cache_tree chunk_cache;
9841 struct block_group_tree block_group_cache;
9842 struct device_extent_tree dev_extent_cache;
9843 struct cache_tree extent_cache;
9844 struct cache_tree seen;
9845 struct cache_tree pending;
9846 struct cache_tree reada;
9847 struct cache_tree nodes;
9848 struct extent_io_tree excluded_extents;
9849 struct cache_tree corrupt_blocks;
9850 struct btrfs_path path;
9851 struct btrfs_key key;
9852 struct btrfs_key found_key;
9854 struct block_info *bits;
9856 struct extent_buffer *leaf;
9858 struct btrfs_root_item ri;
9859 struct list_head dropping_trees;
9860 struct list_head normal_trees;
9861 struct btrfs_root *root1;
/* Start with empty caches, trees and lists. */
9865 dev_cache = RB_ROOT;
9866 cache_tree_init(&chunk_cache);
9867 block_group_tree_init(&block_group_cache);
9868 device_extent_tree_init(&dev_extent_cache);
9870 cache_tree_init(&extent_cache);
9871 cache_tree_init(&seen);
9872 cache_tree_init(&pending);
9873 cache_tree_init(&nodes);
9874 cache_tree_init(&reada);
9875 cache_tree_init(&corrupt_blocks);
9876 extent_io_tree_init(&excluded_extents);
9877 INIT_LIST_HEAD(&dropping_trees);
9878 INIT_LIST_HEAD(&normal_trees);
/*
 * Expose the stack-allocated trees and the free-extent hook through
 * fs_info; the same four fields are reset to NULL further down.
 */
9881 root->fs_info->excluded_extents = &excluded_extents;
9882 root->fs_info->fsck_extent_cache = &extent_cache;
9883 root->fs_info->free_extent_hook = free_extent_hook;
9884 root->fs_info->corrupt_blocks = &corrupt_blocks;
9888 bits = malloc(bits_nr * sizeof(struct block_info));
9894 if (ctx.progress_enabled) {
9895 ctx.tp = TASK_EXTENTS;
9896 task_start(ctx.info);
/* Seed normal_trees with the tree root and the chunk root (no drop key). */
9900 root1 = root->fs_info->tree_root;
9901 level = btrfs_header_level(root1->node);
9902 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9903 root1->node->start, 0, level, 0, NULL);
9906 root1 = root->fs_info->chunk_root;
9907 level = btrfs_header_level(root1->node);
9908 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9909 root1->node->start, 0, level, 0, NULL);
/* Walk the tree root and queue every ROOT_ITEM that is found. */
9912 btrfs_init_path(&path);
9915 key.type = BTRFS_ROOT_ITEM_KEY;
9916 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9921 leaf = path.nodes[0];
9922 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf. */
9923 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9924 ret = btrfs_next_leaf(root, &path);
9927 leaf = path.nodes[0];
9928 slot = path.slots[0];
9930 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9931 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9932 unsigned long offset;
9935 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9936 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9937 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A zero drop_progress objectid queues the root on normal_trees; the
 * second add_root_item_to_list() call below queues on dropping_trees
 * together with the drop key and drop level.
 */
9938 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9939 level = btrfs_root_level(&ri);
9940 ret = add_root_item_to_list(&normal_trees,
9942 btrfs_root_bytenr(&ri),
9943 last_snapshot, level,
9948 level = btrfs_root_level(&ri);
9949 objectid = found_key.objectid;
9950 btrfs_disk_key_to_cpu(&found_key,
9952 ret = add_root_item_to_list(&dropping_trees,
9954 btrfs_root_bytenr(&ri),
9955 last_snapshot, level,
9956 ri.drop_level, &found_key);
9963 btrfs_release_path(&path);
9966 * check_block can return -EAGAIN if it fixes something, please keep
9967 * this in mind when dealing with return values from these functions, if
9968 * we get -EAGAIN we want to fall through and restart the loop.
9970 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9971 &seen, &reada, &nodes, &extent_cache,
9972 &chunk_cache, &dev_cache, &block_group_cache,
9979 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9980 &pending, &seen, &reada, &nodes,
9981 &extent_cache, &chunk_cache, &dev_cache,
9982 &block_group_cache, &dev_extent_cache);
/*
 * Cross-check the collected records. check_chunks() is called with NULL
 * good/bad/rebuild lists and silent == 0, i.e. only for its reporting
 * and return value.
 */
9989 ret = check_chunks(&chunk_cache, &block_group_cache,
9990 &dev_extent_cache, NULL, NULL, NULL, 0);
9997 ret = check_extent_refs(root, &extent_cache);
10004 ret = check_devices(&dev_cache, &dev_extent_cache);
10009 task_stop(ctx.info);
/* Tear down: release the trees and detach them from fs_info. */
10011 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10012 extent_io_tree_cleanup(&excluded_extents);
10013 root->fs_info->fsck_extent_cache = NULL;
10014 root->fs_info->free_extent_hook = NULL;
10015 root->fs_info->corrupt_blocks = NULL;
10016 root->fs_info->excluded_extents = NULL;
10019 free_chunk_cache_tree(&chunk_cache);
10020 free_device_cache_tree(&dev_cache);
10021 free_block_group_tree(&block_group_cache);
10022 free_device_extent_tree(&dev_extent_cache);
10023 free_extent_cache_tree(&seen);
10024 free_extent_cache_tree(&pending);
10025 free_extent_cache_tree(&reada);
10026 free_extent_cache_tree(&nodes);
10027 free_root_item_list(&normal_trees);
10028 free_root_item_list(&dropping_trees);
/*
 * NOTE(review): a second cleanup sequence follows. Unlike the one above
 * it also calls free_extent_record_cache() and does not reset the
 * fs_info pointers. The label/branch that selects it is not shown in
 * this excerpt - presumably the -EAGAIN restart path mentioned earlier;
 * confirm.
 */
10031 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10032 free_extent_cache_tree(&seen);
10033 free_extent_cache_tree(&pending);
10034 free_extent_cache_tree(&reada);
10035 free_extent_cache_tree(&nodes);
10036 free_chunk_cache_tree(&chunk_cache);
10037 free_block_group_tree(&block_group_cache);
10038 free_device_cache_tree(&dev_cache);
10039 free_device_extent_tree(&dev_extent_cache);
10040 free_extent_record_cache(&extent_cache);
10041 free_root_item_list(&normal_trees);
10042 free_root_item_list(&dropping_trees);
10043 extent_io_tree_cleanup(&excluded_extents);
10048 * Check backrefs of a tree block given by @bytenr or @eb.
10050 * @root: the root containing the @bytenr or @eb
10051 * @eb: tree block extent buffer, can be NULL
10052 * @bytenr: bytenr of the tree block to search
10053 * @level: tree level of the tree block
10054 * @owner: owner of the tree block
10056 * Return >0 for any error found and output error message
10057 * Return 0 for no error found
/*
 * Implementation outline (visible steps): find the extent/metadata item
 * for @bytenr in the extent tree, compare its flags, generation, level and
 * ref count with the tree block, scan the inline refs for a matching
 * TREE_BLOCK_REF or a valid SHARED_BLOCK_REF, and finally fall back to a
 * keyed TREE_BLOCK_REF lookup. Problems are accumulated in err as
 * BACKREF_MISSING / BACKREF_MISMATCH.
 */
10059 static int check_tree_block_ref(struct btrfs_root *root,
10060 struct extent_buffer *eb, u64 bytenr,
10061 int level, u64 owner)
10063 struct btrfs_key key;
10064 struct btrfs_root *extent_root = root->fs_info->extent_root;
10065 struct btrfs_path path;
10066 struct btrfs_extent_item *ei;
10067 struct btrfs_extent_inline_ref *iref;
10068 struct extent_buffer *leaf;
10074 u32 nodesize = root->fs_info->nodesize;
10077 int tree_reloc_root = 0;
/*
 * Remember whether @bytenr is the root node of a tree reloc root; this
 * is consulted in the SHARED_BLOCK_REF branch below.
 */
10082 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10083 btrfs_header_bytenr(root->node) == bytenr)
10084 tree_reloc_root = 1;
10086 btrfs_init_path(&path);
10087 key.objectid = bytenr;
10088 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10089 key.type = BTRFS_METADATA_ITEM_KEY;
10091 key.type = BTRFS_EXTENT_ITEM_KEY;
/*
 * Search with the largest possible offset, then step back with
 * btrfs_previous_extent_item() to reach the item for @bytenr.
 */
10092 key.offset = (u64)-1;
10094 /* Search for the backref in extent tree */
10095 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10097 err |= BACKREF_MISSING;
10100 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10102 err |= BACKREF_MISSING;
10106 leaf = path.nodes[0];
10107 slot = path.slots[0];
10108 btrfs_item_key_to_cpu(leaf, &key, slot);
10110 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is the key offset and inline refs follow the
 * extent item directly. Otherwise a btrfs_tree_block_info sits between
 * the extent item and the inline refs and carries the level.
 */
10112 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10113 skinny_level = (int)key.offset;
10114 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10116 struct btrfs_tree_block_info *info;
10118 info = (struct btrfs_tree_block_info *)(ei + 1);
10119 skinny_level = btrfs_tree_block_level(leaf, info);
10120 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/*
 * NOTE(review): @eb can be NULL (the recursive call below passes NULL),
 * while btrfs_header_generation(eb) is used in the checks that follow;
 * they are presumably wrapped in an `if (eb)` guard on a line not shown
 * in this excerpt - confirm.
 */
10127 if (!(btrfs_extent_flags(leaf, ei) &
10128 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10130 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10131 key.objectid, nodesize,
10132 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10133 err = BACKREF_MISMATCH;
10135 header_gen = btrfs_header_generation(eb);
10136 extent_gen = btrfs_extent_generation(leaf, ei);
10137 if (header_gen != extent_gen) {
10139 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10140 key.objectid, nodesize, header_gen,
10142 err = BACKREF_MISMATCH;
10144 if (level != skinny_level) {
10146 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10147 key.objectid, nodesize, level, skinny_level);
10148 err = BACKREF_MISMATCH;
/* Blocks whose owner is not an fs tree are expected to have exactly one ref. */
10150 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10152 "extent[%llu %u] is referred by other roots than %llu",
10153 key.objectid, nodesize, root->objectid);
10154 err = BACKREF_MISMATCH;
10159 * Iterate the extent/metadata item to find the exact backref
10161 item_size = btrfs_item_size_nr(leaf, slot);
10162 ptr = (unsigned long)iref;
10163 end = (unsigned long)ei + item_size;
10164 while (ptr < end) {
10165 iref = (struct btrfs_extent_inline_ref *)ptr;
10166 type = btrfs_extent_inline_ref_type(leaf, iref);
10167 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/* A TREE_BLOCK_REF matches when its offset is this root or @owner. */
10169 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10170 (offset == root->objectid || offset == owner)) {
10172 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10174 * Backref of tree reloc root points to itself, no need
10175 * to check backref any more.
10177 if (tree_reloc_root)
10180 /* Check if the backref points to valid referencer */
/* Recurse on the block at the ref offset, one level up; 0 means valid. */
10181 found_ref = !check_tree_block_ref(root, NULL,
10182 offset, level + 1, owner);
10187 ptr += btrfs_extent_inline_ref_size(type);
10191 * Inlined extent item doesn't have what we need, check
10192 * TREE_BLOCK_REF_KEY
10195 btrfs_release_path(&path);
10196 key.objectid = bytenr;
10197 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10198 key.offset = root->objectid;
10200 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10205 err |= BACKREF_MISSING;
10207 btrfs_release_path(&path);
/* Only report a lost backref when a tree block buffer was supplied. */
10208 if (eb && (err & BACKREF_MISSING))
10209 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10210 bytenr, nodesize, owner, level);
10215 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10217 * Return >0 for any error found and output error message
10218 * Return 0 for no error found
/*
 * Implementation outline (visible steps) for the EXTENT_DATA item at
 * @slot of @eb:
 *  - inline extents and extents with disk_bytenr 0 are not checked further;
 *  - disk_num_bytes and num_bytes must be sectorsize aligned
 *    (BYTES_UNALIGNED otherwise) and are added to the file-scope
 *    data_bytes_allocated / data_bytes_referenced counters;
 *  - the extent item is looked up in the extent tree and must carry
 *    BTRFS_EXTENT_FLAG_DATA (BACKREF_MISMATCH otherwise);
 *  - a data backref is searched inline, then as a keyed EXTENT_DATA_REF,
 *    then as a keyed SHARED_DATA_REF (BACKREF_MISSING if none is found).
 */
10220 static int check_extent_data_item(struct btrfs_root *root,
10221 struct extent_buffer *eb, int slot)
10223 struct btrfs_file_extent_item *fi;
10224 struct btrfs_path path;
10225 struct btrfs_root *extent_root = root->fs_info->extent_root;
10226 struct btrfs_key fi_key;
10227 struct btrfs_key dbref_key;
10228 struct extent_buffer *leaf;
10229 struct btrfs_extent_item *ei;
10230 struct btrfs_extent_inline_ref *iref;
10231 struct btrfs_extent_data_ref *dref;
10234 u64 disk_num_bytes;
10235 u64 extent_num_bytes;
10242 int found_dbackref = 0;
10246 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10247 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10249 /* Nothing to check for hole and inline data extents */
10250 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10251 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10254 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10255 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10256 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10258 /* Check unaligned disk_num_bytes and num_bytes */
10259 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10261 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10262 fi_key.objectid, fi_key.offset, disk_num_bytes,
10263 root->fs_info->sectorsize);
10264 err |= BYTES_UNALIGNED;
10266 data_bytes_allocated += disk_num_bytes;
10268 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10270 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10271 fi_key.objectid, fi_key.offset, extent_num_bytes,
10272 root->fs_info->sectorsize);
10273 err |= BYTES_UNALIGNED;
10275 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is accepted as ref root below. */
10277 owner = btrfs_header_owner(eb);
10279 /* Check the extent item of the file extent in extent tree */
10280 btrfs_init_path(&path);
/* Extent item key: (disk_bytenr, EXTENT_ITEM, disk_num_bytes). */
10281 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10282 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10283 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10285 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on, slot refers to the extent tree leaf, not to @eb. */
10289 leaf = path.nodes[0];
10290 slot = path.slots[0];
10291 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10293 extent_flags = btrfs_extent_flags(leaf, ei);
10295 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10297 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10298 disk_bytenr, disk_num_bytes,
10299 BTRFS_EXTENT_FLAG_DATA);
10300 err |= BACKREF_MISMATCH;
10303 /* Check data backref inside that extent item */
/* Inline refs start right after the extent item and run to the item end. */
10304 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10305 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10306 ptr = (unsigned long)iref;
10307 end = (unsigned long)ei + item_size;
10308 while (ptr < end) {
10309 iref = (struct btrfs_extent_inline_ref *)ptr;
10310 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For EXTENT_DATA_REF the data ref body overlays the inline ref's offset field. */
10311 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10313 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10314 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10315 if (ref_root == owner || ref_root == root->objectid)
10316 found_dbackref = 1;
10317 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: valid if the referencing tree block has a valid backref. */
10318 found_dbackref = !check_tree_block_ref(root, NULL,
10319 btrfs_extent_inline_ref_offset(leaf, iref),
10323 if (found_dbackref)
10325 ptr += btrfs_extent_inline_ref_size(type);
10328 if (!found_dbackref) {
10329 btrfs_release_path(&path);
10331 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10332 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10333 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10334 dbref_key.offset = hash_extent_data_ref(root->objectid,
10335 fi_key.objectid, fi_key.offset);
10337 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10338 &dbref_key, &path, 0, 0);
10340 found_dbackref = 1;
10344 btrfs_release_path(&path);
10347 * Neither inlined nor EXTENT_DATA_REF found, try
10348 * SHARED_DATA_REF as last chance.
/* Keyed by the data extent's bytenr with this leaf's start as offset. */
10350 dbref_key.objectid = disk_bytenr;
10351 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10352 dbref_key.offset = eb->start;
10354 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10355 &dbref_key, &path, 0, 0);
10357 found_dbackref = 1;
10363 if (!found_dbackref)
10364 err |= BACKREF_MISSING;
10365 btrfs_release_path(&path);
10366 if (err & BACKREF_MISSING) {
10367 error("data extent[%llu %llu] backref lost",
10368 disk_bytenr, disk_num_bytes);
10374 * Get the real tree block level, for cases such as a shared block
10375 * Return >= 0 as tree level
10376 * Return <0 for error
/*
 * Implementation outline (visible steps): look up the extent item for
 * @bytenr in the extent tree, require the TREE_BLOCK flag, take the
 * generation and the backref level from it, then read the tree block
 * itself and take the level from its header. header_level is compared
 * with backref_level and header_level is what gets returned.
 */
10378 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10380 struct extent_buffer *eb;
10381 struct btrfs_path path;
10382 struct btrfs_key key;
10383 struct btrfs_extent_item *ei;
10390 /* Search extent tree for extent generation and level */
10391 key.objectid = bytenr;
10392 key.type = BTRFS_METADATA_ITEM_KEY;
10393 key.offset = (u64)-1;
10395 btrfs_init_path(&path);
10396 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* The search key uses the maximum offset; step back to the item for @bytenr. */
10399 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10407 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10408 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10409 struct btrfs_extent_item);
10410 flags = btrfs_extent_flags(path.nodes[0], ei);
10411 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10416 /* Get transid for later read_tree_block() check */
10417 transid = btrfs_extent_generation(path.nodes[0], ei);
10419 /* Get backref level as one source */
/* METADATA_ITEM keeps the level in key.offset, otherwise it is in tree_block_info. */
10420 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10421 backref_level = key.offset;
10423 struct btrfs_tree_block_info *info;
10425 info = (struct btrfs_tree_block_info *)(ei + 1);
10426 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10428 btrfs_release_path(&path);
10430 /* Get level from tree block as an alternative source */
10431 eb = read_tree_block(fs_info, bytenr, transid);
10432 if (!extent_buffer_uptodate(eb)) {
10433 free_extent_buffer(eb);
10436 header_level = btrfs_header_level(eb);
10437 free_extent_buffer(eb);
/* NOTE(review): the statement taken on a level mismatch is not shown in this excerpt. */
10439 if (header_level != backref_level)
10441 return header_level;
10444 btrfs_release_path(&path);
10449 * Check if a tree block backref is valid (points to a valid tree block)
10450 * if level == -1, level will be resolved
10451 * Return >0 for any error found and print error message
/*
 * Implementation outline (visible steps): resolve the level through
 * query_tree_block_level() for the level == -1 case, read root @root_id,
 * read tree block @bytenr to obtain its first key, search the root for
 * that key with path.lowest_level set to the level, and compare the node
 * found at that level with @bytenr and @level. Failures are accumulated
 * as REFERENCER_MISSING / REFERENCER_MISMATCH.
 */
10453 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10454 u64 bytenr, int level)
10456 struct btrfs_root *root;
10457 struct btrfs_key key;
10458 struct btrfs_path path;
10459 struct extent_buffer *eb;
10460 struct extent_buffer *node;
10461 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10465 /* Query level for level == -1 special case */
10467 level = query_tree_block_level(fs_info, bytenr);
10469 err |= REFERENCER_MISSING;
/* Root lookup key: (root_id, ROOT_ITEM, maximum offset). */
10473 key.objectid = root_id;
10474 key.type = BTRFS_ROOT_ITEM_KEY;
10475 key.offset = (u64)-1;
10477 root = btrfs_read_fs_root(fs_info, &key);
10478 if (IS_ERR(root)) {
10479 err |= REFERENCER_MISSING;
10483 /* Read out the tree block to get item/node key */
10484 eb = read_tree_block(fs_info, bytenr, 0);
10485 if (!extent_buffer_uptodate(eb)) {
10486 err |= REFERENCER_MISSING;
10487 free_extent_buffer(eb);
10491 /* Empty tree, no need to check key */
10492 if (!btrfs_header_nritems(eb) && !level) {
10493 free_extent_buffer(eb);
/*
 * key is reused for the block's first key: a node key or an item key.
 * NOTE(review): the condition choosing between the two calls is not
 * shown in this excerpt.
 */
10498 btrfs_node_key_to_cpu(eb, &key, 0);
10500 btrfs_item_key_to_cpu(eb, &key, 0);
10502 free_extent_buffer(eb);
10504 btrfs_init_path(&path);
/* Stop the search at the requested level so path.nodes[level] is filled in. */
10505 path.lowest_level = level;
10506 /* Search with the first key, to ensure we can reach it */
10507 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10509 err |= REFERENCER_MISSING;
10513 node = path.nodes[level];
10514 if (btrfs_header_bytenr(node) != bytenr) {
10516 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10517 bytenr, nodesize, bytenr,
10518 btrfs_header_bytenr(node));
10519 err |= REFERENCER_MISMATCH;
10521 if (btrfs_header_level(node) != level) {
10523 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10524 bytenr, nodesize, level,
10525 btrfs_header_level(node));
10526 err |= REFERENCER_MISMATCH;
10530 btrfs_release_path(&path);
/*
 * Two message variants exist, with and without the level.
 * NOTE(review): the condition selecting between them is not shown in
 * this excerpt.
 */
10532 if (err & REFERENCER_MISSING) {
10534 error("extent [%llu %d] lost referencer (owner: %llu)",
10535 bytenr, nodesize, root_id);
10538 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10539 bytenr, nodesize, root_id, level);
10546 * Check if tree block @eb is tree reloc root.
10547 * Return 0 if it's not or any problem happens
10548 * Return 1 if it's a tree reloc root
/*
 * Implementation outline (visible steps): read, without using the root
 * cache, the root keyed (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of
 * @eb), compare the bytenr of its root node with the bytenr of @eb, and
 * free the temporary root again.
 */
10550 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10551 struct extent_buffer *eb)
10553 struct btrfs_root *tree_reloc_root;
10554 struct btrfs_key key;
10555 u64 bytenr = btrfs_header_bytenr(eb);
10556 u64 owner = btrfs_header_owner(eb);
/* The key offset is the owner taken from the header of @eb. */
10559 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10560 key.offset = owner;
10561 key.type = BTRFS_ROOT_ITEM_KEY;
10563 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10564 if (IS_ERR(tree_reloc_root))
10567 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
/* The root was read with the _no_cache variant and is freed here. */
10569 btrfs_free_fs_root(tree_reloc_root);
10574 * Check referencer for shared block backref
10575 * If level == -1, this function will resolve the level.
/*
 * Implementation outline (visible steps): read tree block @parent,
 * resolve the level through query_tree_block_level() if needed, accept
 * the self-referencing case when @parent is a tree reloc root, otherwise
 * require the parent to be exactly one level above and to contain a block
 * pointer equal to @bytenr. Returns REFERENCER_MISSING (after printing
 * the message below) when no such parent reference is found.
 */
10577 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10578 u64 parent, u64 bytenr, int level)
10580 struct extent_buffer *eb;
10582 int found_parent = 0;
10585 eb = read_tree_block(fs_info, parent, 0);
10586 if (!extent_buffer_uptodate(eb))
10590 level = query_tree_block_level(fs_info, bytenr);
10594 /* It's possible it's a tree reloc root */
10595 if (parent == bytenr) {
10596 if (is_tree_reloc_root(fs_info, eb))
/* The parent has to sit exactly one level above the child. */
10601 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer of the parent node for @bytenr. */
10604 nr = btrfs_header_nritems(eb);
10605 for (i = 0; i < nr; i++) {
10606 if (bytenr == btrfs_node_blockptr(eb, i)) {
10612 free_extent_buffer(eb);
10613 if (!found_parent) {
10615 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10616 bytenr, fs_info->nodesize, parent, level);
10617 return REFERENCER_MISSING;
10623 * Check referencer for normal (inlined) data ref
10624 * If len == 0, it will be resolved by searching in extent tree
/*
 * Implementation outline (visible steps): look up the extent item for
 * @bytenr in the extent tree, read root @root_id, then iterate the
 * EXTENT_DATA items of inode @objectid in that root and check each one
 * against @bytenr, @len and the backref offset. The number of matches
 * (found_count) is compared with @count; on a difference the message
 * below is printed and REFERENCER_MISSING is returned.
 */
10626 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10627 u64 root_id, u64 objectid, u64 offset,
10628 u64 bytenr, u64 len, u32 count)
10630 struct btrfs_root *root;
10631 struct btrfs_root *extent_root = fs_info->extent_root;
10632 struct btrfs_key key;
10633 struct btrfs_path path;
10634 struct extent_buffer *leaf;
10635 struct btrfs_file_extent_item *fi;
10636 u32 found_count = 0;
/*
 * Extent tree lookup for @bytenr.
 * NOTE(review): per the comment above this function this lookup serves
 * the len == 0 case; the guarding condition and the assignment to len
 * are not shown in this excerpt.
 */
10641 key.objectid = bytenr;
10642 key.type = BTRFS_EXTENT_ITEM_KEY;
10643 key.offset = (u64)-1;
10645 btrfs_init_path(&path);
10646 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10649 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10652 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10653 if (key.objectid != bytenr ||
10654 key.type != BTRFS_EXTENT_ITEM_KEY)
10657 btrfs_release_path(&path);
/* Read the root that is supposed to reference the extent. */
10659 key.objectid = root_id;
10660 key.type = BTRFS_ROOT_ITEM_KEY;
10661 key.offset = (u64)-1;
10662 btrfs_init_path(&path);
10664 root = btrfs_read_fs_root(fs_info, &key);
10668 key.objectid = objectid;
10669 key.type = BTRFS_EXTENT_DATA_KEY;
10671 * It can be nasty as data backref offset is
10672 * file offset - file extent offset, which is smaller or
10673 * equal to original backref offset. The only special case is
10674 * overflow. So we need to special check and do further search.
/* If bit 63 of @offset is set, start the search at file offset 0 instead. */
10676 key.offset = offset & (1ULL << 63) ? 0 : offset;
10678 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10683 * Search afterwards to get correct one
10684 * NOTE: As we must do a comprehensive check on the data backref to
10685 * make sure the dref count also matches, we must iterate all file
10686 * extents for that inode.
10689 leaf = path.nodes[0];
10690 slot = path.slots[0];
10692 if (slot >= btrfs_header_nritems(leaf))
10694 btrfs_item_key_to_cpu(leaf, &key, slot);
10695 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10697 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10699 * Except normal disk bytenr and disk num bytes, we still
10700 * need to do extra check on dbackref offset as
10701 * dbackref offset = file_offset - file_extent_offset
10703 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10704 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10705 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10710 ret = btrfs_next_item(root, &path);
10715 btrfs_release_path(&path);
10716 if (found_count != count) {
10718 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10719 bytenr, len, root_id, objectid, offset, count, found_count);
10720 return REFERENCER_MISSING;
10726 * Check if the referencer of a shared data backref exists
10728 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10729 u64 parent, u64 bytenr)
10731 struct extent_buffer *eb;
10732 struct btrfs_key key;
10733 struct btrfs_file_extent_item *fi;
10735 int found_parent = 0;
10738 eb = read_tree_block(fs_info, parent, 0);
10739 if (!extent_buffer_uptodate(eb))
10742 nr = btrfs_header_nritems(eb);
10743 for (i = 0; i < nr; i++) {
10744 btrfs_item_key_to_cpu(eb, &key, i);
10745 if (key.type != BTRFS_EXTENT_DATA_KEY)
10748 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10749 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10752 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10759 free_extent_buffer(eb);
10760 if (!found_parent) {
10761 error("shared extent %llu referencer lost (parent: %llu)",
10763 return REFERENCER_MISSING;
10769 * This function will check a given extent item, including its backref and
10770 * itself (like crossing stripe boundary and type)
10772 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Implementation outline (visible steps) for the extent item at @slot of
 * @eb:
 *  - account the extent in the file-scope bytes_used counter (key.offset
 *    for EXTENT_ITEM keys, nodesize otherwise);
 *  - reject items smaller than struct btrfs_extent_item;
 *  - for tree blocks, check for crossing a stripe boundary
 *    (CROSSING_STRIPE_BOUNDARY);
 *  - walk the inline refs and dispatch each one to the matching
 *    check_*_backref() helper; unknown ref types set UNKNOWN_TYPE and an
 *    overrun of the item sets ITEM_SIZE_MISMATCH.
 */
10774 static int check_extent_item(struct btrfs_fs_info *fs_info,
10775 struct extent_buffer *eb, int slot)
10777 struct btrfs_extent_item *ei;
10778 struct btrfs_extent_inline_ref *iref;
10779 struct btrfs_extent_data_ref *dref;
10783 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10784 u32 item_size = btrfs_item_size_nr(eb, slot);
10789 struct btrfs_key key;
10793 btrfs_item_key_to_cpu(eb, &key, slot);
10794 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10795 bytes_used += key.offset;
10797 bytes_used += nodesize;
10799 if (item_size < sizeof(*ei)) {
10801 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10802 * old thing when on disk format is still un-determined.
10803 * No need to care about it anymore
10805 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10809 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10810 flags = btrfs_extent_flags(eb, ei);
10812 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10814 if (metadata && check_crossing_stripes(global_info, key.objectid,
10816 error("bad metadata [%llu, %llu) crossing stripe boundary",
10817 key.objectid, key.objectid + nodesize);
10818 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr walks the item payload, starting right after the extent item. */
10821 ptr = (unsigned long)(ei + 1);
10823 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10824 /* Old EXTENT_ITEM metadata */
10825 struct btrfs_tree_block_info *info;
10827 info = (struct btrfs_tree_block_info *)ptr;
10828 level = btrfs_tree_block_level(eb, info);
10829 ptr += sizeof(struct btrfs_tree_block_info);
10831 /* New METADATA_ITEM */
10832 level = key.offset;
10834 end = (unsigned long)ei + item_size;
10837 /* Reached extent item end normally */
10841 /* Beyond extent item end, wrong item size */
10843 err |= ITEM_SIZE_MISMATCH;
10844 error("extent item at bytenr %llu slot %d has wrong size",
10849 /* Now check every backref in this extent item */
10850 iref = (struct btrfs_extent_inline_ref *)ptr;
10851 type = btrfs_extent_inline_ref_type(eb, iref);
10852 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* For the three non-data-ref cases the inline ref offset is the helper's second argument. */
10854 case BTRFS_TREE_BLOCK_REF_KEY:
10855 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10859 case BTRFS_SHARED_BLOCK_REF_KEY:
10860 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10864 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref body overlays the inline ref's offset field. */
10865 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10866 ret = check_extent_data_backref(fs_info,
10867 btrfs_extent_data_ref_root(eb, dref),
10868 btrfs_extent_data_ref_objectid(eb, dref),
10869 btrfs_extent_data_ref_offset(eb, dref),
10870 key.objectid, key.offset,
10871 btrfs_extent_data_ref_count(eb, dref));
10874 case BTRFS_SHARED_DATA_REF_KEY:
10875 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10879 error("extent[%llu %d %llu] has unknown ref type: %d",
10880 key.objectid, key.type, key.offset, type);
10881 err |= UNKNOWN_TYPE;
/* Advance by the size of this ref type to reach the next inline ref. */
10885 ptr += btrfs_extent_inline_ref_size(type);
10893 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the dev extent item at @slot of @eb is referenced by a chunk.
 *
 * The chunk named by the dev extent (chunk objectid + chunk offset) is looked
 * up in the chunk tree and its stripes are scanned for one whose devid and
 * offset equal the dev extent's key objectid and key offset.
 * REFERENCER_MISSING is returned when found_chunk is still 0 afterwards.
 * NOTE(review): declarations, branch targets and the final return that sit
 * between the visible lines are not part of this extract - confirm them
 * against the complete file.
 */
10895 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10896 struct extent_buffer *eb, int slot)
10898 struct btrfs_root *chunk_root = fs_info->chunk_root;
10899 struct btrfs_dev_extent *ptr;
10900 struct btrfs_path path;
10901 struct btrfs_key chunk_key;
10902 struct btrfs_key devext_key;
10903 struct btrfs_chunk *chunk;
10904 struct extent_buffer *l;
10908 int found_chunk = 0;
/* Key and length of the dev extent under test */
10911 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10912 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10913 length = btrfs_dev_extent_length(eb, ptr);
/* Key of the chunk item this dev extent claims to belong to */
10915 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10916 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10917 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
/* Read-only lookup (no transaction, no COW) in the chunk tree */
10919 btrfs_init_path(&path);
10920 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
/* The chunk item is passed through btrfs_check_chunk_valid() before use */
10925 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10926 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The chunk's per-stripe length is compared with the dev extent length */
10931 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for the stripe that points back at this dev extent */
10934 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10935 for (i = 0; i < num_stripes; i++) {
10936 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10937 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10939 if (devid == devext_key.objectid &&
10940 offset == devext_key.offset) {
10946 btrfs_release_path(&path);
10947 if (!found_chunk) {
10949 "device extent[%llu, %llu, %llu] did not find the related chunk",
10950 devext_key.objectid, devext_key.offset, length);
10951 return REFERENCER_MISSING;
10957 * Check if the used space is correct with the dev item
10959 static int check_dev_item(struct btrfs_fs_info *fs_info,
10960 struct extent_buffer *eb, int slot)
10962 struct btrfs_root *dev_root = fs_info->dev_root;
10963 struct btrfs_dev_item *dev_item;
10964 struct btrfs_path path;
10965 struct btrfs_key key;
10966 struct btrfs_dev_extent *ptr;
10972 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10973 dev_id = btrfs_device_id(eb, dev_item);
10974 used = btrfs_device_bytes_used(eb, dev_item);
10976 key.objectid = dev_id;
10977 key.type = BTRFS_DEV_EXTENT_KEY;
10980 btrfs_init_path(&path);
10981 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10983 btrfs_item_key_to_cpu(eb, &key, slot);
10984 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10985 key.objectid, key.type, key.offset);
10986 btrfs_release_path(&path);
10987 return REFERENCER_MISSING;
10990 /* Iterate dev_extents to calculate the used space of a device */
10992 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10995 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10996 if (key.objectid > dev_id)
10998 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11001 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11002 struct btrfs_dev_extent);
11003 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11005 ret = btrfs_next_item(dev_root, &path);
11009 btrfs_release_path(&path);
11011 if (used != total) {
11012 btrfs_item_key_to_cpu(eb, &key, slot);
11014 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11015 total, used, BTRFS_ROOT_TREE_OBJECTID,
11016 BTRFS_DEV_EXTENT_KEY, dev_id);
11017 return ACCOUNTING_MISMATCH;
11023 * Check a block group item with its referencer (chunk) and its used space
11024 * with extent/metadata item
/*
 * Check the block group item at @slot of @eb.
 *
 * Two things are verified, with problems OR-ed into err:
 *  - the chunk tree has a chunk item keyed (FIRST_CHUNK_TREE_OBJECTID,
 *    CHUNK_ITEM, block group start) whose length is compared against the
 *    block group (REFERENCER_MISSING / REFERENCER_MISMATCH);
 *  - the extent tree items inside the block group range add up to the "used"
 *    value of the item (ACCOUNTING_MISMATCH) and every extent's DATA or
 *    TREE_BLOCK flag fits the block group flags (CHUNK_TYPE_MISMATCH).
 * NOTE(review): loop headers, jumps and the final return are not visible in
 * this extract.
 */
11026 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11027 struct extent_buffer *eb, int slot)
11029 struct btrfs_root *extent_root = fs_info->extent_root;
11030 struct btrfs_root *chunk_root = fs_info->chunk_root;
11031 struct btrfs_block_group_item *bi;
11032 struct btrfs_block_group_item bg_item;
11033 struct btrfs_path path;
11034 struct btrfs_key bg_key;
11035 struct btrfs_key chunk_key;
11036 struct btrfs_key extent_key;
11037 struct btrfs_chunk *chunk;
11038 struct extent_buffer *leaf;
11039 struct btrfs_extent_item *ei;
11040 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the item out of the extent buffer so its fields can be read locally */
11048 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11049 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11050 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11051 used = btrfs_block_group_used(&bg_item);
11052 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item key uses the block group start (bg_key.objectid) as offset */
11054 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11055 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11056 chunk_key.offset = bg_key.objectid;
11058 btrfs_init_path(&path);
11059 /* Search for the referencer chunk */
11060 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11063 "block group[%llu %llu] did not find the related chunk item",
11064 bg_key.objectid, bg_key.offset);
11065 err |= REFERENCER_MISSING;
11067 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11068 struct btrfs_chunk);
11069 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11072 "block group[%llu %llu] related chunk item length does not match",
11073 bg_key.objectid, bg_key.offset);
11074 err |= REFERENCER_MISMATCH;
11077 btrfs_release_path(&path);
11079 /* Search from the block group bytenr */
/* Type 0 and offset 0 give the smallest key carrying this objectid */
11080 extent_key.objectid = bg_key.objectid;
11081 extent_key.type = 0;
11082 extent_key.offset = 0;
11084 btrfs_init_path(&path);
11085 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11089 /* Iterate extent tree to account used space */
11091 leaf = path.nodes[0];
11093 /* Search slot can point to the last item beyond leaf nritems */
11094 if (path.slots[0] >= btrfs_header_nritems(leaf))
/* bg_key.objectid + bg_key.offset is the first byte past the block group */
11097 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11098 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only EXTENT_ITEM and METADATA_ITEM keys take part in the accounting */
11101 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11102 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11104 if (extent_key.objectid < bg_key.objectid)
/*
 * NOTE(review): the statement executed for METADATA_ITEM keys is not visible
 * in this extract; for the other case the key offset is added to total.
 */
11107 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11110 total += extent_key.offset;
/* Data extents need a DATA block group, tree blocks SYSTEM or METADATA */
11112 ei = btrfs_item_ptr(leaf, path.slots[0],
11113 struct btrfs_extent_item);
11114 flags = btrfs_extent_flags(leaf, ei);
11115 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11116 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11118 "bad extent[%llu, %llu) type mismatch with chunk",
11119 extent_key.objectid,
11120 extent_key.objectid + extent_key.offset);
11121 err |= CHUNK_TYPE_MISMATCH;
11123 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11124 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11125 BTRFS_BLOCK_GROUP_METADATA))) {
11127 "bad extent[%llu, %llu) type mismatch with chunk",
11128 extent_key.objectid,
11129 extent_key.objectid + nodesize);
11130 err |= CHUNK_TYPE_MISMATCH;
11134 ret = btrfs_next_item(extent_root, &path);
11140 btrfs_release_path(&path);
/* Bytes accounted from extent items must equal the block group's "used" */
11142 if (total != used) {
11144 "block group[%llu %llu] used %llu but extent items used %llu",
11145 bg_key.objectid, bg_key.offset, used, total);
11146 err |= ACCOUNTING_MISMATCH;
11152 * Check a chunk item.
11153 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of @eb against the items that refer to it.
 *
 *  - the chunk is passed through btrfs_check_chunk_valid();
 *  - the extent tree is searched for a block group item keyed
 *    (chunk start, BLOCK_GROUP_ITEM, chunk length) and its flags are compared
 *    with the chunk type;
 *  - for every stripe the dev tree is searched for a dev extent keyed
 *    (devid, DEV_EXTENT, stripe offset) whose chunk objectid, chunk offset and
 *    length match this chunk; failures set BACKREF_MISSING.
 * Problems are OR-ed into err.
 * NOTE(review): declarations, labels and the final return are not visible in
 * this extract.
 */
11155 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11156 struct extent_buffer *eb, int slot)
11158 struct btrfs_root *extent_root = fs_info->extent_root;
11159 struct btrfs_root *dev_root = fs_info->dev_root;
11160 struct btrfs_path path;
11161 struct btrfs_key chunk_key;
11162 struct btrfs_key bg_key;
11163 struct btrfs_key devext_key;
11164 struct btrfs_chunk *chunk;
11165 struct extent_buffer *leaf;
11166 struct btrfs_block_group_item *bi;
11167 struct btrfs_block_group_item bg_item;
11168 struct btrfs_dev_extent *ptr;
/* chunk_key.offset is the chunk start; chunk_end is start + length */
11180 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11181 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11182 length = btrfs_chunk_length(eb, chunk);
11183 chunk_end = chunk_key.offset + length;
11184 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11187 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11189 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11192 type = btrfs_chunk_type(eb, chunk);
/* Key of the block group item that should describe this chunk */
11194 bg_key.objectid = chunk_key.offset;
11195 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11196 bg_key.offset = length;
11198 btrfs_init_path(&path);
11199 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11202 "chunk[%llu %llu) did not find the related block group item",
11203 chunk_key.offset, chunk_end);
11204 err |= REFERENCER_MISSING;
11206 leaf = path.nodes[0];
11207 bi = btrfs_item_ptr(leaf, path.slots[0],
11208 struct btrfs_block_group_item);
11209 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11211 if (btrfs_block_group_flags(&bg_item) != type) {
11213 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11214 chunk_key.offset, chunk_end, type,
11215 btrfs_block_group_flags(&bg_item));
/* NOTE(review): a flags mismatch is recorded as REFERENCER_MISSING, not REFERENCER_MISMATCH - confirm this is intended */
11216 err |= REFERENCER_MISSING;
/* Every stripe must be backed by a dev extent pointing back at this chunk */
11220 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11221 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11222 for (i = 0; i < num_stripes; i++) {
/* The path is reused, so drop what the previous search left behind */
11223 btrfs_release_path(&path);
11224 btrfs_init_path(&path);
11225 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11226 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11227 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11229 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11232 goto not_match_dev;
11234 leaf = path.nodes[0];
11235 ptr = btrfs_item_ptr(leaf, path.slots[0],
11236 struct btrfs_dev_extent);
11237 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11238 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11239 if (objectid != chunk_key.objectid ||
11240 offset != chunk_key.offset ||
11241 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11242 goto not_match_dev;
/* NOTE(review): the message below prints chunk_key.objectid as the range start, while the other messages in this function print chunk_key.offset - confirm which one is intended */
11245 err |= BACKREF_MISSING;
11247 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11248 chunk_key.objectid, chunk_end, i);
11251 btrfs_release_path(&path);
11257 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and hand each one to the checker that matches
 * its key type.
 *
 * EXTENT_CSUM items are only accounted (their item size is added to
 * total_csum_bytes). Every other handled type calls a check_*() helper and
 * stores its result in ret.
 * NOTE(review): the loop construct, the handling of ret after each case and
 * the final return are not visible in this extract.
 */
11259 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11261 struct btrfs_fs_info *fs_info = root->fs_info;
11262 struct btrfs_key key;
11265 struct btrfs_extent_data_ref *dref;
11270 btrfs_item_key_to_cpu(eb, &key, slot);
/* File extent items are the only case that needs the owning root */
11274 case BTRFS_EXTENT_DATA_KEY:
11275 ret = check_extent_data_item(root, eb, slot);
11278 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11279 ret = check_block_group_item(fs_info, eb, slot);
11282 case BTRFS_DEV_ITEM_KEY:
11283 ret = check_dev_item(fs_info, eb, slot);
11286 case BTRFS_CHUNK_ITEM_KEY:
11287 ret = check_chunk_item(fs_info, eb, slot);
11290 case BTRFS_DEV_EXTENT_KEY:
11291 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
11294 case BTRFS_EXTENT_ITEM_KEY:
11295 case BTRFS_METADATA_ITEM_KEY:
11296 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items are not verified here, only their size is accounted */
11299 case BTRFS_EXTENT_CSUM_KEY:
11300 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* The cases below are backrefs stored as separate keyed items */
11302 case BTRFS_TREE_BLOCK_REF_KEY:
11303 ret = check_tree_block_backref(fs_info, key.offset,
11307 case BTRFS_EXTENT_DATA_REF_KEY:
11308 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11309 ret = check_extent_data_backref(fs_info,
11310 btrfs_extent_data_ref_root(eb, dref),
11311 btrfs_extent_data_ref_objectid(eb, dref),
11312 btrfs_extent_data_ref_offset(eb, dref),
11314 btrfs_extent_data_ref_count(eb, dref));
11317 case BTRFS_SHARED_BLOCK_REF_KEY:
11318 ret = check_shared_block_backref(fs_info, key.offset,
11322 case BTRFS_SHARED_DATA_REF_KEY:
11323 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while the leaf still has items */
11331 if (++slot < btrfs_header_nritems(eb))
11338 * Helper function for later fs/subvol tree check. To determine if a tree
11339 * block should be checked.
11340 * This function ensures that only the direct referencer with the lowest
11341 * rootid checks a fs/subvolume tree block.
11343 * Backref check at extent tree would detect errors like missing subvolume
11344 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the one that should check tree block @eb.
 *
 * The extent tree is searched for the extent item of @eb and its inline
 * references are walked. When a TREE_BLOCK_REF whose offset is lower than
 * root->objectid is found, the path is released inside the loop.
 * NOTE(review): the return statements are not visible in this extract, so the
 * exact return values of the individual paths cannot be stated here.
 */
11346 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11348 struct btrfs_root *extent_root = root->fs_info->extent_root;
11349 struct btrfs_key key;
11350 struct btrfs_path path;
11351 struct extent_buffer *leaf;
11353 struct btrfs_extent_item *ei;
11359 struct btrfs_extent_inline_ref *iref;
/* Largest METADATA_ITEM key for this bytenr; the search is followed by a step back */
11362 btrfs_init_path(&path);
11363 key.objectid = btrfs_header_bytenr(eb);
11364 key.type = BTRFS_METADATA_ITEM_KEY;
11365 key.offset = (u64)-1;
11368 * Any failure in backref resolving means we can't determine
11369 * whom the tree block belongs to.
11370 * So in that case, we need to check that tree block
11372 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11376 ret = btrfs_previous_extent_item(extent_root, &path,
11377 btrfs_header_bytenr(eb));
11381 leaf = path.nodes[0];
11382 slot = path.slots[0];
11383 btrfs_item_key_to_cpu(leaf, &key, slot);
11384 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the inline refs follow the extent item directly; in the
 * other case a btrfs_tree_block_info sits in between.
 */
11386 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11387 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11389 struct btrfs_tree_block_info *info;
11391 info = (struct btrfs_tree_block_info *)(ei + 1);
11392 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk the inline refs from the first one up to the end of the item */
11395 item_size = btrfs_item_size_nr(leaf, slot);
11396 ptr = (unsigned long)iref;
11397 end = (unsigned long)ei + item_size;
11398 while (ptr < end) {
11399 iref = (struct btrfs_extent_inline_ref *)ptr;
11400 type = btrfs_extent_inline_ref_type(leaf, iref);
11401 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11404 * We only check the tree block if current root is
11405 * the lowest referencer of it.
11407 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11408 offset < root->objectid) {
11409 btrfs_release_path(&path);
/* The size of an inline ref depends on its type */
11413 ptr += btrfs_extent_inline_ref_size(type);
11416 * Normally we should also check keyed tree block ref, but that may be
11417 * very time consuming. Inlined ref should already make us skip a lot
11418 * of refs now. So skip search keyed tree block ref.
11422 btrfs_release_path(&path);
11427 * Traversal function for tree block. We will do:
11428 * 1) Skip shared fs/subvolume tree blocks
11429 * 2) Update related bytes accounting
11430 * 3) Pre-order traversal
/*
 * Check tree block @node of @root and then, recursively, each of its
 * children.
 *
 * Updates the global byte counters (total_btree_bytes, total_fs_tree_bytes,
 * total_extent_tree_bytes, btree_space_waste) and found_old_backref, checks
 * the block's own backref with check_tree_block_ref(), runs
 * check_leaf_items() on leaves, and for nodes reads every child with
 * read_tree_block() and recurses into it.
 * NOTE(review): the early returns, the leaf/node branch and the way ret is
 * folded into the result are not visible in this extract.
 */
11432 static int traverse_tree_block(struct btrfs_root *root,
11433 struct extent_buffer *node)
11435 struct extent_buffer *eb;
11436 struct btrfs_key key;
11437 struct btrfs_key drop_key;
11445 * Skip shared fs/subvolume tree block, in that case they will
11446 * be checked by referencer with lowest rootid
11448 if (is_fstree(root->objectid) && !should_check(root, node))
11451 /* Update bytes accounting */
11452 total_btree_bytes += node->len;
11453 if (fs_root_objectid(btrfs_header_owner(node)))
11454 total_fs_tree_bytes += node->len;
11455 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11456 total_extent_tree_bytes += node->len;
/*
 * found_old_backref is latched once a block owned by TREE_RELOC has the mixed
 * backref revision but lacks the RELOC header flag.
 */
11457 if (!found_old_backref &&
11458 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11459 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11460 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11461 found_old_backref = 1;
11463 /* pre-order traversal, check itself first */
11464 level = btrfs_header_level(node);
11465 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11466 btrfs_header_level(node),
11467 btrfs_header_owner(node));
11471 "check %s failed root %llu bytenr %llu level %d, force continue check",
11472 level ? "node":"leaf", root->objectid,
11473 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: free space counts as waste, then every item is checked */
11476 btree_space_waste += btrfs_leaf_free_space(root, node);
11477 ret = check_leaf_items(root, node);
/* Node: every unused key pointer slot counts as waste */
11482 nr = btrfs_header_nritems(node);
11483 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11484 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11485 sizeof(struct btrfs_key_ptr);
11487 /* Then check all its children */
11488 for (i = 0; i < nr; i++) {
11489 u64 blocknr = btrfs_node_blockptr(node, i);
/* Keys are tested against the root item's drop_progress at drop_level */
11491 btrfs_node_key_to_cpu(node, &key, i);
11492 if (level == root->root_item.drop_level &&
11493 is_dropped_key(&key, &drop_key))
11497 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11498 * to call the function itself.
/* Only children that could be read (uptodate buffer) are descended into */
11500 eb = read_tree_block(root->fs_info, blocknr, 0);
11501 if (extent_buffer_uptodate(eb)) {
11502 ret = traverse_tree_block(root, eb);
11505 free_extent_buffer(eb);
11512 * Low memory usage version check_chunks_and_extents.
11514 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11516 struct btrfs_path path;
11517 struct btrfs_key key;
11518 struct btrfs_root *root1;
11519 struct btrfs_root *cur_root;
11523 root1 = root->fs_info->chunk_root;
11524 ret = traverse_tree_block(root1, root1->node);
11527 root1 = root->fs_info->tree_root;
11528 ret = traverse_tree_block(root1, root1->node);
11531 btrfs_init_path(&path);
11532 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11534 key.type = BTRFS_ROOT_ITEM_KEY;
11536 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11538 error("cannot find extent treet in tree_root");
11543 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11544 if (key.type != BTRFS_ROOT_ITEM_KEY)
11546 key.offset = (u64)-1;
11548 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11549 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11552 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11553 if (IS_ERR(cur_root) || !cur_root) {
11554 error("failed to read tree: %lld", key.objectid);
11558 ret = traverse_tree_block(cur_root, cur_root->node);
11561 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11562 btrfs_free_fs_root(cur_root);
11564 ret = btrfs_next_item(root1, &path);
11570 btrfs_release_path(&path);
/*
 * Give @root a freshly initialised root node inside transaction @trans.
 *
 * The node's header is zeroed and rebuilt (level, bytenr, generation, mixed
 * backref revision, owner, fsid, chunk tree uuid) and the buffer is marked
 * dirty. When the new node occupies the same bytenr as the old one, the root
 * item is updated in the tree root right here. Finally the old node is
 * released and the root is put on the dirty list.
 * NOTE(review): the branch that uses @overwrite to choose between the first
 * extent_buffer_get() and btrfs_alloc_free_block(), the error handling and
 * the return value are not visible in this extract.
 */
11574 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11575 struct btrfs_root *root, int overwrite)
11577 struct extent_buffer *c;
11578 struct extent_buffer *old = root->node;
11581 struct btrfs_disk_key disk_key = {0,0,0};
11587 extent_buffer_get(c);
/* Allocate a nodesize block owned by this root */
11590 c = btrfs_alloc_free_block(trans, root,
11591 root->fs_info->nodesize,
11592 root->root_key.objectid,
11593 &disk_key, level, 0, 0);
11596 extent_buffer_get(c);
/* Wipe the header, then fill in the fields of an empty block */
11600 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11601 btrfs_set_header_level(c, level);
11602 btrfs_set_header_bytenr(c, c->start);
11603 btrfs_set_header_generation(c, trans->transid);
11604 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11605 btrfs_set_header_owner(c, root->root_key.objectid);
11607 write_extent_buffer(c, root->fs_info->fsid,
11608 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11610 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11611 btrfs_header_chunk_tree_uuid(c),
11614 btrfs_mark_buffer_dirty(c);
11616 * this case can happen in the following case:
11618 * 1.overwrite previous root.
11620 * 2.reinit reloc data root, this is because we skip pin
11621 * down reloc data tree before which means we can allocate
11622 * same block bytenr here.
/* Same bytenr as before: the root item is updated here */
11624 if (old->start == c->start) {
11625 btrfs_set_root_generation(&root->root_item,
11627 root->root_item.level = btrfs_header_level(root->node);
11628 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11629 &root->root_key, &root->root_item);
11631 free_extent_buffer(c);
11635 free_extent_buffer(old);
11637 add_root_to_dirty_list(root);
/*
 * Pin tree block @eb and, recursively, the tree blocks reachable from it.
 *
 * @tree_root is non-zero when @eb belongs to the tree root; it is passed on
 * unchanged for child nodes and is 0 for the root blocks found through
 * ROOT_ITEMs. Root items of the extent tree, the reloc trees and the data
 * reloc tree are tested for explicitly (see the comment in the loop).
 * For trees other than the tree root, children of a level 1 node are pinned
 * by bytenr without being read.
 * NOTE(review): the leaf/node branch, the continue statements and the return
 * values are not visible in this extract.
 */
11641 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11642 struct extent_buffer *eb, int tree_root)
11644 struct extent_buffer *tmp;
11645 struct btrfs_root_item *ri;
11646 struct btrfs_key key;
11648 int level = btrfs_header_level(eb);
11654 * If we have pinned this block before, don't pin it again.
11655 * This can not only avoid forever loop with broken filesystem
11656 * but also give us some speedups.
11658 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11659 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11662 btrfs_pin_extent(fs_info, eb->start, eb->len);
11664 nritems = btrfs_header_nritems(eb);
11665 for (i = 0; i < nritems; i++) {
/* Item keys are examined for ROOT_ITEMs, which name other trees */
11667 btrfs_item_key_to_cpu(eb, &key, i);
11668 if (key.type != BTRFS_ROOT_ITEM_KEY)
11670 /* Skip the extent root and reloc roots */
11671 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11672 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11673 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11675 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11676 bytenr = btrfs_disk_root_bytenr(eb, ri);
11679 * If at any point we start needing the real root we
11680 * will have to build a stump root for the root we are
11681 * in, but for now this doesn't actually use the root so
11682 * just pass in extent_root.
11684 tmp = read_tree_block(fs_info, bytenr, 0);
11685 if (!extent_buffer_uptodate(tmp)) {
11686 fprintf(stderr, "Error reading root block\n");
/* Descend into the referenced tree; it is not the tree root, hence 0 */
11689 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11690 free_extent_buffer(tmp);
/* Block pointers of a node name its child blocks */
11694 bytenr = btrfs_node_blockptr(eb, i);
11696 /* If we aren't the tree root don't read the block */
11697 if (level == 1 && !tree_root) {
11698 btrfs_pin_extent(fs_info, bytenr,
11699 fs_info->nodesize);
11703 tmp = read_tree_block(fs_info, bytenr, 0);
11704 if (!extent_buffer_uptodate(tmp)) {
11705 fprintf(stderr, "Error reading tree block\n");
11708 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11709 free_extent_buffer(tmp);
11718 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11722 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11726 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The avail_*_alloc_bits of @fs_info are cleared, then every CHUNK_ITEM found
 * in the chunk tree is turned into a block group with btrfs_add_block_group()
 * and its range is marked dirty in fs_info->free_space_cache. Afterwards the
 * block groups are walked with btrfs_lookup_first_block_group().
 * NOTE(review): the loop constructs, what is done with each looked-up cache
 * and the return values are not visible in this extract.
 */
11729 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11731 struct btrfs_block_group_cache *cache;
11732 struct btrfs_path path;
11733 struct extent_buffer *leaf;
11734 struct btrfs_chunk *chunk;
11735 struct btrfs_key key;
11739 btrfs_init_path(&path);
11741 key.type = BTRFS_CHUNK_ITEM_KEY;
11743 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11745 btrfs_release_path(&path);
11750 * We do this in case the block groups were screwed up and had alloc
11751 * bits that aren't actually set on the chunks. This happens with
11752 * restored images every time and could happen in real life I guess.
11754 fs_info->avail_data_alloc_bits = 0;
11755 fs_info->avail_metadata_alloc_bits = 0;
11756 fs_info->avail_system_alloc_bits = 0;
11758 /* First we need to create the in-memory block groups */
/* Past the last slot of this leaf: move on to the next leaf */
11760 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11761 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11763 btrfs_release_path(&path);
11771 leaf = path.nodes[0];
11772 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11773 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* key.offset is used as the start of the range, the chunk length as its size */
11778 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11779 btrfs_add_block_group(fs_info, 0,
11780 btrfs_chunk_type(leaf, chunk),
11781 key.objectid, key.offset,
11782 btrfs_chunk_length(leaf, chunk));
11783 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11784 key.offset + btrfs_chunk_length(leaf, chunk));
/* Step through the block groups; start moves to the end of each one */
11789 cache = btrfs_lookup_first_block_group(fs_info, start);
11793 start = cache->key.objectid + cache->key.offset;
11796 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance from the tree root and reinitialise
 * the data reloc tree.
 *
 * Visible steps:
 *  - search for the (BALANCE_OBJECTID, BALANCE_ITEM) key with deletion intent
 *    and delete it with btrfs_del_item();
 *  - search from the first (TREE_RELOC_OBJECTID, ROOT_ITEM) key and delete
 *    the matching items in batches tracked by del_slot/del_nr;
 *  - read the DATA_RELOC tree, record it in @trans, reinitialise its root
 *    with btrfs_fsck_reinit_root() and recreate its root directory.
 * NOTE(review): labels, loop constructs, error branches and the return value
 * are not visible in this extract.
 */
11800 static int reset_balance(struct btrfs_trans_handle *trans,
11801 struct btrfs_fs_info *fs_info)
11803 struct btrfs_root *root = fs_info->tree_root;
11804 struct btrfs_path path;
11805 struct extent_buffer *leaf;
11806 struct btrfs_key key;
11807 int del_slot, del_nr = 0;
11811 btrfs_init_path(&path);
11812 key.objectid = BTRFS_BALANCE_OBJECTID;
11813 key.type = BTRFS_BALANCE_ITEM_KEY;
/* ins_len -1 and cow 1: the search prepares the path for a deletion */
11815 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11820 goto reinit_data_reloc;
11825 ret = btrfs_del_item(trans, root, &path);
11828 btrfs_release_path(&path);
/* Next: the root items of the reloc trees */
11830 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11831 key.type = BTRFS_ROOT_ITEM_KEY;
11833 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11837 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11842 ret = btrfs_del_items(trans, root, &path,
11849 btrfs_release_path(&path);
11852 ret = btrfs_search_slot(trans, root, &key, &path,
11859 leaf = path.nodes[0];
11860 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11861 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11863 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11868 del_slot = path.slots[0];
/* Delete the collected run of del_nr items starting at del_slot */
11877 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11881 btrfs_release_path(&path);
/* From here on "root" is the data reloc tree, not the tree root */
11884 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11885 key.type = BTRFS_ROOT_ITEM_KEY;
11886 key.offset = (u64)-1;
11887 root = btrfs_read_fs_root(fs_info, &key);
11888 if (IS_ERR(root)) {
11889 fprintf(stderr, "Error reading data reloc tree\n");
11890 ret = PTR_ERR(root);
11893 record_root_in_trans(trans, root);
11894 ret = btrfs_fsck_reinit_root(trans, root, 0);
11897 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11899 btrfs_release_path(&path);
/*
 * Recreate the extent tree from scratch inside transaction @trans.
 *
 * Order of the visible steps:
 *  1. refuse filesystems with the MIXED_GROUPS incompat flag;
 *  2. pin all metadata blocks with pin_metadata_blocks();
 *  3. free the block groups and rebuild them with reset_block_groups();
 *  4. reinitialise the extent root with btrfs_fsck_reinit_root();
 *  5. insert one item per in-memory block group into the new extent tree;
 *  6. clear any pending balance with reset_balance().
 * NOTE(review): the return statements and the loop construct around step 5
 * are not visible in this extract.
 */
11903 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11904 struct btrfs_fs_info *fs_info)
11910 * The only reason we don't do this is because right now we're just
11911 * walking the trees we find and pinning down their bytes, we don't look
11912 * at any of the leaves. In order to do mixed groups we'd have to check
11913 * the leaves of any fs roots and pin down the bytes for any file
11914 * extents we find. Not hard but why do it if we don't have to?
11916 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11917 fprintf(stderr, "We don't support re-initing the extent tree "
11918 "for mixed block groups yet, please notify a btrfs "
11919 "developer you want to do this so they can add this "
11920 "functionality.\n");
11925 * first we need to walk all of the trees except the extent tree and pin
11926 * down the bytes that are in use so we don't overwrite any existing
11929 ret = pin_metadata_blocks(fs_info);
11931 fprintf(stderr, "error pinning down used bytes\n");
11936 * Need to drop all the block groups since we're going to recreate all
11939 btrfs_free_block_groups(fs_info);
11940 ret = reset_block_groups(fs_info);
11942 fprintf(stderr, "error resetting the block groups\n");
11946 /* Ok we can allocate now, reinit the extent root */
11947 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11949 fprintf(stderr, "extent root initialization failed\n");
11951 * When the transaction code is updated we should end the
11952 * transaction, but for now progs only knows about commit so
11953 * just return an error.
11959 * Now we have all the in-memory block groups setup so we can make
11960 * allocations properly, and the metadata we care about is safe since we
11961 * pinned all of it above.
11964 struct btrfs_block_group_cache *cache;
/* Each in-memory block group's key and item are inserted into the extent tree */
11966 cache = btrfs_lookup_first_block_group(fs_info, start);
11969 start = cache->key.objectid + cache->key.offset;
11970 ret = btrfs_insert_item(trans, fs_info->extent_root,
11971 &cache->key, &cache->item,
11972 sizeof(cache->item));
11974 fprintf(stderr, "Error adding block group\n");
11977 btrfs_extent_post_op(trans, fs_info->extent_root);
11980 ret = reset_balance(trans, fs_info);
11982 fprintf(stderr, "error resetting the pending balance\n");
11987 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11989 struct btrfs_path path;
11990 struct btrfs_trans_handle *trans;
11991 struct btrfs_key key;
11994 printf("Recowing metadata block %llu\n", eb->start);
11995 key.objectid = btrfs_header_owner(eb);
11996 key.type = BTRFS_ROOT_ITEM_KEY;
11997 key.offset = (u64)-1;
11999 root = btrfs_read_fs_root(root->fs_info, &key);
12000 if (IS_ERR(root)) {
12001 fprintf(stderr, "Couldn't find owner root %llu\n",
12003 return PTR_ERR(root);
12006 trans = btrfs_start_transaction(root, 1);
12008 return PTR_ERR(trans);
12010 btrfs_init_path(&path);
12011 path.lowest_level = btrfs_header_level(eb);
12012 if (path.lowest_level)
12013 btrfs_node_key_to_cpu(eb, &key, 0);
12015 btrfs_item_key_to_cpu(eb, &key, 0);
12017 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12018 btrfs_commit_transaction(trans, root);
12019 btrfs_release_path(&path);
12023 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12025 struct btrfs_path path;
12026 struct btrfs_trans_handle *trans;
12027 struct btrfs_key key;
12030 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12031 bad->key.type, bad->key.offset);
12032 key.objectid = bad->root_id;
12033 key.type = BTRFS_ROOT_ITEM_KEY;
12034 key.offset = (u64)-1;
12036 root = btrfs_read_fs_root(root->fs_info, &key);
12037 if (IS_ERR(root)) {
12038 fprintf(stderr, "Couldn't find owner root %llu\n",
12040 return PTR_ERR(root);
12043 trans = btrfs_start_transaction(root, 1);
12045 return PTR_ERR(trans);
12047 btrfs_init_path(&path);
12048 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12054 ret = btrfs_del_item(trans, root, &path);
12056 btrfs_commit_transaction(trans, root);
12057 btrfs_release_path(&path);
12061 static int zero_log_tree(struct btrfs_root *root)
12063 struct btrfs_trans_handle *trans;
12066 trans = btrfs_start_transaction(root, 1);
12067 if (IS_ERR(trans)) {
12068 ret = PTR_ERR(trans);
12071 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12072 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12073 ret = btrfs_commit_transaction(trans, root);
12077 static int populate_csum(struct btrfs_trans_handle *trans,
12078 struct btrfs_root *csum_root, char *buf, u64 start,
12081 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12086 while (offset < len) {
12087 sectorsize = fs_info->sectorsize;
12088 ret = read_extent_data(fs_info, buf, start + offset,
12092 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12093 start + offset, buf, sectorsize);
12096 offset += sectorsize;
/*
 * Fill @csum_root with checksums for the regular file extents referenced by
 * one fs/subvolume tree, @cur_root.
 *
 * A sectorsize buffer is allocated, the tree is walked item by item, and for
 * every EXTENT_DATA item of type BTRFS_FILE_EXTENT_REG populate_csum() is
 * called with the extent's disk bytenr and disk length.
 * NOTE(review): the initial search key, the jumps, the handling that follows
 * the -EEXIST test, the buffer release and the return value are not visible
 * in this extract.
 */
12101 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12102 struct btrfs_root *csum_root,
12103 struct btrfs_root *cur_root)
12105 struct btrfs_path path;
12106 struct btrfs_key key;
12107 struct extent_buffer *node;
12108 struct btrfs_file_extent_item *fi;
/* Scratch buffer of one sector, passed on to populate_csum() */
12115 buf = malloc(cur_root->fs_info->sectorsize);
12119 btrfs_init_path(&path);
12123 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12126 /* Iterate all regular file extents and fill its csum */
12128 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12130 if (key.type != BTRFS_EXTENT_DATA_KEY)
12132 node = path.nodes[0];
12133 slot = path.slots[0];
12134 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
/* The file extent type is tested against BTRFS_FILE_EXTENT_REG */
12135 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent: disk bytenr and disk num bytes */
12137 start = btrfs_file_extent_disk_bytenr(node, fi);
12138 len = btrfs_file_extent_disk_num_bytes(node, fi);
12140 ret = populate_csum(trans, csum_root, buf, start, len);
/* -EEXIST from populate_csum() gets dedicated handling */
12141 if (ret == -EEXIST)
12147 * TODO: if next leaf is corrupted, jump to nearest next valid
12150 ret = btrfs_next_item(cur_root, &path);
12160 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking the fs/subvolume trees listed in the tree root.
 *
 * The tree root is searched starting at BTRFS_FS_TREE_OBJECTID; each item's
 * key is tested against BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and
 * is_fstree(), and roots obtained through btrfs_read_fs_root() are handed to
 * fill_csum_tree_from_one_fs_root().
 * NOTE(review): the jumps taken by the individual tests, the error handling
 * and the return value are not visible in this extract.
 */
12165 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12166 struct btrfs_root *csum_root)
12168 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12169 struct btrfs_path path;
12170 struct btrfs_root *tree_root = fs_info->tree_root;
12171 struct btrfs_root *cur_root;
12172 struct extent_buffer *node;
12173 struct btrfs_key key;
12177 btrfs_init_path(&path);
12178 key.objectid = BTRFS_FS_TREE_OBJECTID;
12180 key.type = BTRFS_ROOT_ITEM_KEY;
12181 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12190 node = path.nodes[0];
12191 slot = path.slots[0];
12192 btrfs_item_key_to_cpu(node, &key, slot);
/* Filters applied to each item before a root is read */
12193 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12195 if (key.type != BTRFS_ROOT_ITEM_KEY)
12197 if (!is_fstree(key.objectid))
/* The root is looked up with offset (u64)-1 */
12199 key.offset = (u64)-1;
12201 cur_root = btrfs_read_fs_root(fs_info, &key);
12202 if (IS_ERR(cur_root) || !cur_root) {
12203 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12207 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12212 ret = btrfs_next_item(tree_root, &path);
12222 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking the extent tree.
 *
 * Every EXTENT_ITEM whose flags contain BTRFS_EXTENT_FLAG_DATA is passed to
 * populate_csum(), starting at the extent's bytenr (key.objectid). A
 * sectorsize buffer is allocated for the reads.
 * NOTE(review): the initial key fields, the loop construct, the length passed
 * to populate_csum(), the buffer release and the return value are not
 * visible in this extract.
 */
12226 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12227 struct btrfs_root *csum_root)
12229 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12230 struct btrfs_path path;
12231 struct btrfs_extent_item *ei;
12232 struct extent_buffer *leaf;
12234 struct btrfs_key key;
12237 btrfs_init_path(&path);
12239 key.type = BTRFS_EXTENT_ITEM_KEY;
12241 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12243 btrfs_release_path(&path);
/* Scratch buffer of one sector, passed on to populate_csum() */
12247 buf = malloc(csum_root->fs_info->sectorsize);
12249 btrfs_release_path(&path);
/* Past the last slot of this leaf: move on to the next leaf */
12254 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12255 ret = btrfs_next_leaf(extent_root, &path);
12263 leaf = path.nodes[0];
12265 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12266 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* The extent flags are tested for BTRFS_EXTENT_FLAG_DATA */
12271 ei = btrfs_item_ptr(leaf, path.slots[0],
12272 struct btrfs_extent_item);
12273 if (!(btrfs_extent_flags(leaf, ei) &
12274 BTRFS_EXTENT_FLAG_DATA)) {
12279 ret = populate_csum(trans, csum_root, buf, key.objectid,
12286 btrfs_release_path(&path);
12292 * Recalculate the csums and put them into the csum tree.
12294 * Extent tree init will wipe out all the extent info, so in that case we
12295 * can't depend on the extent tree and must use the fs trees instead. If
12296 * search_fs_tree is set, we will use fs/subvol trees to init the csum tree.
12298 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12299 struct btrfs_root *csum_root,
12300 int search_fs_tree)
/*
 * Dispatcher for refilling the csum tree.
 *
 * @trans:          transaction handle forwarded to the chosen helper
 * @csum_root:      checksum tree to fill
 * @search_fs_tree: non-zero selects the fs/subvolume tree walk, zero selects
 *                  the extent tree walk
 *
 * The result of the chosen helper is returned unchanged.
 */
12302 if (search_fs_tree)
12303 return fill_csum_tree_from_fs(trans, csum_root);
12305 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: detach every cache_extent from the
 * tree, then free the tree itself and reset the global pointer to NULL.
 *
 * NOTE(review): the body of the NULL check and the release of each
 * root_item_info are not visible in this listing.
 */
12308 static void free_roots_info_cache(void)
/* Cache was never allocated (branch body not visible, presumably a return). */
12310 if (!roots_info_cache)
12313 while (!cache_tree_empty(roots_info_cache)) {
12314 struct cache_extent *entry;
12315 struct root_item_info *rii;
12317 entry = first_cache_extent(roots_info_cache);
/* Unlink the entry from the tree before recovering its container. */
12320 remove_cache_extent(roots_info_cache, entry);
12321 rii = container_of(entry, struct root_item_info, cache_extent);
12325 free(roots_info_cache);
12326 roots_info_cache = NULL;
/*
 * Fill the global roots_info_cache by scanning the extent tree of @info.
 * For each tree block extent whose first inline ref is a TREE_BLOCK_REF, the
 * ref offset is used as the root id, and a root_item_info keyed by that id
 * records the bytenr, generation and level of the highest-level block seen.
 *
 * @info: filesystem whose extent_root is scanned
 *
 * NOTE(review): braces, the loop header, branch bodies and the return are not
 * present in this listing, so the return values cannot be confirmed here.
 */
12329 static int build_roots_info_cache(struct btrfs_fs_info *info)
12332 struct btrfs_key key;
12333 struct extent_buffer *leaf;
12334 struct btrfs_path path;
/* Allocate and initialise the cache on first use. */
12336 if (!roots_info_cache) {
12337 roots_info_cache = malloc(sizeof(*roots_info_cache));
12338 if (!roots_info_cache)
12340 cache_tree_init(roots_info_cache);
12343 btrfs_init_path(&path);
12345 key.type = BTRFS_EXTENT_ITEM_KEY;
/* The lookup passes NULL as the transaction handle. */
12347 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12350 leaf = path.nodes[0];
12353 struct btrfs_key found_key;
12354 struct btrfs_extent_item *ei;
12355 struct btrfs_extent_inline_ref *iref;
12356 int slot = path.slots[0];
12361 struct cache_extent *entry;
12362 struct root_item_info *rii;
/* Past the last item of this leaf: step to the next leaf and reload. */
12364 if (slot >= btrfs_header_nritems(leaf)) {
12365 ret = btrfs_next_leaf(info->extent_root, &path);
12372 leaf = path.nodes[0];
12373 slot = path.slots[0];
12376 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only EXTENT_ITEM and METADATA_ITEM keys are of interest. */
12378 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12379 found_key.type != BTRFS_METADATA_ITEM_KEY)
12382 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12383 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag takes this branch. */
12385 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12386 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the inline ref is taken from directly behind the extent
 * item and the level from the key offset. Otherwise a btrfs_tree_block_info
 * sits behind the extent item, the inline ref follows it, and the level is
 * read from that structure.
 */
12389 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12390 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12391 level = found_key.offset;
12393 struct btrfs_tree_block_info *binfo;
12395 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12396 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12397 level = btrfs_tree_block_level(leaf, binfo);
12401 * For a root extent, it must be of the following type and the
12402 * first (and only one) iref in the item.
12404 type = btrfs_extent_inline_ref_type(leaf, iref);
12405 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The ref offset is used as the id of the owning root. */
12408 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12409 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/*
 * New entry for this root id, keyed as the range [root_id, root_id + 1).
 * Level (u8)-1 is the value the level check further down treats as
 * nothing recorded yet.
 */
12411 rii = malloc(sizeof(struct root_item_info));
12416 rii->cache_extent.start = root_id;
12417 rii->cache_extent.size = 1;
12418 rii->level = (u8)-1;
12419 entry = &rii->cache_extent;
12420 ret = insert_cache_extent(roots_info_cache, entry);
12423 rii = container_of(entry, struct root_item_info,
12427 ASSERT(rii->cache_extent.start == root_id);
12428 ASSERT(rii->cache_extent.size == 1);
/*
 * A block at a higher level than the recorded one, or the first block seen
 * for this root, becomes the root node candidate and resets node_count.
 */
12430 if (level > rii->level || rii->level == (u8)-1) {
12431 rii->level = level;
12432 rii->bytenr = found_key.objectid;
12433 rii->gen = btrfs_extent_generation(leaf, ei);
12434 rii->node_count = 1;
/*
 * NOTE(review): the body of the equal-level branch is not visible here;
 * presumably it counts another block at the same level -- confirm.
 */
12435 } else if (level == rii->level) {
12443 btrfs_release_path(&path);
/*
 * Compare the root item at the current position of @path with the data
 * recorded in roots_info_cache for root_key->objectid. When bytenr, level or
 * generation differ and @read_only_mode is zero, the three fields are updated
 * in a local copy and written back into the leaf.
 *
 * @path:           path whose nodes[0]/slots[0] point at the root item
 * @root_key:       key of that root item; its objectid is the cache lookup id
 * @read_only_mode: non-zero means report only, do not modify the leaf
 *
 * NOTE(review): the return statements are not present in this listing, so
 * the return values cannot be confirmed from here.
 */
12448 static int maybe_repair_root_item(struct btrfs_path *path,
12449 const struct btrfs_key *root_key,
12450 const int read_only_mode)
12452 const u64 root_id = root_key->objectid;
12453 struct cache_extent *entry;
12454 struct root_item_info *rii;
12455 struct btrfs_root_item ri;
12456 unsigned long offset;
12458 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* Message for the case where the cache holds nothing for this root. */
12461 "Error: could not find extent items for root %llu\n",
12462 root_key->objectid);
12466 rii = container_of(entry, struct root_item_info, cache_extent);
12467 ASSERT(rii->cache_extent.start == root_id);
12468 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one candidate root node is reported. */
12470 if (rii->node_count != 1) {
12472 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item out of the leaf into the local ri. */
12477 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12478 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Any mismatch in bytenr, level or generation enters the repair path. */
12480 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12481 btrfs_root_level(&ri) != rii->level ||
12482 btrfs_root_generation(&ri) != rii->gen) {
12485 * If we're in repair mode but our caller told us to not update
12486 * the root item, i.e. just check if it needs to be updated, don't
12487 * print this message, since the caller will call us again shortly
12488 * for the same root item without read only mode (the caller will
12489 * open a transaction first).
12491 if (!(read_only_mode && repair))
12493 "%sroot item for root %llu,"
12494 " current bytenr %llu, current gen %llu, current level %u,"
12495 " new bytenr %llu, new gen %llu, new level %u\n",
12496 (read_only_mode ? "" : "fixing "),
12498 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12499 btrfs_root_level(&ri),
12500 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the found root node is reported separately. */
12502 if (btrfs_root_generation(&ri) > rii->gen) {
12504 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12505 root_id, btrfs_root_generation(&ri), rii->gen);
/* Update the local copy, then write it back over the item in the leaf. */
12509 if (!read_only_mode) {
12510 btrfs_set_root_bytenr(&ri, rii->bytenr);
12511 btrfs_set_root_level(&ri, rii->level);
12512 btrfs_set_root_generation(&ri, rii->gen);
12513 write_extent_buffer(path->nodes[0], &ri,
12514 offset, sizeof(ri));
12524 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12525 * caused read-only snapshots to be corrupted if they were created at a moment
12526 * when the source subvolume/snapshot had orphan items. The issue was that the
12527 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12528 * node instead of the post orphan cleanup root node.
12529 * So this function, and its callees, just detect and fix those cases. Even
12530 * though the regression was for read-only snapshots, this function applies to
12531 * any snapshot/subvolume root.
12532 * This must be run before any other repair code - otherwise other repair
12533 * code will delete or modify backrefs in the extent tree, for example, which
12534 * will result in an inconsistent fs after repairing the root items.
12536 static int repair_root_items(struct btrfs_fs_info *info)
/*
 * Build the roots info cache, then walk the ROOT_ITEM keys of the tree root
 * starting at BTRFS_FIRST_FREE_OBJECTID and run maybe_repair_root_item() on
 * each of them. The helper is called in read-only mode as long as no
 * transaction is open. The cache is freed before returning.
 *
 * @info: filesystem whose tree_root is walked
 *
 * NOTE(review): braces, labels, branch bodies and the return are not present
 * in this listing; how need_trans drives the retry cannot be confirmed here.
 */
12538 struct btrfs_path path;
12539 struct btrfs_key key;
12540 struct extent_buffer *leaf;
12541 struct btrfs_trans_handle *trans = NULL;
12544 int need_trans = 0;
12546 btrfs_init_path(&path);
12548 ret = build_roots_info_cache(info);
12552 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12553 key.type = BTRFS_ROOT_ITEM_KEY;
12558 * Avoid opening and committing transactions if a leaf doesn't have
12559 * any root items that need to be fixed, so that we avoid rotating
12560 * backup roots unnecessarily.
12563 trans = btrfs_start_transaction(info->tree_root, 1);
12564 if (IS_ERR(trans)) {
12565 ret = PTR_ERR(trans);
12570 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12574 leaf = path.nodes[0];
12577 struct btrfs_key found_key;
/*
 * End of the current leaf: compute the next key to search from, drop the
 * path and commit the transaction before continuing.
 */
12579 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12580 int no_more_keys = find_next_key(&path, &key);
12582 btrfs_release_path(&path);
12584 ret = btrfs_commit_transaction(trans,
12596 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Non ROOT_ITEM keys and the tree reloc root are filtered here. */
12598 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12600 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* read_only_mode is 1 exactly when no transaction is open. */
12603 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/* Repair was requested but no transaction is open yet. */
12607 if (!trans && repair) {
12610 btrfs_release_path(&path);
12620 free_roots_info_cache();
12621 btrfs_release_path(&path);
/* NOTE(review): the result of this commit is not stored on this line. */
12623 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group of @fs_info, then set the
 * cache generation in the super block copy to (u64)-1 inside a transaction.
 *
 * @fs_info: filesystem to operate on
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started. Other
 * return paths are not visible in this listing.
 */
12630 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12632 struct btrfs_trans_handle *trans;
12633 struct btrfs_block_group_cache *bg_cache;
12637 /* Clear all free space cache inodes and its extent data */
12639 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12642 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Next lookup starts at key.objectid + key.offset of this block group. */
12645 current = bg_cache->key.objectid + bg_cache->key.offset;
12648 /* Don't forget to set cache_generation to -1 */
12649 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12650 if (IS_ERR(trans)) {
12651 error("failed to update super block cache generation");
12652 return PTR_ERR(trans);
12654 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of this commit is discarded on this line. */
12655 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for the check command, one string per output line: synopsis,
 * short and long description, then one entry per option. cmd_check() hands
 * this array to usage().
 *
 * NOTE(review): the terminating entries and closing brace of the array are
 * not present in this listing.
 */
12660 const char * const cmd_check_usage[] = {
12661 "btrfs check [options] <device>",
12662 "Check structural integrity of a filesystem (unmounted).",
12663 "Check structural integrity of an unmounted filesystem. Verify internal",
12664 "trees' consistency and item connectivity. In the repair mode try to",
12665 "fix the problems found. ",
12666 "WARNING: the repair mode is considered dangerous",
12668 "-s|--super <superblock> use this superblock copy",
12669 "-b|--backup use the first valid backup root copy",
12670 "--repair try to repair the filesystem",
12671 "--readonly run in read-only mode (default)",
12672 "--init-csum-tree create a new CRC tree",
12673 "--init-extent-tree create a new extent tree",
12674 "--mode <MODE> allows choice of memory/IO trade-offs",
12675 " where MODE is one of:",
12676 " original - read inodes and extents to memory (requires",
12677 " more memory, does less IO)",
12678 " lowmem - try to use less memory but read blocks again",
12680 "--check-data-csum verify checksums of data blocks",
12681 "-Q|--qgroup-report print a report on qgroup consistency",
12682 "-E|--subvol-extents <subvolid>",
12683 " print subvolume extents and sharing state",
12684 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12685 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12686 "-p|--progress indicate progress",
12687 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
12691 int cmd_check(int argc, char **argv)
12693 struct cache_tree root_cache;
12694 struct btrfs_root *root;
12695 struct btrfs_fs_info *info;
12698 u64 tree_root_bytenr = 0;
12699 u64 chunk_root_bytenr = 0;
12700 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12704 int init_csum_tree = 0;
12706 int clear_space_cache = 0;
12707 int qgroup_report = 0;
12708 int qgroups_repaired = 0;
12709 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12713 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12714 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12715 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12716 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12717 static const struct option long_options[] = {
12718 { "super", required_argument, NULL, 's' },
12719 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12720 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12721 { "init-csum-tree", no_argument, NULL,
12722 GETOPT_VAL_INIT_CSUM },
12723 { "init-extent-tree", no_argument, NULL,
12724 GETOPT_VAL_INIT_EXTENT },
12725 { "check-data-csum", no_argument, NULL,
12726 GETOPT_VAL_CHECK_CSUM },
12727 { "backup", no_argument, NULL, 'b' },
12728 { "subvol-extents", required_argument, NULL, 'E' },
12729 { "qgroup-report", no_argument, NULL, 'Q' },
12730 { "tree-root", required_argument, NULL, 'r' },
12731 { "chunk-root", required_argument, NULL,
12732 GETOPT_VAL_CHUNK_TREE },
12733 { "progress", no_argument, NULL, 'p' },
12734 { "mode", required_argument, NULL,
12736 { "clear-space-cache", required_argument, NULL,
12737 GETOPT_VAL_CLEAR_SPACE_CACHE},
12738 { NULL, 0, NULL, 0}
12741 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12745 case 'a': /* ignored */ break;
12747 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12750 num = arg_strtou64(optarg);
12751 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12753 "super mirror should be less than %d",
12754 BTRFS_SUPER_MIRROR_MAX);
12757 bytenr = btrfs_sb_offset(((int)num));
12758 printf("using SB copy %llu, bytenr %llu\n", num,
12759 (unsigned long long)bytenr);
12765 subvolid = arg_strtou64(optarg);
12768 tree_root_bytenr = arg_strtou64(optarg);
12770 case GETOPT_VAL_CHUNK_TREE:
12771 chunk_root_bytenr = arg_strtou64(optarg);
12774 ctx.progress_enabled = true;
12778 usage(cmd_check_usage);
12779 case GETOPT_VAL_REPAIR:
12780 printf("enabling repair mode\n");
12782 ctree_flags |= OPEN_CTREE_WRITES;
12784 case GETOPT_VAL_READONLY:
12787 case GETOPT_VAL_INIT_CSUM:
12788 printf("Creating a new CRC tree\n");
12789 init_csum_tree = 1;
12791 ctree_flags |= OPEN_CTREE_WRITES;
12793 case GETOPT_VAL_INIT_EXTENT:
12794 init_extent_tree = 1;
12795 ctree_flags |= (OPEN_CTREE_WRITES |
12796 OPEN_CTREE_NO_BLOCK_GROUPS);
12799 case GETOPT_VAL_CHECK_CSUM:
12800 check_data_csum = 1;
12802 case GETOPT_VAL_MODE:
12803 check_mode = parse_check_mode(optarg);
12804 if (check_mode == CHECK_MODE_UNKNOWN) {
12805 error("unknown mode: %s", optarg);
12809 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12810 if (strcmp(optarg, "v1") == 0) {
12811 clear_space_cache = 1;
12812 } else if (strcmp(optarg, "v2") == 0) {
12813 clear_space_cache = 2;
12814 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12817 "invalid argument to --clear-space-cache, must be v1 or v2");
12820 ctree_flags |= OPEN_CTREE_WRITES;
12825 if (check_argc_exact(argc - optind, 1))
12826 usage(cmd_check_usage);
12828 if (ctx.progress_enabled) {
12829 ctx.tp = TASK_NOTHING;
12830 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12833 /* This check is the only reason for --readonly to exist */
12834 if (readonly && repair) {
12835 error("repair options are not compatible with --readonly");
12840 * Not supported yet
12842 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12843 error("low memory mode doesn't support repair yet");
12848 cache_tree_init(&root_cache);
12850 if((ret = check_mounted(argv[optind])) < 0) {
12851 error("could not check mount status: %s", strerror(-ret));
12855 error("%s is currently mounted, aborting", argv[optind]);
12861 /* only allow partial opening under repair mode */
12863 ctree_flags |= OPEN_CTREE_PARTIAL;
12865 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12866 chunk_root_bytenr, ctree_flags);
12868 error("cannot open file system");
12874 global_info = info;
12875 root = info->fs_root;
12876 if (clear_space_cache == 1) {
12877 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12879 "free space cache v2 detected, use --clear-space-cache v2");
12883 printf("Clearing free space cache\n");
12884 ret = clear_free_space_cache(info);
12886 error("failed to clear free space cache");
12889 printf("Free space cache cleared\n");
12892 } else if (clear_space_cache == 2) {
12893 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12894 printf("no free space cache v2 to clear\n");
12898 printf("Clear free space cache v2\n");
12899 ret = btrfs_clear_free_space_tree(info);
12901 error("failed to clear free space cache v2: %d", ret);
12904 printf("free space cache v2 cleared\n");
12910 * repair mode will force us to commit transaction which
12911 * will make us fail to load log tree when mounting.
12913 if (repair && btrfs_super_log_root(info->super_copy)) {
12914 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12920 ret = zero_log_tree(root);
12923 error("failed to zero log tree: %d", ret);
12928 uuid_unparse(info->super_copy->fsid, uuidbuf);
12929 if (qgroup_report) {
12930 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12932 ret = qgroup_verify_all(info);
12939 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12940 subvolid, argv[optind], uuidbuf);
12941 ret = print_extent_state(info, subvolid);
12945 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12947 if (!extent_buffer_uptodate(info->tree_root->node) ||
12948 !extent_buffer_uptodate(info->dev_root->node) ||
12949 !extent_buffer_uptodate(info->chunk_root->node)) {
12950 error("critical roots corrupted, unable to check the filesystem");
12956 if (init_extent_tree || init_csum_tree) {
12957 struct btrfs_trans_handle *trans;
12959 trans = btrfs_start_transaction(info->extent_root, 0);
12960 if (IS_ERR(trans)) {
12961 error("error starting transaction");
12962 ret = PTR_ERR(trans);
12967 if (init_extent_tree) {
12968 printf("Creating a new extent tree\n");
12969 ret = reinit_extent_tree(trans, info);
12975 if (init_csum_tree) {
12976 printf("Reinitialize checksum tree\n");
12977 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12979 error("checksum tree initialization failed: %d",
12986 ret = fill_csum_tree(trans, info->csum_root,
12990 error("checksum tree refilling failed: %d", ret);
12995 * Ok now we commit and run the normal fsck, which will add
12996 * extent entries for all of the items it finds.
12998 ret = btrfs_commit_transaction(trans, info->extent_root);
13003 if (!extent_buffer_uptodate(info->extent_root->node)) {
13004 error("critical: extent_root, unable to check the filesystem");
13009 if (!extent_buffer_uptodate(info->csum_root->node)) {
13010 error("critical: csum_root, unable to check the filesystem");
13016 if (!ctx.progress_enabled)
13017 fprintf(stderr, "checking extents\n");
13018 if (check_mode == CHECK_MODE_LOWMEM)
13019 ret = check_chunks_and_extents_v2(root);
13021 ret = check_chunks_and_extents(root);
13025 "errors found in extent allocation tree or chunk allocation");
13027 ret = repair_root_items(info);
13030 error("failed to repair root items: %s", strerror(-ret));
13034 fprintf(stderr, "Fixed %d roots.\n", ret);
13036 } else if (ret > 0) {
13038 "Found %d roots with an outdated root item.\n",
13041 "Please run a filesystem check with the option --repair to fix them.\n");
13047 if (!ctx.progress_enabled) {
13048 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13049 fprintf(stderr, "checking free space tree\n");
13051 fprintf(stderr, "checking free space cache\n");
13053 ret = check_space_cache(root);
13056 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13057 error("errors found in free space tree");
13059 error("errors found in free space cache");
13064 * We used to have to have these hole extents in between our real
13065 * extents so if we don't have this flag set we need to make sure there
13066 * are no gaps in the file extents for inodes, otherwise we can just
13067 * ignore it when this happens.
13069 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13070 if (!ctx.progress_enabled)
13071 fprintf(stderr, "checking fs roots\n");
13072 if (check_mode == CHECK_MODE_LOWMEM)
13073 ret = check_fs_roots_v2(root->fs_info);
13075 ret = check_fs_roots(root, &root_cache);
13078 error("errors found in fs roots");
13082 fprintf(stderr, "checking csums\n");
13083 ret = check_csums(root);
13086 error("errors found in csum tree");
13090 fprintf(stderr, "checking root refs\n");
13091 /* For low memory mode, check_fs_roots_v2 handles root refs */
13092 if (check_mode != CHECK_MODE_LOWMEM) {
13093 ret = check_root_refs(root, &root_cache);
13096 error("errors found in root refs");
13101 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13102 struct extent_buffer *eb;
13104 eb = list_first_entry(&root->fs_info->recow_ebs,
13105 struct extent_buffer, recow);
13106 list_del_init(&eb->recow);
13107 ret = recow_extent_buffer(root, eb);
13110 error("fails to fix transid errors");
13115 while (!list_empty(&delete_items)) {
13116 struct bad_item *bad;
13118 bad = list_first_entry(&delete_items, struct bad_item, list);
13119 list_del_init(&bad->list);
13121 ret = delete_bad_item(root, bad);
13127 if (info->quota_enabled) {
13128 fprintf(stderr, "checking quota groups\n");
13129 ret = qgroup_verify_all(info);
13132 error("failed to check quota groups");
13136 ret = repair_qgroups(info, &qgroups_repaired);
13139 error("failed to repair quota groups");
13145 if (!list_empty(&root->fs_info->recow_ebs)) {
13146 error("transid errors in file system");
13151 if (found_old_backref) { /*
13152 * there was a disk format change when mixed
13153 * backref was in testing tree. The old format
13154 * existed about one week.
13156 printf("\n * Found old mixed backref format. "
13157 "The old format is not supported! *"
13158 "\n * Please mount the FS in readonly mode, "
13159 "backup data and re-format the FS. *\n\n");
13162 printf("found %llu bytes used, ",
13163 (unsigned long long)bytes_used);
13165 printf("error(s) found\n");
13167 printf("no error found\n");
13168 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13169 printf("total tree bytes: %llu\n",
13170 (unsigned long long)total_btree_bytes);
13171 printf("total fs tree bytes: %llu\n",
13172 (unsigned long long)total_fs_tree_bytes);
13173 printf("total extent tree bytes: %llu\n",
13174 (unsigned long long)total_extent_tree_bytes);
13175 printf("btree space waste bytes: %llu\n",
13176 (unsigned long long)btree_space_waste);
13177 printf("file data blocks allocated: %llu\n referenced %llu\n",
13178 (unsigned long long)data_bytes_allocated,
13179 (unsigned long long)data_bytes_referenced);
13181 free_qgroup_counts();
13182 free_root_recs_tree(&root_cache);
13186 if (ctx.progress_enabled)
13187 task_deinit(ctx.info);