2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
117 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
120 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
123 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
130 #define NO_INODE_ITEM (1<<14) /* no inode_item */
131 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, just removed the undetermined members
142 * and change it to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
229 struct btrfs_key drop_key;
232 #define REF_ERR_NO_DIR_ITEM (1 << 0)
233 #define REF_ERR_NO_DIR_INDEX (1 << 1)
234 #define REF_ERR_NO_INODE_REF (1 << 2)
235 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
236 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
237 #define REF_ERR_DUP_INODE_REF (1 << 5)
238 #define REF_ERR_INDEX_UNMATCH (1 << 6)
239 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
240 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
241 #define REF_ERR_NO_ROOT_REF (1 << 9)
242 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
243 #define REF_ERR_DUP_ROOT_REF (1 << 11)
244 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
246 struct file_extent_hole {
252 struct inode_record {
253 struct list_head backrefs;
254 unsigned int checked:1;
255 unsigned int merging:1;
256 unsigned int found_inode_item:1;
257 unsigned int found_dir_item:1;
258 unsigned int found_file_extent:1;
259 unsigned int found_csum_item:1;
260 unsigned int some_csum_missing:1;
261 unsigned int nodatasum:1;
274 struct rb_root holes;
275 struct list_head orphan_extents;
280 #define I_ERR_NO_INODE_ITEM (1 << 0)
281 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
282 #define I_ERR_DUP_INODE_ITEM (1 << 2)
283 #define I_ERR_DUP_DIR_INDEX (1 << 3)
284 #define I_ERR_ODD_DIR_ITEM (1 << 4)
285 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
286 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
287 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
288 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
289 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
290 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
291 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
292 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
293 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
294 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
296 struct root_backref {
297 struct list_head list;
298 unsigned int found_dir_item:1;
299 unsigned int found_dir_index:1;
300 unsigned int found_back_ref:1;
301 unsigned int found_forward_ref:1;
302 unsigned int reachable:1;
311 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 return list_entry(entry, struct root_backref, list);
317 struct list_head backrefs;
318 struct cache_extent cache;
319 unsigned int found_root_item:1;
325 struct cache_extent cache;
330 struct cache_extent cache;
331 struct cache_tree root_cache;
332 struct cache_tree inode_cache;
333 struct inode_record *current;
342 struct walk_control {
343 struct cache_tree shared;
344 struct shared_node *nodes[BTRFS_MAX_LEVEL];
350 struct btrfs_key key;
352 struct list_head list;
355 struct extent_entry {
360 struct list_head list;
363 struct root_item_info {
364 /* level of the root */
366 /* number of nodes at this level, must be 1 for a root */
370 struct cache_extent cache_extent;
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.
 *
 * Every flag must own a distinct bit so that several errors can be OR-ed
 * into a single return value and still be told apart.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
/*
 * This flag used to share (1 << 4) with REFERENCER_MISMATCH, which made the
 * two errors indistinguishable.  It now uses the first free bit so that the
 * values of all other flags stay unchanged.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390 static void *print_status_check(void *p)
392 struct task_ctx *priv = p;
393 const char work_indicator[] = { '.', 'o', 'O', 'o' };
395 static char *task_position_string[] = {
397 "checking free space cache",
401 task_period_start(priv->info, 1000 /* 1s */);
403 if (priv->tp == TASK_NOTHING)
407 printf("%s [%c]\r", task_position_string[priv->tp],
408 work_indicator[count % 4]);
411 task_period_wait(priv->info);
416 static int print_status_return(void *p)
424 static enum btrfs_check_mode parse_check_mode(const char *str)
426 if (strcmp(str, "lowmem") == 0)
427 return CHECK_MODE_LOWMEM;
428 if (strcmp(str, "orig") == 0)
429 return CHECK_MODE_ORIGINAL;
430 if (strcmp(str, "original") == 0)
431 return CHECK_MODE_ORIGINAL;
433 return CHECK_MODE_UNKNOWN;
/* Compatibility helper that allows older code to be reused */
437 static u64 first_extent_gap(struct rb_root *holes)
439 struct file_extent_hole *hole;
441 if (RB_EMPTY_ROOT(holes))
444 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
448 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 struct file_extent_hole *hole1;
451 struct file_extent_hole *hole2;
453 hole1 = rb_entry(node1, struct file_extent_hole, node);
454 hole2 = rb_entry(node2, struct file_extent_hole, node);
456 if (hole1->start > hole2->start)
458 if (hole1->start < hole2->start)
460 /* Now hole1->start == hole2->start */
461 if (hole1->len >= hole2->len)
463 * Hole 1 will be merge center
464 * Same hole will be merged later
467 /* Hole 2 will be merge center */
472 * Add a hole to the record
474 * This will do hole merge for copy_file_extent_holes(),
475 * which will ensure there won't be continuous holes.
477 static int add_file_extent_hole(struct rb_root *holes,
480 struct file_extent_hole *hole;
481 struct file_extent_hole *prev = NULL;
482 struct file_extent_hole *next = NULL;
484 hole = malloc(sizeof(*hole));
489 /* Since compare will not return 0, no -EEXIST will happen */
490 rb_insert(holes, &hole->node, compare_hole);
492 /* simple merge with previous hole */
493 if (rb_prev(&hole->node))
494 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496 if (prev && prev->start + prev->len >= hole->start) {
497 hole->len = hole->start + hole->len - prev->start;
498 hole->start = prev->start;
499 rb_erase(&prev->node, holes);
504 /* iterate merge with next holes */
506 if (!rb_next(&hole->node))
508 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510 if (hole->start + hole->len >= next->start) {
511 if (hole->start + hole->len <= next->start + next->len)
512 hole->len = next->start + next->len -
514 rb_erase(&next->node, holes);
523 static int compare_hole_range(struct rb_node *node, void *data)
525 struct file_extent_hole *hole;
528 hole = (struct file_extent_hole *)data;
531 hole = rb_entry(node, struct file_extent_hole, node);
532 if (start < hole->start)
534 if (start >= hole->start && start < hole->start + hole->len)
540 * Delete a hole in the record
 * This will split the hole if needed and is much more restrictive than add.
544 static int del_file_extent_hole(struct rb_root *holes,
547 struct file_extent_hole *hole;
548 struct file_extent_hole tmp;
553 struct rb_node *node;
560 node = rb_search(holes, &tmp, compare_hole_range, NULL);
563 hole = rb_entry(node, struct file_extent_hole, node);
564 if (start + len > hole->start + hole->len)
568 * Now there will be no overlap, delete the hole and re-add the
569 * split(s) if they exists.
571 if (start > hole->start) {
572 prev_start = hole->start;
573 prev_len = start - hole->start;
576 if (hole->start + hole->len > start + len) {
577 next_start = start + len;
578 next_len = hole->start + hole->len - start - len;
581 rb_erase(node, holes);
584 ret = add_file_extent_hole(holes, prev_start, prev_len);
589 ret = add_file_extent_hole(holes, next_start, next_len);
596 static int copy_file_extent_holes(struct rb_root *dst,
599 struct file_extent_hole *hole;
600 struct rb_node *node;
603 node = rb_first(src);
605 hole = rb_entry(node, struct file_extent_hole, node);
606 ret = add_file_extent_hole(dst, hole->start, hole->len);
609 node = rb_next(node);
614 static void free_file_extent_holes(struct rb_root *holes)
616 struct rb_node *node;
617 struct file_extent_hole *hole;
619 node = rb_first(holes);
621 hole = rb_entry(node, struct file_extent_hole, node);
622 rb_erase(node, holes);
624 node = rb_first(holes);
628 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630 static void record_root_in_trans(struct btrfs_trans_handle *trans,
631 struct btrfs_root *root)
633 if (root->last_trans != trans->transid) {
634 root->track_dirty = 1;
635 root->last_trans = trans->transid;
636 root->commit_root = root->node;
637 extent_buffer_get(root->node);
641 static u8 imode_to_type(u32 imode)
644 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
645 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
646 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
647 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
648 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
649 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
650 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
651 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
654 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
658 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 struct device_record *rec1;
661 struct device_record *rec2;
663 rec1 = rb_entry(node1, struct device_record, node);
664 rec2 = rb_entry(node2, struct device_record, node);
665 if (rec1->devid > rec2->devid)
667 else if (rec1->devid < rec2->devid)
673 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 struct inode_record *rec;
676 struct inode_backref *backref;
677 struct inode_backref *orig;
678 struct inode_backref *tmp;
679 struct orphan_data_extent *src_orphan;
680 struct orphan_data_extent *dst_orphan;
685 rec = malloc(sizeof(*rec));
687 return ERR_PTR(-ENOMEM);
688 memcpy(rec, orig_rec, sizeof(*rec));
690 INIT_LIST_HEAD(&rec->backrefs);
691 INIT_LIST_HEAD(&rec->orphan_extents);
692 rec->holes = RB_ROOT;
694 list_for_each_entry(orig, &orig_rec->backrefs, list) {
695 size = sizeof(*orig) + orig->namelen + 1;
696 backref = malloc(size);
701 memcpy(backref, orig, size);
702 list_add_tail(&backref->list, &rec->backrefs);
704 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
705 dst_orphan = malloc(sizeof(*dst_orphan));
710 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
711 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
720 rb = rb_first(&rec->holes);
722 struct file_extent_hole *hole;
724 hole = rb_entry(rb, struct file_extent_hole, node);
730 if (!list_empty(&rec->backrefs))
731 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
732 list_del(&orig->list);
736 if (!list_empty(&rec->orphan_extents))
737 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
738 list_del(&orig->list);
747 static void print_orphan_data_extents(struct list_head *orphan_extents,
750 struct orphan_data_extent *orphan;
752 if (list_empty(orphan_extents))
754 printf("The following data extent is lost in tree %llu:\n",
756 list_for_each_entry(orphan, orphan_extents, list) {
757 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
758 orphan->objectid, orphan->offset, orphan->disk_bytenr,
763 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 u64 root_objectid = root->root_key.objectid;
766 int errors = rec->errors;
770 /* reloc root errors, we print its corresponding fs root objectid*/
771 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
772 root_objectid = root->root_key.offset;
773 fprintf(stderr, "reloc");
775 fprintf(stderr, "root %llu inode %llu errors %x",
776 (unsigned long long) root_objectid,
777 (unsigned long long) rec->ino, rec->errors);
779 if (errors & I_ERR_NO_INODE_ITEM)
780 fprintf(stderr, ", no inode item");
781 if (errors & I_ERR_NO_ORPHAN_ITEM)
782 fprintf(stderr, ", no orphan item");
783 if (errors & I_ERR_DUP_INODE_ITEM)
784 fprintf(stderr, ", dup inode item");
785 if (errors & I_ERR_DUP_DIR_INDEX)
786 fprintf(stderr, ", dup dir index");
787 if (errors & I_ERR_ODD_DIR_ITEM)
788 fprintf(stderr, ", odd dir item");
789 if (errors & I_ERR_ODD_FILE_EXTENT)
790 fprintf(stderr, ", odd file extent");
791 if (errors & I_ERR_BAD_FILE_EXTENT)
792 fprintf(stderr, ", bad file extent");
793 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
794 fprintf(stderr, ", file extent overlap");
795 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
796 fprintf(stderr, ", file extent discount");
797 if (errors & I_ERR_DIR_ISIZE_WRONG)
798 fprintf(stderr, ", dir isize wrong");
799 if (errors & I_ERR_FILE_NBYTES_WRONG)
800 fprintf(stderr, ", nbytes wrong");
801 if (errors & I_ERR_ODD_CSUM_ITEM)
802 fprintf(stderr, ", odd csum item");
803 if (errors & I_ERR_SOME_CSUM_MISSING)
804 fprintf(stderr, ", some csum missing");
805 if (errors & I_ERR_LINK_COUNT_WRONG)
806 fprintf(stderr, ", link count wrong");
807 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
808 fprintf(stderr, ", orphan file extent");
809 fprintf(stderr, "\n");
810 /* Print the orphan extents if needed */
811 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
812 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814 /* Print the holes if needed */
815 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
816 struct file_extent_hole *hole;
817 struct rb_node *node;
820 node = rb_first(&rec->holes);
821 fprintf(stderr, "Found file extent holes:\n");
824 hole = rb_entry(node, struct file_extent_hole, node);
825 fprintf(stderr, "\tstart: %llu, len: %llu\n",
826 hole->start, hole->len);
827 node = rb_next(node);
830 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 root->fs_info->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
877 node = container_of(cache, struct ptr_node, cache);
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
887 rec = calloc(1, sizeof(*rec));
889 return ERR_PTR(-ENOMEM);
891 rec->extent_start = (u64)-1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
909 ret = insert_cache_extent(inode_cache, &node->cache);
911 return ERR_PTR(-EEXIST);
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
961 if (!rec->found_inode_item)
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
977 if (!rec->checked || rec->merging)
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 if (backref->dir != dir || backref->namelen != namelen)
1070 if (memcmp(name, backref->name, namelen))
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1078 memset(backref, 0, sizeof(*backref));
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1136 maybe_free_inode_rec(inode_cache, rec);
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1233 if (--src_node->refs == 0)
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1241 cache = search_cache_extent(src, 0);
1243 node = container_of(cache, struct ptr_node, cache);
1245 cache = next_cache_extent(cache);
1248 remove_cache_extent(src, &node->cache);
1251 ins = malloc(sizeof(*ins));
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1301 free_inode_rec(rec);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1315 node = container_of(cache, struct shared_node, cache);
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1335 ret = insert_cache_extent(shared, &node->cache);
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1347 if (level == wc->active_node)
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called from walk_up_tree() when the level being left is the active node.
 * The active node is detached from wc->nodes[], the active level moves up
 * to index i (chosen by the loop below, whose body is not visible here) and
 * the detached node's records are spliced into the new active node when that
 * level is below the root level or the root still has references.
 * Return statements are on lines not visible in this excerpt.
 */
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
/* Leaving the root level: handled by a hidden line right after this test. */
1388 if (level == wc->root_level)
/* Scan the levels above @level. */
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The scan must have stopped on a valid level. */
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the current active node and make level i the active one. */
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
/* Below the root level, or root still referenced: merge into the new node. */
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
/* Other branch (its header is not visible): at least two refs are expected. */
1407 BUG_ON(node->refs < 2);
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Work out how root @child_root_id relates to @parent_root_id by searching
 * the tree root: first for the exact (parent, ROOT_REF, child) key, then by
 * scanning the (child, ROOT_BACKREF, *) items.  The final return tells
 * "has some other parent" (0) from "has no parent at all" (2); the earlier
 * returns are on lines not visible in this excerpt.
 */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1430 btrfs_init_path(&path);
/* Step 1: direct lookup of the forward reference parent -> child. */
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1439 btrfs_release_path(&path);
/* Step 2: walk every back reference owned by the child. */
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1452 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the child's ROOT_BACKREF items. */
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
/* A back reference pointing at the requested parent. */
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1475 btrfs_release_path(&path);
1478 return has_parent ? 0 : 2;
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
1515 if (cur + sizeof(*di) + name_len > total ||
1516 name_len > BTRFS_NAME_LEN) {
1517 error = REF_ERR_NAME_TOO_LONG;
1519 if (cur + sizeof(*di) > total)
1521 len = min_t(u32, total - cur - sizeof(*di),
1528 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1530 if (key->type == BTRFS_DIR_ITEM_KEY &&
1531 key->offset != btrfs_name_hash(namebuf, len)) {
1532 rec->errors |= I_ERR_ODD_DIR_ITEM;
1533 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1534 key->objectid, key->offset, namebuf, len, filetype,
1535 key->offset, btrfs_name_hash(namebuf, len));
1538 if (location.type == BTRFS_INODE_ITEM_KEY) {
1539 add_inode_backref(inode_cache, location.objectid,
1540 key->objectid, key->offset, namebuf,
1541 len, filetype, key->type, error);
1542 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1543 add_inode_backref(root_cache, location.objectid,
1544 key->objectid, key->offset,
1545 namebuf, len, filetype,
1548 fprintf(stderr, "invalid location in dir item %u\n",
1550 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1551 key->objectid, key->offset, namebuf,
1552 len, filetype, key->type, error);
1555 len = sizeof(*di) + name_len + data_len;
1556 di = (struct btrfs_dir_item *)((char *)di + len);
1559 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1560 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record a backref for every name packed into the INODE_REF item at @slot.
 * key->objectid, key->offset and the ref's index are passed through to
 * add_inode_backref() together with the name.  A name longer than
 * BTRFS_NAME_LEN, or one running past the end of the item, is flagged with
 * REF_ERR_NAME_TOO_LONG and read truncated.
 */
1565 static int process_inode_ref(struct extent_buffer *eb,
1566 int slot, struct btrfs_key *key,
1567 struct shared_node *active_node)
1575 struct cache_tree *inode_cache;
1576 struct btrfs_inode_ref *ref;
/* Not NUL-terminated: only the first len bytes are valid after the read. */
1577 char namebuf[BTRFS_NAME_LEN];
1579 inode_cache = &active_node->inode_cache;
1581 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1582 total = btrfs_item_size_nr(eb, slot);
/* One item may hold several refs back to back. */
1583 while (cur < total) {
1584 name_len = btrfs_inode_ref_name_len(eb, ref);
1585 index = btrfs_inode_ref_index(eb, ref);
1587 /* inode_ref + namelen should not cross item boundary */
1588 if (cur + sizeof(*ref) + name_len > total ||
1589 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size header fits in what is left of the item. */
1590 if (total < cur + sizeof(*ref))
1593 /* Still try to read out the remaining part */
1594 len = min_t(u32, total - cur - sizeof(*ref),
1596 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes directly follow the fixed-size ref header. */
1602 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1603 add_inode_backref(inode_cache, key->objectid, key->offset,
1604 index, namebuf, len, 0, key->type, error);
/* Advance by the on-disk size of this ref (header + full name length). */
1606 len = sizeof(*ref) + name_len;
1607 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record a backref for every name packed into the INODE_EXTREF item at
 * @slot.  The parent directory id comes from the extref itself rather than
 * from the key.  A name longer than BTRFS_NAME_LEN is read truncated and
 * flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): unlike process_inode_ref(), the visible lines do not compare
 * cur + sizeof(*extref) + name_len against the item size before reading --
 * confirm whether a corrupted name_len can make the read cross the item.
 */
1613 static int process_inode_extref(struct extent_buffer *eb,
1614 int slot, struct btrfs_key *key,
1615 struct shared_node *active_node)
1624 struct cache_tree *inode_cache;
1625 struct btrfs_inode_extref *extref;
/* Not NUL-terminated: only the first len bytes are valid after the read. */
1626 char namebuf[BTRFS_NAME_LEN];
1628 inode_cache = &active_node->inode_cache;
1630 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1631 total = btrfs_item_size_nr(eb, slot);
/* One item may hold several extrefs back to back. */
1632 while (cur < total) {
1633 name_len = btrfs_inode_extref_name_len(eb, extref);
1634 index = btrfs_inode_extref_index(eb, extref);
1635 parent = btrfs_inode_extref_parent(eb, extref);
1636 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp what is read and remember the error. */
1640 len = BTRFS_NAME_LEN;
1641 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes directly follow the fixed-size extref header. */
1643 read_extent_buffer(eb, namebuf,
1644 (unsigned long)(extref + 1), len);
1645 add_inode_backref(inode_cache, key->objectid, parent,
1646 index, namebuf, len, 0, key->type, error);
/* Advance by the on-disk size of this extref. */
1648 len = sizeof(*extref) + name_len;
1649 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree and work out how much of the byte range
 * [@start, @start + @len) is covered by EXTENT_CSUM items.
 * NOTE(review): the result is presumably accumulated into *@found; the
 * lines that update it are not visible in this excerpt.
 */
1656 static int count_csum_range(struct btrfs_root *root, u64 start,
1657 u64 len, u64 *found)
1659 struct btrfs_key key;
1660 struct btrfs_path path;
1661 struct extent_buffer *leaf;
/* Size in bytes of one checksum, taken from the super block. */
1666 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1668 btrfs_init_path(&path);
1670 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1672 key.type = BTRFS_EXTENT_CSUM_KEY;
1674 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: look at the preceding item, it may still be a csum item. */
1678 if (ret > 0 && path.slots[0] > 0) {
1679 leaf = path.nodes[0];
1680 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1681 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1682 key.type == BTRFS_EXTENT_CSUM_KEY)
1687 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1688 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1689 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1694 leaf = path.nodes[0];
1697 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the checksum items. */
1698 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1699 key.type != BTRFS_EXTENT_CSUM_KEY)
1702 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This item starts at or after the end of the requested range. */
1703 if (key.offset >= start + len)
1706 if (key.offset > start)
/* Each checksum in the item covers one sector of data. */
1709 size = btrfs_item_size_nr(leaf, path.slots[0]);
1710 csum_end = key.offset + (size / csum_size) *
1711 root->fs_info->sectorsize;
/* The item overlaps the range: clamp the covered part to the remaining length. */
1712 if (csum_end > start) {
1713 size = min(csum_end - start, len);
1722 btrfs_release_path(&path);
/*
 * Fold one EXTENT_DATA item into the inode record currently being built.
 * The visible code tracks the covered file range (recording overlaps and
 * holes), validates the extent fields by type, accumulates found_size and
 * finally checks checksum coverage of the on-disk extent via
 * count_csum_range().
 */
1728 static int process_file_extent(struct btrfs_root *root,
1729 struct extent_buffer *eb,
1730 int slot, struct btrfs_key *key,
1731 struct shared_node *active_node)
1733 struct inode_record *rec;
1734 struct btrfs_file_extent_item *fi;
1736 u64 disk_bytenr = 0;
1737 u64 extent_offset = 0;
/* Low bits that must be clear for a sector-aligned value. */
1738 u64 mask = root->fs_info->sectorsize - 1;
1742 rec = active_node->current;
1743 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1744 rec->found_file_extent = 1;
/* First extent seen for this inode: initialise the covered range. */
1746 if (rec->extent_start == (u64)-1) {
1747 rec->extent_start = key->offset;
1748 rec->extent_end = key->offset;
/* Compare the new extent's start with the end of the previous one. */
1751 if (rec->extent_end > key->offset)
1752 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1753 else if (rec->extent_end < key->offset) {
/* Gap between the previous extent and this one: remember it as a hole. */
1754 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1755 key->offset - rec->extent_end);
1760 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1761 extent_type = btrfs_file_extent_type(eb, fi);
1763 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1764 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
/* NOTE(review): the condition guarding the next line is not visible here. */
1766 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1767 rec->found_size += num_bytes;
/* Round the inline length up to a sector multiple for extent_end. */
1768 num_bytes = (num_bytes + mask) & ~mask;
1769 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1770 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1771 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1772 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1773 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1774 if (num_bytes == 0 || (num_bytes & mask))
1775 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the uncompressed extent. */
1776 if (num_bytes + extent_offset >
1777 btrfs_file_extent_ram_bytes(eb, fi))
1778 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1779 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1780 (btrfs_file_extent_compression(eb, fi) ||
1781 btrfs_file_extent_encryption(eb, fi) ||
1782 btrfs_file_extent_other_encoding(eb, fi)))
1783 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk location count towards found_size. */
1784 if (disk_bytenr > 0)
1785 rec->found_size += num_bytes;
/* Remaining branch (its header is not visible): flagged as a bad extent. */
1787 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1789 rec->extent_end = key->offset + num_bytes;
1792 * The data reloc tree will copy full extents into its inode and then
1793 * copy the corresponding csums. Because the extent it copied could be
1794 * a preallocated extent that hasn't been written to yet there may be no
1795 * csums to copy, ergo we won't have csums for our file extent. This is
1796 * ok so just don't bother checking csums if the inode belongs to the
1799 if (disk_bytenr > 0 &&
1800 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the on-disk size is used for the csum lookup. */
1802 if (btrfs_file_extent_compression(eb, fi))
1803 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1805 disk_bytenr += extent_offset;
1807 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1810 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1812 rec->found_csum_item = 1;
/* Checksums cover less than the whole extent. */
1813 if (found < num_bytes)
1814 rec->some_csum_missing = 1;
1815 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
/* NOTE(review): the condition guarding the next line is not visible here. */
1817 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Original-mode leaf processing: iterate every item of leaf @eb, keep
 * active_node->current pointing at the inode record of the item's objectid
 * and dispatch the item to the matching process_*() helper by key type.
 */
1823 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1824 struct walk_control *wc)
1826 struct btrfs_key key;
1830 struct cache_tree *inode_cache;
1831 struct shared_node *active_node;
/* Active node is the root level of a root with zero root refs. */
1833 if (wc->root_level == wc->active_node &&
1834 btrfs_root_refs(&root->root_item) == 0)
1837 active_node = wc->nodes[wc->active_node];
1838 inode_cache = &active_node->inode_cache;
1839 nritems = btrfs_header_nritems(eb);
1840 for (i = 0; i < nritems; i++) {
1841 btrfs_item_key_to_cpu(eb, &key, i);
/* These two tests single out free space and orphan items. */
1843 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1845 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Objectid moved past the current record: close it and fetch the next. */
1848 if (active_node->current == NULL ||
1849 active_node->current->ino < key.objectid) {
1850 if (active_node->current) {
1851 active_node->current->checked = 1;
1852 maybe_free_inode_rec(inode_cache,
1853 active_node->current);
1855 active_node->current = get_inode_rec(inode_cache,
1857 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type (the switch header is not visible here). */
1860 case BTRFS_DIR_ITEM_KEY:
1861 case BTRFS_DIR_INDEX_KEY:
1862 ret = process_dir_item(eb, i, &key, active_node);
1864 case BTRFS_INODE_REF_KEY:
1865 ret = process_inode_ref(eb, i, &key, active_node);
1867 case BTRFS_INODE_EXTREF_KEY:
1868 ret = process_inode_extref(eb, i, &key, active_node);
1870 case BTRFS_INODE_ITEM_KEY:
1871 ret = process_inode_item(eb, i, &key, active_node);
1873 case BTRFS_EXTENT_DATA_KEY:
1874 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache, indexed by tree level.  NOTE(review): these are
 * presumably the members of struct node_refs (the struct header is not
 * visible in this excerpt); update_nodes_refs() fills all three.
 */
/* Block address the cached values of this level belong to. */
1885 u64 bytenr[BTRFS_MAX_LEVEL];
/* Reference count looked up for that block. */
1886 u64 refs[BTRFS_MAX_LEVEL];
/* Non-zero when the current root has to check that block itself. */
1887 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations: both are used by process_one_leaf_v2() below. */
1890 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1891 struct node_refs *nrefs, u64 level);
1892 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1893 unsigned int ext_ref);
1896 * Returns >0 Found error, not fatal, should continue
1897 * Returns <0 Fatal error, must exit the whole check
1898 * Returns 0 No errors found
/*
 * Process the leaf at path->nodes[0]: position the path on the first inode
 * item (or first inode number change), then run check_inode_item() for as
 * long as the path stays in this leaf.  Once the path has moved to another
 * leaf, the cached node references are refreshed from the root level down
 * and path nodes below *level are released.
 */
1900 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1901 struct node_refs *nrefs, int *level, int ext_ref)
1903 struct extent_buffer *cur = path->nodes[0];
1904 struct btrfs_key key;
1908 int root_level = btrfs_header_level(root->node);
1910 int ret = 0; /* Final return value */
1911 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started in, to detect a leaf switch later. */
1913 cur_bytenr = cur->start;
1915 /* skip to first inode item or the first inode number change */
1916 nritems = btrfs_header_nritems(cur);
1917 for (i = 0; i < nritems; i++) {
1918 btrfs_item_key_to_cpu(cur, &key, i);
1920 first_ino = key.objectid;
1921 if (key.type == BTRFS_INODE_ITEM_KEY ||
1922 (first_ino && first_ino != key.objectid))
/* Park the slot past the last item (the guarding condition is not visible). */
1926 path->slots[0] = nritems;
/* Error bits from every inode checked in this leaf are OR-ed together. */
1932 err |= check_inode_item(root, path, ext_ref);
1934 if (err & LAST_ITEM)
1937 /* still have inode items in this leaf */
1938 if (cur->start == cur_bytenr)
1942 * we have switched to another leaf, above nodes may
1943 * have changed, here walk down the path, if a node
1944 * or leaf is shared, check whether we can skip this
1947 for (i = root_level; i >= 0; i--) {
/* Cached entry for this level is still current. */
1948 if (path->nodes[i]->start == nrefs->bytenr[i])
1951 ret = update_nodes_refs(root,
1952 path->nodes[i]->start,
1957 if (!nrefs->need_check[i]) {
/* Drop the path's buffers below *level. */
1963 for (i = 0; i < *level; i++) {
1964 free_extent_buffer(path->nodes[i]);
1965 path->nodes[i] = NULL;
/*
 * Issue readahead for the tree blocks referenced by @node, starting at
 * pointer index @slot and running to the last pointer of the node.
 */
1974 static void reada_walk_down(struct btrfs_root *root,
1975 struct extent_buffer *node, int slot)
1977 struct btrfs_fs_info *fs_info = root->fs_info;
1984 level = btrfs_header_level(node);
1988 nritems = btrfs_header_nritems(node);
1989 for (i = slot; i < nritems; i++) {
1990 bytenr = btrfs_node_blockptr(node, i);
1991 ptr_gen = btrfs_node_ptr_generation(node, i);
1992 readahead_tree_block(fs_info, bytenr, fs_info->nodesize,
1998 * Check the child node/leaf by the following condition:
1999 * 1. the first item key of the node/leaf should be the same with the one
2001 * 2. block in parent node should match the child node/leaf.
2002 * 3. generation of parent node and child's header should be consistent.
2004 * Or the child node/leaf pointed by the key in parent is not valid.
2006 * We hope to check leaf owner too, but since subvol may share leaves,
2007 * which makes leaf owner check not so strong, key check should be
2008 * sufficient enough for that case.
2010 static int check_child_node(struct extent_buffer *parent, int slot,
2011 struct extent_buffer *child)
2013 struct btrfs_key parent_key;
2014 struct btrfs_key child_key;
2017 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2018 if (btrfs_header_level(child) == 0)
2019 btrfs_item_key_to_cpu(child, &child_key, 0);
2021 btrfs_node_key_to_cpu(child, &child_key, 0);
2023 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2026 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2027 parent_key.objectid, parent_key.type, parent_key.offset,
2028 child_key.objectid, child_key.type, child_key.offset);
2030 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2032 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2033 btrfs_node_blockptr(parent, slot),
2034 btrfs_header_bytenr(child));
2036 if (btrfs_node_ptr_generation(parent, slot) !=
2037 btrfs_header_generation(child)) {
2039 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2040 btrfs_header_generation(child),
2041 btrfs_node_ptr_generation(parent, slot));
2047 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2048 * in every fs or file tree check. Here we find its all root ids, and only check
2049 * it in the fs or file tree which has the smallest root id.
2051 static int need_check(struct btrfs_root *root, struct ulist *roots)
2053 struct rb_node *node;
2054 struct ulist_node *u;
2056 if (roots->nnodes == 1)
2059 node = rb_first(&roots->root);
2060 u = rb_entry(node, struct ulist_node, rb_node);
2062 * current root id is not smallest, we skip it and let it be checked
2063 * in the fs or file tree who hash the smallest root id.
2065 if (root->objectid != u->val)
2072 * For a tree node or leaf, we record its reference count, so that if we
2073 * process this node or leaf again later, we don't need to compute it again.
/*
 * Refresh the per-level cache in @nrefs for the tree block @bytenr on
 * @level.  When the cached bytenr differs, the block's reference count is
 * looked up and stored; the visible lines also look up all roots referencing
 * the block and store need_check() for them, with need_check set to 1 on
 * another path whose condition is not visible in this excerpt.
 */
2075 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2076 struct node_refs *nrefs, u64 level)
2080 struct ulist *roots;
/* Cache miss for this level. */
2082 if (nrefs->bytenr[level] != bytenr) {
2083 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2084 level, 1, &refs, NULL);
2088 nrefs->bytenr[level] = bytenr;
2089 nrefs->refs[level] = refs;
2091 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2096 check = need_check(root, roots);
2098 nrefs->need_check[level] = check;
2100 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: starting at path->nodes[*level], follow the current
 * slot downwards.  Leaves are handed to process_one_leaf(); for every child
 * pointer the reference count is taken from @nrefs or looked up, the child
 * may be registered through enter_shared_node(), and the child block is
 * read and validated before it is installed one level lower in @path.
 */
2107 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2108 struct walk_control *wc, int *level,
2109 struct node_refs *nrefs)
2111 enum btrfs_tree_block_status status;
2114 struct btrfs_fs_info *fs_info = root->fs_info;
2115 struct extent_buffer *next;
2116 struct extent_buffer *cur;
2120 WARN_ON(*level < 0);
2121 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reference count of the starting block: cached value if it matches. */
2123 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2124 refs = nrefs->refs[*level];
2127 ret = btrfs_lookup_extent_info(NULL, root,
2128 path->nodes[*level]->start,
2129 *level, 1, &refs, NULL);
2134 nrefs->bytenr[*level] = path->nodes[*level]->start;
2135 nrefs->refs[*level] = refs;
2139 ret = enter_shared_node(root, path->nodes[*level]->start,
2147 while (*level >= 0) {
2148 WARN_ON(*level < 0);
2149 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2150 cur = path->nodes[*level];
/* The block's own header level must agree with where the path holds it. */
2152 if (btrfs_header_level(cur) != *level)
/* No pointers/items left in this block. */
2155 if (path->slots[*level] >= btrfs_header_nritems(cur))
2158 ret = process_one_leaf(root, cur, wc);
/* Interior node: look at the child the current slot points to. */
2163 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2164 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2166 if (bytenr == nrefs->bytenr[*level - 1]) {
2167 refs = nrefs->refs[*level - 1];
2169 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2170 *level - 1, 1, &refs, NULL);
2174 nrefs->bytenr[*level - 1] = bytenr;
2175 nrefs->refs[*level - 1] = refs;
2180 ret = enter_shared_node(root, bytenr, refs,
/* Move on to the next pointer of this node. */
2183 path->slots[*level]++;
/* Use the cached copy of the child if it is up to date, else read it. */
2188 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2189 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2190 free_extent_buffer(next);
2191 reada_walk_down(root, cur, path->slots[*level]);
2192 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Read failed: record the parent's range as a corrupt extent. */
2193 if (!extent_buffer_uptodate(next)) {
2194 struct btrfs_key node_key;
2196 btrfs_node_key_to_cpu(path->nodes[*level],
2198 path->slots[*level]);
2199 btrfs_add_corrupt_extent_record(root->fs_info,
2201 path->nodes[*level]->start,
2202 root->fs_info->nodesize,
/* Key, block address and generation must match the parent's pointer. */
2209 ret = check_child_node(cur, path->slots[*level], next);
2211 free_extent_buffer(next);
2216 if (btrfs_is_leaf(next))
2217 status = btrfs_check_leaf(root, NULL, next);
2219 status = btrfs_check_node(root, NULL, next);
2220 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2221 free_extent_buffer(next);
/* Descend: the child replaces whatever the path held one level down. */
2226 *level = *level - 1;
2227 free_extent_buffer(path->nodes[*level]);
2228 path->nodes[*level] = next;
2229 path->slots[*level] = 0;
/* Mark the block at *level as fully consumed. */
2232 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Forward declaration of check_inode_item().
 * NOTE(review): the same prototype already appears earlier in this file, so
 * this one looks redundant.
 */
2236 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2237 unsigned int ext_ref);
2240 * Returns >0 Found error, should continue
2241 * Returns <0 Fatal error, must exit the whole check
2242 * Returns 0 No errors found
2244 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2245 int *level, struct node_refs *nrefs, int ext_ref)
2247 enum btrfs_tree_block_status status;
2250 struct btrfs_fs_info *fs_info = root->fs_info;
2251 struct extent_buffer *next;
2252 struct extent_buffer *cur;
2255 WARN_ON(*level < 0);
2256 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2258 ret = update_nodes_refs(root, path->nodes[*level]->start,
2263 while (*level >= 0) {
2264 WARN_ON(*level < 0);
2265 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2266 cur = path->nodes[*level];
2268 if (btrfs_header_level(cur) != *level)
2271 if (path->slots[*level] >= btrfs_header_nritems(cur))
2273 /* Don't forgot to check leaf/node validation */
2275 ret = btrfs_check_leaf(root, NULL, cur);
2276 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2280 ret = process_one_leaf_v2(root, path, nrefs,
2284 ret = btrfs_check_node(root, NULL, cur);
2285 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2290 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2291 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2293 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2296 if (!nrefs->need_check[*level - 1]) {
2297 path->slots[*level]++;
2301 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2302 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2303 free_extent_buffer(next);
2304 reada_walk_down(root, cur, path->slots[*level]);
2305 next = read_tree_block(fs_info, bytenr, ptr_gen);
2306 if (!extent_buffer_uptodate(next)) {
2307 struct btrfs_key node_key;
2309 btrfs_node_key_to_cpu(path->nodes[*level],
2311 path->slots[*level]);
2312 btrfs_add_corrupt_extent_record(fs_info,
2314 path->nodes[*level]->start,
2322 ret = check_child_node(cur, path->slots[*level], next);
2326 if (btrfs_is_leaf(next))
2327 status = btrfs_check_leaf(root, NULL, next);
2329 status = btrfs_check_node(root, NULL, next);
2330 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2331 free_extent_buffer(next);
2336 *level = *level - 1;
2337 free_extent_buffer(path->nodes[*level]);
2338 path->nodes[*level] = next;
2339 path->slots[*level] = 0;
/*
 * Original-mode ascent: starting at *level, look for the first level whose
 * block still has an unvisited slot.  On the other branch the buffer held at
 * *level is released and, when that level is the active shared node,
 * leave_shared_node() is called.  The lines that advance the slot, update
 * *level and return are not visible in this excerpt.
 */
2344 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2345 struct walk_control *wc, int *level)
2348 struct extent_buffer *leaf;
2350 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, this is the block at level i, not necessarily a leaf. */
2351 leaf = path->nodes[i];
/* Another slot is left in this block. */
2352 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Release the buffer held at *level. */
2357 free_extent_buffer(path->nodes[*level]);
2358 path->nodes[*level] = NULL;
2359 BUG_ON(*level > wc->active_node);
2360 if (*level == wc->active_node)
2361 leave_shared_node(root, wc, *level);
/*
 * Lowmem-mode ascent: same level scan as walk_up_tree() but without any
 * shared-node bookkeeping.  On the branch where the block has no slot left,
 * the buffer held at *level is released.  Slot advance, *level updates and
 * return statements are not visible in this excerpt.
 */
2368 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2372 struct extent_buffer *leaf;
2374 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, this is the block at level i, not necessarily a leaf. */
2375 leaf = path->nodes[i];
/* Another slot is left in this block. */
2376 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Release the buffer held at *level. */
2381 free_extent_buffer(path->nodes[*level]);
2382 path->nodes[*level] = NULL;
2389 static int check_root_dir(struct inode_record *rec)
2391 struct inode_backref *backref;
2394 if (!rec->found_inode_item || rec->errors)
2396 if (rec->nlink != 1 || rec->found_link != 0)
2398 if (list_empty(&rec->backrefs))
2400 backref = to_inode_backref(rec->backrefs.next);
2401 if (!backref->found_inode_ref)
2403 if (backref->index != 0 || backref->namelen != 2 ||
2404 memcmp(backref->name, "..", 2))
2406 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair helper: overwrite the size stored in the inode item of rec->ino
 * with rec->found_size and clear I_ERR_DIR_ISIZE_WRONG on the record.
 * The search key uses offset (u64)-1, so the search lands just past the
 * wanted item; the step back to it is not visible in this excerpt.
 */
2413 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2414 struct btrfs_root *root, struct btrfs_path *path,
2415 struct inode_record *rec)
2417 struct btrfs_inode_item *ei;
2418 struct btrfs_key key;
2421 key.objectid = rec->ino;
2422 key.type = BTRFS_INODE_ITEM_KEY;
2423 key.offset = (u64)-1;
2425 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 leaves no previous item in this leaf to step back to. */
2429 if (!path->slots[0]) {
2436 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found does not belong to the inode being repaired. */
2437 if (key.objectid != rec->ino) {
2442 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2443 struct btrfs_inode_item);
2444 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2445 btrfs_mark_buffer_dirty(path->nodes[0]);
2446 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2447 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2448 root->root_key.objectid);
2450 btrfs_release_path(path);
/*
 * Repair helper: add an orphan item for rec->ino and clear
 * I_ERR_NO_ORPHAN_ITEM on the record.  NOTE(review): the flag is presumably
 * cleared only on success -- the guarding condition is not visible here.
 */
2454 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2455 struct btrfs_root *root,
2456 struct btrfs_path *path,
2457 struct inode_record *rec)
2461 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2462 btrfs_release_path(path);
2464 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair helper: overwrite nbytes in the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 */
2468 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2469 struct btrfs_root *root,
2470 struct btrfs_path *path,
2471 struct inode_record *rec)
2473 struct btrfs_inode_item *ei;
2474 struct btrfs_key key;
2477 key.objectid = rec->ino;
2478 key.type = BTRFS_INODE_ITEM_KEY;
2481 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2488 /* Since ret == 0, no need to check anything */
2489 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2490 struct btrfs_inode_item);
2491 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2492 btrfs_mark_buffer_dirty(path->nodes[0]);
2493 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2494 printf("reset nbytes for ino %llu root %llu\n",
2495 rec->ino, root->root_key.objectid);
2497 btrfs_release_path(path);
/*
 * Repair helper: insert the DIR_INDEX item that @backref says is missing.
 * A transaction is started, an empty item keyed
 * (backref->dir, DIR_INDEX, backref->index) is inserted and filled with a
 * dir item pointing at rec->ino, and the transaction is committed.
 * Afterwards the in-memory directory record's found_size is increased by the
 * name length and its I_ERR_DIR_ISIZE_WRONG flag is recomputed.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not used
 * on the visible lines.
 */
2501 static int add_missing_dir_index(struct btrfs_root *root,
2502 struct cache_tree *inode_cache,
2503 struct inode_record *rec,
2504 struct inode_backref *backref)
2506 struct btrfs_path path;
2507 struct btrfs_trans_handle *trans;
2508 struct btrfs_dir_item *dir_item;
2509 struct extent_buffer *leaf;
2510 struct btrfs_key key;
2511 struct btrfs_disk_key disk_key;
2512 struct inode_record *dir_rec;
2513 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name, no extra data. */
2514 u32 data_size = sizeof(*dir_item) + backref->namelen;
2517 trans = btrfs_start_transaction(root, 1);
2519 return PTR_ERR(trans);
2521 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2522 (unsigned long long)rec->ino);
2524 btrfs_init_path(&path);
2525 key.objectid = backref->dir;
2526 key.type = BTRFS_DIR_INDEX_KEY;
2527 key.offset = backref->index;
2528 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2531 leaf = path.nodes[0];
2532 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored in the dir item: the inode item of rec->ino. */
2534 disk_key.objectid = cpu_to_le64(rec->ino);
2535 disk_key.type = BTRFS_INODE_ITEM_KEY;
2536 disk_key.offset = 0;
2538 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2539 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2540 btrfs_set_dir_data_len(leaf, dir_item, 0);
2541 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right after the fixed-size dir item header. */
2542 name_ptr = (unsigned long)(dir_item + 1);
2543 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2544 btrfs_mark_buffer_dirty(leaf);
2545 btrfs_release_path(&path);
2546 btrfs_commit_transaction(trans, root);
/* Bring the in-memory bookkeeping in line with what was just written. */
2548 backref->found_dir_index = 1;
2549 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2550 BUG_ON(IS_ERR(dir_rec));
2553 dir_rec->found_size += backref->namelen;
/* Re-evaluate the directory size error against the updated found_size. */
2554 if (dir_rec->found_size == dir_rec->isize &&
2555 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2556 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2557 if (dir_rec->found_size != dir_rec->isize)
2558 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair helper: delete the DIR_INDEX entry described by @backref.
 * A transaction is started, the entry is looked up by directory, name and
 * index, and it is removed either with btrfs_del_item() or with
 * btrfs_delete_one_dir_name(); the condition choosing between the two is
 * not visible in this excerpt.  The transaction is committed on both
 * visible paths.
 */
2563 static int delete_dir_index(struct btrfs_root *root,
2564 struct inode_backref *backref)
2566 struct btrfs_trans_handle *trans;
2567 struct btrfs_dir_item *di;
2568 struct btrfs_path path;
2571 trans = btrfs_start_transaction(root, 1);
2573 return PTR_ERR(trans);
2575 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2576 (unsigned long long)backref->dir,
2577 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2578 (unsigned long long)root->objectid);
2580 btrfs_init_path(&path);
2581 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2582 backref->name, backref->namelen,
2583 backref->index, -1);
/* Early-exit path (its condition is not visible): release and commit. */
2586 btrfs_release_path(&path);
2587 btrfs_commit_transaction(trans, root);
2594 ret = btrfs_del_item(trans, root, &path);
2596 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2598 btrfs_release_path(&path);
2599 btrfs_commit_transaction(trans, root);
/*
 * Repair helper: recreate a missing inode item for rec->ino from what the
 * check has collected.  nlink, nbytes, size and mode are derived from the
 * record, atime/ctime/mtime are set to the current time and otime to 0.
 * An inode with a dir item becomes a directory (S_IFDIR | 0755), anything
 * else a regular file (S_IFREG | 0755).
 */
2603 static int create_inode_item(struct btrfs_root *root,
2604 struct inode_record *rec,
2607 struct btrfs_trans_handle *trans;
2608 struct btrfs_inode_item inode_item;
2609 time_t now = time(NULL);
2612 trans = btrfs_start_transaction(root, 1);
2613 if (IS_ERR(trans)) {
2614 ret = PTR_ERR(trans);
2618 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2619 "be incomplete, please check permissions and content after "
2620 "the fsck completes.\n", (unsigned long long)root->objectid,
2621 (unsigned long long)rec->ino);
2623 memset(&inode_item, 0, sizeof(inode_item));
2624 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is 1 or the number of links found; the selector is not visible. */
2626 btrfs_set_stack_inode_nlink(&inode_item, 1);
2628 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2629 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2630 if (rec->found_dir_item) {
/* Ambiguous evidence: warn, then treat the inode as a directory. */
2631 if (rec->found_file_extent)
2632 fprintf(stderr, "root %llu inode %llu has both a dir "
2633 "item and extents, unsure if it is a dir or a "
2634 "regular file so setting it as a directory\n",
2635 (unsigned long long)root->objectid,
2636 (unsigned long long)rec->ino);
2637 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2638 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true in an else branch. */
2639 } else if (!rec->found_dir_item) {
2640 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2641 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2643 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2644 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2645 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2646 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2647 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2648 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2649 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2650 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2652 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2654 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and repair inconsistencies between inode refs,
 * dir items and dir indexes.  The visible cases are:
 *  - recreate a missing inode item for the root directory;
 *  - delete a dir index that has no matching inode ref, or whose index does
 *    not match;
 *  - add a dir index that is missing although dir item and inode ref exist;
 *  - add a dir index/item pair when only the inode ref exists;
 *  - recreate a missing inode item when all three references agree.
 * The cases without an explicit "!delete" test carry a condition on a line
 * not visible here.
 *
 * Returns a non-zero ret if one was set, otherwise the value of "repaired".
 */
2658 static int repair_inode_backrefs(struct btrfs_root *root,
2659 struct inode_record *rec,
2660 struct cache_tree *inode_cache,
2663 struct inode_backref *tmp, *backref;
2664 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The _safe variant is needed because entries are unlinked while iterating. */
2668 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2669 if (!delete && rec->ino == root_dirid) {
2670 if (!rec->found_inode_item) {
2671 ret = create_inode_item(root, rec, 1);
2678 /* Index 0 for root dir's are special, don't mess with it */
2679 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without inode ref, or with a mismatching index. */
2683 ((backref->found_dir_index && !backref->found_inode_ref) ||
2684 (backref->found_dir_index && backref->found_inode_ref &&
2685 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2686 ret = delete_dir_index(root, backref);
2690 list_del(&backref->list);
/* Dir item and inode ref exist, only the dir index is missing. */
2695 if (!delete && !backref->found_dir_index &&
2696 backref->found_dir_item && backref->found_inode_ref) {
2697 ret = add_missing_dir_index(root, inode_cache, rec,
/* Backref is now complete and error free: drop it from the list. */
2702 if (backref->found_dir_item &&
2703 backref->found_dir_index) {
2704 if (!backref->errors &&
2705 backref->found_inode_ref) {
2706 list_del(&backref->list);
/* Only the inode ref exists: insert the dir index/item pair. */
2713 if (!delete && (!backref->found_dir_index &&
2714 !backref->found_dir_item &&
2715 backref->found_inode_ref)) {
2716 struct btrfs_trans_handle *trans;
2717 struct btrfs_key location;
2719 ret = check_dir_conflict(root, backref->name,
2725 * let nlink fixing routine to handle it,
2726 * which can do it better.
2731 location.objectid = rec->ino;
2732 location.type = BTRFS_INODE_ITEM_KEY;
2733 location.offset = 0;
2735 trans = btrfs_start_transaction(root, 1);
2736 if (IS_ERR(trans)) {
2737 ret = PTR_ERR(trans);
2740 fprintf(stderr, "adding missing dir index/item pair "
2742 (unsigned long long)rec->ino);
2743 ret = btrfs_insert_dir_item(trans, root, backref->name,
2745 backref->dir, &location,
2746 imode_to_type(rec->imode),
2749 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item itself is missing. */
2753 if (!delete && (backref->found_inode_ref &&
2754 backref->found_dir_index &&
2755 backref->found_dir_item &&
2756 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2757 !rec->found_inode_item)) {
2758 ret = create_inode_item(root, rec, 0);
2765 return ret ? ret : repaired;
2769 * To determine the file type for nlink/inode_item repair
2771 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2772 * Return -ENOENT if file type is not found.
/*
 * Work out the BTRFS_FT_* file type of an inode record.
 *
 * @rec:  inode record to inspect
 * @type: output, receives the detected file type
 *
 * When the inode item was found, the type is derived from rec->imode with
 * imode_to_type().  Otherwise rec->backrefs is scanned and the filetype of
 * a backref that has a dir index or a dir item is used.
 */
2774 static int find_file_type(struct inode_record *rec, u8 *type)
2776 struct inode_backref *backref;
2778 /* For inode item recovered case */
2779 if (rec->found_inode_item) {
2780 *type = imode_to_type(rec->imode);
/* No inode item: fall back to the type stored with a directory entry */
2784 list_for_each_entry(backref, &rec->backrefs, list) {
2785 if (backref->found_dir_index || backref->found_dir_item) {
2786 *type = backref->filetype;
2794 * To determine the file name for nlink repair
2796 * Return 0 if file name is found, set name and namelen.
2797 * Return -ENOENT if file name is not found.
/*
 * Recover a file name for an inode record from its backrefs.
 *
 * @rec:     inode record to inspect
 * @name:    output buffer, receives backref->namelen bytes of the name
 * @namelen: output, receives the length of the copied name
 *
 * Scans rec->backrefs for an entry that has a dir index, a dir item or an
 * inode ref and copies its name with memcpy().  The visible code writes no
 * NUL terminator after the copied bytes.
 *
 * NOTE(review): the copy is not bounded by the size of @name here; callers
 * presumably pass a buffer of at least BTRFS_NAME_LEN bytes -- confirm.
 */
2799 static int find_file_name(struct inode_record *rec,
2800 char *name, int *namelen)
2802 struct inode_backref *backref;
2804 list_for_each_entry(backref, &rec->backrefs, list) {
2805 if (backref->found_dir_index || backref->found_dir_item ||
2806 backref->found_inode_ref) {
2807 memcpy(name, backref->name, backref->namelen);
2808 *namelen = backref->namelen;
2815 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of an inode from its recorded backrefs.
 *
 * Steps visible below:
 *  1. clear rec->found_link;
 *  2. call btrfs_unlink() for every backref, and drop from rec->backrefs
 *     each entry that lacks any of dir index / dir item / inode ref;
 *  3. look up the INODE_ITEM of rec->ino and set its nlink to 0;
 *  4. call btrfs_add_link() for every backref still on the list, which
 *     also writes the resulting index back into backref->index.
 * The path is released before returning.
 */
2816 static int reset_nlink(struct btrfs_trans_handle *trans,
2817 struct btrfs_root *root,
2818 struct btrfs_path *path,
2819 struct inode_record *rec)
2821 struct inode_backref *backref;
2822 struct inode_backref *tmp;
2823 struct btrfs_key key;
2824 struct btrfs_inode_item *inode_item;
2827 /* We don't believe this either, reset it and iterate backref */
2828 rec->found_link = 0;
2830 /* Remove all backref including the valid ones */
2831 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2832 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2833 backref->index, backref->name,
2834 backref->namelen, 0);
2838 /* remove invalid backref, so it won't be added back */
2839 if (!(backref->found_dir_index &&
2840 backref->found_dir_item &&
2841 backref->found_inode_ref)) {
2842 list_del(&backref->list);
2849 /* Set nlink to 0 */
2850 key.objectid = rec->ino;
2851 key.type = BTRFS_INODE_ITEM_KEY;
2853 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2860 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2861 struct btrfs_inode_item);
2862 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2863 btrfs_mark_buffer_dirty(path->nodes[0]);
2864 btrfs_release_path(path);
2867 * Add back valid inode_ref/dir_item/dir_index,
2868 * add_link() will handle the nlink inc, so new nlink must be correct
2870 list_for_each_entry(backref, &rec->backrefs, list) {
2871 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2872 backref->name, backref->namelen,
2873 backref->filetype, &backref->index, 1);
2878 btrfs_release_path(path);
/*
 * Find the highest inode objectid present in a tree.
 *
 * Searches for the key (BTRFS_LAST_FREE_OBJECTID, BTRFS_INODE_ITEM_KEY, ...)
 * and reads the key of the slot just before the position the search
 * returned; the objectid of that key is stored in *highest_ino.  A result
 * that is already >= BTRFS_LAST_FREE_OBJECTID is treated specially (the
 * statement of that branch is not visible here).  The path is released
 * before returning.
 *
 * NOTE(review): path->slots[0] - 1 is only valid when the returned slot
 * is not 0 -- confirm the guard around this read covers that case.
 */
2882 static int get_highest_inode(struct btrfs_trans_handle *trans,
2883 struct btrfs_root *root,
2884 struct btrfs_path *path,
2887 struct btrfs_key key, found_key;
2890 btrfs_init_path(path);
2891 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2893 key.type = BTRFS_INODE_ITEM_KEY;
2894 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2896 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2897 path->slots[0] - 1);
2898 *highest_ino = found_key.objectid;
2901 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2903 btrfs_release_path(path);
/*
 * Fix a wrong link count, moving the inode to lost+found if needed.
 *
 * The file name and type are recovered from the backrefs first, because
 * the nlink reset removes invalid backrefs.  Fallbacks: the decimal inode
 * number as the name, BTRFS_FT_REG_FILE as the type.
 *
 * reset_nlink() then rebuilds the links.  If rec->found_link is still 0
 * afterwards, the highest inode number is looked up, a lost+found
 * directory is created under BTRFS_FIRST_FREE_OBJECTID with btrfs_mkdir()
 * and the inode is linked into it.  On -EEXIST a suffix built from the
 * inode number is appended to the name and the link is retried.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared and the path released on the way out.
 */
2907 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2908 struct btrfs_root *root,
2909 struct btrfs_path *path,
2910 struct inode_record *rec)
2912 char *dir_name = "lost+found";
2913 char namebuf[BTRFS_NAME_LEN] = {0};
2918 int name_recovered = 0;
2919 int type_recovered = 0;
2923 * Get file name and type first before these invalid inode ref
2924 * are deleted by remove_all_invalid_backref()
2926 name_recovered = !find_file_name(rec, namebuf, &namelen);
2927 type_recovered = !find_file_type(rec, &type);
/* No backref carried a name: use the decimal inode number instead */
2929 if (!name_recovered) {
2930 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2931 rec->ino, rec->ino);
2932 namelen = count_digits(rec->ino);
2933 sprintf(namebuf, "%llu", rec->ino);
2936 if (!type_recovered) {
2937 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2939 type = BTRFS_FT_REG_FILE;
2943 ret = reset_nlink(trans, root, path, rec);
2946 "Failed to reset nlink for inode %llu: %s\n",
2947 rec->ino, strerror(-ret));
/* No link survived the reset: the inode goes into lost+found */
2951 if (rec->found_link == 0) {
2952 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2956 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2957 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2960 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2961 dir_name, strerror(-ret));
2964 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2965 namebuf, namelen, type, NULL, 1);
2967 * Add ".INO" suffix several times to handle case where
2968 * "FILENAME.INO" is already taken by another file.
2970 while (ret == -EEXIST) {
2972 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2974 if (namelen + count_digits(rec->ino) + 1 >
2979 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2981 namelen += count_digits(rec->ino) + 1;
2982 ret = btrfs_add_link(trans, root, rec->ino,
2983 lost_found_ino, namebuf,
2984 namelen, type, NULL, 1);
2988 "Failed to link the inode %llu to %s dir: %s\n",
2989 rec->ino, dir_name, strerror(-ret));
2993 * Just increase the found_link, don't actually add the
2994 * backref. This will make things easier and this inode
2995 * record will be freed after the repair is done.
2996 * So fsck will not report problem about this inode.
2999 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3000 namelen, namebuf, dir_name);
3002 printf("Fixed the nlink of inode %llu\n", rec->ino);
3005 * Clear the flag anyway, or we will loop forever for the same inode
3006 * as it will not be removed from the bad inode list and the dead loop
3009 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3010 btrfs_release_path(path);
3015 * Check if there is any normal(reg or prealloc) file extent for given
3017 * This is used to determine the file type when neither its dir_index/item or
3018 * inode_item exists.
3020 * This will *NOT* report error, if any error happens, just consider it does
3021 * not have any normal file extent.
/*
 * Look for a non-inline file extent belonging to inode @ino in @root.
 *
 * Performs a read-only search (no transaction, no COW) for EXTENT_DATA
 * keys, moving to the next leaf when the returned slot is past the end of
 * the current one.  For an item whose key still belongs to @ino and is of
 * type BTRFS_EXTENT_DATA_KEY, the file extent type is read; any type
 * other than BTRFS_FILE_EXTENT_INLINE is the case this function looks for.
 * The local path is released before returning.
 */
3023 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3025 struct btrfs_path path;
3026 struct btrfs_key key;
3027 struct btrfs_key found_key;
3028 struct btrfs_file_extent_item *fi;
3032 btrfs_init_path(&path);
3034 key.type = BTRFS_EXTENT_DATA_KEY;
3037 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Inexact match that landed past the last item: continue in the next leaf */
3042 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3043 ret = btrfs_next_leaf(root, &path);
3050 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3052 if (found_key.objectid != ino ||
3053 found_key.type != BTRFS_EXTENT_DATA_KEY)
3055 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3056 struct btrfs_file_extent_item);
3057 type = btrfs_file_extent_type(path.nodes[0], fi);
3058 if (type != BTRFS_FILE_EXTENT_INLINE) {
3064 btrfs_release_path(&path);
3068 static u32 btrfs_type_to_imode(u8 type)
3070 static u32 imode_by_btrfs_type[] = {
3071 [BTRFS_FT_REG_FILE] = S_IFREG,
3072 [BTRFS_FT_DIR] = S_IFDIR,
3073 [BTRFS_FT_CHRDEV] = S_IFCHR,
3074 [BTRFS_FT_BLKDEV] = S_IFBLK,
3075 [BTRFS_FT_FIFO] = S_IFIFO,
3076 [BTRFS_FT_SOCK] = S_IFSOCK,
3077 [BTRFS_FT_SYMLINK] = S_IFLNK,
3080 return imode_by_btrfs_type[(type)];
/*
 * Recreate a missing inode item for an inode record.
 *
 * The file type comes from find_file_type() when possible.  Otherwise it
 * is guessed, in this order: regular file when the record has a file
 * extent and find_normal_file_extent() confirms it, directory when
 * rec->found_dir_item is set, regular file when orphan extents exist, and
 * regular file as the last fallback (with a message).
 *
 * A new inode is then created with btrfs_new_inode() and the record is
 * updated: I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is
 * set so that the nlink repair runs afterwards.
 *
 * NOTE(review): after the rebuild the code sets rec->found_dir_item, not
 * rec->found_inode_item -- confirm this is the intended field.
 */
3083 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3084 struct btrfs_root *root,
3085 struct btrfs_path *path,
3086 struct inode_record *rec)
3090 int type_recovered = 0;
3093 printf("Trying to rebuild inode:%llu\n", rec->ino);
3095 type_recovered = !find_file_type(rec, &filetype);
3098 * Try to determine inode type if type not found.
3100 * For found regular file extent, it must be FILE.
3101 * For found dir_item/index, it must be DIR.
3103 * For undetermined one, use FILE as fallback.
3106 * 1. If found backref(inode_index/item is already handled) to it,
3108 * Need new inode-inode ref structure to allow search for that.
3110 if (!type_recovered) {
3111 if (rec->found_file_extent &&
3112 find_normal_file_extent(root, rec->ino)) {
3114 filetype = BTRFS_FT_REG_FILE;
3115 } else if (rec->found_dir_item) {
3117 filetype = BTRFS_FT_DIR;
3118 } else if (!list_empty(&rec->orphan_extents)) {
3120 filetype = BTRFS_FT_REG_FILE;
3122 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3125 filetype = BTRFS_FT_REG_FILE;
3129 ret = btrfs_new_inode(trans, root, rec->ino,
3130 mode | btrfs_type_to_imode(filetype));
3135 * Here inode rebuild is done, we only rebuild the inode item,
3136 * don't repair the nlink(like move to lost+found).
3137 * That is the job of nlink repair.
3139 * We just fill the record and return
3141 rec->found_dir_item = 1;
3142 rec->imode = mode | btrfs_type_to_imode(filetype);
3144 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3145 /* Ensure the inode_nlinks repair function will be called */
3146 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach, or discard, the orphan data extents recorded for an inode.
 *
 * For every entry of rec->orphan_extents:
 *  - btrfs_get_extent() is used to probe for a file extent that conflicts
 *    with the range [offset, offset + disk_len);
 *  - on a conflict a message is printed and the orphan is released with
 *    btrfs_free_extent() against the extent root;
 *  - otherwise a file extent item is inserted with
 *    btrfs_insert_file_extent(), using disk_len for both length arguments;
 *  - rec->found_size grows by disk_len and I_ERR_FILE_NBYTES_WRONG is
 *    cleared once it equals rec->nbytes;
 *  - the matching hole record is removed and I_ERR_FILE_EXTENT_DISCOUNT
 *    is cleared when no holes remain;
 *  - the orphan is unlinked from the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3151 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3152 struct btrfs_root *root,
3153 struct btrfs_path *path,
3154 struct inode_record *rec)
3156 struct orphan_data_extent *orphan;
3157 struct orphan_data_extent *tmp;
3160 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3162 * Check for conflicting file extents
3164 * Here we don't know whether the extents is compressed or not,
3165 * so we can only assume it not compressed nor data offset,
3166 * and use its disk_len as extent length.
3168 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3169 orphan->offset, orphan->disk_len, 0);
3170 btrfs_release_path(path);
3175 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3176 orphan->disk_bytenr, orphan->disk_len);
3177 ret = btrfs_free_extent(trans,
3178 root->fs_info->extent_root,
3179 orphan->disk_bytenr, orphan->disk_len,
3180 0, root->objectid, orphan->objectid,
3185 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3186 orphan->offset, orphan->disk_bytenr,
3187 orphan->disk_len, orphan->disk_len);
3191 /* Update file size info */
3192 rec->found_size += orphan->disk_len;
3193 if (rec->found_size == rec->nbytes)
3194 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3196 /* Update the file extent hole info too */
3197 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3201 if (RB_EMPTY_ROOT(&rec->holes))
3202 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3204 list_del(&orphan->list);
3207 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of an inode with hole extents.
 *
 * Repeatedly takes the first node of the rec->holes rb-tree, punches a
 * hole for its [start, start + len) range with btrfs_punch_hole() and
 * removes the hole record; I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty.
 *
 * A separate case handles a file that lost all of its file extents: one
 * hole is punched from offset 0 up to isize rounded up to the sector size.
 * A summary line is printed at the end.
 */
3212 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3213 struct btrfs_root *root,
3214 struct btrfs_path *path,
3215 struct inode_record *rec)
3217 struct rb_node *node;
3218 struct file_extent_hole *hole;
3222 node = rb_first(&rec->holes);
3226 hole = rb_entry(node, struct file_extent_hole, node);
3227 ret = btrfs_punch_hole(trans, root, rec->ino,
3228 hole->start, hole->len);
3231 ret = del_file_extent_hole(&rec->holes, hole->start,
3235 if (RB_EMPTY_ROOT(&rec->holes))
3236 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3237 node = rb_first(&rec->holes);
3239 /* special case for a file losing all its file extent */
3241 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3242 round_up(rec->isize,
3243 root->fs_info->sectorsize));
3247 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3248 rec->ino, root->objectid);
/*
 * Run every applicable repair routine for one inode record.
 *
 * Nothing is attempted unless rec->errors contains at least one of the
 * repairable flags tested first.  Otherwise a transaction reserving 7
 * items is started and the repair helpers run in a fixed order: missing
 * inode item, orphan extents, discount extents, dir isize, orphan item,
 * link count, nbytes.  Each later helper only runs while ret is still 0
 * and its error flag is set, so a helper can enable or disable a later
 * one by changing rec->errors.  The transaction is committed and the
 * local path released afterwards.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not assigned
 * in the visible code -- confirm that ignoring it is intended.
 */
3253 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3255 struct btrfs_trans_handle *trans;
3256 struct btrfs_path path;
3259 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3260 I_ERR_NO_ORPHAN_ITEM |
3261 I_ERR_LINK_COUNT_WRONG |
3262 I_ERR_NO_INODE_ITEM |
3263 I_ERR_FILE_EXTENT_ORPHAN |
3264 I_ERR_FILE_EXTENT_DISCOUNT|
3265 I_ERR_FILE_NBYTES_WRONG)))
3269 * For nlink repair, it may create a dir and add link, so
3270 * 2 for parent(256)'s dir_index and dir_item
3271 * 2 for lost+found dir's inode_item and inode_ref
3272 * 1 for the new inode_ref of the file
3273 * 2 for lost+found dir's dir_index and dir_item for the file
3275 trans = btrfs_start_transaction(root, 7);
3277 return PTR_ERR(trans);
3279 btrfs_init_path(&path);
3280 if (rec->errors & I_ERR_NO_INODE_ITEM)
3281 ret = repair_inode_no_item(trans, root, &path, rec);
3282 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3283 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3284 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3285 ret = repair_inode_discount_extent(trans, root, &path, rec);
3286 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3287 ret = repair_inode_isize(trans, root, &path, rec);
3288 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3289 ret = repair_inode_orphan_item(trans, root, &path, rec);
3290 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3291 ret = repair_inode_nlinks(trans, root, &path, rec);
3292 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3293 ret = repair_inode_nbytes(trans, root, &path, rec);
3294 btrfs_commit_transaction(trans, root);
3295 btrfs_release_path(&path);
/*
 * Check, and in repair mode fix, every inode record collected for a root.
 *
 * Outline of the visible code:
 *  - a root with zero refs only gets a warning if its cache is not empty;
 *  - in repair mode the cache is walked in stages and
 *    repair_inode_backrefs() is called for each record that has backrefs;
 *  - the record of the root directory is looked up and validated with
 *    check_root_dir(); in one branch a missing root directory is recreated
 *    with btrfs_make_root_dir() inside its own transaction;
 *  - finally the cache is drained: root dir and orphan-objectid records
 *    are freed, a missing orphan item is re-checked, missing inode item and
 *    wrong link count are flagged, try_repair_inode() is attempted, and any
 *    record that is still bad is printed together with its unresolved
 *    backrefs before being freed.
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 */
3299 static int check_inode_recs(struct btrfs_root *root,
3300 struct cache_tree *inode_cache)
3302 struct cache_extent *cache;
3303 struct ptr_node *node;
3304 struct inode_record *rec;
3305 struct inode_backref *backref;
3310 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3312 if (btrfs_root_refs(&root->root_item) == 0) {
3313 if (!cache_tree_empty(inode_cache))
3314 fprintf(stderr, "warning line %d\n", __LINE__);
3319 * We need to repair backrefs first because we could change some of the
3320 * errors in the inode recs.
3322 * We also need to go through and delete invalid backrefs first and then
3323 * add the correct ones second. We do this because we may get EEXIST
3324 * when adding back the correct index because we hadn't yet deleted the
3327 * For example, if we were missing a dir index then the directories
3328 * isize would be wrong, so if we fixed the isize to what we thought it
3329 * would be and then fixed the backref we'd still have a invalid fs, so
3330 * we need to add back the dir index and then check to see if the isize
3335 if (stage == 3 && !err)
3338 cache = search_cache_extent(inode_cache, 0);
3339 while (repair && cache) {
3340 node = container_of(cache, struct ptr_node, cache);
3342 cache = next_cache_extent(cache);
3344 /* Need to free everything up and rescan */
3346 remove_cache_extent(inode_cache, &node->cache);
3348 free_inode_rec(rec);
3352 if (list_empty(&rec->backrefs))
3355 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Fetch the record of the root directory from the cache */
3369 rec = get_inode_rec(inode_cache, root_dirid, 0);
3370 BUG_ON(IS_ERR(rec));
3372 ret = check_root_dir(rec);
3374 fprintf(stderr, "root %llu root dir %llu error\n",
3375 (unsigned long long)root->root_key.objectid,
3376 (unsigned long long)root_dirid);
3377 print_inode_error(root, rec);
3382 struct btrfs_trans_handle *trans;
3384 trans = btrfs_start_transaction(root, 1);
3385 if (IS_ERR(trans)) {
3386 err = PTR_ERR(trans);
3391 "root %llu missing its root dir, recreating\n",
3392 (unsigned long long)root->objectid);
3394 ret = btrfs_make_root_dir(trans, root, root_dirid);
3397 btrfs_commit_transaction(trans, root);
3401 fprintf(stderr, "root %llu root dir %llu not found\n",
3402 (unsigned long long)root->root_key.objectid,
3403 (unsigned long long)root_dirid);
/* Final pass: take each remaining record out of the cache and judge it */
3407 cache = search_cache_extent(inode_cache, 0);
3410 node = container_of(cache, struct ptr_node, cache);
3412 remove_cache_extent(inode_cache, &node->cache);
3414 if (rec->ino == root_dirid ||
3415 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3416 free_inode_rec(rec);
3420 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3421 ret = check_orphan_item(root, rec->ino);
3423 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3424 if (can_free_inode_rec(rec)) {
3425 free_inode_rec(rec);
3430 if (!rec->found_inode_item)
3431 rec->errors |= I_ERR_NO_INODE_ITEM;
3432 if (rec->found_link != rec->nlink)
3433 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3435 ret = try_repair_inode(root, rec);
3436 if (ret == 0 && can_free_inode_rec(rec)) {
3437 free_inode_rec(rec);
3443 if (!(repair && ret == 0))
3445 print_inode_error(root, rec);
/* Flag what is missing on each backref so the report below can show it */
3446 list_for_each_entry(backref, &rec->backrefs, list) {
3447 if (!backref->found_dir_item)
3448 backref->errors |= REF_ERR_NO_DIR_ITEM;
3449 if (!backref->found_dir_index)
3450 backref->errors |= REF_ERR_NO_DIR_INDEX;
3451 if (!backref->found_inode_ref)
3452 backref->errors |= REF_ERR_NO_INODE_REF;
3453 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3454 " namelen %u name %s filetype %d errors %x",
3455 (unsigned long long)backref->dir,
3456 (unsigned long long)backref->index,
3457 backref->namelen, backref->name,
3458 backref->filetype, backref->errors);
3459 print_ref_error(backref->errors);
3461 free_inode_rec(rec);
3463 return (error > 0) ? -1 : 0;
3466 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3469 struct cache_extent *cache;
3470 struct root_record *rec = NULL;
3473 cache = lookup_cache_extent(root_cache, objectid, 1);
3475 rec = container_of(cache, struct root_record, cache);
3477 rec = calloc(1, sizeof(*rec));
3479 return ERR_PTR(-ENOMEM);
3480 rec->objectid = objectid;
3481 INIT_LIST_HEAD(&rec->backrefs);
3482 rec->cache.start = objectid;
3483 rec->cache.size = 1;
3485 ret = insert_cache_extent(root_cache, &rec->cache);
3487 return ERR_PTR(-EEXIST);
/*
 * Find or create the backref of @rec that matches a directory entry.
 *
 * An existing entry of rec->backrefs matches when ref_root, dir, namelen
 * and the name bytes are all equal; index is not part of the comparison.
 * When no entry matches, a new one is allocated with room for the name
 * plus a terminating NUL, filled in from the arguments and appended to the
 * tail of rec->backrefs.
 */
3492 static struct root_backref *get_root_backref(struct root_record *rec,
3493 u64 ref_root, u64 dir, u64 index,
3494 const char *name, int namelen)
3496 struct root_backref *backref;
3498 list_for_each_entry(backref, &rec->backrefs, list) {
3499 if (backref->ref_root != ref_root || backref->dir != dir ||
3500 backref->namelen != namelen)
3502 if (memcmp(name, backref->name, namelen))
/* No match: allocate the struct and the name (plus NUL) in one block */
3507 backref = calloc(1, sizeof(*backref) + namelen + 1);
3510 backref->ref_root = ref_root;
3512 backref->index = index;
3513 backref->namelen = namelen;
3514 memcpy(backref->name, name, namelen);
3515 backref->name[namelen] = '\0';
3516 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release a root_record embedded in a cache tree.
 *
 * Recovers the record from its cache_extent member and unlinks every
 * entry of rec->backrefs, always taking the first list element, until the
 * list is empty.
 */
3520 static void free_root_record(struct cache_extent *cache)
3522 struct root_record *rec;
3523 struct root_backref *backref;
3525 rec = container_of(cache, struct root_record, cache);
3526 while (!list_empty(&rec->backrefs)) {
3527 backref = to_root_backref(rec->backrefs.next);
3528 list_del(&backref->list);
/*
 * Instantiate the tree-wide free helper for root records; other code in
 * this file calls it as free_root_recs_tree().
 */
3535 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence about how a root is referenced.
 *
 * Looks up (creating if needed) the root_record of @root_id and the
 * backref matching (ref_root, dir, name), ORs @errors into it and then
 * updates it according to @item_type:
 *  - for anything but a DIR_ITEM, the index is compared with the stored
 *    one when a dir index, back ref or forward ref was already seen
 *    (a mismatch sets REF_ERR_INDEX_UNMATCH); the index is also stored;
 *  - BTRFS_DIR_ITEM_KEY sets found_dir_item;
 *  - BTRFS_DIR_INDEX_KEY sets found_dir_index;
 *  - BTRFS_ROOT_REF_KEY sets found_forward_ref and flags a duplicate with
 *    REF_ERR_DUP_ROOT_REF;
 *  - BTRFS_ROOT_BACKREF_KEY sets found_back_ref and flags a duplicate with
 *    REF_ERR_DUP_ROOT_BACKREF.
 * A backref that has both a forward ref and a dir item is marked
 * reachable.
 */
3537 static int add_root_backref(struct cache_tree *root_cache,
3538 u64 root_id, u64 ref_root, u64 dir, u64 index,
3539 const char *name, int namelen,
3540 int item_type, int errors)
3542 struct root_record *rec;
3543 struct root_backref *backref;
3545 rec = get_root_rec(root_cache, root_id);
3546 BUG_ON(IS_ERR(rec));
3547 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3550 backref->errors |= errors;
/* DIR_ITEM keys are skipped here; the other item types get their index checked */
3552 if (item_type != BTRFS_DIR_ITEM_KEY) {
3553 if (backref->found_dir_index || backref->found_back_ref ||
3554 backref->found_forward_ref) {
3555 if (backref->index != index)
3556 backref->errors |= REF_ERR_INDEX_UNMATCH;
3558 backref->index = index;
3562 if (item_type == BTRFS_DIR_ITEM_KEY) {
3563 if (backref->found_forward_ref)
3565 backref->found_dir_item = 1;
3566 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3567 backref->found_dir_index = 1;
3568 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3569 if (backref->found_forward_ref)
3570 backref->errors |= REF_ERR_DUP_ROOT_REF;
3571 else if (backref->found_dir_item)
3573 backref->found_forward_ref = 1;
3574 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3575 if (backref->found_back_ref)
3576 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3577 backref->found_back_ref = 1;
3582 if (backref->found_forward_ref && backref->found_dir_item)
3583 backref->reachable = 1;
/*
 * Move directory-entry evidence about child roots from a per-root cache
 * into the global root cache.
 *
 * For the tree-reloc root the source cache is simply freed.  Otherwise
 * every inode record is removed from @src_cache and is_child_root() is
 * consulted for (root->objectid, rec->ino).  For the records that are
 * processed further, each backref is forwarded to @dst_cache through
 * add_root_backref(), once as a DIR_ITEM and once as a DIR_INDEX depending
 * on which were found.  Such backrefs must not carry an inode ref
 * (BUG_ON).  The inode record is freed afterwards.
 */
3587 static int merge_root_recs(struct btrfs_root *root,
3588 struct cache_tree *src_cache,
3589 struct cache_tree *dst_cache)
3591 struct cache_extent *cache;
3592 struct ptr_node *node;
3593 struct inode_record *rec;
3594 struct inode_backref *backref;
3597 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3598 free_inode_recs_tree(src_cache);
3603 cache = search_cache_extent(src_cache, 0);
3606 node = container_of(cache, struct ptr_node, cache);
3608 remove_cache_extent(src_cache, &node->cache);
3611 ret = is_child_root(root, root->objectid, rec->ino);
/* rec->ino is used as the id of the referenced root in the calls below */
3617 list_for_each_entry(backref, &rec->backrefs, list) {
3618 BUG_ON(backref->found_inode_ref);
3619 if (backref->found_dir_item)
3620 add_root_backref(dst_cache, rec->ino,
3621 root->root_key.objectid, backref->dir,
3622 backref->index, backref->name,
3623 backref->namelen, BTRFS_DIR_ITEM_KEY,
3625 if (backref->found_dir_index)
3626 add_root_backref(dst_cache, rec->ino,
3627 root->root_key.objectid, backref->dir,
3628 backref->index, backref->name,
3629 backref->namelen, BTRFS_DIR_INDEX_KEY,
3633 free_inode_rec(rec);
/*
 * Verify that every fs tree is properly referenced and report the rest.
 *
 * Starts from the record of BTRFS_FS_TREE_OBJECTID, then walks the root
 * cache looking at the reachable backrefs of each record and at the
 * found_ref count of the root they come from; backref->reachable may be
 * cleared in that loop.  Circular references are not detected (see the
 * fixme below).
 *
 * A second walk reports problems:
 *  - a record with no reference and an objectid between
 *    BTRFS_FIRST_FREE_OBJECTID and BTRFS_LAST_FREE_OBJECTID is checked for
 *    an orphan item in the tree root; records without a root item are
 *    skipped there, others are printed as not referenced;
 *  - every backref gets REF_ERR_NO_* flags for the pieces that were not
 *    found;
 *  - a summary line per root and one line per unresolved reachable
 *    backref are printed to stderr.
 *
 * Returns 1 when the error counter is positive, 0 otherwise.
 *
 * NOTE(review): the exact branch structure of the first loop is not fully
 * visible here -- confirm against the complete function before relying on
 * the description of when reachable is cleared.
 */
3640 static int check_root_refs(struct btrfs_root *root,
3641 struct cache_tree *root_cache)
3643 struct root_record *rec;
3644 struct root_record *ref_root;
3645 struct root_backref *backref;
3646 struct cache_extent *cache;
3652 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3653 BUG_ON(IS_ERR(rec));
3656 /* fixme: this can not detect circular references */
3659 cache = search_cache_extent(root_cache, 0);
3663 rec = container_of(cache, struct root_record, cache);
3664 cache = next_cache_extent(cache);
3666 if (rec->found_ref == 0)
3669 list_for_each_entry(backref, &rec->backrefs, list) {
3670 if (!backref->reachable)
3673 ref_root = get_root_rec(root_cache,
3675 BUG_ON(IS_ERR(ref_root));
3676 if (ref_root->found_ref > 0)
3679 backref->reachable = 0;
3681 if (rec->found_ref == 0)
3687 cache = search_cache_extent(root_cache, 0);
3691 rec = container_of(cache, struct root_record, cache);
3692 cache = next_cache_extent(cache);
3694 if (rec->found_ref == 0 &&
3695 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3696 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3697 ret = check_orphan_item(root->fs_info->tree_root,
3703 * If we don't have a root item then we likely just have
3704 * a dir item in a snapshot for this root but no actual
3705 * ref key or anything so it's meaningless.
3707 if (!rec->found_root_item)
3710 fprintf(stderr, "fs tree %llu not referenced\n",
3711 (unsigned long long)rec->objectid);
3715 if (rec->found_ref > 0 && !rec->found_root_item)
3717 list_for_each_entry(backref, &rec->backrefs, list) {
3718 if (!backref->found_dir_item)
3719 backref->errors |= REF_ERR_NO_DIR_ITEM;
3720 if (!backref->found_dir_index)
3721 backref->errors |= REF_ERR_NO_DIR_INDEX;
3722 if (!backref->found_back_ref)
3723 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3724 if (!backref->found_forward_ref)
3725 backref->errors |= REF_ERR_NO_ROOT_REF;
3726 if (backref->reachable && backref->errors)
3733 fprintf(stderr, "fs tree %llu refs %u %s\n",
3734 (unsigned long long)rec->objectid, rec->found_ref,
3735 rec->found_root_item ? "" : "not found");
3737 list_for_each_entry(backref, &rec->backrefs, list) {
3738 if (!backref->reachable)
3740 if (!backref->errors && rec->found_root_item)
3742 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3743 " index %llu namelen %u name %s errors %x\n",
3744 (unsigned long long)backref->ref_root,
3745 (unsigned long long)backref->dir,
3746 (unsigned long long)backref->index,
3747 backref->namelen, backref->name,
3749 print_ref_error(backref->errors);
3752 return errors > 0 ? 1 : 0;
/*
 * Feed one ROOT_REF or ROOT_BACKREF item into the root cache.
 *
 * Reads dirid, sequence (used as the index) and name length from the
 * btrfs_root_ref item in @slot of @eb.  A name longer than BTRFS_NAME_LEN
 * is truncated to that length and flagged with REF_ERR_NAME_TOO_LONG.  The
 * name bytes are read from directly behind the root_ref structure.
 *
 * For a BTRFS_ROOT_REF_KEY the key offset is passed as the referenced root
 * and the key objectid as the referencing root; for the other key type
 * the two are swapped.
 */
3755 static int process_root_ref(struct extent_buffer *eb, int slot,
3756 struct btrfs_key *key,
3757 struct cache_tree *root_cache)
3763 struct btrfs_root_ref *ref;
3764 char namebuf[BTRFS_NAME_LEN];
3767 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3769 dirid = btrfs_root_ref_dirid(eb, ref);
3770 index = btrfs_root_ref_sequence(eb, ref);
3771 name_len = btrfs_root_ref_name_len(eb, ref);
3773 if (name_len <= BTRFS_NAME_LEN) {
3777 len = BTRFS_NAME_LEN;
3778 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the fixed-size root_ref struct */
3780 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3782 if (key->type == BTRFS_ROOT_REF_KEY) {
3783 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3784 index, namebuf, len, key->type, error);
3786 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3787 index, namebuf, len, key->type, error);
/*
 * Release callback for one btrfs_corrupt_block stored in a cache tree.
 * The block is recovered from its embedded cache_extent member.
 */
3792 static void free_corrupt_block(struct cache_extent *cache)
3794 struct btrfs_corrupt_block *corrupt;
3796 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiate the tree-wide free helper for corrupt blocks; other code in
 * this file calls it as free_corrupt_blocks_tree().
 */
3800 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3803 * Repair the btree of the given root.
3805 * The fix is to remove the node key in corrupt_blocks cache_tree
3806 * and rebalance the tree.
3807 * After the fix, the btree should be writeable.
/*
 * Drop the corrupted tree blocks recorded in @corrupt_blocks from @root.
 *
 * Does nothing visible when the cache tree is empty.  Otherwise, inside
 * one transaction:
 *  1. for every recorded block, search down to the level of the corrupt
 *     block with ins_len 0 (no balancing), read the block pointer at that
 *     slot, remove the pointer with btrfs_del_ptr() and free the extent it
 *     pointed to (nodesize bytes);
 *  2. for every recorded key, search again with ins_len -1 so that
 *     btrfs_search_slot() rebalances the tree.
 * The transaction is committed and the path released at the end.
 */
3809 static int repair_btree(struct btrfs_root *root,
3810 struct cache_tree *corrupt_blocks)
3812 struct btrfs_trans_handle *trans;
3813 struct btrfs_path path;
3814 struct btrfs_corrupt_block *corrupt;
3815 struct cache_extent *cache;
3816 struct btrfs_key key;
3821 if (cache_tree_empty(corrupt_blocks))
3824 trans = btrfs_start_transaction(root, 1);
3825 if (IS_ERR(trans)) {
3826 ret = PTR_ERR(trans);
3827 fprintf(stderr, "Error starting transaction: %s\n",
3831 btrfs_init_path(&path);
3832 cache = first_cache_extent(corrupt_blocks);
3834 corrupt = container_of(cache, struct btrfs_corrupt_block,
3836 level = corrupt->level;
3837 path.lowest_level = level;
3838 key.objectid = corrupt->key.objectid;
3839 key.type = corrupt->key.type;
3840 key.offset = corrupt->key.offset;
3843 * Here we don't want to do any tree balance, since it may
3844 * cause a balance with corrupted brother leaf/node,
3845 * so ins_len set to 0 here.
3846 * Balance will be done after all corrupt node/leaf is deleted.
3848 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3851 offset = btrfs_node_blockptr(path.nodes[level],
3854 /* Remove the ptr */
3855 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3859 * Remove the corresponding extent
3860 * return value is not concerned.
3862 btrfs_release_path(&path);
3863 ret = btrfs_free_extent(trans, root, offset,
3864 root->fs_info->nodesize, 0,
3865 root->root_key.objectid, level - 1, 0);
3866 cache = next_cache_extent(cache);
3869 /* Balance the btree using btrfs_search_slot() */
3870 cache = first_cache_extent(corrupt_blocks);
3872 corrupt = container_of(cache, struct btrfs_corrupt_block,
3874 memcpy(&key, &corrupt->key, sizeof(key));
3875 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3878 /* return will always >0 since it won't find the item */
3880 btrfs_release_path(&path);
3881 cache = next_cache_extent(cache);
3884 btrfs_commit_transaction(trans, root);
3885 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Visible steps:
 *  - a local corrupt_blocks cache tree is installed in fs_info for the
 *    duration of the check and removed again at the end;
 *  - unless this is the tree-reloc root, its root_record is looked up and
 *    found_root_item is set when the root item has refs;
 *  - orphan data extents queued on the root are moved to the inode record
 *    they belong to, which is flagged I_ERR_FILE_EXTENT_ORPHAN;
 *  - the root block is validated with btrfs_check_leaf() or
 *    btrfs_check_node();
 *  - the walk starts at the root node, or, for a root that has no refs
 *    and a non-zero drop_progress, at the drop_progress key and
 *    drop_level (an out-of-range drop level is reported);
 *  - walk_down_tree() and walk_up_tree() traverse the tree;
 *  - recorded corrupted tree blocks are listed and repair_btree() is tried
 *    on them;
 *  - merge_root_recs() and check_inode_recs() process the collected
 *    records;
 *  - the corrupt block tree and the orphan data extents are freed.
 */
3889 static int check_fs_root(struct btrfs_root *root,
3890 struct cache_tree *root_cache,
3891 struct walk_control *wc)
3897 struct btrfs_path path;
3898 struct shared_node root_node;
3899 struct root_record *rec;
3900 struct btrfs_root_item *root_item = &root->root_item;
3901 struct cache_tree corrupt_blocks;
3902 struct orphan_data_extent *orphan;
3903 struct orphan_data_extent *tmp;
3904 enum btrfs_tree_block_status status;
3905 struct node_refs nrefs;
3908 * Reuse the corrupt_block cache tree to record corrupted tree block
3910 * Unlike the usage in extent tree check, here we do it in a per
3911 * fs/subvol tree base.
3913 cache_tree_init(&corrupt_blocks);
3914 root->fs_info->corrupt_blocks = &corrupt_blocks;
3916 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3917 rec = get_root_rec(root_cache, root->root_key.objectid);
3918 BUG_ON(IS_ERR(rec));
3919 if (btrfs_root_refs(root_item) > 0)
3920 rec->found_root_item = 1;
3923 btrfs_init_path(&path);
3924 memset(&root_node, 0, sizeof(root_node));
3925 cache_tree_init(&root_node.root_cache);
3926 cache_tree_init(&root_node.inode_cache);
3927 memset(&nrefs, 0, sizeof(nrefs));
3929 /* Move the orphan extent record to corresponding inode_record */
3930 list_for_each_entry_safe(orphan, tmp,
3931 &root->orphan_data_extents, list) {
3932 struct inode_record *inode;
3934 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3936 BUG_ON(IS_ERR(inode));
3937 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3938 list_move(&orphan->list, &inode->orphan_extents);
/* Start the walk control at the level of the root node */
3941 level = btrfs_header_level(root->node);
3942 memset(wc->nodes, 0, sizeof(wc->nodes));
3943 wc->nodes[level] = &root_node;
3944 wc->active_node = level;
3945 wc->root_level = level;
3947 /* We may not have checked the root block, lets do that now */
3948 if (btrfs_is_leaf(root->node))
3949 status = btrfs_check_leaf(root, NULL, root->node);
3951 status = btrfs_check_node(root, NULL, root->node);
3952 if (status != BTRFS_TREE_BLOCK_CLEAN)
3955 if (btrfs_root_refs(root_item) > 0 ||
3956 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3957 path.nodes[level] = root->node;
3958 extent_buffer_get(root->node);
3959 path.slots[level] = 0;
3961 struct btrfs_key key;
3962 struct btrfs_disk_key found_key;
3964 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3965 level = root_item->drop_level;
3966 path.lowest_level = level;
3967 if (level > btrfs_header_level(root->node) ||
3968 level >= BTRFS_MAX_LEVEL) {
3969 error("ignoring invalid drop level: %u", level);
3972 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3975 btrfs_node_key(path.nodes[level], &found_key,
3977 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3978 sizeof(found_key)));
3982 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3988 wret = walk_up_tree(root, &path, wc, &level);
3995 btrfs_release_path(&path);
/* Report the tree blocks recorded as corrupted during the walk */
3997 if (!cache_tree_empty(&corrupt_blocks)) {
3998 struct cache_extent *cache;
3999 struct btrfs_corrupt_block *corrupt;
4001 printf("The following tree block(s) is corrupted in tree %llu:\n",
4002 root->root_key.objectid);
4003 cache = first_cache_extent(&corrupt_blocks);
4005 corrupt = container_of(cache,
4006 struct btrfs_corrupt_block,
4008 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4009 cache->start, corrupt->level,
4010 corrupt->key.objectid, corrupt->key.type,
4011 corrupt->key.offset);
4012 cache = next_cache_extent(cache);
4015 printf("Try to repair the btree for root %llu\n",
4016 root->root_key.objectid);
4017 ret = repair_btree(root, &corrupt_blocks);
4019 fprintf(stderr, "Failed to repair btree: %s\n",
4022 printf("Btree for root %llu is fixed\n",
4023 root->root_key.objectid);
4027 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4031 if (root_node.current) {
4032 root_node.current->checked = 1;
4033 maybe_free_inode_rec(&root_node.inode_cache,
4037 err = check_inode_recs(root, &root_node.inode_cache);
4041 free_corrupt_blocks_tree(&corrupt_blocks);
4042 root->fs_info->corrupt_blocks = NULL;
4043 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid names a tree that check_fs_roots() should walk.
 *
 * BTRFS_TREE_RELOC_OBJECTID and BTRFS_DATA_RELOC_TREE_OBJECTID are handled
 * by a dedicated branch (its return statement is not visible here); every
 * other objectid is decided by is_fstree().
 */
4047 static int fs_root_objectid(u64 objectid)
4049 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4050 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4052 return is_fstree(objectid);
/*
 * Check every fs/subvolume tree listed in the tree root.
 *
 * Optionally starts the progress task, resets the cached block groups,
 * then iterates over the items of the tree root:
 *  - a ROOT_ITEM whose objectid passes fs_root_objectid() is read
 *    (uncached for the tree-reloc root, otherwise through
 *    btrfs_read_fs_root() with offset (u64)-1) and handed to
 *    check_fs_root(); a tree-reloc root is freed again afterwards;
 *  - ROOT_REF and ROOT_BACKREF items are passed to process_root_ref().
 * If the tree root node changes during the walk, or check_fs_root()
 * returns -EAGAIN, the root records are freed and the path released so
 * that the scan can be redone.
 *
 * At the end the shared-node cache is freed (with a warning if it is
 * still not empty) and the progress task is stopped.
 */
4055 static int check_fs_roots(struct btrfs_root *root,
4056 struct cache_tree *root_cache)
4058 struct btrfs_path path;
4059 struct btrfs_key key;
4060 struct walk_control wc;
4061 struct extent_buffer *leaf, *tree_node;
4062 struct btrfs_root *tmp_root;
4063 struct btrfs_root *tree_root = root->fs_info->tree_root;
4067 if (ctx.progress_enabled) {
4068 ctx.tp = TASK_FS_ROOTS;
4069 task_start(ctx.info);
4073 * Just in case we made any changes to the extent tree that weren't
4074 * reflected into the free space cache yet.
4077 reset_cached_block_groups(root->fs_info);
4078 memset(&wc, 0, sizeof(wc));
4079 cache_tree_init(&wc.shared);
4080 btrfs_init_path(&path);
4085 key.type = BTRFS_ROOT_ITEM_KEY;
4086 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4091 tree_node = tree_root->node;
4093 if (tree_node != tree_root->node) {
4094 free_root_recs_tree(root_cache);
4095 btrfs_release_path(&path);
4098 leaf = path.nodes[0];
4099 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4100 ret = btrfs_next_leaf(tree_root, &path);
4106 leaf = path.nodes[0];
4108 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4109 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4110 fs_root_objectid(key.objectid)) {
4111 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4112 tmp_root = btrfs_read_fs_root_no_cache(
4113 root->fs_info, &key);
4115 key.offset = (u64)-1;
4116 tmp_root = btrfs_read_fs_root(
4117 root->fs_info, &key);
4119 if (IS_ERR(tmp_root)) {
4123 ret = check_fs_root(tmp_root, root_cache, &wc);
4124 if (ret == -EAGAIN) {
4125 free_root_recs_tree(root_cache);
4126 btrfs_release_path(&path);
4131 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4132 btrfs_free_fs_root(tmp_root);
4133 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4134 key.type == BTRFS_ROOT_BACKREF_KEY) {
4135 process_root_ref(leaf, path.slots[0], &key,
4142 btrfs_release_path(&path);
4144 free_extent_cache_tree(&wc.shared);
4145 if (!cache_tree_empty(&wc.shared))
4146 fprintf(stderr, "warning line %d\n", __LINE__);
4148 task_stop(ctx.info);
4154 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4155 * INODE_REF/INODE_EXTREF match.
4157 * @root: the root of the fs/file tree
4158 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4159 * @key: the key of the DIR_ITEM/DIR_INDEX
4160 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4161 * distinguish root_dir from normal dir/file
4162 * @name: the name in the INODE_REF/INODE_EXTREF
4163 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4164 * @mode: the st_mode of INODE_ITEM
4166 * Return 0 if no error occurred.
4167 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4168 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4170 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4171 * do not match for normal dir/file.
/*
 * Search @root for the DIR_ITEM/DIR_INDEX at @key and compare its entries
 * with an inode reference described by @ref_key, @name, @namelen and @mode.
 */
4173 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4174 struct btrfs_key *key, u64 index, char *name,
4175 u32 namelen, u32 mode)
4177 struct btrfs_path path;
4178 struct extent_buffer *node;
4179 struct btrfs_dir_item *di;
4180 struct btrfs_key location;
4181 char namebuf[BTRFS_NAME_LEN] = {0};
4191 btrfs_init_path(&path);
4192 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4194 ret = DIR_ITEM_MISSING;
4198 /* Process root dir and go to out */
4201 ret = ROOT_DIR_ERROR;
4203 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4205 ref_key->type == BTRFS_INODE_REF_KEY ?
4207 ref_key->objectid, ref_key->offset,
4208 key->type == BTRFS_DIR_ITEM_KEY ?
4209 "DIR_ITEM" : "DIR_INDEX");
4217 /* Process normal file/dir */
4219 ret = DIR_ITEM_MISSING;
4221 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4223 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4224 ref_key->objectid, ref_key->offset,
4225 key->type == BTRFS_DIR_ITEM_KEY ?
4226 "DIR_ITEM" : "DIR_INDEX",
4227 key->objectid, key->offset, namelen, name,
4228 imode_to_type(mode));
4232 /* Check whether inode_id/filetype/name match */
4233 node = path.nodes[0];
4234 slot = path.slots[0];
4235 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4236 total = btrfs_item_size_nr(node, slot);
/*
 * Walk the btrfs_dir_item entries packed into this item until cur reaches
 * the item size.  ret is reset to DIR_ITEM_MISMATCH for every entry looked
 * at, so that value survives the loop when no entry matches.
 */
4237 while (cur < total) {
4238 ret = DIR_ITEM_MISMATCH;
4239 name_len = btrfs_dir_name_len(node, di);
4240 data_len = btrfs_dir_data_len(node, di);
/* The entry has to point at the INODE_ITEM of the referencing inode. */
4242 btrfs_dir_item_key_to_cpu(node, di, &location);
4243 if (location.objectid != ref_key->objectid ||
4244 location.type != BTRFS_INODE_ITEM_KEY ||
4245 location.offset != 0)
4248 filetype = btrfs_dir_type(node, di);
4249 if (imode_to_type(mode) != filetype)
/*
 * A name running past the item or past BTRFS_NAME_LEN is reported and then
 * compared using a clamped length.
 */
4252 if (cur + sizeof(*di) + name_len > total ||
4253 name_len > BTRFS_NAME_LEN) {
4254 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4256 key->type == BTRFS_DIR_ITEM_KEY ?
4257 "DIR_ITEM" : "DIR_INDEX",
4258 key->objectid, key->offset, name_len);
4260 if (cur + sizeof(*di) > total)
4262 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes start right after the btrfs_dir_item header (di + 1). */
4268 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4269 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next entry: header + name + data. */
4275 len = sizeof(*di) + name_len + data_len;
4276 di = (struct btrfs_dir_item *)((char *)di + len);
4279 if (ret == DIR_ITEM_MISMATCH)
4281 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4283 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4284 ref_key->objectid, ref_key->offset,
4285 key->type == BTRFS_DIR_ITEM_KEY ?
4286 "DIR_ITEM" : "DIR_INDEX",
4287 key->objectid, key->offset, namelen, name,
4288 imode_to_type(mode));
4290 btrfs_release_path(&path);
4295 * Traverse the given INODE_REF and call find_dir_item() to find related
4296 * DIR_ITEM/DIR_INDEX.
4298 * @root: the root of the fs/file tree
4299 * @ref_key: the key of the INODE_REF
4300 * @refs: the count of INODE_REF
4301 * @mode: the st_mode of INODE_ITEM
4303 * Return 0 if no error occurred.
/*
 * Check the names packed in the INODE_REF item stored at @node/@slot: for
 * each one, look up the matching DIR_INDEX and DIR_ITEM with find_dir_item().
 */
4305 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4306 struct extent_buffer *node, int slot, u64 *refs,
4309 struct btrfs_key key;
4310 struct btrfs_inode_ref *ref;
/*
 * NOTE(review): namebuf holds up to BTRFS_NAME_LEN name bytes with no spare
 * byte for a terminating NUL, yet it is handed on as a char * name - verify
 * that no consumer treats it as a C string when the name is full length.
 */
4311 char namebuf[BTRFS_NAME_LEN] = {0};
4319 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4320 total = btrfs_item_size_nr(node, slot);
4323 /* Update inode ref count */
4326 index = btrfs_inode_ref_index(node, ref);
4327 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * A name running past the item or past BTRFS_NAME_LEN triggers a warning;
 * the length actually read is clamped to what fits.
 */
4328 if (cur + sizeof(*ref) + name_len > total ||
4329 name_len > BTRFS_NAME_LEN) {
4330 warning("root %llu INODE_REF[%llu %llu] name too long",
4331 root->objectid, ref_key->objectid, ref_key->offset);
4333 if (total < cur + sizeof(*ref))
4335 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes follow the btrfs_inode_ref header directly (ref + 1). */
4340 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4342 /* Check root dir ref name */
4343 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4344 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4345 root->objectid, ref_key->objectid, ref_key->offset,
4347 err |= ROOT_DIR_ERROR;
4350 /* Find related DIR_INDEX */
/* ref_key->offset supplies the objectid of both directory keys built below. */
4351 key.objectid = ref_key->offset;
4352 key.type = BTRFS_DIR_INDEX_KEY;
4354 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4357 /* Find related dir_item */
4358 key.objectid = ref_key->offset;
4359 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys are addressed by the hash of the name. */
4360 key.offset = btrfs_name_hash(namebuf, len);
4361 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next packed reference: header + name. */
4364 len = sizeof(*ref) + name_len;
4365 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4375 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4376 * DIR_ITEM/DIR_INDEX.
4378 * @root: the root of the fs/file tree
4379 * @ref_key: the key of the INODE_EXTREF
4380 * @refs: the count of INODE_EXTREF
4381 * @mode: the st_mode of INODE_ITEM
4383 * Return 0 if no error occurred.
/*
 * Check the names packed in the INODE_EXTREF item stored at @node/@slot: for
 * each one, look up the matching DIR_INDEX and DIR_ITEM with find_dir_item().
 */
4385 static int check_inode_extref(struct btrfs_root *root,
4386 struct btrfs_key *ref_key,
4387 struct extent_buffer *node, int slot, u64 *refs,
4390 struct btrfs_key key;
4391 struct btrfs_inode_extref *extref;
4392 char namebuf[BTRFS_NAME_LEN] = {0};
4402 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4403 total = btrfs_item_size_nr(node, slot);
4406 /* update inode ref count */
4408 name_len = btrfs_inode_extref_name_len(node, extref);
4409 index = btrfs_inode_extref_index(node, extref);
/* Unlike INODE_REF, the parent directory id is stored in the entry itself. */
4410 parent = btrfs_inode_extref_parent(node, extref);
/*
 * NOTE(review): the visible check compares name_len with BTRFS_NAME_LEN
 * only; check_inode_ref() additionally bounds the name by the remaining
 * item size.  Confirm whether the same bound is wanted here.
 */
4411 if (name_len <= BTRFS_NAME_LEN) {
4414 len = BTRFS_NAME_LEN;
4415 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4416 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the btrfs_inode_extref header directly. */
4418 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4420 /* Check root dir ref name */
4421 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4422 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4423 root->objectid, ref_key->objectid, ref_key->offset,
4425 err |= ROOT_DIR_ERROR;
4428 /* find related dir_index */
4429 key.objectid = parent;
4430 key.type = BTRFS_DIR_INDEX_KEY;
4432 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4435 /* find related dir_item */
4436 key.objectid = parent;
4437 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys are addressed by the hash of the name. */
4438 key.offset = btrfs_name_hash(namebuf, len);
4439 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next packed reference: header + name. */
4442 len = sizeof(*extref) + name_len;
4443 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4453 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4454 * DIR_ITEM/DIR_INDEX match.
4456 * @root: the root of the fs/file tree
4457 * @key: the key of the INODE_REF/INODE_EXTREF
4458 * @name: the name in the INODE_REF/INODE_EXTREF
4459 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4460 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4462 * @ext_ref: the EXTENDED_IREF feature
4464 * Return 0 if no error occurred.
4465 * Return >0 for error bitmap
/*
 * Look for an inode reference matching @name/@namelen (and @index unless it
 * is (u64)-1): first in the INODE_REF item at @key, then in the INODE_EXTREF
 * item derived from it.
 *
 * Note that @key is modified in place before the second lookup: its type
 * becomes BTRFS_INODE_EXTREF_KEY and its offset the extref hash.
 */
4467 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4468 char *name, int namelen, u64 index,
4469 unsigned int ext_ref)
4471 struct btrfs_path path;
4472 struct btrfs_inode_ref *ref;
4473 struct btrfs_inode_extref *extref;
4474 struct extent_buffer *node;
4475 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4486 btrfs_init_path(&path);
4487 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4489 ret = INODE_REF_MISSING;
4493 node = path.nodes[0];
4494 slot = path.slots[0];
4496 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4497 total = btrfs_item_size_nr(node, slot);
4499 /* Iterate over every entry of the INODE_REF item */
4500 while (cur < total) {
/* Reset for each entry so the value survives when nothing matches. */
4501 ret = INODE_REF_MISSING;
4503 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4504 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 disables the index comparison. */
4505 if (index != (u64)-1 && index != ref_index)
4508 if (cur + sizeof(*ref) + ref_namelen > total ||
4509 ref_namelen > BTRFS_NAME_LEN) {
4510 warning("root %llu INODE %s[%llu %llu] name too long",
4512 key->type == BTRFS_INODE_REF_KEY ?
4514 key->objectid, key->offset);
4516 if (cur + sizeof(*ref) > total)
4518 len = min_t(u32, total - cur - sizeof(*ref),
4524 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4527 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step to the next packed reference: header + name. */
4533 len = sizeof(*ref) + ref_namelen;
4534 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4539 /* Skip if the EXTENDED_IREF feature is not supported */
4543 btrfs_release_path(&path);
4544 btrfs_init_path(&path);
/*
 * Rewrite the caller's key into the INODE_EXTREF key: the old offset is
 * kept as dir_id and replaced by the hash of (dir_id, name).
 */
4546 dir_id = key->offset;
4547 key->type = BTRFS_INODE_EXTREF_KEY;
4548 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4550 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4552 ret = INODE_REF_MISSING;
4556 node = path.nodes[0];
4557 slot = path.slots[0];
4559 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4561 total = btrfs_item_size_nr(node, slot);
4563 /* Iterate over every entry of the INODE_EXTREF item */
4564 while (cur < total) {
4565 ret = INODE_REF_MISSING;
4567 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4568 ref_index = btrfs_inode_extref_index(node, extref);
4569 parent = btrfs_inode_extref_parent(node, extref);
4570 if (index != (u64)-1 && index != ref_index)
/* The entry's recorded parent must be the directory we started from. */
4573 if (parent != dir_id)
4576 if (ref_namelen <= BTRFS_NAME_LEN) {
4579 len = BTRFS_NAME_LEN;
4580 warning("root %llu INODE %s[%llu %llu] name too long",
4582 key->type == BTRFS_INODE_REF_KEY ?
4584 key->objectid, key->offset);
4586 read_extent_buffer(node, ref_namebuf,
4587 (unsigned long)(extref + 1), len);
4589 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step to the next packed reference: header + name. */
4596 len = sizeof(*extref) + ref_namelen;
4597 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4602 btrfs_release_path(&path);
4607 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4608 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4610 * @root: the root of the fs/file tree
4611 * @key: the key of the DIR_ITEM/DIR_INDEX
4612 * @size: the st_size of the INODE_ITEM
4613 * @ext_ref: the EXTENDED_IREF feature
4615 * Return 0 if no error occurred.
/*
 * Check the entries of the DIR_ITEM/DIR_INDEX item stored at @node/@slot:
 * each entry's target INODE_ITEM is looked up and its file type compared,
 * and find_inode_ref() is asked for the matching inode reference.  The name
 * length of each entry is added to *@size.
 */
4617 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4618 struct extent_buffer *node, int slot, u64 *size,
4619 unsigned int ext_ref)
4621 struct btrfs_dir_item *di;
4622 struct btrfs_inode_item *ii;
4623 struct btrfs_path path;
4624 struct btrfs_key location;
4625 char namebuf[BTRFS_NAME_LEN] = {0};
4638 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4639 * ignore index check.
4641 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4643 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4644 total = btrfs_item_size_nr(node, slot);
/* Walk the packed btrfs_dir_item entries until cur reaches the item size. */
4646 while (cur < total) {
4647 data_len = btrfs_dir_data_len(node, di);
4649 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4650 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4651 "DIR_ITEM" : "DIR_INDEX",
4652 key->objectid, key->offset, data_len);
4654 name_len = btrfs_dir_name_len(node, di);
/*
 * A name running past the item or past BTRFS_NAME_LEN triggers a warning;
 * the length actually read is clamped to what fits.
 */
4655 if (cur + sizeof(*di) + name_len > total ||
4656 name_len > BTRFS_NAME_LEN) {
4657 warning("root %llu %s[%llu %llu] name too long",
4659 key->type == BTRFS_DIR_ITEM_KEY ?
4660 "DIR_ITEM" : "DIR_INDEX",
4661 key->objectid, key->offset);
4663 if (cur + sizeof(*di) > total)
4665 len = min_t(u32, total - cur - sizeof(*di),
/* The on-disk name length (not the clamped len) is what gets accumulated. */
4670 (*size) += name_len;
4672 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4673 filetype = btrfs_dir_type(node, di);
/* For a DIR_ITEM the key offset must equal the hash of the name. */
4675 if (key->type == BTRFS_DIR_ITEM_KEY &&
4676 key->offset != btrfs_name_hash(namebuf, len)) {
4678 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4679 root->objectid, key->objectid, key->offset,
4680 namebuf, len, filetype, key->offset,
4681 btrfs_name_hash(namebuf, len));
4684 btrfs_init_path(&path);
4685 btrfs_dir_item_key_to_cpu(node, di, &location);
4687 /* Ignore related ROOT_ITEM check */
4688 if (location.type == BTRFS_ROOT_ITEM_KEY)
4691 /* Check relative INODE_ITEM(existence/filetype) */
4692 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4694 err |= INODE_ITEM_MISSING;
4695 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4696 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4697 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4698 key->offset, location.objectid, name_len,
4703 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4704 struct btrfs_inode_item);
4705 mode = btrfs_inode_mode(path.nodes[0], ii);
4707 if (imode_to_type(mode) != filetype) {
4708 err |= INODE_ITEM_MISMATCH;
4709 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4710 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4711 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4712 key->offset, name_len, namebuf, filetype);
4715 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * Reuse location as the INODE_REF key: the objectid stays the target inode
 * and the offset becomes the directory (key->objectid).
 */
4716 location.type = BTRFS_INODE_REF_KEY;
4717 location.offset = key->objectid;
4718 ret = find_inode_ref(root, &location, namebuf, len,
4721 if (ret & INODE_REF_MISSING)
4722 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4723 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4724 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4725 key->offset, name_len, namebuf, filetype);
4728 btrfs_release_path(&path);
/* Step to the next entry: header + name + data. */
4729 len = sizeof(*di) + name_len + data_len;
4730 di = (struct btrfs_dir_item *)((char *)di + len);
/* Bytes left over after an entry of a DIR_INDEX item are an error. */
4733 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4734 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4735 root->objectid, key->objectid, key->offset);
4744 * Check file extent datasum/hole, update the size of the file extents,
4745 * check and update the last offset of the file extent.
4747 * @root: the root of fs/file tree.
4748 * @fkey: the key of the file extent.
4749 * @nodatasum: INODE_NODATASUM feature.
4750 * @size: the sum of all EXTENT_DATA items size for this inode.
4751 * @end: the offset of the last extent.
4753 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item stored at @node/@slot: inline extent size,
 * extent type, checksum coverage and contiguity with the previous extent.
 * *@size and *@end are advanced by the extent's length.
 */
4755 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4756 struct extent_buffer *node, int slot,
4757 unsigned int nodatasum, u64 *size, u64 *end)
4759 struct btrfs_file_extent_item *fi;
4762 u64 extent_num_bytes;
/*
 * NOTE(review): csum_found has no initializer and is tested below before
 * ret is looked at; this relies on count_csum_range() always storing to it -
 * confirm.
 */
4764 u64 csum_found; /* In byte size, sectorsize aligned */
4765 u64 search_start; /* Logical range start we search for csum */
4766 u64 search_len; /* Logical range len we search for csum */
4767 unsigned int extent_type;
4768 unsigned int is_hole;
4773 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4775 /* Check inline extent */
4776 extent_type = btrfs_file_extent_type(node, fi);
4777 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4778 struct btrfs_item *e = btrfs_item_nr(slot);
4779 u32 item_inline_len;
4781 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4782 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4783 compressed = btrfs_file_extent_compression(node, fi);
4784 if (extent_num_bytes == 0) {
4786 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4787 root->objectid, fkey->objectid, fkey->offset);
4788 err |= FILE_EXTENT_ERROR;
/* The two lengths are only required to agree for uncompressed data. */
4790 if (!compressed && extent_num_bytes != item_inline_len) {
4792 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4793 root->objectid, fkey->objectid, fkey->offset,
4794 extent_num_bytes, item_inline_len);
4795 err |= FILE_EXTENT_ERROR;
4797 *end += extent_num_bytes;
4798 *size += extent_num_bytes;
4802 /* Check extent type */
4803 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4804 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4805 err |= FILE_EXTENT_ERROR;
4806 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4807 root->objectid, fkey->objectid, fkey->offset);
4811 /* Check REG_EXTENT/PREALLOC_EXTENT */
4812 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4813 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4814 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4815 extent_offset = btrfs_file_extent_offset(node, fi);
4816 compressed = btrfs_file_extent_compression(node, fi);
/* A hole has both a zero disk address and a zero on-disk length. */
4817 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4820 * Check EXTENT_DATA csum
4822 * For plain (uncompressed) extent, we should only check the range
4823 * we're referring to, as it's possible that part of prealloc extent
4824 * has been written, and has csum:
4826 * |<--- Original large preallocated extent A ---->|
4827 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4830 * For compressed extent, we should check the whole range.
4833 search_start = disk_bytenr + extent_offset;
4834 search_len = extent_num_bytes;
4836 search_start = disk_bytenr;
4837 search_len = disk_num_bytes;
4839 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three exclusive complaints: checksums on a nodatasum inode, a regular
 * non-hole extent whose checksums do not cover the searched range (or the
 * lookup failed), and checksums on a preallocated extent.
 */
4840 if (csum_found > 0 && nodatasum) {
4841 err |= ODD_CSUM_ITEM;
4842 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4843 root->objectid, fkey->objectid, fkey->offset);
4844 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4845 !is_hole && (ret < 0 || csum_found < search_len)) {
4846 err |= CSUM_ITEM_MISSING;
4847 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4848 root->objectid, fkey->objectid, fkey->offset,
4849 csum_found, search_len);
4850 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4851 err |= ODD_CSUM_ITEM;
4852 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4853 root->objectid, fkey->objectid, fkey->offset, csum_found);
4856 /* Check EXTENT_DATA hole */
/*
 * Unless the no_holes global is set, this extent has to start exactly where
 * the previous one ended (*end).
 */
4857 if (!no_holes && *end != fkey->offset) {
4858 err |= FILE_EXTENT_ERROR;
4859 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4860 root->objectid, fkey->objectid, fkey->offset);
4863 *end += extent_num_bytes;
4865 *size += extent_num_bytes;
4871 * Check INODE_ITEM and related ITEMs (the same inode number)
4872 * 1. check link count
4873 * 2. check inode ref/extref
4874 * 3. check dir item/index
4876 * @ext_ref: the EXTENDED_IREF feature
4878 * Return 0 if no error occurred.
4879 * Return >0 (error bitmap) on error or when the end of the traversal is hit
/*
 * Check the INODE_ITEM that @path points at, then step through the following
 * items carrying the same objectid and dispatch each to its checker by key
 * type.  Afterwards nlink, size and nbytes of the inode are compared against
 * the values gathered on the way.
 */
4881 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4882 unsigned int ext_ref)
4884 struct extent_buffer *node;
4885 struct btrfs_inode_item *ii;
4886 struct btrfs_key key;
4895 u64 extent_size = 0;
4897 unsigned int nodatasum;
4902 node = path->nodes[0];
4903 slot = path->slots[0];
4905 btrfs_item_key_to_cpu(node, &key, slot);
4906 inode_id = key.objectid;
/* The orphan objectid is not checked as an inode; just step past the item. */
4908 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4909 ret = btrfs_next_item(root, path);
/* Snapshot the INODE_ITEM fields that are verified after the item walk. */
4915 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4916 isize = btrfs_inode_size(node, ii);
4917 nbytes = btrfs_inode_nbytes(node, ii);
4918 mode = btrfs_inode_mode(node, ii);
4919 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4920 nlink = btrfs_inode_nlink(node, ii);
4921 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4924 ret = btrfs_next_item(root, path);
4926 /* out will fill 'err' using current statistics */
4928 } else if (ret > 0) {
4933 node = path->nodes[0];
4934 slot = path->slots[0];
4935 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
4936 if (key.objectid != inode_id)
4940 case BTRFS_INODE_REF_KEY:
4941 ret = check_inode_ref(root, &key, node, slot, &refs,
4945 case BTRFS_INODE_EXTREF_KEY:
4946 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4947 warning("root %llu EXTREF[%llu %llu] isn't supported",
4948 root->objectid, key.objectid,
4950 ret = check_inode_extref(root, &key, node, slot, &refs,
4954 case BTRFS_DIR_ITEM_KEY:
4955 case BTRFS_DIR_INDEX_KEY:
/*
 * NOTE(review): this warning text says DIR_INDEX although the case labels
 * above also route DIR_ITEM keys here.
 */
4957 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4958 root->objectid, inode_id,
4959 imode_to_type(mode), key.objectid,
4962 ret = check_dir_item(root, &key, node, slot, &size,
4966 case BTRFS_EXTENT_DATA_KEY:
4968 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4969 root->objectid, inode_id, key.objectid,
4972 ret = check_file_extent(root, &key, node, slot,
4973 nodatasum, &extent_size,
4977 case BTRFS_XATTR_ITEM_KEY:
4980 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4981 key.objectid, key.type, key.offset);
4986 /* verify INODE_ITEM nlink/isize/nbytes */
4989 err |= LINK_COUNT_ERROR;
4990 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4991 root->objectid, inode_id, nlink);
4995 * Just a warning, as dir inode nbytes is just an
4996 * instructive value.
4998 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4999 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5000 root->objectid, inode_id,
5001 root->fs_info->nodesize);
/* size was accumulated by check_dir_item() from the entry name lengths. */
5004 if (isize != size) {
5006 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5007 root->objectid, inode_id, isize, size);
/* refs was accumulated by check_inode_ref()/check_inode_extref(). */
5010 if (nlink != refs) {
5011 err |= LINK_COUNT_ERROR;
5012 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5013 root->objectid, inode_id, nlink, refs);
5014 } else if (!nlink) {
5018 if (!nbytes && !no_holes && extent_end < isize) {
5019 err |= NBYTES_ERROR;
5020 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5021 root->objectid, inode_id, isize);
/* extent_size was accumulated by check_file_extent(). */
5024 if (nbytes != extent_size) {
5025 err |= NBYTES_ERROR;
5026 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5027 root->objectid, inode_id, nbytes, extent_size);
/*
 * Look up the INODE_ITEM with objectid BTRFS_FIRST_FREE_OBJECTID in @root and
 * run check_inode_item() on it; a missing item is reported as
 * INODE_ITEM_MISSING.
 */
5034 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5036 struct btrfs_path path;
5037 struct btrfs_key key;
5041 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5042 key.type = BTRFS_INODE_ITEM_KEY;
5045 /* For root being dropped, we don't need to check first inode */
/* "Being dropped" here means zero root refs plus recorded drop progress. */
5046 if (btrfs_root_refs(&root->root_item) == 0 &&
5047 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5051 btrfs_init_path(&path);
5053 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5058 err |= INODE_ITEM_MISSING;
5059 error("first inode item of root %llu is missing",
/* The checker's error bits are merged into this function's own bitmap. */
5063 err |= check_inode_item(root, &path, ext_ref);
5068 btrfs_release_path(&path);
5073 * Iterate all item on the tree and call check_inode_item() to check.
5075 * @root: the root of the tree to be checked.
5076 * @ext_ref: the EXTENDED_IREF feature
5078 * Return 0 if no error found.
5079 * Return <0 for error.
/*
 * Check one fs/subvolume tree: first its first inode item via
 * check_fs_first_inode(), then the rest of the tree by alternating
 * walk_down_tree_v2() and walk_up_tree_v2().
 */
5081 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5083 struct btrfs_path path;
5084 struct node_refs nrefs;
5085 struct btrfs_root_item *root_item = &root->root_item;
5091 * We need to manually check the first inode item(256)
5092 * As the following traversal function will only start from
5093 * the first inode item in the leaf, if inode item(256) is missing
5094 * we will just skip it forever.
5096 ret = check_fs_first_inode(root, ext_ref);
5100 memset(&nrefs, 0, sizeof(nrefs));
5101 level = btrfs_header_level(root->node);
5102 btrfs_init_path(&path);
/*
 * Root still referenced, or no drop progress recorded: start the walk at
 * the root node, slot 0, taking an extra reference on that buffer.
 */
5104 if (btrfs_root_refs(root_item) > 0 ||
5105 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5106 path.nodes[level] = root->node;
5107 path.slots[level] = 0;
5108 extent_buffer_get(root->node);
5110 struct btrfs_key key;
/* Otherwise position the path at the recorded drop progress key/level. */
5112 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5113 level = root_item->drop_level;
5114 path.lowest_level = level;
5115 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5122 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5125 /* if ret is negative, walk shall stop */
5131 ret = walk_up_tree_v2(root, &path, &level);
5133 /* Normal exit, reset ret to err */
5140 btrfs_release_path(&path);
5145 * Find the relative ref for root_ref and root_backref.
5147 * @root: the root of the root tree.
5148 * @ref_key: the key of the root ref.
5150 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF/ROOT_BACKREF stored at @node/@slot has a
 * counterpart of the opposite type in @root, and that both agree on dirid,
 * sequence, name length and name.
 */
5152 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5153 struct extent_buffer *node, int slot)
5155 struct btrfs_path path;
5156 struct btrfs_key key;
5157 struct btrfs_root_ref *ref;
5158 struct btrfs_root_ref *backref;
5159 char ref_name[BTRFS_NAME_LEN] = {0};
5160 char backref_name[BTRFS_NAME_LEN] = {0};
5166 u32 backref_namelen;
5171 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5172 ref_dirid = btrfs_root_ref_dirid(node, ref);
5173 ref_seq = btrfs_root_ref_sequence(node, ref);
5174 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Over-long names are warned about and read clamped to BTRFS_NAME_LEN. */
5176 if (ref_namelen <= BTRFS_NAME_LEN) {
5179 len = BTRFS_NAME_LEN;
5180 warning("%s[%llu %llu] ref_name too long",
5181 ref_key->type == BTRFS_ROOT_REF_KEY ?
5182 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes follow the btrfs_root_ref header directly (ref + 1). */
5185 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5187 /* Find relative root_ref */
/*
 * Build the mirror key: objectid and offset swap places, and the type
 * arithmetic turns ROOT_REF into ROOT_BACKREF and vice versa.
 */
5188 key.objectid = ref_key->offset;
5189 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5190 key.offset = ref_key->objectid;
5192 btrfs_init_path(&path);
5193 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5195 err |= ROOT_REF_MISSING;
5196 error("%s[%llu %llu] couldn't find relative ref",
5197 ref_key->type == BTRFS_ROOT_REF_KEY ?
5198 "ROOT_REF" : "ROOT_BACKREF",
5199 ref_key->objectid, ref_key->offset);
5203 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5204 struct btrfs_root_ref);
5205 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5206 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5207 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5209 if (backref_namelen <= BTRFS_NAME_LEN) {
5210 len = backref_namelen;
5212 len = BTRFS_NAME_LEN;
5213 warning("%s[%llu %llu] ref_name too long",
5214 key.type == BTRFS_ROOT_REF_KEY ?
5215 "ROOT_REF" : "ROOT_BACKREF",
5216 key.objectid, key.offset);
5218 read_extent_buffer(path.nodes[0], backref_name,
5219 (unsigned long)(backref + 1), len);
/*
 * Names are compared over len, the counterpart's clamped length; differing
 * on-disk lengths are already caught by the name length comparison.
 */
5221 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5222 ref_namelen != backref_namelen ||
5223 strncmp(ref_name, backref_name, len)) {
5224 err |= ROOT_REF_MISMATCH;
5225 error("%s[%llu %llu] mismatch relative ref",
5226 ref_key->type == BTRFS_ROOT_REF_KEY ?
5227 "ROOT_REF" : "ROOT_BACKREF",
5228 ref_key->objectid, ref_key->offset);
5231 btrfs_release_path(&path);
5236 * Check all fs/file tree in low_memory mode.
5238 * 1. for fs tree root item, call check_fs_root_v2()
5239 * 2. for fs tree root ref/backref, call check_root_ref()
5241 * Return 0 if no error occurred.
/*
 * Walk the root tree of @fs_info starting at BTRFS_FS_TREE_OBJECTID.
 * ROOT_ITEM keys accepted by fs_root_objectid() are checked with
 * check_fs_root_v2(); ROOT_REF/ROOT_BACKREF keys with check_root_ref().
 */
5243 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5245 struct btrfs_root *tree_root = fs_info->tree_root;
5246 struct btrfs_root *cur_root = NULL;
5247 struct btrfs_path path;
5248 struct btrfs_key key;
5249 struct extent_buffer *node;
5250 unsigned int ext_ref;
/* Whether the filesystem has the EXTENDED_IREF incompat feature set. */
5255 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5257 btrfs_init_path(&path);
5258 key.objectid = BTRFS_FS_TREE_OBJECTID;
5260 key.type = BTRFS_ROOT_ITEM_KEY;
5262 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5266 } else if (ret > 0) {
5272 node = path.nodes[0];
5273 slot = path.slots[0];
5274 btrfs_item_key_to_cpu(node, &key, slot);
/* Keys beyond BTRFS_LAST_FREE_OBJECTID are outside the range handled here. */
5275 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5277 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5278 fs_root_objectid(key.objectid)) {
/*
 * Reloc tree roots are read without the root cache (and freed again after
 * the check below); all other roots are looked up with offset (u64)-1.
 */
5279 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5280 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5283 key.offset = (u64)-1;
5284 cur_root = btrfs_read_fs_root(fs_info, &key);
5287 if (IS_ERR(cur_root)) {
5288 error("Fail to read fs/subvol tree: %lld",
5294 ret = check_fs_root_v2(cur_root, ext_ref);
5297 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5298 btrfs_free_fs_root(cur_root);
5299 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5300 key.type == BTRFS_ROOT_BACKREF_KEY) {
5301 ret = check_root_ref(tree_root, &key, node, slot);
5305 ret = btrfs_next_item(tree_root, &path);
5315 btrfs_release_path(&path);
/*
 * Walk every backref on rec->backrefs and test it against the extent record:
 * presence in the extent tree, being referenced back, local reference
 * counts, disk bytenr and byte length.  Finally the summed reference count
 * is compared with rec->refs.  Each failed test has a diagnostic on stderr.
 *
 * NOTE(review): how @print_errs gates the diagnostics and what value is
 * returned are not visible here; maybe_free_extent_rec() treats a zero
 * return as "nothing wrong".
 */
5319 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5321 struct list_head *cur = rec->backrefs.next;
5322 struct extent_backref *back;
5323 struct tree_backref *tback;
5324 struct data_backref *dback;
5328 while(cur != &rec->backrefs) {
5329 back = to_extent_backref(cur);
/* Backref was never seen in the extent tree. */
5331 if (!back->found_extent_tree) {
5335 if (back->is_data) {
5336 dback = to_data_backref(back);
5337 fprintf(stderr, "Backref %llu %s %llu"
5338 " owner %llu offset %llu num_refs %lu"
5339 " not found in extent tree\n",
5340 (unsigned long long)rec->start,
5341 back->full_backref ?
/* Full backrefs are identified by parent, the others by root. */
5343 back->full_backref ?
5344 (unsigned long long)dback->parent:
5345 (unsigned long long)dback->root,
5346 (unsigned long long)dback->owner,
5347 (unsigned long long)dback->offset,
5348 (unsigned long)dback->num_refs);
5350 tback = to_tree_backref(back);
5351 fprintf(stderr, "Backref %llu parent %llu"
5352 " root %llu not found in extent tree\n",
5353 (unsigned long long)rec->start,
5354 (unsigned long long)tback->parent,
5355 (unsigned long long)tback->root);
/* Tree backref recorded in the extent tree that nothing referenced back. */
5358 if (!back->is_data && !back->found_ref) {
5362 tback = to_tree_backref(back);
/*
 * NOTE(review): back is passed to %p without a cast to void *, here and in
 * the local backref count message below.
 */
5363 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5364 (unsigned long long)rec->start,
5365 back->full_backref ? "parent" : "root",
5366 back->full_backref ?
5367 (unsigned long long)tback->parent :
5368 (unsigned long long)tback->root, back);
5370 if (back->is_data) {
5371 dback = to_data_backref(back);
/* References found must equal the count recorded in the backref. */
5372 if (dback->found_ref != dback->num_refs) {
5376 fprintf(stderr, "Incorrect local backref count"
5377 " on %llu %s %llu owner %llu"
5378 " offset %llu found %u wanted %u back %p\n",
5379 (unsigned long long)rec->start,
5380 back->full_backref ?
5382 back->full_backref ?
5383 (unsigned long long)dback->parent:
5384 (unsigned long long)dback->root,
5385 (unsigned long long)dback->owner,
5386 (unsigned long long)dback->offset,
5387 dback->found_ref, dback->num_refs, back);
5389 if (dback->disk_bytenr != rec->start) {
5393 fprintf(stderr, "Backref disk bytenr does not"
5394 " match extent record, bytenr=%llu, "
5395 "ref bytenr=%llu\n",
5396 (unsigned long long)rec->start,
5397 (unsigned long long)dback->disk_bytenr);
5400 if (dback->bytes != rec->nr) {
5404 fprintf(stderr, "Backref bytes do not match "
5405 "extent backref, bytenr=%llu, ref "
5406 "bytes=%llu, backref bytes=%llu\n",
5407 (unsigned long long)rec->start,
5408 (unsigned long long)rec->nr,
5409 (unsigned long long)dback->bytes);
/* Accumulate the global count; data backrefs contribute found_ref. */
5412 if (!back->is_data) {
5415 dback = to_data_backref(back);
5416 found += dback->found_ref;
5419 if (found != rec->refs) {
5423 fprintf(stderr, "Incorrect global backref count "
5424 "on %llu found %llu wanted %llu\n",
5425 (unsigned long long)rec->start,
5426 (unsigned long long)found,
5427 (unsigned long long)rec->refs);
/*
 * Drain the backref list of @rec: while rec->backrefs is non-empty, take
 * the node at the head of the list and map it to its extent_backref.
 * NOTE(review): the unlink/free of each entry and the return value are on
 * lines not present in this listing - confirm against the full source.
 */
5433 static int free_all_extent_backrefs(struct extent_record *rec)
5435 struct extent_backref *back;
5436 struct list_head *cur;
5437 while (!list_empty(&rec->backrefs)) {
/* Always work on the current head of the list. */
5438 cur = rec->backrefs.next;
5439 back = to_extent_backref(cur);
/*
 * Tear down the extent records stored in @extent_cache: fetch the first
 * cache entry, map it to its extent_record, remove it from the tree and
 * release its backrefs with free_all_extent_backrefs().
 * NOTE(review): the loop control and the release of rec itself are on
 * lines not present in this listing.
 */
5446 static void free_extent_record_cache(struct cache_tree *extent_cache)
5448 struct cache_extent *cache;
5449 struct extent_record *rec;
5452 cache = first_cache_extent(extent_cache);
5455 rec = container_of(cache, struct extent_record, cache);
/* Detach from the tree before dropping the backrefs hanging off rec. */
5456 remove_cache_extent(extent_cache, cache);
5457 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once every check on it has passed.
 *
 * The record is released only when all of these hold: content_checked and
 * owner_ref_checked are set, extent_item_refs equals refs and refs is
 * non-zero, num_duplicates is 0, all_backpointers_checked(rec, 0) returns
 * 0, and bad_full_backref, crossing_stripes and wrong_chunk_type are all
 * clear.
 * NOTE(review): the final release of rec and the return value are on lines
 * not present in this listing.
 */
5462 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5463 struct extent_record *rec)
5465 if (rec->content_checked && rec->owner_ref_checked &&
5466 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5467 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5468 !rec->bad_full_backref && !rec->crossing_stripes &&
5469 !rec->wrong_chunk_type) {
/* Unhook from the cache tree, drop the backrefs, then leave rec->list. */
5470 remove_cache_extent(extent_cache, &rec->cache);
5471 free_all_extent_backrefs(rec);
5472 list_del_init(&rec->list);
/*
 * Check that tree block @buf is reachable from the tree named by its
 * header owner.
 *
 * Step 1 walks rec->backrefs, testing found_ref and full_backref on each
 * node and comparing the tree backref root with btrfs_header_owner(buf).
 * Step 2 reads the root whose objectid is the header owner, searches for
 * the first key of @buf with path.lowest_level = level + 1, and compares
 * the block pointer in the parent slot with buf->start.
 *
 * Returns 0 if the reference was found, 1 otherwise (final return).
 * NOTE(review): the actions taken by the step 1 tests and the error paths
 * after btrfs_read_fs_root() and btrfs_search_slot() are on lines not
 * present in this listing.
 */
5478 static int check_owner_ref(struct btrfs_root *root,
5479 struct extent_record *rec,
5480 struct extent_buffer *buf)
5482 struct extent_backref *node;
5483 struct tree_backref *back;
5484 struct btrfs_root *ref_root;
5485 struct btrfs_key key;
5486 struct btrfs_path path;
5487 struct extent_buffer *parent;
5492 list_for_each_entry(node, &rec->backrefs, list) {
5495 if (!node->found_ref)
5497 if (node->full_backref)
5499 back = to_tree_backref(node);
5500 if (btrfs_header_owner(buf) == back->root)
5503 BUG_ON(rec->is_root);
5505 /* try to find the block by searching the corresponding fs tree */
5506 key.objectid = btrfs_header_owner(buf);
5507 key.type = BTRFS_ROOT_ITEM_KEY;
5508 key.offset = (u64)-1;
5510 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5511 if (IS_ERR(ref_root))
5514 level = btrfs_header_level(buf);
/* The search key is the first key stored in buf (item key or node key). */
5516 btrfs_item_key_to_cpu(buf, &key, 0);
5518 btrfs_node_key_to_cpu(buf, &key, 0);
5520 btrfs_init_path(&path);
/* Stop one level above buf so path.nodes[level + 1] is its would-be parent. */
5521 path.lowest_level = level + 1;
5522 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5526 parent = path.nodes[level + 1];
5527 if (parent && buf->start == btrfs_node_blockptr(parent,
5528 path.slots[level + 1]))
5531 btrfs_release_path(&path);
5532 return found ? 0 : 1;
/*
 * Walk rec->backrefs and inspect each node as a tree backref: the
 * full_backref flag is tested and back->root is compared with
 * BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the return statements and list advance are on lines not
 * present in this listing; presumably the result tells whether the block
 * belongs to the extent tree - confirm against the full source.
 */
5535 static int is_extent_tree_record(struct extent_record *rec)
5537 struct list_head *cur = rec->backrefs.next;
5538 struct extent_backref *node;
5539 struct tree_backref *back;
5542 while(cur != &rec->backrefs) {
5543 node = to_extent_backref(cur);
5547 back = to_tree_backref(node);
5548 if (node->full_backref)
5550 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable block as a corrupt extent.
 *
 * Looks up @extent_cache with (start, len), maps the hit to its
 * extent_record and tests it with is_extent_tree_record(). On the path
 * that reaches the end, rec->parent_key is converted to a cpu key and the
 * result of btrfs_add_corrupt_extent_record(info, &key, start, len, 0) is
 * returned.
 * NOTE(review): the start/len parameter line and the early-exit returns
 * are on lines not present in this listing.
 */
5557 static int record_bad_block_io(struct btrfs_fs_info *info,
5558 struct cache_tree *extent_cache,
5561 struct extent_record *rec;
5562 struct cache_extent *cache;
5563 struct btrfs_key key;
5565 cache = lookup_cache_extent(extent_cache, start, len);
5569 rec = container_of(cache, struct extent_record, cache);
5570 if (!is_extent_tree_record(rec))
5573 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5574 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node case (non-zero header level): both btrfs_key_ptr entries are read
 * out and written back in each other's position; the key of entry 0 is
 * then passed to btrfs_fixup_low_keys() for level + 1 (the condition
 * guarding that fixup is not visible in this listing).
 *
 * Leaf case: both item payloads are copied into heap buffers and written
 * back, the item offsets and sizes are exchanged, and the two keys are
 * rewritten through btrfs_set_item_key_unsafe() with path->slots[0]
 * pointed at each slot in turn.
 *
 * NOTE(review): allocation failure handling, the release of the two heap
 * buffers and the return value are on lines not present in this listing.
 */
5577 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5578 struct extent_buffer *buf, int slot)
5580 if (btrfs_header_level(buf)) {
5581 struct btrfs_key_ptr ptr1, ptr2;
5583 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5584 sizeof(struct btrfs_key_ptr));
5585 read_extent_buffer(buf, &ptr2,
5586 btrfs_node_key_ptr_offset(slot + 1),
5587 sizeof(struct btrfs_key_ptr));
5588 write_extent_buffer(buf, &ptr1,
5589 btrfs_node_key_ptr_offset(slot + 1),
5590 sizeof(struct btrfs_key_ptr));
5591 write_extent_buffer(buf, &ptr2,
5592 btrfs_node_key_ptr_offset(slot),
5593 sizeof(struct btrfs_key_ptr));
5595 struct btrfs_disk_key key;
5596 btrfs_node_key(buf, &key, 0);
5597 btrfs_fixup_low_keys(root, path, &key,
5598 btrfs_header_level(buf) + 1);
5601 struct btrfs_item *item1, *item2;
5602 struct btrfs_key k1, k2;
5603 char *item1_data, *item2_data;
5604 u32 item1_offset, item2_offset, item1_size, item2_size;
5606 item1 = btrfs_item_nr(slot);
5607 item2 = btrfs_item_nr(slot + 1);
5608 btrfs_item_key_to_cpu(buf, &k1, slot);
5609 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5610 item1_offset = btrfs_item_offset(buf, item1);
5611 item2_offset = btrfs_item_offset(buf, item2);
5612 item1_size = btrfs_item_size(buf, item1);
5613 item2_size = btrfs_item_size(buf, item2);
5615 item1_data = malloc(item1_size);
5618 item2_data = malloc(item2_size);
5624 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5625 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but item2_size bytes are
 * copied out of it here (and the reverse on the next line). When the two
 * sizes differ the larger length reads past the smaller allocation -
 * confirm the intended lengths.
 */
5627 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5628 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5632 btrfs_set_item_offset(buf, item1, item2_offset);
5633 btrfs_set_item_offset(buf, item2, item1_offset);
5634 btrfs_set_item_size(buf, item1, item2_size);
5635 btrfs_set_item_size(buf, item2, item1_size);
/* btrfs_set_item_key_unsafe() acts on path->slots[0], so aim it per key. */
5637 path->slots[0] = slot;
5638 btrfs_set_item_key_unsafe(root, path, &k2);
5639 path->slots[0] = slot + 1;
5640 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Each adjacent pair of keys (i, i + 1) is loaded, either as node keys or
 * as item keys, and compared with btrfs_comp_cpu_keys(). swap_values() is
 * called for a pair and the buffer is then marked dirty.
 * NOTE(review): the action taken when the comparison is < 0, the handling
 * of the swap_values() result and the return value are on lines not
 * present in this listing.
 * NOTE(review): the loop bound is btrfs_header_nritems(buf) - 1; if that
 * helper returns an unsigned type, a block with zero items makes the
 * bound wrap - TODO confirm.
 */
5645 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5647 struct extent_buffer *buf;
5648 struct btrfs_key k1, k2;
5650 int level = path->lowest_level;
5653 buf = path->nodes[level];
5654 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5656 btrfs_node_key_to_cpu(buf, &k1, i);
5657 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5659 btrfs_item_key_to_cpu(buf, &k1, i);
5660 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5662 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5664 ret = swap_values(root, path, buf, i);
5667 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type at @slot is first tested against DIR_INDEX, EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF and EXTENT_DATA_REF (the action for any
 * other type is on a line not present in this listing). The deletion is
 * then announced on stdout, the item headers after @slot are moved down by
 * one with memmove_extent_buffer(), nritems is decremented, and the buffer
 * is marked dirty. Only the item header array is moved here; no item
 * payload bytes are touched by the visible code.
 * NOTE(review): the condition guarding the btrfs_fixup_low_keys() call and
 * the return value are on lines not present in this listing.
 */
5673 static int delete_bogus_item(struct btrfs_root *root,
5674 struct btrfs_path *path,
5675 struct extent_buffer *buf, int slot)
5677 struct btrfs_key key;
5678 int nritems = btrfs_header_nritems(buf);
5680 btrfs_item_key_to_cpu(buf, &key, slot);
5682 /* These are all the keys we can deal with missing. */
5683 if (key.type != BTRFS_DIR_INDEX_KEY &&
5684 key.type != BTRFS_EXTENT_ITEM_KEY &&
5685 key.type != BTRFS_METADATA_ITEM_KEY &&
5686 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5687 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5690 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5691 (unsigned long long)key.objectid, key.type,
5692 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the removed slot. */
5693 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5694 btrfs_item_nr_offset(slot + 1),
5695 sizeof(struct btrfs_item) *
5696 (nritems - slot - 1));
5697 btrfs_set_header_nritems(buf, nritems - 1);
5699 struct btrfs_disk_key disk_key;
/* Propagate the key now at slot 0 to the parent level. */
5701 btrfs_item_key(buf, &disk_key, 0);
5702 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5704 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * For item 0 the expected end of its data is BTRFS_LEAF_DATA_SIZE(root);
 * for every later item it is the data offset of the previous item. When
 * an item ends beyond the expected position, delete_bogus_item() is tried
 * and an error is printed to stderr. When it ends short of it, the gap is
 * computed as a shift, the item payload is moved by that many bytes with
 * memmove_extent_buffer(), its offset is updated and the buffer is marked
 * dirty.
 * NOTE(review): the handling of the delete_bogus_item() result, the
 * zero-shift case, the new offset argument and the return value are on
 * lines not present in this listing.
 */
5708 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5710 struct extent_buffer *buf;
5714 /* We should only get this for leaves */
5715 BUG_ON(path->lowest_level);
5716 buf = path->nodes[0];
5718 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5719 unsigned int shift = 0, offset;
/* First item: its data must end exactly at the end of the leaf data area. */
5721 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5722 BTRFS_LEAF_DATA_SIZE(root)) {
5723 if (btrfs_item_end_nr(buf, i) >
5724 BTRFS_LEAF_DATA_SIZE(root)) {
5725 ret = delete_bogus_item(root, path, buf, i);
5728 fprintf(stderr, "item is off the end of the "
5729 "leaf, can't fix\n");
5733 shift = BTRFS_LEAF_DATA_SIZE(root) -
5734 btrfs_item_end_nr(buf, i);
/* Later items: data must end where the previous item data begins. */
5735 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5736 btrfs_item_offset_nr(buf, i - 1)) {
5737 if (btrfs_item_end_nr(buf, i) >
5738 btrfs_item_offset_nr(buf, i - 1)) {
5739 ret = delete_bogus_item(root, path, buf, i);
5742 fprintf(stderr, "items overlap, can't fix\n");
5746 shift = btrfs_item_offset_nr(buf, i - 1) -
5747 btrfs_item_end_nr(buf, i);
5752 printf("Shifting item nr %d by %u bytes in block %llu\n",
5753 i, shift, (unsigned long long)buf->start);
5754 offset = btrfs_item_offset_nr(buf, i);
5755 memmove_extent_buffer(buf,
5756 btrfs_leaf_data(buf) + offset + shift,
5757 btrfs_leaf_data(buf) + offset,
5758 btrfs_item_size_nr(buf, i));
5759 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5761 btrfs_mark_buffer_dirty(buf);
5765 * We may have moved things, in which case we want to exit so we don't
5766 * write those changes out. Once we have proper abort functionality in
5767 * progs this can be changed to something nicer.
5774 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5775 * then just return -EIO.
/*
 * Try to repair @buf for the two statuses this code knows how to handle,
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 *
 * btrfs_find_all_roots() collects the roots referring to buf->start. For
 * each of them the root is read, a transaction is started, and the first
 * key of @buf is searched with path.lowest_level set to the level of @buf
 * and path.skip_check_block set. The matching fixer (fix_key_order() or
 * fix_item_offset()) then runs on the resulting path and the transaction
 * is committed.
 * NOTE(review): the early return for other statuses, the error checks
 * between the visible calls, the release of the roots list and the return
 * value are on lines not present in this listing.
 */
5777 static int try_to_fix_bad_block(struct btrfs_root *root,
5778 struct extent_buffer *buf,
5779 enum btrfs_tree_block_status status)
5781 struct btrfs_trans_handle *trans;
5782 struct ulist *roots;
5783 struct ulist_node *node;
5784 struct btrfs_root *search_root;
5785 struct btrfs_path path;
5786 struct ulist_iterator iter;
5787 struct btrfs_key root_key, key;
5790 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5791 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5794 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5798 btrfs_init_path(&path);
5799 ULIST_ITER_INIT(&iter);
5800 while ((node = ulist_next(roots, &iter))) {
/* Each ulist value is used as the objectid of a root to repair through. */
5801 root_key.objectid = node->val;
5802 root_key.type = BTRFS_ROOT_ITEM_KEY;
5803 root_key.offset = (u64)-1;
5805 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5812 trans = btrfs_start_transaction(search_root, 0);
5813 if (IS_ERR(trans)) {
5814 ret = PTR_ERR(trans);
5818 path.lowest_level = btrfs_header_level(buf);
5819 path.skip_check_block = 1;
5820 if (path.lowest_level)
5821 btrfs_node_key_to_cpu(buf, &key, 0);
5823 btrfs_item_key_to_cpu(buf, &key, 0);
5824 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5827 btrfs_commit_transaction(trans, search_root);
5830 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5831 ret = fix_key_order(search_root, &path);
5832 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5833 ret = fix_item_offset(search_root, &path);
5835 btrfs_commit_transaction(trans, search_root);
5838 btrfs_release_path(&path);
5839 btrfs_commit_transaction(trans, search_root);
5842 btrfs_release_path(&path);
/*
 * Validate tree block @buf against its extent record.
 *
 * The record is looked up in @extent_cache by (buf->start, buf->len) and
 * updated with the header generation, the objectid of the first key (when
 * the block has items) and the level. The block is then checked with
 * btrfs_check_leaf() or btrfs_check_node() using rec->parent_key. A status
 * other than BTRFS_TREE_BLOCK_CLEAN leads to try_to_fix_bad_block() and,
 * if still not clean, a bad block message on stderr. Otherwise
 * content_checked is set; owner_ref_checked is set directly when @flags
 * has BTRFS_BLOCK_FLAG_FULL_BACKREF, or else in connection with
 * check_owner_ref(). maybe_free_extent_rec() is called on the record.
 * NOTE(review): the lookup-miss path, the condition guarding the repair
 * attempt, the test on the check_owner_ref() result and the return value
 * are on lines not present in this listing.
 */
5846 static int check_block(struct btrfs_root *root,
5847 struct cache_tree *extent_cache,
5848 struct extent_buffer *buf, u64 flags)
5850 struct extent_record *rec;
5851 struct cache_extent *cache;
5852 struct btrfs_key key;
5853 enum btrfs_tree_block_status status;
5857 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5860 rec = container_of(cache, struct extent_record, cache);
5861 rec->generation = btrfs_header_generation(buf);
5863 level = btrfs_header_level(buf);
/* The first key is only read when the block actually holds items. */
5864 if (btrfs_header_nritems(buf) > 0) {
5867 btrfs_item_key_to_cpu(buf, &key, 0);
5869 btrfs_node_key_to_cpu(buf, &key, 0);
5871 rec->info_objectid = key.objectid;
5873 rec->info_level = level;
5875 if (btrfs_is_leaf(buf))
5876 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5878 status = btrfs_check_node(root, &rec->parent_key, buf);
5880 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5882 status = try_to_fix_bad_block(root, buf, status);
5883 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5885 fprintf(stderr, "bad block %llu\n",
5886 (unsigned long long)buf->start);
5889 * Signal to callers we need to start the scan over
5890 * again since we'll have cowed blocks.
5895 rec->content_checked = 1;
5896 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5897 rec->owner_ref_checked = 1;
5899 ret = check_owner_ref(root, rec, buf);
5901 rec->owner_ref_checked = 1;
5905 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search rec->backrefs for a tree backref matching @parent or @root.
 *
 * Two comparisons are visible: one pairs a full_backref test with
 * parent == back->parent, the other pairs a full_backref test with
 * back->root == root.
 * NOTE(review): the condition selecting between the two comparisons, the
 * skip/advance statements and the return values are on lines not present
 * in this listing.
 */
5909 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5910 u64 parent, u64 root)
5912 struct list_head *cur = rec->backrefs.next;
5913 struct extent_backref *node;
5914 struct tree_backref *back;
5916 while(cur != &rec->backrefs) {
5917 node = to_extent_backref(cur);
5921 back = to_tree_backref(node);
5923 if (!node->full_backref)
5925 if (parent == back->parent)
5928 if (node->full_backref)
5930 if (back->root == root)
/*
 * Allocate a tree_backref, zero its embedded node and append it to
 * rec->backrefs.
 *
 * Two initialisations are visible: one stores @parent and sets
 * full_backref to 1, the other sets full_backref to 0.
 * NOTE(review): the allocation failure check, the condition choosing
 * between the two initialisations, the store for the non-full case and
 * the return value are on lines not present in this listing.
 */
5937 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5938 u64 parent, u64 root)
5940 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared, not the whole tree_backref. */
5944 memset(&ref->node, 0, sizeof(ref->node));
5946 ref->parent = parent;
5947 ref->node.full_backref = 1;
5950 ref->node.full_backref = 0;
5952 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search rec->backrefs for a data backref.
 *
 * Two comparisons are visible: one pairs a full_backref test with
 * parent == back->parent; the other pairs a full_backref test with a
 * match on root, owner and offset. Inside the second, when both the
 * found_ref argument and node->found_ref are set, a difference in bytes or
 * disk_bytenr is tested as well.
 * NOTE(review): the found_ref parameter line, the condition selecting
 * between the two comparisons, the skip/advance statements and the return
 * values are on lines not present in this listing.
 */
5957 static struct data_backref *find_data_backref(struct extent_record *rec,
5958 u64 parent, u64 root,
5959 u64 owner, u64 offset,
5961 u64 disk_bytenr, u64 bytes)
5963 struct list_head *cur = rec->backrefs.next;
5964 struct extent_backref *node;
5965 struct data_backref *back;
5967 while(cur != &rec->backrefs) {
5968 node = to_extent_backref(cur);
5972 back = to_data_backref(node);
5974 if (!node->full_backref)
5976 if (parent == back->parent)
5979 if (node->full_backref)
5981 if (back->root == root && back->owner == owner &&
5982 back->offset == offset) {
5983 if (found_ref && node->found_ref &&
5984 (back->bytes != bytes ||
5985 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref, zero its embedded node, flag it as data and
 * append it to rec->backrefs.
 *
 * Two initialisations are visible: one stores @parent and sets
 * full_backref to 1, the other stores @offset and sets full_backref to 0.
 * ref->bytes is set to max_size, and rec->max_size is raised to max_size
 * when max_size is larger.
 * NOTE(review): the max_size parameter line, the allocation failure
 * check, the condition choosing between the two initialisations, the
 * remaining field stores and the return value are on lines not present in
 * this listing.
 */
5994 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5995 u64 parent, u64 root,
5996 u64 owner, u64 offset,
5999 struct data_backref *ref = malloc(sizeof(*ref));
6003 memset(&ref->node, 0, sizeof(ref->node));
6004 ref->node.is_data = 1;
6007 ref->parent = parent;
6010 ref->node.full_backref = 1;
6014 ref->offset = offset;
6015 ref->node.full_backref = 0;
6017 ref->bytes = max_size;
6020 list_add_tail(&ref->node.list, &rec->backrefs);
/* Keep the record-wide maximum in step with the largest backref seen. */
6021 if (max_size > rec->max_size)
6022 rec->max_size = max_size;
6026 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent does not fit the flags of the
 * block group found by btrfs_lookup_first_block_group() for rec->start.
 *
 * - Data extent (rec->metadata clear): the block group must have
 *   BTRFS_BLOCK_GROUP_DATA.
 * - Metadata extent: the block group must have SYSTEM or METADATA. If the
 *   record has backrefs, the first one decides the exact type: a data
 *   backref is flagged as wrong; a tree backref whose root is
 *   BTRFS_CHUNK_TREE_OBJECTID requires SYSTEM; the other assignment
 *   requires METADATA.
 * NOTE(review): the handling of a failed block group lookup and the early
 * returns after each branch are on lines not present in this listing.
 */
6027 static void check_extent_type(struct extent_record *rec)
6029 struct btrfs_block_group_cache *bg_cache;
6031 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6035 /* data extent, check chunk directly*/
6036 if (!rec->metadata) {
6037 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6038 rec->wrong_chunk_type = 1;
6042 /* metadata extent, check the obvious case first */
6043 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6044 BTRFS_BLOCK_GROUP_METADATA))) {
6045 rec->wrong_chunk_type = 1;
6050 * Check SYSTEM extent, as it's also marked as metadata, we can only
6051 * make sure it's a SYSTEM extent by its backref
6053 if (!list_empty(&rec->backrefs)) {
6054 struct extent_backref *node;
6055 struct tree_backref *tback;
6058 node = to_extent_backref(rec->backrefs.next);
6059 if (node->is_data) {
6060 /* tree block shouldn't have data backref */
6061 rec->wrong_chunk_type = 1;
6064 tback = container_of(node, struct tree_backref, node);
6066 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6067 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6069 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6070 if (!(bg_cache->flags & bg_type))
6071 rec->wrong_chunk_type = 1;
6076 * Allocate a new extent record, fill default values from @tmpl and insert into
6077 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6078 * the cache, otherwise it fails.
/*
 * Create an extent_record from @tmpl and insert it into @extent_cache.
 *
 * @tmpl->max_size must be non-zero (BUG_ON). Copied from the template:
 * start, max_size, found_rec, content_checked, owner_ref_checked,
 * metadata, is_root, refs, extent_item_refs, parent_generation and
 * parent_key. Reset regardless of the template: num_duplicates,
 * bad_full_backref, crossing_stripes, wrong_chunk_type (all 0) and
 * flag_block_full_backref (FLAG_UNSET). rec->nr is the larger of
 * tmpl->nr and tmpl->max_size, while the cache entry size is tmpl->nr.
 * After insertion bytes_used grows by rec->nr, crossing_stripes is
 * computed with check_crossing_stripes() and check_extent_type() runs.
 * NOTE(review): the allocation failure check, the handling of the
 * insert_cache_extent() result, the condition guarding the crossing
 * stripes check and the return value are on lines not present in this
 * listing.
 */
6080 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6081 struct extent_record *tmpl)
6083 struct extent_record *rec;
6086 BUG_ON(tmpl->max_size == 0);
6087 rec = malloc(sizeof(*rec));
6090 rec->start = tmpl->start;
6091 rec->max_size = tmpl->max_size;
6092 rec->nr = max(tmpl->nr, tmpl->max_size);
6093 rec->found_rec = tmpl->found_rec;
6094 rec->content_checked = tmpl->content_checked;
6095 rec->owner_ref_checked = tmpl->owner_ref_checked;
6096 rec->num_duplicates = 0;
6097 rec->metadata = tmpl->metadata;
6098 rec->flag_block_full_backref = FLAG_UNSET;
6099 rec->bad_full_backref = 0;
6100 rec->crossing_stripes = 0;
6101 rec->wrong_chunk_type = 0;
6102 rec->is_root = tmpl->is_root;
6103 rec->refs = tmpl->refs;
6104 rec->extent_item_refs = tmpl->extent_item_refs;
6105 rec->parent_generation = tmpl->parent_generation;
6106 INIT_LIST_HEAD(&rec->backrefs);
6107 INIT_LIST_HEAD(&rec->dups);
6108 INIT_LIST_HEAD(&rec->list);
6109 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache key uses tmpl->nr, which may be smaller than rec->nr above. */
6110 rec->cache.start = tmpl->start;
6111 rec->cache.size = tmpl->nr;
6112 ret = insert_cache_extent(extent_cache, &rec->cache);
6117 bytes_used += rec->nr;
6120 rec->crossing_stripes = check_crossing_stripes(global_info,
6121 rec->start, global_info->nodesize);
6122 check_extent_type(rec);
6127 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6129 * - refs - if found, increase refs
6130 * - is_root - if found, set
6131 * - content_checked - if found, set
6132 * - owner_ref_checked - if found, set
6134 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record overlapping (tmpl->start, tmpl->nr)
 * in @extent_cache, or create a new record through
 * add_extent_rec_nolookup().
 *
 * Visible updates on an existing record:
 * - rec->nr is reset to the larger of tmpl->nr and tmpl->max_size.
 * - If tmpl->found_rec is set and either the start differs or the record
 *   was already found, the record is queued on duplicate_extents (once)
 *   and a partially filled copy of @tmpl is appended to rec->dups, with
 *   num_duplicates incremented.
 * - A non-zero tmpl->extent_item_refs on a non-duplicate is stored in the
 *   record; if the record already had a value, both are printed first.
 * - content_checked and owner_ref_checked are only ever set, parent_key
 *   is overwritten, parent_generation is taken when non-zero and max_size
 *   only grows.
 * - crossing_stripes is recomputed, then check_extent_type() and
 *   maybe_free_extent_rec() run.
 * NOTE(review): the lookup-hit test, the refs/is_root updates, how dup is
 * set, the remaining fields of the duplicate copy, the condition guarding
 * the crossing stripes check and the return values are on lines not
 * present in this listing.
 */
6136 static int add_extent_rec(struct cache_tree *extent_cache,
6137 struct extent_record *tmpl)
6139 struct extent_record *rec;
6140 struct cache_extent *cache;
6144 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6146 rec = container_of(cache, struct extent_record, cache);
6150 rec->nr = max(tmpl->nr, tmpl->max_size);
6153 * We need to make sure to reset nr to whatever the extent
6154 * record says was the real size, this way we can compare it to
6157 if (tmpl->found_rec) {
6158 if (tmpl->start != rec->start || rec->found_rec) {
6159 struct extent_record *tmp;
6162 if (list_empty(&rec->list))
6163 list_add_tail(&rec->list,
6164 &duplicate_extents);
6167 * We have to do this song and dance in case we
6168 * find an extent record that falls inside of
6169 * our current extent record but does not have
6170 * the same objectid.
6172 tmp = malloc(sizeof(*tmp));
6175 tmp->start = tmpl->start;
6176 tmp->max_size = tmpl->max_size;
6179 tmp->metadata = tmpl->metadata;
6180 tmp->extent_item_refs = tmpl->extent_item_refs;
6181 INIT_LIST_HEAD(&tmp->list);
6182 list_add_tail(&tmp->list, &rec->dups);
6183 rec->num_duplicates++;
6190 if (tmpl->extent_item_refs && !dup) {
6191 if (rec->extent_item_refs) {
6192 fprintf(stderr, "block %llu rec "
6193 "extent_item_refs %llu, passed %llu\n",
6194 (unsigned long long)tmpl->start,
6195 (unsigned long long)
6196 rec->extent_item_refs,
6197 (unsigned long long)tmpl->extent_item_refs);
6199 rec->extent_item_refs = tmpl->extent_item_refs;
6203 if (tmpl->content_checked)
6204 rec->content_checked = 1;
6205 if (tmpl->owner_ref_checked)
6206 rec->owner_ref_checked = 1;
6207 memcpy(&rec->parent_key, &tmpl->parent_key,
6208 sizeof(tmpl->parent_key));
6209 if (tmpl->parent_generation)
6210 rec->parent_generation = tmpl->parent_generation;
6211 if (rec->max_size < tmpl->max_size)
6212 rec->max_size = tmpl->max_size;
6215 * A metadata extent can't cross stripe_len boundary, otherwise
6216 * kernel scrub won't be able to handle it.
6217 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6221 rec->crossing_stripes = check_crossing_stripes(
6222 global_info, rec->start,
6223 global_info->nodesize);
6224 check_extent_type(rec);
6225 maybe_free_extent_rec(extent_cache, rec);
6229 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * The extent record is looked up with (bytenr, 1); a template-based record
 * is created through add_extent_rec_nolookup() and looked up again when
 * needed. rec->start is compared with @bytenr. The matching backref is
 * fetched with find_tree_backref() or created with alloc_tree_backref().
 * Two flag updates are visible, each preceded by an already-exists
 * message on stderr if the flag was set: node.found_ref and
 * node.found_extent_tree. Finally check_extent_type() and
 * maybe_free_extent_rec() run on the record.
 * NOTE(review): the lookup-miss test, the remaining template fields, the
 * outcome of the rec->start mismatch test, the condition selecting which
 * flag is updated (presumably the found_ref argument - confirm) and the
 * return values are on lines not present in this listing.
 */
6234 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6235 u64 parent, u64 root, int found_ref)
6237 struct extent_record *rec;
6238 struct tree_backref *back;
6239 struct cache_extent *cache;
6242 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6244 struct extent_record tmpl;
6246 memset(&tmpl, 0, sizeof(tmpl));
6247 tmpl.start = bytenr;
6252 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6256 /* really a bug in the cache_extent implementation now */
6257 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6262 rec = container_of(cache, struct extent_record, cache);
6263 if (rec->start != bytenr) {
6265 * Several cause, from unaligned bytenr to over lapping extents
6270 back = find_tree_backref(rec, parent, root);
6272 back = alloc_tree_backref(rec, parent, root);
6278 if (back->node.found_ref) {
6279 fprintf(stderr, "Extent back ref already exists "
6280 "for %llu parent %llu root %llu \n",
6281 (unsigned long long)bytenr,
6282 (unsigned long long)parent,
6283 (unsigned long long)root);
6285 back->node.found_ref = 1;
6287 if (back->node.found_extent_tree) {
6288 fprintf(stderr, "Extent back ref already exists "
6289 "for %llu parent %llu root %llu \n",
6290 (unsigned long long)bytenr,
6291 (unsigned long long)parent,
6292 (unsigned long long)root);
6294 back->node.found_extent_tree = 1;
6296 check_extent_type(rec);
6297 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up with (bytenr, 1); a template-based record
 * (start and max_size visible) is created through
 * add_extent_rec_nolookup() and looked up again when needed.
 * rec->max_size only grows. The backref is fetched with
 * find_data_backref() or created with alloc_data_backref().
 *
 * Two update groups are visible:
 * - num_refs must be 1 (BUG_ON); a backref that already has found_ref set
 *   must have bytes equal to max_size (BUG_ON); then node.found_ref is
 *   set, the found_ref counter is incremented, bytes and disk_bytenr are
 *   stored, and the record gets content_checked and owner_ref_checked.
 * - an already-exists message on stderr if node.found_extent_tree was
 *   set, then num_refs is stored and node.found_extent_tree is set.
 * maybe_free_extent_rec() runs on the record at the end.
 * NOTE(review): the lookup-miss test, the remaining arguments of the
 * find/alloc calls, the condition selecting between the two update groups
 * (presumably the found_ref argument - confirm) and the return values are
 * on lines not present in this listing.
 */
6301 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6302 u64 parent, u64 root, u64 owner, u64 offset,
6303 u32 num_refs, int found_ref, u64 max_size)
6305 struct extent_record *rec;
6306 struct data_backref *back;
6307 struct cache_extent *cache;
6310 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6312 struct extent_record tmpl;
6314 memset(&tmpl, 0, sizeof(tmpl));
6315 tmpl.start = bytenr;
6317 tmpl.max_size = max_size;
6319 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6323 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6328 rec = container_of(cache, struct extent_record, cache);
6329 if (rec->max_size < max_size)
6330 rec->max_size = max_size;
6333 * If found_ref is set then max_size is the real size and must match the
6334 * existing refs. So if we have already found a ref then we need to
6335 * make sure that this ref matches the existing one, otherwise we need
6336 * to add a new backref so we can notice that the backrefs don't match
6337 * and we need to figure out who is telling the truth. This is to
6338 * account for that awful fsync bug I introduced where we'd end up with
6339 * a btrfs_file_extent_item that would have its length include multiple
6340 * prealloc extents or point inside of a prealloc extent.
6342 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6345 back = alloc_data_backref(rec, parent, root, owner, offset,
6351 BUG_ON(num_refs != 1);
6352 if (back->node.found_ref)
6353 BUG_ON(back->bytes != max_size);
6354 back->node.found_ref = 1;
6355 back->found_ref += 1;
6356 back->bytes = max_size;
6357 back->disk_bytenr = bytenr;
6359 rec->content_checked = 1;
6360 rec->owner_ref_checked = 1;
6362 if (back->node.found_extent_tree) {
6363 fprintf(stderr, "Extent back ref already exists "
6364 "for %llu parent %llu root %llu "
6365 "owner %llu offset %llu num_refs %lu\n",
6366 (unsigned long long)bytenr,
6367 (unsigned long long)parent,
6368 (unsigned long long)root,
6369 (unsigned long long)owner,
6370 (unsigned long long)offset,
6371 (unsigned long)num_refs);
6373 back->num_refs = num_refs;
6374 back->node.found_extent_tree = 1;
6376 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register the range (@bytenr, @size) in @seen and then in @pending.
 * The result of the insertion into @seen is kept in ret; the result of
 * the insertion into @pending is not captured.
 * NOTE(review): what happens when the insertion into @seen fails and the
 * return value are on lines not present in this listing.
 */
6380 static int add_pending(struct cache_tree *pending,
6381 struct cache_tree *seen, u64 bytenr, u32 size)
6384 ret = add_cache_extent(seen, bytenr, size);
6387 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the next block ranges to process.
 *
 * Sources are consulted in this visible order: the first entry of @reada
 * (stored in bits[0]); then @nodes, searched from a position 32768 below
 * @last when @last is larger than that, with a second search from 0; then
 * @pending from 0. Entries are copied while more exist and ret is below
 * @bits_nr. When more than 8 slots remain, further @pending entries that
 * start within 32768 bytes of the end of the previously taken range are
 * appended.
 * NOTE(review): the tests between the searches, the increments of ret,
 * the loop around the trailing @pending scan and the return values are on
 * lines not present in this listing.
 * NOTE(review): node_start is unsigned long but is initialised from the
 * u64 @last; where unsigned long is narrower than u64 the value is
 * truncated - confirm whether that matters here.
 */
6391 static int pick_next_pending(struct cache_tree *pending,
6392 struct cache_tree *reada,
6393 struct cache_tree *nodes,
6394 u64 last, struct block_info *bits, int bits_nr,
6397 unsigned long node_start = last;
6398 struct cache_extent *cache;
6401 cache = search_cache_extent(reada, 0);
6403 bits[0].start = cache->start;
6404 bits[0].size = cache->size;
6409 if (node_start > 32768)
6410 node_start -= 32768;
6412 cache = search_cache_extent(nodes, node_start);
6414 cache = search_cache_extent(nodes, 0);
6417 cache = search_cache_extent(pending, 0);
6422 bits[ret].start = cache->start;
6423 bits[ret].size = cache->size;
6424 cache = next_cache_extent(cache);
6426 } while (cache && ret < bits_nr);
6432 bits[ret].start = cache->start;
6433 bits[ret].size = cache->size;
6434 cache = next_cache_extent(cache);
6436 } while (cache && ret < bits_nr);
6438 if (bits_nr - ret > 8) {
/* Continue from the end of the first range already placed in bits. */
6439 u64 lookup = bits[0].start + bits[0].size;
6440 struct cache_extent *next;
6441 next = search_cache_extent(pending, lookup);
6443 if (next->start - lookup > 32768)
6445 bits[ret].start = next->start;
6446 bits[ret].size = next->size;
6447 lookup = next->start + next->size;
6451 next = next_cache_extent(next);
/*
 * Per-entry destructor for the chunk cache: maps @cache to its
 * chunk_record and unlinks the record from both of its lists (list and
 * dextents).
 * NOTE(review): the release of the record memory is on a line not present
 * in this listing.
 */
6459 static void free_chunk_record(struct cache_extent *cache)
6461 struct chunk_record *rec;
6463 rec = container_of(cache, struct chunk_record, cache);
6464 list_del_init(&rec->list);
6465 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by handing the tree to
 * cache_tree_free_extents() with free_chunk_record() as the per-entry
 * callback.
 */
6469 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6471 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node destructor for the device rb-tree: maps @node to the
 * device_record that embeds it.
 * NOTE(review): the release of the record memory is on a line not present
 * in this listing.
 */
6474 static void free_device_record(struct rb_node *node)
6476 struct device_record *rec;
6478 rec = container_of(node, struct device_record, node);
/*
 * Macro expansion parameterised with the name device_cache and the
 * free_device_record() callback.
 * NOTE(review): presumably this generates the routine that frees the
 * device record rb-tree - confirm against the FREE_RB_BASED_TREE
 * definition, which is not part of this listing.
 */
6482 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to
 * tree->block_groups.
 * NOTE(review): the handling of the insert_cache_extent() result and the
 * return value are on lines not present in this listing.
 */
6484 int insert_block_group_record(struct block_group_tree *tree,
6485 struct block_group_record *bg_rec)
6489 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6493 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry destructor for the block group cache: maps @cache to its
 * block_group_record and unlinks the record from its list.
 * NOTE(review): the release of the record memory is on a line not present
 * in this listing.
 */
6497 static void free_block_group_record(struct cache_extent *cache)
6499 struct block_group_record *rec;
6501 rec = container_of(cache, struct block_group_record, cache);
6502 list_del_init(&rec->list);
/*
 * Release every entry of the cache tree inside @tree by handing it to
 * cache_tree_free_extents() with free_block_group_record() as the
 * per-entry callback.
 */
6506 void free_block_group_tree(struct block_group_tree *tree)
6508 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the cache tree of @tree with insert_cache_extent2()
 * and queue it on both orphan lists of the tree (no_chunk_orphans through
 * chunk_list, no_device_orphans through device_list).
 * NOTE(review): the handling of the insert result and the return value
 * are on lines not present in this listing.
 */
6511 int insert_device_extent_record(struct device_extent_tree *tree,
6512 struct device_extent_record *de_rec)
6517 * Device extent is a bit different from the other extents, because
6518 * the extents which belong to the different devices may have the
6519 * same start and size, so we need use the special extent cache
6520 * search/insert functions.
6522 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6526 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6527 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry destructor for the device extent cache: maps @cache to its
 * device_extent_record and unlinks the record from chunk_list and
 * device_list when it is linked on them.
 * NOTE(review): the release of the record memory is on a line not present
 * in this listing.
 */
6531 static void free_device_extent_record(struct cache_extent *cache)
6533 struct device_extent_record *rec;
6535 rec = container_of(cache, struct device_extent_record, cache);
6536 if (!list_empty(&rec->chunk_list))
6537 list_del_init(&rec->chunk_list);
6538 if (!list_empty(&rec->device_list))
6539 list_del_init(&rec->device_list);
/*
 * Release every entry of the cache tree inside @tree by handing it to
 * cache_tree_free_extents() with free_device_extent_record() as the
 * per-entry callback.
 */
6543 void free_device_extent_tree(struct device_extent_tree *tree)
6545 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6548 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref entry.
 *
 * The item key supplies the extent bytenr (key.objectid) and the second
 * backref argument (key.offset). When the ref objectid is below
 * BTRFS_FIRST_FREE_OBJECTID the ref is recorded with add_tree_backref();
 * the other visible call records it with add_data_backref(), passing the
 * v0 ref count as num_refs and zero for root, owner, offset, found_ref
 * and max_size.
 * NOTE(review): the trailing add_tree_backref() arguments and the return
 * value are on lines not present in this listing.
 */
6549 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6550 struct extent_buffer *leaf, int slot)
6552 struct btrfs_extent_ref_v0 *ref0;
6553 struct btrfs_key key;
6556 btrfs_item_key_to_cpu(leaf, &key, slot);
6557 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6558 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6559 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6562 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6563 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for num_stripes stripes
 * (btrfs_chunk_record_size()). The cache range is (key->offset, chunk
 * length); generation comes from the leaf header; objectid, type and
 * offset come from @key; the remaining fields are read from the chunk
 * item. Each stripe gets its devid, offset and device uuid copied.
 * NOTE(review): the slot parameter line, what follows the allocation
 * failure message, the length argument of the uuid copy and the return
 * value are on lines not present in this listing.
 */
6569 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6570 struct btrfs_key *key,
6573 struct btrfs_chunk *ptr;
6574 struct chunk_record *rec;
6577 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6578 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6580 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6582 fprintf(stderr, "memory allocation failed\n");
6586 INIT_LIST_HEAD(&rec->list);
6587 INIT_LIST_HEAD(&rec->dextents);
6590 rec->cache.start = key->offset;
6591 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6593 rec->generation = btrfs_header_generation(leaf);
6595 rec->objectid = key->objectid;
6596 rec->type = key->type;
6597 rec->offset = key->offset;
6599 rec->length = rec->cache.size;
6600 rec->owner = btrfs_chunk_owner(leaf, ptr);
6601 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6602 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6603 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6604 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6605 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6606 rec->num_stripes = num_stripes;
6607 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6609 for (i = 0; i < rec->num_stripes; ++i) {
6610 rec->stripes[i].devid =
6611 btrfs_stripe_devid_nr(leaf, ptr, i);
6612 rec->stripes[i].offset =
6613 btrfs_stripe_offset_nr(leaf, ptr, i);
6614 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6615 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an error
 * message naming the chunk offset and length is emitted for an invalid
 * chunk. A record is then built with btrfs_new_chunk_record() and
 * inserted into @chunk_cache; a message on stderr reports a chunk that
 * already existed.
 * NOTE(review): the slot parameter line, the last argument of the
 * validity check, the tests on ret, the cleanup after a failed insert and
 * the return values are on lines not present in this listing.
 */
6622 static int process_chunk_item(struct cache_tree *chunk_cache,
6623 struct btrfs_key *key, struct extent_buffer *eb,
6626 struct chunk_record *rec;
6627 struct btrfs_chunk *chunk;
6630 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6632 * Do extra check for this chunk item,
6634 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6635 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6636 * and owner<->key_type check.
6638 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6641 error("chunk(%llu, %llu) is not valid, ignore it",
6642 key->offset, btrfs_chunk_length(eb, chunk));
6645 rec = btrfs_new_chunk_record(eb, key, slot);
6646 ret = insert_cache_extent(chunk_cache, &rec->cache);
6648 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6649 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the rb-tree @dev_cache, ordered by device_record_compare.
 *
 * generation comes from the leaf header; objectid, type and offset come
 * from @key; devid, total_byte and byte_used are read from the dev item.
 * A message on stderr reports a device that already existed.
 * NOTE(review): rec->devid is first set from key->offset and later
 * overwritten with btrfs_device_id(eb, ptr); the first store has no
 * visible effect - confirm it can be dropped.
 * NOTE(review): what follows the allocation failure message, the test on
 * the rb_insert() result, the cleanup after a failed insert and the
 * return value are on lines not present in this listing.
 */
6656 static int process_device_item(struct rb_root *dev_cache,
6657 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6659 struct btrfs_dev_item *ptr;
6660 struct device_record *rec;
6663 ptr = btrfs_item_ptr(eb,
6664 slot, struct btrfs_dev_item);
6666 rec = malloc(sizeof(*rec));
6668 fprintf(stderr, "memory allocation failed\n");
6672 rec->devid = key->offset;
6673 rec->generation = btrfs_header_generation(eb);
6675 rec->objectid = key->objectid;
6676 rec->type = key->type;
6677 rec->offset = key->offset;
6679 rec->devid = btrfs_device_id(eb, ptr);
6680 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6681 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6683 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6685 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is zero-allocated. The cache range is (key->objectid,
 * key->offset); generation comes from the leaf header; objectid, type and
 * offset come from @key; flags are read from the block group item. The
 * list head of the record is initialised.
 * NOTE(review): the slot parameter line, what follows the allocation
 * failure message and the return value are on lines not present in this
 * listing.
 */
6692 struct block_group_record *
6693 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6696 struct btrfs_block_group_item *ptr;
6697 struct block_group_record *rec;
6699 rec = calloc(1, sizeof(*rec));
6701 fprintf(stderr, "memory allocation failed\n");
6705 rec->cache.start = key->objectid;
6706 rec->cache.size = key->offset;
6708 rec->generation = btrfs_header_generation(leaf);
6710 rec->objectid = key->objectid;
6711 rec->type = key->type;
6712 rec->offset = key->offset;
6714 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6715 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6717 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block group record for the item at @slot of @eb with
 * btrfs_new_block_group_record() and insert it into @block_group_cache
 * with insert_block_group_record(). A message on stderr reports a block
 * group that already existed.
 * NOTE(review): the test on ret, the cleanup after a failed insert and
 * the return value are on lines not present in this listing.
 */
6722 static int process_block_group_item(struct block_group_tree *block_group_cache,
6723 struct btrfs_key *key,
6724 struct extent_buffer *eb, int slot)
6726 struct block_group_record *rec;
6729 rec = btrfs_new_block_group_record(eb, key, slot);
6730 ret = insert_block_group_record(block_group_cache, rec);
6732 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6733 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of
 * @leaf.
 *
 * The record is zero-allocated. The cache entry is keyed by
 * key->objectid and key->offset, and its size is the dev extent length.
 * generation comes from the leaf header; objectid, type and offset come
 * from @key; the chunk objectid (field spelled chunk_objecteid), the
 * chunk offset and the length are read from the dev extent item. Both
 * list heads of the record are initialised.
 * NOTE(review): what follows the allocation failure message, the target
 * of the chunk offset store and the return value are on lines not present
 * in this listing.
 */
6740 struct device_extent_record *
6741 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6742 struct btrfs_key *key, int slot)
6744 struct device_extent_record *rec;
6745 struct btrfs_dev_extent *ptr;
6747 rec = calloc(1, sizeof(*rec));
6749 fprintf(stderr, "memory allocation failed\n");
6753 rec->cache.objectid = key->objectid;
6754 rec->cache.start = key->offset;
6756 rec->generation = btrfs_header_generation(leaf);
6758 rec->objectid = key->objectid;
6759 rec->type = key->type;
6760 rec->offset = key->offset;
6762 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6763 rec->chunk_objecteid =
6764 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6766 btrfs_dev_extent_chunk_offset(leaf, ptr);
6767 rec->length = btrfs_dev_extent_length(leaf, ptr);
6768 rec->cache.size = rec->length;
6770 INIT_LIST_HEAD(&rec->chunk_list);
6771 INIT_LIST_HEAD(&rec->device_list);
/*
 * Create a record for the dev extent item at @slot of @eb and insert it into
 * @dev_extent_cache.
 *
 * A message naming objectid, offset and length of the record is printed on
 * the failure path below.  The wording suggests the insert failed because an
 * equal record was already cached -- TODO confirm against
 * insert_device_extent_record().
 */
6777 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6778 struct btrfs_key *key, struct extent_buffer *eb,
6781 struct device_extent_record *rec;
6784 rec = btrfs_new_device_extent_record(eb, key, slot);
6785 ret = insert_device_extent_record(dev_extent_cache, rec);
6788 "Device extent[%llu, %llu, %llu] existed.\n",
6789 rec->objectid, rec->offset, rec->length);
/*
 * Record the extent item at @slot of @eb in @extent_cache.
 *
 * @root:         its fs_info supplies nodesize and sectorsize
 * @extent_cache: cache tree receiving the extent record and its backrefs
 * @eb:           leaf that holds the item
 * @slot:         slot of the item inside @eb
 *
 * An extent record is added for the item itself, then every inline reference
 * stored behind the extent item is walked and turned into a tree backref or
 * a data backref.  Extents whose bytenr or length fail the alignment and
 * size checks below are reported through error().
 */
6796 static int process_extent_item(struct btrfs_root *root,
6797 struct cache_tree *extent_cache,
6798 struct extent_buffer *eb, int slot)
6800 struct btrfs_extent_item *ei;
6801 struct btrfs_extent_inline_ref *iref;
6802 struct btrfs_extent_data_ref *dref;
6803 struct btrfs_shared_data_ref *sref;
6804 struct btrfs_key key;
6805 struct extent_record tmpl;
6810 u32 item_size = btrfs_item_size_nr(eb, slot);
6816 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * METADATA_ITEM keys describe one tree block, so the length is the
 * nodesize; the other assignment takes the length from key.offset.
 */
6818 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6820 num_bytes = root->fs_info->nodesize;
6822 num_bytes = key.offset;
6825 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6826 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6827 key.objectid, root->fs_info->sectorsize);
/*
 * An item smaller than btrfs_extent_item is only parsed as the v0 layout,
 * and only when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 */
6830 if (item_size < sizeof(*ei)) {
6831 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6832 struct btrfs_extent_item_v0 *ei0;
6833 BUG_ON(item_size != sizeof(*ei0));
6834 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6835 refs = btrfs_extent_refs_v0(eb, ei0);
6839 memset(&tmpl, 0, sizeof(tmpl));
6840 tmpl.start = key.objectid;
6841 tmpl.nr = num_bytes;
6842 tmpl.extent_item_refs = refs;
6843 tmpl.metadata = metadata;
6845 tmpl.max_size = num_bytes;
6847 return add_extent_rec(extent_cache, &tmpl);
6850 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6851 refs = btrfs_extent_refs(eb, ei);
6852 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6856 if (metadata && num_bytes != root->fs_info->nodesize) {
6857 error("ignore invalid metadata extent, length %llu does not equal to %u",
6858 num_bytes, root->fs_info->nodesize);
6861 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6862 error("ignore invalid data extent, length %llu is not aligned to %u",
6863 num_bytes, root->fs_info->sectorsize);
/* record the extent itself before looking at its inline references */
6867 memset(&tmpl, 0, sizeof(tmpl));
6868 tmpl.start = key.objectid;
6869 tmpl.nr = num_bytes;
6870 tmpl.extent_item_refs = refs;
6871 tmpl.metadata = metadata;
6873 tmpl.max_size = num_bytes;
6874 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right behind the extent item.  A tree block stored
 * under an EXTENT_ITEM key carries a btrfs_tree_block_info first, which is
 * skipped.
 */
6876 ptr = (unsigned long)(ei + 1);
6877 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6878 key.type == BTRFS_EXTENT_ITEM_KEY)
6879 ptr += sizeof(struct btrfs_tree_block_info);
6881 end = (unsigned long)ei + item_size;
6883 iref = (struct btrfs_extent_inline_ref *)ptr;
6884 type = btrfs_extent_inline_ref_type(eb, iref);
6885 offset = btrfs_extent_inline_ref_offset(eb, iref);
6887 case BTRFS_TREE_BLOCK_REF_KEY:
6888 ret = add_tree_backref(extent_cache, key.objectid,
6892 "add_tree_backref failed (extent items tree block): %s",
6895 case BTRFS_SHARED_BLOCK_REF_KEY:
6896 ret = add_tree_backref(extent_cache, key.objectid,
6900 "add_tree_backref failed (extent items shared block): %s",
6903 case BTRFS_EXTENT_DATA_REF_KEY:
/* the data ref overlays the offset field of the inline ref */
6904 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6905 add_data_backref(extent_cache, key.objectid, 0,
6906 btrfs_extent_data_ref_root(eb, dref),
6907 btrfs_extent_data_ref_objectid(eb,
6909 btrfs_extent_data_ref_offset(eb, dref),
6910 btrfs_extent_data_ref_count(eb, dref),
6913 case BTRFS_SHARED_DATA_REF_KEY:
/* the shared data ref follows the inline ref */
6914 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6915 add_data_backref(extent_cache, key.objectid, offset,
6917 btrfs_shared_data_ref_count(eb, sref),
6921 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6922 key.objectid, key.type, num_bytes);
/* advance by the size that belongs to this reference type */
6925 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that [@offset, @offset + @bytes) of block group @cache is described
 * by one free space entry with exactly that offset and length.
 *
 * The range is first adjusted around every super block mirror that
 * btrfs_rmap_block() maps into the block group: depending on where the
 * stripe sits, the range is trimmed at the front, trimmed at the end, or
 * split, in which case the left part is checked by a recursive call and the
 * loop continues with the right part.  The entry found for what remains is
 * unlinked from cache->free_space_ctl; a missing or mismatching entry is
 * reported on stderr.
 */
6932 static int check_cache_range(struct btrfs_root *root,
6933 struct btrfs_block_group_cache *cache,
6934 u64 offset, u64 bytes)
6936 struct btrfs_free_space *entry;
6942 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6943 bytenr = btrfs_sb_offset(i);
6944 ret = btrfs_rmap_block(root->fs_info,
6945 cache->key.objectid, bytenr, 0,
6946 &logical, &nr, &stripe_len);
/* stripes that end before the range or start behind it do not matter */
6951 if (logical[nr] + stripe_len <= offset)
6953 if (offset + bytes <= logical[nr])
6955 if (logical[nr] == offset) {
6956 if (stripe_len >= bytes) {
6960 bytes -= stripe_len;
6961 offset += stripe_len;
6962 } else if (logical[nr] < offset) {
6963 if (logical[nr] + stripe_len >=
6968 bytes = (offset + bytes) -
6969 (logical[nr] + stripe_len);
6970 offset = logical[nr] + stripe_len;
6973 * Could be tricky, the super may land in the
6974 * middle of the area we're checking. First
6975 * check the easiest case, it's at the end.
6977 if (logical[nr] + stripe_len >=
6979 bytes = logical[nr] - offset;
6983 /* Check the left side */
6984 ret = check_cache_range(root, cache,
6986 logical[nr] - offset);
6992 /* Now we continue with the right side */
6993 bytes = (offset + bytes) -
6994 (logical[nr] + stripe_len);
6995 offset = logical[nr] + stripe_len;
/* what is left of the range has to match one entry exactly */
7002 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7004 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7005 offset, offset+bytes);
7009 if (entry->offset != offset) {
7010 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7015 if (entry->bytes != bytes) {
7016 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7017 bytes, entry->bytes, offset);
/* take the verified entry out of the free space ctl */
7021 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Compare the free space entries loaded for @cache with the extent tree.
 *
 * The EXTENT_ITEM and METADATA_ITEM keys of the extent root that fall inside
 * the block group are walked in key order, with last tracking the end of the
 * most recent extent.  Every gap between last and the start of the next
 * extent, and finally the tail of the block group, is handed to
 * check_cache_range().  A message is printed to stderr on the path that
 * tests whether free_space_offset still holds entries afterwards.
 */
7026 static int verify_space_cache(struct btrfs_root *root,
7027 struct btrfs_block_group_cache *cache)
7029 struct btrfs_path path;
7030 struct extent_buffer *leaf;
7031 struct btrfs_key key;
/* all lookups below go through the extent root */
7035 root = root->fs_info->extent_root;
/* never start below BTRFS_SUPER_INFO_OFFSET */
7037 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7039 btrfs_init_path(&path);
7040 key.objectid = last;
7042 key.type = BTRFS_EXTENT_ITEM_KEY;
7043 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7048 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7049 ret = btrfs_next_leaf(root, &path);
7057 leaf = path.nodes[0];
7058 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* keys at or past the end of the block group are out of scope */
7059 if (key.objectid >= cache->key.offset + cache->key.objectid)
7061 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7062 key.type != BTRFS_METADATA_ITEM_KEY) {
/* no gap in front of this extent: only move last behind it */
7067 if (last == key.objectid) {
7068 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7069 last = key.objectid + key.offset;
7071 last = key.objectid + root->fs_info->nodesize;
/* the gap [last, key.objectid) has to be present as free space */
7076 ret = check_cache_range(root, cache, last,
7077 key.objectid - last);
7080 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7081 last = key.objectid + key.offset;
7083 last = key.objectid + root->fs_info->nodesize;
/* tail of the block group behind the last extent */
7087 if (last < cache->key.objectid + cache->key.offset)
7088 ret = check_cache_range(root, cache, last,
7089 cache->key.objectid +
7090 cache->key.offset - last);
7093 btrfs_release_path(&path);
7096 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7097 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * Starting behind the primary super block, block groups are looked up one
 * after the other.  For each one the free space is loaded from the free
 * space tree when the FREE_SPACE_TREE compat_ro bit is set (with the super
 * stripes excluded around the load) and through load_free_space_cache() on
 * the other path, and then handed to verify_space_cache().
 *
 * A notice is printed when the super block carries a cache generation other
 * than -1ULL that differs from the super generation.
 *
 * Returns -EINVAL if error is non-zero at the end, 0 otherwise.
 */
7105 static int check_space_cache(struct btrfs_root *root)
7107 struct btrfs_block_group_cache *cache;
7108 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7112 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7113 btrfs_super_generation(root->fs_info->super_copy) !=
7114 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7115 printf("cache and super generation don't match, space cache "
7116 "will be invalidated\n");
/* progress reporting is optional */
7120 if (ctx.progress_enabled) {
7121 ctx.tp = TASK_FREE_SPACE;
7122 task_start(ctx.info);
7126 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* the next lookup starts right behind this block group */
7130 start = cache->key.objectid + cache->key.offset;
7131 if (!cache->free_space_ctl) {
7132 if (btrfs_init_free_space_ctl(cache,
7133 root->fs_info->sectorsize)) {
7138 btrfs_remove_free_space_cache(cache);
7141 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7142 ret = exclude_super_stripes(root, cache);
7144 fprintf(stderr, "could not exclude super stripes: %s\n",
7149 ret = load_free_space_tree(root->fs_info, cache);
7150 free_excluded_extents(root, cache);
7152 fprintf(stderr, "could not load free space tree: %s\n",
7159 ret = load_free_space_cache(root->fs_info, cache);
7164 ret = verify_space_cache(root, cache);
7166 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7167 cache->key.objectid);
7172 task_stop(ctx.info);
7174 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of [@bytenr, @bytenr + @num_bytes).
 *
 * @root:        its fs_info supplies sectorsize and checksum size
 * @bytenr:      logical start of the data
 * @num_bytes:   length of the data, tested against the sector size first
 * @leaf_offset: offset inside @eb of the first stored checksum
 * @eb:          leaf that holds the checksum item
 *
 * The data is read into a buffer of @num_bytes and checksummed one sector at
 * a time; each result is compared with the expected value read from @eb.  A
 * mismatch is reported on stderr together with the mirror number, and the
 * number of copies is queried to decide whether another mirror is left.
 */
7177 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7178 u64 num_bytes, unsigned long leaf_offset,
7179 struct extent_buffer *eb) {
7181 struct btrfs_fs_info *fs_info = root->fs_info;
7183 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7185 unsigned long csum_offset;
7189 u64 data_checked = 0;
7195 if (num_bytes % fs_info->sectorsize)
7198 data = malloc(num_bytes);
7202 while (offset < num_bytes) {
7205 read_len = num_bytes - offset;
7206 /* request everything that is still missing in one call */
7207 ret = read_extent_data(fs_info, data + offset,
7208 bytenr + offset, &read_len, mirror);
7212 /* verify every 4k data's checksum */
7213 while (data_checked < read_len) {
7215 tmp = offset + data_checked;
7217 csum = btrfs_csum_data((char *)data + tmp,
7218 csum, fs_info->sectorsize);
7219 btrfs_csum_final(csum, (u8 *)&csum);
/* one stored checksum of csum_size bytes per sector */
7221 csum_offset = leaf_offset +
7222 tmp / fs_info->sectorsize * csum_size;
7223 read_extent_buffer(eb, (char *)&csum_expected,
7224 csum_offset, csum_size);
7225 /* try another mirror */
7226 if (csum != csum_expected) {
7227 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7228 mirror, bytenr + tmp,
7229 csum, csum_expected);
7230 num_copies = btrfs_num_copies(root->fs_info,
7232 if (mirror < num_copies - 1) {
7237 data_checked += fs_info->sectorsize;
/*
 * Check that [@bytenr, @bytenr + @num_bytes) is backed by EXTENT_ITEM keys in
 * the extent tree.
 *
 * The search key is (@bytenr, EXTENT_ITEM, (u64)-1).  From the position
 * found, the code steps back over keys whose type sorts above EXTENT_ITEM
 * and then walks forward, shrinking the range by every extent item that
 * overlaps it.  An extent that lies in the middle of the range splits it:
 * the right part is checked by a recursive call and the loop goes on with
 * the left part.  Bytes that are left over without an error are reported on
 * stderr as a csum range without extents.
 */
7246 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7249 struct btrfs_path path;
7250 struct extent_buffer *leaf;
7251 struct btrfs_key key;
7254 btrfs_init_path(&path);
7255 key.objectid = bytenr;
7256 key.type = BTRFS_EXTENT_ITEM_KEY;
7257 key.offset = (u64)-1;
7260 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7263 fprintf(stderr, "Error looking up extent record %d\n", ret);
7264 btrfs_release_path(&path);
7267 if (path.slots[0] > 0) {
7270 ret = btrfs_prev_leaf(root, &path);
7273 } else if (ret > 0) {
7280 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7283 * Block group items come before extent items if they have the same
7284 * bytenr, so walk back one more just in case. Dear future traveller,
7285 * first congrats on mastering time travel. Now if it's not too much
7286 * trouble could you go back to 2006 and tell Chris to make the
7287 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7288 * EXTENT_ITEM_KEY please?
7290 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7291 if (path.slots[0] > 0) {
7294 ret = btrfs_prev_leaf(root, &path);
7297 } else if (ret > 0) {
7302 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7306 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7307 ret = btrfs_next_leaf(root, &path);
7309 fprintf(stderr, "Error going to next leaf "
7311 btrfs_release_path(&path);
7317 leaf = path.nodes[0];
7318 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7319 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7323 if (key.objectid + key.offset < bytenr) {
7327 if (key.objectid > bytenr + num_bytes)
7330 if (key.objectid == bytenr) {
7331 if (key.offset >= num_bytes) {
7335 num_bytes -= key.offset;
7336 bytenr += key.offset;
7337 } else if (key.objectid < bytenr) {
7338 if (key.objectid + key.offset >= bytenr + num_bytes) {
7342 num_bytes = (bytenr + num_bytes) -
7343 (key.objectid + key.offset);
7344 bytenr = key.objectid + key.offset;
7346 if (key.objectid + key.offset < bytenr + num_bytes) {
7347 u64 new_start = key.objectid + key.offset;
7348 u64 new_bytes = bytenr + num_bytes - new_start;
7351 * Weird case, the extent is in the middle of
7352 * our range, we'll have to search one side
7353 * and then the other. Not sure if this happens
7354 * in real life, but no harm in coding it up
7355 * anyway just in case.
7357 btrfs_release_path(&path);
7358 ret = check_extent_exists(root, new_start,
7361 fprintf(stderr, "Right section didn't "
7365 num_bytes = key.objectid - bytenr;
7368 num_bytes = key.objectid - bytenr;
/* bytes left over without an error were not covered by any extent */
7375 if (num_bytes && !ret) {
7376 fprintf(stderr, "There are no extents for csum range "
7377 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7381 btrfs_release_path(&path);
/*
 * Walk every EXTENT_CSUM item of the csum tree.
 *
 * The byte range covered by an item is derived from its size: item size
 * divided by the checksum size, times the sector size.  The data itself is
 * only verified through check_extent_csums() when check_data_csum is set.
 * Items that follow each other without a gap are accumulated in
 * [offset, offset + num_bytes); when the next item does not continue the
 * range, the accumulated range is passed to check_extent_exists() and a
 * message is printed on the failure path of that call.
 */
7385 static int check_csums(struct btrfs_root *root)
7387 struct btrfs_path path;
7388 struct extent_buffer *leaf;
7389 struct btrfs_key key;
7390 u64 offset = 0, num_bytes = 0;
7391 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7395 unsigned long leaf_offset;
/* from here on root is the csum root */
7397 root = root->fs_info->csum_root;
7398 if (!extent_buffer_uptodate(root->node)) {
7399 fprintf(stderr, "No valid csum tree found\n");
7403 btrfs_init_path(&path);
7404 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7405 key.type = BTRFS_EXTENT_CSUM_KEY;
7407 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7409 fprintf(stderr, "Error searching csum tree %d\n", ret);
7410 btrfs_release_path(&path);
7414 if (ret > 0 && path.slots[0])
7419 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7420 ret = btrfs_next_leaf(root, &path);
7422 fprintf(stderr, "Error going to next leaf "
7429 leaf = path.nodes[0];
7431 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7432 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* number of data bytes described by the checksums of this item */
7437 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7438 csum_size) * root->fs_info->sectorsize;
7439 if (!check_data_csum)
7440 goto skip_csum_check;
7441 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7442 ret = check_extent_csums(root, key.offset, data_len,
7448 offset = key.offset;
/* the item does not continue the accumulated range: check what we have */
7449 } else if (key.offset != offset + num_bytes) {
7450 ret = check_extent_exists(root, offset, num_bytes);
7452 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7453 "there is no extent record\n",
7454 offset, offset+num_bytes);
7457 offset = key.offset;
7460 num_bytes += data_len;
7464 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in the order objectid, type,
 * offset; a later field is only looked at when the earlier ones are equal.
 *
 * Presumably reports whether @key sorts before @drop_key -- TODO confirm the
 * exact return values.
 */
7468 static int is_dropped_key(struct btrfs_key *key,
7469 struct btrfs_key *drop_key) {
7470 if (key->objectid < drop_key->objectid)
7472 else if (key->objectid == drop_key->objectid) {
7473 if (key->type < drop_key->type)
7475 else if (key->type == drop_key->type) {
7476 if (key->offset < drop_key->offset)
7484 * Here are the rules for FULL_BACKREF.
7486 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7487 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7489 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7490 * if it happened after the relocation occurred since we'll have dropped the
7491 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7492 * have no real way to know for sure.
7494 * We process the blocks one root at a time, and we start from the lowest root
7495 * objectid and go to the highest. So we can just lookup the owner backref for
7496 * the record and if we don't find it then we know it doesn't exist and we have
7499 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7500 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7501 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether FULL_BACKREF applies to tree block @buf reached from the
 * root described by @ri.
 *
 * The extent record of @buf is looked up in @extent_cache and the checks
 * below are applied in order: root objectid below BTRFS_FIRST_FREE_OBJECTID,
 * @buf being the root node recorded in @ri, the RELOC header flag, the
 * header owner being @ri itself, and finally whether the record has a tree
 * backref for the owner.  BTRFS_BLOCK_FLAG_FULL_BACKREF is or-ed into *flags
 * on the full backref path.  On both paths rec->bad_full_backref is set when
 * the record already stores the opposite decision.
 */
7503 static int calc_extent_flag(struct cache_tree *extent_cache,
7504 struct extent_buffer *buf,
7505 struct root_item_record *ri,
7508 struct extent_record *rec;
7509 struct cache_extent *cache;
7510 struct tree_backref *tback;
7513 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7514 /* we have added this extent before */
7518 rec = container_of(cache, struct extent_record, cache);
7521 * Except file/reloc tree, we can not have
7524 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* the block is the root node of this root */
7529 if (buf->start == ri->bytenr)
7532 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7535 owner = btrfs_header_owner(buf);
7536 if (owner == ri->objectid)
/* look for a backref that names the header owner as root */
7539 tback = find_tree_backref(rec, 0, owner);
/* a stored decision other than 0 contradicts this path */
7544 if (rec->flag_block_full_backref != FLAG_UNSET &&
7545 rec->flag_block_full_backref != 0)
7546 rec->bad_full_backref = 1;
7549 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* a stored decision other than 1 contradicts this path */
7550 if (rec->flag_block_full_backref != FLAG_UNSET &&
7551 rec->flag_block_full_backref != 1)
7552 rec->bad_full_backref = 1;
/*
 * Print one line to stderr stating that @key_type was found in root @rootid.
 * Key type and root id are rendered by print_key_type() and
 * print_objectid().
 */
7556 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7558 fprintf(stderr, "Invalid key type(");
7559 print_key_type(stderr, 0, key_type);
7560 fprintf(stderr, ") found in root(");
7561 print_objectid(stderr, rootid, 0);
7562 fprintf(stderr, ")\n");
7566 * Check if the key is valid with its extent buffer.
7568 * This is an early check in case an invalid key exists in an extent buffer
7569 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may live in the tree @rootid.
 *
 * Each group of case labels below names the tree its key types are tied to:
 * chunk tree, extent tree, root tree or device tree.  A violation is
 * reported through report_mismatch_key_root().
 */
7572 static int check_type_with_root(u64 rootid, u8 key_type)
7575 /* Only valid in chunk tree */
7576 case BTRFS_DEV_ITEM_KEY:
7577 case BTRFS_CHUNK_ITEM_KEY:
7578 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7581 /* valid in csum and log tree */
/*
 * NOTE(review): the label below is a tree objectid, while every other label
 * in this function is a key type.  BTRFS_EXTENT_CSUM_KEY may have been
 * intended -- confirm together with the condition that follows it.
 */
7582 case BTRFS_CSUM_TREE_OBJECTID:
7583 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
/* extent tree only */
7587 case BTRFS_EXTENT_ITEM_KEY:
7588 case BTRFS_METADATA_ITEM_KEY:
7589 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7590 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
/* root tree only */
7593 case BTRFS_ROOT_ITEM_KEY:
7594 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
/* device tree only */
7597 case BTRFS_DEV_EXTENT_KEY:
7598 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7604 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next tree block of the scan.
 *
 * @root:              supplies fs_info
 * @bits:              scratch array filled by pick_next_pending()
 * @last:              updated to the start of the block handled here
 * @pending, @reada,
 * @nodes, @seen:      cache trees that track which blocks are queued, being
 *                     read ahead, or already seen
 * @extent_cache:      receives extent records and backrefs
 * @chunk_cache, @dev_cache, @block_group_cache, @dev_extent_cache:
 *                     receive the records of the matching item types
 * @ri:                record of the root that is being walked
 *
 * A batch of blocks is picked and read ahead, then the first block of the
 * batch is read and removed from the tracking trees.  Its FULL_BACKREF state
 * is determined and stored in its extent record, the block is passed to
 * check_block(), and its content is walked: for a leaf every item is
 * dispatched on its key type, for a node every child pointer gets an extent
 * record and a tree backref and is queued.  The global byte counters are
 * updated at the end.
 */
7608 static int run_next_block(struct btrfs_root *root,
7609 struct block_info *bits,
7612 struct cache_tree *pending,
7613 struct cache_tree *seen,
7614 struct cache_tree *reada,
7615 struct cache_tree *nodes,
7616 struct cache_tree *extent_cache,
7617 struct cache_tree *chunk_cache,
7618 struct rb_root *dev_cache,
7619 struct block_group_tree *block_group_cache,
7620 struct device_extent_tree *dev_extent_cache,
7621 struct root_item_record *ri)
7623 struct btrfs_fs_info *fs_info = root->fs_info;
7624 struct extent_buffer *buf;
7625 struct extent_record *rec = NULL;
7636 struct btrfs_key key;
7637 struct cache_extent *cache;
/* ask pick_next_pending() for the next batch of blocks */
7640 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7641 bits_nr, &reada_bits);
/* remember the picked blocks in reada and start reading them ahead */
7646 for(i = 0; i < nritems; i++) {
7647 ret = add_cache_extent(reada, bits[i].start,
7652 /* fixme, get the parent transid */
7653 readahead_tree_block(fs_info, bits[i].start,
/* only bits[0] is processed by this call */
7657 *last = bits[0].start;
7658 bytenr = bits[0].start;
7659 size = bits[0].size;
/* drop the block from the pending, reada and nodes trees */
7661 cache = lookup_cache_extent(pending, bytenr, size);
7663 remove_cache_extent(pending, cache);
7666 cache = lookup_cache_extent(reada, bytenr, size);
7668 remove_cache_extent(reada, cache);
7671 cache = lookup_cache_extent(nodes, bytenr, size);
7673 remove_cache_extent(nodes, cache);
/* the extent record carries the generation stored in the parent pointer */
7676 cache = lookup_cache_extent(extent_cache, bytenr, size);
7678 rec = container_of(cache, struct extent_record, cache);
7679 gen = rec->parent_generation;
7682 /* fixme, get the real parent transid */
7683 buf = read_tree_block(root->fs_info, bytenr, gen);
7684 if (!extent_buffer_uptodate(buf)) {
7685 record_bad_block_io(root->fs_info,
7686 extent_cache, bytenr, size);
7690 nritems = btrfs_header_nritems(buf);
/*
 * The flags are looked up in the extent tree unless init_extent_tree is set;
 * calc_extent_flag() is used on the remaining paths.
 */
7693 if (!init_extent_tree) {
7694 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7695 btrfs_header_level(buf), 1, NULL,
7698 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7700 fprintf(stderr, "Couldn't calc extent flags\n");
7701 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7706 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7708 fprintf(stderr, "Couldn't calc extent flags\n");
7709 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7713 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7715 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7716 ri->objectid == btrfs_header_owner(buf)) {
7718 * Ok we got to this block from it's original owner and
7719 * we have FULL_BACKREF set. Relocation can leave
7720 * converted blocks over so this is altogether possible,
7721 * however it's not possible if the generation > the
7722 * last snapshot, so check for this case.
7724 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7725 btrfs_header_generation(buf) > ri->last_snapshot) {
7726 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7727 rec->bad_full_backref = 1;
7732 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7733 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7734 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7735 rec->bad_full_backref = 1;
/* store the final decision in the extent record */
7739 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7740 rec->flag_block_full_backref = 1;
7744 rec->flag_block_full_backref = 0;
7746 owner = btrfs_header_owner(buf);
7749 ret = check_block(root, extent_cache, buf, flags);
/* leaf: dispatch every item on its key type */
7753 if (btrfs_is_leaf(buf)) {
7754 btree_space_waste += btrfs_leaf_free_space(root, buf);
7755 for (i = 0; i < nritems; i++) {
7756 struct btrfs_file_extent_item *fi;
7757 btrfs_item_key_to_cpu(buf, &key, i);
7759 * Check key type against the leaf owner.
7760 * Could filter quite a lot of early error if
7763 if (check_type_with_root(btrfs_header_owner(buf),
7765 fprintf(stderr, "ignoring invalid key\n");
7768 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7769 process_extent_item(root, extent_cache, buf,
7773 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7774 process_extent_item(root, extent_cache, buf,
7778 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7780 btrfs_item_size_nr(buf, i);
7783 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7784 process_chunk_item(chunk_cache, &key, buf, i);
7787 if (key.type == BTRFS_DEV_ITEM_KEY) {
7788 process_device_item(dev_cache, &key, buf, i);
7791 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7792 process_block_group_item(block_group_cache,
7796 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7797 process_device_extent_item(dev_extent_cache,
7802 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7803 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7804 process_extent_ref_v0(extent_cache, buf, i);
/* keyed (non-inline) backref items */
7811 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7812 ret = add_tree_backref(extent_cache,
7813 key.objectid, 0, key.offset, 0);
7816 "add_tree_backref failed (leaf tree block): %s",
7820 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7821 ret = add_tree_backref(extent_cache,
7822 key.objectid, key.offset, 0, 0);
7825 "add_tree_backref failed (leaf shared block): %s",
7829 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7830 struct btrfs_extent_data_ref *ref;
7831 ref = btrfs_item_ptr(buf, i,
7832 struct btrfs_extent_data_ref);
7833 add_data_backref(extent_cache,
7835 btrfs_extent_data_ref_root(buf, ref),
7836 btrfs_extent_data_ref_objectid(buf,
7838 btrfs_extent_data_ref_offset(buf, ref),
7839 btrfs_extent_data_ref_count(buf, ref),
7840 0, root->fs_info->sectorsize);
7843 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7844 struct btrfs_shared_data_ref *ref;
7845 ref = btrfs_item_ptr(buf, i,
7846 struct btrfs_shared_data_ref);
7847 add_data_backref(extent_cache,
7848 key.objectid, key.offset, 0, 0, 0,
7849 btrfs_shared_data_ref_count(buf, ref),
7850 0, root->fs_info->sectorsize);
/* orphan items handled below are queued on the delete_items list */
7853 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7854 struct bad_item *bad;
7856 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7860 bad = malloc(sizeof(struct bad_item));
7863 INIT_LIST_HEAD(&bad->list);
7864 memcpy(&bad->key, &key,
7865 sizeof(struct btrfs_key));
7866 bad->root_id = owner;
7867 list_add_tail(&bad->list, &delete_items);
/* everything else of interest is a file extent item */
7870 if (key.type != BTRFS_EXTENT_DATA_KEY)
7872 fi = btrfs_item_ptr(buf, i,
7873 struct btrfs_file_extent_item);
7874 if (btrfs_file_extent_type(buf, fi) ==
7875 BTRFS_FILE_EXTENT_INLINE)
7877 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7880 data_bytes_allocated +=
7881 btrfs_file_extent_disk_num_bytes(buf, fi);
/*
 * NOTE(review): this tests the running total data_bytes_allocated, not the
 * disk_num_bytes of the current extent -- confirm which one was meant.
 */
7882 if (data_bytes_allocated < root->fs_info->sectorsize) {
7885 data_bytes_referenced +=
7886 btrfs_file_extent_num_bytes(buf, fi);
7887 add_data_backref(extent_cache,
7888 btrfs_file_extent_disk_bytenr(buf, fi),
7889 parent, owner, key.objectid, key.offset -
7890 btrfs_file_extent_offset(buf, fi), 1, 1,
7891 btrfs_file_extent_disk_num_bytes(buf, fi));
/* node: record and queue every child pointer */
7895 struct btrfs_key first_key;
7897 first_key.objectid = 0;
7900 btrfs_item_key_to_cpu(buf, &first_key, 0);
7901 level = btrfs_header_level(buf);
7902 for (i = 0; i < nritems; i++) {
7903 struct extent_record tmpl;
7905 ptr = btrfs_node_blockptr(buf, i);
7906 size = root->fs_info->nodesize;
7907 btrfs_node_key_to_cpu(buf, &key, i);
/* on the drop level, keys are tested against the drop key of the root */
7909 if ((level == ri->drop_level)
7910 && is_dropped_key(&key, &ri->drop_key)) {
7915 memset(&tmpl, 0, sizeof(tmpl));
7916 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7917 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7922 tmpl.max_size = size;
7923 ret = add_extent_rec(extent_cache, &tmpl);
7927 ret = add_tree_backref(extent_cache, ptr, parent,
7931 "add_tree_backref failed (non-leaf block): %s",
7937 add_pending(nodes, seen, ptr, size);
7939 add_pending(pending, seen, ptr, size);
/* unused key pointer slots count as wasted btree space */
7942 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7943 nritems) * sizeof(struct btrfs_key_ptr);
/* global accounting */
7945 total_btree_bytes += buf->len;
7946 if (fs_root_objectid(btrfs_header_owner(buf)))
7947 total_fs_tree_bytes += buf->len;
7948 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7949 total_extent_tree_bytes += buf->len;
7950 if (!found_old_backref &&
7951 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7952 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7953 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7954 found_old_backref = 1;
7956 free_extent_buffer(buf);
/*
 * Queue the root node @buf of a tree for scanning.
 *
 * The block is added to @nodes when its level is above zero and to @pending
 * otherwise, an extent record starting at buf->start with max_size buf->len
 * is added to @extent_cache, and a tree backref is recorded for it.  For the
 * reloc tree, or a backref revision older than BTRFS_MIXED_BACKREF_REV, the
 * backref names buf->start as its parent; otherwise it names objectid as its
 * root.
 */
7960 static int add_root_to_pending(struct extent_buffer *buf,
7961 struct cache_tree *extent_cache,
7962 struct cache_tree *pending,
7963 struct cache_tree *seen,
7964 struct cache_tree *nodes,
7967 struct extent_record tmpl;
7970 if (btrfs_header_level(buf) > 0)
7971 add_pending(nodes, seen, buf->start, buf->len);
7973 add_pending(pending, seen, buf->start, buf->len);
7975 memset(&tmpl, 0, sizeof(tmpl));
7976 tmpl.start = buf->start;
7981 tmpl.max_size = buf->len;
7982 add_extent_rec(extent_cache, &tmpl);
7984 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7985 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7986 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7989 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7994 /* as we fix the tree, we might be deleting blocks that
7995 * we're tracking for repair. This hook makes sure we
7996 * remove any backrefs for blocks as we are fixing them.
/*
 * Parameters as used below:
 * @bytenr, @num_bytes: range of the extent, used to find its cached record
 * @parent, @root_objectid, @owner, @offset: identify the backref
 *
 * The record is taken from root->fs_info->fsck_extent_cache.  An @owner of
 * at least BTRFS_FIRST_FREE_OBJECTID selects the data backref path, anything
 * else the tree backref path.  Reference counts on the backref and on the
 * record are lowered, and maybe_free_extent_rec() is called on the record at
 * the end.
 */
7998 static int free_extent_hook(struct btrfs_trans_handle *trans,
7999 struct btrfs_root *root,
8000 u64 bytenr, u64 num_bytes, u64 parent,
8001 u64 root_objectid, u64 owner, u64 offset,
8004 struct extent_record *rec;
8005 struct cache_extent *cache;
8007 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8009 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8010 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8014 rec = container_of(cache, struct extent_record, cache);
/* data backref: counts drop by refs_to_drop */
8016 struct data_backref *back;
8017 back = find_data_backref(rec, parent, root_objectid, owner,
8018 offset, 1, bytenr, num_bytes);
8021 if (back->node.found_ref) {
8022 back->found_ref -= refs_to_drop;
8024 rec->refs -= refs_to_drop;
8026 if (back->node.found_extent_tree) {
8027 back->num_refs -= refs_to_drop;
8028 if (rec->extent_item_refs)
8029 rec->extent_item_refs -= refs_to_drop;
/* clear the flags once the matching counter reached zero */
8031 if (back->found_ref == 0)
8032 back->node.found_ref = 0;
8033 if (back->num_refs == 0)
8034 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked while found_ref is still set --
 * confirm that this is the intended condition.
 */
8036 if (!back->node.found_extent_tree && back->node.found_ref) {
8037 list_del(&back->node.list);
/* tree backref: the flags are cleared directly */
8041 struct tree_backref *back;
8042 back = find_tree_backref(rec, parent, root_objectid);
8045 if (back->node.found_ref) {
8048 back->node.found_ref = 0;
8050 if (back->node.found_extent_tree) {
8051 if (rec->extent_item_refs)
8052 rec->extent_item_refs--;
8053 back->node.found_extent_tree = 0;
8055 if (!back->node.found_extent_tree && back->node.found_ref) {
8056 list_del(&back->node.list);
8060 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that describe the extent at bytenr.
 *
 * The extent tree is searched from the highest offset for bytenr.  Items
 * with a different objectid end the matching, and items whose type is not
 * one of the extent or backref types listed below are stepped over by
 * lowering the search key.  Every matching item is announced on stderr and
 * deleted; for EXTENT_ITEM and METADATA_ITEM keys the block group accounting
 * is updated with the size of the extent as well.
 */
8065 static int delete_extent_records(struct btrfs_trans_handle *trans,
8066 struct btrfs_root *root,
8067 struct btrfs_path *path,
8070 struct btrfs_key key;
8071 struct btrfs_key found_key;
8072 struct extent_buffer *leaf;
8077 key.objectid = bytenr;
8079 key.offset = (u64)-1;
8082 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8089 if (path->slots[0] == 0)
8095 leaf = path->nodes[0];
8096 slot = path->slots[0];
8098 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8099 if (found_key.objectid != bytenr)
8102 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8103 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8104 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8105 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8106 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8107 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8108 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8109 btrfs_release_path(path);
/* move the search key just below the item that was skipped */
8110 if (found_key.type == 0) {
8111 if (found_key.offset == 0)
8113 key.offset = found_key.offset - 1;
8114 key.type = found_key.type;
8116 key.type = found_key.type - 1;
8117 key.offset = (u64)-1;
8121 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8122 found_key.objectid, found_key.type, found_key.offset);
8124 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8127 btrfs_release_path(path);
8129 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8130 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM keys do not store a length, so the nodesize is used */
8131 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8132 found_key.offset : root->fs_info->nodesize;
8134 ret = btrfs_update_block_group(trans, root, bytenr,
8141 btrfs_release_path(path);
8146 * for a single backref, this will allocate a new extent
8147 * and add the backref to it.
/*
 * Parameters as used below:
 * @info:  its extent_root is the tree that gets modified
 * @path:  scratch path, released again before returning
 * @rec:   extent record; start, max_size, generation, info_objectid and
 *         info_level are written into the new item
 * @back:  backref to add; is_data and full_backref select the code path
 * @flags: or-ed into the extent flags of a tree block extent
 *
 * On the path that creates the extent item, an empty item keyed
 * (rec->start, EXTENT_ITEM, rec->max_size) is inserted with zero refs, filled
 * in as a data or tree block extent, and the block group accounting is
 * updated.  Afterwards the backref is added through btrfs_inc_extent_ref():
 * once per found reference for a data backref, once for a tree backref.
 */
8149 static int record_extent(struct btrfs_trans_handle *trans,
8150 struct btrfs_fs_info *info,
8151 struct btrfs_path *path,
8152 struct extent_record *rec,
8153 struct extent_backref *back,
8154 int allocated, u64 flags)
8157 struct btrfs_root *extent_root = info->extent_root;
8158 struct extent_buffer *leaf;
8159 struct btrfs_key ins_key;
8160 struct btrfs_extent_item *ei;
8161 struct data_backref *dback;
8162 struct btrfs_tree_block_info *bi;
8165 rec->max_size = max_t(u64, rec->max_size,
8169 u32 item_size = sizeof(*ei);
8172 item_size += sizeof(*bi);
8174 ins_key.objectid = rec->start;
8175 ins_key.offset = rec->max_size;
8176 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8178 ret = btrfs_insert_empty_item(trans, extent_root, path,
8179 &ins_key, item_size);
8183 leaf = path->nodes[0];
8184 ei = btrfs_item_ptr(leaf, path->slots[0],
8185 struct btrfs_extent_item);
/* the item starts with zero refs; the backref is added further down */
8187 btrfs_set_extent_refs(leaf, ei, 0);
8188 btrfs_set_extent_generation(leaf, ei, rec->generation);
8190 if (back->is_data) {
8191 btrfs_set_extent_flags(leaf, ei,
8192 BTRFS_EXTENT_FLAG_DATA);
8194 struct btrfs_disk_key copy_key;;
/* the tree block info sits right behind the extent item */
8196 bi = (struct btrfs_tree_block_info *)(ei + 1);
8197 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): the first argument of the four calls below looks
 * mis-encoded, a copyright sign where the address of copy_key is expected --
 * confirm against the upstream source.
 */
8200 btrfs_set_disk_key_objectid(©_key,
8201 rec->info_objectid);
8202 btrfs_set_disk_key_type(©_key, 0);
8203 btrfs_set_disk_key_offset(©_key, 0);
8205 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8206 btrfs_set_tree_block_key(leaf, bi, ©_key);
8208 btrfs_set_extent_flags(leaf, ei,
8209 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8212 btrfs_mark_buffer_dirty(leaf);
8213 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8214 rec->max_size, 1, 0);
8217 btrfs_release_path(path);
8220 if (back->is_data) {
8224 dback = to_data_backref(back);
8225 if (back->full_backref)
8226 parent = dback->parent;
8230 for (i = 0; i < dback->found_ref; i++) {
8231 /* if parent != 0, we're doing a full backref
8232 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8233 * just makes the backref allocator create a data
8236 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8237 rec->start, rec->max_size,
8241 BTRFS_FIRST_FREE_OBJECTID :
8247 fprintf(stderr, "adding new data backref"
8248 " on %llu %s %llu owner %llu"
8249 " offset %llu found %d\n",
8250 (unsigned long long)rec->start,
8251 back->full_backref ?
8253 back->full_backref ?
8254 (unsigned long long)parent :
8255 (unsigned long long)dback->root,
8256 (unsigned long long)dback->owner,
8257 (unsigned long long)dback->offset,
8261 struct tree_backref *tback;
8263 tback = to_tree_backref(back);
8264 if (back->full_backref)
8265 parent = tback->parent;
8269 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8270 rec->start, rec->max_size,
8271 parent, tback->root, 0, 0);
8272 fprintf(stderr, "adding new tree backref on "
8273 "start %llu len %llu parent %llu root %llu\n",
8274 rec->start, rec->max_size, parent, tback->root);
8277 btrfs_release_path(path);
// find_entry - scan the entries list for an extent_entry whose bytenr and
// bytes both equal the arguments.
//
// NOTE(review): the statements following the match test, including the
// return, are absent from this copy. verify_backrefs() tests the result with
// !entry, so a miss presumably yields NULL - confirm.
8281 static struct extent_entry *find_entry(struct list_head *entries,
8282 u64 bytenr, u64 bytes)
8284 struct extent_entry *entry = NULL;
8286 list_for_each_entry(entry, entries, list) {
8287 if (entry->bytenr == bytenr && entry->bytes == bytes)
// find_most_right_entry - choose the extent_entry that the backrefs agree on
// most, judged by the count field of each entry.
//
// The visible tests are: an entry whose broken equals its count is checked
// first; a tie between best and the current entry (equal count) is detected;
// prev and the current entry are compared by count, and best is replaced
// when its count is lower than that of the current entry.
//
// NOTE(review): the bodies of most branches, the updates of prev and the
// return statement are absent from this copy, so the exact tie-breaking and
// the return value cannot be documented from here.
8294 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8296 struct extent_entry *entry, *best = NULL, *prev = NULL;
8298 list_for_each_entry(entry, entries, list) {
8300 * If there are as many broken entries as entries then we know
8301 * not to trust this particular entry.
8303 if (entry->broken == entry->count)
8307 * Special case, when there are only two entries and 'best' is
8317 * If our current entry == best then we can't be sure our best
8318 * is really the best, so we need to keep searching.
8320 if (best && best->count == entry->count) {
8326 /* Prev == entry, not good enough, have to keep searching */
8327 if (!prev->broken && prev->count == entry->count)
8331 best = (prev->count > entry->count) ? prev : entry;
8332 else if (best->count < entry->count)
/*
 * repair_ref - rewrite one file extent item so that it agrees with the
 * extent entry chosen as correct.
 *
 * @info:  filesystem info used to look up the subvolume root
 * @path:  scratch path; released after the lookup and again at the end
 * @dback: data backref naming the root, owner and file offset, plus the
 *         disk_bytenr and bytes the file extent is expected to carry
 * @entry: extent entry whose bytenr and bytes are written into the item
 *
 * The root named by dback->root is read, the EXTENT_DATA item matching
 * dback is located with a read-only search, and the same key is then
 * searched again inside a transaction with cow set so the leaf can be
 * modified. disk_bytenr, offset, disk_num_bytes and (for uncompressed
 * extents) ram_bytes are updated, the leaf is marked dirty and the
 * transaction is committed.
 *
 * The last visible statement returns ret when it is non-zero, otherwise the
 * result of the commit. PTR_ERR(trans) is returned right after the
 * transaction start, presumably when that call fails.
 *
 * NOTE(review): the forward-walk loop, most error checks and their return
 * statements are absent from this copy, so other return values cannot be
 * documented from here.
 */
8340 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8341 struct data_backref *dback, struct extent_entry *entry)
8343 struct btrfs_trans_handle *trans;
8344 struct btrfs_root *root;
8345 struct btrfs_file_extent_item *fi;
8346 struct extent_buffer *leaf;
8347 struct btrfs_key key;
/* Look up the root that the backref says owns the file extent. */
8351 key.objectid = dback->root;
8352 key.type = BTRFS_ROOT_ITEM_KEY;
8353 key.offset = (u64)-1;
8354 root = btrfs_read_fs_root(info, &key);
8356 fprintf(stderr, "Couldn't find root for our ref\n");
8361 * The backref points to the original offset of the extent if it was
8362 * split, so we need to search down to the offset we have and then walk
8363 * forward until we find the backref we're looking for.
8365 key.objectid = dback->owner;
8366 key.type = BTRFS_EXTENT_DATA_KEY;
8367 key.offset = dback->offset;
8368 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8370 fprintf(stderr, "Error looking up ref %d\n", ret);
8375 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8376 ret = btrfs_next_leaf(root, path);
8378 fprintf(stderr, "Couldn't find our ref, next\n");
8382 leaf = path->nodes[0];
8383 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8384 if (key.objectid != dback->owner ||
8385 key.type != BTRFS_EXTENT_DATA_KEY) {
8386 fprintf(stderr, "Couldn't find our ref, search\n");
8389 fi = btrfs_item_ptr(leaf, path->slots[0],
8390 struct btrfs_file_extent_item);
8391 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8392 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8394 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8399 btrfs_release_path(path);
8401 trans = btrfs_start_transaction(root, 1);
8403 return PTR_ERR(trans);
8406 * Ok we have the key of the file extent we want to fix, now we can cow
8407 * down to the thing and fix it.
8409 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8411 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8412 key.objectid, key.type, key.offset, ret);
8416 fprintf(stderr, "Well that's odd, we just found this key "
8417 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8422 leaf = path->nodes[0];
8423 fi = btrfs_item_ptr(leaf, path->slots[0],
8424 struct btrfs_file_extent_item);
8426 if (btrfs_file_extent_compression(leaf, fi) &&
8427 dback->disk_bytenr != entry->bytenr) {
8428 fprintf(stderr, "Ref doesn't match the record start and is "
8429 "compressed, please take a btrfs-image of this file "
8430 "system and send it to a btrfs developer so they can "
8431 "complete this functionality for bytenr %Lu\n",
8432 dback->disk_bytenr);
8437 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8438 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8439 } else if (dback->disk_bytenr > entry->bytenr) {
/*
 * NOTE(review): off_diff is computed in this branch, but the line that
 * applies it to offset is not part of this copy.
 */
8440 u64 off_diff, offset;
8442 off_diff = dback->disk_bytenr - entry->bytenr;
8443 offset = btrfs_file_extent_offset(leaf, fi);
8444 if (dback->disk_bytenr + offset +
8445 btrfs_file_extent_num_bytes(leaf, fi) >
8446 entry->bytenr + entry->bytes) {
8447 fprintf(stderr, "Ref is past the entry end, please "
8448 "take a btrfs-image of this file system and "
8449 "send it to a btrfs developer, ref %Lu\n",
8450 dback->disk_bytenr);
8455 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8456 btrfs_set_file_extent_offset(leaf, fi, offset);
8457 } else if (dback->disk_bytenr < entry->bytenr) {
8460 offset = btrfs_file_extent_offset(leaf, fi);
8461 if (dback->disk_bytenr + offset < entry->bytenr) {
8462 fprintf(stderr, "Ref is before the entry start, please"
8463 " take a btrfs-image of this file system and "
8464 "send it to a btrfs developer, ref %Lu\n",
8465 dback->disk_bytenr);
8470 offset += dback->disk_bytenr;
8471 offset -= entry->bytenr;
8472 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8473 btrfs_set_file_extent_offset(leaf, fi, offset);
8476 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8479 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8480 * only do this if we aren't using compression, otherwise it's a
8483 if (!btrfs_file_extent_compression(leaf, fi))
8484 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8486 printf("ram bytes may be wrong?\n");
8487 btrfs_mark_buffer_dirty(leaf);
/* A non-zero ret takes precedence over the commit result. */
8489 err = btrfs_commit_transaction(trans, root);
8490 btrfs_release_path(path);
8491 return ret ? ret : err;
/*
 * verify_backrefs - make the data backrefs of an extent record agree on
 * bytenr and size, repairing file extents that disagree.
 *
 * @info: filesystem info handed through to repair_ref()
 * @path: scratch path handed through to repair_ref()
 * @rec:  extent record whose backrefs list is examined
 *
 * Pass one looks at the data backrefs that are not full backrefs and have
 * found_ref set, and files them into extent_entry buckets keyed by
 * (disk_bytenr, bytes). If the buckets disagree, find_most_right_entry()
 * is asked for a winner; failing that, a bucket for the extent record
 * itself (rec->start, rec->nr) is looked up or added and the winner is
 * computed again. Pass two calls repair_ref() for each such backref whose
 * bytes or disk_bytenr differ from the winner. The bucket list is emptied
 * at the end.
 *
 * NOTE(review): local declarations (including the entries list), counter
 * updates, error paths and the return statements are absent from this
 * copy, so the return value cannot be documented from here.
 */
8494 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8495 struct extent_record *rec)
8497 struct extent_backref *back;
8498 struct data_backref *dback;
8499 struct extent_entry *entry, *best = NULL;
8502 int broken_entries = 0;
8507 * Metadata is easy and the backrefs should always agree on bytenr and
8508 * size, if not we've got bigger issues.
8513 list_for_each_entry(back, &rec->backrefs, list) {
8514 if (back->full_backref || !back->is_data)
8517 dback = to_data_backref(back);
8520 * We only pay attention to backrefs that we found a real
8523 if (dback->found_ref == 0)
8527 * For now we only catch when the bytes don't match, not the
8528 * bytenr. We can easily do this at the same time, but I want
8529 * to have a fs image to test on before we just add repair
8530 * functionality willy-nilly so we know we won't screw up the
8534 entry = find_entry(&entries, dback->disk_bytenr,
8537 entry = malloc(sizeof(struct extent_entry));
8542 memset(entry, 0, sizeof(*entry));
8543 entry->bytenr = dback->disk_bytenr;
8544 entry->bytes = dback->bytes;
8545 list_add_tail(&entry->list, &entries);
8550 * If we only have on entry we may think the entries agree when
8551 * in reality they don't so we have to do some extra checking.
8553 if (dback->disk_bytenr != rec->start ||
8554 dback->bytes != rec->nr || back->broken)
8565 /* Yay all the backrefs agree, carry on good sir */
8566 if (nr_entries <= 1 && !mismatch)
8569 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8570 "%Lu\n", rec->start);
8573 * First we want to see if the backrefs can agree amongst themselves who
8574 * is right, so figure out which one of the entries has the highest
8577 best = find_most_right_entry(&entries);
8580 * Ok so we may have an even split between what the backrefs think, so
8581 * this is where we use the extent ref to see what it thinks.
8584 entry = find_entry(&entries, rec->start, rec->nr);
8585 if (!entry && (!broken_entries || !rec->found_rec)) {
8586 fprintf(stderr, "Backrefs don't agree with each other "
8587 "and extent record doesn't agree with anybody,"
8588 " so we can't fix bytenr %Lu bytes %Lu\n",
8589 rec->start, rec->nr);
8592 } else if (!entry) {
8594 * Ok our backrefs were broken, we'll assume this is the
8595 * correct value and add an entry for this range.
8597 entry = malloc(sizeof(struct extent_entry));
8602 memset(entry, 0, sizeof(*entry));
8603 entry->bytenr = rec->start;
8604 entry->bytes = rec->nr;
8605 list_add_tail(&entry->list, &entries);
8609 best = find_most_right_entry(&entries);
8611 fprintf(stderr, "Backrefs and extent record evenly "
8612 "split on who is right, this is going to "
8613 "require user input to fix bytenr %Lu bytes "
8614 "%Lu\n", rec->start, rec->nr);
8621 * I don't think this can happen currently as we'll abort() if we catch
8622 * this case higher up, but in case somebody removes that we still can't
8623 * deal with it properly here yet, so just bail out of that's the case.
8625 if (best->bytenr != rec->start) {
8626 fprintf(stderr, "Extent start and backref starts don't match, "
8627 "please use btrfs-image on this file system and send "
8628 "it to a btrfs developer so they can make fsck fix "
8629 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8630 rec->start, rec->nr);
8636 * Ok great we all agreed on an extent record, let's go find the real
8637 * references and fix up the ones that don't match.
8639 list_for_each_entry(back, &rec->backrefs, list) {
8640 if (back->full_backref || !back->is_data)
8643 dback = to_data_backref(back);
8646 * Still ignoring backrefs that don't have a real ref attached
8649 if (dback->found_ref == 0)
8652 if (dback->bytes == best->bytes &&
8653 dback->disk_bytenr == best->bytenr)
8656 ret = repair_ref(info, path, dback, best);
8662 * Ok we messed with the actual refs, which means we need to drop our
8663 * entire cache and go back and rescan. I know this is a huge pain and
8664 * adds a lot of extra work, but it's the only way to be safe. Once all
8665 * the backrefs agree we may not need to do anything to the extent
8670 while (!list_empty(&entries)) {
8671 entry = list_entry(entries.next, struct extent_entry, list);
8672 list_del_init(&entry->list);
/*
 * process_duplicates - promote the first duplicate of rec to be the cached
 * extent record and fold overlapping records into it.
 *
 * @extent_cache: cache tree holding the extent records
 * @rec:          extent record that has entries on its dups list
 *
 * rec is removed from the cache and the first entry of rec->dups (good)
 * takes over its refs and backrefs. Records found in the cache by the
 * lookup at good->start are then either queued on good->dups (when they
 * have found_rec set or carry duplicates of their own, in which case good
 * is also put on duplicate_extents) or merged into good by adding their
 * refs and splicing their backrefs. Finally good is inserted into the
 * cache.
 *
 * The last visible statement returns 0 when good has duplicates and 1
 * otherwise.
 *
 * NOTE(review): the value returned by the early exit (found_rec set or more
 * than one duplicate), the loop construct around the lookup and the
 * handling of a failed insert are absent from this copy.
 */
8678 static int process_duplicates(struct cache_tree *extent_cache,
8679 struct extent_record *rec)
8681 struct extent_record *good, *tmp;
8682 struct cache_extent *cache;
8686 * If we found a extent record for this extent then return, or if we
8687 * have more than one duplicate we are likely going to need to delete
8690 if (rec->found_rec || rec->num_duplicates > 1)
8693 /* Shouldn't happen but just in case */
8694 BUG_ON(!rec->num_duplicates);
8697 * So this happens if we end up with a backref that doesn't match the
8698 * actual extent entry. So either the backref is bad or the extent
8699 * entry is bad. Either way we want to have the extent_record actually
8700 * reflect what we found in the extent_tree, so we need to take the
8701 * duplicate out and use that as the extent_record since the only way we
8702 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8704 remove_cache_extent(extent_cache, &rec->cache);
/* Reset the promoted record, then hand it the refs and backrefs of rec. */
8706 good = to_extent_record(rec->dups.next);
8707 list_del_init(&good->list);
8708 INIT_LIST_HEAD(&good->backrefs);
8709 INIT_LIST_HEAD(&good->dups);
8710 good->cache.start = good->start;
8711 good->cache.size = good->nr;
8712 good->content_checked = 0;
8713 good->owner_ref_checked = 0;
8714 good->num_duplicates = 0;
8715 good->refs = rec->refs;
8716 list_splice_init(&rec->backrefs, &good->backrefs);
8718 cache = lookup_cache_extent(extent_cache, good->start,
8722 tmp = container_of(cache, struct extent_record, cache);
8725 * If we find another overlapping extent and it's found_rec is
8726 * set then it's a duplicate and we need to try and delete
8729 if (tmp->found_rec || tmp->num_duplicates > 0) {
8730 if (list_empty(&good->list))
8731 list_add_tail(&good->list,
8732 &duplicate_extents);
8733 good->num_duplicates += tmp->num_duplicates + 1;
8734 list_splice_init(&tmp->dups, &good->dups);
8735 list_del_init(&tmp->list);
8736 list_add_tail(&tmp->list, &good->dups);
8737 remove_cache_extent(extent_cache, &tmp->cache);
8742 * Ok we have another non extent item backed extent rec, so lets
8743 * just add it to this extent and carry on like we did above.
8745 good->refs += tmp->refs;
8746 list_splice_init(&tmp->backrefs, &good->backrefs);
8747 remove_cache_extent(extent_cache, &tmp->cache);
8750 ret = insert_cache_extent(extent_cache, &good->cache);
8753 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - delete the extent items of the duplicates of
 * rec from the extent tree.
 *
 * @root: any root of the filesystem; it is replaced by
 *        root->fs_info->extent_root before the transaction starts
 * @rec:  record whose dups list holds the overlapping extent records
 *
 * The dups list is first scanned to find the record that covers all the
 * others. rec and entries of its dups list are then moved onto a local
 * delete list. Inside one transaction the listed records are looked up by
 * their (start, EXTENT_ITEM, nr) key and removed with btrfs_del_item();
 * the found_rec == 0 test presumably skips records without an on-disk item.
 * Both the delete list and rec->dups are emptied afterwards.
 *
 * Returns ret when it is non-zero, otherwise nr_del. rec->num_duplicates
 * is reset to 0 only when both ret and nr_del are zero.
 *
 * NOTE(review): the initial value of good, the updates of nr_del, the
 * error paths and several conditions are absent from this copy.
 */
8756 static int delete_duplicate_records(struct btrfs_root *root,
8757 struct extent_record *rec)
8759 struct btrfs_trans_handle *trans;
8760 LIST_HEAD(delete_list);
8761 struct btrfs_path path;
8762 struct extent_record *tmp, *good, *n;
8765 struct btrfs_key key;
8767 btrfs_init_path(&path);
8770 /* Find the record that covers all of the duplicates. */
8771 list_for_each_entry(tmp, &rec->dups, list) {
8772 if (good->start < tmp->start)
8774 if (good->nr > tmp->nr)
/*
 * NOTE(review): the message below prints start and nr inside a [a-b]
 * range, but nr is a length (see the start + nr sums in the test).
 */
8777 if (tmp->start + tmp->nr < good->start + good->nr) {
8778 fprintf(stderr, "Ok we have overlapping extents that "
8779 "aren't completely covered by each other, this "
8780 "is going to require more careful thought. "
8781 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8782 tmp->start, tmp->nr, good->start, good->nr);
8789 list_add_tail(&rec->list, &delete_list);
8791 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8794 list_move_tail(&tmp->list, &delete_list);
8797 root = root->fs_info->extent_root;
8798 trans = btrfs_start_transaction(root, 1);
8799 if (IS_ERR(trans)) {
8800 ret = PTR_ERR(trans);
8804 list_for_each_entry(tmp, &delete_list, list) {
8805 if (tmp->found_rec == 0)
8807 key.objectid = tmp->start;
8808 key.type = BTRFS_EXTENT_ITEM_KEY;
8809 key.offset = tmp->nr;
8811 /* Shouldn't happen but just in case */
8812 if (tmp->metadata) {
8813 fprintf(stderr, "Well this shouldn't happen, extent "
8814 "record overlaps but is metadata? "
8815 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8819 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8825 ret = btrfs_del_item(trans, root, &path);
8828 btrfs_release_path(&path);
8831 err = btrfs_commit_transaction(trans, root);
8835 while (!list_empty(&delete_list)) {
8836 tmp = to_extent_record(delete_list.next);
8837 list_del_init(&tmp->list);
8843 while (!list_empty(&rec->dups)) {
8844 tmp = to_extent_record(rec->dups.next);
8845 list_del_init(&tmp->list);
8849 btrfs_release_path(&path);
8851 if (!ret && !nr_del)
8852 rec->num_duplicates = 0;
8854 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - look up data backrefs of rec that were never
 * matched to a file extent and fill them in from the owning tree.
 *
 * @info:         filesystem info used to read the root named by a backref
 * @path:         scratch path, released after every search
 * @extent_cache: cache tree probed at the bytenr read from the file extent
 * @rec:          extent record whose backrefs list is walked
 *
 * Only data backrefs that are not full backrefs and do not have found_ref
 * set reach the lookup. For each one the root dback->root is read and the
 * EXTENT_DATA key (owner, offset) is searched read-only. The disk_bytenr
 * and disk_num_bytes of the item are read, the extent cache is probed at
 * that bytenr, and the backref gets found_ref incremented and its
 * disk_bytenr and bytes overwritten with the values read.
 *
 * A root lookup failing with -ENOENT is treated as a bad ref; PTR_ERR(root)
 * is returned on the path the source comments call other errors.
 *
 * NOTE(review): the continue statements, the conditions around the cache
 * probe, the line marking the backref as not trustworthy and the final
 * return are absent from this copy.
 */
8857 static int find_possible_backrefs(struct btrfs_fs_info *info,
8858 struct btrfs_path *path,
8859 struct cache_tree *extent_cache,
8860 struct extent_record *rec)
8862 struct btrfs_root *root;
8863 struct extent_backref *back;
8864 struct data_backref *dback;
8865 struct cache_extent *cache;
8866 struct btrfs_file_extent_item *fi;
8867 struct btrfs_key key;
8871 list_for_each_entry(back, &rec->backrefs, list) {
8872 /* Don't care about full backrefs (poor unloved backrefs) */
8873 if (back->full_backref || !back->is_data)
8876 dback = to_data_backref(back);
8878 /* We found this one, we don't need to do a lookup */
8879 if (dback->found_ref)
8882 key.objectid = dback->root;
8883 key.type = BTRFS_ROOT_ITEM_KEY;
8884 key.offset = (u64)-1;
8886 root = btrfs_read_fs_root(info, &key);
8888 /* No root, definitely a bad ref, skip */
8889 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8891 /* Other err, exit */
8893 return PTR_ERR(root);
8895 key.objectid = dback->owner;
8896 key.type = BTRFS_EXTENT_DATA_KEY;
8897 key.offset = dback->offset;
8898 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8900 btrfs_release_path(path);
8903 /* Didn't find it, we can carry on */
8908 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8909 struct btrfs_file_extent_item);
8910 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8911 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8912 btrfs_release_path(path);
8913 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8915 struct extent_record *tmp;
8916 tmp = container_of(cache, struct extent_record, cache);
8919 * If we found an extent record for the bytenr for this
8920 * particular backref then we can't add it to our
8921 * current extent record. We only want to add backrefs
8922 * that don't have a corresponding extent item in the
8923 * extent tree since they likely belong to this record
8924 * and we need to fix it if it doesn't match bytenrs.
8930 dback->found_ref += 1;
8931 dback->disk_bytenr = bytenr;
8932 dback->bytes = bytes;
8935 * Set this so the verify backref code knows not to trust the
8936 * values in this backref.
8945 * Record orphan data refs into the corresponding root.
8947 * Return 0 if the extent item contains a data ref and it was recorded.
8948 * Return 1 if the extent item contains no useful data ref.
8949 * In that case, it may contain only a shared_dataref or metadata backref,
8950 * or the file extent exists (this should be handled by the extent bytenr fix routine).
8952 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents - remember data backrefs of rec for which no
 * file extent was found, so they can be used for a later rebuild.
 *
 * @fs_info: filesystem info used to read the destination roots
 * @rec:     extent record; rec->cache.start and rec->cache.size become the
 *           disk_bytenr and disk_len of every orphan recorded
 *
 * Only data backrefs that are not full backrefs and have found_extent_tree
 * set pass the first test. For each candidate the root dback->root is read
 * and the EXTENT_DATA key (owner, offset) is searched read-only. An
 * orphan_data_extent is allocated, filled from the backref and linked to
 * dest_root->orphan_data_extents.
 *
 * The last visible statement returns !recorded_data_ref, i.e. 0 once an
 * orphan has been recorded and 1 otherwise.
 *
 * NOTE(review): the continue statements, the test of the search result and
 * the allocation failure path are absent from this copy.
 */
8954 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8955 struct extent_record *rec)
8957 struct btrfs_key key;
8958 struct btrfs_root *dest_root;
8959 struct extent_backref *back;
8960 struct data_backref *dback;
8961 struct orphan_data_extent *orphan;
8962 struct btrfs_path path;
8963 int recorded_data_ref = 0;
8968 btrfs_init_path(&path);
8969 list_for_each_entry(back, &rec->backrefs, list) {
8970 if (back->full_backref || !back->is_data ||
8971 !back->found_extent_tree)
8973 dback = to_data_backref(back);
8974 if (dback->found_ref)
8976 key.objectid = dback->root;
8977 key.type = BTRFS_ROOT_ITEM_KEY;
8978 key.offset = (u64)-1;
8980 dest_root = btrfs_read_fs_root(fs_info, &key);
8982 /* For non-exist root we just skip it */
8983 if (IS_ERR(dest_root) || !dest_root)
8986 key.objectid = dback->owner;
8987 key.type = BTRFS_EXTENT_DATA_KEY;
8988 key.offset = dback->offset;
8990 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8991 btrfs_release_path(&path);
8993 * For ret < 0, it's OK since the fs-tree may be corrupted,
8994 * we need to record it for inode/file extent rebuild.
8995 * For ret > 0, we record it only for file extent rebuild.
8996 * For ret == 0, the file extent exists but only bytenr
8997 * mismatch, let the original bytenr fix routine to handle,
9003 orphan = malloc(sizeof(*orphan));
9008 INIT_LIST_HEAD(&orphan->list);
9009 orphan->root = dback->root;
9010 orphan->objectid = dback->owner;
9011 orphan->offset = dback->offset;
9012 orphan->disk_bytenr = rec->cache.start;
9013 orphan->disk_len = rec->cache.size;
/*
 * NOTE(review): with the kernel-style list_add(new, head) signature these
 * two arguments look swapped (the per-root list head is passed as the new
 * node) - confirm against list.h.
 */
9014 list_add(&dest_root->orphan_data_extents, &orphan->list);
9015 recorded_data_ref = 1;
9018 btrfs_release_path(&path);
9020 return !recorded_data_ref;
9026 * when an incorrect extent item is found, this will delete
9027 * all of the existing entries for it and recreate them
9028 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree entries of one extent record
 * from the backrefs that were found.
 *
 * @info:         filesystem info; info->extent_root and
 *                info->corrupt_blocks are used
 * @extent_cache: cache tree handed to find_possible_backrefs()
 * @rec:          extent record to repair
 *
 * Visible sequence:
 *   - for data records whose refs differ from extent_item_refs, call
 *     find_possible_backrefs() and then verify_backrefs();
 *   - start a transaction on the extent root;
 *   - delete the existing records with delete_extent_records();
 *   - look the range up in info->corrupt_blocks;
 *   - walk rec->backrefs and call record_extent() for backrefs that pass
 *     the found_ref test, clearing rec->bad_full_backref;
 *   - commit the transaction, report the repair and release the path.
 *
 * NOTE(review): the error checks after each step, the advance of cur in
 * the loop, the updates of allocated and the return statement are absent
 * from this copy.
 */
9030 static int fixup_extent_refs(struct btrfs_fs_info *info,
9031 struct cache_tree *extent_cache,
9032 struct extent_record *rec)
9034 struct btrfs_trans_handle *trans = NULL;
9036 struct btrfs_path path;
9037 struct list_head *cur = rec->backrefs.next;
9038 struct cache_extent *cache;
9039 struct extent_backref *back;
9043 if (rec->flag_block_full_backref)
9044 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9046 btrfs_init_path(&path);
9047 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9049 * Sometimes the backrefs themselves are so broken they don't
9050 * get attached to any meaningful rec, so first go back and
9051 * check any of our backrefs that we couldn't find and throw
9052 * them into the list if we find the backref so that
9053 * verify_backrefs can figure out what to do.
9055 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9060 /* step one, make sure all of the backrefs agree */
9061 ret = verify_backrefs(info, &path, rec);
9065 trans = btrfs_start_transaction(info->extent_root, 1);
9066 if (IS_ERR(trans)) {
9067 ret = PTR_ERR(trans);
9071 /* step two, delete all the existing records */
9072 ret = delete_extent_records(trans, info->extent_root, &path,
9078 /* was this block corrupt? If so, don't add references to it */
9079 cache = lookup_cache_extent(info->corrupt_blocks,
9080 rec->start, rec->max_size);
9086 /* step three, recreate all the refs we did find */
9087 while(cur != &rec->backrefs) {
9088 back = to_extent_backref(cur);
9092 * if we didn't find any references, don't create a
9095 if (!back->found_ref)
9098 rec->bad_full_backref = 0;
9099 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9107 int err = btrfs_commit_transaction(trans, info->extent_root);
9113 fprintf(stderr, "Repaired extent references for %llu\n",
9114 (unsigned long long)rec->start);
9116 btrfs_release_path(&path);
/*
 * fixup_extent_flags - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * extent item of rec.
 *
 * @fs_info: filesystem info; its extent_root is searched and modified
 * @rec:     extent record; metadata, info_level and max_size select the
 *           key, flag_block_full_backref selects set versus clear
 *
 * The key is (rec->start, METADATA_ITEM, info_level) for metadata records
 * and (rec->start, EXTENT_ITEM, max_size) otherwise. The item is looked up
 * with cow inside a transaction, its flags are rewritten, the leaf is
 * marked dirty and the transaction is committed.
 *
 * PTR_ERR(trans) is returned right after the transaction start, presumably
 * when that call fails.
 *
 * NOTE(review): on the two early exit paths the result of
 * btrfs_commit_transaction() is discarded. The conditions guarding those
 * paths and the remaining return statements are absent from this copy.
 */
9120 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9121 struct extent_record *rec)
9123 struct btrfs_trans_handle *trans;
9124 struct btrfs_root *root = fs_info->extent_root;
9125 struct btrfs_path path;
9126 struct btrfs_extent_item *ei;
9127 struct btrfs_key key;
9131 key.objectid = rec->start;
9132 if (rec->metadata) {
9133 key.type = BTRFS_METADATA_ITEM_KEY;
9134 key.offset = rec->info_level;
9136 key.type = BTRFS_EXTENT_ITEM_KEY;
9137 key.offset = rec->max_size;
9140 trans = btrfs_start_transaction(root, 0);
9142 return PTR_ERR(trans);
9144 btrfs_init_path(&path);
9145 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9147 btrfs_release_path(&path);
9148 btrfs_commit_transaction(trans, root);
9151 fprintf(stderr, "Didn't find extent for %llu\n",
9152 (unsigned long long)rec->start);
9153 btrfs_release_path(&path);
9154 btrfs_commit_transaction(trans, root);
9158 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9159 struct btrfs_extent_item);
9160 flags = btrfs_extent_flags(path.nodes[0], ei);
9161 if (rec->flag_block_full_backref) {
9162 fprintf(stderr, "setting full backref on %llu\n",
9163 (unsigned long long)key.objectid);
9164 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9166 fprintf(stderr, "clearing full backref on %llu\n",
9167 (unsigned long long)key.objectid);
9168 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9170 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9171 btrfs_mark_buffer_dirty(path.nodes[0]);
9172 btrfs_release_path(&path);
9173 ret = btrfs_commit_transaction(trans, root);
9175 fprintf(stderr, "Repaired extent flags for %llu\n",
9176 (unsigned long long)rec->start);
9181 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - remove the pointer to one corrupt block from its
 * parent node in the extent tree.
 *
 * @trans:   transaction handle for the cow search
 * @info:    filesystem info; only info->extent_root is searched
 * @corrupt: corrupt block; its key, level and cache.start are read
 *
 * The search is stopped one level above the corrupt block (lowest_level is
 * corrupt->level + 1). The slot chosen by the search is tried first; if its
 * block pointer is not corrupt->cache.start, every slot of that node is
 * scanned. A matching pointer is removed with btrfs_del_ptr().
 *
 * NOTE(review): the error checks, the jumps between the lookup stages, the
 * handling of the case where the node is the tree root and the return
 * statements are absent from this copy.
 */
9182 static int prune_one_block(struct btrfs_trans_handle *trans,
9183 struct btrfs_fs_info *info,
9184 struct btrfs_corrupt_block *corrupt)
9187 struct btrfs_path path;
9188 struct extent_buffer *eb;
9192 int level = corrupt->level + 1;
9194 btrfs_init_path(&path);
9196 /* we want to stop at the parent to our busted block */
9197 path.lowest_level = level;
9199 ret = btrfs_search_slot(trans, info->extent_root,
9200 &corrupt->key, &path, -1, 1);
9205 eb = path.nodes[level];
9212 * hopefully the search gave us the block we want to prune,
9213 * lets try that first
9215 slot = path.slots[level];
9216 found = btrfs_node_blockptr(eb, slot);
9217 if (found == corrupt->cache.start)
9220 nritems = btrfs_header_nritems(eb);
9222 /* the search failed, lets scan this node and hope we find it */
9223 for (slot = 0; slot < nritems; slot++) {
9224 found = btrfs_node_blockptr(eb, slot);
9225 if (found == corrupt->cache.start)
9229 * we couldn't find the bad block. TODO, search all the nodes for pointers
9232 if (eb == info->extent_root->node) {
9237 btrfs_release_path(&path);
9242 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9243 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9246 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - prune the blocks recorded in info->corrupt_blocks
 * from the extent tree.
 *
 * @info: filesystem info holding the corrupt_blocks cache and extent_root
 *
 * An entry is fetched from the cache with search_cache_extent(), a
 * transaction is started on the extent root, prune_one_block() is called
 * for the entry and the entry is removed from the cache. The last visible
 * statement returns the result of committing the transaction;
 * PTR_ERR(trans) is returned right after the transaction start, presumably
 * when that call fails.
 *
 * NOTE(review): the return value of prune_one_block() is discarded. The
 * loop construct and the conditions around the transaction start are
 * absent from this copy.
 */
9250 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9252 struct btrfs_trans_handle *trans = NULL;
9253 struct cache_extent *cache;
9254 struct btrfs_corrupt_block *corrupt;
9257 cache = search_cache_extent(info->corrupt_blocks, 0);
9261 trans = btrfs_start_transaction(info->extent_root, 1);
9263 return PTR_ERR(trans);
9265 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9266 prune_one_block(trans, info, corrupt);
9267 remove_cache_extent(info->corrupt_blocks, cache);
9270 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear the EXTENT_DIRTY ranges of
 * fs_info->free_space_cache and then step through the block groups.
 *
 * @fs_info: filesystem info whose free_space_cache and block groups are
 *           visited
 *
 * Dirty ranges are located with find_first_extent_bit() and cleared with
 * clear_extent_dirty(). Block groups are looked up from start, which is
 * advanced to the end of each group (key.objectid + key.offset).
 *
 * NOTE(review): the loop constructs, their exit conditions and whatever is
 * done to each block group are absent from this copy.
 */
9274 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9276 struct btrfs_block_group_cache *cache;
9281 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9282 &start, &end, EXTENT_DIRTY);
9285 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9290 cache = btrfs_lookup_first_block_group(fs_info, start);
9295 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and optionally repair, every inconsistency
 * recorded in the extent cache.
 *
 * @root:         any root of the filesystem; root->fs_info is used
 *                throughout
 * @extent_cache: cache tree of extent records; records are removed from it
 *                as they are processed
 *
 * Visible sequence:
 *   - mark the range of every cached record and every corrupt block dirty
 *     in fs_info->excluded_extents, then call prune_corrupt_blocks() and
 *     reset_cached_block_groups();
 *   - while repairing, drain duplicate_extents through
 *     process_duplicates() and delete_duplicate_records();
 *   - for each cached record report multiple extent items, ref count
 *     mismatches (also calling record_orphan_data_extents()), backpointer
 *     mismatches and failed owner ref checks, and call fixup_extent_refs()
 *     when repair and fix are both set;
 *   - report bad full backrefs (calling fixup_extent_flags()), metadata
 *     crossing a stripe boundary and chunk type mismatches;
 *   - drop the record from the cache, free its backrefs and clear its
 *     range from excluded_extents when the visible condition holds;
 *   - at the end either print that the repair failed or run
 *     btrfs_fix_block_accounting() inside a transaction.
 *
 * NOTE(review): the loop constructs, the conditions that guard the repair
 * sections, the updates of fix, cur_err and the error counter, and the
 * return statements are absent from this copy, so the return value cannot
 * be documented from here.
 */
9299 static int check_extent_refs(struct btrfs_root *root,
9300 struct cache_tree *extent_cache)
9302 struct extent_record *rec;
9303 struct cache_extent *cache;
9309 * if we're doing a repair, we have to make sure
9310 * we don't allocate from the problem extents.
9311 * In the worst case, this will be all the
9314 cache = search_cache_extent(extent_cache, 0);
9316 rec = container_of(cache, struct extent_record, cache);
9317 set_extent_dirty(root->fs_info->excluded_extents,
9319 rec->start + rec->max_size - 1);
9320 cache = next_cache_extent(cache);
9323 /* pin down all the corrupted blocks too */
9324 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9326 set_extent_dirty(root->fs_info->excluded_extents,
9328 cache->start + cache->size - 1);
9329 cache = next_cache_extent(cache);
9331 prune_corrupt_blocks(root->fs_info);
9332 reset_cached_block_groups(root->fs_info);
9335 reset_cached_block_groups(root->fs_info);
9338 * We need to delete any duplicate entries we find first otherwise we
9339 * could mess up the extent tree when we have backrefs that actually
9340 * belong to a different extent item and not the weird duplicate one.
9342 while (repair && !list_empty(&duplicate_extents)) {
9343 rec = to_extent_record(duplicate_extents.next);
9344 list_del_init(&rec->list);
9346 /* Sometimes we can find a backref before we find an actual
9347 * extent, so we need to process it a little bit to see if there
9348 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9349 * if this is a backref screwup. If we need to delete stuff
9350 * process_duplicates() will return 0, otherwise it will return
9353 if (process_duplicates(extent_cache, rec))
9355 ret = delete_duplicate_records(root, rec);
9359 * delete_duplicate_records will return the number of entries
9360 * deleted, so if it's greater than 0 then we know we actually
9361 * did something and we need to remove.
9374 cache = search_cache_extent(extent_cache, 0);
9377 rec = container_of(cache, struct extent_record, cache);
9378 if (rec->num_duplicates) {
9379 fprintf(stderr, "extent item %llu has multiple extent "
9380 "items\n", (unsigned long long)rec->start);
9384 if (rec->refs != rec->extent_item_refs) {
9385 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9386 (unsigned long long)rec->start,
9387 (unsigned long long)rec->nr);
9388 fprintf(stderr, "extent item %llu, found %llu\n",
9389 (unsigned long long)rec->extent_item_refs,
9390 (unsigned long long)rec->refs);
9391 ret = record_orphan_data_extents(root->fs_info, rec);
9397 if (all_backpointers_checked(rec, 1)) {
9398 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9399 (unsigned long long)rec->start,
9400 (unsigned long long)rec->nr);
9404 if (!rec->owner_ref_checked) {
9405 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9406 (unsigned long long)rec->start,
9407 (unsigned long long)rec->nr);
9412 if (repair && fix) {
9413 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9419 if (rec->bad_full_backref) {
9420 fprintf(stderr, "bad full backref, on [%llu]\n",
9421 (unsigned long long)rec->start);
9423 ret = fixup_extent_flags(root->fs_info, rec);
9431 * Although it's not a extent ref's problem, we reuse this
9432 * routine for error reporting.
9433 * No repair function yet.
9435 if (rec->crossing_stripes) {
9437 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9438 rec->start, rec->start + rec->max_size);
9442 if (rec->wrong_chunk_type) {
9444 "bad extent [%llu, %llu), type mismatch with chunk\n",
9445 rec->start, rec->start + rec->max_size);
9449 remove_cache_extent(extent_cache, cache);
9450 free_all_extent_backrefs(rec);
9451 if (!init_extent_tree && repair && (!cur_err || fix))
9452 clear_extent_dirty(root->fs_info->excluded_extents,
9454 rec->start + rec->max_size - 1);
9459 if (ret && ret != -EAGAIN) {
9460 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9463 struct btrfs_trans_handle *trans;
9465 root = root->fs_info->extent_root;
9466 trans = btrfs_start_transaction(root, 1);
9467 if (IS_ERR(trans)) {
9468 ret = PTR_ERR(trans);
9472 btrfs_fix_block_accounting(trans, root);
9473 ret = btrfs_commit_transaction(trans, root);
// calc_stripe_length - derive the per-stripe length of a chunk from its
// profile bits.
//
// type:        block group flags; the RAID0, RAID10, RAID5 and RAID6 bits
//              are tested in that order
// length:      chunk length that is divided
// num_stripes: number of stripes of the chunk
//
// RAID0 divides length by num_stripes, RAID10 divides twice the length by
// num_stripes, RAID5 divides by num_stripes - 1 and RAID6 by
// num_stripes - 2. In the remaining visible assignment stripe_size is
// length itself.
//
// NOTE(review): nothing visible guards the divisors, so num_stripes of 0
// (RAID0, RAID10), 1 (RAID5) or 2 (RAID6) would divide by zero - confirm
// that callers validate the chunk first.
// NOTE(review): the declaration of stripe_size and the return statement are
// absent from this copy.
9482 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9486 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9487 stripe_size = length;
9488 stripe_size /= num_stripes;
9489 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9490 stripe_size = length * 2;
9491 stripe_size /= num_stripes;
9492 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9493 stripe_size = length;
9494 stripe_size /= (num_stripes - 1);
9495 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9496 stripe_size = length;
9497 stripe_size /= (num_stripes - 2);
9499 stripe_size = length;
9505 * Check the chunk with its block group/dev list ref:
9506 * Return 0 if all refs seem valid.
9507 * Return 1 if only part of the refs seem valid and a later check is needed
9508 * to rebuild the rest (e.g. a missing block group, rebuilt by searching the extent tree).
9509 * Return -1 if essential refs are missing and cannot be rebuilt.
// check_chunk_refs - cross-check one chunk record against the block group
// cache and the device extent cache.
//
// chunk_rec:         chunk to verify; length, offset, type_flags,
//                    num_stripes and stripes[] are read
// block_group_cache: tree searched for the block group of the chunk
// dev_extent_cache:  tree searched for one device extent per stripe
//
// Block group check: a block group found by the lookup must match the chunk
// in length/offset, offset/objectid and flags. A matching record is
// unlinked from its list and stored in chunk_rec->bg_rec. A mismatch or a
// missing block group is reported.
//
// Stripe check: the expected device extent length comes from
// calc_stripe_length(). For every stripe the device extent is looked up by
// (devid, offset, length) and compared on objectid, offset, chunk_offset
// and length. Matching extents are moved to chunk_rec->dextents. A mismatch
// or a missing extent is reported.
//
// NOTE(review): the last parameter of the signature, the lookup arguments
// for the block group, the role of metadump_v2, the conditions around the
// messages and every return statement are absent from this copy.
9511 static int check_chunk_refs(struct chunk_record *chunk_rec,
9512 struct block_group_tree *block_group_cache,
9513 struct device_extent_tree *dev_extent_cache,
9516 struct cache_extent *block_group_item;
9517 struct block_group_record *block_group_rec;
9518 struct cache_extent *dev_extent_item;
9519 struct device_extent_record *dev_extent_rec;
9523 int metadump_v2 = 0;
9527 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9530 if (block_group_item) {
9531 block_group_rec = container_of(block_group_item,
9532 struct block_group_record,
9534 if (chunk_rec->length != block_group_rec->offset ||
9535 chunk_rec->offset != block_group_rec->objectid ||
9537 chunk_rec->type_flags != block_group_rec->flags)) {
9540 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9541 chunk_rec->objectid,
9546 chunk_rec->type_flags,
9547 block_group_rec->objectid,
9548 block_group_rec->type,
9549 block_group_rec->offset,
9550 block_group_rec->offset,
9551 block_group_rec->objectid,
9552 block_group_rec->flags);
9555 list_del_init(&block_group_rec->list);
9556 chunk_rec->bg_rec = block_group_rec;
9561 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9562 chunk_rec->objectid,
9567 chunk_rec->type_flags);
// Every stripe is expected to be backed by a device extent of this length.
9574 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9575 chunk_rec->num_stripes);
9576 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9577 devid = chunk_rec->stripes[i].devid;
9578 offset = chunk_rec->stripes[i].offset;
9579 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9580 devid, offset, length);
9581 if (dev_extent_item) {
9582 dev_extent_rec = container_of(dev_extent_item,
9583 struct device_extent_record,
9585 if (dev_extent_rec->objectid != devid ||
9586 dev_extent_rec->offset != offset ||
9587 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9588 dev_extent_rec->length != length) {
9591 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9592 chunk_rec->objectid,
9595 chunk_rec->stripes[i].devid,
9596 chunk_rec->stripes[i].offset,
9597 dev_extent_rec->objectid,
9598 dev_extent_rec->offset,
9599 dev_extent_rec->length);
9602 list_move(&dev_extent_rec->chunk_list,
9603 &chunk_rec->dextents);
9608 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9609 chunk_rec->objectid,
9612 chunk_rec->stripes[i].devid,
9613 chunk_rec->stripes[i].offset);
9620 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every record in @chunk_cache and sort the chunk
 * records onto the lists supplied by the caller, then report the block groups
 * left on block_group_cache->block_groups and the device extents left on
 * dev_extent_cache->no_chunk_orphans as having no chunk.
 *
 * @good:    receives chunks whose check returned 0; skipped when NULL
 * @rebuild: receives chunks whose check returned > 0; skipped when NULL
 * @bad:     receives the remaining chunks
 *           NOTE(review): the condition guarding this list is not visible
 *           here; presumably err < 0 with a NULL test - confirm.
 * @silent:  forwarded to check_chunk_refs()
 */
9621 int check_chunks(struct cache_tree *chunk_cache,
9622 struct block_group_tree *block_group_cache,
9623 struct device_extent_tree *dev_extent_cache,
9624 struct list_head *good, struct list_head *bad,
9625 struct list_head *rebuild, int silent)
9627 struct cache_extent *chunk_item;
9628 struct chunk_record *chunk_rec;
9629 struct block_group_record *bg_rec;
9630 struct device_extent_record *dext_rec;
/* Pass 1: classify every chunk record by the result of its reference check. */
9634 chunk_item = first_cache_extent(chunk_cache);
9635 while (chunk_item) {
9636 chunk_rec = container_of(chunk_item, struct chunk_record,
9638 err = check_chunk_refs(chunk_rec, block_group_cache,
9639 dev_extent_cache, silent);
9642 if (err == 0 && good)
9643 list_add_tail(&chunk_rec->list, good);
9644 if (err > 0 && rebuild)
9645 list_add_tail(&chunk_rec->list, rebuild);
9647 list_add_tail(&chunk_rec->list, bad);
9648 chunk_item = next_cache_extent(chunk_item);
/*
 * Pass 2: check_chunk_refs() unlinks every block group it pairs with a chunk,
 * so whatever is still linked here was not paired.
 */
9651 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9654 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9662 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9666 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Add up the lengths of the device extents cached for dev_rec->devid and
 * compare the sum with dev_rec->byte_used, printing a message on mismatch.
 *
 * The walk starts at search_cache_extent2(..., devid, 0) and advances with
 * next_cache_extent().  Every extent that is counted is also unlinked from
 * its device_list.
 * NOTE(review): presumably that unlinking is what keeps counted extents out
 * of the no_device_orphans report in check_devices() - confirm.
 */
9677 static int check_device_used(struct device_record *dev_rec,
9678 struct device_extent_tree *dext_cache)
9680 struct cache_extent *cache;
9681 struct device_extent_record *dev_extent_rec;
9684 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9686 dev_extent_rec = container_of(cache,
9687 struct device_extent_record,
9689 if (dev_extent_rec->objectid != dev_rec->devid)
9692 list_del_init(&dev_extent_rec->device_list);
9693 total_byte += dev_extent_rec->length;
9694 cache = next_cache_extent(cache);
/* The dev item must account for exactly the bytes covered by its extents. */
9697 if (total_byte != dev_rec->byte_used) {
9699 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9700 total_byte, dev_rec->byte_used, dev_rec->objectid,
9701 dev_rec->type, dev_rec->offset);
9708 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device_record in the @dev_cache rb-tree,
 * walking it with rb_first()/rb_next(), then report each device extent still
 * linked on dev_extent_cache->no_device_orphans as having no device.
 */
9709 static int check_devices(struct rb_root *dev_cache,
9710 struct device_extent_tree *dev_extent_cache)
9712 struct rb_node *dev_node;
9713 struct device_record *dev_rec;
9714 struct device_extent_record *dext_rec;
9718 dev_node = rb_first(dev_cache);
9720 dev_rec = container_of(dev_node, struct device_record, node);
9721 err = check_device_used(dev_rec, dev_extent_cache);
9725 dev_node = rb_next(dev_node);
/* Anything left on the orphan list was not matched to a device above. */
9727 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9730 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9731 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @head:          list that receives the new record
 * @objectid:      stored in ri_rec->objectid
 * @bytenr:        stored in ri_rec->bytenr
 * @last_snapshot: stored in ri_rec->last_snapshot
 * @level:         stored in ri_rec->level
 * @drop_level:    stored in ri_rec->drop_level
 * @drop_key:      copied by value into ri_rec->drop_key
 *
 * NOTE(review): callers in this file pass NULL for @drop_key, so the memcpy()
 * must be guarded; the guard and the malloc() failure check are presumably on
 * lines not shown here - confirm.
 */
9738 static int add_root_item_to_list(struct list_head *head,
9739 u64 objectid, u64 bytenr, u64 last_snapshot,
9740 u8 level, u8 drop_level,
9741 struct btrfs_key *drop_key)
9744 struct root_item_record *ri_rec;
9745 ri_rec = malloc(sizeof(*ri_rec));
9748 ri_rec->bytenr = bytenr;
9749 ri_rec->objectid = objectid;
9750 ri_rec->level = level;
9751 ri_rec->drop_level = drop_level;
9752 ri_rec->last_snapshot = last_snapshot;
9754 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9755 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list by repeatedly taking its first root_item_record and unlinking
 * it until list_empty() reports true.
 * NOTE(review): the record is presumably freed right after being unlinked -
 * confirm.
 */
9760 static void free_root_item_list(struct list_head *list)
9762 struct root_item_record *ri_rec;
9764 while (!list_empty(list)) {
9765 ri_rec = list_first_entry(list, struct root_item_record,
9767 list_del_init(&ri_rec->list);
/*
 * Drain @list of root_item_records.  For each record the root tree block at
 * rec->bytenr is read, queued with add_root_to_pending() under rec->objectid,
 * and the record is unlinked from @list once its buffer has been released.
 *
 * run_next_block() is called in two places: once with the current record and
 * once, after the list loop, with NULL in place of a record.
 * NOTE(review): the conditions selecting between the two are not visible
 * here; the comment in the body ties the per-record call to rebuilding the
 * extent tree - confirm.
 *
 * @root, @bits and the cache/tree arguments are forwarded unchanged to
 * add_root_to_pending() and run_next_block().
 */
9772 static int deal_root_from_list(struct list_head *list,
9773 struct btrfs_root *root,
9774 struct block_info *bits,
9776 struct cache_tree *pending,
9777 struct cache_tree *seen,
9778 struct cache_tree *reada,
9779 struct cache_tree *nodes,
9780 struct cache_tree *extent_cache,
9781 struct cache_tree *chunk_cache,
9782 struct rb_root *dev_cache,
9783 struct block_group_tree *block_group_cache,
9784 struct device_extent_tree *dev_extent_cache)
9789 while (!list_empty(list)) {
9790 struct root_item_record *rec;
9791 struct extent_buffer *buf;
9792 rec = list_entry(list->next,
9793 struct root_item_record, list);
/* A root block that cannot be read up to date is released, not queued. */
9795 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9796 if (!extent_buffer_uptodate(buf)) {
9797 free_extent_buffer(buf);
9801 ret = add_root_to_pending(buf, extent_cache, pending,
9802 seen, nodes, rec->objectid);
9806 * To rebuild extent tree, we need deal with snapshot
9807 * one by one, otherwise we deal with node firstly which
9808 * can maximize readahead.
9811 ret = run_next_block(root, bits, bits_nr, &last,
9812 pending, seen, reada, nodes,
9813 extent_cache, chunk_cache,
9814 dev_cache, block_group_cache,
9815 dev_extent_cache, rec);
9819 free_extent_buffer(buf);
9820 list_del(&rec->list);
/* Second call site: same arguments, but no root_item_record. */
9826 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9827 reada, nodes, extent_cache, chunk_cache,
9828 dev_cache, block_group_cache,
9829 dev_extent_cache, NULL);
/*
 * Walk the tree root, the chunk root and every root item found in the tree
 * root, then verify chunks, extent references and devices.
 *
 * Flow as far as it is visible here:
 *  1. initialise the local caches and lists and hook them into root->fs_info;
 *  2. queue the tree root and the chunk root on normal_trees;
 *  3. scan ROOT_ITEM keys in the tree root: a root whose drop_progress
 *     objectid is 0 goes on normal_trees, any other root goes on
 *     dropping_trees together with its drop key and drop level;
 *  4. process both lists with deal_root_from_list();
 *  5. run check_chunks(), check_extent_refs() and check_devices();
 *  6. reset the fs_info pointers to NULL and free the caches.
 */
9839 static int check_chunks_and_extents(struct btrfs_root *root)
9841 struct rb_root dev_cache;
9842 struct cache_tree chunk_cache;
9843 struct block_group_tree block_group_cache;
9844 struct device_extent_tree dev_extent_cache;
9845 struct cache_tree extent_cache;
9846 struct cache_tree seen;
9847 struct cache_tree pending;
9848 struct cache_tree reada;
9849 struct cache_tree nodes;
9850 struct extent_io_tree excluded_extents;
9851 struct cache_tree corrupt_blocks;
9852 struct btrfs_path path;
9853 struct btrfs_key key;
9854 struct btrfs_key found_key;
9856 struct block_info *bits;
9858 struct extent_buffer *leaf;
9860 struct btrfs_root_item ri;
9861 struct list_head dropping_trees;
9862 struct list_head normal_trees;
9863 struct btrfs_root *root1;
/* Start every local cache and list empty. */
9867 dev_cache = RB_ROOT;
9868 cache_tree_init(&chunk_cache);
9869 block_group_tree_init(&block_group_cache);
9870 device_extent_tree_init(&dev_extent_cache);
9872 cache_tree_init(&extent_cache);
9873 cache_tree_init(&seen);
9874 cache_tree_init(&pending);
9875 cache_tree_init(&nodes);
9876 cache_tree_init(&reada);
9877 cache_tree_init(&corrupt_blocks);
9878 extent_io_tree_init(&excluded_extents);
9879 INIT_LIST_HEAD(&dropping_trees);
9880 INIT_LIST_HEAD(&normal_trees);
/*
 * Publish the stack-local trees and the free hook through fs_info; all four
 * pointers are reset to NULL in the cleanup code further down.
 */
9883 root->fs_info->excluded_extents = &excluded_extents;
9884 root->fs_info->fsck_extent_cache = &extent_cache;
9885 root->fs_info->free_extent_hook = free_extent_hook;
9886 root->fs_info->corrupt_blocks = &corrupt_blocks;
9890 bits = malloc(bits_nr * sizeof(struct block_info));
9896 if (ctx.progress_enabled) {
9897 ctx.tp = TASK_EXTENTS;
9898 task_start(ctx.info);
/* Seed the work list with the tree root and the chunk root. */
9902 root1 = root->fs_info->tree_root;
9903 level = btrfs_header_level(root1->node);
9904 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9905 root1->node->start, 0, level, 0, NULL);
9908 root1 = root->fs_info->chunk_root;
9909 level = btrfs_header_level(root1->node);
9910 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9911 root1->node->start, 0, level, 0, NULL);
/* Scan the tree root for ROOT_ITEM keys and queue each root that is found. */
9914 btrfs_init_path(&path);
9917 key.type = BTRFS_ROOT_ITEM_KEY;
9918 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9923 leaf = path.nodes[0];
9924 slot = path.slots[0];
9925 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9926 ret = btrfs_next_leaf(root, &path);
9929 leaf = path.nodes[0];
9930 slot = path.slots[0];
9932 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9933 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9934 unsigned long offset;
9937 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9938 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9939 last_snapshot = btrfs_root_last_snapshot(&ri);
9940 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9941 level = btrfs_root_level(&ri);
9942 ret = add_root_item_to_list(&normal_trees,
9944 btrfs_root_bytenr(&ri),
9945 last_snapshot, level,
9950 level = btrfs_root_level(&ri);
9951 objectid = found_key.objectid;
9952 btrfs_disk_key_to_cpu(&found_key,
9954 ret = add_root_item_to_list(&dropping_trees,
9956 btrfs_root_bytenr(&ri),
9957 last_snapshot, level,
9958 ri.drop_level, &found_key);
9965 btrfs_release_path(&path);
9968 * check_block can return -EAGAIN if it fixes something, please keep
9969 * this in mind when dealing with return values from these functions, if
9970 * we get -EAGAIN we want to fall through and restart the loop.
9972 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9973 &seen, &reada, &nodes, &extent_cache,
9974 &chunk_cache, &dev_cache, &block_group_cache,
9981 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9982 &pending, &seen, &reada, &nodes,
9983 &extent_cache, &chunk_cache, &dev_cache,
9984 &block_group_cache, &dev_extent_cache);
/* Verification proper; check_chunks() gets no result lists and is not silent. */
9991 ret = check_chunks(&chunk_cache, &block_group_cache,
9992 &dev_extent_cache, NULL, NULL, NULL, 0);
9999 ret = check_extent_refs(root, &extent_cache);
10001 if (ret == -EAGAIN)
10006 ret = check_devices(&dev_cache, &dev_extent_cache);
10011 task_stop(ctx.info);
/* Cleanup sequence 1: free the corrupt block tree, unhook fs_info, free caches. */
10013 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10014 extent_io_tree_cleanup(&excluded_extents);
10015 root->fs_info->fsck_extent_cache = NULL;
10016 root->fs_info->free_extent_hook = NULL;
10017 root->fs_info->corrupt_blocks = NULL;
10018 root->fs_info->excluded_extents = NULL;
10021 free_chunk_cache_tree(&chunk_cache);
10022 free_device_cache_tree(&dev_cache);
10023 free_block_group_tree(&block_group_cache);
10024 free_device_extent_tree(&dev_extent_cache);
10025 free_extent_cache_tree(&seen);
10026 free_extent_cache_tree(&pending);
10027 free_extent_cache_tree(&reada);
10028 free_extent_cache_tree(&nodes);
10029 free_root_item_list(&normal_trees);
10030 free_root_item_list(&dropping_trees);
/*
 * Cleanup sequence 2: frees the same structures plus the extent record cache,
 * without resetting the fs_info pointers.
 * NOTE(review): presumably this is the -EAGAIN restart path described in the
 * comment above the deal_root_from_list() calls - confirm.
 */
10033 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10034 free_extent_cache_tree(&seen);
10035 free_extent_cache_tree(&pending);
10036 free_extent_cache_tree(&reada);
10037 free_extent_cache_tree(&nodes);
10038 free_chunk_cache_tree(&chunk_cache);
10039 free_block_group_tree(&block_group_cache);
10040 free_device_cache_tree(&dev_cache);
10041 free_device_extent_tree(&dev_extent_cache);
10042 free_extent_record_cache(&extent_cache);
10043 free_root_item_list(&normal_trees);
10044 free_root_item_list(&dropping_trees);
10045 extent_io_tree_cleanup(&excluded_extents);
10050 * Check backrefs of a tree block given by @bytenr or @eb.
10052 * @root: the root containing the @bytenr or @eb
10053 * @eb: tree block extent buffer, can be NULL
10054 * @bytenr: bytenr of the tree block to search
10055 * @level: tree level of the tree block
10056 * @owner: owner of the tree block
10058 * Return >0 for any error found and output error message
10059 * Return 0 for no error found
/*
 * Look up the extent or metadata item for @bytenr in the extent tree, compare
 * its flags, generation, level and ref count with the expected values, then
 * search its inline refs for a backref from this root or @owner.  When the
 * inline refs do not provide one, a keyed item
 * (bytenr, TREE_BLOCK_REF, root->objectid) is searched for instead.
 *
 * Problems are collected in err as BACKREF_MISSING / BACKREF_MISMATCH.  The
 * final backref lost message is printed only when @eb is non-NULL.
 * NOTE(review): the generation check reads the header of @eb, so it is
 * presumably skipped when @eb is NULL (the recursive call passes NULL) -
 * confirm.
 */
10061 static int check_tree_block_ref(struct btrfs_root *root,
10062 struct extent_buffer *eb, u64 bytenr,
10063 int level, u64 owner)
10065 struct btrfs_key key;
10066 struct btrfs_root *extent_root = root->fs_info->extent_root;
10067 struct btrfs_path path;
10068 struct btrfs_extent_item *ei;
10069 struct btrfs_extent_inline_ref *iref;
10070 struct extent_buffer *leaf;
10076 u32 nodesize = root->fs_info->nodesize;
10079 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc root. */
10084 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10085 btrfs_header_bytenr(root->node) == bytenr)
10086 tree_reloc_root = 1;
/*
 * Search with offset (u64)-1 and then step back with
 * btrfs_previous_extent_item() to land on the item for @bytenr.
 */
10088 btrfs_init_path(&path);
10089 key.objectid = bytenr;
10090 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10091 key.type = BTRFS_METADATA_ITEM_KEY;
10093 key.type = BTRFS_EXTENT_ITEM_KEY;
10094 key.offset = (u64)-1;
10096 /* Search for the backref in extent tree */
10097 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10099 err |= BACKREF_MISSING;
10102 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10104 err |= BACKREF_MISSING;
10108 leaf = path.nodes[0];
10109 slot = path.slots[0];
10110 btrfs_item_key_to_cpu(leaf, &key, slot);
10112 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM keeps the level in key.offset and the inline refs follow the
 * extent item directly; otherwise a btrfs_tree_block_info sits in between and
 * carries the level.
 */
10114 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10115 skinny_level = (int)key.offset;
10116 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10118 struct btrfs_tree_block_info *info;
10120 info = (struct btrfs_tree_block_info *)(ei + 1);
10121 skinny_level = btrfs_tree_block_level(leaf, info);
10122 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10129 if (!(btrfs_extent_flags(leaf, ei) &
10130 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10132 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10133 key.objectid, nodesize,
10134 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10135 err = BACKREF_MISMATCH;
10137 header_gen = btrfs_header_generation(eb);
10138 extent_gen = btrfs_extent_generation(leaf, ei);
10139 if (header_gen != extent_gen) {
10141 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10142 key.objectid, nodesize, header_gen,
10144 err = BACKREF_MISMATCH;
10146 if (level != skinny_level) {
10148 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10149 key.objectid, nodesize, level, skinny_level);
10150 err = BACKREF_MISMATCH;
10152 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10154 "extent[%llu %u] is referred by other roots than %llu",
10155 key.objectid, nodesize, root->objectid);
10156 err = BACKREF_MISMATCH;
10161 * Iterate the extent/metadata item to find the exact backref
10163 item_size = btrfs_item_size_nr(leaf, slot);
10164 ptr = (unsigned long)iref;
10165 end = (unsigned long)ei + item_size;
10166 while (ptr < end) {
10167 iref = (struct btrfs_extent_inline_ref *)ptr;
10168 type = btrfs_extent_inline_ref_type(leaf, iref);
10169 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10171 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10172 (offset == root->objectid || offset == owner)) {
10174 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10176 * Backref of tree reloc root points to itself, no need
10177 * to check backref any more.
10179 if (tree_reloc_root)
10182 /* Check if the backref points to valid referencer */
10183 found_ref = !check_tree_block_ref(root, NULL,
10184 offset, level + 1, owner);
10189 ptr += btrfs_extent_inline_ref_size(type);
10193 * Inlined extent item doesn't have what we need, check
10194 * TREE_BLOCK_REF_KEY
10197 btrfs_release_path(&path);
10198 key.objectid = bytenr;
10199 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10200 key.offset = root->objectid;
10202 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10207 err |= BACKREF_MISSING;
10209 btrfs_release_path(&path);
10210 if (eb && (err & BACKREF_MISSING))
10211 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10212 bytenr, nodesize, owner, level);
10217 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10219 * Return >0 for any error found and output error message
10220 * Return 0 for no error found
/*
 * Check the file extent item at @slot of @eb against the extent tree.
 *
 * Inline extents and extents with disk_bytenr 0 are tested for first.  For
 * the rest the function:
 *  - flags disk_num_bytes / num_bytes that are not sector aligned with
 *    BYTES_UNALIGNED and adds them to data_bytes_allocated /
 *    data_bytes_referenced;
 *  - looks up (disk_bytenr, EXTENT_ITEM, disk_num_bytes) and requires the
 *    BTRFS_EXTENT_FLAG_DATA bit, else BACKREF_MISMATCH;
 *  - searches for a data backref in three places: the inline refs, a keyed
 *    EXTENT_DATA_REF item and finally a keyed SHARED_DATA_REF item whose
 *    offset is eb->start.
 * When no backref is found, BACKREF_MISSING is set and an error is printed.
 */
10222 static int check_extent_data_item(struct btrfs_root *root,
10223 struct extent_buffer *eb, int slot)
10225 struct btrfs_file_extent_item *fi;
10226 struct btrfs_path path;
10227 struct btrfs_root *extent_root = root->fs_info->extent_root;
10228 struct btrfs_key fi_key;
10229 struct btrfs_key dbref_key;
10230 struct extent_buffer *leaf;
10231 struct btrfs_extent_item *ei;
10232 struct btrfs_extent_inline_ref *iref;
10233 struct btrfs_extent_data_ref *dref;
10236 u64 disk_num_bytes;
10237 u64 extent_num_bytes;
10244 int found_dbackref = 0;
10248 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10249 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10251 /* Nothing to check for hole and inline data extents */
10252 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10253 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10256 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10257 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10258 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10260 /* Check unaligned disk_num_bytes and num_bytes */
10261 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10263 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10264 fi_key.objectid, fi_key.offset, disk_num_bytes,
10265 root->fs_info->sectorsize);
10266 err |= BYTES_UNALIGNED;
10268 data_bytes_allocated += disk_num_bytes;
10270 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10272 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10273 fi_key.objectid, fi_key.offset, extent_num_bytes,
10274 root->fs_info->sectorsize);
10275 err |= BYTES_UNALIGNED;
10277 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is accepted as a backref root below. */
10279 owner = btrfs_header_owner(eb);
10281 /* Check the extent item of the file extent in extent tree */
10282 btrfs_init_path(&path);
10283 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10284 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10285 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10287 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10291 leaf = path.nodes[0];
10292 slot = path.slots[0];
10293 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10295 extent_flags = btrfs_extent_flags(leaf, ei);
10297 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10299 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10300 disk_bytenr, disk_num_bytes,
10301 BTRFS_EXTENT_FLAG_DATA);
10302 err |= BACKREF_MISMATCH;
10305 /* Check data backref inside that extent item */
10306 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10307 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10308 ptr = (unsigned long)iref;
10309 end = (unsigned long)ei + item_size;
10310 while (ptr < end) {
10311 iref = (struct btrfs_extent_inline_ref *)ptr;
10312 type = btrfs_extent_inline_ref_type(leaf, iref);
10313 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10315 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10316 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10317 if (ref_root == owner || ref_root == root->objectid)
10318 found_dbackref = 1;
10319 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10320 found_dbackref = !check_tree_block_ref(root, NULL,
10321 btrfs_extent_inline_ref_offset(leaf, iref),
10325 if (found_dbackref)
10327 ptr += btrfs_extent_inline_ref_size(type);
10330 if (!found_dbackref) {
10331 btrfs_release_path(&path);
10333 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10334 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10335 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10336 dbref_key.offset = hash_extent_data_ref(root->objectid,
10337 fi_key.objectid, fi_key.offset);
10339 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10340 &dbref_key, &path, 0, 0);
10342 found_dbackref = 1;
10346 btrfs_release_path(&path);
10349 * Neither inlined nor EXTENT_DATA_REF found, try
10350 * SHARED_DATA_REF as last chance.
10352 dbref_key.objectid = disk_bytenr;
10353 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10354 dbref_key.offset = eb->start;
10356 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10357 &dbref_key, &path, 0, 0);
10359 found_dbackref = 1;
10365 if (!found_dbackref)
10366 err |= BACKREF_MISSING;
10367 btrfs_release_path(&path);
10368 if (err & BACKREF_MISSING) {
10369 error("data extent[%llu %llu] backref lost",
10370 disk_bytenr, disk_num_bytes);
10376 * Get real tree block level for the case like shared block
10377 * Return >= 0 as tree level
10378 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources:
 *  - the extent tree item: key.offset for a METADATA_ITEM, otherwise the
 *    btrfs_tree_block_info that follows the extent item;
 *  - the header of the block itself, read with the generation stored in the
 *    extent item.
 * The item must carry BTRFS_EXTENT_FLAG_TREE_BLOCK.  The header level is the
 * value returned.
 * NOTE(review): presumably it is returned only when both sources agree; the
 * statement taken on disagreement is not visible here - confirm.
 */
10380 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10382 struct extent_buffer *eb;
10383 struct btrfs_path path;
10384 struct btrfs_key key;
10385 struct btrfs_extent_item *ei;
10392 /* Search extent tree for extent generation and level */
10393 key.objectid = bytenr;
10394 key.type = BTRFS_METADATA_ITEM_KEY;
10395 key.offset = (u64)-1;
10397 btrfs_init_path(&path);
10398 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10401 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10409 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10410 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10411 struct btrfs_extent_item);
10412 flags = btrfs_extent_flags(path.nodes[0], ei);
10413 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10418 /* Get transid for later read_tree_block() check */
10419 transid = btrfs_extent_generation(path.nodes[0], ei);
10421 /* Get backref level as one source */
10422 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10423 backref_level = key.offset;
10425 struct btrfs_tree_block_info *info;
10427 info = (struct btrfs_tree_block_info *)(ei + 1);
10428 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10430 btrfs_release_path(&path);
10432 /* Get level from tree block as an alternative source */
10433 eb = read_tree_block(fs_info, bytenr, transid);
10434 if (!extent_buffer_uptodate(eb)) {
10435 free_extent_buffer(eb);
10438 header_level = btrfs_header_level(eb);
10439 free_extent_buffer(eb);
10441 if (header_level != backref_level)
10443 return header_level;
10446 btrfs_release_path(&path);
10451 * Check if a tree block backref is valid (points to a valid tree block)
10452 * if level == -1, level will be resolved
10453 * Return >0 for any error found and print error message
/*
 * Verify that root @root_id reaches the tree block at @bytenr.
 *
 * The first key of the block is read from disk and searched for in the root
 * with path.lowest_level set to @level; the node found at that level is then
 * compared with @bytenr and @level.  Failures are collected in err:
 *  - REFERENCER_MISSING: level lookup, root read, block read or search failed
 *  - REFERENCER_MISMATCH: bytenr or level of the node found differs
 * A block with no items at level 0 is treated as an empty tree and its key
 * is not checked.
 */
10455 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10456 u64 bytenr, int level)
10458 struct btrfs_root *root;
10459 struct btrfs_key key;
10460 struct btrfs_path path;
10461 struct extent_buffer *eb;
10462 struct extent_buffer *node;
10463 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10467 /* Query level for level == -1 special case */
10469 level = query_tree_block_level(fs_info, bytenr);
10471 err |= REFERENCER_MISSING;
/* Load the referencing root by (root_id, ROOT_ITEM, -1). */
10475 key.objectid = root_id;
10476 key.type = BTRFS_ROOT_ITEM_KEY;
10477 key.offset = (u64)-1;
10479 root = btrfs_read_fs_root(fs_info, &key);
10480 if (IS_ERR(root)) {
10481 err |= REFERENCER_MISSING;
10485 /* Read out the tree block to get item/node key */
10486 eb = read_tree_block(fs_info, bytenr, 0);
10487 if (!extent_buffer_uptodate(eb)) {
10488 err |= REFERENCER_MISSING;
10489 free_extent_buffer(eb);
10493 /* Empty tree, no need to check key */
10494 if (!btrfs_header_nritems(eb) && !level) {
10495 free_extent_buffer(eb);
10500 btrfs_node_key_to_cpu(eb, &key, 0);
10502 btrfs_item_key_to_cpu(eb, &key, 0);
10504 free_extent_buffer(eb);
10506 btrfs_init_path(&path);
10507 path.lowest_level = level;
10508 /* Search with the first key, to ensure we can reach it */
10509 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10511 err |= REFERENCER_MISSING;
10515 node = path.nodes[level];
10516 if (btrfs_header_bytenr(node) != bytenr) {
10518 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10519 bytenr, nodesize, bytenr,
10520 btrfs_header_bytenr(node));
10521 err |= REFERENCER_MISMATCH;
10523 if (btrfs_header_level(node) != level) {
10525 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10526 bytenr, nodesize, level,
10527 btrfs_header_level(node));
10528 err |= REFERENCER_MISMATCH;
10532 btrfs_release_path(&path);
/* Two message variants follow: one without and one with the level. */
10534 if (err & REFERENCER_MISSING) {
10536 error("extent [%llu %d] lost referencer (owner: %llu)",
10537 bytenr, nodesize, root_id);
10540 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10541 bytenr, nodesize, root_id, level);
10548 * Check if tree block @eb is tree reloc root.
10549 * Return 0 if it is not a tree reloc root, or if any problem happens
10550 * Return 1 if it's a tree reloc root
/*
 * Read the root keyed (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb)
 * with btrfs_read_fs_root_no_cache() and compare the bytenr of its root node
 * with the bytenr of @eb.  The root that was read is handed to
 * btrfs_free_fs_root() after the comparison.
 */
10552 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10553 struct extent_buffer *eb)
10555 struct btrfs_root *tree_reloc_root;
10556 struct btrfs_key key;
10557 u64 bytenr = btrfs_header_bytenr(eb);
10558 u64 owner = btrfs_header_owner(eb);
/* The key offset is the owner taken from the header of @eb. */
10561 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10562 key.offset = owner;
10563 key.type = BTRFS_ROOT_ITEM_KEY;
10565 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10566 if (IS_ERR(tree_reloc_root))
10569 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10571 btrfs_free_fs_root(tree_reloc_root);
10576 * Check referencer for shared block backref
10577 * If level == -1, this function will resolve the level.
/*
 * Read the tree block at @parent and look for @bytenr among its node block
 * pointers.
 *
 * Special case: when @parent equals @bytenr the block is tested with
 * is_tree_reloc_root().  The header level of the parent is also compared
 * with @level + 1.  When no parent is found an error is printed and
 * REFERENCER_MISSING is returned.
 */
10579 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10580 u64 parent, u64 bytenr, int level)
10582 struct extent_buffer *eb;
10584 int found_parent = 0;
10587 eb = read_tree_block(fs_info, parent, 0);
10588 if (!extent_buffer_uptodate(eb))
10592 level = query_tree_block_level(fs_info, bytenr);
10596 /* It's possible it's a tree reloc root */
10597 if (parent == bytenr) {
10598 if (is_tree_reloc_root(fs_info, eb))
10603 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer of the parent for @bytenr. */
10606 nr = btrfs_header_nritems(eb);
10607 for (i = 0; i < nr; i++) {
10608 if (bytenr == btrfs_node_blockptr(eb, i)) {
10614 free_extent_buffer(eb);
10615 if (!found_parent) {
10617 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10618 bytenr, fs_info->nodesize, parent, level);
10619 return REFERENCER_MISSING;
10625 * Check referencer for normal (inlined) data ref
10626 * If len == 0, it will be resolved by searching in extent tree
/*
 * Count the file extent items of inode @objectid in root @root_id that refer
 * to the data extent at @bytenr and compare the count with @count.
 *
 * A file extent is tested on three values: disk_bytenr against @bytenr,
 * disk_num_bytes against @len, and (key offset - file extent offset) against
 * a value on a line not shown.
 * NOTE(review): that value is presumably @offset, the data backref offset -
 * confirm.
 * On a count mismatch an error is printed and REFERENCER_MISSING is returned.
 */
10628 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10629 u64 root_id, u64 objectid, u64 offset,
10630 u64 bytenr, u64 len, u32 count)
10632 struct btrfs_root *root;
10633 struct btrfs_root *extent_root = fs_info->extent_root;
10634 struct btrfs_key key;
10635 struct btrfs_path path;
10636 struct extent_buffer *leaf;
10637 struct btrfs_file_extent_item *fi;
10638 u32 found_count = 0;
/* Locate the EXTENT_ITEM for @bytenr in the extent tree. */
10643 key.objectid = bytenr;
10644 key.type = BTRFS_EXTENT_ITEM_KEY;
10645 key.offset = (u64)-1;
10647 btrfs_init_path(&path);
10648 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10651 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10654 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10655 if (key.objectid != bytenr ||
10656 key.type != BTRFS_EXTENT_ITEM_KEY)
10659 btrfs_release_path(&path);
/* Load the referencing root by (root_id, ROOT_ITEM, -1). */
10661 key.objectid = root_id;
10662 key.type = BTRFS_ROOT_ITEM_KEY;
10663 key.offset = (u64)-1;
10664 btrfs_init_path(&path);
10666 root = btrfs_read_fs_root(fs_info, &key);
10670 key.objectid = objectid;
10671 key.type = BTRFS_EXTENT_DATA_KEY;
10673 * It can be nasty as data backref offset is
10674 * file offset - file extent offset, which is smaller or
10675 * equal to original backref offset. The only special case is
10676 * overflow. So we need to special check and do further search.
10678 key.offset = offset & (1ULL << 63) ? 0 : offset;
10680 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10685 * Search afterwards to get correct one
10686 * NOTE: As we must do a comprehensive check on the data backref to
10687 * make sure the dref count also matches, we must iterate all file
10688 * extents for that inode.
10691 leaf = path.nodes[0];
10692 slot = path.slots[0];
10694 if (slot >= btrfs_header_nritems(leaf))
10696 btrfs_item_key_to_cpu(leaf, &key, slot);
10697 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10699 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10701 * Except normal disk bytenr and disk num bytes, we still
10702 * need to do extra check on dbackref offset as
10703 * dbackref offset = file_offset - file_extent_offset
10705 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10706 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10707 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10712 ret = btrfs_next_item(root, &path);
10717 btrfs_release_path(&path);
10718 if (found_count != count) {
10720 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10721 bytenr, len, root_id, objectid, offset, count, found_count);
10722 return REFERENCER_MISSING;
10728 * Check if the referencer of a shared data backref exists
/*
 * Read the tree block at @parent and scan its items for a file extent whose
 * disk_bytenr equals @bytenr.  Each item is first tested for the
 * EXTENT_DATA key type and for the inline extent type.  When no match is
 * found an error is printed and REFERENCER_MISSING is returned.
 */
10730 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10731 u64 parent, u64 bytenr)
10733 struct extent_buffer *eb;
10734 struct btrfs_key key;
10735 struct btrfs_file_extent_item *fi;
10737 int found_parent = 0;
10740 eb = read_tree_block(fs_info, parent, 0);
10741 if (!extent_buffer_uptodate(eb))
/* Walk every item slot of the parent block. */
10744 nr = btrfs_header_nritems(eb);
10745 for (i = 0; i < nr; i++) {
10746 btrfs_item_key_to_cpu(eb, &key, i);
10747 if (key.type != BTRFS_EXTENT_DATA_KEY)
10750 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10751 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10754 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10761 free_extent_buffer(eb);
10762 if (!found_parent) {
10763 error("shared extent %llu referencer lost (parent: %llu)",
10765 return REFERENCER_MISSING;
10771 * This function will check a given extent item, including its backref and
10772 * itself (like crossing stripe boundary and type)
10774 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the extent item at @slot of @eb.
 *
 * Steps visible here:
 *  - add the extent size to bytes_used: key.offset for an EXTENT_ITEM key,
 *    nodesize otherwise;
 *  - reject items smaller than struct btrfs_extent_item as the unsupported
 *    COMPAT_EXTENT_TREE_V0 format;
 *  - flag metadata that crosses a stripe boundary with
 *    CROSSING_STRIPE_BOUNDARY;
 *  - walk the inline refs between the item header and the item end and hand
 *    each one, by type, to the matching backref checker; an unknown type sets
 *    UNKNOWN_TYPE and walking past the item end sets ITEM_SIZE_MISMATCH.
 */
10776 static int check_extent_item(struct btrfs_fs_info *fs_info,
10777 struct extent_buffer *eb, int slot)
10779 struct btrfs_extent_item *ei;
10780 struct btrfs_extent_inline_ref *iref;
10781 struct btrfs_extent_data_ref *dref;
10785 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10786 u32 item_size = btrfs_item_size_nr(eb, slot);
10791 struct btrfs_key key;
10795 btrfs_item_key_to_cpu(eb, &key, slot);
10796 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10797 bytes_used += key.offset;
10799 bytes_used += nodesize;
10801 if (item_size < sizeof(*ei)) {
10803 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10804 * old thing when on disk format is still un-determined.
10805 * No need to care about it anymore
10807 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10811 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10812 flags = btrfs_extent_flags(eb, ei);
10814 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10816 if (metadata && check_crossing_stripes(global_info, key.objectid,
10818 error("bad metadata [%llu, %llu) crossing stripe boundary",
10819 key.objectid, key.objectid + nodesize);
10820 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr starts right after the extent item header. */
10823 ptr = (unsigned long)(ei + 1);
10825 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10826 /* Old EXTENT_ITEM metadata */
10827 struct btrfs_tree_block_info *info;
10829 info = (struct btrfs_tree_block_info *)ptr;
10830 level = btrfs_tree_block_level(eb, info);
10831 ptr += sizeof(struct btrfs_tree_block_info);
10833 /* New METADATA_ITEM */
10834 level = key.offset;
10836 end = (unsigned long)ei + item_size;
10839 /* Reached extent item end normally */
10843 /* Beyond extent item end, wrong item size */
10845 err |= ITEM_SIZE_MISMATCH;
10846 error("extent item at bytenr %llu slot %d has wrong size",
10851 /* Now check every backref in this extent item */
10852 iref = (struct btrfs_extent_inline_ref *)ptr;
10853 type = btrfs_extent_inline_ref_type(eb, iref);
10854 offset = btrfs_extent_inline_ref_offset(eb, iref);
10856 case BTRFS_TREE_BLOCK_REF_KEY:
10857 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10861 case BTRFS_SHARED_BLOCK_REF_KEY:
10862 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10866 case BTRFS_EXTENT_DATA_REF_KEY:
10867 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10868 ret = check_extent_data_backref(fs_info,
10869 btrfs_extent_data_ref_root(eb, dref),
10870 btrfs_extent_data_ref_objectid(eb, dref),
10871 btrfs_extent_data_ref_offset(eb, dref),
10872 key.objectid, key.offset,
10873 btrfs_extent_data_ref_count(eb, dref));
10876 case BTRFS_SHARED_DATA_REF_KEY:
10877 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10881 error("extent[%llu %d %llu] has unknown ref type: %d",
10882 key.objectid, key.type, key.offset, type);
10883 err |= UNKNOWN_TYPE;
/* Advance by the size that belongs to the ref type just handled. */
10887 ptr += btrfs_extent_inline_ref_size(type);
10895 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item - cross-check one dev extent against its chunk.
 *
 * @fs_info: supplies chunk_root, which is searched for the owning chunk
 * @eb:      leaf that holds the dev extent item
 * @slot:    slot of the dev extent item inside @eb
 *
 * The chunk objectid and chunk offset stored in the dev extent are used as
 * a CHUNK_ITEM key. The stripes of the chunk found there are compared with
 * the dev extent key (objectid against stripe devid, offset against stripe
 * offset). REFERENCER_MISSING is returned when found_chunk is still 0.
 *
 * NOTE(review): this listing omits several body lines (error paths and the
 * statement executed on a stripe match); confirm against the full file.
 */
10897 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10898 struct extent_buffer *eb, int slot)
10900 struct btrfs_root *chunk_root = fs_info->chunk_root;
10901 struct btrfs_dev_extent *ptr;
10902 struct btrfs_path path;
10903 struct btrfs_key chunk_key;
10904 struct btrfs_key devext_key;
10905 struct btrfs_chunk *chunk;
10906 struct extent_buffer *l;
10910 int found_chunk = 0;
10913 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10914 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10915 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to */
10917 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10918 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10919 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10921 btrfs_init_path(&path);
10922 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10927 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10928 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* Length reported by btrfs_stripe_length() must equal the dev extent length */
10933 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for a stripe whose devid and offset equal the dev extent key */
10936 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10937 for (i = 0; i < num_stripes; i++) {
10938 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10939 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10941 if (devid == devext_key.objectid &&
10942 offset == devext_key.offset) {
10948 btrfs_release_path(&path);
10949 if (!found_chunk) {
10951 "device extent[%llu, %llu, %llu] did not find the related chunk",
10952 devext_key.objectid, devext_key.offset, length);
10953 return REFERENCER_MISSING;
10959 * Check if the used space is correct with the dev item
/*
 * check_dev_item - compare bytes_used of a dev item with its dev extents.
 *
 * @fs_info: supplies dev_root, the tree searched for DEV_EXTENT items
 * @eb:      leaf that holds the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * Adds up btrfs_dev_extent_length() of the DEV_EXTENT items whose objectid
 * equals the device id and compares that total with the bytes_used field of
 * the dev item. Returns REFERENCER_MISSING on the failed lookup path and
 * ACCOUNTING_MISMATCH when the totals differ.
 *
 * NOTE(review): the condition guarding the REFERENCER_MISSING path, the loop
 * construct and the final return are not visible in this listing.
 */
10961 static int check_dev_item(struct btrfs_fs_info *fs_info,
10962 struct extent_buffer *eb, int slot)
10964 struct btrfs_root *dev_root = fs_info->dev_root;
10965 struct btrfs_dev_item *dev_item;
10966 struct btrfs_path path;
10967 struct btrfs_key key;
10968 struct btrfs_dev_extent *ptr;
10974 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10975 dev_id = btrfs_device_id(eb, dev_item);
10976 used = btrfs_device_bytes_used(eb, dev_item);
/* Start the search at the DEV_EXTENT items of this device id */
10978 key.objectid = dev_id;
10979 key.type = BTRFS_DEV_EXTENT_KEY;
10982 btrfs_init_path(&path);
10983 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item so the message shows the item key */
10985 btrfs_item_key_to_cpu(eb, &key, slot);
10986 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10987 key.objectid, key.type, key.offset);
10988 btrfs_release_path(&path);
10989 return REFERENCER_MISSING;
10992 /* Iterate dev_extents to calculate the used space of a device */
10994 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10997 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10998 if (key.objectid > dev_id)
11000 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11003 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11004 struct btrfs_dev_extent);
11005 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11007 ret = btrfs_next_item(dev_root, &path);
11011 btrfs_release_path(&path);
11013 if (used != total) {
/*
 * NOTE(review): key is reloaded here, yet the message arguments below are
 * BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id rather than the
 * fields of key - confirm this is intended.
 */
11014 btrfs_item_key_to_cpu(eb, &key, slot);
11016 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11017 total, used, BTRFS_ROOT_TREE_OBJECTID,
11018 BTRFS_DEV_EXTENT_KEY, dev_id);
11019 return ACCOUNTING_MISMATCH;
11025 * Check a block group item with its referencer (chunk) and its used space
11026 * with extent/metadata item
/*
 * check_block_group_item - validate the block group item at @slot of @eb.
 *
 * @fs_info: supplies extent_root, chunk_root and the super block nodesize
 * @eb:      leaf that holds the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Checks visible in this listing:
 *  1) A chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM,
 *     bg_key.objectid) is searched in chunk_root and its length compared;
 *     failures set REFERENCER_MISSING or REFERENCER_MISMATCH in err.
 *  2) The extent tree is walked starting at bg_key.objectid. EXTENT_ITEM and
 *     METADATA_ITEM entries are accounted into total, and their DATA or
 *     TREE_BLOCK flag is checked against the block group flags
 *     (CHUNK_TYPE_MISMATCH). Finally total is compared with the used value
 *     of the block group item (ACCOUNTING_MISMATCH).
 *
 * NOTE(review): loop control, several conditions and the return statement
 * are missing from this listing; presumably err is returned - confirm.
 */
11028 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11029 struct extent_buffer *eb, int slot)
11031 struct btrfs_root *extent_root = fs_info->extent_root;
11032 struct btrfs_root *chunk_root = fs_info->chunk_root;
11033 struct btrfs_block_group_item *bi;
11034 struct btrfs_block_group_item bg_item;
11035 struct btrfs_path path;
11036 struct btrfs_key bg_key;
11037 struct btrfs_key chunk_key;
11038 struct btrfs_key extent_key;
11039 struct btrfs_chunk *chunk;
11040 struct extent_buffer *leaf;
11041 struct btrfs_extent_item *ei;
11042 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11050 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11051 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer; accessors below use the copy */
11052 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11053 used = btrfs_block_group_used(&bg_item);
11054 bg_flags = btrfs_block_group_flags(&bg_item);
11056 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11057 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11058 chunk_key.offset = bg_key.objectid;
11060 btrfs_init_path(&path);
11061 /* Search for the referencer chunk */
11062 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11065 "block group[%llu %llu] did not find the related chunk item",
11066 bg_key.objectid, bg_key.offset);
11067 err |= REFERENCER_MISSING;
11069 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11070 struct btrfs_chunk);
11071 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11074 "block group[%llu %llu] related chunk item length does not match",
11075 bg_key.objectid, bg_key.offset);
11076 err |= REFERENCER_MISMATCH;
11079 btrfs_release_path(&path);
11081 /* Search from the block group bytenr */
11082 extent_key.objectid = bg_key.objectid;
11083 extent_key.type = 0;
11084 extent_key.offset = 0;
11086 btrfs_init_path(&path);
11087 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11091 /* Iterate extent tree to account used space */
11093 leaf = path.nodes[0];
11095 /* Search slot can point to the last item beyond leaf nritems */
11096 if (path.slots[0] >= btrfs_header_nritems(leaf))
11099 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Keys at or past objectid + offset of the block group lie outside it */
11100 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11103 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11104 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
/* Keys below the start of the block group are outside it as well */
11106 if (extent_key.objectid < bg_key.objectid)
/*
 * The statement accounting a METADATA_ITEM is not shown in this listing;
 * presumably nodesize is added for it - TODO confirm.
 */
11109 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11112 total += extent_key.offset;
11114 ei = btrfs_item_ptr(leaf, path.slots[0],
11115 struct btrfs_extent_item);
11116 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group; tree blocks need SYSTEM or METADATA */
11117 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11118 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11120 "bad extent[%llu, %llu) type mismatch with chunk",
11121 extent_key.objectid,
11122 extent_key.objectid + extent_key.offset);
11123 err |= CHUNK_TYPE_MISMATCH;
11125 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11126 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11127 BTRFS_BLOCK_GROUP_METADATA))) {
11129 "bad extent[%llu, %llu) type mismatch with chunk",
11130 extent_key.objectid,
11131 extent_key.objectid + nodesize);
11132 err |= CHUNK_TYPE_MISMATCH;
11136 ret = btrfs_next_item(extent_root, &path);
11142 btrfs_release_path(&path);
11144 if (total != used) {
11146 "block group[%llu %llu] used %llu but extent items used %llu",
11147 bg_key.objectid, bg_key.offset, used, total);
11148 err |= ACCOUNTING_MISMATCH;
11154 * Check a chunk item.
11155 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item - validate the chunk item at @slot of @eb.
 *
 * @fs_info: supplies extent_root (block group lookup) and dev_root
 *           (dev extent lookup)
 * @eb:      leaf that holds the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Steps visible in this listing:
 *  1) btrfs_check_chunk_valid() on the chunk; a failure sets
 *     BYTES_UNALIGNED | UNKNOWN_TYPE in err.
 *  2) Lookup of the block group item keyed (chunk offset, BLOCK_GROUP_ITEM,
 *     chunk length) in extent_root, then comparison of its flags with the
 *     chunk type.
 *  3) For every stripe, lookup of the dev extent keyed (stripe devid,
 *     DEV_EXTENT, stripe offset) in dev_root and comparison of its chunk
 *     objectid, chunk offset and length; a failure sets BACKREF_MISSING.
 *
 * NOTE(review): several conditions, the not_match_dev label and the return
 * statement are missing from this listing.
 */
11157 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11158 struct extent_buffer *eb, int slot)
11160 struct btrfs_root *extent_root = fs_info->extent_root;
11161 struct btrfs_root *dev_root = fs_info->dev_root;
11162 struct btrfs_path path;
11163 struct btrfs_key chunk_key;
11164 struct btrfs_key bg_key;
11165 struct btrfs_key devext_key;
11166 struct btrfs_chunk *chunk;
11167 struct extent_buffer *leaf;
11168 struct btrfs_block_group_item *bi;
11169 struct btrfs_block_group_item bg_item;
11170 struct btrfs_dev_extent *ptr;
11182 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11183 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11184 length = btrfs_chunk_length(eb, chunk);
11185 chunk_end = chunk_key.offset + length;
11186 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11189 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11191 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11194 type = btrfs_chunk_type(eb, chunk);
/* The matching block group item is keyed by chunk start and chunk length */
11196 bg_key.objectid = chunk_key.offset;
11197 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11198 bg_key.offset = length;
11200 btrfs_init_path(&path);
11201 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11204 "chunk[%llu %llu) did not find the related block group item",
11205 chunk_key.offset, chunk_end);
11206 err |= REFERENCER_MISSING;
11208 leaf = path.nodes[0];
11209 bi = btrfs_item_ptr(leaf, path.slots[0],
11210 struct btrfs_block_group_item);
11211 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/*
 * NOTE(review): a flags mismatch is reported with REFERENCER_MISSING, the
 * same bit used above when the item is absent - confirm this is intended.
 */
11213 if (btrfs_block_group_flags(&bg_item) != type) {
11215 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11216 chunk_key.offset, chunk_end, type,
11217 btrfs_block_group_flags(&bg_item));
11218 err |= REFERENCER_MISSING;
11222 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11223 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
/* One dev extent lookup per stripe; the path is reset on every iteration */
11224 for (i = 0; i < num_stripes; i++) {
11225 btrfs_release_path(&path);
11226 btrfs_init_path(&path);
11227 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11228 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11229 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11231 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11234 goto not_match_dev;
11236 leaf = path.nodes[0];
11237 ptr = btrfs_item_ptr(leaf, path.slots[0],
11238 struct btrfs_dev_extent);
11239 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11240 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11241 if (objectid != chunk_key.objectid ||
11242 offset != chunk_key.offset ||
11243 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11244 goto not_match_dev;
11247 err |= BACKREF_MISSING;
/*
 * NOTE(review): this message prints chunk_key.objectid together with
 * chunk_end, while the earlier messages print chunk_key.offset - confirm.
 */
11249 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11250 chunk_key.objectid, chunk_end, i);
11253 btrfs_release_path(&path);
11259 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items - run the per-item checker for the items of leaf @eb.
 *
 * @root: tree the leaf belongs to; root->fs_info is handed to the checkers
 * @eb:   leaf whose items are examined
 *
 * The key of the current slot selects the checker by key type:
 * EXTENT_DATA, BLOCK_GROUP_ITEM, DEV_ITEM, CHUNK_ITEM, DEV_EXTENT,
 * EXTENT_ITEM / METADATA_ITEM and the four keyed backref types. EXTENT_CSUM
 * items only add their item size to total_csum_bytes.
 *
 * NOTE(review): the switch statement, the accumulation of the checker
 * results and the return statement are not visible in this listing.
 */
11261 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11263 struct btrfs_fs_info *fs_info = root->fs_info;
11264 struct btrfs_key key;
11267 struct btrfs_extent_data_ref *dref;
11272 btrfs_item_key_to_cpu(eb, &key, slot);
11276 case BTRFS_EXTENT_DATA_KEY:
11277 ret = check_extent_data_item(root, eb, slot);
11280 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11281 ret = check_block_group_item(fs_info, eb, slot);
11284 case BTRFS_DEV_ITEM_KEY:
11285 ret = check_dev_item(fs_info, eb, slot);
11288 case BTRFS_CHUNK_ITEM_KEY:
11289 ret = check_chunk_item(fs_info, eb, slot);
11292 case BTRFS_DEV_EXTENT_KEY:
11293 ret = check_dev_extent_item(fs_info, eb, slot);
11296 case BTRFS_EXTENT_ITEM_KEY:
11297 case BTRFS_METADATA_ITEM_KEY:
11298 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items are only accounted, not verified, at this point */
11301 case BTRFS_EXTENT_CSUM_KEY:
11302 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backrefs reuse the checkers of the inline refs */
11304 case BTRFS_TREE_BLOCK_REF_KEY:
11305 ret = check_tree_block_backref(fs_info, key.offset,
11309 case BTRFS_EXTENT_DATA_REF_KEY:
11310 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11311 ret = check_extent_data_backref(fs_info,
11312 btrfs_extent_data_ref_root(eb, dref),
11313 btrfs_extent_data_ref_objectid(eb, dref),
11314 btrfs_extent_data_ref_offset(eb, dref),
11316 btrfs_extent_data_ref_count(eb, dref));
11319 case BTRFS_SHARED_BLOCK_REF_KEY:
11320 ret = check_shared_block_backref(fs_info, key.offset,
11324 case BTRFS_SHARED_DATA_REF_KEY:
11325 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot; the action taken when items remain is not shown */
11333 if (++slot < btrfs_header_nritems(eb))
11340 * Helper function for later fs/subvol tree check. To determine if a tree
11341 * block should be checked.
11342 * This function ensures that only the direct referencer with the lowest rootid
11343 * checks a fs/subvolume tree block.
11345 * Backref check at extent tree would detect errors like missing subvolume
11346 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check - decide whether @root is the one that checks tree block @eb.
 *
 * @root: fs/subvolume root currently being traversed
 * @eb:   tree block under consideration
 *
 * Looks up the extent item of the block in extent_root with the key
 * (bytenr, METADATA_ITEM, (u64)-1) followed by
 * btrfs_previous_extent_item(), then walks the inline refs of that item.
 * An inline TREE_BLOCK_REF whose offset is below root->objectid ends the
 * walk early after releasing the path.
 *
 * The caller in this file treats a non-zero return as: check this block.
 * NOTE(review): every return statement and the error handling after the two
 * lookups are missing from this listing; the exact values returned on each
 * path cannot be confirmed from here.
 */
11348 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11350 struct btrfs_root *extent_root = root->fs_info->extent_root;
11351 struct btrfs_key key;
11352 struct btrfs_path path;
11353 struct extent_buffer *leaf;
11355 struct btrfs_extent_item *ei;
11361 struct btrfs_extent_inline_ref *iref;
11364 btrfs_init_path(&path);
11365 key.objectid = btrfs_header_bytenr(eb);
11366 key.type = BTRFS_METADATA_ITEM_KEY;
11367 key.offset = (u64)-1;
11370 * Any failure in backref resolving means we can't determine
11371 * whom the tree block belongs to.
11372 * So in that case, we need to check that tree block
11374 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11378 ret = btrfs_previous_extent_item(extent_root, &path,
11379 btrfs_header_bytenr(eb));
11383 leaf = path.nodes[0];
11384 slot = path.slots[0];
11385 btrfs_item_key_to_cpu(leaf, &key, slot);
11386 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11388 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11389 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11391 struct btrfs_tree_block_info *info;
11393 info = (struct btrfs_tree_block_info *)(ei + 1);
11394 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11397 item_size = btrfs_item_size_nr(leaf, slot);
11398 ptr = (unsigned long)iref;
11399 end = (unsigned long)ei + item_size;
11400 while (ptr < end) {
11401 iref = (struct btrfs_extent_inline_ref *)ptr;
11402 type = btrfs_extent_inline_ref_type(leaf, iref);
11403 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11406 * We only check the tree block if current root is
11407 * the lowest referencer of it.
11409 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11410 offset < root->objectid) {
11411 btrfs_release_path(&path);
11415 ptr += btrfs_extent_inline_ref_size(type);
11418 * Normally we should also check keyed tree block ref, but that may be
11419 * very time consuming. Inlined ref should already make us skip a lot
11420 * of refs now. So skip search keyed tree block ref.
11424 btrfs_release_path(&path);
11429 * Traversal function for tree block. We will do:
11430 * 1) Skip shared fs/subvolume tree blocks
11431 * 2) Update related bytes accounting
11432 * 3) Pre-order traversal
/*
 * traverse_tree_block - check tree block @node and recurse into its children.
 *
 * @root: tree being traversed
 * @node: tree block to check
 *
 * Order of work visible in this listing:
 *  - for fs trees, blocks rejected by should_check() are skipped;
 *  - the byte counters (total_btree_bytes, total_fs_tree_bytes,
 *    total_extent_tree_bytes) and found_old_backref are updated;
 *  - check_tree_block_ref() is run on the block itself;
 *  - a leaf adds its free space to btree_space_waste and is passed to
 *    check_leaf_items();
 *  - a node adds its unused key pointers to btree_space_waste, then every
 *    child that reads back uptodate is traversed by a recursive call.
 *
 * NOTE(review): the level test separating the leaf and node paths, the
 * error accumulation and the return statement are not shown here.
 */
11434 static int traverse_tree_block(struct btrfs_root *root,
11435 struct extent_buffer *node)
11437 struct extent_buffer *eb;
11438 struct btrfs_key key;
11439 struct btrfs_key drop_key;
11447 * Skip shared fs/subvolume tree block, in that case they will
11448 * be checked by referencer with lowest rootid
11450 if (is_fstree(root->objectid) && !should_check(root, node))
11453 /* Update bytes accounting */
11454 total_btree_bytes += node->len;
11455 if (fs_root_objectid(btrfs_header_owner(node)))
11456 total_fs_tree_bytes += node->len;
11457 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11458 total_extent_tree_bytes += node->len;
/* Latch found_old_backref once; it is never cleared in this function */
11459 if (!found_old_backref &&
11460 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11461 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11462 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11463 found_old_backref = 1;
11465 /* pre-order traversal, check itself first */
11466 level = btrfs_header_level(node);
11467 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11468 btrfs_header_level(node),
11469 btrfs_header_owner(node));
11473 "check %s failed root %llu bytenr %llu level %d, force continue check",
11474 level ? "node":"leaf", root->objectid,
11475 btrfs_header_bytenr(node), btrfs_header_level(node));
11478 btree_space_waste += btrfs_leaf_free_space(root, node);
11479 ret = check_leaf_items(root, node);
11484 nr = btrfs_header_nritems(node);
11485 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
/* Unused key pointer slots of a node count as wasted space */
11486 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11487 sizeof(struct btrfs_key_ptr);
11489 /* Then check all its children */
11490 for (i = 0; i < nr; i++) {
11491 u64 blocknr = btrfs_node_blockptr(node, i);
11493 btrfs_node_key_to_cpu(node, &key, i);
/* Children at drop_level are tested against the drop_progress key */
11494 if (level == root->root_item.drop_level &&
11495 is_dropped_key(&key, &drop_key))
11499 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11500 * to call the function itself.
11502 eb = read_tree_block(root->fs_info, blocknr, 0);
11503 if (extent_buffer_uptodate(eb)) {
11504 ret = traverse_tree_block(root, eb);
11507 free_extent_buffer(eb);
11514 * Low memory usage version of check_chunks_and_extents.
/*
 * check_chunks_and_extents_v2 - traverse the chunk tree, the tree root and
 * the trees listed in the tree root.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 *
 * The chunk root and the tree root are traversed first. The tree root is
 * then searched starting at objectid BTRFS_EXTENT_TREE_OBJECTID with type
 * ROOT_ITEM, and each ROOT_ITEM found is read and handed to
 * traverse_tree_block(). Roots with objectid BTRFS_TREE_RELOC_OBJECTID are
 * read with btrfs_read_fs_root_no_cache() and released with
 * btrfs_free_fs_root() after the traversal.
 *
 * NOTE(review): loop control, result accumulation and the return statement
 * are missing from this listing.
 */
11516 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11518 struct btrfs_path path;
11519 struct btrfs_key key;
11520 struct btrfs_root *root1;
11521 struct btrfs_root *cur_root;
11525 root1 = root->fs_info->chunk_root;
11526 ret = traverse_tree_block(root1, root1->node);
11529 root1 = root->fs_info->tree_root;
11530 ret = traverse_tree_block(root1, root1->node);
11533 btrfs_init_path(&path);
11534 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11536 key.type = BTRFS_ROOT_ITEM_KEY;
11538 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* NOTE(review): the message below spells tree as treet (runtime string, left as is) */
11540 error("cannot find extent treet in tree_root");
11545 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11546 if (key.type != BTRFS_ROOT_ITEM_KEY)
11548 key.offset = (u64)-1;
/* Reloc roots are read uncached and freed explicitly after the traversal */
11550 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11551 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11554 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11555 if (IS_ERR(cur_root) || !cur_root) {
/* NOTE(review): objectid is printed with %lld here; other messages use %llu */
11556 error("failed to read tree: %lld", key.objectid);
11560 ret = traverse_tree_block(cur_root, cur_root->node);
11563 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11564 btrfs_free_fs_root(cur_root);
11566 ret = btrfs_next_item(root1, &path);
11572 btrfs_release_path(&path);
/*
 * btrfs_fsck_reinit_root - give @root a freshly initialised root block.
 *
 * @trans:     running transaction; its transid becomes the header generation
 * @root:      root whose node is re-initialised
 * @overwrite: presumably selects reuse of the current block instead of a new
 *             allocation; the test itself is not visible here - TODO confirm
 *
 * The block c is either taken with extent_buffer_get() or allocated with
 * btrfs_alloc_free_block(). Its header is zeroed and refilled (level,
 * bytenr, generation, backref rev, owner, fsid, chunk tree uuid) and the
 * buffer is marked dirty. When the old and the new block start at the same
 * bytenr, the root item generation and level are updated and written with
 * btrfs_update_root().
 *
 * NOTE(review): the assignments of c and level, the error checks and the
 * return statement are missing from this listing.
 */
11576 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11577 struct btrfs_root *root, int overwrite)
11579 struct extent_buffer *c;
11580 struct extent_buffer *old = root->node;
11583 struct btrfs_disk_key disk_key = {0,0,0};
11589 extent_buffer_get(c);
11592 c = btrfs_alloc_free_block(trans, root,
11593 root->fs_info->nodesize,
11594 root->root_key.objectid,
11595 &disk_key, level, 0, 0);
11598 extent_buffer_get(c);
/* Wipe the header area, then rebuild every header field from scratch */
11602 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11603 btrfs_set_header_level(c, level);
11604 btrfs_set_header_bytenr(c, c->start);
11605 btrfs_set_header_generation(c, trans->transid);
11606 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11607 btrfs_set_header_owner(c, root->root_key.objectid);
11609 write_extent_buffer(c, root->fs_info->fsid,
11610 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11612 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11613 btrfs_header_chunk_tree_uuid(c),
11616 btrfs_mark_buffer_dirty(c);
11618 * this case can happen in the following case:
11620 * 1.overwrite previous root.
11622 * 2.reinit reloc data root, this is because we skip pin
11623 * down reloc data tree before which means we can allocate
11624 * same block bytenr here.
11626 if (old->start == c->start) {
11627 btrfs_set_root_generation(&root->root_item,
11629 root->root_item.level = btrfs_header_level(root->node);
11630 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11631 &root->root_key, &root->root_item);
11633 free_extent_buffer(c);
11637 free_extent_buffer(old);
11639 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks - pin @eb and the tree blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents set is updated
 * @eb:        tree block to start from
 * @tree_root: non-zero when @eb belongs to the tree of tree roots
 *
 * A block whose range already has EXTENT_DIRTY set in pinned_extents is not
 * processed again; otherwise it is pinned with btrfs_pin_extent(). Then,
 * for each of the nritems entries:
 *  - ROOT_ITEM keys (the extent root and the two reloc roots excluded) have
 *    their root block read and pinned recursively with tree_root = 0;
 *  - node pointers are either pinned without reading (level == 1 and
 *    tree_root == 0) or read and recursed into with the same tree_root.
 *
 * NOTE(review): the tests that separate the leaf and node cases, the error
 * returns after a failed read and the final return are not shown in this
 * listing.
 */
11643 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11644 struct extent_buffer *eb, int tree_root)
11646 struct extent_buffer *tmp;
11647 struct btrfs_root_item *ri;
11648 struct btrfs_key key;
11650 int level = btrfs_header_level(eb);
11656 * If we have pinned this block before, don't pin it again.
11657 * This can not only avoid forever loop with broken filesystem
11658 * but also give us some speedups.
11660 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11661 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11664 btrfs_pin_extent(fs_info, eb->start, eb->len);
11666 nritems = btrfs_header_nritems(eb);
11667 for (i = 0; i < nritems; i++) {
11669 btrfs_item_key_to_cpu(eb, &key, i);
11670 if (key.type != BTRFS_ROOT_ITEM_KEY)
11672 /* Skip the extent root and reloc roots */
11673 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11674 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11675 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11677 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11678 bytenr = btrfs_disk_root_bytenr(eb, ri);
11681 * If at any point we start needing the real root we
11682 * will have to build a stump root for the root we are
11683 * in, but for now this doesn't actually use the root so
11684 * just pass in extent_root.
11686 tmp = read_tree_block(fs_info, bytenr, 0);
11687 if (!extent_buffer_uptodate(tmp)) {
11688 fprintf(stderr, "Error reading root block\n");
11691 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11692 free_extent_buffer(tmp);
11696 bytenr = btrfs_node_blockptr(eb, i);
11698 /* If we aren't the tree root don't read the block */
11699 if (level == 1 && !tree_root) {
11700 btrfs_pin_extent(fs_info, bytenr,
11701 fs_info->nodesize);
11705 tmp = read_tree_block(fs_info, bytenr, 0);
11706 if (!extent_buffer_uptodate(tmp)) {
11707 fprintf(stderr, "Error reading tree block\n");
11710 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11711 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin the blocks of the chunk tree, then those
 * reachable from the tree root.
 *
 * The chunk root is walked with tree_root = 0 and the tree root with
 * tree_root = 1; the result of the second walk is returned.
 * NOTE(review): the check of the first result is not shown in this listing.
 */
11720 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11724 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11728 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - recreate the in-memory block groups from chunk items.
 *
 * @fs_info: filesystem whose chunk_root is walked
 *
 * The three avail_*_alloc_bits fields are cleared first. Every CHUNK_ITEM
 * found in chunk_root is then registered with btrfs_add_block_group() and
 * its byte range is marked with set_extent_dirty() in free_space_cache.
 * A second pass iterates the block groups with
 * btrfs_lookup_first_block_group().
 *
 * NOTE(review): the initial key values, both loop constructs, the body of
 * the second pass and the return statement are missing from this listing.
 */
11731 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11733 struct btrfs_block_group_cache *cache;
11734 struct btrfs_path path;
11735 struct extent_buffer *leaf;
11736 struct btrfs_chunk *chunk;
11737 struct btrfs_key key;
11741 btrfs_init_path(&path);
11743 key.type = BTRFS_CHUNK_ITEM_KEY;
11745 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11747 btrfs_release_path(&path);
11752 * We do this in case the block groups were screwed up and had alloc
11753 * bits that aren't actually set on the chunks. This happens with
11754 * restored images every time and could happen in real life I guess.
11756 fs_info->avail_data_alloc_bits = 0;
11757 fs_info->avail_metadata_alloc_bits = 0;
11758 fs_info->avail_system_alloc_bits = 0;
11760 /* First we need to create the in-memory block groups */
11762 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11763 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11765 btrfs_release_path(&path);
11773 leaf = path.nodes[0];
11774 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11775 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11780 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11781 btrfs_add_block_group(fs_info, 0,
11782 btrfs_chunk_type(leaf, chunk),
11783 key.objectid, key.offset,
11784 btrfs_chunk_length(leaf, chunk));
11785 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11786 key.offset + btrfs_chunk_length(leaf, chunk));
11791 cache = btrfs_lookup_first_block_group(fs_info, start);
11795 start = cache->key.objectid + cache->key.offset;
11798 btrfs_release_path(&path);
/*
 * reset_balance - drop pending balance state and reinit the data reloc tree.
 *
 * @trans:   running transaction used for all modifications
 * @fs_info: filesystem whose tree root is modified
 *
 * Steps visible in this listing:
 *  1) search the tree root for the item with objectid
 *     BTRFS_BALANCE_OBJECTID and type BALANCE_ITEM and delete it;
 *  2) search for ROOT_ITEM entries with objectid BTRFS_TREE_RELOC_OBJECTID
 *     and delete them in batches with btrfs_del_items();
 *  3) read the data reloc tree root, record it in the transaction,
 *     re-initialise it with btrfs_fsck_reinit_root() and recreate its root
 *     directory with btrfs_make_root_dir().
 *
 * NOTE(review): the reinit_data_reloc label, loop control, all error checks
 * and the return statement are missing from this listing.
 */
11802 static int reset_balance(struct btrfs_trans_handle *trans,
11803 struct btrfs_fs_info *fs_info)
11805 struct btrfs_root *root = fs_info->tree_root;
11806 struct btrfs_path path;
11807 struct extent_buffer *leaf;
11808 struct btrfs_key key;
11809 int del_slot, del_nr = 0;
11813 btrfs_init_path(&path);
11814 key.objectid = BTRFS_BALANCE_OBJECTID;
11815 key.type = BTRFS_BALANCE_ITEM_KEY;
11817 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11822 goto reinit_data_reloc;
11827 ret = btrfs_del_item(trans, root, &path);
11830 btrfs_release_path(&path);
/* Next: remove the root items of the reloc trees */
11832 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11833 key.type = BTRFS_ROOT_ITEM_KEY;
11835 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11839 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11844 ret = btrfs_del_items(trans, root, &path,
11851 btrfs_release_path(&path);
11854 ret = btrfs_search_slot(trans, root, &key, &path,
11861 leaf = path.nodes[0];
11862 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11863 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11865 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11870 del_slot = path.slots[0];
11879 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11883 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, not the tree root */
11886 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11887 key.type = BTRFS_ROOT_ITEM_KEY;
11888 key.offset = (u64)-1;
11889 root = btrfs_read_fs_root(fs_info, &key);
11890 if (IS_ERR(root)) {
11891 fprintf(stderr, "Error reading data reloc tree\n");
11892 ret = PTR_ERR(root);
11895 record_root_in_trans(trans, root);
11896 ret = btrfs_fsck_reinit_root(trans, root, 0);
11899 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11901 btrfs_release_path(&path);
/*
 * reinit_extent_tree - rebuild an empty extent tree with block group items.
 *
 * @trans:   running transaction used for all modifications
 * @fs_info: filesystem whose extent root is re-initialised
 *
 * Sequence visible in this listing:
 *  1) filesystems with the MIXED_GROUPS incompat flag are rejected with a
 *     message;
 *  2) pin_metadata_blocks() pins the metadata currently in use;
 *  3) the in-memory block groups are freed and rebuilt by
 *     reset_block_groups();
 *  4) the extent root is re-initialised with btrfs_fsck_reinit_root();
 *  5) the item of every in-memory block group is inserted into the extent
 *     root, each insert followed by btrfs_extent_post_op();
 *  6) reset_balance() clears pending balance state.
 *
 * NOTE(review): every result check, the loop construct and all return
 * statements are missing from this listing, so the values returned on the
 * failure paths cannot be confirmed from here.
 */
11905 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11906 struct btrfs_fs_info *fs_info)
11912 * The only reason we don't do this is because right now we're just
11913 * walking the trees we find and pinning down their bytes, we don't look
11914 * at any of the leaves. In order to do mixed groups we'd have to check
11915 * the leaves of any fs roots and pin down the bytes for any file
11916 * extents we find. Not hard but why do it if we don't have to?
11918 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11919 fprintf(stderr, "We don't support re-initing the extent tree "
11920 "for mixed block groups yet, please notify a btrfs "
11921 "developer you want to do this so they can add this "
11922 "functionality.\n");
11927 * first we need to walk all of the trees except the extent tree and pin
11928 * down the bytes that are in use so we don't overwrite any existing
11931 ret = pin_metadata_blocks(fs_info);
11933 fprintf(stderr, "error pinning down used bytes\n");
11938 * Need to drop all the block groups since we're going to recreate all
11941 btrfs_free_block_groups(fs_info);
11942 ret = reset_block_groups(fs_info);
11944 fprintf(stderr, "error resetting the block groups\n");
11948 /* Ok we can allocate now, reinit the extent root */
11949 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11951 fprintf(stderr, "extent root initialization failed\n");
11953 * When the transaction code is updated we should end the
11954 * transaction, but for now progs only knows about commit so
11955 * just return an error.
11961 * Now we have all the in-memory block groups setup so we can make
11962 * allocations properly, and the metadata we care about is safe since we
11963 * pinned all of it above.
11966 struct btrfs_block_group_cache *cache;
11968 cache = btrfs_lookup_first_block_group(fs_info, start);
11971 start = cache->key.objectid + cache->key.offset;
11972 ret = btrfs_insert_item(trans, fs_info->extent_root,
11973 &cache->key, &cache->item,
11974 sizeof(cache->item));
11976 fprintf(stderr, "Error adding block group\n");
11979 btrfs_extent_post_op(trans, fs_info->extent_root);
11982 ret = reset_balance(trans, fs_info);
11984 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - force a COW of tree block @eb through its owner root.
 *
 * @root: any root of the filesystem; replaced locally by the owner root of
 *        @eb, which is read via btrfs_read_fs_root()
 * @eb:   tree block to be rewritten
 *
 * A transaction is started on the owner root and btrfs_search_slot() is run
 * with cow = 1 for the first key of @eb, with path.lowest_level set to the
 * level of @eb. The transaction is then committed.
 * Returns PTR_ERR() of the root or the transaction when either cannot be
 * obtained.
 *
 * NOTE(review): the final return statement is not shown in this listing.
 */
11989 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11991 struct btrfs_path path;
11992 struct btrfs_trans_handle *trans;
11993 struct btrfs_key key;
11996 printf("Recowing metadata block %llu\n", eb->start);
11997 key.objectid = btrfs_header_owner(eb);
11998 key.type = BTRFS_ROOT_ITEM_KEY;
11999 key.offset = (u64)-1;
12001 root = btrfs_read_fs_root(root->fs_info, &key);
12002 if (IS_ERR(root)) {
12003 fprintf(stderr, "Couldn't find owner root %llu\n",
12005 return PTR_ERR(root);
12008 trans = btrfs_start_transaction(root, 1);
12010 return PTR_ERR(trans);
12012 btrfs_init_path(&path);
/* Stop the search at the level of eb; use the first key stored in eb */
12013 path.lowest_level = btrfs_header_level(eb);
12014 if (path.lowest_level)
12015 btrfs_node_key_to_cpu(eb, &key, 0);
12017 btrfs_item_key_to_cpu(eb, &key, 0);
12019 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the result of the commit is not captured on this line */
12020 btrfs_commit_transaction(trans, root);
12021 btrfs_release_path(&path);
/*
 * delete_bad_item - remove the item described by @bad from its root.
 *
 * @root: any root of the filesystem; replaced locally by the root whose
 *        objectid is bad->root_id
 * @bad:  record holding the owning root id and the key of the item
 *
 * A transaction is started on the owning root, the key is searched with
 * ins_len = -1 and cow = 1, the item is deleted with btrfs_del_item() and
 * the transaction is committed.
 * Returns PTR_ERR() of the root or the transaction when either cannot be
 * obtained.
 *
 * NOTE(review): the handling of the search result and the final return
 * statement are not shown in this listing.
 */
12025 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12027 struct btrfs_path path;
12028 struct btrfs_trans_handle *trans;
12029 struct btrfs_key key;
12032 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12033 bad->key.type, bad->key.offset);
12034 key.objectid = bad->root_id;
12035 key.type = BTRFS_ROOT_ITEM_KEY;
12036 key.offset = (u64)-1;
12038 root = btrfs_read_fs_root(root->fs_info, &key);
12039 if (IS_ERR(root)) {
12040 fprintf(stderr, "Couldn't find owner root %llu\n",
12042 return PTR_ERR(root);
12045 trans = btrfs_start_transaction(root, 1);
12047 return PTR_ERR(trans);
12049 btrfs_init_path(&path);
12050 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12056 ret = btrfs_del_item(trans, root, &path);
/* NOTE(review): the result of the commit is not captured on this line */
12058 btrfs_commit_transaction(trans, root);
12059 btrfs_release_path(&path);
/*
 * zero_log_tree - clear the log root recorded in the super block copy.
 *
 * @root: root used to start and commit the transaction
 *
 * Sets log_root and log_root_level of fs_info->super_copy to 0 inside a
 * transaction and commits it; ret holds the commit result, or PTR_ERR()
 * when the transaction cannot be started.
 * NOTE(review): the return statement is not shown in this listing.
 */
12063 static int zero_log_tree(struct btrfs_root *root)
12065 struct btrfs_trans_handle *trans;
12068 trans = btrfs_start_transaction(root, 1);
12069 if (IS_ERR(trans)) {
12070 ret = PTR_ERR(trans);
12073 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12074 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12075 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - compute and insert checksums for one data range.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the new items
 * @buf:       scratch buffer; the callers in this file allocate sectorsize
 *             bytes for it
 * @start:     first byte of the range
 *
 * The range length len is declared on a parameter line that is not shown in
 * this listing. The range is processed one sectorsize step at a time: the
 * data is read with read_extent_data() and then passed to
 * btrfs_csum_file_block() with start + len as its third argument.
 *
 * NOTE(review): the result checks and the return statement are missing from
 * this listing.
 */
12079 static int populate_csum(struct btrfs_trans_handle *trans,
12080 struct btrfs_root *csum_root, char *buf, u64 start,
12083 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12088 while (offset < len) {
12089 sectorsize = fs_info->sectorsize;
12090 ret = read_extent_data(fs_info, buf, start + offset,
12094 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12095 start + offset, buf, sectorsize);
12098 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - checksum the regular file extents of
 * one fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the new items
 * @cur_root:  fs/subvolume tree whose EXTENT_DATA items are walked
 *
 * Items are visited with btrfs_next_item(). For EXTENT_DATA items of type
 * BTRFS_FILE_EXTENT_REG the on-disk range (disk_bytenr, disk_num_bytes) is
 * handed to populate_csum(); a result of -EEXIST gets special treatment on
 * a line that is not shown.
 *
 * NOTE(review): the start key, loop control, release of buf and the return
 * statement are missing from this listing.
 */
12103 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12104 struct btrfs_root *csum_root,
12105 struct btrfs_root *cur_root)
12107 struct btrfs_path path;
12108 struct btrfs_key key;
12109 struct extent_buffer *node;
12110 struct btrfs_file_extent_item *fi;
/* One sector of scratch space, reused for every block that is read */
12117 buf = malloc(cur_root->fs_info->sectorsize);
12121 btrfs_init_path(&path);
12125 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12128 /* Iterate all regular file extents and fill its csum */
12130 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12132 if (key.type != BTRFS_EXTENT_DATA_KEY)
12134 node = path.nodes[0];
12135 slot = path.slots[0];
12136 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12137 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, so use the disk bytenr and length */
12139 start = btrfs_file_extent_disk_bytenr(node, fi);
12140 len = btrfs_file_extent_disk_num_bytes(node, fi);
12142 ret = populate_csum(trans, csum_root, buf, start, len);
12143 if (ret == -EEXIST)
12149 * TODO: if next leaf is corrupted, jump to nearest next valid
12152 ret = btrfs_next_item(cur_root, &path);
12162 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs - rebuild checksums by walking every fs/subvolume
 * tree listed in the tree root.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the new items
 *
 * The tree root is searched starting at BTRFS_FS_TREE_OBJECTID with type
 * ROOT_ITEM. Each ROOT_ITEM whose objectid passes is_fstree() is read with
 * btrfs_read_fs_root() and passed to fill_csum_tree_from_one_fs_root().
 * Keys with objectid above BTRFS_LAST_FREE_OBJECTID are tested separately.
 *
 * NOTE(review): loop control, the actions taken by the key tests and the
 * return statement are missing from this listing.
 */
12167 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12168 struct btrfs_root *csum_root)
12170 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12171 struct btrfs_path path;
12172 struct btrfs_root *tree_root = fs_info->tree_root;
12173 struct btrfs_root *cur_root;
12174 struct extent_buffer *node;
12175 struct btrfs_key key;
12179 btrfs_init_path(&path);
12180 key.objectid = BTRFS_FS_TREE_OBJECTID;
12182 key.type = BTRFS_ROOT_ITEM_KEY;
12183 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12192 node = path.nodes[0];
12193 slot = path.slots[0];
12194 btrfs_item_key_to_cpu(node, &key, slot);
12195 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12197 if (key.type != BTRFS_ROOT_ITEM_KEY)
12199 if (!is_fstree(key.objectid))
/* Offset (u64)-1 is the value used for every root lookup in this file */
12201 key.offset = (u64)-1;
12203 cur_root = btrfs_read_fs_root(fs_info, &key);
12204 if (IS_ERR(cur_root) || !cur_root) {
12205 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12209 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12214 ret = btrfs_next_item(tree_root, &path);
12224 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent - rebuild checksums by walking the extent tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the new items
 *
 * The extent root is walked leaf by leaf. EXTENT_ITEM entries that carry
 * BTRFS_EXTENT_FLAG_DATA are passed to populate_csum() with key.objectid as
 * the start of the range.
 *
 * NOTE(review): the start key, loop control, the remaining populate_csum()
 * arguments, release of buf and the return statement are missing from this
 * listing.
 */
12228 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12229 struct btrfs_root *csum_root)
12231 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12232 struct btrfs_path path;
12233 struct btrfs_extent_item *ei;
12234 struct extent_buffer *leaf;
12236 struct btrfs_key key;
12239 btrfs_init_path(&path);
12241 key.type = BTRFS_EXTENT_ITEM_KEY;
12243 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12245 btrfs_release_path(&path);
/* One sector of scratch space, reused for every block that is read */
12249 buf = malloc(csum_root->fs_info->sectorsize);
12251 btrfs_release_path(&path);
12256 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12257 ret = btrfs_next_leaf(extent_root, &path);
12265 leaf = path.nodes[0];
12267 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12268 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12273 ei = btrfs_item_ptr(leaf, path.slots[0],
12274 struct btrfs_extent_item);
/* Only extent items flagged as data are of interest here */
12275 if (!(btrfs_extent_flags(leaf, ei) &
12276 BTRFS_EXTENT_FLAG_DATA)) {
12281 ret = populate_csum(trans, csum_root, buf, key.objectid,
12288 btrfs_release_path(&path);
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * The extent tree is the normal source of extent information. An extent tree
 * init wipes all extent info, though, so a caller that cannot rely on the
 * extent tree sets search_fs_tree and the fs/subvol trees are scanned instead.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
12310 static void free_roots_info_cache(void)
12312 if (!roots_info_cache)
12315 while (!cache_tree_empty(roots_info_cache)) {
12316 struct cache_extent *entry;
12317 struct root_item_info *rii;
12319 entry = first_cache_extent(roots_info_cache);
12322 remove_cache_extent(roots_info_cache, entry);
12323 rii = container_of(entry, struct root_item_info, cache_extent);
12327 free(roots_info_cache);
12328 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the tree block that looks
 * like that root's top node. Results are stored in the global
 * roots_info_cache as root_item_info records keyed by root id
 * (cache_extent.start = root id, size = 1).
 *
 * For each root the record keeps the block with the highest level seen so
 * far (bytenr, generation, level) and a node_count for that level.
 *
 * NOTE(review): presumably returns 0 on success and a negative errno on
 * failure -- confirm against the return statements.
 */
12331 static int build_roots_info_cache(struct btrfs_fs_info *info)
12334 struct btrfs_key key;
12335 struct extent_buffer *leaf;
12336 struct btrfs_path path;
/* Allocate and initialise the global cache tree on first use. */
12338 if (!roots_info_cache) {
12339 roots_info_cache = malloc(sizeof(*roots_info_cache));
12340 if (!roots_info_cache)
12342 cache_tree_init(roots_info_cache);
12345 btrfs_init_path(&path);
12347 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Plain lookup: no transaction handle, ins_len 0 and cow 0. */
12349 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12352 leaf = path.nodes[0];
12355 struct btrfs_key found_key;
12356 struct btrfs_extent_item *ei;
12357 struct btrfs_extent_inline_ref *iref;
12358 int slot = path.slots[0];
12363 struct cache_extent *entry;
12364 struct root_item_info *rii;
/* Past the last item of this leaf: step to the next leaf and reload. */
12366 if (slot >= btrfs_header_nritems(leaf)) {
12367 ret = btrfs_next_leaf(info->extent_root, &path);
12374 leaf = path.nodes[0];
12375 slot = path.slots[0];
12378 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only EXTENT_ITEM and METADATA_ITEM keys are of interest. */
12380 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12381 found_key.type != BTRFS_METADATA_ITEM_KEY)
12384 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12385 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is not a tree block. */
12387 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12388 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Locate the first inline ref and the level of the block.
 * METADATA_ITEM: the ref follows the extent item directly and the level is
 * taken from the key offset.
 * Otherwise: a btrfs_tree_block_info sits between the extent item and the
 * ref, and the level is read from it.
 */
12391 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12392 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12393 level = found_key.offset;
12395 struct btrfs_tree_block_info *binfo;
12397 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12398 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12399 level = btrfs_tree_block_level(leaf, binfo);
12403 * For a root extent, it must be of the following type and the
12404 * first (and only one) iref in the item.
12406 type = btrfs_extent_inline_ref_type(leaf, iref);
12407 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The offset of a TREE_BLOCK_REF inline ref is used as the owning root id. */
12410 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12411 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/*
 * New record for this root id. level (u8)-1 marks "no block recorded yet";
 * the level comparison below tests for it.
 */
12413 rii = malloc(sizeof(struct root_item_info));
12418 rii->cache_extent.start = root_id;
12419 rii->cache_extent.size = 1;
12420 rii->level = (u8)-1;
12421 entry = &rii->cache_extent;
12422 ret = insert_cache_extent(roots_info_cache, entry);
12425 rii = container_of(entry, struct root_item_info,
12429 ASSERT(rii->cache_extent.start == root_id);
12430 ASSERT(rii->cache_extent.size == 1);
/*
 * A block with a higher level (or the first block seen) replaces the
 * record and resets node_count to 1. A block at the same level takes the
 * else-if branch.
 * NOTE(review): presumably that branch increments node_count -- confirm.
 */
12432 if (level > rii->level || rii->level == (u8)-1) {
12433 rii->level = level;
12434 rii->bytenr = found_key.objectid;
12435 rii->gen = btrfs_extent_generation(leaf, ei);
12436 rii->node_count = 1;
12437 } else if (level == rii->level) {
12445 btrfs_release_path(&path);
/*
 * Compare the on-disk root item at path->nodes[0]/path->slots[0] with the
 * record that build_roots_info_cache() stored for the same root id, and
 * optionally rewrite the item.
 *
 * path           - points at the root item to check
 * root_key       - key of that root item; its objectid is the root id
 * read_only_mode - when non-zero the item is only checked, never written
 *
 * The item is considered stale when its bytenr, level or generation differs
 * from the cached record. When read_only_mode is zero the three fields are
 * overwritten from the record and written back into the leaf.
 *
 * NOTE(review): presumably returns a negative errno on error, a positive
 * value when the item is (or was) stale and 0 otherwise -- confirm against
 * the return statements and repair_root_items().
 */
12450 static int maybe_repair_root_item(struct btrfs_path *path,
12451 const struct btrfs_key *root_key,
12452 const int read_only_mode)
12454 const u64 root_id = root_key->objectid;
12455 struct cache_extent *entry;
12456 struct root_item_info *rii;
12457 struct btrfs_root_item ri;
12458 unsigned long offset;
/* Cache records are keyed by root id with size 1. */
12460 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12463 "Error: could not find extent items for root %llu\n",
12464 root_key->objectid);
12468 rii = container_of(entry, struct root_item_info, cache_extent);
12469 ASSERT(rii->cache_extent.start == root_id);
12470 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one block at the top level is an error. */
12472 if (rii->node_count != 1) {
12474 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf. */
12479 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12480 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12482 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12483 btrfs_root_level(&ri) != rii->level ||
12484 btrfs_root_generation(&ri) != rii->gen) {
12487 * If we're in repair mode but our caller told us to not update
12488 * the root item, i.e. just check if it needs to be updated, don't
12489 * print this message, since the caller will call us again shortly
12490 * for the same root item without read only mode (the caller will
12491 * open a transaction first).
12493 if (!(read_only_mode && repair))
12495 "%sroot item for root %llu,"
12496 " current bytenr %llu, current gen %llu, current level %u,"
12497 " new bytenr %llu, new gen %llu, new level %u\n",
12498 (read_only_mode ? "" : "fixing "),
12500 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12501 btrfs_root_level(&ri),
12502 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the block found in the extent tree is reported. */
12504 if (btrfs_root_generation(&ri) > rii->gen) {
12506 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12507 root_id, btrfs_root_generation(&ri), rii->gen);
/* Repair: patch the local copy and write it back over the item in the leaf. */
12511 if (!read_only_mode) {
12512 btrfs_set_root_bytenr(&ri, rii->bytenr);
12513 btrfs_set_root_level(&ri, rii->level);
12514 btrfs_set_root_generation(&ri, rii->gen);
12515 write_extent_buffer(path->nodes[0], &ri,
12516 offset, sizeof(ri));
12526 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12527 * caused read-only snapshots to be corrupted if they were created at a moment
12528 * when the source subvolume/snapshot had orphan items. The issue was that the
12529 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12530 * node instead of the post orphan cleanup root node.
12531 * So this function, and its callees, just detects and fixes those cases. Even
12532 * though the regression was for read-only snapshots, this function applies to
12533 * any snapshot/subvolume root.
12534 * This must be run before any other repair code - not doing it so, makes other
12535 * repair code delete or modify backrefs in the extent tree for example, which
12536 * will result in an inconsistent fs after repairing the root items.
/*
 * Implementation outline: build the roots info cache, then walk the
 * ROOT_ITEMs of info->tree_root starting at BTRFS_FIRST_FREE_OBJECTID and
 * run maybe_repair_root_item() on each one, skipping the tree reloc root.
 * Items are first checked without a transaction. A transaction is only
 * started once need_trans has been set.
 *
 * NOTE(review): presumably returns a negative errno on failure, otherwise
 * the number of stale root items found -- cmd_check() prints the positive
 * result as a root count; confirm against the return statements.
 */
12538 static int repair_root_items(struct btrfs_fs_info *info)
12540 struct btrfs_path path;
12541 struct btrfs_key key;
12542 struct extent_buffer *leaf;
12543 struct btrfs_trans_handle *trans = NULL;
12546 int need_trans = 0;
12548 btrfs_init_path(&path);
12550 ret = build_roots_info_cache(info);
12554 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12555 key.type = BTRFS_ROOT_ITEM_KEY;
12560 * Avoid opening and committing transactions if a leaf doesn't have
12561 * any root items that need to be fixed, so that we avoid rotating
12562 * backup roots unnecessarily.
12565 trans = btrfs_start_transaction(info->tree_root, 1);
12566 if (IS_ERR(trans)) {
12567 ret = PTR_ERR(trans);
12572 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12576 leaf = path.nodes[0];
12579 struct btrfs_key found_key;
/*
 * End of the current leaf: remember the next key to search for, drop the
 * path and commit the transaction opened for this leaf.
 */
12581 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12582 int no_more_keys = find_next_key(&path, &key);
12584 btrfs_release_path(&path);
12586 ret = btrfs_commit_transaction(trans,
12598 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only ROOT_ITEMs are checked, and the tree reloc root is excluded. */
12600 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12602 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction the helper runs in read-only (check) mode. */
12605 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/*
 * Repair was requested but no transaction is open yet.
 * NOTE(review): presumably sets need_trans and restarts the search at this
 * key so the item is revisited with a transaction -- confirm.
 */
12609 if (!trans && repair) {
12612 btrfs_release_path(&path);
12622 free_roots_info_cache();
12623 btrfs_release_path(&path);
/*
 * NOTE(review): the result of this final commit is discarded, unlike the
 * commit inside the loop whose result is stored in ret. Also confirm that
 * trans cannot be an ERR_PTR value when this line is reached.
 */
12625 btrfs_commit_transaction(trans, info->tree_root);
12632 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12634 struct btrfs_trans_handle *trans;
12635 struct btrfs_block_group_cache *bg_cache;
12639 /* Clear all free space cache inodes and its extent data */
12641 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12644 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12647 current = bg_cache->key.objectid + bg_cache->key.offset;
12650 /* Don't forget to set cache_generation to -1 */
12651 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12652 if (IS_ERR(trans)) {
12653 error("failed to update super block cache generation");
12654 return PTR_ERR(trans);
12656 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12657 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for "btrfs check"; cmd_check() passes this table to usage().
 * The first entry is the synopsis line, followed by the description and one
 * entry per option.
 */
12662 const char * const cmd_check_usage[] = {
12663 "btrfs check [options] <device>",
12664 "Check structural integrity of a filesystem (unmounted).",
12665 "Check structural integrity of an unmounted filesystem. Verify internal",
12666 "trees' consistency and item connectivity. In the repair mode try to",
12667 "fix the problems found. ",
12668 "WARNING: the repair mode is considered dangerous",
12670 "-s|--super <superblock> use this superblock copy",
12671 "-b|--backup use the first valid backup root copy",
12672 "--repair try to repair the filesystem",
12673 "--readonly run in read-only mode (default)",
12674 "--init-csum-tree create a new CRC tree",
12675 "--init-extent-tree create a new extent tree",
12676 "--mode <MODE> allows choice of memory/IO trade-offs",
12677 " where MODE is one of:",
12678 " original - read inodes and extents to memory (requires",
12679 " more memory, does less IO)",
12680 " lowmem - try to use less memory but read blocks again",
12682 "--check-data-csum verify checksums of data blocks",
12683 "-Q|--qgroup-report print a report on qgroup consistency",
12684 "-E|--subvol-extents <subvolid>",
12685 " print subvolume extents and sharing state",
12686 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12687 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12688 "-p|--progress indicate progress",
12689 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, refuses to run on a mounted device, opens the
 * filesystem and then either performs one of the standalone actions (clear
 * space cache, qgroup report, subvolume extent report) or runs the check
 * passes in this order: optional extent/csum tree re-initialisation, extents
 * and chunks, root items, free space cache/tree, fs roots, csums, root refs
 * and quota groups. A summary of the collected counters is printed last.
 */
12693 int cmd_check(int argc, char **argv)
12695 struct cache_tree root_cache;
12696 struct btrfs_root *root;
12697 struct btrfs_fs_info *info;
12700 u64 tree_root_bytenr = 0;
12701 u64 chunk_root_bytenr = 0;
12702 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12706 int init_csum_tree = 0;
12708 int clear_space_cache = 0;
12709 int qgroup_report = 0;
12710 int qgroups_repaired = 0;
12711 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Values for long-only options start above the single-byte character range. */
12715 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12716 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12717 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12718 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12719 static const struct option long_options[] = {
12720 { "super", required_argument, NULL, 's' },
12721 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12722 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12723 { "init-csum-tree", no_argument, NULL,
12724 GETOPT_VAL_INIT_CSUM },
12725 { "init-extent-tree", no_argument, NULL,
12726 GETOPT_VAL_INIT_EXTENT },
12727 { "check-data-csum", no_argument, NULL,
12728 GETOPT_VAL_CHECK_CSUM },
12729 { "backup", no_argument, NULL, 'b' },
12730 { "subvol-extents", required_argument, NULL, 'E' },
12731 { "qgroup-report", no_argument, NULL, 'Q' },
12732 { "tree-root", required_argument, NULL, 'r' },
12733 { "chunk-root", required_argument, NULL,
12734 GETOPT_VAL_CHUNK_TREE },
12735 { "progress", no_argument, NULL, 'p' },
12736 { "mode", required_argument, NULL,
12738 { "clear-space-cache", required_argument, NULL,
12739 GETOPT_VAL_CLEAR_SPACE_CACHE},
12740 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the short option string lists 'E' without a trailing ':',
 * while the long option "subvol-extents" is declared required_argument and
 * the usage text documents "-E <subvolid>". With the short form, getopt would
 * not set optarg before it is handed to arg_strtou64() below -- confirm and
 * consider "E:".
 */
12743 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12747 case 'a': /* ignored */ break;
12749 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Superblock mirror index: validated, then converted to a byte offset. */
12752 num = arg_strtou64(optarg);
12753 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12755 "super mirror should be less than %d",
12756 BTRFS_SUPER_MIRROR_MAX);
12759 bytenr = btrfs_sb_offset(((int)num));
12760 printf("using SB copy %llu, bytenr %llu\n", num,
12761 (unsigned long long)bytenr);
12767 subvolid = arg_strtou64(optarg);
12770 tree_root_bytenr = arg_strtou64(optarg);
12772 case GETOPT_VAL_CHUNK_TREE:
12773 chunk_root_bytenr = arg_strtou64(optarg);
12776 ctx.progress_enabled = true;
12780 usage(cmd_check_usage);
/* Every option that modifies the filesystem also requests a writable open. */
12781 case GETOPT_VAL_REPAIR:
12782 printf("enabling repair mode\n");
12784 ctree_flags |= OPEN_CTREE_WRITES;
12786 case GETOPT_VAL_READONLY:
12789 case GETOPT_VAL_INIT_CSUM:
12790 printf("Creating a new CRC tree\n");
12791 init_csum_tree = 1;
12793 ctree_flags |= OPEN_CTREE_WRITES;
12795 case GETOPT_VAL_INIT_EXTENT:
12796 init_extent_tree = 1;
12797 ctree_flags |= (OPEN_CTREE_WRITES |
12798 OPEN_CTREE_NO_BLOCK_GROUPS);
12801 case GETOPT_VAL_CHECK_CSUM:
12802 check_data_csum = 1;
12804 case GETOPT_VAL_MODE:
12805 check_mode = parse_check_mode(optarg);
12806 if (check_mode == CHECK_MODE_UNKNOWN) {
12807 error("unknown mode: %s", optarg);
/* clear_space_cache: 1 selects the v1 cache, 2 the v2 free space tree. */
12811 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12812 if (strcmp(optarg, "v1") == 0) {
12813 clear_space_cache = 1;
12814 } else if (strcmp(optarg, "v2") == 0) {
12815 clear_space_cache = 2;
12816 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12819 "invalid argument to --clear-space-cache, must be v1 or v2");
12822 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) must remain. */
12827 if (check_argc_exact(argc - optind, 1))
12828 usage(cmd_check_usage);
12830 if (ctx.progress_enabled) {
12831 ctx.tp = TASK_NOTHING;
12832 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12835 /* This check is the only reason for --readonly to exist */
12836 if (readonly && repair) {
12837 error("repair options are not compatible with --readonly");
12842 * Not supported yet
12844 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12845 error("low memory mode doesn't support repair yet");
12850 cache_tree_init(&root_cache);
/* Refuse to operate on a device that is currently mounted. */
12852 if((ret = check_mounted(argv[optind])) < 0) {
12853 error("could not check mount status: %s", strerror(-ret));
12857 error("%s is currently mounted, aborting", argv[optind]);
12863 /* only allow partial opening under repair mode */
12865 ctree_flags |= OPEN_CTREE_PARTIAL;
12867 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12868 chunk_root_bytenr, ctree_flags);
12870 error("cannot open file system");
12876 global_info = info;
12877 root = info->fs_root;
/*
 * Standalone action: clear the space cache. The requested version must
 * match what the filesystem uses (FREE_SPACE_TREE compat_ro bit).
 */
12878 if (clear_space_cache == 1) {
12879 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12881 "free space cache v2 detected, use --clear-space-cache v2");
12885 printf("Clearing free space cache\n");
12886 ret = clear_free_space_cache(info);
12888 error("failed to clear free space cache");
12891 printf("Free space cache cleared\n");
12894 } else if (clear_space_cache == 2) {
12895 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12896 printf("no free space cache v2 to clear\n");
12900 printf("Clear free space cache v2\n");
12901 ret = btrfs_clear_free_space_tree(info);
12903 error("failed to clear free space cache v2: %d", ret);
12906 printf("free space cache v2 cleared\n");
12912 * repair mode will force us to commit transaction which
12913 * will make us fail to load log tree when mounting.
12915 if (repair && btrfs_super_log_root(info->super_copy)) {
12916 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12922 ret = zero_log_tree(root);
12925 error("failed to zero log tree: %d", ret);
12930 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* Standalone reports: quota groups, or extent state of one subvolume. */
12931 if (qgroup_report) {
12932 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12934 ret = qgroup_verify_all(info);
12941 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12942 subvolid, argv[optind], uuidbuf);
12943 ret = print_extent_state(info, subvolid);
12947 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree, device and chunk roots must be readable for any check to run. */
12949 if (!extent_buffer_uptodate(info->tree_root->node) ||
12950 !extent_buffer_uptodate(info->dev_root->node) ||
12951 !extent_buffer_uptodate(info->chunk_root->node)) {
12952 error("critical roots corrupted, unable to check the filesystem");
/* Optional re-initialisation of the extent and/or csum tree in one transaction. */
12958 if (init_extent_tree || init_csum_tree) {
12959 struct btrfs_trans_handle *trans;
12961 trans = btrfs_start_transaction(info->extent_root, 0);
12962 if (IS_ERR(trans)) {
12963 error("error starting transaction");
12964 ret = PTR_ERR(trans);
12969 if (init_extent_tree) {
12970 printf("Creating a new extent tree\n");
12971 ret = reinit_extent_tree(trans, info);
12977 if (init_csum_tree) {
12978 printf("Reinitialize checksum tree\n");
12979 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12981 error("checksum tree initialization failed: %d",
12988 ret = fill_csum_tree(trans, info->csum_root,
12992 error("checksum tree refilling failed: %d", ret);
12997 * Ok now we commit and run the normal fsck, which will add
12998 * extent entries for all of the items it finds.
13000 ret = btrfs_commit_transaction(trans, info->extent_root);
13005 if (!extent_buffer_uptodate(info->extent_root->node)) {
13006 error("critical: extent_root, unable to check the filesystem");
13011 if (!extent_buffer_uptodate(info->csum_root->node)) {
13012 error("critical: csum_root, unable to check the filesystem");
/* Pass: extents and chunks; the implementation depends on the check mode. */
13018 if (!ctx.progress_enabled)
13019 fprintf(stderr, "checking extents\n");
13020 if (check_mode == CHECK_MODE_LOWMEM)
13021 ret = check_chunks_and_extents_v2(root);
13023 ret = check_chunks_and_extents(root);
13027 "errors found in extent allocation tree or chunk allocation");
/* Pass: root items; a positive result is reported as a number of roots. */
13029 ret = repair_root_items(info);
13032 error("failed to repair root items: %s", strerror(-ret));
13036 fprintf(stderr, "Fixed %d roots.\n", ret);
13038 } else if (ret > 0) {
13040 "Found %d roots with an outdated root item.\n",
13043 "Please run a filesystem check with the option --repair to fix them.\n");
/* Pass: free space cache (v1) or free space tree (v2). */
13049 if (!ctx.progress_enabled) {
13050 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13051 fprintf(stderr, "checking free space tree\n");
13053 fprintf(stderr, "checking free space cache\n");
13055 ret = check_space_cache(root);
13058 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13059 error("errors found in free space tree");
13061 error("errors found in free space cache");
13066 * We used to have to have these hole extents in between our real
13067 * extents so if we don't have this flag set we need to make sure there
13068 * are no gaps in the file extents for inodes, otherwise we can just
13069 * ignore it when this happens.
13071 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
/* Pass: fs roots; the implementation depends on the check mode. */
13072 if (!ctx.progress_enabled)
13073 fprintf(stderr, "checking fs roots\n");
13074 if (check_mode == CHECK_MODE_LOWMEM)
13075 ret = check_fs_roots_v2(root->fs_info);
13077 ret = check_fs_roots(root, &root_cache);
13080 error("errors found in fs roots");
/* Pass: checksum tree. */
13084 fprintf(stderr, "checking csums\n");
13085 ret = check_csums(root);
13088 error("errors found in csum tree");
13092 fprintf(stderr, "checking root refs\n");
13093 /* For low memory mode, check_fs_roots_v2 handles root refs */
13094 if (check_mode != CHECK_MODE_LOWMEM) {
13095 ret = check_root_refs(root, &root_cache);
13098 error("errors found in root refs");
/* In repair mode, re-COW every extent buffer queued on recow_ebs. */
13103 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13104 struct extent_buffer *eb;
13106 eb = list_first_entry(&root->fs_info->recow_ebs,
13107 struct extent_buffer, recow);
13108 list_del_init(&eb->recow);
13109 ret = recow_extent_buffer(root, eb);
13112 error("fails to fix transid errors");
/* Process the items queued on the global delete_items list. */
13117 while (!list_empty(&delete_items)) {
13118 struct bad_item *bad;
13120 bad = list_first_entry(&delete_items, struct bad_item, list);
13121 list_del_init(&bad->list);
13123 ret = delete_bad_item(root, bad);
/* Pass: quota groups, only when quotas are enabled on this filesystem. */
13129 if (info->quota_enabled) {
13130 fprintf(stderr, "checking quota groups\n");
13131 ret = qgroup_verify_all(info);
13134 error("failed to check quota groups");
13138 ret = repair_qgroups(info, &qgroups_repaired);
13141 error("failed to repair quota groups");
/* Buffers still queued on recow_ebs at this point are reported as errors. */
13147 if (!list_empty(&root->fs_info->recow_ebs)) {
13148 error("transid errors in file system");
13153 if (found_old_backref) { /*
13154 * there was a disk format change when mixed
13155 * backref was in testing tree. The old format
13156 * existed about one week.
13158 printf("\n * Found old mixed backref format. "
13159 "The old format is not supported! *"
13160 "\n * Please mount the FS in readonly mode, "
13161 "backup data and re-format the FS. *\n\n");
13164 printf("found %llu bytes used, ",
13165 (unsigned long long)bytes_used);
13167 printf("error(s) found\n");
13169 printf("no error found\n");
13170 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13171 printf("total tree bytes: %llu\n",
13172 (unsigned long long)total_btree_bytes);
13173 printf("total fs tree bytes: %llu\n",
13174 (unsigned long long)total_fs_tree_bytes);
13175 printf("total extent tree bytes: %llu\n",
13176 (unsigned long long)total_extent_tree_bytes);
13177 printf("btree space waste bytes: %llu\n",
13178 (unsigned long long)btree_space_waste);
13179 printf("file data blocks allocated: %llu\n referenced %llu\n",
13180 (unsigned long long)data_bytes_allocated,
13181 (unsigned long long)data_bytes_referenced);
13183 free_qgroup_counts();
13184 free_root_recs_tree(&root_cache);
13188 if (ctx.progress_enabled)
13189 task_deinit(ctx.info);