2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
/* Sentinel enumerator of the progress positions; compared against tp below in print_status_check(). */
50 TASK_NOTHING, /* have to be the last element */
/* Progress context members: tp selects the phase text, info is the handle passed to the task_period_* calls. */
55 enum task_position tp;
57 struct task_info *info;
/*
 * File-scope state of the checker.  Every counter and flag starts at zero,
 * both lists start empty and the pointers start as NULL.
 */
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
/* Read by maybe_free_inode_rec(): when set, the file extent gap test is skipped */
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
/* Progress context handed to print_status_check() as its private pointer */
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/* Selectable check implementations; parse_check_mode() maps option strings onto these values. */
78 enum btrfs_check_mode {
/* The default is the original mode */
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode in effect for this run, initialised to the default */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common part of a back reference.  It is embedded as 'node' in both
 * struct data_backref and struct tree_backref and carries the list linkage
 * plus one-bit state flags.
 */
87 struct extent_backref {
88 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
/* Convert a list link back into the extent_backref that contains it. */
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
/* Back reference variant that embeds the common extent_backref as 'node'. */
101 struct data_backref {
102 struct extent_backref node;
/* Recover the enclosing data_backref from its embedded extent_backref. */
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
118 return container_of(back, struct data_backref, node);
/* Orphan data extent record; instances are chained through 'list'. */
122 * Much like data_backref, just removed the undetermined members
123 * and change it to use list_head.
124 * During extent scan, it is stored in root->orphan_data_extent.
125 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
127 struct orphan_data_extent {
128 struct list_head list;
/* Back reference variant that embeds the common extent_backref as 'node'. */
136 struct tree_backref {
137 struct extent_backref node;
/* Recover the enclosing tree_backref from its embedded extent_backref. */
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
146 return container_of(back, struct tree_backref, node);
/* The value 2 fits the two-bit flag_block_full_backref field of struct extent_record. */
149 /* Explicit initialization for extent_record::flag_block_full_backref */
150 enum { FLAG_UNSET = 2 };
/*
 * Per-extent bookkeeping: list heads for its backrefs and duplicates, a
 * cache_extent for tree lookup, the parent key and generation, the reference
 * count taken from the extent item, and a set of state bits.
 */
152 struct extent_record {
153 struct list_head backrefs;
154 struct list_head dups;
155 struct list_head list;
156 struct cache_extent cache;
157 struct btrfs_disk_key parent_key;
162 u64 extent_item_refs;
164 u64 parent_generation;
/* Two bits wide so that it can also hold FLAG_UNSET (2) */
168 unsigned int flag_block_full_backref:2;
169 unsigned int found_rec:1;
170 unsigned int content_checked:1;
171 unsigned int owner_ref_checked:1;
172 unsigned int is_root:1;
173 unsigned int metadata:1;
174 unsigned int bad_full_backref:1;
175 unsigned int crossing_stripes:1;
176 unsigned int wrong_chunk_type:1;
/* Convert a link of the 'list' member back into its extent_record. */
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
181 return container_of(entry, struct extent_record, list);
/*
 * One name referring to an inode.  The found_* bits record which item kinds
 * (dir item, dir index, inode ref) have been seen for this name; they are set
 * in add_inode_backref().
 */
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
/* Convert a list link back into the inode_backref that contains it. */
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
200 return list_entry(entry, struct inode_backref, list);
/* List-linked record for a root item; it stores a drop key among its members. */
203 struct root_item_record {
204 struct list_head list;
211 struct btrfs_key drop_key;
/*
 * Error bits stored in the 'errors' field of a backref.  Each flag has its
 * own bit; print_ref_error() turns a mask of them into text.
 */
214 #define REF_ERR_NO_DIR_ITEM (1 << 0)
215 #define REF_ERR_NO_DIR_INDEX (1 << 1)
216 #define REF_ERR_NO_INODE_REF (1 << 2)
217 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
218 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
219 #define REF_ERR_DUP_INODE_REF (1 << 5)
220 #define REF_ERR_INDEX_UNMATCH (1 << 6)
221 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
222 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
223 #define REF_ERR_NO_ROOT_REF (1 << 9)
224 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
225 #define REF_ERR_DUP_ROOT_REF (1 << 11)
226 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/* Node type of the per-inode hole tree; the functions below read its node, start and len members. */
228 struct file_extent_hole {
/*
 * Everything collected about one inode: its backrefs, one-bit facts about
 * which items were found, the tree of file extent holes and the list of
 * orphan data extents.
 */
234 struct inode_record {
235 struct list_head backrefs;
236 unsigned int checked:1;
237 unsigned int merging:1;
238 unsigned int found_inode_item:1;
239 unsigned int found_dir_item:1;
240 unsigned int found_file_extent:1;
241 unsigned int found_csum_item:1;
242 unsigned int some_csum_missing:1;
243 unsigned int nodatasum:1;
256 struct rb_root holes;
257 struct list_head orphan_extents;
/*
 * Error bits stored in inode_record::errors.  Each flag has its own bit;
 * print_inode_error() turns a mask of them into text.
 */
262 #define I_ERR_NO_INODE_ITEM (1 << 0)
263 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
264 #define I_ERR_DUP_INODE_ITEM (1 << 2)
265 #define I_ERR_DUP_DIR_INDEX (1 << 3)
266 #define I_ERR_ODD_DIR_ITEM (1 << 4)
267 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
268 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
269 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
270 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
271 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
272 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
273 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
274 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
275 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
276 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/* One reference to a root, with one-bit facts about which items were found for it. */
278 struct root_backref {
279 struct list_head list;
280 unsigned int found_dir_item:1;
281 unsigned int found_dir_index:1;
282 unsigned int found_back_ref:1;
283 unsigned int found_forward_ref:1;
284 unsigned int reachable:1;
/* Convert a list link back into the root_backref that contains it. */
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
295 return list_entry(entry, struct root_backref, list);
/* Members of a cache-indexed record that owns a backref list and a found_root_item bit. */
299 struct list_head backrefs;
300 struct cache_extent cache;
301 unsigned int found_root_item:1;
/* NOTE(review): presumably the cache member of struct ptr_node, which the code below reaches via container_of — confirm. */
307 struct cache_extent cache;
/*
 * Members of struct shared_node as used further down: a cache_extent for
 * lookup by bytenr, separate caches for root and inode records, and the
 * inode record currently being processed.
 */
312 struct cache_extent cache;
313 struct cache_tree root_cache;
314 struct cache_tree inode_cache;
315 struct inode_record *current;
/* Tree walk state: the cache of shared nodes plus one shared_node slot per tree level. */
324 struct walk_control {
325 struct cache_tree shared;
326 struct shared_node *nodes[BTRFS_MAX_LEVEL];
/* Members of a list-linked record that stores a key */
332 struct btrfs_key key;
334 struct list_head list;
/* List-linked extent entry */
337 struct extent_entry {
342 struct list_head list;
/* Root item description, indexed through its cache_extent member */
345 struct root_item_info {
346 /* level of the root */
348 /* number of nodes at this level, must be 1 for a root */
352 struct cache_extent cache_extent;
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about them yet; they are for internal use only.
 * Every flag occupies its own bit so that two different errors can never be
 * confused with each other once they are OR-ed into one mask.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type mismatch */
/* Moved off bit 4, which it used to share with REFERENCER_MISMATCH */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
/*
 * Periodic status printer.  'p' is the struct task_ctx of the run.
 * The period is started with 1000 (1s per the inline note); each round prints
 * the text for the current phase followed by one of four indicator characters
 * picked by count % 4, and ends the line with '\r' rather than '\n'.
 */
372 static void *print_status_check(void *p)
374 struct task_ctx *priv = p;
375 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Indexed by priv->tp */
377 static char *task_position_string[] = {
379 "checking free space cache",
383 task_period_start(priv->info, 1000 /* 1s */);
/* TASK_NOTHING is the sentinel position and gets special treatment */
385 if (priv->tp == TASK_NOTHING)
389 printf("%s [%c]\r", task_position_string[priv->tp],
390 work_indicator[count % 4]);
/* Block until the next period */
393 task_period_wait(priv->info);
/* NOTE(review): by its name the completion-side companion of print_status_check() — confirm where the task is set up. */
398 static int print_status_return(void *p)
406 static enum btrfs_check_mode parse_check_mode(const char *str)
408 if (strcmp(str, "lowmem") == 0)
409 return CHECK_MODE_LOWMEM;
410 if (strcmp(str, "orig") == 0)
411 return CHECK_MODE_ORIGINAL;
412 if (strcmp(str, "original") == 0)
413 return CHECK_MODE_ORIGINAL;
415 return CHECK_MODE_UNKNOWN;
/*
 * Look at the first hole of 'holes' as returned by rb_first().  The empty
 * tree is tested for before that entry is taken.
 */
418 /* Compatible function to allow reuse of old codes */
419 static u64 first_extent_gap(struct rb_root *holes)
421 struct file_extent_hole *hole;
423 if (RB_EMPTY_ROOT(holes))
426 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Ordering callback passed to rb_insert() for the hole tree.  Holes are
 * ordered by start first; when the starts are equal the lengths decide which
 * of the two becomes the merge center.  add_file_extent_hole() relies on this
 * callback never reporting equality.
 */
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
432 struct file_extent_hole *hole1;
433 struct file_extent_hole *hole2;
435 hole1 = rb_entry(node1, struct file_extent_hole, node);
436 hole2 = rb_entry(node2, struct file_extent_hole, node);
438 if (hole1->start > hole2->start)
440 if (hole1->start < hole2->start)
442 /* Now hole1->start == hole2->start */
443 if (hole1->len >= hole2->len)
445 * Hole 1 will be merge center
446 * Same hole will be merged later
449 /* Hole 2 will be merge center */
/*
 * Allocates a new hole node, inserts it into 'holes' and then merges it with
 * its neighbours: once with the previous hole if that one reaches the new
 * start, then repeatedly with following holes that begin at or before the
 * new end.  Nodes absorbed by a merge are erased from the tree.
 */
454 * Add a hole to the record
456 * This will do hole merge for copy_file_extent_holes(),
457 * which will ensure there won't be continuous holes.
459 static int add_file_extent_hole(struct rb_root *holes,
462 struct file_extent_hole *hole;
463 struct file_extent_hole *prev = NULL;
464 struct file_extent_hole *next = NULL;
466 hole = malloc(sizeof(*hole));
471 /* Since compare will not return 0, no -EEXIST will happen */
472 rb_insert(holes, &hole->node, compare_hole);
474 /* simple merge with previous hole */
475 if (rb_prev(&hole->node))
476 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* prev touches or overlaps the new hole: extend the new hole backwards over it */
478 if (prev && prev->start + prev->len >= hole->start) {
479 hole->len = hole->start + hole->len - prev->start;
480 hole->start = prev->start;
481 rb_erase(&prev->node, holes);
486 /* iterate merge with next holes */
488 if (!rb_next(&hole->node))
490 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
492 if (hole->start + hole->len >= next->start) {
/* Only grow when next ends at or beyond the current end; a fully covered next is just dropped */
493 if (hole->start + hole->len <= next->start + next->len)
494 hole->len = next->start + next->len -
496 rb_erase(&next->node, holes);
/*
 * Search callback used with rb_search() by del_file_extent_hole().  'data'
 * is a struct file_extent_hole describing the probe; the probe offset is
 * tested against the node's hole: before it, or inside
 * [hole->start, hole->start + hole->len).
 */
505 static int compare_hole_range(struct rb_node *node, void *data)
507 struct file_extent_hole *hole;
/* First use of 'hole': the probe passed in by the caller */
510 hole = (struct file_extent_hole *)data;
/* Second use of 'hole': the tree node being compared against */
513 hole = rb_entry(node, struct file_extent_hole, node);
514 if (start < hole->start)
516 if (start >= hole->start && start < hole->start + hole->len)
/*
 * Finds the hole containing the start of the range via rb_search(), checks
 * that the range does not extend past that hole, erases the hole and adds
 * back whatever remains of it in front of and behind the removed range.
 */
522 * Delete a hole in the record
524 * This will do the hole split and is much restrict than add.
526 static int del_file_extent_hole(struct rb_root *holes,
529 struct file_extent_hole *hole;
530 struct file_extent_hole tmp;
535 struct rb_node *node;
/* tmp is the probe handed to compare_hole_range() */
542 node = rb_search(holes, &tmp, compare_hole_range, NULL);
545 hole = rb_entry(node, struct file_extent_hole, node);
/* The range to delete must lie entirely within the hole that was found */
546 if (start + len > hole->start + hole->len)
550 * Now there will be no overlap, delete the hole and re-add the
551 * split(s) if they exists.
/* Remainder in front of the deleted range */
553 if (start > hole->start) {
554 prev_start = hole->start;
555 prev_len = start - hole->start;
/* Remainder behind the deleted range */
558 if (hole->start + hole->len > start + len) {
559 next_start = start + len;
560 next_len = hole->start + hole->len - start - len;
563 rb_erase(node, holes);
566 ret = add_file_extent_hole(holes, prev_start, prev_len);
571 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Walks the source tree from rb_first() with rb_next() and adds every hole
 * to 'dst' through add_file_extent_hole(), so the holes are merged on the
 * destination side as they are copied.
 */
578 static int copy_file_extent_holes(struct rb_root *dst,
581 struct file_extent_hole *hole;
582 struct rb_node *node;
585 node = rb_first(src);
587 hole = rb_entry(node, struct file_extent_hole, node);
588 ret = add_file_extent_hole(dst, hole->start, hole->len);
591 node = rb_next(node);
/*
 * Empties the hole tree by repeatedly taking the first node and erasing it.
 * rb_first() is called again after each erase instead of stepping with
 * rb_next().
 */
596 static void free_file_extent_holes(struct rb_root *holes)
598 struct rb_node *node;
599 struct file_extent_hole *hole;
601 node = rb_first(holes);
603 hole = rb_entry(node, struct file_extent_hole, node);
604 rb_erase(node, holes);
606 node = rb_first(holes);
/* Forward declaration; the definition comes later in the file. */
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Ties 'root' to the transaction 'trans' the first time it is seen there:
 * marks the root as tracking dirty state, stores the transaction id, makes
 * the current node the commit root and takes a reference on that node.
 * Nothing happens if root->last_trans already equals trans->transid.
 */
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613 struct btrfs_root *root)
615 if (root->last_trans != trans->transid) {
616 root->track_dirty = 1;
617 root->last_trans = trans->transid;
618 root->commit_root = root->node;
/* commit_root now also points at the node, hence the extra reference */
619 extent_buffer_get(root->node);
623 static u8 imode_to_type(u32 imode)
626 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
628 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
629 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
630 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
631 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
632 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
633 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
636 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/* rb-tree ordering callback for device records: the two nodes are compared by their devid. */
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
642 struct device_record *rec1;
643 struct device_record *rec2;
645 rec1 = rb_entry(node1, struct device_record, node);
646 rec2 = rb_entry(node2, struct device_record, node);
647 if (rec1->devid > rec2->devid)
649 else if (rec1->devid < rec2->devid)
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
657 struct inode_record *rec;
658 struct inode_backref *backref;
659 struct inode_backref *orig;
660 struct inode_backref *tmp;
661 struct orphan_data_extent *src_orphan;
662 struct orphan_data_extent *dst_orphan;
667 rec = malloc(sizeof(*rec));
669 return ERR_PTR(-ENOMEM);
670 memcpy(rec, orig_rec, sizeof(*rec));
672 INIT_LIST_HEAD(&rec->backrefs);
673 INIT_LIST_HEAD(&rec->orphan_extents);
674 rec->holes = RB_ROOT;
676 list_for_each_entry(orig, &orig_rec->backrefs, list) {
677 size = sizeof(*orig) + orig->namelen + 1;
678 backref = malloc(size);
683 memcpy(backref, orig, size);
684 list_add_tail(&backref->list, &rec->backrefs);
686 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687 dst_orphan = malloc(sizeof(*dst_orphan));
692 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
695 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
702 rb = rb_first(&rec->holes);
704 struct file_extent_hole *hole;
706 hole = rb_entry(rb, struct file_extent_hole, node);
712 if (!list_empty(&rec->backrefs))
713 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714 list_del(&orig->list);
718 if (!list_empty(&rec->orphan_extents))
719 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720 list_del(&orig->list);
/*
 * Prints one header line naming the tree, then one line per orphan data
 * extent on the list with its inode, offset and disk location.  Output goes
 * to stdout.  An empty list is tested for first.
 */
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
732 struct orphan_data_extent *orphan;
734 if (list_empty(orphan_extents))
736 printf("The following data extent is lost in tree %llu:\n",
738 list_for_each_entry(orphan, orphan_extents, list) {
739 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Writes a one-line summary of rec->errors to stderr: the root and inode
 * numbers, the raw mask in hex, then one ", <text>" fragment per set I_ERR_*
 * bit.  Afterwards it lists the orphan extents and the file extent holes when
 * the corresponding error bits are set.
 */
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
747 u64 root_objectid = root->root_key.objectid;
748 int errors = rec->errors;
752 /* reloc root errors, we print its corresponding fs root objectid*/
753 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
/* For a reloc root the key offset is used as the objectid to print */
754 root_objectid = root->root_key.offset;
755 fprintf(stderr, "reloc");
757 fprintf(stderr, "root %llu inode %llu errors %x",
758 (unsigned long long) root_objectid,
759 (unsigned long long) rec->ino, rec->errors);
761 if (errors & I_ERR_NO_INODE_ITEM)
762 fprintf(stderr, ", no inode item");
763 if (errors & I_ERR_NO_ORPHAN_ITEM)
764 fprintf(stderr, ", no orphan item");
765 if (errors & I_ERR_DUP_INODE_ITEM)
766 fprintf(stderr, ", dup inode item");
767 if (errors & I_ERR_DUP_DIR_INDEX)
768 fprintf(stderr, ", dup dir index");
769 if (errors & I_ERR_ODD_DIR_ITEM)
770 fprintf(stderr, ", odd dir item");
771 if (errors & I_ERR_ODD_FILE_EXTENT)
772 fprintf(stderr, ", odd file extent");
773 if (errors & I_ERR_BAD_FILE_EXTENT)
774 fprintf(stderr, ", bad file extent");
775 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776 fprintf(stderr, ", file extent overlap");
777 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778 fprintf(stderr, ", file extent discount");
779 if (errors & I_ERR_DIR_ISIZE_WRONG)
780 fprintf(stderr, ", dir isize wrong");
781 if (errors & I_ERR_FILE_NBYTES_WRONG)
782 fprintf(stderr, ", nbytes wrong");
783 if (errors & I_ERR_ODD_CSUM_ITEM)
784 fprintf(stderr, ", odd csum item");
785 if (errors & I_ERR_SOME_CSUM_MISSING)
786 fprintf(stderr, ", some csum missing");
787 if (errors & I_ERR_LINK_COUNT_WRONG)
788 fprintf(stderr, ", link count wrong");
789 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790 fprintf(stderr, ", orphan file extent");
791 fprintf(stderr, "\n");
792 /* Print the orphan extents if needed */
793 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
796 /* Print the holes if needed */
797 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798 struct file_extent_hole *hole;
799 struct rb_node *node;
802 node = rb_first(&rec->holes);
803 fprintf(stderr, "Found file extent holes:\n");
806 hole = rb_entry(node, struct file_extent_hole, node);
807 fprintf(stderr, "\tstart: %llu, len: %llu\n",
808 hole->start, hole->len);
809 node = rb_next(node);
/* Alternative report: one hole from 0 covering isize rounded up to the sector size */
812 fprintf(stderr, "\tstart: 0, len: %llu\n",
813 round_up(rec->isize, root->sectorsize));
817 static void print_ref_error(int errors)
819 if (errors & REF_ERR_NO_DIR_ITEM)
820 fprintf(stderr, ", no dir item");
821 if (errors & REF_ERR_NO_DIR_INDEX)
822 fprintf(stderr, ", no dir index");
823 if (errors & REF_ERR_NO_INODE_REF)
824 fprintf(stderr, ", no inode ref");
825 if (errors & REF_ERR_DUP_DIR_ITEM)
826 fprintf(stderr, ", dup dir item");
827 if (errors & REF_ERR_DUP_DIR_INDEX)
828 fprintf(stderr, ", dup dir index");
829 if (errors & REF_ERR_DUP_INODE_REF)
830 fprintf(stderr, ", dup inode ref");
831 if (errors & REF_ERR_INDEX_UNMATCH)
832 fprintf(stderr, ", index mismatch");
833 if (errors & REF_ERR_FILETYPE_UNMATCH)
834 fprintf(stderr, ", filetype mismatch");
835 if (errors & REF_ERR_NAME_TOO_LONG)
836 fprintf(stderr, ", name too long");
837 if (errors & REF_ERR_NO_ROOT_REF)
838 fprintf(stderr, ", no root ref");
839 if (errors & REF_ERR_NO_ROOT_BACKREF)
840 fprintf(stderr, ", no root backref");
841 if (errors & REF_ERR_DUP_ROOT_REF)
842 fprintf(stderr, ", dup root ref");
843 if (errors & REF_ERR_DUP_ROOT_BACKREF)
844 fprintf(stderr, ", dup root backref");
845 fprintf(stderr, "\n");
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
851 struct ptr_node *node;
852 struct cache_extent *cache;
853 struct inode_record *rec = NULL;
856 cache = lookup_cache_extent(inode_cache, ino, 1);
858 node = container_of(cache, struct ptr_node, cache);
860 if (mod && rec->refs > 1) {
861 node->data = clone_inode_rec(rec);
862 if (IS_ERR(node->data))
868 rec = calloc(1, sizeof(*rec));
870 return ERR_PTR(-ENOMEM);
872 rec->extent_start = (u64)-1;
874 INIT_LIST_HEAD(&rec->backrefs);
875 INIT_LIST_HEAD(&rec->orphan_extents);
876 rec->holes = RB_ROOT;
878 node = malloc(sizeof(*node));
881 return ERR_PTR(-ENOMEM);
883 node->cache.start = ino;
884 node->cache.size = 1;
887 if (ino == BTRFS_FREE_INO_OBJECTID)
890 ret = insert_cache_extent(inode_cache, &node->cache);
892 return ERR_PTR(-EEXIST);
/* Drains the list: while it is not empty, the first entry is taken and unlinked. */
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
899 struct orphan_data_extent *orphan;
901 while (!list_empty(orphan_extents)) {
902 orphan = list_entry(orphan_extents->next,
903 struct orphan_data_extent, list);
904 list_del(&orphan->list);
/*
 * Tears down an inode record: unlinks every backref from its list, then
 * releases the orphan extent list and the hole tree through their helpers.
 */
909 static void free_inode_rec(struct inode_record *rec)
911 struct inode_backref *backref;
916 while (!list_empty(&rec->backrefs)) {
917 backref = to_inode_backref(rec->backrefs.next);
918 list_del(&backref->list);
921 free_orphan_data_extents(&rec->orphan_extents);
922 free_file_extent_holes(&rec->holes);
/*
 * Tests whether a record is fully resolved: no errors, already checked, its
 * inode item found, link count equal to the links found, and no backrefs
 * left on its list.
 */
926 static int can_free_inode_rec(struct inode_record *rec)
928 if (!rec->errors && rec->checked && rec->found_inode_item &&
929 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Settles what can be settled for 'rec' and drops it from 'inode_cache' when
 * nothing is left to track.
 *
 * First, backrefs that have both a dir item and a dir index get their file
 * type compared with the inode mode; clean, fully matched ones are unlinked.
 * Then, once the record is checked and not being merged, the size, extent
 * and checksum consistency errors are derived.  Finally the record is removed
 * from the cache if can_free_inode_rec() agrees.
 */
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935 struct inode_record *rec)
937 struct cache_extent *cache;
938 struct inode_backref *tmp, *backref;
939 struct ptr_node *node;
/* Without the inode item the mode is unknown, so nothing can be verified yet */
942 if (!rec->found_inode_item)
945 filetype = imode_to_type(rec->imode);
946 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947 if (backref->found_dir_item && backref->found_dir_index) {
948 if (backref->filetype != filetype)
949 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950 if (!backref->errors && backref->found_inode_ref &&
951 rec->nlink == rec->found_link) {
952 list_del(&backref->list);
958 if (!rec->checked || rec->merging)
961 if (S_ISDIR(rec->imode)) {
962 if (rec->found_size != rec->isize)
963 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964 if (rec->found_file_extent)
965 rec->errors |= I_ERR_ODD_FILE_EXTENT;
966 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967 if (rec->found_dir_item)
968 rec->errors |= I_ERR_ODD_DIR_ITEM;
969 if (rec->found_size != rec->nbytes)
970 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Gap test: skipped for unlinked inodes and when no_holes is set */
971 if (rec->nlink > 0 && !no_holes &&
972 (rec->extent_end < rec->isize ||
973 first_extent_gap(&rec->holes) < rec->isize))
974 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
977 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978 if (rec->found_csum_item && rec->nodatasum)
979 rec->errors |= I_ERR_ODD_CSUM_ITEM;
980 if (rec->some_csum_missing && !rec->nodatasum)
981 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* The record must not be shared at this point */
984 BUG_ON(rec->refs != 1);
985 if (can_free_inode_rec(rec)) {
986 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987 node = container_of(cache, struct ptr_node, cache);
988 BUG_ON(node->data != rec);
989 remove_cache_extent(inode_cache, &node->cache);
/*
 * Searches 'root' for an orphan item with objectid BTRFS_ORPHAN_OBJECTID and
 * type BTRFS_ORPHAN_ITEM_KEY.  The search is read-only (no transaction, no
 * insert or cow) and the path is released right after it.
 */
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
997 struct btrfs_path path;
998 struct btrfs_key key;
1001 key.objectid = BTRFS_ORPHAN_OBJECTID;
1002 key.type = BTRFS_ORPHAN_ITEM_KEY;
1005 btrfs_init_path(&path);
1006 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007 btrfs_release_path(&path);
/*
 * Copies link count, size, byte count and mode from the inode item at 'slot'
 * of 'eb' into the active node's current inode record.  A second inode item
 * for the same record is flagged as I_ERR_DUP_INODE_ITEM.  Afterwards the
 * record is handed to maybe_free_inode_rec().
 */
1013 static int process_inode_item(struct extent_buffer *eb,
1014 int slot, struct btrfs_key *key,
1015 struct shared_node *active_node)
1017 struct inode_record *rec;
1018 struct btrfs_inode_item *item;
1020 rec = active_node->current;
/* The current record must belong to this key and must not be shared */
1021 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022 if (rec->found_inode_item) {
1023 rec->errors |= I_ERR_DUP_INODE_ITEM;
1026 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027 rec->nlink = btrfs_inode_nlink(eb, item);
1028 rec->isize = btrfs_inode_size(eb, item);
1029 rec->nbytes = btrfs_inode_nbytes(eb, item);
1030 rec->imode = btrfs_inode_mode(eb, item);
1031 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1033 rec->found_inode_item = 1;
/* An inode with no links is flagged here as lacking an orphan item */
1034 if (rec->nlink == 0)
1035 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Finds the backref of 'rec' matching (dir, name, namelen), or creates one.
 * A new backref is allocated with room for the name and its terminating NUL,
 * has its fixed part zeroed and is appended to rec->backrefs.
 */
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1042 int namelen, u64 dir)
1044 struct inode_backref *backref;
1046 list_for_each_entry(backref, &rec->backrefs, list) {
/* Records for BTRFS_MULTIPLE_OBJECTIDS are special-cased in the lookup */
1047 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1049 if (backref->dir != dir || backref->namelen != namelen)
1051 if (memcmp(name, backref->name, namelen))
1056 backref = malloc(sizeof(*backref) + namelen + 1);
/* Only the struct itself is zeroed; the name bytes are filled in below */
1059 memset(backref, 0, sizeof(*backref));
1061 backref->namelen = namelen;
1062 memcpy(backref->name, name, namelen);
1063 backref->name[namelen] = '\0';
1064 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Records that an item of kind 'itemtype' names inode 'ino' as 'name' in
 * directory 'dir'.  The matching backref is looked up or created, the passed
 * 'errors' are OR-ed in, and the branch for the item kind flags duplicates
 * and index/filetype disagreements with what was recorded earlier before
 * storing the new values.  The record is then offered to
 * maybe_free_inode_rec().
 */
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069 u64 ino, u64 dir, u64 index,
1070 const char *name, int namelen,
1071 u8 filetype, u8 itemtype, int errors)
1073 struct inode_record *rec;
1074 struct inode_backref *backref;
/* mod = 1: the record will be modified, so it is created or made private as needed */
1076 rec = get_inode_rec(inode_cache, ino, 1);
1077 BUG_ON(IS_ERR(rec));
1078 backref = get_inode_backref(rec, name, namelen, dir);
1081 backref->errors |= errors;
1082 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083 if (backref->found_dir_index)
1084 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085 if (backref->found_inode_ref && backref->index != index)
1086 backref->errors |= REF_ERR_INDEX_UNMATCH;
1087 if (backref->found_dir_item && backref->filetype != filetype)
1088 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1090 backref->index = index;
1091 backref->filetype = filetype;
1092 backref->found_dir_index = 1;
1093 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1095 if (backref->found_dir_item)
1096 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097 if (backref->found_dir_index && backref->filetype != filetype)
1098 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1100 backref->filetype = filetype;
1101 backref->found_dir_item = 1;
1102 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104 if (backref->found_inode_ref)
1105 backref->errors |= REF_ERR_DUP_INODE_REF;
1106 if (backref->found_dir_index && backref->index != index)
1107 backref->errors |= REF_ERR_INDEX_UNMATCH;
1109 backref->index = index;
/* Remember whether this was a plain ref or an extref; merge_inode_recs() replays it */
1111 backref->ref_type = itemtype;
1112 backref->found_inode_ref = 1;
1117 maybe_free_inode_rec(inode_cache, rec);
/*
 * Folds everything known about 'src' into 'dst'.  Backrefs are replayed
 * through add_inode_backref() against 'dst_cache', the found_* facts and the
 * error mask are OR-ed together, link and size totals are added, the extent
 * ranges are combined (flagging overlap or recording the gap as a hole), and
 * the inode item fields are taken over if 'dst' has none yet.
 */
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122 struct cache_tree *dst_cache)
1124 struct inode_backref *backref;
1129 list_for_each_entry(backref, &src->backrefs, list) {
1130 if (backref->found_dir_index) {
1131 add_inode_backref(dst_cache, dst->ino, backref->dir,
1132 backref->index, backref->name,
1133 backref->namelen, backref->filetype,
1134 BTRFS_DIR_INDEX_KEY, backref->errors);
1136 if (backref->found_dir_item) {
1138 add_inode_backref(dst_cache, dst->ino,
1139 backref->dir, 0, backref->name,
1140 backref->namelen, backref->filetype,
1141 BTRFS_DIR_ITEM_KEY, backref->errors);
1143 if (backref->found_inode_ref) {
1144 add_inode_backref(dst_cache, dst->ino,
1145 backref->dir, backref->index,
1146 backref->name, backref->namelen, 0,
1147 backref->ref_type, backref->errors);
1151 if (src->found_dir_item)
1152 dst->found_dir_item = 1;
1153 if (src->found_file_extent)
1154 dst->found_file_extent = 1;
1155 if (src->found_csum_item)
1156 dst->found_csum_item = 1;
1157 if (src->some_csum_missing)
1158 dst->some_csum_missing = 1;
/* Source holes are only copied when the source's first gap lies before the destination's */
1159 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160 ret = copy_file_extent_holes(&dst->holes, &src->holes);
/* dir_count is subtracted from src->found_link before it is added to dst */
1165 BUG_ON(src->found_link < dir_count);
1166 dst->found_link += src->found_link - dir_count;
1167 dst->found_size += src->found_size;
/* (u64)-1 in extent_start means that side has seen no extent yet */
1168 if (src->extent_start != (u64)-1) {
1169 if (dst->extent_start == (u64)-1) {
1170 dst->extent_start = src->extent_start;
1171 dst->extent_end = src->extent_end;
1173 if (dst->extent_end > src->extent_start)
1174 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175 else if (dst->extent_end < src->extent_start) {
1176 ret = add_file_extent_hole(&dst->holes,
1178 src->extent_start - dst->extent_end);
1180 if (dst->extent_end < src->extent_end)
1181 dst->extent_end = src->extent_end;
1185 dst->errors |= src->errors;
1186 if (src->found_inode_item) {
1187 if (!dst->found_inode_item) {
1188 dst->nlink = src->nlink;
1189 dst->isize = src->isize;
1190 dst->nbytes = src->nbytes;
1191 dst->imode = src->imode;
1192 dst->nodatasum = src->nodatasum;
1193 dst->found_inode_item = 1;
/* Both sides carried an inode item */
1195 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Transfers the records of 'src_node' into 'dst_node', first for the root
 * cache and then for the inode cache.  Each entry is re-inserted on the
 * destination side; when the destination already has a record for the same
 * inode (-EEXIST) the two are merged and the source record is freed.
 * Finally the destination's current record is moved forward to the source's
 * current inode if that one is higher.
 */
1203 static int splice_shared_node(struct shared_node *src_node,
1204 struct shared_node *dst_node)
1206 struct cache_extent *cache;
1207 struct ptr_node *node, *ins;
1208 struct cache_tree *src, *dst;
1209 struct inode_record *rec, *conflict;
1210 u64 current_ino = 0;
/* Drops one reference on the source; the last reference is handled specially */
1214 if (--src_node->refs == 0)
1216 if (src_node->current)
1217 current_ino = src_node->current->ino;
1219 src = &src_node->root_cache;
1220 dst = &dst_node->root_cache;
1222 cache = search_cache_extent(src, 0);
1224 node = container_of(cache, struct ptr_node, cache);
/* Step to the next entry before this one may be removed */
1226 cache = next_cache_extent(cache);
1229 remove_cache_extent(src, &node->cache);
1232 ins = malloc(sizeof(*ins));
1234 ins->cache.start = node->cache.start;
1235 ins->cache.size = node->cache.size;
1239 ret = insert_cache_extent(dst, &ins->cache);
1240 if (ret == -EEXIST) {
1241 conflict = get_inode_rec(dst, rec->ino, 1);
1242 BUG_ON(IS_ERR(conflict));
1243 merge_inode_recs(rec, conflict, dst);
1245 conflict->checked = 1;
1246 if (dst_node->current == conflict)
1247 dst_node->current = NULL;
1249 maybe_free_inode_rec(dst, conflict);
1250 free_inode_rec(rec);
/* Second pass: after the root caches, do the inode caches */
1257 if (src == &src_node->root_cache) {
1258 src = &src_node->inode_cache;
1259 dst = &dst_node->inode_cache;
1263 if (current_ino > 0 && (!dst_node->current ||
1264 current_ino > dst_node->current->ino)) {
1265 if (dst_node->current) {
1266 dst_node->current->checked = 1;
1267 maybe_free_inode_rec(dst, dst_node->current);
1269 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270 BUG_ON(IS_ERR(dst_node->current));
/* Per-entry destructor for caches of ptr_node: releases the inode record behind the entry. */
1275 static void free_inode_ptr(struct cache_extent *cache)
1277 struct ptr_node *node;
1278 struct inode_record *rec;
1280 node = container_of(cache, struct ptr_node, cache);
1282 free_inode_rec(rec);
/* NOTE(review): presumably expands to free_inode_recs_tree(), which enter_shared_node() calls — confirm against the macro definition. */
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/* Looks up the shared_node registered for a block address in the 'shared' cache tree. */
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1291 struct cache_extent *cache;
1292 struct shared_node *node;
1294 cache = lookup_cache_extent(shared, bytenr, 1);
1296 node = container_of(cache, struct shared_node, cache);
/*
 * Creates a zeroed shared_node keyed by 'bytenr' (size 1), initialises its
 * root and inode caches and inserts it into the 'shared' cache tree.
 */
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1305 struct shared_node *node;
1307 node = calloc(1, sizeof(*node));
1310 node->cache.start = bytenr;
1311 node->cache.size = 1;
1312 cache_tree_init(&node->root_cache);
1313 cache_tree_init(&node->inode_cache);
1316 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the walk reaches the shared block at 'bytenr' on 'level'.
 * On the first visit a shared_node is registered and becomes the active node
 * for that level.  On later visits the records already collected for the
 * block are either dropped (walk at root level of a root with zero refs) or
 * spliced into the currently active node.
 */
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322 struct walk_control *wc, int level)
1324 struct shared_node *node;
1325 struct shared_node *dest;
1328 if (level == wc->active_node)
/* The active node must sit above the level being entered */
1331 BUG_ON(wc->active_node <= level);
1332 node = find_shared_node(&wc->shared, bytenr);
1334 ret = add_shared_node(&wc->shared, bytenr, refs);
1336 node = find_shared_node(&wc->shared, bytenr);
1337 wc->nodes[level] = node;
1338 wc->active_node = level;
1342 if (wc->root_level == wc->active_node &&
1343 btrfs_root_refs(&root->root_item) == 0) {
/* Last reference gone: discard the collected records and the node's registration */
1344 if (--node->refs == 0) {
1345 free_inode_recs_tree(&node->root_cache);
1346 free_inode_recs_tree(&node->inode_cache);
1347 remove_cache_extent(&wc->shared, &node->cache);
1353 dest = wc->nodes[wc->active_node];
1354 splice_shared_node(node, dest);
1355 if (node->refs == 0) {
1356 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the walk leaves the shared block on 'level'.  The active node
 * moves to a slot found above 'level', and the node just left is spliced into
 * it when the new active node is below the root level or the root still has
 * references.
 */
1362 static int leave_shared_node(struct btrfs_root *root,
1363 struct walk_control *wc, int level)
1365 struct shared_node *node;
1366 struct shared_node *dest;
1369 if (level == wc->root_level)
/* Scan the levels above the one being left */
1372 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1376 BUG_ON(i >= BTRFS_MAX_LEVEL);
1378 node = wc->nodes[wc->active_node];
1379 wc->nodes[wc->active_node] = NULL;
1380 wc->active_node = i;
1382 dest = wc->nodes[wc->active_node];
1383 if (wc->active_node < wc->root_level ||
1384 btrfs_root_refs(&root->root_item) > 0) {
1385 BUG_ON(node->refs <= 1);
1386 splice_shared_node(node, dest);
1388 BUG_ON(node->refs < 2);
/*
 * Works on the tree root in two steps.  First it searches for the ROOT_REF
 * key (parent_root_id, child_root_id).  Then it walks the ROOT_BACKREF items
 * of child_root_id looking for one whose offset is parent_root_id.  The
 * result codes are listed below; the final return distinguishes "has other
 * parents" (0) from "has no parent at all" (2).
 */
1397 * 1 - if the root with id child_root_id is a child of root parent_root_id
1398 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1399 * has other root(s) as parent(s)
1400 * 2 - if the root child_root_id doesn't have any parent roots
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1405 struct btrfs_path path;
1406 struct btrfs_key key;
1407 struct extent_buffer *leaf;
1411 btrfs_init_path(&path);
1413 key.objectid = parent_root_id;
1414 key.type = BTRFS_ROOT_REF_KEY;
1415 key.offset = child_root_id;
1416 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1420 btrfs_release_path(&path);
1424 key.objectid = child_root_id;
1425 key.type = BTRFS_ROOT_BACKREF_KEY;
1427 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1433 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next leaf */
1434 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1438 leaf = path.nodes[0];
1441 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for leaving the run of backref items belonging to child_root_id */
1442 if (key.objectid != child_root_id ||
1443 key.type != BTRFS_ROOT_BACKREF_KEY)
1448 if (key.offset == parent_root_id) {
1449 btrfs_release_path(&path);
1456 btrfs_release_path(&path);
1459 return has_parent ? 0 : 2;
/*
 * Record every directory entry packed into one DIR_ITEM/DIR_INDEX item.
 *
 * For each entry the name length is added to the current inode record's
 * found_size and a backref is added to the cache that matches the type of
 * the entry's location key (inode cache or root cache).
 */
1462 static int process_dir_item(struct btrfs_root *root,
1463 struct extent_buffer *eb,
1464 int slot, struct btrfs_key *key,
1465 struct shared_node *active_node)
1475 struct btrfs_dir_item *di;
1476 struct inode_record *rec;
1477 struct cache_tree *root_cache;
1478 struct cache_tree *inode_cache;
1479 struct btrfs_key location;
1480 char namebuf[BTRFS_NAME_LEN];
1482 root_cache = &active_node->root_cache;
1483 inode_cache = &active_node->inode_cache;
/* The directory inode currently being processed owns this item. */
1484 rec = active_node->current;
1485 rec->found_dir_item = 1;
1487 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488 total = btrfs_item_size_nr(eb, slot);
/* One item may hold several entries back to back; walk them all. */
1489 while (cur < total) {
1491 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492 name_len = btrfs_dir_name_len(eb, di);
1493 data_len = btrfs_dir_data_len(eb, di);
1494 filetype = btrfs_dir_type(eb, di);
1496 rec->found_size += name_len;
/*
 * namebuf holds BTRFS_NAME_LEN bytes; over-long names are copied
 * truncated and flagged with REF_ERR_NAME_TOO_LONG.
 */
1497 if (name_len <= BTRFS_NAME_LEN) {
1501 len = BTRFS_NAME_LEN;
1502 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size dir item header. */
1504 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* Entry points at an inode, at a root, or at something invalid. */
1506 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507 add_inode_backref(inode_cache, location.objectid,
1508 key->objectid, key->offset, namebuf,
1509 len, filetype, key->type, error);
1510 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511 add_inode_backref(root_cache, location.objectid,
1512 key->objectid, key->offset,
1513 namebuf, len, filetype,
1516 fprintf(stderr, "invalid location in dir item %u\n",
/* Invalid locations are recorded under BTRFS_MULTIPLE_OBJECTIDS. */
1518 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519 key->objectid, key->offset, namebuf,
1520 len, filetype, key->type, error);
/* Advance to the next packed entry: header + name + data. */
1523 len = sizeof(*di) + name_len + data_len;
1524 di = (struct btrfs_dir_item *)((char *)di + len);
/* More than one entry in a DIR_INDEX item is flagged as a duplicate. */
1527 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name packed into one INODE_REF item as a backref in the
 * active node's inode cache.  key->objectid is passed as the inode and
 * key->offset as the directory.
 */
1533 static int process_inode_ref(struct extent_buffer *eb,
1534 int slot, struct btrfs_key *key,
1535 struct shared_node *active_node)
1543 struct cache_tree *inode_cache;
1544 struct btrfs_inode_ref *ref;
1545 char namebuf[BTRFS_NAME_LEN];
1547 inode_cache = &active_node->inode_cache;
1549 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550 total = btrfs_item_size_nr(eb, slot);
/* Several refs may be packed into the same item. */
1551 while (cur < total) {
1552 name_len = btrfs_inode_ref_name_len(eb, ref);
1553 index = btrfs_inode_ref_index(eb, ref);
/* Names longer than the buffer are truncated and flagged. */
1554 if (name_len <= BTRFS_NAME_LEN) {
1558 len = BTRFS_NAME_LEN;
1559 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size ref header. */
1561 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562 add_inode_backref(inode_cache, key->objectid, key->offset,
1563 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed ref: header + name. */
1565 len = sizeof(*ref) + name_len;
1566 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every name packed into one INODE_EXTREF item as a backref in the
 * active node's inode cache.  Unlike process_inode_ref(), the directory is
 * read from the extref itself (its parent field), not from key->offset.
 */
1572 static int process_inode_extref(struct extent_buffer *eb,
1573 int slot, struct btrfs_key *key,
1574 struct shared_node *active_node)
1583 struct cache_tree *inode_cache;
1584 struct btrfs_inode_extref *extref;
1585 char namebuf[BTRFS_NAME_LEN];
1587 inode_cache = &active_node->inode_cache;
1589 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590 total = btrfs_item_size_nr(eb, slot);
/* Several extrefs may be packed into the same item. */
1591 while (cur < total) {
1592 name_len = btrfs_inode_extref_name_len(eb, extref);
1593 index = btrfs_inode_extref_index(eb, extref);
1594 parent = btrfs_inode_extref_parent(eb, extref);
/* Names longer than the buffer are truncated and flagged. */
1595 if (name_len <= BTRFS_NAME_LEN) {
1599 len = BTRFS_NAME_LEN;
1600 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size extref header. */
1602 read_extent_buffer(eb, namebuf,
1603 (unsigned long)(extref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, parent,
1605 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed extref: header + name. */
1607 len = sizeof(*extref) + name_len;
1608 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree over the byte range [start, start + len) and
 * report through @found how much of it is covered by csum items.
 * NOTE(review): the accumulation into *found is inferred from the
 * parameter name and the callers' use of it - confirm.
 */
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616 u64 len, u64 *found)
1618 struct btrfs_key key;
1619 struct btrfs_path path;
1620 struct extent_buffer *leaf;
1625 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1627 btrfs_init_path(&path);
1629 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1631 key.type = BTRFS_EXTENT_CSUM_KEY;
1633 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the preceding item may be a csum item that still
 * reaches into the range, so look at the previous slot as well.
 */
1637 if (ret > 0 && path.slots[0] > 0) {
1638 leaf = path.nodes[0];
1639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641 key.type == BTRFS_EXTENT_CSUM_KEY)
1646 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next leaf. */
1647 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1653 leaf = path.nodes[0];
1656 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Only csum items are relevant. */
1657 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658 key.type != BTRFS_EXTENT_CSUM_KEY)
1661 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item begins at or beyond the end of the requested range. */
1662 if (key.offset >= start + len)
1665 if (key.offset > start)
/*
 * Each csum_size bytes of item payload cover one sector, which gives
 * the end of the data range this item describes.
 */
1668 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670 if (csum_end > start) {
1671 size = min(csum_end - start, len);
1680 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item against the current inode record.
 *
 * Tracks the covered file range (extent_start/extent_end), records holes
 * and overlaps between consecutive extents, sanity-checks the extent item
 * fields and, for extents with a disk location outside the data reloc
 * tree, compares the checksummed byte count with the extent length.
 */
1686 static int process_file_extent(struct btrfs_root *root,
1687 struct extent_buffer *eb,
1688 int slot, struct btrfs_key *key,
1689 struct shared_node *active_node)
1691 struct inode_record *rec;
1692 struct btrfs_file_extent_item *fi;
1694 u64 disk_bytenr = 0;
1695 u64 extent_offset = 0;
/* Low-bit mask used for sector alignment checks and round-up. */
1696 u64 mask = root->sectorsize - 1;
1700 rec = active_node->current;
1701 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode. */
1704 if (rec->extent_start == (u64)-1) {
1705 rec->extent_start = key->offset;
1706 rec->extent_end = key->offset;
/*
 * Compare with the end of the previous extent: starting before it is an
 * overlap, starting after it leaves a hole that is recorded.
 */
1709 if (rec->extent_end > key->offset)
1710 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711 else if (rec->extent_end < key->offset) {
1712 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713 key->offset - rec->extent_end);
1718 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719 extent_type = btrfs_file_extent_type(eb, fi);
1721 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1724 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725 rec->found_size += num_bytes;
/* Round the inline length up to a sector boundary. */
1726 num_bytes = (num_bytes + mask) & ~mask;
1727 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1732 if (num_bytes == 0 || (num_bytes & mask))
1733 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside ram_bytes. */
1734 if (num_bytes + extent_offset >
1735 btrfs_file_extent_ram_bytes(eb, fi))
1736 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1737 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738 (btrfs_file_extent_compression(eb, fi) ||
1739 btrfs_file_extent_encryption(eb, fi) ||
1740 btrfs_file_extent_other_encoding(eb, fi)))
1741 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk location count towards found_size. */
1742 if (disk_bytenr > 0)
1743 rec->found_size += num_bytes;
1745 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1747 rec->extent_end = key->offset + num_bytes;
1750 * The data reloc tree will copy full extents into its inode and then
1751 * copy the corresponding csums. Because the extent it copied could be
1752 * a preallocated extent that hasn't been written to yet there may be no
1753 * csums to copy, ergo we won't have csums for our file extent. This is
1754 * ok so just don't bother checking csums if the inode belongs to the
1757 if (disk_bytenr > 0 &&
1758 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the on-disk length is what gets checked. */
1760 if (btrfs_file_extent_compression(eb, fi))
1761 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1763 disk_bytenr += extent_offset;
1765 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/*
 * Regular extents are expected to be covered by csums (partial
 * coverage is noted); preallocated extents are not.
 */
1768 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1770 rec->found_csum_item = 1;
1771 if (found < num_bytes)
1772 rec->some_csum_missing = 1;
1773 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1775 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of a fs-tree leaf to the matching process_*() helper.
 *
 * Items are grouped per inode: whenever an item with a larger objectid
 * than the current inode record shows up, the current record is marked as
 * checked and a record for the new objectid becomes current.
 */
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782 struct walk_control *wc)
1784 struct btrfs_key key;
1788 struct cache_tree *inode_cache;
1789 struct shared_node *active_node;
/* Special case: at the root level of a root whose root item has no refs. */
1791 if (wc->root_level == wc->active_node &&
1792 btrfs_root_refs(&root->root_item) == 0)
1795 active_node = wc->nodes[wc->active_node];
1796 inode_cache = &active_node->inode_cache;
1797 nritems = btrfs_header_nritems(eb);
1798 for (i = 0; i < nritems; i++) {
1799 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get separate treatment. */
1801 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1803 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moving on to a new inode: close the old record, open the next. */
1806 if (active_node->current == NULL ||
1807 active_node->current->ino < key.objectid) {
1808 if (active_node->current) {
1809 active_node->current->checked = 1;
1810 maybe_free_inode_rec(inode_cache,
1811 active_node->current);
1813 active_node->current = get_inode_rec(inode_cache,
1815 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1818 case BTRFS_DIR_ITEM_KEY:
1819 case BTRFS_DIR_INDEX_KEY:
1820 ret = process_dir_item(root, eb, i, &key, active_node);
1822 case BTRFS_INODE_REF_KEY:
1823 ret = process_inode_ref(eb, i, &key, active_node);
1825 case BTRFS_INODE_EXTREF_KEY:
1826 ret = process_inode_extref(eb, i, &key, active_node);
1828 case BTRFS_INODE_ITEM_KEY:
1829 ret = process_inode_item(eb, i, &key, active_node);
1831 case BTRFS_EXTENT_DATA_KEY:
1832 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the children of @node, from @slot up to the last
 * pointer in the node, using the node size of @root as block size.
 */
1842 static void reada_walk_down(struct btrfs_root *root,
1843 struct extent_buffer *node, int slot)
1852 level = btrfs_header_level(node);
1856 nritems = btrfs_header_nritems(node);
1857 blocksize = root->nodesize;
/* Queue every remaining child, passing the generation stored in the pointer. */
1858 for (i = slot; i < nritems; i++) {
1859 bytenr = btrfs_node_blockptr(node, i);
1860 ptr_gen = btrfs_node_ptr_generation(node, i);
1861 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1866 * Check the child node/leaf by the following condition:
1867 * 1. the first item key of the node/leaf should be the same with the one
1869 * 2. block in parent node should match the child node/leaf.
1870 * 3. generation of parent node and child's header should be consistent.
1872 * Otherwise the child node/leaf pointed to by the key in the parent is not valid.
1874 * We hope to check leaf owner too, but since subvol may share leaves,
1875 * which makes leaf owner check not so strong, key check should be
1876 * sufficient for that case.
1878 static int check_child_node(struct btrfs_root *root,
1879 struct extent_buffer *parent, int slot,
1880 struct extent_buffer *child)
1882 struct btrfs_key parent_key;
1883 struct btrfs_key child_key;
1886 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887 if (btrfs_header_level(child) == 0)
1888 btrfs_item_key_to_cpu(child, &child_key, 0);
1890 btrfs_node_key_to_cpu(child, &child_key, 0);
1892 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1895 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896 parent_key.objectid, parent_key.type, parent_key.offset,
1897 child_key.objectid, child_key.type, child_key.offset);
1899 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1901 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902 btrfs_node_blockptr(parent, slot),
1903 btrfs_header_bytenr(child));
1905 if (btrfs_node_ptr_generation(parent, slot) !=
1906 btrfs_header_generation(child)) {
1908 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909 btrfs_header_generation(child),
1910 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache used by walk_down_tree(): bytenr[] holds the start of the
 * tree block last looked up at each level, refs[] the ref count found for it.
 */
1916 u64 bytenr[BTRFS_MAX_LEVEL];
1917 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend from path->nodes[*level] towards the leaves, processing each leaf
 * that is reached and validating every child block before stepping into it.
 *
 * @nrefs caches the extent ref count per level so that a block whose count
 * is already known is not looked up in the extent tree again.
 */
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921 struct walk_control *wc, int *level,
1922 struct node_refs *nrefs)
1924 enum btrfs_tree_block_status status;
1927 struct extent_buffer *next;
1928 struct extent_buffer *cur;
1933 WARN_ON(*level < 0);
1934 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Ref count of the starting block: reuse the cached value if it matches. */
1936 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937 refs = nrefs->refs[*level];
1940 ret = btrfs_lookup_extent_info(NULL, root,
1941 path->nodes[*level]->start,
1942 *level, 1, &refs, NULL);
1947 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948 nrefs->refs[*level] = refs;
1952 ret = enter_shared_node(root, path->nodes[*level]->start,
/* Main descent loop. */
1960 while (*level >= 0) {
1961 WARN_ON(*level < 0);
1962 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963 cur = path->nodes[*level];
/* Sanity: header level must match, and the slot must be in range. */
1965 if (btrfs_header_level(cur) != *level)
1968 if (path->slots[*level] >= btrfs_header_nritems(cur))
1971 ret = process_one_leaf(root, cur, wc);
/* Internal node: fetch the pointer for the current slot. */
1976 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978 blocksize = root->nodesize;
/* Ref count of the child: cached value or extent tree lookup. */
1980 if (bytenr == nrefs->bytenr[*level - 1]) {
1981 refs = nrefs->refs[*level - 1];
1983 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984 *level - 1, 1, &refs, NULL);
1988 nrefs->bytenr[*level - 1] = bytenr;
1989 nrefs->refs[*level - 1] = refs;
1994 ret = enter_shared_node(root, bytenr, refs,
1997 path->slots[*level]++;
/* Get the child block, reading it from disk if it is not cached and current. */
2002 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004 free_extent_buffer(next);
2005 reada_walk_down(root, cur, path->slots[*level]);
2006 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: remember it as a corrupt extent. */
2008 if (!extent_buffer_uptodate(next)) {
2009 struct btrfs_key node_key;
2011 btrfs_node_key_to_cpu(path->nodes[*level],
2013 path->slots[*level]);
2014 btrfs_add_corrupt_extent_record(root->fs_info,
2016 path->nodes[*level]->start,
2017 root->nodesize, *level);
/* Child must agree with the parent pointer and pass the block checks. */
2023 ret = check_child_node(root, cur, path->slots[*level], next);
2029 if (btrfs_is_leaf(next))
2030 status = btrfs_check_leaf(root, NULL, next);
2032 status = btrfs_check_node(root, NULL, next);
2033 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held at the lower level. */
2039 *level = *level - 1;
2040 free_extent_buffer(path->nodes[*level]);
2041 path->nodes[*level] = next;
2042 path->slots[*level] = 0;
/* Mark the current node as fully consumed so walk_up_tree() moves past it. */
2045 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb from *level towards the root of the path.  Each level is tested for
 * a remaining slot; on the way up the buffer at *level is released, and if
 * that level is the active shared node, it is left via leave_shared_node().
 */
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050 struct walk_control *wc, int *level)
2053 struct extent_buffer *leaf;
2055 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, "leaf" is the node at level i here. */
2056 leaf = path->nodes[i];
/* True when this level still has a slot after the current one. */
2057 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Drop the buffer at *level from the path. */
2062 free_extent_buffer(path->nodes[*level]);
2063 path->nodes[*level] = NULL;
/* The active shared node can never be below the level being left. */
2064 BUG_ON(*level > wc->active_node);
2065 if (*level == wc->active_node)
2066 leave_shared_node(root, wc, *level);
/*
 * Sanity-check the inode record of a subvolume's root directory.
 *
 * The visible conditions test for: an inode item without errors, nlink == 1
 * with found_link == 0, a first backref that came from an inode ref with
 * index 0 and the name "..", and no dir index/dir item for that backref.
 */
2073 static int check_root_dir(struct inode_record *rec)
2075 struct inode_backref *backref;
2078 if (!rec->found_inode_item || rec->errors)
2080 if (rec->nlink != 1 || rec->found_link != 0)
2082 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2084 backref = to_inode_backref(rec->backrefs.next);
2085 if (!backref->found_inode_ref)
2087 if (backref->index != 0 || backref->namelen != 2 ||
2088 memcmp(backref->name, "..", 2))
2090 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size stored in a directory's inode item with the size that
 * was accumulated during the check (rec->found_size) and clear
 * I_ERR_DIR_ISIZE_WRONG on the record.
 */
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098 struct btrfs_root *root, struct btrfs_path *path,
2099 struct inode_record *rec)
2101 struct btrfs_inode_item *ei;
2102 struct btrfs_key key;
/*
 * Search with the largest possible offset for (ino, INODE_ITEM); the
 * slot and objectid checks below validate where the search landed.
 */
2105 key.objectid = rec->ino;
2106 key.type = BTRFS_INODE_ITEM_KEY;
2107 key.offset = (u64)-1;
2109 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2113 if (!path->slots[0]) {
2120 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121 if (key.objectid != rec->ino) {
2126 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127 struct btrfs_inode_item);
2128 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129 btrfs_mark_buffer_dirty(path->nodes[0]);
2130 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132 root->root_key.objectid);
2134 btrfs_release_path(path);
/*
 * Add the orphan item that is missing for rec->ino and clear
 * I_ERR_NO_ORPHAN_ITEM on the record.
 */
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139 struct btrfs_root *root,
2140 struct btrfs_path *path,
2141 struct inode_record *rec)
2145 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146 btrfs_release_path(path);
2148 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of an inode item with the byte count that was
 * accumulated during the check (rec->found_size) and clear
 * I_ERR_FILE_NBYTES_WRONG on the record.
 */
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153 struct btrfs_root *root,
2154 struct btrfs_path *path,
2155 struct inode_record *rec)
2157 struct btrfs_inode_item *ei;
2158 struct btrfs_key key;
2161 key.objectid = rec->ino;
2162 key.type = BTRFS_INODE_ITEM_KEY;
2165 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2172 /* Since ret == 0, no need to check anything */
2173 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174 struct btrfs_inode_item);
2175 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176 btrfs_mark_buffer_dirty(path->nodes[0]);
2177 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178 printf("reset nbytes for ino %llu root %llu\n",
2179 rec->ino, root->root_key.objectid);
2181 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that is missing for @backref, inside its own
 * transaction, then update the in-memory records: the backref is marked as
 * having a dir index and the parent directory's found_size / isize error
 * state is recomputed.
 */
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186 struct cache_tree *inode_cache,
2187 struct inode_record *rec,
2188 struct inode_backref *backref)
2190 struct btrfs_path path;
2191 struct btrfs_trans_handle *trans;
2192 struct btrfs_dir_item *dir_item;
2193 struct extent_buffer *leaf;
2194 struct btrfs_key key;
2195 struct btrfs_disk_key disk_key;
2196 struct inode_record *dir_rec;
2197 unsigned long name_ptr;
/* Item payload: dir item header followed by the name (no data part). */
2198 u32 data_size = sizeof(*dir_item) + backref->namelen;
2201 trans = btrfs_start_transaction(root, 1);
2203 return PTR_ERR(trans);
2205 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206 (unsigned long long)rec->ino);
2208 btrfs_init_path(&path);
/* Key of the new item: (parent dir, DIR_INDEX, index). */
2209 key.objectid = backref->dir;
2210 key.type = BTRFS_DIR_INDEX_KEY;
2211 key.offset = backref->index;
2212 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2215 leaf = path.nodes[0];
2216 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored in the entry: the inode item of rec->ino. */
2218 disk_key.objectid = cpu_to_le64(rec->ino);
2219 disk_key.type = BTRFS_INODE_ITEM_KEY;
2220 disk_key.offset = 0;
2222 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224 btrfs_set_dir_data_len(leaf, dir_item, 0);
2225 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written directly behind the dir item header. */
2226 name_ptr = (unsigned long)(dir_item + 1);
2227 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228 btrfs_mark_buffer_dirty(leaf);
2229 btrfs_release_path(&path);
/* NOTE(review): the commit's return value is not checked here. */
2230 btrfs_commit_transaction(trans, root);
2232 backref->found_dir_index = 1;
2233 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234 BUG_ON(IS_ERR(dir_rec));
/* The new entry adds its name length to the directory's found size. */
2237 dir_rec->found_size += backref->namelen;
2238 if (dir_rec->found_size == dir_rec->isize &&
2239 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241 if (dir_rec->found_size != dir_rec->isize)
2242 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref (directory, name and
 * index) inside its own transaction.
 */
2247 static int delete_dir_index(struct btrfs_root *root,
2248 struct cache_tree *inode_cache,
2249 struct inode_record *rec,
2250 struct inode_backref *backref)
2252 struct btrfs_trans_handle *trans;
2253 struct btrfs_dir_item *di;
2254 struct btrfs_path path;
2257 trans = btrfs_start_transaction(root, 1);
2259 return PTR_ERR(trans);
2261 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262 (unsigned long long)backref->dir,
2263 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264 (unsigned long long)root->objectid);
2266 btrfs_init_path(&path);
/* Look the entry up with ins_len -1 so the path is set up for deletion. */
2267 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268 backref->name, backref->namelen,
2269 backref->index, -1);
/* Early-exit path: release the path and commit the transaction first. */
2272 btrfs_release_path(&path);
2273 btrfs_commit_transaction(trans, root);
/* Either the whole item is deleted or only the one name within it. */
2280 ret = btrfs_del_item(trans, root, &path);
2282 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2284 btrfs_release_path(&path);
2285 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for @rec inside its own transaction.
 *
 * The item is built on the stack from what the check found: link count,
 * byte count, and a mode of directory (if a dir item was seen) or regular
 * file, both with permissions 0755.  atime/ctime/mtime are set to the
 * current time and otime to zero.
 */
2289 static int create_inode_item(struct btrfs_root *root,
2290 struct inode_record *rec,
2291 struct inode_backref *backref, int root_dir)
2293 struct btrfs_trans_handle *trans;
2294 struct btrfs_inode_item inode_item;
2295 time_t now = time(NULL);
2298 trans = btrfs_start_transaction(root, 1);
2299 if (IS_ERR(trans)) {
2300 ret = PTR_ERR(trans);
2304 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305 "be incomplete, please check permissions and content after "
2306 "the fsck completes.\n", (unsigned long long)root->objectid,
2307 (unsigned long long)rec->ino);
2309 memset(&inode_item, 0, sizeof(inode_item));
2310 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either forced to 1 or taken from the links that were found. */
2312 btrfs_set_stack_inode_nlink(&inode_item, 1);
2314 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* A dir item wins over file extents when deciding the inode type. */
2316 if (rec->found_dir_item) {
2317 if (rec->found_file_extent)
2318 fprintf(stderr, "root %llu inode %llu has both a dir "
2319 "item and extents, unsure if it is a dir or a "
2320 "regular file so setting it as a directory\n",
2321 (unsigned long long)root->objectid,
2322 (unsigned long long)rec->ino);
2323 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/*
 * NOTE(review): this condition is the exact complement of the "if"
 * above, so it behaves like a plain "else".
 */
2325 } else if (!rec->found_dir_item) {
2326 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2329 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2338 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
/* NOTE(review): the commit's return value is not checked here. */
2340 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of @rec and repair the inconsistent ones.
 *
 * Depending on which of dir item / dir index / inode ref were found for a
 * backref, this recreates a missing inode item, deletes a bad dir index,
 * adds a missing dir index, or inserts a missing dir index/item pair.
 * Deleting and adding are separated by the "delete" flag tested below.
 *
 * Returns a non-zero ret from a failed step, otherwise "repaired".
 */
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345 struct inode_record *rec,
2346 struct cache_tree *inode_cache,
2349 struct inode_backref *tmp, *backref;
2350 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* _safe iteration: backrefs may be unlinked from the list while walking. */
2354 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root directory itself lost its inode item: recreate it. */
2355 if (!delete && rec->ino == root_dirid) {
2356 if (!rec->found_inode_item) {
2357 ret = create_inode_item(root, rec, backref, 1);
2364 /* Index 0 for root dir's are special, don't mess with it */
2365 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index without a matching inode ref, or with an index that does
 * not match the inode ref, is deleted.
 */
2369 ((backref->found_dir_index && !backref->found_inode_ref) ||
2370 (backref->found_dir_index && backref->found_inode_ref &&
2371 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372 ret = delete_dir_index(root, inode_cache, rec, backref);
2376 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2380 if (!delete && !backref->found_dir_index &&
2381 backref->found_dir_item && backref->found_inode_ref) {
2382 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * NOTE(review): found_dir_index is tested twice in this condition; the
 * second test is redundant - confirm whether another flag was intended.
 */
2387 if (backref->found_dir_item &&
2388 backref->found_dir_index &&
2389 backref->found_dir_index) {
/* Fully consistent backref: nothing left to repair, drop it. */
2390 if (!backref->errors &&
2391 backref->found_inode_ref) {
2392 list_del(&backref->list);
/* Only the inode ref exists: insert the dir index/item pair. */
2398 if (!delete && (!backref->found_dir_index &&
2399 !backref->found_dir_item &&
2400 backref->found_inode_ref)) {
2401 struct btrfs_trans_handle *trans;
2402 struct btrfs_key location;
2404 ret = check_dir_conflict(root, backref->name,
2410 * let nlink fixing routine to handle it,
2411 * which can do it better.
2416 location.objectid = rec->ino;
2417 location.type = BTRFS_INODE_ITEM_KEY;
2418 location.offset = 0;
2420 trans = btrfs_start_transaction(root, 1);
2421 if (IS_ERR(trans)) {
2422 ret = PTR_ERR(trans);
2425 fprintf(stderr, "adding missing dir index/item pair "
2427 (unsigned long long)rec->ino);
2428 ret = btrfs_insert_dir_item(trans, root, backref->name,
2430 backref->dir, &location,
2431 imode_to_type(rec->imode),
2434 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item is gone: recreate it. */
2438 if (!delete && (backref->found_inode_ref &&
2439 backref->found_dir_index &&
2440 backref->found_dir_item &&
2441 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442 !rec->found_inode_item)) {
2443 ret = create_inode_item(root, rec, backref, 0);
2450 return ret ? ret : repaired;
2454 * To determine the file type for nlink/inode_item repair
2456 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457 * Return -ENOENT if file type is not found.
/*
 * Derive the BTRFS_FT_* type of @rec: from the inode mode when an inode
 * item was found, otherwise from the first backref that has a dir index or
 * dir item.  The result is stored through @type.
 */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2461 struct inode_backref *backref;
2463 /* For inode item recovered case */
2464 if (rec->found_inode_item) {
2465 *type = imode_to_type(rec->imode);
/* Fall back to the file type recorded in a directory entry. */
2469 list_for_each_entry(backref, &rec->backrefs, list) {
2470 if (backref->found_dir_index || backref->found_dir_item) {
2471 *type = backref->filetype;
2479 * To determine the file name for nlink repair
2481 * Return 0 if file name is found, set name and namelen.
2482 * Return -ENOENT if file name is not found.
/*
 * Copy the name of the first backref that has any of dir index, dir item
 * or inode ref into @name and store its length in @namelen.  The name is
 * copied with memcpy() for exactly namelen bytes; no terminator is added
 * by this copy.
 */
2484 static int find_file_name(struct inode_record *rec,
2485 char *name, int *namelen)
2487 struct inode_backref *backref;
2489 list_for_each_entry(backref, &rec->backrefs, list) {
2490 if (backref->found_dir_index || backref->found_dir_item ||
2491 backref->found_inode_ref) {
2492 memcpy(name, backref->name, backref->namelen);
2493 *namelen = backref->namelen;
2500 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of an inode from scratch: unlink every backref,
 * drop the incomplete ones from the list, set nlink in the inode item to
 * 0, then re-add a link for each backref that is still on the list.
 */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502 struct btrfs_root *root,
2503 struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct inode_backref *backref;
2507 struct inode_backref *tmp;
2508 struct btrfs_key key;
2509 struct btrfs_inode_item *inode_item;
2512 /* We don't believe this either, reset it and iterate backref */
2513 rec->found_link = 0;
2515 /* Remove all backref including the valid ones */
2516 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518 backref->index, backref->name,
2519 backref->namelen, 0);
2523 /* remove invalid backref, so it won't be added back */
/* "Valid" means dir index, dir item and inode ref were all found. */
2524 if (!(backref->found_dir_index &&
2525 backref->found_dir_item &&
2526 backref->found_inode_ref)) {
2527 list_del(&backref->list);
2534 /* Set nlink to 0 */
2535 key.objectid = rec->ino;
2536 key.type = BTRFS_INODE_ITEM_KEY;
2538 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2545 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546 struct btrfs_inode_item);
2547 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548 btrfs_mark_buffer_dirty(path->nodes[0]);
2549 btrfs_release_path(path);
2552 * Add back valid inode_ref/dir_item/dir_index,
2553 * add_link() will handle the nlink inc, so new nlink must be correct
2555 list_for_each_entry(backref, &rec->backrefs, list) {
2556 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557 backref->name, backref->namelen,
2558 backref->filetype, &backref->index, 1);
2563 btrfs_release_path(path);
/*
 * Fix a wrong link count.  The name and type of the file are salvaged from
 * the backrefs first, then the link count is rebuilt with reset_nlink().
 * An inode that ends up with no link at all is linked into a "lost+found"
 * directory, with ".INO" suffixes appended while the name collides.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared on the record in any case.
 */
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568 struct btrfs_root *root,
2569 struct btrfs_path *path,
2570 struct inode_record *rec)
2572 char *dir_name = "lost+found";
2573 char namebuf[BTRFS_NAME_LEN] = {0};
2578 int name_recovered = 0;
2579 int type_recovered = 0;
2583 * Get file name and type first before these invalid inode ref
2584 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation. */
2586 name_recovered = !find_file_name(rec, namebuf, &namelen);
2587 type_recovered = !find_file_type(rec, &type);
/* No usable name: fall back to the decimal inode number. */
2589 if (!name_recovered) {
2590 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591 rec->ino, rec->ino);
2592 namelen = count_digits(rec->ino);
2593 sprintf(namebuf, "%llu", rec->ino);
/* No usable type: fall back to a regular file. */
2596 if (!type_recovered) {
2597 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2599 type = BTRFS_FT_REG_FILE;
2603 ret = reset_nlink(trans, root, path, rec);
2606 "Failed to reset nlink for inode %llu: %s\n",
2607 rec->ino, strerror(-ret));
/* Still no link after the reset: move the inode to lost+found. */
2611 if (rec->found_link == 0) {
2612 lost_found_ino = root->highest_inode;
2613 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2618 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2622 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623 dir_name, strerror(-ret));
2626 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627 namebuf, namelen, type, NULL, 1);
2629 * Add ".INO" suffix several times to handle case where
2630 * "FILENAME.INO" is already taken by another file.
2632 while (ret == -EEXIST) {
2634 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2636 if (namelen + count_digits(rec->ino) + 1 >
/* Append the suffix behind the current name, bounded by the buffer. */
2641 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2643 namelen += count_digits(rec->ino) + 1;
2644 ret = btrfs_add_link(trans, root, rec->ino,
2645 lost_found_ino, namebuf,
2646 namelen, type, NULL, 1);
2650 "Failed to link the inode %llu to %s dir: %s\n",
2651 rec->ino, dir_name, strerror(-ret));
2655 * Just increase the found_link, don't actually add the
2656 * backref. This will make things easier and this inode
2657 * record will be freed after the repair is done.
2658 * So fsck will not report problem about this inode.
2661 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662 namelen, namebuf, dir_name);
2664 printf("Fixed the nlink of inode %llu\n", rec->ino);
2667 * Clear the flag anyway, or we will loop forever for the same inode
2668 * as it will not be removed from the bad inode list and the dead loop
2671 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672 btrfs_release_path(path);
2677 * Check if there is any normal(reg or prealloc) file extent for given
2679 * This is used to determine the file type when neither its dir_index/item or
2680 * inode_item exists.
2682 * This will *NOT* report error, if any error happens, just consider it does
2683 * not have any normal file extent.
/*
 * Search the EXTENT_DATA items of inode @ino in @root for one whose type
 * is not BTRFS_FILE_EXTENT_INLINE.  The search is read-only (no
 * transaction, no COW).
 */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2687 struct btrfs_path path;
2688 struct btrfs_key key;
2689 struct btrfs_key found_key;
2690 struct btrfs_file_extent_item *fi;
2694 btrfs_init_path(&path);
2696 key.type = BTRFS_EXTENT_DATA_KEY;
2699 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Inexact match that landed past the end of the leaf: use the next leaf. */
2704 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705 ret = btrfs_next_leaf(root, &path);
2712 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Only EXTENT_DATA items that belong to @ino are examined. */
2714 if (found_key.objectid != ino ||
2715 found_key.type != BTRFS_EXTENT_DATA_KEY)
2717 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718 struct btrfs_file_extent_item);
2719 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Anything that is not inline counts as a normal file extent. */
2720 if (type != BTRFS_FILE_EXTENT_INLINE) {
2726 btrfs_release_path(&path);
2730 static u32 btrfs_type_to_imode(u8 type)
2732 static u32 imode_by_btrfs_type[] = {
2733 [BTRFS_FT_REG_FILE] = S_IFREG,
2734 [BTRFS_FT_DIR] = S_IFDIR,
2735 [BTRFS_FT_CHRDEV] = S_IFCHR,
2736 [BTRFS_FT_BLKDEV] = S_IFBLK,
2737 [BTRFS_FT_FIFO] = S_IFIFO,
2738 [BTRFS_FT_SOCK] = S_IFSOCK,
2739 [BTRFS_FT_SYMLINK] = S_IFLNK,
2742 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.
 *
 * The file type is taken from find_file_type() when possible; otherwise it
 * is guessed from what else was found for the inode (non-inline file
 * extents, dir items, orphan extents), with regular file as the fallback.
 * Only the inode item is recreated here; the record is flagged so that the
 * link count repair runs afterwards.
 */
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746 struct btrfs_root *root,
2747 struct btrfs_path *path,
2748 struct inode_record *rec)
2752 int type_recovered = 0;
2755 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation. */
2757 type_recovered = !find_file_type(rec, &filetype);
2760 * Try to determine inode type if type not found.
2762 * For found regular file extent, it must be FILE.
2763 * For found dir_item/index, it must be DIR.
2765 * For undetermined one, use FILE as fallback.
2768 * 1. If found backref(inode_index/item is already handled) to it,
2770 * Need new inode-inode ref structure to allow search for that.
2772 if (!type_recovered) {
2773 if (rec->found_file_extent &&
2774 find_normal_file_extent(root, rec->ino)) {
2776 filetype = BTRFS_FT_REG_FILE;
2777 } else if (rec->found_dir_item) {
2779 filetype = BTRFS_FT_DIR;
2780 } else if (!list_empty(&rec->orphan_extents)) {
2782 filetype = BTRFS_FT_REG_FILE;
2784 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2787 filetype = BTRFS_FT_REG_FILE;
/* Create the inode item with permission bits plus the file type bits. */
2791 ret = btrfs_new_inode(trans, root, rec->ino,
2792 mode | btrfs_type_to_imode(filetype));
2797 * Here inode rebuild is done, we only rebuild the inode item,
2798 * don't repair the nlink(like move to lost+found).
2799 * That is the job of nlink repair.
2801 * We just fill the record and return
/*
 * NOTE(review): found_dir_item is set for every file type here, not only
 * for directories - confirm whether found_inode_item was intended.
 */
2803 rec->found_dir_item = 1;
2804 rec->imode = mode | btrfs_type_to_imode(filetype);
2806 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807 /* Ensure the inode_nlinks repair function will be called */
2808 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-link the orphan data extents queued on rec->orphan_extents into the
 * inode described by @rec.
 *
 * Each queued extent is probed with btrfs_get_extent(); a conflicting one is
 * reported and released with btrfs_free_extent(); a file extent item is
 * inserted with btrfs_insert_file_extent(), using disk_len for both length
 * arguments.  The record is then updated: found_size grows by disk_len, the
 * range is removed from rec->holes, I_ERR_FILE_NBYTES_WRONG is cleared once
 * found_size equals nbytes and I_ERR_FILE_EXTENT_DISCOUNT once the hole tree
 * is empty.  Handled entries are unlinked from the list and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 *
 * @path is scratch space only; it is released right after the lookup.
 * NOTE(review): presumably returns 0 on success and a negative value on
 * failure, like the sibling repair helpers -- confirm.
 */
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814 struct btrfs_root *root,
2815 struct btrfs_path *path,
2816 struct inode_record *rec)
2818 struct orphan_data_extent *orphan;
2819 struct orphan_data_extent *tmp;
/* The _safe iterator is needed: entries are unlinked inside the loop body */
2822 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2824 * Check for conflicting file extents
2826 * Here we don't know whether the extents is compressed or not,
2827 * so we can only assume it not compressed nor data offset,
2828 * and use its disk_len as extent length.
2830 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831 orphan->offset, orphan->disk_len, 0);
2832 btrfs_release_path(path);
2837 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838 orphan->disk_bytenr, orphan->disk_len);
2839 ret = btrfs_free_extent(trans,
2840 root->fs_info->extent_root,
2841 orphan->disk_bytenr, orphan->disk_len,
2842 0, root->objectid, orphan->objectid,
2847 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848 orphan->offset, orphan->disk_bytenr,
2849 orphan->disk_len, orphan->disk_len);
2853 /* Update file size info */
2854 rec->found_size += orphan->disk_len;
2855 if (rec->found_size == rec->nbytes)
2856 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2858 /* Update the file extent hole info too */
2859 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2863 if (RB_EMPTY_ROOT(&rec->holes))
2864 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2866 list_del(&orphan->list);
2869 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the file extent holes recorded in rec->holes.
 *
 * The first hole of the tree is punched with btrfs_punch_hole() and then
 * removed from rec->holes, repeatedly; I_ERR_FILE_EXTENT_DISCOUNT is cleared
 * once the tree is empty.  A separate call punches the range
 * [0, round_up(isize, sectorsize)) for a file that lost all its file
 * extents.  A summary line is printed at the end.
 *
 * @path is not referenced by the visible statements.
 */
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875 struct btrfs_root *root,
2876 struct btrfs_path *path,
2877 struct inode_record *rec)
2879 struct rb_node *node;
2880 struct file_extent_hole *hole;
2884 node = rb_first(&rec->holes);
2888 hole = rb_entry(node, struct file_extent_hole, node);
2889 ret = btrfs_punch_hole(trans, root, rec->ino,
2890 hole->start, hole->len);
/* Drop the hole that was just punched from the in-memory tree */
2893 ret = del_file_extent_hole(&rec->holes, hole->start,
2897 if (RB_EMPTY_ROOT(&rec->holes))
2898 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the head of the tree, which has just been modified */
2899 node = rb_first(&rec->holes);
2901 /* special case for a file losing all its file extent */
2903 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904 round_up(rec->isize, root->sectorsize));
2908 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909 rec->ino, root->objectid);
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2916 struct btrfs_trans_handle *trans;
2917 struct btrfs_path path;
2920 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921 I_ERR_NO_ORPHAN_ITEM |
2922 I_ERR_LINK_COUNT_WRONG |
2923 I_ERR_NO_INODE_ITEM |
2924 I_ERR_FILE_EXTENT_ORPHAN |
2925 I_ERR_FILE_EXTENT_DISCOUNT|
2926 I_ERR_FILE_NBYTES_WRONG)))
2930 * For nlink repair, it may create a dir and add link, so
2931 * 2 for parent(256)'s dir_index and dir_item
2932 * 2 for lost+found dir's inode_item and inode_ref
2933 * 1 for the new inode_ref of the file
2934 * 2 for lost+found dir's dir_index and dir_item for the file
2936 trans = btrfs_start_transaction(root, 7);
2938 return PTR_ERR(trans);
2940 btrfs_init_path(&path);
2941 if (rec->errors & I_ERR_NO_INODE_ITEM)
2942 ret = repair_inode_no_item(trans, root, &path, rec);
2943 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948 ret = repair_inode_isize(trans, root, &path, rec);
2949 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952 ret = repair_inode_nlinks(trans, root, &path, rec);
2953 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954 ret = repair_inode_nbytes(trans, root, &path, rec);
2955 btrfs_commit_transaction(trans, root);
2956 btrfs_release_path(&path);
/*
 * Examine every inode record collected for @root, repair what can be
 * repaired when repair mode is on, and report the problems that remain.
 *
 * Stages visible in the body:
 *  - a root with zero refs only gets a warning if records exist for it,
 *  - the highest inode number seen is stored in root->highest_inode,
 *  - in repair mode the records are walked and repair_inode_backrefs() is
 *    called for records that have backrefs,
 *  - the root directory record is validated with check_root_dir(), or a
 *    missing root directory is recreated with btrfs_make_root_dir(),
 *  - the cache is drained: orphan items are checked, missing inode items and
 *    wrong link counts are flagged, try_repair_inode() is attempted, and the
 *    inode error plus every unresolved backref is printed before the record
 *    is freed.
 *
 * Returns -1 when at least one error was counted, 0 otherwise.
 */
2960 static int check_inode_recs(struct btrfs_root *root,
2961 struct cache_tree *inode_cache)
2963 struct cache_extent *cache;
2964 struct ptr_node *node;
2965 struct inode_record *rec;
2966 struct inode_backref *backref;
2971 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2973 if (btrfs_root_refs(&root->root_item) == 0) {
2974 if (!cache_tree_empty(inode_cache))
2975 fprintf(stderr, "warning line %d\n", __LINE__);
2980 * We need to record the highest inode number for later 'lost+found'
2982 * We must select an ino not used/referred by any existing inode, or
2983 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984 * this may cause 'lost+found' dir has wrong nlinks.
2986 cache = last_cache_extent(inode_cache);
2988 node = container_of(cache, struct ptr_node, cache);
2990 if (rec->ino > root->highest_inode)
2991 root->highest_inode = rec->ino;
2995 * We need to repair backrefs first because we could change some of the
2996 * errors in the inode recs.
2998 * We also need to go through and delete invalid backrefs first and then
2999 * add the correct ones second. We do this because we may get EEXIST
3000 * when adding back the correct index because we hadn't yet deleted the
3003 * For example, if we were missing a dir index then the directories
3004 * isize would be wrong, so if we fixed the isize to what we thought it
3005 * would be and then fixed the backref we'd still have a invalid fs, so
3006 * we need to add back the dir index and then check to see if the isize
3011 if (stage == 3 && !err)
3014 cache = search_cache_extent(inode_cache, 0);
3015 while (repair && cache) {
3016 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry can be removed from the cache */
3018 cache = next_cache_extent(cache);
3020 /* Need to free everything up and rescan */
3022 remove_cache_extent(inode_cache, &node->cache);
3024 free_inode_rec(rec);
3028 if (list_empty(&rec->backrefs))
3031 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory: validate its record, or recreate it when missing */
3045 rec = get_inode_rec(inode_cache, root_dirid, 0);
3046 BUG_ON(IS_ERR(rec));
3048 ret = check_root_dir(rec);
3050 fprintf(stderr, "root %llu root dir %llu error\n",
3051 (unsigned long long)root->root_key.objectid,
3052 (unsigned long long)root_dirid);
3053 print_inode_error(root, rec);
3058 struct btrfs_trans_handle *trans;
3060 trans = btrfs_start_transaction(root, 1);
3061 if (IS_ERR(trans)) {
3062 err = PTR_ERR(trans);
3067 "root %llu missing its root dir, recreating\n",
3068 (unsigned long long)root->objectid);
3070 ret = btrfs_make_root_dir(trans, root, root_dirid);
3073 btrfs_commit_transaction(trans, root);
3077 fprintf(stderr, "root %llu root dir %llu not found\n",
3078 (unsigned long long)root->root_key.objectid,
3079 (unsigned long long)root_dirid);
/* Final pass: every record is removed from the cache and freed */
3083 cache = search_cache_extent(inode_cache, 0);
3086 node = container_of(cache, struct ptr_node, cache);
3088 remove_cache_extent(inode_cache, &node->cache);
/* The root directory and the orphan objectid are not reported here */
3090 if (rec->ino == root_dirid ||
3091 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092 free_inode_rec(rec);
3096 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097 ret = check_orphan_item(root, rec->ino);
3099 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100 if (can_free_inode_rec(rec)) {
3101 free_inode_rec(rec);
3106 if (!rec->found_inode_item)
3107 rec->errors |= I_ERR_NO_INODE_ITEM;
3108 if (rec->found_link != rec->nlink)
3109 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3111 ret = try_repair_inode(root, rec);
3112 if (ret == 0 && can_free_inode_rec(rec)) {
3113 free_inode_rec(rec);
3119 if (!(repair && ret == 0))
3121 print_inode_error(root, rec);
/* Flag whichever of the three items each backref is missing, then print it */
3122 list_for_each_entry(backref, &rec->backrefs, list) {
3123 if (!backref->found_dir_item)
3124 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125 if (!backref->found_dir_index)
3126 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127 if (!backref->found_inode_ref)
3128 backref->errors |= REF_ERR_NO_INODE_REF;
3129 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130 " namelen %u name %s filetype %d errors %x",
3131 (unsigned long long)backref->dir,
3132 (unsigned long long)backref->index,
3133 backref->namelen, backref->name,
3134 backref->filetype, backref->errors);
3135 print_ref_error(backref->errors);
3137 free_inode_rec(rec);
3139 return (error > 0) ? -1 : 0;
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3145 struct cache_extent *cache;
3146 struct root_record *rec = NULL;
3149 cache = lookup_cache_extent(root_cache, objectid, 1);
3151 rec = container_of(cache, struct root_record, cache);
3153 rec = calloc(1, sizeof(*rec));
3155 return ERR_PTR(-ENOMEM);
3156 rec->objectid = objectid;
3157 INIT_LIST_HEAD(&rec->backrefs);
3158 rec->cache.start = objectid;
3159 rec->cache.size = 1;
3161 ret = insert_cache_extent(root_cache, &rec->cache);
3163 return ERR_PTR(-EEXIST);
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169 u64 ref_root, u64 dir, u64 index,
3170 const char *name, int namelen)
3172 struct root_backref *backref;
3174 list_for_each_entry(backref, &rec->backrefs, list) {
3175 if (backref->ref_root != ref_root || backref->dir != dir ||
3176 backref->namelen != namelen)
3178 if (memcmp(name, backref->name, namelen))
3183 backref = calloc(1, sizeof(*backref) + namelen + 1);
3186 backref->ref_root = ref_root;
3188 backref->index = index;
3189 backref->namelen = namelen;
3190 memcpy(backref->name, name, namelen);
3191 backref->name[namelen] = '\0';
3192 list_add_tail(&backref->list, &rec->backrefs);
3196 static void free_root_record(struct cache_extent *cache)
3198 struct root_record *rec;
3199 struct root_backref *backref;
3201 rec = container_of(cache, struct root_record, cache);
3202 while (!list_empty(&rec->backrefs)) {
3203 backref = to_root_backref(rec->backrefs.next);
3204 list_del(&backref->list);
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3213 static int add_root_backref(struct cache_tree *root_cache,
3214 u64 root_id, u64 ref_root, u64 dir, u64 index,
3215 const char *name, int namelen,
3216 int item_type, int errors)
3218 struct root_record *rec;
3219 struct root_backref *backref;
3221 rec = get_root_rec(root_cache, root_id);
3222 BUG_ON(IS_ERR(rec));
3223 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3226 backref->errors |= errors;
3228 if (item_type != BTRFS_DIR_ITEM_KEY) {
3229 if (backref->found_dir_index || backref->found_back_ref ||
3230 backref->found_forward_ref) {
3231 if (backref->index != index)
3232 backref->errors |= REF_ERR_INDEX_UNMATCH;
3234 backref->index = index;
3238 if (item_type == BTRFS_DIR_ITEM_KEY) {
3239 if (backref->found_forward_ref)
3241 backref->found_dir_item = 1;
3242 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243 backref->found_dir_index = 1;
3244 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245 if (backref->found_forward_ref)
3246 backref->errors |= REF_ERR_DUP_ROOT_REF;
3247 else if (backref->found_dir_item)
3249 backref->found_forward_ref = 1;
3250 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251 if (backref->found_back_ref)
3252 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253 backref->found_back_ref = 1;
3258 if (backref->found_forward_ref && backref->found_dir_item)
3259 backref->reachable = 1;
3263 static int merge_root_recs(struct btrfs_root *root,
3264 struct cache_tree *src_cache,
3265 struct cache_tree *dst_cache)
3267 struct cache_extent *cache;
3268 struct ptr_node *node;
3269 struct inode_record *rec;
3270 struct inode_backref *backref;
3273 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274 free_inode_recs_tree(src_cache);
3279 cache = search_cache_extent(src_cache, 0);
3282 node = container_of(cache, struct ptr_node, cache);
3284 remove_cache_extent(src_cache, &node->cache);
3287 ret = is_child_root(root, root->objectid, rec->ino);
3293 list_for_each_entry(backref, &rec->backrefs, list) {
3294 BUG_ON(backref->found_inode_ref);
3295 if (backref->found_dir_item)
3296 add_root_backref(dst_cache, rec->ino,
3297 root->root_key.objectid, backref->dir,
3298 backref->index, backref->name,
3299 backref->namelen, BTRFS_DIR_ITEM_KEY,
3301 if (backref->found_dir_index)
3302 add_root_backref(dst_cache, rec->ino,
3303 root->root_key.objectid, backref->dir,
3304 backref->index, backref->name,
3305 backref->namelen, BTRFS_DIR_INDEX_KEY,
3309 free_inode_rec(rec);
/*
 * Cross-check the root records gathered in @root_cache and report fs trees
 * that are unreferenced or whose references are inconsistent.
 *
 * The record of BTRFS_FS_TREE_OBJECTID is looked up first.  One scan over
 * the cache re-evaluates backref->reachable by looking at the found_ref
 * count of the referencing root (backref->ref_root).  A later scan handles
 * unreferenced roots in the FIRST_FREE..LAST_FREE objectid range (orphan
 * item check, "not referenced" message), flags the items missing from every
 * backref, and prints the per-tree summary with its unresolved refs.
 *
 * Returns 1 when at least one error was counted, 0 otherwise.
 * NOTE(review): the loop headers are not all visible; the scans presumably
 * iterate until the cache is exhausted -- confirm.
 */
3316 static int check_root_refs(struct btrfs_root *root,
3317 struct cache_tree *root_cache)
3319 struct root_record *rec;
3320 struct root_record *ref_root;
3321 struct root_backref *backref;
3322 struct cache_extent *cache;
3328 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329 BUG_ON(IS_ERR(rec));
3332 /* fixme: this can not detect circular references */
3335 cache = search_cache_extent(root_cache, 0);
3339 rec = container_of(cache, struct root_record, cache);
3340 cache = next_cache_extent(cache);
3342 if (rec->found_ref == 0)
3345 list_for_each_entry(backref, &rec->backrefs, list) {
3346 if (!backref->reachable)
3349 ref_root = get_root_rec(root_cache,
3351 BUG_ON(IS_ERR(ref_root));
3352 if (ref_root->found_ref > 0)
3355 backref->reachable = 0;
3357 if (rec->found_ref == 0)
/* Reporting scan over all root records */
3363 cache = search_cache_extent(root_cache, 0);
3367 rec = container_of(cache, struct root_record, cache);
3368 cache = next_cache_extent(cache);
3370 if (rec->found_ref == 0 &&
3371 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373 ret = check_orphan_item(root->fs_info->tree_root,
3379 * If we don't have a root item then we likely just have
3380 * a dir item in a snapshot for this root but no actual
3381 * ref key or anything so it's meaningless.
3383 if (!rec->found_root_item)
3386 fprintf(stderr, "fs tree %llu not referenced\n",
3387 (unsigned long long)rec->objectid);
3391 if (rec->found_ref > 0 && !rec->found_root_item)
/* Flag whichever of the four items each backref is missing */
3393 list_for_each_entry(backref, &rec->backrefs, list) {
3394 if (!backref->found_dir_item)
3395 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396 if (!backref->found_dir_index)
3397 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398 if (!backref->found_back_ref)
3399 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400 if (!backref->found_forward_ref)
3401 backref->errors |= REF_ERR_NO_ROOT_REF;
3402 if (backref->reachable && backref->errors)
3409 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410 (unsigned long long)rec->objectid, rec->found_ref,
3411 rec->found_root_item ? "" : "not found");
3413 list_for_each_entry(backref, &rec->backrefs, list) {
3414 if (!backref->reachable)
3416 if (!backref->errors && rec->found_root_item)
3418 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419 " index %llu namelen %u name %s errors %x\n",
3420 (unsigned long long)backref->ref_root,
3421 (unsigned long long)backref->dir,
3422 (unsigned long long)backref->index,
3423 backref->namelen, backref->name,
3425 print_ref_error(backref->errors);
3428 return errors > 0 ? 1 : 0;
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432 struct btrfs_key *key,
3433 struct cache_tree *root_cache)
3439 struct btrfs_root_ref *ref;
3440 char namebuf[BTRFS_NAME_LEN];
3443 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3445 dirid = btrfs_root_ref_dirid(eb, ref);
3446 index = btrfs_root_ref_sequence(eb, ref);
3447 name_len = btrfs_root_ref_name_len(eb, ref);
3449 if (name_len <= BTRFS_NAME_LEN) {
3453 len = BTRFS_NAME_LEN;
3454 error = REF_ERR_NAME_TOO_LONG;
3456 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3458 if (key->type == BTRFS_ROOT_REF_KEY) {
3459 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460 index, namebuf, len, key->type, error);
3462 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463 index, namebuf, len, key->type, error);
3468 static void free_corrupt_block(struct cache_extent *cache)
3470 struct btrfs_corrupt_block *corrupt;
3472 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Summary of the visible flow of repair_btree(): nothing is done when
 * @corrupt_blocks is empty.  Otherwise one transaction is started and the
 * corrupt block records are walked twice:
 *  - first pass: search down to the recorded level for the recorded key,
 *    read the block pointer at that position, delete the pointer with
 *    btrfs_del_ptr() and free the extent it pointed to,
 *  - second pass: search for every recorded key again with ins_len -1,
 *    which is what rebalances the tree.
 * The transaction is committed at the end.
 */
3479 * Repair the btree of the given root.
3481 * The fix is to remove the node key in corrupt_blocks cache_tree.
3482 * and rebalance the tree.
3483 * After the fix, the btree should be writeable.
3485 static int repair_btree(struct btrfs_root *root,
3486 struct cache_tree *corrupt_blocks)
3488 struct btrfs_trans_handle *trans;
3489 struct btrfs_path path;
3490 struct btrfs_corrupt_block *corrupt;
3491 struct cache_extent *cache;
3492 struct btrfs_key key;
3497 if (cache_tree_empty(corrupt_blocks))
3500 trans = btrfs_start_transaction(root, 1);
3501 if (IS_ERR(trans)) {
3502 ret = PTR_ERR(trans);
3503 fprintf(stderr, "Error starting transaction: %s\n",
3507 btrfs_init_path(&path);
3508 cache = first_cache_extent(corrupt_blocks);
3510 corrupt = container_of(cache, struct btrfs_corrupt_block,
3512 level = corrupt->level;
/* Stop the search at the level of the corrupt block, not at the leaf */
3513 path.lowest_level = level;
3514 key.objectid = corrupt->key.objectid;
3515 key.type = corrupt->key.type;
3516 key.offset = corrupt->key.offset;
3519 * Here we don't want to do any tree balance, since it may
3520 * cause a balance with corrupted brother leaf/node,
3521 * so ins_len set to 0 here.
3522 * Balance will be done after all corrupt node/leaf is deleted.
3524 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3527 offset = btrfs_node_blockptr(path.nodes[level],
3530 /* Remove the ptr */
3531 ret = btrfs_del_ptr(trans, root, &path, level,
3536 * Remove the corresponding extent
3537 * return value is not concerned.
3539 btrfs_release_path(&path);
3540 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541 0, root->root_key.objectid,
3543 cache = next_cache_extent(cache);
3546 /* Balance the btree using btrfs_search_slot() */
3547 cache = first_cache_extent(corrupt_blocks);
3549 corrupt = container_of(cache, struct btrfs_corrupt_block,
3551 memcpy(&key, &corrupt->key, sizeof(key));
3552 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3555 /* return will always >0 since it won't find the item */
3557 btrfs_release_path(&path);
3558 cache = next_cache_extent(cache);
3561 btrfs_commit_transaction(trans, root);
3562 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Visible steps:
 *  - install a local corrupt_blocks cache in root->fs_info for this tree,
 *  - mark the root record as having a root item when the item has refs
 *    (skipped for the tree reloc root),
 *  - move the orphan data extents of the root onto their inode records and
 *    flag those with I_ERR_FILE_EXTENT_ORPHAN,
 *  - check the root block itself with btrfs_check_leaf()/btrfs_check_node(),
 *  - start the walk at the root node, or at the drop_progress key when the
 *    root item has no refs and a non-zero drop_progress objectid,
 *  - walk the tree with walk_down_tree()/walk_up_tree(),
 *  - list the corrupted tree blocks that were collected and try
 *    repair_btree() on them,
 *  - merge the child-root records into @root_cache and check the inode
 *    records with check_inode_recs(),
 *  - free the corrupt block cache and the orphan data extents.
 *
 * @wc carries the walk state; its node array is reset here.
 * NOTE(review): the return statement is not visible in this view;
 * check_fs_roots() compares the result against -EAGAIN -- confirm the rest.
 */
3566 static int check_fs_root(struct btrfs_root *root,
3567 struct cache_tree *root_cache,
3568 struct walk_control *wc)
3574 struct btrfs_path path;
3575 struct shared_node root_node;
3576 struct root_record *rec;
3577 struct btrfs_root_item *root_item = &root->root_item;
3578 struct cache_tree corrupt_blocks;
3579 struct orphan_data_extent *orphan;
3580 struct orphan_data_extent *tmp;
3581 enum btrfs_tree_block_status status;
3582 struct node_refs nrefs;
3585 * Reuse the corrupt_block cache tree to record corrupted tree block
3587 * Unlike the usage in extent tree check, here we do it in a per
3588 * fs/subvol tree base.
3590 cache_tree_init(&corrupt_blocks);
3591 root->fs_info->corrupt_blocks = &corrupt_blocks;
3593 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594 rec = get_root_rec(root_cache, root->root_key.objectid);
3595 BUG_ON(IS_ERR(rec));
3596 if (btrfs_root_refs(root_item) > 0)
3597 rec->found_root_item = 1;
3600 btrfs_init_path(&path);
3601 memset(&root_node, 0, sizeof(root_node));
3602 cache_tree_init(&root_node.root_cache);
3603 cache_tree_init(&root_node.inode_cache);
3604 memset(&nrefs, 0, sizeof(nrefs));
3606 /* Move the orphan extent record to corresponding inode_record */
3607 list_for_each_entry_safe(orphan, tmp,
3608 &root->orphan_data_extents, list) {
3609 struct inode_record *inode;
3611 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3613 BUG_ON(IS_ERR(inode));
3614 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615 list_move(&orphan->list, &inode->orphan_extents);
/* Set up the walk control so that the walk starts at the root level */
3618 level = btrfs_header_level(root->node);
3619 memset(wc->nodes, 0, sizeof(wc->nodes));
3620 wc->nodes[level] = &root_node;
3621 wc->active_node = level;
3622 wc->root_level = level;
3624 /* We may not have checked the root block, lets do that now */
3625 if (btrfs_is_leaf(root->node))
3626 status = btrfs_check_leaf(root, NULL, root->node);
3628 status = btrfs_check_node(root, NULL, root->node);
3629 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Normal case: begin at slot 0 of the root node, holding an extra reference */
3632 if (btrfs_root_refs(root_item) > 0 ||
3633 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634 path.nodes[level] = root->node;
3635 extent_buffer_get(root->node);
3636 path.slots[level] = 0;
3638 struct btrfs_key key;
3639 struct btrfs_disk_key found_key;
/* Otherwise resume from the key and level recorded in the root item */
3641 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642 level = root_item->drop_level;
3643 path.lowest_level = level;
3644 if (level > btrfs_header_level(root->node) ||
3645 level >= BTRFS_MAX_LEVEL) {
3646 error("ignoring invalid drop level: %u", level);
3649 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3652 btrfs_node_key(path.nodes[level], &found_key,
3654 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655 sizeof(found_key)));
3659 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3665 wret = walk_up_tree(root, &path, wc, &level);
3672 btrfs_release_path(&path);
/* Report the tree blocks found corrupted during the walk, then try a repair */
3674 if (!cache_tree_empty(&corrupt_blocks)) {
3675 struct cache_extent *cache;
3676 struct btrfs_corrupt_block *corrupt;
3678 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679 root->root_key.objectid);
3680 cache = first_cache_extent(&corrupt_blocks);
3682 corrupt = container_of(cache,
3683 struct btrfs_corrupt_block,
3685 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686 cache->start, corrupt->level,
3687 corrupt->key.objectid, corrupt->key.type,
3688 corrupt->key.offset);
3689 cache = next_cache_extent(cache);
3692 printf("Try to repair the btree for root %llu\n",
3693 root->root_key.objectid);
3694 ret = repair_btree(root, &corrupt_blocks);
3696 fprintf(stderr, "Failed to repair btree: %s\n",
3699 printf("Btree for root %llu is fixed\n",
3700 root->root_key.objectid);
3704 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* The record still being built by the walk is complete now */
3708 if (root_node.current) {
3709 root_node.current->checked = 1;
3710 maybe_free_inode_rec(&root_node.inode_cache,
3714 err = check_inode_recs(root, &root_node.inode_cache);
/* Undo the per-tree setup made at the top of the function */
3718 free_corrupt_blocks_tree(&corrupt_blocks);
3719 root->fs_info->corrupt_blocks = NULL;
3720 free_orphan_data_extents(&root->orphan_data_extents);
3724 static int fs_root_objectid(u64 objectid)
3726 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3729 return is_fstree(objectid);
/*
 * Check every fs/subvolume tree referenced from the tree root.
 *
 * The root tree is scanned item by item.  Each ROOT_ITEM whose objectid
 * passes fs_root_objectid() is read and handed to check_fs_root(); the tree
 * reloc root is read without the root cache and freed again afterwards.
 * ROOT_REF and ROOT_BACKREF items are fed to process_root_ref().  When
 * check_fs_root() returns -EAGAIN, or the root node of the tree root has
 * changed, @root_cache is emptied and the path released.
 * NOTE(review): the scan is presumably restarted after that -- the jump is
 * not visible in this view; confirm.
 *
 * The shared-node cache of the walk control is freed at the end and the
 * progress task, if enabled, is started and stopped around the scan.
 */
3732 static int check_fs_roots(struct btrfs_root *root,
3733 struct cache_tree *root_cache)
3735 struct btrfs_path path;
3736 struct btrfs_key key;
3737 struct walk_control wc;
3738 struct extent_buffer *leaf, *tree_node;
3739 struct btrfs_root *tmp_root;
3740 struct btrfs_root *tree_root = root->fs_info->tree_root;
/* Progress reporting is optional */
3744 if (ctx.progress_enabled) {
3745 ctx.tp = TASK_FS_ROOTS;
3746 task_start(ctx.info);
3750 * Just in case we made any changes to the extent tree that weren't
3751 * reflected into the free space cache yet.
3754 reset_cached_block_groups(root->fs_info);
3755 memset(&wc, 0, sizeof(wc));
3756 cache_tree_init(&wc.shared);
3757 btrfs_init_path(&path);
3762 key.type = BTRFS_ROOT_ITEM_KEY;
3763 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3768 tree_node = tree_root->node;
3770 if (tree_node != tree_root->node) {
3771 free_root_recs_tree(root_cache);
3772 btrfs_release_path(&path);
3775 leaf = path.nodes[0];
3776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777 ret = btrfs_next_leaf(tree_root, &path);
3783 leaf = path.nodes[0];
3785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787 fs_root_objectid(key.objectid)) {
3788 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789 tmp_root = btrfs_read_fs_root_no_cache(
3790 root->fs_info, &key);
3792 key.offset = (u64)-1;
3793 tmp_root = btrfs_read_fs_root(
3794 root->fs_info, &key);
3796 if (IS_ERR(tmp_root)) {
3800 ret = check_fs_root(tmp_root, root_cache, &wc);
3801 if (ret == -EAGAIN) {
3802 free_root_recs_tree(root_cache);
3803 btrfs_release_path(&path);
3808 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809 btrfs_free_fs_root(tmp_root);
3810 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811 key.type == BTRFS_ROOT_BACKREF_KEY) {
3812 process_root_ref(leaf, path.slots[0], &key,
3819 btrfs_release_path(&path);
3821 free_extent_cache_tree(&wc.shared);
3822 if (!cache_tree_empty(&wc.shared))
3823 fprintf(stderr, "warning line %d\n", __LINE__);
3825 task_stop(ctx.info);
/*
 * Error bits used by find_dir_item(), find_inode_ref() and the inode
 * ref/extref checkers below.  Each value is a distinct bit so that several
 * of them can be OR-ed into one error bitmap.  Bit 0 is not defined here.
 */
3830 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
3831 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
3832 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
3833 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
/*
 * Overview of find_dir_item(), documented in detail right below: @key is
 * looked up in @root with btrfs_search_slot().  The root directory and
 * normal files/dirs are handled by separate branches, each printing its own
 * message.  For a normal entry every btrfs_dir_item packed into the found
 * item is compared against the inode ref: the location key must be
 * (ref_key->objectid, INODE_ITEM, 0), the dir type must equal
 * imode_to_type(@mode), and the name must have length @namelen and match
 * @name.  On-disk names longer than BTRFS_NAME_LEN are trimmed, with a
 * warning, before the comparison.  The path is released before returning.
 *
 * NOTE(review): namebuf is compared with strncmp(); whether @name is
 * guaranteed to be NUL terminated is not visible here -- confirm.
 */
3836 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3837 * INODE_REF/INODE_EXTREF match.
3839 * @root: the root of the fs/file tree
3840 * @ref_key: the key of the INODE_REF/INODE_EXTREF
3841 * @key: the key of the DIR_ITEM/DIR_INDEX
3842 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
3843 * distinguish root_dir between normal dir/file
3844 * @name: the name in the INODE_REF/INODE_EXTREF
3845 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
3846 * @mode: the st_mode of INODE_ITEM
3848 * Return 0 if no error occurred.
3849 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3850 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3852 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3853 * not match for normal dir/file.
3855 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3856 struct btrfs_key *key, u64 index, char *name,
3857 u32 namelen, u32 mode)
3859 struct btrfs_path path;
3860 struct extent_buffer *node;
3861 struct btrfs_dir_item *di;
3862 struct btrfs_key location;
3863 char namebuf[BTRFS_NAME_LEN] = {0};
3873 btrfs_init_path(&path);
3874 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3876 ret = DIR_ITEM_MISSING;
3880 /* Process root dir and goto out*/
3883 ret = ROOT_DIR_ERROR;
3885 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3887 ref_key->type == BTRFS_INODE_REF_KEY ?
3889 ref_key->objectid, ref_key->offset,
3890 key->type == BTRFS_DIR_ITEM_KEY ?
3891 "DIR_ITEM" : "DIR_INDEX");
3899 /* Process normal file/dir */
3901 ret = DIR_ITEM_MISSING;
3903 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3905 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3906 ref_key->objectid, ref_key->offset,
3907 key->type == BTRFS_DIR_ITEM_KEY ?
3908 "DIR_ITEM" : "DIR_INDEX",
3909 key->objectid, key->offset, namelen, name,
3910 imode_to_type(mode));
3914 /* Check whether inode_id/filetype/name match */
3915 node = path.nodes[0];
3916 slot = path.slots[0];
3917 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3918 total = btrfs_item_size_nr(node, slot);
3919 while (cur < total) {
3920 ret = DIR_ITEM_MISMATCH;
3921 name_len = btrfs_dir_name_len(node, di);
3922 data_len = btrfs_dir_data_len(node, di);
3924 btrfs_dir_item_key_to_cpu(node, di, &location);
3925 if (location.objectid != ref_key->objectid ||
3926 location.type != BTRFS_INODE_ITEM_KEY ||
3927 location.offset != 0)
3930 filetype = btrfs_dir_type(node, di);
3931 if (imode_to_type(mode) != filetype)
3934 if (name_len <= BTRFS_NAME_LEN) {
3937 len = BTRFS_NAME_LEN;
3938 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3940 key->type == BTRFS_DIR_ITEM_KEY ?
3941 "DIR_ITEM" : "DIR_INDEX",
3942 key->objectid, key->offset, name_len);
3944 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3945 if (len != namelen || strncmp(namebuf, name, len))
3951 len = sizeof(*di) + name_len + data_len;
3952 di = (struct btrfs_dir_item *)((char *)di + len);
3955 if (ret == DIR_ITEM_MISMATCH)
3957 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3959 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3960 ref_key->objectid, ref_key->offset,
3961 key->type == BTRFS_DIR_ITEM_KEY ?
3962 "DIR_ITEM" : "DIR_INDEX",
3963 key->objectid, key->offset, namelen, name,
3964 imode_to_type(mode));
3966 btrfs_release_path(&path);
/*
 * Overview of check_inode_ref(), documented in detail right below: the
 * INODE_REF item at @node/@slot may pack several refs back to back.  For
 * each one the index and name are read, names longer than BTRFS_NAME_LEN
 * being truncated with a warning.  A ref with index 0 whose name is not
 * ".." is reported as ROOT_DIR_ERROR.  find_dir_item() is then called
 * twice, once for the DIR_INDEX key and once for the DIR_ITEM key whose
 * offset is btrfs_name_hash() of the name; both keys use ref_key->offset as
 * their objectid.
 *
 * NOTE(review): presumably the results are OR-ed into the returned error
 * bitmap and @refs is incremented per ref -- those lines are not visible
 * here; confirm.
 */
3971 * Traverse the given INODE_REF and call find_dir_item() to find related
3972 * DIR_ITEM/DIR_INDEX.
3974 * @root: the root of the fs/file tree
3975 * @ref_key: the key of the INODE_REF
3976 * @refs: the count of INODE_REF
3977 * @mode: the st_mode of INODE_ITEM
3979 * Return 0 if no error occurred.
3981 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3982 struct extent_buffer *node, int slot, u64 *refs,
3985 struct btrfs_key key;
3986 struct btrfs_inode_ref *ref;
3987 char namebuf[BTRFS_NAME_LEN] = {0};
3995 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
3996 total = btrfs_item_size_nr(node, slot);
3999 /* Update inode ref count */
4002 index = btrfs_inode_ref_index(node, ref);
4003 name_len = btrfs_inode_ref_name_len(node, ref);
4004 if (name_len <= BTRFS_NAME_LEN) {
4007 len = BTRFS_NAME_LEN;
4008 warning("root %llu INODE_REF[%llu %llu] name too long",
4009 root->objectid, ref_key->objectid, ref_key->offset);
/* The name is stored right behind the fixed-size ref header */
4012 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4014 /* Check root dir ref name */
4015 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4016 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4017 root->objectid, ref_key->objectid, ref_key->offset,
4019 err |= ROOT_DIR_ERROR;
4022 /* Find related DIR_INDEX */
4023 key.objectid = ref_key->offset;
4024 key.type = BTRFS_DIR_INDEX_KEY;
4026 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4029 /* Find related dir_item */
4030 key.objectid = ref_key->offset;
4031 key.type = BTRFS_DIR_ITEM_KEY;
4032 key.offset = btrfs_name_hash(namebuf, len);
4033 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4036 len = sizeof(*ref) + name_len;
4037 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Overview of check_inode_extref(), documented in detail right below: the
 * INODE_EXTREF counterpart of check_inode_ref().  Every extref packed into
 * the item at @node/@slot is decoded (name length, index, parent), names
 * longer than BTRFS_NAME_LEN being truncated with a warning.  An extref
 * with index 0 whose name is not ".." is reported as ROOT_DIR_ERROR.
 * find_dir_item() is then called for the DIR_INDEX key and for the DIR_ITEM
 * key whose offset is btrfs_name_hash() of the name.  Unlike the INODE_REF
 * case, the objectid of both keys is the parent stored inside the extref.
 *
 * NOTE(review): presumably the results are OR-ed into the returned error
 * bitmap and @refs is incremented per extref -- those lines are not visible
 * here; confirm.
 */
4046 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4047 * DIR_ITEM/DIR_INDEX.
4049 * @root: the root of the fs/file tree
4050 * @ref_key: the key of the INODE_EXTREF
4051 * @refs: the count of INODE_EXTREF
4052 * @mode: the st_mode of INODE_ITEM
4054 * Return 0 if no error occurred.
4056 static int check_inode_extref(struct btrfs_root *root,
4057 struct btrfs_key *ref_key,
4058 struct extent_buffer *node, int slot, u64 *refs,
4061 struct btrfs_key key;
4062 struct btrfs_inode_extref *extref;
4063 char namebuf[BTRFS_NAME_LEN] = {0};
4073 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4074 total = btrfs_item_size_nr(node, slot);
4077 /* update inode ref count */
4079 name_len = btrfs_inode_extref_name_len(node, extref);
4080 index = btrfs_inode_extref_index(node, extref);
4081 parent = btrfs_inode_extref_parent(node, extref);
4082 if (name_len <= BTRFS_NAME_LEN) {
4085 len = BTRFS_NAME_LEN;
4086 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4087 root->objectid, ref_key->objectid, ref_key->offset);
/* The name is stored right behind the fixed-size extref header */
4089 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4091 /* Check root dir ref name */
4092 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4093 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4094 root->objectid, ref_key->objectid, ref_key->offset,
4096 err |= ROOT_DIR_ERROR;
4099 /* find related dir_index */
4100 key.objectid = parent;
4101 key.type = BTRFS_DIR_INDEX_KEY;
4103 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4106 /* find related dir_item */
4107 key.objectid = parent;
4108 key.type = BTRFS_DIR_ITEM_KEY;
4109 key.offset = btrfs_name_hash(namebuf, len);
4110 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4113 len = sizeof(*extref) + name_len;
4114 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4124 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4125 * DIR_ITEM/DIR_INDEX match.
4127 * @root: the root of the fs/file tree
4128 * @key: the key of the INODE_REF/INODE_EXTREF
4129 * @name: the name in the INODE_REF/INODE_EXTREF
4130 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4131 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4133 * @ext_ref: the EXTENDED_IREF feature
4135 * Return 0 if no error occurred.
4136 * Return >0 for error bitmap
4138 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4139 char *name, int namelen, u64 index,
4140 unsigned int ext_ref)
4142 struct btrfs_path path;
4143 struct btrfs_inode_ref *ref;
4144 struct btrfs_inode_extref *extref;
4145 struct extent_buffer *node;
4146 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4157 btrfs_init_path(&path);
4158 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4160 ret = INODE_REF_MISSING;
4164 node = path.nodes[0];
4165 slot = path.slots[0];
4167 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4168 total = btrfs_item_size_nr(node, slot);
4170 /* Iterate all entry of INODE_REF */
4171 while (cur < total) {
4172 ret = INODE_REF_MISSING;
4174 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4175 ref_index = btrfs_inode_ref_index(node, ref);
4176 if (index != (u64)-1 && index != ref_index)
4179 if (ref_namelen <= BTRFS_NAME_LEN) {
4182 len = BTRFS_NAME_LEN;
4183 warning("root %llu INODE %s[%llu %llu] name too long",
4185 key->type == BTRFS_INODE_REF_KEY ?
4187 key->objectid, key->offset);
4189 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4192 if (len != namelen || strncmp(ref_namebuf, name, len))
4198 len = sizeof(*ref) + ref_namelen;
4199 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4204 /* Skip if not support EXTENDED_IREF feature */
4208 btrfs_release_path(&path);
4209 btrfs_init_path(&path);
4211 dir_id = key->offset;
4212 key->type = BTRFS_INODE_EXTREF_KEY;
4213 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4215 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4217 ret = INODE_REF_MISSING;
4221 node = path.nodes[0];
4222 slot = path.slots[0];
4224 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4226 total = btrfs_item_size_nr(node, slot);
4228 /* Iterate all entry of INODE_EXTREF */
4229 while (cur < total) {
4230 ret = INODE_REF_MISSING;
4232 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4233 ref_index = btrfs_inode_extref_index(node, extref);
4234 parent = btrfs_inode_extref_parent(node, extref);
4235 if (index != (u64)-1 && index != ref_index)
4238 if (parent != dir_id)
4241 if (ref_namelen <= BTRFS_NAME_LEN) {
4244 len = BTRFS_NAME_LEN;
4245 warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4247 key->type == BTRFS_INODE_REF_KEY ?
4249 key->objectid, key->offset);
4251 read_extent_buffer(node, ref_namebuf,
4252 (unsigned long)(extref + 1), len);
4254 if (len != namelen || strncmp(ref_namebuf, name, len))
4261 len = sizeof(*extref) + ref_namelen;
4262 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4267 btrfs_release_path(&path);
/*
 * Cross-check every backref queued on rec->backrefs against @rec.
 *
 * The visible checks per entry are: was it found in the extent tree, was a
 * tree backref referenced back, and for data backrefs do found_ref,
 * disk_bytenr and bytes agree with num_refs, rec->start and rec->nr.  Data
 * backrefs add their found_ref to `found`, which is finally compared with
 * rec->refs.
 *
 * @rec:	extent record whose backrefs are verified
 * @print_errs:	NOTE(review): presumably gates the fprintf() reports below;
 *		the statements testing it are not visible here - confirm.
 *
 * maybe_free_extent_rec() treats a zero return as "nothing wrong".
 */
4271 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4273 struct list_head *cur = rec->backrefs.next;
4274 struct extent_backref *back;
4275 struct tree_backref *tback;
4276 struct data_backref *dback;
4280 while(cur != &rec->backrefs) {
4281 back = to_extent_backref(cur);
/* The found_extent_tree flag was never set for this backref. */
4283 if (!back->found_extent_tree) {
4287 if (back->is_data) {
4288 dback = to_data_backref(back);
4289 fprintf(stderr, "Backref %llu %s %llu"
4290 " owner %llu offset %llu num_refs %lu"
4291 " not found in extent tree\n",
4292 (unsigned long long)rec->start,
4293 back->full_backref ?
4295 back->full_backref ?
4296 (unsigned long long)dback->parent:
4297 (unsigned long long)dback->root,
4298 (unsigned long long)dback->owner,
4299 (unsigned long long)dback->offset,
4300 (unsigned long)dback->num_refs);
4302 tback = to_tree_backref(back);
4303 fprintf(stderr, "Backref %llu parent %llu"
4304 " root %llu not found in extent tree\n",
4305 (unsigned long long)rec->start,
4306 (unsigned long long)tback->parent,
4307 (unsigned long long)tback->root);
/* Tree backref whose found_ref flag was never set. */
4310 if (!back->is_data && !back->found_ref) {
4314 tback = to_tree_backref(back);
4315 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4316 (unsigned long long)rec->start,
4317 back->full_backref ? "parent" : "root",
4318 back->full_backref ?
4319 (unsigned long long)tback->parent :
4320 (unsigned long long)tback->root, back);
/* Data backrefs carry counts and sizes that must agree with the record. */
4322 if (back->is_data) {
4323 dback = to_data_backref(back);
4324 if (dback->found_ref != dback->num_refs) {
4328 fprintf(stderr, "Incorrect local backref count"
4329 " on %llu %s %llu owner %llu"
4330 " offset %llu found %u wanted %u back %p\n",
4331 (unsigned long long)rec->start,
4332 back->full_backref ?
4334 back->full_backref ?
4335 (unsigned long long)dback->parent:
4336 (unsigned long long)dback->root,
4337 (unsigned long long)dback->owner,
4338 (unsigned long long)dback->offset,
4339 dback->found_ref, dback->num_refs, back);
4341 if (dback->disk_bytenr != rec->start) {
4345 fprintf(stderr, "Backref disk bytenr does not"
4346 " match extent record, bytenr=%llu, "
4347 "ref bytenr=%llu\n",
4348 (unsigned long long)rec->start,
4349 (unsigned long long)dback->disk_bytenr);
4352 if (dback->bytes != rec->nr) {
4356 fprintf(stderr, "Backref bytes do not match "
4357 "extent backref, bytenr=%llu, ref "
4358 "bytes=%llu, backref bytes=%llu\n",
4359 (unsigned long long)rec->start,
4360 (unsigned long long)rec->nr,
4361 (unsigned long long)dback->bytes);
/*
 * Accumulate the references seen so far; the tree-backref side of this
 * branch is not visible here.
 */
4364 if (!back->is_data) {
4367 dback = to_data_backref(back);
4368 found += dback->found_ref;
/* Global check: everything counted above versus the record's refs. */
4371 if (found != rec->refs) {
4375 fprintf(stderr, "Incorrect global backref count "
4376 "on %llu found %llu wanted %llu\n",
4377 (unsigned long long)rec->start,
4378 (unsigned long long)found,
4379 (unsigned long long)rec->refs);
/*
 * Empty rec->backrefs: while the list is non-empty, take its first node and
 * convert it to the owning extent_backref with to_extent_backref().
 * NOTE(review): the statements that unlink/free the entry and the return
 * value are not visible in this listing.
 */
4385 static int free_all_extent_backrefs(struct extent_record *rec)
4387 struct extent_backref *back;
4388 struct list_head *cur;
4389 while (!list_empty(&rec->backrefs)) {
4390 cur = rec->backrefs.next;
4391 back = to_extent_backref(cur);
/*
 * Tear down @extent_cache: take the first cache extent, map it back to its
 * extent_record, remove it from the tree and release the record's backrefs
 * through free_all_extent_backrefs().
 * NOTE(review): the enclosing loop and the release of the record itself are
 * presumably on lines not shown here; @fs_info is not used by the visible
 * statements.
 */
4398 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4399 struct cache_tree *extent_cache)
4401 struct cache_extent *cache;
4402 struct extent_record *rec;
4405 cache = first_cache_extent(extent_cache);
4408 rec = container_of(cache, struct extent_record, cache);
4409 remove_cache_extent(extent_cache, cache);
4410 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * All of the following must hold: content and owner ref were checked, the
 * extent item ref count equals the non-zero number of refs found, there are
 * no duplicates, all_backpointers_checked() (run quietly, print_errs == 0)
 * returns 0, and none of bad_full_backref / crossing_stripes /
 * wrong_chunk_type is set.  A record failing any condition is not removed
 * here.
 */
4415 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4416 struct extent_record *rec)
4418 if (rec->content_checked && rec->owner_ref_checked &&
4419 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4420 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4421 !rec->bad_full_backref && !rec->crossing_stripes &&
4422 !rec->wrong_chunk_type) {
/* Unhook the record from the cache, its backref list and any list it is on. */
4423 remove_cache_extent(extent_cache, &rec->cache);
4424 free_all_extent_backrefs(rec);
4425 list_del_init(&rec->list);
/*
 * Check that the owner recorded in the header of tree block @buf really
 * references that block.
 *
 * First the backrefs of @rec are scanned, testing found_ref, full_backref
 * and whether the backref root equals btrfs_header_owner(buf).  After that
 * the owner's fs root is read and searched for the first key of @buf with
 * path.lowest_level set one level above it, so the parent's block pointer
 * can be compared with buf->start.
 *
 * Return 0 if `found` was set, 1 otherwise (final return; earlier exits are
 * not visible in this listing).
 */
4431 static int check_owner_ref(struct btrfs_root *root,
4432 struct extent_record *rec,
4433 struct extent_buffer *buf)
4435 struct extent_backref *node;
4436 struct tree_backref *back;
4437 struct btrfs_root *ref_root;
4438 struct btrfs_key key;
4439 struct btrfs_path path;
4440 struct extent_buffer *parent;
4445 list_for_each_entry(node, &rec->backrefs, list) {
4448 if (!node->found_ref)
4450 if (node->full_backref)
4452 back = to_tree_backref(node);
4453 if (btrfs_header_owner(buf) == back->root)
/* Reaching this point with a root block is treated as a fatal bug. */
4456 BUG_ON(rec->is_root);
4458 /* Try to find the block by searching the owner's fs tree */
4459 key.objectid = btrfs_header_owner(buf);
4460 key.type = BTRFS_ROOT_ITEM_KEY;
4461 key.offset = (u64)-1;
4463 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4464 if (IS_ERR(ref_root))
/* Use the first key stored in @buf as the search key. */
4467 level = btrfs_header_level(buf);
4469 btrfs_item_key_to_cpu(buf, &key, 0);
4471 btrfs_node_key_to_cpu(buf, &key, 0);
4473 btrfs_init_path(&path);
/* Stop the search at the level that would hold the pointer to @buf. */
4474 path.lowest_level = level + 1;
4475 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4479 parent = path.nodes[level + 1];
4480 if (parent && buf->start == btrfs_node_blockptr(parent,
4481 path.slots[level + 1]))
4484 btrfs_release_path(&path);
4485 return found ? 0 : 1;
/*
 * Walk the backrefs of @rec looking at tree backrefs: full backrefs are
 * tested on their own, the others are compared against
 * BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the return statements are not visible here;
 * record_bad_block_io() only goes on to record a corrupt extent when the
 * result is non-zero.
 */
4488 static int is_extent_tree_record(struct extent_record *rec)
4490 struct list_head *cur = rec->backrefs.next;
4491 struct extent_backref *node;
4492 struct tree_backref *back;
4495 while(cur != &rec->backrefs) {
4496 node = to_extent_backref(cur);
4500 back = to_tree_backref(node);
4501 if (node->full_backref)
4503 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, start + len) in @extent_cache
 * and, when is_extent_tree_record() accepts it, register the range as a
 * corrupt extent keyed by the record's parent key.
 *
 * Returns the result of btrfs_add_corrupt_extent_record() on that path; the
 * other exits are not visible in this listing.
 */
4510 static int record_bad_block_io(struct btrfs_fs_info *info,
4511 struct cache_tree *extent_cache,
4514 struct extent_record *rec;
4515 struct cache_extent *cache;
4516 struct btrfs_key key;
4518 cache = lookup_cache_extent(extent_cache, start, len);
4522 rec = container_of(cache, struct extent_record, cache);
4523 if (!is_extent_tree_record(rec))
/* parent_key is kept in disk byte order, convert before handing it over. */
4526 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4527 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (non-zero header level): the two btrfs_key_ptr entries are read out
 * and written back crosswise; the block's first key may then be pushed up
 * with btrfs_fixup_low_keys().
 * Leaf: the data, offsets, sizes and keys of the two items are exchanged.
 *
 * NOTE(review): the conditions guarding the fixup and the allocation
 * failure handling are not visible in this listing.
 */
4530 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4531 struct extent_buffer *buf, int slot)
4533 if (btrfs_header_level(buf)) {
4534 struct btrfs_key_ptr ptr1, ptr2;
4536 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4537 sizeof(struct btrfs_key_ptr));
4538 read_extent_buffer(buf, &ptr2,
4539 btrfs_node_key_ptr_offset(slot + 1),
4540 sizeof(struct btrfs_key_ptr));
4541 write_extent_buffer(buf, &ptr1,
4542 btrfs_node_key_ptr_offset(slot + 1),
4543 sizeof(struct btrfs_key_ptr));
4544 write_extent_buffer(buf, &ptr2,
4545 btrfs_node_key_ptr_offset(slot),
4546 sizeof(struct btrfs_key_ptr));
/* Propagate the (possibly new) first key of this node to its parents. */
4548 struct btrfs_disk_key key;
4549 btrfs_node_key(buf, &key, 0);
4550 btrfs_fixup_low_keys(root, path, &key,
4551 btrfs_header_level(buf) + 1);
4554 struct btrfs_item *item1, *item2;
4555 struct btrfs_key k1, k2;
4556 char *item1_data, *item2_data;
4557 u32 item1_offset, item2_offset, item1_size, item2_size;
4559 item1 = btrfs_item_nr(slot);
4560 item2 = btrfs_item_nr(slot + 1);
4561 btrfs_item_key_to_cpu(buf, &k1, slot);
4562 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4563 item1_offset = btrfs_item_offset(buf, item1);
4564 item2_offset = btrfs_item_offset(buf, item2);
4565 item1_size = btrfs_item_size(buf, item1);
4566 item2_size = btrfs_item_size(buf, item2);
/* Temporary copies of both payloads. */
4568 item1_data = malloc(item1_size);
4571 item2_data = malloc(item2_size);
4577 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4578 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written back with
 * item2_size (and vice versa).  If the two sizes differ this looks like it
 * reads past the smaller buffer - confirm.
 */
4580 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4581 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4585 btrfs_set_item_offset(buf, item1, item2_offset);
4586 btrfs_set_item_offset(buf, item2, item1_offset);
4587 btrfs_set_item_size(buf, item1, item2_size);
4588 btrfs_set_item_size(buf, item2, item1_size);
/* btrfs_set_item_key_unsafe() works on path->slots[0], so point it at each slot in turn. */
4590 path->slots[0] = slot;
4591 btrfs_set_item_key_unsafe(root, path, &k2);
4592 path->slots[0] = slot + 1;
4593 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair bad key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys (i, i + 1) are read as node keys or item keys, compared
 * with btrfs_comp_cpu_keys(), and swap_values() is called for a pair; the
 * buffer is marked dirty afterwards.
 *
 * NOTE(review): the statement following the "< 0" comparison, the handling
 * of swap_values() failures and the return value are not visible here;
 * @trans is not used by the visible statements.
 */
4598 static int fix_key_order(struct btrfs_trans_handle *trans,
4599 struct btrfs_root *root,
4600 struct btrfs_path *path)
4602 struct extent_buffer *buf;
4603 struct btrfs_key k1, k2;
4605 int level = path->lowest_level;
4608 buf = path->nodes[level];
4609 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4611 btrfs_node_key_to_cpu(buf, &k1, i);
4612 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4614 btrfs_item_key_to_cpu(buf, &k1, i);
4615 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4617 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4619 ret = swap_values(root, path, buf, i);
4622 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type is tested against DIR_INDEX, EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF and EXTENT_DATA_REF (the "keys we can deal with missing").
 * The removal is announced on stdout, the following item headers are moved
 * down by one, nritems is decremented and the buffer is marked dirty.
 *
 * NOTE(review): the exit taken for other key types, the condition guarding
 * the low-key fixup and the return value are not visible here; @trans is
 * not used by the visible statements.
 */
4628 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4629 struct btrfs_root *root,
4630 struct btrfs_path *path,
4631 struct extent_buffer *buf, int slot)
4633 struct btrfs_key key;
4634 int nritems = btrfs_header_nritems(buf);
4636 btrfs_item_key_to_cpu(buf, &key, slot);
4638 /* These are all the keys we can deal with missing. */
4639 if (key.type != BTRFS_DIR_INDEX_KEY &&
4640 key.type != BTRFS_EXTENT_ITEM_KEY &&
4641 key.type != BTRFS_METADATA_ITEM_KEY &&
4642 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4643 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4646 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4647 (unsigned long long)key.objectid, key.type,
4648 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array; only headers are moved here. */
4649 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4650 btrfs_item_nr_offset(slot + 1),
4651 sizeof(struct btrfs_item) *
4652 (nritems - slot - 1));
4653 btrfs_set_header_nritems(buf, nritems - 1);
/* Push the leaf's first key up to level 1 of the path. */
4655 struct btrfs_disk_key disk_key;
4657 btrfs_item_key(buf, &disk_key, 0);
4658 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4660 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * For every item the end of its data is compared with where it should end:
 * BTRFS_LEAF_DATA_SIZE(root) for item 0, the data offset of the previous
 * item otherwise.  When the end lies beyond that limit delete_bogus_item()
 * is tried and a "can't fix" message may be printed.  When it lies short of
 * it, `shift` is set to the gap, the item data is moved up by that many
 * bytes and the buffer is marked dirty.
 *
 * NOTE(review): the control flow between these steps and the return value
 * are not visible in this listing.
 */
4664 static int fix_item_offset(struct btrfs_trans_handle *trans,
4665 struct btrfs_root *root,
4666 struct btrfs_path *path)
4668 struct extent_buffer *buf;
4672 /* We should only get this for leaves */
4673 BUG_ON(path->lowest_level);
4674 buf = path->nodes[0];
4676 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4677 unsigned int shift = 0, offset;
/* First item: its data has to end exactly at the end of the leaf data area. */
4679 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4680 BTRFS_LEAF_DATA_SIZE(root)) {
4681 if (btrfs_item_end_nr(buf, i) >
4682 BTRFS_LEAF_DATA_SIZE(root)) {
4683 ret = delete_bogus_item(trans, root, path,
4687 fprintf(stderr, "item is off the end of the "
4688 "leaf, can't fix\n");
4692 shift = BTRFS_LEAF_DATA_SIZE(root) -
4693 btrfs_item_end_nr(buf, i);
/* Later items: data has to end where the previous item's data starts. */
4694 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4695 btrfs_item_offset_nr(buf, i - 1)) {
4696 if (btrfs_item_end_nr(buf, i) >
4697 btrfs_item_offset_nr(buf, i - 1)) {
4698 ret = delete_bogus_item(trans, root, path,
4702 fprintf(stderr, "items overlap, can't fix\n");
4706 shift = btrfs_item_offset_nr(buf, i - 1) -
4707 btrfs_item_end_nr(buf, i);
/* Move the item data towards the end of the leaf by `shift` bytes. */
4712 printf("Shifting item nr %d by %u bytes in block %llu\n",
4713 i, shift, (unsigned long long)buf->start);
4714 offset = btrfs_item_offset_nr(buf, i);
4715 memmove_extent_buffer(buf,
4716 btrfs_leaf_data(buf) + offset + shift,
4717 btrfs_leaf_data(buf) + offset,
4718 btrfs_item_size_nr(buf, i));
4719 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4721 btrfs_mark_buffer_dirty(buf);
4725 * We may have moved things, in which case we want to exit so we don't
4726 * write those changes out. Once we have proper abort functionality in
4727 * progs this can be changed to something nicer.
4734 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4735 * then just return -EIO.
/*
 * Try to repair tree block @buf for the two statuses handled here:
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 *
 * btrfs_find_all_roots() collects the roots referencing buf->start.  For
 * each of them the fs root is read, a transaction is started and the first
 * key of @buf is searched with cow enabled and block checking skipped, so
 * the path ends at the block's level.  fix_key_order() or fix_item_offset()
 * then does the actual repair and the transaction is committed.
 *
 * NOTE(review): the error exits and the final return value are not visible
 * in this listing.
 */
4737 static int try_to_fix_bad_block(struct btrfs_root *root,
4738 struct extent_buffer *buf,
4739 enum btrfs_tree_block_status status)
4741 struct btrfs_trans_handle *trans;
4742 struct ulist *roots;
4743 struct ulist_node *node;
4744 struct btrfs_root *search_root;
4745 struct btrfs_path path;
4746 struct ulist_iterator iter;
4747 struct btrfs_key root_key, key;
/* Only these two kinds of damage have a repair routine below. */
4750 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4751 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4754 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4758 btrfs_init_path(&path);
4759 ULIST_ITER_INIT(&iter);
4760 while ((node = ulist_next(roots, &iter))) {
4761 root_key.objectid = node->val;
4762 root_key.type = BTRFS_ROOT_ITEM_KEY;
4763 root_key.offset = (u64)-1;
4765 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4772 trans = btrfs_start_transaction(search_root, 0);
4773 if (IS_ERR(trans)) {
4774 ret = PTR_ERR(trans);
/* Stop the search at the level of @buf and do not validate blocks on the way. */
4778 path.lowest_level = btrfs_header_level(buf);
4779 path.skip_check_block = 1;
4780 if (path.lowest_level)
4781 btrfs_node_key_to_cpu(buf, &key, 0);
4783 btrfs_item_key_to_cpu(buf, &key, 0);
4784 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4787 btrfs_commit_transaction(trans, search_root);
4790 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4791 ret = fix_key_order(trans, search_root, &path);
4792 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4793 ret = fix_item_offset(trans, search_root, &path);
4795 btrfs_commit_transaction(trans, search_root);
4798 btrfs_release_path(&path);
4799 btrfs_commit_transaction(trans, search_root);
4802 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record covering [buf->start, buf->len) is looked up in @extent_cache
 * and receives the header generation, the objectid of the first key (when
 * the block has items) and the level.  The block is then run through
 * btrfs_check_leaf() or btrfs_check_node(); on a non-clean status
 * try_to_fix_bad_block() may be called and "bad block" reported.  Otherwise
 * content_checked is set and owner_ref_checked is set either directly (full
 * backref flag in @flags) or depending on check_owner_ref().  Finally
 * maybe_free_extent_rec() gets a chance to drop the record.
 *
 * NOTE(review): the conditions around the repair attempt and the return
 * values are not visible in this listing.
 */
4806 static int check_block(struct btrfs_root *root,
4807 struct cache_tree *extent_cache,
4808 struct extent_buffer *buf, u64 flags)
4810 struct extent_record *rec;
4811 struct cache_extent *cache;
4812 struct btrfs_key key;
4813 enum btrfs_tree_block_status status;
4817 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4820 rec = container_of(cache, struct extent_record, cache);
4821 rec->generation = btrfs_header_generation(buf);
4823 level = btrfs_header_level(buf);
4824 if (btrfs_header_nritems(buf) > 0) {
4827 btrfs_item_key_to_cpu(buf, &key, 0);
4829 btrfs_node_key_to_cpu(buf, &key, 0);
4831 rec->info_objectid = key.objectid;
4833 rec->info_level = level;
4835 if (btrfs_is_leaf(buf))
4836 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4838 status = btrfs_check_node(root, &rec->parent_key, buf);
4840 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4842 status = try_to_fix_bad_block(root, buf, status);
4843 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4845 fprintf(stderr, "bad block %llu\n",
4846 (unsigned long long)buf->start);
4849 * Signal to callers we need to start the scan over
4850 * again since we'll have cowed blocks.
4855 rec->content_checked = 1;
/* With a full backref the owner check is marked done without calling check_owner_ref(). */
4856 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4857 rec->owner_ref_checked = 1;
4859 ret = check_owner_ref(root, rec, buf);
4861 rec->owner_ref_checked = 1;
4865 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref.  Two comparisons are
 * visible: one between @parent and back->parent, next to a test of
 * !node->full_backref, and one between back->root and @root, next to a test
 * of node->full_backref.
 * NOTE(review): which branch runs for which arguments, the skipping of
 * entries and the returned value are not visible in this listing.
 */
4869 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4870 u64 parent, u64 root)
4872 struct list_head *cur = rec->backrefs.next;
4873 struct extent_backref *node;
4874 struct tree_backref *back;
4876 while(cur != &rec->backrefs) {
4877 node = to_extent_backref(cur);
4881 back = to_tree_backref(node);
4883 if (!node->full_backref)
4885 if (parent == back->parent)
4888 if (node->full_backref)
4890 if (back->root == root)
/*
 * Allocate a tree backref, zero its embedded extent_backref node and append
 * it to rec->backrefs.  One visible branch stores @parent and marks the
 * node as a full backref; another clears full_backref.
 * NOTE(review): the branch condition, the handling of a failed malloc(),
 * the store of @root and the returned pointer are not visible here.
 */
4897 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4898 u64 parent, u64 root)
4900 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared, the other fields are assigned below. */
4904 memset(&ref->node, 0, sizeof(ref->node));
4906 ref->parent = parent;
4907 ref->node.full_backref = 1;
4910 ref->node.full_backref = 0;
4912 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a data backref.  One visible comparison
 * is between @parent and back->parent; the other matches back->root,
 * back->owner and back->offset against @root, @owner and @offset.
 *
 * For the root/owner/offset match there is an extra test: when @found_ref
 * is set and the candidate already has found_ref, its bytes and disk_bytenr
 * are compared with @bytes and @disk_bytenr.
 * NOTE(review): the statements executed on these tests and the returned
 * value are not visible in this listing.
 */
4917 static struct data_backref *find_data_backref(struct extent_record *rec,
4918 u64 parent, u64 root,
4919 u64 owner, u64 offset,
4921 u64 disk_bytenr, u64 bytes)
4923 struct list_head *cur = rec->backrefs.next;
4924 struct extent_backref *node;
4925 struct data_backref *back;
4927 while(cur != &rec->backrefs) {
4928 node = to_extent_backref(cur);
4932 back = to_data_backref(node);
4934 if (!node->full_backref)
4936 if (parent == back->parent)
4939 if (node->full_backref)
4941 if (back->root == root && back->owner == owner &&
4942 back->offset == offset) {
4943 if (found_ref && node->found_ref &&
4944 (back->bytes != bytes ||
4945 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref, zero its embedded node, flag it as data and
 * append it to rec->backrefs.  One visible branch stores @parent and marks
 * the node as a full backref; another stores @offset and clears
 * full_backref.  `bytes` starts out as max_size, and rec->max_size is
 * raised to max_size when that is larger.
 * NOTE(review): the branch condition, the remaining field stores, the
 * handling of a failed malloc() and the returned pointer are not visible.
 */
4954 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4955 u64 parent, u64 root,
4956 u64 owner, u64 offset,
4959 struct data_backref *ref = malloc(sizeof(*ref));
4963 memset(&ref->node, 0, sizeof(ref->node));
4964 ref->node.is_data = 1;
4967 ref->parent = parent;
4970 ref->node.full_backref = 1;
4974 ref->offset = offset;
4975 ref->node.full_backref = 0;
4977 ref->bytes = max_size;
4980 list_add_tail(&ref->node.list, &rec->backrefs);
/* Keep the record's max_size as the largest size seen so far. */
4981 if (max_size > rec->max_size)
4982 rec->max_size = max_size;
4986 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the flags of the block group found for
 * rec->start do not fit the extent:
 *  - data extent: the block group must have BTRFS_BLOCK_GROUP_DATA;
 *  - metadata extent: it must have SYSTEM or METADATA, and when a first
 *    backref exists it decides which of the two is expected (SYSTEM for
 *    the chunk tree root, METADATA otherwise);
 *  - a metadata extent whose first backref is a data backref is wrong.
 * NOTE(review): the handling of a failed block group lookup is not visible
 * in this listing.
 */
4987 static void check_extent_type(struct extent_record *rec)
4989 struct btrfs_block_group_cache *bg_cache;
4991 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4995 /* data extent, check chunk directly*/
4996 if (!rec->metadata) {
4997 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4998 rec->wrong_chunk_type = 1;
5002 /* metadata extent, check the obvious case first */
5003 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5004 BTRFS_BLOCK_GROUP_METADATA))) {
5005 rec->wrong_chunk_type = 1;
5010 * Check SYSTEM extent, as it's also marked as metadata, we can only
5011 * make sure it's a SYSTEM extent by its backref
5013 if (!list_empty(&rec->backrefs)) {
5014 struct extent_backref *node;
5015 struct tree_backref *tback;
/* Only the first backref on the list is consulted. */
5018 node = to_extent_backref(rec->backrefs.next);
5019 if (node->is_data) {
5020 /* tree block shouldn't have data backref */
5021 rec->wrong_chunk_type = 1;
5024 tback = container_of(node, struct tree_backref, node);
5026 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5027 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5029 bg_type = BTRFS_BLOCK_GROUP_METADATA;
5030 if (!(bg_cache->flags & bg_type))
5031 rec->wrong_chunk_type = 1;
5036 * Allocate a new extent record, fill default values from @tmpl and insert into
5037 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5038 * the cache, otherwise it fails.
/*
 * Create an extent record from @tmpl and insert it into @extent_cache
 * without looking for an existing entry first.
 *
 * Fields copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key.  nr becomes the larger of tmpl->nr and
 * tmpl->max_size, while the cache extent itself is sized with tmpl->nr.
 * The duplicate count and the error flags start cleared.
 *
 * After insertion rec->nr is added to the global bytes_used, the stripe
 * crossing check is evaluated with the tree root's nodesize and
 * check_extent_type() validates the chunk type.
 *
 * NOTE(review): failure handling for malloc()/insert_cache_extent(), the
 * condition guarding the stripe check and the return value are not visible
 * in this listing.
 */
5040 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5041 struct extent_record *tmpl)
5043 struct extent_record *rec;
5046 rec = malloc(sizeof(*rec));
5049 rec->start = tmpl->start;
5050 rec->max_size = tmpl->max_size;
5051 rec->nr = max(tmpl->nr, tmpl->max_size);
5052 rec->found_rec = tmpl->found_rec;
5053 rec->content_checked = tmpl->content_checked;
5054 rec->owner_ref_checked = tmpl->owner_ref_checked;
5055 rec->num_duplicates = 0;
5056 rec->metadata = tmpl->metadata;
5057 rec->flag_block_full_backref = FLAG_UNSET;
5058 rec->bad_full_backref = 0;
5059 rec->crossing_stripes = 0;
5060 rec->wrong_chunk_type = 0;
5061 rec->is_root = tmpl->is_root;
5062 rec->refs = tmpl->refs;
5063 rec->extent_item_refs = tmpl->extent_item_refs;
5064 rec->parent_generation = tmpl->parent_generation;
5065 INIT_LIST_HEAD(&rec->backrefs);
5066 INIT_LIST_HEAD(&rec->dups);
5067 INIT_LIST_HEAD(&rec->list);
5068 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache extent uses tmpl->nr, not the max() stored in rec->nr above. */
5069 rec->cache.start = tmpl->start;
5070 rec->cache.size = tmpl->nr;
5071 ret = insert_cache_extent(extent_cache, &rec->cache);
5076 bytes_used += rec->nr;
5079 rec->crossing_stripes = check_crossing_stripes(global_info,
5080 rec->start, global_info->tree_root->nodesize);
5081 check_extent_type(rec);
5086 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5088 * - refs - if found, increase refs
5089 * - is_root - if found, set
5090 * - content_checked - if found, set
5091 * - owner_ref_checked - if found, set
5093 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record overlapping [tmpl->start, tmpl->nr),
 * or create a new record through add_extent_rec_nolookup().
 *
 * Visible updates on an existing record:
 *  - nr may be reset to max(tmpl->nr, tmpl->max_size);
 *  - when tmpl->found_rec is set and either the start differs or the record
 *    was already found, the record is queued on duplicate_extents (if not
 *    on a list yet) and a small copy of @tmpl is appended to rec->dups;
 *  - extent_item_refs is taken from @tmpl when this is not a duplicate,
 *    with a report on stderr if the record already had a value;
 *  - content_checked / owner_ref_checked are only ever set, never cleared;
 *  - parent_key is overwritten, parent_generation only by a non-zero
 *    value, and max_size only grows.
 * Afterwards the stripe crossing and chunk type checks run and
 * maybe_free_extent_rec() may drop the record.
 *
 * NOTE(review): the conditions around several of these steps and the return
 * values are not visible in this listing.
 */
5095 static int add_extent_rec(struct cache_tree *extent_cache,
5096 struct extent_record *tmpl)
5098 struct extent_record *rec;
5099 struct cache_extent *cache;
5103 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5105 rec = container_of(cache, struct extent_record, cache);
5109 rec->nr = max(tmpl->nr, tmpl->max_size);
5112 * We need to make sure to reset nr to whatever the extent
5113 * record says was the real size, this way we can compare it to
5116 if (tmpl->found_rec) {
5117 if (tmpl->start != rec->start || rec->found_rec) {
5118 struct extent_record *tmp;
/* An empty rec->list means the record is not queued anywhere yet. */
5121 if (list_empty(&rec->list))
5122 list_add_tail(&rec->list,
5123 &duplicate_extents);
5126 * We have to do this song and dance in case we
5127 * find an extent record that falls inside of
5128 * our current extent record but does not have
5129 * the same objectid.
5131 tmp = malloc(sizeof(*tmp));
5134 tmp->start = tmpl->start;
5135 tmp->max_size = tmpl->max_size;
5138 tmp->metadata = tmpl->metadata;
5139 tmp->extent_item_refs = tmpl->extent_item_refs;
5140 INIT_LIST_HEAD(&tmp->list);
5141 list_add_tail(&tmp->list, &rec->dups);
5142 rec->num_duplicates++;
5149 if (tmpl->extent_item_refs && !dup) {
5150 if (rec->extent_item_refs) {
5151 fprintf(stderr, "block %llu rec "
5152 "extent_item_refs %llu, passed %llu\n",
5153 (unsigned long long)tmpl->start,
5154 (unsigned long long)
5155 rec->extent_item_refs,
5156 (unsigned long long)tmpl->extent_item_refs);
5158 rec->extent_item_refs = tmpl->extent_item_refs;
5162 if (tmpl->content_checked)
5163 rec->content_checked = 1;
5164 if (tmpl->owner_ref_checked)
5165 rec->owner_ref_checked = 1;
5166 memcpy(&rec->parent_key, &tmpl->parent_key,
5167 sizeof(tmpl->parent_key));
5168 if (tmpl->parent_generation)
5169 rec->parent_generation = tmpl->parent_generation;
5170 if (rec->max_size < tmpl->max_size)
5171 rec->max_size = tmpl->max_size;
5174 * A metadata extent can't cross stripe_len boundary, otherwise
5175 * kernel scrub won't be able to handle it.
5176 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5180 rec->crossing_stripes = check_crossing_stripes(
5181 global_info, rec->start,
5182 global_info->tree_root->nodesize);
5183 check_extent_type(rec);
5184 maybe_free_extent_rec(extent_cache, rec);
/* Create a fresh record from the template. */
5188 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * An extent record containing @bytenr is looked up; a template starting at
 * @bytenr can be inserted with add_extent_rec_nolookup() followed by a
 * second lookup.  The matching backref is searched with find_tree_backref()
 * and may be allocated with alloc_tree_backref().  Either found_ref or
 * found_extent_tree is then set on it; if the flag about to be set was
 * already set, "Extent back ref already exists" is printed.  Finally the
 * chunk type is re-checked and maybe_free_extent_rec() may drop the record.
 *
 * NOTE(review): the branch conditions (presumably keyed on @found_ref), the
 * error exits and the return value are not visible in this listing.
 */
5193 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5194 u64 parent, u64 root, int found_ref)
5196 struct extent_record *rec;
5197 struct tree_backref *back;
5198 struct cache_extent *cache;
5201 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5203 struct extent_record tmpl;
5205 memset(&tmpl, 0, sizeof(tmpl));
5206 tmpl.start = bytenr;
5210 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5214 /* really a bug in cache_extent implement now */
5215 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5220 rec = container_of(cache, struct extent_record, cache);
5221 if (rec->start != bytenr) {
5223 * Several cause, from unaligned bytenr to over lapping extents
5228 back = find_tree_backref(rec, parent, root);
5230 back = alloc_tree_backref(rec, parent, root);
5236 if (back->node.found_ref) {
5237 fprintf(stderr, "Extent back ref already exists "
5238 "for %llu parent %llu root %llu \n",
5239 (unsigned long long)bytenr,
5240 (unsigned long long)parent,
5241 (unsigned long long)root);
5243 back->node.found_ref = 1;
5245 if (back->node.found_extent_tree) {
5246 fprintf(stderr, "Extent back ref already exists "
5247 "for %llu parent %llu root %llu \n",
5248 (unsigned long long)bytenr,
5249 (unsigned long long)parent,
5250 (unsigned long long)root);
5252 back->node.found_extent_tree = 1;
5254 check_extent_type(rec);
5255 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up (a template with @bytenr and @max_size can
 * be inserted first) and its max_size is raised to @max_size if needed.
 * The backref is searched with find_data_backref() and may be allocated
 * with alloc_data_backref().
 *
 * Two kinds of updates are visible:
 *  - reference side: @num_refs must be 1, an already referenced backref
 *    must have bytes == @max_size, then found_ref is counted up, bytes and
 *    disk_bytenr are stored and the record is marked content and owner
 *    checked;
 *  - extent tree side: num_refs is stored and found_extent_tree set, with
 *    "Extent back ref already exists" printed if it was set before.
 * maybe_free_extent_rec() may finally drop the record.
 *
 * NOTE(review): the branch conditions (presumably keyed on @found_ref), the
 * error exits and the return value are not visible in this listing.
 */
5259 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5260 u64 parent, u64 root, u64 owner, u64 offset,
5261 u32 num_refs, int found_ref, u64 max_size)
5263 struct extent_record *rec;
5264 struct data_backref *back;
5265 struct cache_extent *cache;
5268 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5270 struct extent_record tmpl;
5272 memset(&tmpl, 0, sizeof(tmpl));
5273 tmpl.start = bytenr;
5275 tmpl.max_size = max_size;
5277 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5281 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5286 rec = container_of(cache, struct extent_record, cache);
5287 if (rec->max_size < max_size)
5288 rec->max_size = max_size;
5291 * If found_ref is set then max_size is the real size and must match the
5292 * existing refs. So if we have already found a ref then we need to
5293 * make sure that this ref matches the existing one, otherwise we need
5294 * to add a new backref so we can notice that the backrefs don't match
5295 * and we need to figure out who is telling the truth. This is to
5296 * account for that awful fsync bug I introduced where we'd end up with
5297 * a btrfs_file_extent_item that would have its length include multiple
5298 * prealloc extents or point inside of a prealloc extent.
5300 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5303 back = alloc_data_backref(rec, parent, root, owner, offset,
/* Each call on the reference side accounts for exactly one reference. */
5309 BUG_ON(num_refs != 1);
5310 if (back->node.found_ref)
5311 BUG_ON(back->bytes != max_size);
5312 back->node.found_ref = 1;
5313 back->found_ref += 1;
5314 back->bytes = max_size;
5315 back->disk_bytenr = bytenr;
5317 rec->content_checked = 1;
5318 rec->owner_ref_checked = 1;
5320 if (back->node.found_extent_tree) {
5321 fprintf(stderr, "Extent back ref already exists "
5322 "for %llu parent %llu root %llu "
5323 "owner %llu offset %llu num_refs %lu\n",
5324 (unsigned long long)bytenr,
5325 (unsigned long long)parent,
5326 (unsigned long long)root,
5327 (unsigned long long)owner,
5328 (unsigned long long)offset,
5329 (unsigned long)num_refs);
5331 back->num_refs = num_refs;
5332 back->node.found_extent_tree = 1;
5334 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [bytenr, bytenr + size) for processing: it is first added
 * to @seen and then to @pending.
 * NOTE(review): the check of the first add_cache_extent() result
 * (presumably skipping ranges already seen) and the return value are not
 * visible in this listing.
 */
5338 static int add_pending(struct cache_tree *pending,
5339 struct cache_tree *seen, u64 bytenr, u32 size)
5342 ret = add_cache_extent(seen, bytenr, size);
5345 add_cache_extent(pending, bytenr, size);
5349 static int pick_next_pending(struct cache_tree *pending,
5350 struct cache_tree *reada,
5351 struct cache_tree *nodes,
5352 u64 last, struct block_info *bits, int bits_nr,
5355 unsigned long node_start = last;
5356 struct cache_extent *cache;
5359 cache = search_cache_extent(reada, 0);
5361 bits[0].start = cache->start;
5362 bits[0].size = cache->size;
5367 if (node_start > 32768)
5368 node_start -= 32768;
5370 cache = search_cache_extent(nodes, node_start);
5372 cache = search_cache_extent(nodes, 0);
5375 cache = search_cache_extent(pending, 0);
5380 bits[ret].start = cache->start;
5381 bits[ret].size = cache->size;
5382 cache = next_cache_extent(cache);
5384 } while (cache && ret < bits_nr);
5390 bits[ret].start = cache->start;
5391 bits[ret].size = cache->size;
5392 cache = next_cache_extent(cache);
5394 } while (cache && ret < bits_nr);
5396 if (bits_nr - ret > 8) {
5397 u64 lookup = bits[0].start + bits[0].size;
5398 struct cache_extent *next;
5399 next = search_cache_extent(pending, lookup);
5401 if (next->start - lookup > 32768)
5403 bits[ret].start = next->start;
5404 bits[ret].size = next->size;
5405 lookup = next->start + next->size;
5409 next = next_cache_extent(next);
/*
 * Release callback for one chunk record: map @cache back to its
 * chunk_record and unlink it from both lists it can be on (rec->list and
 * rec->dextents).
 * NOTE(review): the release of the record memory is presumably on a line
 * not shown here.
 */
5417 static void free_chunk_record(struct cache_extent *cache)
5419 struct chunk_record *rec;
5421 rec = container_of(cache, struct chunk_record, cache);
5422 list_del_init(&rec->list);
5423 list_del_init(&rec->dextents);
/*
 * Release every chunk record stored in @chunk_cache by passing
 * free_chunk_record() to cache_tree_free_extents() as the callback.
 */
5427 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5429 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Release callback for one device record: @node is the rb_node embedded in
 * a device_record and is mapped back to it with container_of().
 * NOTE(review): the release of the record memory is presumably on a line
 * not shown here.
 */
5432 static void free_device_record(struct rb_node *node)
5434 struct device_record *rec;
5436 rec = container_of(node, struct device_record, node);
/*
 * Instantiate the teardown helper for the device cache through the
 * FREE_RB_BASED_TREE() macro, with free_device_record() as the per-node
 * release callback.  NOTE(review): the name and signature of the generated
 * function come from the macro definition, which is not shown here.
 */
5440 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to @tree: insert its cache extent into tree->tree and append
 * the record to the tree->block_groups list.
 * NOTE(review): the handling of an insert_cache_extent() failure and the
 * return value are not visible in this listing.
 */
5442 int insert_block_group_record(struct block_group_tree *tree,
5443 struct block_group_record *bg_rec)
5447 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5451 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Release callback for one block group record: map @cache back to its
 * block_group_record and unlink it from the list it is on.
 * NOTE(review): the release of the record memory is presumably on a line
 * not shown here.
 */
5455 static void free_block_group_record(struct cache_extent *cache)
5457 struct block_group_record *rec;
5459 rec = container_of(cache, struct block_group_record, cache);
5460 list_del_init(&rec->list);
/* Tears down the cache tree inside @tree, using free_block_group_record as the per-extent callback. */
5464 void free_block_group_tree(struct block_group_tree *tree)
5466 cache_tree_free_extents(&tree->tree, free_block_group_record);
5469 int insert_device_extent_record(struct device_extent_tree *tree,
5470 struct device_extent_record *de_rec)
5475 * Device extent is a bit different from the other extents, because
5476 * the extents which belong to the different devices may have the
5477 * same start and size, so we need use the special extent cache
5478 * search/insert functions.
5480 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5484 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5485 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5489 static void free_device_extent_record(struct cache_extent *cache)
5491 struct device_extent_record *rec;
5493 rec = container_of(cache, struct device_extent_record, cache);
5494 if (!list_empty(&rec->chunk_list))
5495 list_del_init(&rec->chunk_list);
5496 if (!list_empty(&rec->device_list))
5497 list_del_init(&rec->device_list);
/* Tears down the cache tree inside @tree, using free_device_extent_record as the per-extent callback. */
5501 void free_device_extent_tree(struct device_extent_tree *tree)
5503 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the v0-format extent ref stored at @slot of @leaf in @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; anything else is recorded as a data backref carrying the
 * ref's count.  Returns whatever the backref helper returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
					key.offset, 0, 0, 0,
					btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
5527 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5528 struct btrfs_key *key,
5531 struct btrfs_chunk *ptr;
5532 struct chunk_record *rec;
5535 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5536 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5538 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5540 fprintf(stderr, "memory allocation failed\n");
5544 INIT_LIST_HEAD(&rec->list);
5545 INIT_LIST_HEAD(&rec->dextents);
5548 rec->cache.start = key->offset;
5549 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5551 rec->generation = btrfs_header_generation(leaf);
5553 rec->objectid = key->objectid;
5554 rec->type = key->type;
5555 rec->offset = key->offset;
5557 rec->length = rec->cache.size;
5558 rec->owner = btrfs_chunk_owner(leaf, ptr);
5559 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5560 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5561 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5562 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5563 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5564 rec->num_stripes = num_stripes;
5565 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5567 for (i = 0; i < rec->num_stripes; ++i) {
5568 rec->stripes[i].devid =
5569 btrfs_stripe_devid_nr(leaf, ptr, i);
5570 rec->stripes[i].offset =
5571 btrfs_stripe_offset_nr(leaf, ptr, i);
5572 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5573 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5580 static int process_chunk_item(struct cache_tree *chunk_cache,
5581 struct btrfs_key *key, struct extent_buffer *eb,
5584 struct chunk_record *rec;
5585 struct btrfs_chunk *chunk;
5588 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5590 * Do extra check for this chunk item,
5592 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5593 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5594 * and owner<->key_type check.
5596 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5599 error("chunk(%llu, %llu) is not valid, ignore it",
5600 key->offset, btrfs_chunk_length(eb, chunk));
5603 rec = btrfs_new_chunk_record(eb, key, slot);
5604 ret = insert_cache_extent(chunk_cache, &rec->cache);
5606 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5607 rec->offset, rec->length);
5614 static int process_device_item(struct rb_root *dev_cache,
5615 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5617 struct btrfs_dev_item *ptr;
5618 struct device_record *rec;
5621 ptr = btrfs_item_ptr(eb,
5622 slot, struct btrfs_dev_item);
5624 rec = malloc(sizeof(*rec));
5626 fprintf(stderr, "memory allocation failed\n");
5630 rec->devid = key->offset;
5631 rec->generation = btrfs_header_generation(eb);
5633 rec->objectid = key->objectid;
5634 rec->type = key->type;
5635 rec->offset = key->offset;
5637 rec->devid = btrfs_device_id(eb, ptr);
5638 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5639 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5641 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5643 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5650 struct block_group_record *
5651 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5654 struct btrfs_block_group_item *ptr;
5655 struct block_group_record *rec;
5657 rec = calloc(1, sizeof(*rec));
5659 fprintf(stderr, "memory allocation failed\n");
5663 rec->cache.start = key->objectid;
5664 rec->cache.size = key->offset;
5666 rec->generation = btrfs_header_generation(leaf);
5668 rec->objectid = key->objectid;
5669 rec->type = key->type;
5670 rec->offset = key->offset;
5672 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5673 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5675 INIT_LIST_HEAD(&rec->list);
5680 static int process_block_group_item(struct block_group_tree *block_group_cache,
5681 struct btrfs_key *key,
5682 struct extent_buffer *eb, int slot)
5684 struct block_group_record *rec;
5687 rec = btrfs_new_block_group_record(eb, key, slot);
5688 ret = insert_block_group_record(block_group_cache, rec);
5690 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5691 rec->objectid, rec->offset);
5698 struct device_extent_record *
5699 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5700 struct btrfs_key *key, int slot)
5702 struct device_extent_record *rec;
5703 struct btrfs_dev_extent *ptr;
5705 rec = calloc(1, sizeof(*rec));
5707 fprintf(stderr, "memory allocation failed\n");
5711 rec->cache.objectid = key->objectid;
5712 rec->cache.start = key->offset;
5714 rec->generation = btrfs_header_generation(leaf);
5716 rec->objectid = key->objectid;
5717 rec->type = key->type;
5718 rec->offset = key->offset;
5720 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5721 rec->chunk_objecteid =
5722 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5724 btrfs_dev_extent_chunk_offset(leaf, ptr);
5725 rec->length = btrfs_dev_extent_length(leaf, ptr);
5726 rec->cache.size = rec->length;
5728 INIT_LIST_HEAD(&rec->chunk_list);
5729 INIT_LIST_HEAD(&rec->device_list);
5735 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5736 struct btrfs_key *key, struct extent_buffer *eb,
5739 struct device_extent_record *rec;
5742 rec = btrfs_new_device_extent_record(eb, key, slot);
5743 ret = insert_device_extent_record(dev_extent_cache, rec);
5746 "Device extent[%llu, %llu, %llu] existed.\n",
5747 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item (EXTENT_ITEM or METADATA_ITEM key) at @slot of @eb
 * and feed it into @extent_cache: an extent record is added through
 * add_extent_rec(), then the inline refs that follow the item are walked and
 * each one is recorded through add_tree_backref() or add_data_backref()
 * according to its type.
 *
 * Extents whose bytenr or length fails the alignment/nodesize checks are
 * reported with error() and ignored.
 *
 * NOTE(review): in the lines visible here the second add_extent_rec() call
 * and both add_data_backref() calls do not capture a return value -- confirm
 * whether failures there should be propagated.
 */
5754 static int process_extent_item(struct btrfs_root *root,
5755 struct cache_tree *extent_cache,
5756 struct extent_buffer *eb, int slot)
5758 struct btrfs_extent_item *ei;
5759 struct btrfs_extent_inline_ref *iref;
5760 struct btrfs_extent_data_ref *dref;
5761 struct btrfs_shared_data_ref *sref;
5762 struct btrfs_key key;
5763 struct extent_record tmpl;
5768 u32 item_size = btrfs_item_size_nr(eb, slot);
5774 btrfs_item_key_to_cpu(eb, &key, slot);
/* The extent length is the nodesize for METADATA_ITEM keys and key.offset otherwise. */
5776 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5778 num_bytes = root->nodesize;
5780 num_bytes = key.offset;
5783 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5784 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5785 key.objectid, root->sectorsize);
/* An item smaller than the current extent item struct is handled as the v0 layout when compat support is compiled in. */
5788 if (item_size < sizeof(*ei)) {
5789 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5790 struct btrfs_extent_item_v0 *ei0;
5791 BUG_ON(item_size != sizeof(*ei0));
5792 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5793 refs = btrfs_extent_refs_v0(eb, ei0);
5797 memset(&tmpl, 0, sizeof(tmpl));
5798 tmpl.start = key.objectid;
5799 tmpl.nr = num_bytes;
5800 tmpl.extent_item_refs = refs;
5801 tmpl.metadata = metadata;
5803 tmpl.max_size = num_bytes;
5805 return add_extent_rec(extent_cache, &tmpl);
5808 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5809 refs = btrfs_extent_refs(eb, ei);
5810 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5814 if (metadata && num_bytes != root->nodesize) {
5815 error("ignore invalid metadata extent, length %llu does not equal to %u",
5816 num_bytes, root->nodesize);
5819 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5820 error("ignore invalid data extent, length %llu is not aligned to %u",
5821 num_bytes, root->sectorsize);
5825 memset(&tmpl, 0, sizeof(tmpl));
5826 tmpl.start = key.objectid;
5827 tmpl.nr = num_bytes;
5828 tmpl.extent_item_refs = refs;
5829 tmpl.metadata = metadata;
5831 tmpl.max_size = num_bytes;
5832 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; a tree block stored under
 * an EXTENT_ITEM key additionally carries a btrfs_tree_block_info first.
 */
5834 ptr = (unsigned long)(ei + 1);
5835 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5836 key.type == BTRFS_EXTENT_ITEM_KEY)
5837 ptr += sizeof(struct btrfs_tree_block_info);
5839 end = (unsigned long)ei + item_size;
5841 iref = (struct btrfs_extent_inline_ref *)ptr;
5842 type = btrfs_extent_inline_ref_type(eb, iref);
5843 offset = btrfs_extent_inline_ref_offset(eb, iref);
5845 case BTRFS_TREE_BLOCK_REF_KEY:
5846 ret = add_tree_backref(extent_cache, key.objectid,
5849 error("add_tree_backref failed: %s",
5852 case BTRFS_SHARED_BLOCK_REF_KEY:
5853 ret = add_tree_backref(extent_cache, key.objectid,
5856 error("add_tree_backref failed: %s",
5859 case BTRFS_EXTENT_DATA_REF_KEY:
5860 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5861 add_data_backref(extent_cache, key.objectid, 0,
5862 btrfs_extent_data_ref_root(eb, dref),
5863 btrfs_extent_data_ref_objectid(eb,
5865 btrfs_extent_data_ref_offset(eb, dref),
5866 btrfs_extent_data_ref_count(eb, dref),
5869 case BTRFS_SHARED_DATA_REF_KEY:
5870 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5871 add_data_backref(extent_cache, key.objectid, offset,
5873 btrfs_shared_data_ref_count(eb, sref),
5877 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5878 key.objectid, key.type, num_bytes);
/* Step to the next inline ref; the stride depends on the ref type just handled. */
5881 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * present in its free space ctl as one entry with exactly that offset and
 * size; a matching entry is unlinked so it is not seen again.
 *
 * Before the lookup the range is trimmed against every super block mirror:
 * each mirror is mapped into this block group with btrfs_rmap_block() and any
 * overlapping stripe is cut out of the range.  When a stripe lands in the
 * middle, the left part is checked by a recursive call and the loop continues
 * with the right part.
 *
 * Mismatches are reported on stderr.
 */
5888 static int check_cache_range(struct btrfs_root *root,
5889 struct btrfs_block_group_cache *cache,
5890 u64 offset, u64 bytes)
5892 struct btrfs_free_space *entry;
5898 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5899 bytenr = btrfs_sb_offset(i);
5900 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5901 cache->key.objectid, bytenr, 0,
5902 &logical, &nr, &stripe_len);
/* Stripes entirely before or entirely after the range need no trimming. */
5907 if (logical[nr] + stripe_len <= offset)
5909 if (offset + bytes <= logical[nr])
5911 if (logical[nr] == offset) {
5912 if (stripe_len >= bytes) {
5916 bytes -= stripe_len;
5917 offset += stripe_len;
5918 } else if (logical[nr] < offset) {
5919 if (logical[nr] + stripe_len >=
5924 bytes = (offset + bytes) -
5925 (logical[nr] + stripe_len);
5926 offset = logical[nr] + stripe_len;
5929 * Could be tricky, the super may land in the
5930 * middle of the area we're checking. First
5931 * check the easiest case, it's at the end.
5933 if (logical[nr] + stripe_len >=
5935 bytes = logical[nr] - offset;
5939 /* Check the left side */
5940 ret = check_cache_range(root, cache,
5942 logical[nr] - offset);
5948 /* Now we continue with the right side */
5949 bytes = (offset + bytes) -
5950 (logical[nr] + stripe_len);
5951 offset = logical[nr] + stripe_len;
5958 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5960 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5961 offset, offset+bytes);
5965 if (entry->offset != offset) {
5966 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5971 if (entry->bytes != bytes) {
5972 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5973 bytes, entry->bytes, offset);
5977 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the free space information loaded for block group @cache
 * against the extent tree.
 *
 * Extent tree items are walked from the start of the block group (but not
 * below BTRFS_SUPER_INFO_OFFSET) until the end of the block group.  "last"
 * tracks the end of the previous extent; every gap between "last" and the
 * next extent, and the tail after the final extent, is passed to
 * check_cache_range().  Free space entries still left in the ctl afterwards
 * are reported on stderr.
 */
5982 static int verify_space_cache(struct btrfs_root *root,
5983 struct btrfs_block_group_cache *cache)
5985 struct btrfs_path path;
5986 struct extent_buffer *leaf;
5987 struct btrfs_key key;
/* From here on "root" is the extent root, whatever root the caller passed. */
5991 root = root->fs_info->extent_root;
5993 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5995 btrfs_init_path(&path);
5996 key.objectid = last;
5998 key.type = BTRFS_EXTENT_ITEM_KEY;
5999 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6004 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6005 ret = btrfs_next_leaf(root, &path);
6013 leaf = path.nodes[0];
6014 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6015 if (key.objectid >= cache->key.offset + cache->key.objectid)
6017 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6018 key.type != BTRFS_METADATA_ITEM_KEY) {
/* The extent length is key.offset for EXTENT_ITEM keys; the other branch uses the nodesize. */
6023 if (last == key.objectid) {
6024 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6025 last = key.objectid + key.offset;
6027 last = key.objectid + root->nodesize;
6032 ret = check_cache_range(root, cache, last,
6033 key.objectid - last);
6036 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6037 last = key.objectid + key.offset;
6039 last = key.objectid + root->nodesize;
6043 if (last < cache->key.objectid + cache->key.offset)
6044 ret = check_cache_range(root, cache, last,
6045 cache->key.objectid +
6046 cache->key.offset - last);
6049 btrfs_release_path(&path);
6052 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6053 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space information of every block group.
 *
 * When the super block's cache generation is set (not -1) but differs from
 * the super generation, a message says the space cache will be invalidated;
 * the handling after that message is not visible here.
 *
 * Otherwise block groups are visited in order through
 * btrfs_lookup_first_block_group().  For each one the free space ctl is
 * initialised if missing, the free space is loaded from the free space tree
 * (when the FREE_SPACE_TREE compat_ro feature is set, with super stripes
 * excluded around the load) or else from the free space cache, and the result
 * is checked with verify_space_cache().
 *
 * Returns -EINVAL if the local error counter ended up non-zero, 0 otherwise.
 */
6061 static int check_space_cache(struct btrfs_root *root)
6063 struct btrfs_block_group_cache *cache;
6064 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6068 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6069 btrfs_super_generation(root->fs_info->super_copy) !=
6070 btrfs_super_cache_generation(root->fs_info->super_copy)) {
6071 printf("cache and super generation don't match, space cache "
6072 "will be invalidated\n");
6076 if (ctx.progress_enabled) {
6077 ctx.tp = TASK_FREE_SPACE;
6078 task_start(ctx.info);
6082 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Advance the cursor past this block group for the next lookup. */
6086 start = cache->key.objectid + cache->key.offset;
6087 if (!cache->free_space_ctl) {
6088 if (btrfs_init_free_space_ctl(cache,
6089 root->sectorsize)) {
6094 btrfs_remove_free_space_cache(cache);
6097 if (btrfs_fs_compat_ro(root->fs_info,
6098 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6099 ret = exclude_super_stripes(root, cache);
6101 fprintf(stderr, "could not exclude super stripes: %s\n",
6106 ret = load_free_space_tree(root->fs_info, cache);
6107 free_excluded_extents(root, cache);
6109 fprintf(stderr, "could not load free space tree: %s\n",
6116 ret = load_free_space_cache(root->fs_info, cache);
6121 ret = verify_space_cache(root, cache);
6123 fprintf(stderr, "cache appears valid but isn't %Lu\n",
6124 cache->key.objectid);
6129 task_stop(ctx.info);
6131 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of the range [@bytenr, @bytenr + @num_bytes).
 *
 * @leaf_offset is the offset inside @eb where the expected checksums for the
 * range start; the checksum for a given sector is read from
 * leaf_offset + (sector index * csum_size).
 *
 * The data is read with read_extent_data() into a buffer of @num_bytes, then
 * checksummed one sectorsize unit at a time and compared with the expected
 * value.  A mismatch is reported on stderr and btrfs_num_copies() is
 * consulted to decide whether another mirror can be tried.
 *
 * A @num_bytes that is not a multiple of the sectorsize is rejected up front.
 */
6134 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6135 u64 num_bytes, unsigned long leaf_offset,
6136 struct extent_buffer *eb) {
6139 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6141 unsigned long csum_offset;
6145 u64 data_checked = 0;
6151 if (num_bytes % root->sectorsize)
6154 data = malloc(num_bytes);
6158 while (offset < num_bytes) {
6161 read_len = num_bytes - offset;
6162 /* read as much space once a time */
6163 ret = read_extent_data(root, data + offset,
6164 bytenr + offset, &read_len, mirror);
6168 /* verify every 4k data's checksum */
6169 while (data_checked < read_len) {
/* tmp is the position of the current sector relative to the start of the range. */
6171 tmp = offset + data_checked;
6173 csum = btrfs_csum_data(NULL, (char *)data + tmp,
6174 csum, root->sectorsize);
6175 btrfs_csum_final(csum, (u8 *)&csum);
6177 csum_offset = leaf_offset +
6178 tmp / root->sectorsize * csum_size;
6179 read_extent_buffer(eb, (char *)&csum_expected,
6180 csum_offset, csum_size);
6181 /* try another mirror */
6182 if (csum != csum_expected) {
6183 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6184 mirror, bytenr + tmp,
6185 csum, csum_expected);
6186 num_copies = btrfs_num_copies(
6187 &root->fs_info->mapping_tree,
6189 if (mirror < num_copies - 1) {
6194 data_checked += root->sectorsize;
/*
 * Check that the range starting at @bytenr (its length parameter is not
 * visible in this listing; the body calls it num_bytes) is covered by extent
 * items in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps backwards to find
 * the first candidate item, then walks forward.  Each EXTENT_ITEM that
 * overlaps the range trims it; when an extent sits in the middle of the
 * range, the right-hand part is checked by a recursive call and the walk
 * continues with the left-hand part.
 *
 * If part of the range is still uncovered at the end and no earlier error was
 * recorded, "There are no extents for csum range" is printed.
 */
6203 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6206 struct btrfs_path path;
6207 struct extent_buffer *leaf;
6208 struct btrfs_key key;
6211 btrfs_init_path(&path);
6212 key.objectid = bytenr;
6213 key.type = BTRFS_EXTENT_ITEM_KEY;
6214 key.offset = (u64)-1;
6217 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6220 fprintf(stderr, "Error looking up extent record %d\n", ret);
6221 btrfs_release_path(&path);
6224 if (path.slots[0] > 0) {
6227 ret = btrfs_prev_leaf(root, &path);
6230 } else if (ret > 0) {
6237 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6240 * Block group items come before extent items if they have the same
6241 * bytenr, so walk back one more just in case. Dear future traveller,
6242 * first congrats on mastering time travel. Now if it's not too much
6243 * trouble could you go back to 2006 and tell Chris to make the
6244 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6245 * EXTENT_ITEM_KEY please?
6247 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6248 if (path.slots[0] > 0) {
6251 ret = btrfs_prev_leaf(root, &path);
6254 } else if (ret > 0) {
6259 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6263 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6264 ret = btrfs_next_leaf(root, &path);
6266 fprintf(stderr, "Error going to next leaf "
6268 btrfs_release_path(&path);
6274 leaf = path.nodes[0];
6275 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6276 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extents that end before the range or start after it do not affect it. */
6280 if (key.objectid + key.offset < bytenr) {
6284 if (key.objectid > bytenr + num_bytes)
6287 if (key.objectid == bytenr) {
6288 if (key.offset >= num_bytes) {
6292 num_bytes -= key.offset;
6293 bytenr += key.offset;
6294 } else if (key.objectid < bytenr) {
6295 if (key.objectid + key.offset >= bytenr + num_bytes) {
6299 num_bytes = (bytenr + num_bytes) -
6300 (key.objectid + key.offset);
6301 bytenr = key.objectid + key.offset;
6303 if (key.objectid + key.offset < bytenr + num_bytes) {
6304 u64 new_start = key.objectid + key.offset;
6305 u64 new_bytes = bytenr + num_bytes - new_start;
6308 * Weird case, the extent is in the middle of
6309 * our range, we'll have to search one side
6310 * and then the other. Not sure if this happens
6311 * in real life, but no harm in coding it up
6312 * anyway just in case.
6314 btrfs_release_path(&path);
6315 ret = check_extent_exists(root, new_start,
6318 fprintf(stderr, "Right section didn't "
6322 num_bytes = key.objectid - bytenr;
6325 num_bytes = key.objectid - bytenr;
6332 if (num_bytes && !ret) {
6333 fprintf(stderr, "There are no extents for csum range "
6334 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6338 btrfs_release_path(&path);
/*
 * Walk all EXTENT_CSUM items of the csum tree.
 *
 * For every item, the amount of data it covers is derived from the item size
 * (item size / csum_size sectors).  Unless check_data_csum is off, the data
 * itself is verified with check_extent_csums().  Items that follow each
 * other without a gap are accumulated in (offset, num_bytes); when a gap is
 * found the accumulated range is passed to check_extent_exists() so that
 * checksums without a matching extent record are reported.
 *
 * Fails early with a message if the csum tree root node is not up to date.
 */
6342 static int check_csums(struct btrfs_root *root)
6344 struct btrfs_path path;
6345 struct extent_buffer *leaf;
6346 struct btrfs_key key;
6347 u64 offset = 0, num_bytes = 0;
6348 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6352 unsigned long leaf_offset;
/* From here on "root" is the csum root, whatever root the caller passed. */
6354 root = root->fs_info->csum_root;
6355 if (!extent_buffer_uptodate(root->node)) {
6356 fprintf(stderr, "No valid csum tree found\n");
6360 btrfs_init_path(&path);
6361 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6362 key.type = BTRFS_EXTENT_CSUM_KEY;
6364 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6366 fprintf(stderr, "Error searching csum tree %d\n", ret);
6367 btrfs_release_path(&path);
6371 if (ret > 0 && path.slots[0])
6376 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6377 ret = btrfs_next_leaf(root, &path);
6379 fprintf(stderr, "Error going to next leaf "
6386 leaf = path.nodes[0];
6388 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6389 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* One checksum of csum_size bytes covers one sector of data. */
6394 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6395 csum_size) * root->sectorsize;
6396 if (!check_data_csum)
6397 goto skip_csum_check;
6398 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6399 ret = check_extent_csums(root, key.offset, data_len,
6405 offset = key.offset;
6406 } else if (key.offset != offset + num_bytes) {
6407 ret = check_extent_exists(root, offset, num_bytes);
6409 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6410 "there is no extent record\n",
6411 offset, offset+num_bytes);
6414 offset = key.offset;
6417 num_bytes += data_len;
6421 btrfs_release_path(&path);
6425 static int is_dropped_key(struct btrfs_key *key,
6426 struct btrfs_key *drop_key) {
6427 if (key->objectid < drop_key->objectid)
6429 else if (key->objectid == drop_key->objectid) {
6430 if (key->type < drop_key->type)
6432 else if (key->type == drop_key->type) {
6433 if (key->offset < drop_key->offset)
6441 * Here are the rules for FULL_BACKREF.
6443 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6444 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6446 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6447 * if it happened after the relocation occurred since we'll have dropped the
6448 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6449 * have no real way to know for sure.
6451 * We process the blocks one root at a time, and we start from the lowest root
6452 * objectid and go to the highest. So we can just lookup the owner backref for
6453 * the record and if we don't find it then we know it doesn't exist and we have
6456 * FIXME: if we ever start reclaiming root objectids then we need to fix this
6457 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6458 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether tree block @buf, reached while walking the root described
 * by @ri, should carry BTRFS_BLOCK_FLAG_FULL_BACKREF, and cross-check the
 * outcome against what is already recorded for the block.
 *
 * The block's extent_record is looked up in @extent_cache by buf->start.
 * The visible tests, in order, are: the root objectid is below
 * BTRFS_FIRST_FREE_OBJECTID; @buf is the root node itself
 * (buf->start == ri->bytenr); the header has BTRFS_HEADER_FLAG_RELOC; the
 * header owner equals ri->objectid; a tree backref for the owner exists.
 *
 * On the full-backref outcome the flag is OR-ed into the flags output.  On
 * either outcome, rec->bad_full_backref is set when flag_block_full_backref
 * was already set to the opposite value.
 *
 * NOTE(review): the labels and jumps connecting the tests to the two
 * outcomes are not visible in this listing -- confirm against the full
 * source before relying on the order above.
 */
6460 static int calc_extent_flag(struct btrfs_root *root,
6461 struct cache_tree *extent_cache,
6462 struct extent_buffer *buf,
6463 struct root_item_record *ri,
6466 struct extent_record *rec;
6467 struct cache_extent *cache;
6468 struct tree_backref *tback;
6471 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6472 /* we have added this extent before */
6476 rec = container_of(cache, struct extent_record, cache);
6479 * Except file/reloc tree, we can not have
6482 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6487 if (buf->start == ri->bytenr)
6490 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6493 owner = btrfs_header_owner(buf);
6494 if (owner == ri->objectid)
6497 tback = find_tree_backref(rec, 0, owner);
/* Outcome without FULL_BACKREF: a previously recorded value other than 0 is a conflict. */
6502 if (rec->flag_block_full_backref != FLAG_UNSET &&
6503 rec->flag_block_full_backref != 0)
6504 rec->bad_full_backref = 1;
/* Outcome with FULL_BACKREF: a previously recorded value other than 1 is a conflict. */
6507 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6508 if (rec->flag_block_full_backref != FLAG_UNSET &&
6509 rec->flag_block_full_backref != 1)
6510 rec->bad_full_backref = 1;
6514 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6516 fprintf(stderr, "Invalid key type(");
6517 print_key_type(stderr, 0, key_type);
6518 fprintf(stderr, ") found in root(");
6519 print_objectid(stderr, rootid, 0);
6520 fprintf(stderr, ")\n");
6524 * Check if the key is valid with its extent buffer.
6526 * This is an early check in case an invalid key exists in an extent buffer.
6527 * This is not comprehensive yet, but should prevent wrong key/item passed
6530 static int check_type_with_root(u64 rootid, u8 key_type)
6533 /* Only valid in chunk tree */
6534 case BTRFS_DEV_ITEM_KEY:
6535 case BTRFS_CHUNK_ITEM_KEY:
6536 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6539 /* valid in csum and log tree */
6540 case BTRFS_CSUM_TREE_OBJECTID:
6541 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6545 case BTRFS_EXTENT_ITEM_KEY:
6546 case BTRFS_METADATA_ITEM_KEY:
6547 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6548 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6551 case BTRFS_ROOT_ITEM_KEY:
6552 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6555 case BTRFS_DEV_EXTENT_KEY:
6556 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6562 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next tree block of the scan.
 *
 * A batch of block ranges is chosen with pick_next_pending() into @bits;
 * every range of the batch is added to @reada and read ahead, and the first
 * one (bits[0]) is the block handled by this call.  That range is removed
 * from @pending, @reada and @nodes, the block is read with read_tree_block(),
 * and its FULL_BACKREF state is determined from the extent tree lookup and/or
 * calc_extent_flag() and stored in the block's extent record.
 *
 * Leaf: every item is first screened with check_type_with_root() and then
 * dispatched by key type to the matching process_*() / add_*_backref()
 * helper; file extent items additionally update the data byte counters and
 * add a data backref.
 *
 * Node: every child pointer gets an extent record (add_extent_rec()) and a
 * tree backref, and is queued with add_pending() on @nodes or @pending.
 *
 * Finally the global btree size counters are updated and the buffer is
 * released.
 *
 * NOTE(review): rec starts out NULL and is assigned only from the
 * @extent_cache lookup; several statements below write through rec.  The
 * guarding conditions are not visible in this listing -- confirm that every
 * dereference is protected when the lookup finds nothing.
 */
6566 static int run_next_block(struct btrfs_root *root,
6567 struct block_info *bits,
6570 struct cache_tree *pending,
6571 struct cache_tree *seen,
6572 struct cache_tree *reada,
6573 struct cache_tree *nodes,
6574 struct cache_tree *extent_cache,
6575 struct cache_tree *chunk_cache,
6576 struct rb_root *dev_cache,
6577 struct block_group_tree *block_group_cache,
6578 struct device_extent_tree *dev_extent_cache,
6579 struct root_item_record *ri)
6581 struct extent_buffer *buf;
6582 struct extent_record *rec = NULL;
6593 struct btrfs_key key;
6594 struct cache_extent *cache;
6597 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6598 bits_nr, &reada_bits);
6603 for(i = 0; i < nritems; i++) {
6604 ret = add_cache_extent(reada, bits[i].start,
6609 /* fixme, get the parent transid */
6610 readahead_tree_block(root, bits[i].start,
/* Only the first picked range is processed by this call. */
6614 *last = bits[0].start;
6615 bytenr = bits[0].start;
6616 size = bits[0].size;
6618 cache = lookup_cache_extent(pending, bytenr, size);
6620 remove_cache_extent(pending, cache);
6623 cache = lookup_cache_extent(reada, bytenr, size);
6625 remove_cache_extent(reada, cache);
6628 cache = lookup_cache_extent(nodes, bytenr, size);
6630 remove_cache_extent(nodes, cache);
6633 cache = lookup_cache_extent(extent_cache, bytenr, size);
6635 rec = container_of(cache, struct extent_record, cache);
6636 gen = rec->parent_generation;
6639 /* fixme, get the real parent transid */
6640 buf = read_tree_block(root, bytenr, size, gen);
6641 if (!extent_buffer_uptodate(buf)) {
6642 record_bad_block_io(root->fs_info,
6643 extent_cache, bytenr, size);
6647 nritems = btrfs_header_nritems(buf);
6650 if (!init_extent_tree) {
6651 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6652 btrfs_header_level(buf), 1, NULL,
6655 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6657 fprintf(stderr, "Couldn't calc extent flags\n");
6658 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6663 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6665 fprintf(stderr, "Couldn't calc extent flags\n");
6666 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6670 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6672 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6673 ri->objectid == btrfs_header_owner(buf)) {
6675 * Ok we got to this block from it's original owner and
6676 * we have FULL_BACKREF set. Relocation can leave
6677 * converted blocks over so this is altogether possible,
6678 * however it's not possible if the generation > the
6679 * last snapshot, so check for this case.
6681 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6682 btrfs_header_generation(buf) > ri->last_snapshot) {
6683 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6684 rec->bad_full_backref = 1;
6689 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6690 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6691 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6692 rec->bad_full_backref = 1;
/* Remember the final FULL_BACKREF decision in the extent record. */
6696 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6697 rec->flag_block_full_backref = 1;
6701 rec->flag_block_full_backref = 0;
6703 owner = btrfs_header_owner(buf);
6706 ret = check_block(root, extent_cache, buf, flags);
6710 if (btrfs_is_leaf(buf)) {
6711 btree_space_waste += btrfs_leaf_free_space(root, buf);
6712 for (i = 0; i < nritems; i++) {
6713 struct btrfs_file_extent_item *fi;
6714 btrfs_item_key_to_cpu(buf, &key, i);
6716 * Check key type against the leaf owner.
6717 * Could filter quite a lot of early error if
6720 if (check_type_with_root(btrfs_header_owner(buf),
6722 fprintf(stderr, "ignoring invalid key\n");
6725 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6726 process_extent_item(root, extent_cache, buf,
6730 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6731 process_extent_item(root, extent_cache, buf,
6735 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6737 btrfs_item_size_nr(buf, i);
6740 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6741 process_chunk_item(chunk_cache, &key, buf, i);
6744 if (key.type == BTRFS_DEV_ITEM_KEY) {
6745 process_device_item(dev_cache, &key, buf, i);
6748 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6749 process_block_group_item(block_group_cache,
6753 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6754 process_device_extent_item(dev_extent_cache,
6759 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6760 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6761 process_extent_ref_v0(extent_cache, buf, i);
6768 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6769 ret = add_tree_backref(extent_cache,
6770 key.objectid, 0, key.offset, 0);
6772 error("add_tree_backref failed: %s",
6776 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6777 ret = add_tree_backref(extent_cache,
6778 key.objectid, key.offset, 0, 0);
6780 error("add_tree_backref failed: %s",
6784 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6785 struct btrfs_extent_data_ref *ref;
6786 ref = btrfs_item_ptr(buf, i,
6787 struct btrfs_extent_data_ref);
6788 add_data_backref(extent_cache,
6790 btrfs_extent_data_ref_root(buf, ref),
6791 btrfs_extent_data_ref_objectid(buf,
6793 btrfs_extent_data_ref_offset(buf, ref),
6794 btrfs_extent_data_ref_count(buf, ref),
6795 0, root->sectorsize);
6798 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6799 struct btrfs_shared_data_ref *ref;
6800 ref = btrfs_item_ptr(buf, i,
6801 struct btrfs_shared_data_ref);
6802 add_data_backref(extent_cache,
6803 key.objectid, key.offset, 0, 0, 0,
6804 btrfs_shared_data_ref_count(buf, ref),
6805 0, root->sectorsize);
/* Orphan items are turned into bad_item entries queued on delete_items; the filter applied to key.objectid is not fully visible here. */
6808 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6809 struct bad_item *bad;
6811 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6815 bad = malloc(sizeof(struct bad_item));
6818 INIT_LIST_HEAD(&bad->list);
6819 memcpy(&bad->key, &key,
6820 sizeof(struct btrfs_key));
6821 bad->root_id = owner;
6822 list_add_tail(&bad->list, &delete_items);
/* Everything below in this loop body concerns file extent (EXTENT_DATA) items. */
6825 if (key.type != BTRFS_EXTENT_DATA_KEY)
6827 fi = btrfs_item_ptr(buf, i,
6828 struct btrfs_file_extent_item);
6829 if (btrfs_file_extent_type(buf, fi) ==
6830 BTRFS_FILE_EXTENT_INLINE)
6832 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6835 data_bytes_allocated +=
6836 btrfs_file_extent_disk_num_bytes(buf, fi);
6837 if (data_bytes_allocated < root->sectorsize) {
6840 data_bytes_referenced +=
6841 btrfs_file_extent_num_bytes(buf, fi);
6842 add_data_backref(extent_cache,
6843 btrfs_file_extent_disk_bytenr(buf, fi),
6844 parent, owner, key.objectid, key.offset -
6845 btrfs_file_extent_offset(buf, fi), 1, 1,
6846 btrfs_file_extent_disk_num_bytes(buf, fi));
6850 struct btrfs_key first_key;
6852 first_key.objectid = 0;
6855 btrfs_item_key_to_cpu(buf, &first_key, 0);
6856 level = btrfs_header_level(buf);
6857 for (i = 0; i < nritems; i++) {
6858 struct extent_record tmpl;
6860 ptr = btrfs_node_blockptr(buf, i);
6861 size = root->nodesize;
6862 btrfs_node_key_to_cpu(buf, &key, i);
/* At the root's drop level, keys below the drop key get special handling (the action itself is not visible here). */
6864 if ((level == ri->drop_level)
6865 && is_dropped_key(&key, &ri->drop_key)) {
6870 memset(&tmpl, 0, sizeof(tmpl));
6871 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6872 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6877 tmpl.max_size = size;
6878 ret = add_extent_rec(extent_cache, &tmpl);
6882 ret = add_tree_backref(extent_cache, ptr, parent,
6885 error("add_tree_backref failed: %s",
6891 add_pending(nodes, seen, ptr, size);
6893 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of the node count as wasted btree space. */
6896 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6897 nritems) * sizeof(struct btrfs_key_ptr);
6899 total_btree_bytes += buf->len;
6900 if (fs_root_objectid(btrfs_header_owner(buf)))
6901 total_fs_tree_bytes += buf->len;
6902 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6903 total_extent_tree_bytes += buf->len;
6904 if (!found_old_backref &&
6905 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6906 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6907 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6908 found_old_backref = 1;
6910 free_extent_buffer(buf);
/*
 * Seed the scan with one tree root block.
 *
 * The block is queued with add_pending(): on the 'nodes' tree when its header
 * level is above zero, and on 'pending' in the other call (the keyword joining
 * the two calls is not visible in this excerpt, presumably an else). An
 * extent_record template whose start and max_size come from buf->start and
 * buf->len is then passed to add_extent_rec(), and finally a tree backref is
 * recorded with add_tree_backref(). For BTRFS_TREE_RELOC_OBJECTID, or when the
 * header backref revision is below BTRFS_MIXED_BACKREF_REV, buf->start is also
 * passed as the third argument; otherwise 0 and 'objectid' are passed.
 *
 * NOTE(review): the return value of add_extent_rec() is not captured on the
 * visible call; confirm that ignoring it is intended.
 */
6914 static int add_root_to_pending(struct extent_buffer *buf,
6915 struct cache_tree *extent_cache,
6916 struct cache_tree *pending,
6917 struct cache_tree *seen,
6918 struct cache_tree *nodes,
6921 struct extent_record tmpl;
/* Interior blocks and leaves are queued on different trees. */
6924 if (btrfs_header_level(buf) > 0)
6925 add_pending(nodes, seen, buf->start, buf->len);
6927 add_pending(pending, seen, buf->start, buf->len);
/* Describe the root block itself as an extent record. */
6929 memset(&tmpl, 0, sizeof(tmpl));
6930 tmpl.start = buf->start;
6935 tmpl.max_size = buf->len;
6936 add_extent_rec(extent_cache, &tmpl);
6938 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6939 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6940 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6943 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6948 /* as we fix the tree, we might be deleting blocks that
6949 * we're tracking for repair. This hook makes sure we
6950 * remove any backrefs for blocks as we are fixing them.
/*
 * Adjust the in-memory check bookkeeping for a reference that is being freed.
 *
 * The extent_record covering (bytenr, num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache. 'owner' >= BTRFS_FIRST_FREE_OBJECTID
 * selects the data-backref path (find_data_backref), anything else the
 * tree-backref path (find_tree_backref).
 *
 * Data path: found_ref and rec->refs are lowered by refs_to_drop when the
 * backref was seen as a real reference; num_refs and rec->extent_item_refs
 * are lowered when it was seen in the extent tree. The node flags found_ref
 * and found_extent_tree are cleared once the matching counter reaches zero.
 * Tree path: the flags are simply cleared and rec->extent_item_refs is
 * decremented by one when non-zero.
 *
 * maybe_free_extent_rec() is called on the record at the end.
 *
 * NOTE(review): both list_del() sites are guarded by
 * "!found_extent_tree && found_ref". Confirm that found_ref (rather than
 * !found_ref) is the intended condition for unlinking the backref.
 */
6952 static int free_extent_hook(struct btrfs_trans_handle *trans,
6953 struct btrfs_root *root,
6954 u64 bytenr, u64 num_bytes, u64 parent,
6955 u64 root_objectid, u64 owner, u64 offset,
6958 struct extent_record *rec;
6959 struct cache_extent *cache;
6961 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above the first free objectid are treated as data. */
6963 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6964 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6968 rec = container_of(cache, struct extent_record, cache);
6970 struct data_backref *back;
6971 back = find_data_backref(rec, parent, root_objectid, owner,
6972 offset, 1, bytenr, num_bytes);
6975 if (back->node.found_ref) {
6976 back->found_ref -= refs_to_drop;
6978 rec->refs -= refs_to_drop;
6980 if (back->node.found_extent_tree) {
6981 back->num_refs -= refs_to_drop;
6982 if (rec->extent_item_refs)
6983 rec->extent_item_refs -= refs_to_drop;
/* Keep the one-bit flags in sync with the counters. */
6985 if (back->found_ref == 0)
6986 back->node.found_ref = 0;
6987 if (back->num_refs == 0)
6988 back->node.found_extent_tree = 0;
6990 if (!back->node.found_extent_tree && back->node.found_ref) {
6991 list_del(&back->node.list);
6995 struct tree_backref *back;
6996 back = find_tree_backref(rec, parent, root_objectid);
6999 if (back->node.found_ref) {
7002 back->node.found_ref = 0;
7004 if (back->node.found_extent_tree) {
7005 if (rec->extent_item_refs)
7006 rec->extent_item_refs--;
7007 back->node.found_extent_tree = 0;
7009 if (!back->node.found_extent_tree && back->node.found_ref) {
7010 list_del(&back->node.list);
7014 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items that describe the extent starting at 'bytenr'.
 *
 * The search key starts at objectid 'bytenr' with offset (u64)-1 and is
 * looked up in root->fs_info->extent_root under 'trans'. The item at the
 * resulting slot is read back; the comparison against 'bytenr' decides
 * whether it still belongs to this extent.
 *
 * Items whose type is not one of the extent item / metadata item / backref
 * key types listed below are not deleted: the path is released and the
 * search key is moved backwards (previous offset when the type is 0,
 * otherwise previous type with offset (u64)-1).
 *
 * Matching items are reported on stderr and removed with btrfs_del_item().
 * When the removed item is an EXTENT_ITEM or METADATA_ITEM,
 * btrfs_update_block_group() is called as well, using found_key.offset as
 * the length for EXTENT_ITEM and root->nodesize for METADATA_ITEM.
 *
 * 'new_len' is not referenced in the visible lines of this excerpt.
 */
7019 static int delete_extent_records(struct btrfs_trans_handle *trans,
7020 struct btrfs_root *root,
7021 struct btrfs_path *path,
7022 u64 bytenr, u64 new_len)
7024 struct btrfs_key key;
7025 struct btrfs_key found_key;
7026 struct extent_buffer *leaf;
7031 key.objectid = bytenr;
7033 key.offset = (u64)-1;
7036 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7043 if (path->slots[0] == 0)
7049 leaf = path->nodes[0];
7050 slot = path->slots[0];
7052 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7053 if (found_key.objectid != bytenr)
/* Anything that is not an extent item or a backref item is skipped. */
7056 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7057 found_key.type != BTRFS_METADATA_ITEM_KEY &&
7058 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7059 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7060 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7061 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7062 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7063 btrfs_release_path(path);
/* Step the search key to just before the item we skipped. */
7064 if (found_key.type == 0) {
7065 if (found_key.offset == 0)
7067 key.offset = found_key.offset - 1;
7068 key.type = found_key.type;
7070 key.type = found_key.type - 1;
7071 key.offset = (u64)-1;
7075 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7076 found_key.objectid, found_key.type, found_key.offset);
7078 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7081 btrfs_release_path(path);
/* Only the extent item itself carries a length for the block group. */
7083 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7084 found_key.type == BTRFS_METADATA_ITEM_KEY) {
7085 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7086 found_key.offset : root->nodesize;
7088 ret = btrfs_update_block_group(trans, root, bytenr,
7095 btrfs_release_path(path);
7100 * for a single backref, this will allocate a new extent
7101 * and add the backref to it.
7103 static int record_extent(struct btrfs_trans_handle *trans,
7104 struct btrfs_fs_info *info,
7105 struct btrfs_path *path,
7106 struct extent_record *rec,
7107 struct extent_backref *back,
7108 int allocated, u64 flags)
7111 struct btrfs_root *extent_root = info->extent_root;
7112 struct extent_buffer *leaf;
7113 struct btrfs_key ins_key;
7114 struct btrfs_extent_item *ei;
7115 struct data_backref *dback;
7116 struct btrfs_tree_block_info *bi;
7119 rec->max_size = max_t(u64, rec->max_size,
7120 info->extent_root->nodesize);
7123 u32 item_size = sizeof(*ei);
7126 item_size += sizeof(*bi);
7128 ins_key.objectid = rec->start;
7129 ins_key.offset = rec->max_size;
7130 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7132 ret = btrfs_insert_empty_item(trans, extent_root, path,
7133 &ins_key, item_size);
7137 leaf = path->nodes[0];
7138 ei = btrfs_item_ptr(leaf, path->slots[0],
7139 struct btrfs_extent_item);
7141 btrfs_set_extent_refs(leaf, ei, 0);
7142 btrfs_set_extent_generation(leaf, ei, rec->generation);
7144 if (back->is_data) {
7145 btrfs_set_extent_flags(leaf, ei,
7146 BTRFS_EXTENT_FLAG_DATA);
7148 struct btrfs_disk_key copy_key;;
7150 bi = (struct btrfs_tree_block_info *)(ei + 1);
7151 memset_extent_buffer(leaf, 0, (unsigned long)bi,
7154 btrfs_set_disk_key_objectid(©_key,
7155 rec->info_objectid);
7156 btrfs_set_disk_key_type(©_key, 0);
7157 btrfs_set_disk_key_offset(©_key, 0);
7159 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7160 btrfs_set_tree_block_key(leaf, bi, ©_key);
7162 btrfs_set_extent_flags(leaf, ei,
7163 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7166 btrfs_mark_buffer_dirty(leaf);
7167 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7168 rec->max_size, 1, 0);
7171 btrfs_release_path(path);
7174 if (back->is_data) {
7178 dback = to_data_backref(back);
7179 if (back->full_backref)
7180 parent = dback->parent;
7184 for (i = 0; i < dback->found_ref; i++) {
7185 /* if parent != 0, we're doing a full backref
7186 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7187 * just makes the backref allocator create a data
7190 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7191 rec->start, rec->max_size,
7195 BTRFS_FIRST_FREE_OBJECTID :
7201 fprintf(stderr, "adding new data backref"
7202 " on %llu %s %llu owner %llu"
7203 " offset %llu found %d\n",
7204 (unsigned long long)rec->start,
7205 back->full_backref ?
7207 back->full_backref ?
7208 (unsigned long long)parent :
7209 (unsigned long long)dback->root,
7210 (unsigned long long)dback->owner,
7211 (unsigned long long)dback->offset,
7215 struct tree_backref *tback;
7217 tback = to_tree_backref(back);
7218 if (back->full_backref)
7219 parent = tback->parent;
7223 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7224 rec->start, rec->max_size,
7225 parent, tback->root, 0, 0);
7226 fprintf(stderr, "adding new tree backref on "
7227 "start %llu len %llu parent %llu root %llu\n",
7228 rec->start, rec->max_size, parent, tback->root);
7231 btrfs_release_path(path);
7235 static struct extent_entry *find_entry(struct list_head *entries,
7236 u64 bytenr, u64 bytes)
7238 struct extent_entry *entry = NULL;
7240 list_for_each_entry(entry, entries, list) {
7241 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry on 'entries' that the most backrefs agree on.
 *
 * Entries whose 'broken' count equals their 'count' are not trusted. Among
 * the rest, 'best' tracks the candidate with the highest count and 'prev'
 * the previously visited entry. When the current entry ties with 'best' (or
 * with an unbroken 'prev') the tie is not resolved here and the search keeps
 * going. What is returned for an unresolved tie is decided on lines that are
 * not visible in this excerpt; callers in this file treat a NULL result as
 * "no clear winner".
 */
7248 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7250 struct extent_entry *entry, *best = NULL, *prev = NULL;
7252 list_for_each_entry(entry, entries, list) {
7254 * If there are as many broken entries as entries then we know
7255 * not to trust this particular entry.
7257 if (entry->broken == entry->count)
7261 * Special case, when there are only two entries and 'best' is
7271 * If our current entry == best then we can't be sure our best
7272 * is really the best, so we need to keep searching.
7274 if (best && best->count == entry->count) {
7280 /* Prev == entry, not good enough, have to keep searching */
7281 if (!prev->broken && prev->count == entry->count)
7285 best = (prev->count > entry->count) ? prev : entry;
7286 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with 'entry'.
 *
 * @info:  filesystem being repaired
 * @path:  scratch path, released before returning
 * @dback: data backref whose file extent item disagrees with 'entry'
 * @entry: the (bytenr, bytes) pair chosen as correct
 *
 * Phase 1 (read only): the root named by dback->root is read, then the tree
 * is searched from (dback->owner, EXTENT_DATA, dback->offset) and walked
 * forward until a file extent item with dback->disk_bytenr / dback->bytes
 * is found. The key of that item is kept in 'key'.
 *
 * Phase 2 (in a transaction): the same key is searched again with cow
 * enabled and the item is updated:
 *   - a broken backref with a different bytenr just gets entry->bytenr;
 *   - when the ref starts after or before the entry, the disk bytenr is set
 *     to entry->bytenr and the file extent offset is rewritten, after
 *     checking that the referenced range stays inside the entry;
 *   - disk_num_bytes is always set to entry->bytes, and ram_bytes too when
 *     the extent is not compressed.
 * Compressed extents whose bytenr differs from the entry are reported and
 * not repaired.
 *
 * Returns the earlier error if any, otherwise the result of
 * btrfs_commit_transaction().
 */
7294 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7295 struct data_backref *dback, struct extent_entry *entry)
7297 struct btrfs_trans_handle *trans;
7298 struct btrfs_root *root;
7299 struct btrfs_file_extent_item *fi;
7300 struct extent_buffer *leaf;
7301 struct btrfs_key key;
7305 key.objectid = dback->root;
7306 key.type = BTRFS_ROOT_ITEM_KEY;
7307 key.offset = (u64)-1;
7308 root = btrfs_read_fs_root(info, &key);
7310 fprintf(stderr, "Couldn't find root for our ref\n");
7315 * The backref points to the original offset of the extent if it was
7316 * split, so we need to search down to the offset we have and then walk
7317 * forward until we find the backref we're looking for.
7319 key.objectid = dback->owner;
7320 key.type = BTRFS_EXTENT_DATA_KEY;
7321 key.offset = dback->offset;
7322 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7324 fprintf(stderr, "Error looking up ref %d\n", ret);
7329 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7330 ret = btrfs_next_leaf(root, path);
7332 fprintf(stderr, "Couldn't find our ref, next\n");
7336 leaf = path->nodes[0];
7337 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7338 if (key.objectid != dback->owner ||
7339 key.type != BTRFS_EXTENT_DATA_KEY) {
7340 fprintf(stderr, "Couldn't find our ref, search\n");
7343 fi = btrfs_item_ptr(leaf, path->slots[0],
7344 struct btrfs_file_extent_item);
7345 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7346 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7348 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7353 btrfs_release_path(path);
7355 trans = btrfs_start_transaction(root, 1);
7357 return PTR_ERR(trans);
7360 * Ok we have the key of the file extent we want to fix, now we can cow
7361 * down to the thing and fix it.
7363 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7365 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7366 key.objectid, key.type, key.offset, ret);
7370 fprintf(stderr, "Well that's odd, we just found this key "
7371 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7376 leaf = path->nodes[0];
7377 fi = btrfs_item_ptr(leaf, path->slots[0],
7378 struct btrfs_file_extent_item);
7380 if (btrfs_file_extent_compression(leaf, fi) &&
7381 dback->disk_bytenr != entry->bytenr) {
7382 fprintf(stderr, "Ref doesn't match the record start and is "
7383 "compressed, please take a btrfs-image of this file "
7384 "system and send it to a btrfs developer so they can "
7385 "complete this functionality for bytenr %Lu\n",
7386 dback->disk_bytenr);
7391 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7392 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7393 } else if (dback->disk_bytenr > entry->bytenr) {
7394 u64 off_diff, offset;
7396 off_diff = dback->disk_bytenr - entry->bytenr;
7397 offset = btrfs_file_extent_offset(leaf, fi);
7398 if (dback->disk_bytenr + offset +
7399 btrfs_file_extent_num_bytes(leaf, fi) >
7400 entry->bytenr + entry->bytes) {
7401 fprintf(stderr, "Ref is past the entry end, please "
7402 "take a btrfs-image of this file system and "
7403 "send it to a btrfs developer, ref %Lu\n",
7404 dback->disk_bytenr);
7409 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7410 btrfs_set_file_extent_offset(leaf, fi, offset);
7411 } else if (dback->disk_bytenr < entry->bytenr) {
7414 offset = btrfs_file_extent_offset(leaf, fi);
7415 if (dback->disk_bytenr + offset < entry->bytenr) {
7416 fprintf(stderr, "Ref is before the entry start, please"
7417 " take a btrfs-image of this file system and "
7418 "send it to a btrfs developer, ref %Lu\n",
7419 dback->disk_bytenr);
/* Re-express the file extent offset relative to the entry start. */
7424 offset += dback->disk_bytenr;
7425 offset -= entry->bytenr;
7426 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7427 btrfs_set_file_extent_offset(leaf, fi, offset);
7430 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7433 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7434 * only do this if we aren't using compression, otherwise it's a
7437 if (!btrfs_file_extent_compression(leaf, fi))
7438 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7440 printf("ram bytes may be wrong?\n");
7441 btrfs_mark_buffer_dirty(leaf);
7443 err = btrfs_commit_transaction(trans, root);
7444 btrfs_release_path(path);
7445 return ret ? ret : err;
/*
 * Make the keyed data backrefs of 'rec' agree on one (bytenr, bytes) pair.
 *
 * Pass 1: every non-full data backref with at least one found reference is
 * grouped into an extent_entry keyed by (disk_bytenr, bytes). A backref that
 * differs from rec->start / rec->nr, or is flagged broken, marks a mismatch.
 * If there is at most one entry and no mismatch, nothing needs repairing.
 *
 * Selection: find_most_right_entry() chooses the entry most backrefs agree
 * on. Without a clear winner the extent record itself (rec->start, rec->nr)
 * is consulted and, when broken backrefs exist and the record was found,
 * added as an extra entry before choosing again. The function gives up when
 * no winner emerges or when the winner's bytenr differs from rec->start.
 *
 * Pass 2: repair_ref() is called for every considered backref that differs
 * from the winning entry.
 *
 * All extent_entry objects are unlinked from the local list before
 * returning. The exact return values are set on lines not visible in this
 * excerpt.
 */
7448 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7449 struct extent_record *rec)
7451 struct extent_backref *back;
7452 struct data_backref *dback;
7453 struct extent_entry *entry, *best = NULL;
7456 int broken_entries = 0;
7461 * Metadata is easy and the backrefs should always agree on bytenr and
7462 * size, if not we've got bigger issues.
7467 list_for_each_entry(back, &rec->backrefs, list) {
7468 if (back->full_backref || !back->is_data)
7471 dback = to_data_backref(back);
7474 * We only pay attention to backrefs that we found a real
7477 if (dback->found_ref == 0)
7481 * For now we only catch when the bytes don't match, not the
7482 * bytenr. We can easily do this at the same time, but I want
7483 * to have a fs image to test on before we just add repair
7484 * functionality willy-nilly so we know we won't screw up the
7488 entry = find_entry(&entries, dback->disk_bytenr,
7491 entry = malloc(sizeof(struct extent_entry));
7496 memset(entry, 0, sizeof(*entry));
7497 entry->bytenr = dback->disk_bytenr;
7498 entry->bytes = dback->bytes;
7499 list_add_tail(&entry->list, &entries);
7504 * If we only have on entry we may think the entries agree when
7505 * in reality they don't so we have to do some extra checking.
7507 if (dback->disk_bytenr != rec->start ||
7508 dback->bytes != rec->nr || back->broken)
7519 /* Yay all the backrefs agree, carry on good sir */
7520 if (nr_entries <= 1 && !mismatch)
7523 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7524 "%Lu\n", rec->start);
7527 * First we want to see if the backrefs can agree amongst themselves who
7528 * is right, so figure out which one of the entries has the highest
7531 best = find_most_right_entry(&entries);
7534 * Ok so we may have an even split between what the backrefs think, so
7535 * this is where we use the extent ref to see what it thinks.
7538 entry = find_entry(&entries, rec->start, rec->nr);
7539 if (!entry && (!broken_entries || !rec->found_rec)) {
7540 fprintf(stderr, "Backrefs don't agree with each other "
7541 "and extent record doesn't agree with anybody,"
7542 " so we can't fix bytenr %Lu bytes %Lu\n",
7543 rec->start, rec->nr);
7546 } else if (!entry) {
7548 * Ok our backrefs were broken, we'll assume this is the
7549 * correct value and add an entry for this range.
7551 entry = malloc(sizeof(struct extent_entry));
7556 memset(entry, 0, sizeof(*entry));
7557 entry->bytenr = rec->start;
7558 entry->bytes = rec->nr;
7559 list_add_tail(&entry->list, &entries);
7563 best = find_most_right_entry(&entries);
7565 fprintf(stderr, "Backrefs and extent record evenly "
7566 "split on who is right, this is going to "
7567 "require user input to fix bytenr %Lu bytes "
7568 "%Lu\n", rec->start, rec->nr);
7575 * I don't think this can happen currently as we'll abort() if we catch
7576 * this case higher up, but in case somebody removes that we still can't
7577 * deal with it properly here yet, so just bail out of that's the case.
7579 if (best->bytenr != rec->start) {
7580 fprintf(stderr, "Extent start and backref starts don't match, "
7581 "please use btrfs-image on this file system and send "
7582 "it to a btrfs developer so they can make fsck fix "
7583 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7584 rec->start, rec->nr);
7590 * Ok great we all agreed on an extent record, let's go find the real
7591 * references and fix up the ones that don't match.
7593 list_for_each_entry(back, &rec->backrefs, list) {
7594 if (back->full_backref || !back->is_data)
7597 dback = to_data_backref(back);
7600 * Still ignoring backrefs that don't have a real ref attached
7603 if (dback->found_ref == 0)
7606 if (dback->bytes == best->bytes &&
7607 dback->disk_bytenr == best->bytenr)
7610 ret = repair_ref(info, path, dback, best);
7616 * Ok we messed with the actual refs, which means we need to drop our
7617 * entire cache and go back and rescan. I know this is a huge pain and
7618 * adds a lot of extra work, but it's the only way to be safe. Once all
7619 * the backrefs agree we may not need to do anything to the extent
7624 while (!list_empty(&entries)) {
7625 entry = list_entry(entries.next, struct extent_entry, list);
7626 list_del_init(&entry->list);
/*
 * Decide whether 'rec' really has duplicate extent items.
 *
 * Records that already have found_rec set, or more than one duplicate, are
 * left for the deletion path. Otherwise 'rec' was built from backrefs only,
 * so its single duplicate (which came from a real extent item) is promoted:
 * 'rec' is removed from 'extent_cache', the duplicate ('good') is
 * re-initialised, inherits rec->refs and rec->backrefs, and any record in
 * the cache that overlaps good's range is folded into it:
 *   - an overlapping record backed by an extent item (or with duplicates of
 *     its own) becomes a duplicate of 'good', and 'good' is queued on the
 *     global duplicate_extents list;
 *   - an overlapping record without an extent item only contributes its
 *     refs and backrefs.
 * 'good' is finally inserted into 'extent_cache'.
 *
 * The last visible statement returns 0 when 'good' ended up with
 * duplicates and 1 when it did not. The values returned on the earlier exit
 * paths are not visible in this excerpt.
 */
7632 static int process_duplicates(struct btrfs_root *root,
7633 struct cache_tree *extent_cache,
7634 struct extent_record *rec)
7636 struct extent_record *good, *tmp;
7637 struct cache_extent *cache;
7641 * If we found a extent record for this extent then return, or if we
7642 * have more than one duplicate we are likely going to need to delete
7645 if (rec->found_rec || rec->num_duplicates > 1)
7648 /* Shouldn't happen but just in case */
7649 BUG_ON(!rec->num_duplicates);
7652 * So this happens if we end up with a backref that doesn't match the
7653 * actual extent entry. So either the backref is bad or the extent
7654 * entry is bad. Either way we want to have the extent_record actually
7655 * reflect what we found in the extent_tree, so we need to take the
7656 * duplicate out and use that as the extent_record since the only way we
7657 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7659 remove_cache_extent(extent_cache, &rec->cache);
7661 good = to_extent_record(rec->dups.next);
7662 list_del_init(&good->list);
7663 INIT_LIST_HEAD(&good->backrefs);
7664 INIT_LIST_HEAD(&good->dups);
7665 good->cache.start = good->start;
7666 good->cache.size = good->nr;
7667 good->content_checked = 0;
7668 good->owner_ref_checked = 0;
7669 good->num_duplicates = 0;
7670 good->refs = rec->refs;
7671 list_splice_init(&rec->backrefs, &good->backrefs);
7673 cache = lookup_cache_extent(extent_cache, good->start,
7677 tmp = container_of(cache, struct extent_record, cache);
7680 * If we find another overlapping extent and it's found_rec is
7681 * set then it's a duplicate and we need to try and delete
7684 if (tmp->found_rec || tmp->num_duplicates > 0) {
7685 if (list_empty(&good->list))
7686 list_add_tail(&good->list,
7687 &duplicate_extents);
7688 good->num_duplicates += tmp->num_duplicates + 1;
7689 list_splice_init(&tmp->dups, &good->dups);
7690 list_del_init(&tmp->list);
7691 list_add_tail(&tmp->list, &good->dups);
7692 remove_cache_extent(extent_cache, &tmp->cache);
7697 * Ok we have another non extent item backed extent rec, so lets
7698 * just add it to this extent and carry on like we did above.
7700 good->refs += tmp->refs;
7701 list_splice_init(&tmp->backrefs, &good->backrefs);
7702 remove_cache_extent(extent_cache, &tmp->cache);
7705 ret = insert_cache_extent(extent_cache, &good->cache);
7708 return good->num_duplicates ? 0 : 1;
/*
 * Delete the redundant EXTENT_ITEMs among 'rec' and its duplicates.
 *
 * The duplicates on rec->dups are compared by start and length to find the
 * record that covers all the others ('good'). Partially overlapping extents
 * are reported on stderr. Every record other than the chosen one is moved
 * to a local delete list.
 *
 * In one transaction on the extent root, each listed record that was backed
 * by a real extent item (found_rec != 0) has its
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) item searched for and deleted.
 * Metadata records are not expected here and are reported.
 *
 * Both the delete list and rec->dups are emptied before returning.
 *
 * Returns a non-zero 'ret' as is, otherwise nr_del (presumably the number
 * of items deleted; its increment is not visible here). When nothing was
 * deleted and no error occurred, rec->num_duplicates is reset to 0.
 */
7711 static int delete_duplicate_records(struct btrfs_root *root,
7712 struct extent_record *rec)
7714 struct btrfs_trans_handle *trans;
7715 LIST_HEAD(delete_list);
7716 struct btrfs_path path;
7717 struct extent_record *tmp, *good, *n;
7720 struct btrfs_key key;
7722 btrfs_init_path(&path);
7725 /* Find the record that covers all of the duplicates. */
7726 list_for_each_entry(tmp, &rec->dups, list) {
7727 if (good->start < tmp->start)
7729 if (good->nr > tmp->nr)
7732 if (tmp->start + tmp->nr < good->start + good->nr) {
7733 fprintf(stderr, "Ok we have overlapping extents that "
7734 "aren't completely covered by each other, this "
7735 "is going to require more careful thought. "
7736 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7737 tmp->start, tmp->nr, good->start, good->nr);
7744 list_add_tail(&rec->list, &delete_list);
7746 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7749 list_move_tail(&tmp->list, &delete_list);
/* From here on 'root' refers to the extent root. */
7752 root = root->fs_info->extent_root;
7753 trans = btrfs_start_transaction(root, 1);
7754 if (IS_ERR(trans)) {
7755 ret = PTR_ERR(trans);
7759 list_for_each_entry(tmp, &delete_list, list) {
7760 if (tmp->found_rec == 0)
7762 key.objectid = tmp->start;
7763 key.type = BTRFS_EXTENT_ITEM_KEY;
7764 key.offset = tmp->nr;
7766 /* Shouldn't happen but just in case */
7767 if (tmp->metadata) {
7768 fprintf(stderr, "Well this shouldn't happen, extent "
7769 "record overlaps but is metadata? "
7770 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7774 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7780 ret = btrfs_del_item(trans, root, &path);
7783 btrfs_release_path(&path);
7786 err = btrfs_commit_transaction(trans, root);
7790 while (!list_empty(&delete_list)) {
7791 tmp = to_extent_record(delete_list.next);
7792 list_del_init(&tmp->list);
7798 while (!list_empty(&rec->dups)) {
7799 tmp = to_extent_record(rec->dups.next);
7800 list_del_init(&tmp->list);
7804 btrfs_release_path(&path);
7806 if (!ret && !nr_del)
7807 rec->num_duplicates = 0;
7809 return ret ? ret : nr_del;
/*
 * Try to locate the file extent items behind backrefs that the scan did
 * not match.
 *
 * For every keyed (non-full) data backref of 'rec' with found_ref == 0, the
 * referencing root is read and (owner, EXTENT_DATA, offset) is searched
 * read-only. A root that does not exist (-ENOENT) means the ref is bad and
 * is skipped; any other error from reading the root is returned.
 *
 * When the file extent item exists, its disk bytenr / disk_num_bytes are
 * read and 'extent_cache' is probed at that bytenr. Per the comment below,
 * a backref whose bytenr already has its own extent record backed by an
 * extent item is not adopted. Otherwise the backref is counted as found
 * (found_ref += 1) and takes over the bytenr and byte count from the file
 * extent item, so that verify_backrefs() can compare it against the record.
 */
7812 static int find_possible_backrefs(struct btrfs_fs_info *info,
7813 struct btrfs_path *path,
7814 struct cache_tree *extent_cache,
7815 struct extent_record *rec)
7817 struct btrfs_root *root;
7818 struct extent_backref *back;
7819 struct data_backref *dback;
7820 struct cache_extent *cache;
7821 struct btrfs_file_extent_item *fi;
7822 struct btrfs_key key;
7826 list_for_each_entry(back, &rec->backrefs, list) {
7827 /* Don't care about full backrefs (poor unloved backrefs) */
7828 if (back->full_backref || !back->is_data)
7831 dback = to_data_backref(back);
7833 /* We found this one, we don't need to do a lookup */
7834 if (dback->found_ref)
7837 key.objectid = dback->root;
7838 key.type = BTRFS_ROOT_ITEM_KEY;
7839 key.offset = (u64)-1;
7841 root = btrfs_read_fs_root(info, &key);
7843 /* No root, definitely a bad ref, skip */
7844 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7846 /* Other err, exit */
7848 return PTR_ERR(root);
7850 key.objectid = dback->owner;
7851 key.type = BTRFS_EXTENT_DATA_KEY;
7852 key.offset = dback->offset;
7853 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7855 btrfs_release_path(path);
7858 /* Didn't find it, we can carry on */
7863 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7864 struct btrfs_file_extent_item);
7865 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7866 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7867 btrfs_release_path(path);
7868 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7870 struct extent_record *tmp;
7871 tmp = container_of(cache, struct extent_record, cache);
7874 * If we found an extent record for the bytenr for this
7875 * particular backref then we can't add it to our
7876 * current extent record. We only want to add backrefs
7877 * that don't have a corresponding extent item in the
7878 * extent tree since they likely belong to this record
7879 * and we need to fix it if it doesn't match bytenrs.
7885 dback->found_ref += 1;
7886 dback->disk_bytenr = bytenr;
7887 dback->bytes = bytes;
7890 * Set this so the verify backref code knows not to trust the
7891 * values in this backref.
7900 * Record orphan data ref into corresponding root.
7902 * Return 0 if the extent item contains data ref and recorded.
7903 * Return 1 if the extent item contains no useful data ref
7904 * In that case, it may contain only shared_dataref or metadata backref
7905 * or the file extent exists(this should be handled by the extent bytenr
7907 * Return <0 if something goes wrong.
7909 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7910 struct extent_record *rec)
7912 struct btrfs_key key;
7913 struct btrfs_root *dest_root;
7914 struct extent_backref *back;
7915 struct data_backref *dback;
7916 struct orphan_data_extent *orphan;
7917 struct btrfs_path path;
7918 int recorded_data_ref = 0;
7923 btrfs_init_path(&path);
7924 list_for_each_entry(back, &rec->backrefs, list) {
7925 if (back->full_backref || !back->is_data ||
7926 !back->found_extent_tree)
7928 dback = to_data_backref(back);
7929 if (dback->found_ref)
7931 key.objectid = dback->root;
7932 key.type = BTRFS_ROOT_ITEM_KEY;
7933 key.offset = (u64)-1;
7935 dest_root = btrfs_read_fs_root(fs_info, &key);
7937 /* For non-exist root we just skip it */
7938 if (IS_ERR(dest_root) || !dest_root)
7941 key.objectid = dback->owner;
7942 key.type = BTRFS_EXTENT_DATA_KEY;
7943 key.offset = dback->offset;
7945 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7946 btrfs_release_path(&path);
7948 * For ret < 0, it's OK since the fs-tree may be corrupted,
7949 * we need to record it for inode/file extent rebuild.
7950 * For ret > 0, we record it only for file extent rebuild.
7951 * For ret == 0, the file extent exists but only bytenr
7952 * mismatch, let the original bytenr fix routine to handle,
7958 orphan = malloc(sizeof(*orphan));
7963 INIT_LIST_HEAD(&orphan->list);
7964 orphan->root = dback->root;
7965 orphan->objectid = dback->owner;
7966 orphan->offset = dback->offset;
7967 orphan->disk_bytenr = rec->cache.start;
7968 orphan->disk_len = rec->cache.size;
7969 list_add(&dest_root->orphan_data_extents, &orphan->list);
7970 recorded_data_ref = 1;
7973 btrfs_release_path(&path);
7975 return !recorded_data_ref;
7981 * when an incorrect extent item is found, this will delete
7982 * all of the existing entries for it and recreate them
7983 * based on what the tree scan found.
/*
 * Rebuild the extent-tree items of 'rec' from what the tree scan found.
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache of extent records, used to look for stray backrefs
 * @rec:          the record whose extent item is wrong
 *
 * For data extents whose counted refs differ from the extent item's refs,
 * find_possible_backrefs() and verify_backrefs() run first so the backrefs
 * agree with each other. Then, inside one transaction on the extent root:
 * delete_extent_records() removes the existing items, the corrupt-block
 * cache is consulted for the extent's range, and record_extent() is called
 * for each backref that was actually found. BTRFS_BLOCK_FLAG_FULL_BACKREF
 * is passed along when rec->flag_block_full_backref is set.
 *
 * The transaction is committed and the local path released before
 * returning. The exact return value is set on lines not visible here.
 */
7985 static int fixup_extent_refs(struct btrfs_fs_info *info,
7986 struct cache_tree *extent_cache,
7987 struct extent_record *rec)
7989 struct btrfs_trans_handle *trans = NULL;
7991 struct btrfs_path path;
7992 struct list_head *cur = rec->backrefs.next;
7993 struct cache_extent *cache;
7994 struct extent_backref *back;
7998 if (rec->flag_block_full_backref)
7999 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8001 btrfs_init_path(&path);
8002 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8004 * Sometimes the backrefs themselves are so broken they don't
8005 * get attached to any meaningful rec, so first go back and
8006 * check any of our backrefs that we couldn't find and throw
8007 * them into the list if we find the backref so that
8008 * verify_backrefs can figure out what to do.
8010 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8015 /* step one, make sure all of the backrefs agree */
8016 ret = verify_backrefs(info, &path, rec);
8020 trans = btrfs_start_transaction(info->extent_root, 1);
8021 if (IS_ERR(trans)) {
8022 ret = PTR_ERR(trans);
8026 /* step two, delete all the existing records */
8027 ret = delete_extent_records(trans, info->extent_root, &path,
8028 rec->start, rec->max_size);
8033 /* was this block corrupt? If so, don't add references to it */
8034 cache = lookup_cache_extent(info->corrupt_blocks,
8035 rec->start, rec->max_size);
8041 /* step three, recreate all the refs we did find */
8042 while(cur != &rec->backrefs) {
8043 back = to_extent_backref(cur);
8047 * if we didn't find any references, don't create a
8050 if (!back->found_ref)
8053 rec->bad_full_backref = 0;
8054 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8062 int err = btrfs_commit_transaction(trans, info->extent_root);
8067 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of 'rec'.
 *
 * The item is looked up in the extent root as (rec->start, METADATA_ITEM,
 * rec->info_level) for metadata records and as (rec->start, EXTENT_ITEM,
 * rec->max_size) otherwise. Its flags are read, the full-backref bit is
 * made to match rec->flag_block_full_backref, and the leaf is marked dirty.
 *
 * Returns PTR_ERR() when the transaction cannot be started and the result
 * of btrfs_commit_transaction() on the success path. On the two failure
 * paths after the search the transaction is still committed; the values
 * returned there are not visible in this excerpt.
 */
8071 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8072 struct extent_record *rec)
8074 struct btrfs_trans_handle *trans;
8075 struct btrfs_root *root = fs_info->extent_root;
8076 struct btrfs_path path;
8077 struct btrfs_extent_item *ei;
8078 struct btrfs_key key;
8082 key.objectid = rec->start;
/* Metadata items are keyed by level, extent items by length. */
8083 if (rec->metadata) {
8084 key.type = BTRFS_METADATA_ITEM_KEY;
8085 key.offset = rec->info_level;
8087 key.type = BTRFS_EXTENT_ITEM_KEY;
8088 key.offset = rec->max_size;
8091 trans = btrfs_start_transaction(root, 0);
8093 return PTR_ERR(trans);
8095 btrfs_init_path(&path);
8096 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8098 btrfs_release_path(&path);
8099 btrfs_commit_transaction(trans, root);
8102 fprintf(stderr, "Didn't find extent for %llu\n",
8103 (unsigned long long)rec->start);
8104 btrfs_release_path(&path);
8105 btrfs_commit_transaction(trans, root);
8109 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8110 struct btrfs_extent_item);
8111 flags = btrfs_extent_flags(path.nodes[0], ei);
8112 if (rec->flag_block_full_backref) {
8113 fprintf(stderr, "setting full backref on %llu\n",
8114 (unsigned long long)key.objectid);
8115 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8117 fprintf(stderr, "clearing full backref on %llu\n",
8118 (unsigned long long)key.objectid);
8119 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8121 btrfs_set_extent_flags(path.nodes[0], ei, flags);
8122 btrfs_mark_buffer_dirty(path.nodes[0]);
8123 btrfs_release_path(&path);
8124 return btrfs_commit_transaction(trans, root);
8127 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent in the extent
 * tree.
 *
 * @trans:   running transaction
 * @info:    filesystem whose extent root is searched
 * @corrupt: the corrupt block; corrupt->key locates it, corrupt->level is
 *           its level and corrupt->cache.start its bytenr
 *
 * The search stops one level above the corrupt block
 * (path.lowest_level = corrupt->level + 1). The slot the search landed on
 * is checked first; failing that, every pointer in that node is compared
 * against corrupt->cache.start. When a match is found the pointer is
 * removed with btrfs_del_ptr(). The handling when no pointer matches
 * depends on whether the node is the extent root's own node; the
 * statements inside those branches are not visible in this excerpt.
 */
8128 static int prune_one_block(struct btrfs_trans_handle *trans,
8129 struct btrfs_fs_info *info,
8130 struct btrfs_corrupt_block *corrupt)
8133 struct btrfs_path path;
8134 struct extent_buffer *eb;
8138 int level = corrupt->level + 1;
8140 btrfs_init_path(&path);
8142 /* we want to stop at the parent to our busted block */
8143 path.lowest_level = level;
8145 ret = btrfs_search_slot(trans, info->extent_root,
8146 &corrupt->key, &path, -1, 1);
8151 eb = path.nodes[level];
8158 * hopefully the search gave us the block we want to prune,
8159 * lets try that first
8161 slot = path.slots[level];
8162 found = btrfs_node_blockptr(eb, slot);
8163 if (found == corrupt->cache.start)
8166 nritems = btrfs_header_nritems(eb);
8168 /* the search failed, lets scan this node and hope we find it */
8169 for (slot = 0; slot < nritems; slot++) {
8170 found = btrfs_node_blockptr(eb, slot);
8171 if (found == corrupt->cache.start)
8175 * we couldn't find the bad block. TODO, search all the nodes for pointers
8178 if (eb == info->extent_root->node) {
8183 btrfs_release_path(&path);
8188 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8189 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8192 btrfs_release_path(&path);
/*
 * Prune every block recorded in info->corrupt_blocks from the extent tree.
 *
 * Entries are taken from the cache starting at offset 0. A transaction on
 * the extent root is started (the condition guarding the start is not
 * visible here), prune_one_block() is called for the entry, and the entry
 * is removed from the cache.
 *
 * Returns PTR_ERR() if the transaction cannot be started. The visible exit
 * returns the result of btrfs_commit_transaction().
 *
 * NOTE(review): the return value of prune_one_block() is ignored; confirm
 * that pruning is meant to be best-effort.
 */
8196 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8198 struct btrfs_trans_handle *trans = NULL;
8199 struct cache_extent *cache;
8200 struct btrfs_corrupt_block *corrupt;
8203 cache = search_cache_extent(info->corrupt_blocks, 0);
8207 trans = btrfs_start_transaction(info->extent_root, 1);
8209 return PTR_ERR(trans);
8211 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8212 prune_one_block(trans, info, corrupt);
8213 remove_cache_extent(info->corrupt_blocks, cache);
8216 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear EXTENT_DIRTY ranges from fs_info->free_space_cache and walk the
 * block groups.
 *
 * Dirty ranges are found with find_first_extent_bit() and cleared with
 * clear_extent_dirty(). Block groups are then visited in address order via
 * btrfs_lookup_first_block_group(), advancing 'start' to the end of each
 * group (key.objectid + key.offset). What is reset on each block group
 * happens on lines not visible in this excerpt.
 */
8220 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8222 struct btrfs_block_group_cache *cache;
8227 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8228 &start, &end, EXTENT_DIRTY);
8231 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8237 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the walk right after the block group just visited. */
8242 start = cache->key.objectid + cache->key.offset;
/*
 * Verify the extent records collected in @extent_cache and, when the global
 * repair flag is set, try to fix what is found.
 *
 * Visible steps, in order:
 *  - mark the range of each record (rec->start .. rec->start +
 *    rec->max_size - 1) and of each entry in fs_info->corrupt_blocks dirty
 *    in fs_info->excluded_extents, then call prune_corrupt_blocks() and
 *    reset_cached_block_groups(); per the in-code comment this is done for
 *    the repair case so that nothing is allocated from problem extents;
 *  - while repair is set, drain the duplicate_extents list through
 *    process_duplicates() and delete_duplicate_records();
 *  - for each record report: multiple extent items, ref count mismatch
 *    (rec->refs vs rec->extent_item_refs), backpointer mismatch, failed
 *    owner ref check, bad full backref, metadata crossing a stripe boundary
 *    and chunk type mismatch; fixup_extent_refs() and fixup_extent_flags()
 *    are invoked on the repair paths;
 *  - remove the record from @extent_cache, free its backrefs and, when
 *    (!init_extent_tree && repair && (!cur_err || fixed)), clear its range
 *    from excluded_extents again;
 *  - finally either print the failed-to-repair message (ret set and not
 *    -EAGAIN) or run btrfs_fix_block_accounting() inside a transaction on
 *    the extent root, commit it and print the repaired message.
 *
 * @root:         root used to reach fs_info
 * @extent_cache: cache tree holding struct extent_record entries
 *
 * NOTE(review): several control-flow lines are missing from this excerpt,
 * so the exact conditions guarding each step and the return values should
 * be confirmed against the full source.
 */
8246 static int check_extent_refs(struct btrfs_root *root,
8247 struct cache_tree *extent_cache)
8249 struct extent_record *rec;
8250 struct cache_extent *cache;
8259 * if we're doing a repair, we have to make sure
8260 * we don't allocate from the problem extents.
8261 * In the worst case, this will be all the
8264 cache = search_cache_extent(extent_cache, 0);
8266 rec = container_of(cache, struct extent_record, cache);
8267 set_extent_dirty(root->fs_info->excluded_extents,
8269 rec->start + rec->max_size - 1,
8271 cache = next_cache_extent(cache);
8274 /* pin down all the corrupted blocks too */
8275 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8277 set_extent_dirty(root->fs_info->excluded_extents,
8279 cache->start + cache->size - 1,
8281 cache = next_cache_extent(cache);
8283 prune_corrupt_blocks(root->fs_info);
8284 reset_cached_block_groups(root->fs_info);
8287 reset_cached_block_groups(root->fs_info);
8290 * We need to delete any duplicate entries we find first otherwise we
8291 * could mess up the extent tree when we have backrefs that actually
8292 * belong to a different extent item and not the weird duplicate one.
8294 while (repair && !list_empty(&duplicate_extents)) {
8295 rec = to_extent_record(duplicate_extents.next);
8296 list_del_init(&rec->list);
8298 /* Sometimes we can find a backref before we find an actual
8299 * extent, so we need to process it a little bit to see if there
8300 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8301 * if this is a backref screwup. If we need to delete stuff
8302 * process_duplicates() will return 0, otherwise it will return
8305 if (process_duplicates(root, extent_cache, rec))
8307 ret = delete_duplicate_records(root, rec);
8311 * delete_duplicate_records will return the number of entries
8312 * deleted, so if it's greater than 0 then we know we actually
8313 * did something and we need to remove.
8327 cache = search_cache_extent(extent_cache, 0);
8330 rec = container_of(cache, struct extent_record, cache);
8331 if (rec->num_duplicates) {
8332 fprintf(stderr, "extent item %llu has multiple extent "
8333 "items\n", (unsigned long long)rec->start);
8338 if (rec->refs != rec->extent_item_refs) {
8339 fprintf(stderr, "ref mismatch on [%llu %llu] ",
8340 (unsigned long long)rec->start,
8341 (unsigned long long)rec->nr);
8342 fprintf(stderr, "extent item %llu, found %llu\n",
8343 (unsigned long long)rec->extent_item_refs,
8344 (unsigned long long)rec->refs);
8345 ret = record_orphan_data_extents(root->fs_info, rec);
8352 * we can't use the extent to repair file
8353 * extent, let the fallback method handle it.
8355 if (!fixed && repair) {
8356 ret = fixup_extent_refs(
8367 if (all_backpointers_checked(rec, 1)) {
8368 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8369 (unsigned long long)rec->start,
8370 (unsigned long long)rec->nr);
8372 if (!fixed && !recorded && repair) {
8373 ret = fixup_extent_refs(root->fs_info,
8382 if (!rec->owner_ref_checked) {
8383 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8384 (unsigned long long)rec->start,
8385 (unsigned long long)rec->nr);
8386 if (!fixed && !recorded && repair) {
8387 ret = fixup_extent_refs(root->fs_info,
8396 if (rec->bad_full_backref) {
8397 fprintf(stderr, "bad full backref, on [%llu]\n",
8398 (unsigned long long)rec->start);
8400 ret = fixup_extent_flags(root->fs_info, rec);
8409 * Although it's not a extent ref's problem, we reuse this
8410 * routine for error reporting.
8411 * No repair function yet.
8413 if (rec->crossing_stripes) {
8415 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8416 rec->start, rec->start + rec->max_size);
8421 if (rec->wrong_chunk_type) {
8423 "bad extent [%llu, %llu), type mismatch with chunk\n",
8424 rec->start, rec->start + rec->max_size);
8429 remove_cache_extent(extent_cache, cache);
8430 free_all_extent_backrefs(rec);
8431 if (!init_extent_tree && repair && (!cur_err || fixed))
8432 clear_extent_dirty(root->fs_info->excluded_extents,
8434 rec->start + rec->max_size - 1,
8440 if (ret && ret != -EAGAIN) {
8441 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8444 struct btrfs_trans_handle *trans;
8446 root = root->fs_info->extent_root;
8447 trans = btrfs_start_transaction(root, 1);
8448 if (IS_ERR(trans)) {
8449 ret = PTR_ERR(trans);
8453 btrfs_fix_block_accounting(trans, root);
8454 ret = btrfs_commit_transaction(trans, root);
8459 fprintf(stderr, "repaired damaged extent references\n");
8465 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8469 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8470 stripe_size = length;
8471 stripe_size /= num_stripes;
8472 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8473 stripe_size = length * 2;
8474 stripe_size /= num_stripes;
8475 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8476 stripe_size = length;
8477 stripe_size /= (num_stripes - 1);
8478 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8479 stripe_size = length;
8480 stripe_size /= (num_stripes - 2);
8482 stripe_size = length;
8488 * Check a chunk against its block group and device extent references:
8489 * Return 0 if all refs seem valid.
8490 * Return 1 if only part of the refs seem valid and a later pass is needed to
8491 * rebuild them (e.g. a missing block group, found by searching the extent tree).
8492 * Return -1 if essential refs are missing and cannot be rebuilt.
// Cross-check one chunk record against the block group cache and the
// device extent cache.
//
// @chunk_rec:         chunk being verified
// @block_group_cache: tree of block group records, searched for the one
//                     matching this chunk
// @dev_extent_cache:  tree of device extent records, searched once per stripe
//
// Block group check: the record found must satisfy
//   chunk length == block group offset,
//   chunk offset == block group objectid,
//   chunk type_flags == block group flags (see the metadump_v2 local);
// a matching record is unlinked from its list and stored in
// chunk_rec->bg_rec. Mismatches and missing block groups are reported.
//
// Stripe check: the expected device extent length comes from
// calc_stripe_length(); for each stripe the device extent is looked up by
// (devid, offset, length) and its objectid, offset, chunk_offset and length
// are compared. Matching extents are moved onto chunk_rec->dextents.
//
// NOTE(review): the fourth parameter, the return statements and the lines
// that gate the messages are not visible in this excerpt; the return
// contract is described by the comment that precedes this function.
8494 static int check_chunk_refs(struct chunk_record *chunk_rec,
8495 struct block_group_tree *block_group_cache,
8496 struct device_extent_tree *dev_extent_cache,
8499 struct cache_extent *block_group_item;
8500 struct block_group_record *block_group_rec;
8501 struct cache_extent *dev_extent_item;
8502 struct device_extent_record *dev_extent_rec;
8506 int metadump_v2 = 0;
// Part 1: chunk <-> block group item.
8510 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8513 if (block_group_item) {
8514 block_group_rec = container_of(block_group_item,
8515 struct block_group_record,
8517 if (chunk_rec->length != block_group_rec->offset ||
8518 chunk_rec->offset != block_group_rec->objectid ||
8520 chunk_rec->type_flags != block_group_rec->flags)) {
8523 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8524 chunk_rec->objectid,
8529 chunk_rec->type_flags,
8530 block_group_rec->objectid,
8531 block_group_rec->type,
8532 block_group_rec->offset,
8533 block_group_rec->offset,
8534 block_group_rec->objectid,
8535 block_group_rec->flags);
// The block group is claimed by this chunk: take it off its list.
8538 list_del_init(&block_group_rec->list);
8539 chunk_rec->bg_rec = block_group_rec;
8544 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8545 chunk_rec->objectid,
8550 chunk_rec->type_flags);
// Part 2: each stripe <-> device extent of the expected length.
8557 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8558 chunk_rec->num_stripes);
8559 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8560 devid = chunk_rec->stripes[i].devid;
8561 offset = chunk_rec->stripes[i].offset;
8562 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8563 devid, offset, length);
8564 if (dev_extent_item) {
8565 dev_extent_rec = container_of(dev_extent_item,
8566 struct device_extent_record,
8568 if (dev_extent_rec->objectid != devid ||
8569 dev_extent_rec->offset != offset ||
8570 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8571 dev_extent_rec->length != length) {
8574 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8575 chunk_rec->objectid,
8578 chunk_rec->stripes[i].devid,
8579 chunk_rec->stripes[i].offset,
8580 dev_extent_rec->objectid,
8581 dev_extent_rec->offset,
8582 dev_extent_rec->length);
// Attach the device extent to this chunk.
8585 list_move(&dev_extent_rec->chunk_list,
8586 &chunk_rec->dextents);
8591 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8592 chunk_rec->objectid,
8595 chunk_rec->stripes[i].devid,
8596 chunk_rec->stripes[i].offset);
8603 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk in @chunk_cache and sort the chunk
 * records onto the caller supplied result lists.
 *
 * @chunk_cache:       cache tree of struct chunk_record
 * @block_group_cache: block group records to match chunks against
 * @dev_extent_cache:  device extent records to match stripes against
 * @good:              optional list receiving chunks with err == 0
 * @bad:               list receiving chunks on the remaining visible branch
 * @rebuild:           optional list receiving chunks with err > 0
 * @silent:            forwarded to check_chunk_refs()
 *
 * After the walk, block groups still linked on
 * block_group_cache->block_groups and device extents still linked on
 * dev_extent_cache->no_chunk_orphans are iterated and reported as having
 * no related chunk.
 *
 * NOTE(review): the condition guarding the @bad insertion, the gating of
 * the messages and the return value are not visible in this excerpt.
 */
8604 int check_chunks(struct cache_tree *chunk_cache,
8605 struct block_group_tree *block_group_cache,
8606 struct device_extent_tree *dev_extent_cache,
8607 struct list_head *good, struct list_head *bad,
8608 struct list_head *rebuild, int silent)
8610 struct cache_extent *chunk_item;
8611 struct chunk_record *chunk_rec;
8612 struct block_group_record *bg_rec;
8613 struct device_extent_record *dext_rec;
8617 chunk_item = first_cache_extent(chunk_cache);
8618 while (chunk_item) {
8619 chunk_rec = container_of(chunk_item, struct chunk_record,
8621 err = check_chunk_refs(chunk_rec, block_group_cache,
8622 dev_extent_cache, silent);
/* Classify the chunk by the result; each list pointer may be NULL. */
8625 if (err == 0 && good)
8626 list_add_tail(&chunk_rec->list, good);
8627 if (err > 0 && rebuild)
8628 list_add_tail(&chunk_rec->list, rebuild);
8630 list_add_tail(&chunk_rec->list, bad);
8631 chunk_item = next_cache_extent(chunk_item);
/* Leftover block groups were not claimed by any chunk. */
8634 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8637 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Leftover device extents were not claimed by any chunk. */
8645 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8649 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Check that the bytes covered by the device extents of one device add up
 * to the byte_used value stored in its device record.
 *
 * @dev_rec:    device record being verified
 * @dext_cache: device extent tree, searched from (dev_rec->devid, 0)
 *
 * Each extent visited is unlinked from its device_list and its length is
 * added to total_byte. The objectid of each extent is compared with
 * dev_rec->devid. A mismatch between total_byte and dev_rec->byte_used is
 * reported.
 *
 * NOTE(review): the loop header, the action taken on an objectid mismatch
 * and the return values are not visible in this excerpt.
 */
8660 static int check_device_used(struct device_record *dev_rec,
8661 struct device_extent_tree *dext_cache)
8663 struct cache_extent *cache;
8664 struct device_extent_record *dev_extent_rec;
8667 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8669 dev_extent_rec = container_of(cache,
8670 struct device_extent_record,
8672 if (dev_extent_rec->objectid != dev_rec->devid)
/* This extent is accounted to the device: unlink it and count it. */
8675 list_del_init(&dev_extent_rec->device_list);
8676 total_byte += dev_extent_rec->length;
8677 cache = next_cache_extent(cache);
8680 if (total_byte != dev_rec->byte_used) {
8682 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8683 total_byte, dev_rec->byte_used, dev_rec->objectid,
8684 dev_rec->type, dev_rec->offset);
8691 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() for the device records in @dev_cache, then
 * report the device extents left on dev_extent_cache->no_device_orphans.
 *
 * @dev_cache:        rb-tree of struct device_record, walked with
 *                    rb_first() / rb_next()
 * @dev_extent_cache: device extent tree passed to check_device_used()
 *
 * NOTE(review): how err is accumulated and what is returned are not
 * visible in this excerpt.
 */
8692 static int check_devices(struct rb_root *dev_cache,
8693 struct device_extent_tree *dev_extent_cache)
8695 struct rb_node *dev_node;
8696 struct device_record *dev_rec;
8697 struct device_extent_record *dext_rec;
8701 dev_node = rb_first(dev_cache);
8703 dev_rec = container_of(dev_node, struct device_record, node);
8704 err = check_device_used(dev_rec, dev_extent_cache);
8708 dev_node = rb_next(dev_node);
/* Report the entries of the no_device_orphans list. */
8710 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8713 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8714 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root and append it to
 * @head.
 *
 * @head:          list the new record is added to (at the tail)
 * @objectid:      stored in ri_rec->objectid
 * @bytenr:        stored in ri_rec->bytenr
 * @last_snapshot: stored in ri_rec->last_snapshot
 * @level:         stored in ri_rec->level
 * @drop_level:    stored in ri_rec->drop_level
 * @level_size:    stored in ri_rec->level_size
 * @drop_key:      copied into ri_rec->drop_key
 *
 * NOTE(review): callers in this file pass drop_key == NULL for trees that
 * are not being dropped. The guard around the memcpy, the malloc failure
 * check and the return values are not visible in this excerpt; confirm
 * against the full source.
 */
8721 static int add_root_item_to_list(struct list_head *head,
8722 u64 objectid, u64 bytenr, u64 last_snapshot,
8723 u8 level, u8 drop_level,
8724 int level_size, struct btrfs_key *drop_key)
8727 struct root_item_record *ri_rec;
8728 ri_rec = malloc(sizeof(*ri_rec));
8731 ri_rec->bytenr = bytenr;
8732 ri_rec->objectid = objectid;
8733 ri_rec->level = level;
8734 ri_rec->level_size = level_size;
8735 ri_rec->drop_level = drop_level;
8736 ri_rec->last_snapshot = last_snapshot;
8738 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8739 list_add_tail(&ri_rec->list, head);
/*
 * Empty a list of root_item_record entries.
 *
 * Entries are taken from the front of @list and unlinked with
 * list_del_init() until the list is empty.
 * NOTE(review): the statement releasing each record is not visible in this
 * excerpt.
 */
8744 static void free_root_item_list(struct list_head *list)
8746 struct root_item_record *ri_rec;
8748 while (!list_empty(list)) {
8749 ri_rec = list_first_entry(list, struct root_item_record,
8751 list_del_init(&ri_rec->list);
/*
 * Process the root records queued on @list.
 *
 * For each record the root node is read with read_tree_block(), added to
 * the pending set through add_root_to_pending() and then handed to
 * run_next_block() together with the record. The buffer is released and
 * the record is unlinked from @list afterwards. A further
 * run_next_block() call with a NULL record is visible after that.
 *
 * @list:   list of struct root_item_record to process
 * @root:   root used to reach fs_info and passed to run_next_block()
 * @bits:   block_info array passed to run_next_block() (bits_nr, declared
 *          on a line not shown here, is passed alongside it)
 * The remaining parameters are the caches and trees forwarded unchanged to
 * add_root_to_pending() and run_next_block().
 *
 * NOTE(review): loop structure, error checks and return values are not
 * visible in this excerpt.
 */
8756 static int deal_root_from_list(struct list_head *list,
8757 struct btrfs_root *root,
8758 struct block_info *bits,
8760 struct cache_tree *pending,
8761 struct cache_tree *seen,
8762 struct cache_tree *reada,
8763 struct cache_tree *nodes,
8764 struct cache_tree *extent_cache,
8765 struct cache_tree *chunk_cache,
8766 struct rb_root *dev_cache,
8767 struct block_group_tree *block_group_cache,
8768 struct device_extent_tree *dev_extent_cache)
8773 while (!list_empty(list)) {
8774 struct root_item_record *rec;
8775 struct extent_buffer *buf;
8776 rec = list_entry(list->next,
8777 struct root_item_record, list);
/* Read the root node of this tree; the read goes through the tree root. */
8779 buf = read_tree_block(root->fs_info->tree_root,
8780 rec->bytenr, rec->level_size, 0);
8781 if (!extent_buffer_uptodate(buf)) {
8782 free_extent_buffer(buf);
8786 ret = add_root_to_pending(buf, extent_cache, pending,
8787 seen, nodes, rec->objectid);
8791 * To rebuild extent tree, we need deal with snapshot
8792 * one by one, otherwise we deal with node firstly which
8793 * can maximize readahead.
8796 ret = run_next_block(root, bits, bits_nr, &last,
8797 pending, seen, reada, nodes,
8798 extent_cache, chunk_cache,
8799 dev_cache, block_group_cache,
8800 dev_extent_cache, rec);
8804 free_extent_buffer(buf);
8805 list_del(&rec->list);
/* Same call as above, but without an associated root record. */
8811 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8812 reada, nodes, extent_cache, chunk_cache,
8813 dev_cache, block_group_cache,
8814 dev_extent_cache, NULL);
/*
 * Top-level chunk and extent check.
 *
 * Visible sequence:
 *  1. initialise the local caches, trees and lists and publish
 *     excluded_extents, fsck_extent_cache, free_extent_hook and
 *     corrupt_blocks through root->fs_info;
 *  2. allocate the block_info array and, if progress reporting is enabled,
 *     start the progress task with ctx.tp = TASK_EXTENTS;
 *  3. queue the tree root and the chunk root on normal_trees;
 *  4. walk the items of the tree root; each ROOT_ITEM is queued on
 *     normal_trees when the objectid of its drop_progress key is 0, and on
 *     dropping_trees (together with a drop key) otherwise;
 *  5. process normal_trees, then dropping_trees, with
 *     deal_root_from_list();
 *  6. run check_chunks(), check_extent_refs() and check_devices();
 *  7. stop the progress task, detach the local structures from fs_info and
 *     free the caches.
 *
 * @root: root used to reach fs_info; its nodesize is used as level_size
 *        for the queued roots found in step 4
 *
 * NOTE(review): two cleanup sequences are visible near the end; the jumps
 * that select between them, the error checks after each step and the
 * return values are not visible in this excerpt. Per the in-code comment,
 * -EAGAIN from the block checks means something was fixed and the loop is
 * to be restarted.
 */
8824 static int check_chunks_and_extents(struct btrfs_root *root)
8826 struct rb_root dev_cache;
8827 struct cache_tree chunk_cache;
8828 struct block_group_tree block_group_cache;
8829 struct device_extent_tree dev_extent_cache;
8830 struct cache_tree extent_cache;
8831 struct cache_tree seen;
8832 struct cache_tree pending;
8833 struct cache_tree reada;
8834 struct cache_tree nodes;
8835 struct extent_io_tree excluded_extents;
8836 struct cache_tree corrupt_blocks;
8837 struct btrfs_path path;
8838 struct btrfs_key key;
8839 struct btrfs_key found_key;
8841 struct block_info *bits;
8843 struct extent_buffer *leaf;
8845 struct btrfs_root_item ri;
8846 struct list_head dropping_trees;
8847 struct list_head normal_trees;
8848 struct btrfs_root *root1;
/* Initialise every local cache, tree and list before use. */
8853 dev_cache = RB_ROOT;
8854 cache_tree_init(&chunk_cache);
8855 block_group_tree_init(&block_group_cache);
8856 device_extent_tree_init(&dev_extent_cache);
8858 cache_tree_init(&extent_cache);
8859 cache_tree_init(&seen);
8860 cache_tree_init(&pending);
8861 cache_tree_init(&nodes);
8862 cache_tree_init(&reada);
8863 cache_tree_init(&corrupt_blocks);
8864 extent_io_tree_init(&excluded_extents);
8865 INIT_LIST_HEAD(&dropping_trees);
8866 INIT_LIST_HEAD(&normal_trees);
/* Expose the stack-allocated structures through fs_info. */
8869 root->fs_info->excluded_extents = &excluded_extents;
8870 root->fs_info->fsck_extent_cache = &extent_cache;
8871 root->fs_info->free_extent_hook = free_extent_hook;
8872 root->fs_info->corrupt_blocks = &corrupt_blocks;
8876 bits = malloc(bits_nr * sizeof(struct block_info));
8882 if (ctx.progress_enabled) {
8883 ctx.tp = TASK_EXTENTS;
8884 task_start(ctx.info);
/* Queue the tree root, then the chunk root. */
8888 root1 = root->fs_info->tree_root;
8889 level = btrfs_header_level(root1->node);
8890 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8891 root1->node->start, 0, level, 0,
8892 root1->nodesize, NULL);
8895 root1 = root->fs_info->chunk_root;
8896 level = btrfs_header_level(root1->node);
8897 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8898 root1->node->start, 0, level, 0,
8899 root1->nodesize, NULL);
/* Walk the tree root looking for ROOT_ITEMs to queue. */
8902 btrfs_init_path(&path);
8905 key.type = BTRFS_ROOT_ITEM_KEY;
8906 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8911 leaf = path.nodes[0];
8912 slot = path.slots[0];
8913 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8914 ret = btrfs_next_leaf(root, &path);
8917 leaf = path.nodes[0];
8918 slot = path.slots[0];
8920 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8921 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8922 unsigned long offset;
8925 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8926 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8927 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid == 0: queue on normal_trees, else dropping_trees. */
8928 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8929 level = btrfs_root_level(&ri);
8930 level_size = root->nodesize;
8931 ret = add_root_item_to_list(&normal_trees,
8933 btrfs_root_bytenr(&ri),
8934 last_snapshot, level,
8935 0, level_size, NULL);
8939 level = btrfs_root_level(&ri);
8940 level_size = root->nodesize;
8941 objectid = found_key.objectid;
8942 btrfs_disk_key_to_cpu(&found_key,
8944 ret = add_root_item_to_list(&dropping_trees,
8946 btrfs_root_bytenr(&ri),
8947 last_snapshot, level,
8949 level_size, &found_key);
8956 btrfs_release_path(&path);
8959 * check_block can return -EAGAIN if it fixes something, please keep
8960 * this in mind when dealing with return values from these functions, if
8961 * we get -EAGAIN we want to fall through and restart the loop.
8963 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8964 &seen, &reada, &nodes, &extent_cache,
8965 &chunk_cache, &dev_cache, &block_group_cache,
8972 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8973 &pending, &seen, &reada, &nodes,
8974 &extent_cache, &chunk_cache, &dev_cache,
8975 &block_group_cache, &dev_extent_cache);
/* Cross-checks on the data collected above. */
8982 ret = check_chunks(&chunk_cache, &block_group_cache,
8983 &dev_extent_cache, NULL, NULL, NULL, 0);
8990 ret = check_extent_refs(root, &extent_cache);
8997 ret = check_devices(&dev_cache, &dev_extent_cache);
9002 task_stop(ctx.info);
/* First visible cleanup sequence: detach from fs_info and free caches. */
9004 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9005 extent_io_tree_cleanup(&excluded_extents);
9006 root->fs_info->fsck_extent_cache = NULL;
9007 root->fs_info->free_extent_hook = NULL;
9008 root->fs_info->corrupt_blocks = NULL;
9009 root->fs_info->excluded_extents = NULL;
9012 free_chunk_cache_tree(&chunk_cache);
9013 free_device_cache_tree(&dev_cache);
9014 free_block_group_tree(&block_group_cache);
9015 free_device_extent_tree(&dev_extent_cache);
9016 free_extent_cache_tree(&seen);
9017 free_extent_cache_tree(&pending);
9018 free_extent_cache_tree(&reada);
9019 free_extent_cache_tree(&nodes);
/* Second visible cleanup sequence; also frees extent records and root lists. */
9022 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9023 free_extent_cache_tree(&seen);
9024 free_extent_cache_tree(&pending);
9025 free_extent_cache_tree(&reada);
9026 free_extent_cache_tree(&nodes);
9027 free_chunk_cache_tree(&chunk_cache);
9028 free_block_group_tree(&block_group_cache);
9029 free_device_cache_tree(&dev_cache);
9030 free_device_extent_tree(&dev_extent_cache);
9031 free_extent_record_cache(root->fs_info, &extent_cache);
9032 free_root_item_list(&normal_trees);
9033 free_root_item_list(&dropping_trees);
9034 extent_io_tree_cleanup(&excluded_extents);
9039 * Check backrefs of a tree block given by @bytenr or @eb.
9041 * @root: the root containing the @bytenr or @eb
9042 * @eb: tree block extent buffer, can be NULL
9043 * @bytenr: bytenr of the tree block to search
9044 * @level: tree level of the tree block
9045 * @owner: owner of the tree block
9047 * Return >0 for any error found and output error message
9048 * Return 0 for no error found
/*
 * Verify that a tree block has a matching backref in the extent tree.
 *
 * @root:   root containing the tree block
 * @eb:     extent buffer of the tree block; the recursive calls in this
 *          file pass NULL
 * @bytenr: bytenr of the tree block, used as the search key objectid
 * @level:  expected tree level of the block
 * @owner:  expected owner of the block
 *
 * Visible steps:
 *  - search the extent tree with key (bytenr, METADATA_ITEM or EXTENT_ITEM
 *    depending on the SKINNY_METADATA incompat flag, -1) and step back with
 *    btrfs_previous_extent_item();
 *  - take the recorded level from key.offset for a METADATA_ITEM, or from
 *    the btrfs_tree_block_info that follows the extent item otherwise;
 *  - report BACKREF_MISMATCH when the TREE_BLOCK flag is missing, the
 *    generations differ, the levels differ, or a non-fs tree block has a
 *    ref count other than 1;
 *  - walk the inline refs: a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner matches, and a SHARED_BLOCK_REF is checked by
 *    calling this function again on the parent block;
 *  - fall back to searching for a keyed TREE_BLOCK_REF item
 *    (bytenr, TREE_BLOCK_REF_KEY, root->objectid);
 *  - when @eb is set and BACKREF_MISSING was raised, print the
 *    backref-lost error.
 *
 * The result is a bitmask built from BACKREF_MISSING and BACKREF_MISMATCH.
 * NOTE(review): the guards around each step and the return statement are
 * not visible in this excerpt.
 */
9050 static int check_tree_block_ref(struct btrfs_root *root,
9051 struct extent_buffer *eb, u64 bytenr,
9052 int level, u64 owner)
9054 struct btrfs_key key;
9055 struct btrfs_root *extent_root = root->fs_info->extent_root;
9056 struct btrfs_path path;
9057 struct btrfs_extent_item *ei;
9058 struct btrfs_extent_inline_ref *iref;
9059 struct extent_buffer *leaf;
9065 u32 nodesize = root->nodesize;
9072 btrfs_init_path(&path);
9073 key.objectid = bytenr;
9074 if (btrfs_fs_incompat(root->fs_info,
9075 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9076 key.type = BTRFS_METADATA_ITEM_KEY;
9078 key.type = BTRFS_EXTENT_ITEM_KEY;
9079 key.offset = (u64)-1;
9081 /* Search for the backref in extent tree */
9082 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9084 err |= BACKREF_MISSING;
9087 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9089 err |= BACKREF_MISSING;
9093 leaf = path.nodes[0];
9094 slot = path.slots[0];
9095 btrfs_item_key_to_cpu(leaf, &key, slot);
9097 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* The recorded level and the first inline ref depend on the item type. */
9099 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9100 skinny_level = (int)key.offset;
9101 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9103 struct btrfs_tree_block_info *info;
9105 info = (struct btrfs_tree_block_info *)(ei + 1);
9106 skinny_level = btrfs_tree_block_level(leaf, info);
9107 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9114 if (!(btrfs_extent_flags(leaf, ei) &
9115 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9117 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9118 key.objectid, nodesize,
9119 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9120 err = BACKREF_MISMATCH;
9122 header_gen = btrfs_header_generation(eb);
9123 extent_gen = btrfs_extent_generation(leaf, ei);
9124 if (header_gen != extent_gen) {
9126 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9127 key.objectid, nodesize, header_gen,
9129 err = BACKREF_MISMATCH;
9131 if (level != skinny_level) {
9133 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9134 key.objectid, nodesize, level, skinny_level);
9135 err = BACKREF_MISMATCH;
9137 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9139 "extent[%llu %u] is referred by other roots than %llu",
9140 key.objectid, nodesize, root->objectid);
9141 err = BACKREF_MISMATCH;
9146 * Iterate the extent/metadata item to find the exact backref
9148 item_size = btrfs_item_size_nr(leaf, slot);
9149 ptr = (unsigned long)iref;
9150 end = (unsigned long)ei + item_size;
9152 iref = (struct btrfs_extent_inline_ref *)ptr;
9153 type = btrfs_extent_inline_ref_type(leaf, iref);
9154 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9156 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9157 (offset == root->objectid || offset == owner)) {
9159 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9160 /* Check if the backref points to valid referencer */
9161 found_ref = !check_tree_block_ref(root, NULL, offset,
9167 ptr += btrfs_extent_inline_ref_size(type);
9171 * Inlined extent item doesn't have what we need, check
9172 * TREE_BLOCK_REF_KEY
9175 btrfs_release_path(&path);
9176 key.objectid = bytenr;
9177 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9178 key.offset = root->objectid;
9180 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9185 err |= BACKREF_MISSING;
9187 btrfs_release_path(&path);
9188 if (eb && (err & BACKREF_MISSING))
9189 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9190 bytenr, nodesize, owner, level);
9195 * Check an EXTENT_DATA item, mainly for its data backref in the extent tree
9197 * Return >0 if any error is found, and output an error message
9198 * Return 0 if no error is found
/*
 * Check one EXTENT_DATA item of a file tree leaf against the extent tree.
 *
 * @root: root owning the leaf; root->sectorsize is the alignment checked
 *        and root->objectid is used when matching data backrefs
 * @eb:   leaf containing the file extent item
 * @slot: slot of the item in @eb
 *
 * Visible steps:
 *  - inline extents and extents whose disk_bytenr is 0 (holes) are tested
 *    first; per the in-code comment nothing is checked for them;
 *  - disk_num_bytes and num_bytes are tested for sectorsize alignment
 *    (BYTES_UNALIGNED) and are added to the global data_bytes_allocated and
 *    data_bytes_referenced counters;
 *  - the EXTENT_ITEM keyed (disk_bytenr, EXTENT_ITEM_KEY, disk_num_bytes)
 *    is searched in the extent tree; its flags must contain
 *    BTRFS_EXTENT_FLAG_DATA and its generation must not be newer than the
 *    file extent generation (BACKREF_MISMATCH otherwise);
 *  - the inline refs are walked: an EXTENT_DATA_REF matches when its root
 *    is the leaf owner or root->objectid, and a SHARED_DATA_REF is checked
 *    through check_tree_block_ref() on the parent block;
 *  - if no inline ref matched, a keyed EXTENT_DATA_REF item is searched
 *    using hash_extent_data_ref(root->objectid, inode, file offset);
 *  - BACKREF_MISSING is raised, and reported, if nothing was found.
 *
 * The result is a bitmask of the error flags above.
 * NOTE(review): branch structure and the return statements are not visible
 * in this excerpt.
 */
9200 static int check_extent_data_item(struct btrfs_root *root,
9201 struct extent_buffer *eb, int slot)
9203 struct btrfs_file_extent_item *fi;
9204 struct btrfs_path path;
9205 struct btrfs_root *extent_root = root->fs_info->extent_root;
9206 struct btrfs_key fi_key;
9207 struct btrfs_key dbref_key;
9208 struct extent_buffer *leaf;
9209 struct btrfs_extent_item *ei;
9210 struct btrfs_extent_inline_ref *iref;
9211 struct btrfs_extent_data_ref *dref;
9213 u64 file_extent_gen;
9216 u64 extent_num_bytes;
9224 int found_dbackref = 0;
9228 btrfs_item_key_to_cpu(eb, &fi_key, slot);
9229 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9230 file_extent_gen = btrfs_file_extent_generation(eb, fi);
9232 /* Nothing to check for hole and inline data extents */
9233 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9234 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9237 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9238 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9239 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9241 /* Check unaligned disk_num_bytes and num_bytes */
9242 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9244 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9245 fi_key.objectid, fi_key.offset, disk_num_bytes,
9247 err |= BYTES_UNALIGNED;
9249 data_bytes_allocated += disk_num_bytes;
9251 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9253 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9254 fi_key.objectid, fi_key.offset, extent_num_bytes,
9256 err |= BYTES_UNALIGNED;
9258 data_bytes_referenced += extent_num_bytes;
9260 owner = btrfs_header_owner(eb);
9262 /* Check the extent item of the file extent in extent tree */
9263 btrfs_init_path(&path);
9264 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9265 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9266 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9268 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9270 err |= BACKREF_MISSING;
/* From here on, leaf and slot refer to the extent tree, not to @eb. */
9274 leaf = path.nodes[0];
9275 slot = path.slots[0];
9276 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9278 extent_flags = btrfs_extent_flags(leaf, ei);
9279 extent_gen = btrfs_extent_generation(leaf, ei);
9281 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9283 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9284 disk_bytenr, disk_num_bytes,
9285 BTRFS_EXTENT_FLAG_DATA);
9286 err |= BACKREF_MISMATCH;
9289 if (file_extent_gen < extent_gen) {
9291 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9292 disk_bytenr, disk_num_bytes, file_extent_gen,
9294 err |= BACKREF_MISMATCH;
9297 /* Check data backref inside that extent item */
9298 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9299 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9300 ptr = (unsigned long)iref;
9301 end = (unsigned long)ei + item_size;
9303 iref = (struct btrfs_extent_inline_ref *)ptr;
9304 type = btrfs_extent_inline_ref_type(leaf, iref);
9305 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9307 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9308 ref_root = btrfs_extent_data_ref_root(leaf, dref);
9309 if (ref_root == owner || ref_root == root->objectid)
9311 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9312 found_dbackref = !check_tree_block_ref(root, NULL,
9313 btrfs_extent_inline_ref_offset(leaf, iref),
9319 ptr += btrfs_extent_inline_ref_size(type);
9322 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9323 if (!found_dbackref) {
9324 btrfs_release_path(&path);
9326 btrfs_init_path(&path);
9327 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9328 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9329 dbref_key.offset = hash_extent_data_ref(root->objectid,
9330 fi_key.objectid, fi_key.offset);
9332 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9333 &dbref_key, &path, 0, 0);
9338 if (!found_dbackref)
9339 err |= BACKREF_MISSING;
9341 btrfs_release_path(&path);
9342 if (err & BACKREF_MISSING) {
9343 error("data extent[%llu %llu] backref lost",
9344 disk_bytenr, disk_num_bytes);
9350 * Get real tree block level for the case like shared block
9351 * Return >= 0 as tree level
9352 * Return <0 for error
/*
 * Resolve the level of the tree block at @bytenr from two sources and
 * return it only if they agree.
 *
 * @fs_info: filesystem info; nodesize is read from the super block copy
 * @bytenr:  bytenr of the tree block
 *
 * Source 1 (backref): the extent tree is searched with
 * (bytenr, METADATA_ITEM_KEY, -1) followed by
 * btrfs_previous_extent_item(). The item found must carry
 * BTRFS_EXTENT_FLAG_TREE_BLOCK. The level is key.offset for a
 * METADATA_ITEM, otherwise it is read from the btrfs_tree_block_info that
 * follows the extent item. The extent generation is kept as the transid
 * for the read below.
 *
 * Source 2 (header): the block itself is read with
 * read_tree_block_fs_info() and btrfs_header_level() is taken.
 *
 * Returns header_level on the visible success path. Per the comment that
 * precedes this function, errors are reported as negative values.
 * NOTE(review): the specific error codes are not visible in this excerpt.
 */
9354 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9356 struct extent_buffer *eb;
9357 struct btrfs_path path;
9358 struct btrfs_key key;
9359 struct btrfs_extent_item *ei;
9362 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9367 /* Search extent tree for extent generation and level */
9368 key.objectid = bytenr;
9369 key.type = BTRFS_METADATA_ITEM_KEY;
9370 key.offset = (u64)-1;
9372 btrfs_init_path(&path);
9373 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9376 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9384 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9385 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9386 struct btrfs_extent_item);
9387 flags = btrfs_extent_flags(path.nodes[0], ei);
9388 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9393 /* Get transid for later read_tree_block() check */
9394 transid = btrfs_extent_generation(path.nodes[0], ei);
9396 /* Get backref level as one source */
9397 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9398 backref_level = key.offset;
9400 struct btrfs_tree_block_info *info;
9402 info = (struct btrfs_tree_block_info *)(ei + 1);
9403 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9405 btrfs_release_path(&path);
9407 /* Get level from tree block as an alternative source */
9408 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9409 if (!extent_buffer_uptodate(eb)) {
9410 free_extent_buffer(eb);
9413 header_level = btrfs_header_level(eb);
9414 free_extent_buffer(eb);
/* The two sources must agree before the level is trusted. */
9416 if (header_level != backref_level)
9418 return header_level;
9421 btrfs_release_path(&path);
9426 * Check if a tree block backref is valid (points to a valid tree block)
9427 * if level == -1, level will be resolved
9428 * Return >0 for any error found and print error message
/*
 * Check that a keyed tree block backref (root_id -> bytenr) points to a
 * tree block that can be reached in that root.
 *
 * @fs_info: filesystem info
 * @root_id: objectid of the root named by the backref
 * @bytenr:  bytenr of the referenced tree block
 * @level:   level of the block; per the comment that precedes this
 *           function, -1 asks for it to be resolved (done through
 *           query_tree_block_level())
 *
 * Visible steps:
 *  - read the root with btrfs_read_fs_root() using
 *    (root_id, ROOT_ITEM_KEY, -1);
 *  - read the block and take its first key, as a node key or an item key;
 *    an empty block at level 0 is tested for first (empty tree);
 *  - search the root for that key with path.lowest_level = level and
 *    compare the bytenr and level of the block found at path.nodes[level]
 *    with the expected values (REFERENCER_MISMATCH);
 *  - failures in the earlier steps raise REFERENCER_MISSING, which is
 *    reported at the end.
 *
 * The result is a bitmask of REFERENCER_MISSING and REFERENCER_MISMATCH.
 * NOTE(review): guards, gotos and the return statement are not visible in
 * this excerpt.
 */
9430 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9431 u64 bytenr, int level)
9433 struct btrfs_root *root;
9434 struct btrfs_key key;
9435 struct btrfs_path path;
9436 struct extent_buffer *eb;
9437 struct extent_buffer *node;
9438 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9442 /* Query level for level == -1 special case */
9444 level = query_tree_block_level(fs_info, bytenr);
9446 err |= REFERENCER_MISSING;
9450 key.objectid = root_id;
9451 key.type = BTRFS_ROOT_ITEM_KEY;
9452 key.offset = (u64)-1;
9454 root = btrfs_read_fs_root(fs_info, &key);
9456 err |= REFERENCER_MISSING;
9460 /* Read out the tree block to get item/node key */
9461 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9462 if (!extent_buffer_uptodate(eb)) {
9463 err |= REFERENCER_MISSING;
9464 free_extent_buffer(eb);
9468 /* Empty tree, no need to check key */
9469 if (!btrfs_header_nritems(eb) && !level) {
9470 free_extent_buffer(eb);
9475 btrfs_node_key_to_cpu(eb, &key, 0);
9477 btrfs_item_key_to_cpu(eb, &key, 0);
9479 free_extent_buffer(eb);
9481 btrfs_init_path(&path);
9482 path.lowest_level = level;
9483 /* Search with the first key, to ensure we can reach it */
9484 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9486 err |= REFERENCER_MISSING;
/* The block the search ends on at this level must be the one referenced. */
9490 node = path.nodes[level];
9491 if (btrfs_header_bytenr(node) != bytenr) {
9493 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9494 bytenr, nodesize, bytenr,
9495 btrfs_header_bytenr(node));
9496 err |= REFERENCER_MISMATCH;
9498 if (btrfs_header_level(node) != level) {
9500 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9501 bytenr, nodesize, level,
9502 btrfs_header_level(node));
9503 err |= REFERENCER_MISMATCH;
9507 btrfs_release_path(&path);
9509 if (err & REFERENCER_MISSING) {
9511 error("extent [%llu %d] lost referencer (owner: %llu)",
9512 bytenr, nodesize, root_id);
9515 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9516 bytenr, nodesize, root_id, level);
9523 * Check referencer for shared block backref
9524 * If level == -1, this function will resolve the level.
/*
 * Check that the parent named by a shared block backref really points to
 * the tree block at @bytenr.
 *
 * @fs_info: filesystem info; nodesize is read from the super block copy
 * @parent:  bytenr of the parent tree block named by the backref
 * @bytenr:  bytenr of the child tree block
 * @level:   level of the child; per the comment that precedes this
 *           function, -1 asks for it to be resolved (done through
 *           query_tree_block_level())
 *
 * The parent block is read, its header level is compared with level + 1,
 * and its block pointers are scanned for @bytenr. If no pointer matches,
 * an error is printed and REFERENCER_MISSING is returned.
 * NOTE(review): the remaining return statements and the error branches are
 * not visible in this excerpt.
 */
9526 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9527 u64 parent, u64 bytenr, int level)
9529 struct extent_buffer *eb;
9530 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9532 int found_parent = 0;
9535 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9536 if (!extent_buffer_uptodate(eb))
9540 level = query_tree_block_level(fs_info, bytenr);
/* A parent must sit exactly one level above its child. */
9544 if (level + 1 != btrfs_header_level(eb))
9547 nr = btrfs_header_nritems(eb);
9548 for (i = 0; i < nr; i++) {
9549 if (bytenr == btrfs_node_blockptr(eb, i)) {
9555 free_extent_buffer(eb);
9556 if (!found_parent) {
9558 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9559 bytenr, nodesize, parent, level);
9560 return REFERENCER_MISSING;
9566 * Check referencer for normal (inlined) data ref
9567 * If len == 0, it will be resolved by searching in extent tree
/*
 * Check the referencers of a keyed or inline data backref by counting the
 * matching file extents in the referencing root.
 *
 * @fs_info:  filesystem info
 * @root_id:  objectid of the root named by the backref
 * @objectid: inode number named by the backref
 * @offset:   backref offset (file offset minus file extent offset, as the
 *            in-code comments explain)
 * @bytenr:   disk bytenr of the data extent
 * @len:      length of the data extent; per the comment that precedes this
 *            function, 0 asks for it to be resolved from the extent tree
 * @count:    number of references the backref claims
 *
 * Visible steps:
 *  - look up the EXTENT_ITEM for @bytenr in the extent tree, searching
 *    with (bytenr, EXTENT_ITEM_KEY, -1) and stepping back with
 *    btrfs_previous_extent_item();
 *  - read the root (root_id, ROOT_ITEM_KEY, -1);
 *  - search that root from (objectid, EXTENT_DATA_KEY, start), where start
 *    is 0 if bit 63 of @offset is set and @offset otherwise;
 *  - iterate the EXTENT_DATA items of the inode with btrfs_next_item(),
 *    testing disk_bytenr == bytenr, disk_num_bytes == len and
 *    (key.offset - file extent offset) against a value on a line not
 *    visible here (presumably @offset);
 *  - report and return REFERENCER_MISSING when found_count differs from
 *    @count.
 *
 * NOTE(review): the increment of found_count, the error branches and the
 * success return are not visible in this excerpt.
 */
9569 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9570 u64 root_id, u64 objectid, u64 offset,
9571 u64 bytenr, u64 len, u32 count)
9573 struct btrfs_root *root;
9574 struct btrfs_root *extent_root = fs_info->extent_root;
9575 struct btrfs_key key;
9576 struct btrfs_path path;
9577 struct extent_buffer *leaf;
9578 struct btrfs_file_extent_item *fi;
9579 u32 found_count = 0;
9584 key.objectid = bytenr;
9585 key.type = BTRFS_EXTENT_ITEM_KEY;
9586 key.offset = (u64)-1;
9588 btrfs_init_path(&path);
9589 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9592 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9595 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9596 if (key.objectid != bytenr ||
9597 key.type != BTRFS_EXTENT_ITEM_KEY)
9600 btrfs_release_path(&path);
/* Switch from the extent tree to the root named by the backref. */
9602 key.objectid = root_id;
9603 key.type = BTRFS_ROOT_ITEM_KEY;
9604 key.offset = (u64)-1;
9605 btrfs_init_path(&path);
9607 root = btrfs_read_fs_root(fs_info, &key);
9611 key.objectid = objectid;
9612 key.type = BTRFS_EXTENT_DATA_KEY;
9614 * It can be nasty as data backref offset is
9615 * file offset - file extent offset, which is smaller or
9616 * equal to original backref offset. The only special case is
9617 * overflow. So we need to special check and do further search.
9619 key.offset = offset & (1ULL << 63) ? 0 : offset;
9621 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9626 * Search afterwards to get correct one
9627 * NOTE: As we must do a comprehensive check on the data backref to
9628 * make sure the dref count also matches, we must iterate all file
9629 * extents for that inode.
9632 leaf = path.nodes[0];
9633 slot = path.slots[0];
9635 btrfs_item_key_to_cpu(leaf, &key, slot);
9636 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9638 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9640 * Except normal disk bytenr and disk num bytes, we still
9641 * need to do extra check on dbackref offset as
9642 * dbackref offset = file_offset - file_extent_offset
9644 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9645 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9646 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9650 ret = btrfs_next_item(root, &path);
9655 btrfs_release_path(&path);
9656 if (found_count != count) {
9658 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9659 bytenr, len, root_id, objectid, offset, count, found_count);
9660 return REFERENCER_MISSING;
9666 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: tree block @parent is read and its items
 * are scanned for a file extent item whose disk bytenr equals @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block named by the shared backref
 * @bytenr:  start of the data extent that should be referenced
 *
 * Items that are not EXTENT_DATA, and inline file extents, are tested for
 * separately before the disk bytenr comparison.  When no referencer was
 * found (found_parent still 0) the error is reported and
 * REFERENCER_MISSING is returned.
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, continue/break/goto/return statements and
 * the arguments of the final error call); confirm the control flow
 * against the full file.
 */
9668 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9669 u64 parent, u64 bytenr)
9671 struct extent_buffer *eb;
9672 struct btrfs_key key;
9673 struct btrfs_file_extent_item *fi;
9674 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9676 int found_parent = 0;
/* Read the block that is supposed to hold the referencing file extent */
9679 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9680 if (!extent_buffer_uptodate(eb))
9683 nr = btrfs_header_nritems(eb);
9684 for (i = 0; i < nr; i++) {
9685 btrfs_item_key_to_cpu(eb, &key, i);
9686 if (key.type != BTRFS_EXTENT_DATA_KEY)
9689 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9690 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9693 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9700 free_extent_buffer(eb);
9701 if (!found_parent) {
9702 error("shared extent %llu referencer lost (parent: %llu)",
9704 return REFERENCER_MISSING;
9710 * This function will check a given extent item, including its backref and
9711 * itself (like crossing stripe boundary and type)
9713 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one EXTENT_ITEM / METADATA_ITEM found at @slot of leaf @eb.
 *
 * @fs_info: filesystem being checked
 * @eb:      extent tree leaf holding the item
 * @slot:    slot of the item inside @eb
 *
 * Visible steps:
 *  - add the extent size to the global bytes_used (key.offset for an
 *    EXTENT_ITEM key, nodesize on the other visible assignment);
 *  - refuse items smaller than struct btrfs_extent_item (old V0 format);
 *  - for tree blocks, flag CROSSING_STRIPE_BOUNDARY when
 *    check_crossing_stripes() reports a problem;
 *  - walk the inline refs stored after the extent item (and after the
 *    btrfs_tree_block_info for old style EXTENT_ITEM metadata) and hand
 *    each one to the checker matching its type; an unrecognised type sets
 *    UNKNOWN_TYPE.
 * Error bits are accumulated in err (ITEM_SIZE_MISMATCH is also used).
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, else/switch/loop headers, break and return
 * statements and some call arguments); confirm the control flow against
 * the full file.
 */
9715 static int check_extent_item(struct btrfs_fs_info *fs_info,
9716 struct extent_buffer *eb, int slot)
9718 struct btrfs_extent_item *ei;
9719 struct btrfs_extent_inline_ref *iref;
9720 struct btrfs_extent_data_ref *dref;
9724 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9725 u32 item_size = btrfs_item_size_nr(eb, slot);
9730 struct btrfs_key key;
/* Account the space covered by this extent in the global counter */
9734 btrfs_item_key_to_cpu(eb, &key, slot);
9735 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9736 bytes_used += key.offset;
9738 bytes_used += nodesize;
9740 if (item_size < sizeof(*ei)) {
9742 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9743 * old thing when on disk format is still un-determined.
9744 * No need to care about it anymore
9746 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9750 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9751 flags = btrfs_extent_flags(eb, ei);
9753 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9755 if (metadata && check_crossing_stripes(global_info, key.objectid,
9757 error("bad metadata [%llu, %llu) crossing stripe boundary",
9758 key.objectid, key.objectid + nodesize);
9759 err |= CROSSING_STRIPE_BOUNDARY;
9762 ptr = (unsigned long)(ei + 1);
9764 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9765 /* Old EXTENT_ITEM metadata */
9766 struct btrfs_tree_block_info *info;
9768 info = (struct btrfs_tree_block_info *)ptr;
9769 level = btrfs_tree_block_level(eb, info);
9770 ptr += sizeof(struct btrfs_tree_block_info);
9772 /* New METADATA_ITEM */
9775 end = (unsigned long)ei + item_size;
9778 err |= ITEM_SIZE_MISMATCH;
9782 /* Now check every backref in this extent item */
9784 iref = (struct btrfs_extent_inline_ref *)ptr;
9785 type = btrfs_extent_inline_ref_type(eb, iref);
9786 offset = btrfs_extent_inline_ref_offset(eb, iref);
9788 case BTRFS_TREE_BLOCK_REF_KEY:
9789 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9793 case BTRFS_SHARED_BLOCK_REF_KEY:
9794 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9798 case BTRFS_EXTENT_DATA_REF_KEY:
9799 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9800 ret = check_extent_data_backref(fs_info,
9801 btrfs_extent_data_ref_root(eb, dref),
9802 btrfs_extent_data_ref_objectid(eb, dref),
9803 btrfs_extent_data_ref_offset(eb, dref),
9804 key.objectid, key.offset,
9805 btrfs_extent_data_ref_count(eb, dref));
9808 case BTRFS_SHARED_DATA_REF_KEY:
9809 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9813 error("extent[%llu %d %llu] has unknown ref type: %d",
9814 key.objectid, key.type, key.offset, type);
9815 err |= UNKNOWN_TYPE;
9819 ptr += btrfs_extent_inline_ref_size(type);
9828 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the dev extent at @slot of leaf @eb is referenced by the
 * chunk it names.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the dev extent inside @eb
 *
 * The chunk item keyed by (chunk objectid, CHUNK_ITEM, chunk offset) taken
 * from the dev extent is searched in the chunk tree.  Its length is
 * compared with the dev extent length and its stripes are scanned for one
 * whose devid and offset equal the dev extent key objectid and offset.
 * On failure the error is reported and REFERENCER_MISSING is returned.
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, the assignment of l, goto/break/return
 * statements and the place where found_chunk is set); confirm the control
 * flow against the full file.
 */
9830 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9831 struct extent_buffer *eb, int slot)
9833 struct btrfs_root *chunk_root = fs_info->chunk_root;
9834 struct btrfs_dev_extent *ptr;
9835 struct btrfs_path path;
9836 struct btrfs_key chunk_key;
9837 struct btrfs_key devext_key;
9838 struct btrfs_chunk *chunk;
9839 struct extent_buffer *l;
9843 int found_chunk = 0;
9846 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9847 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9848 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to */
9850 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9851 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9852 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9854 btrfs_init_path(&path);
9855 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9860 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9861 if (btrfs_chunk_length(l, chunk) != length)
9864 num_stripes = btrfs_chunk_num_stripes(l, chunk);
/* One stripe of the chunk must point at this device and device offset */
9865 for (i = 0; i < num_stripes; i++) {
9866 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9867 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9869 if (devid == devext_key.objectid &&
9870 offset == devext_key.offset) {
9876 btrfs_release_path(&path);
9879 "device extent[%llu, %llu, %llu] did not find the related chunk",
9880 devext_key.objectid, devext_key.offset, length);
9881 return REFERENCER_MISSING;
9887 * Check if the used space is correct with the dev item
/*
 * Check that the bytes_used field of the dev item at @slot of leaf @eb
 * equals the summed length of the dev extents of that device.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * The device tree is searched from (dev_id, DEV_EXTENT, ...) and every
 * DEV_EXTENT item whose objectid equals dev_id adds its length to total;
 * the walk ends once an objectid greater than dev_id is seen.
 * Returns REFERENCER_MISSING when the initial search fails and
 * ACCOUNTING_MISMATCH when used != total; both cases are reported with the
 * key of the dev item itself.
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, the key.offset assignment, loop header,
 * break/goto/return statements); confirm the control flow against the
 * full file.
 */
9889 static int check_dev_item(struct btrfs_fs_info *fs_info,
9890 struct extent_buffer *eb, int slot)
9892 struct btrfs_root *dev_root = fs_info->dev_root;
9893 struct btrfs_dev_item *dev_item;
9894 struct btrfs_path path;
9895 struct btrfs_key key;
9896 struct btrfs_dev_extent *ptr;
9902 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9903 dev_id = btrfs_device_id(eb, dev_item);
9904 used = btrfs_device_bytes_used(eb, dev_item);
9906 key.objectid = dev_id;
9907 key.type = BTRFS_DEV_EXTENT_KEY;
9910 btrfs_init_path(&path);
9911 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9913 btrfs_item_key_to_cpu(eb, &key, slot);
9914 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9915 key.objectid, key.type, key.offset);
9916 btrfs_release_path(&path);
9917 return REFERENCER_MISSING;
9920 /* Iterate dev_extents to calculate the used space of a device */
9922 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9924 if (key.objectid > dev_id)
9926 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9929 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9930 struct btrfs_dev_extent);
9931 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9933 ret = btrfs_next_item(dev_root, &path);
9937 btrfs_release_path(&path);
9939 if (used != total) {
/*
 * key is reloaded with the dev item key so that the message names the
 * item being checked, in the same way as the search failure message.
 */
9940 btrfs_item_key_to_cpu(eb, &key, slot);
9942 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9943 total, used, key.objectid, key.type,
9944 key.offset);
9945 return ACCOUNTING_MISMATCH;
9951 * Check a block group item with its referencer (chunk) and its used space
9952 * with extent/metadata item
/*
 * Check the block group item at @slot of leaf @eb against its chunk and
 * against the extents that live inside it.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Visible steps:
 *  - search the chunk tree for (FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM,
 *    block group start); a missing chunk sets REFERENCER_MISSING, a chunk
 *    of different length sets REFERENCER_MISMATCH;
 *  - walk the extent tree from the block group start up to its end,
 *    looking only at EXTENT_ITEM / METADATA_ITEM keys, accumulating the
 *    space they use in total and comparing the extent flags with the
 *    block group flags (CHUNK_TYPE_MISMATCH on disagreement);
 *  - compare total with the used field of the block group item
 *    (ACCOUNTING_MISMATCH on disagreement).
 * Error bits are accumulated in err.
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, loop header, else branches, the amount
 * added for METADATA_ITEM keys, goto/break/return statements); confirm
 * the control flow against the full file.
 */
9954 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9955 struct extent_buffer *eb, int slot)
9957 struct btrfs_root *extent_root = fs_info->extent_root;
9958 struct btrfs_root *chunk_root = fs_info->chunk_root;
9959 struct btrfs_block_group_item *bi;
9960 struct btrfs_block_group_item bg_item;
9961 struct btrfs_path path;
9962 struct btrfs_key bg_key;
9963 struct btrfs_key chunk_key;
9964 struct btrfs_key extent_key;
9965 struct btrfs_chunk *chunk;
9966 struct extent_buffer *leaf;
9967 struct btrfs_extent_item *ei;
9968 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk item out of the leaf before reading its fields */
9976 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9977 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9978 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9979 used = btrfs_block_group_used(&bg_item);
9980 bg_flags = btrfs_block_group_flags(&bg_item);
9982 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9983 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9984 chunk_key.offset = bg_key.objectid;
9986 btrfs_init_path(&path);
9987 /* Search for the referencer chunk */
9988 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9991 "block group[%llu %llu] did not find the related chunk item",
9992 bg_key.objectid, bg_key.offset);
9993 err |= REFERENCER_MISSING;
9995 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9996 struct btrfs_chunk);
9997 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10000 "block group[%llu %llu] related chunk item length does not match",
10001 bg_key.objectid, bg_key.offset);
10002 err |= REFERENCER_MISMATCH;
10005 btrfs_release_path(&path);
10007 /* Search from the block group bytenr */
10008 extent_key.objectid = bg_key.objectid;
10009 extent_key.type = 0;
10010 extent_key.offset = 0;
10012 btrfs_init_path(&path);
10013 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10017 /* Iterate extent tree to account used space */
10019 leaf = path.nodes[0];
10020 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the first byte past the block group */
10021 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10024 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10025 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10027 if (extent_key.objectid < bg_key.objectid)
10030 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10033 total += extent_key.offset;
10035 ei = btrfs_item_ptr(leaf, path.slots[0],
10036 struct btrfs_extent_item);
10037 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA group, tree blocks a SYSTEM or METADATA one */
10038 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10039 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10041 "bad extent[%llu, %llu) type mismatch with chunk",
10042 extent_key.objectid,
10043 extent_key.objectid + extent_key.offset);
10044 err |= CHUNK_TYPE_MISMATCH;
10046 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10047 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10048 BTRFS_BLOCK_GROUP_METADATA))) {
10050 "bad extent[%llu, %llu) type mismatch with chunk",
10051 extent_key.objectid,
10052 extent_key.objectid + nodesize);
10053 err |= CHUNK_TYPE_MISMATCH;
10057 ret = btrfs_next_item(extent_root, &path);
10063 btrfs_release_path(&path);
10065 if (total != used) {
10067 "block group[%llu %llu] used %llu but extent items used %llu",
10068 bg_key.objectid, bg_key.offset, used, total);
10069 err |= ACCOUNTING_MISMATCH;
10075 * Check a chunk item.
10076 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of leaf @eb together with the block group
 * item and the dev extents that must refer to it.
 *
 * @fs_info: filesystem being checked
 * @eb:      chunk tree leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Visible steps:
 *  - the chunk length must be aligned to the sectorsize (BYTES_UNALIGNED);
 *  - the type must have a bit of BTRFS_BLOCK_GROUP_TYPE_MASK set and at
 *    most one profile bit (UNKNOWN_TYPE otherwise);
 *  - a block group item keyed (chunk start, BLOCK_GROUP_ITEM, length) must
 *    exist in the extent tree and carry the same flags
 *    (REFERENCER_MISSING otherwise);
 *  - for every stripe, the dev extent keyed (devid, DEV_EXTENT, stripe
 *    offset) must exist and point back to this chunk with the same length
 *    (BACKREF_MISSING otherwise).
 * Error bits are accumulated in err.  All messages identify the chunk by
 * its logical range [chunk_key.offset, chunk_end).
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, else branches, labels, continue and return
 * statements and some call arguments); confirm the control flow against
 * the full file.
 * NOTE(review): a flags mismatch is reported as REFERENCER_MISSING although
 * REFERENCER_MISMATCH exists in this file; left unchanged, confirm intent.
 */
10078 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10079 struct extent_buffer *eb, int slot)
10081 struct btrfs_root *extent_root = fs_info->extent_root;
10082 struct btrfs_root *dev_root = fs_info->dev_root;
10083 struct btrfs_path path;
10084 struct btrfs_key chunk_key;
10085 struct btrfs_key bg_key;
10086 struct btrfs_key devext_key;
10087 struct btrfs_chunk *chunk;
10088 struct extent_buffer *leaf;
10089 struct btrfs_block_group_item *bi;
10090 struct btrfs_block_group_item bg_item;
10091 struct btrfs_dev_extent *ptr;
10092 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10104 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10105 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10106 length = btrfs_chunk_length(eb, chunk);
10107 chunk_end = chunk_key.offset + length;
10108 if (!IS_ALIGNED(length, sectorsize)) {
10109 error("chunk[%llu %llu) not aligned to %u",
10110 chunk_key.offset, chunk_end, sectorsize);
10111 err |= BYTES_UNALIGNED;
10115 type = btrfs_chunk_type(eb, chunk);
10116 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10117 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10118 error("chunk[%llu %llu) has no chunk type",
10119 chunk_key.offset, chunk_end);
10120 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one bit is set */
10122 if (profile && (profile & (profile - 1))) {
10123 error("chunk[%llu %llu) multiple profiles detected: %llx",
10124 chunk_key.offset, chunk_end, profile);
10125 err |= UNKNOWN_TYPE;
10128 bg_key.objectid = chunk_key.offset;
10129 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10130 bg_key.offset = length;
10132 btrfs_init_path(&path);
10133 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10136 "chunk[%llu %llu) did not find the related block group item",
10137 chunk_key.offset, chunk_end);
10138 err |= REFERENCER_MISSING;
10140 leaf = path.nodes[0];
10141 bi = btrfs_item_ptr(leaf, path.slots[0],
10142 struct btrfs_block_group_item);
10143 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
10145 if (btrfs_block_group_flags(&bg_item) != type) {
10147 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10148 chunk_key.offset, chunk_end, type,
10149 btrfs_block_group_flags(&bg_item));
10150 err |= REFERENCER_MISSING;
10154 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10155 for (i = 0; i < num_stripes; i++) {
10156 btrfs_release_path(&path);
10157 btrfs_init_path(&path);
10158 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10159 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10160 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10162 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10165 goto not_match_dev;
10167 leaf = path.nodes[0];
10168 ptr = btrfs_item_ptr(leaf, path.slots[0],
10169 struct btrfs_dev_extent);
10170 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10171 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
10172 if (objectid != chunk_key.objectid ||
10173 offset != chunk_key.offset ||
10174 btrfs_dev_extent_length(leaf, ptr) != length)
10175 goto not_match_dev;
10178 err |= BACKREF_MISSING;
/* The range starts at chunk_key.offset, matching chunk_end computed above */
10180 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10181 chunk_key.offset, chunk_end, i);
10184 btrfs_release_path(&path);
10190 * Main entry function to check known items and update related accounting info
/*
 * Dispatch the items of leaf @eb to the checker matching their key type.
 *
 * @root: tree the leaf belongs to (also supplies fs_info)
 * @eb:   leaf whose items are examined
 *
 * Visible dispatch table:
 *   EXTENT_DATA                 -> check_extent_data_item()
 *   BLOCK_GROUP_ITEM            -> check_block_group_item()
 *   DEV_ITEM                    -> check_dev_item()
 *   CHUNK_ITEM                  -> check_chunk_item()
 *   DEV_EXTENT                  -> check_dev_extent_item()
 *   EXTENT_ITEM / METADATA_ITEM -> check_extent_item()
 *   EXTENT_CSUM                 -> item size added to total_csum_bytes
 *   TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF, SHARED_DATA_REF
 *                               -> the matching keyed backref checker
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, the switch header, break statements, how
 * ret is merged into the result, the jump taken while ++slot is still
 * below the item count and the return); confirm against the full file.
 */
10192 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10194 struct btrfs_fs_info *fs_info = root->fs_info;
10195 struct btrfs_key key;
10198 struct btrfs_extent_data_ref *dref;
10203 btrfs_item_key_to_cpu(eb, &key, slot);
10207 case BTRFS_EXTENT_DATA_KEY:
10208 ret = check_extent_data_item(root, eb, slot);
10211 case BTRFS_BLOCK_GROUP_ITEM_KEY:
10212 ret = check_block_group_item(fs_info, eb, slot);
10215 case BTRFS_DEV_ITEM_KEY:
10216 ret = check_dev_item(fs_info, eb, slot);
10219 case BTRFS_CHUNK_ITEM_KEY:
10220 ret = check_chunk_item(fs_info, eb, slot);
10223 case BTRFS_DEV_EXTENT_KEY:
10224 ret = check_dev_extent_item(fs_info, eb, slot);
10227 case BTRFS_EXTENT_ITEM_KEY:
10228 case BTRFS_METADATA_ITEM_KEY:
10229 ret = check_extent_item(fs_info, eb, slot);
10232 case BTRFS_EXTENT_CSUM_KEY:
10233 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backrefs: the ref data is in the item key and body */
10235 case BTRFS_TREE_BLOCK_REF_KEY:
10236 ret = check_tree_block_backref(fs_info, key.offset,
10240 case BTRFS_EXTENT_DATA_REF_KEY:
10241 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10242 ret = check_extent_data_backref(fs_info,
10243 btrfs_extent_data_ref_root(eb, dref),
10244 btrfs_extent_data_ref_objectid(eb, dref),
10245 btrfs_extent_data_ref_offset(eb, dref),
10247 btrfs_extent_data_ref_count(eb, dref));
10250 case BTRFS_SHARED_BLOCK_REF_KEY:
10251 ret = check_shared_block_backref(fs_info, key.offset,
10255 case BTRFS_SHARED_DATA_REF_KEY:
10256 ret = check_shared_data_backref(fs_info, key.offset,
10264 if (++slot < btrfs_header_nritems(eb))
10271 * Helper function for later fs/subvol tree check. To determine if a tree
10272 * block should be checked.
10273 * This function ensures that only the direct referencer with the lowest
10274 * rootid checks a fs/subvolume tree block.
10276 * Backref check at extent tree would detect errors like missing subvolume
10277 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether tree block @eb should be checked while traversing @root.
 *
 * @root: fs/subvolume tree currently being traversed
 * @eb:   tree block reached through @root
 *
 * The extent tree is searched for the extent/metadata item of the block
 * (key objectid = block bytenr) and the inline refs stored in that item
 * are walked.  An inline TREE_BLOCK_REF whose root id (the ref offset) is
 * lower than root->objectid is handled specially: the path is released at
 * that point, presumably to tell the caller to skip the block because a
 * lower root will check it (TODO confirm the returned values).
 * Keyed tree block refs are deliberately not searched.
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, the error checks after the two lookups,
 * else branch header and every return statement); confirm the control
 * flow and return values against the full file.
 */
10279 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10281 struct btrfs_root *extent_root = root->fs_info->extent_root;
10282 struct btrfs_key key;
10283 struct btrfs_path path;
10284 struct extent_buffer *leaf;
10286 struct btrfs_extent_item *ei;
10292 struct btrfs_extent_inline_ref *iref;
10295 btrfs_init_path(&path);
10296 key.objectid = btrfs_header_bytenr(eb);
10297 key.type = BTRFS_METADATA_ITEM_KEY;
10298 key.offset = (u64)-1;
10301 * Any failure in backref resolving means we can't determine
10302 * whom the tree block belongs to.
10303 * So in that case, we need to check that tree block
10305 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10309 ret = btrfs_previous_extent_item(extent_root, &path,
10310 btrfs_header_bytenr(eb));
10314 leaf = path.nodes[0];
10315 slot = path.slots[0];
10316 btrfs_item_key_to_cpu(leaf, &key, slot);
10317 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10319 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10320 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10322 struct btrfs_tree_block_info *info;
10324 info = (struct btrfs_tree_block_info *)(ei + 1);
10325 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10328 item_size = btrfs_item_size_nr(leaf, slot);
10329 ptr = (unsigned long)iref;
10330 end = (unsigned long)ei + item_size;
10331 while (ptr < end) {
10332 iref = (struct btrfs_extent_inline_ref *)ptr;
10333 type = btrfs_extent_inline_ref_type(leaf, iref);
10334 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10337 * We only check the tree block if current root is
10338 * the lowest referencer of it.
10340 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10341 offset < root->objectid) {
10342 btrfs_release_path(&path);
10346 ptr += btrfs_extent_inline_ref_size(type);
10349 * Normally we should also check keyed tree block ref, but that may be
10350 * very time consuming. Inlined ref should already make us skip a lot
10351 * of refs now. So skip search keyed tree block ref.
10355 btrfs_release_path(&path);
10360 * Traversal function for tree block. We will do:
10361 * 1) Skip shared fs/subvolume tree blocks
10362 * 2) Update related bytes accounting
10363 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of tree @root, parent before children.
 *
 * @root: tree being traversed
 * @node: tree block to check
 *
 * Visible steps:
 *  - for fs trees, consult should_check() first (shared blocks are left to
 *    the referencer with the lowest root id);
 *  - update the global counters total_btree_bytes, total_fs_tree_bytes,
 *    total_extent_tree_bytes, found_old_backref and btree_space_waste;
 *  - check the block itself with check_tree_block_ref(); a failure is
 *    reported but the check continues;
 *  - a leaf is passed to check_leaf_items(); for a node every child is
 *    read with read_tree_block() and traversed recursively, except where
 *    the level equals root_item.drop_level and the child key is tested
 *    with is_dropped_key().
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, how ret is merged into the result, the
 * leaf/node test, continue and return statements); confirm the control
 * flow against the full file.
 */
10365 static int traverse_tree_block(struct btrfs_root *root,
10366 struct extent_buffer *node)
10368 struct extent_buffer *eb;
10369 struct btrfs_key key;
10370 struct btrfs_key drop_key;
10378 * Skip shared fs/subvolume tree block, in that case they will
10379 * be checked by referencer with lowest rootid
10381 if (is_fstree(root->objectid) && !should_check(root, node))
10384 /* Update bytes accounting */
10385 total_btree_bytes += node->len;
10386 if (fs_root_objectid(btrfs_header_owner(node)))
10387 total_fs_tree_bytes += node->len;
10388 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10389 total_extent_tree_bytes += node->len;
10390 if (!found_old_backref &&
10391 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10392 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10393 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10394 found_old_backref = 1;
10396 /* pre-order traversal, check itself first */
10397 level = btrfs_header_level(node);
10398 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10399 btrfs_header_level(node),
10400 btrfs_header_owner(node));
10404 "check %s failed root %llu bytenr %llu level %d, force continue check",
10405 level ? "node":"leaf", root->objectid,
10406 btrfs_header_bytenr(node), btrfs_header_level(node));
10409 btree_space_waste += btrfs_leaf_free_space(root, node);
10410 ret = check_leaf_items(root, node);
10415 nr = btrfs_header_nritems(node);
10416 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
/* Unused key pointer slots of a node count as wasted space */
10417 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10418 sizeof(struct btrfs_key_ptr);
10420 /* Then check all its children */
10421 for (i = 0; i < nr; i++) {
10422 u64 blocknr = btrfs_node_blockptr(node, i);
10424 btrfs_node_key_to_cpu(node, &key, i);
10425 if (level == root->root_item.drop_level &&
10426 is_dropped_key(&key, &drop_key))
10430 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10431 * to call the function itself.
10433 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10434 if (extent_buffer_uptodate(eb)) {
10435 ret = traverse_tree_block(root, eb);
10438 free_extent_buffer(eb);
10445 * Low memory usage version check_chunks_and_extents.
/*
 * Low memory variant of the chunk and extent check.
 *
 * @root: any root of the filesystem; only root->fs_info is used here
 *
 * Visible steps: traverse the chunk tree, then the tree root, then search
 * the tree root starting at BTRFS_EXTENT_TREE_OBJECTID and, for the
 * ROOT_ITEM keys met while iterating with btrfs_next_item(), read the tree
 * with btrfs_read_fs_root() and traverse it with traverse_tree_block().
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, how ret values are merged, the loop header,
 * goto/break/return statements); confirm the control flow against the
 * full file.
 * NOTE(review): the message "cannot find extent treet in tree_root"
 * contains a typo ("treet"); it is a runtime string and is left as is.
 */
10447 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10449 struct btrfs_path path;
10450 struct btrfs_key key;
10451 struct btrfs_root *root1;
10452 struct btrfs_root *cur_root;
10456 root1 = root->fs_info->chunk_root;
10457 ret = traverse_tree_block(root1, root1->node);
10460 root1 = root->fs_info->tree_root;
10461 ret = traverse_tree_block(root1, root1->node);
/* Walk the root items stored in the tree root, from the extent tree on */
10464 btrfs_init_path(&path);
10465 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10467 key.type = BTRFS_ROOT_ITEM_KEY;
10469 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10471 error("cannot find extent treet in tree_root");
10476 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10477 if (key.type != BTRFS_ROOT_ITEM_KEY)
10479 key.offset = (u64)-1;
10481 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10482 if (IS_ERR(cur_root) || !cur_root) {
10483 error("failed to read tree: %lld", key.objectid);
10487 ret = traverse_tree_block(cur_root, cur_root->node);
10491 ret = btrfs_next_item(root1, &path);
10497 btrfs_release_path(&path);
/*
 * Re-initialise the root node of @root as an empty tree block.
 *
 * @trans:     running transaction
 * @root:      root whose node is replaced or rewritten
 * @overwrite: selects between the two visible ways of obtaining the buffer
 *             (an existing buffer with an extra reference, or a block from
 *             btrfs_alloc_free_block()); TODO confirm which branch is
 *             taken for which value
 *
 * The header of the buffer is zeroed and refilled (level, bytenr,
 * generation of @trans, mixed backref revision, owner, fsid and chunk tree
 * uuid) and the buffer is marked dirty.  When the new buffer starts at the
 * same bytenr as the old root node, the root item generation and level are
 * updated and written with btrfs_update_root().  The old node reference is
 * dropped and the root is added to the dirty list.
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, if/else headers, error checks, the
 * assignment of root->node and the return statements); confirm the
 * control flow against the full file.
 */
10501 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10502 struct btrfs_root *root, int overwrite)
10504 struct extent_buffer *c;
10505 struct extent_buffer *old = root->node;
10508 struct btrfs_disk_key disk_key = {0,0,0};
10514 extent_buffer_get(c);
10517 c = btrfs_alloc_free_block(trans, root,
10519 root->root_key.objectid,
10520 &disk_key, level, 0, 0);
10523 extent_buffer_get(c);
/* Build a fresh, empty header for the block */
10527 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10528 btrfs_set_header_level(c, level);
10529 btrfs_set_header_bytenr(c, c->start);
10530 btrfs_set_header_generation(c, trans->transid);
10531 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10532 btrfs_set_header_owner(c, root->root_key.objectid);
10534 write_extent_buffer(c, root->fs_info->fsid,
10535 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10537 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10538 btrfs_header_chunk_tree_uuid(c),
10541 btrfs_mark_buffer_dirty(c);
10543 * this case can happen in the following case:
10545 * 1.overwrite previous root.
10547 * 2.reinit reloc data root, this is because we skip pin
10548 * down reloc data tree before which means we can allocate
10549 * same block bytenr here.
10551 if (old->start == c->start) {
10552 btrfs_set_root_generation(&root->root_item,
10554 root->root_item.level = btrfs_header_level(root->node);
10555 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10556 &root->root_key, &root->root_item);
10558 free_extent_buffer(c);
10562 free_extent_buffer(old);
10564 add_root_to_dirty_list(root);
/*
 * Pin tree block @eb and, recursively, the tree blocks reachable from it.
 *
 * @fs_info:   filesystem being repaired
 * @eb:        tree block to pin
 * @tree_root: non-zero while walking the tree of tree roots; it is passed
 *             on unchanged for child nodes and passed as 0 when descending
 *             into a root found through a ROOT_ITEM
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is not pinned again.  Otherwise the block is
 * pinned and its entries are walked:
 *  - ROOT_ITEM entries (extent tree, tree reloc and data reloc roots
 *    excluded): the root block is read and pinned recursively;
 *  - node pointers: the child is read and pinned recursively, except that
 *    with level == 1 and @tree_root == 0 the child is pinned by bytenr
 *    and nodesize without being read.
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, the leaf/node test, continue statements,
 * error returns and some call arguments); confirm the control flow
 * against the full file.
 */
10568 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10569 struct extent_buffer *eb, int tree_root)
10571 struct extent_buffer *tmp;
10572 struct btrfs_root_item *ri;
10573 struct btrfs_key key;
10576 int level = btrfs_header_level(eb);
10582 * If we have pinned this block before, don't pin it again.
10583 * This can not only avoid forever loop with broken filesystem
10584 * but also give us some speedups.
10586 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10587 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10590 btrfs_pin_extent(fs_info, eb->start, eb->len);
10592 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10593 nritems = btrfs_header_nritems(eb);
10594 for (i = 0; i < nritems; i++) {
10596 btrfs_item_key_to_cpu(eb, &key, i);
10597 if (key.type != BTRFS_ROOT_ITEM_KEY)
10599 /* Skip the extent root and reloc roots */
10600 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10601 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10602 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10604 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10605 bytenr = btrfs_disk_root_bytenr(eb, ri);
10608 * If at any point we start needing the real root we
10609 * will have to build a stump root for the root we are
10610 * in, but for now this doesn't actually use the root so
10611 * just pass in extent_root.
10613 tmp = read_tree_block(fs_info->extent_root, bytenr,
10615 if (!extent_buffer_uptodate(tmp)) {
10616 fprintf(stderr, "Error reading root block\n");
10619 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10620 free_extent_buffer(tmp);
10624 bytenr = btrfs_node_blockptr(eb, i);
10626 /* If we aren't the tree root don't read the block */
10627 if (level == 1 && !tree_root) {
10628 btrfs_pin_extent(fs_info, bytenr, nodesize);
10632 tmp = read_tree_block(fs_info->extent_root, bytenr,
10634 if (!extent_buffer_uptodate(tmp)) {
10635 fprintf(stderr, "Error reading tree block\n");
10638 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10639 free_extent_buffer(tmp);
/*
 * Pin the metadata reachable from the chunk root and from the tree root.
 *
 * @fs_info: filesystem being repaired
 *
 * The chunk tree is walked with tree_root == 0, then the tree root with
 * tree_root == 1; the result of the second walk is returned.
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, the declaration of ret and the check made on the first
 * result); confirm against the full file.
 */
10648 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10652 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10656 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk items.
 *
 * @fs_info: filesystem being repaired
 *
 * Visible steps: search the chunk tree for CHUNK_ITEM keys, clear the
 * avail_*_alloc_bits masks, then for every chunk item call
 * btrfs_add_block_group() with the chunk type, key and length and mark the
 * chunk range dirty in fs_info->free_space_cache.  Afterwards the block
 * groups are iterated with btrfs_lookup_first_block_group(), advancing
 * start to the end of each group.
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, the remaining key fields, loop headers,
 * error checks, slot advancement, what is done with each cache entry and
 * the return statements); confirm the control flow against the full file.
 */
10659 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10661 struct btrfs_block_group_cache *cache;
10662 struct btrfs_path path;
10663 struct extent_buffer *leaf;
10664 struct btrfs_chunk *chunk;
10665 struct btrfs_key key;
10669 btrfs_init_path(&path);
10671 key.type = BTRFS_CHUNK_ITEM_KEY;
10673 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
10675 btrfs_release_path(&path);
10680 * We do this in case the block groups were screwed up and had alloc
10681 * bits that aren't actually set on the chunks. This happens with
10682 * restored images every time and could happen in real life I guess.
10684 fs_info->avail_data_alloc_bits = 0;
10685 fs_info->avail_metadata_alloc_bits = 0;
10686 fs_info->avail_system_alloc_bits = 0;
10688 /* First we need to create the in-memory block groups */
10690 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10691 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
10693 btrfs_release_path(&path);
10701 leaf = path.nodes[0];
10702 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10703 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10708 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
10709 btrfs_add_block_group(fs_info, 0,
10710 btrfs_chunk_type(leaf, chunk),
10711 key.objectid, key.offset,
10712 btrfs_chunk_length(leaf, chunk));
10713 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10714 key.offset + btrfs_chunk_length(leaf, chunk),
10720 cache = btrfs_lookup_first_block_group(fs_info, start);
10724 start = cache->key.objectid + cache->key.offset;
10727 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance and re-create the data reloc
 * tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Visible steps:
 *  - search the tree root for the (BALANCE_OBJECTID, BALANCE_ITEM) key and
 *    delete the item;
 *  - search for ROOT_ITEM keys of BTRFS_TREE_RELOC_OBJECTID and delete
 *    them in batches (del_slot / del_nr) with btrfs_del_items();
 *  - read the data reloc tree, record it in the transaction, reinitialise
 *    it with btrfs_fsck_reinit_root() and create its root directory with
 *    btrfs_make_root_dir().
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, remaining key fields, labels, loop
 * headers, error checks, del_nr bookkeeping and return statements);
 * confirm the control flow against the full file.
 */
10731 static int reset_balance(struct btrfs_trans_handle *trans,
10732 struct btrfs_fs_info *fs_info)
10734 struct btrfs_root *root = fs_info->tree_root;
10735 struct btrfs_path path;
10736 struct extent_buffer *leaf;
10737 struct btrfs_key key;
10738 int del_slot, del_nr = 0;
/* Step 1: the balance item itself */
10742 btrfs_init_path(&path);
10743 key.objectid = BTRFS_BALANCE_OBJECTID;
10744 key.type = BTRFS_BALANCE_ITEM_KEY;
10746 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10751 goto reinit_data_reloc;
10756 ret = btrfs_del_item(trans, root, &path);
10759 btrfs_release_path(&path);
/* Step 2: the root items of the tree reloc roots */
10761 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10762 key.type = BTRFS_ROOT_ITEM_KEY;
10764 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10768 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10773 ret = btrfs_del_items(trans, root, &path,
10780 btrfs_release_path(&path);
10783 ret = btrfs_search_slot(trans, root, &key, &path,
10790 leaf = path.nodes[0];
10791 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10792 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10794 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10799 del_slot = path.slots[0];
10808 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
10812 btrfs_release_path(&path);
/* Step 3: start over with an empty data reloc tree */
10815 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10816 key.type = BTRFS_ROOT_ITEM_KEY;
10817 key.offset = (u64)-1;
10818 root = btrfs_read_fs_root(fs_info, &key);
10819 if (IS_ERR(root)) {
10820 fprintf(stderr, "Error reading data reloc tree\n");
10821 ret = PTR_ERR(root);
10824 record_root_in_trans(trans, root);
10825 ret = btrfs_fsck_reinit_root(trans, root, 0);
10828 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10830 btrfs_release_path(&path);
/*
 * Throw away the extent tree and rebuild an empty one with its block
 * group items.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Visible steps:
 *  - filesystems with the MIXED_GROUPS incompat flag are refused with a
 *    message;
 *  - pin_metadata_blocks() pins the metadata currently in use;
 *  - the block groups are freed and re-created by reset_block_groups();
 *  - the extent root is reinitialised with btrfs_fsck_reinit_root();
 *  - every in-memory block group is inserted into the new extent tree,
 *    followed by btrfs_extent_post_op();
 *  - reset_balance() clears a pending balance.
 * Each failing step prints its own message to stderr.
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, short declarations, error checks, loop header, break and
 * return statements); confirm the control flow and the returned values
 * against the full file.
 */
10834 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10835 struct btrfs_fs_info *fs_info)
10841 * The only reason we don't do this is because right now we're just
10842 * walking the trees we find and pinning down their bytes, we don't look
10843 * at any of the leaves. In order to do mixed groups we'd have to check
10844 * the leaves of any fs roots and pin down the bytes for any file
10845 * extents we find. Not hard but why do it if we don't have to?
10847 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10848 fprintf(stderr, "We don't support re-initing the extent tree "
10849 "for mixed block groups yet, please notify a btrfs "
10850 "developer you want to do this so they can add this "
10851 "functionality.\n");
10856 * first we need to walk all of the trees except the extent tree and pin
10857 * down the bytes that are in use so we don't overwrite any existing
10860 ret = pin_metadata_blocks(fs_info);
10862 fprintf(stderr, "error pinning down used bytes\n");
10867 * Need to drop all the block groups since we're going to recreate all
10870 btrfs_free_block_groups(fs_info);
10871 ret = reset_block_groups(fs_info);
10873 fprintf(stderr, "error resetting the block groups\n");
10877 /* Ok we can allocate now, reinit the extent root */
10878 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10880 fprintf(stderr, "extent root initialization failed\n");
10882 * When the transaction code is updated we should end the
10883 * transaction, but for now progs only knows about commit so
10884 * just return an error.
10890 * Now we have all the in-memory block groups setup so we can make
10891 * allocations properly, and the metadata we care about is safe since we
10892 * pinned all of it above.
10895 struct btrfs_block_group_cache *cache;
10897 cache = btrfs_lookup_first_block_group(fs_info, start);
10900 start = cache->key.objectid + cache->key.offset;
10901 ret = btrfs_insert_item(trans, fs_info->extent_root,
10902 &cache->key, &cache->item,
10903 sizeof(cache->item));
10905 fprintf(stderr, "Error adding block group\n");
10908 btrfs_extent_post_op(trans, fs_info->extent_root);
10911 ret = reset_balance(trans, fs_info);
10913 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force tree block @eb to be COWed by searching for its first key with
 * cow enabled inside a transaction.
 *
 * @root: any root of the filesystem; replaced below by the owner root of
 *        @eb, looked up through btrfs_read_fs_root()
 * @eb:   tree block to re-COW
 *
 * path.lowest_level is set to the level of @eb so that the search stops
 * at that level; the key is the first node key for a node and the first
 * item key for a leaf.  Returns PTR_ERR() of the failing lookup when the
 * owner root cannot be read or the transaction cannot be started.
 *
 * NOTE(review): this listing omits the short lines of the function
 * (braces, the declaration of ret, the IS_ERR test on trans, the else
 * keyword and the final return); confirm against the full file.
 * NOTE(review): on the visible line the result of
 * btrfs_commit_transaction() is not stored or tested.
 */
10918 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10920 struct btrfs_path path;
10921 struct btrfs_trans_handle *trans;
10922 struct btrfs_key key;
10925 printf("Recowing metadata block %llu\n", eb->start);
10926 key.objectid = btrfs_header_owner(eb);
10927 key.type = BTRFS_ROOT_ITEM_KEY;
10928 key.offset = (u64)-1;
10930 root = btrfs_read_fs_root(root->fs_info, &key);
10931 if (IS_ERR(root)) {
10932 fprintf(stderr, "Couldn't find owner root %llu\n",
10934 return PTR_ERR(root);
10937 trans = btrfs_start_transaction(root, 1);
10939 return PTR_ERR(trans);
10941 btrfs_init_path(&path);
10942 path.lowest_level = btrfs_header_level(eb);
10943 if (path.lowest_level)
10944 btrfs_node_key_to_cpu(eb, &key, 0);
10946 btrfs_item_key_to_cpu(eb, &key, 0);
10948 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10949 btrfs_commit_transaction(trans, root);
10950 btrfs_release_path(&path);
10954 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10956 struct btrfs_path path;
10957 struct btrfs_trans_handle *trans;
10958 struct btrfs_key key;
10961 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10962 bad->key.type, bad->key.offset);
10963 key.objectid = bad->root_id;
10964 key.type = BTRFS_ROOT_ITEM_KEY;
10965 key.offset = (u64)-1;
10967 root = btrfs_read_fs_root(root->fs_info, &key);
10968 if (IS_ERR(root)) {
10969 fprintf(stderr, "Couldn't find owner root %llu\n",
10971 return PTR_ERR(root);
10974 trans = btrfs_start_transaction(root, 1);
10976 return PTR_ERR(trans);
10978 btrfs_init_path(&path);
10979 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
10985 ret = btrfs_del_item(trans, root, &path);
10987 btrfs_commit_transaction(trans, root);
10988 btrfs_release_path(&path);
10992 static int zero_log_tree(struct btrfs_root *root)
10994 struct btrfs_trans_handle *trans;
10997 trans = btrfs_start_transaction(root, 1);
10998 if (IS_ERR(trans)) {
10999 ret = PTR_ERR(trans);
11002 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11003 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11004 ret = btrfs_commit_transaction(trans, root);
11008 static int populate_csum(struct btrfs_trans_handle *trans,
11009 struct btrfs_root *csum_root, char *buf, u64 start,
11016 while (offset < len) {
11017 sectorsize = csum_root->sectorsize;
11018 ret = read_extent_data(csum_root, buf, start + offset,
11022 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11023 start + offset, buf, sectorsize);
11026 offset += sectorsize;
/*
 * Walk the items of one fs/subvolume tree (cur_root) and pass every regular
 * file extent (BTRFS_FILE_EXTENT_REG) found in it to populate_csum(), so that
 * its checksums are inserted into csum_root under the transaction trans.
 *
 * trans:     transaction handed through to populate_csum()
 * csum_root: checksum tree that receives the checksums
 * cur_root:  fs/subvolume tree whose file extents are scanned
 *
 * -EEXIST coming back from populate_csum() is tested for separately from
 * other results; presumably it is tolerated -- the statement after the test
 * is not visible here, confirm.
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering), so several declarations, the loop header, branch bodies, labels
 * and the return statement are not visible here.
 */
11031 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11032 struct btrfs_root *csum_root,
11033 struct btrfs_root *cur_root)
11035 struct btrfs_path path;
11036 struct btrfs_key key;
11037 struct extent_buffer *node;
11038 struct btrfs_file_extent_item *fi;
/* Scratch buffer of one csum-root sector, handed to populate_csum(). */
11045 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11049 btrfs_init_path(&path);
/* Position the path in cur_root; the lookup runs without a transaction. */
11053 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11056 /* Iterate all regular file extents and fill its csum */
11058 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11060 if (key.type != BTRFS_EXTENT_DATA_KEY)
11062 node = path.nodes[0];
11063 slot = path.slots[0];
11064 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11065 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/*
 * The on-disk range of the extent (disk bytenr and disk num bytes) is what
 * gets checksummed, not the file offset range.
 */
11067 start = btrfs_file_extent_disk_bytenr(node, fi);
11068 len = btrfs_file_extent_disk_num_bytes(node, fi);
11070 ret = populate_csum(trans, csum_root, buf, start, len);
11071 if (ret == -EEXIST)
11077 * TODO: if next leaf is corrupted, jump to nearest next valid
11080 ret = btrfs_next_item(cur_root, &path);
11090 btrfs_release_path(&path);
/*
 * Rebuild data checksums from the fs/subvolume trees: iterate the root items
 * of the tree root starting at BTRFS_FS_TREE_OBJECTID and run
 * fill_csum_tree_from_one_fs_root() on each tree that is read.
 *
 * trans:     transaction handed through to the per-tree helper
 * csum_root: checksum tree to fill; fs_info and the tree root are taken
 *            from it
 *
 * Keys are tested against BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and
 * is_fstree() before a tree is read; the branch bodies of those tests are
 * not visible here.
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering), so several declarations, the loop header, branch bodies, labels
 * and the return statement are not visible here.
 */
11095 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11096 struct btrfs_root *csum_root)
11098 struct btrfs_fs_info *fs_info = csum_root->fs_info;
11099 struct btrfs_path path;
11100 struct btrfs_root *tree_root = fs_info->tree_root;
11101 struct btrfs_root *cur_root;
11102 struct extent_buffer *node;
11103 struct btrfs_key key;
11107 btrfs_init_path(&path);
11108 key.objectid = BTRFS_FS_TREE_OBJECTID;
11110 key.type = BTRFS_ROOT_ITEM_KEY;
/* Position the path in the tree root; the lookup runs without a transaction. */
11111 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11120 node = path.nodes[0];
11121 slot = path.slots[0];
11122 btrfs_item_key_to_cpu(node, &key, slot);
11123 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11125 if (key.type != BTRFS_ROOT_ITEM_KEY)
11127 if (!is_fstree(key.objectid))
/* The key found in the tree root is reused, with offset (u64)-1, to read the tree. */
11129 key.offset = (u64)-1;
11131 cur_root = btrfs_read_fs_root(fs_info, &key);
/*
 * NOTE(review): the lines that follow the message below are not visible
 * here.  Confirm that ret is set to an error before leaving, otherwise a
 * failed root read would be reported as success.
 */
11132 if (IS_ERR(cur_root) || !cur_root) {
11133 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11137 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11142 ret = btrfs_next_item(tree_root, &path);
11152 btrfs_release_path(&path);
/*
 * Rebuild data checksums by scanning the extent tree: each
 * BTRFS_EXTENT_ITEM_KEY item whose extent flags include
 * BTRFS_EXTENT_FLAG_DATA is passed to populate_csum(), with key.objectid as
 * the start of the range.
 *
 * trans:     transaction handed through to populate_csum()
 * csum_root: checksum tree to fill; the extent root is taken from its
 *            fs_info
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering), so the loop header, several branch bodies, the last argument of
 * populate_csum() and the return statement are not visible here.
 */
11156 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11157 struct btrfs_root *csum_root)
11159 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11160 struct btrfs_path path;
11161 struct btrfs_extent_item *ei;
11162 struct extent_buffer *leaf;
11164 struct btrfs_key key;
11167 btrfs_init_path(&path);
11169 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Position the path in the extent tree; the lookup runs without a transaction. */
11171 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11173 btrfs_release_path(&path);
/* Scratch buffer of one sector, handed to populate_csum(). */
11177 buf = malloc(csum_root->sectorsize);
11179 btrfs_release_path(&path);
/* Past the last item of this leaf: move on to the next leaf. */
11184 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11185 ret = btrfs_next_leaf(extent_root, &path);
11193 leaf = path.nodes[0];
11195 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11196 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11201 ei = btrfs_item_ptr(leaf, path.slots[0],
11202 struct btrfs_extent_item);
/* Only extents flagged as data are checksummed. */
11203 if (!(btrfs_extent_flags(leaf, ei) &
11204 BTRFS_EXTENT_FLAG_DATA)) {
11209 ret = populate_csum(trans, csum_root, buf, key.objectid,
11216 btrfs_release_path(&path);
/*
 * Recalculate the data checksums and put them into the csum tree.
 *
 * Re-initialising the extent tree wipes out all the extent info, so in that
 * case the extent tree cannot be used as the source and the fs/subvolume
 * trees are walked instead.  A non-zero search_fs_tree selects the
 * fs/subvolume trees; zero selects the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
11238 static void free_roots_info_cache(void)
11240 if (!roots_info_cache)
11243 while (!cache_tree_empty(roots_info_cache)) {
11244 struct cache_extent *entry;
11245 struct root_item_info *rii;
11247 entry = first_cache_extent(roots_info_cache);
11250 remove_cache_extent(roots_info_cache, entry);
11251 rii = container_of(entry, struct root_item_info, cache_extent);
11255 free(roots_info_cache);
11256 roots_info_cache = NULL;
/*
 * Fill roots_info_cache from the extent tree.  The cache is allocated and
 * initialised here if it does not exist yet.
 *
 * Two kinds of items are examined: BTRFS_METADATA_ITEM_KEY items, and
 * BTRFS_EXTENT_ITEM_KEY items with BTRFS_EXTENT_FLAG_TREE_BLOCK set.  The
 * first inline ref of each item is read; when it is a
 * BTRFS_TREE_BLOCK_REF_KEY its offset is taken as the root id, and a
 * root_item_info record with start root_id and size 1 is looked up in the
 * cache or created.
 *
 * A record starts with level (u8)-1.  When a block with a higher level is
 * seen, the record takes that level together with the block's bytenr and
 * generation, and node_count is reset to 1.  A block at the same level as
 * the record goes through a separate branch whose body is not visible here
 * (presumably it counts the extra node -- confirm).
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering), so several declarations, the loop header, branch bodies, labels
 * and the return statement are not visible here.
 */
11259 static int build_roots_info_cache(struct btrfs_fs_info *info)
11262 struct btrfs_key key;
11263 struct extent_buffer *leaf;
11264 struct btrfs_path path;
11266 if (!roots_info_cache) {
11267 roots_info_cache = malloc(sizeof(*roots_info_cache));
11268 if (!roots_info_cache)
11270 cache_tree_init(roots_info_cache);
11273 btrfs_init_path(&path);
11275 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Position the path in the extent tree; the lookup runs without a transaction. */
11277 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11280 leaf = path.nodes[0];
11283 struct btrfs_key found_key;
11284 struct btrfs_extent_item *ei;
11285 struct btrfs_extent_inline_ref *iref;
11286 int slot = path.slots[0];
11291 struct cache_extent *entry;
11292 struct root_item_info *rii;
/* Past the last item of this leaf: move on to the next leaf. */
11294 if (slot >= btrfs_header_nritems(leaf)) {
11295 ret = btrfs_next_leaf(info->extent_root, &path);
11302 leaf = path.nodes[0];
11303 slot = path.slots[0];
11306 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11308 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11309 found_key.type != BTRFS_METADATA_ITEM_KEY)
11312 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11313 flags = btrfs_extent_flags(leaf, ei);
11315 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11316 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Metadata items: the inline ref directly follows the extent item and the
 * level is taken from the key offset.  Extent items: a btrfs_tree_block_info
 * sits between the two and supplies the level.
 */
11319 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11320 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11321 level = found_key.offset;
11323 struct btrfs_tree_block_info *binfo;
11325 binfo = (struct btrfs_tree_block_info *)(ei + 1);
11326 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11327 level = btrfs_tree_block_level(leaf, binfo);
11331 * For a root extent, it must be of the following type and the
11332 * first (and only one) iref in the item.
11334 type = btrfs_extent_inline_ref_type(leaf, iref);
11335 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The offset of the inline ref is used as the id of the owning root. */
11338 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11339 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11341 rii = malloc(sizeof(struct root_item_info));
11346 rii->cache_extent.start = root_id;
11347 rii->cache_extent.size = 1;
/* (u8)-1 marks a record that has not been given a level yet. */
11348 rii->level = (u8)-1;
11349 entry = &rii->cache_extent;
11350 ret = insert_cache_extent(roots_info_cache, entry);
11353 rii = container_of(entry, struct root_item_info,
11357 ASSERT(rii->cache_extent.start == root_id);
11358 ASSERT(rii->cache_extent.size == 1);
/* A higher level than the one recorded replaces the current candidate. */
11360 if (level > rii->level || rii->level == (u8)-1) {
11361 rii->level = level;
11362 rii->bytenr = found_key.objectid;
11363 rii->gen = btrfs_extent_generation(leaf, ei);
11364 rii->node_count = 1;
11365 } else if (level == rii->level) {
11373 btrfs_release_path(&path);
/*
 * Compare the root item at path->nodes[0] / path->slots[0] with the record
 * that roots_info_cache holds for the same root id and, unless
 * read_only_mode is set, rewrite the item when they differ.
 *
 * info:           filesystem being checked
 * path:           path pointing at the root item in its leaf
 * root_key:       key of that root item; objectid is the root id
 * read_only_mode: non-zero to only check, without writing the item back
 *
 * The item is read into a local copy and its bytenr, level and generation
 * are compared with the cached record.  On a mismatch a message is printed,
 * except in the case (read_only_mode && repair).  A root item whose
 * generation is newer than the cached one is reported separately.  When
 * read_only_mode is zero the three fields are overwritten with the cached
 * values and the copy is written back into the leaf with
 * write_extent_buffer().
 *
 * Two error messages cover a root id with no cache record and a record
 * whose node_count is not 1.
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering), so some declarations, the fprintf() call heads, branch bodies
 * and every return statement are not visible here; the return values cannot
 * be documented from this listing.
 */
11378 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11379 struct btrfs_path *path,
11380 const struct btrfs_key *root_key,
11381 const int read_only_mode)
11383 const u64 root_id = root_key->objectid;
11384 struct cache_extent *entry;
11385 struct root_item_info *rii;
11386 struct btrfs_root_item ri;
11387 unsigned long offset;
11389 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11392 "Error: could not find extent items for root %llu\n",
11393 root_key->objectid);
11397 rii = container_of(entry, struct root_item_info, cache_extent);
11398 ASSERT(rii->cache_extent.start == root_id);
11399 ASSERT(rii->cache_extent.size == 1);
11401 if (rii->node_count != 1) {
11403 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item. */
11408 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11409 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Any difference in bytenr, level or generation marks the item as outdated. */
11411 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11412 btrfs_root_level(&ri) != rii->level ||
11413 btrfs_root_generation(&ri) != rii->gen) {
11416 * If we're in repair mode but our caller told us to not update
11417 * the root item, i.e. just check if it needs to be updated, don't
11418 * print this message, since the caller will call us again shortly
11419 * for the same root item without read only mode (the caller will
11420 * open a transaction first).
11422 if (!(read_only_mode && repair))
11424 "%sroot item for root %llu,"
11425 " current bytenr %llu, current gen %llu, current level %u,"
11426 " new bytenr %llu, new gen %llu, new level %u\n",
11427 (read_only_mode ? "" : "fixing "),
11429 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11430 btrfs_root_level(&ri),
11431 rii->bytenr, rii->gen, rii->level);
11433 if (btrfs_root_generation(&ri) > rii->gen) {
11435 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11436 root_id, btrfs_root_generation(&ri), rii->gen);
11440 if (!read_only_mode) {
11441 btrfs_set_root_bytenr(&ri, rii->bytenr);
11442 btrfs_set_root_level(&ri, rii->level);
11443 btrfs_set_root_generation(&ri, rii->gen);
11444 write_extent_buffer(path->nodes[0], &ri,
11445 offset, sizeof(ri));
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2)
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detect and fix those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code. Otherwise the other repair
 * code may, for example, delete or modify backrefs in the extent tree, which
 * would result in an inconsistent fs after repairing the root items.
 */
/*
 * Build roots_info_cache, then walk the BTRFS_ROOT_ITEM_KEY items of the
 * tree root starting at objectid BTRFS_FIRST_FREE_OBJECTID and run
 * maybe_repair_root_item() on each of them.
 *
 * Items are tested against BTRFS_TREE_RELOC_OBJECTID before that call; the
 * branch body is not visible here (presumably they are skipped -- confirm).
 *
 * A transaction on the tree root is opened only when needed (trans starts
 * as NULL, need_trans as 0) and is committed when the end of a leaf is
 * reached and again on the way out.  roots_info_cache is freed before
 * returning.
 *
 * cmd_check() prints a positive return value as a count of roots ("Fixed %d
 * roots." / "Found %d roots with an outdated root item.").
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering), so several declarations, the loop header, branch bodies, labels
 * and the return statement are not visible here.
 */
11467 static int repair_root_items(struct btrfs_fs_info *info)
11469 struct btrfs_path path;
11470 struct btrfs_key key;
11471 struct extent_buffer *leaf;
11472 struct btrfs_trans_handle *trans = NULL;
11475 int need_trans = 0;
11477 btrfs_init_path(&path);
11479 ret = build_roots_info_cache(info);
11483 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11484 key.type = BTRFS_ROOT_ITEM_KEY;
11489 * Avoid opening and committing transactions if a leaf doesn't have
11490 * any root items that need to be fixed, so that we avoid rotating
11491 * backup roots unnecessarily.
11494 trans = btrfs_start_transaction(info->tree_root, 1);
11495 if (IS_ERR(trans)) {
11496 ret = PTR_ERR(trans);
11501 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11505 leaf = path.nodes[0];
11508 struct btrfs_key found_key;
11510 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11511 int no_more_keys = find_next_key(&path, &key);
11513 btrfs_release_path(&path);
11515 ret = btrfs_commit_transaction(trans,
11527 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11529 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11531 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11534 ret = maybe_repair_root_item(info, &path, &found_key,
11539 if (!trans && repair) {
11542 btrfs_release_path(&path);
11552 free_roots_info_cache();
11553 btrfs_release_path(&path);
11555 btrfs_commit_transaction(trans, info->tree_root);
11562 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11564 struct btrfs_trans_handle *trans;
11565 struct btrfs_block_group_cache *bg_cache;
11569 /* Clear all free space cache inodes and its extent data */
11571 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11574 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11577 current = bg_cache->key.objectid + bg_cache->key.offset;
11580 /* Don't forget to set cache_generation to -1 */
11581 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11582 if (IS_ERR(trans)) {
11583 error("failed to update super block cache generation");
11584 return PTR_ERR(trans);
11586 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11587 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for "btrfs check", one string per output line.  cmd_check()
 * passes this table to usage() when the command line is rejected.
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering at 11599, 11611 and after 11619), so some entries and the end of
 * the initializer are not visible here.
 */
11592 const char * const cmd_check_usage[] = {
11593 "btrfs check [options] <device>",
11594 "Check structural integrity of a filesystem (unmounted).",
11595 "Check structural integrity of an unmounted filesystem. Verify internal",
11596 "trees' consistency and item connectivity. In the repair mode try to",
11597 "fix the problems found. ",
11598 "WARNING: the repair mode is considered dangerous",
11600 "-s|--super <superblock> use this superblock copy",
11601 "-b|--backup use the first valid backup root copy",
11602 "--repair try to repair the filesystem",
11603 "--readonly run in read-only mode (default)",
11604 "--init-csum-tree create a new CRC tree",
11605 "--init-extent-tree create a new extent tree",
11606 "--mode <MODE> allows choice of memory/IO trade-offs",
11607 " where MODE is one of:",
11608 " original - read inodes and extents to memory (requires",
11609 " more memory, does less IO)",
11610 " lowmem - try to use less memory but read blocks again",
11612 "--check-data-csum verify checksums of data blocks",
11613 "-Q|--qgroup-report print a report on qgroup consistency",
11614 "-E|--subvol-extents <subvolid>",
11615 " print subvolume extents and sharing state",
11616 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11617 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11618 "-p|--progress indicate progress",
11619 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
11623 int cmd_check(int argc, char **argv)
11625 struct cache_tree root_cache;
11626 struct btrfs_root *root;
11627 struct btrfs_fs_info *info;
11630 u64 tree_root_bytenr = 0;
11631 u64 chunk_root_bytenr = 0;
11632 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11635 int init_csum_tree = 0;
11637 int clear_space_cache = 0;
11638 int qgroup_report = 0;
11639 int qgroups_repaired = 0;
11640 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11644 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11645 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11646 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11647 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11648 static const struct option long_options[] = {
11649 { "super", required_argument, NULL, 's' },
11650 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11651 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11652 { "init-csum-tree", no_argument, NULL,
11653 GETOPT_VAL_INIT_CSUM },
11654 { "init-extent-tree", no_argument, NULL,
11655 GETOPT_VAL_INIT_EXTENT },
11656 { "check-data-csum", no_argument, NULL,
11657 GETOPT_VAL_CHECK_CSUM },
11658 { "backup", no_argument, NULL, 'b' },
11659 { "subvol-extents", required_argument, NULL, 'E' },
11660 { "qgroup-report", no_argument, NULL, 'Q' },
11661 { "tree-root", required_argument, NULL, 'r' },
11662 { "chunk-root", required_argument, NULL,
11663 GETOPT_VAL_CHUNK_TREE },
11664 { "progress", no_argument, NULL, 'p' },
11665 { "mode", required_argument, NULL,
11667 { "clear-space-cache", required_argument, NULL,
11668 GETOPT_VAL_CLEAR_SPACE_CACHE},
11669 { NULL, 0, NULL, 0}
11672 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11676 case 'a': /* ignored */ break;
11678 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11681 num = arg_strtou64(optarg);
11682 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11684 "super mirror should be less than %d",
11685 BTRFS_SUPER_MIRROR_MAX);
11688 bytenr = btrfs_sb_offset(((int)num));
11689 printf("using SB copy %llu, bytenr %llu\n", num,
11690 (unsigned long long)bytenr);
11696 subvolid = arg_strtou64(optarg);
11699 tree_root_bytenr = arg_strtou64(optarg);
11701 case GETOPT_VAL_CHUNK_TREE:
11702 chunk_root_bytenr = arg_strtou64(optarg);
11705 ctx.progress_enabled = true;
11709 usage(cmd_check_usage);
11710 case GETOPT_VAL_REPAIR:
11711 printf("enabling repair mode\n");
11713 ctree_flags |= OPEN_CTREE_WRITES;
11715 case GETOPT_VAL_READONLY:
11718 case GETOPT_VAL_INIT_CSUM:
11719 printf("Creating a new CRC tree\n");
11720 init_csum_tree = 1;
11722 ctree_flags |= OPEN_CTREE_WRITES;
11724 case GETOPT_VAL_INIT_EXTENT:
11725 init_extent_tree = 1;
11726 ctree_flags |= (OPEN_CTREE_WRITES |
11727 OPEN_CTREE_NO_BLOCK_GROUPS);
11730 case GETOPT_VAL_CHECK_CSUM:
11731 check_data_csum = 1;
11733 case GETOPT_VAL_MODE:
11734 check_mode = parse_check_mode(optarg);
11735 if (check_mode == CHECK_MODE_UNKNOWN) {
11736 error("unknown mode: %s", optarg);
11740 case GETOPT_VAL_CLEAR_SPACE_CACHE:
11741 if (strcmp(optarg, "v1") == 0) {
11742 clear_space_cache = 1;
11743 } else if (strcmp(optarg, "v2") == 0) {
11744 clear_space_cache = 2;
11745 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
11748 "invalid argument to --clear-space-cache, must be v1 or v2");
11751 ctree_flags |= OPEN_CTREE_WRITES;
11756 if (check_argc_exact(argc - optind, 1))
11757 usage(cmd_check_usage);
11759 if (ctx.progress_enabled) {
11760 ctx.tp = TASK_NOTHING;
11761 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11764 /* This check is the only reason for --readonly to exist */
11765 if (readonly && repair) {
11766 error("repair options are not compatible with --readonly");
11771 * Not supported yet
11773 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11774 error("low memory mode doesn't support repair yet");
11779 cache_tree_init(&root_cache);
11781 if((ret = check_mounted(argv[optind])) < 0) {
11782 error("could not check mount status: %s", strerror(-ret));
11785 error("%s is currently mounted, aborting", argv[optind]);
11790 /* only allow partial opening under repair mode */
11792 ctree_flags |= OPEN_CTREE_PARTIAL;
11794 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11795 chunk_root_bytenr, ctree_flags);
11797 error("cannot open file system");
11802 global_info = info;
11803 root = info->fs_root;
11804 if (clear_space_cache == 1) {
11805 if (btrfs_fs_compat_ro(info,
11806 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11808 "free space cache v2 detected, use --clear-space-cache v2");
11812 printf("Clearing free space cache\n");
11813 ret = clear_free_space_cache(info);
11815 error("failed to clear free space cache");
11818 printf("Free space cache cleared\n");
11821 } else if (clear_space_cache == 2) {
11822 if (!btrfs_fs_compat_ro(info,
11823 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11824 printf("no free space cache v2 to clear\n");
11828 printf("Clear free space cache v2\n");
11829 ret = btrfs_clear_free_space_tree(info);
11831 error("failed to clear free space cache v2: %d", ret);
11834 printf("free space cache v2 cleared\n");
11840 * repair mode will force us to commit transaction which
11841 * will make us fail to load log tree when mounting.
11843 if (repair && btrfs_super_log_root(info->super_copy)) {
11844 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11849 ret = zero_log_tree(root);
11851 error("failed to zero log tree: %d", ret);
11856 uuid_unparse(info->super_copy->fsid, uuidbuf);
11857 if (qgroup_report) {
11858 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11860 ret = qgroup_verify_all(info);
11866 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11867 subvolid, argv[optind], uuidbuf);
11868 ret = print_extent_state(info, subvolid);
11871 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11873 if (!extent_buffer_uptodate(info->tree_root->node) ||
11874 !extent_buffer_uptodate(info->dev_root->node) ||
11875 !extent_buffer_uptodate(info->chunk_root->node)) {
11876 error("critical roots corrupted, unable to check the filesystem");
11881 if (init_extent_tree || init_csum_tree) {
11882 struct btrfs_trans_handle *trans;
11884 trans = btrfs_start_transaction(info->extent_root, 0);
11885 if (IS_ERR(trans)) {
11886 error("error starting transaction");
11887 ret = PTR_ERR(trans);
11891 if (init_extent_tree) {
11892 printf("Creating a new extent tree\n");
11893 ret = reinit_extent_tree(trans, info);
11898 if (init_csum_tree) {
11899 printf("Reinitialize checksum tree\n");
11900 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11902 error("checksum tree initialization failed: %d",
11908 ret = fill_csum_tree(trans, info->csum_root,
11911 error("checksum tree refilling failed: %d", ret);
11916 * Ok now we commit and run the normal fsck, which will add
11917 * extent entries for all of the items it finds.
11919 ret = btrfs_commit_transaction(trans, info->extent_root);
11923 if (!extent_buffer_uptodate(info->extent_root->node)) {
11924 error("critical: extent_root, unable to check the filesystem");
11928 if (!extent_buffer_uptodate(info->csum_root->node)) {
11929 error("critical: csum_root, unable to check the filesystem");
11934 if (!ctx.progress_enabled)
11935 fprintf(stderr, "checking extents\n");
11936 if (check_mode == CHECK_MODE_LOWMEM)
11937 ret = check_chunks_and_extents_v2(root);
11939 ret = check_chunks_and_extents(root);
11942 "errors found in extent allocation tree or chunk allocation");
11944 ret = repair_root_items(info);
11948 fprintf(stderr, "Fixed %d roots.\n", ret);
11950 } else if (ret > 0) {
11952 "Found %d roots with an outdated root item.\n",
11955 "Please run a filesystem check with the option --repair to fix them.\n");
11960 if (!ctx.progress_enabled) {
11961 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11962 fprintf(stderr, "checking free space tree\n");
11964 fprintf(stderr, "checking free space cache\n");
11966 ret = check_space_cache(root);
11971 * We used to have to have these hole extents in between our real
11972 * extents so if we don't have this flag set we need to make sure there
11973 * are no gaps in the file extents for inodes, otherwise we can just
11974 * ignore it when this happens.
11976 no_holes = btrfs_fs_incompat(root->fs_info,
11977 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11978 if (!ctx.progress_enabled)
11979 fprintf(stderr, "checking fs roots\n");
11980 ret = check_fs_roots(root, &root_cache);
11984 fprintf(stderr, "checking csums\n");
11985 ret = check_csums(root);
11989 fprintf(stderr, "checking root refs\n");
11990 ret = check_root_refs(root, &root_cache);
11994 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11995 struct extent_buffer *eb;
11997 eb = list_first_entry(&root->fs_info->recow_ebs,
11998 struct extent_buffer, recow);
11999 list_del_init(&eb->recow);
12000 ret = recow_extent_buffer(root, eb);
12005 while (!list_empty(&delete_items)) {
12006 struct bad_item *bad;
12008 bad = list_first_entry(&delete_items, struct bad_item, list);
12009 list_del_init(&bad->list);
12011 ret = delete_bad_item(root, bad);
12015 if (info->quota_enabled) {
12017 fprintf(stderr, "checking quota groups\n");
12018 err = qgroup_verify_all(info);
12022 err = repair_qgroups(info, &qgroups_repaired);
12027 if (!list_empty(&root->fs_info->recow_ebs)) {
12028 error("transid errors in file system");
12032 /* Don't override original ret */
12033 if (!ret && qgroups_repaired)
12034 ret = qgroups_repaired;
12036 if (found_old_backref) { /*
12037 * there was a disk format change when mixed
12038 * backref was in testing tree. The old format
12039 * existed about one week.
12041 printf("\n * Found old mixed backref format. "
12042 "The old format is not supported! *"
12043 "\n * Please mount the FS in readonly mode, "
12044 "backup data and re-format the FS. *\n\n");
12047 printf("found %llu bytes used err is %d\n",
12048 (unsigned long long)bytes_used, ret);
12049 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12050 printf("total tree bytes: %llu\n",
12051 (unsigned long long)total_btree_bytes);
12052 printf("total fs tree bytes: %llu\n",
12053 (unsigned long long)total_fs_tree_bytes);
12054 printf("total extent tree bytes: %llu\n",
12055 (unsigned long long)total_extent_tree_bytes);
12056 printf("btree space waste bytes: %llu\n",
12057 (unsigned long long)btree_space_waste);
12058 printf("file data blocks allocated: %llu\n referenced %llu\n",
12059 (unsigned long long)data_bytes_allocated,
12060 (unsigned long long)data_bytes_referenced);
12062 free_qgroup_counts();
12063 free_root_recs_tree(&root_cache);
12067 if (ctx.progress_enabled)
12068 task_deinit(ctx.info);