2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
50 TASK_NOTHING, /* have to be the last element */
55 enum task_position tp;
57 struct task_info *info;
/*
 * File-scope state of the checker.  Among the lines visible here only
 * no_holes is read; the other counters, lists and flags are presumably
 * maintained by code elsewhere in the file - confirm at their use sites.
 */
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
/*
 * When non-zero, maybe_free_inode_rec() does not raise
 * I_ERR_FILE_EXTENT_DISCOUNT for regular files and symlinks.
 */
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header of a back reference.  It is embedded as the 'node' member of
 * struct data_backref and struct tree_backref; to_data_backref() and
 * to_tree_backref() recover the containing structure from it.
 */
87 struct extent_backref {
/* List linkage; to_extent_backref() maps a list entry back to this struct */
88 struct list_head list;
/*
 * One-bit status flags.  They are set and tested outside the lines shown
 * here, so their exact meaning should be confirmed at the use sites.
 */
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
/*
 * Convert a pointer to the 'list' member back into the struct extent_backref
 * that contains it.  @entry must point at the list member of a real
 * extent_backref, not at a bare list head.
 */
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Recover the struct data_backref whose embedded 'node' member is @back.
 * Only valid when @back really is the node of a data_backref.
 */
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
118 return container_of(back, struct data_backref, node);
122 * Much like data_backref, but with the undetermined members removed
123 * and the linkage changed to a plain list_head.
124 * During extent scan, it is stored in root->orphan_data_extent.
125 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
127 struct orphan_data_extent {
128 struct list_head list;
136 struct tree_backref {
137 struct extent_backref node;
/*
 * Recover the struct tree_backref whose embedded 'node' member is @back.
 * Only valid when @back really is the node of a tree_backref.
 */
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
146 return container_of(back, struct tree_backref, node);
149 /* Explicit initialization for extent_record::flag_block_full_backref */
/*
 * The value 2 fits the two-bit flag_block_full_backref field and is distinct
 * from the plain 0 and 1 values.
 */
150 enum { FLAG_UNSET = 2 };
152 struct extent_record {
153 struct list_head backrefs;
154 struct list_head dups;
155 struct list_head list;
156 struct cache_extent cache;
157 struct btrfs_disk_key parent_key;
162 u64 extent_item_refs;
164 u64 parent_generation;
168 unsigned int flag_block_full_backref:2;
169 unsigned int found_rec:1;
170 unsigned int content_checked:1;
171 unsigned int owner_ref_checked:1;
172 unsigned int is_root:1;
173 unsigned int metadata:1;
174 unsigned int bad_full_backref:1;
175 unsigned int crossing_stripes:1;
176 unsigned int wrong_chunk_type:1;
/*
 * Convert a pointer to the 'list' member back into the struct extent_record
 * that contains it (not valid for the 'backrefs' or 'dups' list heads).
 */
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
181 return container_of(entry, struct extent_record, list);
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
/*
 * Convert a pointer to the 'list' member back into the struct inode_backref
 * that contains it.
 */
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
200 return list_entry(entry, struct inode_backref, list);
203 struct root_item_record {
204 struct list_head list;
211 struct btrfs_key drop_key;
/*
 * Error bits for a back reference.  They are OR-ed into
 * inode_backref::errors (see add_inode_backref() and
 * maybe_free_inode_rec()) and turned into text by print_ref_error().
 * The *_ROOT_* bits are presumably used for root_backref records - confirm
 * at their use sites.
 */
214 #define REF_ERR_NO_DIR_ITEM (1 << 0)
215 #define REF_ERR_NO_DIR_INDEX (1 << 1)
216 #define REF_ERR_NO_INODE_REF (1 << 2)
217 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
218 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
219 #define REF_ERR_DUP_INODE_REF (1 << 5)
220 #define REF_ERR_INDEX_UNMATCH (1 << 6)
221 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
222 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
223 #define REF_ERR_NO_ROOT_REF (1 << 9)
224 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
225 #define REF_ERR_DUP_ROOT_REF (1 << 11)
226 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
228 struct file_extent_hole {
234 struct inode_record {
235 struct list_head backrefs;
236 unsigned int checked:1;
237 unsigned int merging:1;
238 unsigned int found_inode_item:1;
239 unsigned int found_dir_item:1;
240 unsigned int found_file_extent:1;
241 unsigned int found_csum_item:1;
242 unsigned int some_csum_missing:1;
243 unsigned int nodatasum:1;
256 struct rb_root holes;
257 struct list_head orphan_extents;
/*
 * Error bits for an inode record.  They are OR-ed into inode_record::errors
 * (for example by maybe_free_inode_rec() and process_inode_item()) and
 * turned into text by print_inode_error().
 */
262 #define I_ERR_NO_INODE_ITEM (1 << 0)
263 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
264 #define I_ERR_DUP_INODE_ITEM (1 << 2)
265 #define I_ERR_DUP_DIR_INDEX (1 << 3)
266 #define I_ERR_ODD_DIR_ITEM (1 << 4)
267 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
268 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
269 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
270 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
271 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
272 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
273 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
274 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
275 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
276 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
278 struct root_backref {
279 struct list_head list;
280 unsigned int found_dir_item:1;
281 unsigned int found_dir_index:1;
282 unsigned int found_back_ref:1;
283 unsigned int found_forward_ref:1;
284 unsigned int reachable:1;
/*
 * Convert a pointer to the 'list' member back into the struct root_backref
 * that contains it.
 */
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
295 return list_entry(entry, struct root_backref, list);
299 struct list_head backrefs;
300 struct cache_extent cache;
301 unsigned int found_root_item:1;
307 struct cache_extent cache;
312 struct cache_extent cache;
313 struct cache_tree root_cache;
314 struct cache_tree inode_cache;
315 struct inode_record *current;
324 struct walk_control {
325 struct cache_tree shared;
326 struct shared_node *nodes[BTRFS_MAX_LEVEL];
332 struct btrfs_key key;
334 struct list_head list;
337 struct extent_entry {
342 struct list_head list;
345 struct root_item_info {
346 /* level of the root */
348 /* number of nodes at this level, must be 1 for a root */
352 struct cache_extent cache_extent;
356 * Error bit for low memory mode check.
358 * Currently no caller cares about it yet. Just internal use for error
/*
 * Error bits reported by the low memory mode check.
 *
 * Each flag owns its own bit so that an OR-ed result can be decoded again.
 * CROSSING_STRIPE_BOUNDARY previously shared (1 << 4) with
 * REFERENCER_MISMATCH, which made the two conditions indistinguishable.  It
 * now uses the first free bit, leaving every other value unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
372 static void *print_status_check(void *p)
374 struct task_ctx *priv = p;
375 const char work_indicator[] = { '.', 'o', 'O', 'o' };
377 static char *task_position_string[] = {
379 "checking free space cache",
383 task_period_start(priv->info, 1000 /* 1s */);
385 if (priv->tp == TASK_NOTHING)
389 printf("%s [%c]\r", task_position_string[priv->tp],
390 work_indicator[count % 4]);
393 task_period_wait(priv->info);
398 static int print_status_return(void *p)
/*
 * Translate a mode name into an enum btrfs_check_mode value.
 *
 * "lowmem" selects CHECK_MODE_LOWMEM; "orig" and "original" both select
 * CHECK_MODE_ORIGINAL; any other string yields CHECK_MODE_UNKNOWN.  The
 * comparison is case sensitive.  @str is handed straight to strcmp(), so it
 * must not be NULL.
 */
406 static enum btrfs_check_mode parse_check_mode(const char *str)
408 if (strcmp(str, "lowmem") == 0)
409 return CHECK_MODE_LOWMEM;
/* Short and long spelling of the original mode are both accepted */
410 if (strcmp(str, "orig") == 0)
411 return CHECK_MODE_ORIGINAL;
412 if (strcmp(str, "original") == 0)
413 return CHECK_MODE_ORIGINAL;
415 return CHECK_MODE_UNKNOWN;
418 /* Compatibility helper so that older code can keep being used */
419 static u64 first_extent_gap(struct rb_root *holes)
421 struct file_extent_hole *hole;
423 if (RB_EMPTY_ROOT(holes))
426 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
432 struct file_extent_hole *hole1;
433 struct file_extent_hole *hole2;
435 hole1 = rb_entry(node1, struct file_extent_hole, node);
436 hole2 = rb_entry(node2, struct file_extent_hole, node);
438 if (hole1->start > hole2->start)
440 if (hole1->start < hole2->start)
442 /* Now hole1->start == hole2->start */
443 if (hole1->len >= hole2->len)
445 * Hole 1 will be merge center
446 * Same hole will be merged later
449 /* Hole 2 will be merge center */
454 * Add a hole to the record
456 * This will do hole merge for copy_file_extent_holes(),
457 * which will ensure there won't be continuous holes.
459 static int add_file_extent_hole(struct rb_root *holes,
462 struct file_extent_hole *hole;
463 struct file_extent_hole *prev = NULL;
464 struct file_extent_hole *next = NULL;
466 hole = malloc(sizeof(*hole));
471 /* Since compare will not return 0, no -EEXIST will happen */
472 rb_insert(holes, &hole->node, compare_hole);
474 /* simple merge with previous hole */
475 if (rb_prev(&hole->node))
476 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
478 if (prev && prev->start + prev->len >= hole->start) {
479 hole->len = hole->start + hole->len - prev->start;
480 hole->start = prev->start;
481 rb_erase(&prev->node, holes);
486 /* iterate merge with next holes */
488 if (!rb_next(&hole->node))
490 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
492 if (hole->start + hole->len >= next->start) {
493 if (hole->start + hole->len <= next->start + next->len)
494 hole->len = next->start + next->len -
496 rb_erase(&next->node, holes);
505 static int compare_hole_range(struct rb_node *node, void *data)
507 struct file_extent_hole *hole;
510 hole = (struct file_extent_hole *)data;
513 hole = rb_entry(node, struct file_extent_hole, node);
514 if (start < hole->start)
516 if (start >= hole->start && start < hole->start + hole->len)
522 * Delete a hole in the record
524 * This splits the hole and is much stricter than add.
526 static int del_file_extent_hole(struct rb_root *holes,
529 struct file_extent_hole *hole;
530 struct file_extent_hole tmp;
535 struct rb_node *node;
542 node = rb_search(holes, &tmp, compare_hole_range, NULL);
545 hole = rb_entry(node, struct file_extent_hole, node);
546 if (start + len > hole->start + hole->len)
550 * Now there will be no overlap, delete the hole and re-add the
551 * split(s) if they exist.
553 if (start > hole->start) {
554 prev_start = hole->start;
555 prev_len = start - hole->start;
558 if (hole->start + hole->len > start + len) {
559 next_start = start + len;
560 next_len = hole->start + hole->len - start - len;
563 rb_erase(node, holes);
566 ret = add_file_extent_hole(holes, prev_start, prev_len);
571 ret = add_file_extent_hole(holes, next_start, next_len);
578 static int copy_file_extent_holes(struct rb_root *dst,
581 struct file_extent_hole *hole;
582 struct rb_node *node;
585 node = rb_first(src);
587 hole = rb_entry(node, struct file_extent_hole, node);
588 ret = add_file_extent_hole(dst, hole->start, hole->len);
591 node = rb_next(node);
596 static void free_file_extent_holes(struct rb_root *holes)
598 struct rb_node *node;
599 struct file_extent_hole *hole;
601 node = rb_first(holes);
603 hole = rb_entry(node, struct file_extent_hole, node);
604 rb_erase(node, holes);
606 node = rb_first(holes);
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Note that @root takes part in transaction @trans.
 *
 * Nothing happens when root->last_trans already equals trans->transid.
 * Otherwise the root is flagged with track_dirty, stamped with the
 * transaction id, and its current node is remembered as commit_root.
 */
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613 struct btrfs_root *root)
615 if (root->last_trans != trans->transid) {
616 root->track_dirty = 1;
617 root->last_trans = trans->transid;
618 root->commit_root = root->node;
/* presumably takes the reference now held through commit_root - confirm */
619 extent_buffer_get(root->node);
623 static u8 imode_to_type(u32 imode)
626 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
628 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
629 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
630 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
631 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
632 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
633 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
636 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
642 struct device_record *rec1;
643 struct device_record *rec2;
645 rec1 = rb_entry(node1, struct device_record, node);
646 rec2 = rb_entry(node2, struct device_record, node);
647 if (rec1->devid > rec2->devid)
649 else if (rec1->devid < rec2->devid)
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
657 struct inode_record *rec;
658 struct inode_backref *backref;
659 struct inode_backref *orig;
660 struct inode_backref *tmp;
661 struct orphan_data_extent *src_orphan;
662 struct orphan_data_extent *dst_orphan;
667 rec = malloc(sizeof(*rec));
669 return ERR_PTR(-ENOMEM);
670 memcpy(rec, orig_rec, sizeof(*rec));
672 INIT_LIST_HEAD(&rec->backrefs);
673 INIT_LIST_HEAD(&rec->orphan_extents);
674 rec->holes = RB_ROOT;
676 list_for_each_entry(orig, &orig_rec->backrefs, list) {
677 size = sizeof(*orig) + orig->namelen + 1;
678 backref = malloc(size);
683 memcpy(backref, orig, size);
684 list_add_tail(&backref->list, &rec->backrefs);
686 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687 dst_orphan = malloc(sizeof(*dst_orphan));
692 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
695 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
702 rb = rb_first(&rec->holes);
704 struct file_extent_hole *hole;
706 hole = rb_entry(rb, struct file_extent_hole, node);
712 if (!list_empty(&rec->backrefs))
713 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714 list_del(&orig->list);
718 if (!list_empty(&rec->orphan_extents))
719 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720 list_del(&orig->list);
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
732 struct orphan_data_extent *orphan;
734 if (list_empty(orphan_extents))
736 printf("The following data extent is lost in tree %llu:\n",
738 list_for_each_entry(orphan, orphan_extents, list) {
739 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740 orphan->objectid, orphan->offset, orphan->disk_bytenr,
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
747 u64 root_objectid = root->root_key.objectid;
748 int errors = rec->errors;
752 /* reloc root errors, we print its corresponding fs root objectid*/
753 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754 root_objectid = root->root_key.offset;
755 fprintf(stderr, "reloc");
757 fprintf(stderr, "root %llu inode %llu errors %x",
758 (unsigned long long) root_objectid,
759 (unsigned long long) rec->ino, rec->errors);
761 if (errors & I_ERR_NO_INODE_ITEM)
762 fprintf(stderr, ", no inode item");
763 if (errors & I_ERR_NO_ORPHAN_ITEM)
764 fprintf(stderr, ", no orphan item");
765 if (errors & I_ERR_DUP_INODE_ITEM)
766 fprintf(stderr, ", dup inode item");
767 if (errors & I_ERR_DUP_DIR_INDEX)
768 fprintf(stderr, ", dup dir index");
769 if (errors & I_ERR_ODD_DIR_ITEM)
770 fprintf(stderr, ", odd dir item");
771 if (errors & I_ERR_ODD_FILE_EXTENT)
772 fprintf(stderr, ", odd file extent");
773 if (errors & I_ERR_BAD_FILE_EXTENT)
774 fprintf(stderr, ", bad file extent");
775 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776 fprintf(stderr, ", file extent overlap");
777 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778 fprintf(stderr, ", file extent discount");
779 if (errors & I_ERR_DIR_ISIZE_WRONG)
780 fprintf(stderr, ", dir isize wrong");
781 if (errors & I_ERR_FILE_NBYTES_WRONG)
782 fprintf(stderr, ", nbytes wrong");
783 if (errors & I_ERR_ODD_CSUM_ITEM)
784 fprintf(stderr, ", odd csum item");
785 if (errors & I_ERR_SOME_CSUM_MISSING)
786 fprintf(stderr, ", some csum missing");
787 if (errors & I_ERR_LINK_COUNT_WRONG)
788 fprintf(stderr, ", link count wrong");
789 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790 fprintf(stderr, ", orphan file extent");
791 fprintf(stderr, "\n");
792 /* Print the orphan extents if needed */
793 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
796 /* Print the holes if needed */
797 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798 struct file_extent_hole *hole;
799 struct rb_node *node;
802 node = rb_first(&rec->holes);
803 fprintf(stderr, "Found file extent holes:\n");
806 hole = rb_entry(node, struct file_extent_hole, node);
807 fprintf(stderr, "\tstart: %llu, len: %llu\n",
808 hole->start, hole->len);
809 node = rb_next(node);
812 fprintf(stderr, "\tstart: 0, len: %llu\n",
813 round_up(rec->isize, root->sectorsize));
/*
 * Write a textual description of a REF_ERR_* bit mask to stderr.
 *
 * For every bit set in @errors a ", <description>" fragment is appended, in
 * ascending bit order, and the line is always finished with a newline.  The
 * caller is expected to have printed the start of the line already.
 */
817 static void print_ref_error(int errors)
819 if (errors & REF_ERR_NO_DIR_ITEM)
820 fprintf(stderr, ", no dir item");
821 if (errors & REF_ERR_NO_DIR_INDEX)
822 fprintf(stderr, ", no dir index");
823 if (errors & REF_ERR_NO_INODE_REF)
824 fprintf(stderr, ", no inode ref");
825 if (errors & REF_ERR_DUP_DIR_ITEM)
826 fprintf(stderr, ", dup dir item");
827 if (errors & REF_ERR_DUP_DIR_INDEX)
828 fprintf(stderr, ", dup dir index");
829 if (errors & REF_ERR_DUP_INODE_REF)
830 fprintf(stderr, ", dup inode ref");
831 if (errors & REF_ERR_INDEX_UNMATCH)
832 fprintf(stderr, ", index mismatch");
833 if (errors & REF_ERR_FILETYPE_UNMATCH)
834 fprintf(stderr, ", filetype mismatch");
835 if (errors & REF_ERR_NAME_TOO_LONG)
836 fprintf(stderr, ", name too long");
837 if (errors & REF_ERR_NO_ROOT_REF)
838 fprintf(stderr, ", no root ref");
839 if (errors & REF_ERR_NO_ROOT_BACKREF)
840 fprintf(stderr, ", no root backref");
841 if (errors & REF_ERR_DUP_ROOT_REF)
842 fprintf(stderr, ", dup root ref");
843 if (errors & REF_ERR_DUP_ROOT_BACKREF)
844 fprintf(stderr, ", dup root backref");
/* Terminate the line even when no bit was set */
845 fprintf(stderr, "\n");
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
851 struct ptr_node *node;
852 struct cache_extent *cache;
853 struct inode_record *rec = NULL;
856 cache = lookup_cache_extent(inode_cache, ino, 1);
858 node = container_of(cache, struct ptr_node, cache);
860 if (mod && rec->refs > 1) {
861 node->data = clone_inode_rec(rec);
862 if (IS_ERR(node->data))
868 rec = calloc(1, sizeof(*rec));
870 return ERR_PTR(-ENOMEM);
872 rec->extent_start = (u64)-1;
874 INIT_LIST_HEAD(&rec->backrefs);
875 INIT_LIST_HEAD(&rec->orphan_extents);
876 rec->holes = RB_ROOT;
878 node = malloc(sizeof(*node));
881 return ERR_PTR(-ENOMEM);
883 node->cache.start = ino;
884 node->cache.size = 1;
887 if (ino == BTRFS_FREE_INO_OBJECTID)
890 ret = insert_cache_extent(inode_cache, &node->cache);
892 return ERR_PTR(-EEXIST);
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
899 struct orphan_data_extent *orphan;
901 while (!list_empty(orphan_extents)) {
902 orphan = list_entry(orphan_extents->next,
903 struct orphan_data_extent, list);
904 list_del(&orphan->list);
909 static void free_inode_rec(struct inode_record *rec)
911 struct inode_backref *backref;
916 while (!list_empty(&rec->backrefs)) {
917 backref = to_inode_backref(rec->backrefs.next);
918 list_del(&backref->list);
921 free_orphan_data_extents(&rec->orphan_extents);
922 free_file_extent_holes(&rec->holes);
926 static int can_free_inode_rec(struct inode_record *rec)
928 if (!rec->errors && rec->checked && rec->found_inode_item &&
929 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935 struct inode_record *rec)
937 struct cache_extent *cache;
938 struct inode_backref *tmp, *backref;
939 struct ptr_node *node;
942 if (!rec->found_inode_item)
945 filetype = imode_to_type(rec->imode);
946 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947 if (backref->found_dir_item && backref->found_dir_index) {
948 if (backref->filetype != filetype)
949 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950 if (!backref->errors && backref->found_inode_ref &&
951 rec->nlink == rec->found_link) {
952 list_del(&backref->list);
958 if (!rec->checked || rec->merging)
961 if (S_ISDIR(rec->imode)) {
962 if (rec->found_size != rec->isize)
963 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964 if (rec->found_file_extent)
965 rec->errors |= I_ERR_ODD_FILE_EXTENT;
966 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967 if (rec->found_dir_item)
968 rec->errors |= I_ERR_ODD_DIR_ITEM;
969 if (rec->found_size != rec->nbytes)
970 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971 if (rec->nlink > 0 && !no_holes &&
972 (rec->extent_end < rec->isize ||
973 first_extent_gap(&rec->holes) < rec->isize))
974 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
977 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978 if (rec->found_csum_item && rec->nodatasum)
979 rec->errors |= I_ERR_ODD_CSUM_ITEM;
980 if (rec->some_csum_missing && !rec->nodatasum)
981 rec->errors |= I_ERR_SOME_CSUM_MISSING;
984 BUG_ON(rec->refs != 1);
985 if (can_free_inode_rec(rec)) {
986 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987 node = container_of(cache, struct ptr_node, cache);
988 BUG_ON(node->data != rec);
989 remove_cache_extent(inode_cache, &node->cache);
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
997 struct btrfs_path path;
998 struct btrfs_key key;
1001 key.objectid = BTRFS_ORPHAN_OBJECTID;
1002 key.type = BTRFS_ORPHAN_ITEM_KEY;
1005 btrfs_init_path(&path);
1006 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007 btrfs_release_path(&path);
1013 static int process_inode_item(struct extent_buffer *eb,
1014 int slot, struct btrfs_key *key,
1015 struct shared_node *active_node)
1017 struct inode_record *rec;
1018 struct btrfs_inode_item *item;
1020 rec = active_node->current;
1021 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022 if (rec->found_inode_item) {
1023 rec->errors |= I_ERR_DUP_INODE_ITEM;
1026 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027 rec->nlink = btrfs_inode_nlink(eb, item);
1028 rec->isize = btrfs_inode_size(eb, item);
1029 rec->nbytes = btrfs_inode_nbytes(eb, item);
1030 rec->imode = btrfs_inode_mode(eb, item);
1031 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1033 rec->found_inode_item = 1;
1034 if (rec->nlink == 0)
1035 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036 maybe_free_inode_rec(&active_node->inode_cache, rec);
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1042 int namelen, u64 dir)
1044 struct inode_backref *backref;
1046 list_for_each_entry(backref, &rec->backrefs, list) {
1047 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1049 if (backref->dir != dir || backref->namelen != namelen)
1051 if (memcmp(name, backref->name, namelen))
1056 backref = malloc(sizeof(*backref) + namelen + 1);
1059 memset(backref, 0, sizeof(*backref));
1061 backref->namelen = namelen;
1062 memcpy(backref->name, name, namelen);
1063 backref->name[namelen] = '\0';
1064 list_add_tail(&backref->list, &rec->backrefs);
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069 u64 ino, u64 dir, u64 index,
1070 const char *name, int namelen,
1071 u8 filetype, u8 itemtype, int errors)
1073 struct inode_record *rec;
1074 struct inode_backref *backref;
1076 rec = get_inode_rec(inode_cache, ino, 1);
1077 BUG_ON(IS_ERR(rec));
1078 backref = get_inode_backref(rec, name, namelen, dir);
1081 backref->errors |= errors;
1082 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083 if (backref->found_dir_index)
1084 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085 if (backref->found_inode_ref && backref->index != index)
1086 backref->errors |= REF_ERR_INDEX_UNMATCH;
1087 if (backref->found_dir_item && backref->filetype != filetype)
1088 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1090 backref->index = index;
1091 backref->filetype = filetype;
1092 backref->found_dir_index = 1;
1093 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1095 if (backref->found_dir_item)
1096 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097 if (backref->found_dir_index && backref->filetype != filetype)
1098 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1100 backref->filetype = filetype;
1101 backref->found_dir_item = 1;
1102 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104 if (backref->found_inode_ref)
1105 backref->errors |= REF_ERR_DUP_INODE_REF;
1106 if (backref->found_dir_index && backref->index != index)
1107 backref->errors |= REF_ERR_INDEX_UNMATCH;
1109 backref->index = index;
1111 backref->ref_type = itemtype;
1112 backref->found_inode_ref = 1;
1117 maybe_free_inode_rec(inode_cache, rec);
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122 struct cache_tree *dst_cache)
1124 struct inode_backref *backref;
1129 list_for_each_entry(backref, &src->backrefs, list) {
1130 if (backref->found_dir_index) {
1131 add_inode_backref(dst_cache, dst->ino, backref->dir,
1132 backref->index, backref->name,
1133 backref->namelen, backref->filetype,
1134 BTRFS_DIR_INDEX_KEY, backref->errors);
1136 if (backref->found_dir_item) {
1138 add_inode_backref(dst_cache, dst->ino,
1139 backref->dir, 0, backref->name,
1140 backref->namelen, backref->filetype,
1141 BTRFS_DIR_ITEM_KEY, backref->errors);
1143 if (backref->found_inode_ref) {
1144 add_inode_backref(dst_cache, dst->ino,
1145 backref->dir, backref->index,
1146 backref->name, backref->namelen, 0,
1147 backref->ref_type, backref->errors);
1151 if (src->found_dir_item)
1152 dst->found_dir_item = 1;
1153 if (src->found_file_extent)
1154 dst->found_file_extent = 1;
1155 if (src->found_csum_item)
1156 dst->found_csum_item = 1;
1157 if (src->some_csum_missing)
1158 dst->some_csum_missing = 1;
1159 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1165 BUG_ON(src->found_link < dir_count);
1166 dst->found_link += src->found_link - dir_count;
1167 dst->found_size += src->found_size;
1168 if (src->extent_start != (u64)-1) {
1169 if (dst->extent_start == (u64)-1) {
1170 dst->extent_start = src->extent_start;
1171 dst->extent_end = src->extent_end;
1173 if (dst->extent_end > src->extent_start)
1174 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175 else if (dst->extent_end < src->extent_start) {
1176 ret = add_file_extent_hole(&dst->holes,
1178 src->extent_start - dst->extent_end);
1180 if (dst->extent_end < src->extent_end)
1181 dst->extent_end = src->extent_end;
1185 dst->errors |= src->errors;
1186 if (src->found_inode_item) {
1187 if (!dst->found_inode_item) {
1188 dst->nlink = src->nlink;
1189 dst->isize = src->isize;
1190 dst->nbytes = src->nbytes;
1191 dst->imode = src->imode;
1192 dst->nodatasum = src->nodatasum;
1193 dst->found_inode_item = 1;
1195 dst->errors |= I_ERR_DUP_INODE_ITEM;
1203 static int splice_shared_node(struct shared_node *src_node,
1204 struct shared_node *dst_node)
1206 struct cache_extent *cache;
1207 struct ptr_node *node, *ins;
1208 struct cache_tree *src, *dst;
1209 struct inode_record *rec, *conflict;
1210 u64 current_ino = 0;
1214 if (--src_node->refs == 0)
1216 if (src_node->current)
1217 current_ino = src_node->current->ino;
1219 src = &src_node->root_cache;
1220 dst = &dst_node->root_cache;
1222 cache = search_cache_extent(src, 0);
1224 node = container_of(cache, struct ptr_node, cache);
1226 cache = next_cache_extent(cache);
1229 remove_cache_extent(src, &node->cache);
1232 ins = malloc(sizeof(*ins));
1234 ins->cache.start = node->cache.start;
1235 ins->cache.size = node->cache.size;
1239 ret = insert_cache_extent(dst, &ins->cache);
1240 if (ret == -EEXIST) {
1241 conflict = get_inode_rec(dst, rec->ino, 1);
1242 BUG_ON(IS_ERR(conflict));
1243 merge_inode_recs(rec, conflict, dst);
1245 conflict->checked = 1;
1246 if (dst_node->current == conflict)
1247 dst_node->current = NULL;
1249 maybe_free_inode_rec(dst, conflict);
1250 free_inode_rec(rec);
1257 if (src == &src_node->root_cache) {
1258 src = &src_node->inode_cache;
1259 dst = &dst_node->inode_cache;
1263 if (current_ino > 0 && (!dst_node->current ||
1264 current_ino > dst_node->current->ino)) {
1265 if (dst_node->current) {
1266 dst_node->current->checked = 1;
1267 maybe_free_inode_rec(dst, dst_node->current);
1269 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270 BUG_ON(IS_ERR(dst_node->current));
1275 static void free_inode_ptr(struct cache_extent *cache)
1277 struct ptr_node *node;
1278 struct inode_record *rec;
1280 node = container_of(cache, struct ptr_node, cache);
1282 free_inode_rec(rec);
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1291 struct cache_extent *cache;
1292 struct shared_node *node;
1294 cache = lookup_cache_extent(shared, bytenr, 1);
1296 node = container_of(cache, struct shared_node, cache);
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1305 struct shared_node *node;
1307 node = calloc(1, sizeof(*node));
1310 node->cache.start = bytenr;
1311 node->cache.size = 1;
1312 cache_tree_init(&node->root_cache);
1313 cache_tree_init(&node->inode_cache);
1316 ret = insert_cache_extent(shared, &node->cache);
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322 struct walk_control *wc, int level)
1324 struct shared_node *node;
1325 struct shared_node *dest;
1328 if (level == wc->active_node)
1331 BUG_ON(wc->active_node <= level);
1332 node = find_shared_node(&wc->shared, bytenr);
1334 ret = add_shared_node(&wc->shared, bytenr, refs);
1336 node = find_shared_node(&wc->shared, bytenr);
1337 wc->nodes[level] = node;
1338 wc->active_node = level;
1342 if (wc->root_level == wc->active_node &&
1343 btrfs_root_refs(&root->root_item) == 0) {
1344 if (--node->refs == 0) {
1345 free_inode_recs_tree(&node->root_cache);
1346 free_inode_recs_tree(&node->inode_cache);
1347 remove_cache_extent(&wc->shared, &node->cache);
1353 dest = wc->nodes[wc->active_node];
1354 splice_shared_node(node, dest);
1355 if (node->refs == 0) {
1356 remove_cache_extent(&wc->shared, &node->cache);
1362 static int leave_shared_node(struct btrfs_root *root,
1363 struct walk_control *wc, int level)
1365 struct shared_node *node;
1366 struct shared_node *dest;
1369 if (level == wc->root_level)
1372 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1376 BUG_ON(i >= BTRFS_MAX_LEVEL);
1378 node = wc->nodes[wc->active_node];
1379 wc->nodes[wc->active_node] = NULL;
1380 wc->active_node = i;
1382 dest = wc->nodes[wc->active_node];
1383 if (wc->active_node < wc->root_level ||
1384 btrfs_root_refs(&root->root_item) > 0) {
1385 BUG_ON(node->refs <= 1);
1386 splice_shared_node(node, dest);
1388 BUG_ON(node->refs < 2);
1397 * 1 - if the root with id child_root_id is a child of root parent_root_id
1398 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1399 * has other root(s) as parent(s)
1400 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Look in the tree root for a link between @parent_root_id and
 * @child_root_id.
 *
 * A direct lookup of the ROOT_REF item keyed on the parent is tried first.
 * After that the ROOT_BACKREF items keyed on the child are walked one by
 * one and their offset is compared with @parent_root_id.
 *
 * The final return statement yields 0 when has_parent is set and 2 when it
 * is not.
 */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1405 struct btrfs_path path;
1406 struct btrfs_key key;
1407 struct extent_buffer *leaf;
1411 btrfs_init_path(&path);
/* First attempt: exact ROOT_REF key (parent, ROOT_REF, child). */
1413 key.objectid = parent_root_id;
1414 key.type = BTRFS_ROOT_REF_KEY;
1415 key.offset = child_root_id;
1416 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1420 btrfs_release_path(&path);
/* Second attempt: walk the ROOT_BACKREF items that belong to the child. */
1424 key.objectid = child_root_id;
1425 key.type = BTRFS_ROOT_BACKREF_KEY;
1427 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1433 leaf = path.nodes[0];
/* Move on to the next leaf once the current one is exhausted. */
1434 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1438 leaf = path.nodes[0];
1441 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Any other objectid or key type means the backrefs of the child are done. */
1442 if (key.objectid != child_root_id ||
1443 key.type != BTRFS_ROOT_BACKREF_KEY)
/* A backref whose offset is the parent id is the link being searched for. */
1448 if (key.offset == parent_root_id) {
1449 btrfs_release_path(&path);
1456 btrfs_release_path(&path);
1459 return has_parent ? 0 : 2;
/*
 * Record every directory entry packed into one DIR_ITEM or DIR_INDEX item.
 *
 * @root:        root that owns the leaf
 * @eb:          leaf that holds the item
 * @slot:        slot of the item inside @eb
 * @key:         key of the item; objectid, offset and type are passed on to
 *               add_inode_backref()
 * @active_node: shared node whose caches and current inode record are updated
 *
 * Each entry adds its name length to the found_size of the current inode
 * record and is registered as a backref in the inode cache or the root
 * cache, depending on the type of its location key.
 */
1462 static int process_dir_item(struct btrfs_root *root,
1463 struct extent_buffer *eb,
1464 int slot, struct btrfs_key *key,
1465 struct shared_node *active_node)
1475 struct btrfs_dir_item *di;
1476 struct inode_record *rec;
1477 struct cache_tree *root_cache;
1478 struct cache_tree *inode_cache;
1479 struct btrfs_key location;
1480 char namebuf[BTRFS_NAME_LEN];
1482 root_cache = &active_node->root_cache;
1483 inode_cache = &active_node->inode_cache;
1484 rec = active_node->current;
1485 rec->found_dir_item = 1;
1487 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488 total = btrfs_item_size_nr(eb, slot);
/* Walk the entries until the whole item size has been consumed. */
1489 while (cur < total) {
1491 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492 name_len = btrfs_dir_name_len(eb, di);
1493 data_len = btrfs_dir_data_len(eb, di);
1494 filetype = btrfs_dir_type(eb, di);
1496 rec->found_size += name_len;
/* Names longer than BTRFS_NAME_LEN are cut to fit namebuf and flagged. */
1497 if (name_len <= BTRFS_NAME_LEN) {
1501 len = BTRFS_NAME_LEN;
1502 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size dir item header. */
1504 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* Entry points at an inode: record it in the inode cache. */
1506 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507 add_inode_backref(inode_cache, location.objectid,
1508 key->objectid, key->offset, namebuf,
1509 len, filetype, key->type, error);
/* Entry points at another root: record it in the root cache. */
1510 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511 add_inode_backref(root_cache, location.objectid,
1512 key->objectid, key->offset,
1513 namebuf, len, filetype,
/* Any other location type is reported and filed under BTRFS_MULTIPLE_OBJECTIDS. */
1516 fprintf(stderr, "invalid location in dir item %u\n",
1518 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519 key->objectid, key->offset, namebuf,
1520 len, filetype, key->type, error);
/* Step over header, name and data to reach the next packed entry. */
1523 len = sizeof(*di) + name_len + data_len;
1524 di = (struct btrfs_dir_item *)((char *)di + len);
/*
 * A DIR_INDEX item is flagged when nritems exceeds one; nritems is
 * presumably the number of entries counted by the loop above.
 */
1527 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name packed into one INODE_REF item as a backref.
 *
 * @eb:          leaf that holds the item
 * @slot:        slot of the item inside @eb
 * @key:         item key; objectid is used as the inode number and offset as
 *               the directory passed to add_inode_backref()
 * @active_node: shared node whose inode cache receives the backrefs
 */
1533 static int process_inode_ref(struct extent_buffer *eb,
1534 int slot, struct btrfs_key *key,
1535 struct shared_node *active_node)
1543 struct cache_tree *inode_cache;
1544 struct btrfs_inode_ref *ref;
1545 char namebuf[BTRFS_NAME_LEN];
1547 inode_cache = &active_node->inode_cache;
1549 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550 total = btrfs_item_size_nr(eb, slot);
/* Walk the packed refs until the whole item size has been consumed. */
1551 while (cur < total) {
1552 name_len = btrfs_inode_ref_name_len(eb, ref);
1553 index = btrfs_inode_ref_index(eb, ref);
/* Names longer than BTRFS_NAME_LEN are cut to fit namebuf and flagged. */
1554 if (name_len <= BTRFS_NAME_LEN) {
1558 len = BTRFS_NAME_LEN;
1559 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size ref header. */
1561 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562 add_inode_backref(inode_cache, key->objectid, key->offset,
1563 index, namebuf, len, 0, key->type, error);
/* Step over header and name to reach the next packed ref. */
1565 len = sizeof(*ref) + name_len;
1566 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every name packed into one INODE_EXTREF item as a backref.
 *
 * Unlike process_inode_ref() the parent directory is read from each extref
 * entry instead of being taken from the key offset.
 *
 * @eb:          leaf that holds the item
 * @slot:        slot of the item inside @eb
 * @key:         item key; objectid is used as the inode number
 * @active_node: shared node whose inode cache receives the backrefs
 */
1572 static int process_inode_extref(struct extent_buffer *eb,
1573 int slot, struct btrfs_key *key,
1574 struct shared_node *active_node)
1583 struct cache_tree *inode_cache;
1584 struct btrfs_inode_extref *extref;
1585 char namebuf[BTRFS_NAME_LEN];
1587 inode_cache = &active_node->inode_cache;
1589 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590 total = btrfs_item_size_nr(eb, slot);
/* Walk the packed extrefs until the whole item size has been consumed. */
1591 while (cur < total) {
1592 name_len = btrfs_inode_extref_name_len(eb, extref);
1593 index = btrfs_inode_extref_index(eb, extref);
1594 parent = btrfs_inode_extref_parent(eb, extref);
/* Names longer than BTRFS_NAME_LEN are cut to fit namebuf and flagged. */
1595 if (name_len <= BTRFS_NAME_LEN) {
1599 len = BTRFS_NAME_LEN;
1600 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size extref header. */
1602 read_extent_buffer(eb, namebuf,
1603 (unsigned long)(extref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, parent,
1605 index, namebuf, len, 0, key->type, error);
/* Step over header and name to reach the next packed extref. */
1607 len = sizeof(*extref) + name_len;
1608 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree over the byte range [@start, @start + @len).
 *
 * @root:  any root of the filesystem; only fs_info and sectorsize are used
 * @start: first byte of the range
 * @len:   length of the range in bytes
 * @found: output; presumably receives the number of bytes in the range that
 *         are covered by checksum items (the accumulation is not among the
 *         statements shown here, confirm against the full function)
 */
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616 u64 len, u64 *found)
1618 struct btrfs_key key;
1619 struct btrfs_path path;
1620 struct extent_buffer *leaf;
1625 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1627 btrfs_init_path(&path);
1629 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1631 key.type = BTRFS_EXTENT_CSUM_KEY;
1633 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: inspect the preceding item, since a checksum item that
 * starts before @start may still cover part of the range.
 */
1637 if (ret > 0 && path.slots[0] > 0) {
1638 leaf = path.nodes[0];
1639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641 key.type == BTRFS_EXTENT_CSUM_KEY)
1646 leaf = path.nodes[0];
/* Move on to the next leaf once the current one is exhausted. */
1647 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1653 leaf = path.nodes[0];
1656 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Anything that is not a checksum item ends the walk. */
1657 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658 key.type != BTRFS_EXTENT_CSUM_KEY)
1661 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items that start at or after the end of the range are out of scope. */
1662 if (key.offset >= start + len)
1665 if (key.offset > start)
/*
 * Each checksum of csum_size bytes stands for one sector of data, so the
 * item covers bytes from key.offset up to csum_end.
 */
1668 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670 if (csum_end > start) {
/* Overlap of this item with what is left of the range. */
1671 size = min(csum_end - start, len);
1680 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item and fold it into the current inode record.
 *
 * @root:        root that owns the leaf; sectorsize supplies the alignment mask
 * @eb:          leaf that holds the item
 * @slot:        slot of the item inside @eb
 * @key:         item key; objectid must match the inode record and offset is
 *               the file offset of the extent
 * @active_node: shared node whose current inode record is updated
 *
 * What the visible code does:
 *  - tracks extent_start and extent_end of the record, flags an overlap with
 *    the previous extent and registers a hole when there is a gap;
 *  - checks inline, regular and preallocated extents and sets
 *    I_ERR_BAD_FILE_EXTENT on inconsistent fields;
 *  - adds the extent length to found_size (for regular and preallocated
 *    extents only when disk_bytenr is non-zero);
 *  - for extents with a disk location outside the data reloc tree, calls
 *    count_csum_range() and records whether checksums were found, some are
 *    missing (regular extents), or I_ERR_ODD_CSUM_ITEM applies
 *    (preallocated extents).
 */
1686 static int process_file_extent(struct btrfs_root *root,
1687 struct extent_buffer *eb,
1688 int slot, struct btrfs_key *key,
1689 struct shared_node *active_node)
1691 struct inode_record *rec;
1692 struct btrfs_file_extent_item *fi;
1694 u64 disk_bytenr = 0;
1695 u64 extent_offset = 0;
/* Low bits that must be clear in a sector aligned value. */
1696 u64 mask = root->sectorsize - 1;
1700 rec = active_node->current;
1701 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702 rec->found_file_extent = 1;
/* An extent_start of (u64)-1 marks a record with no extent seen so far. */
1704 if (rec->extent_start == (u64)-1) {
1705 rec->extent_start = key->offset;
1706 rec->extent_end = key->offset;
/* Compare with the end of the previous extent: overlap or hole. */
1709 if (rec->extent_end > key->offset)
1710 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711 else if (rec->extent_end < key->offset) {
1712 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713 key->offset - rec->extent_end);
1718 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719 extent_type = btrfs_file_extent_type(eb, fi);
/* Inline extent: count the raw length, then round up to a full sector. */
1721 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1724 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725 rec->found_size += num_bytes;
1726 num_bytes = (num_bytes + mask) & ~mask;
/* Regular or preallocated extent: sanity check the on-disk fields. */
1727 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731 extent_offset = btrfs_file_extent_offset(eb, fi);
/* The length must be non-zero and sector aligned. */
1732 if (num_bytes == 0 || (num_bytes & mask))
1733 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced window must fit inside ram_bytes. */
1734 if (num_bytes + extent_offset >
1735 btrfs_file_extent_ram_bytes(eb, fi))
1736 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A preallocated extent must not be compressed, encrypted or encoded. */
1737 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738 (btrfs_file_extent_compression(eb, fi) ||
1739 btrfs_file_extent_encryption(eb, fi) ||
1740 btrfs_file_extent_other_encoding(eb, fi)))
1741 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk location add to found_size. */
1742 if (disk_bytenr > 0)
1743 rec->found_size += num_bytes;
1745 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1747 rec->extent_end = key->offset + num_bytes;
1750 * The data reloc tree will copy full extents into its inode and then
1751 * copy the corresponding csums. Because the extent it copied could be
1752 * a preallocated extent that hasn't been written to yet there may be no
1753 * csums to copy, ergo we won't have csums for our file extent. This is
1754 * ok so just don't bother checking csums if the inode belongs to the
1757 if (disk_bytenr > 0 &&
1758 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1760 if (btrfs_file_extent_compression(eb, fi))
1761 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1763 disk_bytenr += extent_offset;
1765 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1768 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1770 rec->found_csum_item = 1;
1771 if (found < num_bytes)
1772 rec->some_csum_missing = 1;
1773 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1775 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of a leaf to the matching per-item handler.
 *
 * @root: root that owns the leaf
 * @eb:   leaf to process
 * @wc:   walk state; wc->nodes[wc->active_node] receives the records
 *
 * Items are visited in slot order. Whenever an objectid larger than the
 * inode number of the current record shows up, the current record is
 * marked as checked and a record for the new objectid becomes current.
 */
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782 struct walk_control *wc)
1784 struct btrfs_key key;
1788 struct cache_tree *inode_cache;
1789 struct shared_node *active_node;
/* Special case: active node at root level and a root item with zero refs. */
1791 if (wc->root_level == wc->active_node &&
1792 btrfs_root_refs(&root->root_item) == 0)
1795 active_node = wc->nodes[wc->active_node];
1796 inode_cache = &active_node->inode_cache;
1797 nritems = btrfs_header_nritems(eb);
1798 for (i = 0; i < nritems; i++) {
1799 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get separate treatment (presumably skipped). */
1801 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1803 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* A new, larger objectid closes the current record and opens the next one. */
1806 if (active_node->current == NULL ||
1807 active_node->current->ino < key.objectid) {
1808 if (active_node->current) {
1809 active_node->current->checked = 1;
1810 maybe_free_inode_rec(inode_cache,
1811 active_node->current);
1813 active_node->current = get_inode_rec(inode_cache,
1815 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1818 case BTRFS_DIR_ITEM_KEY:
1819 case BTRFS_DIR_INDEX_KEY:
1820 ret = process_dir_item(root, eb, i, &key, active_node);
1822 case BTRFS_INODE_REF_KEY:
1823 ret = process_inode_ref(eb, i, &key, active_node);
1825 case BTRFS_INODE_EXTREF_KEY:
1826 ret = process_inode_extref(eb, i, &key, active_node);
1828 case BTRFS_INODE_ITEM_KEY:
1829 ret = process_inode_item(eb, i, &key, active_node);
1831 case BTRFS_EXTENT_DATA_KEY:
1832 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the children of @node, from @slot to the last pointer.
 *
 * @root: root being walked; nodesize gives the block size to read
 * @node: interior node whose child pointers are read ahead
 * @slot: first child slot to include
 */
1842 static void reada_walk_down(struct btrfs_root *root,
1843 struct extent_buffer *node, int slot)
/* The level is read here; presumably only some levels trigger readahead. */
1852 level = btrfs_header_level(node);
1856 nritems = btrfs_header_nritems(node);
1857 blocksize = root->nodesize;
1858 for (i = slot; i < nritems; i++) {
1859 bytenr = btrfs_node_blockptr(node, i);
1860 ptr_gen = btrfs_node_ptr_generation(node, i);
1861 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1866 * Check the child node/leaf by the following condition:
1867 * 1. the first item key of the node/leaf should be the same as the one
1869 * 2. block in parent node should match the child node/leaf.
1870 * 3. generation of parent node and child's header should be consistent.
1872 * Or the child node/leaf pointed by the key in parent is not valid.
1874 * We would like to check the leaf owner too, but since subvolumes may share
1875 * leaves the owner check is not reliable, so the key check should be
1876 * sufficient for that case.
/*
 * Cross-check a child block against the pointer stored in its parent.
 *
 * @root:   root being walked
 * @parent: node that holds the pointer
 * @slot:   slot of the pointer inside @parent
 * @child:  block that was read for that pointer
 *
 * Three properties are compared and a message is printed to stderr for each
 * mismatch: the first key of the child, the block number of the child and
 * the generation of the child.
 */
1878 static int check_child_node(struct btrfs_root *root,
1879 struct extent_buffer *parent, int slot,
1880 struct extent_buffer *child)
1882 struct btrfs_key parent_key;
1883 struct btrfs_key child_key;
1886 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Level 0 is a leaf and uses item keys, higher levels use node keys. */
1887 if (btrfs_header_level(child) == 0)
1888 btrfs_item_key_to_cpu(child, &child_key, 0);
1890 btrfs_node_key_to_cpu(child, &child_key, 0);
/* Check 1: first key of the child equals the key stored in the parent. */
1892 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1895 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896 parent_key.objectid, parent_key.type, parent_key.offset,
1897 child_key.objectid, child_key.type, child_key.offset);
/* Check 2: the child header carries the block number the parent points at. */
1899 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1901 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902 btrfs_node_blockptr(parent, slot),
1903 btrfs_header_bytenr(child));
/*
 * Check 3: generation in the parent pointer equals the child header.
 * NOTE(review): the two arguments below look swapped with respect to the
 * wanted/have labels; the other two messages pass the parent value first.
 */
1905 if (btrfs_node_ptr_generation(parent, slot) !=
1906 btrfs_header_generation(child)) {
1908 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909 btrfs_header_generation(child),
1910 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache used by walk_down_tree(): for each tree level, the start
 * of the last block whose reference count was looked up and that count.
 */
/* Block start per level; compared against the block about to be visited. */
1916 u64 bytenr[BTRFS_MAX_LEVEL];
/* Reference count that was stored for the block recorded in bytenr[]. */
1917 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend from path->nodes[*level] towards the leaves, processing leaves
 * and validating every child block before stepping into it.
 *
 * @root:  root being walked
 * @path:  current position; nodes[] and slots[] are updated while descending
 * @wc:    walk state handed to enter_shared_node() and process_one_leaf()
 * @level: in/out, current level inside @path
 * @nrefs: per-level cache of extent reference counts, consulted before
 *         btrfs_lookup_extent_info() is called
 */
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921 struct walk_control *wc, int *level,
1922 struct node_refs *nrefs)
1924 enum btrfs_tree_block_status status;
1927 struct extent_buffer *next;
1928 struct extent_buffer *cur;
1933 WARN_ON(*level < 0);
1934 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reference count of the starting block: cached value or a fresh lookup. */
1936 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937 refs = nrefs->refs[*level];
1940 ret = btrfs_lookup_extent_info(NULL, root,
1941 path->nodes[*level]->start,
1942 *level, 1, &refs, NULL);
1947 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948 nrefs->refs[*level] = refs;
1952 ret = enter_shared_node(root, path->nodes[*level]->start,
1960 while (*level >= 0) {
1961 WARN_ON(*level < 0);
1962 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963 cur = path->nodes[*level];
/* The header level must agree with the position in the path. */
1965 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
1968 if (path->slots[*level] >= btrfs_header_nritems(cur))
1971 ret = process_one_leaf(root, cur, wc);
/* Interior node: fetch the pointer for the current slot. */
1976 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978 blocksize = root->nodesize;
/* Reference count of the child: cached value or a fresh lookup. */
1980 if (bytenr == nrefs->bytenr[*level - 1]) {
1981 refs = nrefs->refs[*level - 1];
1983 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984 *level - 1, 1, &refs, NULL);
1988 nrefs->bytenr[*level - 1] = bytenr;
1989 nrefs->refs[*level - 1] = refs;
1994 ret = enter_shared_node(root, bytenr, refs,
1997 path->slots[*level]++;
/* Use the cached block if it is current, otherwise read it with readahead. */
2002 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004 free_extent_buffer(next);
2005 reada_walk_down(root, cur, path->slots[*level]);
2006 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: register a corrupt extent record for it. */
2008 if (!extent_buffer_uptodate(next)) {
2009 struct btrfs_key node_key;
2011 btrfs_node_key_to_cpu(path->nodes[*level],
2013 path->slots[*level]);
2014 btrfs_add_corrupt_extent_record(root->fs_info,
2016 path->nodes[*level]->start,
2017 root->nodesize, *level);
/* The child must match the pointer in its parent. */
2023 ret = check_child_node(root, cur, path->slots[*level], next);
/* Structural check of the child itself. */
2029 if (btrfs_is_leaf(next))
2030 status = btrfs_check_leaf(root, NULL, next);
2032 status = btrfs_check_node(root, NULL, next);
2033 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level below. */
2039 *level = *level - 1;
2040 free_extent_buffer(path->nodes[*level]);
2041 path->nodes[*level] = next;
2042 path->slots[*level] = 0;
/* Mark the final block as fully consumed by moving its slot past the end. */
2045 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb from *level towards the root, looking for a block that still has an
 * unvisited slot.
 *
 * @root:  root being walked
 * @path:  current position; exhausted blocks are released and set to NULL
 * @wc:    walk state; the shared node is left when its level is released
 * @level: in/out, current level inside @path
 */
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050 struct walk_control *wc, int *level)
/* Despite its name this may hold a block of any level. */
2053 struct extent_buffer *leaf;
2055 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056 leaf = path->nodes[i];
/* A block with slots left after the current one is where the walk resumes. */
2057 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Exhausted block: drop it from the path. */
2062 free_extent_buffer(path->nodes[*level]);
2063 path->nodes[*level] = NULL;
/* The active shared node is never below the level that is released. */
2064 BUG_ON(*level > wc->active_node);
2065 if (*level == wc->active_node)
2066 leave_shared_node(root, wc, *level);
/*
 * Sanity checks on the inode record of a root directory.
 *
 * The conditions below are tested in order; presumably any of them being
 * true rejects the record:
 *  - the inode item is missing or the record already carries errors;
 *  - nlink is not 1 or found_link is not 0;
 *  - the record has no backref at all;
 *  - the first backref lacks an inode ref;
 *  - the first backref is not index 0 with the two-character name "..";
 *  - the first backref has a dir index or a dir item.
 */
2073 static int check_root_dir(struct inode_record *rec)
2075 struct inode_backref *backref;
2078 if (!rec->found_inode_item || rec->errors)
2080 if (rec->nlink != 1 || rec->found_link != 0)
2082 if (list_empty(&rec->backrefs))
/* Only the first backref of the list is examined. */
2084 backref = to_inode_backref(rec->backrefs.next);
2085 if (!backref->found_inode_ref)
2087 if (backref->index != 0 || backref->namelen != 2 ||
2088 memcmp(backref->name, "..", 2))
2090 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of an inode item with rec->found_size and clear
 * I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * @trans: running transaction
 * @root:  root that contains the inode
 * @path:  scratch path; released before returning
 * @rec:   inode record that supplies ino and found_size
 */
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098 struct btrfs_root *root, struct btrfs_path *path,
2099 struct inode_record *rec)
2101 struct btrfs_inode_item *ei;
2102 struct btrfs_key key;
/*
 * Search with the largest possible offset, so the slot lands after every
 * INODE_ITEM key of this inode.
 */
2105 key.objectid = rec->ino;
2106 key.type = BTRFS_INODE_ITEM_KEY;
2107 key.offset = (u64)-1;
2109 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 leaves no preceding item in this leaf to step back to. */
2113 if (!path->slots[0]) {
2120 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired. */
2121 if (key.objectid != rec->ino) {
2126 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127 struct btrfs_inode_item);
2128 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129 btrfs_mark_buffer_dirty(path->nodes[0]);
2130 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132 root->root_key.objectid);
2134 btrfs_release_path(path);
/*
 * Add the orphan item that is missing for rec->ino and clear
 * I_ERR_NO_ORPHAN_ITEM on the record (presumably only when the insert
 * succeeded).
 *
 * @trans: running transaction
 * @root:  root that contains the inode
 * @path:  scratch path; released right after the insert
 * @rec:   inode record being repaired
 */
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139 struct btrfs_root *root,
2140 struct btrfs_path *path,
2141 struct inode_record *rec)
2145 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146 btrfs_release_path(path);
2148 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of an inode item with rec->found_size and clear
 * I_ERR_FILE_NBYTES_WRONG on the record.
 *
 * @trans: running transaction
 * @root:  root that contains the inode
 * @path:  scratch path; released before returning
 * @rec:   inode record that supplies ino and found_size
 */
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153 struct btrfs_root *root,
2154 struct btrfs_path *path,
2155 struct inode_record *rec)
2157 struct btrfs_inode_item *ei;
2158 struct btrfs_key key;
/* Look up the inode item of rec->ino with copy-on-write enabled. */
2161 key.objectid = rec->ino;
2162 key.type = BTRFS_INODE_ITEM_KEY;
2165 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2172 /* Since ret == 0, no need to check anything */
2173 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174 struct btrfs_inode_item);
2175 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176 btrfs_mark_buffer_dirty(path->nodes[0]);
2177 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178 printf("reset nbytes for ino %llu root %llu\n",
2179 rec->ino, root->root_key.objectid);
2181 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that is missing for @backref and update the
 * bookkeeping of the parent directory record.
 *
 * @root:        root that contains the directory
 * @inode_cache: cache used to look up the record of the parent directory
 * @rec:         record of the inode the entry points to
 * @backref:     backref that supplies dir, index, name and namelen
 *
 * Runs in its own transaction. On the error path that is visible here the
 * PTR_ERR of the transaction handle is returned.
 */
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186 struct cache_tree *inode_cache,
2187 struct inode_record *rec,
2188 struct inode_backref *backref)
2190 struct btrfs_path path;
2191 struct btrfs_trans_handle *trans;
2192 struct btrfs_dir_item *dir_item;
2193 struct extent_buffer *leaf;
2194 struct btrfs_key key;
2195 struct btrfs_disk_key disk_key;
2196 struct inode_record *dir_rec;
2197 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name, no data. */
2198 u32 data_size = sizeof(*dir_item) + backref->namelen;
2201 trans = btrfs_start_transaction(root, 1);
2203 return PTR_ERR(trans);
2205 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206 (unsigned long long)rec->ino);
/* Reserve space for the new item at (dir, DIR_INDEX, index). */
2208 btrfs_init_path(&path);
2209 key.objectid = backref->dir;
2210 key.type = BTRFS_DIR_INDEX_KEY;
2211 key.offset = backref->index;
2212 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2215 leaf = path.nodes[0];
2216 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the entry: the inode item of rec->ino, in disk byte order. */
2218 disk_key.objectid = cpu_to_le64(rec->ino);
2219 disk_key.type = BTRFS_INODE_ITEM_KEY;
2220 disk_key.offset = 0;
2222 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224 btrfs_set_dir_data_len(leaf, dir_item, 0);
2225 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right after the fixed-size header. */
2226 name_ptr = (unsigned long)(dir_item + 1);
2227 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228 btrfs_mark_buffer_dirty(leaf);
2229 btrfs_release_path(&path);
/* NOTE(review): the result of the commit is not used on this line. */
2230 btrfs_commit_transaction(trans, root);
/* Reflect the new item in the in-memory records. */
2232 backref->found_dir_index = 1;
2233 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234 BUG_ON(IS_ERR(dir_rec));
/* The new entry adds its name length to the size found for the directory. */
2237 dir_rec->found_size += backref->namelen;
/* Re-evaluate the isize error flag against the updated found_size. */
2238 if (dir_rec->found_size == dir_rec->isize &&
2239 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241 if (dir_rec->found_size != dir_rec->isize)
2242 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref.
 *
 * @root:        root that contains the directory
 * @inode_cache: inode cache of the walk (not referenced in the lines shown)
 * @rec:         record of the inode the entry points to
 * @backref:     backref that supplies dir, index, name and namelen
 *
 * Runs in its own transaction. On the error path that is visible here the
 * PTR_ERR of the transaction handle is returned.
 */
2247 static int delete_dir_index(struct btrfs_root *root,
2248 struct cache_tree *inode_cache,
2249 struct inode_record *rec,
2250 struct inode_backref *backref)
2252 struct btrfs_trans_handle *trans;
2253 struct btrfs_dir_item *di;
2254 struct btrfs_path path;
2257 trans = btrfs_start_transaction(root, 1);
2259 return PTR_ERR(trans);
2261 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262 (unsigned long long)backref->dir,
2263 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264 (unsigned long long)root->objectid);
/* Look the entry up for deletion (the last argument is -1). */
2266 btrfs_init_path(&path);
2267 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268 backref->name, backref->namelen,
2269 backref->index, -1);
/* Presumably the lookup failure path: clean up and end the transaction. */
2272 btrfs_release_path(&path);
2273 btrfs_commit_transaction(trans, root);
/*
 * Two ways to delete: drop the whole item, or remove a single name from
 * it. The condition that selects between them is not among the lines
 * shown here.
 */
2280 ret = btrfs_del_item(trans, root, &path);
2282 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2284 btrfs_release_path(&path);
2285 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item from what the walk has learned about it.
 *
 * @root:     root that contains the inode
 * @rec:      inode record; ino, found_link, found_size, extent_end and the
 *            found_* flags drive the contents of the new item
 * @backref:  backref being repaired by the caller (not referenced in the
 *            lines shown)
 * @root_dir: presumably selects the nlink value of 1 that is used for a
 *            root directory instead of rec->found_link
 *
 * The new inode becomes a directory when a dir item was seen for it and a
 * regular file otherwise, in both cases with permission bits 0755.
 * atime, ctime and mtime are set to the current time and otime to zero.
 */
2289 static int create_inode_item(struct btrfs_root *root,
2290 struct inode_record *rec,
2291 struct inode_backref *backref, int root_dir)
2293 struct btrfs_trans_handle *trans;
2294 struct btrfs_inode_item inode_item;
2295 time_t now = time(NULL);
2298 trans = btrfs_start_transaction(root, 1);
2299 if (IS_ERR(trans)) {
2300 ret = PTR_ERR(trans);
2304 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305 "be incomplete, please check permissions and content after "
2306 "the fsck completes.\n", (unsigned long long)root->objectid,
2307 (unsigned long long)rec->ino);
/* Build the item in memory, starting from all zeroes. */
2309 memset(&inode_item, 0, sizeof(inode_item));
2310 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either 1 or the number of links that were found. */
2312 btrfs_set_stack_inode_nlink(&inode_item, 1);
2314 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* A dir item decides for a directory, even when extents exist as well. */
2316 if (rec->found_dir_item) {
2317 if (rec->found_file_extent)
2318 fprintf(stderr, "root %llu inode %llu has both a dir "
2319 "item and extents, unsure if it is a dir or a "
2320 "regular file so setting it as a directory\n",
2321 (unsigned long long)root->objectid,
2322 (unsigned long long)rec->ino);
2323 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): always true when reached, so this acts as a plain else. */
2325 } else if (!rec->found_dir_item) {
/* Regular file: the size is the end of the last extent seen. */
2326 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2329 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2338 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2340 btrfs_commit_transaction(trans, root);
/*
 * Go through all backrefs of an inode record and repair what can be
 * repaired.
 *
 * @root:        root that contains the inode
 * @rec:         inode record whose backref list is processed
 * @inode_cache: inode cache, passed on to the dir index helpers
 *
 * A delete flag (declared on a line not shown here) selects between two
 * kinds of work. With the flag set, bad dir index items are deleted.
 * Without it, missing dir index items, missing dir index/item pairs and
 * missing inode items are created.
 *
 * Returns the error code when one is set, otherwise the value of repaired.
 */
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345 struct inode_record *rec,
2346 struct cache_tree *inode_cache,
2349 struct inode_backref *tmp, *backref;
2350 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The safe iterator is needed because entries are unlinked inside the loop. */
2354 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* A root directory without an inode item gets one recreated first. */
2355 if (!delete && rec->ino == root_dirid) {
2356 if (!rec->found_inode_item) {
2357 ret = create_inode_item(root, rec, backref, 1);
2364 /* Index 0 for root dir's are special, don't mess with it */
2365 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index without a matching inode ref, or one whose index does not
 * match the inode ref, is deleted.
 */
2369 ((backref->found_dir_index && !backref->found_inode_ref) ||
2370 (backref->found_dir_index && backref->found_inode_ref &&
2371 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372 ret = delete_dir_index(root, inode_cache, rec, backref);
2376 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2380 if (!delete && !backref->found_dir_index &&
2381 backref->found_dir_item && backref->found_inode_ref) {
2382 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * NOTE(review): found_dir_index is tested twice in this condition; the
 * second test is redundant as written.
 */
2387 if (backref->found_dir_item &&
2388 backref->found_dir_index &&
2389 backref->found_dir_index) {
/* A complete and error free backref needs no further tracking. */
2390 if (!backref->errors &&
2391 backref->found_inode_ref) {
2392 list_del(&backref->list);
/* Only the inode ref exists: recreate the dir index/item pair. */
2398 if (!delete && (!backref->found_dir_index &&
2399 !backref->found_dir_item &&
2400 backref->found_inode_ref)) {
2401 struct btrfs_trans_handle *trans;
2402 struct btrfs_key location;
2404 ret = check_dir_conflict(root, backref->name,
2410 * let nlink fixing routine to handle it,
2411 * which can do it better.
2416 location.objectid = rec->ino;
2417 location.type = BTRFS_INODE_ITEM_KEY;
2418 location.offset = 0;
2420 trans = btrfs_start_transaction(root, 1);
2421 if (IS_ERR(trans)) {
2422 ret = PTR_ERR(trans);
2425 fprintf(stderr, "adding missing dir index/item pair "
2427 (unsigned long long)rec->ino);
2428 ret = btrfs_insert_dir_item(trans, root, backref->name,
2430 backref->dir, &location,
2431 imode_to_type(rec->imode),
2434 btrfs_commit_transaction(trans, root);
/* Everything except the inode item exists and is consistent: recreate it. */
2438 if (!delete && (backref->found_inode_ref &&
2439 backref->found_dir_index &&
2440 backref->found_dir_item &&
2441 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442 !rec->found_inode_item)) {
2443 ret = create_inode_item(root, rec, backref, 0);
2450 return ret ? ret : repaired;
2454 * To determine the file type for nlink/inode_item repair
2456 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457 * Return -ENOENT if file type is not found.
/*
 * Work out the BTRFS_FT_* type of the inode described by @rec.
 *
 * @rec:  inode record to inspect
 * @type: output, receives the file type
 *
 * The mode stored in the record is preferred when an inode item was found.
 * Otherwise the first backref that has a dir index or a dir item supplies
 * the type.
 */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2461 struct inode_backref *backref;
2463 /* For inode item recovered case */
2464 if (rec->found_inode_item) {
2465 *type = imode_to_type(rec->imode);
/* Fall back to the file type recorded in a directory entry. */
2469 list_for_each_entry(backref, &rec->backrefs, list) {
2470 if (backref->found_dir_index || backref->found_dir_item) {
2471 *type = backref->filetype;
2479 * To determine the file name for nlink repair
2481 * Return 0 if file name is found, set name and namelen.
2482 * Return -ENOENT if file name is not found.
/*
 * Copy a name for the inode described by @rec out of its backrefs.
 *
 * @rec:     inode record to inspect
 * @name:    output buffer; must have room for backref->namelen bytes. The
 *           copy below does not append a terminating NUL byte.
 * @namelen: output, receives the length of the copied name
 *
 * The first backref that has a dir index, a dir item or an inode ref is
 * used.
 */
2484 static int find_file_name(struct inode_record *rec,
2485 char *name, int *namelen)
2487 struct inode_backref *backref;
2489 list_for_each_entry(backref, &rec->backrefs, list) {
2490 if (backref->found_dir_index || backref->found_dir_item ||
2491 backref->found_inode_ref) {
2492 memcpy(name, backref->name, backref->namelen);
2493 *namelen = backref->namelen;
2500 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of an inode from scratch.
 *
 * @trans: running transaction
 * @root:  root that contains the inode
 * @path:  scratch path; released before returning
 * @rec:   inode record whose backrefs are replayed
 *
 * Three steps: unlink every backref and forget the incomplete ones, set
 * nlink in the inode item to 0, then add a link again for each backref
 * that is still on the list.
 */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502 struct btrfs_root *root,
2503 struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct inode_backref *backref;
2507 struct inode_backref *tmp;
2508 struct btrfs_key key;
2509 struct btrfs_inode_item *inode_item;
2512 /* We don't believe this either, reset it and iterate backref */
2513 rec->found_link = 0;
2515 /* Remove all backref including the valid ones */
2516 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518 backref->index, backref->name,
2519 backref->namelen, 0);
2523 /* remove invalid backref, so it won't be added back */
/* A backref counts as valid only with dir index, dir item and inode ref. */
2524 if (!(backref->found_dir_index &&
2525 backref->found_dir_item &&
2526 backref->found_inode_ref)) {
2527 list_del(&backref->list);
2534 /* Set nlink to 0 */
2535 key.objectid = rec->ino;
2536 key.type = BTRFS_INODE_ITEM_KEY;
2538 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2545 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546 struct btrfs_inode_item);
2547 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548 btrfs_mark_buffer_dirty(path->nodes[0]);
2549 btrfs_release_path(path);
2552 * Add back valid inode_ref/dir_item/dir_index,
2553 * add_link() will handle the nlink inc, so new nlink must be correct
2555 list_for_each_entry(backref, &rec->backrefs, list) {
2556 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557 backref->name, backref->namelen,
2558 backref->filetype, &backref->index, 1);
2563 btrfs_release_path(path);
/*
 * Repair the link count of an inode and, when no valid link is left, move
 * the inode into a lost+found directory.
 *
 * @trans: running transaction
 * @root:  root that contains the inode
 * @path:  scratch path; released before returning
 * @rec:   inode record being repaired
 *
 * Name and type are recovered from the backrefs before reset_nlink() drops
 * the invalid ones. The fallbacks are the decimal inode number as name and
 * BTRFS_FT_REG_FILE as type. If the name is already taken inside
 * lost+found, a suffix made of a dot and the inode number is appended,
 * repeatedly if needed, as long as the result fits the name length limit.
 * I_ERR_LINK_COUNT_WRONG is cleared at the end.
 */
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568 struct btrfs_root *root,
2569 struct btrfs_path *path,
2570 struct inode_record *rec)
2572 char *dir_name = "lost+found";
2573 char namebuf[BTRFS_NAME_LEN] = {0};
2578 int name_recovered = 0;
2579 int type_recovered = 0;
2583 * Get file name and type first before these invalid inode ref
2584 * are deleted by remove_all_invalid_backref()
/* Both helpers signal success with 0, hence the negation. */
2586 name_recovered = !find_file_name(rec, namebuf, &namelen);
2587 type_recovered = !find_file_type(rec, &type);
/* No name in any backref: use the decimal inode number. */
2589 if (!name_recovered) {
2590 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591 rec->ino, rec->ino);
2592 namelen = count_digits(rec->ino);
2593 sprintf(namebuf, "%llu", rec->ino);
/* No type available: treat the inode as a regular file. */
2596 if (!type_recovered) {
2597 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2599 type = BTRFS_FT_REG_FILE;
2603 ret = reset_nlink(trans, root, path, rec);
2606 "Failed to reset nlink for inode %llu: %s\n",
2607 rec->ino, strerror(-ret));
/* Nothing links to the inode any more: attach it under lost+found. */
2611 if (rec->found_link == 0) {
/* The inode number for lost+found is derived from the highest one in use. */
2612 lost_found_ino = root->highest_inode;
2613 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2618 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2622 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623 dir_name, strerror(-ret));
2626 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627 namebuf, namelen, type, NULL, 1);
2629 * Add ".INO" suffix several times to handle case where
2630 * "FILENAME.INO" is already taken by another file.
2632 while (ret == -EEXIST) {
2634 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2636 if (namelen + count_digits(rec->ino) + 1 >
2641 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2643 namelen += count_digits(rec->ino) + 1;
2644 ret = btrfs_add_link(trans, root, rec->ino,
2645 lost_found_ino, namebuf,
2646 namelen, type, NULL, 1);
2650 "Failed to link the inode %llu to %s dir: %s\n",
2651 rec->ino, dir_name, strerror(-ret));
2655 * Just increase the found_link, don't actually add the
2656 * backref. This will make things easier and this inode
2657 * record will be freed after the repair is done.
2658 * So fsck will not report problem about this inode.
2661 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662 namelen, namebuf, dir_name);
2664 printf("Fixed the nlink of inode %llu\n", rec->ino);
2667 * Clear the flag anyway, or we will loop forever for the same inode
2668 * as it will not be removed from the bad inode list and the dead loop
2671 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672 btrfs_release_path(path);
2677 * Check if there is any normal(reg or prealloc) file extent for given
2679 * This is used to determine the file type when neither its dir_index/item or
2680 * inode_item exists.
2682 * This will *NOT* report error, if any error happens, just consider it does
2683 * not have any normal file extent.
/*
 * Search the EXTENT_DATA items of inode @ino for one that is not inline.
 *
 * @root: root that contains the inode
 * @ino:  inode number to look for
 *
 * The search is read-only (no transaction, no copy-on-write).
 */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2687 struct btrfs_path path;
2688 struct btrfs_key key;
2689 struct btrfs_key found_key;
2690 struct btrfs_file_extent_item *fi;
2694 btrfs_init_path(&path);
2696 key.type = BTRFS_EXTENT_DATA_KEY;
2699 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the end of the leaf: try the next leaf. */
2704 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705 ret = btrfs_next_leaf(root, &path);
2712 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Stop once the items no longer belong to the file extents of @ino. */
2714 if (found_key.objectid != ino ||
2715 found_key.type != BTRFS_EXTENT_DATA_KEY)
2717 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718 struct btrfs_file_extent_item);
2719 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Every type other than inline counts as a normal extent. */
2720 if (type != BTRFS_FILE_EXTENT_INLINE) {
2726 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF*
 * file type bits.
 *
 * @type: BTRFS_FT_* value
 *
 * Values between the listed ones that have no initializer map to 0.
 * NOTE(review): @type is used as an array index and no bounds check is
 * visible here; a value above the largest listed BTRFS_FT_* constant would
 * read past the end of the table. Confirm that callers only pass
 * validated types.
 */
2730 static u32 btrfs_type_to_imode(u8 type)
2732 static u32 imode_by_btrfs_type[] = {
2733 [BTRFS_FT_REG_FILE] = S_IFREG,
2734 [BTRFS_FT_DIR] = S_IFDIR,
2735 [BTRFS_FT_CHRDEV] = S_IFCHR,
2736 [BTRFS_FT_BLKDEV] = S_IFBLK,
2737 [BTRFS_FT_FIFO] = S_IFIFO,
2738 [BTRFS_FT_SOCK] = S_IFSOCK,
2739 [BTRFS_FT_SYMLINK] = S_IFLNK,
2742 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the inode item of an inode that has none.
 *
 * @trans: running transaction
 * @root:  root that contains the inode
 * @path:  scratch path (not referenced in the lines shown)
 * @rec:   inode record being repaired
 *
 * The file type comes from find_file_type() when possible. Otherwise it is
 * guessed: a non-inline file extent or an orphan data extent means a
 * regular file, a dir item means a directory, and anything else falls back
 * to a regular file. After the inode is created the record is updated,
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the nlink repair runs afterwards.
 */
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746 struct btrfs_root *root,
2747 struct btrfs_path *path,
2748 struct inode_record *rec)
2752 int type_recovered = 0;
2755 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() signals success with 0, hence the negation. */
2757 type_recovered = !find_file_type(rec, &filetype);
2760 * Try to determine inode type if type not found.
2762 * For found regular file extent, it must be FILE.
2763 * For found dir_item/index, it must be DIR.
2765 * For undetermined one, use FILE as fallback.
2768 * 1. If found backref(inode_index/item is already handled) to it,
2770 * Need new inode-inode ref structure to allow search for that.
2772 if (!type_recovered) {
2773 if (rec->found_file_extent &&
2774 find_normal_file_extent(root, rec->ino)) {
2776 filetype = BTRFS_FT_REG_FILE;
2777 } else if (rec->found_dir_item) {
2779 filetype = BTRFS_FT_DIR;
2780 } else if (!list_empty(&rec->orphan_extents)) {
2782 filetype = BTRFS_FT_REG_FILE;
2784 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2787 filetype = BTRFS_FT_REG_FILE;
/* Create the inode with the chosen type bits combined with mode. */
2791 ret = btrfs_new_inode(trans, root, rec->ino,
2792 mode | btrfs_type_to_imode(filetype));
2797 * Here inode rebuild is done, we only rebuild the inode item,
2798 * don't repair the nlink(like move to lost+found).
2799 * That is the job of nlink repair.
2801 * We just fill the record and return
2803 rec->found_dir_item = 1;
2804 rec->imode = mode | btrfs_type_to_imode(filetype);
2806 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807 /* Ensure the inode_nlinks repair function will be called */
2808 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach every orphan data extent recorded on @rec to its inode.
 *
 * For each entry of rec->orphan_extents the function first probes the file
 * range with btrfs_get_extent(); the visible lines then show a
 * btrfs_free_extent() call guarded by the conflict message, followed by a
 * btrfs_insert_file_extent() call using disk_len as both lengths.  After
 * that the bookkeeping on @rec is updated: found_size grows by disk_len,
 * I_ERR_FILE_NBYTES_WRONG is cleared once found_size equals nbytes, the
 * covered range is removed from rec->holes and I_ERR_FILE_EXTENT_DISCOUNT
 * is cleared when no hole is left.  The entry is finally unlinked from the
 * list.  I_ERR_FILE_EXTENT_ORPHAN is cleared after the loop.
 *
 * NOTE(review): the exact branch conditions around the conflict handling
 * are not visible here - confirm whether the insert is meant to run after
 * a conflicting orphan has been freed.
 *
 * @trans: running transaction
 * @root:  tree that owns the inode
 * @path:  scratch path, released after every btrfs_get_extent() probe
 * @rec:   in-memory record of the inode being repaired
 */
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814 struct btrfs_root *root,
2815 struct btrfs_path *path,
2816 struct inode_record *rec)
2818 struct orphan_data_extent *orphan;
2819 struct orphan_data_extent *tmp;
/* The _safe iterator is needed because entries are unlinked in the loop */
2822 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2824 * Check for conflicting file extents
2826 * Here we don't know whether the extents is compressed or not,
2827 * so we can only assume it not compressed nor data offset,
2828 * and use its disk_len as extent length.
2830 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831 orphan->offset, orphan->disk_len, 0);
2832 btrfs_release_path(path);
2837 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838 orphan->disk_bytenr, orphan->disk_len);
2839 ret = btrfs_free_extent(trans,
2840 root->fs_info->extent_root,
2841 orphan->disk_bytenr, orphan->disk_len,
2842 0, root->objectid, orphan->objectid,
2847 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848 orphan->offset, orphan->disk_bytenr,
2849 orphan->disk_len, orphan->disk_len);
2853 /* Update file size info */
2854 rec->found_size += orphan->disk_len;
2855 if (rec->found_size == rec->nbytes)
2856 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2858 /* Update the file extent hole info too */
2859 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2863 if (RB_EMPTY_ROOT(&rec->holes))
2864 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2866 list_del(&orphan->list);
2869 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of @rec with hole extents.
 *
 * Every entry of the rec->holes rb-tree is punched with btrfs_punch_hole()
 * and then removed from the tree with del_file_extent_hole().  Once the
 * tree is empty I_ERR_FILE_EXTENT_DISCOUNT is cleared.  A separate
 * btrfs_punch_hole() call covers the range from 0 to isize rounded up to
 * the sector size; per the comment above it, that is the case of a file
 * that lost all of its file extents.
 *
 * @trans: running transaction
 * @root:  tree that owns the inode
 * @path:  not referenced by the visible lines of this function
 * @rec:   in-memory record of the inode being repaired
 */
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875 struct btrfs_root *root,
2876 struct btrfs_path *path,
2877 struct inode_record *rec)
2879 struct rb_node *node;
2880 struct file_extent_hole *hole;
2884 node = rb_first(&rec->holes);
2888 hole = rb_entry(node, struct file_extent_hole, node);
2889 ret = btrfs_punch_hole(trans, root, rec->ino,
2890 hole->start, hole->len);
2893 ret = del_file_extent_hole(&rec->holes, hole->start,
2897 if (RB_EMPTY_ROOT(&rec->holes))
2898 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the first node: the tree was modified just above */
2899 node = rb_first(&rec->holes);
2901 /* special case for a file losing all its file extent */
2903 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904 round_up(rec->isize, root->sectorsize));
2908 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909 rec->ino, root->objectid);
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2916 struct btrfs_trans_handle *trans;
2917 struct btrfs_path path;
2920 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921 I_ERR_NO_ORPHAN_ITEM |
2922 I_ERR_LINK_COUNT_WRONG |
2923 I_ERR_NO_INODE_ITEM |
2924 I_ERR_FILE_EXTENT_ORPHAN |
2925 I_ERR_FILE_EXTENT_DISCOUNT|
2926 I_ERR_FILE_NBYTES_WRONG)))
2930 * For nlink repair, it may create a dir and add link, so
2931 * 2 for parent(256)'s dir_index and dir_item
2932 * 2 for lost+found dir's inode_item and inode_ref
2933 * 1 for the new inode_ref of the file
2934 * 2 for lost+found dir's dir_index and dir_item for the file
2936 trans = btrfs_start_transaction(root, 7);
2938 return PTR_ERR(trans);
2940 btrfs_init_path(&path);
2941 if (rec->errors & I_ERR_NO_INODE_ITEM)
2942 ret = repair_inode_no_item(trans, root, &path, rec);
2943 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948 ret = repair_inode_isize(trans, root, &path, rec);
2949 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952 ret = repair_inode_nlinks(trans, root, &path, rec);
2953 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954 ret = repair_inode_nbytes(trans, root, &path, rec);
2955 btrfs_commit_transaction(trans, root);
2956 btrfs_release_path(&path);
/*
 * Verify (and in repair mode try to fix) every inode record collected for
 * one fs/subvolume tree, consuming @inode_cache in the process.
 *
 * Phases that can be read from the visible lines:
 *  - a tree whose root item has zero refs only gets a warning when the
 *    cache is not empty;
 *  - the highest inode number in the cache is stored in
 *    root->highest_inode;
 *  - while the global repair flag is set, repair_inode_backrefs() is run
 *    over the records that have backrefs;
 *  - the root directory record is looked up and checked with
 *    check_root_dir(); a later branch recreates it with
 *    btrfs_make_root_dir() in its own transaction, and another one reports
 *    it as not found;
 *  - every remaining record is removed from the cache, the root directory
 *    and BTRFS_ORPHAN_OBJECTID records are dropped, missing orphan items
 *    are re-checked, I_ERR_NO_INODE_ITEM / I_ERR_LINK_COUNT_WRONG are set
 *    where they apply, try_repair_inode() is attempted and whatever is
 *    still wrong is printed together with its unresolved backrefs.
 *
 * @root:        the fs/subvolume tree being checked
 * @inode_cache: cache tree of ptr_node entries pointing at inode records
 *
 * Return -1 when the error counter is positive, 0 otherwise.
 */
2960 static int check_inode_recs(struct btrfs_root *root,
2961 struct cache_tree *inode_cache)
2963 struct cache_extent *cache;
2964 struct ptr_node *node;
2965 struct inode_record *rec;
2966 struct inode_backref *backref;
2971 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* Root item with no refs: only warn if inode records were collected */
2973 if (btrfs_root_refs(&root->root_item) == 0) {
2974 if (!cache_tree_empty(inode_cache))
2975 fprintf(stderr, "warning line %d\n", __LINE__);
2980 * We need to record the highest inode number for later 'lost+found'
2982 * We must select an ino not used/referred by any existing inode, or
2983 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984 * this may cause 'lost+found' dir has wrong nlinks.
2986 cache = last_cache_extent(inode_cache);
2988 node = container_of(cache, struct ptr_node, cache);
2990 if (rec->ino > root->highest_inode)
2991 root->highest_inode = rec->ino;
2995 * We need to repair backrefs first because we could change some of the
2996 * errors in the inode recs.
2998 * We also need to go through and delete invalid backrefs first and then
2999 * add the correct ones second. We do this because we may get EEXIST
3000 * when adding back the correct index because we hadn't yet deleted the
3003 * For example, if we were missing a dir index then the directories
3004 * isize would be wrong, so if we fixed the isize to what we thought it
3005 * would be and then fixed the backref we'd still have a invalid fs, so
3006 * we need to add back the dir index and then check to see if the isize
3011 if (stage == 3 && !err)
3014 cache = search_cache_extent(inode_cache, 0);
3015 while (repair && cache) {
3016 node = container_of(cache, struct ptr_node, cache);
3018 cache = next_cache_extent(cache);
3020 /* Need to free everything up and rescan */
3022 remove_cache_extent(inode_cache, &node->cache);
3024 free_inode_rec(rec);
3028 if (list_empty(&rec->backrefs))
3031 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Look up the record of the root directory of this tree */
3045 rec = get_inode_rec(inode_cache, root_dirid, 0);
3046 BUG_ON(IS_ERR(rec));
3048 ret = check_root_dir(rec);
3050 fprintf(stderr, "root %llu root dir %llu error\n",
3051 (unsigned long long)root->root_key.objectid,
3052 (unsigned long long)root_dirid);
3053 print_inode_error(root, rec);
3058 struct btrfs_trans_handle *trans;
3060 trans = btrfs_start_transaction(root, 1);
3061 if (IS_ERR(trans)) {
3062 err = PTR_ERR(trans);
3067 "root %llu missing its root dir, recreating\n",
3068 (unsigned long long)root->objectid);
3070 ret = btrfs_make_root_dir(trans, root, root_dirid);
3073 btrfs_commit_transaction(trans, root);
3077 fprintf(stderr, "root %llu root dir %llu not found\n",
3078 (unsigned long long)root->root_key.objectid,
3079 (unsigned long long)root_dirid);
/* Final pass: drain the cache one record at a time */
3083 cache = search_cache_extent(inode_cache, 0);
3086 node = container_of(cache, struct ptr_node, cache);
3088 remove_cache_extent(inode_cache, &node->cache);
3090 if (rec->ino == root_dirid ||
3091 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092 free_inode_rec(rec);
3096 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097 ret = check_orphan_item(root, rec->ino);
3099 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100 if (can_free_inode_rec(rec)) {
3101 free_inode_rec(rec);
3106 if (!rec->found_inode_item)
3107 rec->errors |= I_ERR_NO_INODE_ITEM;
3108 if (rec->found_link != rec->nlink)
3109 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3111 ret = try_repair_inode(root, rec);
3112 if (ret == 0 && can_free_inode_rec(rec)) {
3113 free_inode_rec(rec);
3119 if (!(repair && ret == 0))
3121 print_inode_error(root, rec);
/* Derive the per-backref error bits from what was never found */
3122 list_for_each_entry(backref, &rec->backrefs, list) {
3123 if (!backref->found_dir_item)
3124 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125 if (!backref->found_dir_index)
3126 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127 if (!backref->found_inode_ref)
3128 backref->errors |= REF_ERR_NO_INODE_REF;
3129 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130 " namelen %u name %s filetype %d errors %x",
3131 (unsigned long long)backref->dir,
3132 (unsigned long long)backref->index,
3133 backref->namelen, backref->name,
3134 backref->filetype, backref->errors);
3135 print_ref_error(backref->errors);
3137 free_inode_rec(rec);
3139 return (error > 0) ? -1 : 0;
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3145 struct cache_extent *cache;
3146 struct root_record *rec = NULL;
3149 cache = lookup_cache_extent(root_cache, objectid, 1);
3151 rec = container_of(cache, struct root_record, cache);
3153 rec = calloc(1, sizeof(*rec));
3155 return ERR_PTR(-ENOMEM);
3156 rec->objectid = objectid;
3157 INIT_LIST_HEAD(&rec->backrefs);
3158 rec->cache.start = objectid;
3159 rec->cache.size = 1;
3161 ret = insert_cache_extent(root_cache, &rec->cache);
3163 return ERR_PTR(-EEXIST);
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169 u64 ref_root, u64 dir, u64 index,
3170 const char *name, int namelen)
3172 struct root_backref *backref;
3174 list_for_each_entry(backref, &rec->backrefs, list) {
3175 if (backref->ref_root != ref_root || backref->dir != dir ||
3176 backref->namelen != namelen)
3178 if (memcmp(name, backref->name, namelen))
3183 backref = calloc(1, sizeof(*backref) + namelen + 1);
3186 backref->ref_root = ref_root;
3188 backref->index = index;
3189 backref->namelen = namelen;
3190 memcpy(backref->name, name, namelen);
3191 backref->name[namelen] = '\0';
3192 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record stored in a cache tree.
 *
 * Converts the embedded cache_extent back to its root_record and unlinks
 * every backref from rec->backrefs.
 * NOTE(review): the calls that release the backrefs and the record itself
 * are not visible in this view - confirm both are freed.
 */
3196 static void free_root_record(struct cache_extent *cache)
3198 struct root_record *rec;
3199 struct root_backref *backref;
3201 rec = container_of(cache, struct root_record, cache);
3202 while (!list_empty(&rec->backrefs)) {
3203 backref = to_root_backref(rec->backrefs.next);
3204 list_del(&backref->list);
/* Instantiates the tree-wide free helper for root_recs on top of free_root_record */
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3213 static int add_root_backref(struct cache_tree *root_cache,
3214 u64 root_id, u64 ref_root, u64 dir, u64 index,
3215 const char *name, int namelen,
3216 int item_type, int errors)
3218 struct root_record *rec;
3219 struct root_backref *backref;
3221 rec = get_root_rec(root_cache, root_id);
3222 BUG_ON(IS_ERR(rec));
3223 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3226 backref->errors |= errors;
3228 if (item_type != BTRFS_DIR_ITEM_KEY) {
3229 if (backref->found_dir_index || backref->found_back_ref ||
3230 backref->found_forward_ref) {
3231 if (backref->index != index)
3232 backref->errors |= REF_ERR_INDEX_UNMATCH;
3234 backref->index = index;
3238 if (item_type == BTRFS_DIR_ITEM_KEY) {
3239 if (backref->found_forward_ref)
3241 backref->found_dir_item = 1;
3242 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243 backref->found_dir_index = 1;
3244 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245 if (backref->found_forward_ref)
3246 backref->errors |= REF_ERR_DUP_ROOT_REF;
3247 else if (backref->found_dir_item)
3249 backref->found_forward_ref = 1;
3250 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251 if (backref->found_back_ref)
3252 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253 backref->found_back_ref = 1;
3258 if (backref->found_forward_ref && backref->found_dir_item)
3259 backref->reachable = 1;
3263 static int merge_root_recs(struct btrfs_root *root,
3264 struct cache_tree *src_cache,
3265 struct cache_tree *dst_cache)
3267 struct cache_extent *cache;
3268 struct ptr_node *node;
3269 struct inode_record *rec;
3270 struct inode_backref *backref;
3273 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274 free_inode_recs_tree(src_cache);
3279 cache = search_cache_extent(src_cache, 0);
3282 node = container_of(cache, struct ptr_node, cache);
3284 remove_cache_extent(src_cache, &node->cache);
3287 ret = is_child_root(root, root->objectid, rec->ino);
3293 list_for_each_entry(backref, &rec->backrefs, list) {
3294 BUG_ON(backref->found_inode_ref);
3295 if (backref->found_dir_item)
3296 add_root_backref(dst_cache, rec->ino,
3297 root->root_key.objectid, backref->dir,
3298 backref->index, backref->name,
3299 backref->namelen, BTRFS_DIR_ITEM_KEY,
3301 if (backref->found_dir_index)
3302 add_root_backref(dst_cache, rec->ino,
3303 root->root_key.objectid, backref->dir,
3304 backref->index, backref->name,
3305 backref->namelen, BTRFS_DIR_INDEX_KEY,
3309 free_inode_rec(rec);
/*
 * Check that every fs tree recorded in @root_cache is referenced and that
 * its backrefs are complete.
 *
 * What the visible lines show:
 *  - the record of BTRFS_FS_TREE_OBJECTID is looked up first;
 *  - a first walk over the cache looks at reachable backrefs whose
 *    referencing root (looked up with get_root_rec()) has no found_ref and
 *    clears their reachable flag.  The existing fixme notes that circular
 *    references are not detected;
 *  - a second walk handles unreferenced roots in the free objectid range
 *    (orphan item check in the tree root, then a not referenced message
 *    unless the root item is missing), derives REF_ERR_NO_* bits from the
 *    found_* flags of every backref and prints the unresolved ones.
 *
 * @root:       any root of the filesystem, used to reach fs_info->tree_root
 * @root_cache: cache tree of root records
 *
 * Return 1 when the error counter is positive, 0 otherwise.
 */
3316 static int check_root_refs(struct btrfs_root *root,
3317 struct cache_tree *root_cache)
3319 struct root_record *rec;
3320 struct root_record *ref_root;
3321 struct root_backref *backref;
3322 struct cache_extent *cache;
3328 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329 BUG_ON(IS_ERR(rec));
3332 /* fixme: this can not detect circular references */
3335 cache = search_cache_extent(root_cache, 0);
3339 rec = container_of(cache, struct root_record, cache);
3340 cache = next_cache_extent(cache);
3342 if (rec->found_ref == 0)
3345 list_for_each_entry(backref, &rec->backrefs, list) {
3346 if (!backref->reachable)
3349 ref_root = get_root_rec(root_cache,
3351 BUG_ON(IS_ERR(ref_root));
3352 if (ref_root->found_ref > 0)
3355 backref->reachable = 0;
3357 if (rec->found_ref == 0)
/* Second walk: report what is still unreferenced or incomplete */
3363 cache = search_cache_extent(root_cache, 0);
3367 rec = container_of(cache, struct root_record, cache);
3368 cache = next_cache_extent(cache);
3370 if (rec->found_ref == 0 &&
3371 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373 ret = check_orphan_item(root->fs_info->tree_root,
3379 * If we don't have a root item then we likely just have
3380 * a dir item in a snapshot for this root but no actual
3381 * ref key or anything so it's meaningless.
3383 if (!rec->found_root_item)
3386 fprintf(stderr, "fs tree %llu not referenced\n",
3387 (unsigned long long)rec->objectid);
3391 if (rec->found_ref > 0 && !rec->found_root_item)
/* Derive the per-backref error bits from what was never found */
3393 list_for_each_entry(backref, &rec->backrefs, list) {
3394 if (!backref->found_dir_item)
3395 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396 if (!backref->found_dir_index)
3397 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398 if (!backref->found_back_ref)
3399 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400 if (!backref->found_forward_ref)
3401 backref->errors |= REF_ERR_NO_ROOT_REF;
3402 if (backref->reachable && backref->errors)
3409 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410 (unsigned long long)rec->objectid, rec->found_ref,
3411 rec->found_root_item ? "" : "not found");
3413 list_for_each_entry(backref, &rec->backrefs, list) {
3414 if (!backref->reachable)
3416 if (!backref->errors && rec->found_root_item)
3418 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419 " index %llu namelen %u name %s errors %x\n",
3420 (unsigned long long)backref->ref_root,
3421 (unsigned long long)backref->dir,
3422 (unsigned long long)backref->index,
3423 backref->namelen, backref->name,
3425 print_ref_error(backref->errors);
3428 return errors > 0 ? 1 : 0;
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432 struct btrfs_key *key,
3433 struct cache_tree *root_cache)
3439 struct btrfs_root_ref *ref;
3440 char namebuf[BTRFS_NAME_LEN];
3443 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3445 dirid = btrfs_root_ref_dirid(eb, ref);
3446 index = btrfs_root_ref_sequence(eb, ref);
3447 name_len = btrfs_root_ref_name_len(eb, ref);
3449 if (name_len <= BTRFS_NAME_LEN) {
3453 len = BTRFS_NAME_LEN;
3454 error = REF_ERR_NAME_TOO_LONG;
3456 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3458 if (key->type == BTRFS_ROOT_REF_KEY) {
3459 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460 index, namebuf, len, key->type, error);
3462 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463 index, namebuf, len, key->type, error);
3468 static void free_corrupt_block(struct cache_extent *cache)
3470 struct btrfs_corrupt_block *corrupt;
3472 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3479 * Repair the btree of the given root.
3481 * The fix is to remove the node key in corrupt_blocks cache_tree.
3482 * and rebalance the tree.
3483 * After the fix, the btree should be writeable.
/*
 * @root:           the tree to repair
 * @corrupt_blocks: cache tree of btrfs_corrupt_block entries, each giving
 *                  the level and the node key of one corrupted tree block
 *
 * Works in one transaction and in two passes over @corrupt_blocks.  The
 * first pass searches down to the recorded level (lowest_level is set to
 * it), reads the block pointer found there, removes it with
 * btrfs_del_ptr() and frees the extent it pointed to.  The second pass
 * searches for every recorded key again with ins_len -1; per the existing
 * comment this is what rebalances the tree.  The transaction is committed
 * at the end.
 */
3485 static int repair_btree(struct btrfs_root *root,
3486 struct cache_tree *corrupt_blocks)
3488 struct btrfs_trans_handle *trans;
3489 struct btrfs_path path;
3490 struct btrfs_corrupt_block *corrupt;
3491 struct cache_extent *cache;
3492 struct btrfs_key key;
3497 if (cache_tree_empty(corrupt_blocks))
3500 trans = btrfs_start_transaction(root, 1);
3501 if (IS_ERR(trans)) {
3502 ret = PTR_ERR(trans);
3503 fprintf(stderr, "Error starting transaction: %s\n",
3507 btrfs_init_path(&path);
/* Pass 1: drop the parent pointer of every corrupted block */
3508 cache = first_cache_extent(corrupt_blocks);
3510 corrupt = container_of(cache, struct btrfs_corrupt_block,
3512 level = corrupt->level;
3513 path.lowest_level = level;
3514 key.objectid = corrupt->key.objectid;
3515 key.type = corrupt->key.type;
3516 key.offset = corrupt->key.offset;
3519 * Here we don't want to do any tree balance, since it may
3520 * cause a balance with corrupted brother leaf/node,
3521 * so ins_len set to 0 here.
3522 * Balance will be done after all corrupt node/leaf is deleted.
3524 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3527 offset = btrfs_node_blockptr(path.nodes[level],
3530 /* Remove the ptr */
3531 ret = btrfs_del_ptr(trans, root, &path, level,
3536 * Remove the corresponding extent
3537 * return value is not concerned.
3539 btrfs_release_path(&path);
3540 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541 0, root->root_key.objectid,
3543 cache = next_cache_extent(cache);
3546 /* Balance the btree using btrfs_search_slot() */
3547 cache = first_cache_extent(corrupt_blocks);
3549 corrupt = container_of(cache, struct btrfs_corrupt_block,
3551 memcpy(&key, &corrupt->key, sizeof(key));
3552 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3555 /* return will always >0 since it won't find the item */
3557 btrfs_release_path(&path);
3558 cache = next_cache_extent(cache);
3561 btrfs_commit_transaction(trans, root);
3562 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Steps that can be read from the visible lines:
 *  - a local corrupt_blocks cache tree is installed in fs_info for the
 *    duration of the check and removed again at the end;
 *  - for every tree except the tree-reloc tree the root record is looked
 *    up and marked found_root_item when the root item has refs;
 *  - orphan data extents queued on the root are moved to the inode records
 *    they belong to, which are flagged I_ERR_FILE_EXTENT_ORPHAN;
 *  - the root block itself is checked with btrfs_check_leaf() or
 *    btrfs_check_node();
 *  - the walk starts at the root block, or at the drop_progress key for a
 *    tree that is being deleted, and alternates walk_down_tree() and
 *    walk_up_tree();
 *  - corrupted tree blocks recorded during the walk are listed and handed
 *    to repair_btree();
 *  - the collected root and inode records are processed by
 *    merge_root_recs() and check_inode_recs().
 *
 * @root:       the tree to check
 * @root_cache: global cache tree of root records
 * @wc:         walk state; its nodes array is reset here
 */
3566 static int check_fs_root(struct btrfs_root *root,
3567 struct cache_tree *root_cache,
3568 struct walk_control *wc)
3574 struct btrfs_path path;
3575 struct shared_node root_node;
3576 struct root_record *rec;
3577 struct btrfs_root_item *root_item = &root->root_item;
3578 struct cache_tree corrupt_blocks;
3579 struct orphan_data_extent *orphan;
3580 struct orphan_data_extent *tmp;
3581 enum btrfs_tree_block_status status;
3582 struct node_refs nrefs;
3585 * Reuse the corrupt_block cache tree to record corrupted tree block
3587 * Unlike the usage in extent tree check, here we do it in a per
3588 * fs/subvol tree base.
3590 cache_tree_init(&corrupt_blocks);
3591 root->fs_info->corrupt_blocks = &corrupt_blocks;
3593 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594 rec = get_root_rec(root_cache, root->root_key.objectid);
3595 BUG_ON(IS_ERR(rec));
3596 if (btrfs_root_refs(root_item) > 0)
3597 rec->found_root_item = 1;
3600 btrfs_init_path(&path);
3601 memset(&root_node, 0, sizeof(root_node));
3602 cache_tree_init(&root_node.root_cache);
3603 cache_tree_init(&root_node.inode_cache);
3604 memset(&nrefs, 0, sizeof(nrefs));
3606 /* Move the orphan extent record to corresponding inode_record */
3607 list_for_each_entry_safe(orphan, tmp,
3608 &root->orphan_data_extents, list) {
3609 struct inode_record *inode;
3611 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3613 BUG_ON(IS_ERR(inode));
3614 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615 list_move(&orphan->list, &inode->orphan_extents);
/* Prime the walk state with the root block as the active node */
3618 level = btrfs_header_level(root->node);
3619 memset(wc->nodes, 0, sizeof(wc->nodes));
3620 wc->nodes[level] = &root_node;
3621 wc->active_node = level;
3622 wc->root_level = level;
3624 /* We may not have checked the root block, lets do that now */
3625 if (btrfs_is_leaf(root->node))
3626 status = btrfs_check_leaf(root, NULL, root->node);
3628 status = btrfs_check_node(root, NULL, root->node);
3629 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live tree, or no drop progress recorded: start from the root block */
3632 if (btrfs_root_refs(root_item) > 0 ||
3633 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634 path.nodes[level] = root->node;
3635 extent_buffer_get(root->node);
3636 path.slots[level] = 0;
3638 struct btrfs_key key;
3639 struct btrfs_disk_key found_key;
3641 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642 level = root_item->drop_level;
3643 path.lowest_level = level;
/* drop_level comes from disk, so it is validated before being used as an index */
3644 if (level > btrfs_header_level(root->node) ||
3645 level >= BTRFS_MAX_LEVEL) {
3646 error("ignoring invalid drop level: %u", level);
3649 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3652 btrfs_node_key(path.nodes[level], &found_key,
3654 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655 sizeof(found_key)));
3659 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3665 wret = walk_up_tree(root, &path, wc, &level);
3672 btrfs_release_path(&path);
3674 if (!cache_tree_empty(&corrupt_blocks)) {
3675 struct cache_extent *cache;
3676 struct btrfs_corrupt_block *corrupt;
3678 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679 root->root_key.objectid);
3680 cache = first_cache_extent(&corrupt_blocks);
3682 corrupt = container_of(cache,
3683 struct btrfs_corrupt_block,
3685 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686 cache->start, corrupt->level,
3687 corrupt->key.objectid, corrupt->key.type,
3688 corrupt->key.offset);
3689 cache = next_cache_extent(cache);
3692 printf("Try to repair the btree for root %llu\n",
3693 root->root_key.objectid);
3694 ret = repair_btree(root, &corrupt_blocks);
3696 fprintf(stderr, "Failed to repair btree: %s\n",
3699 printf("Btree for root %llu is fixed\n",
3700 root->root_key.objectid);
3704 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Close the inode record that was still open when the walk ended */
3708 if (root_node.current) {
3709 root_node.current->checked = 1;
3710 maybe_free_inode_rec(&root_node.inode_cache,
3714 err = check_inode_recs(root, &root_node.inode_cache);
3718 free_corrupt_blocks_tree(&corrupt_blocks);
3719 root->fs_info->corrupt_blocks = NULL;
3720 free_orphan_data_extents(&root->orphan_data_extents);
3724 static int fs_root_objectid(u64 objectid)
3726 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3729 return is_fstree(objectid);
/*
 * Walk the tree of tree roots and check every fs/subvolume tree found.
 *
 * Items of type ROOT_ITEM whose objectid passes fs_root_objectid() are
 * read and handed to check_fs_root(); ROOT_REF and ROOT_BACKREF items are
 * fed to process_root_ref().  The tree-reloc tree is read without the root
 * cache and freed again after its check.  When check_fs_root() returns
 * -EAGAIN, or when the node of the tree root no longer matches the one
 * remembered in tree_node, the collected root records are thrown away and
 * the path is released; what follows those branches is not visible here.
 *
 * @root:       any root of the filesystem, used to reach fs_info
 * @root_cache: cache tree of root records filled during the walk
 */
3732 static int check_fs_roots(struct btrfs_root *root,
3733 struct cache_tree *root_cache)
3735 struct btrfs_path path;
3736 struct btrfs_key key;
3737 struct walk_control wc;
3738 struct extent_buffer *leaf, *tree_node;
3739 struct btrfs_root *tmp_root;
3740 struct btrfs_root *tree_root = root->fs_info->tree_root;
/* Optional progress reporting for this phase */
3744 if (ctx.progress_enabled) {
3745 ctx.tp = TASK_FS_ROOTS;
3746 task_start(ctx.info);
3750 * Just in case we made any changes to the extent tree that weren't
3751 * reflected into the free space cache yet.
3754 reset_cached_block_groups(root->fs_info);
3755 memset(&wc, 0, sizeof(wc));
3756 cache_tree_init(&wc.shared);
3757 btrfs_init_path(&path);
3762 key.type = BTRFS_ROOT_ITEM_KEY;
3763 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the current root node so a later change can be detected */
3768 tree_node = tree_root->node;
3770 if (tree_node != tree_root->node) {
3771 free_root_recs_tree(root_cache);
3772 btrfs_release_path(&path);
3775 leaf = path.nodes[0];
3776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777 ret = btrfs_next_leaf(tree_root, &path);
3783 leaf = path.nodes[0];
3785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787 fs_root_objectid(key.objectid)) {
3788 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789 tmp_root = btrfs_read_fs_root_no_cache(
3790 root->fs_info, &key);
3792 key.offset = (u64)-1;
3793 tmp_root = btrfs_read_fs_root(
3794 root->fs_info, &key);
3796 if (IS_ERR(tmp_root)) {
3800 ret = check_fs_root(tmp_root, root_cache, &wc);
3801 if (ret == -EAGAIN) {
3802 free_root_recs_tree(root_cache);
3803 btrfs_release_path(&path);
/* Only the uncached tree-reloc root is owned, and freed, here */
3808 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809 btrfs_free_fs_root(tmp_root);
3810 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811 key.type == BTRFS_ROOT_BACKREF_KEY) {
3812 process_root_ref(leaf, path.slots[0], &key,
3819 btrfs_release_path(&path);
3821 free_extent_cache_tree(&wc.shared);
3822 if (!cache_tree_empty(&wc.shared))
3823 fprintf(stderr, "warning line %d\n", __LINE__);
3825 task_stop(ctx.info);
/*
 * Error bits used by the INODE_REF / INODE_EXTREF checks below.  They are
 * distinct bits so that callers can OR the results of several lookups
 * into one value.
 */
3830 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
3831 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
3832 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
3835 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3836 * INODE_REF/INODE_EXTREF match.
3838 * @root: the root of the fs/file tree
3839 * @ref_key: the key of the INODE_REF/INODE_EXTREF
3840 * @key: the key of the DIR_ITEM/DIR_INDEX
3841 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
3842 * distinguish root_dir between normal dir/file
3843 * @name: the name in the INODE_REF/INODE_EXTREF
3844 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
3845 * @mode: the st_mode of INODE_ITEM
3847 * Return 0 if no error occurred.
3848 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3849 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3851 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3852 * not match for normal dir/file.
/*
 * Implementation notes:
 *  - the item is looked up with a read-only btrfs_search_slot();
 *  - a DIR_ITEM/DIR_INDEX item can hold several entries, so the entries
 *    are walked one by one by advancing @di by the header size plus the
 *    name and data lengths until the whole item size is consumed;
 *  - an entry matches when its location is the INODE_ITEM of
 *    ref_key->objectid, its file type equals imode_to_type(@mode) and its
 *    name equals @name;
 *  - entry names longer than BTRFS_NAME_LEN are compared truncated, after
 *    a warning.
 * NOTE(review): @name is printed with %s in the messages below - confirm
 * that every caller passes a NUL terminated buffer.
 */
3854 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3855 struct btrfs_key *key, u64 index, char *name,
3856 u32 namelen, u32 mode)
3858 struct btrfs_path path;
3859 struct extent_buffer *node;
3860 struct btrfs_dir_item *di;
3861 struct btrfs_key location;
3862 char namebuf[BTRFS_NAME_LEN] = {0};
3872 btrfs_init_path(&path);
3873 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3875 ret = DIR_ITEM_MISSING;
3879 /* Process root dir and goto out*/
3882 ret = ROOT_DIR_ERROR;
3884 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3886 ref_key->type == BTRFS_INODE_REF_KEY ?
3888 ref_key->objectid, ref_key->offset,
3889 key->type == BTRFS_DIR_ITEM_KEY ?
3890 "DIR_ITEM" : "DIR_INDEX");
3898 /* Process normal file/dir */
3900 ret = DIR_ITEM_MISSING;
3902 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3904 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3905 ref_key->objectid, ref_key->offset,
3906 key->type == BTRFS_DIR_ITEM_KEY ?
3907 "DIR_ITEM" : "DIR_INDEX",
3908 key->objectid, key->offset, namelen, name,
3909 imode_to_type(mode));
3913 /* Check whether inode_id/filetype/name match */
3914 node = path.nodes[0];
3915 slot = path.slots[0];
3916 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3917 total = btrfs_item_size_nr(node, slot);
3918 while (cur < total) {
/* Assume a mismatch until this entry passes every comparison */
3919 ret = DIR_ITEM_MISMATCH;
3920 name_len = btrfs_dir_name_len(node, di);
3921 data_len = btrfs_dir_data_len(node, di);
3923 btrfs_dir_item_key_to_cpu(node, di, &location);
3924 if (location.objectid != ref_key->objectid ||
3925 location.type != BTRFS_INODE_ITEM_KEY ||
3926 location.offset != 0)
3929 filetype = btrfs_dir_type(node, di);
3930 if (imode_to_type(mode) != filetype)
3933 if (name_len <= BTRFS_NAME_LEN) {
3936 len = BTRFS_NAME_LEN;
3937 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3939 key->type == BTRFS_DIR_ITEM_KEY ?
3940 "DIR_ITEM" : "DIR_INDEX",
3941 key->objectid, key->offset, name_len);
/* The entry name is stored right behind the fixed-size dir item header */
3943 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3944 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next entry packed into the same item */
3950 len = sizeof(*di) + name_len + data_len;
3951 di = (struct btrfs_dir_item *)((char *)di + len);
3954 if (ret == DIR_ITEM_MISMATCH)
3956 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3958 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3959 ref_key->objectid, ref_key->offset,
3960 key->type == BTRFS_DIR_ITEM_KEY ?
3961 "DIR_ITEM" : "DIR_INDEX",
3962 key->objectid, key->offset, namelen, name,
3963 imode_to_type(mode));
3965 btrfs_release_path(&path);
3970 * Traverse the given INODE_REF and call find_dir_item() to find related
3971 * DIR_ITEM/DIR_INDEX.
3973 * @root: the root of the fs/file tree
3974 * @ref_key: the key of the INODE_REF
3975 * @refs: the count of INODE_REF
3976 * @mode: the st_mode of INODE_ITEM
3978 * Return 0 if no error occurred.
/*
 * Implementation notes:
 *  - one INODE_REF item can pack several refs; @ref is advanced by the
 *    header size plus name_len after each one;
 *  - for every ref the parent directory is ref_key->offset, and both the
 *    DIR_INDEX (keyed by the ref index) and the DIR_ITEM (keyed by the
 *    name hash) are looked up through find_dir_item();
 *  - names longer than BTRFS_NAME_LEN are truncated after a warning.
 * NOTE(review): the root dir name check bounds strncmp() with name_len
 * while namebuf only holds len bytes, and namebuf is printed with %s
 * although a full-length name leaves no room for a terminating NUL.
 * namebuf is also not cleared between refs, so bytes of a previous longer
 * name can remain behind a shorter one - confirm and fix together.
 */
3980 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3981 struct extent_buffer *node, int slot, u64 *refs,
3984 struct btrfs_key key;
3985 struct btrfs_inode_ref *ref;
3986 char namebuf[BTRFS_NAME_LEN] = {0};
3994 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
3995 total = btrfs_item_size_nr(node, slot);
3998 /* Update inode ref count */
4001 index = btrfs_inode_ref_index(node, ref);
4002 name_len = btrfs_inode_ref_name_len(node, ref);
4003 if (name_len <= BTRFS_NAME_LEN) {
4006 len = BTRFS_NAME_LEN;
4007 warning("root %llu INODE_REF[%llu %llu] name too long",
4008 root->objectid, ref_key->objectid, ref_key->offset);
/* The name is stored right behind the fixed-size ref header */
4011 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4013 /* Check root dir ref name */
4014 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4015 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4016 root->objectid, ref_key->objectid, ref_key->offset,
4018 err |= ROOT_DIR_ERROR;
4021 /* Find related DIR_INDEX */
4022 key.objectid = ref_key->offset;
4023 key.type = BTRFS_DIR_INDEX_KEY;
4025 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4028 /* Find related dir_item */
4029 key.objectid = ref_key->offset;
4030 key.type = BTRFS_DIR_ITEM_KEY;
4031 key.offset = btrfs_name_hash(namebuf, len);
4032 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next ref packed into the same item */
4035 len = sizeof(*ref) + name_len;
4036 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4045 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4046 * DIR_ITEM/DIR_INDEX.
4048 * @root: the root of the fs/file tree
4049 * @ref_key: the key of the INODE_EXTREF
4050 * @refs: the count of INODE_EXTREF
4051 * @mode: the st_mode of INODE_ITEM
4053 * Return 0 if no error occurred.
/*
 * Implementation notes:
 *  - one INODE_EXTREF item can pack several refs; @extref is advanced by
 *    the header size plus name_len after each one;
 *  - unlike a plain INODE_REF, the parent directory is read from the
 *    extref itself rather than from the key offset;
 *  - both the DIR_INDEX (keyed by the extref index) and the DIR_ITEM
 *    (keyed by the name hash) are looked up through find_dir_item();
 *  - names longer than BTRFS_NAME_LEN are truncated after a warning.
 * NOTE(review): the root dir name check bounds strncmp() with name_len
 * while namebuf only holds len bytes, and namebuf is printed with %s
 * although a full-length name leaves no room for a terminating NUL.
 * namebuf is also not cleared between refs - confirm and fix together
 * with check_inode_ref().
 */
4055 static int check_inode_extref(struct btrfs_root *root,
4056 struct btrfs_key *ref_key,
4057 struct extent_buffer *node, int slot, u64 *refs,
4060 struct btrfs_key key;
4061 struct btrfs_inode_extref *extref;
4062 char namebuf[BTRFS_NAME_LEN] = {0};
4072 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4073 total = btrfs_item_size_nr(node, slot);
4076 /* update inode ref count */
4078 name_len = btrfs_inode_extref_name_len(node, extref);
4079 index = btrfs_inode_extref_index(node, extref);
4080 parent = btrfs_inode_extref_parent(node, extref);
4081 if (name_len <= BTRFS_NAME_LEN) {
4084 len = BTRFS_NAME_LEN;
4085 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4086 root->objectid, ref_key->objectid, ref_key->offset);
/* The name is stored right behind the fixed-size extref header */
4088 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4090 /* Check root dir ref name */
4091 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4092 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4093 root->objectid, ref_key->objectid, ref_key->offset,
4095 err |= ROOT_DIR_ERROR;
4098 /* find related dir_index */
4099 key.objectid = parent;
4100 key.type = BTRFS_DIR_INDEX_KEY;
4102 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4105 /* find related dir_item */
4106 key.objectid = parent;
4107 key.type = BTRFS_DIR_ITEM_KEY;
4108 key.offset = btrfs_name_hash(namebuf, len);
4109 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next extref packed into the same item */
4112 len = sizeof(*extref) + name_len;
4113 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Cross-checks every backref attached to @rec against what was seen during
 * the scan. The conditions examined per backref are:
 *  - the backref was not found in the extent tree,
 *  - a tree backref was never referenced back,
 *  - a data backref whose found_ref differs from its num_refs,
 *  - a data backref whose disk_bytenr differs from rec->start,
 *  - a data backref whose bytes differs from rec->nr.
 * Reference counts of the individual backrefs are accumulated in found and
 * compared with rec->refs at the end.
 *
 * @rec:        extent record whose backrefs list is walked
 * @print_errs: NOTE(review): presumably gates the fprintf() diagnostics
 *
 * maybe_free_extent_rec() treats a zero return as every backref being
 * consistent.
 */
4122 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4124 struct list_head *cur = rec->backrefs.next;
4125 struct extent_backref *back;
4126 struct tree_backref *tback;
4127 struct data_backref *dback;
4131 while(cur != &rec->backrefs) {
4132 back = to_extent_backref(cur);
/* Backref was seen in a tree block or file extent but not in the extent tree. */
4134 if (!back->found_extent_tree) {
4138 if (back->is_data) {
4139 dback = to_data_backref(back);
4140 fprintf(stderr, "Backref %llu %s %llu"
4141 " owner %llu offset %llu num_refs %lu"
4142 " not found in extent tree\n",
4143 (unsigned long long)rec->start,
4144 back->full_backref ?
4146 back->full_backref ?
4147 (unsigned long long)dback->parent:
4148 (unsigned long long)dback->root,
4149 (unsigned long long)dback->owner,
4150 (unsigned long long)dback->offset,
4151 (unsigned long)dback->num_refs);
4153 tback = to_tree_backref(back);
4154 fprintf(stderr, "Backref %llu parent %llu"
4155 " root %llu not found in extent tree\n",
4156 (unsigned long long)rec->start,
4157 (unsigned long long)tback->parent,
4158 (unsigned long long)tback->root);
/* Tree backref recorded in the extent tree but no referencing block was found. */
4161 if (!back->is_data && !back->found_ref) {
4165 tback = to_tree_backref(back);
4166 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4167 (unsigned long long)rec->start,
4168 back->full_backref ? "parent" : "root",
4169 back->full_backref ?
4170 (unsigned long long)tback->parent :
4171 (unsigned long long)tback->root, back);
4173 if (back->is_data) {
4174 dback = to_data_backref(back);
/* Number of references actually found must equal the count in the extent tree. */
4175 if (dback->found_ref != dback->num_refs) {
4179 fprintf(stderr, "Incorrect local backref count"
4180 " on %llu %s %llu owner %llu"
4181 " offset %llu found %u wanted %u back %p\n",
4182 (unsigned long long)rec->start,
4183 back->full_backref ?
4185 back->full_backref ?
4186 (unsigned long long)dback->parent:
4187 (unsigned long long)dback->root,
4188 (unsigned long long)dback->owner,
4189 (unsigned long long)dback->offset,
4190 dback->found_ref, dback->num_refs, back);
4192 if (dback->disk_bytenr != rec->start) {
4196 fprintf(stderr, "Backref disk bytenr does not"
4197 " match extent record, bytenr=%llu, "
4198 "ref bytenr=%llu\n",
4199 (unsigned long long)rec->start,
4200 (unsigned long long)dback->disk_bytenr);
4203 if (dback->bytes != rec->nr) {
4207 fprintf(stderr, "Backref bytes do not match "
4208 "extent backref, bytenr=%llu, ref "
4209 "bytes=%llu, backref bytes=%llu\n",
4210 (unsigned long long)rec->start,
4211 (unsigned long long)rec->nr,
4212 (unsigned long long)dback->bytes);
/* Accumulate the global count; data backrefs contribute their found_ref. */
4215 if (!back->is_data) {
4218 dback = to_data_backref(back);
4219 found += dback->found_ref;
/* The sum over all backrefs must match the reference count of the record. */
4222 if (found != rec->refs) {
4226 fprintf(stderr, "Incorrect global backref count "
4227 "on %llu found %llu wanted %llu\n",
4228 (unsigned long long)rec->start,
4229 (unsigned long long)found,
4230 (unsigned long long)rec->refs);
/*
 * Drains the backrefs list of @rec, always taking the entry at the head of
 * the list until the list is empty.
 * NOTE(review): confirm each entry is unlinked and freed inside the loop,
 * otherwise the loop cannot terminate.
 */
4236 static int free_all_extent_backrefs(struct extent_record *rec)
4238 struct extent_backref *back;
4239 struct list_head *cur;
4240 while (!list_empty(&rec->backrefs)) {
4241 cur = rec->backrefs.next;
4242 back = to_extent_backref(cur);
/*
 * Tears down @extent_cache: takes the first cached extent, maps it back to
 * its extent_record, removes it from the cache tree and releases the
 * backrefs hanging off the record.
 * @fs_info is not referenced by the statements below.
 */
4249 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4250 struct cache_tree *extent_cache)
4252 struct cache_extent *cache;
4253 struct extent_record *rec;
4256 cache = first_cache_extent(extent_cache);
4259 rec = container_of(cache, struct extent_record, cache);
4260 remove_cache_extent(extent_cache, cache);
4261 free_all_extent_backrefs(rec);
/*
 * Drops @rec from @extent_cache once nothing about it is left to verify.
 * The record is released only when all of the following hold: its content
 * and owner ref have been checked, the extent item ref count equals the
 * counted refs and is non-zero, there are no duplicates, every backref is
 * consistent (all_backpointers_checked() returns 0, with printing disabled)
 * and none of the bad_full_backref, crossing_stripes or wrong_chunk_type
 * flags is set.
 */
4266 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4267 struct extent_record *rec)
4269 if (rec->content_checked && rec->owner_ref_checked &&
4270 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4271 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4272 !rec->bad_full_backref && !rec->crossing_stripes &&
4273 !rec->wrong_chunk_type) {
4274 remove_cache_extent(extent_cache, &rec->cache);
4275 free_all_extent_backrefs(rec);
/* Also unlink from whatever list the record is on via rec->list. */
4276 list_del_init(&rec->list);
/*
 * Verifies that the owner stored in the header of tree block @buf really
 * references the block.
 *
 * First the backrefs of @rec are scanned for a found, non-full backref whose
 * root equals the header owner. Otherwise the tree named by the header owner
 * is read and searched for the first key of @buf, stopping one level above
 * the block; the block is considered owned when the parent node slot points
 * at buf->start.
 *
 * Returns 0 when the parent pointer was found and 1 otherwise.
 * NOTE(review): the results of the early exits (backref match, failure to
 * read the root, failed search) should be confirmed.
 */
4282 static int check_owner_ref(struct btrfs_root *root,
4283 struct extent_record *rec,
4284 struct extent_buffer *buf)
4286 struct extent_backref *node;
4287 struct tree_backref *back;
4288 struct btrfs_root *ref_root;
4289 struct btrfs_key key;
4290 struct btrfs_path path;
4291 struct extent_buffer *parent;
4296 list_for_each_entry(node, &rec->backrefs, list) {
4299 if (!node->found_ref)
4301 if (node->full_backref)
4303 back = to_tree_backref(node);
4304 if (btrfs_header_owner(buf) == back->root)
4307 BUG_ON(rec->is_root);
4309 /* try to find the block by search corresponding fs tree */
4310 key.objectid = btrfs_header_owner(buf);
4311 key.type = BTRFS_ROOT_ITEM_KEY;
4312 key.offset = (u64)-1;
4314 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4315 if (IS_ERR(ref_root))
4318 level = btrfs_header_level(buf);
/* Use the first key of the block as search key (item key or node key). */
4320 btrfs_item_key_to_cpu(buf, &key, 0);
4322 btrfs_node_key_to_cpu(buf, &key, 0);
4324 btrfs_init_path(&path);
/* Stop the descent at the level of the would-be parent of @buf. */
4325 path.lowest_level = level + 1;
4326 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4330 parent = path.nodes[level + 1];
4331 if (parent && buf->start == btrfs_node_blockptr(parent,
4332 path.slots[level + 1]))
4335 btrfs_release_path(&path);
4336 return found ? 0 : 1;
/*
 * Walks the backrefs of @rec and inspects the tree backrefs: full backrefs
 * and backrefs whose root is BTRFS_EXTENT_TREE_OBJECTID are singled out.
 * NOTE(review): record_bad_block_io() skips records for which this returns
 * zero; confirm which of the two conditions below yields which result.
 */
4339 static int is_extent_tree_record(struct extent_record *rec)
4341 struct list_head *cur = rec->backrefs.next;
4342 struct extent_backref *node;
4343 struct tree_backref *back;
4346 while(cur != &rec->backrefs) {
4347 node = to_extent_backref(cur);
4351 back = to_tree_backref(node);
4352 if (node->full_backref)
4354 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Registers an unreadable block as a corrupt extent record.
 *
 * The range [start, start + len) is looked up in @extent_cache; when the
 * matching record passes is_extent_tree_record(), its parent key is
 * converted to CPU byte order and handed to
 * btrfs_add_corrupt_extent_record() together with the range (level 0).
 *
 * Returns the result of btrfs_add_corrupt_extent_record() on that path.
 */
4361 static int record_bad_block_io(struct btrfs_fs_info *info,
4362 struct cache_tree *extent_cache,
4365 struct extent_record *rec;
4366 struct cache_extent *cache;
4367 struct btrfs_key key;
4369 cache = lookup_cache_extent(extent_cache, start, len);
4373 rec = container_of(cache, struct extent_record, cache);
4374 if (!is_extent_tree_record(rec))
4377 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4378 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchanges the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (header level != 0) the two btrfs_key_ptr entries are swapped
 * and btrfs_fixup_low_keys() is used to propagate the first key upwards.
 * For a leaf the item payloads are copied out to temporary buffers and
 * written back, the offset/size fields of the two item headers are
 * exchanged and the keys are swapped through btrfs_set_item_key_unsafe().
 */
4381 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4382 struct extent_buffer *buf, int slot)
4384 if (btrfs_header_level(buf)) {
4385 struct btrfs_key_ptr ptr1, ptr2;
4387 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4388 sizeof(struct btrfs_key_ptr));
4389 read_extent_buffer(buf, &ptr2,
4390 btrfs_node_key_ptr_offset(slot + 1),
4391 sizeof(struct btrfs_key_ptr));
4392 write_extent_buffer(buf, &ptr1,
4393 btrfs_node_key_ptr_offset(slot + 1),
4394 sizeof(struct btrfs_key_ptr));
4395 write_extent_buffer(buf, &ptr2,
4396 btrfs_node_key_ptr_offset(slot),
4397 sizeof(struct btrfs_key_ptr));
4399 struct btrfs_disk_key key;
4400 btrfs_node_key(buf, &key, 0);
4401 btrfs_fixup_low_keys(root, path, &key,
4402 btrfs_header_level(buf) + 1);
4405 struct btrfs_item *item1, *item2;
4406 struct btrfs_key k1, k2;
4407 char *item1_data, *item2_data;
4408 u32 item1_offset, item2_offset, item1_size, item2_size;
4410 item1 = btrfs_item_nr(slot);
4411 item2 = btrfs_item_nr(slot + 1);
4412 btrfs_item_key_to_cpu(buf, &k1, slot);
4413 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4414 item1_offset = btrfs_item_offset(buf, item1);
4415 item2_offset = btrfs_item_offset(buf, item2);
4416 item1_size = btrfs_item_size(buf, item1);
4417 item2_size = btrfs_item_size(buf, item2);
4419 item1_data = malloc(item1_size);
4422 item2_data = malloc(item2_size);
4428 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4429 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each buffer is written back using the size of the other
 * item. When the two sizes differ this reads past the end of the smaller
 * allocation and truncates the larger payload.
 */
4431 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4432 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4436 btrfs_set_item_offset(buf, item1, item2_offset);
4437 btrfs_set_item_offset(buf, item2, item1_offset);
4438 btrfs_set_item_size(buf, item1, item2_size);
4439 btrfs_set_item_size(buf, item2, item1_size);
/* btrfs_set_item_key_unsafe() acts on path->slots[0], so point it at each slot in turn. */
4441 path->slots[0] = slot;
4442 btrfs_set_item_key_unsafe(root, path, &k2);
4443 path->slots[0] = slot + 1;
4444 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repairs key ordering inside the block at path->nodes[path->lowest_level].
 * Adjacent keys are compared pairwise with btrfs_comp_cpu_keys(); a pair
 * that is not in strictly ascending order is exchanged with swap_values()
 * and the buffer is marked dirty.
 *
 * NOTE(review): the loop bound is nritems - 1; if the item count is an
 * unsigned type, an empty block would make the bound wrap - confirm.
 */
4449 static int fix_key_order(struct btrfs_trans_handle *trans,
4450 struct btrfs_root *root,
4451 struct btrfs_path *path)
4453 struct extent_buffer *buf;
4454 struct btrfs_key k1, k2;
4456 int level = path->lowest_level;
4459 buf = path->nodes[level];
4460 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Node keys are read for internal nodes, item keys for leaves. */
4462 btrfs_node_key_to_cpu(buf, &k1, i);
4463 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4465 btrfs_item_key_to_cpu(buf, &k1, i);
4466 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4468 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4470 ret = swap_values(root, path, buf, i);
4473 btrfs_mark_buffer_dirty(buf);
/*
 * Removes the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are eligible. The item headers after @slot are moved
 * down by one entry, the item count in the header is decremented and the
 * buffer is marked dirty. btrfs_fixup_low_keys() is used to refresh the
 * first key in the parent level.
 */
4479 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4480 struct btrfs_root *root,
4481 struct btrfs_path *path,
4482 struct extent_buffer *buf, int slot)
4484 struct btrfs_key key;
4485 int nritems = btrfs_header_nritems(buf);
4487 btrfs_item_key_to_cpu(buf, &key, slot);
4489 /* These are all the keys we can deal with missing. */
4490 if (key.type != BTRFS_DIR_INDEX_KEY &&
4491 key.type != BTRFS_EXTENT_ITEM_KEY &&
4492 key.type != BTRFS_METADATA_ITEM_KEY &&
4493 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4494 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4497 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4498 (unsigned long long)key.objectid, key.type,
4499 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array; the item payload is not moved here. */
4500 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4501 btrfs_item_nr_offset(slot + 1),
4502 sizeof(struct btrfs_item) *
4503 (nritems - slot - 1));
4504 btrfs_set_header_nritems(buf, nritems - 1);
4506 struct btrfs_disk_key disk_key;
4508 btrfs_item_key(buf, &disk_key, 0);
4509 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4511 btrfs_mark_buffer_dirty(buf);
/*
 * Repairs item data offsets in the leaf at path->nodes[0].
 *
 * Expected layout: the data of item 0 ends at BTRFS_LEAF_DATA_SIZE(root) and
 * the data of every later item ends where the data of its predecessor
 * starts. An item that ends beyond its limit is handed to
 * delete_bogus_item(); an item that ends short of it is shifted up by the
 * size of the gap and its offset field is rewritten.
 */
4515 static int fix_item_offset(struct btrfs_trans_handle *trans,
4516 struct btrfs_root *root,
4517 struct btrfs_path *path)
4519 struct extent_buffer *buf;
4523 /* We should only get this for leaves */
4524 BUG_ON(path->lowest_level);
4525 buf = path->nodes[0];
4527 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4528 unsigned int shift = 0, offset;
/* First item: must end exactly at the end of the leaf data area. */
4530 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4531 BTRFS_LEAF_DATA_SIZE(root)) {
4532 if (btrfs_item_end_nr(buf, i) >
4533 BTRFS_LEAF_DATA_SIZE(root)) {
4534 ret = delete_bogus_item(trans, root, path,
4538 fprintf(stderr, "item is off the end of the "
4539 "leaf, can't fix\n");
4543 shift = BTRFS_LEAF_DATA_SIZE(root) -
4544 btrfs_item_end_nr(buf, i);
/* Later items: must end exactly where the previous item starts. */
4545 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4546 btrfs_item_offset_nr(buf, i - 1)) {
4547 if (btrfs_item_end_nr(buf, i) >
4548 btrfs_item_offset_nr(buf, i - 1)) {
4549 ret = delete_bogus_item(trans, root, path,
4553 fprintf(stderr, "items overlap, can't fix\n");
4557 shift = btrfs_item_offset_nr(buf, i - 1) -
4558 btrfs_item_end_nr(buf, i);
4563 printf("Shifting item nr %d by %u bytes in block %llu\n",
4564 i, shift, (unsigned long long)buf->start);
4565 offset = btrfs_item_offset_nr(buf, i);
/* Move the payload up by shift bytes; memmove is used because the ranges may overlap. */
4566 memmove_extent_buffer(buf,
4567 btrfs_leaf_data(buf) + offset + shift,
4568 btrfs_leaf_data(buf) + offset,
4569 btrfs_item_size_nr(buf, i));
4570 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4572 btrfs_mark_buffer_dirty(buf);
4576 * We may have moved things, in which case we want to exit so we don't
4577 * write those changes out. Once we have proper abort functionality in
4578 * progs this can be changed to something nicer.
4585 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4586 * then just return -EIO.
/*
 * Tries to repair tree block @buf in every root that references it.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled. The referencing roots are collected with
 * btrfs_find_all_roots(); for each of them a transaction is started, the
 * block is located through btrfs_search_slot() using the first key of @buf
 * with the COW argument set, and fix_key_order() or fix_item_offset() is run
 * on the resulting path. The transaction is committed afterwards.
 */
4588 static int try_to_fix_bad_block(struct btrfs_root *root,
4589 struct extent_buffer *buf,
4590 enum btrfs_tree_block_status status)
4592 struct btrfs_trans_handle *trans;
4593 struct ulist *roots;
4594 struct ulist_node *node;
4595 struct btrfs_root *search_root;
4596 struct btrfs_path path;
4597 struct ulist_iterator iter;
4598 struct btrfs_key root_key, key;
4601 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4602 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4605 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4609 btrfs_init_path(&path);
4610 ULIST_ITER_INIT(&iter);
4611 while ((node = ulist_next(roots, &iter))) {
/* node->val carries the objectid of a root that references the block. */
4612 root_key.objectid = node->val;
4613 root_key.type = BTRFS_ROOT_ITEM_KEY;
4614 root_key.offset = (u64)-1;
4616 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4623 trans = btrfs_start_transaction(search_root, 0);
4624 if (IS_ERR(trans)) {
4625 ret = PTR_ERR(trans);
/*
 * Stop the search at the level of the bad block. skip_check_block is set
 * because the block is already known to fail the regular checks
 * (NOTE(review): confirm that this is what the flag controls).
 */
4629 path.lowest_level = btrfs_header_level(buf);
4630 path.skip_check_block = 1;
4631 if (path.lowest_level)
4632 btrfs_node_key_to_cpu(buf, &key, 0);
4634 btrfs_item_key_to_cpu(buf, &key, 0);
4635 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4638 btrfs_commit_transaction(trans, search_root);
4641 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4642 ret = fix_key_order(trans, search_root, &path);
4643 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4644 ret = fix_item_offset(trans, search_root, &path);
4646 btrfs_commit_transaction(trans, search_root);
4649 btrfs_release_path(&path);
4650 btrfs_commit_transaction(trans, search_root);
4653 btrfs_release_path(&path);
/*
 * Validates tree block @buf and updates its extent record.
 *
 * The record covering [buf->start, buf->start + buf->len) is looked up in
 * @extent_cache and annotated with the generation, level and first key
 * objectid of the block. The block is then verified with
 * btrfs_check_leaf() or btrfs_check_node() against the parent key stored in
 * the record; on failure try_to_fix_bad_block() is attempted. A block that
 * passes is marked content_checked. The owner ref is considered checked
 * when @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF, otherwise
 * check_owner_ref() is consulted. Finally maybe_free_extent_rec() gets a
 * chance to release the record.
 */
4657 static int check_block(struct btrfs_root *root,
4658 struct cache_tree *extent_cache,
4659 struct extent_buffer *buf, u64 flags)
4661 struct extent_record *rec;
4662 struct cache_extent *cache;
4663 struct btrfs_key key;
4664 enum btrfs_tree_block_status status;
4668 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4671 rec = container_of(cache, struct extent_record, cache);
4672 rec->generation = btrfs_header_generation(buf);
4674 level = btrfs_header_level(buf);
/* Remember the objectid of the first key; only possible for non-empty blocks. */
4675 if (btrfs_header_nritems(buf) > 0) {
4678 btrfs_item_key_to_cpu(buf, &key, 0);
4680 btrfs_node_key_to_cpu(buf, &key, 0);
4682 rec->info_objectid = key.objectid;
4684 rec->info_level = level;
4686 if (btrfs_is_leaf(buf))
4687 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4689 status = btrfs_check_node(root, &rec->parent_key, buf);
/*
 * NOTE(review): the result of try_to_fix_bad_block() is stored in a
 * variable of the tree block status enum and compared with
 * BTRFS_TREE_BLOCK_CLEAN; confirm that the function returns values that
 * are meaningful for that comparison.
 */
4691 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4693 status = try_to_fix_bad_block(root, buf, status);
4694 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4696 fprintf(stderr, "bad block %llu\n",
4697 (unsigned long long)buf->start);
4700 * Signal to callers we need to start the scan over
4701 * again since we'll have cowed blocks.
4706 rec->content_checked = 1;
4707 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4708 rec->owner_ref_checked = 1;
4710 ret = check_owner_ref(root, rec, buf);
4712 rec->owner_ref_checked = 1;
4716 maybe_free_extent_rec(extent_cache, rec);
/*
 * Searches the backrefs of @rec for a tree backref.
 *
 * Two kinds of match are distinguished: full backrefs are compared by
 * @parent, non-full backrefs are compared by @root.
 * NOTE(review): presumably a non-zero @parent selects the full backref
 * comparison and NULL is returned when nothing matches - confirm.
 */
4720 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4721 u64 parent, u64 root)
4723 struct list_head *cur = rec->backrefs.next;
4724 struct extent_backref *node;
4725 struct tree_backref *back;
4727 while(cur != &rec->backrefs) {
4728 node = to_extent_backref(cur);
4732 back = to_tree_backref(node);
4734 if (!node->full_backref)
4736 if (parent == back->parent)
4739 if (node->full_backref)
4741 if (back->root == root)
/*
 * Allocates a tree backref, zeroes its embedded extent_backref node and
 * appends it to the backrefs list of @rec. The backref is set up either as
 * a full backref carrying @parent or as a non-full backref.
 * NOTE(review): presumably a non-zero @parent selects the full backref
 * form and @root is stored otherwise - confirm.
 */
4748 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4749 u64 parent, u64 root)
4751 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared, the remaining fields are set explicitly. */
4755 memset(&ref->node, 0, sizeof(ref->node));
4757 ref->parent = parent;
4758 ref->node.full_backref = 1;
4761 ref->node.full_backref = 0;
4763 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Searches the backrefs of @rec for a data backref.
 *
 * Full backrefs are compared by @parent. Non-full backrefs are compared by
 * the (@root, @owner, @offset) triple; for those, when the caller is adding
 * a found ref and the candidate already has one, the candidate is also
 * tested for a mismatch in @bytes or @disk_bytenr.
 * NOTE(review): presumably a candidate with such a mismatch is rejected so
 * that the caller allocates a separate backref - confirm.
 */
4768 static struct data_backref *find_data_backref(struct extent_record *rec,
4769 u64 parent, u64 root,
4770 u64 owner, u64 offset,
4772 u64 disk_bytenr, u64 bytes)
4774 struct list_head *cur = rec->backrefs.next;
4775 struct extent_backref *node;
4776 struct data_backref *back;
4778 while(cur != &rec->backrefs) {
4779 node = to_extent_backref(cur);
4783 back = to_data_backref(node);
4785 if (!node->full_backref)
4787 if (parent == back->parent)
4790 if (node->full_backref)
4792 if (back->root == root && back->owner == owner &&
4793 back->offset == offset) {
4794 if (found_ref && node->found_ref &&
4795 (back->bytes != bytes ||
4796 back->disk_bytenr != disk_bytenr))
/*
 * Allocates a data backref, zeroes its embedded extent_backref node, marks
 * it as data and appends it to the backrefs list of @rec. The backref is
 * set up either as a full backref carrying @parent or as a non-full backref
 * carrying (among others) @offset. Its bytes field starts out as max_size,
 * and rec->max_size is raised when max_size exceeds it.
 */
4805 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4806 u64 parent, u64 root,
4807 u64 owner, u64 offset,
4810 struct data_backref *ref = malloc(sizeof(*ref));
4814 memset(&ref->node, 0, sizeof(ref->node));
4815 ref->node.is_data = 1;
4818 ref->parent = parent;
4821 ref->node.full_backref = 1;
4825 ref->offset = offset;
4826 ref->node.full_backref = 0;
4828 ref->bytes = max_size;
4831 list_add_tail(&ref->node.list, &rec->backrefs);
/* Keep the record aware of the largest size any of its backrefs claims. */
4832 if (max_size > rec->max_size)
4833 rec->max_size = max_size;
4837 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent lives in a block group whose
 * type does not fit the extent.
 *
 * The block group is looked up through the file-scope global_info by
 * rec->start. A data extent requires the DATA flag. A metadata extent
 * requires SYSTEM or METADATA; when the record has backrefs, the first one
 * decides which of the two is expected: a tree backref rooted in the chunk
 * tree requires SYSTEM, any other root requires METADATA, and a data
 * backref on a metadata extent is flagged outright.
 */
4838 static void check_extent_type(struct extent_record *rec)
4840 struct btrfs_block_group_cache *bg_cache;
4842 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4846 /* data extent, check chunk directly*/
4847 if (!rec->metadata) {
4848 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4849 rec->wrong_chunk_type = 1;
4853 /* metadata extent, check the obvious case first */
4854 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4855 BTRFS_BLOCK_GROUP_METADATA))) {
4856 rec->wrong_chunk_type = 1;
4861 * Check SYSTEM extent, as it's also marked as metadata, we can only
4862 * make sure it's a SYSTEM extent by its backref
4864 if (!list_empty(&rec->backrefs)) {
4865 struct extent_backref *node;
4866 struct tree_backref *tback;
4869 node = to_extent_backref(rec->backrefs.next);
4870 if (node->is_data) {
4871 /* tree block shouldn't have data backref */
4872 rec->wrong_chunk_type = 1;
4875 tback = container_of(node, struct tree_backref, node);
4877 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4878 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4880 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4881 if (!(bg_cache->flags & bg_type))
4882 rec->wrong_chunk_type = 1;
4887 * Allocate a new extent record, fill default values from @tmpl and insert it
4888 * into @extent_cache. The caller is supposed to make sure [start,nr) is not
4889 * already in the cache, otherwise the insertion fails.
/*
 * Creates an extent_record from the template @tmpl and inserts it into
 * @extent_cache without checking for an existing entry first.
 *
 * Fields copied from the template: start, max_size, found_rec,
 * content_checked, owner_ref_checked, metadata, is_root, refs,
 * extent_item_refs, parent_generation and parent_key. The duplicate
 * counter and the bad_full_backref, crossing_stripes and wrong_chunk_type
 * flags start out cleared. After a successful insert the file-scope
 * bytes_used counter grows by rec->nr and the chunk type is verified with
 * check_extent_type().
 */
4891 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4892 struct extent_record *tmpl)
4894 struct extent_record *rec;
4897 rec = malloc(sizeof(*rec));
4900 rec->start = tmpl->start;
4901 rec->max_size = tmpl->max_size;
/*
 * NOTE(review): rec->nr takes the larger of nr and max_size, while the
 * cache entry further down is sized with tmpl->nr alone - confirm that the
 * difference is intended.
 */
4902 rec->nr = max(tmpl->nr, tmpl->max_size);
4903 rec->found_rec = tmpl->found_rec;
4904 rec->content_checked = tmpl->content_checked;
4905 rec->owner_ref_checked = tmpl->owner_ref_checked;
4906 rec->num_duplicates = 0;
4907 rec->metadata = tmpl->metadata;
4908 rec->flag_block_full_backref = FLAG_UNSET;
4909 rec->bad_full_backref = 0;
4910 rec->crossing_stripes = 0;
4911 rec->wrong_chunk_type = 0;
4912 rec->is_root = tmpl->is_root;
4913 rec->refs = tmpl->refs;
4914 rec->extent_item_refs = tmpl->extent_item_refs;
4915 rec->parent_generation = tmpl->parent_generation;
4916 INIT_LIST_HEAD(&rec->backrefs);
4917 INIT_LIST_HEAD(&rec->dups);
4918 INIT_LIST_HEAD(&rec->list);
4919 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4920 rec->cache.start = tmpl->start;
4921 rec->cache.size = tmpl->nr;
4922 ret = insert_cache_extent(extent_cache, &rec->cache);
4927 bytes_used += rec->nr;
/* The stripe crossing test uses the node size of the tree root as block length. */
4930 rec->crossing_stripes = check_crossing_stripes(global_info,
4931 rec->start, global_info->tree_root->nodesize);
4932 check_extent_type(rec);
4937 * Look up and modify an extent. Some values of @tmpl are interpreted verbatim,
4938 * the fields listed below are instead applied as follows:
4939 * - refs - if found, increase refs
4940 * - is_root - if found, set
4941 * - content_checked - if found, set
4942 * - owner_ref_checked - if found, set
4944 * If not found, create a new one, initialize and insert.
/*
 * Merges the template @tmpl into the record that overlaps
 * [tmpl->start, tmpl->start + tmpl->nr) in @extent_cache, or creates a new
 * record through add_extent_rec_nolookup() when there is none.
 *
 * When the template comes from an extent item (found_rec) but starts
 * elsewhere than the cached record, or the cached record already has an
 * extent item, a duplicate record is allocated, appended to rec->dups and
 * the cached record is queued on the file-scope duplicate_extents list.
 * Otherwise extent_item_refs, the checked flags, parent_key,
 * parent_generation and max_size are folded into the cached record, after
 * which the stripe and chunk type checks run and maybe_free_extent_rec()
 * may release the record.
 */
4946 static int add_extent_rec(struct cache_tree *extent_cache,
4947 struct extent_record *tmpl)
4949 struct extent_record *rec;
4950 struct cache_extent *cache;
4954 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4956 rec = container_of(cache, struct extent_record, cache);
4960 rec->nr = max(tmpl->nr, tmpl->max_size);
4963 * We need to make sure to reset nr to whatever the extent
4964 * record says was the real size, this way we can compare it to
4967 if (tmpl->found_rec) {
4968 if (tmpl->start != rec->start || rec->found_rec) {
4969 struct extent_record *tmp;
/* Queue the record only once; an empty rec->list means it is not queued yet. */
4972 if (list_empty(&rec->list))
4973 list_add_tail(&rec->list,
4974 &duplicate_extents);
4977 * We have to do this song and dance in case we
4978 * find an extent record that falls inside of
4979 * our current extent record but does not have
4980 * the same objectid.
4982 tmp = malloc(sizeof(*tmp));
4985 tmp->start = tmpl->start;
4986 tmp->max_size = tmpl->max_size;
4989 tmp->metadata = tmpl->metadata;
4990 tmp->extent_item_refs = tmpl->extent_item_refs;
4991 INIT_LIST_HEAD(&tmp->list);
4992 list_add_tail(&tmp->list, &rec->dups);
4993 rec->num_duplicates++;
/* A second extent item ref count for the same record is reported before being stored. */
5000 if (tmpl->extent_item_refs && !dup) {
5001 if (rec->extent_item_refs) {
5002 fprintf(stderr, "block %llu rec "
5003 "extent_item_refs %llu, passed %llu\n",
5004 (unsigned long long)tmpl->start,
5005 (unsigned long long)
5006 rec->extent_item_refs,
5007 (unsigned long long)tmpl->extent_item_refs);
5009 rec->extent_item_refs = tmpl->extent_item_refs;
5013 if (tmpl->content_checked)
5014 rec->content_checked = 1;
5015 if (tmpl->owner_ref_checked)
5016 rec->owner_ref_checked = 1;
5017 memcpy(&rec->parent_key, &tmpl->parent_key,
5018 sizeof(tmpl->parent_key));
5019 if (tmpl->parent_generation)
5020 rec->parent_generation = tmpl->parent_generation;
5021 if (rec->max_size < tmpl->max_size)
5022 rec->max_size = tmpl->max_size;
5025 * A metadata extent can't cross stripe_len boundary, otherwise
5026 * kernel scrub won't be able to handle it.
5027 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5031 rec->crossing_stripes = check_crossing_stripes(
5032 global_info, rec->start,
5033 global_info->tree_root->nodesize);
5034 check_extent_type(rec);
5035 maybe_free_extent_rec(extent_cache, rec);
5039 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Records a tree backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache and created from a zeroed
 * template when it does not exist yet. The matching backref is located with
 * find_tree_backref() or created with alloc_tree_backref(). Depending on
 * the source of the information either found_ref or found_extent_tree is
 * set on the backref; seeing the same source twice is reported on stderr.
 * NOTE(review): presumably @found_ref selects between the two flags -
 * confirm.
 */
5044 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5045 u64 parent, u64 root, int found_ref)
5047 struct extent_record *rec;
5048 struct tree_backref *back;
5049 struct cache_extent *cache;
5052 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5054 struct extent_record tmpl;
5056 memset(&tmpl, 0, sizeof(tmpl));
5057 tmpl.start = bytenr;
5061 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5065 /* really a bug in cache_extent implement now */
5066 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5071 rec = container_of(cache, struct extent_record, cache);
/* The lookup matches any overlapping record, so the start must be compared explicitly. */
5072 if (rec->start != bytenr) {
5074 * Several cause, from unaligned bytenr to over lapping extents
5079 back = find_tree_backref(rec, parent, root);
5081 back = alloc_tree_backref(rec, parent, root);
5087 if (back->node.found_ref) {
5088 fprintf(stderr, "Extent back ref already exists "
5089 "for %llu parent %llu root %llu \n",
5090 (unsigned long long)bytenr,
5091 (unsigned long long)parent,
5092 (unsigned long long)root);
5094 back->node.found_ref = 1;
5096 if (back->node.found_extent_tree) {
5097 fprintf(stderr, "Extent back ref already exists "
5098 "for %llu parent %llu root %llu \n",
5099 (unsigned long long)bytenr,
5100 (unsigned long long)parent,
5101 (unsigned long long)root);
5103 back->node.found_extent_tree = 1;
5105 check_extent_type(rec);
5106 maybe_free_extent_rec(extent_cache, rec);
/*
 * Records a data backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache and created from a
 * template (start and max_size filled in) when it does not exist yet;
 * rec->max_size is raised to @max_size if needed. The matching backref is
 * located with find_data_backref() or created with alloc_data_backref().
 *
 * One path handles a reference found in a file extent: @num_refs must be 1,
 * the found_ref counter of the backref is incremented, bytes and
 * disk_bytenr are recorded and the record is marked content and owner
 * checked. The other path handles a reference listed in the extent tree:
 * num_refs is stored and found_extent_tree is set, with a message on stderr
 * when that was already the case.
 * NOTE(review): presumably @found_ref selects between the two paths -
 * confirm.
 */
5110 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5111 u64 parent, u64 root, u64 owner, u64 offset,
5112 u32 num_refs, int found_ref, u64 max_size)
5114 struct extent_record *rec;
5115 struct data_backref *back;
5116 struct cache_extent *cache;
5119 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5121 struct extent_record tmpl;
5123 memset(&tmpl, 0, sizeof(tmpl));
5124 tmpl.start = bytenr;
5126 tmpl.max_size = max_size;
5128 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5132 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5137 rec = container_of(cache, struct extent_record, cache);
5138 if (rec->max_size < max_size)
5139 rec->max_size = max_size;
5142 * If found_ref is set then max_size is the real size and must match the
5143 * existing refs. So if we have already found a ref then we need to
5144 * make sure that this ref matches the existing one, otherwise we need
5145 * to add a new backref so we can notice that the backrefs don't match
5146 * and we need to figure out who is telling the truth. This is to
5147 * account for that awful fsync bug I introduced where we'd end up with
5148 * a btrfs_file_extent_item that would have its length include multiple
5149 * prealloc extents or point inside of a prealloc extent.
5151 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5154 back = alloc_data_backref(rec, parent, root, owner, offset,
5160 BUG_ON(num_refs != 1);
5161 if (back->node.found_ref)
5162 BUG_ON(back->bytes != max_size);
5163 back->node.found_ref = 1;
5164 back->found_ref += 1;
5165 back->bytes = max_size;
5166 back->disk_bytenr = bytenr;
5168 rec->content_checked = 1;
5169 rec->owner_ref_checked = 1;
5171 if (back->node.found_extent_tree) {
5172 fprintf(stderr, "Extent back ref already exists "
5173 "for %llu parent %llu root %llu "
5174 "owner %llu offset %llu num_refs %lu\n",
5175 (unsigned long long)bytenr,
5176 (unsigned long long)parent,
5177 (unsigned long long)root,
5178 (unsigned long long)owner,
5179 (unsigned long long)offset,
5180 (unsigned long)num_refs);
5182 back->num_refs = num_refs;
5183 back->node.found_extent_tree = 1;
5185 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queues the range [bytenr, bytenr + size) for processing. The range is
 * first added to @seen and then to @pending.
 * NOTE(review): presumably a failure to add to @seen (range already known)
 * makes the function return before touching @pending - confirm. The result
 * of the second add_cache_extent() call is not examined.
 */
5189 static int add_pending(struct cache_tree *pending,
5190 struct cache_tree *seen, u64 bytenr, u32 size)
5193 ret = add_cache_extent(seen, bytenr, size);
5196 add_cache_extent(pending, bytenr, size);
/*
 * Fills @bits with up to @bits_nr block ranges to process next.
 *
 * Sources are consulted in this order: @reada (first entry only), then
 * @nodes searched from slightly before @last (32768 bytes back, falling
 * back to the start of the tree), then @pending from its start. When more
 * than 8 slots are still free after collecting, further ranges are taken
 * from @pending as long as each one starts within 32768 bytes of the end of
 * the previous range.
 */
5200 static int pick_next_pending(struct cache_tree *pending,
5201 struct cache_tree *reada,
5202 struct cache_tree *nodes,
5203 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): @last is u64 but is stored in an unsigned long here, which
 * truncates on targets where unsigned long is 32 bits wide.
 */
5206 unsigned long node_start = last;
5207 struct cache_extent *cache;
5210 cache = search_cache_extent(reada, 0);
5212 bits[0].start = cache->start;
5213 bits[0].size = cache->size;
5218 if (node_start > 32768)
5219 node_start -= 32768;
5221 cache = search_cache_extent(nodes, node_start);
5223 cache = search_cache_extent(nodes, 0);
5226 cache = search_cache_extent(pending, 0);
5231 bits[ret].start = cache->start;
5232 bits[ret].size = cache->size;
5233 cache = next_cache_extent(cache);
5235 } while (cache && ret < bits_nr);
5241 bits[ret].start = cache->start;
5242 bits[ret].size = cache->size;
5243 cache = next_cache_extent(cache);
5245 } while (cache && ret < bits_nr);
/* Opportunistically append nearby pending ranges while plenty of slots remain. */
5247 if (bits_nr - ret > 8) {
5248 u64 lookup = bits[0].start + bits[0].size;
5249 struct cache_extent *next;
5250 next = search_cache_extent(pending, lookup);
5252 if (next->start - lookup > 32768)
5254 bits[ret].start = next->start;
5255 bits[ret].size = next->size;
5256 lookup = next->start + next->size;
5260 next = next_cache_extent(next);
/*
 * Per-entry destructor passed to cache_tree_free_extents() by
 * free_chunk_cache_tree(). Maps @cache back to its chunk_record and unlinks
 * the record from both its list and its dextents list.
 */
5268 static void free_chunk_record(struct cache_extent *cache)
5270 struct chunk_record *rec;
5272 rec = container_of(cache, struct chunk_record, cache);
5273 list_del_init(&rec->list);
5274 list_del_init(&rec->dextents);
/* Releases every entry of @chunk_cache, using free_chunk_record() per entry. */
5278 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5280 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node destructor handed to FREE_RB_BASED_TREE(); maps the rb-tree
 * @node back to the device_record that embeds it.
 */
5283 static void free_device_record(struct rb_node *node)
5285 struct device_record *rec;
5287 rec = container_of(node, struct device_record, node);
/*
 * NOTE(review): presumably expands to the definition of a tree teardown
 * helper for the device cache that calls free_device_record() on each node;
 * confirm against the macro definition.
 */
5291 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Adds @bg_rec to @tree: the record is inserted into the cache tree and
 * appended to the block_groups list of the tree.
 * NOTE(review): presumably the list is only touched when the cache insert
 * succeeded - confirm.
 */
5293 int insert_block_group_record(struct block_group_tree *tree,
5294 struct block_group_record *bg_rec)
5298 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5302 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry destructor passed to cache_tree_free_extents() by
 * free_block_group_tree(). Maps @cache back to its block_group_record and
 * unlinks the record from its list.
 */
5306 static void free_block_group_record(struct cache_extent *cache)
5308 struct block_group_record *rec;
5310 rec = container_of(cache, struct block_group_record, cache);
5311 list_del_init(&rec->list);
/* Releases every entry of @tree, using free_block_group_record() per entry. */
5315 void free_block_group_tree(struct block_group_tree *tree)
5317 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Adds @de_rec to @tree using insert_cache_extent2() and queues the record
 * on both orphan lists of the tree (no_chunk_orphans and
 * no_device_orphans).
 * NOTE(review): presumably the record is taken off those lists again once
 * its chunk and device have been matched - confirm against the callers.
 */
5320 int insert_device_extent_record(struct device_extent_tree *tree,
5321 struct device_extent_record *de_rec)
5326 * Device extent is a bit different from the other extents, because
5327 * the extents which belong to the different devices may have the
5328 * same start and size, so we need use the special extent cache
5329 * search/insert functions.
5331 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5335 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5336 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry destructor passed to cache_tree_free_extents() by
 * free_device_extent_tree(). Maps @cache back to its device_extent_record
 * and unlinks the record from its chunk and device lists when it is still
 * on them.
 */
5340 static void free_device_extent_record(struct cache_extent *cache)
5342 struct device_extent_record *rec;
5344 rec = container_of(cache, struct device_extent_record, cache);
5345 if (!list_empty(&rec->chunk_list))
5346 list_del_init(&rec->chunk_list);
5347 if (!list_empty(&rec->device_list))
5348 list_del_init(&rec->device_list);
/* Releases every entry of @tree, using free_device_extent_record() per entry. */
5352 void free_device_extent_tree(struct device_extent_tree *tree)
5354 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * Converts the v0 extent ref in item @slot of @leaf into a backref. The key
 * objectid is used as extent bytenr and the key offset as parent. A ref
 * objectid below BTRFS_FIRST_FREE_OBJECTID is recorded through
 * add_tree_backref(), anything else through add_data_backref() with the v0
 * ref count as num_refs.
 */
5357 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5358 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5359 struct extent_buffer *leaf, int slot)
5361 struct btrfs_extent_ref_v0 *ref0;
5362 struct btrfs_key key;
5365 btrfs_item_key_to_cpu(leaf, &key, slot);
5366 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5367 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5368 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5371 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5372 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Builds a zero-initialised chunk_record from the chunk item in @slot of
 * @leaf.
 *
 * The allocation is sized with btrfs_chunk_record_size() for the number of
 * stripes of the chunk. The cache range is [key->offset, key->offset +
 * chunk length); the key fields, the generation of the leaf and the chunk
 * attributes (owner, stripe_len, type, io_width, io_align, sector_size,
 * sub_stripes) are copied over, followed by devid, offset and device UUID
 * of every stripe.
 *
 * An allocation failure is reported on stderr.
 */
5378 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5379 struct btrfs_key *key,
5382 struct btrfs_chunk *ptr;
5383 struct chunk_record *rec;
5386 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5387 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5389 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5391 fprintf(stderr, "memory allocation failed\n");
5395 INIT_LIST_HEAD(&rec->list);
5396 INIT_LIST_HEAD(&rec->dextents);
/* The chunk key offset is the logical start address used as cache start. */
5399 rec->cache.start = key->offset;
5400 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5402 rec->generation = btrfs_header_generation(leaf);
5404 rec->objectid = key->objectid;
5405 rec->type = key->type;
5406 rec->offset = key->offset;
5408 rec->length = rec->cache.size;
5409 rec->owner = btrfs_chunk_owner(leaf, ptr);
5410 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5411 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5412 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5413 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5414 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5415 rec->num_stripes = num_stripes;
5416 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5418 for (i = 0; i < rec->num_stripes; ++i) {
5419 rec->stripes[i].devid =
5420 btrfs_stripe_devid_nr(leaf, ptr, i);
5421 rec->stripes[i].offset =
5422 btrfs_stripe_offset_nr(leaf, ptr, i);
5423 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5424 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Record the CHUNK_ITEM at @slot of @eb in @chunk_cache.
 *
 * The item is first run through btrfs_check_chunk_valid(); a chunk that
 * fails the check is reported as "is not valid, ignore it".  A chunk_record
 * is then built with btrfs_new_chunk_record() and passed to
 * insert_cache_extent(); the "Chunk[...] existed." message follows the
 * insert, presumably when it returns non-zero (TODO confirm).
 */
5431 static int process_chunk_item(struct cache_tree *chunk_cache,
5432 struct btrfs_key *key, struct extent_buffer *eb,
5435 struct chunk_record *rec;
5436 struct btrfs_chunk *chunk;
5439 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5441 * Do extra check for this chunk item,
5443 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5444 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5445 * and owner<->key_type check.
5447 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5450 error("chunk(%llu, %llu) is not valid, ignore it",
5451 key->offset, btrfs_chunk_length(eb, chunk));
5454 rec = btrfs_new_chunk_record(eb, key, slot);
5455 ret = insert_cache_extent(chunk_cache, &rec->cache);
5457 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5458 rec->offset, rec->length);
/*
 * Record the DEV_ITEM at @slot of @eb in the @dev_cache rb-tree.
 *
 * The record is allocated with malloc() (so it is not zeroed), filled from
 * @key, the leaf header generation and the device item, and inserted with
 * rb_insert() using device_record_compare.  "Device[...] existed." is
 * printed after the insert, presumably when it fails (TODO confirm).
 *
 * NOTE(review): rec->devid is assigned twice - first from key->offset and
 * then from btrfs_device_id(); only the second value survives.
 */
5465 static int process_device_item(struct rb_root *dev_cache,
5466 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5468 struct btrfs_dev_item *ptr;
5469 struct device_record *rec;
5472 ptr = btrfs_item_ptr(eb,
5473 slot, struct btrfs_dev_item);
5475 rec = malloc(sizeof(*rec));
5477 fprintf(stderr, "memory allocation failed\n");
5481 rec->devid = key->offset;
5482 rec->generation = btrfs_header_generation(eb);
5484 rec->objectid = key->objectid;
5485 rec->type = key->type;
5486 rec->offset = key->offset;
5488 rec->devid = btrfs_device_id(eb, ptr);
5489 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5490 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5492 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5494 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the BLOCK_GROUP_ITEM at @slot of @leaf.
 *
 * The record is calloc()ed; "memory allocation failed" is printed when the
 * allocation fails.  key->objectid becomes cache.start and key->offset
 * becomes cache.size.  The key, the leaf header generation and the on-disk
 * block group flags are copied as well, and rec->list is initialized.
 */
5501 struct block_group_record *
5502 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5505 struct btrfs_block_group_item *ptr;
5506 struct block_group_record *rec;
5508 rec = calloc(1, sizeof(*rec));
5510 fprintf(stderr, "memory allocation failed\n");
5514 rec->cache.start = key->objectid;
5515 rec->cache.size = key->offset;
5517 rec->generation = btrfs_header_generation(leaf);
5519 rec->objectid = key->objectid;
5520 rec->type = key->type;
5521 rec->offset = key->offset;
5523 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5524 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5526 INIT_LIST_HEAD(&rec->list);
/*
 * Record the BLOCK_GROUP_ITEM at @slot of @eb in @block_group_cache.
 *
 * A record is built with btrfs_new_block_group_record() and handed to
 * insert_block_group_record(); "Block Group[...] existed." is printed after
 * the insert, presumably when it returns non-zero (TODO confirm).
 */
5531 static int process_block_group_item(struct block_group_tree *block_group_cache,
5532 struct btrfs_key *key,
5533 struct extent_buffer *eb, int slot)
5535 struct block_group_record *rec;
5538 rec = btrfs_new_block_group_record(eb, key, slot);
5539 ret = insert_block_group_record(block_group_cache, rec);
5541 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5542 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the DEV_EXTENT item at @slot of @leaf.
 *
 * The record is calloc()ed; "memory allocation failed" is printed when the
 * allocation fails.  cache.objectid and cache.start come from key->objectid
 * and key->offset; cache.size is the extent length stored in the item.  The
 * chunk objectid and chunk offset are read from the item, and both list
 * heads (chunk_list, device_list) are initialized.
 */
5549 struct device_extent_record *
5550 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5551 struct btrfs_key *key, int slot)
5553 struct device_extent_record *rec;
5554 struct btrfs_dev_extent *ptr;
5556 rec = calloc(1, sizeof(*rec));
5558 fprintf(stderr, "memory allocation failed\n");
5562 rec->cache.objectid = key->objectid;
5563 rec->cache.start = key->offset;
5565 rec->generation = btrfs_header_generation(leaf);
5567 rec->objectid = key->objectid;
5568 rec->type = key->type;
5569 rec->offset = key->offset;
5571 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5572 rec->chunk_objecteid =
5573 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5575 btrfs_dev_extent_chunk_offset(leaf, ptr);
5576 rec->length = btrfs_dev_extent_length(leaf, ptr);
5577 rec->cache.size = rec->length;
5579 INIT_LIST_HEAD(&rec->chunk_list);
5580 INIT_LIST_HEAD(&rec->device_list);
/*
 * Record the DEV_EXTENT item at @slot of @eb in @dev_extent_cache.
 *
 * A record is built with btrfs_new_device_extent_record() and handed to
 * insert_device_extent_record(); "Device extent[...] existed." is printed
 * after the insert, presumably when it returns non-zero (TODO confirm).
 */
5586 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5587 struct btrfs_key *key, struct extent_buffer *eb,
5590 struct device_extent_record *rec;
5593 rec = btrfs_new_device_extent_record(eb, key, slot);
5594 ret = insert_device_extent_record(dev_extent_cache, rec);
5597 "Device extent[%llu, %llu, %llu] existed.\n",
5598 rec->objectid, rec->offset, rec->length);
/*
 * Feed one EXTENT_ITEM / METADATA_ITEM (slot @slot of @eb) into
 * @extent_cache: the extent record itself plus every inline backref.
 *
 * - num_bytes is root->nodesize for METADATA_ITEM keys and key.offset on the
 *   other visible path.
 * - A bytenr that is not aligned to root->sectorsize, a metadata extent whose
 *   length differs from root->nodesize, and a data extent whose length is not
 *   sectorsize-aligned are each reported with error().
 * - Items smaller than struct btrfs_extent_item are read as v0 extent items
 *   when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 * - Inline refs start right after the extent item (after the
 *   btrfs_tree_block_info as well for EXTENT_ITEM tree blocks); ptr is
 *   advanced by btrfs_extent_inline_ref_size(type) and end marks the end of
 *   the item.  Each ref is dispatched on its type to add_tree_backref() or
 *   add_data_backref(); the "corrupt extent record" message sits after the
 *   four case labels (presumably the default case - TODO confirm).
 */
5605 static int process_extent_item(struct btrfs_root *root,
5606 struct cache_tree *extent_cache,
5607 struct extent_buffer *eb, int slot)
5609 struct btrfs_extent_item *ei;
5610 struct btrfs_extent_inline_ref *iref;
5611 struct btrfs_extent_data_ref *dref;
5612 struct btrfs_shared_data_ref *sref;
5613 struct btrfs_key key;
5614 struct extent_record tmpl;
5619 u32 item_size = btrfs_item_size_nr(eb, slot);
5625 btrfs_item_key_to_cpu(eb, &key, slot);
5627 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5629 num_bytes = root->nodesize;
5631 num_bytes = key.offset;
5634 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5635 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5636 key.objectid, root->sectorsize);
5639 if (item_size < sizeof(*ei)) {
5640 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5641 struct btrfs_extent_item_v0 *ei0;
5642 BUG_ON(item_size != sizeof(*ei0));
5643 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5644 refs = btrfs_extent_refs_v0(eb, ei0);
5648 memset(&tmpl, 0, sizeof(tmpl));
5649 tmpl.start = key.objectid;
5650 tmpl.nr = num_bytes;
5651 tmpl.extent_item_refs = refs;
5652 tmpl.metadata = metadata;
5654 tmpl.max_size = num_bytes;
5656 return add_extent_rec(extent_cache, &tmpl);
5659 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5660 refs = btrfs_extent_refs(eb, ei);
5661 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5665 if (metadata && num_bytes != root->nodesize) {
5666 error("ignore invalid metadata extent, length %llu does not equal to %u",
5667 num_bytes, root->nodesize);
5670 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5671 error("ignore invalid data extent, length %llu is not aligned to %u",
5672 num_bytes, root->sectorsize);
5676 memset(&tmpl, 0, sizeof(tmpl));
5677 tmpl.start = key.objectid;
5678 tmpl.nr = num_bytes;
5679 tmpl.extent_item_refs = refs;
5680 tmpl.metadata = metadata;
5682 tmpl.max_size = num_bytes;
5683 add_extent_rec(extent_cache, &tmpl);
/* Inline refs follow the extent item (and the tree block info, if any). */
5685 ptr = (unsigned long)(ei + 1);
5686 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5687 key.type == BTRFS_EXTENT_ITEM_KEY)
5688 ptr += sizeof(struct btrfs_tree_block_info);
5690 end = (unsigned long)ei + item_size;
5692 iref = (struct btrfs_extent_inline_ref *)ptr;
5693 type = btrfs_extent_inline_ref_type(eb, iref);
5694 offset = btrfs_extent_inline_ref_offset(eb, iref);
5696 case BTRFS_TREE_BLOCK_REF_KEY:
5697 ret = add_tree_backref(extent_cache, key.objectid,
5700 error("add_tree_backref failed: %s",
5703 case BTRFS_SHARED_BLOCK_REF_KEY:
5704 ret = add_tree_backref(extent_cache, key.objectid,
5707 error("add_tree_backref failed: %s",
5710 case BTRFS_EXTENT_DATA_REF_KEY:
5711 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5712 add_data_backref(extent_cache, key.objectid, 0,
5713 btrfs_extent_data_ref_root(eb, dref),
5714 btrfs_extent_data_ref_objectid(eb,
5716 btrfs_extent_data_ref_offset(eb, dref),
5717 btrfs_extent_data_ref_count(eb, dref),
5720 case BTRFS_SHARED_DATA_REF_KEY:
5721 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5722 add_data_backref(extent_cache, key.objectid, offset,
5724 btrfs_shared_data_ref_count(eb, sref),
5728 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5729 key.objectid, key.type, num_bytes);
5732 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the free space ctl of @cache holds an entry matching the range
 * that starts at @offset and is @bytes long, once superblock stripes have
 * been carved out of it.
 *
 * For every superblock mirror, btrfs_rmap_block() yields the logical
 * addresses that map onto it, and the range is trimmed or split around each
 * overlapping stripe: a stripe in the middle causes the left part to be
 * checked by a recursive call while the right part continues here.  What
 * remains is looked up with btrfs_find_free_space(); a missing entry or a
 * mismatch in offset or bytes is reported on stderr, and the entry is then
 * unlinked from the free space ctl.
 */
5739 static int check_cache_range(struct btrfs_root *root,
5740 struct btrfs_block_group_cache *cache,
5741 u64 offset, u64 bytes)
5743 struct btrfs_free_space *entry;
5749 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5750 bytenr = btrfs_sb_offset(i);
5751 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5752 cache->key.objectid, bytenr, 0,
5753 &logical, &nr, &stripe_len);
5758 if (logical[nr] + stripe_len <= offset)
5760 if (offset + bytes <= logical[nr])
5762 if (logical[nr] == offset) {
5763 if (stripe_len >= bytes) {
5767 bytes -= stripe_len;
5768 offset += stripe_len;
5769 } else if (logical[nr] < offset) {
5770 if (logical[nr] + stripe_len >=
5775 bytes = (offset + bytes) -
5776 (logical[nr] + stripe_len);
5777 offset = logical[nr] + stripe_len;
5780 * Could be tricky, the super may land in the
5781 * middle of the area we're checking. First
5782 * check the easiest case, it's at the end.
5784 if (logical[nr] + stripe_len >=
5786 bytes = logical[nr] - offset;
5790 /* Check the left side */
5791 ret = check_cache_range(root, cache,
5793 logical[nr] - offset);
5799 /* Now we continue with the right side */
5800 bytes = (offset + bytes) -
5801 (logical[nr] + stripe_len);
5802 offset = logical[nr] + stripe_len;
5809 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5811 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5812 offset, offset+bytes);
5816 if (entry->offset != offset) {
5817 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5822 if (entry->bytes != bytes) {
5823 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5824 bytes, entry->bytes, offset);
5828 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Walk the extent tree across block group @cache and verify that the gaps
 * between allocated extents are present in the loaded free space entries.
 *
 * @root is replaced by fs_info->extent_root.  "last" starts at the larger of
 * the block group start and BTRFS_SUPER_INFO_OFFSET and tracks the end of
 * the previous extent: EXTENT_ITEM keys advance it to objectid + offset, the
 * other accepted key type (METADATA_ITEM) to objectid + root->nodesize.
 * Each gap [last, key.objectid), and finally the tail up to the end of the
 * block group, is passed to check_cache_range().  Entries still left in
 * free_space_offset afterwards are reported on stderr.
 */
5833 static int verify_space_cache(struct btrfs_root *root,
5834 struct btrfs_block_group_cache *cache)
5836 struct btrfs_path path;
5837 struct extent_buffer *leaf;
5838 struct btrfs_key key;
5842 root = root->fs_info->extent_root;
5844 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5846 btrfs_init_path(&path);
5847 key.objectid = last;
5849 key.type = BTRFS_EXTENT_ITEM_KEY;
5850 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5855 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5856 ret = btrfs_next_leaf(root, &path);
5864 leaf = path.nodes[0];
5865 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5866 if (key.objectid >= cache->key.offset + cache->key.objectid)
5868 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5869 key.type != BTRFS_METADATA_ITEM_KEY) {
/* No gap before this extent: just move "last" past it. */
5874 if (last == key.objectid) {
5875 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5876 last = key.objectid + key.offset;
5878 last = key.objectid + root->nodesize;
5883 ret = check_cache_range(root, cache, last,
5884 key.objectid - last);
5887 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5888 last = key.objectid + key.offset;
5890 last = key.objectid + root->nodesize;
/* Check the tail between the last extent and the block group end. */
5894 if (last < cache->key.objectid + cache->key.offset)
5895 ret = check_cache_range(root, cache, last,
5896 cache->key.objectid +
5897 cache->key.offset - last);
5900 btrfs_release_path(&path);
5903 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5904 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * - When the super block's cache generation is not -1 and differs from the
 *   super generation, a "space cache will be invalidated" message is printed.
 * - With progress reporting enabled, the TASK_FREE_SPACE task is started and
 *   later stopped.
 * - Block groups are iterated with btrfs_lookup_first_block_group(), starting
 *   just past the primary super block and continuing from the end of each
 *   block group found.
 * - A missing free_space_ctl is created with btrfs_init_free_space_ctl().
 * - With the FREE_SPACE_TREE compat_ro feature the super stripes are
 *   excluded, the free space tree is loaded and the excluded extents freed;
 *   load_free_space_cache() is presumably the alternative path (TODO confirm).
 * - verify_space_cache() is then run and a failure is reported.
 *
 * Returns -EINVAL when the local "error" value is non-zero, 0 otherwise.
 */
5912 static int check_space_cache(struct btrfs_root *root)
5914 struct btrfs_block_group_cache *cache;
5915 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5919 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5920 btrfs_super_generation(root->fs_info->super_copy) !=
5921 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5922 printf("cache and super generation don't match, space cache "
5923 "will be invalidated\n");
5927 if (ctx.progress_enabled) {
5928 ctx.tp = TASK_FREE_SPACE;
5929 task_start(ctx.info);
5933 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5937 start = cache->key.objectid + cache->key.offset;
5938 if (!cache->free_space_ctl) {
5939 if (btrfs_init_free_space_ctl(cache,
5940 root->sectorsize)) {
5945 btrfs_remove_free_space_cache(cache);
5948 if (btrfs_fs_compat_ro(root->fs_info,
5949 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5950 ret = exclude_super_stripes(root, cache);
5952 fprintf(stderr, "could not exclude super stripes: %s\n",
5957 ret = load_free_space_tree(root->fs_info, cache);
5958 free_excluded_extents(root, cache);
5960 fprintf(stderr, "could not load free space tree: %s\n",
5967 ret = load_free_space_cache(root->fs_info, cache);
5972 ret = verify_space_cache(root, cache);
5974 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5975 cache->key.objectid);
5980 task_stop(ctx.info);
5982 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data starting at @bytenr and compare the checksum of
 * every root->sectorsize block with the checksums stored in @eb starting at
 * @leaf_offset.
 *
 * @num_bytes is tested for being a multiple of root->sectorsize before a
 * buffer of @num_bytes is malloc()ed.  read_extent_data() fills the buffer,
 * possibly in several rounds (read_len is in/out).  For each sector the
 * checksum is computed with btrfs_csum_data()/btrfs_csum_final() and the
 * expected value is read from @eb at
 * leaf_offset + (byte offset / sectorsize) * csum_size.  A mismatch is
 * printed together with the mirror number, and btrfs_num_copies() is
 * consulted to see whether another mirror can be tried.
 */
5985 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5986 u64 num_bytes, unsigned long leaf_offset,
5987 struct extent_buffer *eb) {
5990 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5992 unsigned long csum_offset;
5996 u64 data_checked = 0;
6002 if (num_bytes % root->sectorsize)
6005 data = malloc(num_bytes);
6009 while (offset < num_bytes) {
6012 read_len = num_bytes - offset;
6013 /* read as much space once a time */
6014 ret = read_extent_data(root, data + offset,
6015 bytenr + offset, &read_len, mirror);
6019 /* verify every 4k data's checksum */
6020 while (data_checked < read_len) {
6022 tmp = offset + data_checked;
6024 csum = btrfs_csum_data(NULL, (char *)data + tmp,
6025 csum, root->sectorsize);
6026 btrfs_csum_final(csum, (u8 *)&csum);
6028 csum_offset = leaf_offset +
6029 tmp / root->sectorsize * csum_size;
6030 read_extent_buffer(eb, (char *)&csum_expected,
6031 csum_offset, csum_size);
6032 /* try another mirror */
6033 if (csum != csum_expected) {
6034 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6035 mirror, bytenr + tmp,
6036 csum, csum_expected);
6037 num_copies = btrfs_num_copies(
6038 &root->fs_info->mapping_tree,
6040 if (mirror < num_copies - 1) {
6045 data_checked += root->sectorsize;
/*
 * Check that the range starting at @bytenr and @num_bytes long is covered
 * by EXTENT_ITEMs in the extent tree.
 *
 * The search key is (@bytenr, EXTENT_ITEM, -1).  The path is then stepped
 * back (moving to the previous leaf when the slot is 0) past any keys whose
 * type sorts above EXTENT_ITEM.  Walking forward, every overlapping extent
 * trims the range from the front or the back; an extent in the middle of
 * the range makes the right-hand part be checked by a recursive call while
 * this call keeps the left-hand part.  If bytes are still uncovered and no
 * error occurred, "There are no extents for csum range" is printed.
 */
6054 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6057 struct btrfs_path path;
6058 struct extent_buffer *leaf;
6059 struct btrfs_key key;
6062 btrfs_init_path(&path);
6063 key.objectid = bytenr;
6064 key.type = BTRFS_EXTENT_ITEM_KEY;
6065 key.offset = (u64)-1;
6068 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6071 fprintf(stderr, "Error looking up extent record %d\n", ret);
6072 btrfs_release_path(&path);
6075 if (path.slots[0] > 0) {
6078 ret = btrfs_prev_leaf(root, &path);
6081 } else if (ret > 0) {
6088 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6091 * Block group items come before extent items if they have the same
6092 * bytenr, so walk back one more just in case. Dear future traveller,
6093 * first congrats on mastering time travel. Now if it's not too much
6094 * trouble could you go back to 2006 and tell Chris to make the
6095 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6096 * EXTENT_ITEM_KEY please?
6098 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6099 if (path.slots[0] > 0) {
6102 ret = btrfs_prev_leaf(root, &path);
6105 } else if (ret > 0) {
6110 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6114 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6115 ret = btrfs_next_leaf(root, &path);
6117 fprintf(stderr, "Error going to next leaf "
6119 btrfs_release_path(&path);
6125 leaf = path.nodes[0];
6126 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6127 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6131 if (key.objectid + key.offset < bytenr) {
6135 if (key.objectid > bytenr + num_bytes)
6138 if (key.objectid == bytenr) {
6139 if (key.offset >= num_bytes) {
6143 num_bytes -= key.offset;
6144 bytenr += key.offset;
6145 } else if (key.objectid < bytenr) {
6146 if (key.objectid + key.offset >= bytenr + num_bytes) {
6150 num_bytes = (bytenr + num_bytes) -
6151 (key.objectid + key.offset);
6152 bytenr = key.objectid + key.offset;
6154 if (key.objectid + key.offset < bytenr + num_bytes) {
6155 u64 new_start = key.objectid + key.offset;
6156 u64 new_bytes = bytenr + num_bytes - new_start;
6159 * Weird case, the extent is in the middle of
6160 * our range, we'll have to search one side
6161 * and then the other. Not sure if this happens
6162 * in real life, but no harm in coding it up
6163 * anyway just in case.
6165 btrfs_release_path(&path);
6166 ret = check_extent_exists(root, new_start,
6169 fprintf(stderr, "Right section didn't "
6173 num_bytes = key.objectid - bytenr;
6176 num_bytes = key.objectid - bytenr;
6183 if (num_bytes && !ret) {
6184 fprintf(stderr, "There are no extents for csum range "
6185 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6189 btrfs_release_path(&path);
/*
 * Walk the csum tree and check that checksummed ranges are backed by
 * extent records (and, optionally, that the data matches its checksums).
 *
 * @root is replaced by fs_info->csum_root; "No valid csum tree found" is
 * printed when its root node is not up to date.  For every EXTENT_CSUM item
 * the covered data length is (item size / csum_size) * sectorsize.  When
 * check_data_csum is set, the data is verified with check_extent_csums();
 * otherwise that step is skipped via skip_csum_check.  Contiguous items are
 * accumulated in offset/num_bytes; when an item does not start at
 * offset + num_bytes, the accumulated range is passed to
 * check_extent_exists() and a missing extent record is reported.
 */
6193 static int check_csums(struct btrfs_root *root)
6195 struct btrfs_path path;
6196 struct extent_buffer *leaf;
6197 struct btrfs_key key;
6198 u64 offset = 0, num_bytes = 0;
6199 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6203 unsigned long leaf_offset;
6205 root = root->fs_info->csum_root;
6206 if (!extent_buffer_uptodate(root->node)) {
6207 fprintf(stderr, "No valid csum tree found\n");
6211 btrfs_init_path(&path);
6212 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6213 key.type = BTRFS_EXTENT_CSUM_KEY;
6215 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6217 fprintf(stderr, "Error searching csum tree %d\n", ret);
6218 btrfs_release_path(&path);
6222 if (ret > 0 && path.slots[0])
6227 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6228 ret = btrfs_next_leaf(root, &path);
6230 fprintf(stderr, "Error going to next leaf "
6237 leaf = path.nodes[0];
6239 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6240 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size bytes of the item cover one sectorsize block of data. */
6245 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6246 csum_size) * root->sectorsize;
6247 if (!check_data_csum)
6248 goto skip_csum_check;
6249 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6250 ret = check_extent_csums(root, key.offset, data_len,
6256 offset = key.offset;
6257 } else if (key.offset != offset + num_bytes) {
6258 ret = check_extent_exists(root, offset, num_bytes);
6260 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6261 "there is no extent record\n",
6262 offset, offset+num_bytes);
6265 offset = key.offset;
6268 num_bytes += data_len;
6272 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in the order objectid, type,
 * offset, testing at each level whether @key's field is smaller.
 *
 * NOTE(review): the return statements are not visible here; presumably the
 * result is non-zero when @key sorts before @drop_key - confirm.
 */
6276 static int is_dropped_key(struct btrfs_key *key,
6277 struct btrfs_key *drop_key) {
6278 if (key->objectid < drop_key->objectid)
6280 else if (key->objectid == drop_key->objectid) {
6281 if (key->type < drop_key->type)
6283 else if (key->type == drop_key->type) {
6284 if (key->offset < drop_key->offset)
6292 * Here are the rules for FULL_BACKREF.
6294 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6295 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6297 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6298 * if it happened after the relocation occurred since we'll have dropped the
6299 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6300 * have no real way to know for sure.
6302 * We process the blocks one root at a time, and we start from the lowest root
6303 * objectid and go to the highest. So we can just lookup the owner backref for
6304 * the record and if we don't find it then we know it doesn't exist and we have
6307 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6308 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6309 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to tree block @buf
 * as reached from root @ri, and record a disagreement with what was decided
 * for the same block earlier.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The
 * visible tests are: ri->objectid below BTRFS_FIRST_FREE_OBJECTID,
 * buf->start equal to ri->bytenr, the RELOC header flag, the header owner
 * equal to ri->objectid, and a tree backref lookup for (parent 0, owner).
 * Two outcomes follow: one leaves *flags untouched and sets
 * rec->bad_full_backref if a previously recorded flag_block_full_backref is
 * set and is not 0; the other ORs FULL_BACKREF into *flags and sets
 * rec->bad_full_backref if the recorded value is set and is not 1.
 *
 * NOTE(review): which test leads to which outcome depends on branch lines
 * that are not visible here - confirm against the full file.
 */
6311 static int calc_extent_flag(struct btrfs_root *root,
6312 struct cache_tree *extent_cache,
6313 struct extent_buffer *buf,
6314 struct root_item_record *ri,
6317 struct extent_record *rec;
6318 struct cache_extent *cache;
6319 struct tree_backref *tback;
6322 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6323 /* we have added this extent before */
6327 rec = container_of(cache, struct extent_record, cache);
6330 * Except file/reloc tree, we can not have
6333 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6338 if (buf->start == ri->bytenr)
6341 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6344 owner = btrfs_header_owner(buf);
6345 if (owner == ri->objectid)
6348 tback = find_tree_backref(rec, 0, owner);
6353 if (rec->flag_block_full_backref != FLAG_UNSET &&
6354 rec->flag_block_full_backref != 0)
6355 rec->bad_full_backref = 1;
6358 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6359 if (rec->flag_block_full_backref != FLAG_UNSET &&
6360 rec->flag_block_full_backref != 1)
6361 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" to stderr, using
 * print_key_type() for @key_type and print_objectid() for @rootid.
 */
6365 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6367 fprintf(stderr, "Invalid key type(");
6368 print_key_type(stderr, 0, key_type);
6369 fprintf(stderr, ") found in root(");
6370 print_objectid(stderr, rootid, 0);
6371 fprintf(stderr, ")\n");
6375 * Check if the key is valid with its extent buffer.
6377 * This is an early check in case an invalid key exists in an extent buffer
6378 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that @key_type may legitimately appear in the tree @rootid.
 *
 * Visible rules:
 *  - DEV_ITEM and CHUNK_ITEM are tested against the chunk tree;
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM against the extent tree;
 *  - ROOT_ITEM against the root tree;
 *  - DEV_EXTENT against the device tree.
 * report_mismatch_key_root() is called for a mismatch.
 *
 * NOTE(review): the label "case BTRFS_CSUM_TREE_OBJECTID" is a tree objectid,
 * not a key type, although it sits among key-type labels and its comment
 * says "valid in csum and log tree".  BTRFS_EXTENT_CSUM_KEY looks like the
 * intended label - confirm and fix in the full file.
 */
6381 static int check_type_with_root(u64 rootid, u8 key_type)
6384 /* Only valid in chunk tree */
6385 case BTRFS_DEV_ITEM_KEY:
6386 case BTRFS_CHUNK_ITEM_KEY:
6387 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6390 /* valid in csum and log tree */
6391 case BTRFS_CSUM_TREE_OBJECTID:
6392 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6396 case BTRFS_EXTENT_ITEM_KEY:
6397 case BTRFS_METADATA_ITEM_KEY:
6398 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6399 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6402 case BTRFS_ROOT_ITEM_KEY:
6403 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6406 case BTRFS_DEV_EXTENT_KEY:
6407 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6413 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block of the scan.
 *
 * pick_next_pending() fills @bits with candidate blocks; they are added to
 * @reada and read ahead, and bits[0] is the block processed by this call.
 * It is removed from @pending, @reada and @nodes, read with
 * read_tree_block() using the parent generation remembered in its extent
 * record (when one exists), and a failed read is recorded with
 * record_bad_block_io().
 *
 * The backref flags come from btrfs_lookup_extent_info() when
 * init_extent_tree is not set, with calc_extent_flag() also called on both
 * visible paths.  The result is cross-checked against the root being walked
 * (@ri) and the header's RELOC flag; disagreements set rec->bad_full_backref.
 * The block is then validated with check_block().
 *
 * Leaves: each key is vetted with check_type_with_root() and then dispatched
 * by type to the process_*_item() / add_*_backref() helpers.  ORPHAN items
 * are queued on delete_items; the test against BTRFS_ORPHAN_OBJECTID
 * presumably exempts those (TODO confirm).  Regular, non-inline file extents
 * with a non-zero disk bytenr update data_bytes_allocated and
 * data_bytes_referenced and add a data backref.
 *
 * Nodes: for each child pointer an extent record carrying the parent key and
 * pointer generation is added, a tree backref is added, and the child is
 * queued with add_pending() on @nodes or @pending.  Children are tested with
 * is_dropped_key() when the level equals ri->drop_level.
 *
 * Finally the global byte counters and found_old_backref are updated and the
 * buffer is released.
 */
6417 static int run_next_block(struct btrfs_root *root,
6418 struct block_info *bits,
6421 struct cache_tree *pending,
6422 struct cache_tree *seen,
6423 struct cache_tree *reada,
6424 struct cache_tree *nodes,
6425 struct cache_tree *extent_cache,
6426 struct cache_tree *chunk_cache,
6427 struct rb_root *dev_cache,
6428 struct block_group_tree *block_group_cache,
6429 struct device_extent_tree *dev_extent_cache,
6430 struct root_item_record *ri)
6432 struct extent_buffer *buf;
6433 struct extent_record *rec = NULL;
6444 struct btrfs_key key;
6445 struct cache_extent *cache;
6448 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6449 bits_nr, &reada_bits);
6454 for(i = 0; i < nritems; i++) {
6455 ret = add_cache_extent(reada, bits[i].start,
6460 /* fixme, get the parent transid */
6461 readahead_tree_block(root, bits[i].start,
6465 *last = bits[0].start;
6466 bytenr = bits[0].start;
6467 size = bits[0].size;
/* The block being processed leaves all three work queues. */
6469 cache = lookup_cache_extent(pending, bytenr, size);
6471 remove_cache_extent(pending, cache);
6474 cache = lookup_cache_extent(reada, bytenr, size);
6476 remove_cache_extent(reada, cache);
6479 cache = lookup_cache_extent(nodes, bytenr, size);
6481 remove_cache_extent(nodes, cache);
6484 cache = lookup_cache_extent(extent_cache, bytenr, size);
6486 rec = container_of(cache, struct extent_record, cache);
6487 gen = rec->parent_generation;
6490 /* fixme, get the real parent transid */
6491 buf = read_tree_block(root, bytenr, size, gen);
6492 if (!extent_buffer_uptodate(buf)) {
6493 record_bad_block_io(root->fs_info,
6494 extent_cache, bytenr, size);
6498 nritems = btrfs_header_nritems(buf);
6501 if (!init_extent_tree) {
6502 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6503 btrfs_header_level(buf), 1, NULL,
6506 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6508 fprintf(stderr, "Couldn't calc extent flags\n");
6509 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6514 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6516 fprintf(stderr, "Couldn't calc extent flags\n");
6517 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6521 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6523 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6524 ri->objectid == btrfs_header_owner(buf)) {
6526 * Ok we got to this block from it's original owner and
6527 * we have FULL_BACKREF set. Relocation can leave
6528 * converted blocks over so this is altogether possible,
6529 * however it's not possible if the generation > the
6530 * last snapshot, so check for this case.
6532 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6533 btrfs_header_generation(buf) > ri->last_snapshot) {
6534 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6535 rec->bad_full_backref = 1;
6540 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6541 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6542 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6543 rec->bad_full_backref = 1;
6547 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6548 rec->flag_block_full_backref = 1;
6552 rec->flag_block_full_backref = 0;
6554 owner = btrfs_header_owner(buf);
6557 ret = check_block(root, extent_cache, buf, flags);
6561 if (btrfs_is_leaf(buf)) {
6562 btree_space_waste += btrfs_leaf_free_space(root, buf);
6563 for (i = 0; i < nritems; i++) {
6564 struct btrfs_file_extent_item *fi;
6565 btrfs_item_key_to_cpu(buf, &key, i);
6567 * Check key type against the leaf owner.
6568 * Could filter quite a lot of early error if
6571 if (check_type_with_root(btrfs_header_owner(buf),
6573 fprintf(stderr, "ignoring invalid key\n");
6576 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6577 process_extent_item(root, extent_cache, buf,
6581 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6582 process_extent_item(root, extent_cache, buf,
6586 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6588 btrfs_item_size_nr(buf, i);
6591 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6592 process_chunk_item(chunk_cache, &key, buf, i);
6595 if (key.type == BTRFS_DEV_ITEM_KEY) {
6596 process_device_item(dev_cache, &key, buf, i);
6599 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6600 process_block_group_item(block_group_cache,
6604 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6605 process_device_extent_item(dev_extent_cache,
6610 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6611 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6612 process_extent_ref_v0(extent_cache, buf, i);
6619 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6620 ret = add_tree_backref(extent_cache,
6621 key.objectid, 0, key.offset, 0);
6623 error("add_tree_backref failed: %s",
6627 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6628 ret = add_tree_backref(extent_cache,
6629 key.objectid, key.offset, 0, 0);
6631 error("add_tree_backref failed: %s",
6635 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6636 struct btrfs_extent_data_ref *ref;
6637 ref = btrfs_item_ptr(buf, i,
6638 struct btrfs_extent_data_ref);
6639 add_data_backref(extent_cache,
6641 btrfs_extent_data_ref_root(buf, ref),
6642 btrfs_extent_data_ref_objectid(buf,
6644 btrfs_extent_data_ref_offset(buf, ref),
6645 btrfs_extent_data_ref_count(buf, ref),
6646 0, root->sectorsize);
6649 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6650 struct btrfs_shared_data_ref *ref;
6651 ref = btrfs_item_ptr(buf, i,
6652 struct btrfs_shared_data_ref);
6653 add_data_backref(extent_cache,
6654 key.objectid, key.offset, 0, 0, 0,
6655 btrfs_shared_data_ref_count(buf, ref),
6656 0, root->sectorsize);
6659 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6660 struct bad_item *bad;
6662 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6666 bad = malloc(sizeof(struct bad_item));
6669 INIT_LIST_HEAD(&bad->list);
6670 memcpy(&bad->key, &key,
6671 sizeof(struct btrfs_key));
6672 bad->root_id = owner;
6673 list_add_tail(&bad->list, &delete_items);
/* From here on only EXTENT_DATA (file extent) items are of interest. */
6676 if (key.type != BTRFS_EXTENT_DATA_KEY)
6678 fi = btrfs_item_ptr(buf, i,
6679 struct btrfs_file_extent_item);
6680 if (btrfs_file_extent_type(buf, fi) ==
6681 BTRFS_FILE_EXTENT_INLINE)
6683 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6686 data_bytes_allocated +=
6687 btrfs_file_extent_disk_num_bytes(buf, fi);
6688 if (data_bytes_allocated < root->sectorsize) {
6691 data_bytes_referenced +=
6692 btrfs_file_extent_num_bytes(buf, fi);
6693 add_data_backref(extent_cache,
6694 btrfs_file_extent_disk_bytenr(buf, fi),
6695 parent, owner, key.objectid, key.offset -
6696 btrfs_file_extent_offset(buf, fi), 1, 1,
6697 btrfs_file_extent_disk_num_bytes(buf, fi));
6701 struct btrfs_key first_key;
6703 first_key.objectid = 0;
6706 btrfs_item_key_to_cpu(buf, &first_key, 0);
6707 level = btrfs_header_level(buf);
6708 for (i = 0; i < nritems; i++) {
6709 struct extent_record tmpl;
6711 ptr = btrfs_node_blockptr(buf, i);
6712 size = root->nodesize;
6713 btrfs_node_key_to_cpu(buf, &key, i);
6715 if ((level == ri->drop_level)
6716 && is_dropped_key(&key, &ri->drop_key)) {
6721 memset(&tmpl, 0, sizeof(tmpl));
6722 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6723 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6728 tmpl.max_size = size;
6729 ret = add_extent_rec(extent_cache, &tmpl);
6733 ret = add_tree_backref(extent_cache, ptr, parent,
6736 error("add_tree_backref failed: %s",
6742 add_pending(nodes, seen, ptr, size);
6744 add_pending(pending, seen, ptr, size);
6747 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6748 nritems) * sizeof(struct btrfs_key_ptr);
/* Per-tree size statistics for the final report. */
6750 total_btree_bytes += buf->len;
6751 if (fs_root_objectid(btrfs_header_owner(buf)))
6752 total_fs_tree_bytes += buf->len;
6753 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6754 total_extent_tree_bytes += buf->len;
6755 if (!found_old_backref &&
6756 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6757 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6758 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6759 found_old_backref = 1;
6761 free_extent_buffer(buf);
/*
 * Queue a tree root block (@buf) for scanning and seed its extent record.
 *
 * Blocks with a header level above 0 are queued on @nodes, the others on
 * @pending.  An extent record starting at buf->start with max_size buf->len
 * is added to @extent_cache.  For the reloc tree, or for blocks whose
 * backref revision is older than BTRFS_MIXED_BACKREF_REV, the tree backref
 * is added with buf->start as the third argument; otherwise that argument
 * is 0 and @objectid is passed as the fourth.
 */
6765 static int add_root_to_pending(struct extent_buffer *buf,
6766 struct cache_tree *extent_cache,
6767 struct cache_tree *pending,
6768 struct cache_tree *seen,
6769 struct cache_tree *nodes,
6772 struct extent_record tmpl;
6775 if (btrfs_header_level(buf) > 0)
6776 add_pending(nodes, seen, buf->start, buf->len);
6778 add_pending(pending, seen, buf->start, buf->len);
6780 memset(&tmpl, 0, sizeof(tmpl));
6781 tmpl.start = buf->start;
6786 tmpl.max_size = buf->len;
6787 add_extent_rec(extent_cache, &tmpl);
6789 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6790 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6791 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6794 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6799 /* as we fix the tree, we might be deleting blocks that
6800 * we're tracking for repair. This hook makes sure we
6801 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache (root->fs_info->fsck_extent_cache) in step
 * with an extent being freed during repair.
 *
 * @owner >= BTRFS_FIRST_FREE_OBJECTID marks the extent as data.  The extent
 * record covering [@bytenr, @bytenr + @num_bytes) is looked up, and the
 * matching data or tree backref is found with find_data_backref() or
 * find_tree_backref().  Data backrefs have refs_to_drop subtracted from
 * their found_ref / num_refs counters (and from rec->refs and
 * rec->extent_item_refs); tree backrefs have their found_ref and
 * found_extent_tree bits cleared and rec->extent_item_refs decremented.
 * maybe_free_extent_rec() is called at the end.
 *
 * NOTE(review): in both branches the backref is unlinked when
 * found_extent_tree is clear but found_ref is still set; confirm that the
 * found_ref test is not meant to be negated.
 */
6803 static int free_extent_hook(struct btrfs_trans_handle *trans,
6804 struct btrfs_root *root,
6805 u64 bytenr, u64 num_bytes, u64 parent,
6806 u64 root_objectid, u64 owner, u64 offset,
6809 struct extent_record *rec;
6810 struct cache_extent *cache;
6812 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6814 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6815 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6819 rec = container_of(cache, struct extent_record, cache);
6821 struct data_backref *back;
6822 back = find_data_backref(rec, parent, root_objectid, owner,
6823 offset, 1, bytenr, num_bytes);
6826 if (back->node.found_ref) {
6827 back->found_ref -= refs_to_drop;
6829 rec->refs -= refs_to_drop;
6831 if (back->node.found_extent_tree) {
6832 back->num_refs -= refs_to_drop;
6833 if (rec->extent_item_refs)
6834 rec->extent_item_refs -= refs_to_drop;
6836 if (back->found_ref == 0)
6837 back->node.found_ref = 0;
6838 if (back->num_refs == 0)
6839 back->node.found_extent_tree = 0;
6841 if (!back->node.found_extent_tree && back->node.found_ref) {
6842 list_del(&back->node.list);
6846 struct tree_backref *back;
6847 back = find_tree_backref(rec, parent, root_objectid);
6850 if (back->node.found_ref) {
6853 back->node.found_ref = 0;
6855 if (back->node.found_extent_tree) {
6856 if (rec->extent_item_refs)
6857 rec->extent_item_refs--;
6858 back->node.found_extent_tree = 0;
6860 if (!back->node.found_extent_tree && back->node.found_ref) {
6861 list_del(&back->node.list);
6865 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items keyed by @bytenr that describe an extent or
 * one of its backrefs.
 *
 * The search starts from (@bytenr, <type>, -1) and works backwards.  Items
 * whose objectid differs from @bytenr are tested first (the action taken is
 * not visible here).  Items of type EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or
 * SHARED_DATA_REF are deleted with btrfs_del_item() after a
 * "repair deleting extent record" message.  Any other type is stepped over
 * by lowering the search key: type - 1 with offset -1, or, for type 0,
 * offset - 1 (offset 0 is tested separately).
 *
 * After deleting an EXTENT_ITEM or METADATA_ITEM, btrfs_update_block_group()
 * is called for the extent, with its size taken as found_key.offset or
 * root->nodesize respectively.
 *
 * NOTE(review): no use of @new_len is visible in these lines - confirm.
 */
6870 static int delete_extent_records(struct btrfs_trans_handle *trans,
6871 struct btrfs_root *root,
6872 struct btrfs_path *path,
6873 u64 bytenr, u64 new_len)
6875 struct btrfs_key key;
6876 struct btrfs_key found_key;
6877 struct extent_buffer *leaf;
6882 key.objectid = bytenr;
6884 key.offset = (u64)-1;
6887 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6894 if (path->slots[0] == 0)
6900 leaf = path->nodes[0];
6901 slot = path->slots[0];
6903 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6904 if (found_key.objectid != bytenr)
6907 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6908 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6909 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6910 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6911 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6912 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6913 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6914 btrfs_release_path(path);
6915 if (found_key.type == 0) {
6916 if (found_key.offset == 0)
6918 key.offset = found_key.offset - 1;
6919 key.type = found_key.type;
6921 key.type = found_key.type - 1;
6922 key.offset = (u64)-1;
6926 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6927 found_key.objectid, found_key.type, found_key.offset);
6929 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6932 btrfs_release_path(path);
6934 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6935 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6936 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6937 found_key.offset : root->nodesize;
6939 ret = btrfs_update_block_group(trans, root, bytenr,
6946 btrfs_release_path(path);
6951 * for a single backref, this will allocate a new extent
6952 * and add the backref to it.
/*
 * Write an extent item for @rec and add the reference described by @back.
 *
 * Visible behaviour:
 *  - rec->max_size is raised to at least info->extent_root->nodesize
 *    (the condition guarding this is not visible in this extract);
 *  - an empty item keyed (rec->start, BTRFS_EXTENT_ITEM_KEY,
 *    rec->max_size) is inserted, initialised with 0 refs and
 *    rec->generation, flagged as data or as a tree block (plus @flags),
 *    and the block group accounting is updated for the range;
 *  - btrfs_inc_extent_ref() is then called once per dback->found_ref for
 *    a data backref, or once for a tree backref.
 *
 * NOTE(review): presumably the item insertion is skipped when @allocated
 * is set - the line that would test it is not visible here; confirm.
 * Braces, several declarations and the return paths are also missing
 * from this extract.
 */
6954 static int record_extent(struct btrfs_trans_handle *trans,
6955 struct btrfs_fs_info *info,
6956 struct btrfs_path *path,
6957 struct extent_record *rec,
6958 struct extent_backref *back,
6959 int allocated, u64 flags)
6962 struct btrfs_root *extent_root = info->extent_root;
6963 struct extent_buffer *leaf;
6964 struct btrfs_key ins_key;
6965 struct btrfs_extent_item *ei;
6966 struct data_backref *dback;
6967 struct btrfs_tree_block_info *bi;
6970 rec->max_size = max_t(u64, rec->max_size,
6971 info->extent_root->nodesize);
6974 u32 item_size = sizeof(*ei);
/* extra room for the tree block info (the guarding test is not visible) */
6977 item_size += sizeof(*bi);
6979 ins_key.objectid = rec->start;
6980 ins_key.offset = rec->max_size;
6981 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6983 ret = btrfs_insert_empty_item(trans, extent_root, path,
6984 &ins_key, item_size);
6988 leaf = path->nodes[0];
6989 ei = btrfs_item_ptr(leaf, path->slots[0],
6990 struct btrfs_extent_item);
/* refs start at 0; they are added through btrfs_inc_extent_ref() below */
6992 btrfs_set_extent_refs(leaf, ei, 0);
6993 btrfs_set_extent_generation(leaf, ei, rec->generation);
6995 if (back->is_data) {
6996 btrfs_set_extent_flags(leaf, ei,
6997 BTRFS_EXTENT_FLAG_DATA);
6999 struct btrfs_disk_key copy_key;;
/* the tree block info is located directly after the extent item */
7001 bi = (struct btrfs_tree_block_info *)(ei + 1);
7002 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): the argument spelled with a copyright sign in the next
 * lines looks like a mis-encoded address-of copy_key (the variable
 * declared above); confirm against the original file.
 */
7005 btrfs_set_disk_key_objectid(©_key,
7006 rec->info_objectid);
7007 btrfs_set_disk_key_type(©_key, 0);
7008 btrfs_set_disk_key_offset(©_key, 0);
7010 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7011 btrfs_set_tree_block_key(leaf, bi, ©_key);
7013 btrfs_set_extent_flags(leaf, ei,
7014 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7017 btrfs_mark_buffer_dirty(leaf);
7018 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7019 rec->max_size, 1, 0);
7022 btrfs_release_path(path);
7025 if (back->is_data) {
7029 dback = to_data_backref(back);
/* a full backref is keyed by its parent block */
7030 if (back->full_backref)
7031 parent = dback->parent;
/* one btrfs_inc_extent_ref() call per reference that was found */
7035 for (i = 0; i < dback->found_ref; i++) {
7036 /* if parent != 0, we're doing a full backref
7037 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7038 * just makes the backref allocator create a data
7041 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7042 rec->start, rec->max_size,
7046 BTRFS_FIRST_FREE_OBJECTID :
7052 fprintf(stderr, "adding new data backref"
7053 " on %llu %s %llu owner %llu"
7054 " offset %llu found %d\n",
7055 (unsigned long long)rec->start,
7056 back->full_backref ?
7058 back->full_backref ?
7059 (unsigned long long)parent :
7060 (unsigned long long)dback->root,
7061 (unsigned long long)dback->owner,
7062 (unsigned long long)dback->offset,
// Tree backref case: a single btrfs_inc_extent_ref() call is visible.
7066 struct tree_backref *tback;
7068 tback = to_tree_backref(back);
7069 if (back->full_backref)
7070 parent = tback->parent;
7074 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7075 rec->start, rec->max_size,
7076 parent, tback->root, 0, 0);
// NOTE(review): the arguments below are passed to %llu without the
// (unsigned long long) cast used by the data backref message; confirm
// that u64 matches %llu on every supported platform.
7077 fprintf(stderr, "adding new tree backref on "
7078 "start %llu len %llu parent %llu root %llu\n",
7079 rec->start, rec->max_size, parent, tback->root);
7082 btrfs_release_path(path);
// Walk the @entries list looking for the extent_entry whose bytenr and
// bytes fields both equal the given @bytenr and @bytes.
// NOTE(review): the statements that return the match (and the fallback
// result when nothing matches) are not visible in this extract.
7086 static struct extent_entry *find_entry(struct list_head *entries,
7087 u64 bytenr, u64 bytes)
7089 struct extent_entry *entry = NULL;
7091 list_for_each_entry(entry, entries, list) {
7092 if (entry->bytenr == bytenr && entry->bytes == bytes)
// Scan the @entries list for the entry that should be trusted most.
// The visible comparisons are all on the count field: an entry whose
// broken counter equals its count is singled out first, ties between the
// current best and the current entry are treated as undecided, and
// otherwise the candidate with the larger count is preferred.
// NOTE(review): most assignments, the continue statements and the return
// are not visible in this extract, so the exact selection rule must be
// confirmed against the complete file.
7099 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7101 struct extent_entry *entry, *best = NULL, *prev = NULL;
7103 list_for_each_entry(entry, entries, list) {
7105 * If there are as many broken entries as entries then we know
7106 * not to trust this particular entry.
7108 if (entry->broken == entry->count)
7112 * Special case, when there are only two entries and 'best' is
7122 * If our current entry == best then we can't be sure our best
7123 * is really the best, so we need to keep searching.
7125 if (best && best->count == entry->count) {
7131 /* Prev == entry, not good enough, have to keep searching */
7132 if (!prev->broken && prev->count == entry->count)
// pick whichever of prev and entry has the larger count
7136 best = (prev->count > entry->count) ? prev : entry;
7137 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind @dback so that it refers to the
 * extent described by @entry.
 *
 * Visible steps:
 *  1. read the fs root named by dback->root;
 *  2. search (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) without
 *     a transaction and walk forward until a file extent whose disk
 *     bytenr and disk num bytes equal dback->disk_bytenr / dback->bytes
 *     is found;
 *  3. start a transaction and search the same key again with cow set;
 *  4. refuse compressed extents whose start differs from entry->bytenr;
 *  5. point the item at entry->bytenr, adjusting the file extent offset
 *     when the old start lies before or after the new one, and set
 *     disk_num_bytes (and ram_bytes when not compressed) to entry->bytes;
 *  6. mark the leaf dirty and commit.
 *
 * Returns ret if it is non-zero, otherwise the commit result.
 *
 * NOTE(review): braces, some declarations, the error exits and a few
 * short statements are not visible in this extract.
 */
7145 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7146 struct data_backref *dback, struct extent_entry *entry)
7148 struct btrfs_trans_handle *trans;
7149 struct btrfs_root *root;
7150 struct btrfs_file_extent_item *fi;
7151 struct extent_buffer *leaf;
7152 struct btrfs_key key;
7156 key.objectid = dback->root;
7157 key.type = BTRFS_ROOT_ITEM_KEY;
7158 key.offset = (u64)-1;
7159 root = btrfs_read_fs_root(info, &key);
7161 fprintf(stderr, "Couldn't find root for our ref\n");
7166 * The backref points to the original offset of the extent if it was
7167 * split, so we need to search down to the offset we have and then walk
7168 * forward until we find the backref we're looking for.
7170 key.objectid = dback->owner;
7171 key.type = BTRFS_EXTENT_DATA_KEY;
7172 key.offset = dback->offset;
/* read-only lookup: no transaction, ins_len 0, cow 0 */
7173 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7175 fprintf(stderr, "Error looking up ref %d\n", ret);
/* ran off the end of this leaf, move on to the next one */
7180 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7181 ret = btrfs_next_leaf(root, path);
7183 fprintf(stderr, "Couldn't find our ref, next\n");
7187 leaf = path->nodes[0];
7188 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7189 if (key.objectid != dback->owner ||
7190 key.type != BTRFS_EXTENT_DATA_KEY) {
7191 fprintf(stderr, "Couldn't find our ref, search\n");
7194 fi = btrfs_item_ptr(leaf, path->slots[0],
7195 struct btrfs_file_extent_item);
7196 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7197 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* this is the file extent the backref was built from */
7199 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7204 btrfs_release_path(path);
7206 trans = btrfs_start_transaction(root, 1);
7208 return PTR_ERR(trans);
7211 * Ok we have the key of the file extent we want to fix, now we can cow
7212 * down to the thing and fix it.
7214 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7216 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7217 key.objectid, key.type, key.offset, ret);
7221 fprintf(stderr, "Well that's odd, we just found this key "
7222 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7227 leaf = path->nodes[0];
7228 fi = btrfs_item_ptr(leaf, path->slots[0],
7229 struct btrfs_file_extent_item);
7231 if (btrfs_file_extent_compression(leaf, fi) &&
7232 dback->disk_bytenr != entry->bytenr) {
7233 fprintf(stderr, "Ref doesn't match the record start and is "
7234 "compressed, please take a btrfs-image of this file "
7235 "system and send it to a btrfs developer so they can "
7236 "complete this functionality for bytenr %Lu\n",
7237 dback->disk_bytenr);
/*
 * Three cases follow: a backref marked broken simply takes the new
 * bytenr; a ref starting after the entry, or before it, additionally
 * gets its file extent offset rewritten.
 */
7242 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7243 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7244 } else if (dback->disk_bytenr > entry->bytenr) {
7245 u64 off_diff, offset;
7247 off_diff = dback->disk_bytenr - entry->bytenr;
7248 offset = btrfs_file_extent_offset(leaf, fi);
7249 if (dback->disk_bytenr + offset +
7250 btrfs_file_extent_num_bytes(leaf, fi) >
7251 entry->bytenr + entry->bytes) {
7252 fprintf(stderr, "Ref is past the entry end, please "
7253 "take a btrfs-image of this file system and "
7254 "send it to a btrfs developer, ref %Lu\n",
7255 dback->disk_bytenr);
/*
 * NOTE(review): off_diff is computed above but the statement applying it
 * to offset is not visible in this extract; confirm it exists.
 */
7260 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7261 btrfs_set_file_extent_offset(leaf, fi, offset);
7262 } else if (dback->disk_bytenr < entry->bytenr) {
7265 offset = btrfs_file_extent_offset(leaf, fi);
7266 if (dback->disk_bytenr + offset < entry->bytenr) {
7267 fprintf(stderr, "Ref is before the entry start, please"
7268 " take a btrfs-image of this file system and "
7269 "send it to a btrfs developer, ref %Lu\n",
7270 dback->disk_bytenr);
/* re-express the offset relative to the new extent start */
7275 offset += dback->disk_bytenr;
7276 offset -= entry->bytenr;
7277 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7278 btrfs_set_file_extent_offset(leaf, fi, offset);
7281 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7284 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7285 * only do this if we aren't using compression, otherwise it's a
7288 if (!btrfs_file_extent_compression(leaf, fi))
7289 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7291 printf("ram bytes may be wrong?\n");
7292 btrfs_mark_buffer_dirty(leaf);
7294 err = btrfs_commit_transaction(trans, root);
7295 btrfs_release_path(path);
7296 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on (disk_bytenr, bytes).
 *
 * Visible steps:
 *  1. walk rec->backrefs, ignoring full backrefs, non-data backrefs and
 *     backrefs with found_ref == 0, and collect each distinct
 *     (disk_bytenr, bytes) pair as an extent_entry on a local list;
 *  2. if there is at most one entry and no mismatch with the record was
 *     noticed, there is nothing to repair;
 *  3. otherwise pick a winner with find_most_right_entry(); when that is
 *     not decisive, the values of the extent record itself
 *     (rec->start, rec->nr) are looked up or added as an entry and the
 *     selection is retried;
 *  4. give up if the winner does not start at rec->start;
 *  5. call repair_ref() for every considered backref that differs from
 *     the winner;
 *  6. empty the local entry list.
 *
 * NOTE(review): braces, several declarations, counters updates, gotos and
 * the return statement are not visible in this extract.
 */
7299 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7300 struct extent_record *rec)
7302 struct extent_backref *back;
7303 struct data_backref *dback;
7304 struct extent_entry *entry, *best = NULL;
7307 int broken_entries = 0;
7312 * Metadata is easy and the backrefs should always agree on bytenr and
7313 * size, if not we've got bigger issues.
7318 list_for_each_entry(back, &rec->backrefs, list) {
7319 if (back->full_backref || !back->is_data)
7322 dback = to_data_backref(back);
7325 * We only pay attention to backrefs that we found a real
7328 if (dback->found_ref == 0)
7332 * For now we only catch when the bytes don't match, not the
7333 * bytenr. We can easily do this at the same time, but I want
7334 * to have a fs image to test on before we just add repair
7335 * functionality willy-nilly so we know we won't screw up the
7339 entry = find_entry(&entries, dback->disk_bytenr,
/* first time this (bytenr, bytes) pair is seen: create a zeroed entry */
7342 entry = malloc(sizeof(struct extent_entry));
7347 memset(entry, 0, sizeof(*entry));
7348 entry->bytenr = dback->disk_bytenr;
7349 entry->bytes = dback->bytes;
7350 list_add_tail(&entry->list, &entries);
7355 * If we only have on entry we may think the entries agree when
7356 * in reality they don't so we have to do some extra checking.
7358 if (dback->disk_bytenr != rec->start ||
7359 dback->bytes != rec->nr || back->broken)
7370 /* Yay all the backrefs agree, carry on good sir */
7371 if (nr_entries <= 1 && !mismatch)
7374 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7375 "%Lu\n", rec->start);
7378 * First we want to see if the backrefs can agree amongst themselves who
7379 * is right, so figure out which one of the entries has the highest
7382 best = find_most_right_entry(&entries);
7385 * Ok so we may have an even split between what the backrefs think, so
7386 * this is where we use the extent ref to see what it thinks.
7389 entry = find_entry(&entries, rec->start, rec->nr);
7390 if (!entry && (!broken_entries || !rec->found_rec)) {
7391 fprintf(stderr, "Backrefs don't agree with each other "
7392 "and extent record doesn't agree with anybody,"
7393 " so we can't fix bytenr %Lu bytes %Lu\n",
7394 rec->start, rec->nr);
7397 } else if (!entry) {
7399 * Ok our backrefs were broken, we'll assume this is the
7400 * correct value and add an entry for this range.
7402 entry = malloc(sizeof(struct extent_entry));
7407 memset(entry, 0, sizeof(*entry));
7408 entry->bytenr = rec->start;
7409 entry->bytes = rec->nr;
7410 list_add_tail(&entry->list, &entries);
/* retry the selection with the extent record's own values in the list */
7414 best = find_most_right_entry(&entries);
7416 fprintf(stderr, "Backrefs and extent record evenly "
7417 "split on who is right, this is going to "
7418 "require user input to fix bytenr %Lu bytes "
7419 "%Lu\n", rec->start, rec->nr);
7426 * I don't think this can happen currently as we'll abort() if we catch
7427 * this case higher up, but in case somebody removes that we still can't
7428 * deal with it properly here yet, so just bail out of that's the case.
7430 if (best->bytenr != rec->start) {
7431 fprintf(stderr, "Extent start and backref starts don't match, "
7432 "please use btrfs-image on this file system and send "
7433 "it to a btrfs developer so they can make fsck fix "
7434 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7435 rec->start, rec->nr);
7441 * Ok great we all agreed on an extent record, let's go find the real
7442 * references and fix up the ones that don't match.
7444 list_for_each_entry(back, &rec->backrefs, list) {
7445 if (back->full_backref || !back->is_data)
7448 dback = to_data_backref(back);
7451 * Still ignoring backrefs that don't have a real ref attached
7454 if (dback->found_ref == 0)
/* this backref already matches the winning entry */
7457 if (dback->bytes == best->bytes &&
7458 dback->disk_bytenr == best->bytenr)
7461 ret = repair_ref(info, path, dback, best);
7467 * Ok we messed with the actual refs, which means we need to drop our
7468 * entire cache and go back and rescan. I know this is a huge pain and
7469 * adds a lot of extra work, but it's the only way to be safe. Once all
7470 * the backrefs agree we may not need to do anything to the extent
/* tear down the local list of candidate entries */
7475 while (!list_empty(&entries)) {
7476 entry = list_entry(entries.next, struct extent_entry, list);
7477 list_del_init(&entry->list);
/*
 * Decide whether @rec really has duplicate extent items.
 *
 * When @rec itself was not backed by an extent item and has exactly one
 * duplicate, the visible code removes @rec from @extent_cache, promotes
 * the first record on rec->dups ("good") to take its place (inheriting
 * refs and backrefs), folds any other overlapping cache records into it,
 * and inserts it into the cache.
 *
 * Returns 0 when the promoted record still has duplicates that need to be
 * deleted, 1 otherwise.
 *
 * NOTE(review): braces, the loop around the overlap lookup, the early
 * return values and a few short statements are not visible in this
 * extract.
 */
7483 static int process_duplicates(struct btrfs_root *root,
7484 struct cache_tree *extent_cache,
7485 struct extent_record *rec)
7487 struct extent_record *good, *tmp;
7488 struct cache_extent *cache;
7492 * If we found a extent record for this extent then return, or if we
7493 * have more than one duplicate we are likely going to need to delete
7496 if (rec->found_rec || rec->num_duplicates > 1)
7499 /* Shouldn't happen but just in case */
7500 BUG_ON(!rec->num_duplicates);
7503 * So this happens if we end up with a backref that doesn't match the
7504 * actual extent entry. So either the backref is bad or the extent
7505 * entry is bad. Either way we want to have the extent_record actually
7506 * reflect what we found in the extent_tree, so we need to take the
7507 * duplicate out and use that as the extent_record since the only way we
7508 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7510 remove_cache_extent(extent_cache, &rec->cache);
/* promote the first duplicate and reset its bookkeeping */
7512 good = to_extent_record(rec->dups.next);
7513 list_del_init(&good->list);
7514 INIT_LIST_HEAD(&good->backrefs);
7515 INIT_LIST_HEAD(&good->dups);
7516 good->cache.start = good->start;
7517 good->cache.size = good->nr;
7518 good->content_checked = 0;
7519 good->owner_ref_checked = 0;
7520 good->num_duplicates = 0;
7521 good->refs = rec->refs;
7522 list_splice_init(&rec->backrefs, &good->backrefs);
7524 cache = lookup_cache_extent(extent_cache, good->start,
7528 tmp = container_of(cache, struct extent_record, cache);
7531 * If we find another overlapping extent and it's found_rec is
7532 * set then it's a duplicate and we need to try and delete
7535 if (tmp->found_rec || tmp->num_duplicates > 0) {
/* queue good on the global duplicate list once, then absorb tmp */
7536 if (list_empty(&good->list))
7537 list_add_tail(&good->list,
7538 &duplicate_extents);
7539 good->num_duplicates += tmp->num_duplicates + 1;
7540 list_splice_init(&tmp->dups, &good->dups);
7541 list_del_init(&tmp->list);
7542 list_add_tail(&tmp->list, &good->dups);
7543 remove_cache_extent(extent_cache, &tmp->cache);
7548 * Ok we have another non extent item backed extent rec, so lets
7549 * just add it to this extent and carry on like we did above.
7551 good->refs += tmp->refs;
7552 list_splice_init(&tmp->backrefs, &good->backrefs);
7553 remove_cache_extent(extent_cache, &tmp->cache);
7556 ret = insert_cache_extent(extent_cache, &good->cache);
7559 return good->num_duplicates ? 0 : 1;
/*
 * Delete the duplicate extent items attached to @rec from the extent tree.
 *
 * Visible steps: compare @rec and the records on rec->dups to choose the
 * one to keep ("good"), bailing out with a message when two records
 * overlap without one covering the other; move the remaining records to
 * a local delete list; inside one transaction on the extent root, look
 * up (start, BTRFS_EXTENT_ITEM_KEY, nr) for every listed record that has
 * found_rec set and delete the item; commit; finally empty the delete
 * list and rec->dups.
 *
 * Returns ret when it is non-zero, otherwise nr_del. rec->num_duplicates
 * is reset to 0 only when both ret and nr_del are 0.
 *
 * NOTE(review): braces, some declarations, the statements selecting
 * "good", the nr_del updates and the error exits are not visible in this
 * extract.
 */
7562 static int delete_duplicate_records(struct btrfs_root *root,
7563 struct extent_record *rec)
7565 struct btrfs_trans_handle *trans;
7566 LIST_HEAD(delete_list);
7567 struct btrfs_path path;
7568 struct extent_record *tmp, *good, *n;
7571 struct btrfs_key key;
7573 btrfs_init_path(&path);
7576 /* Find the record that covers all of the duplicates. */
7577 list_for_each_entry(tmp, &rec->dups, list) {
7578 if (good->start < tmp->start)
7580 if (good->nr > tmp->nr)
7583 if (tmp->start + tmp->nr < good->start + good->nr) {
7584 fprintf(stderr, "Ok we have overlapping extents that "
7585 "aren't completely covered by each other, this "
7586 "is going to require more careful thought. "
7587 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7588 tmp->start, tmp->nr, good->start, good->nr);
7595 list_add_tail(&rec->list, &delete_list);
7597 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7600 list_move_tail(&tmp->list, &delete_list);
/* all deletions happen in the extent tree */
7603 root = root->fs_info->extent_root;
7604 trans = btrfs_start_transaction(root, 1);
7605 if (IS_ERR(trans)) {
7606 ret = PTR_ERR(trans);
7610 list_for_each_entry(tmp, &delete_list, list) {
/* records without found_rec are filtered here (action not visible) */
7611 if (tmp->found_rec == 0)
7613 key.objectid = tmp->start;
7614 key.type = BTRFS_EXTENT_ITEM_KEY;
7615 key.offset = tmp->nr;
7617 /* Shouldn't happen but just in case */
7618 if (tmp->metadata) {
7619 fprintf(stderr, "Well this shouldn't happen, extent "
7620 "record overlaps but is metadata? "
7621 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7625 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7631 ret = btrfs_del_item(trans, root, &path);
7634 btrfs_release_path(&path);
7637 err = btrfs_commit_transaction(trans, root);
/* drain both lists; the per-record cleanup lines are not visible */
7641 while (!list_empty(&delete_list)) {
7642 tmp = to_extent_record(delete_list.next);
7643 list_del_init(&tmp->list);
7649 while (!list_empty(&rec->dups)) {
7650 tmp = to_extent_record(rec->dups.next);
7651 list_del_init(&tmp->list);
7655 btrfs_release_path(&path);
7657 if (!ret && !nr_del)
7658 rec->num_duplicates = 0;
7660 return ret ? ret : nr_del;
/*
 * Try to locate the file extents behind the data backrefs of @rec that
 * were not found during the tree scan.
 *
 * For every non-full data backref with found_ref == 0 the visible code
 * reads the backref's root, searches it for
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset), reads the disk
 * bytenr and disk num bytes of the file extent item at the resulting
 * slot, consults @extent_cache for that bytenr, and then bumps
 * dback->found_ref and copies the on-disk bytenr and length into the
 * backref.
 *
 * Returns the PTR_ERR of a root read failure other than -ENOENT.
 *
 * NOTE(review): braces, continue statements, the handling of the search
 * result, the test applied to the cached record and the final return are
 * not visible in this extract.
 */
7663 static int find_possible_backrefs(struct btrfs_fs_info *info,
7664 struct btrfs_path *path,
7665 struct cache_tree *extent_cache,
7666 struct extent_record *rec)
7668 struct btrfs_root *root;
7669 struct extent_backref *back;
7670 struct data_backref *dback;
7671 struct cache_extent *cache;
7672 struct btrfs_file_extent_item *fi;
7673 struct btrfs_key key;
7677 list_for_each_entry(back, &rec->backrefs, list) {
7678 /* Don't care about full backrefs (poor unloved backrefs) */
7679 if (back->full_backref || !back->is_data)
7682 dback = to_data_backref(back);
7684 /* We found this one, we don't need to do a lookup */
7685 if (dback->found_ref)
7688 key.objectid = dback->root;
7689 key.type = BTRFS_ROOT_ITEM_KEY;
7690 key.offset = (u64)-1;
7692 root = btrfs_read_fs_root(info, &key);
7694 /* No root, definitely a bad ref, skip */
7695 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7697 /* Other err, exit */
7699 return PTR_ERR(root);
7701 key.objectid = dback->owner;
7702 key.type = BTRFS_EXTENT_DATA_KEY;
7703 key.offset = dback->offset;
/* read-only lookup of the file extent item */
7704 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7706 btrfs_release_path(path);
7709 /* Didn't find it, we can carry on */
7714 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7715 struct btrfs_file_extent_item);
7716 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7717 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7718 btrfs_release_path(path);
/* is there already a cached extent record covering this bytenr? */
7719 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7721 struct extent_record *tmp;
7722 tmp = container_of(cache, struct extent_record, cache);
7725 * If we found an extent record for the bytenr for this
7726 * particular backref then we can't add it to our
7727 * current extent record. We only want to add backrefs
7728 * that don't have a corresponding extent item in the
7729 * extent tree since they likely belong to this record
7730 * and we need to fix it if it doesn't match bytenrs.
/* adopt the values read from the file extent item */
7736 dback->found_ref += 1;
7737 dback->disk_bytenr = bytenr;
7738 dback->bytes = bytes;
7741 * Set this so the verify backref code knows not to trust the
7742 * values in this backref.
7751 * Record orphan data ref into corresponding root.
7753 * Return 0 if the extent item contains data ref and recorded.
7754 * Return 1 if the extent item contains no useful data ref
7755 * In that case, it may contain only shared_dataref or metadata backref
7756 * or the file extent exists (this should be handled by the extent bytenr
7758 * Return <0 if something goes wrong.
/*
 * Record the data backrefs of @rec that exist in the extent tree but have
 * no matching file extent as orphan_data_extent entries on their root.
 *
 * For every non-full data backref that was found in the extent tree and
 * has found_ref == 0, the visible code reads the backref's root, searches
 * it for (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset), and then
 * allocates an orphan entry describing the backref and the extent range
 * (rec->cache.start, rec->cache.size) and links it with the root's
 * orphan_data_extents list.
 *
 * Returns !recorded_data_ref, i.e. 0 when at least one orphan was
 * recorded and 1 when none was.
 *
 * NOTE(review): braces, continue statements, the test on the search
 * result, the malloc failure handling and the error exits are not visible
 * in this extract.
 */
7760 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7761 struct extent_record *rec)
7763 struct btrfs_key key;
7764 struct btrfs_root *dest_root;
7765 struct extent_backref *back;
7766 struct data_backref *dback;
7767 struct orphan_data_extent *orphan;
7768 struct btrfs_path path;
7769 int recorded_data_ref = 0;
7774 btrfs_init_path(&path);
7775 list_for_each_entry(back, &rec->backrefs, list) {
7776 if (back->full_backref || !back->is_data ||
7777 !back->found_extent_tree)
7779 dback = to_data_backref(back);
7780 if (dback->found_ref)
7782 key.objectid = dback->root;
7783 key.type = BTRFS_ROOT_ITEM_KEY;
7784 key.offset = (u64)-1;
7786 dest_root = btrfs_read_fs_root(fs_info, &key);
7788 /* For non-exist root we just skip it */
7789 if (IS_ERR(dest_root) || !dest_root)
7792 key.objectid = dback->owner;
7793 key.type = BTRFS_EXTENT_DATA_KEY;
7794 key.offset = dback->offset;
/* only the search result matters here; the path is dropped right away */
7796 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7797 btrfs_release_path(&path);
7799 * For ret < 0, it's OK since the fs-tree may be corrupted,
7800 * we need to record it for inode/file extent rebuild.
7801 * For ret > 0, we record it only for file extent rebuild.
7802 * For ret == 0, the file extent exists but only bytenr
7803 * mismatch, let the original bytenr fix routine to handle,
7809 orphan = malloc(sizeof(*orphan));
7814 INIT_LIST_HEAD(&orphan->list);
7815 orphan->root = dback->root;
7816 orphan->objectid = dback->owner;
7817 orphan->offset = dback->offset;
7818 orphan->disk_bytenr = rec->cache.start;
7819 orphan->disk_len = rec->cache.size;
/*
 * NOTE(review): list_add() conventionally takes (new, head). Here the
 * list head of the root is passed first and the new orphan second, which
 * looks reversed and could drop earlier orphans when more than one is
 * recorded for the same root - confirm against the list_add definition.
 */
7820 list_add(&dest_root->orphan_data_extents, &orphan->list);
7821 recorded_data_ref = 1;
7824 btrfs_release_path(&path);
7826 return !recorded_data_ref;
7832 * when an incorrect extent item is found, this will delete
7833 * all of the existing entries for it and recreate them
7834 * based on what the tree scan found.
/*
 * Rebuild the extent-tree entries of @rec from the backrefs found during
 * the scan.
 *
 * Visible steps:
 *  - for a data extent whose ref counts disagree, find_possible_backrefs()
 *    and then verify_backrefs() are run first;
 *  - a transaction is started on the extent root;
 *  - delete_extent_records() removes the existing items for the range;
 *  - info->corrupt_blocks is consulted for the range;
 *  - record_extent() is called for the backrefs on rec->backrefs that
 *    have found_ref set;
 *  - the transaction is committed.
 *
 * NOTE(review): braces, some declarations, the error exits, the statement
 * advancing the list cursor, the handling of the corrupt-block lookup and
 * the return are not visible in this extract.
 */
7836 static int fixup_extent_refs(struct btrfs_fs_info *info,
7837 struct cache_tree *extent_cache,
7838 struct extent_record *rec)
7840 struct btrfs_trans_handle *trans = NULL;
7842 struct btrfs_path path;
7843 struct list_head *cur = rec->backrefs.next;
7844 struct cache_extent *cache;
7845 struct extent_backref *back;
/* carry the full-backref flag over to the recreated extent item */
7849 if (rec->flag_block_full_backref)
7850 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7852 btrfs_init_path(&path);
7853 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7855 * Sometimes the backrefs themselves are so broken they don't
7856 * get attached to any meaningful rec, so first go back and
7857 * check any of our backrefs that we couldn't find and throw
7858 * them into the list if we find the backref so that
7859 * verify_backrefs can figure out what to do.
7861 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7866 /* step one, make sure all of the backrefs agree */
7867 ret = verify_backrefs(info, &path, rec);
7871 trans = btrfs_start_transaction(info->extent_root, 1);
7872 if (IS_ERR(trans)) {
7873 ret = PTR_ERR(trans);
7877 /* step two, delete all the existing records */
7878 ret = delete_extent_records(trans, info->extent_root, &path,
7879 rec->start, rec->max_size);
7884 /* was this block corrupt? If so, don't add references to it */
7885 cache = lookup_cache_extent(info->corrupt_blocks,
7886 rec->start, rec->max_size);
7892 /* step three, recreate all the refs we did find */
7893 while(cur != &rec->backrefs) {
7894 back = to_extent_backref(cur);
7898 * if we didn't find any references, don't create a
7901 if (!back->found_ref)
7904 rec->bad_full_backref = 0;
7905 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
7913 int err = btrfs_commit_transaction(trans, info->extent_root);
7918 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of @rec so
 * that it matches rec->flag_block_full_backref.
 *
 * The item is looked up as (rec->start, BTRFS_METADATA_ITEM_KEY,
 * rec->info_level) for metadata records and as
 * (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) otherwise.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started, and the
 * result of the commit on the success path.
 *
 * NOTE(review): braces, a few declarations, the tests on the search
 * result and the returns of the two failure paths are not visible in this
 * extract. On those paths the commit result is not used by any visible
 * statement.
 */
7922 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7923 struct extent_record *rec)
7925 struct btrfs_trans_handle *trans;
7926 struct btrfs_root *root = fs_info->extent_root;
7927 struct btrfs_path path;
7928 struct btrfs_extent_item *ei;
7929 struct btrfs_key key;
7933 key.objectid = rec->start;
7934 if (rec->metadata) {
7935 key.type = BTRFS_METADATA_ITEM_KEY;
7936 key.offset = rec->info_level;
7938 key.type = BTRFS_EXTENT_ITEM_KEY;
7939 key.offset = rec->max_size;
7942 trans = btrfs_start_transaction(root, 0);
7944 return PTR_ERR(trans);
7946 btrfs_init_path(&path);
/* search with cow set because the item is modified below */
7947 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7949 btrfs_release_path(&path);
7950 btrfs_commit_transaction(trans, root);
7953 fprintf(stderr, "Didn't find extent for %llu\n",
7954 (unsigned long long)rec->start);
7955 btrfs_release_path(&path);
7956 btrfs_commit_transaction(trans, root);
7960 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7961 struct btrfs_extent_item);
/* read-modify-write of the flags field */
7962 flags = btrfs_extent_flags(path.nodes[0], ei);
7963 if (rec->flag_block_full_backref) {
7964 fprintf(stderr, "setting full backref on %llu\n",
7965 (unsigned long long)key.objectid);
7966 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7968 fprintf(stderr, "clearing full backref on %llu\n",
7969 (unsigned long long)key.objectid);
7970 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7972 btrfs_set_extent_flags(path.nodes[0], ei, flags);
7973 btrfs_mark_buffer_dirty(path.nodes[0]);
7974 btrfs_release_path(&path);
7975 return btrfs_commit_transaction(trans, root);
7978 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to the corrupt block @corrupt from its parent node in
 * the extent tree.
 *
 * The search for corrupt->key is stopped one level above the corrupt
 * block (path.lowest_level = corrupt->level + 1). The slot returned by
 * the search is checked first; if its block pointer is not
 * corrupt->cache.start, every slot of that node is scanned. Once a slot
 * is chosen the pointer is deleted with btrfs_del_ptr().
 *
 * NOTE(review): braces, some declarations, the gotos/labels linking the
 * lookup to the deletion, the handling of the not-found case and the
 * return are not visible in this extract.
 */
7979 static int prune_one_block(struct btrfs_trans_handle *trans,
7980 struct btrfs_fs_info *info,
7981 struct btrfs_corrupt_block *corrupt)
7984 struct btrfs_path path;
7985 struct extent_buffer *eb;
7989 int level = corrupt->level + 1;
7991 btrfs_init_path(&path);
7993 /* we want to stop at the parent to our busted block */
7994 path.lowest_level = level;
7996 ret = btrfs_search_slot(trans, info->extent_root,
7997 &corrupt->key, &path, -1, 1);
8002 eb = path.nodes[level];
8009 * hopefully the search gave us the block we want to prune,
8010 * lets try that first
8012 slot = path.slots[level];
8013 found = btrfs_node_blockptr(eb, slot);
8014 if (found == corrupt->cache.start)
8017 nritems = btrfs_header_nritems(eb);
8019 /* the search failed, lets scan this node and hope we find it */
8020 for (slot = 0; slot < nritems; slot++) {
8021 found = btrfs_node_blockptr(eb, slot);
8022 if (found == corrupt->cache.start)
8026 * we couldn't find the bad block. TODO, search all the nodes for pointers
/* special case when the node examined is the extent root node itself */
8029 if (eb == info->extent_root->node) {
8034 btrfs_release_path(&path);
8039 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8040 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8043 btrfs_release_path(&path);
/*
 * Prune the blocks recorded in info->corrupt_blocks from the extent tree.
 *
 * The visible code takes a cache entry from info->corrupt_blocks, starts
 * a transaction on the extent root, calls prune_one_block() for the
 * entry, removes the entry from the cache and commits the transaction.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started, and the
 * result of the commit otherwise.
 *
 * NOTE(review): the return value of prune_one_block() is not used by any
 * visible statement. The loop construct, braces and the test guarding
 * the transaction start are not visible in this extract.
 */
8047 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8049 struct btrfs_trans_handle *trans = NULL;
8050 struct cache_extent *cache;
8051 struct btrfs_corrupt_block *corrupt;
8054 cache = search_cache_extent(info->corrupt_blocks, 0);
8058 trans = btrfs_start_transaction(info->extent_root, 1);
8060 return PTR_ERR(trans);
8062 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8063 prune_one_block(trans, info, corrupt);
8064 remove_cache_extent(info->corrupt_blocks, cache);
8067 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Reset cached free-space state in @fs_info.
 *
 * The visible statements find ranges marked EXTENT_DIRTY in
 * fs_info->free_space_cache and clear that mark, then look up block
 * groups with btrfs_lookup_first_block_group() and advance start past
 * each one (key.objectid + key.offset).
 *
 * NOTE(review): the loop constructs, the loop exit tests, some
 * declarations and whatever is done to each block group are not visible
 * in this extract.
 */
8071 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8073 struct btrfs_block_group_cache *cache;
8078 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8079 &start, &end, EXTENT_DIRTY);
8082 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8088 cache = btrfs_lookup_first_block_group(fs_info, start);
/* continue the lookup right after the end of this block group */
8093 start = cache->key.objectid + cache->key.offset;
/*
 * Walk every extent record in @extent_cache, report inconsistencies and,
 * when repair is enabled, try to fix them.
 *
 * Visible phases:
 *  1. every cached extent range and every corrupt block range is marked
 *     dirty in fs_info->excluded_extents, corrupt blocks are pruned and
 *     the cached block groups are reset;
 *  2. while repairing, records on the global duplicate_extents list are
 *     run through process_duplicates() and delete_duplicate_records();
 *  3. each remaining record is checked for duplicate extent items, ref
 *     count mismatch, backpointer mismatch, failed owner ref check, bad
 *     full backref, stripe crossing and chunk type mismatch, with
 *     fixup_extent_refs() / fixup_extent_flags() invoked for some of
 *     them, after which the record is removed from the cache;
 *  4. on success block accounting is fixed in a final transaction.
 *
 * NOTE(review): braces, loop constructs, several declarations, the
 * bookkeeping of the fixed / recorded / cur_err variables, the gotos and
 * the returns are not visible in this extract.
 */
8097 static int check_extent_refs(struct btrfs_root *root,
8098 struct cache_tree *extent_cache)
8100 struct extent_record *rec;
8101 struct cache_extent *cache;
8110 * if we're doing a repair, we have to make sure
8111 * we don't allocate from the problem extents.
8112 * In the worst case, this will be all the
8115 cache = search_cache_extent(extent_cache, 0);
8117 rec = container_of(cache, struct extent_record, cache);
8118 set_extent_dirty(root->fs_info->excluded_extents,
8120 rec->start + rec->max_size - 1,
8122 cache = next_cache_extent(cache);
8125 /* pin down all the corrupted blocks too */
8126 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8128 set_extent_dirty(root->fs_info->excluded_extents,
8130 cache->start + cache->size - 1,
8132 cache = next_cache_extent(cache);
8134 prune_corrupt_blocks(root->fs_info);
8135 reset_cached_block_groups(root->fs_info);
8138 reset_cached_block_groups(root->fs_info);
8141 * We need to delete any duplicate entries we find first otherwise we
8142 * could mess up the extent tree when we have backrefs that actually
8143 * belong to a different extent item and not the weird duplicate one.
8145 while (repair && !list_empty(&duplicate_extents)) {
8146 rec = to_extent_record(duplicate_extents.next);
8147 list_del_init(&rec->list);
8149 /* Sometimes we can find a backref before we find an actual
8150 * extent, so we need to process it a little bit to see if there
8151 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8152 * if this is a backref screwup. If we need to delete stuff
8153 * process_duplicates() will return 0, otherwise it will return
8156 if (process_duplicates(root, extent_cache, rec))
8158 ret = delete_duplicate_records(root, rec);
8162 * delete_duplicate_records will return the number of entries
8163 * deleted, so if it's greater than 0 then we know we actually
8164 * did something and we need to remove.
// Main pass over the remaining extent records.
8178 cache = search_cache_extent(extent_cache, 0);
8181 rec = container_of(cache, struct extent_record, cache);
8182 if (rec->num_duplicates) {
8183 fprintf(stderr, "extent item %llu has multiple extent "
8184 "items\n", (unsigned long long)rec->start);
8189 if (rec->refs != rec->extent_item_refs) {
8190 fprintf(stderr, "ref mismatch on [%llu %llu] ",
8191 (unsigned long long)rec->start,
8192 (unsigned long long)rec->nr);
8193 fprintf(stderr, "extent item %llu, found %llu\n",
8194 (unsigned long long)rec->extent_item_refs,
8195 (unsigned long long)rec->refs);
8196 ret = record_orphan_data_extents(root->fs_info, rec);
8203 * we can't use the extent to repair file
8204 * extent, let the fallback method handle it.
8206 if (!fixed && repair) {
8207 ret = fixup_extent_refs(
8218 if (all_backpointers_checked(rec, 1)) {
8219 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8220 (unsigned long long)rec->start,
8221 (unsigned long long)rec->nr);
8223 if (!fixed && !recorded && repair) {
8224 ret = fixup_extent_refs(root->fs_info,
8233 if (!rec->owner_ref_checked) {
8234 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8235 (unsigned long long)rec->start,
8236 (unsigned long long)rec->nr);
8237 if (!fixed && !recorded && repair) {
8238 ret = fixup_extent_refs(root->fs_info,
8247 if (rec->bad_full_backref) {
8248 fprintf(stderr, "bad full backref, on [%llu]\n",
8249 (unsigned long long)rec->start);
8251 ret = fixup_extent_flags(root->fs_info, rec);
8260 * Although it's not a extent ref's problem, we reuse this
8261 * routine for error reporting.
8262 * No repair function yet.
// NOTE(review): the two messages below pass rec->start and
// rec->start + rec->max_size to %llu without the (unsigned long long)
// cast used elsewhere in this function; confirm u64 matches %llu.
8264 if (rec->crossing_stripes) {
8266 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8267 rec->start, rec->start + rec->max_size);
8272 if (rec->wrong_chunk_type) {
8274 "bad extent [%llu, %llu), type mismatch with chunk\n",
8275 rec->start, rec->start + rec->max_size);
// Done with this record: drop it from the cache and release its
// backrefs; the excluded range is cleared only when repairing without
// init_extent_tree and the record had no error or was fixed.
8280 remove_cache_extent(extent_cache, cache);
8281 free_all_extent_backrefs(rec);
8282 if (!init_extent_tree && repair && (!cur_err || fixed))
8283 clear_extent_dirty(root->fs_info->excluded_extents,
8285 rec->start + rec->max_size - 1,
8291 if (ret && ret != -EAGAIN) {
8292 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8295 struct btrfs_trans_handle *trans;
8297 root = root->fs_info->extent_root;
8298 trans = btrfs_start_transaction(root, 1);
8299 if (IS_ERR(trans)) {
8300 ret = PTR_ERR(trans);
8304 btrfs_fix_block_accounting(trans, root);
8305 ret = btrfs_commit_transaction(trans, root);
8310 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Derive a stripe size from a chunk length and its profile.
 *
 * @type:        profile flags; the BTRFS_BLOCK_GROUP_RAID0/RAID10/RAID5/RAID6
 *               bits are tested in that order
 * @length:      chunk length to split
 * @num_stripes: number of stripes recorded for the chunk
 *
 * RAID0 uses length / num_stripes, RAID10 uses (length * 2) / num_stripes,
 * RAID5 uses length / (num_stripes - 1) and RAID6 uses
 * length / (num_stripes - 2). The trailing assignment keeps length unchanged,
 * presumably for every other profile.
 *
 * NOTE(review): no guard against a zero divisor is visible here (num_stripes
 * of 0 for RAID0/RAID10, 1 for RAID5, 2 for RAID6). Confirm that callers
 * validate the chunk before calling.
 */
8316 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8320 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8321 stripe_size = length;
8322 stripe_size /= num_stripes;
8323 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
/* RAID10: twice the length is divided across the stripes */
8324 stripe_size = length * 2;
8325 stripe_size /= num_stripes;
8326 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
/* RAID5: one stripe is left out of the divisor */
8327 stripe_size = length;
8328 stripe_size /= (num_stripes - 1);
8329 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
/* RAID6: two stripes are left out of the divisor */
8330 stripe_size = length;
8331 stripe_size /= (num_stripes - 2);
8333 stripe_size = length;
8339 * Check the chunk with its block group/dev list ref:
8340 * Return 0 if all refs seem valid.
8341 * Return 1 if only part of the refs seem valid and a later check is needed to
8342 * rebuild them (e.g. a missing block group requires searching the extent tree).
8343 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against the cached block group records and
 * device extent records.
 *
 * @chunk_rec:         chunk being verified
 * @block_group_cache: cache searched for the matching block group
 * @dev_extent_cache:  cache searched for one dev extent per stripe
 *
 * A matching block group is unlinked from its list and stored in
 * chunk_rec->bg_rec; a matching dev extent is moved onto chunk_rec->dextents.
 * Mismatches and missing items have dedicated messages below.
 */
8345 static int check_chunk_refs(struct chunk_record *chunk_rec,
8346 struct block_group_tree *block_group_cache,
8347 struct device_extent_tree *dev_extent_cache,
8350 struct cache_extent *block_group_item;
8351 struct block_group_record *block_group_rec;
8352 struct cache_extent *dev_extent_item;
8353 struct device_extent_record *dev_extent_rec;
8357 int metadump_v2 = 0;
/* Step 1: find the block group that should mirror this chunk */
8361 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8364 if (block_group_item) {
8365 block_group_rec = container_of(block_group_item,
8366 struct block_group_record,
/*
 * Chunk length and offset are compared with the block group key offset and
 * objectid; the flags comparison sits inside a parenthesised sub-condition
 * (presumably relaxed for metadump_v2 images - TODO confirm).
 */
8368 if (chunk_rec->length != block_group_rec->offset ||
8369 chunk_rec->offset != block_group_rec->objectid ||
8371 chunk_rec->type_flags != block_group_rec->flags)) {
8374 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8375 chunk_rec->objectid,
8380 chunk_rec->type_flags,
8381 block_group_rec->objectid,
8382 block_group_rec->type,
8383 block_group_rec->offset,
8384 block_group_rec->offset,
8385 block_group_rec->objectid,
8386 block_group_rec->flags);
/* Block group accepted: detach it from its list and remember it on the chunk */
8389 list_del_init(&block_group_rec->list);
8390 chunk_rec->bg_rec = block_group_rec;
8395 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8396 chunk_rec->objectid,
8401 chunk_rec->type_flags);
/* Step 2: each stripe is looked up as a dev extent of the computed length */
8408 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8409 chunk_rec->num_stripes);
8410 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8411 devid = chunk_rec->stripes[i].devid;
8412 offset = chunk_rec->stripes[i].offset;
8413 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8414 devid, offset, length);
8415 if (dev_extent_item) {
8416 dev_extent_rec = container_of(dev_extent_item,
8417 struct device_extent_record,
/* Device id, device offset, owning chunk offset and length must all agree */
8419 if (dev_extent_rec->objectid != devid ||
8420 dev_extent_rec->offset != offset ||
8421 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8422 dev_extent_rec->length != length) {
8425 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8426 chunk_rec->objectid,
8429 chunk_rec->stripes[i].devid,
8430 chunk_rec->stripes[i].offset,
8431 dev_extent_rec->objectid,
8432 dev_extent_rec->offset,
8433 dev_extent_rec->length);
/* Dev extent accepted: move it onto the list owned by this chunk */
8436 list_move(&dev_extent_rec->chunk_list,
8437 &chunk_rec->dextents);
8442 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8443 chunk_rec->objectid,
8446 chunk_rec->stripes[i].devid,
8447 chunk_rec->stripes[i].offset);
8454 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Verify every chunk record in @chunk_cache with check_chunk_refs() and sort
 * the records by result.
 *
 * @good, @bad, @rebuild: optional output lists. A result of 0 appends the
 *     chunk to @good and a positive result appends it to @rebuild, each only
 *     when the list pointer is non-NULL. The remaining append targets @bad
 *     (presumably for negative results and a non-NULL @bad - TODO confirm).
 * @silent: forwarded to check_chunk_refs().
 *
 * After the chunk walk, the entries still linked on
 * block_group_cache->block_groups and on dev_extent_cache->no_chunk_orphans
 * are iterated; the messages below describe them as having no chunk.
 */
8455 int check_chunks(struct cache_tree *chunk_cache,
8456 struct block_group_tree *block_group_cache,
8457 struct device_extent_tree *dev_extent_cache,
8458 struct list_head *good, struct list_head *bad,
8459 struct list_head *rebuild, int silent)
8461 struct cache_extent *chunk_item;
8462 struct chunk_record *chunk_rec;
8463 struct block_group_record *bg_rec;
8464 struct device_extent_record *dext_rec;
8468 chunk_item = first_cache_extent(chunk_cache);
8469 while (chunk_item) {
8470 chunk_rec = container_of(chunk_item, struct chunk_record,
8472 err = check_chunk_refs(chunk_rec, block_group_cache,
8473 dev_extent_cache, silent);
/* Classify the chunk according to the check result */
8476 if (err == 0 && good)
8477 list_add_tail(&chunk_rec->list, good);
8478 if (err > 0 && rebuild)
8479 list_add_tail(&chunk_rec->list, rebuild);
8481 list_add_tail(&chunk_rec->list, bad);
8482 chunk_item = next_cache_extent(chunk_item);
/* Block groups that were never claimed by a chunk remain on this list */
8485 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8488 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Likewise for dev extents that no chunk claimed */
8496 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8500 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Compare the bytes covered by the cached dev extents of one device with the
 * byte_used value stored in its device record.
 *
 * @dev_rec:    device record providing devid and byte_used
 * @dext_cache: dev extent cache, searched starting at (devid, 0)
 *
 * Each visited dev extent is unlinked from its device_list and its length is
 * added to total_byte. A mismatch between the total and dev_rec->byte_used
 * has the message below.
 */
8511 static int check_device_used(struct device_record *dev_rec,
8512 struct device_extent_tree *dext_cache)
8514 struct cache_extent *cache;
8515 struct device_extent_record *dev_extent_rec;
8518 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8520 dev_extent_rec = container_of(cache,
8521 struct device_extent_record,
/* A record with another devid presumably ends the walk - TODO confirm */
8523 if (dev_extent_rec->objectid != dev_rec->devid)
/* Claim the dev extent for this device and account its length */
8526 list_del_init(&dev_extent_rec->device_list);
8527 total_byte += dev_extent_rec->length;
8528 cache = next_cache_extent(cache);
8531 if (total_byte != dev_rec->byte_used) {
8533 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8534 total_byte, dev_rec->byte_used, dev_rec->objectid,
8535 dev_rec->type, dev_rec->offset);
8542 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() for device records in the @dev_cache rb-tree,
 * starting at rb_first() and advancing with rb_next(). Then iterate the dev
 * extents still linked on dev_extent_cache->no_device_orphans; the message
 * below describes them as having no device.
 */
8543 static int check_devices(struct rb_root *dev_cache,
8544 struct device_extent_tree *dev_extent_cache)
8546 struct rb_node *dev_node;
8547 struct device_record *dev_rec;
8548 struct device_extent_record *dext_rec;
8552 dev_node = rb_first(dev_cache);
8554 dev_rec = container_of(dev_node, struct device_record, node);
8555 err = check_device_used(dev_rec, dev_extent_cache);
8559 dev_node = rb_next(dev_node);
/* Dev extents that no device claimed are still linked on this list */
8561 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8564 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8565 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @objectid, @bytenr, @last_snapshot, @level, @drop_level, @level_size are
 * copied into the fields of the same name.
 * @drop_key is copied into the record with memcpy. Callers in this file pass
 * NULL for trees that are not being dropped, so the copy is presumably
 * guarded by a NULL test - TODO confirm.
 *
 * NOTE(review): the handling of a failed malloc is not visible here; confirm
 * that an allocation failure is reported to the caller.
 */
8572 static int add_root_item_to_list(struct list_head *head,
8573 u64 objectid, u64 bytenr, u64 last_snapshot,
8574 u8 level, u8 drop_level,
8575 int level_size, struct btrfs_key *drop_key)
8578 struct root_item_record *ri_rec;
8579 ri_rec = malloc(sizeof(*ri_rec));
8582 ri_rec->bytenr = bytenr;
8583 ri_rec->objectid = objectid;
8584 ri_rec->level = level;
8585 ri_rec->level_size = level_size;
8586 ri_rec->drop_level = drop_level;
8587 ri_rec->last_snapshot = last_snapshot;
8589 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
/* Queue order is preserved: new records go to the tail */
8590 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list: while it is not empty, take the first root_item_record and
 * unlink it. Presumably each record is also freed after being unlinked -
 * TODO confirm.
 */
8595 static void free_root_item_list(struct list_head *list)
8597 struct root_item_record *ri_rec;
8599 while (!list_empty(list)) {
8600 ri_rec = list_first_entry(list, struct root_item_record,
8602 list_del_init(&ri_rec->list);
/*
 * Consume the root_item_record entries queued on @list.
 *
 * For each record the root block is read through fs_info->tree_root using
 * rec->bytenr and rec->level_size, queued with add_root_to_pending() under
 * rec->objectid, and run_next_block() is called with the record. The buffer
 * is then released and the record unlinked from the list. A further
 * run_next_block() call with a NULL record follows the per-record work.
 *
 * The cache and tree arguments are passed through unchanged to
 * add_root_to_pending() and run_next_block().
 */
8607 static int deal_root_from_list(struct list_head *list,
8608 struct btrfs_root *root,
8609 struct block_info *bits,
8611 struct cache_tree *pending,
8612 struct cache_tree *seen,
8613 struct cache_tree *reada,
8614 struct cache_tree *nodes,
8615 struct cache_tree *extent_cache,
8616 struct cache_tree *chunk_cache,
8617 struct rb_root *dev_cache,
8618 struct block_group_tree *block_group_cache,
8619 struct device_extent_tree *dev_extent_cache)
8624 while (!list_empty(list)) {
8625 struct root_item_record *rec;
8626 struct extent_buffer *buf;
8627 rec = list_entry(list->next,
8628 struct root_item_record, list);
/* Read the root block of this tree; an unreadable buffer is released */
8630 buf = read_tree_block(root->fs_info->tree_root,
8631 rec->bytenr, rec->level_size, 0);
8632 if (!extent_buffer_uptodate(buf)) {
8633 free_extent_buffer(buf);
8637 ret = add_root_to_pending(buf, extent_cache, pending,
8638 seen, nodes, rec->objectid);
8642 * To rebuild extent tree, we need deal with snapshot
8643 * one by one, otherwise we deal with node firstly which
8644 * can maximize readahead.
8647 ret = run_next_block(root, bits, bits_nr, &last,
8648 pending, seen, reada, nodes,
8649 extent_cache, chunk_cache,
8650 dev_cache, block_group_cache,
8651 dev_extent_cache, rec);
/* Done with this root: drop the buffer reference and dequeue the record */
8655 free_extent_buffer(buf);
8656 list_del(&rec->list);
/* Remaining pending blocks are processed without a root item record */
8662 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8663 reada, nodes, extent_cache, chunk_cache,
8664 dev_cache, block_group_cache,
8665 dev_extent_cache, NULL);
/*
 * Driver for the chunk, block group, dev extent and extent reference checks.
 *
 * Outline, as far as visible here:
 *  1. initialise the local caches, trees and the two root item lists;
 *  2. publish excluded_extents, fsck_extent_cache, free_extent_hook and
 *     corrupt_blocks through root->fs_info;
 *  3. queue the tree root and the chunk root, then walk the ROOT_ITEM keys of
 *     the tree root and queue each root on normal_trees or dropping_trees;
 *  4. process both lists with deal_root_from_list();
 *  5. run check_chunks(), check_extent_refs() and check_devices();
 *  6. clear the fs_info pointers again and free the caches.
 *
 * Two cleanup sequences appear at the end. The second one additionally frees
 * the extent record cache and both root item lists; presumably it belongs to
 * the restart path taken on -EAGAIN - TODO confirm.
 */
8675 static int check_chunks_and_extents(struct btrfs_root *root)
8677 struct rb_root dev_cache;
8678 struct cache_tree chunk_cache;
8679 struct block_group_tree block_group_cache;
8680 struct device_extent_tree dev_extent_cache;
8681 struct cache_tree extent_cache;
8682 struct cache_tree seen;
8683 struct cache_tree pending;
8684 struct cache_tree reada;
8685 struct cache_tree nodes;
8686 struct extent_io_tree excluded_extents;
8687 struct cache_tree corrupt_blocks;
8688 struct btrfs_path path;
8689 struct btrfs_key key;
8690 struct btrfs_key found_key;
8692 struct block_info *bits;
8694 struct extent_buffer *leaf;
8696 struct btrfs_root_item ri;
8697 struct list_head dropping_trees;
8698 struct list_head normal_trees;
8699 struct btrfs_root *root1;
8704 dev_cache = RB_ROOT;
8705 cache_tree_init(&chunk_cache);
8706 block_group_tree_init(&block_group_cache);
8707 device_extent_tree_init(&dev_extent_cache);
8709 cache_tree_init(&extent_cache);
8710 cache_tree_init(&seen);
8711 cache_tree_init(&pending);
8712 cache_tree_init(&nodes);
8713 cache_tree_init(&reada);
8714 cache_tree_init(&corrupt_blocks);
8715 extent_io_tree_init(&excluded_extents);
8716 INIT_LIST_HEAD(&dropping_trees);
8717 INIT_LIST_HEAD(&normal_trees);
/* Expose the local caches and the free-extent hook through fs_info */
8720 root->fs_info->excluded_extents = &excluded_extents;
8721 root->fs_info->fsck_extent_cache = &extent_cache;
8722 root->fs_info->free_extent_hook = free_extent_hook;
8723 root->fs_info->corrupt_blocks = &corrupt_blocks;
8727 bits = malloc(bits_nr * sizeof(struct block_info));
8733 if (ctx.progress_enabled) {
8734 ctx.tp = TASK_EXTENTS;
8735 task_start(ctx.info);
/* The tree root and the chunk root are queued explicitly as normal trees */
8739 root1 = root->fs_info->tree_root;
8740 level = btrfs_header_level(root1->node);
8741 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8742 root1->node->start, 0, level, 0,
8743 root1->nodesize, NULL);
8746 root1 = root->fs_info->chunk_root;
8747 level = btrfs_header_level(root1->node);
8748 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8749 root1->node->start, 0, level, 0,
8750 root1->nodesize, NULL);
8753 btrfs_init_path(&path);
/* Walk the ROOT_ITEM keys stored in the tree root */
8756 key.type = BTRFS_ROOT_ITEM_KEY;
8757 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8762 leaf = path.nodes[0];
8763 slot = path.slots[0];
8764 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8765 ret = btrfs_next_leaf(root, &path);
8768 leaf = path.nodes[0];
8769 slot = path.slots[0];
8771 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8772 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8773 unsigned long offset;
8776 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8777 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8778 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A drop_progress objectid of 0 queues the root on normal_trees; the other
 * branch queues it on dropping_trees together with a key converted from
 * disk format.
 */
8779 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8780 level = btrfs_root_level(&ri);
8781 level_size = root->nodesize;
8782 ret = add_root_item_to_list(&normal_trees,
8784 btrfs_root_bytenr(&ri),
8785 last_snapshot, level,
8786 0, level_size, NULL);
8790 level = btrfs_root_level(&ri);
8791 level_size = root->nodesize;
8792 objectid = found_key.objectid;
8793 btrfs_disk_key_to_cpu(&found_key,
8795 ret = add_root_item_to_list(&dropping_trees,
8797 btrfs_root_bytenr(&ri),
8798 last_snapshot, level,
8800 level_size, &found_key);
8807 btrfs_release_path(&path);
8810 * check_block can return -EAGAIN if it fixes something, please keep
8811 * this in mind when dealing with return values from these functions, if
8812 * we get -EAGAIN we want to fall through and restart the loop.
8814 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8815 &seen, &reada, &nodes, &extent_cache,
8816 &chunk_cache, &dev_cache, &block_group_cache,
8823 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8824 &pending, &seen, &reada, &nodes,
8825 &extent_cache, &chunk_cache, &dev_cache,
8826 &block_group_cache, &dev_extent_cache);
/* No output lists are requested from check_chunks() and silent is 0 */
8833 ret = check_chunks(&chunk_cache, &block_group_cache,
8834 &dev_extent_cache, NULL, NULL, NULL, 0);
8841 ret = check_extent_refs(root, &extent_cache);
8848 ret = check_devices(&dev_cache, &dev_extent_cache);
8853 task_stop(ctx.info);
/* First cleanup sequence: unhook fs_info and free the caches */
8855 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8856 extent_io_tree_cleanup(&excluded_extents);
8857 root->fs_info->fsck_extent_cache = NULL;
8858 root->fs_info->free_extent_hook = NULL;
8859 root->fs_info->corrupt_blocks = NULL;
8860 root->fs_info->excluded_extents = NULL;
8863 free_chunk_cache_tree(&chunk_cache);
8864 free_device_cache_tree(&dev_cache);
8865 free_block_group_tree(&block_group_cache);
8866 free_device_extent_tree(&dev_extent_cache);
8867 free_extent_cache_tree(&seen);
8868 free_extent_cache_tree(&pending);
8869 free_extent_cache_tree(&reada);
8870 free_extent_cache_tree(&nodes);
/*
 * Second cleanup sequence: also drops the extent records and both root item
 * lists, leaving the fs_info pointers in place.
 */
8873 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8874 free_extent_cache_tree(&seen);
8875 free_extent_cache_tree(&pending);
8876 free_extent_cache_tree(&reada);
8877 free_extent_cache_tree(&nodes);
8878 free_chunk_cache_tree(&chunk_cache);
8879 free_block_group_tree(&block_group_cache);
8880 free_device_cache_tree(&dev_cache);
8881 free_device_extent_tree(&dev_extent_cache);
8882 free_extent_record_cache(root->fs_info, &extent_cache);
8883 free_root_item_list(&normal_trees);
8884 free_root_item_list(&dropping_trees);
8885 extent_io_tree_cleanup(&excluded_extents);
8890 * Check backrefs of a tree block given by @bytenr or @eb.
8892 * @root: the root containing the @bytenr or @eb
8893 * @eb: tree block extent buffer, can be NULL
8894 * @bytenr: bytenr of the tree block to search
8895 * @level: tree level of the tree block
8896 * @owner: owner of the tree block
8898 * Return >0 for any error found and output error message
8899 * Return 0 for no error found
/*
 * Implementation notes:
 *  - The extent tree is searched with offset (u64)-1 and then stepped back
 *    with btrfs_previous_extent_item(), using METADATA_ITEM_KEY when the
 *    SKINNY_METADATA incompat flag is set and EXTENT_ITEM_KEY otherwise.
 *  - Inline refs are scanned first; a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner matches directly, while a SHARED_BLOCK_REF is
 *    resolved by a recursive call with a NULL @eb.
 *  - A separate TREE_BLOCK_REF item keyed by root->objectid is searched as
 *    a fallback.
 *  - The final "backref lost" message is only printed when @eb is non-NULL.
 *
 * NOTE(review): the mismatch branches assign err with "=" while the missing
 * branches use "|="; this is only equivalent if no other bit can be set
 * before them - confirm.
 * NOTE(review): @level and skinny_level are ints but are printed with %u.
 */
8901 static int check_tree_block_ref(struct btrfs_root *root,
8902 struct extent_buffer *eb, u64 bytenr,
8903 int level, u64 owner)
8905 struct btrfs_key key;
8906 struct btrfs_root *extent_root = root->fs_info->extent_root;
8907 struct btrfs_path path;
8908 struct btrfs_extent_item *ei;
8909 struct btrfs_extent_inline_ref *iref;
8910 struct extent_buffer *leaf;
8916 u32 nodesize = root->nodesize;
8923 btrfs_init_path(&path);
8924 key.objectid = bytenr;
8925 if (btrfs_fs_incompat(root->fs_info,
8926 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8927 key.type = BTRFS_METADATA_ITEM_KEY;
8929 key.type = BTRFS_EXTENT_ITEM_KEY;
8930 key.offset = (u64)-1;
8932 /* Search for the backref in extent tree */
8933 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8935 err |= BACKREF_MISSING;
8938 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8940 err |= BACKREF_MISSING;
8944 leaf = path.nodes[0];
8945 slot = path.slots[0];
8946 btrfs_item_key_to_cpu(leaf, &key, slot);
8948 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is key.offset and inline refs follow the extent
 * item directly. Otherwise a btrfs_tree_block_info sits in between and
 * carries the level.
 */
8950 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8951 skinny_level = (int)key.offset;
8952 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8954 struct btrfs_tree_block_info *info;
8956 info = (struct btrfs_tree_block_info *)(ei + 1);
8957 skinny_level = btrfs_tree_block_level(leaf, info);
8958 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Consistency checks of the extent item: flags, generation, level, refs */
8965 if (!(btrfs_extent_flags(leaf, ei) &
8966 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8968 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8969 key.objectid, nodesize,
8970 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8971 err = BACKREF_MISMATCH;
8973 header_gen = btrfs_header_generation(eb);
8974 extent_gen = btrfs_extent_generation(leaf, ei);
8975 if (header_gen != extent_gen) {
8977 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8978 key.objectid, nodesize, header_gen,
8980 err = BACKREF_MISMATCH;
8982 if (level != skinny_level) {
8984 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8985 key.objectid, nodesize, level, skinny_level);
8986 err = BACKREF_MISMATCH;
8988 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8990 "extent[%llu %u] is referred by other roots than %llu",
8991 key.objectid, nodesize, root->objectid);
8992 err = BACKREF_MISMATCH;
8997 * Iterate the extent/metadata item to find the exact backref
8999 item_size = btrfs_item_size_nr(leaf, slot);
9000 ptr = (unsigned long)iref;
9001 end = (unsigned long)ei + item_size;
9003 iref = (struct btrfs_extent_inline_ref *)ptr;
9004 type = btrfs_extent_inline_ref_type(leaf, iref);
9005 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9007 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9008 (offset == root->objectid || offset == owner)) {
9010 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9011 /* Check if the backref points to valid referencer */
9012 found_ref = !check_tree_block_ref(root, NULL, offset,
/* Step to the next inline ref; the step size depends on the ref type */
9018 ptr += btrfs_extent_inline_ref_size(type);
9022 * Inlined extent item doesn't have what we need, check
9023 * TREE_BLOCK_REF_KEY
9026 btrfs_release_path(&path);
9027 key.objectid = bytenr;
9028 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9029 key.offset = root->objectid;
9031 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9036 err |= BACKREF_MISSING;
9038 btrfs_release_path(&path);
/* Recursive calls pass a NULL eb, so only the outermost call reports */
9039 if (eb && (err & BACKREF_MISSING))
9040 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9041 bytenr, nodesize, owner, level);
9046 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9048 * Return >0 any error found and output error message
9049 * Return 0 for no error found
/*
 * Implementation notes:
 *  - @eb and @slot identify the EXTENT_DATA item; inline extents and extents
 *    with a disk bytenr of 0 are tested for first, as nothing is checked
 *    for them.
 *  - disk_num_bytes and num_bytes are tested for sectorsize alignment and
 *    feed the global data_bytes_allocated / data_bytes_referenced counters.
 *  - The extent item is looked up by (disk_bytenr, EXTENT_ITEM,
 *    disk_num_bytes); its flags must contain BTRFS_EXTENT_FLAG_DATA and its
 *    generation must not be newer than the file extent generation.
 *  - The data backref is searched among the inline refs first, then as a
 *    separate EXTENT_DATA_REF item keyed by hash_extent_data_ref().
 *  - The @slot parameter is reused for the extent tree slot after the lookup.
 */
9051 static int check_extent_data_item(struct btrfs_root *root,
9052 struct extent_buffer *eb, int slot)
9054 struct btrfs_file_extent_item *fi;
9055 struct btrfs_path path;
9056 struct btrfs_root *extent_root = root->fs_info->extent_root;
9057 struct btrfs_key fi_key;
9058 struct btrfs_key dbref_key;
9059 struct extent_buffer *leaf;
9060 struct btrfs_extent_item *ei;
9061 struct btrfs_extent_inline_ref *iref;
9062 struct btrfs_extent_data_ref *dref;
9064 u64 file_extent_gen;
9067 u64 extent_num_bytes;
9075 int found_dbackref = 0;
9079 btrfs_item_key_to_cpu(eb, &fi_key, slot);
9080 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9081 file_extent_gen = btrfs_file_extent_generation(eb, fi);
9083 /* Nothing to check for hole and inline data extents */
9084 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9085 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9088 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9089 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9090 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9092 /* Check unaligned disk_num_bytes and num_bytes */
9093 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9095 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9096 fi_key.objectid, fi_key.offset, disk_num_bytes,
9098 err |= BYTES_UNALIGNED;
9100 data_bytes_allocated += disk_num_bytes;
9102 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9104 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9105 fi_key.objectid, fi_key.offset, extent_num_bytes,
9107 err |= BYTES_UNALIGNED;
9109 data_bytes_referenced += extent_num_bytes;
9111 owner = btrfs_header_owner(eb);
9113 /* Check the extent item of the file extent in extent tree */
9114 btrfs_init_path(&path);
9115 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9116 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9117 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9119 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9121 err |= BACKREF_MISSING;
9125 leaf = path.nodes[0];
9126 slot = path.slots[0];
9127 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9129 extent_flags = btrfs_extent_flags(leaf, ei);
9130 extent_gen = btrfs_extent_generation(leaf, ei);
9132 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9134 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9135 disk_bytenr, disk_num_bytes,
9136 BTRFS_EXTENT_FLAG_DATA);
9137 err |= BACKREF_MISMATCH;
/* The file extent generation must be at least the extent item generation */
9140 if (file_extent_gen < extent_gen) {
9142 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9143 disk_bytenr, disk_num_bytes, file_extent_gen,
9145 err |= BACKREF_MISMATCH;
9148 /* Check data backref inside that extent item */
9149 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9150 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9151 ptr = (unsigned long)iref;
9152 end = (unsigned long)ei + item_size;
9154 iref = (struct btrfs_extent_inline_ref *)ptr;
9155 type = btrfs_extent_inline_ref_type(leaf, iref);
9156 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/*
 * EXTENT_DATA_REF: the ref root is compared with the leaf owner and with
 * root->objectid. SHARED_DATA_REF: resolved through check_tree_block_ref()
 * on the block named by the inline ref offset.
 */
9158 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9159 ref_root = btrfs_extent_data_ref_root(leaf, dref);
9160 if (ref_root == owner || ref_root == root->objectid)
9162 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9163 found_dbackref = !check_tree_block_ref(root, NULL,
9164 btrfs_extent_inline_ref_offset(leaf, iref),
9170 ptr += btrfs_extent_inline_ref_size(type);
9173 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
9174 if (!found_dbackref) {
9175 btrfs_release_path(&path);
9177 btrfs_init_path(&path);
9178 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9179 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9180 dbref_key.offset = hash_extent_data_ref(root->objectid,
9181 fi_key.objectid, fi_key.offset);
9183 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9184 &dbref_key, &path, 0, 0);
9189 if (!found_dbackref)
9190 err |= BACKREF_MISSING;
9192 btrfs_release_path(&path);
9193 if (err & BACKREF_MISSING) {
9194 error("data extent[%llu %llu] backref lost",
9195 disk_bytenr, disk_num_bytes);
9201 * Get real tree block level for the case like shared block
9202 * Return >= 0 as tree level
9203 * Return <0 for error
/*
 * Implementation notes:
 *  - The extent tree is searched with (bytenr, METADATA_ITEM, (u64)-1) and
 *    then stepped back with btrfs_previous_extent_item().
 *  - The found item must carry BTRFS_EXTENT_FLAG_TREE_BLOCK.
 *  - The level comes from key.offset for a METADATA_ITEM, otherwise from the
 *    btrfs_tree_block_info that follows the extent item.
 *  - The tree block itself is then read with the extent generation as the
 *    expected transid, and its header level is compared with the backref
 *    level before header_level is returned.
 */
9205 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9207 struct extent_buffer *eb;
9208 struct btrfs_path path;
9209 struct btrfs_key key;
9210 struct btrfs_extent_item *ei;
9213 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9218 /* Search extent tree for extent generation and level */
9219 key.objectid = bytenr;
9220 key.type = BTRFS_METADATA_ITEM_KEY;
9221 key.offset = (u64)-1;
9223 btrfs_init_path(&path);
9224 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9227 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9235 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9236 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9237 struct btrfs_extent_item);
9238 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only tree block extents have a level to report */
9239 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9244 /* Get transid for later read_tree_block() check */
9245 transid = btrfs_extent_generation(path.nodes[0], ei);
9247 /* Get backref level as one source */
9248 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9249 backref_level = key.offset;
9251 struct btrfs_tree_block_info *info;
9253 info = (struct btrfs_tree_block_info *)(ei + 1);
9254 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9256 btrfs_release_path(&path);
9258 /* Get level from tree block as an alternative source */
9259 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9260 if (!extent_buffer_uptodate(eb)) {
9261 free_extent_buffer(eb);
9264 header_level = btrfs_header_level(eb);
9265 free_extent_buffer(eb);
/* Both sources must agree before the level is trusted */
9267 if (header_level != backref_level)
9269 return header_level;
9272 btrfs_release_path(&path);
9277 * Check if a tree block backref is valid (points to a valid tree block)
9278 * if level == -1, level will be resolved
9279 * Return >0 for any error found and print error message
/*
 * Implementation notes:
 *  - The level may be resolved through query_tree_block_level().
 *  - The referencing root is loaded with btrfs_read_fs_root() using
 *    (root_id, ROOT_ITEM, (u64)-1).
 *  - The tree block is read to obtain its first key (node key or item key),
 *    then the root is searched for that key with path.lowest_level = level.
 *    The block found at that level must have the expected bytenr and level.
 *  - An empty block at level 0 is tested for before any key is read.
 *
 * NOTE(review): nodesize is a u32 but is printed with %d in the messages of
 * this function.
 */
9281 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9282 u64 bytenr, int level)
9284 struct btrfs_root *root;
9285 struct btrfs_key key;
9286 struct btrfs_path path;
9287 struct extent_buffer *eb;
9288 struct extent_buffer *node;
9289 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9293 /* Query level for level == -1 special case */
9295 level = query_tree_block_level(fs_info, bytenr);
9297 err |= REFERENCER_MISSING;
9301 key.objectid = root_id;
9302 key.type = BTRFS_ROOT_ITEM_KEY;
9303 key.offset = (u64)-1;
9305 root = btrfs_read_fs_root(fs_info, &key);
9307 err |= REFERENCER_MISSING;
9311 /* Read out the tree block to get item/node key */
9312 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9313 if (!extent_buffer_uptodate(eb)) {
9314 err |= REFERENCER_MISSING;
9315 free_extent_buffer(eb);
9319 /* Empty tree, no need to check key */
9320 if (!btrfs_header_nritems(eb) && !level) {
9321 free_extent_buffer(eb);
/* First key of the block: node key or item key */
9326 btrfs_node_key_to_cpu(eb, &key, 0);
9328 btrfs_item_key_to_cpu(eb, &key, 0);
9330 free_extent_buffer(eb);
9332 btrfs_init_path(&path);
9333 path.lowest_level = level;
9334 /* Search with the first key, to ensure we can reach it */
9335 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9337 err |= REFERENCER_MISSING;
/* The block reached at this level must be the one being verified */
9341 node = path.nodes[level];
9342 if (btrfs_header_bytenr(node) != bytenr) {
9344 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9345 bytenr, nodesize, bytenr,
9346 btrfs_header_bytenr(node));
9347 err |= REFERENCER_MISMATCH;
9349 if (btrfs_header_level(node) != level) {
9351 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9352 bytenr, nodesize, level,
9353 btrfs_header_level(node));
9354 err |= REFERENCER_MISMATCH;
9358 btrfs_release_path(&path);
/* Two message variants exist: one without and one with the level */
9360 if (err & REFERENCER_MISSING) {
9362 error("extent [%llu %d] lost referencer (owner: %llu)",
9363 bytenr, nodesize, root_id);
9366 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9367 bytenr, nodesize, root_id, level);
9374 * Check referencer for shared block backref
9375 * If level == -1, this function will resolve the level.
/*
 * Implementation notes:
 *  - @parent is read as a tree block and must sit exactly one level above
 *    the block at @bytenr (level + 1 is compared with the parent header).
 *  - The parent block pointers are scanned for @bytenr.
 *  - REFERENCER_MISSING is returned when found_parent is still 0 after the
 *    parent buffer has been released.
 *
 * NOTE(review): @level is an int but is printed with %u.
 */
9377 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9378 u64 parent, u64 bytenr, int level)
9380 struct extent_buffer *eb;
9381 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9383 int found_parent = 0;
9386 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9387 if (!extent_buffer_uptodate(eb))
9391 level = query_tree_block_level(fs_info, bytenr);
/* The parent must be exactly one level above the child */
9395 if (level + 1 != btrfs_header_level(eb))
9398 nr = btrfs_header_nritems(eb);
9399 for (i = 0; i < nr; i++) {
9400 if (bytenr == btrfs_node_blockptr(eb, i)) {
9406 free_extent_buffer(eb);
9407 if (!found_parent) {
9409 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9410 bytenr, nodesize, parent, level);
9411 return REFERENCER_MISSING;
9417 * Check referencer for normal (inlined) data ref
9418 * If len == 0, it will be resolved by searching in extent tree
/*
 * Implementation notes:
 *  - The extent tree lookup by (bytenr, EXTENT_ITEM, (u64)-1) followed by
 *    btrfs_previous_extent_item() is presumably how @len gets resolved when
 *    it is passed as 0 - TODO confirm.
 *  - The referencing root is loaded with btrfs_read_fs_root() and its
 *    EXTENT_DATA items for @objectid are walked with btrfs_next_item().
 *  - A file extent matches when its disk bytenr, its disk num bytes and
 *    (file offset - file extent offset) agree with @bytenr, @len and the
 *    backref offset.
 *  - found_count is compared with @count; a difference yields
 *    REFERENCER_MISSING.
 */
9420 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9421 u64 root_id, u64 objectid, u64 offset,
9422 u64 bytenr, u64 len, u32 count)
9424 struct btrfs_root *root;
9425 struct btrfs_root *extent_root = fs_info->extent_root;
9426 struct btrfs_key key;
9427 struct btrfs_path path;
9428 struct extent_buffer *leaf;
9429 struct btrfs_file_extent_item *fi;
9430 u32 found_count = 0;
9435 key.objectid = bytenr;
9436 key.type = BTRFS_EXTENT_ITEM_KEY;
9437 key.offset = (u64)-1;
9439 btrfs_init_path(&path);
9440 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9443 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9446 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9447 if (key.objectid != bytenr ||
9448 key.type != BTRFS_EXTENT_ITEM_KEY)
9451 btrfs_release_path(&path);
/* Load the root named by the backref */
9453 key.objectid = root_id;
9454 key.type = BTRFS_ROOT_ITEM_KEY;
9455 key.offset = (u64)-1;
9456 btrfs_init_path(&path);
9458 root = btrfs_read_fs_root(fs_info, &key);
9462 key.objectid = objectid;
9463 key.type = BTRFS_EXTENT_DATA_KEY;
9465 * It can be nasty as data backref offset is
9466 * file offset - file extent offset, which is smaller or
9467 * equal to original backref offset. The only special case is
9468 * overflow. So we need to special check and do further search.
9470 key.offset = offset & (1ULL << 63) ? 0 : offset;
9472 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9477 * Search afterwards to get correct one
9478 * NOTE: As we must do a comprehensive check on the data backref to
9479 * make sure the dref count also matches, we must iterate all file
9480 * extents for that inode.
9483 leaf = path.nodes[0];
9484 slot = path.slots[0];
9486 btrfs_item_key_to_cpu(leaf, &key, slot);
9487 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9489 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9491 * Except normal disk bytenr and disk num bytes, we still
9492 * need to do extra check on dbackref offset as
9493 * dbackref offset = file_offset - file_extent_offset
9495 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9496 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9497 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9501 ret = btrfs_next_item(root, &path);
9506 btrfs_release_path(&path);
/* The number of matching file extents must equal the recorded ref count */
9507 if (found_count != count) {
9509 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9510 bytenr, len, root_id, objectid, offset, count, found_count);
9511 return REFERENCER_MISSING;
9517 * Check if the referencer of a shared data backref exists
/*
 * Implementation notes:
 *  - @parent is read as a tree block and its items are scanned.
 *  - Only EXTENT_DATA items are of interest; items of other key types and
 *    inline file extents are tested for before the comparison.
 *  - An item matches when its disk bytenr equals @bytenr.
 *  - REFERENCER_MISSING is returned when found_parent is still 0 after the
 *    parent buffer has been released.
 */
9519 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9520 u64 parent, u64 bytenr)
9522 struct extent_buffer *eb;
9523 struct btrfs_key key;
9524 struct btrfs_file_extent_item *fi;
9525 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9527 int found_parent = 0;
9530 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9531 if (!extent_buffer_uptodate(eb))
9534 nr = btrfs_header_nritems(eb);
9535 for (i = 0; i < nr; i++) {
9536 btrfs_item_key_to_cpu(eb, &key, i);
9537 if (key.type != BTRFS_EXTENT_DATA_KEY)
9540 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9541 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
/* A regular file extent pointing at bytenr is the referencer */
9544 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9551 free_extent_buffer(eb);
9552 if (!found_parent) {
9553 error("shared extent %llu referencer lost (parent: %llu)",
9555 return REFERENCER_MISSING;
9561 * This function will check a given extent item, including its backref and
9562 * itself (like crossing stripe boundary and type)
9564 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Implementation notes:
 *  - bytes_used is increased by key.offset for an EXTENT_ITEM key and by
 *    nodesize in the other branch.
 *  - Items smaller than struct btrfs_extent_item are reported as the
 *    unsupported COMPAT_EXTENT_TREE_V0 format.
 *  - For metadata extents, check_crossing_stripes() is consulted and
 *    CROSSING_STRIPE_BOUNDARY is set on failure.
 *  - Metadata stored under an EXTENT_ITEM key has a btrfs_tree_block_info
 *    before the inline refs, which provides the level.
 *  - Each inline ref is dispatched by type to check_tree_block_backref(),
 *    check_shared_block_backref(), check_extent_data_backref() or
 *    check_shared_data_backref(); unknown types set UNKNOWN_TYPE.
 */
9566 static int check_extent_item(struct btrfs_fs_info *fs_info,
9567 struct extent_buffer *eb, int slot)
9569 struct btrfs_extent_item *ei;
9570 struct btrfs_extent_inline_ref *iref;
9571 struct btrfs_extent_data_ref *dref;
9575 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9576 u32 item_size = btrfs_item_size_nr(eb, slot);
9581 struct btrfs_key key;
9585 btrfs_item_key_to_cpu(eb, &key, slot);
/* Account the extent in the global bytes_used counter */
9586 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9587 bytes_used += key.offset;
9589 bytes_used += nodesize;
9591 if (item_size < sizeof(*ei)) {
9593 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9594 * old thing when on disk format is still un-determined.
9595 * No need to care about it anymore
9597 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9601 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9602 flags = btrfs_extent_flags(eb, ei);
9604 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9606 if (metadata && check_crossing_stripes(global_info, key.objectid,
9608 error("bad metadata [%llu, %llu) crossing stripe boundary",
9609 key.objectid, key.objectid + nodesize);
9610 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline data starts right after the extent item */
9613 ptr = (unsigned long)(ei + 1);
9615 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9616 /* Old EXTENT_ITEM metadata */
9617 struct btrfs_tree_block_info *info;
9619 info = (struct btrfs_tree_block_info *)ptr;
9620 level = btrfs_tree_block_level(eb, info);
9621 ptr += sizeof(struct btrfs_tree_block_info);
9623 /* New METADATA_ITEM */
9626 end = (unsigned long)ei + item_size;
9629 err |= ITEM_SIZE_MISMATCH;
9633 /* Now check every backref in this extent item */
9635 iref = (struct btrfs_extent_inline_ref *)ptr;
9636 type = btrfs_extent_inline_ref_type(eb, iref);
9637 offset = btrfs_extent_inline_ref_offset(eb, iref);
9639 case BTRFS_TREE_BLOCK_REF_KEY:
9640 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9644 case BTRFS_SHARED_BLOCK_REF_KEY:
9645 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9649 case BTRFS_EXTENT_DATA_REF_KEY:
9650 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9651 ret = check_extent_data_backref(fs_info,
9652 btrfs_extent_data_ref_root(eb, dref),
9653 btrfs_extent_data_ref_objectid(eb, dref),
9654 btrfs_extent_data_ref_offset(eb, dref),
9655 key.objectid, key.offset,
9656 btrfs_extent_data_ref_count(eb, dref));
9659 case BTRFS_SHARED_DATA_REF_KEY:
9660 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9664 error("extent[%llu %d %llu] has unknown ref type: %d",
9665 key.objectid, key.type, key.offset, type);
9666 err |= UNKNOWN_TYPE;
/* Step to the next inline ref; the step size depends on the ref type */
9670 ptr += btrfs_extent_inline_ref_size(type);
9679 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the DEV_EXTENT item at @slot of @eb is referenced by a chunk.
 *
 * @fs_info: filesystem context; the chunk tree is taken from it
 * @eb:      leaf holding the dev extent item
 * @slot:    item index inside @eb
 *
 * The chunk key is built from the chunk objectid/offset stored in the dev
 * extent and looked up in the chunk tree.  The chunk's stripes are then
 * scanned for one whose devid and offset equal the dev extent key's
 * objectid and offset.  On failure an error is logged and
 * REFERENCER_MISSING is returned.
 *
 * NOTE(review): the listing omits short lines, so the handling of the
 * search result, the found_chunk update and the success return are not
 * visible here.
 */
9681 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9682 struct extent_buffer *eb, int slot)
9684 struct btrfs_root *chunk_root = fs_info->chunk_root;
9685 struct btrfs_dev_extent *ptr;
9686 struct btrfs_path path;
9687 struct btrfs_key chunk_key;
9688 struct btrfs_key devext_key;
9689 struct btrfs_chunk *chunk;
9690 struct extent_buffer *l;
9694 int found_chunk = 0;
9697 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9698 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9699 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk owns it; look that chunk up */
9701 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9702 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9703 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9705 btrfs_init_path(&path);
9706 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9711 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
/*
 * NOTE(review): this compares the whole chunk length with the length of a
 * single dev extent; confirm that is intended for multi-stripe profiles.
 */
9712 if (btrfs_chunk_length(l, chunk) != length)
9715 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9716 for (i = 0; i < num_stripes; i++) {
9717 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9718 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9720 if (devid == devext_key.objectid &&
9721 offset == devext_key.offset) {
9727 btrfs_release_path(&path);
9730 "device extent[%llu, %llu, %llu] did not find the related chunk",
9731 devext_key.objectid, devext_key.offset, length);
9732 return REFERENCER_MISSING;
9738 * Check if the used space is correct with the dev item
/*
 * Check that the bytes_used field of the DEV_ITEM at @slot of @eb equals
 * the summed length of that device's dev extents.
 *
 * @fs_info: filesystem context; the device tree is taken from it
 * @eb:      leaf holding the dev item
 * @slot:    item index inside @eb
 *
 * Returns REFERENCER_MISSING when the dev extent search fails and
 * ACCOUNTING_MISMATCH when the summed length differs from bytes_used.
 * Both error messages identify the device by the dev item's own key,
 * which is re-read from @eb because the iteration reuses `key`.
 *
 * NOTE(review): the listing omits short lines (remaining key setup,
 * declarations, loop construct, break statements, success return).
 */
9740 static int check_dev_item(struct btrfs_fs_info *fs_info,
9741 struct extent_buffer *eb, int slot)
9743 struct btrfs_root *dev_root = fs_info->dev_root;
9744 struct btrfs_dev_item *dev_item;
9745 struct btrfs_path path;
9746 struct btrfs_key key;
9747 struct btrfs_dev_extent *ptr;
9753 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9754 dev_id = btrfs_device_id(eb, dev_item);
9755 used = btrfs_device_bytes_used(eb, dev_item);
/* Position the path at the dev extents belonging to this device */
9757 key.objectid = dev_id;
9758 key.type = BTRFS_DEV_EXTENT_KEY;
9761 btrfs_init_path(&path);
9762 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9764 btrfs_item_key_to_cpu(eb, &key, slot);
9765 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9766 key.objectid, key.type, key.offset);
9767 btrfs_release_path(&path);
9768 return REFERENCER_MISSING;
9771 /* Iterate dev_extents to calculate the used space of a device */
9773 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9775 if (key.objectid > dev_id)
9777 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9780 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9781 struct btrfs_dev_extent);
9782 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9784 ret = btrfs_next_item(dev_root, &path);
9788 btrfs_release_path(&path);
9790 if (used != total) {
/* Reload the dev item key so the message names the item being checked */
9791 btrfs_item_key_to_cpu(eb, &key, slot);
9793 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9794 total, used, key.objectid,
9795 key.type, key.offset);
9796 return ACCOUNTING_MISMATCH;
9802 * Check a block group item with its referencer (chunk) and its used space
9803 * with extent/metadata item
/*
 * Check the BLOCK_GROUP_ITEM at @slot of @eb against its chunk and against
 * the extent items that fall inside it.
 *
 * @fs_info: filesystem context; extent and chunk trees are taken from it
 * @eb:      leaf holding the block group item
 * @slot:    item index inside @eb
 *
 * Error bits visibly OR-ed into err:
 *   REFERENCER_MISSING   - chunk item lookup failed
 *   REFERENCER_MISMATCH  - chunk length comparison failed
 *   CHUNK_TYPE_MISMATCH  - extent flags disagree with the block group flags
 *   ACCOUNTING_MISMATCH  - summed extent bytes differ from the item's used
 *
 * NOTE(review): short lines are missing from this listing (declarations of
 * used/total/bg_flags/flags/ret/err, the loop construct, break/goto lines,
 * the metadata accounting line and the return statement).
 */
9805 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9806 struct extent_buffer *eb, int slot)
9808 struct btrfs_root *extent_root = fs_info->extent_root;
9809 struct btrfs_root *chunk_root = fs_info->chunk_root;
9810 struct btrfs_block_group_item *bi;
9811 struct btrfs_block_group_item bg_item;
9812 struct btrfs_path path;
9813 struct btrfs_key bg_key;
9814 struct btrfs_key chunk_key;
9815 struct btrfs_key extent_key;
9816 struct btrfs_chunk *chunk;
9817 struct extent_buffer *leaf;
9818 struct btrfs_extent_item *ei;
9819 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk item into bg_item so its fields can be read directly */
9827 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9828 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9829 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9830 used = btrfs_block_group_used(&bg_item);
9831 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is keyed by the block group's start address */
9833 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9834 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9835 chunk_key.offset = bg_key.objectid;
9837 btrfs_init_path(&path);
9838 /* Search for the referencer chunk */
9839 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9842 "block group[%llu %llu] did not find the related chunk item",
9843 bg_key.objectid, bg_key.offset);
9844 err |= REFERENCER_MISSING;
9846 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9847 struct btrfs_chunk);
9848 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9851 "block group[%llu %llu] related chunk item length does not match",
9852 bg_key.objectid, bg_key.offset);
9853 err |= REFERENCER_MISMATCH;
9856 btrfs_release_path(&path);
9858 /* Search from the block group bytenr */
9859 extent_key.objectid = bg_key.objectid;
9860 extent_key.type = 0;
9861 extent_key.offset = 0;
9863 btrfs_init_path(&path);
9864 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9868 /* Iterate extent tree to account used space */
9870 leaf = path.nodes[0];
9871 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9872 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9875 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9876 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9878 if (extent_key.objectid < bg_key.objectid)
9881 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9884 total += extent_key.offset;
9886 ei = btrfs_item_ptr(leaf, path.slots[0],
9887 struct btrfs_extent_item);
9888 flags = btrfs_extent_flags(leaf, ei);
9889 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9890 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9892 "bad extent[%llu, %llu) type mismatch with chunk",
9893 extent_key.objectid,
9894 extent_key.objectid + extent_key.offset);
9895 err |= CHUNK_TYPE_MISMATCH;
9897 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9898 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9899 BTRFS_BLOCK_GROUP_METADATA))) {
9901 "bad extent[%llu, %llu) type mismatch with chunk",
9902 extent_key.objectid,
9903 extent_key.objectid + nodesize);
9904 err |= CHUNK_TYPE_MISMATCH;
9908 ret = btrfs_next_item(extent_root, &path);
9914 btrfs_release_path(&path);
9916 if (total != used) {
9918 "block group[%llu %llu] used %llu but extent items used %llu",
9919 bg_key.objectid, bg_key.offset, used, total);
9920 err |= ACCOUNTING_MISMATCH;
9926 * Check a chunk item.
9927 * Including checking all referred dev_extents and block group
/*
 * Check the CHUNK_ITEM at @slot of @eb together with the block group item
 * and the dev extents that must refer back to it.
 *
 * @fs_info: filesystem context; extent and device trees are taken from it
 * @eb:      leaf holding the chunk item
 * @slot:    item index inside @eb
 *
 * Error bits visibly OR-ed into err:
 *   BYTES_UNALIGNED    - chunk length is not sectorsize aligned
 *   UNKNOWN_TYPE       - no type bit set, or more than one profile bit set
 *   REFERENCER_MISSING - block group item missing or its flags differ
 *   BACKREF_MISSING    - a stripe has no matching dev extent
 *
 * All messages print the chunk as the half-open range
 * [chunk_key.offset, chunk_end).
 *
 * NOTE(review): short lines are missing from this listing (declarations,
 * result checks after the searches, the `continue`, the not_match_dev
 * label and the return statement).
 */
9929 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9930 struct extent_buffer *eb, int slot)
9932 struct btrfs_root *extent_root = fs_info->extent_root;
9933 struct btrfs_root *dev_root = fs_info->dev_root;
9934 struct btrfs_path path;
9935 struct btrfs_key chunk_key;
9936 struct btrfs_key bg_key;
9937 struct btrfs_key devext_key;
9938 struct btrfs_chunk *chunk;
9939 struct extent_buffer *leaf;
9940 struct btrfs_block_group_item *bi;
9941 struct btrfs_block_group_item bg_item;
9942 struct btrfs_dev_extent *ptr;
9943 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9955 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9956 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9957 length = btrfs_chunk_length(eb, chunk);
9958 chunk_end = chunk_key.offset + length;
9959 if (!IS_ALIGNED(length, sectorsize)) {
9960 error("chunk[%llu %llu) not aligned to %u",
9961 chunk_key.offset, chunk_end, sectorsize);
9962 err |= BYTES_UNALIGNED;
9966 type = btrfs_chunk_type(eb, chunk);
9967 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9968 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9969 error("chunk[%llu %llu) has no chunk type",
9970 chunk_key.offset, chunk_end);
9971 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one bit is set */
9973 if (profile && (profile & (profile - 1))) {
9974 error("chunk[%llu %llu) multiple profiles detected: %llx",
9975 chunk_key.offset, chunk_end, profile);
9976 err |= UNKNOWN_TYPE;
/* The block group item is keyed by the chunk's start and length */
9979 bg_key.objectid = chunk_key.offset;
9980 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9981 bg_key.offset = length;
9983 btrfs_init_path(&path);
9984 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9987 "chunk[%llu %llu) did not find the related block group item",
9988 chunk_key.offset, chunk_end);
9989 err |= REFERENCER_MISSING;
9991 leaf = path.nodes[0];
9992 bi = btrfs_item_ptr(leaf, path.slots[0],
9993 struct btrfs_block_group_item);
9994 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9996 if (btrfs_block_group_flags(&bg_item) != type) {
9998 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9999 chunk_key.offset, chunk_end, type,
10000 btrfs_block_group_flags(&bg_item));
10001 err |= REFERENCER_MISSING;
/* Every stripe must be backed by a dev extent pointing at this chunk */
10005 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10006 for (i = 0; i < num_stripes; i++) {
10007 btrfs_release_path(&path);
10008 btrfs_init_path(&path);
10009 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10010 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10011 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10013 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10016 goto not_match_dev;
10018 leaf = path.nodes[0];
10019 ptr = btrfs_item_ptr(leaf, path.slots[0],
10020 struct btrfs_dev_extent);
10021 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10022 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/*
 * NOTE(review): the dev extent length is compared with the whole chunk
 * length; confirm that is intended for multi-stripe profiles.
 */
10023 if (objectid != chunk_key.objectid ||
10024 offset != chunk_key.offset ||
10025 btrfs_dev_extent_length(leaf, ptr) != length)
10026 goto not_match_dev;
10029 err |= BACKREF_MISSING;
10031 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10032 chunk_key.offset, chunk_end, i);
10035 btrfs_release_path(&path);
10041 * Main entry function to check known items and update related accounting info
/*
 * Dispatch every item of leaf @eb to the checker matching its key type.
 *
 * @root: tree the leaf belongs to; also supplies fs_info
 * @eb:   leaf whose items are checked
 *
 * Visible dispatch table:
 *   EXTENT_DATA                 -> check_extent_data_item()
 *   BLOCK_GROUP_ITEM            -> check_block_group_item()
 *   DEV_ITEM                    -> check_dev_item()
 *   CHUNK_ITEM                  -> check_chunk_item()
 *   DEV_EXTENT                  -> check_dev_extent_item()
 *   EXTENT_ITEM / METADATA_ITEM -> check_extent_item()
 *   EXTENT_CSUM                 -> only adds the item size to total_csum_bytes
 *   TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF, SHARED_DATA_REF
 *                               -> the corresponding keyed backref checker
 *
 * NOTE(review): short lines are missing from this listing (slot/ret/err
 * declarations, the switch header, break statements, how ret is folded
 * into the result, the jump guarded by the final slot test and the
 * return).
 */
10043 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10045 struct btrfs_fs_info *fs_info = root->fs_info;
10046 struct btrfs_key key;
10049 struct btrfs_extent_data_ref *dref;
10054 btrfs_item_key_to_cpu(eb, &key, slot);
10058 case BTRFS_EXTENT_DATA_KEY:
10059 ret = check_extent_data_item(root, eb, slot);
10062 case BTRFS_BLOCK_GROUP_ITEM_KEY:
10063 ret = check_block_group_item(fs_info, eb, slot);
10066 case BTRFS_DEV_ITEM_KEY:
10067 ret = check_dev_item(fs_info, eb, slot);
10070 case BTRFS_CHUNK_ITEM_KEY:
10071 ret = check_chunk_item(fs_info, eb, slot);
10074 case BTRFS_DEV_EXTENT_KEY:
10075 ret = check_dev_extent_item(fs_info, eb, slot);
10078 case BTRFS_EXTENT_ITEM_KEY:
10079 case BTRFS_METADATA_ITEM_KEY:
10080 ret = check_extent_item(fs_info, eb, slot);
10083 case BTRFS_EXTENT_CSUM_KEY:
10084 total_csum_bytes += btrfs_item_size_nr(eb, slot);
10086 case BTRFS_TREE_BLOCK_REF_KEY:
10087 ret = check_tree_block_backref(fs_info, key.offset,
10091 case BTRFS_EXTENT_DATA_REF_KEY:
10092 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10093 ret = check_extent_data_backref(fs_info,
10094 btrfs_extent_data_ref_root(eb, dref),
10095 btrfs_extent_data_ref_objectid(eb, dref),
10096 btrfs_extent_data_ref_offset(eb, dref),
10098 btrfs_extent_data_ref_count(eb, dref));
10101 case BTRFS_SHARED_BLOCK_REF_KEY:
10102 ret = check_shared_block_backref(fs_info, key.offset,
10106 case BTRFS_SHARED_DATA_REF_KEY:
10107 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while items remain in the leaf */
10115 if (++slot < btrfs_header_nritems(eb))
10122 * Helper function for later fs/subvol tree check. To determine if a tree
10123 * block should be checked.
10124 * This function ensures that only the direct referencer with the lowest rootid
10125 * checks a fs/subvolume tree block.
10127 * Backref check at extent tree would detect errors like missing subvolume
10128 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the tree that should check tree block @eb.
 *
 * @root: fs/subvolume tree currently being traversed
 * @eb:   tree block under consideration
 *
 * The extent item covering @eb's bytenr is located in the extent tree and
 * its inline references are walked.  When a TREE_BLOCK_REF whose offset is
 * smaller than root->objectid is found, the path is released at that
 * point.
 *
 * NOTE(review): the return statements are not part of this listing (short
 * lines are omitted); the caller treats a zero result as "skip this
 * block".  Confirm the exact values in the full file.
 */
10130 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10132 struct btrfs_root *extent_root = root->fs_info->extent_root;
10133 struct btrfs_key key;
10134 struct btrfs_path path;
10135 struct extent_buffer *leaf;
10137 struct btrfs_extent_item *ei;
10143 struct btrfs_extent_inline_ref *iref;
10146 btrfs_init_path(&path);
10147 key.objectid = btrfs_header_bytenr(eb);
10148 key.type = BTRFS_METADATA_ITEM_KEY;
10149 key.offset = (u64)-1;
10152 * Any failure in backref resolving means we can't determine
10153 * whom the tree block belongs to.
10154 * So in that case, we need to check that tree block
10156 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10160 ret = btrfs_previous_extent_item(extent_root, &path,
10161 btrfs_header_bytenr(eb));
10165 leaf = path.nodes[0];
10166 slot = path.slots[0];
10167 btrfs_item_key_to_cpu(leaf, &key, slot);
10168 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Find the first inline ref: directly behind the extent item for
 * METADATA_ITEM keys, behind an extra btrfs_tree_block_info otherwise.
 */
10170 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10171 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10173 struct btrfs_tree_block_info *info;
10175 info = (struct btrfs_tree_block_info *)(ei + 1);
10176 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10179 item_size = btrfs_item_size_nr(leaf, slot);
10180 ptr = (unsigned long)iref;
10181 end = (unsigned long)ei + item_size;
10182 while (ptr < end) {
10183 iref = (struct btrfs_extent_inline_ref *)ptr;
10184 type = btrfs_extent_inline_ref_type(leaf, iref);
10185 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10188 * We only check the tree block if current root is
10189 * the lowest referencer of it.
10191 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10192 offset < root->objectid) {
10193 btrfs_release_path(&path);
10197 ptr += btrfs_extent_inline_ref_size(type);
10200 * Normally we should also check keyed tree block ref, but that may be
10201 * very time consuming. Inlined ref should already make us skip a lot
10202 * of refs now. So skip search keyed tree block ref.
10206 btrfs_release_path(&path);
10211 * Traversal function for tree block. We will do:
10212 * 1) Skip shared fs/subvolume tree blocks
10213 * 2) Update related bytes accounting
10214 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root and everything below it.
 *
 * @root: tree being traversed
 * @node: tree block to check; children are read with read_tree_block()
 *
 * Global counters updated here: total_btree_bytes, total_fs_tree_bytes,
 * total_extent_tree_bytes, btree_space_waste and found_old_backref.
 *
 * NOTE(review): short lines are missing from this listing (declarations,
 * early returns, the leaf/node branch, how ret values are merged and the
 * final return); confirm the return convention in the full file.
 */
10216 static int traverse_tree_block(struct btrfs_root *root,
10217 struct extent_buffer *node)
10219 struct extent_buffer *eb;
10220 struct btrfs_key key;
10221 struct btrfs_key drop_key;
10229 * Skip shared fs/subvolume tree block, in that case they will
10230 * be checked by referencer with lowest rootid
10232 if (is_fstree(root->objectid) && !should_check(root, node))
10235 /* Update bytes accounting */
10236 total_btree_bytes += node->len;
10237 if (fs_root_objectid(btrfs_header_owner(node)))
10238 total_fs_tree_bytes += node->len;
10239 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10240 total_extent_tree_bytes += node->len;
10241 if (!found_old_backref &&
10242 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10243 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10244 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10245 found_old_backref = 1;
10247 /* pre-order traversal, check itself first */
10248 level = btrfs_header_level(node);
10249 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10250 btrfs_header_level(node),
10251 btrfs_header_owner(node));
10255 "check %s failed root %llu bytenr %llu level %d, force continue check",
10256 level ? "node":"leaf", root->objectid,
10257 btrfs_header_bytenr(node), btrfs_header_level(node));
10260 btree_space_waste += btrfs_leaf_free_space(root, node);
10261 ret = check_leaf_items(root, node);
/* Unused key-pointer slots of an internal node count as wasted space */
10266 nr = btrfs_header_nritems(node);
10267 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10268 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10269 sizeof(struct btrfs_key_ptr);
10271 /* Then check all its children */
10272 for (i = 0; i < nr; i++) {
10273 u64 blocknr = btrfs_node_blockptr(node, i);
10275 btrfs_node_key_to_cpu(node, &key, i);
10276 if (level == root->root_item.drop_level &&
10277 is_dropped_key(&key, &drop_key))
10281 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10282 * to call the function itself.
10284 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10285 if (extent_buffer_uptodate(eb)) {
10286 ret = traverse_tree_block(root, eb);
10289 free_extent_buffer(eb);
10296 * Low memory usage version check_chunks_and_extents.
10298 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10300 struct btrfs_path path;
10301 struct btrfs_key key;
10302 struct btrfs_root *root1;
10303 struct btrfs_root *cur_root;
10307 root1 = root->fs_info->chunk_root;
10308 ret = traverse_tree_block(root1, root1->node);
10311 root1 = root->fs_info->tree_root;
10312 ret = traverse_tree_block(root1, root1->node);
10315 btrfs_init_path(&path);
10316 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10318 key.type = BTRFS_ROOT_ITEM_KEY;
10320 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10322 error("cannot find extent treet in tree_root");
10327 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10328 if (key.type != BTRFS_ROOT_ITEM_KEY)
10330 key.offset = (u64)-1;
10332 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10333 if (IS_ERR(cur_root) || !cur_root) {
10334 error("failed to read tree: %lld", key.objectid);
10338 ret = traverse_tree_block(cur_root, cur_root->node);
10342 ret = btrfs_next_item(root1, &path);
10348 btrfs_release_path(&path);
/*
 * Replace the root node of @root with a freshly initialised tree block.
 *
 * @trans:     running transaction; its transid becomes the generation
 * @root:      tree whose root node is re-initialised
 * @overwrite: NOTE(review): the branch testing this flag is not part of
 *             this listing; from the two visible extent_buffer_get(c)
 *             calls it presumably selects between reusing the old node
 *             and allocating a new block -- confirm in the full file.
 *
 * The block header is zeroed and rewritten (level, bytenr, generation,
 * backref revision, owner, fsid, chunk tree uuid) and the buffer is marked
 * dirty.  When the new block sits at the same address as the old one the
 * root item is updated in the tree root; otherwise the visible tail frees
 * the old buffer and adds the root to the dirty list.
 */
10352 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10353 struct btrfs_root *root, int overwrite)
10355 struct extent_buffer *c;
10356 struct extent_buffer *old = root->node;
10359 struct btrfs_disk_key disk_key = {0,0,0};
10365 extent_buffer_get(c);
10368 c = btrfs_alloc_free_block(trans, root,
10370 root->root_key.objectid,
10371 &disk_key, level, 0, 0);
10374 extent_buffer_get(c);
/* Start from an all-zero header and fill in the mandatory fields */
10378 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10379 btrfs_set_header_level(c, level);
10380 btrfs_set_header_bytenr(c, c->start);
10381 btrfs_set_header_generation(c, trans->transid);
10382 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10383 btrfs_set_header_owner(c, root->root_key.objectid);
10385 write_extent_buffer(c, root->fs_info->fsid,
10386 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10388 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10389 btrfs_header_chunk_tree_uuid(c),
10392 btrfs_mark_buffer_dirty(c);
10394 * this case can happen in the following case:
10396 * 1.overwrite previous root.
10398 * 2.reinit reloc data root, this is because we skip pin
10399 * down reloc data tree before which means we can allocate
10400 * same block bytenr here.
10402 if (old->start == c->start) {
10403 btrfs_set_root_generation(&root->root_item,
10405 root->root_item.level = btrfs_header_level(root->node);
10406 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10407 &root->root_key, &root->root_item);
10409 free_extent_buffer(c);
10413 free_extent_buffer(old);
10415 add_root_to_dirty_list(root);
/*
 * Recursively pin the extent occupied by @eb and by the blocks below it.
 *
 * @fs_info:   filesystem context (pinned_extents, extent_root, super_copy)
 * @eb:        tree block to pin
 * @tree_root: non-zero while walking the tree root; in that mode ROOT_ITEM
 *             entries are followed into the trees they describe, except
 *             for the extent tree and the two reloc trees
 *
 * Blocks already marked EXTENT_DIRTY in pinned_extents are not pinned a
 * second time.  With @tree_root == 0, children of a level 1 node are
 * pinned by address without being read.
 *
 * NOTE(review): short lines are missing from this listing (declarations,
 * the level test separating leaf and node handling, continue lines, error
 * returns and the final return).
 */
10419 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10420 struct extent_buffer *eb, int tree_root)
10422 struct extent_buffer *tmp;
10423 struct btrfs_root_item *ri;
10424 struct btrfs_key key;
10427 int level = btrfs_header_level(eb);
10433 * If we have pinned this block before, don't pin it again.
10434 * This can not only avoid forever loop with broken filesystem
10435 * but also give us some speedups.
10437 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10438 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10441 btrfs_pin_extent(fs_info, eb->start, eb->len);
10443 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10444 nritems = btrfs_header_nritems(eb);
10445 for (i = 0; i < nritems; i++) {
10447 btrfs_item_key_to_cpu(eb, &key, i);
10448 if (key.type != BTRFS_ROOT_ITEM_KEY)
10450 /* Skip the extent root and reloc roots */
10451 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10452 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10453 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
/* Follow the root item to the root block of the tree it describes */
10455 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10456 bytenr = btrfs_disk_root_bytenr(eb, ri);
10459 * If at any point we start needing the real root we
10460 * will have to build a stump root for the root we are
10461 * in, but for now this doesn't actually use the root so
10462 * just pass in extent_root.
10464 tmp = read_tree_block(fs_info->extent_root, bytenr,
10466 if (!extent_buffer_uptodate(tmp)) {
10467 fprintf(stderr, "Error reading root block\n");
10470 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10471 free_extent_buffer(tmp);
10475 bytenr = btrfs_node_blockptr(eb, i);
10477 /* If we aren't the tree root don't read the block */
10478 if (level == 1 && !tree_root) {
10479 btrfs_pin_extent(fs_info, bytenr, nodesize);
10483 tmp = read_tree_block(fs_info->extent_root, bytenr,
10485 if (!extent_buffer_uptodate(tmp)) {
10486 fprintf(stderr, "Error reading tree block\n");
10489 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10490 free_extent_buffer(tmp);
/*
 * Pin the metadata reachable from the chunk tree and from the tree root.
 *
 * The chunk tree is walked with tree_root == 0, the tree root with
 * tree_root == 1.  The result of the tree root walk is returned directly.
 * NOTE(review): the check of the first call's result is not part of this
 * listing.
 */
10499 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10503 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10507 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk items of the chunk
 * tree.
 *
 * @fs_info: filesystem context; its avail_*_alloc_bits are cleared first
 *
 * For every CHUNK_ITEM a block group is added with the chunk's type and
 * length, and the chunk's range is marked dirty in
 * fs_info->free_space_cache.  Afterwards the block groups are visited in
 * address order through btrfs_lookup_first_block_group().
 *
 * NOTE(review): short lines are missing from this listing (key setup,
 * result checks, loop constructs, slot advance, the body of the final
 * block group loop and the return statements).
 */
10510 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10512 struct btrfs_block_group_cache *cache;
10513 struct btrfs_path path;
10514 struct extent_buffer *leaf;
10515 struct btrfs_chunk *chunk;
10516 struct btrfs_key key;
10520 btrfs_init_path(&path);
10522 key.type = BTRFS_CHUNK_ITEM_KEY;
10524 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
10526 btrfs_release_path(&path);
10531 * We do this in case the block groups were screwed up and had alloc
10532 * bits that aren't actually set on the chunks. This happens with
10533 * restored images every time and could happen in real life I guess.
10535 fs_info->avail_data_alloc_bits = 0;
10536 fs_info->avail_metadata_alloc_bits = 0;
10537 fs_info->avail_system_alloc_bits = 0;
10539 /* First we need to create the in-memory block groups */
10541 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10542 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
10544 btrfs_release_path(&path);
10552 leaf = path.nodes[0];
10553 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10554 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10559 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
10560 btrfs_add_block_group(fs_info, 0,
10561 btrfs_chunk_type(leaf, chunk),
10562 key.objectid, key.offset,
10563 btrfs_chunk_length(leaf, chunk));
10564 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10565 key.offset + btrfs_chunk_length(leaf, chunk),
/* Step through the block groups by looking up the one at or after start */
10571 cache = btrfs_lookup_first_block_group(fs_info, start);
10575 start = cache->key.objectid + cache->key.offset;
10578 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance and re-create the data reloc
 * tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem context; the tree root is modified
 *
 * Three visible stages:
 *   1. search for the balance item and delete it
 *   2. delete the items whose objectid is BTRFS_TREE_RELOC_OBJECTID, in
 *      batches of del_nr slots starting at del_slot
 *   3. read the data reloc tree, re-initialise its root node and create
 *      its root directory
 *
 * NOTE(review): short lines are missing from this listing (remaining key
 * setup, result checks, labels, loop constructs, del_nr bookkeeping and
 * the return statement).
 */
10582 static int reset_balance(struct btrfs_trans_handle *trans,
10583 struct btrfs_fs_info *fs_info)
10585 struct btrfs_root *root = fs_info->tree_root;
10586 struct btrfs_path path;
10587 struct extent_buffer *leaf;
10588 struct btrfs_key key;
10589 int del_slot, del_nr = 0;
10593 btrfs_init_path(&path);
/* Stage 1: the balance item itself */
10594 key.objectid = BTRFS_BALANCE_OBJECTID;
10595 key.type = BTRFS_BALANCE_ITEM_KEY;
10597 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10602 goto reinit_data_reloc;
10607 ret = btrfs_del_item(trans, root, &path);
10610 btrfs_release_path(&path);
/* Stage 2: items belonging to tree reloc roots */
10612 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10613 key.type = BTRFS_ROOT_ITEM_KEY;
10615 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10619 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10624 ret = btrfs_del_items(trans, root, &path,
10631 btrfs_release_path(&path);
10634 ret = btrfs_search_slot(trans, root, &key, &path,
10641 leaf = path.nodes[0];
10642 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10643 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10645 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10650 del_slot = path.slots[0];
10659 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
10663 btrfs_release_path(&path);
/* Stage 3: start over with an empty data reloc tree */
10666 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10667 key.type = BTRFS_ROOT_ITEM_KEY;
10668 key.offset = (u64)-1;
10669 root = btrfs_read_fs_root(fs_info, &key);
10670 if (IS_ERR(root)) {
10671 fprintf(stderr, "Error reading data reloc tree\n");
10672 ret = PTR_ERR(root);
10675 record_root_in_trans(trans, root);
10676 ret = btrfs_fsck_reinit_root(trans, root, 0);
10679 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10681 btrfs_release_path(&path);
/*
 * Throw away the extent tree and start a new one.
 *
 * @trans:   running transaction
 * @fs_info: filesystem context
 *
 * Visible sequence: refuse filesystems with the MIXED_GROUPS incompat
 * flag, pin all metadata in use, drop and rebuild the in-memory block
 * groups, re-initialise the extent root, insert one block group item per
 * in-memory block group, then reset any pending balance.  Each failing
 * step prints a message to stderr.
 *
 * NOTE(review): short lines are missing from this listing (declarations,
 * result checks, loop construct and return statements), so the exact
 * error codes returned are not visible here.
 */
10685 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10686 struct btrfs_fs_info *fs_info)
10692 * The only reason we don't do this is because right now we're just
10693 * walking the trees we find and pinning down their bytes, we don't look
10694 * at any of the leaves. In order to do mixed groups we'd have to check
10695 * the leaves of any fs roots and pin down the bytes for any file
10696 * extents we find. Not hard but why do it if we don't have to?
10698 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10699 fprintf(stderr, "We don't support re-initing the extent tree "
10700 "for mixed block groups yet, please notify a btrfs "
10701 "developer you want to do this so they can add this "
10702 "functionality.\n");
10707 * first we need to walk all of the trees except the extent tree and pin
10708 * down the bytes that are in use so we don't overwrite any existing
10711 ret = pin_metadata_blocks(fs_info);
10713 fprintf(stderr, "error pinning down used bytes\n");
10718 * Need to drop all the block groups since we're going to recreate all
10721 btrfs_free_block_groups(fs_info);
10722 ret = reset_block_groups(fs_info);
10724 fprintf(stderr, "error resetting the block groups\n");
10728 /* Ok we can allocate now, reinit the extent root */
10729 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10731 fprintf(stderr, "extent root initialization failed\n");
10733 * When the transaction code is updated we should end the
10734 * transaction, but for now progs only knows about commit so
10735 * just return an error.
10741 * Now we have all the in-memory block groups setup so we can make
10742 * allocations properly, and the metadata we care about is safe since we
10743 * pinned all of it above.
10746 struct btrfs_block_group_cache *cache;
10748 cache = btrfs_lookup_first_block_group(fs_info, start);
10751 start = cache->key.objectid + cache->key.offset;
10752 ret = btrfs_insert_item(trans, fs_info->extent_root,
10753 &cache->key, &cache->item,
10754 sizeof(cache->item));
10756 fprintf(stderr, "Error adding block group\n");
10759 btrfs_extent_post_op(trans, fs_info->extent_root);
10762 ret = reset_balance(trans, fs_info);
10764 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force tree block @eb to be COWed by searching for its first key in a
 * write transaction.
 *
 * @root: any root of the filesystem; replaced by the owner root of @eb
 * @eb:   tree block to re-COW
 *
 * The owner root is looked up from the block header.  The search is run
 * with cow == 1 and path.lowest_level set to the block's level, using the
 * first node key for level > 0 and the first item key otherwise.
 * Returns PTR_ERR() of the failing call when the owner root or the
 * transaction cannot be obtained.
 *
 * NOTE(review): the value of btrfs_commit_transaction() is not captured
 * on the visible line; the final return statement is not part of this
 * listing.
 */
10769 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10771 struct btrfs_path path;
10772 struct btrfs_trans_handle *trans;
10773 struct btrfs_key key;
10776 printf("Recowing metadata block %llu\n", eb->start);
10777 key.objectid = btrfs_header_owner(eb);
10778 key.type = BTRFS_ROOT_ITEM_KEY;
10779 key.offset = (u64)-1;
10781 root = btrfs_read_fs_root(root->fs_info, &key);
10782 if (IS_ERR(root)) {
10783 fprintf(stderr, "Couldn't find owner root %llu\n",
10785 return PTR_ERR(root);
10788 trans = btrfs_start_transaction(root, 1);
10790 return PTR_ERR(trans);
10792 btrfs_init_path(&path);
/* Stop the search at the level of the block that has to be COWed */
10793 path.lowest_level = btrfs_header_level(eb);
10794 if (path.lowest_level)
10795 btrfs_node_key_to_cpu(eb, &key, 0);
10797 btrfs_item_key_to_cpu(eb, &key, 0);
10799 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10800 btrfs_commit_transaction(trans, root);
10801 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree it lives in.
 *
 * @root: any root of the filesystem; replaced by the tree named by
 *        bad->root_id
 * @bad:  key of the item to delete plus the id of its tree
 *
 * The item is searched with ins_len == -1 and cow == 1 and removed with
 * btrfs_del_item(); the transaction is then committed.  Returns PTR_ERR()
 * of the failing call when the tree or the transaction cannot be obtained.
 *
 * NOTE(review): the checks between search and delete, and the final
 * return statement, are not part of this listing.
 */
10805 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10807 struct btrfs_path path;
10808 struct btrfs_trans_handle *trans;
10809 struct btrfs_key key;
10812 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10813 bad->key.type, bad->key.offset);
/* Resolve the tree that owns the bad item */
10814 key.objectid = bad->root_id;
10815 key.type = BTRFS_ROOT_ITEM_KEY;
10816 key.offset = (u64)-1;
10818 root = btrfs_read_fs_root(root->fs_info, &key);
10819 if (IS_ERR(root)) {
10820 fprintf(stderr, "Couldn't find owner root %llu\n",
10822 return PTR_ERR(root);
10825 trans = btrfs_start_transaction(root, 1);
10827 return PTR_ERR(trans);
10829 btrfs_init_path(&path);
10830 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
10836 ret = btrfs_del_item(trans, root, &path);
10838 btrfs_commit_transaction(trans, root);
10839 btrfs_release_path(&path);
/*
 * Clear the log tree pointer in the super block.
 *
 * @root: root used to start and commit the transaction
 *
 * Sets both log_root and log_root_level of the super copy to 0 inside a
 * transaction and stores the commit result in ret.  When the transaction
 * cannot be started, ret receives PTR_ERR(trans).
 * NOTE(review): the return statements are not part of this listing.
 */
10843 static int zero_log_tree(struct btrfs_root *root)
10845 struct btrfs_trans_handle *trans;
10848 trans = btrfs_start_transaction(root, 1);
10849 if (IS_ERR(trans)) {
10850 ret = PTR_ERR(trans);
10853 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10854 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10855 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range [start, start + len).
 *
 * @trans:     running transaction
 * @csum_root: checksum tree; also supplies the sectorsize
 * @buf:       scratch buffer the data is read into, one sector at a time
 * @start:     start address of the data range
 *
 * The range is processed in sectorsize steps: each sector is read with
 * read_extent_data() and passed to btrfs_csum_file_block().
 *
 * NOTE(review): the remaining parameter (used as `len` in the body), the
 * local declarations, the result checks and the return statement are not
 * part of this listing.
 */
10859 static int populate_csum(struct btrfs_trans_handle *trans,
10860 struct btrfs_root *csum_root, char *buf, u64 start,
10867 while (offset < len) {
10868 sectorsize = csum_root->sectorsize;
10869 ret = read_extent_data(csum_root, buf, start + offset,
10873 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10874 start + offset, buf, sectorsize);
10877 offset += sectorsize;
/*
 * Add checksums for the regular file extents found in @cur_root.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree the checksums are inserted into
 * @cur_root:  fs/subvolume tree whose file extents are scanned
 *
 * A sector-sized scratch buffer is allocated, the tree is iterated item
 * by item, and populate_csum() is called with the disk bytenr and disk
 * length of each EXTENT_DATA item of type BTRFS_FILE_EXTENT_REG.  A
 * populate_csum() result of -EEXIST is tested for explicitly.
 *
 * NOTE(review): short lines are missing from this listing (key setup,
 * allocation check, loop construct, goto lines, the statement guarded by
 * the -EEXIST test, freeing of buf and the return statement).
 */
10882 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10883 struct btrfs_root *csum_root,
10884 struct btrfs_root *cur_root)
10886 struct btrfs_path path;
10887 struct btrfs_key key;
10888 struct extent_buffer *node;
10889 struct btrfs_file_extent_item *fi;
10896 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10900 btrfs_init_path(&path);
10904 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
10907 /* Iterate all regular file extents and fill its csum */
10909 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10911 if (key.type != BTRFS_EXTENT_DATA_KEY)
10913 node = path.nodes[0];
10914 slot = path.slots[0];
10915 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10916 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the extent as stored on disk */
10918 start = btrfs_file_extent_disk_bytenr(node, fi);
10919 len = btrfs_file_extent_disk_num_bytes(node, fi);
10921 ret = populate_csum(trans, csum_root, buf, start, len);
10922 if (ret == -EEXIST)
10928 * TODO: if next leaf is corrupted, jump to nearest next valid
10931 ret = btrfs_next_item(cur_root, &path);
10941 btrfs_release_path(&path);
/*
 * Fill the checksum tree by scanning the fs/subvolume trees.
 *
 * Searches the tree root from objectid BTRFS_FS_TREE_OBJECTID with type
 * BTRFS_ROOT_ITEM_KEY and steps through items with btrfs_next_item().  Each
 * key is tested against BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and
 * is_fstree() before the root is read with btrfs_read_fs_root() and passed to
 * fill_csum_tree_from_one_fs_root().
 */
10946 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10947 struct btrfs_root *csum_root)
10949 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10950 struct btrfs_path path;
10951 struct btrfs_root *tree_root = fs_info->tree_root;
10952 struct btrfs_root *cur_root;
10953 struct extent_buffer *node;
10954 struct btrfs_key key;
10958 btrfs_init_path(&path);
10959 key.objectid = BTRFS_FS_TREE_OBJECTID;
10961 key.type = BTRFS_ROOT_ITEM_KEY;
/* Read-only lookup: no transaction handle is passed to the search. */
10962 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
10971 node = path.nodes[0];
10972 slot = path.slots[0];
10973 btrfs_item_key_to_cpu(node, &key, slot);
10974 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10976 if (key.type != BTRFS_ROOT_ITEM_KEY)
10978 if (!is_fstree(key.objectid))
/*
 * NOTE(review): offset is forced to (u64)-1 before reading the root,
 * presumably to select the most recent root item; confirm against
 * btrfs_read_fs_root().
 */
10980 key.offset = (u64)-1;
10982 cur_root = btrfs_read_fs_root(fs_info, &key);
/* Both an error pointer and NULL count as a failed read. */
10983 if (IS_ERR(cur_root) || !cur_root) {
10984 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10988 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10993 ret = btrfs_next_item(tree_root, &path);
11003 btrfs_release_path(&path);
/*
 * Fill the checksum tree by scanning the extent tree.
 *
 * Walks the leaves of fs_info->extent_root.  Each item is tested for key
 * type BTRFS_EXTENT_ITEM_KEY and for the BTRFS_EXTENT_FLAG_DATA flag, and
 * populate_csum() is called with key.objectid as the start of the range.
 */
11007 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11008 struct btrfs_root *csum_root)
11010 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11011 struct btrfs_path path;
11012 struct btrfs_extent_item *ei;
11013 struct extent_buffer *leaf;
11015 struct btrfs_key key;
11018 btrfs_init_path(&path);
11020 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Read-only lookup: no transaction handle is passed to the search. */
11022 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11024 btrfs_release_path(&path);
/* One sector of scratch space for populate_csum(). */
11028 buf = malloc(csum_root->sectorsize);
11030 btrfs_release_path(&path);
/* Past the last item of this leaf: move on to the next leaf. */
11035 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11036 ret = btrfs_next_leaf(extent_root, &path);
11044 leaf = path.nodes[0];
11046 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11047 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11052 ei = btrfs_item_ptr(leaf, path.slots[0],
11053 struct btrfs_extent_item);
/* Test whether the extent item carries the DATA flag. */
11054 if (!(btrfs_extent_flags(leaf, ei) &
11055 BTRFS_EXTENT_FLAG_DATA)) {
11060 ret = populate_csum(trans, csum_root, buf, key.objectid,
11067 btrfs_release_path(&path);
11073 * Recalculate the csum and put it into the csum tree.
11075 * Extent tree init will wipe out all the extent info, so in that case, we
11076 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
11077 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher for refilling the checksum tree: a non-zero @search_fs_tree
 * selects fill_csum_tree_from_fs(), otherwise fill_csum_tree_from_extent()
 * is used.  The chosen helper's return value is returned unchanged.
 */
11079 static int fill_csum_tree(struct btrfs_trans_handle *trans,
11080 struct btrfs_root *csum_root,
11081 int search_fs_tree)
11083 if (search_fs_tree)
11084 return fill_csum_tree_from_fs(trans, csum_root);
11086 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Removes cache extents from the tree until it is empty, converting each one
 * back to its enclosing struct root_item_info, then frees the tree itself and
 * resets the global pointer to NULL.  A NULL cache is checked for up front.
 */
11089 static void free_roots_info_cache(void)
11091 if (!roots_info_cache)
11094 while (!cache_tree_empty(roots_info_cache)) {
11095 struct cache_extent *entry;
11096 struct root_item_info *rii;
11098 entry = first_cache_extent(roots_info_cache);
/* Unlink the entry before recovering its container. */
11101 remove_cache_extent(roots_info_cache, entry);
11102 rii = container_of(entry, struct root_item_info, cache_extent);
11106 free(roots_info_cache);
11107 roots_info_cache = NULL;
/*
 * Populate roots_info_cache from the extent tree.
 *
 * The global cache is allocated and initialised on first use.  The extent
 * root is then scanned for BTRFS_EXTENT_ITEM_KEY and BTRFS_METADATA_ITEM_KEY
 * items.  For an item whose first inline ref has type
 * BTRFS_TREE_BLOCK_REF_KEY, the ref offset is taken as the root id and a
 * root_item_info entry keyed by that id is looked up or created.  An entry
 * is overwritten with the block's level, bytenr and generation when the
 * block's level is higher than the recorded one, or when no level has been
 * recorded yet.
 */
11110 static int build_roots_info_cache(struct btrfs_fs_info *info)
11113 struct btrfs_key key;
11114 struct extent_buffer *leaf;
11115 struct btrfs_path path;
/* Lazily create the global cache. */
11117 if (!roots_info_cache) {
11118 roots_info_cache = malloc(sizeof(*roots_info_cache));
11119 if (!roots_info_cache)
11121 cache_tree_init(roots_info_cache);
11124 btrfs_init_path(&path);
11126 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Read-only lookup: no transaction handle is passed to the search. */
11128 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11131 leaf = path.nodes[0];
11134 struct btrfs_key found_key;
11135 struct btrfs_extent_item *ei;
11136 struct btrfs_extent_inline_ref *iref;
11137 int slot = path.slots[0];
11142 struct cache_extent *entry;
11143 struct root_item_info *rii;
/* Past the last item of this leaf: advance and refresh leaf/slot. */
11145 if (slot >= btrfs_header_nritems(leaf)) {
11146 ret = btrfs_next_leaf(info->extent_root, &path);
11153 leaf = path.nodes[0];
11154 slot = path.slots[0];
11157 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11159 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11160 found_key.type != BTRFS_METADATA_ITEM_KEY)
11163 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11164 flags = btrfs_extent_flags(leaf, ei);
/* EXTENT_ITEM keys are additionally tested for the TREE_BLOCK flag. */
11166 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11167 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the inline ref directly follows the extent item and the
 * level is taken from the key offset.
 */
11170 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11171 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11172 level = found_key.offset;
/*
 * Otherwise a btrfs_tree_block_info sits between the extent item and the
 * inline ref, and supplies the level.
 */
11174 struct btrfs_tree_block_info *binfo;
11176 binfo = (struct btrfs_tree_block_info *)(ei + 1);
11177 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11178 level = btrfs_tree_block_level(leaf, binfo);
11182 * For a root extent, it must be of the following type and the
11183 * first (and only one) iref in the item.
11185 type = btrfs_extent_inline_ref_type(leaf, iref);
11186 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The inline ref offset is used as the id of the owning root. */
11189 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11190 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11192 rii = malloc(sizeof(struct root_item_info));
/* New entries cover exactly one id: start = root_id, size = 1. */
11197 rii->cache_extent.start = root_id;
11198 rii->cache_extent.size = 1;
/* (u8)-1 is the initial level; the update below treats it as "not set". */
11199 rii->level = (u8)-1;
11200 entry = &rii->cache_extent;
11201 ret = insert_cache_extent(roots_info_cache, entry);
11204 rii = container_of(entry, struct root_item_info,
11208 ASSERT(rii->cache_extent.start == root_id);
11209 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or a fresh entry) replaces the recorded candidate. */
11211 if (level > rii->level || rii->level == (u8)-1) {
11212 rii->level = level;
11213 rii->bytenr = found_key.objectid;
11214 rii->gen = btrfs_extent_generation(leaf, ei);
11215 rii->node_count = 1;
/* Same level as the recorded candidate; handling not visible in this excerpt. */
11216 } else if (level == rii->level) {
11224 btrfs_release_path(&path);
/*
 * Compare one root item with the data collected in roots_info_cache and,
 * when allowed, rewrite it.
 *
 * @info:           filesystem being checked
 * @path:           path whose nodes[0]/slots[0] address the root item
 * @root_key:       key of the root item; its objectid is used as the root id
 * @read_only_mode: when non-zero the item is only compared, never written
 *
 * The root item is copied out of the leaf and its bytenr, level and
 * generation are compared with the cached values.  On a mismatch a message
 * is printed unless both @read_only_mode and the global repair flag are set.
 * When @read_only_mode is zero, the three fields are updated and the item is
 * written back into the leaf.
 */
11229 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11230 struct btrfs_path *path,
11231 const struct btrfs_key *root_key,
11232 const int read_only_mode)
11234 const u64 root_id = root_key->objectid;
11235 struct cache_extent *entry;
11236 struct root_item_info *rii;
11237 struct btrfs_root_item ri;
11238 unsigned long offset;
/* Cache entries are keyed by root id with a size of 1. */
11240 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11243 "Error: could not find extent items for root %llu\n",
11244 root_key->objectid);
11248 rii = container_of(entry, struct root_item_info, cache_extent);
11249 ASSERT(rii->cache_extent.start == root_id);
11250 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one candidate node is reported as an error. */
11252 if (rii->node_count != 1) {
11254 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item. */
11259 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11260 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11262 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11263 btrfs_root_level(&ri) != rii->level ||
11264 btrfs_root_generation(&ri) != rii->gen) {
11267 * If we're in repair mode but our caller told us to not update
11268 * the root item, i.e. just check if it needs to be updated, don't
11269 * print this message, since the caller will call us again shortly
11270 * for the same root item without read only mode (the caller will
11271 * open a transaction first).
11273 if (!(read_only_mode && repair))
11275 "%sroot item for root %llu,"
11276 " current bytenr %llu, current gen %llu, current level %u,"
11277 " new bytenr %llu, new gen %llu, new level %u\n",
11278 (read_only_mode ? "" : "fixing "),
11280 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11281 btrfs_root_level(&ri),
11282 rii->bytenr, rii->gen, rii->level);
/* The root item claims a newer generation than the root node that was found. */
11284 if (btrfs_root_generation(&ri) > rii->gen) {
11286 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11287 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write mode: patch the local copy and store it back into the leaf. */
11291 if (!read_only_mode) {
11292 btrfs_set_root_bytenr(&ri, rii->bytenr);
11293 btrfs_set_root_level(&ri, rii->level);
11294 btrfs_set_root_generation(&ri, rii->gen);
11295 write_extent_buffer(path->nodes[0], &ri,
11296 offset, sizeof(ri));
11306 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11307 * caused read-only snapshots to be corrupted if they were created at a moment
11308 * when the source subvolume/snapshot had orphan items. The issue was that the
11309 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11310 * node instead of the post orphan cleanup root node.
11311 * So this function, and its callees, just detect and fix those cases. Even
11312 * though the regression was for read-only snapshots, this function applies to
11313 * any snapshot/subvolume root.
11314 * This must be run before any other repair code - not doing so makes other
11315 * repair code delete or modify backrefs in the extent tree for example, which
11316 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the root items stored in the tree root and check or repair each one
 * with maybe_repair_root_item().
 *
 * roots_info_cache is built first and freed before the function finishes.
 * The search starts at objectid BTRFS_FIRST_FREE_OBJECTID with type
 * BTRFS_ROOT_ITEM_KEY.  Each found key is tested for type
 * BTRFS_ROOT_ITEM_KEY and for objectid BTRFS_TREE_RELOC_OBJECTID before
 * maybe_repair_root_item() is called.
 */
11318 static int repair_root_items(struct btrfs_fs_info *info)
11320 struct btrfs_path path;
11321 struct btrfs_key key;
11322 struct extent_buffer *leaf;
11323 struct btrfs_trans_handle *trans = NULL;
11326 int need_trans = 0;
11328 btrfs_init_path(&path);
11330 ret = build_roots_info_cache(info);
11334 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11335 key.type = BTRFS_ROOT_ITEM_KEY;
11340 * Avoid opening and committing transactions if a leaf doesn't have
11341 * any root items that need to be fixed, so that we avoid rotating
11342 * backup roots unnecessarily.
11345 trans = btrfs_start_transaction(info->tree_root, 1);
11346 if (IS_ERR(trans)) {
11347 ret = PTR_ERR(trans);
/* trans is NULL here unless a transaction was started above. */
11352 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11356 leaf = path.nodes[0];
11359 struct btrfs_key found_key;
/* End of the current leaf: find the key to continue from. */
11361 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11362 int no_more_keys = find_next_key(&path, &key);
11364 btrfs_release_path(&path);
11366 ret = btrfs_commit_transaction(trans,
11378 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11380 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11382 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11385 ret = maybe_repair_root_item(info, &path, &found_key,
/*
 * No transaction is open but repair is enabled: the path is dropped,
 * presumably to restart the search with a transaction (the following
 * lines are not visible in this excerpt).
 */
11390 if (!trans && repair) {
11393 btrfs_release_path(&path);
11403 free_roots_info_cache();
11404 btrfs_release_path(&path);
/* NOTE(review): the result of this commit is not captured. */
11406 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of the block groups and reset the super block's
 * cache generation.
 *
 * Block groups are looked up one after another starting from `current`.
 * btrfs_clear_free_space_cache() is called on each and `current` is advanced
 * to the end of that block group.  Afterwards the cache_generation field of
 * fs_info->super_copy is set to (u64)-1 inside a transaction on the tree
 * root.
 */
11413 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11415 struct btrfs_trans_handle *trans;
11416 struct btrfs_block_group_cache *bg_cache;
11420 /* Clear all free space cache inodes and its extent data */
11422 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11425 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup right after this block group (objectid + offset). */
11428 current = bg_cache->key.objectid + bg_cache->key.offset;
11431 /* Don't forget to set cache_generation to -1 */
11432 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11433 if (IS_ERR(trans)) {
11434 error("failed to update super block cache generation");
11435 return PTR_ERR(trans);
11437 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of this commit is not captured. */
11438 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage and help text for "btrfs check".  cmd_check() passes this table to
 * usage() on invalid invocations.
 */
11443 const char * const cmd_check_usage[] = {
11444 "btrfs check [options] <device>",
11445 "Check structural integrity of a filesystem (unmounted).",
11446 "Check structural integrity of an unmounted filesystem. Verify internal",
11447 "trees' consistency and item connectivity. In the repair mode try to",
11448 "fix the problems found. ",
11449 "WARNING: the repair mode is considered dangerous",
11451 "-s|--super <superblock> use this superblock copy",
11452 "-b|--backup use the first valid backup root copy",
11453 "--repair try to repair the filesystem",
11454 "--readonly run in read-only mode (default)",
11455 "--init-csum-tree create a new CRC tree",
11456 "--init-extent-tree create a new extent tree",
11457 "--mode <MODE> allows choice of memory/IO trade-offs",
11458 " where MODE is one of:",
11459 " original - read inodes and extents to memory (requires",
11460 " more memory, does less IO)",
11461 " lowmem - try to use less memory but read blocks again",
11463 "--check-data-csum verify checksums of data blocks",
11464 "-Q|--qgroup-report print a report on qgroup consistency",
11465 "-E|--subvol-extents <subvolid>",
11466 " print subvolume extents and sharing state",
11467 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11468 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11469 "-p|--progress indicate progress",
11470 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, checks the mount status of the device and opens
 * the filesystem.  It then handles the --clear-space-cache modes, the log
 * tree in repair mode, the qgroup and subvolume extent reports, and the
 * optional re-initialisation of the extent and checksum trees.
 *
 * The checks then run in this order: extents, root items, free space
 * cache/tree, fs roots, csums, root refs and quota groups.  Finally the
 * collected statistics are printed and the caches are released.
 */
11474 int cmd_check(int argc, char **argv)
11476 struct cache_tree root_cache;
11477 struct btrfs_root *root;
11478 struct btrfs_fs_info *info;
11481 u64 tree_root_bytenr = 0;
11482 u64 chunk_root_bytenr = 0;
11483 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11486 int init_csum_tree = 0;
11488 int clear_space_cache = 0;
11489 int qgroup_report = 0;
11490 int qgroups_repaired = 0;
/* Base open flags; the option handlers below OR in further OPEN_CTREE_* bits. */
11491 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Values for options that only have a long form. */
11495 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11496 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11497 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11498 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11499 static const struct option long_options[] = {
11500 { "super", required_argument, NULL, 's' },
11501 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11502 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11503 { "init-csum-tree", no_argument, NULL,
11504 GETOPT_VAL_INIT_CSUM },
11505 { "init-extent-tree", no_argument, NULL,
11506 GETOPT_VAL_INIT_EXTENT },
11507 { "check-data-csum", no_argument, NULL,
11508 GETOPT_VAL_CHECK_CSUM },
11509 { "backup", no_argument, NULL, 'b' },
11510 { "subvol-extents", required_argument, NULL, 'E' },
11511 { "qgroup-report", no_argument, NULL, 'Q' },
11512 { "tree-root", required_argument, NULL, 'r' },
11513 { "chunk-root", required_argument, NULL,
11514 GETOPT_VAL_CHUNK_TREE },
11515 { "progress", no_argument, NULL, 'p' },
11516 { "mode", required_argument, NULL,
11518 { "clear-space-cache", required_argument, NULL,
11519 GETOPT_VAL_CLEAR_SPACE_CACHE},
11520 { NULL, 0, NULL, 0}
/* Short options: -a (ignored), -s <arg>, -b, -r <arg>, -p. */
11523 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11527 case 'a': /* ignored */ break;
11529 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11532 num = arg_strtou64(optarg);
/* Reject super block mirror indexes at or above BTRFS_SUPER_MIRROR_MAX. */
11533 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11535 "super mirror should be less than %d",
11536 BTRFS_SUPER_MIRROR_MAX);
11539 bytenr = btrfs_sb_offset(((int)num));
11540 printf("using SB copy %llu, bytenr %llu\n", num,
11541 (unsigned long long)bytenr);
11547 subvolid = arg_strtou64(optarg);
11550 tree_root_bytenr = arg_strtou64(optarg);
11552 case GETOPT_VAL_CHUNK_TREE:
11553 chunk_root_bytenr = arg_strtou64(optarg);
11556 ctx.progress_enabled = true;
11560 usage(cmd_check_usage);
11561 case GETOPT_VAL_REPAIR:
11562 printf("enabling repair mode\n");
11564 ctree_flags |= OPEN_CTREE_WRITES;
11566 case GETOPT_VAL_READONLY:
11569 case GETOPT_VAL_INIT_CSUM:
11570 printf("Creating a new CRC tree\n");
11571 init_csum_tree = 1;
11573 ctree_flags |= OPEN_CTREE_WRITES;
11575 case GETOPT_VAL_INIT_EXTENT:
11576 init_extent_tree = 1;
11577 ctree_flags |= (OPEN_CTREE_WRITES |
11578 OPEN_CTREE_NO_BLOCK_GROUPS);
11581 case GETOPT_VAL_CHECK_CSUM:
11582 check_data_csum = 1;
11584 case GETOPT_VAL_MODE:
11585 check_mode = parse_check_mode(optarg);
11586 if (check_mode == CHECK_MODE_UNKNOWN) {
11587 error("unknown mode: %s", optarg);
11591 case GETOPT_VAL_CLEAR_SPACE_CACHE:
11592 if (strcmp(optarg, "v1") == 0) {
11593 clear_space_cache = 1;
11594 } else if (strcmp(optarg, "v2") == 0) {
11595 clear_space_cache = 2;
11596 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
11599 "invalid argument to --clear-space-cache, must be v1 or v2");
11602 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) is expected. */
11607 if (check_argc_exact(argc - optind, 1))
11608 usage(cmd_check_usage);
11610 if (ctx.progress_enabled) {
11611 ctx.tp = TASK_NOTHING;
11612 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11615 /* This check is the only reason for --readonly to exist */
11616 if (readonly && repair) {
11617 error("repair options are not compatible with --readonly");
11622 * Not supported yet
11624 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11625 error("low memory mode doesn't support repair yet");
11630 cache_tree_init(&root_cache);
/* A negative result means the mount status itself could not be determined. */
11632 if((ret = check_mounted(argv[optind])) < 0) {
11633 error("could not check mount status: %s", strerror(-ret));
11636 error("%s is currently mounted, aborting", argv[optind]);
11641 /* only allow partial opening under repair mode */
11643 ctree_flags |= OPEN_CTREE_PARTIAL;
11645 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11646 chunk_root_bytenr, ctree_flags);
11648 error("cannot open file system");
11653 global_info = info;
11654 root = info->fs_root;
/* --clear-space-cache v1: refused when the free space tree feature is set. */
11655 if (clear_space_cache == 1) {
11656 if (btrfs_fs_compat_ro(info,
11657 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11659 "free space cache v2 detected, use --clear-space-cache v2");
11663 printf("Clearing free space cache\n");
11664 ret = clear_free_space_cache(info);
11666 error("failed to clear free space cache");
11669 printf("Free space cache cleared\n");
/* --clear-space-cache v2: nothing to clear without the free space tree feature. */
11672 } else if (clear_space_cache == 2) {
11673 if (!btrfs_fs_compat_ro(info,
11674 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11675 printf("no free space cache v2 to clear\n");
11679 printf("Clear free space cache v2\n");
11680 ret = btrfs_clear_free_space_tree(info);
11682 error("failed to clear free space cache v2: %d", ret);
11685 printf("free space cache v2 cleared\n");
11691 * repair mode will force us to commit transaction which
11692 * will make us fail to load log tree when mounting.
11694 if (repair && btrfs_super_log_root(info->super_copy)) {
11695 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11700 ret = zero_log_tree(root);
11702 error("failed to zero log tree: %d", ret);
11707 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* Report modes: quota group report and per-subvolume extent state. */
11708 if (qgroup_report) {
11709 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11711 ret = qgroup_verify_all(info);
11717 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11718 subvolid, argv[optind], uuidbuf);
11719 ret = print_extent_state(info, subvolid);
11722 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree, dev and chunk roots must all be readable to go on. */
11724 if (!extent_buffer_uptodate(info->tree_root->node) ||
11725 !extent_buffer_uptodate(info->dev_root->node) ||
11726 !extent_buffer_uptodate(info->chunk_root->node)) {
11727 error("critical roots corrupted, unable to check the filesystem");
/* Tree re-initialisation runs in its own transaction, committed before the checks. */
11732 if (init_extent_tree || init_csum_tree) {
11733 struct btrfs_trans_handle *trans;
11735 trans = btrfs_start_transaction(info->extent_root, 0);
11736 if (IS_ERR(trans)) {
11737 error("error starting transaction");
11738 ret = PTR_ERR(trans);
11742 if (init_extent_tree) {
11743 printf("Creating a new extent tree\n");
11744 ret = reinit_extent_tree(trans, info);
11749 if (init_csum_tree) {
11750 printf("Reinitialize checksum tree\n");
11751 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11753 error("checksum tree initialization failed: %d",
11759 ret = fill_csum_tree(trans, info->csum_root,
11762 error("checksum tree refilling failed: %d", ret);
11767 * Ok now we commit and run the normal fsck, which will add
11768 * extent entries for all of the items it finds.
11770 ret = btrfs_commit_transaction(trans, info->extent_root);
11774 if (!extent_buffer_uptodate(info->extent_root->node)) {
11775 error("critical: extent_root, unable to check the filesystem");
11779 if (!extent_buffer_uptodate(info->csum_root->node)) {
11780 error("critical: csum_root, unable to check the filesystem");
11785 if (!ctx.progress_enabled)
11786 fprintf(stderr, "checking extents\n");
/* The low memory mode uses a separate extent checker. */
11787 if (check_mode == CHECK_MODE_LOWMEM)
11788 ret = check_chunks_and_extents_v2(root);
11790 ret = check_chunks_and_extents(root);
11793 "errors found in extent allocation tree or chunk allocation");
/* The return value is printed below as a count of fixed or outdated roots. */
11795 ret = repair_root_items(info);
11799 fprintf(stderr, "Fixed %d roots.\n", ret);
11801 } else if (ret > 0) {
11803 "Found %d roots with an outdated root item.\n",
11806 "Please run a filesystem check with the option --repair to fix them.\n");
11811 if (!ctx.progress_enabled) {
11812 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11813 fprintf(stderr, "checking free space tree\n");
11815 fprintf(stderr, "checking free space cache\n");
11817 ret = check_space_cache(root);
11822 * We used to have to have these hole extents in between our real
11823 * extents so if we don't have this flag set we need to make sure there
11824 * are no gaps in the file extents for inodes, otherwise we can just
11825 * ignore it when this happens.
11827 no_holes = btrfs_fs_incompat(root->fs_info,
11828 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11829 if (!ctx.progress_enabled)
11830 fprintf(stderr, "checking fs roots\n");
11831 ret = check_fs_roots(root, &root_cache);
11835 fprintf(stderr, "checking csums\n");
11836 ret = check_csums(root);
11840 fprintf(stderr, "checking root refs\n");
11841 ret = check_root_refs(root, &root_cache);
/* In repair mode, pass each extent buffer queued on recow_ebs to recow_extent_buffer(). */
11845 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11846 struct extent_buffer *eb;
11848 eb = list_first_entry(&root->fs_info->recow_ebs,
11849 struct extent_buffer, recow);
11850 list_del_init(&eb->recow);
11851 ret = recow_extent_buffer(root, eb);
/* Process the items queued on delete_items with delete_bad_item(). */
11856 while (!list_empty(&delete_items)) {
11857 struct bad_item *bad;
11859 bad = list_first_entry(&delete_items, struct bad_item, list);
11860 list_del_init(&bad->list);
11862 ret = delete_bad_item(root, bad);
11866 if (info->quota_enabled) {
11868 fprintf(stderr, "checking quota groups\n");
11869 err = qgroup_verify_all(info);
11873 err = repair_qgroups(info, &qgroups_repaired);
11878 if (!list_empty(&root->fs_info->recow_ebs)) {
11879 error("transid errors in file system");
11883 /* Don't override original ret */
11884 if (!ret && qgroups_repaired)
11885 ret = qgroups_repaired;
11887 if (found_old_backref) { /*
11888 * there was a disk format change when mixed
11889 * backref was in testing tree. The old format
11890 * existed about one week.
11892 printf("\n * Found old mixed backref format. "
11893 "The old format is not supported! *"
11894 "\n * Please mount the FS in readonly mode, "
11895 "backup data and re-format the FS. *\n\n");
11898 printf("found %llu bytes used err is %d\n",
11899 (unsigned long long)bytes_used, ret);
11900 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11901 printf("total tree bytes: %llu\n",
11902 (unsigned long long)total_btree_bytes);
11903 printf("total fs tree bytes: %llu\n",
11904 (unsigned long long)total_fs_tree_bytes);
11905 printf("total extent tree bytes: %llu\n",
11906 (unsigned long long)total_extent_tree_bytes);
11907 printf("btree space waste bytes: %llu\n",
11908 (unsigned long long)btree_space_waste);
11909 printf("file data blocks allocated: %llu\n referenced %llu\n",
11910 (unsigned long long)data_bytes_allocated,
11911 (unsigned long long)data_bytes_referenced);
11913 free_qgroup_counts();
11914 free_root_recs_tree(&root_cache);
11918 if (ctx.progress_enabled)
11919 task_deinit(ctx.info);