2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
50 TASK_NOTHING, /* have to be the last element */
55 enum task_position tp;
57 struct task_info *info;
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
88 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 struct data_backref {
102 struct extent_backref node;
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
118 return container_of(back, struct data_backref, node);
122 * Much like data_backref, just removed the undetermined members
123 * and change it to use list_head.
124 * During extent scan, it is stored in root->orphan_data_extent.
125 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
127 struct orphan_data_extent {
128 struct list_head list;
136 struct tree_backref {
137 struct extent_backref node;
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
146 return container_of(back, struct tree_backref, node);
/*
 * Value used to explicitly initialize
 * extent_record::flag_block_full_backref.  That bitfield is two bits wide,
 * so besides 0 and 1 it can also hold this third value.
 */
enum {
	FLAG_UNSET = 2,
};
152 struct extent_record {
153 struct list_head backrefs;
154 struct list_head dups;
155 struct list_head list;
156 struct cache_extent cache;
157 struct btrfs_disk_key parent_key;
162 u64 extent_item_refs;
164 u64 parent_generation;
168 unsigned int flag_block_full_backref:2;
169 unsigned int found_rec:1;
170 unsigned int content_checked:1;
171 unsigned int owner_ref_checked:1;
172 unsigned int is_root:1;
173 unsigned int metadata:1;
174 unsigned int bad_full_backref:1;
175 unsigned int crossing_stripes:1;
176 unsigned int wrong_chunk_type:1;
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
181 return container_of(entry, struct extent_record, list);
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
200 return list_entry(entry, struct inode_backref, list);
203 struct root_item_record {
204 struct list_head list;
211 struct btrfs_key drop_key;
/*
 * Per-reference error bits.  They are OR-ed into a backref's 'errors' mask
 * (see add_inode_backref()) and turned into text by print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
228 struct file_extent_hole {
234 struct inode_record {
235 struct list_head backrefs;
236 unsigned int checked:1;
237 unsigned int merging:1;
238 unsigned int found_inode_item:1;
239 unsigned int found_dir_item:1;
240 unsigned int found_file_extent:1;
241 unsigned int found_csum_item:1;
242 unsigned int some_csum_missing:1;
243 unsigned int nodatasum:1;
256 struct rb_root holes;
257 struct list_head orphan_extents;
/*
 * Per-inode error bits.  They are OR-ed into inode_record 'errors' (see
 * maybe_free_inode_rec()) and turned into text by print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
278 struct root_backref {
279 struct list_head list;
280 unsigned int found_dir_item:1;
281 unsigned int found_dir_index:1;
282 unsigned int found_back_ref:1;
283 unsigned int found_forward_ref:1;
284 unsigned int reachable:1;
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
295 return list_entry(entry, struct root_backref, list);
299 struct list_head backrefs;
300 struct cache_extent cache;
301 unsigned int found_root_item:1;
307 struct cache_extent cache;
312 struct cache_extent cache;
313 struct cache_tree root_cache;
314 struct cache_tree inode_cache;
315 struct inode_record *current;
324 struct walk_control {
325 struct cache_tree shared;
326 struct shared_node *nodes[BTRFS_MAX_LEVEL];
332 struct btrfs_key key;
334 struct list_head list;
337 struct extent_entry {
342 struct list_head list;
345 struct root_item_info {
346 /* level of the root */
348 /* number of nodes at this level, must be 1 for a root */
352 struct cache_extent cache_extent;
356 * Error bit for low memory mode check.
358 * Currently no caller cares about it yet. Just internal use for error
/*
 * Every flag owns a distinct bit so that several problems can be OR-ed
 * into one value and still be told apart.  CROSSING_STRIPE_BOUNDARY used to
 * share (1 << 4) with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable; it now uses the first free bit instead.  All other
 * values are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
372 static void *print_status_check(void *p)
374 struct task_ctx *priv = p;
375 const char work_indicator[] = { '.', 'o', 'O', 'o' };
377 static char *task_position_string[] = {
379 "checking free space cache",
383 task_period_start(priv->info, 1000 /* 1s */);
385 if (priv->tp == TASK_NOTHING)
389 printf("%s [%c]\r", task_position_string[priv->tp],
390 work_indicator[count % 4]);
393 task_period_wait(priv->info);
398 static int print_status_return(void *p)
406 static enum btrfs_check_mode parse_check_mode(const char *str)
408 if (strcmp(str, "lowmem") == 0)
409 return CHECK_MODE_LOWMEM;
410 if (strcmp(str, "orig") == 0)
411 return CHECK_MODE_ORIGINAL;
412 if (strcmp(str, "original") == 0)
413 return CHECK_MODE_ORIGINAL;
415 return CHECK_MODE_UNKNOWN;
418 /* Compatibility function to allow reuse of old code */
419 static u64 first_extent_gap(struct rb_root *holes)
421 struct file_extent_hole *hole;
423 if (RB_EMPTY_ROOT(holes))
426 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
432 struct file_extent_hole *hole1;
433 struct file_extent_hole *hole2;
435 hole1 = rb_entry(node1, struct file_extent_hole, node);
436 hole2 = rb_entry(node2, struct file_extent_hole, node);
438 if (hole1->start > hole2->start)
440 if (hole1->start < hole2->start)
442 /* Now hole1->start == hole2->start */
443 if (hole1->len >= hole2->len)
445 * Hole 1 will be merge center
446 * Same hole will be merged later
449 /* Hole 2 will be merge center */
454 * Add a hole to the record
456 * This will do hole merge for copy_file_extent_holes(),
457 * which will ensure there won't be continuous holes.
459 static int add_file_extent_hole(struct rb_root *holes,
462 struct file_extent_hole *hole;
463 struct file_extent_hole *prev = NULL;
464 struct file_extent_hole *next = NULL;
466 hole = malloc(sizeof(*hole));
471 /* Since compare will not return 0, no -EEXIST will happen */
472 rb_insert(holes, &hole->node, compare_hole);
474 /* simple merge with previous hole */
475 if (rb_prev(&hole->node))
476 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
478 if (prev && prev->start + prev->len >= hole->start) {
479 hole->len = hole->start + hole->len - prev->start;
480 hole->start = prev->start;
481 rb_erase(&prev->node, holes);
486 /* iterate merge with next holes */
488 if (!rb_next(&hole->node))
490 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
492 if (hole->start + hole->len >= next->start) {
493 if (hole->start + hole->len <= next->start + next->len)
494 hole->len = next->start + next->len -
496 rb_erase(&next->node, holes);
505 static int compare_hole_range(struct rb_node *node, void *data)
507 struct file_extent_hole *hole;
510 hole = (struct file_extent_hole *)data;
513 hole = rb_entry(node, struct file_extent_hole, node);
514 if (start < hole->start)
516 if (start >= hole->start && start < hole->start + hole->len)
522 * Delete a hole in the record
524 * This will split the hole and is much more restrictive than add.
526 static int del_file_extent_hole(struct rb_root *holes,
529 struct file_extent_hole *hole;
530 struct file_extent_hole tmp;
535 struct rb_node *node;
542 node = rb_search(holes, &tmp, compare_hole_range, NULL);
545 hole = rb_entry(node, struct file_extent_hole, node);
546 if (start + len > hole->start + hole->len)
550 * Now there will be no overlap, delete the hole and re-add the
551 * split(s) if they exists.
553 if (start > hole->start) {
554 prev_start = hole->start;
555 prev_len = start - hole->start;
558 if (hole->start + hole->len > start + len) {
559 next_start = start + len;
560 next_len = hole->start + hole->len - start - len;
563 rb_erase(node, holes);
566 ret = add_file_extent_hole(holes, prev_start, prev_len);
571 ret = add_file_extent_hole(holes, next_start, next_len);
578 static int copy_file_extent_holes(struct rb_root *dst,
581 struct file_extent_hole *hole;
582 struct rb_node *node;
585 node = rb_first(src);
587 hole = rb_entry(node, struct file_extent_hole, node);
588 ret = add_file_extent_hole(dst, hole->start, hole->len);
591 node = rb_next(node);
596 static void free_file_extent_holes(struct rb_root *holes)
598 struct rb_node *node;
599 struct file_extent_hole *hole;
601 node = rb_first(holes);
603 hole = rb_entry(node, struct file_extent_hole, node);
604 rb_erase(node, holes);
606 node = rb_first(holes);
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613 struct btrfs_root *root)
615 if (root->last_trans != trans->transid) {
616 root->track_dirty = 1;
617 root->last_trans = trans->transid;
618 root->commit_root = root->node;
619 extent_buffer_get(root->node);
623 static u8 imode_to_type(u32 imode)
626 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
628 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
629 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
630 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
631 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
632 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
633 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
636 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
642 struct device_record *rec1;
643 struct device_record *rec2;
645 rec1 = rb_entry(node1, struct device_record, node);
646 rec2 = rb_entry(node2, struct device_record, node);
647 if (rec1->devid > rec2->devid)
649 else if (rec1->devid < rec2->devid)
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
657 struct inode_record *rec;
658 struct inode_backref *backref;
659 struct inode_backref *orig;
660 struct inode_backref *tmp;
661 struct orphan_data_extent *src_orphan;
662 struct orphan_data_extent *dst_orphan;
667 rec = malloc(sizeof(*rec));
669 return ERR_PTR(-ENOMEM);
670 memcpy(rec, orig_rec, sizeof(*rec));
672 INIT_LIST_HEAD(&rec->backrefs);
673 INIT_LIST_HEAD(&rec->orphan_extents);
674 rec->holes = RB_ROOT;
676 list_for_each_entry(orig, &orig_rec->backrefs, list) {
677 size = sizeof(*orig) + orig->namelen + 1;
678 backref = malloc(size);
683 memcpy(backref, orig, size);
684 list_add_tail(&backref->list, &rec->backrefs);
686 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687 dst_orphan = malloc(sizeof(*dst_orphan));
692 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
695 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
702 rb = rb_first(&rec->holes);
704 struct file_extent_hole *hole;
706 hole = rb_entry(rb, struct file_extent_hole, node);
712 if (!list_empty(&rec->backrefs))
713 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714 list_del(&orig->list);
718 if (!list_empty(&rec->orphan_extents))
719 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720 list_del(&orig->list);
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
732 struct orphan_data_extent *orphan;
734 if (list_empty(orphan_extents))
736 printf("The following data extent is lost in tree %llu:\n",
738 list_for_each_entry(orphan, orphan_extents, list) {
739 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740 orphan->objectid, orphan->offset, orphan->disk_bytenr,
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
747 u64 root_objectid = root->root_key.objectid;
748 int errors = rec->errors;
752 /* reloc root errors, we print its corresponding fs root objectid*/
753 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754 root_objectid = root->root_key.offset;
755 fprintf(stderr, "reloc");
757 fprintf(stderr, "root %llu inode %llu errors %x",
758 (unsigned long long) root_objectid,
759 (unsigned long long) rec->ino, rec->errors);
761 if (errors & I_ERR_NO_INODE_ITEM)
762 fprintf(stderr, ", no inode item");
763 if (errors & I_ERR_NO_ORPHAN_ITEM)
764 fprintf(stderr, ", no orphan item");
765 if (errors & I_ERR_DUP_INODE_ITEM)
766 fprintf(stderr, ", dup inode item");
767 if (errors & I_ERR_DUP_DIR_INDEX)
768 fprintf(stderr, ", dup dir index");
769 if (errors & I_ERR_ODD_DIR_ITEM)
770 fprintf(stderr, ", odd dir item");
771 if (errors & I_ERR_ODD_FILE_EXTENT)
772 fprintf(stderr, ", odd file extent");
773 if (errors & I_ERR_BAD_FILE_EXTENT)
774 fprintf(stderr, ", bad file extent");
775 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776 fprintf(stderr, ", file extent overlap");
777 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778 fprintf(stderr, ", file extent discount");
779 if (errors & I_ERR_DIR_ISIZE_WRONG)
780 fprintf(stderr, ", dir isize wrong");
781 if (errors & I_ERR_FILE_NBYTES_WRONG)
782 fprintf(stderr, ", nbytes wrong");
783 if (errors & I_ERR_ODD_CSUM_ITEM)
784 fprintf(stderr, ", odd csum item");
785 if (errors & I_ERR_SOME_CSUM_MISSING)
786 fprintf(stderr, ", some csum missing");
787 if (errors & I_ERR_LINK_COUNT_WRONG)
788 fprintf(stderr, ", link count wrong");
789 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790 fprintf(stderr, ", orphan file extent");
791 fprintf(stderr, "\n");
792 /* Print the orphan extents if needed */
793 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
796 /* Print the holes if needed */
797 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798 struct file_extent_hole *hole;
799 struct rb_node *node;
802 node = rb_first(&rec->holes);
803 fprintf(stderr, "Found file extent holes:\n");
806 hole = rb_entry(node, struct file_extent_hole, node);
807 fprintf(stderr, "\tstart: %llu, len: %llu\n",
808 hole->start, hole->len);
809 node = rb_next(node);
812 fprintf(stderr, "\tstart: 0, len: %llu\n",
813 round_up(rec->isize, root->sectorsize));
817 static void print_ref_error(int errors)
819 if (errors & REF_ERR_NO_DIR_ITEM)
820 fprintf(stderr, ", no dir item");
821 if (errors & REF_ERR_NO_DIR_INDEX)
822 fprintf(stderr, ", no dir index");
823 if (errors & REF_ERR_NO_INODE_REF)
824 fprintf(stderr, ", no inode ref");
825 if (errors & REF_ERR_DUP_DIR_ITEM)
826 fprintf(stderr, ", dup dir item");
827 if (errors & REF_ERR_DUP_DIR_INDEX)
828 fprintf(stderr, ", dup dir index");
829 if (errors & REF_ERR_DUP_INODE_REF)
830 fprintf(stderr, ", dup inode ref");
831 if (errors & REF_ERR_INDEX_UNMATCH)
832 fprintf(stderr, ", index mismatch");
833 if (errors & REF_ERR_FILETYPE_UNMATCH)
834 fprintf(stderr, ", filetype mismatch");
835 if (errors & REF_ERR_NAME_TOO_LONG)
836 fprintf(stderr, ", name too long");
837 if (errors & REF_ERR_NO_ROOT_REF)
838 fprintf(stderr, ", no root ref");
839 if (errors & REF_ERR_NO_ROOT_BACKREF)
840 fprintf(stderr, ", no root backref");
841 if (errors & REF_ERR_DUP_ROOT_REF)
842 fprintf(stderr, ", dup root ref");
843 if (errors & REF_ERR_DUP_ROOT_BACKREF)
844 fprintf(stderr, ", dup root backref");
845 fprintf(stderr, "\n");
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
851 struct ptr_node *node;
852 struct cache_extent *cache;
853 struct inode_record *rec = NULL;
856 cache = lookup_cache_extent(inode_cache, ino, 1);
858 node = container_of(cache, struct ptr_node, cache);
860 if (mod && rec->refs > 1) {
861 node->data = clone_inode_rec(rec);
862 if (IS_ERR(node->data))
868 rec = calloc(1, sizeof(*rec));
870 return ERR_PTR(-ENOMEM);
872 rec->extent_start = (u64)-1;
874 INIT_LIST_HEAD(&rec->backrefs);
875 INIT_LIST_HEAD(&rec->orphan_extents);
876 rec->holes = RB_ROOT;
878 node = malloc(sizeof(*node));
881 return ERR_PTR(-ENOMEM);
883 node->cache.start = ino;
884 node->cache.size = 1;
887 if (ino == BTRFS_FREE_INO_OBJECTID)
890 ret = insert_cache_extent(inode_cache, &node->cache);
892 return ERR_PTR(-EEXIST);
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
899 struct orphan_data_extent *orphan;
901 while (!list_empty(orphan_extents)) {
902 orphan = list_entry(orphan_extents->next,
903 struct orphan_data_extent, list);
904 list_del(&orphan->list);
909 static void free_inode_rec(struct inode_record *rec)
911 struct inode_backref *backref;
916 while (!list_empty(&rec->backrefs)) {
917 backref = to_inode_backref(rec->backrefs.next);
918 list_del(&backref->list);
921 free_orphan_data_extents(&rec->orphan_extents);
922 free_file_extent_holes(&rec->holes);
926 static int can_free_inode_rec(struct inode_record *rec)
928 if (!rec->errors && rec->checked && rec->found_inode_item &&
929 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935 struct inode_record *rec)
937 struct cache_extent *cache;
938 struct inode_backref *tmp, *backref;
939 struct ptr_node *node;
942 if (!rec->found_inode_item)
945 filetype = imode_to_type(rec->imode);
946 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947 if (backref->found_dir_item && backref->found_dir_index) {
948 if (backref->filetype != filetype)
949 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950 if (!backref->errors && backref->found_inode_ref &&
951 rec->nlink == rec->found_link) {
952 list_del(&backref->list);
958 if (!rec->checked || rec->merging)
961 if (S_ISDIR(rec->imode)) {
962 if (rec->found_size != rec->isize)
963 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964 if (rec->found_file_extent)
965 rec->errors |= I_ERR_ODD_FILE_EXTENT;
966 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967 if (rec->found_dir_item)
968 rec->errors |= I_ERR_ODD_DIR_ITEM;
969 if (rec->found_size != rec->nbytes)
970 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971 if (rec->nlink > 0 && !no_holes &&
972 (rec->extent_end < rec->isize ||
973 first_extent_gap(&rec->holes) < rec->isize))
974 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
977 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978 if (rec->found_csum_item && rec->nodatasum)
979 rec->errors |= I_ERR_ODD_CSUM_ITEM;
980 if (rec->some_csum_missing && !rec->nodatasum)
981 rec->errors |= I_ERR_SOME_CSUM_MISSING;
984 BUG_ON(rec->refs != 1);
985 if (can_free_inode_rec(rec)) {
986 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987 node = container_of(cache, struct ptr_node, cache);
988 BUG_ON(node->data != rec);
989 remove_cache_extent(inode_cache, &node->cache);
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
997 struct btrfs_path path;
998 struct btrfs_key key;
1001 key.objectid = BTRFS_ORPHAN_OBJECTID;
1002 key.type = BTRFS_ORPHAN_ITEM_KEY;
1005 btrfs_init_path(&path);
1006 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007 btrfs_release_path(&path);
1013 static int process_inode_item(struct extent_buffer *eb,
1014 int slot, struct btrfs_key *key,
1015 struct shared_node *active_node)
1017 struct inode_record *rec;
1018 struct btrfs_inode_item *item;
1020 rec = active_node->current;
1021 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022 if (rec->found_inode_item) {
1023 rec->errors |= I_ERR_DUP_INODE_ITEM;
1026 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027 rec->nlink = btrfs_inode_nlink(eb, item);
1028 rec->isize = btrfs_inode_size(eb, item);
1029 rec->nbytes = btrfs_inode_nbytes(eb, item);
1030 rec->imode = btrfs_inode_mode(eb, item);
1031 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1033 rec->found_inode_item = 1;
1034 if (rec->nlink == 0)
1035 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036 maybe_free_inode_rec(&active_node->inode_cache, rec);
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1042 int namelen, u64 dir)
1044 struct inode_backref *backref;
1046 list_for_each_entry(backref, &rec->backrefs, list) {
1047 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1049 if (backref->dir != dir || backref->namelen != namelen)
1051 if (memcmp(name, backref->name, namelen))
1056 backref = malloc(sizeof(*backref) + namelen + 1);
1059 memset(backref, 0, sizeof(*backref));
1061 backref->namelen = namelen;
1062 memcpy(backref->name, name, namelen);
1063 backref->name[namelen] = '\0';
1064 list_add_tail(&backref->list, &rec->backrefs);
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069 u64 ino, u64 dir, u64 index,
1070 const char *name, int namelen,
1071 u8 filetype, u8 itemtype, int errors)
1073 struct inode_record *rec;
1074 struct inode_backref *backref;
1076 rec = get_inode_rec(inode_cache, ino, 1);
1077 BUG_ON(IS_ERR(rec));
1078 backref = get_inode_backref(rec, name, namelen, dir);
1081 backref->errors |= errors;
1082 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083 if (backref->found_dir_index)
1084 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085 if (backref->found_inode_ref && backref->index != index)
1086 backref->errors |= REF_ERR_INDEX_UNMATCH;
1087 if (backref->found_dir_item && backref->filetype != filetype)
1088 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1090 backref->index = index;
1091 backref->filetype = filetype;
1092 backref->found_dir_index = 1;
1093 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1095 if (backref->found_dir_item)
1096 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097 if (backref->found_dir_index && backref->filetype != filetype)
1098 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1100 backref->filetype = filetype;
1101 backref->found_dir_item = 1;
1102 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104 if (backref->found_inode_ref)
1105 backref->errors |= REF_ERR_DUP_INODE_REF;
1106 if (backref->found_dir_index && backref->index != index)
1107 backref->errors |= REF_ERR_INDEX_UNMATCH;
1109 backref->index = index;
1111 backref->ref_type = itemtype;
1112 backref->found_inode_ref = 1;
1117 maybe_free_inode_rec(inode_cache, rec);
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122 struct cache_tree *dst_cache)
1124 struct inode_backref *backref;
1129 list_for_each_entry(backref, &src->backrefs, list) {
1130 if (backref->found_dir_index) {
1131 add_inode_backref(dst_cache, dst->ino, backref->dir,
1132 backref->index, backref->name,
1133 backref->namelen, backref->filetype,
1134 BTRFS_DIR_INDEX_KEY, backref->errors);
1136 if (backref->found_dir_item) {
1138 add_inode_backref(dst_cache, dst->ino,
1139 backref->dir, 0, backref->name,
1140 backref->namelen, backref->filetype,
1141 BTRFS_DIR_ITEM_KEY, backref->errors);
1143 if (backref->found_inode_ref) {
1144 add_inode_backref(dst_cache, dst->ino,
1145 backref->dir, backref->index,
1146 backref->name, backref->namelen, 0,
1147 backref->ref_type, backref->errors);
1151 if (src->found_dir_item)
1152 dst->found_dir_item = 1;
1153 if (src->found_file_extent)
1154 dst->found_file_extent = 1;
1155 if (src->found_csum_item)
1156 dst->found_csum_item = 1;
1157 if (src->some_csum_missing)
1158 dst->some_csum_missing = 1;
1159 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1165 BUG_ON(src->found_link < dir_count);
1166 dst->found_link += src->found_link - dir_count;
1167 dst->found_size += src->found_size;
1168 if (src->extent_start != (u64)-1) {
1169 if (dst->extent_start == (u64)-1) {
1170 dst->extent_start = src->extent_start;
1171 dst->extent_end = src->extent_end;
1173 if (dst->extent_end > src->extent_start)
1174 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175 else if (dst->extent_end < src->extent_start) {
1176 ret = add_file_extent_hole(&dst->holes,
1178 src->extent_start - dst->extent_end);
1180 if (dst->extent_end < src->extent_end)
1181 dst->extent_end = src->extent_end;
1185 dst->errors |= src->errors;
1186 if (src->found_inode_item) {
1187 if (!dst->found_inode_item) {
1188 dst->nlink = src->nlink;
1189 dst->isize = src->isize;
1190 dst->nbytes = src->nbytes;
1191 dst->imode = src->imode;
1192 dst->nodatasum = src->nodatasum;
1193 dst->found_inode_item = 1;
1195 dst->errors |= I_ERR_DUP_INODE_ITEM;
1203 static int splice_shared_node(struct shared_node *src_node,
1204 struct shared_node *dst_node)
1206 struct cache_extent *cache;
1207 struct ptr_node *node, *ins;
1208 struct cache_tree *src, *dst;
1209 struct inode_record *rec, *conflict;
1210 u64 current_ino = 0;
1214 if (--src_node->refs == 0)
1216 if (src_node->current)
1217 current_ino = src_node->current->ino;
1219 src = &src_node->root_cache;
1220 dst = &dst_node->root_cache;
1222 cache = search_cache_extent(src, 0);
1224 node = container_of(cache, struct ptr_node, cache);
1226 cache = next_cache_extent(cache);
1229 remove_cache_extent(src, &node->cache);
1232 ins = malloc(sizeof(*ins));
1234 ins->cache.start = node->cache.start;
1235 ins->cache.size = node->cache.size;
1239 ret = insert_cache_extent(dst, &ins->cache);
1240 if (ret == -EEXIST) {
1241 conflict = get_inode_rec(dst, rec->ino, 1);
1242 BUG_ON(IS_ERR(conflict));
1243 merge_inode_recs(rec, conflict, dst);
1245 conflict->checked = 1;
1246 if (dst_node->current == conflict)
1247 dst_node->current = NULL;
1249 maybe_free_inode_rec(dst, conflict);
1250 free_inode_rec(rec);
1257 if (src == &src_node->root_cache) {
1258 src = &src_node->inode_cache;
1259 dst = &dst_node->inode_cache;
1263 if (current_ino > 0 && (!dst_node->current ||
1264 current_ino > dst_node->current->ino)) {
1265 if (dst_node->current) {
1266 dst_node->current->checked = 1;
1267 maybe_free_inode_rec(dst, dst_node->current);
1269 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270 BUG_ON(IS_ERR(dst_node->current));
1275 static void free_inode_ptr(struct cache_extent *cache)
1277 struct ptr_node *node;
1278 struct inode_record *rec;
1280 node = container_of(cache, struct ptr_node, cache);
1282 free_inode_rec(rec);
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1291 struct cache_extent *cache;
1292 struct shared_node *node;
1294 cache = lookup_cache_extent(shared, bytenr, 1);
1296 node = container_of(cache, struct shared_node, cache);
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1305 struct shared_node *node;
1307 node = calloc(1, sizeof(*node));
1310 node->cache.start = bytenr;
1311 node->cache.size = 1;
1312 cache_tree_init(&node->root_cache);
1313 cache_tree_init(&node->inode_cache);
1316 ret = insert_cache_extent(shared, &node->cache);
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322 struct walk_control *wc, int level)
1324 struct shared_node *node;
1325 struct shared_node *dest;
1328 if (level == wc->active_node)
1331 BUG_ON(wc->active_node <= level);
1332 node = find_shared_node(&wc->shared, bytenr);
1334 ret = add_shared_node(&wc->shared, bytenr, refs);
1336 node = find_shared_node(&wc->shared, bytenr);
1337 wc->nodes[level] = node;
1338 wc->active_node = level;
1342 if (wc->root_level == wc->active_node &&
1343 btrfs_root_refs(&root->root_item) == 0) {
1344 if (--node->refs == 0) {
1345 free_inode_recs_tree(&node->root_cache);
1346 free_inode_recs_tree(&node->inode_cache);
1347 remove_cache_extent(&wc->shared, &node->cache);
1353 dest = wc->nodes[wc->active_node];
1354 splice_shared_node(node, dest);
1355 if (node->refs == 0) {
1356 remove_cache_extent(&wc->shared, &node->cache);
1362 static int leave_shared_node(struct btrfs_root *root,
1363 struct walk_control *wc, int level)
1365 struct shared_node *node;
1366 struct shared_node *dest;
1369 if (level == wc->root_level)
1372 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1376 BUG_ON(i >= BTRFS_MAX_LEVEL);
1378 node = wc->nodes[wc->active_node];
1379 wc->nodes[wc->active_node] = NULL;
1380 wc->active_node = i;
1382 dest = wc->nodes[wc->active_node];
1383 if (wc->active_node < wc->root_level ||
1384 btrfs_root_refs(&root->root_item) > 0) {
1385 BUG_ON(node->refs <= 1);
1386 splice_shared_node(node, dest);
1388 BUG_ON(node->refs < 2);
1397 * 1 - if the root with id child_root_id is a child of root parent_root_id
1398 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1399 * has other root(s) as parent(s)
1400 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Classify how root child_root_id relates to root parent_root_id, using the
 * tree of tree roots (root->fs_info->tree_root).
 *
 * Two lookups are made.  First an exact search for the item keyed
 * (parent_root_id, BTRFS_ROOT_REF_KEY, child_root_id).  Then the items with
 * objectid child_root_id and type BTRFS_ROOT_BACKREF_KEY are examined,
 * looking for one whose key offset equals parent_root_id; the scan stops at
 * the first key that is not a ROOT_BACKREF of child_root_id.
 *
 * The final return gives 0 when has_parent is set and 2 otherwise.
 * NOTE(review): the value returned on a match is presumably 1 - confirm.
 */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1405 struct btrfs_path path;
1406 struct btrfs_key key;
1407 struct extent_buffer *leaf;
1411 btrfs_init_path(&path);
/* pass 1: exact forward reference parent -> child */
1413 key.objectid = parent_root_id;
1414 key.type = BTRFS_ROOT_REF_KEY;
1415 key.offset = child_root_id;
1416 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1420 btrfs_release_path(&path);
/* pass 2: scan the back references stored under the child */
1424 key.objectid = child_root_id;
1425 key.type = BTRFS_ROOT_BACKREF_KEY;
1427 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1433 leaf = path.nodes[0];
/* slot past the end of this leaf: continue in the next leaf */
1434 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1438 leaf = path.nodes[0];
1441 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1442 if (key.objectid != child_root_id ||
1443 key.type != BTRFS_ROOT_BACKREF_KEY)
1448 if (key.offset == parent_root_id) {
1449 btrfs_release_path(&path);
1456 btrfs_release_path(&path);
1459 return has_parent ? 0 : 2;
/*
 * Record every name stored in one DIR_ITEM / DIR_INDEX item.
 *
 * @root:        root the leaf belongs to
 * @eb:          leaf containing the item
 * @slot:        slot of the item inside @eb
 * @key:         key of the item; objectid, offset and type are passed on to
 *               add_inode_backref()
 * @active_node: shared node whose caches and current inode record are updated
 *
 * The current inode record is flagged with found_dir_item and its found_size
 * grows by the length of every name in the item.  For each entry a backref is
 * added keyed by the objectid of the entry location:
 *   - location type INODE_ITEM -> inode cache
 *   - location type ROOT_ITEM  -> root cache
 *   - anything else            -> reported on stderr and recorded in the
 *                                 inode cache under BTRFS_MULTIPLE_OBJECTIDS
 * Names are copied into a BTRFS_NAME_LEN sized buffer; the over-long case
 * copies only BTRFS_NAME_LEN bytes and tags the backref with
 * REF_ERR_NAME_TOO_LONG.
 *
 * After the walk, a DIR_INDEX item for which nritems > 1 gets
 * I_ERR_DUP_DIR_INDEX set on the record (NOTE(review): nritems presumably
 * counts the entries seen in this item - confirm).
 */
1462 static int process_dir_item(struct btrfs_root *root,
1463 struct extent_buffer *eb,
1464 int slot, struct btrfs_key *key,
1465 struct shared_node *active_node)
1475 struct btrfs_dir_item *di;
1476 struct inode_record *rec;
1477 struct cache_tree *root_cache;
1478 struct cache_tree *inode_cache;
1479 struct btrfs_key location;
1480 char namebuf[BTRFS_NAME_LEN];
1482 root_cache = &active_node->root_cache;
1483 inode_cache = &active_node->inode_cache;
1484 rec = active_node->current;
1485 rec->found_dir_item = 1;
1487 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488 total = btrfs_item_size_nr(eb, slot);
1489 while (cur < total) {
1491 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492 name_len = btrfs_dir_name_len(eb, di);
1493 data_len = btrfs_dir_data_len(eb, di);
1494 filetype = btrfs_dir_type(eb, di);
1496 rec->found_size += name_len;
1497 if (name_len <= BTRFS_NAME_LEN) {
1501 len = BTRFS_NAME_LEN;
1502 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes follow the fixed-size dir item header */
1504 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1506 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507 add_inode_backref(inode_cache, location.objectid,
1508 key->objectid, key->offset, namebuf,
1509 len, filetype, key->type, error);
1510 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511 add_inode_backref(root_cache, location.objectid,
1512 key->objectid, key->offset,
1513 namebuf, len, filetype,
1516 fprintf(stderr, "invalid location in dir item %u\n",
1518 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519 key->objectid, key->offset, namebuf,
1520 len, filetype, key->type, error);
/* step over header, name and data to reach the next entry */
1523 len = sizeof(*di) + name_len + data_len;
1524 di = (struct btrfs_dir_item *)((char *)di + len);
1527 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name stored in one INODE_REF item.
 *
 * @eb:          leaf containing the item
 * @slot:        slot of the item inside @eb
 * @key:         key of the item; key->objectid is passed to
 *               add_inode_backref() as the inode and key->offset as the
 *               directory
 * @active_node: shared node whose inode cache receives the backrefs
 *
 * Each entry contributes one backref carrying the index and name read from
 * the entry, with a file type of 0.  A name longer than BTRFS_NAME_LEN is
 * copied truncated to BTRFS_NAME_LEN bytes and tagged with
 * REF_ERR_NAME_TOO_LONG.
 */
1533 static int process_inode_ref(struct extent_buffer *eb,
1534 int slot, struct btrfs_key *key,
1535 struct shared_node *active_node)
1543 struct cache_tree *inode_cache;
1544 struct btrfs_inode_ref *ref;
1545 char namebuf[BTRFS_NAME_LEN];
1547 inode_cache = &active_node->inode_cache;
1549 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550 total = btrfs_item_size_nr(eb, slot);
1551 while (cur < total) {
1552 name_len = btrfs_inode_ref_name_len(eb, ref);
1553 index = btrfs_inode_ref_index(eb, ref);
1554 if (name_len <= BTRFS_NAME_LEN) {
1558 len = BTRFS_NAME_LEN;
1559 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes follow the fixed-size ref header */
1561 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562 add_inode_backref(inode_cache, key->objectid, key->offset,
1563 index, namebuf, len, 0, key->type, error);
/* step over header and name to reach the next entry */
1565 len = sizeof(*ref) + name_len;
1566 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every name stored in one INODE_EXTREF item.
 *
 * @eb:          leaf containing the item
 * @slot:        slot of the item inside @eb
 * @key:         key of the item; key->objectid is passed to
 *               add_inode_backref() as the inode
 * @active_node: shared node whose inode cache receives the backrefs
 *
 * Works like the INODE_REF variant, except that the directory is read from
 * the parent field of each entry rather than taken from the key offset.
 * A name longer than BTRFS_NAME_LEN is copied truncated to BTRFS_NAME_LEN
 * bytes and tagged with REF_ERR_NAME_TOO_LONG.
 */
1572 static int process_inode_extref(struct extent_buffer *eb,
1573 int slot, struct btrfs_key *key,
1574 struct shared_node *active_node)
1583 struct cache_tree *inode_cache;
1584 struct btrfs_inode_extref *extref;
1585 char namebuf[BTRFS_NAME_LEN];
1587 inode_cache = &active_node->inode_cache;
1589 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590 total = btrfs_item_size_nr(eb, slot);
1591 while (cur < total) {
1592 name_len = btrfs_inode_extref_name_len(eb, extref);
1593 index = btrfs_inode_extref_index(eb, extref);
1594 parent = btrfs_inode_extref_parent(eb, extref);
1595 if (name_len <= BTRFS_NAME_LEN) {
1599 len = BTRFS_NAME_LEN;
1600 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes follow the fixed-size extref header */
1602 read_extent_buffer(eb, namebuf,
1603 (unsigned long)(extref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, parent,
1605 index, namebuf, len, 0, key->type, error);
/* step over header and name to reach the next entry */
1607 len = sizeof(*extref) + name_len;
1608 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Look up the checksum items covering the byte range [start, start + len)
 * in the csum tree (root->fs_info->csum_root).
 *
 * @root:  any root of the filesystem; used to reach fs_info and sectorsize
 * @start: first byte of the range
 * @len:   length of the range in bytes
 * @found: output (NOTE(review): presumably the number of bytes in the range
 *         that have checksums - confirm)
 *
 * After the initial search, when it did not hit an exact key and the slot is
 * not the first one, the preceding item is inspected to see whether it is a
 * csum item as well, since it may cover the beginning of the range.  The
 * items are then visited in order; the walk ends at the first key that is not
 * an EXTENT_CSUM item or whose offset is at or beyond start + len.  The end
 * of the area covered by an item is computed as
 *   key.offset + (item size / csum size) * sectorsize
 * and the overlap with the remaining range is limited to len.
 */
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616 u64 len, u64 *found)
1618 struct btrfs_key key;
1619 struct btrfs_path path;
1620 struct extent_buffer *leaf;
1625 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1627 btrfs_init_path(&path);
1629 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1631 key.type = BTRFS_EXTENT_CSUM_KEY;
1633 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* no exact match: check whether the previous item is a csum item too */
1637 if (ret > 0 && path.slots[0] > 0) {
1638 leaf = path.nodes[0];
1639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641 key.type == BTRFS_EXTENT_CSUM_KEY)
1646 leaf = path.nodes[0];
/* slot past the end of this leaf: continue in the next leaf */
1647 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1653 leaf = path.nodes[0];
1656 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1657 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658 key.type != BTRFS_EXTENT_CSUM_KEY)
1661 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1662 if (key.offset >= start + len)
1665 if (key.offset > start)
1668 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670 if (csum_end > start) {
1671 size = min(csum_end - start, len);
1680 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item against the current inode record.
 *
 * @root:        root the leaf belongs to; supplies the sector size
 * @eb:          leaf containing the item
 * @slot:        slot of the item inside @eb
 * @key:         key of the item; key->offset is the file offset
 * @active_node: shared node whose current inode record is updated
 *
 * Steps visible here:
 *  - The record must describe inode key->objectid and must not have more
 *    than one reference (BUG_ON otherwise).
 *  - Extent tracking: the first extent seen initialises extent_start and
 *    extent_end.  An item starting before extent_end sets
 *    I_ERR_FILE_EXTENT_OVERLAP; an item starting after it records the gap
 *    with add_file_extent_hole().
 *  - Inline extents add their inline length to found_size and are then
 *    rounded up to a sector multiple.
 *  - Regular and preallocated extents are flagged I_ERR_BAD_FILE_EXTENT
 *    when num_bytes is zero or not sector aligned, when
 *    num_bytes + offset exceeds ram_bytes, or when a preallocated extent
 *    carries compression, encryption or another encoding.  Their num_bytes
 *    is added to found_size only when disk_bytenr is non-zero.
 *  - extent_end is advanced to key->offset + num_bytes.
 *  - Checksums are looked up with count_csum_range() for extents that have
 *    a disk_bytenr, except in leaves owned by the data reloc tree.  For
 *    compressed extents the on-disk length is used as the range length.
 *    Regular extents may get found_csum_item and some_csum_missing set;
 *    preallocated extents may get I_ERR_ODD_CSUM_ITEM.
 *
 * NOTE(review): several guarding conditions (for example the tests on the
 * found byte count) sit on lines that are not part of this listing; verify
 * them against the full function before relying on the summary above.
 */
1686 static int process_file_extent(struct btrfs_root *root,
1687 struct extent_buffer *eb,
1688 int slot, struct btrfs_key *key,
1689 struct shared_node *active_node)
1691 struct inode_record *rec;
1692 struct btrfs_file_extent_item *fi;
1694 u64 disk_bytenr = 0;
1695 u64 extent_offset = 0;
1696 u64 mask = root->sectorsize - 1;
1700 rec = active_node->current;
1701 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any extent yet */
1704 if (rec->extent_start == (u64)-1) {
1705 rec->extent_start = key->offset;
1706 rec->extent_end = key->offset;
1709 if (rec->extent_end > key->offset)
1710 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711 else if (rec->extent_end < key->offset) {
1712 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713 key->offset - rec->extent_end);
1718 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719 extent_type = btrfs_file_extent_type(eb, fi);
1721 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1724 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725 rec->found_size += num_bytes;
/* round up to the next sector boundary */
1726 num_bytes = (num_bytes + mask) & ~mask;
1727 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731 extent_offset = btrfs_file_extent_offset(eb, fi);
1732 if (num_bytes == 0 || (num_bytes & mask))
1733 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734 if (num_bytes + extent_offset >
1735 btrfs_file_extent_ram_bytes(eb, fi))
1736 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1737 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738 (btrfs_file_extent_compression(eb, fi) ||
1739 btrfs_file_extent_encryption(eb, fi) ||
1740 btrfs_file_extent_other_encoding(eb, fi)))
1741 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1742 if (disk_bytenr > 0)
1743 rec->found_size += num_bytes;
1745 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1747 rec->extent_end = key->offset + num_bytes;
1750 * The data reloc tree will copy full extents into its inode and then
1751 * copy the corresponding csums. Because the extent it copied could be
1752 * a preallocated extent that hasn't been written to yet there may be no
1753 * csums to copy, ergo we won't have csums for our file extent. This is
1754 * ok so just don't bother checking csums if the inode belongs to the
1757 if (disk_bytenr > 0 &&
1758 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1760 if (btrfs_file_extent_compression(eb, fi))
1761 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1763 disk_bytenr += extent_offset;
1765 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1768 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1770 rec->found_csum_item = 1;
1771 if (found < num_bytes)
1772 rec->some_csum_missing = 1;
1773 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1775 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of one leaf to the per-type processing helpers.
 *
 * @root: root being checked
 * @eb:   leaf to process
 * @wc:   walk state; the shared node at wc->active_node collects results
 *
 * A root with zero references whose active node is the root level is
 * special-cased before any item is looked at.  Items with objectid
 * BTRFS_FREE_SPACE_OBJECTID and items of type ORPHAN_ITEM are filtered out.
 *
 * The active node tracks a current inode record.  When an item with a higher
 * objectid than the current record shows up, the old record is marked as
 * checked and handed to maybe_free_inode_rec(), and the record for the new
 * objectid is fetched with get_inode_rec().  The item is then dispatched by
 * key type: DIR_ITEM and DIR_INDEX, INODE_REF, INODE_EXTREF, INODE_ITEM and
 * EXTENT_DATA each have a handler.
 */
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782 struct walk_control *wc)
1784 struct btrfs_key key;
1788 struct cache_tree *inode_cache;
1789 struct shared_node *active_node;
1791 if (wc->root_level == wc->active_node &&
1792 btrfs_root_refs(&root->root_item) == 0)
1795 active_node = wc->nodes[wc->active_node];
1796 inode_cache = &active_node->inode_cache;
1797 nritems = btrfs_header_nritems(eb);
1798 for (i = 0; i < nritems; i++) {
1799 btrfs_item_key_to_cpu(eb, &key, i);
1801 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1803 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* moving on to a new inode: retire the record of the previous one */
1806 if (active_node->current == NULL ||
1807 active_node->current->ino < key.objectid) {
1808 if (active_node->current) {
1809 active_node->current->checked = 1;
1810 maybe_free_inode_rec(inode_cache,
1811 active_node->current);
1813 active_node->current = get_inode_rec(inode_cache,
1815 BUG_ON(IS_ERR(active_node->current));
1818 case BTRFS_DIR_ITEM_KEY:
1819 case BTRFS_DIR_INDEX_KEY:
1820 ret = process_dir_item(root, eb, i, &key, active_node);
1822 case BTRFS_INODE_REF_KEY:
1823 ret = process_inode_ref(eb, i, &key, active_node);
1825 case BTRFS_INODE_EXTREF_KEY:
1826 ret = process_inode_extref(eb, i, &key, active_node);
1828 case BTRFS_INODE_ITEM_KEY:
1829 ret = process_inode_item(eb, i, &key, active_node);
1831 case BTRFS_EXTENT_DATA_KEY:
1832 ret = process_file_extent(root, eb, i, &key,
/*
 * Start readahead for the children of an internal node.
 *
 * @root: root being walked; supplies the node size
 * @node: internal node whose block pointers are read
 * @slot: first slot to issue readahead for
 *
 * For every slot from @slot to the last item, the child block number and
 * pointer generation are passed to readahead_tree_block().  The level of
 * @node is read first (NOTE(review): presumably to skip all levels except
 * the one directly above the leaves - confirm).
 */
1842 static void reada_walk_down(struct btrfs_root *root,
1843 struct extent_buffer *node, int slot)
1852 level = btrfs_header_level(node);
1856 nritems = btrfs_header_nritems(node);
1857 blocksize = root->nodesize;
1858 for (i = slot; i < nritems; i++) {
1859 bytenr = btrfs_node_blockptr(node, i);
1860 ptr_gen = btrfs_node_ptr_generation(node, i);
1861 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1866 * Check the child node/leaf by the following condition:
1867 * 1. the first item key of the node/leaf should be the same as the one
1869 * 2. block in parent node should match the child node/leaf.
1870 * 3. generation of parent node and child's header should be consistent.
1872 * Or the child node/leaf pointed by the key in parent is not valid.
1874 * We would like to check the leaf owner too, but since subvolumes may share
1875 * leaves, the leaf owner check is not reliable; the key check should be
1876 * sufficient for that case.
/*
 * Cross-check a child block against the pointer stored in its parent.
 *
 * @root:   root being walked
 * @parent: internal node holding the pointer
 * @slot:   slot of the pointer inside @parent
 * @child:  block that was read for that pointer
 *
 * Three properties are compared, each mismatch being reported on stderr:
 *   1. the key stored in the parent slot against the first key of the child
 *      (item key for a leaf, node key otherwise),
 *   2. the block number stored in the parent against the block number in
 *      the child header,
 *   3. the pointer generation stored in the parent against the generation
 *      in the child header.
 *
 * NOTE(review): in the generation message the child header value is passed
 * for the wanted field and the parent pointer value for the have field,
 * which is the opposite order of the block number message just above it.
 * The two arguments look swapped - confirm and fix in the full source.
 */
1878 static int check_child_node(struct btrfs_root *root,
1879 struct extent_buffer *parent, int slot,
1880 struct extent_buffer *child)
1882 struct btrfs_key parent_key;
1883 struct btrfs_key child_key;
1886 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887 if (btrfs_header_level(child) == 0)
1888 btrfs_item_key_to_cpu(child, &child_key, 0);
1890 btrfs_node_key_to_cpu(child, &child_key, 0);
1892 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1895 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896 parent_key.objectid, parent_key.type, parent_key.offset,
1897 child_key.objectid, child_key.type, child_key.offset);
1899 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1901 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902 btrfs_node_blockptr(parent, slot),
1903 btrfs_header_bytenr(child));
1905 if (btrfs_node_ptr_generation(parent, slot) !=
1906 btrfs_header_generation(child)) {
1908 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909 btrfs_header_generation(child),
1910 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level arrays, one entry per tree level.  NOTE(review): these look like
 * the members of struct node_refs, which the tree walk uses to remember the
 * block number last looked up at a level together with its reference count,
 * so that a repeated lookup of the same block can be skipped - confirm.
 */
1916 u64 bytenr[BTRFS_MAX_LEVEL];
1917 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend from path->nodes[*level] towards the leaves, processing each leaf
 * that is reached.
 *
 * @root:  root being checked
 * @path:  path describing the current position; updated while descending
 * @wc:    walk state used for shared node bookkeeping
 * @level: in/out, current level in @path
 * @nrefs: per-level cache of (block number, reference count) pairs
 *
 * Reference counts are taken from @nrefs when the cached block number at
 * that level matches, and otherwise fetched with btrfs_lookup_extent_info()
 * and stored back into @nrefs.  enter_shared_node() is called for the
 * starting block and for child blocks (NOTE(review): presumably only when
 * the reference count is above one - confirm).
 *
 * Each loop iteration handles the block at *level:
 *  - the walk of this block ends once the slot is past the last item,
 *  - leaves are handed to process_one_leaf(),
 *  - for internal nodes the child block is located in the cache or read
 *    from disk after issuing readahead for the following siblings; a child
 *    that cannot be read is recorded with btrfs_add_corrupt_extent_record(),
 *  - a child that was read is validated with check_child_node() and then
 *    btrfs_check_leaf() or btrfs_check_node() before the path moves down
 *    one level onto it.
 *
 * On the way out the slot at *level is set to the item count of that block,
 * which marks the block as fully consumed.
 */
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921 struct walk_control *wc, int *level,
1922 struct node_refs *nrefs)
1924 enum btrfs_tree_block_status status;
1927 struct extent_buffer *next;
1928 struct extent_buffer *cur;
1933 WARN_ON(*level < 0);
1934 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* reuse the cached reference count when it is for this very block */
1936 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937 refs = nrefs->refs[*level];
1940 ret = btrfs_lookup_extent_info(NULL, root,
1941 path->nodes[*level]->start,
1942 *level, 1, &refs, NULL);
1947 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948 nrefs->refs[*level] = refs;
1952 ret = enter_shared_node(root, path->nodes[*level]->start,
1960 while (*level >= 0) {
1961 WARN_ON(*level < 0);
1962 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963 cur = path->nodes[*level];
1965 if (btrfs_header_level(cur) != *level)
1968 if (path->slots[*level] >= btrfs_header_nritems(cur))
1971 ret = process_one_leaf(root, cur, wc);
1976 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978 blocksize = root->nodesize;
/* same caching scheme as above, for the child one level down */
1980 if (bytenr == nrefs->bytenr[*level - 1]) {
1981 refs = nrefs->refs[*level - 1];
1983 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984 *level - 1, 1, &refs, NULL);
1988 nrefs->bytenr[*level - 1] = bytenr;
1989 nrefs->refs[*level - 1] = refs;
1994 ret = enter_shared_node(root, bytenr, refs,
1997 path->slots[*level]++;
2002 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004 free_extent_buffer(next);
2005 reada_walk_down(root, cur, path->slots[*level]);
2006 next = read_tree_block(root, bytenr, blocksize,
2008 if (!extent_buffer_uptodate(next)) {
2009 struct btrfs_key node_key;
2011 btrfs_node_key_to_cpu(path->nodes[*level],
2013 path->slots[*level]);
2014 btrfs_add_corrupt_extent_record(root->fs_info,
2016 path->nodes[*level]->start,
2017 root->nodesize, *level);
2023 ret = check_child_node(root, cur, path->slots[*level], next);
2029 if (btrfs_is_leaf(next))
2030 status = btrfs_check_leaf(root, NULL, next);
2032 status = btrfs_check_node(root, NULL, next);
2033 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034 free_extent_buffer(next);
/* step down: the child replaces whatever the path held at that level */
2039 *level = *level - 1;
2040 free_extent_buffer(path->nodes[*level]);
2041 path->nodes[*level] = next;
2042 path->slots[*level] = 0;
2045 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb back up the path after a subtree has been fully processed.
 *
 * @root:  root being checked
 * @path:  path describing the current position
 * @wc:    walk state used for shared node bookkeeping
 * @level: in/out, current level in @path
 *
 * Starting at *level, each populated level of the path is examined.  A level
 * that still has an unvisited slot after the current one is where the walk
 * resumes (the statements taken in that case are not shown here; presumably
 * the slot is advanced and the function returns - TODO confirm).  A level
 * that is exhausted has its buffer released and its path entry cleared, and
 * when that level is the active shared node level, leave_shared_node() is
 * called for it.
 */
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050 struct walk_control *wc, int *level)
2053 struct extent_buffer *leaf;
2055 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056 leaf = path->nodes[i];
2057 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2062 free_extent_buffer(path->nodes[*level]);
2063 path->nodes[*level] = NULL;
2064 BUG_ON(*level > wc->active_node);
2065 if (*level == wc->active_node)
2066 leave_shared_node(root, wc, *level);
/*
 * Sanity check the inode record of the top directory of a tree.
 *
 * @rec: inode record to validate
 *
 * The record is tested, in order, for:
 *   - a missing inode item or any recorded error,
 *   - nlink different from 1 or a non-zero found_link,
 *   - an empty backref list,
 *   - a first backref without an inode ref,
 *   - a first backref whose index is not 0 or whose name is not the
 *     two bytes of the parent directory entry,
 *   - a first backref that has a dir index or a dir item.
 * NOTE(review): each test presumably rejects the record when it fires; the
 * returned values are not visible here - confirm.
 */
2073 static int check_root_dir(struct inode_record *rec)
2075 struct inode_backref *backref;
2078 if (!rec->found_inode_item || rec->errors)
2080 if (rec->nlink != 1 || rec->found_link != 0)
2082 if (list_empty(&rec->backrefs))
2084 backref = to_inode_backref(rec->backrefs.next);
2085 if (!backref->found_inode_ref)
2087 if (backref->index != 0 || backref->namelen != 2 ||
2088 memcmp(backref->name, "..", 2))
2090 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of a directory inode item with the size computed
 * during the check.
 *
 * @trans: running transaction
 * @root:  tree containing the inode
 * @path:  caller supplied path; released before returning
 * @rec:   inode record; rec->found_size is the value written
 *
 * The search uses the key (ino, INODE_ITEM, (u64)-1), which sorts after the
 * real inode item, so the slot found needs adjusting before use
 * (NOTE(review): presumably by stepping back one slot - confirm).  The item
 * at the resulting slot must belong to rec->ino.  On success the size is
 * updated, the leaf is marked dirty, I_ERR_DIR_ISIZE_WRONG is cleared from
 * the record and a message is printed.
 */
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098 struct btrfs_root *root, struct btrfs_path *path,
2099 struct inode_record *rec)
2101 struct btrfs_inode_item *ei;
2102 struct btrfs_key key;
2105 key.objectid = rec->ino;
2106 key.type = BTRFS_INODE_ITEM_KEY;
2107 key.offset = (u64)-1;
2109 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2113 if (!path->slots[0]) {
2120 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121 if (key.objectid != rec->ino) {
2126 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127 struct btrfs_inode_item);
2128 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129 btrfs_mark_buffer_dirty(path->nodes[0]);
2130 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132 root->root_key.objectid);
2134 btrfs_release_path(path);
/*
 * Add the orphan item that is missing for an inode.
 *
 * @trans: running transaction
 * @root:  tree containing the inode
 * @path:  caller supplied path; released after the insertion attempt
 * @rec:   inode record; I_ERR_NO_ORPHAN_ITEM is cleared from rec->errors
 *         (NOTE(review): presumably only when the insertion succeeded -
 *         confirm)
 */
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139 struct btrfs_root *root,
2140 struct btrfs_path *path,
2141 struct inode_record *rec)
2145 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146 btrfs_release_path(path);
2148 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of an inode item with the byte count computed
 * during the check.
 *
 * @trans: running transaction
 * @root:  tree containing the inode
 * @path:  caller supplied path; released before returning
 * @rec:   inode record; rec->found_size is the value written
 *
 * The inode item is looked up by key.  Once the search has succeeded the
 * nbytes field is updated, the leaf is marked dirty, I_ERR_FILE_NBYTES_WRONG
 * is cleared from the record and a message is printed.
 */
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153 struct btrfs_root *root,
2154 struct btrfs_path *path,
2155 struct inode_record *rec)
2157 struct btrfs_inode_item *ei;
2158 struct btrfs_key key;
2161 key.objectid = rec->ino;
2162 key.type = BTRFS_INODE_ITEM_KEY;
2165 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2172 /* Since ret == 0, no need to check anything */
2173 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174 struct btrfs_inode_item);
2175 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176 btrfs_mark_buffer_dirty(path->nodes[0]);
2177 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178 printf("reset nbytes for ino %llu root %llu\n",
2179 rec->ino, root->root_key.objectid);
2181 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that is missing for a backref.
 *
 * @root:        tree to repair
 * @inode_cache: cache used to find the record of the parent directory
 * @rec:         record of the inode the new entry points at
 * @backref:     backref supplying directory, index and name
 *
 * Runs in its own transaction.  A new item keyed
 * (backref->dir, DIR_INDEX, backref->index) is created and filled with a
 * location of (rec->ino, INODE_ITEM, 0), the file type derived from
 * rec->imode, a data length of zero and the name from the backref.
 *
 * Afterwards the in-memory state is brought in line: the backref is flagged
 * with found_dir_index and the found_size of the directory record grows by
 * the name length.  I_ERR_DIR_ISIZE_WRONG on the directory record is then
 * cleared or set depending on whether found_size now equals isize.
 */
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186 struct cache_tree *inode_cache,
2187 struct inode_record *rec,
2188 struct inode_backref *backref)
2190 struct btrfs_path path;
2191 struct btrfs_trans_handle *trans;
2192 struct btrfs_dir_item *dir_item;
2193 struct extent_buffer *leaf;
2194 struct btrfs_key key;
2195 struct btrfs_disk_key disk_key;
2196 struct inode_record *dir_rec;
2197 unsigned long name_ptr;
/* item payload: fixed header followed directly by the name */
2198 u32 data_size = sizeof(*dir_item) + backref->namelen;
2201 trans = btrfs_start_transaction(root, 1);
2203 return PTR_ERR(trans);
2205 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206 (unsigned long long)rec->ino);
2208 btrfs_init_path(&path);
2209 key.objectid = backref->dir;
2210 key.type = BTRFS_DIR_INDEX_KEY;
2211 key.offset = backref->index;
2212 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2215 leaf = path.nodes[0];
2216 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2218 disk_key.objectid = cpu_to_le64(rec->ino);
2219 disk_key.type = BTRFS_INODE_ITEM_KEY;
2220 disk_key.offset = 0;
2222 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224 btrfs_set_dir_data_len(leaf, dir_item, 0);
2225 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226 name_ptr = (unsigned long)(dir_item + 1);
2227 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228 btrfs_mark_buffer_dirty(leaf);
2229 btrfs_release_path(&path);
2230 btrfs_commit_transaction(trans, root);
/* mirror the on-disk change in the in-memory records */
2232 backref->found_dir_index = 1;
2233 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234 BUG_ON(IS_ERR(dir_rec));
2237 dir_rec->found_size += backref->namelen;
2238 if (dir_rec->found_size == dir_rec->isize &&
2239 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241 if (dir_rec->found_size != dir_rec->isize)
2242 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Remove a bad DIR_INDEX entry described by a backref.
 *
 * @root:        tree to repair
 * @inode_cache: inode cache of the tree (not referenced in the lines shown)
 * @rec:         record of the inode the entry points at
 * @backref:     backref supplying directory, index and name
 *
 * Runs in its own transaction.  The entry is located with
 * btrfs_lookup_dir_index() using directory, name and index from the backref.
 * Two removal paths exist: deleting the whole item with btrfs_del_item(), or
 * deleting just the one name with btrfs_delete_one_dir_name()
 * (NOTE(review): which path is taken depends on the lookup result, tested on
 * lines that are not part of this listing - confirm).  The path is released
 * and the transaction committed on the visible exits.
 */
2247 static int delete_dir_index(struct btrfs_root *root,
2248 struct cache_tree *inode_cache,
2249 struct inode_record *rec,
2250 struct inode_backref *backref)
2252 struct btrfs_trans_handle *trans;
2253 struct btrfs_dir_item *di;
2254 struct btrfs_path path;
2257 trans = btrfs_start_transaction(root, 1);
2259 return PTR_ERR(trans);
2261 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262 (unsigned long long)backref->dir,
2263 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264 (unsigned long long)root->objectid);
2266 btrfs_init_path(&path);
2267 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268 backref->name, backref->namelen,
2269 backref->index, -1);
2272 btrfs_release_path(&path);
2273 btrfs_commit_transaction(trans, root);
2280 ret = btrfs_del_item(trans, root, &path);
2282 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2284 btrfs_release_path(&path);
2285 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item from what the check has learned about the
 * inode.
 *
 * @root:     tree to repair
 * @rec:      record of the inode to recreate
 * @backref:  backref that triggered the repair (not referenced in the lines
 *            shown)
 * @root_dir: NOTE(review): presumably selects the fixed link count of 1 used
 *            for the top directory instead of rec->found_link - confirm
 *
 * Runs in its own transaction.  The new item is zero-filled and then gets:
 *   - generation: the id of the transaction,
 *   - nbytes:     rec->found_size,
 *   - mode/size:  directory (S_IFDIR | 0755, size rec->found_size) when a
 *                 dir item was seen for the inode, regular file
 *                 (S_IFREG | 0755, size rec->extent_end) otherwise,
 *   - atime, ctime and mtime: the current time; otime: zero.
 * An inode that has both dir items and file extents is reported and treated
 * as a directory.
 *
 * NOTE(review): the second branch re-tests !rec->found_dir_item, which is
 * always true at that point; a plain else would express the same thing.
 */
2289 static int create_inode_item(struct btrfs_root *root,
2290 struct inode_record *rec,
2291 struct inode_backref *backref, int root_dir)
2293 struct btrfs_trans_handle *trans;
2294 struct btrfs_inode_item inode_item;
2295 time_t now = time(NULL);
2298 trans = btrfs_start_transaction(root, 1);
2299 if (IS_ERR(trans)) {
2300 ret = PTR_ERR(trans);
2304 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305 "be incomplete, please check permissions and content after "
2306 "the fsck completes.\n", (unsigned long long)root->objectid,
2307 (unsigned long long)rec->ino);
2309 memset(&inode_item, 0, sizeof(inode_item));
2310 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2312 btrfs_set_stack_inode_nlink(&inode_item, 1);
2314 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316 if (rec->found_dir_item) {
2317 if (rec->found_file_extent)
2318 fprintf(stderr, "root %llu inode %llu has both a dir "
2319 "item and extents, unsure if it is a dir or a "
2320 "regular file so setting it as a directory\n",
2321 (unsigned long long)root->objectid,
2322 (unsigned long long)rec->ino);
2323 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2325 } else if (!rec->found_dir_item) {
2326 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2329 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2338 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2340 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the backrefs recorded for one inode.
 *
 * @root:        tree to repair
 * @rec:         inode record whose backref list is walked
 * @inode_cache: inode cache of the tree, passed on to the helpers
 * (a further parameter, referred to as delete in the body, sits on a
 *  signature line that is not part of this listing; it switches between the
 *  deleting and the adding repairs)
 *
 * Cases handled for each backref, as far as visible here:
 *   - the top directory without an inode item gets one recreated; its
 *     index 0 backref is otherwise left alone,
 *   - a dir index without a matching inode ref, or with
 *     REF_ERR_INDEX_UNMATCH, is removed with delete_dir_index() and the
 *     backref is dropped from the list,
 *   - a missing dir index (dir item and inode ref present) is added with
 *     add_missing_dir_index(),
 *   - a backref that has dir item, dir index and inode ref and no errors is
 *     dropped from the list,
 *   - an inode ref without dir item and dir index gets the pair inserted
 *     with btrfs_insert_dir_item() in a fresh transaction, after
 *     check_dir_conflict() has been consulted,
 *   - a complete, index-matching backref whose inode item is missing gets
 *     the inode item recreated.
 *
 * Returns the error code of a failing step if there was one, otherwise the
 * value of the repaired counter.
 *
 * NOTE(review): one condition tests backref->found_dir_index twice in a row;
 * the duplicate is harmless but was possibly meant to test another flag.
 */
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345 struct inode_record *rec,
2346 struct cache_tree *inode_cache,
2349 struct inode_backref *tmp, *backref;
2350 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2354 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355 if (!delete && rec->ino == root_dirid) {
2356 if (!rec->found_inode_item) {
2357 ret = create_inode_item(root, rec, backref, 1);
2364 /* Index 0 for root dir's are special, don't mess with it */
2365 if (rec->ino == root_dirid && backref->index == 0)
2369 ((backref->found_dir_index && !backref->found_inode_ref) ||
2370 (backref->found_dir_index && backref->found_inode_ref &&
2371 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372 ret = delete_dir_index(root, inode_cache, rec, backref);
2376 list_del(&backref->list);
2380 if (!delete && !backref->found_dir_index &&
2381 backref->found_dir_item && backref->found_inode_ref) {
2382 ret = add_missing_dir_index(root, inode_cache, rec,
2387 if (backref->found_dir_item &&
2388 backref->found_dir_index &&
2389 backref->found_dir_index) {
2390 if (!backref->errors &&
2391 backref->found_inode_ref) {
2392 list_del(&backref->list);
2398 if (!delete && (!backref->found_dir_index &&
2399 !backref->found_dir_item &&
2400 backref->found_inode_ref)) {
2401 struct btrfs_trans_handle *trans;
2402 struct btrfs_key location;
2404 ret = check_dir_conflict(root, backref->name,
2410 * let nlink fixing routine to handle it,
2411 * which can do it better.
2416 location.objectid = rec->ino;
2417 location.type = BTRFS_INODE_ITEM_KEY;
2418 location.offset = 0;
2420 trans = btrfs_start_transaction(root, 1);
2421 if (IS_ERR(trans)) {
2422 ret = PTR_ERR(trans);
2425 fprintf(stderr, "adding missing dir index/item pair "
2427 (unsigned long long)rec->ino);
2428 ret = btrfs_insert_dir_item(trans, root, backref->name,
2430 backref->dir, &location,
2431 imode_to_type(rec->imode),
2434 btrfs_commit_transaction(trans, root);
2438 if (!delete && (backref->found_inode_ref &&
2439 backref->found_dir_index &&
2440 backref->found_dir_item &&
2441 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442 !rec->found_inode_item)) {
2443 ret = create_inode_item(root, rec, backref, 0);
2450 return ret ? ret : repaired;
2454 * To determine the file type for nlink/inode_item repair
2456 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457 * Return -ENOENT if file type is not found.
/*
 * Work out the BTRFS_FT_* type of an inode from its record.
 *
 * @rec:  inode record to inspect
 * @type: output, receives the file type
 *
 * When the inode item was found, the type is derived from rec->imode.
 * Otherwise the backrefs are searched for one that has a dir index or a dir
 * item, and the file type stored in that backref is used.
 */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2461 struct inode_backref *backref;
2463 /* For inode item recovered case */
2464 if (rec->found_inode_item) {
2465 *type = imode_to_type(rec->imode);
2469 list_for_each_entry(backref, &rec->backrefs, list) {
2470 if (backref->found_dir_index || backref->found_dir_item) {
2471 *type = backref->filetype;
2479 * To determine the file name for nlink repair
2481 * Return 0 if file name is found, set name and namelen.
2482 * Return -ENOENT if file name is not found.
/*
 * Fetch a name for an inode from its backrefs.
 *
 * @rec:     inode record to inspect
 * @name:    output buffer; backref->namelen bytes are copied into it without
 *           a terminating NUL, so it must be large enough for any stored name
 * @namelen: output, receives the number of bytes copied
 *
 * The name is taken from a backref that has a dir index, a dir item or an
 * inode ref (NOTE(review): presumably the first such backref in list order -
 * confirm).
 */
2484 static int find_file_name(struct inode_record *rec,
2485 char *name, int *namelen)
2487 struct inode_backref *backref;
2489 list_for_each_entry(backref, &rec->backrefs, list) {
2490 if (backref->found_dir_index || backref->found_dir_item ||
2491 backref->found_inode_ref) {
2492 memcpy(name, backref->name, backref->namelen);
2493 *namelen = backref->namelen;
2500 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of an inode from scratch.
 *
 * @trans: running transaction
 * @root:  tree containing the inode
 * @path:  caller supplied path; released before returning
 * @rec:   inode record whose backrefs drive the rebuild
 *
 * Three phases:
 *   1. Every backref is unlinked with btrfs_unlink().  Backrefs that do not
 *      have all of dir index, dir item and inode ref are also removed from
 *      the list so that they are not restored later.
 *   2. The nlink field of the inode item is set to 0.
 *   3. The remaining backrefs are linked again with btrfs_add_link(), which
 *      is relied upon to increment nlink for each of them.
 * rec->found_link is reset to 0 at the start.
 */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502 struct btrfs_root *root,
2503 struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct inode_backref *backref;
2507 struct inode_backref *tmp;
2508 struct btrfs_key key;
2509 struct btrfs_inode_item *inode_item;
2512 /* We don't believe this either, reset it and iterate backref */
2513 rec->found_link = 0;
2515 /* Remove all backref including the valid ones */
2516 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518 backref->index, backref->name,
2519 backref->namelen, 0);
2523 /* remove invalid backref, so it won't be added back */
2524 if (!(backref->found_dir_index &&
2525 backref->found_dir_item &&
2526 backref->found_inode_ref)) {
2527 list_del(&backref->list);
2534 /* Set nlink to 0 */
2535 key.objectid = rec->ino;
2536 key.type = BTRFS_INODE_ITEM_KEY;
2538 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2545 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546 struct btrfs_inode_item);
2547 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548 btrfs_mark_buffer_dirty(path->nodes[0]);
2549 btrfs_release_path(path);
2552 * Add back valid inode_ref/dir_item/dir_index,
2553 * add_link() will handle the nlink inc, so new nlink must be correct
2555 list_for_each_entry(backref, &rec->backrefs, list) {
2556 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557 backref->name, backref->namelen,
2558 backref->filetype, &backref->index, 1);
2563 btrfs_release_path(path);
/*
 * Repair the link count of an inode and, if nothing links to it any more,
 * move it into a lost+found directory.
 *
 * @trans: running transaction
 * @root:  tree containing the inode
 * @path:  caller supplied path; released before returning
 * @rec:   inode record to repair
 *
 * Name and type are collected from the record before the backrefs are
 * touched, because reset_nlink() removes the invalid ones.  The fallbacks
 * are the decimal inode number as name and BTRFS_FT_REG_FILE as type.
 *
 * After reset_nlink(), an inode with found_link == 0 is linked into the
 * directory named by dir_name, which is created with btrfs_mkdir() under
 * BTRFS_FIRST_FREE_OBJECTID.  While the link attempt fails with -EEXIST,
 * a suffix built from the inode number is appended to the name and the
 * attempt is repeated, as long as the name still fits.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared from the record at the end regardless
 * of the outcome, so that the same inode is not retried forever.
 */
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568 struct btrfs_root *root,
2569 struct btrfs_path *path,
2570 struct inode_record *rec)
2572 char *dir_name = "lost+found";
2573 char namebuf[BTRFS_NAME_LEN] = {0};
2578 int name_recovered = 0;
2579 int type_recovered = 0;
2583 * Get file name and type first before these invalid inode ref
2584 * are deleted by remove_all_invalid_backref()
2586 name_recovered = !find_file_name(rec, namebuf, &namelen);
2587 type_recovered = !find_file_type(rec, &type);
2589 if (!name_recovered) {
2590 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591 rec->ino, rec->ino);
2592 namelen = count_digits(rec->ino);
2593 sprintf(namebuf, "%llu", rec->ino);
2596 if (!type_recovered) {
2597 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2599 type = BTRFS_FT_REG_FILE;
2603 ret = reset_nlink(trans, root, path, rec);
2606 "Failed to reset nlink for inode %llu: %s\n",
2607 rec->ino, strerror(-ret));
2611 if (rec->found_link == 0) {
2612 lost_found_ino = root->highest_inode;
2613 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2618 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2622 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623 dir_name, strerror(-ret));
2626 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627 namebuf, namelen, type, NULL, 1);
2629 * Add ".INO" suffix several times to handle case where
2630 * "FILENAME.INO" is already taken by another file.
2632 while (ret == -EEXIST) {
2634 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2636 if (namelen + count_digits(rec->ino) + 1 >
2641 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2643 namelen += count_digits(rec->ino) + 1;
2644 ret = btrfs_add_link(trans, root, rec->ino,
2645 lost_found_ino, namebuf,
2646 namelen, type, NULL, 1);
2650 "Failed to link the inode %llu to %s dir: %s\n",
2651 rec->ino, dir_name, strerror(-ret));
2655 * Just increase the found_link, don't actually add the
2656 * backref. This will make things easier and this inode
2657 * record will be freed after the repair is done.
2658 * So fsck will not report problem about this inode.
2661 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662 namelen, namebuf, dir_name);
2664 printf("Fixed the nlink of inode %llu\n", rec->ino);
2667 * Clear the flag anyway, or we will loop forever for the same inode
2668 * as it will not be removed from the bad inode list and the dead loop
2671 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672 btrfs_release_path(path);
2677 * Check if there is any normal(reg or prealloc) file extent for given
2679 * This is used to determine the file type when neither its dir_index/item or
2680 * inode_item exists.
2682 * This will *NOT* report errors; if any error happens, the inode is simply
2683 * considered to have no normal file extent.
/*
 * Look for a file extent of inode @ino that is not an inline extent.
 *
 * @root: tree containing the inode
 * @ino:  inode number to look at
 *
 * The EXTENT_DATA items are searched read-only (no transaction, no COW).
 * Items are examined as long as they belong to @ino and are of type
 * EXTENT_DATA; an extent whose type is not BTRFS_FILE_EXTENT_INLINE is what
 * the search is after (NOTE(review): presumably a non-zero value is returned
 * for it and zero otherwise - confirm).  The path is released before
 * returning.
 */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2687 struct btrfs_path path;
2688 struct btrfs_key key;
2689 struct btrfs_key found_key;
2690 struct btrfs_file_extent_item *fi;
2694 btrfs_init_path(&path);
2696 key.type = BTRFS_EXTENT_DATA_KEY;
2699 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* no exact match and slot past the end of the leaf: use the next leaf */
2704 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705 ret = btrfs_next_leaf(root, &path);
2712 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2714 if (found_key.objectid != ino ||
2715 found_key.type != BTRFS_EXTENT_DATA_KEY)
2717 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718 struct btrfs_file_extent_item);
2719 type = btrfs_file_extent_type(path.nodes[0], fi);
2720 if (type != BTRFS_FILE_EXTENT_INLINE) {
2726 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF* file
 * type bits.
 *
 * @type: BTRFS_FT_* value, used directly as the table index
 *
 * Returns the S_IF* constant stored for @type.  Types without an initialiser
 * in the table map to 0.
 * NOTE(review): no range check on @type is visible; callers must pass a value
 * covered by the table.
 */
2730 static u32 btrfs_type_to_imode(u8 type)
2732 static u32 imode_by_btrfs_type[] = {
2733 [BTRFS_FT_REG_FILE] = S_IFREG,
2734 [BTRFS_FT_DIR] = S_IFDIR,
2735 [BTRFS_FT_CHRDEV] = S_IFCHR,
2736 [BTRFS_FT_BLKDEV] = S_IFBLK,
2737 [BTRFS_FT_FIFO] = S_IFIFO,
2738 [BTRFS_FT_SOCK] = S_IFSOCK,
2739 [BTRFS_FT_SYMLINK] = S_IFLNK,
2742 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the inode item of an inode that has none.
 *
 * @trans: running transaction
 * @root:  tree containing the inode
 * @path:  caller supplied path
 * @rec:   inode record to repair
 *
 * The file type is taken from find_file_type() when possible.  Otherwise it
 * is guessed from the record: a non-inline file extent or orphan extents
 * mean a regular file, a dir item means a directory, and anything else
 * falls back to a regular file with a message.  The new inode is created
 * with btrfs_new_inode() using the permission bits held in the local
 * variable mode combined with the S_IF* bits for the chosen type.
 *
 * Only the inode item is rebuilt here.  The record is updated to match,
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the link count repair runs afterwards.
 *
 * NOTE(review): after the rebuild the record gets found_dir_item = 1, also
 * for regular files; found_inode_item looks like the intended flag - confirm.
 */
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746 struct btrfs_root *root,
2747 struct btrfs_path *path,
2748 struct inode_record *rec)
2752 int type_recovered = 0;
2755 printf("Trying to rebuild inode:%llu\n", rec->ino);
2757 type_recovered = !find_file_type(rec, &filetype);
2760 * Try to determine inode type if type not found.
2762 * For found regular file extent, it must be FILE.
2763 * For found dir_item/index, it must be DIR.
2765 * For undetermined one, use FILE as fallback.
2768 * 1. If found backref(inode_index/item is already handled) to it,
2770 * Need new inode-inode ref structure to allow search for that.
2772 if (!type_recovered) {
2773 if (rec->found_file_extent &&
2774 find_normal_file_extent(root, rec->ino)) {
2776 filetype = BTRFS_FT_REG_FILE;
2777 } else if (rec->found_dir_item) {
2779 filetype = BTRFS_FT_DIR;
2780 } else if (!list_empty(&rec->orphan_extents)) {
2782 filetype = BTRFS_FT_REG_FILE;
2784 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2787 filetype = BTRFS_FT_REG_FILE;
2791 ret = btrfs_new_inode(trans, root, rec->ino,
2792 mode | btrfs_type_to_imode(filetype));
2797 * Here inode rebuild is done, we only rebuild the inode item,
2798 * don't repair the nlink(like move to lost+found).
2799 * That is the job of nlink repair.
2801 * We just fill the record and return
2803 rec->found_dir_item = 1;
2804 rec->imode = mode | btrfs_type_to_imode(filetype);
2806 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807 /* Ensure the inode_nlinks repair function will be called */
2808 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Walk rec->orphan_extents and deal with each orphan data extent.
 *
 * For every entry btrfs_get_extent() is called for the orphan's file range.
 * The visible statements then either release the orphan's disk extent with
 * btrfs_free_extent() (reported as a conflict) or insert a file extent with
 * btrfs_insert_file_extent(), add disk_len to rec->found_size, remove the
 * range from rec->holes and unlink the entry from the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 *
 * NOTE(review): the conditions selecting between those paths and the error
 * handling are on lines not visible in this listing.
 */
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814 struct btrfs_root *root,
2815 struct btrfs_path *path,
2816 struct inode_record *rec)
2818 struct orphan_data_extent *orphan;
2819 struct orphan_data_extent *tmp;
/* The _safe iterator is required because entries are unlinked below. */
2822 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2824 * Check for conflicting file extents
2826 * Here we don't know whether the extents is compressed or not,
2827 * so we can only assume it not compressed nor data offset,
2828 * and use its disk_len as extent length.
2830 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831 orphan->offset, orphan->disk_len, 0);
2832 btrfs_release_path(path);
2837 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838 orphan->disk_bytenr, orphan->disk_len);
2839 ret = btrfs_free_extent(trans,
2840 root->fs_info->extent_root,
2841 orphan->disk_bytenr, orphan->disk_len,
2842 0, root->objectid, orphan->objectid,
/* disk_len is passed for both length arguments of the inserted extent. */
2847 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848 orphan->offset, orphan->disk_bytenr,
2849 orphan->disk_len, orphan->disk_len);
2853 /* Update file size info */
2854 rec->found_size += orphan->disk_len;
2855 if (rec->found_size == rec->nbytes)
2856 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2858 /* Update the file extent hole info too */
2859 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2863 if (RB_EMPTY_ROOT(&rec->holes))
2864 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2866 list_del(&orphan->list);
2869 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the recorded file extent holes of @rec by punching holes.
 *
 * Starting from rb_first(&rec->holes), each hole is passed to
 * btrfs_punch_hole() and then removed with del_file_extent_hole();
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the hole tree is empty.
 * A separate case punches one hole from offset 0 up to isize rounded up to
 * the sector size, for a file that lost all its file extents.
 *
 * NOTE(review): @path is not referenced by any visible statement; the loop
 * header and error checks are on lines not visible in this listing.
 */
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875 struct btrfs_root *root,
2876 struct btrfs_path *path,
2877 struct inode_record *rec)
2879 struct rb_node *node;
2880 struct file_extent_hole *hole;
2884 node = rb_first(&rec->holes);
2888 hole = rb_entry(node, struct file_extent_hole, node);
2889 ret = btrfs_punch_hole(trans, root, rec->ino,
2890 hole->start, hole->len);
2893 ret = del_file_extent_hole(&rec->holes, hole->start,
2897 if (RB_EMPTY_ROOT(&rec->holes))
2898 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree was modified, so restart from its first node. */
2899 node = rb_first(&rec->holes);
2901 /* special case for a file losing all its file extent */
2903 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904 round_up(rec->isize, root->sectorsize));
2908 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909 rec->ino, root->objectid);
/*
 * Run the individual inode repair helpers for @rec inside one transaction.
 *
 * Only the error bits tested in the first condition are handled here.  The
 * helpers run in the fixed order below, and each one after the first is
 * skipped as soon as an earlier helper returned non-zero.  The transaction
 * is committed and the path released afterwards.
 */
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2916 struct btrfs_trans_handle *trans;
2917 struct btrfs_path path;
2920 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921 I_ERR_NO_ORPHAN_ITEM |
2922 I_ERR_LINK_COUNT_WRONG |
2923 I_ERR_NO_INODE_ITEM |
2924 I_ERR_FILE_EXTENT_ORPHAN |
2925 I_ERR_FILE_EXTENT_DISCOUNT|
2926 I_ERR_FILE_NBYTES_WRONG)))
2930 * For nlink repair, it may create a dir and add link, so
2931 * 2 for parent(256)'s dir_index and dir_item
2932 * 2 for lost+found dir's inode_item and inode_ref
2933 * 1 for the new inode_ref of the file
2934 * 2 for lost+found dir's dir_index and dir_item for the file
2936 trans = btrfs_start_transaction(root, 7);
2938 return PTR_ERR(trans);
2940 btrfs_init_path(&path);
/*
 * The helpers may clear or set bits in rec->errors, so each test below
 * sees the result of the helpers that ran before it.
 */
2941 if (rec->errors & I_ERR_NO_INODE_ITEM)
2942 ret = repair_inode_no_item(trans, root, &path, rec);
2943 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948 ret = repair_inode_isize(trans, root, &path, rec);
2949 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952 ret = repair_inode_nlinks(trans, root, &path, rec);
2953 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954 ret = repair_inode_nbytes(trans, root, &path, rec);
/* NOTE(review): the return value of the commit is not checked here. */
2955 btrfs_commit_transaction(trans, root);
2956 btrfs_release_path(&path);
/*
 * Verify, and with 'repair' set try to fix, every inode record collected in
 * @inode_cache for @root.
 *
 * Visible steps:
 *  - record the highest inode number in root->highest_inode,
 *  - run repair_inode_backrefs() over the records while 'repair' is set,
 *  - check the root directory record with check_root_dir(); a visible branch
 *    recreates a missing root dir through btrfs_make_root_dir(),
 *  - drain the cache: each remaining record is removed, possibly repaired
 *    through try_repair_inode(), reported with print_inode_error() together
 *    with its unresolved backrefs, and freed.
 *
 * Returns -1 if 'error' ended up positive, 0 otherwise.
 *
 * NOTE(review): loop headers, several conditions and the declarations of
 * 'stage', 'err', 'error' and 'ret' are on lines not visible in this listing.
 */
2960 static int check_inode_recs(struct btrfs_root *root,
2961 struct cache_tree *inode_cache)
2963 struct cache_extent *cache;
2964 struct ptr_node *node;
2965 struct inode_record *rec;
2966 struct inode_backref *backref;
2971 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs only produces a warning for a non-empty cache. */
2973 if (btrfs_root_refs(&root->root_item) == 0) {
2974 if (!cache_tree_empty(inode_cache))
2975 fprintf(stderr, "warning line %d\n", __LINE__);
2980 * We need to record the highest inode number for later 'lost+found'
2982 * We must select an ino not used/referred by any existing inode, or
2983 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984 * this may cause 'lost+found' dir has wrong nlinks.
2986 cache = last_cache_extent(inode_cache);
2988 node = container_of(cache, struct ptr_node, cache);
2990 if (rec->ino > root->highest_inode)
2991 root->highest_inode = rec->ino;
2995 * We need to repair backrefs first because we could change some of the
2996 * errors in the inode recs.
2998 * We also need to go through and delete invalid backrefs first and then
2999 * add the correct ones second. We do this because we may get EEXIST
3000 * when adding back the correct index because we hadn't yet deleted the
3003 * For example, if we were missing a dir index then the directories
3004 * isize would be wrong, so if we fixed the isize to what we thought it
3005 * would be and then fixed the backref we'd still have a invalid fs, so
3006 * we need to add back the dir index and then check to see if the isize
3011 if (stage == 3 && !err)
3014 cache = search_cache_extent(inode_cache, 0);
/* Backref repair pass; it only runs when 'repair' is set. */
3015 while (repair && cache) {
3016 node = container_of(cache, struct ptr_node, cache);
3018 cache = next_cache_extent(cache);
3020 /* Need to free everything up and rescan */
3022 remove_cache_extent(inode_cache, &node->cache);
3024 free_inode_rec(rec);
3028 if (list_empty(&rec->backrefs))
3031 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory check. */
3045 rec = get_inode_rec(inode_cache, root_dirid, 0);
3046 BUG_ON(IS_ERR(rec));
3048 ret = check_root_dir(rec);
3050 fprintf(stderr, "root %llu root dir %llu error\n",
3051 (unsigned long long)root->root_key.objectid,
3052 (unsigned long long)root_dirid);
3053 print_inode_error(root, rec);
3058 struct btrfs_trans_handle *trans;
3060 trans = btrfs_start_transaction(root, 1);
3061 if (IS_ERR(trans)) {
3062 err = PTR_ERR(trans);
3067 "root %llu missing its root dir, recreating\n",
3068 (unsigned long long)root->objectid);
3070 ret = btrfs_make_root_dir(trans, root, root_dirid);
/* NOTE(review): the return value of the commit is not checked here. */
3073 btrfs_commit_transaction(trans, root);
3077 fprintf(stderr, "root %llu root dir %llu not found\n",
3078 (unsigned long long)root->root_key.objectid,
3079 (unsigned long long)root_dirid);
/* Final pass: every record is taken out of the cache and disposed of. */
3083 cache = search_cache_extent(inode_cache, 0);
3086 node = container_of(cache, struct ptr_node, cache);
3088 remove_cache_extent(inode_cache, &node->cache);
3090 if (rec->ino == root_dirid ||
3091 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092 free_inode_rec(rec);
3096 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097 ret = check_orphan_item(root, rec->ino);
3099 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100 if (can_free_inode_rec(rec)) {
3101 free_inode_rec(rec);
3106 if (!rec->found_inode_item)
3107 rec->errors |= I_ERR_NO_INODE_ITEM;
3108 if (rec->found_link != rec->nlink)
3109 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3111 ret = try_repair_inode(root, rec);
3112 if (ret == 0 && can_free_inode_rec(rec)) {
3113 free_inode_rec(rec);
3119 if (!(repair && ret == 0))
3121 print_inode_error(root, rec);
/* Fill in the missing-item bits of each backref before printing it. */
3122 list_for_each_entry(backref, &rec->backrefs, list) {
3123 if (!backref->found_dir_item)
3124 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125 if (!backref->found_dir_index)
3126 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127 if (!backref->found_inode_ref)
3128 backref->errors |= REF_ERR_NO_INODE_REF;
3129 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130 " namelen %u name %s filetype %d errors %x",
3131 (unsigned long long)backref->dir,
3132 (unsigned long long)backref->index,
3133 backref->namelen, backref->name,
3134 backref->filetype, backref->errors);
3135 print_ref_error(backref->errors);
3137 free_inode_rec(rec);
3139 return (error > 0) ? -1 : 0;
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3145 struct cache_extent *cache;
3146 struct root_record *rec = NULL;
3149 cache = lookup_cache_extent(root_cache, objectid, 1);
3151 rec = container_of(cache, struct root_record, cache);
3153 rec = calloc(1, sizeof(*rec));
3155 return ERR_PTR(-ENOMEM);
3156 rec->objectid = objectid;
3157 INIT_LIST_HEAD(&rec->backrefs);
3158 rec->cache.start = objectid;
3159 rec->cache.size = 1;
3161 ret = insert_cache_extent(root_cache, &rec->cache);
3163 return ERR_PTR(-EEXIST);
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169 u64 ref_root, u64 dir, u64 index,
3170 const char *name, int namelen)
3172 struct root_backref *backref;
3174 list_for_each_entry(backref, &rec->backrefs, list) {
3175 if (backref->ref_root != ref_root || backref->dir != dir ||
3176 backref->namelen != namelen)
3178 if (memcmp(name, backref->name, namelen))
3183 backref = calloc(1, sizeof(*backref) + namelen + 1);
3186 backref->ref_root = ref_root;
3188 backref->index = index;
3189 backref->namelen = namelen;
3190 memcpy(backref->name, name, namelen);
3191 backref->name[namelen] = '\0';
3192 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release callback for one root_record: every entry on rec->backrefs is
 * unlinked from the list.  FREE_EXTENT_CACHE_BASED_TREE() is instantiated
 * with this callback for the root_recs tree.
 *
 * NOTE(review): the statements that free the backrefs and the record itself
 * are on lines not visible in this listing.
 */
3196 static void free_root_record(struct cache_extent *cache)
3198 struct root_record *rec;
3199 struct root_backref *backref;
3201 rec = container_of(cache, struct root_record, cache);
3202 while (!list_empty(&rec->backrefs)) {
3203 backref = to_root_backref(rec->backrefs.next);
3204 list_del(&backref->list);
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referring to root @root_id was found.
 *
 * The root record and the matching backref are obtained through
 * get_root_rec() and get_root_backref(); @errors is OR-ed into the backref
 * and the found_* flag belonging to @item_type is set.  Duplicate ROOT_REF
 * and ROOT_BACKREF items are flagged with REF_ERR_DUP_ROOT_REF and
 * REF_ERR_DUP_ROOT_BACKREF.  A backref is marked reachable once both its
 * forward ref and its dir item have been seen.
 *
 * NOTE(review): a few statements (including those following the
 * found_forward_ref / found_dir_item tests) are not visible in this listing.
 */
3213 static int add_root_backref(struct cache_tree *root_cache,
3214 u64 root_id, u64 ref_root, u64 dir, u64 index,
3215 const char *name, int namelen,
3216 int item_type, int errors)
3218 struct root_record *rec;
3219 struct root_backref *backref;
3221 rec = get_root_rec(root_cache, root_id);
3222 BUG_ON(IS_ERR(rec));
3223 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3226 backref->errors |= errors;
/*
 * For every type except DIR_ITEM the index is compared with the stored one
 * when an index-carrying item was already seen for this backref.
 */
3228 if (item_type != BTRFS_DIR_ITEM_KEY) {
3229 if (backref->found_dir_index || backref->found_back_ref ||
3230 backref->found_forward_ref) {
3231 if (backref->index != index)
3232 backref->errors |= REF_ERR_INDEX_UNMATCH;
3234 backref->index = index;
3238 if (item_type == BTRFS_DIR_ITEM_KEY) {
3239 if (backref->found_forward_ref)
3241 backref->found_dir_item = 1;
3242 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243 backref->found_dir_index = 1;
3244 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245 if (backref->found_forward_ref)
3246 backref->errors |= REF_ERR_DUP_ROOT_REF;
3247 else if (backref->found_dir_item)
3249 backref->found_forward_ref = 1;
3250 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251 if (backref->found_back_ref)
3252 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253 backref->found_back_ref = 1;
3258 if (backref->found_forward_ref && backref->found_dir_item)
3259 backref->reachable = 1;
/*
 * Turn the inode records gathered in @src_cache into root backrefs stored in
 * @dst_cache.
 *
 * For the tree-reloc root the source records are simply freed.  Otherwise
 * each record is removed from @src_cache and passed to is_child_root(); its
 * backrefs that have a dir item and/or a dir index are forwarded to
 * add_root_backref() with the inode number as root id, and the record is
 * freed.
 *
 * NOTE(review): how the is_child_root() result is acted upon is on lines
 * not visible in this listing.
 */
3263 static int merge_root_recs(struct btrfs_root *root,
3264 struct cache_tree *src_cache,
3265 struct cache_tree *dst_cache)
3267 struct cache_extent *cache;
3268 struct ptr_node *node;
3269 struct inode_record *rec;
3270 struct inode_backref *backref;
3273 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274 free_inode_recs_tree(src_cache);
3279 cache = search_cache_extent(src_cache, 0);
3282 node = container_of(cache, struct ptr_node, cache);
3284 remove_cache_extent(src_cache, &node->cache);
3287 ret = is_child_root(root, root->objectid, rec->ino);
3293 list_for_each_entry(backref, &rec->backrefs, list) {
/* An inode ref is not expected on a backref handled here. */
3294 BUG_ON(backref->found_inode_ref);
3295 if (backref->found_dir_item)
3296 add_root_backref(dst_cache, rec->ino,
3297 root->root_key.objectid, backref->dir,
3298 backref->index, backref->name,
3299 backref->namelen, BTRFS_DIR_ITEM_KEY,
3301 if (backref->found_dir_index)
3302 add_root_backref(dst_cache, rec->ino,
3303 root->root_key.objectid, backref->dir,
3304 backref->index, backref->name,
3305 backref->namelen, BTRFS_DIR_INDEX_KEY,
3309 free_inode_rec(rec);
/*
 * Check the references between the roots recorded in @root_cache.
 *
 * The first visible pass walks all records and their reachable backrefs,
 * looks up the referencing root with get_root_rec() and tests its found_ref;
 * a backref can end up with reachable cleared.  The second pass reports
 * roots in the free objectid range that have no reference (after consulting
 * check_orphan_item() on the tree root), fills in the REF_ERR_NO_* bits of
 * every backref and prints the unresolved ones.
 *
 * Returns 1 if 'errors' is positive, 0 otherwise.
 *
 * NOTE(review): loop headers, 'continue' statements and counters are on
 * lines not visible in this listing.
 */
3316 static int check_root_refs(struct btrfs_root *root,
3317 struct cache_tree *root_cache)
3319 struct root_record *rec;
3320 struct root_record *ref_root;
3321 struct root_backref *backref;
3322 struct cache_extent *cache;
3328 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329 BUG_ON(IS_ERR(rec));
3332 /* fixme: this can not detect circular references */
3335 cache = search_cache_extent(root_cache, 0);
3339 rec = container_of(cache, struct root_record, cache);
3340 cache = next_cache_extent(cache);
3342 if (rec->found_ref == 0)
3345 list_for_each_entry(backref, &rec->backrefs, list) {
3346 if (!backref->reachable)
3349 ref_root = get_root_rec(root_cache,
3351 BUG_ON(IS_ERR(ref_root));
3352 if (ref_root->found_ref > 0)
3355 backref->reachable = 0;
3357 if (rec->found_ref == 0)
/* Reporting pass over all root records. */
3363 cache = search_cache_extent(root_cache, 0);
3367 rec = container_of(cache, struct root_record, cache);
3368 cache = next_cache_extent(cache);
3370 if (rec->found_ref == 0 &&
3371 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373 ret = check_orphan_item(root->fs_info->tree_root,
3379 * If we don't have a root item then we likely just have
3380 * a dir item in a snapshot for this root but no actual
3381 * ref key or anything so it's meaningless.
3383 if (!rec->found_root_item)
3386 fprintf(stderr, "fs tree %llu not referenced\n",
3387 (unsigned long long)rec->objectid);
3391 if (rec->found_ref > 0 && !rec->found_root_item)
/* Derive the missing-item error bits from the found_* flags. */
3393 list_for_each_entry(backref, &rec->backrefs, list) {
3394 if (!backref->found_dir_item)
3395 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396 if (!backref->found_dir_index)
3397 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398 if (!backref->found_back_ref)
3399 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400 if (!backref->found_forward_ref)
3401 backref->errors |= REF_ERR_NO_ROOT_REF;
3402 if (backref->reachable && backref->errors)
3409 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410 (unsigned long long)rec->objectid, rec->found_ref,
3411 rec->found_root_item ? "" : "not found");
3413 list_for_each_entry(backref, &rec->backrefs, list) {
3414 if (!backref->reachable)
3416 if (!backref->errors && rec->found_root_item)
3418 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419 " index %llu namelen %u name %s errors %x\n",
3420 (unsigned long long)backref->ref_root,
3421 (unsigned long long)backref->dir,
3422 (unsigned long long)backref->index,
3423 backref->namelen, backref->name,
3425 print_ref_error(backref->errors);
3428 return errors > 0 ? 1 : 0;
/*
 * Parse the root ref item at @slot of @eb and feed it to add_root_backref().
 *
 * dirid, sequence (used as index) and the name are read from the item.  The
 * number of name bytes copied into namebuf is capped at BTRFS_NAME_LEN, in
 * which case REF_ERR_NAME_TOO_LONG is passed along as the error.
 * For BTRFS_ROOT_REF_KEY the key offset is the root id and the key objectid
 * the referencing root; the other call passes them the other way round.
 */
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432 struct btrfs_key *key,
3433 struct cache_tree *root_cache)
3439 struct btrfs_root_ref *ref;
3440 char namebuf[BTRFS_NAME_LEN];
3443 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3445 dirid = btrfs_root_ref_dirid(eb, ref);
3446 index = btrfs_root_ref_sequence(eb, ref);
3447 name_len = btrfs_root_ref_name_len(eb, ref);
3449 if (name_len <= BTRFS_NAME_LEN) {
3453 len = BTRFS_NAME_LEN;
3454 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the btrfs_root_ref structure. */
3456 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3458 if (key->type == BTRFS_ROOT_REF_KEY) {
3459 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460 index, namebuf, len, key->type, error);
3462 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463 index, namebuf, len, key->type, error);
3468 static void free_corrupt_block(struct cache_extent *cache)
3470 struct btrfs_corrupt_block *corrupt;
3472 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3479 * Repair the btree of the given root.
3481 * The fix is to remove the node key in corrupt_blocks cache_tree.
3482 * and rebalance the tree.
3483 * After the fix, the btree should be writeable.
/*
 * @root:           tree to repair
 * @corrupt_blocks: cache tree of btrfs_corrupt_block records for @root
 *
 * An empty @corrupt_blocks tree is tested for first.  Within one
 * transaction, a first pass searches for each corrupt block's key at the
 * block's level, removes the node pointer with btrfs_del_ptr() and frees
 * the extent; a second pass calls btrfs_search_slot() with ins_len -1 for
 * every key so that the tree gets rebalanced.
 *
 * NOTE(review): loop headers, error checks and the return statements are on
 * lines not visible in this listing.
 */
3485 static int repair_btree(struct btrfs_root *root,
3486 struct cache_tree *corrupt_blocks)
3488 struct btrfs_trans_handle *trans;
3489 struct btrfs_path path;
3490 struct btrfs_corrupt_block *corrupt;
3491 struct cache_extent *cache;
3492 struct btrfs_key key;
3497 if (cache_tree_empty(corrupt_blocks))
3500 trans = btrfs_start_transaction(root, 1);
3501 if (IS_ERR(trans)) {
3502 ret = PTR_ERR(trans);
3503 fprintf(stderr, "Error starting transaction: %s\n",
3507 btrfs_init_path(&path);
3508 cache = first_cache_extent(corrupt_blocks);
3510 corrupt = container_of(cache, struct btrfs_corrupt_block,
3512 level = corrupt->level;
/* Stop the search at the level of the corrupt block. */
3513 path.lowest_level = level;
3514 key.objectid = corrupt->key.objectid;
3515 key.type = corrupt->key.type;
3516 key.offset = corrupt->key.offset;
3519 * Here we don't want to do any tree balance, since it may
3520 * cause a balance with corrupted brother leaf/node,
3521 * so ins_len set to 0 here.
3522 * Balance will be done after all corrupt node/leaf is deleted.
3524 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3527 offset = btrfs_node_blockptr(path.nodes[level],
3530 /* Remove the ptr */
3531 ret = btrfs_del_ptr(trans, root, &path, level,
3536 * Remove the corresponding extent
3537 * return value is not concerned.
3539 btrfs_release_path(&path);
3540 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541 0, root->root_key.objectid,
3543 cache = next_cache_extent(cache);
3546 /* Balance the btree using btrfs_search_slot() */
3547 cache = first_cache_extent(corrupt_blocks);
3549 corrupt = container_of(cache, struct btrfs_corrupt_block,
3551 memcpy(&key, &corrupt->key, sizeof(key));
3552 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3555 /* return will always >0 since it won't find the item */
3557 btrfs_release_path(&path);
3558 cache = next_cache_extent(cache);
/* NOTE(review): the return value of the commit is not checked here. */
3561 btrfs_commit_transaction(trans, root);
3562 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * @root:       the tree to check
 * @root_cache: cache of root records shared between all checked roots
 * @wc:         walk control; its node array and levels are reset here
 *
 * Visible steps:
 *  - install a per-root corrupt_blocks cache tree in fs_info,
 *  - for every root except the tree-reloc root, get its root record and set
 *    found_root_item when the root item has refs,
 *  - move the root's orphan data extents onto the matching inode records,
 *  - check the root block itself with btrfs_check_leaf()/btrfs_check_node(),
 *  - start the walk at the root node or at the drop_progress key, then
 *    run walk_down_tree()/walk_up_tree(),
 *  - print any corrupted tree blocks and call repair_btree(),
 *  - merge the collected root records and check the inode records,
 *  - free the corrupt_blocks tree and the orphan data extents.
 *
 * NOTE(review): loop headers, most error handling and the return statement
 * are on lines not visible in this listing.
 */
3566 static int check_fs_root(struct btrfs_root *root,
3567 struct cache_tree *root_cache,
3568 struct walk_control *wc)
3574 struct btrfs_path path;
3575 struct shared_node root_node;
3576 struct root_record *rec;
3577 struct btrfs_root_item *root_item = &root->root_item;
3578 struct cache_tree corrupt_blocks;
3579 struct orphan_data_extent *orphan;
3580 struct orphan_data_extent *tmp;
3581 enum btrfs_tree_block_status status;
3582 struct node_refs nrefs;
3585 * Reuse the corrupt_block cache tree to record corrupted tree block
3587 * Unlike the usage in extent tree check, here we do it in a per
3588 * fs/subvol tree base.
3590 cache_tree_init(&corrupt_blocks);
3591 root->fs_info->corrupt_blocks = &corrupt_blocks;
3593 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594 rec = get_root_rec(root_cache, root->root_key.objectid);
3595 BUG_ON(IS_ERR(rec));
3596 if (btrfs_root_refs(root_item) > 0)
3597 rec->found_root_item = 1;
3600 btrfs_init_path(&path);
3601 memset(&root_node, 0, sizeof(root_node));
3602 cache_tree_init(&root_node.root_cache);
3603 cache_tree_init(&root_node.inode_cache);
3604 memset(&nrefs, 0, sizeof(nrefs));
3606 /* Move the orphan extent record to corresponding inode_record */
3607 list_for_each_entry_safe(orphan, tmp,
3608 &root->orphan_data_extents, list) {
3609 struct inode_record *inode;
3611 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3613 BUG_ON(IS_ERR(inode));
3614 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615 list_move(&orphan->list, &inode->orphan_extents);
/* The walk starts with root_node as the active node at the root level. */
3618 level = btrfs_header_level(root->node);
3619 memset(wc->nodes, 0, sizeof(wc->nodes));
3620 wc->nodes[level] = &root_node;
3621 wc->active_node = level;
3622 wc->root_level = level;
3624 /* We may not have checked the root block, lets do that now */
3625 if (btrfs_is_leaf(root->node))
3626 status = btrfs_check_leaf(root, NULL, root->node);
3628 status = btrfs_check_node(root, NULL, root->node);
3629 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * With root refs, or with a zero drop_progress objectid, the walk begins at
 * slot 0 of the root node; the other visible branch searches for the
 * drop_progress key at drop_level.
 */
3632 if (btrfs_root_refs(root_item) > 0 ||
3633 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634 path.nodes[level] = root->node;
3635 extent_buffer_get(root->node);
3636 path.slots[level] = 0;
3638 struct btrfs_key key;
3639 struct btrfs_disk_key found_key;
3641 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642 level = root_item->drop_level;
3643 path.lowest_level = level;
3644 if (level > btrfs_header_level(root->node) ||
3645 level >= BTRFS_MAX_LEVEL) {
3646 error("ignoring invalid drop level: %u", level);
3649 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3652 btrfs_node_key(path.nodes[level], &found_key,
3654 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655 sizeof(found_key)));
3659 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3665 wret = walk_up_tree(root, &path, wc, &level);
3672 btrfs_release_path(&path);
3674 if (!cache_tree_empty(&corrupt_blocks)) {
3675 struct cache_extent *cache;
3676 struct btrfs_corrupt_block *corrupt;
3678 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679 root->root_key.objectid);
3680 cache = first_cache_extent(&corrupt_blocks);
3682 corrupt = container_of(cache,
3683 struct btrfs_corrupt_block,
3685 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686 cache->start, corrupt->level,
3687 corrupt->key.objectid, corrupt->key.type,
3688 corrupt->key.offset);
3689 cache = next_cache_extent(cache);
3692 printf("Try to repair the btree for root %llu\n",
3693 root->root_key.objectid);
3694 ret = repair_btree(root, &corrupt_blocks);
3696 fprintf(stderr, "Failed to repair btree: %s\n",
3699 printf("Btree for root %llu is fixed\n",
3700 root->root_key.objectid);
3704 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the record still being built as checked before the final pass. */
3708 if (root_node.current) {
3709 root_node.current->checked = 1;
3710 maybe_free_inode_rec(&root_node.inode_cache,
3714 err = check_inode_recs(root, &root_node.inode_cache);
/* fs_info must not keep pointing at the on-stack cache tree. */
3718 free_corrupt_blocks_tree(&corrupt_blocks);
3719 root->fs_info->corrupt_blocks = NULL;
3720 free_orphan_data_extents(&root->orphan_data_extents);
3724 static int fs_root_objectid(u64 objectid)
3726 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3729 return is_fstree(objectid);
/*
 * Walk the tree root and check every fs/subvolume tree found there.
 *
 * ROOT_ITEM keys accepted by fs_root_objectid() are read (without the root
 * cache for the tree-reloc tree) and passed to check_fs_root(); ROOT_REF
 * and ROOT_BACKREF keys go to process_root_ref().  When check_fs_root()
 * returns -EAGAIN, or the tree root node changed, the root records are
 * freed and the path released.
 *
 * NOTE(review): that cleanup presumably precedes a restart of the scan, but
 * the jump, the loop header and the return statement are on lines not
 * visible in this listing.
 */
3732 static int check_fs_roots(struct btrfs_root *root,
3733 struct cache_tree *root_cache)
3735 struct btrfs_path path;
3736 struct btrfs_key key;
3737 struct walk_control wc;
3738 struct extent_buffer *leaf, *tree_node;
3739 struct btrfs_root *tmp_root;
3740 struct btrfs_root *tree_root = root->fs_info->tree_root;
3744 if (ctx.progress_enabled) {
3745 ctx.tp = TASK_FS_ROOTS;
3746 task_start(ctx.info);
3750 * Just in case we made any changes to the extent tree that weren't
3751 * reflected into the free space cache yet.
3754 reset_cached_block_groups(root->fs_info);
3755 memset(&wc, 0, sizeof(wc));
3756 cache_tree_init(&wc.shared);
3757 btrfs_init_path(&path);
3762 key.type = BTRFS_ROOT_ITEM_KEY;
3763 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree root node so that a later change can be detected. */
3768 tree_node = tree_root->node;
3770 if (tree_node != tree_root->node) {
3771 free_root_recs_tree(root_cache);
3772 btrfs_release_path(&path);
3775 leaf = path.nodes[0];
3776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777 ret = btrfs_next_leaf(tree_root, &path);
3783 leaf = path.nodes[0];
3785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787 fs_root_objectid(key.objectid)) {
3788 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789 tmp_root = btrfs_read_fs_root_no_cache(
3790 root->fs_info, &key);
3792 key.offset = (u64)-1;
3793 tmp_root = btrfs_read_fs_root(
3794 root->fs_info, &key);
3796 if (IS_ERR(tmp_root)) {
3800 ret = check_fs_root(tmp_root, root_cache, &wc);
3801 if (ret == -EAGAIN) {
3802 free_root_recs_tree(root_cache);
3803 btrfs_release_path(&path);
/* Only the tree-reloc root, read without the cache, is freed here. */
3808 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809 btrfs_free_fs_root(tmp_root);
3810 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811 key.type == BTRFS_ROOT_BACKREF_KEY) {
3812 process_root_ref(leaf, path.slots[0], &key,
3819 btrfs_release_path(&path);
3821 free_extent_cache_tree(&wc.shared);
3822 if (!cache_tree_empty(&wc.shared))
3823 fprintf(stderr, "warning line %d\n", __LINE__);
3825 task_stop(ctx.info);
/*
 * Error flags used by the checkers that follow: each is a distinct bit, so
 * a function can return a single flag or OR several into one bitmap.
 */
3830 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
3831 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
3832 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
3833 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
3834 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
3835 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
3836 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
3837 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
3838 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
3841 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3842 * INODE_REF/INODE_EXTREF match.
3844 * @root: the root of the fs/file tree
3845 * @ref_key: the key of the INODE_REF/INODE_EXTREF
3846 * @key: the key of the DIR_ITEM/DIR_INDEX
3847 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
3848 * distinguish root_dir between normal dir/file
3849 * @name: the name in the INODE_REF/INODE_EXTREF
3850 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
3851 * @mode: the st_mode of INODE_ITEM
3853 * Return 0 if no error occurred.
3854 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3855 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3857 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3858 * not match for normal dir/file.
/*
 * Implementation notes: @key is looked up with btrfs_search_slot().  On the
 * normal file/dir path every btrfs_dir_item packed in the found item is
 * walked; location, file type and name are compared with the data from the
 * inode ref.  The path is released before returning.
 *
 * NOTE(review): the branch conditions separating the root dir path from the
 * normal path are on lines not visible in this listing.
 */
3860 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3861 struct btrfs_key *key, u64 index, char *name,
3862 u32 namelen, u32 mode)
3864 struct btrfs_path path;
3865 struct extent_buffer *node;
3866 struct btrfs_dir_item *di;
3867 struct btrfs_key location;
3868 char namebuf[BTRFS_NAME_LEN] = {0};
3878 btrfs_init_path(&path);
3879 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3881 ret = DIR_ITEM_MISSING;
3885 /* Process root dir and goto out*/
3888 ret = ROOT_DIR_ERROR;
3890 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3892 ref_key->type == BTRFS_INODE_REF_KEY ?
3894 ref_key->objectid, ref_key->offset,
3895 key->type == BTRFS_DIR_ITEM_KEY ?
3896 "DIR_ITEM" : "DIR_INDEX");
3904 /* Process normal file/dir */
3906 ret = DIR_ITEM_MISSING;
3908 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3910 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3911 ref_key->objectid, ref_key->offset,
3912 key->type == BTRFS_DIR_ITEM_KEY ?
3913 "DIR_ITEM" : "DIR_INDEX",
3914 key->objectid, key->offset, namelen, name,
3915 imode_to_type(mode));
3919 /* Check whether inode_id/filetype/name match */
3920 node = path.nodes[0];
3921 slot = path.slots[0];
3922 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3923 total = btrfs_item_size_nr(node, slot);
/* One item can hold several dir entries; 'total' is the item size. */
3924 while (cur < total) {
3925 ret = DIR_ITEM_MISMATCH;
3926 name_len = btrfs_dir_name_len(node, di);
3927 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM of the referencing inode. */
3929 btrfs_dir_item_key_to_cpu(node, di, &location);
3930 if (location.objectid != ref_key->objectid ||
3931 location.type != BTRFS_INODE_ITEM_KEY ||
3932 location.offset != 0)
3935 filetype = btrfs_dir_type(node, di);
3936 if (imode_to_type(mode) != filetype)
3939 if (name_len <= BTRFS_NAME_LEN) {
3942 len = BTRFS_NAME_LEN;
3943 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3945 key->type == BTRFS_DIR_ITEM_KEY ?
3946 "DIR_ITEM" : "DIR_INDEX",
3947 key->objectid, key->offset, name_len);
/* The name bytes start right after the btrfs_dir_item structure. */
3949 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3950 if (len != namelen || strncmp(namebuf, name, len))
/* Step over the header, name and data to reach the next packed entry. */
3956 len = sizeof(*di) + name_len + data_len;
3957 di = (struct btrfs_dir_item *)((char *)di + len);
3960 if (ret == DIR_ITEM_MISMATCH)
3962 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3964 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3965 ref_key->objectid, ref_key->offset,
3966 key->type == BTRFS_DIR_ITEM_KEY ?
3967 "DIR_ITEM" : "DIR_INDEX",
3968 key->objectid, key->offset, namelen, name,
3969 imode_to_type(mode));
3971 btrfs_release_path(&path);
3976 * Traverse the given INODE_REF and call find_dir_item() to find related
3977 * DIR_ITEM/DIR_INDEX.
3979 * @root: the root of the fs/file tree
3980 * @ref_key: the key of the INODE_REF
3981 * @refs: the count of INODE_REF
3982 * @mode: the st_mode of INODE_ITEM
3984 * Return 0 if no error occurred.
/*
 * Implementation notes: for each inode ref packed in the item at @slot of
 * @node, the name is copied into namebuf (capped at BTRFS_NAME_LEN), the
 * name is checked for index 0, and find_dir_item() is called twice: once
 * with a DIR_INDEX key and once with a DIR_ITEM key whose offset is the
 * name hash.  The directory objectid of both keys is ref_key->offset.
 *
 * NOTE(review): the index 0 name test compares with name_len, not with the
 * capped len.
 */
3986 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3987 struct extent_buffer *node, int slot, u64 *refs,
3990 struct btrfs_key key;
3991 struct btrfs_inode_ref *ref;
3992 char namebuf[BTRFS_NAME_LEN] = {0};
4000 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4001 total = btrfs_item_size_nr(node, slot);
4004 /* Update inode ref count */
4007 index = btrfs_inode_ref_index(node, ref);
4008 name_len = btrfs_inode_ref_name_len(node, ref);
4009 if (name_len <= BTRFS_NAME_LEN) {
4012 len = BTRFS_NAME_LEN;
4013 warning("root %llu INODE_REF[%llu %llu] name too long",
4014 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes start right after the btrfs_inode_ref structure. */
4017 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4019 /* Check root dir ref name */
4020 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4021 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4022 root->objectid, ref_key->objectid, ref_key->offset,
4024 err |= ROOT_DIR_ERROR;
4027 /* Find related DIR_INDEX */
4028 key.objectid = ref_key->offset;
4029 key.type = BTRFS_DIR_INDEX_KEY;
4031 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4034 /* Find related dir_item */
4035 key.objectid = ref_key->offset;
4036 key.type = BTRFS_DIR_ITEM_KEY;
4037 key.offset = btrfs_name_hash(namebuf, len);
4038 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance by the on-disk name length (name_len), not the capped len. */
4041 len = sizeof(*ref) + name_len;
4042 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4051 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4052 * DIR_ITEM/DIR_INDEX.
4054 * @root: the root of the fs/file tree
4055 * @ref_key: the key of the INODE_EXTREF
4056 * @refs: the count of INODE_EXTREF
4057 * @mode: the st_mode of INODE_ITEM
4059 * Return 0 if no error occurred.
/*
 * Implementation notes: for each extref packed in the item at @slot of
 * @node, the name is copied into namebuf (capped at BTRFS_NAME_LEN), the
 * name is checked for index 0, and find_dir_item() is called twice: once
 * with a DIR_INDEX key and once with a DIR_ITEM key whose offset is the
 * name hash.  The directory objectid of both keys is the parent stored in
 * the extref.
 *
 * NOTE(review): the index 0 name test compares with name_len, not with the
 * capped len.
 */
4061 static int check_inode_extref(struct btrfs_root *root,
4062 struct btrfs_key *ref_key,
4063 struct extent_buffer *node, int slot, u64 *refs,
4066 struct btrfs_key key;
4067 struct btrfs_inode_extref *extref;
4068 char namebuf[BTRFS_NAME_LEN] = {0};
4078 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4079 total = btrfs_item_size_nr(node, slot);
4082 /* update inode ref count */
4084 name_len = btrfs_inode_extref_name_len(node, extref);
4085 index = btrfs_inode_extref_index(node, extref);
4086 parent = btrfs_inode_extref_parent(node, extref);
4087 if (name_len <= BTRFS_NAME_LEN) {
4090 len = BTRFS_NAME_LEN;
4091 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4092 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes start right after the btrfs_inode_extref structure. */
4094 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4096 /* Check root dir ref name */
4097 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4098 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4099 root->objectid, ref_key->objectid, ref_key->offset,
4101 err |= ROOT_DIR_ERROR;
4104 /* find related dir_index */
4105 key.objectid = parent;
4106 key.type = BTRFS_DIR_INDEX_KEY;
4108 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4111 /* find related dir_item */
4112 key.objectid = parent;
4113 key.type = BTRFS_DIR_ITEM_KEY;
4114 key.offset = btrfs_name_hash(namebuf, len);
4115 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance by the on-disk name length (name_len), not the capped len. */
4118 len = sizeof(*extref) + name_len;
4119 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4129 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4130 * DIR_ITEM/DIR_INDEX match.
4132 * @root: the root of the fs/file tree
4133 * @key: the key of the INODE_REF/INODE_EXTREF
4134 * @name: the name in the INODE_REF/INODE_EXTREF
4135 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4136 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4138 * @ext_ref: the EXTENDED_IREF feature
4140 * Return 0 if no error occurred.
4141 * Return >0 for error bitmap
/*
 * The first half searches the INODE_REF item addressed by @key for an entry
 * whose name equals @name/@namelen and whose index equals @index (the index
 * test is bypassed when @index is (u64)-1). The second half performs the same
 * matching on the INODE_EXTREF item, additionally requiring the extref parent
 * to equal the directory id.
 *
 * Note: @key is modified in place for the second lookup. Its original offset
 * is saved as the directory id, its type becomes BTRFS_INODE_EXTREF_KEY and
 * its offset becomes btrfs_extref_hash(dir_id, name, namelen).
 */
4143 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4144 char *name, int namelen, u64 index,
4145 unsigned int ext_ref)
4147 struct btrfs_path path;
4148 struct btrfs_inode_ref *ref;
4149 struct btrfs_inode_extref *extref;
4150 struct extent_buffer *node;
4151 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4162 btrfs_init_path(&path);
4163 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4165 ret = INODE_REF_MISSING;
4169 node = path.nodes[0];
4170 slot = path.slots[0];
4172 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4173 total = btrfs_item_size_nr(node, slot);
4175 /* Iterate all entry of INODE_REF */
4176 while (cur < total) {
4177 ret = INODE_REF_MISSING;
4179 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4180 ref_index = btrfs_inode_ref_index(node, ref);
4181 if (index != (u64)-1 && index != ref_index)
4184 if (ref_namelen <= BTRFS_NAME_LEN) {
4187 len = BTRFS_NAME_LEN;
4188 warning("root %llu INODE %s[%llu %llu] name too long",
4190 key->type == BTRFS_INODE_REF_KEY ?
4192 key->objectid, key->offset);
4194 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4197 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance to the next packed INODE_REF entry */
4203 len = sizeof(*ref) + ref_namelen;
4204 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4209 /* Skip if not support EXTENDED_IREF feature */
4213 btrfs_release_path(&path);
4214 btrfs_init_path(&path);
/* The caller's key is rewritten here to address the INODE_EXTREF item */
4216 dir_id = key->offset;
4217 key->type = BTRFS_INODE_EXTREF_KEY;
4218 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4220 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4222 ret = INODE_REF_MISSING;
4226 node = path.nodes[0];
4227 slot = path.slots[0];
4229 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4231 total = btrfs_item_size_nr(node, slot);
4233 /* Iterate all entry of INODE_EXTREF */
4234 while (cur < total) {
4235 ret = INODE_REF_MISSING;
4237 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4238 ref_index = btrfs_inode_extref_index(node, extref);
4239 parent = btrfs_inode_extref_parent(node, extref);
4240 if (index != (u64)-1 && index != ref_index)
4243 if (parent != dir_id)
/*
 * NOTE(review): unlike the INODE_REF loop, the warning below embeds its own
 * "Warning:" prefix and a trailing newline, and its key->type test runs after
 * key->type was already set to BTRFS_INODE_EXTREF_KEY - confirm intent.
 */
4246 if (ref_namelen <= BTRFS_NAME_LEN) {
4249 len = BTRFS_NAME_LEN;
4250 warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4252 key->type == BTRFS_INODE_REF_KEY ?
4254 key->objectid, key->offset);
4256 read_extent_buffer(node, ref_namebuf,
4257 (unsigned long)(extref + 1), len);
4259 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance to the next packed INODE_EXTREF entry */
4266 len = sizeof(*extref) + ref_namelen;
4267 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4272 btrfs_release_path(&path);
4277 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4278 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4280 * @root: the root of the fs/file tree
4281 * @key: the key of the DIR_ITEM/DIR_INDEX
4282 * @size: the st_size of the INODE_ITEM
4283 * @ext_ref: the EXTENDED_IREF feature
4285 * Return 0 if no error occurred.
/*
 * Here @key is the key of the DIR_ITEM/DIR_INDEX item being checked; its type
 * is compared against BTRFS_DIR_ITEM_KEY/BTRFS_DIR_INDEX_KEY throughout.
 *
 * Each btrfs_dir_item packed in the item is visited: its name length is added
 * to *@size, the INODE_ITEM named by its location key is searched for
 * (INODE_ITEM_MISSING), the inode mode is compared with the entry file type
 * (INODE_ITEM_MISMATCH), and find_inode_ref() is asked for the matching back
 * reference. Entries whose location is a ROOT_ITEM key are exempt from the
 * INODE_ITEM check.
 */
4287 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4288 struct extent_buffer *node, int slot, u64 *size,
4289 unsigned int ext_ref)
4291 struct btrfs_dir_item *di;
4292 struct btrfs_inode_item *ii;
4293 struct btrfs_path path;
4294 struct btrfs_key location;
4295 char namebuf[BTRFS_NAME_LEN] = {0};
4308 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4309 * ignore index check.
4311 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4313 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4314 total = btrfs_item_size_nr(node, slot);
4316 while (cur < total) {
4317 data_len = btrfs_dir_data_len(node, di);
4319 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4320 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4321 "DIR_ITEM" : "DIR_INDEX",
4322 key->objectid, key->offset, data_len);
4324 name_len = btrfs_dir_name_len(node, di);
4325 if (name_len <= BTRFS_NAME_LEN) {
4328 len = BTRFS_NAME_LEN;
4329 warning("root %llu %s[%llu %llu] name too long",
4331 key->type == BTRFS_DIR_ITEM_KEY ?
4332 "DIR_ITEM" : "DIR_INDEX",
4333 key->objectid, key->offset);
/* The on-disk name length (not the clamped len) is what accumulates */
4335 (*size) += name_len;
4337 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4338 filetype = btrfs_dir_type(node, di);
4340 btrfs_init_path(&path);
4341 btrfs_dir_item_key_to_cpu(node, di, &location);
4343 /* Ignore related ROOT_ITEM check */
4344 if (location.type == BTRFS_ROOT_ITEM_KEY)
4347 /* Check relative INODE_ITEM(existence/filetype) */
4348 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4350 err |= INODE_ITEM_MISSING;
4351 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4352 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4353 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4354 key->offset, location.objectid, name_len,
4359 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4360 struct btrfs_inode_item);
4361 mode = btrfs_inode_mode(path.nodes[0], ii);
4363 if (imode_to_type(mode) != filetype) {
4364 err |= INODE_ITEM_MISMATCH;
4365 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4366 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4367 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4368 key->offset, name_len, namebuf, filetype);
4371 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * Reuse location as the INODE_REF search key: only type and offset are
 * rewritten, the offset becoming the objectid of the directory key.
 */
4372 location.type = BTRFS_INODE_REF_KEY;
4373 location.offset = key->objectid;
4374 ret = find_inode_ref(root, &location, namebuf, len,
4377 if (ret & INODE_REF_MISSING)
4378 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4379 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4380 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4381 key->offset, name_len, namebuf, filetype);
4384 btrfs_release_path(&path);
/* Advance past header, name and data to the next packed dir item */
4385 len = sizeof(*di) + name_len + data_len;
4386 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with bytes still unconsumed holds more than one entry */
4389 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4390 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4391 root->objectid, key->objectid, key->offset);
4400 * Check file extent datasum/hole, update the size of the file extents,
4401 * check and update the last offset of the file extent.
4403 * @root: the root of fs/file tree.
4404 * @fkey: the key of the file extent.
4405 * @nodatasum: INODE_NODATASUM feature.
4406 * @size: the sum of all EXTENT_DATA items size for this inode.
4407 * @end: the offset of the last extent.
4409 * Return 0 if no error occurred.
/*
 * Inline extents only have their length validated (non-zero and equal to the
 * inline length derived from the item) and are added to *@size. Regular and
 * preallocated extents additionally get checksum-coverage and hole/continuity
 * checks; any other extent type is reported as FILE_EXTENT_ERROR.
 */
4411 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4412 struct extent_buffer *node, int slot,
4413 unsigned int nodatasum, u64 *size, u64 *end)
4415 struct btrfs_file_extent_item *fi;
4418 u64 extent_num_bytes;
4420 unsigned int extent_type;
4421 unsigned int is_hole;
4425 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4427 extent_type = btrfs_file_extent_type(node, fi);
4428 /* Skip if file extent is inline */
4429 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4430 struct btrfs_item *e = btrfs_item_nr(slot);
4431 u32 item_inline_len;
4433 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4434 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4435 if (extent_num_bytes == 0 ||
4436 extent_num_bytes != item_inline_len)
4437 err |= FILE_EXTENT_ERROR;
4438 *size += extent_num_bytes;
4442 /* Check extent type */
4443 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4444 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4445 err |= FILE_EXTENT_ERROR;
4446 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4447 root->objectid, fkey->objectid, fkey->offset);
4451 /* Check REG_EXTENT/PREALLOC_EXTENT */
4452 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4453 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4454 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
/* An extent counts as a hole when both disk_bytenr and disk_num_bytes are 0 */
4455 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4457 /* Check EXTENT_DATA datasum */
4458 ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4459 if (found > 0 && nodatasum) {
4460 err |= ODD_CSUM_ITEM;
4461 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4462 root->objectid, fkey->objectid, fkey->offset);
4463 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4465 (ret < 0 || found == 0 || found < disk_num_bytes)) {
4466 err |= CSUM_ITEM_MISSING;
4467 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4468 root->objectid, fkey->objectid, fkey->offset);
4469 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4470 err |= ODD_CSUM_ITEM;
4471 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4472 root->objectid, fkey->objectid, fkey->offset);
4475 /* Check EXTENT_DATA hole */
/*
 * With the no_holes flag set an explicit hole extent is an error; without it
 * this extent must start exactly at *end, the running end of previous extents.
 */
4476 if (no_holes && is_hole) {
4477 err |= FILE_EXTENT_ERROR;
4478 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4479 root->objectid, fkey->objectid, fkey->offset);
4480 } else if (!no_holes && *end != fkey->offset) {
4481 err |= FILE_EXTENT_ERROR;
4482 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4483 root->objectid, fkey->objectid, fkey->offset);
4486 *end += extent_num_bytes;
4488 *size += extent_num_bytes;
/*
 * Cross-check every backref attached to @rec:
 *  - each backref is tested for found_extent_tree;
 *  - a tree backref is tested for found_ref;
 *  - a data backref is tested for found_ref == num_refs, and its disk_bytenr
 *    and bytes are compared with rec->start and rec->nr;
 * and finally the accumulated count is compared with rec->refs.
 * Diagnostics are written to stderr.
 *
 * NOTE(review): @print_errs presumably gates the diagnostics - confirm.
 * maybe_free_extent_rec() requires a zero return before it frees a record.
 */
4493 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4495 struct list_head *cur = rec->backrefs.next;
4496 struct extent_backref *back;
4497 struct tree_backref *tback;
4498 struct data_backref *dback;
4502 while(cur != &rec->backrefs) {
4503 back = to_extent_backref(cur);
4505 if (!back->found_extent_tree) {
4509 if (back->is_data) {
4510 dback = to_data_backref(back);
4511 fprintf(stderr, "Backref %llu %s %llu"
4512 " owner %llu offset %llu num_refs %lu"
4513 " not found in extent tree\n",
4514 (unsigned long long)rec->start,
4515 back->full_backref ?
4517 back->full_backref ?
4518 (unsigned long long)dback->parent:
4519 (unsigned long long)dback->root,
4520 (unsigned long long)dback->owner,
4521 (unsigned long long)dback->offset,
4522 (unsigned long)dback->num_refs);
4524 tback = to_tree_backref(back);
4525 fprintf(stderr, "Backref %llu parent %llu"
4526 " root %llu not found in extent tree\n",
4527 (unsigned long long)rec->start,
4528 (unsigned long long)tback->parent,
4529 (unsigned long long)tback->root);
4532 if (!back->is_data && !back->found_ref) {
4536 tback = to_tree_backref(back);
4537 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4538 (unsigned long long)rec->start,
4539 back->full_backref ? "parent" : "root",
4540 back->full_backref ?
4541 (unsigned long long)tback->parent :
4542 (unsigned long long)tback->root, back);
4544 if (back->is_data) {
4545 dback = to_data_backref(back);
4546 if (dback->found_ref != dback->num_refs) {
4550 fprintf(stderr, "Incorrect local backref count"
4551 " on %llu %s %llu owner %llu"
4552 " offset %llu found %u wanted %u back %p\n",
4553 (unsigned long long)rec->start,
4554 back->full_backref ?
4556 back->full_backref ?
4557 (unsigned long long)dback->parent:
4558 (unsigned long long)dback->root,
4559 (unsigned long long)dback->owner,
4560 (unsigned long long)dback->offset,
4561 dback->found_ref, dback->num_refs, back);
4563 if (dback->disk_bytenr != rec->start) {
4567 fprintf(stderr, "Backref disk bytenr does not"
4568 " match extent record, bytenr=%llu, "
4569 "ref bytenr=%llu\n",
4570 (unsigned long long)rec->start,
4571 (unsigned long long)dback->disk_bytenr);
4574 if (dback->bytes != rec->nr) {
4578 fprintf(stderr, "Backref bytes do not match "
4579 "extent backref, bytenr=%llu, ref "
4580 "bytes=%llu, backref bytes=%llu\n",
4581 (unsigned long long)rec->start,
4582 (unsigned long long)rec->nr,
4583 (unsigned long long)dback->bytes);
4586 if (!back->is_data) {
4589 dback = to_data_backref(back);
/* Data backrefs contribute their found_ref count to the global total */
4590 found += dback->found_ref;
4593 if (found != rec->refs) {
4597 fprintf(stderr, "Incorrect global backref count "
4598 "on %llu found %llu wanted %llu\n",
4599 (unsigned long long)rec->start,
4600 (unsigned long long)found,
4601 (unsigned long long)rec->refs);
/*
 * Drain rec->backrefs: on each pass the head entry of the list is taken and
 * converted to its extent_backref, until the list is empty.
 * NOTE(review): each pass is expected to unlink and release that entry - confirm.
 */
4607 static int free_all_extent_backrefs(struct extent_record *rec)
4609 struct extent_backref *back;
4610 struct list_head *cur;
4611 while (!list_empty(&rec->backrefs)) {
4612 cur = rec->backrefs.next;
4613 back = to_extent_backref(cur);
/*
 * Tear down @extent_cache: the first cache extent is fetched, mapped back to
 * its owning extent_record, removed from the tree, and the backrefs of that
 * record are released through free_all_extent_backrefs().
 */
4620 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4621 struct cache_tree *extent_cache)
4623 struct cache_extent *cache;
4624 struct extent_record *rec;
4627 cache = first_cache_extent(extent_cache);
4630 rec = container_of(cache, struct extent_record, cache);
4631 remove_cache_extent(extent_cache, cache);
4632 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it: content
 * and owner ref checked, extent_item_refs equal to a non-zero rec->refs, no
 * duplicates, all_backpointers_checked() returning 0, and none of the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags set.
 * The record is also unlinked from whatever list rec->list is on.
 */
4637 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4638 struct extent_record *rec)
4640 if (rec->content_checked && rec->owner_ref_checked &&
4641 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4642 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4643 !rec->bad_full_backref && !rec->crossing_stripes &&
4644 !rec->wrong_chunk_type) {
4645 remove_cache_extent(extent_cache, &rec->cache);
4646 free_all_extent_backrefs(rec);
4647 list_del_init(&rec->list);
/*
 * Decide whether tree block @buf is really referenced by the root named in
 * its header.
 *
 * The backrefs of @rec are scanned first, looking at found_ref, full_backref
 * and whether back->root equals btrfs_header_owner(buf). Otherwise the fs
 * root of the owner is read, searched down to level + 1 for the first key of
 * @buf, and the parent block pointer at that slot is compared with buf->start.
 *
 * The final return yields 0 when the parent pointer matched and 1 otherwise.
 */
4653 static int check_owner_ref(struct btrfs_root *root,
4654 struct extent_record *rec,
4655 struct extent_buffer *buf)
4657 struct extent_backref *node;
4658 struct tree_backref *back;
4659 struct btrfs_root *ref_root;
4660 struct btrfs_key key;
4661 struct btrfs_path path;
4662 struct extent_buffer *parent;
4667 list_for_each_entry(node, &rec->backrefs, list) {
4670 if (!node->found_ref)
4672 if (node->full_backref)
4674 back = to_tree_backref(node);
4675 if (btrfs_header_owner(buf) == back->root)
4678 BUG_ON(rec->is_root);
4680 /* try to find the block by search corresponding fs tree */
4681 key.objectid = btrfs_header_owner(buf);
4682 key.type = BTRFS_ROOT_ITEM_KEY;
4683 key.offset = (u64)-1;
4685 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4686 if (IS_ERR(ref_root))
4689 level = btrfs_header_level(buf);
4691 btrfs_item_key_to_cpu(buf, &key, 0);
4693 btrfs_node_key_to_cpu(buf, &key, 0);
4695 btrfs_init_path(&path);
/* Stop the search one level above @buf so the parent node stays in the path */
4696 path.lowest_level = level + 1;
4697 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4701 parent = path.nodes[level + 1];
4702 if (parent && buf->start == btrfs_node_blockptr(parent,
4703 path.slots[level + 1]))
4706 btrfs_release_path(&path);
4707 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec, looking at the full_backref flag of each tree
 * backref and at whether its root is BTRFS_EXTENT_TREE_OBJECTID.
 * record_bad_block_io() only proceeds when this returns non-zero.
 */
4710 static int is_extent_tree_record(struct extent_record *rec)
4712 struct list_head *cur = rec->backrefs.next;
4713 struct extent_backref *node;
4714 struct tree_backref *back;
4717 while(cur != &rec->backrefs) {
4718 node = to_extent_backref(cur);
4722 back = to_tree_backref(node);
4723 if (node->full_backref)
4725 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the cached extent record covering start/len and, for a record that
 * passes is_extent_tree_record(), hand its parent key together with start and
 * len to btrfs_add_corrupt_extent_record().
 */
4732 static int record_bad_block_io(struct btrfs_fs_info *info,
4733 struct cache_tree *extent_cache,
4736 struct extent_record *rec;
4737 struct cache_extent *cache;
4738 struct btrfs_key key;
4740 cache = lookup_cache_extent(extent_cache, start, len);
4744 rec = container_of(cache, struct extent_record, cache);
4745 if (!is_extent_tree_record(rec))
4748 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4749 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr structures are
 * swapped in place and btrfs_fixup_low_keys() is called with the first node
 * key. For a leaf the item data, item offsets, item sizes and item keys of
 * the two slots are exchanged, the keys through btrfs_set_item_key_unsafe()
 * with path->slots[0] pointed at each slot in turn.
 */
4752 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4753 struct extent_buffer *buf, int slot)
4755 if (btrfs_header_level(buf)) {
4756 struct btrfs_key_ptr ptr1, ptr2;
4758 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4759 sizeof(struct btrfs_key_ptr));
4760 read_extent_buffer(buf, &ptr2,
4761 btrfs_node_key_ptr_offset(slot + 1),
4762 sizeof(struct btrfs_key_ptr));
4763 write_extent_buffer(buf, &ptr1,
4764 btrfs_node_key_ptr_offset(slot + 1),
4765 sizeof(struct btrfs_key_ptr));
4766 write_extent_buffer(buf, &ptr2,
4767 btrfs_node_key_ptr_offset(slot),
4768 sizeof(struct btrfs_key_ptr));
4770 struct btrfs_disk_key key;
4771 btrfs_node_key(buf, &key, 0);
4772 btrfs_fixup_low_keys(root, path, &key,
4773 btrfs_header_level(buf) + 1);
4776 struct btrfs_item *item1, *item2;
4777 struct btrfs_key k1, k2;
4778 char *item1_data, *item2_data;
4779 u32 item1_offset, item2_offset, item1_size, item2_size;
4781 item1 = btrfs_item_nr(slot);
4782 item2 = btrfs_item_nr(slot + 1);
4783 btrfs_item_key_to_cpu(buf, &k1, slot);
4784 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4785 item1_offset = btrfs_item_offset(buf, item1);
4786 item2_offset = btrfs_item_offset(buf, item2);
4787 item1_size = btrfs_item_size(buf, item1);
4788 item2_size = btrfs_item_size(buf, item2);
4790 item1_data = malloc(item1_size);
4793 item2_data = malloc(item2_size);
4799 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4800 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each temporary buffer is written back using the OTHER item
 * size. When item1_size != item2_size the larger length reads past the end
 * of the smaller malloc'd buffer - confirm whether equal sizes are guaranteed.
 */
4802 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4803 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4807 btrfs_set_item_offset(buf, item1, item2_offset);
4808 btrfs_set_item_offset(buf, item2, item1_offset);
4809 btrfs_set_item_size(buf, item1, item2_size);
4810 btrfs_set_item_size(buf, item2, item1_size);
/* Swap the keys too: slot receives k2, slot + 1 receives k1 */
4812 path->slots[0] = slot;
4813 btrfs_set_item_key_unsafe(root, path, &k2);
4814 path->slots[0] = slot + 1;
4815 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk adjacent key pairs (i, i + 1) of the block at path->lowest_level,
 * reading node keys or item keys as appropriate, and call swap_values() on a
 * pair; the buffer is then marked dirty.
 * NOTE(review): the k1 < k2 comparison ahead of the swap presumably skips
 * pairs that are already ordered - confirm.
 */
4820 static int fix_key_order(struct btrfs_trans_handle *trans,
4821 struct btrfs_root *root,
4822 struct btrfs_path *path)
4824 struct extent_buffer *buf;
4825 struct btrfs_key k1, k2;
4827 int level = path->lowest_level;
4830 buf = path->nodes[level];
4831 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4833 btrfs_node_key_to_cpu(buf, &k1, i);
4834 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4836 btrfs_item_key_to_cpu(buf, &k1, i);
4837 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4839 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4841 ret = swap_values(root, path, buf, i);
4844 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf by sliding the following
 * btrfs_item headers down one position and decrementing nritems. The key
 * type is first tested against DIR_INDEX, EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF and EXTENT_DATA_REF, the types this code can cope with
 * losing.
 *
 * Only the item header array is moved by the memmove below; the item data
 * area is not touched in the code shown. btrfs_fixup_low_keys() is fed the
 * key of item 0 afterwards and the buffer is marked dirty.
 */
4850 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4851 struct btrfs_root *root,
4852 struct btrfs_path *path,
4853 struct extent_buffer *buf, int slot)
4855 struct btrfs_key key;
4856 int nritems = btrfs_header_nritems(buf);
4858 btrfs_item_key_to_cpu(buf, &key, slot);
4860 /* These are all the keys we can deal with missing. */
4861 if (key.type != BTRFS_DIR_INDEX_KEY &&
4862 key.type != BTRFS_EXTENT_ITEM_KEY &&
4863 key.type != BTRFS_METADATA_ITEM_KEY &&
4864 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4865 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4868 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4869 (unsigned long long)key.objectid, key.type,
4870 (unsigned long long)key.offset, slot, buf->start);
4871 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4872 btrfs_item_nr_offset(slot + 1),
4873 sizeof(struct btrfs_item) *
4874 (nritems - slot - 1));
4875 btrfs_set_header_nritems(buf, nritems - 1);
4877 struct btrfs_disk_key disk_key;
4879 btrfs_item_key(buf, &disk_key, 0);
4880 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4882 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * The data of item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root); the
 * data of every later item is expected to end where the data of the previous
 * item begins. An item that ends beyond that limit is passed to
 * delete_bogus_item(); an item that ends short of it is moved up by the gap
 * ("shift") with memmove_extent_buffer() and its item offset is rewritten.
 */
4886 static int fix_item_offset(struct btrfs_trans_handle *trans,
4887 struct btrfs_root *root,
4888 struct btrfs_path *path)
4890 struct extent_buffer *buf;
4894 /* We should only get this for leaves */
4895 BUG_ON(path->lowest_level);
4896 buf = path->nodes[0];
4898 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4899 unsigned int shift = 0, offset;
4901 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4902 BTRFS_LEAF_DATA_SIZE(root)) {
4903 if (btrfs_item_end_nr(buf, i) >
4904 BTRFS_LEAF_DATA_SIZE(root)) {
4905 ret = delete_bogus_item(trans, root, path,
4909 fprintf(stderr, "item is off the end of the "
4910 "leaf, can't fix\n");
4914 shift = BTRFS_LEAF_DATA_SIZE(root) -
4915 btrfs_item_end_nr(buf, i);
4916 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4917 btrfs_item_offset_nr(buf, i - 1)) {
4918 if (btrfs_item_end_nr(buf, i) >
4919 btrfs_item_offset_nr(buf, i - 1)) {
4920 ret = delete_bogus_item(trans, root, path,
4924 fprintf(stderr, "items overlap, can't fix\n");
4928 shift = btrfs_item_offset_nr(buf, i - 1) -
4929 btrfs_item_end_nr(buf, i);
4934 printf("Shifting item nr %d by %u bytes in block %llu\n",
4935 i, shift, (unsigned long long)buf->start);
4936 offset = btrfs_item_offset_nr(buf, i);
4937 memmove_extent_buffer(buf,
4938 btrfs_leaf_data(buf) + offset + shift,
4939 btrfs_leaf_data(buf) + offset,
4940 btrfs_item_size_nr(buf, i));
4941 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4943 btrfs_mark_buffer_dirty(buf);
4947 * We may have moved things, in which case we want to exit so we don't
4948 * write those changes out. Once we have proper abort functionality in
4949 * progs this can be changed to something nicer.
4956 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4957 * then just return -EIO.
/*
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled here.
 *
 * btrfs_find_all_roots() supplies the ids of the roots referencing @buf. For
 * each id the fs root is read, a transaction is started on it, and the block
 * is located with btrfs_search_slot() (last argument 1, lowest_level set to
 * the level of @buf, path.skip_check_block set) using the first key of @buf.
 * fix_key_order() or fix_item_offset() is then applied according to @status,
 * and the transaction is committed.
 */
4959 static int try_to_fix_bad_block(struct btrfs_root *root,
4960 struct extent_buffer *buf,
4961 enum btrfs_tree_block_status status)
4963 struct btrfs_trans_handle *trans;
4964 struct ulist *roots;
4965 struct ulist_node *node;
4966 struct btrfs_root *search_root;
4967 struct btrfs_path path;
4968 struct ulist_iterator iter;
4969 struct btrfs_key root_key, key;
4972 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4973 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4976 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4980 btrfs_init_path(&path);
4981 ULIST_ITER_INIT(&iter);
4982 while ((node = ulist_next(roots, &iter))) {
4983 root_key.objectid = node->val;
4984 root_key.type = BTRFS_ROOT_ITEM_KEY;
4985 root_key.offset = (u64)-1;
4987 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4994 trans = btrfs_start_transaction(search_root, 0);
4995 if (IS_ERR(trans)) {
4996 ret = PTR_ERR(trans);
5000 path.lowest_level = btrfs_header_level(buf);
5001 path.skip_check_block = 1;
5002 if (path.lowest_level)
5003 btrfs_node_key_to_cpu(buf, &key, 0);
5005 btrfs_item_key_to_cpu(buf, &key, 0);
5006 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5009 btrfs_commit_transaction(trans, search_root);
5012 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5013 ret = fix_key_order(trans, search_root, &path);
5014 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5015 ret = fix_item_offset(trans, search_root, &path);
5017 btrfs_commit_transaction(trans, search_root);
5020 btrfs_release_path(&path);
5021 btrfs_commit_transaction(trans, search_root);
5024 btrfs_release_path(&path);
/*
 * Validate tree block @buf against its cached extent record.
 *
 * The record gets the generation and level of the block and, when the block
 * has items, the objectid of its first key. btrfs_check_leaf() or
 * btrfs_check_node() is run against the parent key of the record; a status
 * other than BTRFS_TREE_BLOCK_CLEAN leads to try_to_fix_bad_block(), and if
 * that still is not clean "bad block" is reported on stderr.
 * Further down content_checked is set, owner_ref_checked is set directly for
 * BTRFS_BLOCK_FLAG_FULL_BACKREF blocks or after consulting check_owner_ref(),
 * and maybe_free_extent_rec() gets a chance to drop the record.
 */
5028 static int check_block(struct btrfs_root *root,
5029 struct cache_tree *extent_cache,
5030 struct extent_buffer *buf, u64 flags)
5032 struct extent_record *rec;
5033 struct cache_extent *cache;
5034 struct btrfs_key key;
5035 enum btrfs_tree_block_status status;
5039 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5042 rec = container_of(cache, struct extent_record, cache);
5043 rec->generation = btrfs_header_generation(buf);
5045 level = btrfs_header_level(buf);
5046 if (btrfs_header_nritems(buf) > 0) {
5049 btrfs_item_key_to_cpu(buf, &key, 0);
5051 btrfs_node_key_to_cpu(buf, &key, 0);
5053 rec->info_objectid = key.objectid;
5055 rec->info_level = level;
5057 if (btrfs_is_leaf(buf))
5058 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5060 status = btrfs_check_node(root, &rec->parent_key, buf);
5062 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5064 status = try_to_fix_bad_block(root, buf, status);
5065 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5067 fprintf(stderr, "bad block %llu\n",
5068 (unsigned long long)buf->start);
5071 * Signal to callers we need to start the scan over
5072 * again since we'll have cowed blocks.
5077 rec->content_checked = 1;
5078 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5079 rec->owner_ref_checked = 1;
5081 ret = check_owner_ref(root, rec, buf);
5083 rec->owner_ref_checked = 1;
5087 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref matching either @parent
 * (compared with back->parent) or @root (compared with back->root); the
 * full_backref flag of each entry is consulted before either comparison.
 */
5091 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5092 u64 parent, u64 root)
5094 struct list_head *cur = rec->backrefs.next;
5095 struct extent_backref *node;
5096 struct tree_backref *back;
5098 while(cur != &rec->backrefs) {
5099 node = to_extent_backref(cur);
5103 back = to_tree_backref(node);
5105 if (!node->full_backref)
5107 if (parent == back->parent)
5110 if (node->full_backref)
5112 if (back->root == root)
/*
 * Allocate a tree_backref with malloc(), zero its embedded node, record
 * @parent and set full_backref on one path or clear full_backref on the
 * other, and append the new backref to the tail of rec->backrefs.
 */
5119 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5120 u64 parent, u64 root)
5122 struct tree_backref *ref = malloc(sizeof(*ref));
5126 memset(&ref->node, 0, sizeof(ref->node));
5128 ref->parent = parent;
5129 ref->node.full_backref = 1;
5132 ref->node.full_backref = 0;
5134 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a data backref matching either @parent
 * (compared with back->parent) or the (@root, @owner, @offset) triple; the
 * full_backref flag of each entry is consulted before either comparison.
 * For a triple match with found_ref set on both sides, a difference in bytes
 * or disk_bytenr is tested as well.
 */
5139 static struct data_backref *find_data_backref(struct extent_record *rec,
5140 u64 parent, u64 root,
5141 u64 owner, u64 offset,
5143 u64 disk_bytenr, u64 bytes)
5145 struct list_head *cur = rec->backrefs.next;
5146 struct extent_backref *node;
5147 struct data_backref *back;
5149 while(cur != &rec->backrefs) {
5150 node = to_extent_backref(cur);
5154 back = to_data_backref(node);
5156 if (!node->full_backref)
5158 if (parent == back->parent)
5161 if (node->full_backref)
5163 if (back->root == root && back->owner == owner &&
5164 back->offset == offset) {
5165 if (found_ref && node->found_ref &&
5166 (back->bytes != bytes ||
5167 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and flag it
 * as data. One path records @parent and sets full_backref, the other records
 * @offset and clears full_backref. ref->bytes is set to max_size, the backref
 * is appended to rec->backrefs, and rec->max_size is raised to max_size when
 * that is larger.
 */
5176 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5177 u64 parent, u64 root,
5178 u64 owner, u64 offset,
5181 struct data_backref *ref = malloc(sizeof(*ref));
5185 memset(&ref->node, 0, sizeof(ref->node));
5186 ref->node.is_data = 1;
5189 ref->parent = parent;
5192 ref->node.full_backref = 1;
5196 ref->offset = offset;
5197 ref->node.full_backref = 0;
5199 ref->bytes = max_size;
5202 list_add_tail(&ref->node.list, &rec->backrefs);
5203 if (max_size > rec->max_size)
5204 rec->max_size = max_size;
5208 /* Check if the type of extent matches with its chunk */
/*
 * The block group is looked up from global_info by rec->start and
 * rec->wrong_chunk_type is set on a mismatch:
 *  - a data extent needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent needs SYSTEM or METADATA, and when it has a backref
 *    the first one decides which: root BTRFS_CHUNK_TREE_OBJECTID requires
 *    SYSTEM, any other root requires METADATA. A data backref as the first
 *    backref of a metadata extent is itself flagged.
 */
5209 static void check_extent_type(struct extent_record *rec)
5211 struct btrfs_block_group_cache *bg_cache;
5213 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5217 /* data extent, check chunk directly*/
5218 if (!rec->metadata) {
5219 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5220 rec->wrong_chunk_type = 1;
5224 /* metadata extent, check the obvious case first */
5225 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5226 BTRFS_BLOCK_GROUP_METADATA))) {
5227 rec->wrong_chunk_type = 1;
5232 * Check SYSTEM extent, as it's also marked as metadata, we can only
5233 * make sure it's a SYSTEM extent by its backref
5235 if (!list_empty(&rec->backrefs)) {
5236 struct extent_backref *node;
5237 struct tree_backref *tback;
5240 node = to_extent_backref(rec->backrefs.next);
5241 if (node->is_data) {
5242 /* tree block shouldn't have data backref */
5243 rec->wrong_chunk_type = 1;
5246 tback = container_of(node, struct tree_backref, node);
5248 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5249 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5251 bg_type = BTRFS_BLOCK_GROUP_METADATA;
5252 if (!(bg_cache->flags & bg_type))
5253 rec->wrong_chunk_type = 1;
5258 * Allocate a new extent record, fill default values from @tmpl and insert into
5259 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5260 * the cache, otherwise it fails.
/*
 * Field notes for the new record:
 *  - rec->nr is max(tmpl->nr, tmpl->max_size), while the cache extent that
 *    indexes the record is sized with tmpl->nr;
 *  - num_duplicates and the bad_full_backref / crossing_stripes /
 *    wrong_chunk_type flags start at 0, flag_block_full_backref at FLAG_UNSET;
 *  - the three list heads (backrefs, dups, list) start empty.
 * After insert_cache_extent() the global bytes_used grows by rec->nr, the
 * crossing-stripes flag is computed from the tree root nodesize and
 * check_extent_type() is run.
 * NOTE(review): the crossing-stripes check is presumably limited to metadata
 * records - confirm.
 */
5262 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5263 struct extent_record *tmpl)
5265 struct extent_record *rec;
5268 rec = malloc(sizeof(*rec));
5271 rec->start = tmpl->start;
5272 rec->max_size = tmpl->max_size;
5273 rec->nr = max(tmpl->nr, tmpl->max_size);
5274 rec->found_rec = tmpl->found_rec;
5275 rec->content_checked = tmpl->content_checked;
5276 rec->owner_ref_checked = tmpl->owner_ref_checked;
5277 rec->num_duplicates = 0;
5278 rec->metadata = tmpl->metadata;
5279 rec->flag_block_full_backref = FLAG_UNSET;
5280 rec->bad_full_backref = 0;
5281 rec->crossing_stripes = 0;
5282 rec->wrong_chunk_type = 0;
5283 rec->is_root = tmpl->is_root;
5284 rec->refs = tmpl->refs;
5285 rec->extent_item_refs = tmpl->extent_item_refs;
5286 rec->parent_generation = tmpl->parent_generation;
5287 INIT_LIST_HEAD(&rec->backrefs);
5288 INIT_LIST_HEAD(&rec->dups);
5289 INIT_LIST_HEAD(&rec->list);
5290 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
5291 rec->cache.start = tmpl->start;
5292 rec->cache.size = tmpl->nr;
5293 ret = insert_cache_extent(extent_cache, &rec->cache);
5298 bytes_used += rec->nr;
5301 rec->crossing_stripes = check_crossing_stripes(global_info,
5302 rec->start, global_info->tree_root->nodesize);
5303 check_extent_type(rec);
5308 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5310 * - refs - if found, increase refs
5311 * - is_root - if found, set
5312 * - content_checked - if found, set
5313 * - owner_ref_checked - if found, set
5315 * If not found, create a new one, initialize and insert.
/*
 * Found path: the cached record overlapping [tmpl->start, tmpl->nr) is
 * updated from @tmpl. When @tmpl itself comes from an extent item
 * (tmpl->found_rec) and either starts elsewhere or the record was already
 * found, the record is queued on duplicate_extents and a minimal copy of
 * @tmpl is chained on rec->dups. Otherwise extent_item_refs, the checked
 * flags, parent key, parent generation and max_size are merged in, the
 * crossing-stripes and chunk-type checks are rerun, and
 * maybe_free_extent_rec() may release the record.
 *
 * Not-found path: add_extent_rec_nolookup() creates the record.
 */
5317 static int add_extent_rec(struct cache_tree *extent_cache,
5318 struct extent_record *tmpl)
5320 struct extent_record *rec;
5321 struct cache_extent *cache;
5325 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5327 rec = container_of(cache, struct extent_record, cache);
5331 rec->nr = max(tmpl->nr, tmpl->max_size);
5334 * We need to make sure to reset nr to whatever the extent
5335 * record says was the real size, this way we can compare it to
5338 if (tmpl->found_rec) {
5339 if (tmpl->start != rec->start || rec->found_rec) {
5340 struct extent_record *tmp;
5343 if (list_empty(&rec->list))
5344 list_add_tail(&rec->list,
5345 &duplicate_extents);
5348 * We have to do this song and dance in case we
5349 * find an extent record that falls inside of
5350 * our current extent record but does not have
5351 * the same objectid.
5353 tmp = malloc(sizeof(*tmp));
5356 tmp->start = tmpl->start;
5357 tmp->max_size = tmpl->max_size;
5360 tmp->metadata = tmpl->metadata;
5361 tmp->extent_item_refs = tmpl->extent_item_refs;
5362 INIT_LIST_HEAD(&tmp->list);
5363 list_add_tail(&tmp->list, &rec->dups);
5364 rec->num_duplicates++;
5371 if (tmpl->extent_item_refs && !dup) {
5372 if (rec->extent_item_refs) {
5373 fprintf(stderr, "block %llu rec "
5374 "extent_item_refs %llu, passed %llu\n",
5375 (unsigned long long)tmpl->start,
5376 (unsigned long long)
5377 rec->extent_item_refs,
5378 (unsigned long long)tmpl->extent_item_refs);
5380 rec->extent_item_refs = tmpl->extent_item_refs;
5384 if (tmpl->content_checked)
5385 rec->content_checked = 1;
5386 if (tmpl->owner_ref_checked)
5387 rec->owner_ref_checked = 1;
5388 memcpy(&rec->parent_key, &tmpl->parent_key,
5389 sizeof(tmpl->parent_key));
5390 if (tmpl->parent_generation)
5391 rec->parent_generation = tmpl->parent_generation;
5392 if (rec->max_size < tmpl->max_size)
5393 rec->max_size = tmpl->max_size;
5396 * A metadata extent can't cross stripe_len boundary, otherwise
5397 * kernel scrub won't be able to handle it.
5398 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5402 rec->crossing_stripes = check_crossing_stripes(
5403 global_info, rec->start,
5404 global_info->tree_root->nodesize);
5405 check_extent_type(rec);
5406 maybe_free_extent_rec(extent_cache, rec);
5410 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * add_tree_backref - record a tree block back reference on the extent record
 * that covers @bytenr.
 *
 * @extent_cache: cache of extent records, extended on demand
 * @bytenr:       logical start of the referenced tree block
 * @parent:       passed together with @root to find_tree_backref() and
 *                alloc_tree_backref() to identify the backref
 * @root:         see @parent
 * @found_ref:    NOTE(review): presumably selects whether node.found_ref or
 *                node.found_extent_tree gets marked - confirm
 *
 * When no cached extent contains @bytenr, a zeroed template starting at
 * @bytenr is inserted with add_extent_rec_nolookup() and the lookup is
 * repeated.  A message is printed to stderr when the flag about to be marked
 * was already set.  The record is finally passed to check_extent_type() and
 * maybe_free_extent_rec().
 */
5415 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5416 u64 parent, u64 root, int found_ref)
5418 struct extent_record *rec;
5419 struct tree_backref *back;
5420 struct cache_extent *cache;
5423 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5425 struct extent_record tmpl;
5427 memset(&tmpl, 0, sizeof(tmpl));
5428 tmpl.start = bytenr;
5432 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5436 /* really a bug in cache_extent implement now */
5437 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5442 rec = container_of(cache, struct extent_record, cache);
5443 if (rec->start != bytenr) {
5445 * Several cause, from unaligned bytenr to over lapping extents
5450 back = find_tree_backref(rec, parent, root);
5452 back = alloc_tree_backref(rec, parent, root);
5458 if (back->node.found_ref) {
5459 fprintf(stderr, "Extent back ref already exists "
5460 "for %llu parent %llu root %llu \n",
5461 (unsigned long long)bytenr,
5462 (unsigned long long)parent,
5463 (unsigned long long)root);
5465 back->node.found_ref = 1;
5467 if (back->node.found_extent_tree) {
5468 fprintf(stderr, "Extent back ref already exists "
5469 "for %llu parent %llu root %llu \n",
5470 (unsigned long long)bytenr,
5471 (unsigned long long)parent,
5472 (unsigned long long)root);
5474 back->node.found_extent_tree = 1;
5476 check_extent_type(rec);
5477 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_data_backref - record a data extent back reference on the extent record
 * that covers @bytenr.
 *
 * @extent_cache: cache of extent records, extended on demand
 * @bytenr:       logical start of the referenced data extent
 * @parent, @root, @owner, @offset:
 *                identify the backref; forwarded to find_data_backref() and
 *                alloc_data_backref()
 * @num_refs:     reference count; must be 1 on the path that marks
 *                node.found_ref (enforced with BUG_ON), stored in
 *                back->num_refs on the path that marks found_extent_tree
 * @found_ref:    forwarded to find_data_backref().  NOTE(review): presumably
 *                also selects which of the two marking paths runs - confirm
 * @max_size:     extent size known to the caller; raises rec->max_size when
 *                larger and is stored as back->bytes on the found_ref path
 *
 * When no cached extent contains @bytenr, a zeroed template carrying @bytenr
 * and @max_size is inserted with add_extent_rec_nolookup() and the lookup is
 * repeated.  On the found_ref path a second sighting of the same backref must
 * report the same size (BUG_ON otherwise), the found_ref counter is bumped
 * and the record is flagged content_checked and owner_ref_checked.  On the
 * other path a message is printed to stderr when found_extent_tree was
 * already set.  The record is finally passed to maybe_free_extent_rec().
 */
5481 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5482 u64 parent, u64 root, u64 owner, u64 offset,
5483 u32 num_refs, int found_ref, u64 max_size)
5485 struct extent_record *rec;
5486 struct data_backref *back;
5487 struct cache_extent *cache;
5490 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5492 struct extent_record tmpl;
5494 memset(&tmpl, 0, sizeof(tmpl));
5495 tmpl.start = bytenr;
5497 tmpl.max_size = max_size;
5499 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5503 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5508 rec = container_of(cache, struct extent_record, cache);
5509 if (rec->max_size < max_size)
5510 rec->max_size = max_size;
5513 * If found_ref is set then max_size is the real size and must match the
5514 * existing refs. So if we have already found a ref then we need to
5515 * make sure that this ref matches the existing one, otherwise we need
5516 * to add a new backref so we can notice that the backrefs don't match
5517 * and we need to figure out who is telling the truth. This is to
5518 * account for that awful fsync bug I introduced where we'd end up with
5519 * a btrfs_file_extent_item that would have its length include multiple
5520 * prealloc extents or point inside of a prealloc extent.
5522 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5525 back = alloc_data_backref(rec, parent, root, owner, offset,
5531 BUG_ON(num_refs != 1);
5532 if (back->node.found_ref)
5533 BUG_ON(back->bytes != max_size);
5534 back->node.found_ref = 1;
5535 back->found_ref += 1;
5536 back->bytes = max_size;
5537 back->disk_bytenr = bytenr;
5539 rec->content_checked = 1;
5540 rec->owner_ref_checked = 1;
5542 if (back->node.found_extent_tree) {
5543 fprintf(stderr, "Extent back ref already exists "
5544 "for %llu parent %llu root %llu "
5545 "owner %llu offset %llu num_refs %lu\n",
5546 (unsigned long long)bytenr,
5547 (unsigned long long)parent,
5548 (unsigned long long)root,
5549 (unsigned long long)owner,
5550 (unsigned long long)offset,
5551 (unsigned long)num_refs);
5553 back->num_refs = num_refs;
5554 back->node.found_extent_tree = 1;
5556 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_pending - queue the range [@bytenr, @bytenr + @size) for processing.
 *
 * The range is added to @seen first and then to @pending.  The result of the
 * add to @pending is not checked.
 * NOTE(review): presumably the add to @pending is skipped when the add to
 * @seen fails (range already recorded) - confirm.
 */
5560 static int add_pending(struct cache_tree *pending,
5561 struct cache_tree *seen, u64 bytenr, u32 size)
5564 ret = add_cache_extent(seen, bytenr, size);
5567 add_cache_extent(pending, bytenr, size);
/*
 * pick_next_pending - choose the next batch of blocks to work on.
 *
 * Fills @bits (capacity @bits_nr) with start/size pairs taken from the cache
 * trees.  The sources visible here are, in order:
 *  - the first entry of @reada, copied into bits[0];
 *  - entries of @nodes, searched from 32768 bytes before @last (when @last is
 *    larger than that) and then from offset 0;
 *  - entries of @pending, searched from offset 0.
 * Consecutive entries are copied while slots remain.  When more than 8 slots
 * are still free, extents of @pending that follow bits[0] are appended as
 * long as the gap to the previous one does not exceed 32768 bytes.
 *
 * NOTE(review): presumably returns the number of slots filled and reports
 * through its last parameter whether the entries came from @reada - confirm.
 * NOTE(review): node_start is an unsigned long initialised from the u64
 * @last, so the value is truncated where unsigned long is 32 bits wide.
 */
5571 static int pick_next_pending(struct cache_tree *pending,
5572 struct cache_tree *reada,
5573 struct cache_tree *nodes,
5574 u64 last, struct block_info *bits, int bits_nr,
5577 unsigned long node_start = last;
5578 struct cache_extent *cache;
5581 cache = search_cache_extent(reada, 0);
5583 bits[0].start = cache->start;
5584 bits[0].size = cache->size;
5589 if (node_start > 32768)
5590 node_start -= 32768;
5592 cache = search_cache_extent(nodes, node_start);
5594 cache = search_cache_extent(nodes, 0);
5597 cache = search_cache_extent(pending, 0);
5602 bits[ret].start = cache->start;
5603 bits[ret].size = cache->size;
5604 cache = next_cache_extent(cache);
5606 } while (cache && ret < bits_nr);
5612 bits[ret].start = cache->start;
5613 bits[ret].size = cache->size;
5614 cache = next_cache_extent(cache);
5616 } while (cache && ret < bits_nr);
5618 if (bits_nr - ret > 8) {
5619 u64 lookup = bits[0].start + bits[0].size;
5620 struct cache_extent *next;
5621 next = search_cache_extent(pending, lookup);
5623 if (next->start - lookup > 32768)
5625 bits[ret].start = next->start;
5626 bits[ret].size = next->size;
5627 lookup = next->start + next->size;
5631 next = next_cache_extent(next);
/*
 * free_chunk_record - per-entry callback handed to cache_tree_free_extents()
 * by free_chunk_cache_tree().
 *
 * Resolves the chunk_record embedding @cache and unlinks it from both its
 * list and dextents list heads.
 */
5639 static void free_chunk_record(struct cache_extent *cache)
5641 struct chunk_record *rec;
5643 rec = container_of(cache, struct chunk_record, cache);
5644 list_del_init(&rec->list);
5645 list_del_init(&rec->dextents);
/*
 * free_chunk_cache_tree - tear down @chunk_cache by running
 * free_chunk_record() on every entry through cache_tree_free_extents().
 */
5649 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5651 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * free_device_record - per-node callback for the device cache rb-tree.
 *
 * Resolves the device_record embedding @node.  It is the callback named in
 * the FREE_RB_BASED_TREE(device_cache, ...) instantiation in this file.
 */
5654 static void free_device_record(struct rb_node *node)
5656 struct device_record *rec;
5658 rec = container_of(node, struct device_record, node);
/*
 * Instantiate the rb-tree teardown helper for the device cache, with
 * free_device_record() as the per-node callback.
 * NOTE(review): the name of the generated function depends on the macro
 * definition, which lives outside this file section - confirm there.
 */
5662 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * insert_block_group_record - add @bg_rec to the block group tree @tree.
 *
 * The record is inserted into the cache tree by its embedded cache extent and
 * appended to the tree->block_groups list.
 * NOTE(review): presumably the list append only happens when the cache
 * insert succeeded, and the insert result is returned - confirm.
 */
5664 int insert_block_group_record(struct block_group_tree *tree,
5665 struct block_group_record *bg_rec)
5669 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5673 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * free_block_group_record - per-entry callback handed to
 * cache_tree_free_extents() by free_block_group_tree().
 *
 * Resolves the block_group_record embedding @cache and unlinks it from its
 * list head.
 */
5677 static void free_block_group_record(struct cache_extent *cache)
5679 struct block_group_record *rec;
5681 rec = container_of(cache, struct block_group_record, cache);
5682 list_del_init(&rec->list);
/*
 * free_block_group_tree - tear down the cache tree of @tree by running
 * free_block_group_record() on every entry.
 */
5686 void free_block_group_tree(struct block_group_tree *tree)
5688 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * insert_device_extent_record - add @de_rec to the device extent tree @tree.
 *
 * Uses insert_cache_extent2() rather than insert_cache_extent() because
 * extents of different devices may share the same start and size.  The
 * record is then appended to both orphan lists of the tree
 * (no_chunk_orphans and no_device_orphans).
 * NOTE(review): presumably the list appends only happen when the insert
 * succeeded, and the insert result is returned - confirm.
 */
5691 int insert_device_extent_record(struct device_extent_tree *tree,
5692 struct device_extent_record *de_rec)
5697 * Device extent is a bit different from the other extents, because
5698 * the extents which belong to the different devices may have the
5699 * same start and size, so we need use the special extent cache
5700 * search/insert functions.
5702 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5706 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5707 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * free_device_extent_record - per-entry callback handed to
 * cache_tree_free_extents() by free_device_extent_tree().
 *
 * Resolves the device_extent_record embedding @cache and unlinks it from its
 * chunk_list and device_list, each only when that list head is not empty.
 */
5711 static void free_device_extent_record(struct cache_extent *cache)
5713 struct device_extent_record *rec;
5715 rec = container_of(cache, struct device_extent_record, cache);
5716 if (!list_empty(&rec->chunk_list))
5717 list_del_init(&rec->chunk_list);
5718 if (!list_empty(&rec->device_list))
5719 list_del_init(&rec->device_list);
/*
 * free_device_extent_tree - tear down the cache tree of @tree by running
 * free_device_extent_record() on every entry.
 */
5723 void free_device_extent_tree(struct device_extent_tree *tree)
5725 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * process_extent_ref_v0 - convert the v0 extent ref item at @slot of @leaf
 * into a backref in @extent_cache.  Only built when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * The item key supplies the extent bytenr (key.objectid) and the parent
 * (key.offset).  A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is
 * added through add_tree_backref(); any other ref is added through
 * add_data_backref() with root, owner and offset set to 0, the v0 ref count
 * as num_refs, and found_ref and max_size set to 0.
 */
5728 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5729 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5730 struct extent_buffer *leaf, int slot)
5732 struct btrfs_extent_ref_v0 *ref0;
5733 struct btrfs_key key;
5736 btrfs_item_key_to_cpu(leaf, &key, slot);
5737 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5738 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5739 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5742 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5743 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * btrfs_new_chunk_record - build an in-memory chunk_record from the chunk
 * item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(), sized by
 * btrfs_chunk_record_size() for the number of stripes stored in the item.
 * Its cache extent spans [key->offset, key->offset + chunk length).  The key
 * fields, the leaf generation and the chunk attributes (owner, stripe_len,
 * type, io_width, io_align, sector_size, num_stripes, sub_stripes) are
 * copied, followed by devid, offset and dev_uuid of every stripe.
 *
 * A message is printed to stderr when the allocation fails.
 * NOTE(review): what happens after that message, and the value handed back
 * to the caller, should be confirmed against the complete function.
 */
5749 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5750 struct btrfs_key *key,
5753 struct btrfs_chunk *ptr;
5754 struct chunk_record *rec;
5757 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5758 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5760 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5762 fprintf(stderr, "memory allocation failed\n");
5766 INIT_LIST_HEAD(&rec->list);
5767 INIT_LIST_HEAD(&rec->dextents);
5770 rec->cache.start = key->offset;
5771 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5773 rec->generation = btrfs_header_generation(leaf);
5775 rec->objectid = key->objectid;
5776 rec->type = key->type;
5777 rec->offset = key->offset;
5779 rec->length = rec->cache.size;
5780 rec->owner = btrfs_chunk_owner(leaf, ptr);
5781 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5782 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5783 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5784 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5785 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5786 rec->num_stripes = num_stripes;
5787 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5789 for (i = 0; i < rec->num_stripes; ++i) {
5790 rec->stripes[i].devid =
5791 btrfs_stripe_devid_nr(leaf, ptr, i);
5792 rec->stripes[i].offset =
5793 btrfs_stripe_offset_nr(leaf, ptr, i);
5794 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5795 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * process_chunk_item - validate the chunk item at @slot of @eb and add it to
 * @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid chunk
 * is reported through error() and, per that message, ignored.  A valid chunk
 * is turned into a chunk_record with btrfs_new_chunk_record() and inserted
 * into the cache; a message naming the chunk offset and length is printed to
 * stderr on the insert failure path.
 *
 * NOTE(review): the record pointer is passed to insert_cache_extent()
 * directly; whether a failed allocation in btrfs_new_chunk_record() can
 * reach this point should be confirmed.
 */
5802 static int process_chunk_item(struct cache_tree *chunk_cache,
5803 struct btrfs_key *key, struct extent_buffer *eb,
5806 struct chunk_record *rec;
5807 struct btrfs_chunk *chunk;
5810 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5812 * Do extra check for this chunk item,
5814 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5815 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5816 * and owner<->key_type check.
5818 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5821 error("chunk(%llu, %llu) is not valid, ignore it",
5822 key->offset, btrfs_chunk_length(eb, chunk));
5825 rec = btrfs_new_chunk_record(eb, key, slot);
5826 ret = insert_cache_extent(chunk_cache, &rec->cache);
5828 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5829 rec->offset, rec->length);
/*
 * process_device_item - build a device_record from the dev item at @slot of
 * @eb and insert it into the rb-tree @dev_cache.
 *
 * The record stores the item key, the leaf generation, and the device id,
 * total bytes and bytes used read from the item.  Insertion is ordered by
 * device_record_compare(); a message naming the devid is printed to stderr
 * on the insert failure path, and another one when the allocation fails.
 *
 * NOTE(review): rec->devid is first set from key->offset and later
 * overwritten with btrfs_device_id(); the first assignment looks redundant.
 * NOTE(review): the record comes from malloc(), so any field not assigned
 * here keeps an indeterminate value.
 */
5836 static int process_device_item(struct rb_root *dev_cache,
5837 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5839 struct btrfs_dev_item *ptr;
5840 struct device_record *rec;
5843 ptr = btrfs_item_ptr(eb,
5844 slot, struct btrfs_dev_item);
5846 rec = malloc(sizeof(*rec));
5848 fprintf(stderr, "memory allocation failed\n");
5852 rec->devid = key->offset;
5853 rec->generation = btrfs_header_generation(eb);
5855 rec->objectid = key->objectid;
5856 rec->type = key->type;
5857 rec->offset = key->offset;
5859 rec->devid = btrfs_device_id(eb, ptr);
5860 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5861 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5863 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5865 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * btrfs_new_block_group_record - build an in-memory block_group_record from
 * the block group item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc().  Its cache extent spans
 * [key->objectid, key->objectid + key->offset).  The key fields, the leaf
 * generation and the on-disk block group flags are copied and the list head
 * is initialised.
 *
 * A message is printed to stderr when the allocation fails.
 * NOTE(review): what happens after that message, and the value handed back
 * to the caller, should be confirmed against the complete function.
 */
5872 struct block_group_record *
5873 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5876 struct btrfs_block_group_item *ptr;
5877 struct block_group_record *rec;
5879 rec = calloc(1, sizeof(*rec));
5881 fprintf(stderr, "memory allocation failed\n");
5885 rec->cache.start = key->objectid;
5886 rec->cache.size = key->offset;
5888 rec->generation = btrfs_header_generation(leaf);
5890 rec->objectid = key->objectid;
5891 rec->type = key->type;
5892 rec->offset = key->offset;
5894 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5895 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5897 INIT_LIST_HEAD(&rec->list);
/*
 * process_block_group_item - turn the block group item at @slot of @eb into
 * a block_group_record and add it to @block_group_cache.
 *
 * The record is built by btrfs_new_block_group_record() and inserted with
 * insert_block_group_record(); a message naming the block group objectid and
 * offset is printed to stderr on the insert failure path.
 */
5902 static int process_block_group_item(struct block_group_tree *block_group_cache,
5903 struct btrfs_key *key,
5904 struct extent_buffer *eb, int slot)
5906 struct block_group_record *rec;
5909 rec = btrfs_new_block_group_record(eb, key, slot);
5910 ret = insert_block_group_record(block_group_cache, rec);
5912 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5913 rec->objectid, rec->offset);
/*
 * btrfs_new_device_extent_record - build an in-memory device_extent_record
 * from the dev extent item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc().  Its cache extent is keyed by
 * key->objectid (stored in cache.objectid), starts at key->offset and has
 * the length stored in the item.  The key fields, the leaf generation, the
 * chunk objectid and the chunk offset read from the item are copied, and
 * both list heads are initialised.
 *
 * A message is printed to stderr when the allocation fails.
 * NOTE(review): what happens after that message, and the value handed back
 * to the caller, should be confirmed against the complete function.
 */
5920 struct device_extent_record *
5921 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5922 struct btrfs_key *key, int slot)
5924 struct device_extent_record *rec;
5925 struct btrfs_dev_extent *ptr;
5927 rec = calloc(1, sizeof(*rec));
5929 fprintf(stderr, "memory allocation failed\n");
5933 rec->cache.objectid = key->objectid;
5934 rec->cache.start = key->offset;
5936 rec->generation = btrfs_header_generation(leaf);
5938 rec->objectid = key->objectid;
5939 rec->type = key->type;
5940 rec->offset = key->offset;
5942 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5943 rec->chunk_objecteid =
5944 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5946 btrfs_dev_extent_chunk_offset(leaf, ptr);
5947 rec->length = btrfs_dev_extent_length(leaf, ptr);
5948 rec->cache.size = rec->length;
5950 INIT_LIST_HEAD(&rec->chunk_list);
5951 INIT_LIST_HEAD(&rec->device_list);
/*
 * process_device_extent_item - turn the dev extent item at @slot of @eb into
 * a device_extent_record and add it to @dev_extent_cache.
 *
 * The record is built by btrfs_new_device_extent_record() and inserted with
 * insert_device_extent_record(); a message naming the objectid, offset and
 * length of the extent is emitted on the insert failure path.
 */
5957 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5958 struct btrfs_key *key, struct extent_buffer *eb,
5961 struct device_extent_record *rec;
5964 rec = btrfs_new_device_extent_record(eb, key, slot);
5965 ret = insert_device_extent_record(dev_extent_cache, rec);
5968 "Device extent[%llu, %llu, %llu] existed.\n",
5969 rec->objectid, rec->offset, rec->length);
/*
 * process_extent_item - parse the EXTENT_ITEM or METADATA_ITEM at @slot of
 * @eb and feed the extent and its inline backrefs into @extent_cache.
 *
 * The extent length is root->nodesize for a METADATA_ITEM key and key.offset
 * otherwise.  Three sanity checks are reported through error(): a bytenr not
 * aligned to root->sectorsize, a metadata extent whose length differs from
 * root->nodesize, and a data extent whose length is not sectorsize aligned.
 *
 * An item smaller than struct btrfs_extent_item is treated as a v0 extent
 * item when BTRFS_COMPAT_EXTENT_TREE_V0 is defined: its ref count is read and
 * the result of add_extent_rec() is returned directly.
 *
 * For a regular item an extent record is added and then the inline refs are
 * walked, starting right after the extent item (and after the
 * btrfs_tree_block_info for a tree block keyed by EXTENT_ITEM) up to the end
 * of the item.  Tree block and shared block refs go through
 * add_tree_backref(), extent data and shared data refs through
 * add_data_backref().  A message describing a corrupt extent record is
 * printed to stderr, presumably for unknown ref types - TODO confirm.
 *
 * NOTE(review): the results of add_extent_rec() for the regular item and of
 * both add_data_backref() calls are not checked, unlike the
 * add_tree_backref() calls whose failure is reported through error().
 */
5976 static int process_extent_item(struct btrfs_root *root,
5977 struct cache_tree *extent_cache,
5978 struct extent_buffer *eb, int slot)
5980 struct btrfs_extent_item *ei;
5981 struct btrfs_extent_inline_ref *iref;
5982 struct btrfs_extent_data_ref *dref;
5983 struct btrfs_shared_data_ref *sref;
5984 struct btrfs_key key;
5985 struct extent_record tmpl;
5990 u32 item_size = btrfs_item_size_nr(eb, slot);
5996 btrfs_item_key_to_cpu(eb, &key, slot);
5998 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6000 num_bytes = root->nodesize;
6002 num_bytes = key.offset;
6005 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6006 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6007 key.objectid, root->sectorsize);
6010 if (item_size < sizeof(*ei)) {
6011 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6012 struct btrfs_extent_item_v0 *ei0;
6013 BUG_ON(item_size != sizeof(*ei0));
6014 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6015 refs = btrfs_extent_refs_v0(eb, ei0);
6019 memset(&tmpl, 0, sizeof(tmpl));
6020 tmpl.start = key.objectid;
6021 tmpl.nr = num_bytes;
6022 tmpl.extent_item_refs = refs;
6023 tmpl.metadata = metadata;
6025 tmpl.max_size = num_bytes;
6027 return add_extent_rec(extent_cache, &tmpl);
6030 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6031 refs = btrfs_extent_refs(eb, ei);
6032 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6036 if (metadata && num_bytes != root->nodesize) {
6037 error("ignore invalid metadata extent, length %llu does not equal to %u",
6038 num_bytes, root->nodesize);
6041 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6042 error("ignore invalid data extent, length %llu is not aligned to %u",
6043 num_bytes, root->sectorsize);
6047 memset(&tmpl, 0, sizeof(tmpl));
6048 tmpl.start = key.objectid;
6049 tmpl.nr = num_bytes;
6050 tmpl.extent_item_refs = refs;
6051 tmpl.metadata = metadata;
6053 tmpl.max_size = num_bytes;
6054 add_extent_rec(extent_cache, &tmpl);
6056 ptr = (unsigned long)(ei + 1);
6057 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6058 key.type == BTRFS_EXTENT_ITEM_KEY)
6059 ptr += sizeof(struct btrfs_tree_block_info);
6061 end = (unsigned long)ei + item_size;
6063 iref = (struct btrfs_extent_inline_ref *)ptr;
6064 type = btrfs_extent_inline_ref_type(eb, iref);
6065 offset = btrfs_extent_inline_ref_offset(eb, iref);
6067 case BTRFS_TREE_BLOCK_REF_KEY:
6068 ret = add_tree_backref(extent_cache, key.objectid,
6071 error("add_tree_backref failed: %s",
6074 case BTRFS_SHARED_BLOCK_REF_KEY:
6075 ret = add_tree_backref(extent_cache, key.objectid,
6078 error("add_tree_backref failed: %s",
6081 case BTRFS_EXTENT_DATA_REF_KEY:
6082 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6083 add_data_backref(extent_cache, key.objectid, 0,
6084 btrfs_extent_data_ref_root(eb, dref),
6085 btrfs_extent_data_ref_objectid(eb,
6087 btrfs_extent_data_ref_offset(eb, dref),
6088 btrfs_extent_data_ref_count(eb, dref),
6091 case BTRFS_SHARED_DATA_REF_KEY:
6092 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6093 add_data_backref(extent_cache, key.objectid, offset,
6095 btrfs_shared_data_ref_count(eb, sref),
6099 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6100 key.objectid, key.type, num_bytes);
6103 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range - verify that the free space cache of @cache holds an
 * entry matching the free range [@offset, @offset + @bytes) exactly, once
 * the super block stripes inside that range have been carved out.
 *
 * For every super block mirror, btrfs_rmap_block() yields the logical
 * addresses of that mirror inside this block group.  A stripe overlapping
 * the front or the back of the range trims it; a stripe in the middle makes
 * the function recurse on the left part and continue with the right part.
 *
 * The remaining range is looked up with btrfs_find_free_space().  A missing
 * entry, a different offset or a different size is reported on stderr.  A
 * matching entry is removed with unlink_free_space(), so that leftovers can
 * be detected by the caller afterwards.
 *
 * NOTE(review): the return values of the individual paths should be
 * confirmed against the complete function.
 */
6110 static int check_cache_range(struct btrfs_root *root,
6111 struct btrfs_block_group_cache *cache,
6112 u64 offset, u64 bytes)
6114 struct btrfs_free_space *entry;
6120 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6121 bytenr = btrfs_sb_offset(i);
6122 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6123 cache->key.objectid, bytenr, 0,
6124 &logical, &nr, &stripe_len);
6129 if (logical[nr] + stripe_len <= offset)
6131 if (offset + bytes <= logical[nr])
6133 if (logical[nr] == offset) {
6134 if (stripe_len >= bytes) {
6138 bytes -= stripe_len;
6139 offset += stripe_len;
6140 } else if (logical[nr] < offset) {
6141 if (logical[nr] + stripe_len >=
6146 bytes = (offset + bytes) -
6147 (logical[nr] + stripe_len);
6148 offset = logical[nr] + stripe_len;
6151 * Could be tricky, the super may land in the
6152 * middle of the area we're checking. First
6153 * check the easiest case, it's at the end.
6155 if (logical[nr] + stripe_len >=
6157 bytes = logical[nr] - offset;
6161 /* Check the left side */
6162 ret = check_cache_range(root, cache,
6164 logical[nr] - offset);
6170 /* Now we continue with the right side */
6171 bytes = (offset + bytes) -
6172 (logical[nr] + stripe_len);
6173 offset = logical[nr] + stripe_len;
6180 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6182 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6183 offset, offset+bytes);
6187 if (entry->offset != offset) {
6188 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6193 if (entry->bytes != bytes) {
6194 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6195 bytes, entry->bytes, offset);
6199 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - cross-check the loaded free space of block group
 * @cache against the extent tree.
 *
 * Walks the extent tree (root is switched to fs_info->extent_root) from the
 * larger of the block group start and BTRFS_SUPER_INFO_OFFSET up to the end
 * of the block group, looking only at EXTENT_ITEM and METADATA_ITEM keys.
 * The variable last tracks the end of the previous allocated extent
 * (key.offset bytes for an EXTENT_ITEM, root->nodesize for a METADATA_ITEM).
 * Every gap between last and the next extent, and the gap between the final
 * extent and the end of the block group, is verified with
 * check_cache_range().
 *
 * After the walk, entries still present in the free_space_offset rb-tree are
 * reported on stderr.
 */
6204 static int verify_space_cache(struct btrfs_root *root,
6205 struct btrfs_block_group_cache *cache)
6207 struct btrfs_path path;
6208 struct extent_buffer *leaf;
6209 struct btrfs_key key;
6213 root = root->fs_info->extent_root;
6215 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6217 btrfs_init_path(&path);
6218 key.objectid = last;
6220 key.type = BTRFS_EXTENT_ITEM_KEY;
6221 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6226 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6227 ret = btrfs_next_leaf(root, &path);
6235 leaf = path.nodes[0];
6236 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6237 if (key.objectid >= cache->key.offset + cache->key.objectid)
6239 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6240 key.type != BTRFS_METADATA_ITEM_KEY) {
6245 if (last == key.objectid) {
6246 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6247 last = key.objectid + key.offset;
6249 last = key.objectid + root->nodesize;
6254 ret = check_cache_range(root, cache, last,
6255 key.objectid - last);
6258 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6259 last = key.objectid + key.offset;
6261 last = key.objectid + root->nodesize;
6265 if (last < cache->key.objectid + cache->key.offset)
6266 ret = check_cache_range(root, cache, last,
6267 cache->key.objectid +
6268 cache->key.offset - last);
6271 btrfs_release_path(&path);
6274 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6275 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - verify the free space information of every block
 * group.
 *
 * When the super block cache generation is set (not -1ULL) and differs from
 * the super generation, a notice is printed saying the space cache will be
 * invalidated.  With progress reporting enabled, the task context is
 * switched to TASK_FREE_SPACE and started.
 *
 * Block groups are visited in address order via
 * btrfs_lookup_first_block_group().  For each one the free space control
 * structure is created on first use and any previously loaded cache is
 * dropped.  On a filesystem with the FREE_SPACE_TREE compat_ro feature the
 * super stripes are excluded and the free space tree is loaded; otherwise
 * the free space cache is loaded.  The loaded data is then checked by
 * verify_space_cache(), and a failure is reported on stderr.
 *
 * Returns -EINVAL when the local error counter is non-zero, 0 otherwise.
 */
6283 static int check_space_cache(struct btrfs_root *root)
6285 struct btrfs_block_group_cache *cache;
6286 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6290 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6291 btrfs_super_generation(root->fs_info->super_copy) !=
6292 btrfs_super_cache_generation(root->fs_info->super_copy)) {
6293 printf("cache and super generation don't match, space cache "
6294 "will be invalidated\n");
6298 if (ctx.progress_enabled) {
6299 ctx.tp = TASK_FREE_SPACE;
6300 task_start(ctx.info);
6304 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6308 start = cache->key.objectid + cache->key.offset;
6309 if (!cache->free_space_ctl) {
6310 if (btrfs_init_free_space_ctl(cache,
6311 root->sectorsize)) {
6316 btrfs_remove_free_space_cache(cache);
6319 if (btrfs_fs_compat_ro(root->fs_info,
6320 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6321 ret = exclude_super_stripes(root, cache);
6323 fprintf(stderr, "could not exclude super stripes: %s\n",
6328 ret = load_free_space_tree(root->fs_info, cache);
6329 free_excluded_extents(root, cache);
6331 fprintf(stderr, "could not load free space tree: %s\n",
6338 ret = load_free_space_cache(root->fs_info, cache);
6343 ret = verify_space_cache(root, cache);
6345 fprintf(stderr, "cache appears valid but isn't %Lu\n",
6346 cache->key.objectid);
6351 task_stop(ctx.info);
6353 return error ? -EINVAL : 0;
/*
 * check_extent_csums - read a data range and compare its checksums with the
 * ones stored in a csum item.
 *
 * @root:        any root of the filesystem; used for sectorsize and fs_info
 * @bytenr:      logical start of the data range
 * @num_bytes:   length of the range; tested against root->sectorsize first
 * @leaf_offset: offset inside @eb where the stored checksums begin
 * @eb:          leaf holding the csum item
 *
 * A buffer of @num_bytes is allocated and filled with read_extent_data(),
 * which may return less than requested per call.  Each sectorsize block of
 * the data read is checksummed with btrfs_csum_data() and
 * btrfs_csum_final() and compared with the stored value found at
 * leaf_offset + (block index * csum_size).  A mismatch is reported on stderr
 * together with the mirror number; while the mirror number is below
 * num_copies - 1 another mirror is tried.
 *
 * NOTE(review): the return values and the release of the data buffer should
 * be confirmed against the complete function.
 */
6356 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6357 u64 num_bytes, unsigned long leaf_offset,
6358 struct extent_buffer *eb) {
6361 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6363 unsigned long csum_offset;
6367 u64 data_checked = 0;
6373 if (num_bytes % root->sectorsize)
6376 data = malloc(num_bytes);
6380 while (offset < num_bytes) {
6383 read_len = num_bytes - offset;
6384 /* read as much space once a time */
6385 ret = read_extent_data(root, data + offset,
6386 bytenr + offset, &read_len, mirror);
6390 /* verify every 4k data's checksum */
6391 while (data_checked < read_len) {
6393 tmp = offset + data_checked;
6395 csum = btrfs_csum_data(NULL, (char *)data + tmp,
6396 csum, root->sectorsize);
6397 btrfs_csum_final(csum, (u8 *)&csum);
6399 csum_offset = leaf_offset +
6400 tmp / root->sectorsize * csum_size;
6401 read_extent_buffer(eb, (char *)&csum_expected,
6402 csum_offset, csum_size);
6403 /* try another mirror */
6404 if (csum != csum_expected) {
6405 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6406 mirror, bytenr + tmp,
6407 csum, csum_expected);
6408 num_copies = btrfs_num_copies(
6409 &root->fs_info->mapping_tree,
6411 if (mirror < num_copies - 1) {
6416 data_checked += root->sectorsize;
/*
 * check_extent_exists - check that the range starting at @bytenr is fully
 * covered by extent items in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps back to the
 * closest preceding item, walking back further while the key type sorts
 * after EXTENT_ITEM.  From there the leaf items are scanned forward; items
 * that are not EXTENT_ITEMs or that end before the range are passed over.
 * Each overlapping extent shrinks the remaining range:
 *  - an extent starting at bytenr consumes the front of the range;
 *  - an extent starting before bytenr consumes whatever it overlaps;
 *  - an extent starting inside the range keeps only the part before it and,
 *    when it also ends inside the range, the part after it is checked by a
 *    recursive call first.
 * Any part of the range left uncovered at the end is reported on stderr.
 *
 * NOTE(review): the second parameter line and the return values are to be
 * confirmed against the complete function.
 */
6425 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6428 struct btrfs_path path;
6429 struct extent_buffer *leaf;
6430 struct btrfs_key key;
6433 btrfs_init_path(&path);
6434 key.objectid = bytenr;
6435 key.type = BTRFS_EXTENT_ITEM_KEY;
6436 key.offset = (u64)-1;
6439 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6442 fprintf(stderr, "Error looking up extent record %d\n", ret);
6443 btrfs_release_path(&path);
6446 if (path.slots[0] > 0) {
6449 ret = btrfs_prev_leaf(root, &path);
6452 } else if (ret > 0) {
6459 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6462 * Block group items come before extent items if they have the same
6463 * bytenr, so walk back one more just in case. Dear future traveller,
6464 * first congrats on mastering time travel. Now if it's not too much
6465 * trouble could you go back to 2006 and tell Chris to make the
6466 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6467 * EXTENT_ITEM_KEY please?
6469 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6470 if (path.slots[0] > 0) {
6473 ret = btrfs_prev_leaf(root, &path);
6476 } else if (ret > 0) {
6481 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6485 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6486 ret = btrfs_next_leaf(root, &path);
6488 fprintf(stderr, "Error going to next leaf "
6490 btrfs_release_path(&path);
6496 leaf = path.nodes[0];
6497 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6498 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6502 if (key.objectid + key.offset < bytenr) {
6506 if (key.objectid > bytenr + num_bytes)
6509 if (key.objectid == bytenr) {
6510 if (key.offset >= num_bytes) {
6514 num_bytes -= key.offset;
6515 bytenr += key.offset;
6516 } else if (key.objectid < bytenr) {
6517 if (key.objectid + key.offset >= bytenr + num_bytes) {
6521 num_bytes = (bytenr + num_bytes) -
6522 (key.objectid + key.offset);
6523 bytenr = key.objectid + key.offset;
6525 if (key.objectid + key.offset < bytenr + num_bytes) {
6526 u64 new_start = key.objectid + key.offset;
6527 u64 new_bytes = bytenr + num_bytes - new_start;
6530 * Weird case, the extent is in the middle of
6531 * our range, we'll have to search one side
6532 * and then the other. Not sure if this happens
6533 * in real life, but no harm in coding it up
6534 * anyway just in case.
6536 btrfs_release_path(&path);
6537 ret = check_extent_exists(root, new_start,
6540 fprintf(stderr, "Right section didn't "
6544 num_bytes = key.objectid - bytenr;
6547 num_bytes = key.objectid - bytenr;
6554 if (num_bytes && !ret) {
6555 fprintf(stderr, "There are no extents for csum range "
6556 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6560 btrfs_release_path(&path);
/*
 * check_csums - walk the csum tree and verify that every checksummed range
 * is backed by an extent record.
 *
 * The root is switched to fs_info->csum_root; a csum tree whose root node is
 * not up to date is reported on stderr.  Items are scanned starting from
 * (BTRFS_EXTENT_CSUM_OBJECTID, BTRFS_EXTENT_CSUM_KEY), looking only at
 * EXTENT_CSUM keys.  The number of data bytes covered by an item is
 * (item size / csum_size) * sectorsize.
 *
 * When the global check_data_csum flag is set, the data itself is verified
 * with check_extent_csums(); otherwise that step is skipped.  Contiguous
 * items are merged into one range (offset, num_bytes); once an item does not
 * start where the running range ends, that range is checked with
 * check_extent_exists() and a failure is reported on stderr.
 */
6564 static int check_csums(struct btrfs_root *root)
6566 struct btrfs_path path;
6567 struct extent_buffer *leaf;
6568 struct btrfs_key key;
6569 u64 offset = 0, num_bytes = 0;
6570 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6574 unsigned long leaf_offset;
6576 root = root->fs_info->csum_root;
6577 if (!extent_buffer_uptodate(root->node)) {
6578 fprintf(stderr, "No valid csum tree found\n");
6582 btrfs_init_path(&path);
6583 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6584 key.type = BTRFS_EXTENT_CSUM_KEY;
6586 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6588 fprintf(stderr, "Error searching csum tree %d\n", ret);
6589 btrfs_release_path(&path);
6593 if (ret > 0 && path.slots[0])
6598 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6599 ret = btrfs_next_leaf(root, &path);
6601 fprintf(stderr, "Error going to next leaf "
6608 leaf = path.nodes[0];
6610 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6611 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6616 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6617 csum_size) * root->sectorsize;
6618 if (!check_data_csum)
6619 goto skip_csum_check;
6620 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6621 ret = check_extent_csums(root, key.offset, data_len,
6627 offset = key.offset;
6628 } else if (key.offset != offset + num_bytes) {
6629 ret = check_extent_exists(root, offset, num_bytes);
6631 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6632 "there is no extent record\n",
6633 offset, offset+num_bytes);
6636 offset = key.offset;
6639 num_bytes += data_len;
6643 btrfs_release_path(&path);
/*
 * is_dropped_key - compare @key with @drop_key in (objectid, type, offset)
 * order.
 *
 * The comparison tests, field by field, whether @key sorts before
 * @drop_key.
 * NOTE(review): presumably returns 1 when @key is smaller and 0 otherwise -
 * confirm against the complete function.
 */
6647 static int is_dropped_key(struct btrfs_key *key,
6648 struct btrfs_key *drop_key) {
6649 if (key->objectid < drop_key->objectid)
6651 else if (key->objectid == drop_key->objectid) {
6652 if (key->type < drop_key->type)
6654 else if (key->type == drop_key->type) {
6655 if (key->offset < drop_key->offset)
6663 * Here are the rules for FULL_BACKREF.
6665 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6666 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6668 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6669 * if it happened after the relocation occurred since we'll have dropped the
6670 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6671 * have no real way to know for sure.
6673 * We process the blocks one root at a time, and we start from the lowest root
6674 * objectid and go to the highest. So we can just lookup the owner backref for
6675 * the record and if we don't find it then we know it doesn't exist and we have
6678 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6679 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6680 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, without consulting the extent tree.
 *
 * The extent record of @buf is looked up in @extent_cache.  The decision is
 * driven by the checks visible below: the root objectid being below
 * BTRFS_FIRST_FREE_OBJECTID, @buf being the root node recorded in @ri, the
 * RELOC header flag, the header owner matching @ri, and the presence of a
 * tree backref for the header owner.
 * NOTE(review): which outcome each check selects should be confirmed against
 * the complete function.
 *
 * On the full backref outcome the flag is OR-ed into the flags output.  On
 * both outcomes rec->bad_full_backref is set when the record already holds
 * the opposite decision in flag_block_full_backref.
 */
6682 static int calc_extent_flag(struct btrfs_root *root,
6683 struct cache_tree *extent_cache,
6684 struct extent_buffer *buf,
6685 struct root_item_record *ri,
6688 struct extent_record *rec;
6689 struct cache_extent *cache;
6690 struct tree_backref *tback;
6693 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6694 /* we have added this extent before */
6698 rec = container_of(cache, struct extent_record, cache);
6701 * Except file/reloc tree, we can not have
6704 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6709 if (buf->start == ri->bytenr)
6712 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6715 owner = btrfs_header_owner(buf);
6716 if (owner == ri->objectid)
6719 tback = find_tree_backref(rec, 0, owner);
6724 if (rec->flag_block_full_backref != FLAG_UNSET &&
6725 rec->flag_block_full_backref != 0)
6726 rec->bad_full_backref = 1;
6729 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6730 if (rec->flag_block_full_backref != FLAG_UNSET &&
6731 rec->flag_block_full_backref != 1)
6732 rec->bad_full_backref = 1;
/*
 * report_mismatch_key_root - print a one-line diagnostic on stderr naming a
 * key type that was found in a root where it does not belong.
 *
 * @key_type: key type, rendered with print_key_type()
 * @rootid:   objectid of the root, rendered with print_objectid()
 */
6736 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6738 fprintf(stderr, "Invalid key type(");
6739 print_key_type(stderr, 0, key_type);
6740 fprintf(stderr, ") found in root(");
6741 print_objectid(stderr, rootid, 0);
6742 fprintf(stderr, ")\n");
6746 * Check if the key is valid with its extent buffer.
6748 * This is an early check in case an invalid key exists in an extent buffer
6749 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * check_type_with_root - check that @key_type may appear in the tree whose
 * objectid is @rootid.
 *
 * Rules visible below:
 *  - DEV_ITEM and CHUNK_ITEM are tied to the chunk tree;
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM to the extent tree;
 *  - ROOT_ITEM to the root tree;
 *  - DEV_EXTENT to the device tree.
 * A violation is reported through report_mismatch_key_root().
 *
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, while every
 * other case label here is a key type; BTRFS_EXTENT_CSUM_KEY may have been
 * intended.  The condition guarded by that label also tests for the log
 * tree only in its visible part, which does not obviously match the
 * comment above it - confirm both before changing anything.
 */
6752 static int check_type_with_root(u64 rootid, u8 key_type)
6755 /* Only valid in chunk tree */
6756 case BTRFS_DEV_ITEM_KEY:
6757 case BTRFS_CHUNK_ITEM_KEY:
6758 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6761 /* valid in csum and log tree */
6762 case BTRFS_CSUM_TREE_OBJECTID:
6763 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6767 case BTRFS_EXTENT_ITEM_KEY:
6768 case BTRFS_METADATA_ITEM_KEY:
6769 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6770 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6773 case BTRFS_ROOT_ITEM_KEY:
6774 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6777 case BTRFS_DEV_EXTENT_KEY:
6778 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6784 report_mismatch_key_root(key_type, rootid);
/*
 * Process one tree block from the work queues.
 *
 * Candidates are chosen by pick_next_pending() and readahead is issued for
 * them; the first candidate (bits[0]) is then read, removed from the
 * pending/reada/nodes caches, and examined:
 *   - its FULL_BACKREF flag is worked out (extent tree lookup or
 *     calc_extent_flag()) and recorded on the matching extent_record;
 *   - check_block() is run on it;
 *   - for a leaf, every item is dispatched on key.type to the matching
 *     process_*() / add_*_backref() helper, and file extents are accounted;
 *   - for a node, every child pointer gets an extent record and a tree
 *     backref and is queued with add_pending().
 * The global size counters are updated and the buffer is released at the end.
 *
 * NOTE(review): many lines of this function (braces, error checks, returns,
 * some condition continuations) are not present in this listing, so the
 * comments below only describe what the visible lines do.
 */
6788 static int run_next_block(struct btrfs_root *root,
6789 struct block_info *bits,
6792 struct cache_tree *pending,
6793 struct cache_tree *seen,
6794 struct cache_tree *reada,
6795 struct cache_tree *nodes,
6796 struct cache_tree *extent_cache,
6797 struct cache_tree *chunk_cache,
6798 struct rb_root *dev_cache,
6799 struct block_group_tree *block_group_cache,
6800 struct device_extent_tree *dev_extent_cache,
6801 struct root_item_record *ri)
6803 struct extent_buffer *buf;
6804 struct extent_record *rec = NULL;
6815 struct btrfs_key key;
6816 struct cache_extent *cache;
6819 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6820 bits_nr, &reada_bits);
/* Track each picked block in 'reada' and start readahead for it. */
6825 for(i = 0; i < nritems; i++) {
6826 ret = add_cache_extent(reada, bits[i].start,
6831 /* fixme, get the parent transid */
6832 readahead_tree_block(root, bits[i].start,
/* The block handled by this call is the first one picked. */
6836 *last = bits[0].start;
6837 bytenr = bits[0].start;
6838 size = bits[0].size;
/* Drop the block from each work queue it may still be sitting in. */
6840 cache = lookup_cache_extent(pending, bytenr, size);
6842 remove_cache_extent(pending, cache);
6845 cache = lookup_cache_extent(reada, bytenr, size);
6847 remove_cache_extent(reada, cache);
6850 cache = lookup_cache_extent(nodes, bytenr, size);
6852 remove_cache_extent(nodes, cache);
/* Take the generation to read with from the block's extent record. */
6855 cache = lookup_cache_extent(extent_cache, bytenr, size);
6857 rec = container_of(cache, struct extent_record, cache);
6858 gen = rec->parent_generation;
6861 /* fixme, get the real parent transid */
6862 buf = read_tree_block(root, bytenr, size, gen);
6863 if (!extent_buffer_uptodate(buf)) {
6864 record_bad_block_io(root->fs_info,
6865 extent_cache, bytenr, size);
6869 nritems = btrfs_header_nritems(buf);
/*
 * Obtain the extent flags.  When init_extent_tree is unset the extent tree
 * is consulted first and calc_extent_flag() is also called in that branch
 * (presumably as a fallback; the guarding condition is not shown).  When it
 * is set, only calc_extent_flag() is used.  If calc_extent_flag() fails,
 * FULL_BACKREF is assumed.
 */
6872 if (!init_extent_tree) {
6873 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6874 btrfs_header_level(buf), 1, NULL,
6877 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6879 fprintf(stderr, "Couldn't calc extent flags\n");
6880 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6885 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6887 fprintf(stderr, "Couldn't calc extent flags\n");
6888 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6892 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6894 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6895 ri->objectid == btrfs_header_owner(buf)) {
6897 * Ok we got to this block from it's original owner and
6898 * we have FULL_BACKREF set. Relocation can leave
6899 * converted blocks over so this is altogether possible,
6900 * however it's not possible if the generation > the
6901 * last snapshot, so check for this case.
6903 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6904 btrfs_header_generation(buf) > ri->last_snapshot) {
6905 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6906 rec->bad_full_backref = 1;
6911 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6912 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6913 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6914 rec->bad_full_backref = 1;
/* Remember the final FULL_BACKREF decision on the extent record. */
6918 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6919 rec->flag_block_full_backref = 1;
6923 rec->flag_block_full_backref = 0;
6925 owner = btrfs_header_owner(buf);
6928 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: count unused space as waste, then dispatch every item by key type. */
6932 if (btrfs_is_leaf(buf)) {
6933 btree_space_waste += btrfs_leaf_free_space(root, buf);
6934 for (i = 0; i < nritems; i++) {
6935 struct btrfs_file_extent_item *fi;
6936 btrfs_item_key_to_cpu(buf, &key, i);
6938 * Check key type against the leaf owner.
6939 * Could filter quite a lot of early error if
6942 if (check_type_with_root(btrfs_header_owner(buf),
6944 fprintf(stderr, "ignoring invalid key\n");
6947 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6948 process_extent_item(root, extent_cache, buf,
6952 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6953 process_extent_item(root, extent_cache, buf,
6957 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6959 btrfs_item_size_nr(buf, i);
6962 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6963 process_chunk_item(chunk_cache, &key, buf, i);
6966 if (key.type == BTRFS_DEV_ITEM_KEY) {
6967 process_device_item(dev_cache, &key, buf, i);
6970 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6971 process_block_group_item(block_group_cache,
6975 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6976 process_device_extent_item(dev_extent_cache,
6981 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6982 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6983 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree backref: key.offset is passed in the root position. */
6990 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6991 ret = add_tree_backref(extent_cache,
6992 key.objectid, 0, key.offset, 0);
6994 error("add_tree_backref failed: %s",
/* Shared tree backref: key.offset is passed in the parent position. */
6998 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6999 ret = add_tree_backref(extent_cache,
7000 key.objectid, key.offset, 0, 0);
7002 error("add_tree_backref failed: %s",
7006 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7007 struct btrfs_extent_data_ref *ref;
7008 ref = btrfs_item_ptr(buf, i,
7009 struct btrfs_extent_data_ref);
7010 add_data_backref(extent_cache,
7012 btrfs_extent_data_ref_root(buf, ref),
7013 btrfs_extent_data_ref_objectid(buf,
7015 btrfs_extent_data_ref_offset(buf, ref),
7016 btrfs_extent_data_ref_count(buf, ref),
7017 0, root->sectorsize);
7020 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7021 struct btrfs_shared_data_ref *ref;
7022 ref = btrfs_item_ptr(buf, i,
7023 struct btrfs_shared_data_ref);
7024 add_data_backref(extent_cache,
7025 key.objectid, key.offset, 0, 0, 0,
7026 btrfs_shared_data_ref_count(buf, ref),
7027 0, root->sectorsize);
/*
 * Orphan items are copied into a bad_item and queued on the global
 * delete_items list.  The objectid == BTRFS_ORPHAN_OBJECTID test presumably
 * skips that (its body is not shown) -- confirm.
 */
7030 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7031 struct bad_item *bad;
7033 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7037 bad = malloc(sizeof(struct bad_item));
7040 INIT_LIST_HEAD(&bad->list);
7041 memcpy(&bad->key, &key,
7042 sizeof(struct btrfs_key));
7043 bad->root_id = owner;
7044 list_add_tail(&bad->list, &delete_items);
/* The rest of the loop body deals with file extent items. */
7047 if (key.type != BTRFS_EXTENT_DATA_KEY)
7049 fi = btrfs_item_ptr(buf, i,
7050 struct btrfs_file_extent_item);
7051 if (btrfs_file_extent_type(buf, fi) ==
7052 BTRFS_FILE_EXTENT_INLINE)
7054 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
/* Account on-disk and referenced bytes, then record the data backref. */
7057 data_bytes_allocated +=
7058 btrfs_file_extent_disk_num_bytes(buf, fi);
7059 if (data_bytes_allocated < root->sectorsize) {
7062 data_bytes_referenced +=
7063 btrfs_file_extent_num_bytes(buf, fi);
7064 add_data_backref(extent_cache,
7065 btrfs_file_extent_disk_bytenr(buf, fi),
7066 parent, owner, key.objectid, key.offset -
7067 btrfs_file_extent_offset(buf, fi), 1, 1,
7068 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record every child pointer and queue it for a later call. */
7072 struct btrfs_key first_key;
7074 first_key.objectid = 0;
7077 btrfs_item_key_to_cpu(buf, &first_key, 0);
7078 level = btrfs_header_level(buf);
7079 for (i = 0; i < nritems; i++) {
7080 struct extent_record tmpl;
7082 ptr = btrfs_node_blockptr(buf, i);
7083 size = root->nodesize;
7084 btrfs_node_key_to_cpu(buf, &key, i);
/* Children at the root's drop level are tested against its drop key. */
7086 if ((level == ri->drop_level)
7087 && is_dropped_key(&key, &ri->drop_key)) {
/* Template extent record for the child, keyed by its parent key. */
7092 memset(&tmpl, 0, sizeof(tmpl));
7093 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7094 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7099 tmpl.max_size = size;
7100 ret = add_extent_rec(extent_cache, &tmpl);
7104 ret = add_tree_backref(extent_cache, ptr, parent,
7107 error("add_tree_backref failed: %s",
/*
 * The child is queued on either 'nodes' or 'pending'; the condition that
 * selects between the two calls is not present in this listing.
 */
7113 add_pending(nodes, seen, ptr, size);
7115 add_pending(pending, seen, ptr, size);
/* Unused key-pointer slots in the node count as wasted btree space. */
7118 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7119 nritems) * sizeof(struct btrfs_key_ptr);
/* Global statistics, broken down by the block's owner tree. */
7121 total_btree_bytes += buf->len;
7122 if (fs_root_objectid(btrfs_header_owner(buf)))
7123 total_fs_tree_bytes += buf->len;
7124 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7125 total_extent_tree_bytes += buf->len;
/*
 * Latch found_old_backref for a reloc-tree-owned block that has the mixed
 * backref revision but lacks the RELOC header flag.
 */
7126 if (!found_old_backref &&
7127 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7128 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7129 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7130 found_old_backref = 1;
7132 free_extent_buffer(buf);
/*
 * Queue the root block 'buf' of a tree for scanning and seed its bookkeeping.
 *
 * The block is queued with add_pending(): on 'nodes' when its header level
 * is above 0, and via the second call on 'pending' (presumably the else
 * branch; the 'else' line is not shown).  An extent record covering
 * [buf->start, buf->len] is added, followed by a tree backref.  For the
 * relocation tree, or a header backref revision older than
 * BTRFS_MIXED_BACKREF_REV, the backref uses buf->start as parent; otherwise
 * it is keyed by objectid.
 *
 * NOTE(review): the return value of add_extent_rec() is ignored here, while
 * run_next_block() stores it in ret -- confirm this is intentional.
 */
7136 static int add_root_to_pending(struct extent_buffer *buf,
7137 struct cache_tree *extent_cache,
7138 struct cache_tree *pending,
7139 struct cache_tree *seen,
7140 struct cache_tree *nodes,
7143 struct extent_record tmpl;
7146 if (btrfs_header_level(buf) > 0)
7147 add_pending(nodes, seen, buf->start, buf->len);
7149 add_pending(pending, seen, buf->start, buf->len);
/* Template extent record for the root block itself. */
7151 memset(&tmpl, 0, sizeof(tmpl));
7152 tmpl.start = buf->start;
7157 tmpl.max_size = buf->len;
7158 add_extent_rec(extent_cache, &tmpl);
7160 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7161 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7162 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7165 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7170 /* as we fix the tree, we might be deleting blocks that
7171 * we're tracking for repair. This hook makes sure we
7172 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called when an extent is freed during repair.  It looks up the
 * extent_record covering [bytenr, num_bytes] in the fsck extent cache and
 * removes the matching backref bookkeeping so the cached view follows the
 * tree being modified.
 *
 * owner >= BTRFS_FIRST_FREE_OBJECTID selects the data-backref path
 * (find_data_backref); anything else takes the tree-backref path
 * (find_tree_backref).  maybe_free_extent_rec() is called on the record at
 * the end.
 *
 * NOTE(review): NULL checks, early exits and the return statement are on
 * lines not present in this listing.
 */
7174 static int free_extent_hook(struct btrfs_trans_handle *trans,
7175 struct btrfs_root *root,
7176 u64 bytenr, u64 num_bytes, u64 parent,
7177 u64 root_objectid, u64 owner, u64 offset,
7180 struct extent_record *rec;
7181 struct cache_extent *cache;
7183 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7185 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7186 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7190 rec = container_of(cache, struct extent_record, cache);
/* Data extent: reduce the per-backref and per-record counters. */
7192 struct data_backref *back;
7193 back = find_data_backref(rec, parent, root_objectid, owner,
7194 offset, 1, bytenr, num_bytes);
7197 if (back->node.found_ref) {
7198 back->found_ref -= refs_to_drop;
7200 rec->refs -= refs_to_drop;
7202 if (back->node.found_extent_tree) {
7203 back->num_refs -= refs_to_drop;
7204 if (rec->extent_item_refs)
7205 rec->extent_item_refs -= refs_to_drop;
/* Clear the "found" flags once the corresponding counter reaches zero. */
7207 if (back->found_ref == 0)
7208 back->node.found_ref = 0;
7209 if (back->num_refs == 0)
7210 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked when it is absent from the extent
 * tree but still has found_ref set; "!found_ref" might have been intended --
 * confirm.
 */
7212 if (!back->node.found_extent_tree && back->node.found_ref) {
7213 list_del(&back->node.list);
/* Tree block: each flag accounts for a single reference. */
7217 struct tree_backref *back;
7218 back = find_tree_backref(rec, parent, root_objectid);
7221 if (back->node.found_ref) {
7224 back->node.found_ref = 0;
7226 if (back->node.found_extent_tree) {
7227 if (rec->extent_item_refs)
7228 rec->extent_item_refs--;
7229 back->node.found_extent_tree = 0;
/*
 * NOTE(review): found_ref has just been cleared above whenever it was set,
 * so on the visible lines this condition cannot be true -- confirm.
 */
7231 if (!back->node.found_extent_tree && back->node.found_ref) {
7232 list_del(&back->node.list);
7236 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items whose key objectid is 'bytenr': extent and
 * metadata items plus the tree/data/shared/v0 backref items.
 *
 * The search starts from offset (u64)-1 for bytenr and, when an item of
 * another type is met, retries with a smaller key (type - 1, or offset - 1
 * for type 0), so it appears to walk backwards through the keys for bytenr.
 * Every deleted item is logged to stderr.  Deleting an EXTENT_ITEM or
 * METADATA_ITEM is followed by btrfs_update_block_group(), using the key
 * offset as the length for EXTENT_ITEM and root->nodesize for METADATA_ITEM.
 *
 * NOTE(review): new_len is not referenced on any line visible here; loop
 * control, error handling and the return are on lines not present in this
 * listing.
 */
7241 static int delete_extent_records(struct btrfs_trans_handle *trans,
7242 struct btrfs_root *root,
7243 struct btrfs_path *path,
7244 u64 bytenr, u64 new_len)
7246 struct btrfs_key key;
7247 struct btrfs_key found_key;
7248 struct extent_buffer *leaf;
7253 key.objectid = bytenr;
7255 key.offset = (u64)-1;
7258 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7265 if (path->slots[0] == 0)
7271 leaf = path->nodes[0];
7272 slot = path->slots[0];
7274 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7275 if (found_key.objectid != bytenr)
/* Not an extent or backref item: step the search key down and retry. */
7278 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7279 found_key.type != BTRFS_METADATA_ITEM_KEY &&
7280 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7281 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7282 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7283 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7284 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7285 btrfs_release_path(path);
7286 if (found_key.type == 0) {
7287 if (found_key.offset == 0)
7289 key.offset = found_key.offset - 1;
7290 key.type = found_key.type;
7292 key.type = found_key.type - 1;
7293 key.offset = (u64)-1;
7297 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7298 found_key.objectid, found_key.type, found_key.offset);
7300 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7303 btrfs_release_path(path);
/* Removing the extent item itself also adjusts block group accounting. */
7305 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7306 found_key.type == BTRFS_METADATA_ITEM_KEY) {
7307 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7308 found_key.offset : root->nodesize;
7310 ret = btrfs_update_block_group(trans, root, bytenr,
7317 btrfs_release_path(path);
7322 * for a single backref, this will allocate a new extent
7323 * and add the backref to it.
/*
 * Record one backref of 'rec' in the extent tree.
 *
 * Two phases are visible:
 *  1. An EXTENT_ITEM keyed (rec->start, rec->max_size) is inserted with a
 *     ref count of 0 and rec->generation.  It is flagged DATA for a data
 *     backref, or TREE_BLOCK | flags with a trailing btrfs_tree_block_info
 *     for a tree backref.  Block group accounting is then updated.  This
 *     phase is presumably conditional on 'allocated' (the test is not
 *     shown) -- confirm.
 *  2. btrfs_inc_extent_ref() adds the reference: once per dback->found_ref
 *     for a data backref, once for a tree backref.  The parent is taken from
 *     the backref when back->full_backref is set.
 *
 * NOTE(review): error checks and the return are on lines not present in
 * this listing.
 */
7325 static int record_extent(struct btrfs_trans_handle *trans,
7326 struct btrfs_fs_info *info,
7327 struct btrfs_path *path,
7328 struct extent_record *rec,
7329 struct extent_backref *back,
7330 int allocated, u64 flags)
7333 struct btrfs_root *extent_root = info->extent_root;
7334 struct extent_buffer *leaf;
7335 struct btrfs_key ins_key;
7336 struct btrfs_extent_item *ei;
7337 struct data_backref *dback;
7338 struct btrfs_tree_block_info *bi;
/* rec->max_size is raised to at least nodesize. */
7341 rec->max_size = max_t(u64, rec->max_size,
7342 info->extent_root->nodesize);
7345 u32 item_size = sizeof(*ei);
7348 item_size += sizeof(*bi);
7350 ins_key.objectid = rec->start;
7351 ins_key.offset = rec->max_size;
7352 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7354 ret = btrfs_insert_empty_item(trans, extent_root, path,
7355 &ins_key, item_size);
7359 leaf = path->nodes[0];
7360 ei = btrfs_item_ptr(leaf, path->slots[0],
7361 struct btrfs_extent_item);
/* Start at zero refs; btrfs_inc_extent_ref() below adds the real ones. */
7363 btrfs_set_extent_refs(leaf, ei, 0);
7364 btrfs_set_extent_generation(leaf, ei, rec->generation);
7366 if (back->is_data) {
7367 btrfs_set_extent_flags(leaf, ei,
7368 BTRFS_EXTENT_FLAG_DATA);
/* NOTE(review): stray second ';' on the declaration below. */
7370 struct btrfs_disk_key copy_key;;
/* The tree block info sits directly after the extent item. */
7372 bi = (struct btrfs_tree_block_info *)(ei + 1);
7373 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): "©_key" on the lines below looks like "&copy_key" after an
 * HTML-entity decode of "&copy"; as written it is not valid C -- confirm
 * and restore.
 */
7376 btrfs_set_disk_key_objectid(©_key,
7377 rec->info_objectid);
7378 btrfs_set_disk_key_type(©_key, 0);
7379 btrfs_set_disk_key_offset(©_key, 0);
7381 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7382 btrfs_set_tree_block_key(leaf, bi, ©_key);
7384 btrfs_set_extent_flags(leaf, ei,
7385 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7388 btrfs_mark_buffer_dirty(leaf);
7389 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7390 rec->max_size, 1, 0);
7393 btrfs_release_path(path);
/* Phase 2: add the reference(s) described by the backref. */
7396 if (back->is_data) {
7400 dback = to_data_backref(back);
7401 if (back->full_backref)
7402 parent = dback->parent;
7406 for (i = 0; i < dback->found_ref; i++) {
7407 /* if parent != 0, we're doing a full backref
7408 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7409 * just makes the backref allocator create a data
7412 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7413 rec->start, rec->max_size,
7417 BTRFS_FIRST_FREE_OBJECTID :
7423 fprintf(stderr, "adding new data backref"
7424 " on %llu %s %llu owner %llu"
7425 " offset %llu found %d\n",
7426 (unsigned long long)rec->start,
7427 back->full_backref ?
7429 back->full_backref ?
7430 (unsigned long long)parent :
7431 (unsigned long long)dback->root,
7432 (unsigned long long)dback->owner,
7433 (unsigned long long)dback->offset,
7437 struct tree_backref *tback;
7439 tback = to_tree_backref(back);
7440 if (back->full_backref)
7441 parent = tback->parent;
7445 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7446 rec->start, rec->max_size,
7447 parent, tback->root, 0, 0);
/*
 * NOTE(review): unlike the data-backref message above, the arguments here
 * are passed to %llu without an (unsigned long long) cast.
 */
7448 fprintf(stderr, "adding new tree backref on "
7449 "start %llu len %llu parent %llu root %llu\n",
7450 rec->start, rec->max_size, parent, tback->root);
7453 btrfs_release_path(path);
/*
 * Linear search of 'entries' for the extent_entry whose bytenr and bytes
 * both equal the given values.
 *
 * NOTE(review): the return statements are on lines not present in this
 * listing; callers such as verify_backrefs() test the result with
 * "!entry", so a miss presumably yields NULL -- confirm.
 */
7457 static struct extent_entry *find_entry(struct list_head *entries,
7458 u64 bytenr, u64 bytes)
7460 struct extent_entry *entry = NULL;
7462 list_for_each_entry(entry, entries, list) {
7463 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Walk 'entries' and select the one to trust, comparing the 'count' fields
 * of candidate entries.  An entry whose broken count equals its count is
 * tested first, and equal counts between candidates are treated as
 * inconclusive (see the comments below).
 *
 * NOTE(review): several branch bodies and the return are on lines not
 * present in this listing.  verify_backrefs() dereferences the result as
 * 'best' and prints an "evenly split" message near its second call, so NULL
 * presumably signals a tie -- confirm.
 */
7470 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7472 struct extent_entry *entry, *best = NULL, *prev = NULL;
7474 list_for_each_entry(entry, entries, list) {
7476 * If there are as many broken entries as entries then we know
7477 * not to trust this particular entry.
7479 if (entry->broken == entry->count)
7483 * Special case, when there are only two entries and 'best' is
7493 * If our current entry == best then we can't be sure our best
7494 * is really the best, so we need to keep searching.
7496 if (best && best->count == entry->count) {
7502 /* Prev == entry, not good enough, have to keep searching */
7503 if (!prev->broken && prev->count == entry->count)
/* Otherwise keep whichever candidate has the larger count. */
7507 best = (prev->count > entry->count) ? prev : entry;
7508 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind data backref 'dback' so that it agrees
 * with 'entry', the bytenr/bytes pair chosen by verify_backrefs().
 *
 * Steps visible here:
 *  1. Read the fs root dback->root.
 *  2. Search (read-only) from (dback->owner, EXTENT_DATA, dback->offset) for
 *     the file extent whose disk_bytenr and disk_num_bytes equal the
 *     backref's values.
 *  3. Start a transaction and search again with COW enabled for the key that
 *     was found.
 *  4. Adjust disk_bytenr and the file extent offset depending on how
 *     dback->disk_bytenr relates to entry->bytenr.  Then set disk_num_bytes
 *     and, when the extent is not compressed, ram_bytes to entry->bytes.
 *  5. Commit the transaction.
 *
 * Returns ret if it is non-zero, otherwise the result of the commit.
 *
 * NOTE(review): loop control, error exits and labels are on lines not
 * present in this listing.
 */
7516 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7517 struct data_backref *dback, struct extent_entry *entry)
7519 struct btrfs_trans_handle *trans;
7520 struct btrfs_root *root;
7521 struct btrfs_file_extent_item *fi;
7522 struct extent_buffer *leaf;
7523 struct btrfs_key key;
7527 key.objectid = dback->root;
7528 key.type = BTRFS_ROOT_ITEM_KEY;
7529 key.offset = (u64)-1;
7530 root = btrfs_read_fs_root(info, &key);
7532 fprintf(stderr, "Couldn't find root for our ref\n");
7537 * The backref points to the original offset of the extent if it was
7538 * split, so we need to search down to the offset we have and then walk
7539 * forward until we find the backref we're looking for.
7541 key.objectid = dback->owner;
7542 key.type = BTRFS_EXTENT_DATA_KEY;
7543 key.offset = dback->offset;
7544 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7546 fprintf(stderr, "Error looking up ref %d\n", ret);
/* Move to the next leaf once the slot runs past the current one. */
7551 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7552 ret = btrfs_next_leaf(root, path);
7554 fprintf(stderr, "Couldn't find our ref, next\n");
7558 leaf = path->nodes[0];
7559 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7560 if (key.objectid != dback->owner ||
7561 key.type != BTRFS_EXTENT_DATA_KEY) {
7562 fprintf(stderr, "Couldn't find our ref, search\n");
7565 fi = btrfs_item_ptr(leaf, path->slots[0],
7566 struct btrfs_file_extent_item);
7567 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7568 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* This is the file extent the backref describes. */
7570 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7575 btrfs_release_path(path);
7577 trans = btrfs_start_transaction(root, 1);
7579 return PTR_ERR(trans);
7582 * Ok we have the key of the file extent we want to fix, now we can cow
7583 * down to the thing and fix it.
7585 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7587 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7588 key.objectid, key.type, key.offset, ret);
7592 fprintf(stderr, "Well that's odd, we just found this key "
7593 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7598 leaf = path->nodes[0];
7599 fi = btrfs_item_ptr(leaf, path->slots[0],
7600 struct btrfs_file_extent_item);
/* A compressed extent with a different start is reported, not repaired. */
7602 if (btrfs_file_extent_compression(leaf, fi) &&
7603 dback->disk_bytenr != entry->bytenr) {
7604 fprintf(stderr, "Ref doesn't match the record start and is "
7605 "compressed, please take a btrfs-image of this file "
7606 "system and send it to a btrfs developer so they can "
7607 "complete this functionality for bytenr %Lu\n",
7608 dback->disk_bytenr);
/*
 * Three cases: a backref marked broken only gets a new disk_bytenr; a ref
 * starting after the entry, or before it, also gets its offset rewritten.
 */
7613 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7614 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7615 } else if (dback->disk_bytenr > entry->bytenr) {
7616 u64 off_diff, offset;
7618 off_diff = dback->disk_bytenr - entry->bytenr;
7619 offset = btrfs_file_extent_offset(leaf, fi);
7620 if (dback->disk_bytenr + offset +
7621 btrfs_file_extent_num_bytes(leaf, fi) >
7622 entry->bytenr + entry->bytes) {
7623 fprintf(stderr, "Ref is past the entry end, please "
7624 "take a btrfs-image of this file system and "
7625 "send it to a btrfs developer, ref %Lu\n",
7626 dback->disk_bytenr);
7631 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7632 btrfs_set_file_extent_offset(leaf, fi, offset);
7633 } else if (dback->disk_bytenr < entry->bytenr) {
7636 offset = btrfs_file_extent_offset(leaf, fi);
7637 if (dback->disk_bytenr + offset < entry->bytenr) {
7638 fprintf(stderr, "Ref is before the entry start, please"
7639 " take a btrfs-image of this file system and "
7640 "send it to a btrfs developer, ref %Lu\n",
7641 dback->disk_bytenr);
/* Re-base the offset from the old extent start onto entry->bytenr. */
7646 offset += dback->disk_bytenr;
7647 offset -= entry->bytenr;
7648 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7649 btrfs_set_file_extent_offset(leaf, fi, offset);
7652 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7655 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7656 * only do this if we aren't using compression, otherwise it's a
7659 if (!btrfs_file_extent_compression(leaf, fi))
7660 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7662 printf("ram bytes may be wrong?\n");
7663 btrfs_mark_buffer_dirty(leaf);
7665 err = btrfs_commit_transaction(trans, root);
7666 btrfs_release_path(path);
7667 return ret ? ret : err;
/*
 * Make the data backrefs of 'rec' agree on one (bytenr, bytes) pair.
 *
 * Pass 1 groups the keyed data backrefs that have found_ref set into
 * extent_entry buckets by (disk_bytenr, bytes), and tests each one against
 * rec->start / rec->nr and its broken flag.  If the entry count and mismatch
 * state show no disagreement, nothing needs doing.  Otherwise
 * find_most_right_entry() picks a winner.  The extent record's own
 * (start, nr) is looked up among the entries and may be added as an extra
 * entry before the winner is picked again.  Pass 2 calls repair_ref() on
 * every backref that differs from the winner.  The temporary entry list is
 * emptied at the end.
 *
 * NOTE(review): counters, error exits and the return are on lines not
 * present in this listing.
 */
7670 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7671 struct extent_record *rec)
7673 struct extent_backref *back;
7674 struct data_backref *dback;
7675 struct extent_entry *entry, *best = NULL;
7678 int broken_entries = 0;
7683 * Metadata is easy and the backrefs should always agree on bytenr and
7684 * size, if not we've got bigger issues.
7689 list_for_each_entry(back, &rec->backrefs, list) {
7690 if (back->full_backref || !back->is_data)
7693 dback = to_data_backref(back);
7696 * We only pay attention to backrefs that we found a real
7699 if (dback->found_ref == 0)
7703 * For now we only catch when the bytes don't match, not the
7704 * bytenr. We can easily do this at the same time, but I want
7705 * to have a fs image to test on before we just add repair
7706 * functionality willy-nilly so we know we won't screw up the
7710 entry = find_entry(&entries, dback->disk_bytenr,
7713 entry = malloc(sizeof(struct extent_entry));
7718 memset(entry, 0, sizeof(*entry));
7719 entry->bytenr = dback->disk_bytenr;
7720 entry->bytes = dback->bytes;
7721 list_add_tail(&entry->list, &entries);
7726 * If we only have on entry we may think the entries agree when
7727 * in reality they don't so we have to do some extra checking.
7729 if (dback->disk_bytenr != rec->start ||
7730 dback->bytes != rec->nr || back->broken)
7741 /* Yay all the backrefs agree, carry on good sir */
7742 if (nr_entries <= 1 && !mismatch)
7745 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7746 "%Lu\n", rec->start);
7749 * First we want to see if the backrefs can agree amongst themselves who
7750 * is right, so figure out which one of the entries has the highest
7753 best = find_most_right_entry(&entries);
7756 * Ok so we may have an even split between what the backrefs think, so
7757 * this is where we use the extent ref to see what it thinks.
7760 entry = find_entry(&entries, rec->start, rec->nr);
7761 if (!entry && (!broken_entries || !rec->found_rec)) {
7762 fprintf(stderr, "Backrefs don't agree with each other "
7763 "and extent record doesn't agree with anybody,"
7764 " so we can't fix bytenr %Lu bytes %Lu\n",
7765 rec->start, rec->nr);
7768 } else if (!entry) {
7770 * Ok our backrefs were broken, we'll assume this is the
7771 * correct value and add an entry for this range.
7773 entry = malloc(sizeof(struct extent_entry));
7778 memset(entry, 0, sizeof(*entry));
7779 entry->bytenr = rec->start;
7780 entry->bytes = rec->nr;
7781 list_add_tail(&entry->list, &entries);
7785 best = find_most_right_entry(&entries);
7787 fprintf(stderr, "Backrefs and extent record evenly "
7788 "split on who is right, this is going to "
7789 "require user input to fix bytenr %Lu bytes "
7790 "%Lu\n", rec->start, rec->nr);
7797 * I don't think this can happen currently as we'll abort() if we catch
7798 * this case higher up, but in case somebody removes that we still can't
7799 * deal with it properly here yet, so just bail out of that's the case.
7801 if (best->bytenr != rec->start) {
7802 fprintf(stderr, "Extent start and backref starts don't match, "
7803 "please use btrfs-image on this file system and send "
7804 "it to a btrfs developer so they can make fsck fix "
7805 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7806 rec->start, rec->nr);
7812 * Ok great we all agreed on an extent record, let's go find the real
7813 * references and fix up the ones that don't match.
7815 list_for_each_entry(back, &rec->backrefs, list) {
7816 if (back->full_backref || !back->is_data)
7819 dback = to_data_backref(back);
7822 * Still ignoring backrefs that don't have a real ref attached
7825 if (dback->found_ref == 0)
7828 if (dback->bytes == best->bytes &&
7829 dback->disk_bytenr == best->bytenr)
7832 ret = repair_ref(info, path, dback, best);
7838 * Ok we messed with the actual refs, which means we need to drop our
7839 * entire cache and go back and rescan. I know this is a huge pain and
7840 * adds a lot of extra work, but it's the only way to be safe. Once all
7841 * the backrefs agree we may not need to do anything to the extent
/* Empty the temporary entry list. */
7846 while (!list_empty(&entries)) {
7847 entry = list_entry(entries.next, struct extent_entry, list);
7848 list_del_init(&entry->list);
/*
 * Handle an extent record that was built from backrefs only (found_rec
 * unset) and has exactly one duplicate.
 *
 * 'rec' is removed from extent_cache and its single duplicate ('good') is
 * promoted to be the cached record, inheriting rec's refs and backref list.
 * Records in the cache that overlap 'good' are looked up.  One that has
 * found_rec set or carries duplicates becomes a duplicate of 'good', and
 * 'good' is put on the global duplicate_extents list.  Any other has its
 * refs and backrefs merged into 'good'.  'good' is finally inserted into
 * extent_cache.
 *
 * The visible final return yields 0 when 'good' still has duplicates and 1
 * when it has none.
 *
 * NOTE(review): the early-return value, loop control and cleanup of 'rec'
 * are on lines not present in this listing.
 */
7854 static int process_duplicates(struct btrfs_root *root,
7855 struct cache_tree *extent_cache,
7856 struct extent_record *rec)
7858 struct extent_record *good, *tmp;
7859 struct cache_extent *cache;
7863 * If we found a extent record for this extent then return, or if we
7864 * have more than one duplicate we are likely going to need to delete
7867 if (rec->found_rec || rec->num_duplicates > 1)
7870 /* Shouldn't happen but just in case */
7871 BUG_ON(!rec->num_duplicates);
7874 * So this happens if we end up with a backref that doesn't match the
7875 * actual extent entry. So either the backref is bad or the extent
7876 * entry is bad. Either way we want to have the extent_record actually
7877 * reflect what we found in the extent_tree, so we need to take the
7878 * duplicate out and use that as the extent_record since the only way we
7879 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7881 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate and reset its bookkeeping. */
7883 good = to_extent_record(rec->dups.next);
7884 list_del_init(&good->list);
7885 INIT_LIST_HEAD(&good->backrefs);
7886 INIT_LIST_HEAD(&good->dups);
7887 good->cache.start = good->start;
7888 good->cache.size = good->nr;
7889 good->content_checked = 0;
7890 good->owner_ref_checked = 0;
7891 good->num_duplicates = 0;
7892 good->refs = rec->refs;
7893 list_splice_init(&rec->backrefs, &good->backrefs);
7895 cache = lookup_cache_extent(extent_cache, good->start,
7899 tmp = container_of(cache, struct extent_record, cache);
7902 * If we find another overlapping extent and it's found_rec is
7903 * set then it's a duplicate and we need to try and delete
7906 if (tmp->found_rec || tmp->num_duplicates > 0) {
7907 if (list_empty(&good->list))
7908 list_add_tail(&good->list,
7909 &duplicate_extents);
7910 good->num_duplicates += tmp->num_duplicates + 1;
7911 list_splice_init(&tmp->dups, &good->dups);
7912 list_del_init(&tmp->list);
7913 list_add_tail(&tmp->list, &good->dups);
7914 remove_cache_extent(extent_cache, &tmp->cache);
7919 * Ok we have another non extent item backed extent rec, so lets
7920 * just add it to this extent and carry on like we did above.
7922 good->refs += tmp->refs;
7923 list_splice_init(&tmp->backrefs, &good->backrefs);
7924 remove_cache_extent(extent_cache, &tmp->cache);
7927 ret = insert_cache_extent(extent_cache, &good->cache);
7930 return good->num_duplicates ? 0 : 1;
/*
 * Delete duplicate extent items of 'rec' from the extent tree.
 *
 * The duplicates on rec->dups are scanned to pick 'good', the record that
 * covers the others.  The records to drop are collected on a local
 * delete_list.  In one transaction on the extent root, each collected
 * record with found_rec set has its EXTENT_ITEM (start, nr) looked up and
 * deleted.  Both lists are emptied afterwards.
 *
 * Returns ret when it is non-zero, otherwise nr_del.  rec->num_duplicates
 * is reset to 0 only when ret and nr_del are both zero.
 *
 * NOTE(review): the initialisation of 'good', the nr_del accounting, the
 * freeing of records and error exits are on lines not present in this
 * listing.
 */
7933 static int delete_duplicate_records(struct btrfs_root *root,
7934 struct extent_record *rec)
7936 struct btrfs_trans_handle *trans;
7937 LIST_HEAD(delete_list);
7938 struct btrfs_path path;
7939 struct extent_record *tmp, *good, *n;
7942 struct btrfs_key key;
7944 btrfs_init_path(&path);
7947 /* Find the record that covers all of the duplicates. */
7948 list_for_each_entry(tmp, &rec->dups, list) {
7949 if (good->start < tmp->start)
7951 if (good->nr > tmp->nr)
/*
 * NOTE(review): the message below is formatted as "[start-end]" but the
 * second value passed for each range is nr (a length), not an end offset.
 */
7954 if (tmp->start + tmp->nr < good->start + good->nr) {
7955 fprintf(stderr, "Ok we have overlapping extents that "
7956 "aren't completely covered by each other, this "
7957 "is going to require more careful thought. "
7958 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7959 tmp->start, tmp->nr, good->start, good->nr);
7966 list_add_tail(&rec->list, &delete_list);
7968 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7971 list_move_tail(&tmp->list, &delete_list);
/* From here on 'root' is the extent root. */
7974 root = root->fs_info->extent_root;
7975 trans = btrfs_start_transaction(root, 1);
7976 if (IS_ERR(trans)) {
7977 ret = PTR_ERR(trans);
7981 list_for_each_entry(tmp, &delete_list, list) {
7982 if (tmp->found_rec == 0)
7984 key.objectid = tmp->start;
7985 key.type = BTRFS_EXTENT_ITEM_KEY;
7986 key.offset = tmp->nr;
7988 /* Shouldn't happen but just in case */
7989 if (tmp->metadata) {
7990 fprintf(stderr, "Well this shouldn't happen, extent "
7991 "record overlaps but is metadata? "
7992 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7996 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8002 ret = btrfs_del_item(trans, root, &path);
8005 btrfs_release_path(&path);
8008 err = btrfs_commit_transaction(trans, root);
/* Empty both lists. */
8012 while (!list_empty(&delete_list)) {
8013 tmp = to_extent_record(delete_list.next);
8014 list_del_init(&tmp->list);
8020 while (!list_empty(&rec->dups)) {
8021 tmp = to_extent_record(rec->dups.next);
8022 list_del_init(&tmp->list);
8026 btrfs_release_path(&path);
8028 if (!ret && !nr_del)
8029 rec->num_duplicates = 0;
8031 return ret ? ret : nr_del;
/*
 * Try to resolve the keyed data backrefs of 'rec' that the tree scan did
 * not find (dback->found_ref == 0).
 *
 * For each such backref, the fs root dback->root is read and the file
 * extent item (owner, EXTENT_DATA, offset) is searched for.  When it is
 * there, its disk_bytenr and disk_num_bytes are read and extent_cache is
 * probed at that bytenr (see the comment below for when the backref is
 * left alone).  Otherwise the backref adopts those values and its
 * found_ref is incremented, so that verify_backrefs() can later arbitrate.
 *
 * A root lookup failing with -ENOENT is tested separately from other
 * errors; PTR_ERR(root) is returned on a visible line.
 *
 * NOTE(review): the 'continue' statements, the found_rec test, the "broken"
 * marking referred to by the last comment, and the final return are on
 * lines not present in this listing.
 */
8034 static int find_possible_backrefs(struct btrfs_fs_info *info,
8035 struct btrfs_path *path,
8036 struct cache_tree *extent_cache,
8037 struct extent_record *rec)
8039 struct btrfs_root *root;
8040 struct extent_backref *back;
8041 struct data_backref *dback;
8042 struct cache_extent *cache;
8043 struct btrfs_file_extent_item *fi;
8044 struct btrfs_key key;
8048 list_for_each_entry(back, &rec->backrefs, list) {
8049 /* Don't care about full backrefs (poor unloved backrefs) */
8050 if (back->full_backref || !back->is_data)
8053 dback = to_data_backref(back);
8055 /* We found this one, we don't need to do a lookup */
8056 if (dback->found_ref)
8059 key.objectid = dback->root;
8060 key.type = BTRFS_ROOT_ITEM_KEY;
8061 key.offset = (u64)-1;
8063 root = btrfs_read_fs_root(info, &key);
8065 /* No root, definitely a bad ref, skip */
8066 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8068 /* Other err, exit */
8070 return PTR_ERR(root);
8072 key.objectid = dback->owner;
8073 key.type = BTRFS_EXTENT_DATA_KEY;
8074 key.offset = dback->offset;
8075 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8077 btrfs_release_path(path);
8080 /* Didn't find it, we can carry on */
/* Read where the file extent item really points. */
8085 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8086 struct btrfs_file_extent_item);
8087 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8088 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8089 btrfs_release_path(path);
8090 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8092 struct extent_record *tmp;
8093 tmp = container_of(cache, struct extent_record, cache);
8096 * If we found an extent record for the bytenr for this
8097 * particular backref then we can't add it to our
8098 * current extent record. We only want to add backrefs
8099 * that don't have a corresponding extent item in the
8100 * extent tree since they likely belong to this record
8101 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the values read from the file extent item. */
8107 dback->found_ref += 1;
8108 dback->disk_bytenr = bytenr;
8109 dback->bytes = bytes;
8112 * Set this so the verify backref code knows not to trust the
8113 * values in this backref.
8122 * Record orphan data ref into corresponding root.
8124 * Return 0 if the extent item contains a data ref and it was recorded.
8125 * Return 1 if the extent item contains no useful data ref.
8126 * In that case, it may contain only a shared_dataref or metadata backref,
8127 * or the file extent exists (this should be handled by the extent bytenr
8129 * Return <0 if something goes wrong.
/*
 * Record data extents that the extent tree references but no file extent
 * item points to, so they can be re-attached to their owning root later.
 *
 * The loop looks at every keyed data backref of 'rec' that was found in the
 * extent tree (found_extent_tree) but has no found_ref.  The destination
 * root dback->root is read and the file extent key
 * (owner, EXTENT_DATA, offset) is searched for.  An orphan_data_extent
 * describing the root, owner, offset and the record's cached start and size
 * is then allocated and linked with the root's orphan_data_extents list.
 *
 * The visible final return yields 0 when at least one orphan was recorded
 * and 1 when none was.
 *
 * NOTE(review): the 'continue' statements, the test on the search result,
 * the malloc failure handling and any error return are on lines not present
 * in this listing.
 */
8131 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8132 struct extent_record *rec)
8134 struct btrfs_key key;
8135 struct btrfs_root *dest_root;
8136 struct extent_backref *back;
8137 struct data_backref *dback;
8138 struct orphan_data_extent *orphan;
8139 struct btrfs_path path;
8140 int recorded_data_ref = 0;
8145 btrfs_init_path(&path);
8146 list_for_each_entry(back, &rec->backrefs, list) {
8147 if (back->full_backref || !back->is_data ||
8148 !back->found_extent_tree)
8150 dback = to_data_backref(back);
8151 if (dback->found_ref)
8153 key.objectid = dback->root;
8154 key.type = BTRFS_ROOT_ITEM_KEY;
8155 key.offset = (u64)-1;
8157 dest_root = btrfs_read_fs_root(fs_info, &key);
8159 /* For a non-existent root we just skip it */
8160 if (IS_ERR(dest_root) || !dest_root)
8163 key.objectid = dback->owner;
8164 key.type = BTRFS_EXTENT_DATA_KEY;
8165 key.offset = dback->offset;
8167 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8168 btrfs_release_path(&path);
8170 * For ret < 0, it's OK since the fs-tree may be corrupted,
8171 * we need to record it for inode/file extent rebuild.
8172 * For ret > 0, we record it only for file extent rebuild.
8173 * For ret == 0, the file extent exists but only bytenr
8174 * mismatch, let the original bytenr fix routine to handle,
8180 orphan = malloc(sizeof(*orphan));
8185 INIT_LIST_HEAD(&orphan->list);
8186 orphan->root = dback->root;
8187 orphan->objectid = dback->owner;
8188 orphan->offset = dback->offset;
8189 orphan->disk_bytenr = rec->cache.start;
8190 orphan->disk_len = rec->cache.size;
/*
 * NOTE(review): other list calls in this file pass the new element first
 * and the list head second, e.g. list_add_tail(&bad->list, &delete_items).
 * Here the root's list head is the first argument and the new orphan the
 * second, which looks swapped and could drop orphans queued earlier on the
 * same root -- confirm.
 */
8191 list_add(&dest_root->orphan_data_extents, &orphan->list);
8192 recorded_data_ref = 1;
8195 btrfs_release_path(&path);
8197 return !recorded_data_ref;
8203 * when an incorrect extent item is found, this will delete
8204 * all of the existing entries for it and recreate them
8205 * based on what the tree scan found.
/*
 * Rebuild the extent tree entries for @rec from the backrefs collected during
 * the tree scan. The visible flow is: optionally gather stray backrefs for a
 * data extent whose ref counts disagree, verify the backrefs, then inside a
 * transaction on the extent root delete the existing records and re-create
 * them through record_extent() for the backrefs that were actually found.
 */
8207 static int fixup_extent_refs(struct btrfs_fs_info *info,
8208 struct cache_tree *extent_cache,
8209 struct extent_record *rec)
8211 struct btrfs_trans_handle *trans = NULL;
8213 struct btrfs_path path;
8214 struct list_head *cur = rec->backrefs.next;
8215 struct cache_extent *cache;
8216 struct extent_backref *back;
/* Carry the record's full-backref state into the flags handed to record_extent(). */
8220 if (rec->flag_block_full_backref)
8221 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8223 btrfs_init_path(&path);
/* Only data extents (!rec->metadata) with mismatching ref counts take this path. */
8224 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8226 * Sometimes the backrefs themselves are so broken they don't
8227 * get attached to any meaningful rec, so first go back and
8228 * check any of our backrefs that we couldn't find and throw
8229 * them into the list if we find the backref so that
8230 * verify_backrefs can figure out what to do.
8232 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8237 /* step one, make sure all of the backrefs agree */
8238 ret = verify_backrefs(info, &path, rec);
8242 trans = btrfs_start_transaction(info->extent_root, 1);
8243 if (IS_ERR(trans)) {
8244 ret = PTR_ERR(trans);
8248 /* step two, delete all the existing records */
8249 ret = delete_extent_records(trans, info->extent_root, &path,
8250 rec->start, rec->max_size);
8255 /* was this block corrupt? If so, don't add references to it */
8256 cache = lookup_cache_extent(info->corrupt_blocks,
8257 rec->start, rec->max_size);
8263 /* step three, recreate all the refs we did find */
8264 while(cur != &rec->backrefs) {
8265 back = to_extent_backref(cur);
8269 * if we didn't find any references, don't create a
8272 if (!back->found_ref)
/* The record is being rewritten, so the bad-full-backref marker is cleared. */
8275 rec->bad_full_backref = 0;
8276 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
/* The commit result is kept in a separate variable from ret. */
8284 int err = btrfs_commit_transaction(trans, info->extent_root);
8289 btrfs_release_path(&path);
/*
 * Make the FULL_BACKREF flag of the on-disk extent item for @rec agree with
 * rec->flag_block_full_backref. The item is looked up in the extent root
 * inside a transaction, its flags are rewritten, the leaf is marked dirty and
 * the transaction is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started and the
 * commit result on the path that updates the flags.
 */
8293 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8294 struct extent_record *rec)
8296 struct btrfs_trans_handle *trans;
8297 struct btrfs_root *root = fs_info->extent_root;
8298 struct btrfs_path path;
8299 struct btrfs_extent_item *ei;
8300 struct btrfs_key key;
/*
 * Metadata records are keyed as METADATA_ITEM with the tree level as offset;
 * everything else is keyed as EXTENT_ITEM with the extent size as offset.
 */
8304 key.objectid = rec->start;
8305 if (rec->metadata) {
8306 key.type = BTRFS_METADATA_ITEM_KEY;
8307 key.offset = rec->info_level;
8309 key.type = BTRFS_EXTENT_ITEM_KEY;
8310 key.offset = rec->max_size;
8313 trans = btrfs_start_transaction(root, 0);
8315 return PTR_ERR(trans);
8317 btrfs_init_path(&path);
/* ins_len 0, cow 1: the item is modified in place, so the path is COWed. */
8318 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Both early-exit paths below release the path and still commit the transaction. */
8320 btrfs_release_path(&path);
8321 btrfs_commit_transaction(trans, root);
8324 fprintf(stderr, "Didn't find extent for %llu\n",
8325 (unsigned long long)rec->start);
8326 btrfs_release_path(&path);
8327 btrfs_commit_transaction(trans, root);
8331 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8332 struct btrfs_extent_item);
8333 flags = btrfs_extent_flags(path.nodes[0], ei);
8334 if (rec->flag_block_full_backref) {
8335 fprintf(stderr, "setting full backref on %llu\n",
8336 (unsigned long long)key.objectid);
8337 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8339 fprintf(stderr, "clearing full backref on %llu\n",
8340 (unsigned long long)key.objectid);
8341 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Write the updated flags back into the leaf and mark it for writeout. */
8343 btrfs_set_extent_flags(path.nodes[0], ei, flags);
8344 btrfs_mark_buffer_dirty(path.nodes[0]);
8345 btrfs_release_path(&path);
8346 return btrfs_commit_transaction(trans, root);
8349 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the extent
 * root. The search stops one level above the corrupt block
 * (corrupt->level + 1); the slot found by the search is tried first, then all
 * slots of that node are scanned for a block pointer equal to
 * corrupt->cache.start. A matching pointer is deleted with btrfs_del_ptr().
 */
8350 static int prune_one_block(struct btrfs_trans_handle *trans,
8351 struct btrfs_fs_info *info,
8352 struct btrfs_corrupt_block *corrupt)
8355 struct btrfs_path path;
8356 struct extent_buffer *eb;
/* Level of the parent node that holds the pointer to the corrupt block. */
8360 int level = corrupt->level + 1;
8362 btrfs_init_path(&path);
8364 /* we want to stop at the parent to our busted block */
8365 path.lowest_level = level;
/* ins_len -1 and cow 1: search set up for a deletion with COW of the path. */
8367 ret = btrfs_search_slot(trans, info->extent_root,
8368 &corrupt->key, &path, -1, 1);
8373 eb = path.nodes[level];
8380 * hopefully the search gave us the block we want to prune,
8381 * lets try that first
8383 slot = path.slots[level];
8384 found = btrfs_node_blockptr(eb, slot);
8385 if (found == corrupt->cache.start)
8388 nritems = btrfs_header_nritems(eb);
8390 /* the search failed, lets scan this node and hope we find it */
8391 for (slot = 0; slot < nritems; slot++) {
8392 found = btrfs_node_blockptr(eb, slot);
8393 if (found == corrupt->cache.start)
8397 * we couldn't find the bad block. TODO, search all the nodes for pointers
/* Special handling when the node that was scanned is the extent root's own node. */
8400 if (eb == info->extent_root->node) {
8405 btrfs_release_path(&path);
8410 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8411 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8414 btrfs_release_path(&path);
/*
 * Prune the blocks recorded in info->corrupt_blocks from the extent root.
 * A transaction on the extent root is started, each cached corrupt block is
 * handed to prune_one_block() and removed from the cache, and the transaction
 * is committed.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started, otherwise the
 * commit result on the visible return path.
 */
8418 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8420 struct btrfs_trans_handle *trans = NULL;
8421 struct cache_extent *cache;
8422 struct btrfs_corrupt_block *corrupt;
/* Fetch a cache entry, searching from offset 0. */
8425 cache = search_cache_extent(info->corrupt_blocks, 0);
8429 trans = btrfs_start_transaction(info->extent_root, 1);
8431 return PTR_ERR(trans);
8433 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* NOTE(review): the result of prune_one_block() is not used here. */
8434 prune_one_block(trans, info, corrupt);
8435 remove_cache_extent(info->corrupt_blocks, cache);
8438 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear EXTENT_DIRTY ranges from fs_info->free_space_cache and step through
 * the block groups: each lookup starts at @start, which is then advanced to
 * the end of the block group that was found (objectid + offset of its key).
 */
8442 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8444 struct btrfs_block_group_cache *cache;
/* Find the first dirty range in the free space cache and clear it. */
8449 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8450 &start, &end, EXTENT_DIRTY);
8453 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8459 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Move past the block group just visited. */
8464 start = cache->key.objectid + cache->key.offset;
/*
 * Report, and in repair mode try to fix, inconsistencies in the extent
 * records gathered in @extent_cache.
 *
 * Visible phases:
 *  - mark every recorded extent and every corrupt block dirty in
 *    fs_info->excluded_extents, prune corrupt blocks and reset the cached
 *    block groups;
 *  - in repair mode, work through the duplicate_extents list with
 *    process_duplicates() / delete_duplicate_records();
 *  - walk the cache, printing a message for each kind of problem (duplicate
 *    items, ref count mismatch, backpointer mismatch, failed owner ref check,
 *    bad full backref, stripe crossing, chunk type mismatch) and calling
 *    fixup_extent_refs() / fixup_extent_flags() where repair is enabled;
 *  - finally run btrfs_fix_block_accounting() in a transaction on the extent
 *    root.
 */
8468 static int check_extent_refs(struct btrfs_root *root,
8469 struct cache_tree *extent_cache)
8471 struct extent_record *rec;
8472 struct cache_extent *cache;
8481 * if we're doing a repair, we have to make sure
8482 * we don't allocate from the problem extents.
8483 * In the worst case, this will be all the
8486 cache = search_cache_extent(extent_cache, 0);
8488 rec = container_of(cache, struct extent_record, cache);
8489 set_extent_dirty(root->fs_info->excluded_extents,
8491 rec->start + rec->max_size - 1,
8493 cache = next_cache_extent(cache);
8496 /* pin down all the corrupted blocks too */
8497 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8499 set_extent_dirty(root->fs_info->excluded_extents,
8501 cache->start + cache->size - 1,
8503 cache = next_cache_extent(cache);
8505 prune_corrupt_blocks(root->fs_info);
8506 reset_cached_block_groups(root->fs_info);
8509 reset_cached_block_groups(root->fs_info);
8512 * We need to delete any duplicate entries we find first otherwise we
8513 * could mess up the extent tree when we have backrefs that actually
8514 * belong to a different extent item and not the weird duplicate one.
8516 while (repair && !list_empty(&duplicate_extents)) {
/* Take the first record off the duplicate list. */
8517 rec = to_extent_record(duplicate_extents.next);
8518 list_del_init(&rec->list);
8520 /* Sometimes we can find a backref before we find an actual
8521 * extent, so we need to process it a little bit to see if there
8522 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8523 * if this is a backref screwup. If we need to delete stuff
8524 * process_duplicates() will return 0, otherwise it will return
8527 if (process_duplicates(root, extent_cache, rec))
8529 ret = delete_duplicate_records(root, rec);
8533 * delete_duplicate_records will return the number of entries
8534 * deleted, so if it's greater than 0 then we know we actually
8535 * did something and we need to remove.
/* Main pass over the extent records. */
8549 cache = search_cache_extent(extent_cache, 0);
8552 rec = container_of(cache, struct extent_record, cache);
8553 if (rec->num_duplicates) {
8554 fprintf(stderr, "extent item %llu has multiple extent "
8555 "items\n", (unsigned long long)rec->start);
/* Ref count found by the scan differs from the count stored in the extent item. */
8560 if (rec->refs != rec->extent_item_refs) {
8561 fprintf(stderr, "ref mismatch on [%llu %llu] ",
8562 (unsigned long long)rec->start,
8563 (unsigned long long)rec->nr);
8564 fprintf(stderr, "extent item %llu, found %llu\n",
8565 (unsigned long long)rec->extent_item_refs,
8566 (unsigned long long)rec->refs);
8567 ret = record_orphan_data_extents(root->fs_info, rec);
8574 * we can't use the extent to repair file
8575 * extent, let the fallback method handle it.
8577 if (!fixed && repair) {
8578 ret = fixup_extent_refs(
8589 if (all_backpointers_checked(rec, 1)) {
8590 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8591 (unsigned long long)rec->start,
8592 (unsigned long long)rec->nr);
8594 if (!fixed && !recorded && repair) {
8595 ret = fixup_extent_refs(root->fs_info,
8604 if (!rec->owner_ref_checked) {
8605 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8606 (unsigned long long)rec->start,
8607 (unsigned long long)rec->nr);
8608 if (!fixed && !recorded && repair) {
8609 ret = fixup_extent_refs(root->fs_info,
8618 if (rec->bad_full_backref) {
8619 fprintf(stderr, "bad full backref, on [%llu]\n",
8620 (unsigned long long)rec->start);
8622 ret = fixup_extent_flags(root->fs_info, rec);
8631 * Although it's not a extent ref's problem, we reuse this
8632 * routine for error reporting.
8633 * No repair function yet.
8635 if (rec->crossing_stripes) {
8637 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8638 rec->start, rec->start + rec->max_size);
8643 if (rec->wrong_chunk_type) {
8645 "bad extent [%llu, %llu), type mismatch with chunk\n",
8646 rec->start, rec->start + rec->max_size);
/* The record is finished: drop it from the cache and free its backrefs. */
8651 remove_cache_extent(extent_cache, cache);
8652 free_all_extent_backrefs(rec);
/*
 * In repair mode (and not when rebuilding the extent tree) the range is
 * released from the excluded set again if the record had no error or was
 * fixed.
 */
8653 if (!init_extent_tree && repair && (!cur_err || fixed))
8654 clear_extent_dirty(root->fs_info->excluded_extents,
8656 rec->start + rec->max_size - 1,
/* -EAGAIN is not treated as a repair failure here. */
8662 if (ret && ret != -EAGAIN) {
8663 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8666 struct btrfs_trans_handle *trans;
/* Block accounting is fixed up in its own transaction on the extent root. */
8668 root = root->fs_info->extent_root;
8669 trans = btrfs_start_transaction(root, 1);
8670 if (IS_ERR(trans)) {
8671 ret = PTR_ERR(trans);
8675 btrfs_fix_block_accounting(trans, root);
8676 ret = btrfs_commit_transaction(trans, root);
8681 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Derive a per-stripe length from a chunk's @length, its block group @type
 * flags and its @num_stripes:
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   other:  length
 *
 * NOTE(review): no guard against a zero divisor is visible (num_stripes of
 * 0, 1 or 2 for the respective profiles) -- confirm that callers validate
 * num_stripes beforehand.
 */
8687 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8691 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8692 stripe_size = length;
8693 stripe_size /= num_stripes;
8694 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8695 stripe_size = length * 2;
8696 stripe_size /= num_stripes;
8697 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8698 stripe_size = length;
8699 stripe_size /= (num_stripes - 1);
8700 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8701 stripe_size = length;
8702 stripe_size /= (num_stripes - 2);
8704 stripe_size = length;
8710 * Check the chunk with its block group/dev list ref:
8711 * Return 0 if all refs seem valid.
8712 * Return 1 if part of refs seems valid, need later check for rebuild ref
8713 * like missing block group and needs to search extent tree to rebuild them.
8714 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache. The block group is compared on length/offset/type, and each
 * stripe of the chunk is compared with the device extent found at
 * (devid, offset) on objectid, offset, chunk offset and length. Matching
 * records are attached to the chunk record; mismatches and missing records
 * are reported with the messages below.
 */
8716 static int check_chunk_refs(struct chunk_record *chunk_rec,
8717 struct block_group_tree *block_group_cache,
8718 struct device_extent_tree *dev_extent_cache,
8721 struct cache_extent *block_group_item;
8722 struct block_group_record *block_group_rec;
8723 struct cache_extent *dev_extent_item;
8724 struct device_extent_record *dev_extent_rec;
8728 int metadump_v2 = 0;
8732 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8735 if (block_group_item) {
8736 block_group_rec = container_of(block_group_item,
8737 struct block_group_record,
/*
 * The block group record's offset is compared with the chunk length and its
 * objectid with the chunk offset.
 */
8739 if (chunk_rec->length != block_group_rec->offset ||
8740 chunk_rec->offset != block_group_rec->objectid ||
8742 chunk_rec->type_flags != block_group_rec->flags)) {
8745 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8746 chunk_rec->objectid,
8751 chunk_rec->type_flags,
8752 block_group_rec->objectid,
8753 block_group_rec->type,
8754 block_group_rec->offset,
8755 block_group_rec->offset,
8756 block_group_rec->objectid,
8757 block_group_rec->flags);
/* Take the block group off its list and remember it on the chunk record. */
8760 list_del_init(&block_group_rec->list);
8761 chunk_rec->bg_rec = block_group_rec;
8766 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8767 chunk_rec->objectid,
8772 chunk_rec->type_flags);
/* Expected length of each device extent backing this chunk. */
8779 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8780 chunk_rec->num_stripes);
8781 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8782 devid = chunk_rec->stripes[i].devid;
8783 offset = chunk_rec->stripes[i].offset;
8784 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8785 devid, offset, length);
8786 if (dev_extent_item) {
8787 dev_extent_rec = container_of(dev_extent_item,
8788 struct device_extent_record,
8790 if (dev_extent_rec->objectid != devid ||
8791 dev_extent_rec->offset != offset ||
8792 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8793 dev_extent_rec->length != length) {
8796 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8797 chunk_rec->objectid,
8800 chunk_rec->stripes[i].devid,
8801 chunk_rec->stripes[i].offset,
8802 dev_extent_rec->objectid,
8803 dev_extent_rec->offset,
8804 dev_extent_rec->length);
/* Move the device extent onto the chunk's own list of extents. */
8807 list_move(&dev_extent_rec->chunk_list,
8808 &chunk_rec->dextents);
8813 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8814 chunk_rec->objectid,
8817 chunk_rec->stripes[i].devid,
8818 chunk_rec->stripes[i].offset);
8825 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk in @chunk_cache and sort the chunk
 * records onto the caller's @good / @rebuild / @bad lists according to the
 * result (0 -> good, >0 -> rebuild); each list pointer may be NULL, as the
 * visible conditions test them before use. Afterwards the block groups still
 * on block_group_cache->block_groups and the device extents on
 * dev_extent_cache->no_chunk_orphans are reported as having no chunk.
 */
8826 int check_chunks(struct cache_tree *chunk_cache,
8827 struct block_group_tree *block_group_cache,
8828 struct device_extent_tree *dev_extent_cache,
8829 struct list_head *good, struct list_head *bad,
8830 struct list_head *rebuild, int silent)
8832 struct cache_extent *chunk_item;
8833 struct chunk_record *chunk_rec;
8834 struct block_group_record *bg_rec;
8835 struct device_extent_record *dext_rec;
8839 chunk_item = first_cache_extent(chunk_cache);
8840 while (chunk_item) {
8841 chunk_rec = container_of(chunk_item, struct chunk_record,
8843 err = check_chunk_refs(chunk_rec, block_group_cache,
8844 dev_extent_cache, silent);
/* Classify the chunk by the check result. */
8847 if (err == 0 && good)
8848 list_add_tail(&chunk_rec->list, good);
8849 if (err > 0 && rebuild)
8850 list_add_tail(&chunk_rec->list, rebuild);
8852 list_add_tail(&chunk_rec->list, bad);
8853 chunk_item = next_cache_extent(chunk_item);
/* check_chunk_refs() unlinks matched block groups, so these are the unmatched ones. */
8856 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8859 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8867 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8871 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of the device extents cached for dev_rec->devid and compare
 * the total with dev_rec->byte_used, printing a message when they differ.
 * Each visited device extent is also unlinked from its device_list.
 */
8882 static int check_device_used(struct device_record *dev_rec,
8883 struct device_extent_tree *dext_cache)
8885 struct cache_extent *cache;
8886 struct device_extent_record *dev_extent_rec;
/* Start at the first cached extent for this devid. */
8889 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8891 dev_extent_rec = container_of(cache,
8892 struct device_extent_record,
/* Entries for a different device are detected by objectid. */
8894 if (dev_extent_rec->objectid != dev_rec->devid)
8897 list_del_init(&dev_extent_rec->device_list);
8898 total_byte += dev_extent_rec->length;
8899 cache = next_cache_extent(cache);
8902 if (total_byte != dev_rec->byte_used) {
8904 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8905 total_byte, dev_rec->byte_used, dev_rec->objectid,
8906 dev_rec->type, dev_rec->offset);
8913 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() for every device record in the @dev_cache rb-tree,
 * then report the device extents on dev_extent_cache->no_device_orphans.
 */
8914 static int check_devices(struct rb_root *dev_cache,
8915 struct device_extent_tree *dev_extent_cache)
8917 struct rb_node *dev_node;
8918 struct device_record *dev_rec;
8919 struct device_extent_record *dext_rec;
/* Visit the devices in rb-tree order. */
8923 dev_node = rb_first(dev_cache);
8925 dev_rec = container_of(dev_node, struct device_record, node);
8926 err = check_device_used(dev_rec, dev_extent_cache);
8930 dev_node = rb_next(dev_node);
/* check_device_used() unlinks each extent it visits from its device_list. */
8932 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8935 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8936 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 */
8943 static int add_root_item_to_list(struct list_head *head,
8944 u64 objectid, u64 bytenr, u64 last_snapshot,
8945 u8 level, u8 drop_level,
8946 int level_size, struct btrfs_key *drop_key)
8949 struct root_item_record *ri_rec;
8950 ri_rec = malloc(sizeof(*ri_rec));
8953 ri_rec->bytenr = bytenr;
8954 ri_rec->objectid = objectid;
8955 ri_rec->level = level;
8956 ri_rec->level_size = level_size;
8957 ri_rec->drop_level = drop_level;
8958 ri_rec->last_snapshot = last_snapshot;
/*
 * NOTE(review): callers in this file pass NULL for drop_key when the tree is
 * not being dropped -- confirm this copy is guarded against a NULL drop_key.
 */
8960 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8961 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list of root_item_record entries, unlinking them one at a time from
 * the front.
 * NOTE(review): presumably each unlinked record is also freed -- confirm.
 */
8966 static void free_root_item_list(struct list_head *list)
8968 struct root_item_record *ri_rec;
8970 while (!list_empty(list)) {
8971 ri_rec = list_first_entry(list, struct root_item_record,
8973 list_del_init(&ri_rec->list);
/*
 * Consume the root_item_record entries on @list. For each one the root's tree
 * block is read (through the tree root, at rec->bytenr with rec->level_size),
 * registered with add_root_to_pending() and processed with run_next_block();
 * the record is then removed from the list. A further run_next_block() call
 * with a NULL record follows the per-root loop.
 */
8978 static int deal_root_from_list(struct list_head *list,
8979 struct btrfs_root *root,
8980 struct block_info *bits,
8982 struct cache_tree *pending,
8983 struct cache_tree *seen,
8984 struct cache_tree *reada,
8985 struct cache_tree *nodes,
8986 struct cache_tree *extent_cache,
8987 struct cache_tree *chunk_cache,
8988 struct rb_root *dev_cache,
8989 struct block_group_tree *block_group_cache,
8990 struct device_extent_tree *dev_extent_cache)
8995 while (!list_empty(list)) {
8996 struct root_item_record *rec;
8997 struct extent_buffer *buf;
/* Always work on the head of the list. */
8998 rec = list_entry(list->next,
8999 struct root_item_record, list);
9001 buf = read_tree_block(root->fs_info->tree_root,
9002 rec->bytenr, rec->level_size, 0);
/* A buffer that is not up to date is released rather than processed. */
9003 if (!extent_buffer_uptodate(buf)) {
9004 free_extent_buffer(buf);
9008 ret = add_root_to_pending(buf, extent_cache, pending,
9009 seen, nodes, rec->objectid);
9013 * To rebuild extent tree, we need deal with snapshot
9014 * one by one, otherwise we deal with node firstly which
9015 * can maximize readahead.
9018 ret = run_next_block(root, bits, bits_nr, &last,
9019 pending, seen, reada, nodes,
9020 extent_cache, chunk_cache,
9021 dev_cache, block_group_cache,
9022 dev_extent_cache, rec);
9026 free_extent_buffer(buf);
9027 list_del(&rec->list);
/* Pass with no specific root record. */
9033 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9034 reada, nodes, extent_cache, chunk_cache,
9035 dev_cache, block_group_cache,
9036 dev_extent_cache, NULL);
/*
 * Top-level driver for the chunk and extent checks.
 *
 * Visible steps:
 *  - initialise the local caches and trees and publish some of them through
 *    root->fs_info (excluded_extents, fsck_extent_cache, free_extent_hook,
 *    corrupt_blocks);
 *  - queue the tree root and the chunk root on normal_trees, then walk the
 *    ROOT_ITEM entries of the tree root, queueing each root on normal_trees
 *    or, when its drop_progress objectid is non-zero, on dropping_trees;
 *  - process both lists with deal_root_from_list();
 *  - run check_chunks(), check_extent_refs() and check_devices();
 *  - tear everything down and reset the fs_info hooks.
 */
9046 static int check_chunks_and_extents(struct btrfs_root *root)
9048 struct rb_root dev_cache;
9049 struct cache_tree chunk_cache;
9050 struct block_group_tree block_group_cache;
9051 struct device_extent_tree dev_extent_cache;
9052 struct cache_tree extent_cache;
9053 struct cache_tree seen;
9054 struct cache_tree pending;
9055 struct cache_tree reada;
9056 struct cache_tree nodes;
9057 struct extent_io_tree excluded_extents;
9058 struct cache_tree corrupt_blocks;
9059 struct btrfs_path path;
9060 struct btrfs_key key;
9061 struct btrfs_key found_key;
9063 struct block_info *bits;
9065 struct extent_buffer *leaf;
9067 struct btrfs_root_item ri;
9068 struct list_head dropping_trees;
9069 struct list_head normal_trees;
9070 struct btrfs_root *root1;
/* Set up every cache, tree and list used by the scan. */
9075 dev_cache = RB_ROOT;
9076 cache_tree_init(&chunk_cache);
9077 block_group_tree_init(&block_group_cache);
9078 device_extent_tree_init(&dev_extent_cache);
9080 cache_tree_init(&extent_cache);
9081 cache_tree_init(&seen);
9082 cache_tree_init(&pending);
9083 cache_tree_init(&nodes);
9084 cache_tree_init(&reada);
9085 cache_tree_init(&corrupt_blocks);
9086 extent_io_tree_init(&excluded_extents);
9087 INIT_LIST_HEAD(&dropping_trees);
9088 INIT_LIST_HEAD(&normal_trees);
/* Expose the local structures through fs_info for the duration of the check. */
9091 root->fs_info->excluded_extents = &excluded_extents;
9092 root->fs_info->fsck_extent_cache = &extent_cache;
9093 root->fs_info->free_extent_hook = free_extent_hook;
9094 root->fs_info->corrupt_blocks = &corrupt_blocks;
9098 bits = malloc(bits_nr * sizeof(struct block_info));
9104 if (ctx.progress_enabled) {
9105 ctx.tp = TASK_EXTENTS;
9106 task_start(ctx.info);
/* The tree root and the chunk root are queued explicitly. */
9110 root1 = root->fs_info->tree_root;
9111 level = btrfs_header_level(root1->node);
9112 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9113 root1->node->start, 0, level, 0,
9114 root1->nodesize, NULL);
9117 root1 = root->fs_info->chunk_root;
9118 level = btrfs_header_level(root1->node);
9119 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9120 root1->node->start, 0, level, 0,
9121 root1->nodesize, NULL);
/* Walk the ROOT_ITEM entries stored in the tree root. */
9124 btrfs_init_path(&path);
9127 key.type = BTRFS_ROOT_ITEM_KEY;
9128 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9133 leaf = path.nodes[0];
9134 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf. */
9135 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9136 ret = btrfs_next_leaf(root, &path);
9139 leaf = path.nodes[0];
9140 slot = path.slots[0];
9142 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9143 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9144 unsigned long offset;
/* Copy the root item out of the leaf into the local ri. */
9147 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9148 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9149 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid selects the normal list. */
9150 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9151 level = btrfs_root_level(&ri);
9152 level_size = root->nodesize;
9153 ret = add_root_item_to_list(&normal_trees,
9155 btrfs_root_bytenr(&ri),
9156 last_snapshot, level,
9157 0, level_size, NULL);
9161 level = btrfs_root_level(&ri);
9162 level_size = root->nodesize;
9163 objectid = found_key.objectid;
9164 btrfs_disk_key_to_cpu(&found_key,
9166 ret = add_root_item_to_list(&dropping_trees,
9168 btrfs_root_bytenr(&ri),
9169 last_snapshot, level,
9171 level_size, &found_key);
9178 btrfs_release_path(&path);
9181 * check_block can return -EAGAIN if it fixes something, please keep
9182 * this in mind when dealing with return values from these functions, if
9183 * we get -EAGAIN we want to fall through and restart the loop.
9185 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9186 &seen, &reada, &nodes, &extent_cache,
9187 &chunk_cache, &dev_cache, &block_group_cache,
9194 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9195 &pending, &seen, &reada, &nodes,
9196 &extent_cache, &chunk_cache, &dev_cache,
9197 &block_group_cache, &dev_extent_cache);
/* Checks that run on the data gathered above. */
9204 ret = check_chunks(&chunk_cache, &block_group_cache,
9205 &dev_extent_cache, NULL, NULL, NULL, 0);
9212 ret = check_extent_refs(root, &extent_cache);
9219 ret = check_devices(&dev_cache, &dev_extent_cache);
9224 task_stop(ctx.info);
/* First cleanup sequence: also resets the fs_info pointers set earlier. */
9226 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9227 extent_io_tree_cleanup(&excluded_extents);
9228 root->fs_info->fsck_extent_cache = NULL;
9229 root->fs_info->free_extent_hook = NULL;
9230 root->fs_info->corrupt_blocks = NULL;
9231 root->fs_info->excluded_extents = NULL;
9234 free_chunk_cache_tree(&chunk_cache);
9235 free_device_cache_tree(&dev_cache);
9236 free_block_group_tree(&block_group_cache);
9237 free_device_extent_tree(&dev_extent_cache);
9238 free_extent_cache_tree(&seen);
9239 free_extent_cache_tree(&pending);
9240 free_extent_cache_tree(&reada);
9241 free_extent_cache_tree(&nodes);
/*
 * Second cleanup sequence: additionally frees the extent record cache and
 * both root item lists, and leaves the fs_info pointers in place.
 */
9244 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9245 free_extent_cache_tree(&seen);
9246 free_extent_cache_tree(&pending);
9247 free_extent_cache_tree(&reada);
9248 free_extent_cache_tree(&nodes);
9249 free_chunk_cache_tree(&chunk_cache);
9250 free_block_group_tree(&block_group_cache);
9251 free_device_cache_tree(&dev_cache);
9252 free_device_extent_tree(&dev_extent_cache);
9253 free_extent_record_cache(root->fs_info, &extent_cache);
9254 free_root_item_list(&normal_trees);
9255 free_root_item_list(&dropping_trees);
9256 extent_io_tree_cleanup(&excluded_extents);
9261 * Check backrefs of a tree block given by @bytenr or @eb.
9263 * @root: the root containing the @bytenr or @eb
9264 * @eb: tree block extent buffer, can be NULL
9265 * @bytenr: bytenr of the tree block to search
9266 * @level: tree level of the tree block
9267 * @owner: owner of the tree block
9269 * Return >0 for any error found and output error message
9270 * Return 0 for no error found
/*
 * Look up the extent/metadata item for the tree block at @bytenr in the
 * extent root and check it: the TREE_BLOCK flag, the generation against the
 * header of @eb, the level against @level, and the ref count for non-fs
 * trees. The inline refs are then scanned for a TREE_BLOCK_REF naming this
 * root or @owner, or for a SHARED_BLOCK_REF that checks out recursively; a
 * standalone TREE_BLOCK_REF item keyed by root->objectid is searched as well.
 * Problems are accumulated in err as BACKREF_MISSING / BACKREF_MISMATCH bits.
 */
9272 static int check_tree_block_ref(struct btrfs_root *root,
9273 struct extent_buffer *eb, u64 bytenr,
9274 int level, u64 owner)
9276 struct btrfs_key key;
9277 struct btrfs_root *extent_root = root->fs_info->extent_root;
9278 struct btrfs_path path;
9279 struct btrfs_extent_item *ei;
9280 struct btrfs_extent_inline_ref *iref;
9281 struct extent_buffer *leaf;
9287 u32 nodesize = root->nodesize;
9294 btrfs_init_path(&path);
/* Key type depends on whether the filesystem uses skinny metadata items. */
9295 key.objectid = bytenr;
9296 if (btrfs_fs_incompat(root->fs_info,
9297 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9298 key.type = BTRFS_METADATA_ITEM_KEY;
9300 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Offset (u64)-1 with a following "previous item" step locates the item for bytenr. */
9301 key.offset = (u64)-1;
9303 /* Search for the backref in extent tree */
9304 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9306 err |= BACKREF_MISSING;
9309 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9311 err |= BACKREF_MISSING;
9315 leaf = path.nodes[0];
9316 slot = path.slots[0];
9317 btrfs_item_key_to_cpu(leaf, &key, slot);
9319 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is the key offset and inline refs follow the
 * extent item directly. Otherwise a btrfs_tree_block_info sits in between
 * and carries the level.
 */
9321 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9322 skinny_level = (int)key.offset;
9323 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9325 struct btrfs_tree_block_info *info;
9327 info = (struct btrfs_tree_block_info *)(ei + 1);
9328 skinny_level = btrfs_tree_block_level(leaf, info);
9329 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9336 if (!(btrfs_extent_flags(leaf, ei) &
9337 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9339 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9340 key.objectid, nodesize,
9341 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9342 err = BACKREF_MISMATCH;
9344 header_gen = btrfs_header_generation(eb);
9345 extent_gen = btrfs_extent_generation(leaf, ei);
9346 if (header_gen != extent_gen) {
9348 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9349 key.objectid, nodesize, header_gen,
9351 err = BACKREF_MISMATCH;
9353 if (level != skinny_level) {
9355 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9356 key.objectid, nodesize, level, skinny_level);
9357 err = BACKREF_MISMATCH;
/* Blocks of non-fs trees are expected to have exactly one reference. */
9359 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9361 "extent[%llu %u] is referred by other roots than %llu",
9362 key.objectid, nodesize, root->objectid);
9363 err = BACKREF_MISMATCH;
9368 * Iterate the extent/metadata item to find the exact backref
9370 item_size = btrfs_item_size_nr(leaf, slot);
9371 ptr = (unsigned long)iref;
9372 end = (unsigned long)ei + item_size;
9374 iref = (struct btrfs_extent_inline_ref *)ptr;
9375 type = btrfs_extent_inline_ref_type(leaf, iref);
9376 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9378 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9379 (offset == root->objectid || offset == owner)) {
9381 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9382 /* Check if the backref points to valid referencer */
9383 found_ref = !check_tree_block_ref(root, NULL, offset,
/* Advance by the size of this inline ref type. */
9389 ptr += btrfs_extent_inline_ref_size(type);
9393 * Inlined extent item doesn't have what we need, check
9394 * TREE_BLOCK_REF_KEY
9397 btrfs_release_path(&path);
9398 key.objectid = bytenr;
9399 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9400 key.offset = root->objectid;
9402 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9407 err |= BACKREF_MISSING;
9409 btrfs_release_path(&path);
/* The message is only printed when an extent buffer was supplied. */
9410 if (eb && (err & BACKREF_MISSING))
9411 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9412 bytenr, nodesize, owner, level);
9417 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9419 * Return >0 any error found and output error message
9420 * Return 0 for no error found
/*
 * Check the EXTENT_DATA item at @slot of leaf @eb. Inline extents and holes
 * (disk_bytenr == 0) are excluded. For regular extents the byte counts are
 * checked for sector alignment and added to the global data_bytes_allocated /
 * data_bytes_referenced counters, the matching EXTENT_ITEM is looked up in
 * the extent root and checked for the DATA flag and generation, and a data
 * backref for this root is searched for, first inline and then as a
 * standalone EXTENT_DATA_REF item.
 */
9422 static int check_extent_data_item(struct btrfs_root *root,
9423 struct extent_buffer *eb, int slot)
9425 struct btrfs_file_extent_item *fi;
9426 struct btrfs_path path;
9427 struct btrfs_root *extent_root = root->fs_info->extent_root;
9428 struct btrfs_key fi_key;
9429 struct btrfs_key dbref_key;
9430 struct extent_buffer *leaf;
9431 struct btrfs_extent_item *ei;
9432 struct btrfs_extent_inline_ref *iref;
9433 struct btrfs_extent_data_ref *dref;
9435 u64 file_extent_gen;
9438 u64 extent_num_bytes;
9446 int found_dbackref = 0;
9450 btrfs_item_key_to_cpu(eb, &fi_key, slot);
9451 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9452 file_extent_gen = btrfs_file_extent_generation(eb, fi);
9454 /* Nothing to check for hole and inline data extents */
9455 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9456 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9459 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9460 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9461 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9463 /* Check unaligned disk_num_bytes and num_bytes */
9464 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9466 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9467 fi_key.objectid, fi_key.offset, disk_num_bytes,
9469 err |= BYTES_UNALIGNED;
9471 data_bytes_allocated += disk_num_bytes;
9473 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9475 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9476 fi_key.objectid, fi_key.offset, extent_num_bytes,
9478 err |= BYTES_UNALIGNED;
9480 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is accepted as a backref root below. */
9482 owner = btrfs_header_owner(eb);
9484 /* Check the extent item of the file extent in extent tree */
9485 btrfs_init_path(&path);
9486 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9487 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9488 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9490 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9492 err |= BACKREF_MISSING;
/* Note: the slot parameter is reused for the position inside the extent tree leaf. */
9496 leaf = path.nodes[0];
9497 slot = path.slots[0];
9498 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9500 extent_flags = btrfs_extent_flags(leaf, ei);
9501 extent_gen = btrfs_extent_generation(leaf, ei);
9503 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9505 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9506 disk_bytenr, disk_num_bytes,
9507 BTRFS_EXTENT_FLAG_DATA);
9508 err |= BACKREF_MISMATCH;
/* A file extent generation older than the extent item's is reported as a mismatch. */
9511 if (file_extent_gen < extent_gen) {
9513 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9514 disk_bytenr, disk_num_bytes, file_extent_gen,
9516 err |= BACKREF_MISMATCH;
9519 /* Check data backref inside that extent item */
9520 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9521 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9522 ptr = (unsigned long)iref;
9523 end = (unsigned long)ei + item_size;
9525 iref = (struct btrfs_extent_inline_ref *)ptr;
9526 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For data refs the btrfs_extent_data_ref overlays the inline ref's offset field. */
9527 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9529 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9530 ref_root = btrfs_extent_data_ref_root(leaf, dref);
9531 if (ref_root == owner || ref_root == root->objectid)
9533 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* A shared data ref counts when the tree block it names passes its own check. */
9534 found_dbackref = !check_tree_block_ref(root, NULL,
9535 btrfs_extent_inline_ref_offset(leaf, iref),
9541 ptr += btrfs_extent_inline_ref_size(type);
9544 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9545 if (!found_dbackref) {
9546 btrfs_release_path(&path);
9548 btrfs_init_path(&path);
9549 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9550 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Standalone data refs are keyed by a hash of (root, inode objectid, file offset). */
9551 dbref_key.offset = hash_extent_data_ref(root->objectid,
9552 fi_key.objectid, fi_key.offset);
9554 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9555 &dbref_key, &path, 0, 0);
9560 if (!found_dbackref)
9561 err |= BACKREF_MISSING;
9563 btrfs_release_path(&path);
9564 if (err & BACKREF_MISSING) {
9565 error("data extent[%llu %llu] backref lost",
9566 disk_bytenr, disk_num_bytes);
9572 * Get real tree block level for the case like shared block
9573 * Return >= 0 as tree level
9574 * Return <0 for error
/*
 * Resolve the level of the tree block at bytenr from two sources.
 * The extent tree item supplies the generation (kept as transid for reading
 * the block) and a backref level: key.offset when the key found is a
 * METADATA_ITEM, and in the other visible branch the level stored in the
 * btrfs_tree_block_info placed right after the extent item.  The block is
 * then read and the level from its header is compared with the backref
 * level; header_level is the value returned on the visible success path.
 * NOTE(review): this listing omits short source lines (braces, several
 * declarations, conditions and gotos), so the error paths are not visible.
 */
9576 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9578 struct extent_buffer *eb;
9579 struct btrfs_path path;
9580 struct btrfs_key key;
9581 struct btrfs_extent_item *ei;
9584 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9589 /* Search extent tree for extent generation and level */
9590 key.objectid = bytenr;
9591 key.type = BTRFS_METADATA_ITEM_KEY;
9592 key.offset = (u64)-1;
9594 btrfs_init_path(&path);
/* Search with the largest offset, then step back to the item for bytenr. */
9595 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9598 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9606 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9607 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9608 struct btrfs_extent_item);
9609 flags = btrfs_extent_flags(path.nodes[0], ei);
/* An extent item without the TREE_BLOCK flag takes a separate branch; its body is not visible here. */
9610 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9615 /* Get transid for later read_tree_block() check */
9616 transid = btrfs_extent_generation(path.nodes[0], ei);
9618 /* Get backref level as one source */
9619 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9620 backref_level = key.offset;
9622 struct btrfs_tree_block_info *info;
9624 info = (struct btrfs_tree_block_info *)(ei + 1);
9625 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9627 btrfs_release_path(&path);
9629 /* Get level from tree block as an alternative source */
9630 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9631 if (!extent_buffer_uptodate(eb)) {
9632 free_extent_buffer(eb);
9635 header_level = btrfs_header_level(eb);
9636 free_extent_buffer(eb);
/* NOTE(review): the statement taken when the two levels disagree is not visible in this listing. */
9638 if (header_level != backref_level)
9640 return header_level;
/* This second release presumably belongs to a shared error exit - confirm against the full source. */
9643 btrfs_release_path(&path);
9648 * Check if a tree block backref is valid (points to a valid tree block)
9649 * if level == -1, level will be resolved
9650 * Return >0 for any error found and print error message
/*
 * Check that root root_id really references the tree block at bytenr.
 * Visible steps: read the root with btrfs_read_fs_root(), read the block to
 * take its first key, search the root for that key with
 * path.lowest_level = level, then compare bytenr and level of the node found
 * at path.nodes[level].  Problems are accumulated in err as
 * REFERENCER_MISSING or REFERENCER_MISMATCH bits and reported via error().
 * NOTE(review): this listing omits short source lines (braces, conditions,
 * gotos, the final return), so only the visible statements are described.
 */
9652 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9653 u64 bytenr, int level)
9655 struct btrfs_root *root;
9656 struct btrfs_key key;
9657 struct btrfs_path path;
9658 struct extent_buffer *eb;
9659 struct extent_buffer *node;
9660 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9664 /* Query level for level == -1 special case */
9666 level = query_tree_block_level(fs_info, bytenr);
9668 err |= REFERENCER_MISSING;
/* Look up the referencing root by id; offset (u64)-1 is used for the root key. */
9672 key.objectid = root_id;
9673 key.type = BTRFS_ROOT_ITEM_KEY;
9674 key.offset = (u64)-1;
9676 root = btrfs_read_fs_root(fs_info, &key);
9678 err |= REFERENCER_MISSING;
9682 /* Read out the tree block to get item/node key */
/* The last argument is 0 here, unlike query_tree_block_level() which passes the extent generation. */
9683 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9684 if (!extent_buffer_uptodate(eb)) {
9685 err |= REFERENCER_MISSING;
9686 free_extent_buffer(eb);
9690 /* Empty tree, no need to check key */
9691 if (!btrfs_header_nritems(eb) && !level) {
9692 free_extent_buffer(eb);
/* First key is taken as a node key or as an item key; the selecting condition (presumably on level) is not visible. */
9697 btrfs_node_key_to_cpu(eb, &key, 0);
9699 btrfs_item_key_to_cpu(eb, &key, 0);
9701 free_extent_buffer(eb);
9703 btrfs_init_path(&path);
9704 path.lowest_level = level;
9705 /* Search with the first key, to ensure we can reach it */
9706 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9708 err |= REFERENCER_MISSING;
9712 node = path.nodes[level];
/* NOTE(review): nodesize is declared u32 but is printed with %d in the messages below. */
9713 if (btrfs_header_bytenr(node) != bytenr) {
9715 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9716 bytenr, nodesize, bytenr,
9717 btrfs_header_bytenr(node));
9718 err |= REFERENCER_MISMATCH;
9720 if (btrfs_header_level(node) != level) {
9722 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9723 bytenr, nodesize, level,
9724 btrfs_header_level(node));
9725 err |= REFERENCER_MISMATCH;
9729 btrfs_release_path(&path);
/* Two message variants follow, one without and one with the level; level is an int printed with %u in the second. */
9731 if (err & REFERENCER_MISSING) {
9733 error("extent [%llu %d] lost referencer (owner: %llu)",
9734 bytenr, nodesize, root_id);
9737 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9738 bytenr, nodesize, root_id, level);
9745 * Check referencer for shared block backref
9746 * If level == -1, this function will resolve the level.
/*
 * Check a shared block backref: read the parent tree block and look for a
 * node pointer equal to bytenr.  The level may be resolved with
 * query_tree_block_level(), and level + 1 is compared with the level in the
 * parent header.  When no matching pointer was found the error is reported
 * and REFERENCER_MISSING is returned.
 * NOTE(review): this listing omits short source lines (braces, conditions,
 * gotos, the success return), so only the visible statements are described.
 */
9748 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9749 u64 parent, u64 bytenr, int level)
9751 struct extent_buffer *eb;
9752 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9754 int found_parent = 0;
9757 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9758 if (!extent_buffer_uptodate(eb))
9762 level = query_tree_block_level(fs_info, bytenr);
/* The parent is expected to sit exactly one level above the referenced block. */
9766 if (level + 1 != btrfs_header_level(eb))
9769 nr = btrfs_header_nritems(eb);
/* Scan every block pointer stored in the parent. */
9770 for (i = 0; i < nr; i++) {
9771 if (bytenr == btrfs_node_blockptr(eb, i)) {
9777 free_extent_buffer(eb);
/* NOTE(review): level is an int but is printed with %u below. */
9778 if (!found_parent) {
9780 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9781 bytenr, nodesize, parent, level);
9782 return REFERENCER_MISSING;
9788 * Check referencer for normal (inlined) data ref
9789 * If len == 0, it will be resolved by searching in extent tree
/*
 * Check a keyed or inlined EXTENT_DATA_REF backref.
 * Visible steps: look up the extent item for bytenr in the extent tree, read
 * root root_id, then walk the EXTENT_DATA items of inode objectid and test
 * each one against bytenr, len and the backref offset.  found_count is
 * compared with count at the end; a difference is reported and
 * REFERENCER_MISSING is returned.
 * NOTE(review): this listing omits short source lines (braces, conditions,
 * gotos, the counter increment, the success return), so only the visible
 * statements are described.
 */
9791 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9792 u64 root_id, u64 objectid, u64 offset,
9793 u64 bytenr, u64 len, u32 count)
9795 struct btrfs_root *root;
9796 struct btrfs_root *extent_root = fs_info->extent_root;
9797 struct btrfs_key key;
9798 struct btrfs_path path;
9799 struct extent_buffer *leaf;
9800 struct btrfs_file_extent_item *fi;
9801 u32 found_count = 0;
/* Extent tree lookup for bytenr; presumably the len == 0 resolution mentioned above this function - the guarding condition is not visible. */
9806 key.objectid = bytenr;
9807 key.type = BTRFS_EXTENT_ITEM_KEY;
9808 key.offset = (u64)-1;
9810 btrfs_init_path(&path);
9811 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9814 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9817 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9818 if (key.objectid != bytenr ||
9819 key.type != BTRFS_EXTENT_ITEM_KEY)
9822 btrfs_release_path(&path);
/* Read the root that is supposed to own the referencing file extents. */
9824 key.objectid = root_id;
9825 key.type = BTRFS_ROOT_ITEM_KEY;
9826 key.offset = (u64)-1;
9827 btrfs_init_path(&path);
9829 root = btrfs_read_fs_root(fs_info, &key);
9833 key.objectid = objectid;
9834 key.type = BTRFS_EXTENT_DATA_KEY;
9836 * It can be nasty as data backref offset is
9837 * file offset - file extent offset, which is smaller or
9838 * equal to original backref offset. The only special case is
9839 * overflow. So we need to special check and do further search.
/* Bitwise AND binds tighter than the conditional operator, so this tests bit 63 of offset and starts the search at 0 when it is set. */
9841 key.offset = offset & (1ULL << 63) ? 0 : offset;
9843 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9848 * Search afterwards to get correct one
9849 * NOTE: As we must do a comprehensive check on the data backref to
9850 * make sure the dref count also matches, we must iterate all file
9851 * extents for that inode.
9854 leaf = path.nodes[0];
9855 slot = path.slots[0];
9857 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Items of another inode or another type are not file extents of interest; the statement taken here is not visible. */
9858 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9860 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9862 * Except normal disk bytenr and disk num bytes, we still
9863 * need to do extra check on dbackref offset as
9864 * dbackref offset = file_offset - file_extent_offset
/* NOTE(review): the right-hand side of the last comparison is on a line missing from this listing. */
9866 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9867 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9868 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9872 ret = btrfs_next_item(root, &path);
9877 btrfs_release_path(&path);
9878 if (found_count != count) {
9880 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9881 bytenr, len, root_id, objectid, offset, count, found_count);
9882 return REFERENCER_MISSING;
9888 * Check if the referencer of a shared data backref exists
/*
 * Check a shared data backref: read the parent block and scan its items for
 * an EXTENT_DATA item, not of inline type, whose disk bytenr equals bytenr.
 * When none was found the error is reported and REFERENCER_MISSING is
 * returned.
 * NOTE(review): this listing omits short source lines (braces, continue and
 * break statements, gotos, the success return), so only the visible
 * statements are described.
 */
9890 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9891 u64 parent, u64 bytenr)
9893 struct extent_buffer *eb;
9894 struct btrfs_key key;
9895 struct btrfs_file_extent_item *fi;
9896 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9898 int found_parent = 0;
9901 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9902 if (!extent_buffer_uptodate(eb))
9905 nr = btrfs_header_nritems(eb);
/* Item keys are read from the parent, i.e. the parent is treated as a leaf here. */
9906 for (i = 0; i < nr; i++) {
9907 btrfs_item_key_to_cpu(eb, &key, i);
9908 if (key.type != BTRFS_EXTENT_DATA_KEY)
9911 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9912 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9915 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9922 free_extent_buffer(eb);
9923 if (!found_parent) {
9924 error("shared extent %llu referencer lost (parent: %llu)",
9926 return REFERENCER_MISSING;
/*
 * check_extent_item: validate one EXTENT_ITEM or METADATA_ITEM at slot of eb.
 * Visible steps: add the extent size to the global bytes_used, reject items
 * smaller than struct btrfs_extent_item, test metadata extents for stripe
 * crossing, locate the first inline ref (after the btrfs_tree_block_info for
 * old style metadata EXTENT_ITEMs), then dispatch every inline ref by type to
 * the matching backref checker.  Error bits seen here are
 * CROSSING_STRIPE_BOUNDARY, ITEM_SIZE_MISMATCH and UNKNOWN_TYPE.
 * NOTE(review): this listing omits short source lines (braces, labels,
 * conditions, gotos, breaks, the return), so only the visible statements are
 * described.
 */
9932 * This function will check a given extent item, including its backref and
9933 * itself (like crossing stripe boundary and type)
9935 * Since we don't use extent_record anymore, introduce new error bit
9937 static int check_extent_item(struct btrfs_fs_info *fs_info,
9938 struct extent_buffer *eb, int slot)
9940 struct btrfs_extent_item *ei;
9941 struct btrfs_extent_inline_ref *iref;
9942 struct btrfs_extent_data_ref *dref;
9946 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9947 u32 item_size = btrfs_item_size_nr(eb, slot);
9952 struct btrfs_key key;
9956 btrfs_item_key_to_cpu(eb, &key, slot);
9957 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9958 bytes_used += key.offset;
9960 bytes_used += nodesize;
9962 if (item_size < sizeof(*ei)) {
9964 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9965 * old thing when on disk format is still un-determined.
9966 * No need to care about it anymore
9968 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9972 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9973 flags = btrfs_extent_flags(eb, ei);
9975 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* NOTE(review): the file-level global_info is passed here rather than the fs_info parameter - confirm they always refer to the same filesystem. */
9977 if (metadata && check_crossing_stripes(global_info, key.objectid,
9979 error("bad metadata [%llu, %llu) crossing stripe boundary",
9980 key.objectid, key.objectid + nodesize);
9981 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline refs start right after the extent item, or after the tree block info for old style metadata. */
9984 ptr = (unsigned long)(ei + 1);
9986 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9987 /* Old EXTENT_ITEM metadata */
9988 struct btrfs_tree_block_info *info;
9990 info = (struct btrfs_tree_block_info *)ptr;
9991 level = btrfs_tree_block_level(eb, info);
9992 ptr += sizeof(struct btrfs_tree_block_info);
9994 /* New METADATA_ITEM */
9997 end = (unsigned long)ei + item_size;
/* The condition guarding ITEM_SIZE_MISMATCH is not visible; presumably it compares ptr with end. */
10000 err |= ITEM_SIZE_MISMATCH;
10004 /* Now check every backref in this extent item */
10006 iref = (struct btrfs_extent_inline_ref *)ptr;
10007 type = btrfs_extent_inline_ref_type(eb, iref);
10008 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* For block refs the inline offset is passed as the root id or the parent bytenr of the checker called. */
10010 case BTRFS_TREE_BLOCK_REF_KEY:
10011 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10015 case BTRFS_SHARED_BLOCK_REF_KEY:
10016 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10020 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure overlays the offset field of the inline ref. */
10021 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10022 ret = check_extent_data_backref(fs_info,
10023 btrfs_extent_data_ref_root(eb, dref),
10024 btrfs_extent_data_ref_objectid(eb, dref),
10025 btrfs_extent_data_ref_offset(eb, dref),
10026 key.objectid, key.offset,
10027 btrfs_extent_data_ref_count(eb, dref));
10030 case BTRFS_SHARED_DATA_REF_KEY:
10031 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10035 error("extent[%llu %d %llu] has unknown ref type: %d",
10036 key.objectid, key.type, key.offset, type);
10037 err |= UNKNOWN_TYPE;
/* Advance to the next inline ref; the size depends on the ref type. */
10041 ptr += btrfs_extent_inline_ref_size(type);
10050 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the dev extent at slot of eb is referenced by its chunk.
 * The chunk item is looked up in the chunk tree with the chunk objectid and
 * chunk offset stored in the dev extent.  The chunk length is compared with
 * the dev extent length, and the stripes are scanned for one whose devid and
 * offset equal the objectid and offset of the dev extent key.  When no match
 * was recorded the error is reported and REFERENCER_MISSING is returned.
 * NOTE(review): this listing omits short source lines (braces, gotos, the
 * assignment of l, the statements that set found_chunk, the success return),
 * so only the visible statements are described.
 */
10052 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10053 struct extent_buffer *eb, int slot)
10055 struct btrfs_root *chunk_root = fs_info->chunk_root;
10056 struct btrfs_dev_extent *ptr;
10057 struct btrfs_path path;
10058 struct btrfs_key chunk_key;
10059 struct btrfs_key devext_key;
10060 struct btrfs_chunk *chunk;
10061 struct extent_buffer *l;
10065 int found_chunk = 0;
10068 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10069 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10070 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to. */
10072 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10073 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10074 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10076 btrfs_init_path(&path);
10077 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
/* l is presumably path.nodes[0]; its assignment is on a line missing from this listing. */
10082 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10083 if (btrfs_chunk_length(l, chunk) != length)
10086 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10087 for (i = 0; i < num_stripes; i++) {
10088 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10089 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10091 if (devid == devext_key.objectid &&
10092 offset == devext_key.offset) {
10098 btrfs_release_path(&path);
10099 if (!found_chunk) {
10101 "device extent[%llu, %llu, %llu] did not find the related chunk",
10102 devext_key.objectid, devext_key.offset, length);
10103 return REFERENCER_MISSING;
10109 * Check if the used space is correct with the dev item
/*
 * Check the bytes-used value of the dev item at slot of eb.
 * The lengths of the DEV_EXTENT items of that device id in the dev tree are
 * summed into total and compared with the bytes-used field of the dev item.
 * A failed initial search reports an error and returns REFERENCER_MISSING;
 * a sum that differs returns ACCOUNTING_MISMATCH.
 * NOTE(review): this listing omits short source lines (braces, the loop
 * header, breaks, one key field assignment, the success return), so only the
 * visible statements are described.
 */
10111 static int check_dev_item(struct btrfs_fs_info *fs_info,
10112 struct extent_buffer *eb, int slot)
10114 struct btrfs_root *dev_root = fs_info->dev_root;
10115 struct btrfs_dev_item *dev_item;
10116 struct btrfs_path path;
10117 struct btrfs_key key;
10118 struct btrfs_dev_extent *ptr;
10124 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10125 dev_id = btrfs_device_id(eb, dev_item);
10126 used = btrfs_device_bytes_used(eb, dev_item);
10128 key.objectid = dev_id;
10129 key.type = BTRFS_DEV_EXTENT_KEY;
10132 btrfs_init_path(&path);
10133 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* On this error path key is reloaded from the dev item itself so the message names that item. */
10135 btrfs_item_key_to_cpu(eb, &key, slot);
10136 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10137 key.objectid, key.type, key.offset);
10138 btrfs_release_path(&path);
10139 return REFERENCER_MISSING;
10142 /* Iterate dev_extents to calculate the used space of a device */
10144 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10146 if (key.objectid > dev_id)
10148 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10151 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10152 struct btrfs_dev_extent);
10153 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10155 ret = btrfs_next_item(dev_root, &path);
10159 btrfs_release_path(&path);
10161 if (used != total) {
/* NOTE(review): key is reloaded from the dev item here but the message prints fixed constants and dev_id instead of the key fields - confirm this is intended. */
10162 btrfs_item_key_to_cpu(eb, &key, slot);
10164 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10165 total, used, BTRFS_ROOT_TREE_OBJECTID,
10166 BTRFS_DEV_EXTENT_KEY, dev_id);
10167 return ACCOUNTING_MISMATCH;
10173 * Check a block group item with its referencer (chunk) and its used space
10174 * with extent/metadata item
/*
 * Check the block group item at slot of eb.
 * Part one looks up the chunk item at the block group start in the chunk
 * tree and compares its length (REFERENCER_MISSING or REFERENCER_MISMATCH).
 * Part two walks the extent tree from the block group start, sums the size
 * of the EXTENT_ITEM and METADATA_ITEM entries inside the block group, and
 * checks that the extent flags agree with the block group flags
 * (CHUNK_TYPE_MISMATCH).  The sum is finally compared with the used value of
 * the block group item (ACCOUNTING_MISMATCH).
 * NOTE(review): this listing omits short source lines (braces, the loop
 * header, breaks and gotos, part of some conditions, the return), so only
 * the visible statements are described.
 */
10176 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10177 struct extent_buffer *eb, int slot)
10179 struct btrfs_root *extent_root = fs_info->extent_root;
10180 struct btrfs_root *chunk_root = fs_info->chunk_root;
10181 struct btrfs_block_group_item *bi;
10182 struct btrfs_block_group_item bg_item;
10183 struct btrfs_path path;
10184 struct btrfs_key bg_key;
10185 struct btrfs_key chunk_key;
10186 struct btrfs_key extent_key;
10187 struct btrfs_chunk *chunk;
10188 struct extent_buffer *leaf;
10189 struct btrfs_extent_item *ei;
10190 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10198 btrfs_item_key_to_cpu(eb, &bg_key, slot);
10199 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* The item is copied out of the extent buffer into a local struct before its fields are read. */
10200 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10201 used = btrfs_block_group_used(&bg_item);
10202 bg_flags = btrfs_block_group_flags(&bg_item);
10204 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10205 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10206 chunk_key.offset = bg_key.objectid;
10208 btrfs_init_path(&path);
10209 /* Search for the referencer chunk */
10210 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10213 "block group[%llu %llu] did not find the related chunk item",
10214 bg_key.objectid, bg_key.offset);
10215 err |= REFERENCER_MISSING;
10217 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10218 struct btrfs_chunk);
/* The right-hand side of this length comparison is on a line missing from this listing. */
10219 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10222 "block group[%llu %llu] related chunk item length does not match",
10223 bg_key.objectid, bg_key.offset);
10224 err |= REFERENCER_MISMATCH;
10227 btrfs_release_path(&path);
10229 /* Search from the block group bytenr */
10230 extent_key.objectid = bg_key.objectid;
10231 extent_key.type = 0;
10232 extent_key.offset = 0;
10234 btrfs_init_path(&path);
10235 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10239 /* Iterate extent tree to account used space */
10241 leaf = path.nodes[0];
10242 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is used as the exclusive end of the block group. */
10243 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10246 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10247 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10249 if (extent_key.objectid < bg_key.objectid)
/* The METADATA_ITEM branch body is not visible; presumably it adds nodesize, mirroring check_extent_item. */
10252 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10255 total += extent_key.offset;
10257 ei = btrfs_item_ptr(leaf, path.slots[0],
10258 struct btrfs_extent_item);
10259 flags = btrfs_extent_flags(leaf, ei);
10260 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10261 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10263 "bad extent[%llu, %llu) type mismatch with chunk",
10264 extent_key.objectid,
10265 extent_key.objectid + extent_key.offset);
10266 err |= CHUNK_TYPE_MISMATCH;
10268 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10269 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10270 BTRFS_BLOCK_GROUP_METADATA))) {
10272 "bad extent[%llu, %llu) type mismatch with chunk",
10273 extent_key.objectid,
10274 extent_key.objectid + nodesize);
10275 err |= CHUNK_TYPE_MISMATCH;
10279 ret = btrfs_next_item(extent_root, &path);
10285 btrfs_release_path(&path);
10287 if (total != used) {
10289 "block group[%llu %llu] used %llu but extent items used %llu",
10290 bg_key.objectid, bg_key.offset, used, total);
10291 err |= ACCOUNTING_MISMATCH;
10297 * Check a chunk item.
10298 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at slot of eb.
 * Visible checks: the chunk length is aligned to the sector size, the type
 * has at least one bit of BTRFS_BLOCK_GROUP_TYPE_MASK and at most one
 * profile bit, a block group item keyed by chunk offset and length exists in
 * the extent tree with the same flags, and every stripe has a dev extent
 * whose chunk objectid, chunk offset and length match this chunk.
 * NOTE(review): this listing omits short source lines (braces, conditions,
 * gotos and continues, a label, the return), so only the visible statements
 * are described.
 */
10300 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10301 struct extent_buffer *eb, int slot)
10303 struct btrfs_root *extent_root = fs_info->extent_root;
10304 struct btrfs_root *dev_root = fs_info->dev_root;
10305 struct btrfs_path path;
10306 struct btrfs_key chunk_key;
10307 struct btrfs_key bg_key;
10308 struct btrfs_key devext_key;
10309 struct btrfs_chunk *chunk;
10310 struct extent_buffer *leaf;
10311 struct btrfs_block_group_item *bi;
10312 struct btrfs_block_group_item bg_item;
10313 struct btrfs_dev_extent *ptr;
10314 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10326 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10327 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10328 length = btrfs_chunk_length(eb, chunk);
10329 chunk_end = chunk_key.offset + length;
/* Only the length is tested for alignment here; the message prints the whole chunk range. */
10330 if (!IS_ALIGNED(length, sectorsize)) {
10331 error("chunk[%llu %llu) not aligned to %u",
10332 chunk_key.offset, chunk_end, sectorsize);
10333 err |= BYTES_UNALIGNED;
10337 type = btrfs_chunk_type(eb, chunk);
10338 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10339 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10340 error("chunk[%llu %llu) has no chunk type",
10341 chunk_key.offset, chunk_end);
10342 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero exactly when more than one bit is set. */
10344 if (profile && (profile & (profile - 1))) {
10345 error("chunk[%llu %llu) multiple profiles detected: %llx",
10346 chunk_key.offset, chunk_end, profile);
10347 err |= UNKNOWN_TYPE;
/* The block group item is keyed by the chunk start and the chunk length. */
10350 bg_key.objectid = chunk_key.offset;
10351 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10352 bg_key.offset = length;
10354 btrfs_init_path(&path);
10355 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10358 "chunk[%llu %llu) did not find the related block group item",
10359 chunk_key.offset, chunk_end);
10360 err |= REFERENCER_MISSING;
10362 leaf = path.nodes[0];
10363 bi = btrfs_item_ptr(leaf, path.slots[0],
10364 struct btrfs_block_group_item);
10365 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/* NOTE(review): a flags mismatch sets REFERENCER_MISSING although the message says mismatch and REFERENCER_MISMATCH is used for the parallel length case in check_block_group_item - confirm. */
10367 if (btrfs_block_group_flags(&bg_item) != type) {
10369 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10370 chunk_key.offset, chunk_end, type,
10371 btrfs_block_group_flags(&bg_item));
10372 err |= REFERENCER_MISSING;
10376 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
/* One dev tree lookup per stripe, keyed by the stripe devid and offset. */
10377 for (i = 0; i < num_stripes; i++) {
10378 btrfs_release_path(&path);
10379 btrfs_init_path(&path);
10380 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10381 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10382 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10384 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10387 goto not_match_dev;
10389 leaf = path.nodes[0];
10390 ptr = btrfs_item_ptr(leaf, path.slots[0],
10391 struct btrfs_dev_extent);
10392 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10393 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
10394 if (objectid != chunk_key.objectid ||
10395 offset != chunk_key.offset ||
10396 btrfs_dev_extent_length(leaf, ptr) != length)
10397 goto not_match_dev;
/* NOTE(review): this message prints chunk_key.objectid as the range start while every other message in this function prints chunk_key.offset - confirm. */
10400 err |= BACKREF_MISSING;
10402 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10403 chunk_key.objectid, chunk_end, i);
10406 btrfs_release_path(&path);
10412 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf eb and dispatch each one by key type to the
 * matching checker: file extents, block group items, dev items, chunk
 * items, dev extents, extent and metadata items, and the four keyed backref
 * types.  EXTENT_CSUM items only add their item size to the global
 * total_csum_bytes.
 * NOTE(review): this listing omits short source lines (braces, the switch
 * header, breaks, error accumulation, trailing call arguments, the loop
 * jump, the return), so only the visible statements are described.
 */
10414 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10416 struct btrfs_fs_info *fs_info = root->fs_info;
10417 struct btrfs_key key;
10420 struct btrfs_extent_data_ref *dref;
10425 btrfs_item_key_to_cpu(eb, &key, slot);
10429 case BTRFS_EXTENT_DATA_KEY:
10430 ret = check_extent_data_item(root, eb, slot);
10433 case BTRFS_BLOCK_GROUP_ITEM_KEY:
10434 ret = check_block_group_item(fs_info, eb, slot);
10437 case BTRFS_DEV_ITEM_KEY:
10438 ret = check_dev_item(fs_info, eb, slot);
10441 case BTRFS_CHUNK_ITEM_KEY:
10442 ret = check_chunk_item(fs_info, eb, slot);
10445 case BTRFS_DEV_EXTENT_KEY:
10446 ret = check_dev_extent_item(fs_info, eb, slot);
10449 case BTRFS_EXTENT_ITEM_KEY:
10450 case BTRFS_METADATA_ITEM_KEY:
10451 ret = check_extent_item(fs_info, eb, slot);
10454 case BTRFS_EXTENT_CSUM_KEY:
10455 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed backrefs: the key offset is passed as root id or parent bytenr; the remaining arguments are on lines missing from this listing. */
10457 case BTRFS_TREE_BLOCK_REF_KEY:
10458 ret = check_tree_block_backref(fs_info, key.offset,
10462 case BTRFS_EXTENT_DATA_REF_KEY:
10463 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10464 ret = check_extent_data_backref(fs_info,
10465 btrfs_extent_data_ref_root(eb, dref),
10466 btrfs_extent_data_ref_objectid(eb, dref),
10467 btrfs_extent_data_ref_offset(eb, dref),
10469 btrfs_extent_data_ref_count(eb, dref));
10472 case BTRFS_SHARED_BLOCK_REF_KEY:
10473 ret = check_shared_block_backref(fs_info, key.offset,
10477 case BTRFS_SHARED_DATA_REF_KEY:
10478 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while items remain; the jump taken is not visible. */
10486 if (++slot < btrfs_header_nritems(eb))
10493 * Helper function for later fs/subvol tree check. To determine if a tree
10494 * block should be checked.
10495 * This function ensures that only the direct referencer with the lowest rootid
10496 * checks a fs/subvolume tree block.
10498 * Backref check at extent tree would detect errors like missing subvolume
10499 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check: decide whether root is the one that has to check tree
 * block eb.  The extent item of the block is looked up in the extent tree
 * and its inline refs are walked; the inline refs start right after the
 * extent item for a METADATA_ITEM key and after the btrfs_tree_block_info in
 * the other visible branch.  A TREE_BLOCK_REF whose offset is lower than
 * root->objectid releases the path and leaves the loop early.
 * NOTE(review): this listing omits short source lines (braces, conditions,
 * gotos, all return statements), so the returned values are not visible
 * here.
 */
10501 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10503 struct btrfs_root *extent_root = root->fs_info->extent_root;
10504 struct btrfs_key key;
10505 struct btrfs_path path;
10506 struct extent_buffer *leaf;
10508 struct btrfs_extent_item *ei;
10514 struct btrfs_extent_inline_ref *iref;
10517 btrfs_init_path(&path);
10518 key.objectid = btrfs_header_bytenr(eb);
10519 key.type = BTRFS_METADATA_ITEM_KEY;
10520 key.offset = (u64)-1;
10523 * Any failure in backref resolving means we can't determine
10524 * whom the tree block belongs to.
10525 * So in that case, we need to check that tree block
10527 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10531 ret = btrfs_previous_extent_item(extent_root, &path,
10532 btrfs_header_bytenr(eb));
10536 leaf = path.nodes[0];
10537 slot = path.slots[0];
10538 btrfs_item_key_to_cpu(leaf, &key, slot);
10539 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10541 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10542 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10544 struct btrfs_tree_block_info *info;
10546 info = (struct btrfs_tree_block_info *)(ei + 1);
10547 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10550 item_size = btrfs_item_size_nr(leaf, slot);
10551 ptr = (unsigned long)iref;
10552 end = (unsigned long)ei + item_size;
10553 while (ptr < end) {
10554 iref = (struct btrfs_extent_inline_ref *)ptr;
10555 type = btrfs_extent_inline_ref_type(leaf, iref);
10556 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10559 * We only check the tree block if current root is
10560 * the lowest referencer of it.
10562 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10563 offset < root->objectid) {
10564 btrfs_release_path(&path);
10568 ptr += btrfs_extent_inline_ref_size(type);
10571 * Normally we should also check keyed tree block ref, but that may be
10572 * very time consuming. Inlined ref should already make us skip a lot
10573 * of refs now. So skip search keyed tree block ref.
10577 btrfs_release_path(&path);
10582 * Traversal function for tree block. We will do:
10583 * 1) Skip shared fs/subvolume tree blocks
10584 * 2) Update related bytes accounting
10585 * 3) Pre-order traversal
/*
 * traverse_tree_block: recursive walk of the tree below node.
 * Visible steps: for fs trees, consult should_check() first; add node->len
 * to the global byte counters selected by the header owner; set
 * found_old_backref for a reloc tree block with mixed backref revision and
 * no RELOC header flag; check the block itself with check_tree_block_ref();
 * run check_leaf_items() and account free leaf space; otherwise account
 * unused key pointers, then read every child with read_tree_block() and
 * recurse into the ones that are up to date.  Children whose key is covered
 * by the drop progress of the root at drop_level are tested with
 * is_dropped_key().
 * NOTE(review): this listing omits short source lines (braces, conditions,
 * continue statements, error accumulation, returns), so only the visible
 * statements are described.
 */
10587 static int traverse_tree_block(struct btrfs_root *root,
10588 struct extent_buffer *node)
10590 struct extent_buffer *eb;
10591 struct btrfs_key key;
10592 struct btrfs_key drop_key;
10600 * Skip shared fs/subvolume tree block, in that case they will
10601 * be checked by referencer with lowest rootid
10603 if (is_fstree(root->objectid) && !should_check(root, node))
10606 /* Update bytes accounting */
10607 total_btree_bytes += node->len;
10608 if (fs_root_objectid(btrfs_header_owner(node)))
10609 total_fs_tree_bytes += node->len;
10610 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10611 total_extent_tree_bytes += node->len;
10612 if (!found_old_backref &&
10613 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10614 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10615 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10616 found_old_backref = 1;
10618 /* pre-order traversal, check itself first */
10619 level = btrfs_header_level(node);
10620 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10621 btrfs_header_level(node),
10622 btrfs_header_owner(node));
10626 "check %s failed root %llu bytenr %llu level %d, force continue check",
10627 level ? "node":"leaf", root->objectid,
10628 btrfs_header_bytenr(node), btrfs_header_level(node));
10631 btree_space_waste += btrfs_leaf_free_space(root, node);
10632 ret = check_leaf_items(root, node);
10637 nr = btrfs_header_nritems(node);
10638 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10639 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10640 sizeof(struct btrfs_key_ptr);
10642 /* Then check all its children */
10643 for (i = 0; i < nr; i++) {
10644 u64 blocknr = btrfs_node_blockptr(node, i);
10646 btrfs_node_key_to_cpu(node, &key, i);
10647 if (level == root->root_item.drop_level &&
10648 is_dropped_key(&key, &drop_key))
10652 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10653 * to call the function itself.
10655 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10656 if (extent_buffer_uptodate(eb)) {
10657 ret = traverse_tree_block(root, eb);
10660 free_extent_buffer(eb);
10667 * Low memory usage version check_chunks_and_extents.
/*
 * Low memory entry point: traverse the chunk tree and the tree root first,
 * then iterate the ROOT_ITEM entries of the tree root starting at the extent
 * tree objectid, read each such root with btrfs_read_fs_root() and traverse
 * it with traverse_tree_block().
 * NOTE(review): this listing omits short source lines (braces, the loop
 * header, error accumulation, gotos, two key field assignments, the
 * return), so only the visible statements are described.
 */
10669 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10671 struct btrfs_path path;
10672 struct btrfs_key key;
10673 struct btrfs_root *root1;
10674 struct btrfs_root *cur_root;
10678 root1 = root->fs_info->chunk_root;
10679 ret = traverse_tree_block(root1, root1->node);
10682 root1 = root->fs_info->tree_root;
10683 ret = traverse_tree_block(root1, root1->node);
10686 btrfs_init_path(&path);
10687 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10689 key.type = BTRFS_ROOT_ITEM_KEY;
/* root1 still points at the tree root here, so this search runs in the tree root. */
10691 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* NOTE(review): the message below contains the typo treet; it is a runtime string and is left untouched. */
10693 error("cannot find extent treet in tree_root");
10698 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10699 if (key.type != BTRFS_ROOT_ITEM_KEY)
10701 key.offset = (u64)-1;
10703 cur_root = btrfs_read_fs_root(root->fs_info, &key);
/* NOTE(review): key.objectid is printed with %lld here while other messages in this file print it with %llu. */
10704 if (IS_ERR(cur_root) || !cur_root) {
10705 error("failed to read tree: %lld", key.objectid);
10709 ret = traverse_tree_block(cur_root, cur_root->node);
10713 ret = btrfs_next_item(root1, &path);
10719 btrfs_release_path(&path);
/*
 * Reinitialise the root node of root inside transaction trans.
 * A block c is obtained (two visible paths each take an extra reference with
 * extent_buffer_get(); one of them allocates via btrfs_alloc_free_block()),
 * its header is zeroed and refilled with level, bytenr, generation, backref
 * revision, owner, fsid and chunk tree uuid, and the buffer is marked dirty.
 * When the old node and c start at the same bytenr the root item generation
 * and level are refreshed and written with btrfs_update_root().  The old
 * node is released and the root is added to the dirty list.
 * NOTE(review): this listing omits short source lines (braces, conditions,
 * the assignment of root->node, error handling, the return), so the role of
 * the overwrite parameter is not visible here - presumably it selects
 * between reusing the old node and allocating a new block.
 */
10723 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10724 struct btrfs_root *root, int overwrite)
10726 struct extent_buffer *c;
10727 struct extent_buffer *old = root->node;
10730 struct btrfs_disk_key disk_key = {0,0,0};
10736 extent_buffer_get(c);
10739 c = btrfs_alloc_free_block(trans, root,
10741 root->root_key.objectid,
10742 &disk_key, level, 0, 0);
10745 extent_buffer_get(c);
/* Wipe the header, then rebuild every field needed for an empty block. */
10749 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10750 btrfs_set_header_level(c, level);
10751 btrfs_set_header_bytenr(c, c->start);
10752 btrfs_set_header_generation(c, trans->transid);
10753 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10754 btrfs_set_header_owner(c, root->root_key.objectid);
10756 write_extent_buffer(c, root->fs_info->fsid,
10757 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10759 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10760 btrfs_header_chunk_tree_uuid(c),
10763 btrfs_mark_buffer_dirty(c);
10765 * this case can happen in the following case:
10767 * 1.overwrite previous root.
10769 * 2.reinit reloc data root, this is because we skip pin
10770 * down reloc data tree before which means we can allocate
10771 * same block bytenr here.
10773 if (old->start == c->start) {
10774 btrfs_set_root_generation(&root->root_item,
10776 root->root_item.level = btrfs_header_level(root->node);
10777 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10778 &root->root_key, &root->root_item);
10780 free_extent_buffer(c);
10784 free_extent_buffer(old);
10786 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree blocks reachable from eb.
 * A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents is detected up front; otherwise the block is
 * pinned with btrfs_pin_extent().  Two kinds of children are then followed
 * inside the item loop: ROOT_ITEM entries (extent root, tree reloc and data
 * reloc roots excluded), whose root block is read and pinned recursively
 * with tree_root = 0, and node block pointers, which are pinned directly
 * without reading when level is 1 and tree_root is 0, and read and recursed
 * into otherwise.
 * NOTE(review): this listing omits short source lines (braces, the leaf or
 * node test, continue statements, error returns, the final return), so only
 * the visible statements are described.
 */
10790 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10791 struct extent_buffer *eb, int tree_root)
10793 struct extent_buffer *tmp;
10794 struct btrfs_root_item *ri;
10795 struct btrfs_key key;
10798 int level = btrfs_header_level(eb);
10804 * If we have pinned this block before, don't pin it again.
10805 * This can not only avoid forever loop with broken filesystem
10806 * but also give us some speedups.
10808 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10809 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10812 btrfs_pin_extent(fs_info, eb->start, eb->len);
10814 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10815 nritems = btrfs_header_nritems(eb);
10816 for (i = 0; i < nritems; i++) {
10818 btrfs_item_key_to_cpu(eb, &key, i);
10819 if (key.type != BTRFS_ROOT_ITEM_KEY)
10821 /* Skip the extent root and reloc roots */
10822 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10823 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10824 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10826 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10827 bytenr = btrfs_disk_root_bytenr(eb, ri);
10830 * If at any point we start needing the real root we
10831 * will have to build a stump root for the root we are
10832 * in, but for now this doesn't actually use the root so
10833 * just pass in extent_root.
10835 tmp = read_tree_block(fs_info->extent_root, bytenr,
10837 if (!extent_buffer_uptodate(tmp)) {
10838 fprintf(stderr, "Error reading root block\n");
/* Blocks of a referenced root are pinned with tree_root = 0. */
10841 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10842 free_extent_buffer(tmp);
/* From here on the entry is handled as a node block pointer. */
10846 bytenr = btrfs_node_blockptr(eb, i);
10848 /* If we aren't the tree root don't read the block */
10849 if (level == 1 && !tree_root) {
10850 btrfs_pin_extent(fs_info, bytenr, nodesize);
10854 tmp = read_tree_block(fs_info->extent_root, bytenr,
10856 if (!extent_buffer_uptodate(tmp)) {
10857 fprintf(stderr, "Error reading tree block\n");
10860 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10861 free_extent_buffer(tmp);
/*
 * Pin the metadata reachable from the chunk root (tree_root == 0) and then
 * from the tree root (tree_root == 1); the result of the second walk is
 * returned directly.
 *
 * NOTE(review): the check on the first result is not visible in this
 * excerpt.
 */
10870 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10874 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10878 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the CHUNK_ITEMs in the chunk tree.
 *
 * The avail_data/metadata/system alloc bit masks are zeroed first, then
 * every chunk item found is passed to btrfs_add_block_group() and its range
 * is marked dirty in fs_info->free_space_cache. A second pass walks the
 * block groups with btrfs_lookup_first_block_group().
 *
 * NOTE(review): what the second pass does with each block group is not
 * visible in this excerpt - confirm against the full source.
 */
10881 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10883 struct btrfs_block_group_cache *cache;
10884 struct btrfs_path path;
10885 struct extent_buffer *leaf;
10886 struct btrfs_chunk *chunk;
10887 struct btrfs_key key;
10891 btrfs_init_path(&path);
10893 key.type = BTRFS_CHUNK_ITEM_KEY;
10895 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
10897 btrfs_release_path(&path);
10902 * We do this in case the block groups were screwed up and had alloc
10903 * bits that aren't actually set on the chunks. This happens with
10904 * restored images every time and could happen in real life I guess.
10906 fs_info->avail_data_alloc_bits = 0;
10907 fs_info->avail_metadata_alloc_bits = 0;
10908 fs_info->avail_system_alloc_bits = 0;
10910 /* First we need to create the in-memory block groups */
10912 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10913 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
10915 btrfs_release_path(&path);
10923 leaf = path.nodes[0];
10924 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10925 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10930 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
10931 btrfs_add_block_group(fs_info, 0,
10932 btrfs_chunk_type(leaf, chunk),
10933 key.objectid, key.offset,
10934 btrfs_chunk_length(leaf, chunk));
10935 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10936 key.offset + btrfs_chunk_length(leaf, chunk),
10942 cache = btrfs_lookup_first_block_group(fs_info, start);
10946 start = cache->key.objectid + cache->key.offset;
10949 btrfs_release_path(&path);
/*
 * Drop pending balance state from the tree root inside @trans.
 *
 * Steps visible here: delete the BTRFS_BALANCE_OBJECTID / BALANCE_ITEM item,
 * delete the items whose objectid is BTRFS_TREE_RELOC_OBJECTID in runs of
 * del_nr slots starting at del_slot via btrfs_del_items(), then read the
 * data reloc tree root, reinitialize it with btrfs_fsck_reinit_root() and
 * recreate its root directory with btrfs_make_root_dir().
 *
 * NOTE(review): loop structure and error paths are only partly visible in
 * this excerpt.
 */
10953 static int reset_balance(struct btrfs_trans_handle *trans,
10954 struct btrfs_fs_info *fs_info)
10956 struct btrfs_root *root = fs_info->tree_root;
10957 struct btrfs_path path;
10958 struct extent_buffer *leaf;
10959 struct btrfs_key key;
10960 int del_slot, del_nr = 0;
10964 btrfs_init_path(&path);
10965 key.objectid = BTRFS_BALANCE_OBJECTID;
10966 key.type = BTRFS_BALANCE_ITEM_KEY;
10968 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10973 goto reinit_data_reloc;
10978 ret = btrfs_del_item(trans, root, &path);
10981 btrfs_release_path(&path);
10983 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10984 key.type = BTRFS_ROOT_ITEM_KEY;
10986 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10990 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10995 ret = btrfs_del_items(trans, root, &path,
11002 btrfs_release_path(&path);
11005 ret = btrfs_search_slot(trans, root, &key, &path,
11012 leaf = path.nodes[0];
11013 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11014 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11016 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11021 del_slot = path.slots[0];
11030 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11034 btrfs_release_path(&path);
11037 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11038 key.type = BTRFS_ROOT_ITEM_KEY;
11039 key.offset = (u64)-1;
11040 root = btrfs_read_fs_root(fs_info, &key);
11041 if (IS_ERR(root)) {
11042 fprintf(stderr, "Error reading data reloc tree\n");
11043 ret = PTR_ERR(root);
11046 record_root_in_trans(trans, root);
11047 ret = btrfs_fsck_reinit_root(trans, root, 0);
11050 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11052 btrfs_release_path(&path);
/*
 * Recreate the extent tree within @trans.
 *
 * Filesystems with the MIXED_GROUPS incompat flag are rejected with a
 * message. Otherwise the visible steps are: pin the in-use metadata, free
 * and rebuild the in-memory block groups, reinitialize the extent root,
 * insert one item per block group into the new extent root, and finally
 * reset any pending balance. Each step has an accompanying error message
 * written to stderr.
 */
11056 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11057 struct btrfs_fs_info *fs_info)
11063 * The only reason we don't do this is because right now we're just
11064 * walking the trees we find and pinning down their bytes, we don't look
11065 * at any of the leaves. In order to do mixed groups we'd have to check
11066 * the leaves of any fs roots and pin down the bytes for any file
11067 * extents we find. Not hard but why do it if we don't have to?
11069 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11070 fprintf(stderr, "We don't support re-initing the extent tree "
11071 "for mixed block groups yet, please notify a btrfs "
11072 "developer you want to do this so they can add this "
11073 "functionality.\n");
11078 * first we need to walk all of the trees except the extent tree and pin
11079 * down the bytes that are in use so we don't overwrite any existing
11082 ret = pin_metadata_blocks(fs_info);
11084 fprintf(stderr, "error pinning down used bytes\n");
11089 * Need to drop all the block groups since we're going to recreate all
11092 btrfs_free_block_groups(fs_info);
11093 ret = reset_block_groups(fs_info);
11095 fprintf(stderr, "error resetting the block groups\n");
11099 /* Ok we can allocate now, reinit the extent root */
11100 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11102 fprintf(stderr, "extent root initialization failed\n");
11104 * When the transaction code is updated we should end the
11105 * transaction, but for now progs only knows about commit so
11106 * just return an error.
11112 * Now we have all the in-memory block groups setup so we can make
11113 * allocations properly, and the metadata we care about is safe since we
11114 * pinned all of it above.
11117 struct btrfs_block_group_cache *cache;
11119 cache = btrfs_lookup_first_block_group(fs_info, start);
11122 start = cache->key.objectid + cache->key.offset;
11123 ret = btrfs_insert_item(trans, fs_info->extent_root,
11124 &cache->key, &cache->item,
11125 sizeof(cache->item));
11127 fprintf(stderr, "Error adding block group\n");
11130 btrfs_extent_post_op(trans, fs_info->extent_root);
11133 ret = reset_balance(trans, fs_info);
11135 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW the metadata block @eb by searching for its first key in its
 * owner root.
 *
 * The owner root is looked up from the block header owner, a transaction is
 * started, and btrfs_search_slot() is called with cow == 1 and
 * path.lowest_level set to the level of @eb. The transaction is then
 * committed and the path released.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not captured
 * on the visible line - confirm whether that is intentional.
 */
11140 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11142 struct btrfs_path path;
11143 struct btrfs_trans_handle *trans;
11144 struct btrfs_key key;
11147 printf("Recowing metadata block %llu\n", eb->start);
11148 key.objectid = btrfs_header_owner(eb);
11149 key.type = BTRFS_ROOT_ITEM_KEY;
11150 key.offset = (u64)-1;
11152 root = btrfs_read_fs_root(root->fs_info, &key);
11153 if (IS_ERR(root)) {
11154 fprintf(stderr, "Couldn't find owner root %llu\n",
11156 return PTR_ERR(root);
11159 trans = btrfs_start_transaction(root, 1);
11161 return PTR_ERR(trans);
11163 btrfs_init_path(&path);
11164 path.lowest_level = btrfs_header_level(eb);
11165 if (path.lowest_level)
11166 btrfs_node_key_to_cpu(eb, &key, 0);
11168 btrfs_item_key_to_cpu(eb, &key, 0);
11170 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11171 btrfs_commit_transaction(trans, root);
11172 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root identified by
 * bad->root_id.
 *
 * The root is looked up via btrfs_read_fs_root(), a transaction is started,
 * the key is searched with ins_len == -1 and cow == 1, the item is removed
 * with btrfs_del_item(), and the transaction is committed.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not captured
 * on the visible line - confirm whether that is intentional.
 */
11176 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11178 struct btrfs_path path;
11179 struct btrfs_trans_handle *trans;
11180 struct btrfs_key key;
11183 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11184 bad->key.type, bad->key.offset);
11185 key.objectid = bad->root_id;
11186 key.type = BTRFS_ROOT_ITEM_KEY;
11187 key.offset = (u64)-1;
11189 root = btrfs_read_fs_root(root->fs_info, &key);
11190 if (IS_ERR(root)) {
11191 fprintf(stderr, "Couldn't find owner root %llu\n",
11193 return PTR_ERR(root);
11196 trans = btrfs_start_transaction(root, 1);
11198 return PTR_ERR(trans);
11200 btrfs_init_path(&path);
11201 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11207 ret = btrfs_del_item(trans, root, &path);
11209 btrfs_commit_transaction(trans, root);
11210 btrfs_release_path(&path);
/*
 * Clear the log tree pointer: within a transaction on @root, set the super
 * block log_root and log_root_level to 0 and commit. A failure to start the
 * transaction is converted to an error code with PTR_ERR().
 */
11214 static int zero_log_tree(struct btrfs_root *root)
11216 struct btrfs_trans_handle *trans;
11219 trans = btrfs_start_transaction(root, 1);
11220 if (IS_ERR(trans)) {
11221 ret = PTR_ERR(trans);
11224 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11225 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11226 ret = btrfs_commit_transaction(trans, root);
/*
 * Checksum the data range beginning at @start one sector at a time.
 *
 * Each iteration reads the data at start + offset into @buf with
 * read_extent_data(), hands it to btrfs_csum_file_block() and advances
 * offset by csum_root->sectorsize until offset reaches len.
 */
11230 static int populate_csum(struct btrfs_trans_handle *trans,
11231 struct btrfs_root *csum_root, char *buf, u64 start,
11238 while (offset < len) {
11239 sectorsize = csum_root->sectorsize;
11240 ret = read_extent_data(csum_root, buf, start + offset,
11244 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11245 start + offset, buf, sectorsize);
11248 offset += sectorsize;
/*
 * Walk the items of @cur_root and checksum every regular file extent.
 *
 * Only EXTENT_DATA items whose type is BTRFS_FILE_EXTENT_REG are processed;
 * the disk bytenr and disk_num_bytes of each are passed to populate_csum().
 * The scratch buffer is sized to the sectorsize of the csum root.
 *
 * NOTE(review): how an -EEXIST result from populate_csum() is handled is
 * not visible in this excerpt.
 */
11253 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11254 struct btrfs_root *csum_root,
11255 struct btrfs_root *cur_root)
11257 struct btrfs_path path;
11258 struct btrfs_key key;
11259 struct extent_buffer *node;
11260 struct btrfs_file_extent_item *fi;
11267 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11271 btrfs_init_path(&path);
11275 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11278 /* Iterate all regular file extents and fill its csum */
11280 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11282 if (key.type != BTRFS_EXTENT_DATA_KEY)
11284 node = path.nodes[0];
11285 slot = path.slots[0];
11286 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11287 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
11289 start = btrfs_file_extent_disk_bytenr(node, fi);
11290 len = btrfs_file_extent_disk_num_bytes(node, fi);
11292 ret = populate_csum(trans, csum_root, buf, start, len);
11293 if (ret == -EEXIST)
11299 * TODO: if next leaf is corrupted, jump to nearest next valid
11302 ret = btrfs_next_item(cur_root, &path);
11312 btrfs_release_path(&path);
/*
 * Fill the csum tree by visiting the fs/subvolume roots listed in the tree
 * root.
 *
 * The scan starts at BTRFS_FS_TREE_OBJECTID. Items are tested against
 * BTRFS_LAST_FREE_OBJECTID, the ROOT_ITEM key type and is_fstree(); each
 * root obtained from btrfs_read_fs_root() is handed to
 * fill_csum_tree_from_one_fs_root(). A message is printed when a root
 * cannot be read.
 */
11317 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11318 struct btrfs_root *csum_root)
11320 struct btrfs_fs_info *fs_info = csum_root->fs_info;
11321 struct btrfs_path path;
11322 struct btrfs_root *tree_root = fs_info->tree_root;
11323 struct btrfs_root *cur_root;
11324 struct extent_buffer *node;
11325 struct btrfs_key key;
11329 btrfs_init_path(&path);
11330 key.objectid = BTRFS_FS_TREE_OBJECTID;
11332 key.type = BTRFS_ROOT_ITEM_KEY;
11333 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11342 node = path.nodes[0];
11343 slot = path.slots[0];
11344 btrfs_item_key_to_cpu(node, &key, slot);
11345 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11347 if (key.type != BTRFS_ROOT_ITEM_KEY)
11349 if (!is_fstree(key.objectid))
11351 key.offset = (u64)-1;
11353 cur_root = btrfs_read_fs_root(fs_info, &key);
11354 if (IS_ERR(cur_root) || !cur_root) {
11355 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11359 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11364 ret = btrfs_next_item(tree_root, &path);
11374 btrfs_release_path(&path);
/*
 * Fill the csum tree from the EXTENT_ITEMs in the extent tree: items whose
 * flags include BTRFS_EXTENT_FLAG_DATA are passed to populate_csum(), with
 * key.objectid as the start of the range. The scratch buffer is sized to
 * csum_root->sectorsize.
 */
11378 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11379 struct btrfs_root *csum_root)
11381 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11382 struct btrfs_path path;
11383 struct btrfs_extent_item *ei;
11384 struct extent_buffer *leaf;
11386 struct btrfs_key key;
11389 btrfs_init_path(&path);
11391 key.type = BTRFS_EXTENT_ITEM_KEY;
11393 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11395 btrfs_release_path(&path);
11399 buf = malloc(csum_root->sectorsize);
11401 btrfs_release_path(&path);
11406 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11407 ret = btrfs_next_leaf(extent_root, &path);
11415 leaf = path.nodes[0];
11417 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11418 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11423 ei = btrfs_item_ptr(leaf, path.slots[0],
11424 struct btrfs_extent_item);
11425 if (!(btrfs_extent_flags(leaf, ei) &
11426 BTRFS_EXTENT_FLAG_DATA)) {
11431 ret = populate_csum(trans, csum_root, buf, key.objectid,
11438 btrfs_release_path(&path);
11444 * Recalculate the csum and put it into the csum tree.
11446 * Extent tree init will wipe out all the extent info, so in that case, we
11447 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
11448 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatch to fill_csum_tree_from_fs() when @search_fs_tree is non-zero and
 * to fill_csum_tree_from_extent() otherwise; the result of the chosen
 * helper is returned unchanged.
 */
11450 static int fill_csum_tree(struct btrfs_trans_handle *trans,
11451 struct btrfs_root *csum_root,
11452 int search_fs_tree)
11454 if (search_fs_tree)
11455 return fill_csum_tree_from_fs(trans, csum_root);
11457 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down roots_info_cache: entries are removed from the tree one by one
 * until it is empty, then the tree itself is freed and the global pointer
 * is reset to NULL. The check at the top covers the case where the cache
 * was never allocated.
 *
 * NOTE(review): the release of each root_item_info entry is not visible in
 * this excerpt.
 */
11460 static void free_roots_info_cache(void)
11462 if (!roots_info_cache)
11465 while (!cache_tree_empty(roots_info_cache)) {
11466 struct cache_extent *entry;
11467 struct root_item_info *rii;
11469 entry = first_cache_extent(roots_info_cache);
11472 remove_cache_extent(roots_info_cache, entry);
11473 rii = container_of(entry, struct root_item_info, cache_extent);
11477 free(roots_info_cache);
11478 roots_info_cache = NULL;
/*
 * Populate roots_info_cache from the extent tree.
 *
 * The cache is allocated and initialized on first use. Tree block extents
 * (EXTENT_ITEMs with the TREE_BLOCK flag, or METADATA_ITEMs) whose first
 * inline ref has type TREE_BLOCK_REF are grouped by root id, which is taken
 * from the inline ref offset. A new entry starts with level (u8)-1. For
 * each root the entry keeps the highest level seen together with the bytenr
 * and generation of that block; node_count is set to 1 whenever a higher
 * level is recorded.
 *
 * NOTE(review): the handling of a further block at the same level is not
 * visible in this excerpt - confirm against the full source.
 */
11481 static int build_roots_info_cache(struct btrfs_fs_info *info)
11484 struct btrfs_key key;
11485 struct extent_buffer *leaf;
11486 struct btrfs_path path;
11488 if (!roots_info_cache) {
11489 roots_info_cache = malloc(sizeof(*roots_info_cache));
11490 if (!roots_info_cache)
11492 cache_tree_init(roots_info_cache);
11495 btrfs_init_path(&path);
11497 key.type = BTRFS_EXTENT_ITEM_KEY;
11499 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11502 leaf = path.nodes[0];
11505 struct btrfs_key found_key;
11506 struct btrfs_extent_item *ei;
11507 struct btrfs_extent_inline_ref *iref;
11508 int slot = path.slots[0];
11513 struct cache_extent *entry;
11514 struct root_item_info *rii;
11516 if (slot >= btrfs_header_nritems(leaf)) {
11517 ret = btrfs_next_leaf(info->extent_root, &path);
11524 leaf = path.nodes[0];
11525 slot = path.slots[0];
11528 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11530 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11531 found_key.type != BTRFS_METADATA_ITEM_KEY)
11534 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11535 flags = btrfs_extent_flags(leaf, ei);
11537 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11538 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11541 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11542 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11543 level = found_key.offset;
11545 struct btrfs_tree_block_info *binfo;
11547 binfo = (struct btrfs_tree_block_info *)(ei + 1);
11548 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11549 level = btrfs_tree_block_level(leaf, binfo);
11553 * For a root extent, it must be of the following type and the
11554 * first (and only one) iref in the item.
11556 type = btrfs_extent_inline_ref_type(leaf, iref);
11557 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11560 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11561 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11563 rii = malloc(sizeof(struct root_item_info));
11568 rii->cache_extent.start = root_id;
11569 rii->cache_extent.size = 1;
11570 rii->level = (u8)-1;
11571 entry = &rii->cache_extent;
11572 ret = insert_cache_extent(roots_info_cache, entry);
11575 rii = container_of(entry, struct root_item_info,
11579 ASSERT(rii->cache_extent.start == root_id);
11580 ASSERT(rii->cache_extent.size == 1);
11582 if (level > rii->level || rii->level == (u8)-1) {
11583 rii->level = level;
11584 rii->bytenr = found_key.objectid;
11585 rii->gen = btrfs_extent_generation(leaf, ei);
11586 rii->node_count = 1;
11587 } else if (level == rii->level) {
11595 btrfs_release_path(&path);
/*
 * Compare the root item at @path against the cached info for its root and,
 * unless @read_only_mode is set, rewrite it in place.
 *
 * Messages are produced when no cache entry exists for the root, when
 * node_count is not 1, when the item differs from the cached bytenr, level
 * or generation, and when the item generation is newer than the cached one.
 * With @read_only_mode zero the bytenr, level and generation of the item
 * are updated and written back into the leaf with write_extent_buffer().
 *
 * NOTE(review): return values are not visible in this excerpt.
 */
11600 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11601 struct btrfs_path *path,
11602 const struct btrfs_key *root_key,
11603 const int read_only_mode)
11605 const u64 root_id = root_key->objectid;
11606 struct cache_extent *entry;
11607 struct root_item_info *rii;
11608 struct btrfs_root_item ri;
11609 unsigned long offset;
11611 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11614 "Error: could not find extent items for root %llu\n",
11615 root_key->objectid);
11619 rii = container_of(entry, struct root_item_info, cache_extent);
11620 ASSERT(rii->cache_extent.start == root_id);
11621 ASSERT(rii->cache_extent.size == 1);
11623 if (rii->node_count != 1) {
11625 "Error: could not find btree root extent for root %llu\n",
11630 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11631 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11633 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11634 btrfs_root_level(&ri) != rii->level ||
11635 btrfs_root_generation(&ri) != rii->gen) {
11638 * If we're in repair mode but our caller told us to not update
11639 * the root item, i.e. just check if it needs to be updated, don't
11640 * print this message, since the caller will call us again shortly
11641 * for the same root item without read only mode (the caller will
11642 * open a transaction first).
11644 if (!(read_only_mode && repair))
11646 "%sroot item for root %llu,"
11647 " current bytenr %llu, current gen %llu, current level %u,"
11648 " new bytenr %llu, new gen %llu, new level %u\n",
11649 (read_only_mode ? "" : "fixing "),
11651 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11652 btrfs_root_level(&ri),
11653 rii->bytenr, rii->gen, rii->level);
11655 if (btrfs_root_generation(&ri) > rii->gen) {
11657 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11658 root_id, btrfs_root_generation(&ri), rii->gen);
11662 if (!read_only_mode) {
11663 btrfs_set_root_bytenr(&ri, rii->bytenr);
11664 btrfs_set_root_level(&ri, rii->level);
11665 btrfs_set_root_generation(&ri, rii->gen);
11666 write_extent_buffer(path->nodes[0], &ri,
11667 offset, sizeof(ri));
11677 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11678 * caused read-only snapshots to be corrupted if they were created at a moment
11679 * when the source subvolume/snapshot had orphan items. The issue was that the
11680 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11681 * node instead of the post orphan cleanup root node.
11682 * So this function, and its callees, just detects and fixes those cases. Even
11683 * though the regression was for read-only snapshots, this function applies to
11684 * any snapshot/subvolume root.
11685 * This must be run before any other repair code - otherwise that code may
11686 * delete or modify backrefs in the extent tree, for example, which
11687 * will result in an inconsistent fs after repairing the root items.
/*
 * Iterate the ROOT_ITEMs of the tree root starting at
 * BTRFS_FIRST_FREE_OBJECTID and run maybe_repair_root_item() on each one,
 * skipping BTRFS_TREE_RELOC_OBJECTID. The roots info cache is built first
 * and freed at the end. The need_trans flag tracks whether a transaction
 * has to be opened before the tree root is searched.
 *
 * NOTE(review): the counting of repaired roots and the return value are
 * not visible in this excerpt.
 */
11689 static int repair_root_items(struct btrfs_fs_info *info)
11691 struct btrfs_path path;
11692 struct btrfs_key key;
11693 struct extent_buffer *leaf;
11694 struct btrfs_trans_handle *trans = NULL;
11697 int need_trans = 0;
11699 btrfs_init_path(&path);
11701 ret = build_roots_info_cache(info);
11705 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11706 key.type = BTRFS_ROOT_ITEM_KEY;
11711 * Avoid opening and committing transactions if a leaf doesn't have
11712 * any root items that need to be fixed, so that we avoid rotating
11713 * backup roots unnecessarily.
11716 trans = btrfs_start_transaction(info->tree_root, 1);
11717 if (IS_ERR(trans)) {
11718 ret = PTR_ERR(trans);
11723 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11727 leaf = path.nodes[0];
11730 struct btrfs_key found_key;
11732 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11733 int no_more_keys = find_next_key(&path, &key);
11735 btrfs_release_path(&path);
11737 ret = btrfs_commit_transaction(trans,
11749 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11751 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11753 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11756 ret = maybe_repair_root_item(info, &path, &found_key,
11761 if (!trans && repair) {
11764 btrfs_release_path(&path);
11774 free_roots_info_cache();
11775 btrfs_release_path(&path);
11777 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Call btrfs_clear_free_space_cache() for each block group found by
 * btrfs_lookup_first_block_group(), then set the super block
 * cache_generation to (u64)-1 inside a transaction on the tree root and
 * commit it. A failure to start that transaction is reported with error()
 * and returned via PTR_ERR().
 */
11784 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11786 struct btrfs_trans_handle *trans;
11787 struct btrfs_block_group_cache *bg_cache;
11791 /* Clear all free space cache inodes and its extent data */
11793 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11796 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11799 current = bg_cache->key.objectid + bg_cache->key.offset;
11802 /* Don't forget to set cache_generation to -1 */
11803 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11804 if (IS_ERR(trans)) {
11805 error("failed to update super block cache generation");
11806 return PTR_ERR(trans);
11808 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11809 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage text of the check command: synopsis, short and long description,
 * followed by one entry per option. cmd_check() passes this array to
 * usage() when the arguments are invalid.
 */
11814 const char * const cmd_check_usage[] = {
11815 "btrfs check [options] <device>",
11816 "Check structural integrity of a filesystem (unmounted).",
11817 "Check structural integrity of an unmounted filesystem. Verify internal",
11818 "trees' consistency and item connectivity. In the repair mode try to",
11819 "fix the problems found. ",
11820 "WARNING: the repair mode is considered dangerous",
11822 "-s|--super <superblock> use this superblock copy",
11823 "-b|--backup use the first valid backup root copy",
11824 "--repair try to repair the filesystem",
11825 "--readonly run in read-only mode (default)",
11826 "--init-csum-tree create a new CRC tree",
11827 "--init-extent-tree create a new extent tree",
11828 "--mode <MODE> allows choice of memory/IO trade-offs",
11829 " where MODE is one of:",
11830 " original - read inodes and extents to memory (requires",
11831 " more memory, does less IO)",
11832 " lowmem - try to use less memory but read blocks again",
11834 "--check-data-csum verify checksums of data blocks",
11835 "-Q|--qgroup-report print a report on qgroup consistency",
11836 "-E|--subvol-extents <subvolid>",
11837 " print subvolume extents and sharing state",
11838 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11839 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11840 "-p|--progress indicate progress",
11841 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the check command.
 *
 * Parses the command line with getopt_long(), rejects the combination of
 * --readonly with repair options and of lowmem mode with repair, checks the
 * mount status of the device and opens the filesystem. After that it either
 * performs one of the special actions (clearing the v1 or v2 space cache,
 * the qgroup report, printing the extent state of a subvolume) or runs the
 * checks in this order: extents, root items, free space cache or tree,
 * fs roots, csums, root refs and quota groups. With --init-extent-tree or
 * --init-csum-tree the corresponding tree is rebuilt in a transaction
 * before the checks. Summary counters are printed at the end.
 *
 * NOTE(review): jump targets, several conditions and the final return
 * value are not visible in this excerpt - confirm against the full source.
 */
11845 int cmd_check(int argc, char **argv)
11847 struct cache_tree root_cache;
11848 struct btrfs_root *root;
11849 struct btrfs_fs_info *info;
11852 u64 tree_root_bytenr = 0;
11853 u64 chunk_root_bytenr = 0;
11854 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11857 int init_csum_tree = 0;
11859 int clear_space_cache = 0;
11860 int qgroup_report = 0;
11861 int qgroups_repaired = 0;
11862 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11866 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11867 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11868 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11869 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11870 static const struct option long_options[] = {
11871 { "super", required_argument, NULL, 's' },
11872 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11873 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11874 { "init-csum-tree", no_argument, NULL,
11875 GETOPT_VAL_INIT_CSUM },
11876 { "init-extent-tree", no_argument, NULL,
11877 GETOPT_VAL_INIT_EXTENT },
11878 { "check-data-csum", no_argument, NULL,
11879 GETOPT_VAL_CHECK_CSUM },
11880 { "backup", no_argument, NULL, 'b' },
11881 { "subvol-extents", required_argument, NULL, 'E' },
11882 { "qgroup-report", no_argument, NULL, 'Q' },
11883 { "tree-root", required_argument, NULL, 'r' },
11884 { "chunk-root", required_argument, NULL,
11885 GETOPT_VAL_CHUNK_TREE },
11886 { "progress", no_argument, NULL, 'p' },
11887 { "mode", required_argument, NULL,
11889 { "clear-space-cache", required_argument, NULL,
11890 GETOPT_VAL_CLEAR_SPACE_CACHE},
11891 { NULL, 0, NULL, 0}
11894 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11898 case 'a': /* ignored */ break;
11900 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11903 num = arg_strtou64(optarg);
11904 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11906 "super mirror should be less than %d",
11907 BTRFS_SUPER_MIRROR_MAX);
11910 bytenr = btrfs_sb_offset(((int)num));
11911 printf("using SB copy %llu, bytenr %llu\n", num,
11912 (unsigned long long)bytenr);
11918 subvolid = arg_strtou64(optarg);
11921 tree_root_bytenr = arg_strtou64(optarg);
11923 case GETOPT_VAL_CHUNK_TREE:
11924 chunk_root_bytenr = arg_strtou64(optarg);
11927 ctx.progress_enabled = true;
11931 usage(cmd_check_usage);
11932 case GETOPT_VAL_REPAIR:
11933 printf("enabling repair mode\n");
11935 ctree_flags |= OPEN_CTREE_WRITES;
11937 case GETOPT_VAL_READONLY:
11940 case GETOPT_VAL_INIT_CSUM:
11941 printf("Creating a new CRC tree\n");
11942 init_csum_tree = 1;
11944 ctree_flags |= OPEN_CTREE_WRITES;
11946 case GETOPT_VAL_INIT_EXTENT:
11947 init_extent_tree = 1;
11948 ctree_flags |= (OPEN_CTREE_WRITES |
11949 OPEN_CTREE_NO_BLOCK_GROUPS);
11952 case GETOPT_VAL_CHECK_CSUM:
11953 check_data_csum = 1;
11955 case GETOPT_VAL_MODE:
11956 check_mode = parse_check_mode(optarg);
11957 if (check_mode == CHECK_MODE_UNKNOWN) {
11958 error("unknown mode: %s", optarg);
11962 case GETOPT_VAL_CLEAR_SPACE_CACHE:
11963 if (strcmp(optarg, "v1") == 0) {
11964 clear_space_cache = 1;
11965 } else if (strcmp(optarg, "v2") == 0) {
11966 clear_space_cache = 2;
11967 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
11970 "invalid argument to --clear-space-cache, must be v1 or v2");
11973 ctree_flags |= OPEN_CTREE_WRITES;
11978 if (check_argc_exact(argc - optind, 1))
11979 usage(cmd_check_usage);
11981 if (ctx.progress_enabled) {
11982 ctx.tp = TASK_NOTHING;
11983 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11986 /* This check is the only reason for --readonly to exist */
11987 if (readonly && repair) {
11988 error("repair options are not compatible with --readonly");
11993 * Not supported yet
11995 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11996 error("low memory mode doesn't support repair yet");
12001 cache_tree_init(&root_cache);
12003 if((ret = check_mounted(argv[optind])) < 0) {
12004 error("could not check mount status: %s", strerror(-ret));
12007 error("%s is currently mounted, aborting", argv[optind]);
12012 /* only allow partial opening under repair mode */
12014 ctree_flags |= OPEN_CTREE_PARTIAL;
12016 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12017 chunk_root_bytenr, ctree_flags);
12019 error("cannot open file system");
12024 global_info = info;
12025 root = info->fs_root;
12026 if (clear_space_cache == 1) {
12027 if (btrfs_fs_compat_ro(info,
12028 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12030 "free space cache v2 detected, use --clear-space-cache v2");
12034 printf("Clearing free space cache\n");
12035 ret = clear_free_space_cache(info);
12037 error("failed to clear free space cache");
12040 printf("Free space cache cleared\n");
12043 } else if (clear_space_cache == 2) {
12044 if (!btrfs_fs_compat_ro(info,
12045 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12046 printf("no free space cache v2 to clear\n");
12050 printf("Clear free space cache v2\n");
12051 ret = btrfs_clear_free_space_tree(info);
12053 error("failed to clear free space cache v2: %d", ret);
12056 printf("free space cache v2 cleared\n");
12062 * repair mode will force us to commit transaction which
12063 * will make us fail to load log tree when mounting.
12065 if (repair && btrfs_super_log_root(info->super_copy)) {
12066 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12071 ret = zero_log_tree(root);
12073 error("failed to zero log tree: %d", ret);
12078 uuid_unparse(info->super_copy->fsid, uuidbuf);
12079 if (qgroup_report) {
12080 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12082 ret = qgroup_verify_all(info);
12088 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12089 subvolid, argv[optind], uuidbuf);
12090 ret = print_extent_state(info, subvolid);
12093 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12095 if (!extent_buffer_uptodate(info->tree_root->node) ||
12096 !extent_buffer_uptodate(info->dev_root->node) ||
12097 !extent_buffer_uptodate(info->chunk_root->node)) {
12098 error("critical roots corrupted, unable to check the filesystem");
12103 if (init_extent_tree || init_csum_tree) {
12104 struct btrfs_trans_handle *trans;
12106 trans = btrfs_start_transaction(info->extent_root, 0);
12107 if (IS_ERR(trans)) {
12108 error("error starting transaction");
12109 ret = PTR_ERR(trans);
12113 if (init_extent_tree) {
12114 printf("Creating a new extent tree\n");
12115 ret = reinit_extent_tree(trans, info);
12120 if (init_csum_tree) {
12121 printf("Reinitialize checksum tree\n");
12122 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12124 error("checksum tree initialization failed: %d",
12130 ret = fill_csum_tree(trans, info->csum_root,
12133 error("checksum tree refilling failed: %d", ret);
12138 * Ok now we commit and run the normal fsck, which will add
12139 * extent entries for all of the items it finds.
12141 ret = btrfs_commit_transaction(trans, info->extent_root);
12145 if (!extent_buffer_uptodate(info->extent_root->node)) {
12146 error("critical: extent_root, unable to check the filesystem");
12150 if (!extent_buffer_uptodate(info->csum_root->node)) {
12151 error("critical: csum_root, unable to check the filesystem");
12156 if (!ctx.progress_enabled)
12157 fprintf(stderr, "checking extents\n");
12158 if (check_mode == CHECK_MODE_LOWMEM)
12159 ret = check_chunks_and_extents_v2(root);
12161 ret = check_chunks_and_extents(root);
12164 "errors found in extent allocation tree or chunk allocation");
12166 ret = repair_root_items(info);
12170 fprintf(stderr, "Fixed %d roots.\n", ret);
12172 } else if (ret > 0) {
12174 "Found %d roots with an outdated root item.\n",
12177 "Please run a filesystem check with the option --repair to fix them.\n");
12182 if (!ctx.progress_enabled) {
12183 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12184 fprintf(stderr, "checking free space tree\n");
12186 fprintf(stderr, "checking free space cache\n");
12188 ret = check_space_cache(root);
12193 * We used to have to have these hole extents in between our real
12194 * extents so if we don't have this flag set we need to make sure there
12195 * are no gaps in the file extents for inodes, otherwise we can just
12196 * ignore it when this happens.
12198 no_holes = btrfs_fs_incompat(root->fs_info,
12199 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12200 if (!ctx.progress_enabled)
12201 fprintf(stderr, "checking fs roots\n");
12202 ret = check_fs_roots(root, &root_cache);
12206 fprintf(stderr, "checking csums\n");
12207 ret = check_csums(root);
12211 fprintf(stderr, "checking root refs\n");
12212 ret = check_root_refs(root, &root_cache);
12216 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12217 struct extent_buffer *eb;
12219 eb = list_first_entry(&root->fs_info->recow_ebs,
12220 struct extent_buffer, recow);
12221 list_del_init(&eb->recow);
12222 ret = recow_extent_buffer(root, eb);
12227 while (!list_empty(&delete_items)) {
12228 struct bad_item *bad;
12230 bad = list_first_entry(&delete_items, struct bad_item, list);
12231 list_del_init(&bad->list);
12233 ret = delete_bad_item(root, bad);
12237 if (info->quota_enabled) {
12239 fprintf(stderr, "checking quota groups\n");
12240 err = qgroup_verify_all(info);
12244 err = repair_qgroups(info, &qgroups_repaired);
12249 if (!list_empty(&root->fs_info->recow_ebs)) {
12250 error("transid errors in file system");
12254 /* Don't override original ret */
12255 if (!ret && qgroups_repaired)
12256 ret = qgroups_repaired;
12258 if (found_old_backref) { /*
12259 * there was a disk format change when mixed
12260 * backref was in testing tree. The old format
12261 * existed about one week.
12263 printf("\n * Found old mixed backref format. "
12264 "The old format is not supported! *"
12265 "\n * Please mount the FS in readonly mode, "
12266 "backup data and re-format the FS. *\n\n");
12269 printf("found %llu bytes used err is %d\n",
12270 (unsigned long long)bytes_used, ret);
12271 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12272 printf("total tree bytes: %llu\n",
12273 (unsigned long long)total_btree_bytes);
12274 printf("total fs tree bytes: %llu\n",
12275 (unsigned long long)total_fs_tree_bytes);
12276 printf("total extent tree bytes: %llu\n",
12277 (unsigned long long)total_extent_tree_bytes);
12278 printf("btree space waste bytes: %llu\n",
12279 (unsigned long long)btree_space_waste);
12280 printf("file data blocks allocated: %llu\n referenced %llu\n",
12281 (unsigned long long)data_bytes_allocated,
12282 (unsigned long long)data_bytes_referenced);
12284 free_qgroup_counts();
12285 free_root_recs_tree(&root_cache);
12289 if (ctx.progress_enabled)
12290 task_deinit(ctx.info);