2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
50 TASK_NOTHING, /* have to be the last element */
55 enum task_position tp;
57 struct task_info *info;
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
88 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 struct data_backref {
102 struct extent_backref node;
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
118 return container_of(back, struct data_backref, node);
122 * Much like data_backref, but with the undetermined members removed
123 * and changed to use list_head.
124 * During extent scan, it is stored in root->orphan_data_extent.
125 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
127 struct orphan_data_extent {
128 struct list_head list;
136 struct tree_backref {
137 struct extent_backref node;
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
146 return container_of(back, struct tree_backref, node);
149 /* Explicit initialization for extent_record::flag_block_full_backref */
150 enum { FLAG_UNSET = 2 };
152 struct extent_record {
153 struct list_head backrefs;
154 struct list_head dups;
155 struct list_head list;
156 struct cache_extent cache;
157 struct btrfs_disk_key parent_key;
162 u64 extent_item_refs;
164 u64 parent_generation;
168 unsigned int flag_block_full_backref:2;
169 unsigned int found_rec:1;
170 unsigned int content_checked:1;
171 unsigned int owner_ref_checked:1;
172 unsigned int is_root:1;
173 unsigned int metadata:1;
174 unsigned int bad_full_backref:1;
175 unsigned int crossing_stripes:1;
176 unsigned int wrong_chunk_type:1;
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
181 return container_of(entry, struct extent_record, list);
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
200 return list_entry(entry, struct inode_backref, list);
203 struct root_item_record {
204 struct list_head list;
211 struct btrfs_key drop_key;
/*
 * Error bits or-ed into a backref's 'errors' mask.  print_ref_error()
 * reports each set bit using the text quoted on the right.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* "no dir item" */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* "no dir index" */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* "no inode ref" */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* "dup dir item" */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* "dup dir index" */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* "dup inode ref" */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* "index mismatch" */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* "filetype mismatch" */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* "name too long"; 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* "no root ref" */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* "no root backref" */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* "dup root ref" */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* "dup root backref" */
228 struct file_extent_hole {
234 struct inode_record {
235 struct list_head backrefs;
236 unsigned int checked:1;
237 unsigned int merging:1;
238 unsigned int found_inode_item:1;
239 unsigned int found_dir_item:1;
240 unsigned int found_file_extent:1;
241 unsigned int found_csum_item:1;
242 unsigned int some_csum_missing:1;
243 unsigned int nodatasum:1;
256 struct rb_root holes;
257 struct list_head orphan_extents;
/*
 * Error bits or-ed into an inode record's 'errors' mask.
 * print_inode_error() reports each set bit using the text quoted on the
 * right.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* "no inode item" */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* "no orphan item" */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* "dup inode item" */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* "dup dir index" */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* "odd dir item" */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* "odd file extent" */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* "bad file extent" */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* "file extent overlap" */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* "file extent discount"; 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* "dir isize wrong" */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* "nbytes wrong"; 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* "odd csum item" */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* "some csum missing" */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* "link count wrong" */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* "orphan file extent" */
278 struct root_backref {
279 struct list_head list;
280 unsigned int found_dir_item:1;
281 unsigned int found_dir_index:1;
282 unsigned int found_back_ref:1;
283 unsigned int found_forward_ref:1;
284 unsigned int reachable:1;
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
295 return list_entry(entry, struct root_backref, list);
299 struct list_head backrefs;
300 struct cache_extent cache;
301 unsigned int found_root_item:1;
307 struct cache_extent cache;
312 struct cache_extent cache;
313 struct cache_tree root_cache;
314 struct cache_tree inode_cache;
315 struct inode_record *current;
324 struct walk_control {
325 struct cache_tree shared;
326 struct shared_node *nodes[BTRFS_MAX_LEVEL];
332 struct btrfs_key key;
334 struct list_head list;
337 struct extent_entry {
342 struct list_head list;
345 struct root_item_info {
346 /* level of the root */
348 /* number of nodes at this level, must be 1 for a root */
352 struct cache_extent cache_extent;
356 * Error bit for low memory mode check.
358 * Currently no caller cares about it yet. Just internal use for error
/* Each condition owns a distinct bit so they can be or-ed into one mask. */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * This used to be (1 << 4), the same value as REFERENCER_MISMATCH, which
 * made the two conditions indistinguishable once or-ed into an error mask.
 * It now uses the next free bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
372 static void *print_status_check(void *p)
374 struct task_ctx *priv = p;
375 const char work_indicator[] = { '.', 'o', 'O', 'o' };
377 static char *task_position_string[] = {
379 "checking free space cache",
383 task_period_start(priv->info, 1000 /* 1s */);
385 if (priv->tp == TASK_NOTHING)
389 printf("%s [%c]\r", task_position_string[priv->tp],
390 work_indicator[count % 4]);
393 task_period_wait(priv->info);
398 static int print_status_return(void *p)
406 static enum btrfs_check_mode parse_check_mode(const char *str)
408 if (strcmp(str, "lowmem") == 0)
409 return CHECK_MODE_LOWMEM;
410 if (strcmp(str, "orig") == 0)
411 return CHECK_MODE_ORIGINAL;
412 if (strcmp(str, "original") == 0)
413 return CHECK_MODE_ORIGINAL;
415 return CHECK_MODE_UNKNOWN;
418 /* Compatibility helper that allows old code to be reused */
419 static u64 first_extent_gap(struct rb_root *holes)
421 struct file_extent_hole *hole;
423 if (RB_EMPTY_ROOT(holes))
426 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
432 struct file_extent_hole *hole1;
433 struct file_extent_hole *hole2;
435 hole1 = rb_entry(node1, struct file_extent_hole, node);
436 hole2 = rb_entry(node2, struct file_extent_hole, node);
438 if (hole1->start > hole2->start)
440 if (hole1->start < hole2->start)
442 /* Now hole1->start == hole2->start */
443 if (hole1->len >= hole2->len)
445 * Hole 1 will be merge center
446 * Same hole will be merged later
449 /* Hole 2 will be merge center */
454 * Add a hole to the record
456 * This will do hole merge for copy_file_extent_holes(),
457 * which will ensure there won't be contiguous holes.
459 static int add_file_extent_hole(struct rb_root *holes,
462 struct file_extent_hole *hole;
463 struct file_extent_hole *prev = NULL;
464 struct file_extent_hole *next = NULL;
466 hole = malloc(sizeof(*hole));
471 /* Since compare will not return 0, no -EEXIST will happen */
472 rb_insert(holes, &hole->node, compare_hole);
474 /* simple merge with previous hole */
475 if (rb_prev(&hole->node))
476 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
478 if (prev && prev->start + prev->len >= hole->start) {
479 hole->len = hole->start + hole->len - prev->start;
480 hole->start = prev->start;
481 rb_erase(&prev->node, holes);
486 /* iterate merge with next holes */
488 if (!rb_next(&hole->node))
490 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
492 if (hole->start + hole->len >= next->start) {
493 if (hole->start + hole->len <= next->start + next->len)
494 hole->len = next->start + next->len -
496 rb_erase(&next->node, holes);
505 static int compare_hole_range(struct rb_node *node, void *data)
507 struct file_extent_hole *hole;
510 hole = (struct file_extent_hole *)data;
513 hole = rb_entry(node, struct file_extent_hole, node);
514 if (start < hole->start)
516 if (start >= hole->start && start < hole->start + hole->len)
522 * Delete a hole in the record
524 * This will do the hole split and is much more restrictive than add.
526 static int del_file_extent_hole(struct rb_root *holes,
529 struct file_extent_hole *hole;
530 struct file_extent_hole tmp;
535 struct rb_node *node;
542 node = rb_search(holes, &tmp, compare_hole_range, NULL);
545 hole = rb_entry(node, struct file_extent_hole, node);
546 if (start + len > hole->start + hole->len)
550 * Now there will be no overlap, delete the hole and re-add the
551 * split(s) if they exist.
553 if (start > hole->start) {
554 prev_start = hole->start;
555 prev_len = start - hole->start;
558 if (hole->start + hole->len > start + len) {
559 next_start = start + len;
560 next_len = hole->start + hole->len - start - len;
563 rb_erase(node, holes);
566 ret = add_file_extent_hole(holes, prev_start, prev_len);
571 ret = add_file_extent_hole(holes, next_start, next_len);
578 static int copy_file_extent_holes(struct rb_root *dst,
581 struct file_extent_hole *hole;
582 struct rb_node *node;
585 node = rb_first(src);
587 hole = rb_entry(node, struct file_extent_hole, node);
588 ret = add_file_extent_hole(dst, hole->start, hole->len);
591 node = rb_next(node);
596 static void free_file_extent_holes(struct rb_root *holes)
598 struct rb_node *node;
599 struct file_extent_hole *hole;
601 node = rb_first(holes);
603 hole = rb_entry(node, struct file_extent_hole, node);
604 rb_erase(node, holes);
606 node = rb_first(holes);
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613 struct btrfs_root *root)
615 if (root->last_trans != trans->transid) {
616 root->track_dirty = 1;
617 root->last_trans = trans->transid;
618 root->commit_root = root->node;
619 extent_buffer_get(root->node);
623 static u8 imode_to_type(u32 imode)
626 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
628 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
629 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
630 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
631 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
632 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
633 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
636 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
642 struct device_record *rec1;
643 struct device_record *rec2;
645 rec1 = rb_entry(node1, struct device_record, node);
646 rec2 = rb_entry(node2, struct device_record, node);
647 if (rec1->devid > rec2->devid)
649 else if (rec1->devid < rec2->devid)
/*
 * Make a private copy of an inode record.
 *
 * The record is duplicated with memcpy(); its list heads and hole tree are
 * then re-initialized and refilled with newly allocated copies of the
 * original's backrefs, orphan data extents and file extent holes, so the
 * clone does not link into the original's lists.
 *
 * A failed allocation of the record itself is reported as ERR_PTR(-ENOMEM).
 */
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
657 struct inode_record *rec;
658 struct inode_backref *backref;
659 struct inode_backref *orig;
660 struct inode_backref *tmp;
661 struct orphan_data_extent *src_orphan;
662 struct orphan_data_extent *dst_orphan;
667 rec = malloc(sizeof(*rec));
669 return ERR_PTR(-ENOMEM);
670 memcpy(rec, orig_rec, sizeof(*rec));
/* memcpy() also copied the original's list heads and rb root; reset them */
672 INIT_LIST_HEAD(&rec->backrefs);
673 INIT_LIST_HEAD(&rec->orphan_extents);
674 rec->holes = RB_ROOT;
/* Duplicate every backref, including the name stored after the struct */
676 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* namelen bytes of name plus one terminating byte, as in get_inode_backref() */
677 size = sizeof(*orig) + orig->namelen + 1;
678 backref = malloc(size);
683 memcpy(backref, orig, size);
684 list_add_tail(&backref->list, &rec->backrefs);
/* Duplicate every orphan data extent */
686 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687 dst_orphan = malloc(sizeof(*dst_orphan));
692 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
/* Re-create the hole tree by inserting each hole of the original */
695 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* NOTE(review): from here on this looks like the unwind path for a failed copy */
702 rb = rb_first(&rec->holes);
704 struct file_extent_hole *hole;
706 hole = rb_entry(rb, struct file_extent_hole, node);
712 if (!list_empty(&rec->backrefs))
713 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714 list_del(&orig->list);
/*
 * NOTE(review): orphan_extents holds struct orphan_data_extent entries (see
 * the copy loop above), yet this loop walks it with struct inode_backref
 * cursors.  It only works if 'list' sits at the same offset in both structs;
 * consider dedicated orphan_data_extent cursors instead.
 */
718 if (!list_empty(&rec->orphan_extents))
719 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720 list_del(&orig->list);
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
732 struct orphan_data_extent *orphan;
734 if (list_empty(orphan_extents))
736 printf("The following data extent is lost in tree %llu:\n",
738 list_for_each_entry(orphan, orphan_extents, list) {
739 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the error bits of an inode record on stderr.
 *
 * Prints "root <id> inode <ino> errors <hex>" followed by one ", <text>"
 * fragment for every I_ERR_* bit set in rec->errors.  For a reloc tree the
 * output is prefixed with "reloc" and the id shown is root->root_key.offset.
 * Orphan data extents and file extent holes are listed afterwards when the
 * matching error bits are set.
 */
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
747 u64 root_objectid = root->root_key.objectid;
748 int errors = rec->errors;
752 /* For a reloc root, report the objectid of its corresponding fs root */
753 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754 root_objectid = root->root_key.offset;
755 fprintf(stderr, "reloc");
757 fprintf(stderr, "root %llu inode %llu errors %x",
758 (unsigned long long) root_objectid,
759 (unsigned long long) rec->ino, rec->errors);
/* One fragment per error bit, in ascending bit order */
761 if (errors & I_ERR_NO_INODE_ITEM)
762 fprintf(stderr, ", no inode item");
763 if (errors & I_ERR_NO_ORPHAN_ITEM)
764 fprintf(stderr, ", no orphan item");
765 if (errors & I_ERR_DUP_INODE_ITEM)
766 fprintf(stderr, ", dup inode item");
767 if (errors & I_ERR_DUP_DIR_INDEX)
768 fprintf(stderr, ", dup dir index");
769 if (errors & I_ERR_ODD_DIR_ITEM)
770 fprintf(stderr, ", odd dir item");
771 if (errors & I_ERR_ODD_FILE_EXTENT)
772 fprintf(stderr, ", odd file extent");
773 if (errors & I_ERR_BAD_FILE_EXTENT)
774 fprintf(stderr, ", bad file extent");
775 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776 fprintf(stderr, ", file extent overlap");
777 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778 fprintf(stderr, ", file extent discount");
779 if (errors & I_ERR_DIR_ISIZE_WRONG)
780 fprintf(stderr, ", dir isize wrong");
781 if (errors & I_ERR_FILE_NBYTES_WRONG)
782 fprintf(stderr, ", nbytes wrong");
783 if (errors & I_ERR_ODD_CSUM_ITEM)
784 fprintf(stderr, ", odd csum item");
785 if (errors & I_ERR_SOME_CSUM_MISSING)
786 fprintf(stderr, ", some csum missing");
787 if (errors & I_ERR_LINK_COUNT_WRONG)
788 fprintf(stderr, ", link count wrong");
789 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790 fprintf(stderr, ", orphan file extent");
791 fprintf(stderr, "\n");
792 /* Print the orphan extents if needed */
/*
 * NOTE(review): this passes root->objectid, not the root_objectid computed
 * above, so for a reloc root the tree id printed with the orphan extents may
 * differ from the one in the header line.  Confirm this is intended.
 */
793 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
796 /* Print the holes if needed */
797 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798 struct file_extent_hole *hole;
799 struct rb_node *node;
802 node = rb_first(&rec->holes);
803 fprintf(stderr, "Found file extent holes:\n");
/* Holes are visited in rb-tree order via rb_first()/rb_next() */
806 hole = rb_entry(node, struct file_extent_hole, node);
807 fprintf(stderr, "\tstart: %llu, len: %llu\n",
808 hole->start, hole->len);
809 node = rb_next(node);
/*
 * Reports one hole from offset 0 up to isize rounded up to the sector size.
 * NOTE(review): the condition selecting this case is not visible here.
 */
812 fprintf(stderr, "\tstart: 0, len: %llu\n",
813 round_up(rec->isize, root->sectorsize));
817 static void print_ref_error(int errors)
819 if (errors & REF_ERR_NO_DIR_ITEM)
820 fprintf(stderr, ", no dir item");
821 if (errors & REF_ERR_NO_DIR_INDEX)
822 fprintf(stderr, ", no dir index");
823 if (errors & REF_ERR_NO_INODE_REF)
824 fprintf(stderr, ", no inode ref");
825 if (errors & REF_ERR_DUP_DIR_ITEM)
826 fprintf(stderr, ", dup dir item");
827 if (errors & REF_ERR_DUP_DIR_INDEX)
828 fprintf(stderr, ", dup dir index");
829 if (errors & REF_ERR_DUP_INODE_REF)
830 fprintf(stderr, ", dup inode ref");
831 if (errors & REF_ERR_INDEX_UNMATCH)
832 fprintf(stderr, ", index mismatch");
833 if (errors & REF_ERR_FILETYPE_UNMATCH)
834 fprintf(stderr, ", filetype mismatch");
835 if (errors & REF_ERR_NAME_TOO_LONG)
836 fprintf(stderr, ", name too long");
837 if (errors & REF_ERR_NO_ROOT_REF)
838 fprintf(stderr, ", no root ref");
839 if (errors & REF_ERR_NO_ROOT_BACKREF)
840 fprintf(stderr, ", no root backref");
841 if (errors & REF_ERR_DUP_ROOT_REF)
842 fprintf(stderr, ", dup root ref");
843 if (errors & REF_ERR_DUP_ROOT_BACKREF)
844 fprintf(stderr, ", dup root backref");
845 fprintf(stderr, "\n");
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
851 struct ptr_node *node;
852 struct cache_extent *cache;
853 struct inode_record *rec = NULL;
856 cache = lookup_cache_extent(inode_cache, ino, 1);
858 node = container_of(cache, struct ptr_node, cache);
860 if (mod && rec->refs > 1) {
861 node->data = clone_inode_rec(rec);
862 if (IS_ERR(node->data))
868 rec = calloc(1, sizeof(*rec));
870 return ERR_PTR(-ENOMEM);
872 rec->extent_start = (u64)-1;
874 INIT_LIST_HEAD(&rec->backrefs);
875 INIT_LIST_HEAD(&rec->orphan_extents);
876 rec->holes = RB_ROOT;
878 node = malloc(sizeof(*node));
881 return ERR_PTR(-ENOMEM);
883 node->cache.start = ino;
884 node->cache.size = 1;
887 if (ino == BTRFS_FREE_INO_OBJECTID)
890 ret = insert_cache_extent(inode_cache, &node->cache);
892 return ERR_PTR(-EEXIST);
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
899 struct orphan_data_extent *orphan;
901 while (!list_empty(orphan_extents)) {
902 orphan = list_entry(orphan_extents->next,
903 struct orphan_data_extent, list);
904 list_del(&orphan->list);
909 static void free_inode_rec(struct inode_record *rec)
911 struct inode_backref *backref;
916 while (!list_empty(&rec->backrefs)) {
917 backref = to_inode_backref(rec->backrefs.next);
918 list_del(&backref->list);
921 free_orphan_data_extents(&rec->orphan_extents);
922 free_file_extent_holes(&rec->holes);
926 static int can_free_inode_rec(struct inode_record *rec)
928 if (!rec->errors && rec->checked && rec->found_inode_item &&
929 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Re-evaluate an inode record and drop it from @inode_cache when nothing is
 * left to report.
 *
 * Backrefs that are fully confirmed are unlinked from the record, the
 * mode-specific consistency checks add I_ERR_* bits to rec->errors, and if
 * can_free_inode_rec() then accepts the record, its cache node is removed
 * from @inode_cache.
 */
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935 struct inode_record *rec)
937 struct cache_extent *cache;
938 struct inode_backref *tmp, *backref;
939 struct ptr_node *node;
942 if (!rec->found_inode_item)
/* Directory-entry type implied by the inode's mode */
945 filetype = imode_to_type(rec->imode);
946 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Only backrefs seen as both dir item and dir index are type-checked */
947 if (backref->found_dir_item && backref->found_dir_index) {
948 if (backref->filetype != filetype)
949 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
/* Error-free, has an inode ref, link counts agree: unlink this backref */
950 if (!backref->errors && backref->found_inode_ref &&
951 rec->nlink == rec->found_link) {
952 list_del(&backref->list);
958 if (!rec->checked || rec->merging)
/* Directories: accumulated size must equal isize; no file extents allowed */
961 if (S_ISDIR(rec->imode)) {
962 if (rec->found_size != rec->isize)
963 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964 if (rec->found_file_extent)
965 rec->errors |= I_ERR_ODD_FILE_EXTENT;
/* Regular files and symlinks: no dir items; found size must equal nbytes */
966 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967 if (rec->found_dir_item)
968 rec->errors |= I_ERR_ODD_DIR_ITEM;
969 if (rec->found_size != rec->nbytes)
970 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/*
 * With links remaining and no_holes unset, the extents must reach isize and
 * the first gap must not start before isize.
 */
971 if (rec->nlink > 0 && !no_holes &&
972 (rec->extent_end < rec->isize ||
973 first_extent_gap(&rec->holes) < rec->isize))
974 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
/* The presence of checksum items must agree with the nodatasum flag */
977 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978 if (rec->found_csum_item && rec->nodatasum)
979 rec->errors |= I_ERR_ODD_CSUM_ITEM;
980 if (rec->some_csum_missing && !rec->nodatasum)
981 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* A record with more than one reference must never reach this point */
984 BUG_ON(rec->refs != 1);
985 if (can_free_inode_rec(rec)) {
986 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987 node = container_of(cache, struct ptr_node, cache);
988 BUG_ON(node->data != rec);
989 remove_cache_extent(inode_cache, &node->cache);
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
997 struct btrfs_path path;
998 struct btrfs_key key;
1001 key.objectid = BTRFS_ORPHAN_OBJECTID;
1002 key.type = BTRFS_ORPHAN_ITEM_KEY;
1005 btrfs_init_path(&path);
1006 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007 btrfs_release_path(&path);
1013 static int process_inode_item(struct extent_buffer *eb,
1014 int slot, struct btrfs_key *key,
1015 struct shared_node *active_node)
1017 struct inode_record *rec;
1018 struct btrfs_inode_item *item;
1020 rec = active_node->current;
1021 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022 if (rec->found_inode_item) {
1023 rec->errors |= I_ERR_DUP_INODE_ITEM;
1026 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027 rec->nlink = btrfs_inode_nlink(eb, item);
1028 rec->isize = btrfs_inode_size(eb, item);
1029 rec->nbytes = btrfs_inode_nbytes(eb, item);
1030 rec->imode = btrfs_inode_mode(eb, item);
1031 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1033 rec->found_inode_item = 1;
1034 if (rec->nlink == 0)
1035 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036 maybe_free_inode_rec(&active_node->inode_cache, rec);
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1042 int namelen, u64 dir)
1044 struct inode_backref *backref;
1046 list_for_each_entry(backref, &rec->backrefs, list) {
1047 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1049 if (backref->dir != dir || backref->namelen != namelen)
1051 if (memcmp(name, backref->name, namelen))
1056 backref = malloc(sizeof(*backref) + namelen + 1);
1059 memset(backref, 0, sizeof(*backref));
1061 backref->namelen = namelen;
1062 memcpy(backref->name, name, namelen);
1063 backref->name[namelen] = '\0';
1064 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one item that links inode @ino to the name @name (of @namelen
 * bytes) in directory @dir.
 *
 * @itemtype tells which kind of item was seen: a dir index, a dir item, or
 * an inode ref / extref.  The inode record is fetched with
 * get_inode_rec(..., 1) and the matching backref with get_inode_backref().
 * @errors is or-ed into the backref, duplicates and index/filetype
 * mismatches against earlier items add REF_ERR_* bits, and the new
 * index/filetype/ref_type is stored.  Finally maybe_free_inode_rec() is
 * called on the record.
 */
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069 u64 ino, u64 dir, u64 index,
1070 const char *name, int namelen,
1071 u8 filetype, u8 itemtype, int errors)
1073 struct inode_record *rec;
1074 struct inode_backref *backref;
1076 rec = get_inode_rec(inode_cache, ino, 1);
1077 BUG_ON(IS_ERR(rec));
1078 backref = get_inode_backref(rec, name, namelen, dir);
1081 backref->errors |= errors;
/* Dir index: carries both the index and the file type */
1082 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083 if (backref->found_dir_index)
1084 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085 if (backref->found_inode_ref && backref->index != index)
1086 backref->errors |= REF_ERR_INDEX_UNMATCH;
1087 if (backref->found_dir_item && backref->filetype != filetype)
1088 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1090 backref->index = index;
1091 backref->filetype = filetype;
1092 backref->found_dir_index = 1;
/* Dir item: only the file type is compared and stored */
1093 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1095 if (backref->found_dir_item)
1096 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097 if (backref->found_dir_index && backref->filetype != filetype)
1098 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1100 backref->filetype = filetype;
1101 backref->found_dir_item = 1;
/* Inode ref or extref: the index is compared with a dir index seen earlier */
1102 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104 if (backref->found_inode_ref)
1105 backref->errors |= REF_ERR_DUP_INODE_REF;
1106 if (backref->found_dir_index && backref->index != index)
1107 backref->errors |= REF_ERR_INDEX_UNMATCH;
1109 backref->index = index;
/* Remember whether this was a plain ref or an extref */
1111 backref->ref_type = itemtype;
1112 backref->found_inode_ref = 1;
/* The new information may have completed the record */
1117 maybe_free_inode_rec(inode_cache, rec);
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122 struct cache_tree *dst_cache)
1124 struct inode_backref *backref;
1129 list_for_each_entry(backref, &src->backrefs, list) {
1130 if (backref->found_dir_index) {
1131 add_inode_backref(dst_cache, dst->ino, backref->dir,
1132 backref->index, backref->name,
1133 backref->namelen, backref->filetype,
1134 BTRFS_DIR_INDEX_KEY, backref->errors);
1136 if (backref->found_dir_item) {
1138 add_inode_backref(dst_cache, dst->ino,
1139 backref->dir, 0, backref->name,
1140 backref->namelen, backref->filetype,
1141 BTRFS_DIR_ITEM_KEY, backref->errors);
1143 if (backref->found_inode_ref) {
1144 add_inode_backref(dst_cache, dst->ino,
1145 backref->dir, backref->index,
1146 backref->name, backref->namelen, 0,
1147 backref->ref_type, backref->errors);
1151 if (src->found_dir_item)
1152 dst->found_dir_item = 1;
1153 if (src->found_file_extent)
1154 dst->found_file_extent = 1;
1155 if (src->found_csum_item)
1156 dst->found_csum_item = 1;
1157 if (src->some_csum_missing)
1158 dst->some_csum_missing = 1;
1159 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1165 BUG_ON(src->found_link < dir_count);
1166 dst->found_link += src->found_link - dir_count;
1167 dst->found_size += src->found_size;
1168 if (src->extent_start != (u64)-1) {
1169 if (dst->extent_start == (u64)-1) {
1170 dst->extent_start = src->extent_start;
1171 dst->extent_end = src->extent_end;
1173 if (dst->extent_end > src->extent_start)
1174 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175 else if (dst->extent_end < src->extent_start) {
1176 ret = add_file_extent_hole(&dst->holes,
1178 src->extent_start - dst->extent_end);
1180 if (dst->extent_end < src->extent_end)
1181 dst->extent_end = src->extent_end;
1185 dst->errors |= src->errors;
1186 if (src->found_inode_item) {
1187 if (!dst->found_inode_item) {
1188 dst->nlink = src->nlink;
1189 dst->isize = src->isize;
1190 dst->nbytes = src->nbytes;
1191 dst->imode = src->imode;
1192 dst->nodatasum = src->nodatasum;
1193 dst->found_inode_item = 1;
1195 dst->errors |= I_ERR_DUP_INODE_ITEM;
1203 static int splice_shared_node(struct shared_node *src_node,
1204 struct shared_node *dst_node)
1206 struct cache_extent *cache;
1207 struct ptr_node *node, *ins;
1208 struct cache_tree *src, *dst;
1209 struct inode_record *rec, *conflict;
1210 u64 current_ino = 0;
1214 if (--src_node->refs == 0)
1216 if (src_node->current)
1217 current_ino = src_node->current->ino;
1219 src = &src_node->root_cache;
1220 dst = &dst_node->root_cache;
1222 cache = search_cache_extent(src, 0);
1224 node = container_of(cache, struct ptr_node, cache);
1226 cache = next_cache_extent(cache);
1229 remove_cache_extent(src, &node->cache);
1232 ins = malloc(sizeof(*ins));
1234 ins->cache.start = node->cache.start;
1235 ins->cache.size = node->cache.size;
1239 ret = insert_cache_extent(dst, &ins->cache);
1240 if (ret == -EEXIST) {
1241 conflict = get_inode_rec(dst, rec->ino, 1);
1242 BUG_ON(IS_ERR(conflict));
1243 merge_inode_recs(rec, conflict, dst);
1245 conflict->checked = 1;
1246 if (dst_node->current == conflict)
1247 dst_node->current = NULL;
1249 maybe_free_inode_rec(dst, conflict);
1250 free_inode_rec(rec);
1257 if (src == &src_node->root_cache) {
1258 src = &src_node->inode_cache;
1259 dst = &dst_node->inode_cache;
1263 if (current_ino > 0 && (!dst_node->current ||
1264 current_ino > dst_node->current->ino)) {
1265 if (dst_node->current) {
1266 dst_node->current->checked = 1;
1267 maybe_free_inode_rec(dst, dst_node->current);
1269 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270 BUG_ON(IS_ERR(dst_node->current));
1275 static void free_inode_ptr(struct cache_extent *cache)
1277 struct ptr_node *node;
1278 struct inode_record *rec;
1280 node = container_of(cache, struct ptr_node, cache);
1282 free_inode_rec(rec);
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1291 struct cache_extent *cache;
1292 struct shared_node *node;
1294 cache = lookup_cache_extent(shared, bytenr, 1);
1296 node = container_of(cache, struct shared_node, cache);
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1305 struct shared_node *node;
1307 node = calloc(1, sizeof(*node));
1310 node->cache.start = bytenr;
1311 node->cache.size = 1;
1312 cache_tree_init(&node->root_cache);
1313 cache_tree_init(&node->inode_cache);
1316 ret = insert_cache_extent(shared, &node->cache);
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322 struct walk_control *wc, int level)
1324 struct shared_node *node;
1325 struct shared_node *dest;
1328 if (level == wc->active_node)
1331 BUG_ON(wc->active_node <= level);
1332 node = find_shared_node(&wc->shared, bytenr);
1334 ret = add_shared_node(&wc->shared, bytenr, refs);
1336 node = find_shared_node(&wc->shared, bytenr);
1337 wc->nodes[level] = node;
1338 wc->active_node = level;
1342 if (wc->root_level == wc->active_node &&
1343 btrfs_root_refs(&root->root_item) == 0) {
1344 if (--node->refs == 0) {
1345 free_inode_recs_tree(&node->root_cache);
1346 free_inode_recs_tree(&node->inode_cache);
1347 remove_cache_extent(&wc->shared, &node->cache);
1353 dest = wc->nodes[wc->active_node];
1354 splice_shared_node(node, dest);
1355 if (node->refs == 0) {
1356 remove_cache_extent(&wc->shared, &node->cache);
1362 static int leave_shared_node(struct btrfs_root *root,
1363 struct walk_control *wc, int level)
1365 struct shared_node *node;
1366 struct shared_node *dest;
1369 if (level == wc->root_level)
1372 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1376 BUG_ON(i >= BTRFS_MAX_LEVEL);
1378 node = wc->nodes[wc->active_node];
1379 wc->nodes[wc->active_node] = NULL;
1380 wc->active_node = i;
1382 dest = wc->nodes[wc->active_node];
1383 if (wc->active_node < wc->root_level ||
1384 btrfs_root_refs(&root->root_item) > 0) {
1385 BUG_ON(node->refs <= 1);
1386 splice_shared_node(node, dest);
1388 BUG_ON(node->refs < 2);
/*
 * is_child_root - classify how root child_root_id relates to parent_root_id.
 *
 * Searches fs_info->tree_root first for the ROOT_REF key
 * (parent_root_id, child_root_id) and then walks the ROOT_BACKREF items of
 * child_root_id. The possible results are listed below.
 */
1397 * 1 - if the root with id child_root_id is a child of root parent_root_id
1398 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1399 * has other root(s) as parent(s)
1400 * 2 - if the root child_root_id doesn't have any parent roots
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1405 struct btrfs_path path;
1406 struct btrfs_key key;
1407 struct extent_buffer *leaf;
1411 btrfs_init_path(&path);
/* First lookup: the forward reference parent -> child. */
1413 key.objectid = parent_root_id;
1414 key.type = BTRFS_ROOT_REF_KEY;
1415 key.offset = child_root_id;
1416 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1420 btrfs_release_path(&path);
/* Second lookup: iterate the back references stored under the child. */
1424 key.objectid = child_root_id;
1425 key.type = BTRFS_ROOT_BACKREF_KEY;
1427 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1433 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1434 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1438 leaf = path.nodes[0];
1441 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items that are not back references of the child are tested here. */
1442 if (key.objectid != child_root_id ||
1443 key.type != BTRFS_ROOT_BACKREF_KEY)
/* For a ROOT_BACKREF key the offset is compared against the parent id. */
1448 if (key.offset == parent_root_id) {
1449 btrfs_release_path(&path);
1456 btrfs_release_path(&path);
/* has_parent is maintained by statements not visible in this view. */
1459 return has_parent ? 0 : 2;
/*
 * process_dir_item - record every name packed in one DIR_ITEM/DIR_INDEX item
 * as a backref in the active shared node's caches.
 *
 * @eb/@slot locate the item, @key is its key, and @active_node supplies the
 * root/inode caches and the inode record currently being built.
 */
1462 static int process_dir_item(struct btrfs_root *root,
1463 struct extent_buffer *eb,
1464 int slot, struct btrfs_key *key,
1465 struct shared_node *active_node)
1475 struct btrfs_dir_item *di;
1476 struct inode_record *rec;
1477 struct cache_tree *root_cache;
1478 struct cache_tree *inode_cache;
1479 struct btrfs_key location;
1480 char namebuf[BTRFS_NAME_LEN];
1482 root_cache = &active_node->root_cache;
1483 inode_cache = &active_node->inode_cache;
1484 rec = active_node->current;
1485 rec->found_dir_item = 1;
1487 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488 total = btrfs_item_size_nr(eb, slot);
/* One item can hold several entries back to back; walk all of them. */
1489 while (cur < total) {
1491 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492 name_len = btrfs_dir_name_len(eb, di);
1493 data_len = btrfs_dir_data_len(eb, di);
1494 filetype = btrfs_dir_type(eb, di);
/* The directory's found_size accumulates the length of each name. */
1496 rec->found_size += name_len;
/*
 * Over-long names: the visible branch clamps the copy to BTRFS_NAME_LEN
 * and flags the backref. The other branch is not visible here.
 */
1497 if (name_len <= BTRFS_NAME_LEN) {
1501 len = BTRFS_NAME_LEN;
1502 error = REF_ERR_NAME_TOO_LONG;
1504 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* The type of the entry's location key selects which cache is updated. */
1506 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507 add_inode_backref(inode_cache, location.objectid,
1508 key->objectid, key->offset, namebuf,
1509 len, filetype, key->type, error);
1510 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511 add_inode_backref(root_cache, location.objectid,
1512 key->objectid, key->offset,
1513 namebuf, len, filetype,
/* Any other location type is reported and filed under a placeholder id. */
1516 fprintf(stderr, "invalid location in dir item %u\n",
1518 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519 key->objectid, key->offset, namebuf,
1520 len, filetype, key->type, error);
/* Step over header + name + data to the next packed entry. */
1523 len = sizeof(*di) + name_len + data_len;
1524 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item carrying more than one entry is flagged as an error. */
1527 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * process_inode_ref - record each name packed in an INODE_REF item as a
 * backref of inode key->objectid in directory key->offset.
 */
1533 static int process_inode_ref(struct extent_buffer *eb,
1534 int slot, struct btrfs_key *key,
1535 struct shared_node *active_node)
1543 struct cache_tree *inode_cache;
1544 struct btrfs_inode_ref *ref;
1545 char namebuf[BTRFS_NAME_LEN];
1547 inode_cache = &active_node->inode_cache;
1549 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550 total = btrfs_item_size_nr(eb, slot);
/* Walk all refs stored back to back inside the item. */
1551 while (cur < total) {
1552 name_len = btrfs_inode_ref_name_len(eb, ref);
1553 index = btrfs_inode_ref_index(eb, ref);
/*
 * Over-long names: the visible branch clamps the copy to BTRFS_NAME_LEN
 * and flags the backref. The other branch is not visible here.
 */
1554 if (name_len <= BTRFS_NAME_LEN) {
1558 len = BTRFS_NAME_LEN;
1559 error = REF_ERR_NAME_TOO_LONG;
1561 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562 add_inode_backref(inode_cache, key->objectid, key->offset,
1563 index, namebuf, len, 0, key->type, error);
/* Step over header + name to the next ref. */
1565 len = sizeof(*ref) + name_len;
1566 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * process_inode_extref - record each name packed in an INODE_EXTREF item as a
 * backref of inode key->objectid. Unlike process_inode_ref(), the parent
 * directory is read from the extref itself rather than from the key offset.
 */
1572 static int process_inode_extref(struct extent_buffer *eb,
1573 int slot, struct btrfs_key *key,
1574 struct shared_node *active_node)
1583 struct cache_tree *inode_cache;
1584 struct btrfs_inode_extref *extref;
1585 char namebuf[BTRFS_NAME_LEN];
1587 inode_cache = &active_node->inode_cache;
1589 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590 total = btrfs_item_size_nr(eb, slot);
/* Walk all extrefs stored back to back inside the item. */
1591 while (cur < total) {
1592 name_len = btrfs_inode_extref_name_len(eb, extref);
1593 index = btrfs_inode_extref_index(eb, extref);
1594 parent = btrfs_inode_extref_parent(eb, extref);
/*
 * Over-long names: the visible branch clamps the copy to BTRFS_NAME_LEN
 * and flags the backref. The other branch is not visible here.
 */
1595 if (name_len <= BTRFS_NAME_LEN) {
1599 len = BTRFS_NAME_LEN;
1600 error = REF_ERR_NAME_TOO_LONG;
1602 read_extent_buffer(eb, namebuf,
1603 (unsigned long)(extref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, parent,
1605 index, namebuf, len, 0, key->type, error);
/* Step over header + name to the next extref. */
1607 len = sizeof(*extref) + name_len;
1608 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * count_csum_range - scan the checksum tree for items overlapping the byte
 * range [@start, @start + @len).
 *
 * NOTE(review): the statements that update *@found and advance the range are
 * not visible in this view; presumably *@found receives the number of bytes
 * covered by checksums -- confirm against the full source.
 */
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616 u64 len, u64 *found)
1618 struct btrfs_key key;
1619 struct btrfs_path path;
1620 struct extent_buffer *leaf;
1625 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1627 btrfs_init_path(&path);
1629 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1631 key.type = BTRFS_EXTENT_CSUM_KEY;
1633 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: look at the previous slot, since a checksum item that
 * begins earlier may still cover the start of the range.
 */
1637 if (ret > 0 && path.slots[0] > 0) {
1638 leaf = path.nodes[0];
1639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641 key.type == BTRFS_EXTENT_CSUM_KEY)
1646 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1647 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1653 leaf = path.nodes[0];
1656 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items that are not checksum items are tested here. */
1657 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658 key.type != BTRFS_EXTENT_CSUM_KEY)
1661 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Checksum items starting at or beyond the end of the range are tested here. */
1662 if (key.offset >= start + len)
1665 if (key.offset > start)
/* Each csum_size bytes of item payload cover one sector of data. */
1668 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670 if (csum_end > start) {
1671 size = min(csum_end - start, len);
1680 btrfs_release_path(&path);
/*
 * process_file_extent - validate one EXTENT_DATA item and fold it into the
 * inode record currently being built in @active_node.
 *
 * Updates the record's extent_start/extent_end, found_size and error bits,
 * records holes between consecutive extents, and checks checksum coverage
 * for extents that have a disk location.
 */
1686 static int process_file_extent(struct btrfs_root *root,
1687 struct extent_buffer *eb,
1688 int slot, struct btrfs_key *key,
1689 struct shared_node *active_node)
1691 struct inode_record *rec;
1692 struct btrfs_file_extent_item *fi;
1694 u64 disk_bytenr = 0;
1695 u64 extent_offset = 0;
/* Low-bit mask used to test and round to sector alignment. */
1696 u64 mask = root->sectorsize - 1;
1700 rec = active_node->current;
1701 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode. */
1704 if (rec->extent_start == (u64)-1) {
1705 rec->extent_start = key->offset;
1706 rec->extent_end = key->offset;
/* Compare against the end of the previous extent: overlap or hole. */
1709 if (rec->extent_end > key->offset)
1710 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711 else if (rec->extent_end < key->offset) {
1712 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713 key->offset - rec->extent_end);
1718 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719 extent_type = btrfs_file_extent_type(eb, fi);
1721 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1724 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725 rec->found_size += num_bytes;
/* Round the inline length up to a sector multiple. */
1726 num_bytes = (num_bytes + mask) & ~mask;
1727 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1732 if (num_bytes == 0 || (num_bytes & mask))
1733 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced window must fit inside the extent's ram_bytes. */
1734 if (num_bytes + extent_offset >
1735 btrfs_file_extent_ram_bytes(eb, fi))
1736 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1737 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738 (btrfs_file_extent_compression(eb, fi) ||
1739 btrfs_file_extent_encryption(eb, fi) ||
1740 btrfs_file_extent_other_encoding(eb, fi)))
1741 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk location count towards found_size. */
1742 if (disk_bytenr > 0)
1743 rec->found_size += num_bytes;
1745 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1747 rec->extent_end = key->offset + num_bytes;
1750 * The data reloc tree will copy full extents into its inode and then
1751 * copy the corresponding csums. Because the extent it copied could be
1752 * a preallocated extent that hasn't been written to yet there may be no
1753 * csums to copy, ergo we won't have csums for our file extent. This is
1754 * ok so just don't bother checking csums if the inode belongs to the
1757 if (disk_bytenr > 0 &&
1758 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the on-disk length is the one used for the lookup. */
1760 if (btrfs_file_extent_compression(eb, fi))
1761 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1763 disk_bytenr += extent_offset;
1765 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extents are checked for missing csums, prealloc ones for stray csums. */
1768 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1770 rec->found_csum_item = 1;
1771 if (found < num_bytes)
1772 rec->some_csum_missing = 1;
1773 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1775 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * process_one_leaf - dispatch every item of leaf @eb to the matching
 * process_*() helper, maintaining the active node's "current" inode record
 * as the item keys move from one inode to the next.
 */
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782 struct walk_control *wc)
1784 struct btrfs_key key;
1788 struct cache_tree *inode_cache;
1789 struct shared_node *active_node;
/*
 * Special case for a root item with zero refs while the walk is at the
 * root level; the controlled statement is not visible here.
 */
1791 if (wc->root_level == wc->active_node &&
1792 btrfs_root_refs(&root->root_item) == 0)
1795 active_node = wc->nodes[wc->active_node];
1796 inode_cache = &active_node->inode_cache;
1797 nritems = btrfs_header_nritems(eb);
1798 for (i = 0; i < nritems; i++) {
1799 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space objects and orphan items are tested for here. */
1801 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1803 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Keys moved on to a higher inode number: mark the previous record as
 * checked, let it be freed if possible, and fetch the record for the
 * new inode.
 */
1806 if (active_node->current == NULL ||
1807 active_node->current->ino < key.objectid) {
1808 if (active_node->current) {
1809 active_node->current->checked = 1;
1810 maybe_free_inode_rec(inode_cache,
1811 active_node->current);
1813 active_node->current = get_inode_rec(inode_cache,
1815 BUG_ON(IS_ERR(active_node->current));
/* Per-key-type dispatch. */
1818 case BTRFS_DIR_ITEM_KEY:
1819 case BTRFS_DIR_INDEX_KEY:
1820 ret = process_dir_item(root, eb, i, &key, active_node);
1822 case BTRFS_INODE_REF_KEY:
1823 ret = process_inode_ref(eb, i, &key, active_node);
1825 case BTRFS_INODE_EXTREF_KEY:
1826 ret = process_inode_extref(eb, i, &key, active_node);
1828 case BTRFS_INODE_ITEM_KEY:
1829 ret = process_inode_item(eb, i, &key, active_node);
1831 case BTRFS_EXTENT_DATA_KEY:
1832 ret = process_file_extent(root, eb, i, &key,
/*
 * reada_walk_down - issue readahead for the children of @node, starting at
 * pointer @slot and continuing to the last pointer in the node.
 */
1842 static void reada_walk_down(struct btrfs_root *root,
1843 struct extent_buffer *node, int slot)
1852 level = btrfs_header_level(node);
1856 nritems = btrfs_header_nritems(node);
1857 blocksize = root->nodesize;
/* Each child pointer carries a block address and an expected generation. */
1858 for (i = slot; i < nritems; i++) {
1859 bytenr = btrfs_node_blockptr(node, i);
1860 ptr_gen = btrfs_node_ptr_generation(node, i);
1861 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1866 * Check the child node/leaf by the following condition:
1867 * 1. the first item key of the node/leaf should be the same with the one
1869 * 2. block in parent node should match the child node/leaf.
1870 * 3. generation of parent node and child's header should be consistent.
1872 * Or the child node/leaf pointed by the key in parent is not valid.
1874 * We hope to check leaf owner too, but since subvol may share leaves,
1875 * which makes leaf owner check not so strong, key check should be
1876 * sufficient enough for that case.
1878 static int check_child_node(struct btrfs_root *root,
1879 struct extent_buffer *parent, int slot,
1880 struct extent_buffer *child)
1882 struct btrfs_key parent_key;
1883 struct btrfs_key child_key;
1886 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887 if (btrfs_header_level(child) == 0)
1888 btrfs_item_key_to_cpu(child, &child_key, 0);
1890 btrfs_node_key_to_cpu(child, &child_key, 0);
1892 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1895 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896 parent_key.objectid, parent_key.type, parent_key.offset,
1897 child_key.objectid, child_key.type, child_key.offset);
1899 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1901 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902 btrfs_node_blockptr(parent, slot),
1903 btrfs_header_bytenr(child));
1905 if (btrfs_node_ptr_generation(parent, slot) !=
1906 btrfs_header_generation(child)) {
1908 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909 btrfs_header_generation(child),
1910 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache used by walk_down_tree(): bytenr[level] is the block whose
 * reference count was looked up last at that level and refs[level] is that
 * count, so a repeated visit of the same block can skip the extent lookup.
 */
1916 u64 bytenr[BTRFS_MAX_LEVEL];
1917 u64 refs[BTRFS_MAX_LEVEL];
/*
 * walk_down_tree - descend from path->nodes[*level] towards the leaves,
 * processing each leaf reached and validating each child block before
 * stepping into it.
 *
 * @nrefs caches the extent reference count last looked up at every level.
 * *level is updated as the walk moves down.
 *
 * NOTE(review): many control-flow lines of this function are missing from
 * this view; the comments below describe only the visible statements.
 */
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921 struct walk_control *wc, int *level,
1922 struct node_refs *nrefs)
1924 enum btrfs_tree_block_status status;
1927 struct extent_buffer *next;
1928 struct extent_buffer *cur;
1933 WARN_ON(*level < 0);
1934 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count for the starting block, or look it up. */
1936 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937 refs = nrefs->refs[*level];
1940 ret = btrfs_lookup_extent_info(NULL, root,
1941 path->nodes[*level]->start,
1942 *level, 1, &refs, NULL);
1947 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948 nrefs->refs[*level] = refs;
1952 ret = enter_shared_node(root, path->nodes[*level]->start,
1960 while (*level >= 0) {
1961 WARN_ON(*level < 0);
1962 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963 cur = path->nodes[*level];
/* The header level must agree with the level the walk believes it is at. */
1965 if (btrfs_header_level(cur) != *level)
/* Test for having consumed every slot in the current block. */
1968 if (path->slots[*level] >= btrfs_header_nritems(cur))
1971 ret = process_one_leaf(root, cur, wc);
/* Child pointer under the current slot. */
1976 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978 blocksize = root->nodesize;
/* Reuse the cached ref count for the child block, or look it up. */
1980 if (bytenr == nrefs->bytenr[*level - 1]) {
1981 refs = nrefs->refs[*level - 1];
1983 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984 *level - 1, 1, &refs, NULL);
1988 nrefs->bytenr[*level - 1] = bytenr;
1989 nrefs->refs[*level - 1] = refs;
1994 ret = enter_shared_node(root, bytenr, refs,
1997 path->slots[*level]++;
/* Use the cached copy of the child if it is up to date, otherwise read it. */
2002 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004 free_extent_buffer(next);
2005 reada_walk_down(root, cur, path->slots[*level]);
2006 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record it as a corrupt extent. */
2008 if (!extent_buffer_uptodate(next)) {
2009 struct btrfs_key node_key;
2011 btrfs_node_key_to_cpu(path->nodes[*level],
2013 path->slots[*level]);
2014 btrfs_add_corrupt_extent_record(root->fs_info,
2016 path->nodes[*level]->start,
2017 root->nodesize, *level);
/* Cross-check the child against the parent pointer, then its own contents. */
2023 ret = check_child_node(root, cur, path->slots[*level], next);
2029 if (btrfs_is_leaf(next))
2030 status = btrfs_check_leaf(root, NULL, next);
2032 status = btrfs_check_node(root, NULL, next);
2033 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034 free_extent_buffer(next);
/* Step down: the child replaces whatever buffer the path held one level lower. */
2039 *level = *level - 1;
2040 free_extent_buffer(path->nodes[*level]);
2041 path->nodes[*level] = next;
2042 path->slots[*level] = 0;
/* Set the slot past the last item of the block at the current level. */
2045 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * walk_up_tree - climb from *level looking for a block that still has
 * unvisited slots, releasing exhausted blocks on the way up.
 *
 * NOTE(review): the branch bodies and return statements are not visible in
 * this view.
 */
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050 struct walk_control *wc, int *level)
2053 struct extent_buffer *leaf;
2055 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, "leaf" is the block at level i, which may be a node. */
2056 leaf = path->nodes[i];
/* Test whether another slot is left in this block. */
2057 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Drop the block at *level from the path. */
2062 free_extent_buffer(path->nodes[*level]);
2063 path->nodes[*level] = NULL;
2064 BUG_ON(*level > wc->active_node);
/* Leaving the level of the active shared node also leaves that node. */
2065 if (*level == wc->active_node)
2066 leave_shared_node(root, wc, *level);
/*
 * check_root_dir - sanity-check the inode record of a tree's root directory.
 *
 * The visible tests look at: presence of the inode item and absence of
 * recorded errors, nlink and found_link, a non-empty backref list, and the
 * first backref (inode ref found, index 0, name "..", dir item/index flags).
 * NOTE(review): the statements each test controls and the return value are
 * not visible in this view.
 */
2073 static int check_root_dir(struct inode_record *rec)
2075 struct inode_backref *backref;
2078 if (!rec->found_inode_item || rec->errors)
2080 if (rec->nlink != 1 || rec->found_link != 0)
2082 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2084 backref = to_inode_backref(rec->backrefs.next);
2085 if (!backref->found_inode_ref)
2087 if (backref->index != 0 || backref->namelen != 2 ||
2088 memcmp(backref->name, "..", 2))
2090 if (backref->found_dir_index || backref->found_dir_item)
/*
 * repair_inode_isize - rewrite the size stored in the inode item of rec->ino
 * to rec->found_size and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The search runs inside @trans with the cow argument set. @path is released
 * before returning.
 */
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098 struct btrfs_root *root, struct btrfs_path *path,
2099 struct inode_record *rec)
2101 struct btrfs_inode_item *ei;
2102 struct btrfs_key key;
/* Search with the largest possible offset for the inode's INODE_ITEM key. */
2105 key.objectid = rec->ino;
2106 key.type = BTRFS_INODE_ITEM_KEY;
2107 key.offset = (u64)-1;
2109 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 is tested here; the controlled statements are not visible. */
2113 if (!path->slots[0]) {
2120 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired. */
2121 if (key.objectid != rec->ino) {
2126 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127 struct btrfs_inode_item);
2128 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129 btrfs_mark_buffer_dirty(path->nodes[0]);
2130 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132 root->root_key.objectid);
2134 btrfs_release_path(path);
/*
 * repair_inode_orphan_item - add an orphan item for rec->ino inside @trans
 * and clear I_ERR_NO_ORPHAN_ITEM on the record. @path is released after the
 * insertion attempt.
 * NOTE(review): the check on ret between the calls is not visible here.
 */
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139 struct btrfs_root *root,
2140 struct btrfs_path *path,
2141 struct inode_record *rec)
2145 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146 btrfs_release_path(path);
2148 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * repair_inode_nbytes - rewrite the nbytes field in the inode item of
 * rec->ino to rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the
 * record. @path is released before returning.
 */
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153 struct btrfs_root *root,
2154 struct btrfs_path *path,
2155 struct inode_record *rec)
2157 struct btrfs_inode_item *ei;
2158 struct btrfs_key key;
2161 key.objectid = rec->ino;
2162 key.type = BTRFS_INODE_ITEM_KEY;
/* Search inside @trans with the cow argument set; the item is modified below. */
2165 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2172 /* Since ret == 0, no need to check anything */
2173 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174 struct btrfs_inode_item);
2175 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176 btrfs_mark_buffer_dirty(path->nodes[0]);
2177 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178 printf("reset nbytes for ino %llu root %llu\n",
2179 rec->ino, root->root_key.objectid);
2181 btrfs_release_path(path);
/*
 * add_missing_dir_index - insert the DIR_INDEX item described by @backref
 * (directory backref->dir, index backref->index, name backref->name) pointing
 * at inode rec->ino, in its own transaction.
 *
 * Afterwards the backref is marked as having a dir index and the cached
 * record of the parent directory has its found_size and isize error flag
 * brought up to date.
 */
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186 struct cache_tree *inode_cache,
2187 struct inode_record *rec,
2188 struct inode_backref *backref)
2190 struct btrfs_path path;
2191 struct btrfs_trans_handle *trans;
2192 struct btrfs_dir_item *dir_item;
2193 struct extent_buffer *leaf;
2194 struct btrfs_key key;
2195 struct btrfs_disk_key disk_key;
2196 struct inode_record *dir_rec;
2197 unsigned long name_ptr;
/* Item payload: dir item header followed directly by the name. */
2198 u32 data_size = sizeof(*dir_item) + backref->namelen;
2201 trans = btrfs_start_transaction(root, 1);
2203 return PTR_ERR(trans);
2205 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206 (unsigned long long)rec->ino);
2208 btrfs_init_path(&path);
2209 key.objectid = backref->dir;
2210 key.type = BTRFS_DIR_INDEX_KEY;
2211 key.offset = backref->index;
2212 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2215 leaf = path.nodes[0];
2216 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The new entry's location key: the INODE_ITEM of rec->ino. */
2218 disk_key.objectid = cpu_to_le64(rec->ino);
2219 disk_key.type = BTRFS_INODE_ITEM_KEY;
2220 disk_key.offset = 0;
2222 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224 btrfs_set_dir_data_len(leaf, dir_item, 0);
2225 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226 name_ptr = (unsigned long)(dir_item + 1);
2227 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228 btrfs_mark_buffer_dirty(leaf);
2229 btrfs_release_path(&path);
/* NOTE(review): the result of the commit is not checked on this line. */
2230 btrfs_commit_transaction(trans, root);
2232 backref->found_dir_index = 1;
2233 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234 BUG_ON(IS_ERR(dir_rec));
/* The new entry adds its name length to the directory's found size. */
2237 dir_rec->found_size += backref->namelen;
2238 if (dir_rec->found_size == dir_rec->isize &&
2239 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241 if (dir_rec->found_size != dir_rec->isize)
2242 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * delete_dir_index - remove the DIR_INDEX entry described by @backref
 * (directory backref->dir, index backref->index, name backref->name) in its
 * own transaction.
 *
 * NOTE(review): the conditions choosing between the two commit paths and
 * between btrfs_del_item() and btrfs_delete_one_dir_name() are not visible
 * in this view.
 */
2247 static int delete_dir_index(struct btrfs_root *root,
2248 struct cache_tree *inode_cache,
2249 struct inode_record *rec,
2250 struct inode_backref *backref)
2252 struct btrfs_trans_handle *trans;
2253 struct btrfs_dir_item *di;
2254 struct btrfs_path path;
2257 trans = btrfs_start_transaction(root, 1);
2259 return PTR_ERR(trans);
2261 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262 (unsigned long long)backref->dir,
2263 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264 (unsigned long long)root->objectid);
2266 btrfs_init_path(&path);
/* Look the entry up; the last argument passed to the lookup is -1. */
2267 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268 backref->name, backref->namelen,
2269 backref->index, -1);
/* First release-and-commit sequence; its guarding condition is not visible. */
2272 btrfs_release_path(&path);
2273 btrfs_commit_transaction(trans, root);
/* Two deletion calls: the whole item, or a single name inside it. */
2280 ret = btrfs_del_item(trans, root, &path);
2282 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2284 btrfs_release_path(&path);
2285 btrfs_commit_transaction(trans, root);
/*
 * create_inode_item - build a fresh inode item for rec->ino from what the
 * check has collected in @rec and insert it in its own transaction.
 *
 * The item is a directory (S_IFDIR | 0755, size = found_size) when a dir item
 * was found for the inode, otherwise a regular file (S_IFREG | 0755,
 * size = extent_end). atime/ctime/mtime are set to the current time and otime
 * to zero.
 */
2289 static int create_inode_item(struct btrfs_root *root,
2290 struct inode_record *rec,
2291 struct inode_backref *backref, int root_dir)
2293 struct btrfs_trans_handle *trans;
2294 struct btrfs_inode_item inode_item;
2295 time_t now = time(NULL);
2298 trans = btrfs_start_transaction(root, 1);
2299 if (IS_ERR(trans)) {
2300 ret = PTR_ERR(trans);
2304 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305 "be incomplete, please check permissions and content after "
2306 "the fsck completes.\n", (unsigned long long)root->objectid,
2307 (unsigned long long)rec->ino);
2309 memset(&inode_item, 0, sizeof(inode_item));
2310 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either 1 or rec->found_link; the selecting condition is not visible. */
2312 btrfs_set_stack_inode_nlink(&inode_item, 1);
2314 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316 if (rec->found_dir_item) {
2317 if (rec->found_file_extent)
2318 fprintf(stderr, "root %llu inode %llu has both a dir "
2319 "item and extents, unsure if it is a dir or a "
2320 "regular file so setting it as a directory\n",
2321 (unsigned long long)root->objectid,
2322 (unsigned long long)rec->ino);
2323 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is the exact negation of the if above. */
2325 } else if (!rec->found_dir_item) {
2326 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2329 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2338 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2340 btrfs_commit_transaction(trans, root);
/*
 * repair_inode_backrefs - walk every backref of @rec and repair inconsistent
 * combinations of dir index / dir item / inode ref / inode item.
 *
 * A "delete" flag, declared on a line not visible here, separates the pass
 * that removes bad dir indexes from the pass that adds missing items.
 * Returns an error code if one was recorded in ret, otherwise the value of
 * "repaired".
 */
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345 struct inode_record *rec,
2346 struct cache_tree *inode_cache,
2349 struct inode_backref *tmp, *backref;
2350 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The _safe iterator is needed because entries are unlinked inside the loop. */
2354 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory without an inode item: recreate the item first. */
2355 if (!delete && rec->ino == root_dirid) {
2356 if (!rec->found_inode_item) {
2357 ret = create_inode_item(root, rec, backref, 1);
2364 /* Index 0 for root dir's are special, don't mess with it */
2365 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index with no matching inode ref, or one whose index does not
 * match the ref, is deleted.
 */
2369 ((backref->found_dir_index && !backref->found_inode_ref) ||
2370 (backref->found_dir_index && backref->found_inode_ref &&
2371 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372 ret = delete_dir_index(root, inode_cache, rec, backref);
2376 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2380 if (!delete && !backref->found_dir_index &&
2381 backref->found_dir_item && backref->found_inode_ref) {
2382 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * NOTE(review): found_dir_index is tested twice in this condition; the
 * second test is redundant as written.
 */
2387 if (backref->found_dir_item &&
2388 backref->found_dir_index &&
2389 backref->found_dir_index) {
/* A fully consistent backref is dropped from the list. */
2390 if (!backref->errors &&
2391 backref->found_inode_ref) {
2392 list_del(&backref->list);
/* Only the inode ref exists: add the missing dir index/item pair. */
2398 if (!delete && (!backref->found_dir_index &&
2399 !backref->found_dir_item &&
2400 backref->found_inode_ref)) {
2401 struct btrfs_trans_handle *trans;
2402 struct btrfs_key location;
2404 ret = check_dir_conflict(root, backref->name,
2410 * let nlink fixing routine to handle it,
2411 * which can do it better.
2416 location.objectid = rec->ino;
2417 location.type = BTRFS_INODE_ITEM_KEY;
2418 location.offset = 0;
2420 trans = btrfs_start_transaction(root, 1);
2421 if (IS_ERR(trans)) {
2422 ret = PTR_ERR(trans);
2425 fprintf(stderr, "adding missing dir index/item pair "
2427 (unsigned long long)rec->ino);
2428 ret = btrfs_insert_dir_item(trans, root, backref->name,
2430 backref->dir, &location,
2431 imode_to_type(rec->imode),
2434 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item is missing: recreate it. */
2438 if (!delete && (backref->found_inode_ref &&
2439 backref->found_dir_index &&
2440 backref->found_dir_item &&
2441 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442 !rec->found_inode_item)) {
2443 ret = create_inode_item(root, rec, backref, 0);
2450 return ret ? ret : repaired;
/*
 * find_file_type - derive a BTRFS_FT_* type for @rec, first from the inode
 * mode when the inode item was found, otherwise from the first backref that
 * has a dir index or dir item. The contract is stated below.
 */
2454 * To determine the file type for nlink/inode_item repair
2456 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457 * Return -ENOENT if file type is not found.
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2461 struct inode_backref *backref;
2463 /* For inode item recovered case */
2464 if (rec->found_inode_item) {
2465 *type = imode_to_type(rec->imode);
/* Otherwise take the type recorded with a directory entry. */
2469 list_for_each_entry(backref, &rec->backrefs, list) {
2470 if (backref->found_dir_index || backref->found_dir_item) {
2471 *type = backref->filetype;
/*
 * find_file_name - copy the name of a backref of @rec that has a dir index,
 * dir item or inode ref into @name and store its length in *@namelen. The
 * copy is not NUL-terminated by this function. The contract is stated below.
 */
2479 * To determine the file name for nlink repair
2481 * Return 0 if file name is found, set name and namelen.
2482 * Return -ENOENT if file name is not found.
2484 static int find_file_name(struct inode_record *rec,
2485 char *name, int *namelen)
2487 struct inode_backref *backref;
2489 list_for_each_entry(backref, &rec->backrefs, list) {
2490 if (backref->found_dir_index || backref->found_dir_item ||
2491 backref->found_inode_ref) {
/* presumably @name has room for backref->namelen bytes -- confirm at callers */
2492 memcpy(name, backref->name, backref->namelen);
2493 *namelen = backref->namelen;
/*
 * reset_nlink works in three visible steps: unlink every backref (dropping
 * the incomplete ones from the list), set the inode item's nlink to 0, then
 * re-add a link for each backref still on the list.
 */
2500 /* Reset the nlink of the inode to the correct one */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502 struct btrfs_root *root,
2503 struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct inode_backref *backref;
2507 struct inode_backref *tmp;
2508 struct btrfs_key key;
2509 struct btrfs_inode_item *inode_item;
2512 /* We don't believe this either, reset it and iterate backref */
2513 rec->found_link = 0;
2515 /* Remove all backref including the valid ones */
2516 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518 backref->index, backref->name,
2519 backref->namelen, 0);
2523 /* remove invalid backref, so it won't be added back */
/* "Invalid" here means lacking any of dir index, dir item or inode ref. */
2524 if (!(backref->found_dir_index &&
2525 backref->found_dir_item &&
2526 backref->found_inode_ref)) {
2527 list_del(&backref->list);
2534 /* Set nlink to 0 */
2535 key.objectid = rec->ino;
2536 key.type = BTRFS_INODE_ITEM_KEY;
2538 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2545 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546 struct btrfs_inode_item);
2547 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548 btrfs_mark_buffer_dirty(path->nodes[0]);
2549 btrfs_release_path(path);
2552 * Add back valid inode_ref/dir_item/dir_index,
2553 * add_link() will handle the nlink inc, so new nlink must be correct
2555 list_for_each_entry(backref, &rec->backrefs, list) {
2556 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557 backref->name, backref->namelen,
2558 backref->filetype, &backref->index, 1);
2563 btrfs_release_path(path);
/*
 * repair_inode_nlinks - rebuild the link count of rec->ino and, when no link
 * is left afterwards, link the inode into a "lost+found" directory.
 *
 * The name and type used for the lost+found entry are recovered from the
 * record's backrefs before those are reset; fallbacks are the inode number as
 * the name and BTRFS_FT_REG_FILE as the type. I_ERR_LINK_COUNT_WRONG is
 * cleared on the record in all cases.
 */
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568 struct btrfs_root *root,
2569 struct btrfs_path *path,
2570 struct inode_record *rec)
2572 char *dir_name = "lost+found";
2573 char namebuf[BTRFS_NAME_LEN] = {0};
2578 int name_recovered = 0;
2579 int type_recovered = 0;
2583 * Get file name and type first before these invalid inode ref
2584 * are deleted by remove_all_invalid_backref()
/* NOTE(review): in the visible code the call that drops backrefs is reset_nlink(). */
2586 name_recovered = !find_file_name(rec, namebuf, &namelen);
2587 type_recovered = !find_file_type(rec, &type);
2589 if (!name_recovered) {
2590 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591 rec->ino, rec->ino);
2592 namelen = count_digits(rec->ino);
2593 sprintf(namebuf, "%llu", rec->ino);
2596 if (!type_recovered) {
2597 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2599 type = BTRFS_FT_REG_FILE;
2603 ret = reset_nlink(trans, root, path, rec);
2606 "Failed to reset nlink for inode %llu: %s\n",
2607 rec->ino, strerror(-ret));
/* No link was found for the inode: attach it under lost+found. */
2611 if (rec->found_link == 0) {
2612 lost_found_ino = root->highest_inode;
2613 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2618 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2622 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623 dir_name, strerror(-ret));
2626 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627 namebuf, namelen, type, NULL, 1);
2629 * Add ".INO" suffix several times to handle case where
2630 * "FILENAME.INO" is already taken by another file.
2632 while (ret == -EEXIST) {
2634 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2636 if (namelen + count_digits(rec->ino) + 1 >
2641 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2643 namelen += count_digits(rec->ino) + 1;
2644 ret = btrfs_add_link(trans, root, rec->ino,
2645 lost_found_ino, namebuf,
2646 namelen, type, NULL, 1);
2650 "Failed to link the inode %llu to %s dir: %s\n",
2651 rec->ino, dir_name, strerror(-ret));
2655 * Just increase the found_link, don't actually add the
2656 * backref. This will make things easier and this inode
2657 * record will be freed after the repair is done.
2658 * So fsck will not report problem about this inode.
2661 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662 namelen, namebuf, dir_name);
2664 printf("Fixed the nlink of inode %llu\n", rec->ino);
2667 * Clear the flag anyway, or we will loop forever for the same inode
2668 * as it will not be removed from the bad inode list and the dead loop
2671 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672 btrfs_release_path(path);
/*
 * find_normal_file_extent - scan the EXTENT_DATA items of inode @ino in @root
 * for one whose type is not BTRFS_FILE_EXTENT_INLINE. Purpose and error
 * policy are described below.
 * NOTE(review): the return statements are not visible in this view.
 */
2677 * Check if there is any normal(reg or prealloc) file extent for given
2679 * This is used to determine the file type when neither its dir_index/item or
2680 * inode_item exists.
2682 * This will *NOT* report error, if any error happens, just consider it does
2683 * not have any normal file extent.
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2687 struct btrfs_path path;
2688 struct btrfs_key key;
2689 struct btrfs_key found_key;
2690 struct btrfs_file_extent_item *fi;
2694 btrfs_init_path(&path);
2696 key.type = BTRFS_EXTENT_DATA_KEY;
/* Read-only search: no transaction, no insertion, no cow. */
2699 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the leaf's last item: go to the next leaf. */
2704 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705 ret = btrfs_next_leaf(root, &path);
2712 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Items that are not EXTENT_DATA items of @ino are tested here. */
2714 if (found_key.objectid != ino ||
2715 found_key.type != BTRFS_EXTENT_DATA_KEY)
2717 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718 struct btrfs_file_extent_item);
2719 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Any extent type other than inline is what this function looks for. */
2720 if (type != BTRFS_FILE_EXTENT_INLINE) {
2726 btrfs_release_path(&path);
2730 static u32 btrfs_type_to_imode(u8 type)
2732 static u32 imode_by_btrfs_type[] = {
2733 [BTRFS_FT_REG_FILE] = S_IFREG,
2734 [BTRFS_FT_DIR] = S_IFDIR,
2735 [BTRFS_FT_CHRDEV] = S_IFCHR,
2736 [BTRFS_FT_BLKDEV] = S_IFBLK,
2737 [BTRFS_FT_FIFO] = S_IFIFO,
2738 [BTRFS_FT_SOCK] = S_IFSOCK,
2739 [BTRFS_FT_SYMLINK] = S_IFLNK,
2742 return imode_by_btrfs_type[(type)];
/*
 * repair_inode_no_item - recreate the missing inode item of rec->ino.
 *
 * The file type comes from find_file_type() when possible, otherwise from
 * what else was found for the inode (file extents, dir items, orphan
 * extents), with a regular file as the last resort. On completion the record
 * has I_ERR_NO_INODE_ITEM cleared and I_ERR_LINK_COUNT_WRONG set.
 */
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746 struct btrfs_root *root,
2747 struct btrfs_path *path,
2748 struct inode_record *rec)
2752 int type_recovered = 0;
2755 printf("Trying to rebuild inode:%llu\n", rec->ino);
2757 type_recovered = !find_file_type(rec, &filetype);
2760 * Try to determine inode type if type not found.
2762 * For found regular file extent, it must be FILE.
2763 * For found dir_item/index, it must be DIR.
2765 * For undetermined one, use FILE as fallback.
2768 * 1. If found backref(inode_index/item is already handled) to it,
2770 * Need new inode-inode ref structure to allow search for that.
2772 if (!type_recovered) {
2773 if (rec->found_file_extent &&
2774 find_normal_file_extent(root, rec->ino)) {
2776 filetype = BTRFS_FT_REG_FILE;
2777 } else if (rec->found_dir_item) {
2779 filetype = BTRFS_FT_DIR;
2780 } else if (!list_empty(&rec->orphan_extents)) {
2782 filetype = BTRFS_FT_REG_FILE;
2784 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2787 filetype = BTRFS_FT_REG_FILE;
/* "mode" is assigned on lines not visible here; type bits are OR-ed in. */
2791 ret = btrfs_new_inode(trans, root, rec->ino,
2792 mode | btrfs_type_to_imode(filetype));
2797 * Here inode rebuild is done, we only rebuild the inode item,
2798 * don't repair the nlink(like move to lost+found).
2799 * That is the job of nlink repair.
2801 * We just fill the record and return
2803 rec->found_dir_item = 1;
2804 rec->imode = mode | btrfs_type_to_imode(filetype);
2806 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807 /* Ensure the inode_nlinks repair function will be called */
2808 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * repair_inode_orphan_extent - resolve the orphan data extents of @rec.
 *
 * @trans: running transaction
 * @root:  fs/subvolume tree the inode belongs to
 * @path:  scratch path, released after each conflict lookup
 * @rec:   inode record owning the orphan_extents list
 *
 * Each orphan is looked up with btrfs_get_extent(), using disk_len as the
 * extent length, to detect a conflicting file extent.  A conflicting
 * orphan is reported and its data extent is released with
 * btrfs_free_extent(); otherwise a file extent item is inserted for it
 * with btrfs_insert_file_extent().  The record is kept in sync:
 * found_size grows by disk_len, I_ERR_FILE_NBYTES_WRONG is cleared once
 * found_size equals nbytes, the covered range is removed from the hole
 * tree, and I_ERR_FILE_EXTENT_DISCOUNT is cleared when no hole remains.
 * Processed orphans are unlinked from the list and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814 struct btrfs_root *root,
2815 struct btrfs_path *path,
2816 struct inode_record *rec)
2818 struct orphan_data_extent *orphan;
2819 struct orphan_data_extent *tmp;
/* Safe iteration: entries are unlinked from the list inside the loop. */
2822 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2824 * Check for conflicting file extents
2826 * Here we don't know whether the extents is compressed or not,
2827 * so we can only assume it not compressed nor data offset,
2828 * and use its disk_len as extent length.
2830 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831 orphan->offset, orphan->disk_len, 0);
2832 btrfs_release_path(path);
2837 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838 orphan->disk_bytenr, orphan->disk_len);
2839 ret = btrfs_free_extent(trans,
2840 root->fs_info->extent_root,
2841 orphan->disk_bytenr, orphan->disk_len,
2842 0, root->objectid, orphan->objectid,
2847 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848 orphan->offset, orphan->disk_bytenr,
2849 orphan->disk_len, orphan->disk_len);
2853 /* Update file size info */
2854 rec->found_size += orphan->disk_len;
2855 if (rec->found_size == rec->nbytes)
2856 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2858 /* Update the file extent hole info too */
2859 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2863 if (RB_EMPTY_ROOT(&rec->holes))
2864 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2866 list_del(&orphan->list);
2869 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * repair_inode_discount_extent - fill the recorded holes of a file.
 *
 * @trans: running transaction
 * @root:  fs/subvolume tree the inode belongs to
 * @path:  caller provided path (not referenced by the visible statements)
 * @rec:   inode record whose hole tree is processed
 *
 * Every entry of the rec->holes rb-tree is turned into a hole extent with
 * btrfs_punch_hole() and then removed from the tree;
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared as soon as the tree is empty.
 * There is also a special case for a file that lost all of its file
 * extents: one hole is punched from offset 0 up to isize rounded up to the
 * sector size.  A message is printed once the inode has been fixed.
 */
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875 struct btrfs_root *root,
2876 struct btrfs_path *path,
2877 struct inode_record *rec)
2879 struct rb_node *node;
2880 struct file_extent_hole *hole;
/* Always restart from the first node: each pass deletes the one handled. */
2884 node = rb_first(&rec->holes);
2888 hole = rb_entry(node, struct file_extent_hole, node);
2889 ret = btrfs_punch_hole(trans, root, rec->ino,
2890 hole->start, hole->len);
2893 ret = del_file_extent_hole(&rec->holes, hole->start,
2897 if (RB_EMPTY_ROOT(&rec->holes))
2898 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899 node = rb_first(&rec->holes);
2901 /* special case for a file losing all its file extent */
2903 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904 round_up(rec->isize, root->sectorsize));
2908 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909 rec->ino, root->objectid);
/*
 * try_repair_inode - run every applicable repair for one inode record.
 *
 * @root: fs/subvolume tree the inode belongs to
 * @rec:  inode record carrying the I_ERR_* bits to fix
 *
 * Nothing is attempted unless at least one of the repairable error bits
 * tested below is set.  Otherwise a transaction with 7 reserved items is
 * started (PTR_ERR is returned when that fails) and the repairs run in a
 * fixed order: missing inode item, orphan extents, discount extents, dir
 * isize, orphan item, link count, nbytes.  Apart from the first one, each
 * repair only runs while the previous ones returned 0.  The transaction is
 * committed and the path released in all cases.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored or
 * checked, so a failed commit looks like a successful repair.  Confirm
 * whether that is acceptable.
 */
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2916 struct btrfs_trans_handle *trans;
2917 struct btrfs_path path;
/* Bail out early when no error handled by this function is present. */
2920 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921 I_ERR_NO_ORPHAN_ITEM |
2922 I_ERR_LINK_COUNT_WRONG |
2923 I_ERR_NO_INODE_ITEM |
2924 I_ERR_FILE_EXTENT_ORPHAN |
2925 I_ERR_FILE_EXTENT_DISCOUNT|
2926 I_ERR_FILE_NBYTES_WRONG)))
2930 * For nlink repair, it may create a dir and add link, so
2931 * 2 for parent(256)'s dir_index and dir_item
2932 * 2 for lost+found dir's inode_item and inode_ref
2933 * 1 for the new inode_ref of the file
2934 * 2 for lost+found dir's dir_index and dir_item for the file
2936 trans = btrfs_start_transaction(root, 7);
2938 return PTR_ERR(trans);
2940 btrfs_init_path(&path);
2941 if (rec->errors & I_ERR_NO_INODE_ITEM)
2942 ret = repair_inode_no_item(trans, root, &path, rec);
2943 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948 ret = repair_inode_isize(trans, root, &path, rec);
2949 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952 ret = repair_inode_nlinks(trans, root, &path, rec);
2953 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954 ret = repair_inode_nbytes(trans, root, &path, rec);
2955 btrfs_commit_transaction(trans, root);
2956 btrfs_release_path(&path);
/*
 * check_inode_recs - verify (and in repair mode fix) all inode records
 * collected for one fs/subvolume tree.
 *
 * @root:        tree the records were collected from
 * @inode_cache: cache tree of ptr_node entries pointing at inode_record
 *
 * Visible steps, in order:
 *  - a root with zero refs only gets a warning when records were collected;
 *  - the highest inode number seen is stored in root->highest_inode;
 *  - in repair mode the backrefs of every record are repaired in stages
 *    through repair_inode_backrefs();
 *  - the record of the root directory is checked with check_root_dir();
 *    a missing root directory can be recreated with btrfs_make_root_dir()
 *    inside its own one-item transaction;
 *  - every remaining record is removed from the cache, completed with
 *    I_ERR_NO_INODE_ITEM / I_ERR_LINK_COUNT_WRONG where needed, handed to
 *    try_repair_inode(), and whatever is still wrong is printed together
 *    with its unresolved backrefs before the record is freed.
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 */
2960 static int check_inode_recs(struct btrfs_root *root,
2961 struct cache_tree *inode_cache)
2963 struct cache_extent *cache;
2964 struct ptr_node *node;
2965 struct inode_record *rec;
2966 struct inode_backref *backref;
2971 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root without references is not expected to own any inode record. */
2973 if (btrfs_root_refs(&root->root_item) == 0) {
2974 if (!cache_tree_empty(inode_cache))
2975 fprintf(stderr, "warning line %d\n", __LINE__);
2980 * We need to record the highest inode number for later 'lost+found'
2982 * We must select an ino not used/referred by any existing inode, or
2983 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984 * this may cause 'lost+found' dir has wrong nlinks.
2986 cache = last_cache_extent(inode_cache);
2988 node = container_of(cache, struct ptr_node, cache);
2990 if (rec->ino > root->highest_inode)
2991 root->highest_inode = rec->ino;
2995 * We need to repair backrefs first because we could change some of the
2996 * errors in the inode recs.
2998 * We also need to go through and delete invalid backrefs first and then
2999 * add the correct ones second. We do this because we may get EEXIST
3000 * when adding back the correct index because we hadn't yet deleted the
3003 * For example, if we were missing a dir index then the directories
3004 * isize would be wrong, so if we fixed the isize to what we thought it
3005 * would be and then fixed the backref we'd still have a invalid fs, so
3006 * we need to add back the dir index and then check to see if the isize
3011 if (stage == 3 && !err)
3014 cache = search_cache_extent(inode_cache, 0);
3015 while (repair && cache) {
3016 node = container_of(cache, struct ptr_node, cache);
3018 cache = next_cache_extent(cache);
3020 /* Need to free everything up and rescan */
3022 remove_cache_extent(inode_cache, &node->cache);
3024 free_inode_rec(rec);
3028 if (list_empty(&rec->backrefs))
3031 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Dedicated handling of the root directory inode of this tree. */
3045 rec = get_inode_rec(inode_cache, root_dirid, 0);
3046 BUG_ON(IS_ERR(rec));
3048 ret = check_root_dir(rec);
3050 fprintf(stderr, "root %llu root dir %llu error\n",
3051 (unsigned long long)root->root_key.objectid,
3052 (unsigned long long)root_dirid);
3053 print_inode_error(root, rec);
3058 struct btrfs_trans_handle *trans;
3060 trans = btrfs_start_transaction(root, 1);
3061 if (IS_ERR(trans)) {
3062 err = PTR_ERR(trans);
3067 "root %llu missing its root dir, recreating\n",
3068 (unsigned long long)root->objectid);
3070 ret = btrfs_make_root_dir(trans, root, root_dirid);
3073 btrfs_commit_transaction(trans, root);
3077 fprintf(stderr, "root %llu root dir %llu not found\n",
3078 (unsigned long long)root->root_key.objectid,
3079 (unsigned long long)root_dirid);
/* Final pass: drain the cache, one record at a time. */
3083 cache = search_cache_extent(inode_cache, 0);
3086 node = container_of(cache, struct ptr_node, cache);
3088 remove_cache_extent(inode_cache, &node->cache);
3090 if (rec->ino == root_dirid ||
3091 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092 free_inode_rec(rec);
3096 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097 ret = check_orphan_item(root, rec->ino);
3099 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100 if (can_free_inode_rec(rec)) {
3101 free_inode_rec(rec);
3106 if (!rec->found_inode_item)
3107 rec->errors |= I_ERR_NO_INODE_ITEM;
3108 if (rec->found_link != rec->nlink)
3109 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3111 ret = try_repair_inode(root, rec);
3112 if (ret == 0 && can_free_inode_rec(rec)) {
3113 free_inode_rec(rec);
3119 if (!(repair && ret == 0))
3121 print_inode_error(root, rec);
/* Flag what each backref is missing, then report it. */
3122 list_for_each_entry(backref, &rec->backrefs, list) {
3123 if (!backref->found_dir_item)
3124 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125 if (!backref->found_dir_index)
3126 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127 if (!backref->found_inode_ref)
3128 backref->errors |= REF_ERR_NO_INODE_REF;
3129 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130 " namelen %u name %s filetype %d errors %x",
3131 (unsigned long long)backref->dir,
3132 (unsigned long long)backref->index,
3133 backref->namelen, backref->name,
3134 backref->filetype, backref->errors);
3135 print_ref_error(backref->errors);
3137 free_inode_rec(rec);
3139 return (error > 0) ? -1 : 0;
/*
 * get_root_rec - find or create the root_record for a root objectid.
 *
 * Looks @objectid up in @root_cache (range of size 1).  An existing entry
 * is returned as is; otherwise a zeroed record is allocated, initialised
 * (objectid, empty backref list, cache range [objectid, objectid + 1)) and
 * inserted into the cache.
 *
 * Returns the record, ERR_PTR(-ENOMEM) when the allocation fails or
 * ERR_PTR(-EEXIST) when the insertion fails.
 *
 * NOTE(review): no free() of the new record is visible before the -EEXIST
 * return; looks like the allocation leaks on that path.  Confirm.
 */
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3145 struct cache_extent *cache;
3146 struct root_record *rec = NULL;
3149 cache = lookup_cache_extent(root_cache, objectid, 1);
3151 rec = container_of(cache, struct root_record, cache);
3153 rec = calloc(1, sizeof(*rec));
3155 return ERR_PTR(-ENOMEM);
3156 rec->objectid = objectid;
3157 INIT_LIST_HEAD(&rec->backrefs);
3158 rec->cache.start = objectid;
3159 rec->cache.size = 1;
3161 ret = insert_cache_extent(root_cache, &rec->cache);
3163 return ERR_PTR(-EEXIST);
/*
 * get_root_backref - find or create a backref entry of a root record.
 *
 * @rec:      root record owning the backref list
 * @ref_root: objectid of the referencing root
 * @dir:      directory inode inside @ref_root
 * @index:    dir index, only stored when a new entry is created
 * @name:     entry name, @namelen bytes, need not be NUL terminated
 * @namelen:  length of @name
 *
 * An existing entry is matched on ref_root, dir, namelen and the name
 * bytes; @index takes no part in the comparison.  A new entry is allocated
 * with room for the name plus a terminating NUL, filled in and appended to
 * the tail of rec->backrefs.
 */
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169 u64 ref_root, u64 dir, u64 index,
3170 const char *name, int namelen)
3172 struct root_backref *backref;
3174 list_for_each_entry(backref, &rec->backrefs, list) {
3175 if (backref->ref_root != ref_root || backref->dir != dir ||
3176 backref->namelen != namelen)
3178 if (memcmp(name, backref->name, namelen))
/* No match: one allocation holds the struct, the name and its NUL byte. */
3183 backref = calloc(1, sizeof(*backref) + namelen + 1);
3186 backref->ref_root = ref_root;
3188 backref->index = index;
3189 backref->namelen = namelen;
3190 memcpy(backref->name, name, namelen);
3191 backref->name[namelen] = '\0';
3192 list_add_tail(&backref->list, &rec->backrefs);
/*
 * free_root_record - destructor for one root_record stored in a cache tree.
 *
 * @cache: the cache_extent embedded in the record
 *
 * Every backref is unlinked from rec->backrefs until the list is empty.
 * The macro invocation that follows instantiates the tree-wide free helper
 * from this callback (presumably free_root_recs_tree(), which is called
 * elsewhere in this file; confirm against the macro definition).
 */
3196 static void free_root_record(struct cache_extent *cache)
3198 struct root_record *rec;
3199 struct root_backref *backref;
3201 rec = container_of(cache, struct root_record, cache);
3202 while (!list_empty(&rec->backrefs)) {
3203 backref = to_root_backref(rec->backrefs.next);
3204 list_del(&backref->list);
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * add_root_backref - record one piece of evidence that a root is referenced.
 *
 * @root_cache: cache tree of root records
 * @root_id:    objectid of the root being referenced
 * @ref_root:   objectid of the root holding the reference
 * @dir:        directory inode inside @ref_root
 * @index:      dir index carried by the item
 * @name:       entry name, @namelen bytes
 * @namelen:    length of @name
 * @item_type:  key type of the item the evidence comes from
 *              (DIR_ITEM, DIR_INDEX, ROOT_REF or ROOT_BACKREF)
 * @errors:     REF_ERR_* bits already detected by the caller
 *
 * The matching root record and backref are fetched or created, @errors is
 * merged in, and the found_* flag for @item_type is set.  For every type
 * except DIR_ITEM the index is cross checked against what earlier items
 * recorded (REF_ERR_INDEX_UNMATCH on disagreement).  Duplicate ROOT_REF or
 * ROOT_BACKREF items are flagged.  A backref becomes reachable once both
 * the forward ref and the dir item have been seen.
 */
3213 static int add_root_backref(struct cache_tree *root_cache,
3214 u64 root_id, u64 ref_root, u64 dir, u64 index,
3215 const char *name, int namelen,
3216 int item_type, int errors)
3218 struct root_record *rec;
3219 struct root_backref *backref;
3221 rec = get_root_rec(root_cache, root_id);
3222 BUG_ON(IS_ERR(rec));
3223 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3226 backref->errors |= errors;
/* DIR_ITEM keys are skipped by the index cross check below. */
3228 if (item_type != BTRFS_DIR_ITEM_KEY) {
3229 if (backref->found_dir_index || backref->found_back_ref ||
3230 backref->found_forward_ref) {
3231 if (backref->index != index)
3232 backref->errors |= REF_ERR_INDEX_UNMATCH;
3234 backref->index = index;
3238 if (item_type == BTRFS_DIR_ITEM_KEY) {
3239 if (backref->found_forward_ref)
3241 backref->found_dir_item = 1;
3242 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243 backref->found_dir_index = 1;
3244 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245 if (backref->found_forward_ref)
3246 backref->errors |= REF_ERR_DUP_ROOT_REF;
3247 else if (backref->found_dir_item)
3249 backref->found_forward_ref = 1;
3250 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251 if (backref->found_back_ref)
3252 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253 backref->found_back_ref = 1;
/* Forward ref plus dir item together make the referenced root reachable. */
3258 if (backref->found_forward_ref && backref->found_dir_item)
3259 backref->reachable = 1;
/*
 * merge_root_recs - move per-tree child root evidence into the global cache.
 *
 * @root:      tree that was just walked
 * @src_cache: inode records collected for directory entries pointing at
 *             other roots; it is emptied by this function
 * @dst_cache: global cache of root records
 *
 * For the tree reloc root the source cache is simply freed.  Otherwise
 * each record is removed from @src_cache, tested with is_child_root() and
 * its backrefs are forwarded to add_root_backref() as DIR_ITEM and/or
 * DIR_INDEX evidence, after which the record is freed.  A backref carrying
 * an inode ref is treated as a fatal inconsistency (BUG_ON).
 */
3263 static int merge_root_recs(struct btrfs_root *root,
3264 struct cache_tree *src_cache,
3265 struct cache_tree *dst_cache)
3267 struct cache_extent *cache;
3268 struct ptr_node *node;
3269 struct inode_record *rec;
3270 struct inode_backref *backref;
3273 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274 free_inode_recs_tree(src_cache);
3279 cache = search_cache_extent(src_cache, 0);
3282 node = container_of(cache, struct ptr_node, cache);
3284 remove_cache_extent(src_cache, &node->cache);
/* Here rec->ino is used as the objectid of the candidate child root. */
3287 ret = is_child_root(root, root->objectid, rec->ino);
3293 list_for_each_entry(backref, &rec->backrefs, list) {
3294 BUG_ON(backref->found_inode_ref);
3295 if (backref->found_dir_item)
3296 add_root_backref(dst_cache, rec->ino,
3297 root->root_key.objectid, backref->dir,
3298 backref->index, backref->name,
3299 backref->namelen, BTRFS_DIR_ITEM_KEY,
3301 if (backref->found_dir_index)
3302 add_root_backref(dst_cache, rec->ino,
3303 root->root_key.objectid, backref->dir,
3304 backref->index, backref->name,
3305 backref->namelen, BTRFS_DIR_INDEX_KEY,
3309 free_inode_rec(rec);
/*
 * check_root_refs - verify that every fs tree is properly referenced.
 *
 * @root:       any root of the filesystem (used to reach fs_info)
 * @root_cache: cache of root records filled while walking the trees
 *
 * The record of BTRFS_FS_TREE_OBJECTID is fetched first as the starting
 * point.  A first pass walks all records and their backrefs and drops the
 * reachable flag of some backrefs, depending on whether the referencing
 * root itself has references (the exact conditions are on lines not
 * visible here).  As the fixme below says, circular references are not
 * detected.
 *
 * A second pass reports problems on stderr:
 *  - fs trees in the free objectid range without any reference, unless an
 *    orphan item lookup in the tree root or a missing root item explains
 *    them;
 *  - roots that are referenced but have no root item;
 *  - backrefs lacking a dir item, dir index, root backref or root ref.
 *
 * Returns 1 when the error counter is positive, 0 otherwise.
 */
3316 static int check_root_refs(struct btrfs_root *root,
3317 struct cache_tree *root_cache)
3319 struct root_record *rec;
3320 struct root_record *ref_root;
3321 struct root_backref *backref;
3322 struct cache_extent *cache;
3328 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329 BUG_ON(IS_ERR(rec));
3332 /* fixme: this can not detect circular references */
3335 cache = search_cache_extent(root_cache, 0);
3339 rec = container_of(cache, struct root_record, cache);
3340 cache = next_cache_extent(cache);
3342 if (rec->found_ref == 0)
3345 list_for_each_entry(backref, &rec->backrefs, list) {
3346 if (!backref->reachable)
3349 ref_root = get_root_rec(root_cache,
3351 BUG_ON(IS_ERR(ref_root));
3352 if (ref_root->found_ref > 0)
3355 backref->reachable = 0;
3357 if (rec->found_ref == 0)
/* Reporting pass over all root records. */
3363 cache = search_cache_extent(root_cache, 0);
3367 rec = container_of(cache, struct root_record, cache);
3368 cache = next_cache_extent(cache);
3370 if (rec->found_ref == 0 &&
3371 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373 ret = check_orphan_item(root->fs_info->tree_root,
3379 * If we don't have a root item then we likely just have
3380 * a dir item in a snapshot for this root but no actual
3381 * ref key or anything so it's meaningless.
3383 if (!rec->found_root_item)
3386 fprintf(stderr, "fs tree %llu not referenced\n",
3387 (unsigned long long)rec->objectid);
3391 if (rec->found_ref > 0 && !rec->found_root_item)
/* Flag what each backref is missing before deciding what to print. */
3393 list_for_each_entry(backref, &rec->backrefs, list) {
3394 if (!backref->found_dir_item)
3395 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396 if (!backref->found_dir_index)
3397 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398 if (!backref->found_back_ref)
3399 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400 if (!backref->found_forward_ref)
3401 backref->errors |= REF_ERR_NO_ROOT_REF;
3402 if (backref->reachable && backref->errors)
3409 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410 (unsigned long long)rec->objectid, rec->found_ref,
3411 rec->found_root_item ? "" : "not found");
3413 list_for_each_entry(backref, &rec->backrefs, list) {
3414 if (!backref->reachable)
3416 if (!backref->errors && rec->found_root_item)
3418 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419 " index %llu namelen %u name %s errors %x\n",
3420 (unsigned long long)backref->ref_root,
3421 (unsigned long long)backref->dir,
3422 (unsigned long long)backref->index,
3423 backref->namelen, backref->name,
3425 print_ref_error(backref->errors);
3428 return errors > 0 ? 1 : 0;
/*
 * process_root_ref - feed one ROOT_REF / ROOT_BACKREF item into the cache.
 *
 * @eb:         leaf holding the item
 * @slot:       slot of the item inside @eb
 * @key:        key of the item
 * @root_cache: cache of root records to update
 *
 * The dirid, sequence (used as index) and name are read from the item.
 * A name longer than BTRFS_NAME_LEN is cut to BTRFS_NAME_LEN and flagged
 * with REF_ERR_NAME_TOO_LONG.  The two key types store the pair of roots
 * in opposite order: for ROOT_REF the referenced root is key->offset and
 * the referencing root is key->objectid; the other branch passes them
 * swapped.
 */
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432 struct btrfs_key *key,
3433 struct cache_tree *root_cache)
3439 struct btrfs_root_ref *ref;
3440 char namebuf[BTRFS_NAME_LEN];
3443 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3445 dirid = btrfs_root_ref_dirid(eb, ref);
3446 index = btrfs_root_ref_sequence(eb, ref);
3447 name_len = btrfs_root_ref_name_len(eb, ref);
3449 if (name_len <= BTRFS_NAME_LEN) {
3453 len = BTRFS_NAME_LEN;
3454 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored right behind the fixed size item header. */
3456 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3458 if (key->type == BTRFS_ROOT_REF_KEY) {
3459 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460 index, namebuf, len, key->type, error);
3462 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463 index, namebuf, len, key->type, error);
/*
 * free_corrupt_block - destructor for one btrfs_corrupt_block cache entry.
 *
 * @cache: the cache_extent embedded in the corrupt block record
 *
 * The macro invocation that follows instantiates the tree-wide free helper
 * from this callback (presumably free_corrupt_blocks_tree(), which is
 * called elsewhere in this file; confirm against the macro definition).
 */
3468 static void free_corrupt_block(struct cache_extent *cache)
3470 struct btrfs_corrupt_block *corrupt;
3472 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3479 * Repair the btree of the given root.
3481 * The fix is to remove the node key in corrupt_blocks cache_tree.
3482 * and rebalance the tree.
3483 * After the fix, the btree should be writeable.
/*
 * repair_btree - drop the corrupted tree blocks recorded for @root.
 *
 * @root:           tree to repair
 * @corrupt_blocks: cache tree of btrfs_corrupt_block records
 *
 * Nothing is done for an empty cache.  Otherwise one single-item
 * transaction covers two passes over the cache:
 *  1. each record is located with btrfs_search_slot() at the level of the
 *     corrupted block (ins_len 0, cow 1), the parent pointer to it is
 *     removed with btrfs_del_ptr() and the block extent is released with
 *     btrfs_free_extent();
 *  2. each key is searched again with ins_len -1, which lets the search
 *     rebalance the tree around the removed blocks.
 * The transaction is committed and the path released at the end.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored or
 * checked here.  Confirm whether a failed commit should be reported.
 */
3485 static int repair_btree(struct btrfs_root *root,
3486 struct cache_tree *corrupt_blocks)
3488 struct btrfs_trans_handle *trans;
3489 struct btrfs_path path;
3490 struct btrfs_corrupt_block *corrupt;
3491 struct cache_extent *cache;
3492 struct btrfs_key key;
3497 if (cache_tree_empty(corrupt_blocks))
3500 trans = btrfs_start_transaction(root, 1);
3501 if (IS_ERR(trans)) {
3502 ret = PTR_ERR(trans);
3503 fprintf(stderr, "Error starting transaction: %s\n",
3507 btrfs_init_path(&path);
3508 cache = first_cache_extent(corrupt_blocks);
3510 corrupt = container_of(cache, struct btrfs_corrupt_block,
/* Stop the search at the level of the corrupted block, not at a leaf. */
3512 level = corrupt->level;
3513 path.lowest_level = level;
3514 key.objectid = corrupt->key.objectid;
3515 key.type = corrupt->key.type;
3516 key.offset = corrupt->key.offset;
3519 * Here we don't want to do any tree balance, since it may
3520 * cause a balance with corrupted brother leaf/node,
3521 * so ins_len set to 0 here.
3522 * Balance will be done after all corrupt node/leaf is deleted.
3524 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3527 offset = btrfs_node_blockptr(path.nodes[level],
3530 /* Remove the ptr */
3531 ret = btrfs_del_ptr(trans, root, &path, level,
3536 * Remove the corresponding extent
3537 * return value is not concerned.
3539 btrfs_release_path(&path);
3540 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541 0, root->root_key.objectid,
3543 cache = next_cache_extent(cache);
3546 /* Balance the btree using btrfs_search_slot() */
3547 cache = first_cache_extent(corrupt_blocks);
3549 corrupt = container_of(cache, struct btrfs_corrupt_block,
3551 memcpy(&key, &corrupt->key, sizeof(key));
3552 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3555 /* return will always >0 since it won't find the item */
3557 btrfs_release_path(&path);
3558 cache = next_cache_extent(cache);
3561 btrfs_commit_transaction(trans, root);
3562 btrfs_release_path(&path);
/*
 * check_fs_root - walk one fs/subvolume tree and check everything in it.
 *
 * @root:       tree to check
 * @root_cache: global cache of root records
 * @wc:         walk control state shared between trees
 *
 * Visible steps, in order:
 *  - a local corrupt_blocks cache is installed in fs_info for the duration
 *    of the call, so corrupted tree blocks are recorded per tree;
 *  - unless this is the tree reloc root, its root record is fetched and
 *    marked found_root_item when the root item has references;
 *  - orphan data extents attached to the root are moved to the inode
 *    records they belong to and flagged I_ERR_FILE_EXTENT_ORPHAN;
 *  - the root block itself is verified with btrfs_check_leaf() or
 *    btrfs_check_node();
 *  - the walk starts at the root node, or, for a root with zero refs and a
 *    non-zero drop_progress key, at that key and drop_level (an invalid
 *    drop level is reported);
 *  - the tree is traversed with walk_down_tree() / walk_up_tree();
 *  - recorded corrupt blocks are listed and handed to repair_btree();
 *  - child root evidence is merged with merge_root_recs() and the
 *    collected inode records are checked with check_inode_recs();
 *  - the corrupt block cache and orphan extent list are released.
 */
3566 static int check_fs_root(struct btrfs_root *root,
3567 struct cache_tree *root_cache,
3568 struct walk_control *wc)
3574 struct btrfs_path path;
3575 struct shared_node root_node;
3576 struct root_record *rec;
3577 struct btrfs_root_item *root_item = &root->root_item;
3578 struct cache_tree corrupt_blocks;
3579 struct orphan_data_extent *orphan;
3580 struct orphan_data_extent *tmp;
3581 enum btrfs_tree_block_status status;
3582 struct node_refs nrefs;
3585 * Reuse the corrupt_block cache tree to record corrupted tree block
3587 * Unlike the usage in extent tree check, here we do it in a per
3588 * fs/subvol tree base.
3590 cache_tree_init(&corrupt_blocks);
3591 root->fs_info->corrupt_blocks = &corrupt_blocks;
3593 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594 rec = get_root_rec(root_cache, root->root_key.objectid);
3595 BUG_ON(IS_ERR(rec));
3596 if (btrfs_root_refs(root_item) > 0)
3597 rec->found_root_item = 1;
3600 btrfs_init_path(&path);
3601 memset(&root_node, 0, sizeof(root_node));
3602 cache_tree_init(&root_node.root_cache);
3603 cache_tree_init(&root_node.inode_cache);
3604 memset(&nrefs, 0, sizeof(nrefs));
3606 /* Move the orphan extent record to corresponding inode_record */
3607 list_for_each_entry_safe(orphan, tmp,
3608 &root->orphan_data_extents, list) {
3609 struct inode_record *inode;
3611 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3613 BUG_ON(IS_ERR(inode));
3614 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615 list_move(&orphan->list, &inode->orphan_extents);
/* Reset the walk control so the root node is the active shared node. */
3618 level = btrfs_header_level(root->node);
3619 memset(wc->nodes, 0, sizeof(wc->nodes));
3620 wc->nodes[level] = &root_node;
3621 wc->active_node = level;
3622 wc->root_level = level;
3624 /* We may not have checked the root block, lets do that now */
3625 if (btrfs_is_leaf(root->node))
3626 status = btrfs_check_leaf(root, NULL, root->node);
3628 status = btrfs_check_node(root, NULL, root->node);
3629 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Full walk from the root node, or resume from the drop progress key. */
3632 if (btrfs_root_refs(root_item) > 0 ||
3633 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634 path.nodes[level] = root->node;
3635 extent_buffer_get(root->node);
3636 path.slots[level] = 0;
3638 struct btrfs_key key;
3639 struct btrfs_disk_key found_key;
3641 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642 level = root_item->drop_level;
3643 path.lowest_level = level;
3644 if (level > btrfs_header_level(root->node) ||
3645 level >= BTRFS_MAX_LEVEL) {
3646 error("ignoring invalid drop level: %u", level);
3649 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3652 btrfs_node_key(path.nodes[level], &found_key,
3654 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655 sizeof(found_key)));
3659 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3665 wret = walk_up_tree(root, &path, wc, &level);
3672 btrfs_release_path(&path);
/* Report every corrupted tree block found during the walk. */
3674 if (!cache_tree_empty(&corrupt_blocks)) {
3675 struct cache_extent *cache;
3676 struct btrfs_corrupt_block *corrupt;
3678 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679 root->root_key.objectid);
3680 cache = first_cache_extent(&corrupt_blocks);
3682 corrupt = container_of(cache,
3683 struct btrfs_corrupt_block,
3685 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686 cache->start, corrupt->level,
3687 corrupt->key.objectid, corrupt->key.type,
3688 corrupt->key.offset);
3689 cache = next_cache_extent(cache);
3692 printf("Try to repair the btree for root %llu\n",
3693 root->root_key.objectid);
3694 ret = repair_btree(root, &corrupt_blocks);
3696 fprintf(stderr, "Failed to repair btree: %s\n",
3699 printf("Btree for root %llu is fixed\n",
3700 root->root_key.objectid);
3704 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* The record still being filled at the end of the walk is complete now. */
3708 if (root_node.current) {
3709 root_node.current->checked = 1;
3710 maybe_free_inode_rec(&root_node.inode_cache,
3714 err = check_inode_recs(root, &root_node.inode_cache);
3718 free_corrupt_blocks_tree(&corrupt_blocks);
3719 root->fs_info->corrupt_blocks = NULL;
3720 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * fs_root_objectid - tell whether @objectid names a tree to walk as an fs
 * tree.
 *
 * The tree reloc and data reloc tree objectids are tested explicitly
 * before falling back to is_fstree(); the caller in this file uses the
 * result as a boolean and handles the tree reloc root in its own branch.
 */
3724 static int fs_root_objectid(u64 objectid)
3726 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3729 return is_fstree(objectid);
/*
 * check_fs_roots - iterate the tree root and check every fs/subvolume tree.
 *
 * @root:       any root of the filesystem (used to reach fs_info)
 * @root_cache: global cache of root records, filled along the way
 *
 * Visible steps, in order:
 *  - the progress task is started when progress reporting is enabled;
 *  - cached block groups are reset, the walk control and path initialised;
 *  - the tree root is searched for ROOT_ITEM keys and its leaves iterated;
 *  - each ROOT_ITEM accepted by fs_root_objectid() is read (uncached for
 *    the tree reloc root, otherwise through btrfs_read_fs_root() with the
 *    key offset set to (u64)-1) and passed to check_fs_root();
 *  - when check_fs_root() returns -EAGAIN, or when the tree root node is
 *    no longer the one remembered in tree_node, the root record cache is
 *    freed and the path released (presumably to restart the scan; the
 *    jump itself is not visible here);
 *  - ROOT_REF and ROOT_BACKREF items go to process_root_ref();
 *  - finally the path and the shared node cache are released and the
 *    progress task stopped.
 */
3732 static int check_fs_roots(struct btrfs_root *root,
3733 struct cache_tree *root_cache)
3735 struct btrfs_path path;
3736 struct btrfs_key key;
3737 struct walk_control wc;
3738 struct extent_buffer *leaf, *tree_node;
3739 struct btrfs_root *tmp_root;
3740 struct btrfs_root *tree_root = root->fs_info->tree_root;
3744 if (ctx.progress_enabled) {
3745 ctx.tp = TASK_FS_ROOTS;
3746 task_start(ctx.info);
3750 * Just in case we made any changes to the extent tree that weren't
3751 * reflected into the free space cache yet.
3754 reset_cached_block_groups(root->fs_info);
3755 memset(&wc, 0, sizeof(wc));
3756 cache_tree_init(&wc.shared);
3757 btrfs_init_path(&path);
3762 key.type = BTRFS_ROOT_ITEM_KEY;
3763 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3768 tree_node = tree_root->node;
3770 if (tree_node != tree_root->node) {
3771 free_root_recs_tree(root_cache);
3772 btrfs_release_path(&path);
3775 leaf = path.nodes[0];
3776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777 ret = btrfs_next_leaf(tree_root, &path);
3783 leaf = path.nodes[0];
3785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787 fs_root_objectid(key.objectid)) {
3788 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789 tmp_root = btrfs_read_fs_root_no_cache(
3790 root->fs_info, &key);
3792 key.offset = (u64)-1;
3793 tmp_root = btrfs_read_fs_root(
3794 root->fs_info, &key);
3796 if (IS_ERR(tmp_root)) {
3800 ret = check_fs_root(tmp_root, root_cache, &wc);
3801 if (ret == -EAGAIN) {
3802 free_root_recs_tree(root_cache);
3803 btrfs_release_path(&path);
3808 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809 btrfs_free_fs_root(tmp_root);
3810 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811 key.type == BTRFS_ROOT_BACKREF_KEY) {
3812 process_root_ref(leaf, path.slots[0], &key,
3819 btrfs_release_path(&path);
3821 free_extent_cache_tree(&wc.shared);
3822 if (!cache_tree_empty(&wc.shared))
3823 fprintf(stderr, "warning line %d\n", __LINE__);
3825 task_stop(ctx.info);
/*
 * Error bits returned by the item cross-check helpers that follow
 * (find_dir_item(), check_inode_ref(), check_inode_extref(), ...).
 * Each value is a distinct single bit so that callers can OR several
 * results into one error mask.  Bit 0 is not assigned by these
 * definitions.
 */
3830 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
3831 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
3832 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
3833 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
3834 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
3835 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
3836 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
3837 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
3838 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
3839 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
3840 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
3841 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
3842 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
3843 #define NO_INODE_ITEM (1<<14) /* no inode_item */
3844 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
3845 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
3846 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
3849 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3850 * INODE_REF/INODE_EXTREF match.
3852 * @root: the root of the fs/file tree
3853 * @ref_key: the key of the INODE_REF/INODE_EXTREF
3854 * @key: the key of the DIR_ITEM/DIR_INDEX
3855 * @index: the index in the INODE_REF/INODE_EXTREF, used to
3856 * distinguish root_dir from a normal dir/file
3857 * @name: the name in the INODE_REF/INODE_EXTREF
3858 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
3859 * @mode: the st_mode of INODE_ITEM
3861 * Return 0 if no error occurred.
3862 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3863 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3865 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3866 * not match for normal dir/file.
/*
 * find_dir_item - implementation notes (the contract is described in the
 * comment block preceding this function).
 *
 * The lookup is read-only (NULL transaction, ins_len 0, cow 0).  When an
 * item is found, every btrfs_dir_item packed into it is visited: the
 * cursor advances by sizeof(*di) + name_len + data_len per entry.  An
 * entry matches when its location key is (ref_key->objectid,
 * INODE_ITEM, 0), its file type equals imode_to_type(@mode) and its name
 * equals @name over @namelen bytes.  Names longer than BTRFS_NAME_LEN are
 * trimmed to BTRFS_NAME_LEN with a warning before the comparison.
 * The path is released before returning.
 *
 * NOTE(review): @name is printed with %s in the error messages while the
 * comparison uses an explicit length.  Confirm that callers always pass a
 * NUL terminated buffer, including for names of exactly BTRFS_NAME_LEN
 * bytes.
 */
3868 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3869 struct btrfs_key *key, u64 index, char *name,
3870 u32 namelen, u32 mode)
3872 struct btrfs_path path;
3873 struct extent_buffer *node;
3874 struct btrfs_dir_item *di;
3875 struct btrfs_key location;
3876 char namebuf[BTRFS_NAME_LEN] = {0};
3886 btrfs_init_path(&path);
3887 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3889 ret = DIR_ITEM_MISSING;
3893 /* Process root dir and goto out*/
3896 ret = ROOT_DIR_ERROR;
3898 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3900 ref_key->type == BTRFS_INODE_REF_KEY ?
3902 ref_key->objectid, ref_key->offset,
3903 key->type == BTRFS_DIR_ITEM_KEY ?
3904 "DIR_ITEM" : "DIR_INDEX");
3912 /* Process normal file/dir */
3914 ret = DIR_ITEM_MISSING;
3916 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3918 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3919 ref_key->objectid, ref_key->offset,
3920 key->type == BTRFS_DIR_ITEM_KEY ?
3921 "DIR_ITEM" : "DIR_INDEX",
3922 key->objectid, key->offset, namelen, name,
3923 imode_to_type(mode));
3927 /* Check whether inode_id/filetype/name match */
3928 node = path.nodes[0];
3929 slot = path.slots[0];
3930 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3931 total = btrfs_item_size_nr(node, slot);
3932 while (cur < total) {
3933 ret = DIR_ITEM_MISMATCH;
3934 name_len = btrfs_dir_name_len(node, di);
3935 data_len = btrfs_dir_data_len(node, di);
3937 btrfs_dir_item_key_to_cpu(node, di, &location);
3938 if (location.objectid != ref_key->objectid ||
3939 location.type != BTRFS_INODE_ITEM_KEY ||
3940 location.offset != 0)
3943 filetype = btrfs_dir_type(node, di);
3944 if (imode_to_type(mode) != filetype)
3947 if (name_len <= BTRFS_NAME_LEN) {
3950 len = BTRFS_NAME_LEN;
3951 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3953 key->type == BTRFS_DIR_ITEM_KEY ?
3954 "DIR_ITEM" : "DIR_INDEX",
3955 key->objectid, key->offset, name_len);
3957 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3958 if (len != namelen || strncmp(namebuf, name, len))
3964 len = sizeof(*di) + name_len + data_len;
3965 di = (struct btrfs_dir_item *)((char *)di + len);
3968 if (ret == DIR_ITEM_MISMATCH)
3970 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3972 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3973 ref_key->objectid, ref_key->offset,
3974 key->type == BTRFS_DIR_ITEM_KEY ?
3975 "DIR_ITEM" : "DIR_INDEX",
3976 key->objectid, key->offset, namelen, name,
3977 imode_to_type(mode));
3979 btrfs_release_path(&path);
3984 * Traverse the given INODE_REF and call find_dir_item() to find related
3985 * DIR_ITEM/DIR_INDEX.
3987 * @root: the root of the fs/file tree
3988 * @ref_key: the key of the INODE_REF
3989 * @refs: the count of INODE_REF
3990 * @mode: the st_mode of INODE_ITEM
3992 * Return 0 if no error occurred.
/*
 * check_inode_ref - implementation notes (the contract is described in the
 * comment block preceding this function).
 *
 * An INODE_REF item may pack several (index, name) entries; the cursor
 * advances by sizeof(*ref) + name_len per entry.  For each entry the name
 * is copied into namebuf (cut to BTRFS_NAME_LEN with a warning when
 * longer), an entry with index 0 is required to be named .. (otherwise
 * ROOT_DIR_ERROR is added to the result), and the matching DIR_INDEX and
 * DIR_ITEM are looked up through find_dir_item().  The parent directory is
 * ref_key->offset and the DIR_ITEM key offset is the hash of the name.
 *
 * NOTE(review): namebuf is exactly BTRFS_NAME_LEN bytes, is reused for
 * every entry of the item and only its first len bytes are overwritten.
 * It looks like a maximum length name leaves it without a terminating NUL
 * and a shorter name can keep stale trailing bytes, while the buffer is
 * later formatted with %s.  Confirm.
 */
3994 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3995 struct extent_buffer *node, int slot, u64 *refs,
3998 struct btrfs_key key;
3999 struct btrfs_inode_ref *ref;
4000 char namebuf[BTRFS_NAME_LEN] = {0};
4008 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4009 total = btrfs_item_size_nr(node, slot);
4012 /* Update inode ref count */
4015 index = btrfs_inode_ref_index(node, ref);
4016 name_len = btrfs_inode_ref_name_len(node, ref);
4017 if (name_len <= BTRFS_NAME_LEN) {
4020 len = BTRFS_NAME_LEN;
4021 warning("root %llu INODE_REF[%llu %llu] name too long",
4022 root->objectid, ref_key->objectid, ref_key->offset);
4025 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4027 /* Check root dir ref name */
4028 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4029 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4030 root->objectid, ref_key->objectid, ref_key->offset,
4032 err |= ROOT_DIR_ERROR;
4035 /* Find related DIR_INDEX */
4036 key.objectid = ref_key->offset;
4037 key.type = BTRFS_DIR_INDEX_KEY;
4039 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4042 /* Find related dir_item */
4043 key.objectid = ref_key->offset;
4044 key.type = BTRFS_DIR_ITEM_KEY;
4045 key.offset = btrfs_name_hash(namebuf, len);
4046 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4049 len = sizeof(*ref) + name_len;
4050 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4059 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4060 * DIR_ITEM/DIR_INDEX.
4062 * @root: the root of the fs/file tree
4063 * @ref_key: the key of the INODE_EXTREF
4064 * @refs: the count of INODE_EXTREF
4065 * @mode: the st_mode of INODE_ITEM
4067 * Return 0 if no error occurred.
/*
 * Walk every entry packed into one INODE_EXTREF item and, for each entry,
 * look up the DIR_INDEX and DIR_ITEM under the entry's parent directory by
 * calling find_dir_item() twice with the entry's index and name.
 */
4069 static int check_inode_extref(struct btrfs_root *root,
4070 struct btrfs_key *ref_key,
4071 struct extent_buffer *node, int slot, u64 *refs,
4074 struct btrfs_key key;
4075 struct btrfs_inode_extref *extref;
4076 char namebuf[BTRFS_NAME_LEN] = {0};
/* 'extref' starts at the first packed entry; 'total' is the item size. */
4086 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4087 total = btrfs_item_size_nr(node, slot);
4090 /* update inode ref count */
4092 name_len = btrfs_inode_extref_name_len(node, extref);
4093 index = btrfs_inode_extref_index(node, extref);
/* Unlike INODE_REF, the parent directory is read from the entry itself. */
4094 parent = btrfs_inode_extref_parent(node, extref);
/* A name longer than BTRFS_NAME_LEN is clamped to the size of namebuf. */
4095 if (name_len <= BTRFS_NAME_LEN) {
4098 len = BTRFS_NAME_LEN;
4099 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4100 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes are stored directly after the fixed-size extref header. */
4102 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4104 /* Check root dir ref name */
/*
 * NOTE(review): namebuf is exactly BTRFS_NAME_LEN bytes, so a name of that
 * length leaves it without a terminating NUL; confirm that the "%s" in the
 * message below is safe for such names.
 */
4105 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4106 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4107 root->objectid, ref_key->objectid, ref_key->offset,
4109 err |= ROOT_DIR_ERROR;
4112 /* find related dir_index */
4113 key.objectid = parent;
4114 key.type = BTRFS_DIR_INDEX_KEY;
4116 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4119 /* find related dir_item */
4120 key.objectid = parent;
4121 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys are addressed by the hash of the (possibly clamped) name. */
4122 key.offset = btrfs_name_hash(namebuf, len);
4123 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next packed entry: fixed header plus the unclamped name. */
4126 len = sizeof(*extref) + name_len;
4127 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4137 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4138 * DIR_ITEM/DIR_INDEX match.
4140 * @root: the root of the fs/file tree
4141 * @key: the key of the INODE_REF/INODE_EXTREF
4142 * @name: the name in the INODE_REF/INODE_EXTREF
4143 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4144 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4146 * @ext_ref: the EXTENDED_IREF feature
4148 * Return 0 if no error occurred.
4149 * Return >0 for error bitmap
/*
 * Search for a back reference matching the given name: first among the
 * entries of the INODE_REF item addressed by 'key', then among the entries of
 * the INODE_EXTREF item derived from the same key. An index of (u64)-1
 * disables the index comparison in both loops.
 */
4151 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4152 char *name, int namelen, u64 index,
4153 unsigned int ext_ref)
4155 struct btrfs_path path;
4156 struct btrfs_inode_ref *ref;
4157 struct btrfs_inode_extref *extref;
4158 struct extent_buffer *node;
4159 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First pass: exact lookup of the INODE_REF item (read-only search). */
4170 btrfs_init_path(&path);
4171 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4173 ret = INODE_REF_MISSING;
4177 node = path.nodes[0];
4178 slot = path.slots[0];
4180 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4181 total = btrfs_item_size_nr(node, slot);
4183 /* Iterate over all entries of INODE_REF */
4184 while (cur < total) {
/* Assume "missing" until an entry in this iteration proves otherwise. */
4185 ret = INODE_REF_MISSING;
4187 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4188 ref_index = btrfs_inode_ref_index(node, ref);
4189 if (index != (u64)-1 && index != ref_index)
/* A name longer than BTRFS_NAME_LEN is clamped to the size of ref_namebuf. */
4192 if (ref_namelen <= BTRFS_NAME_LEN) {
4195 len = BTRFS_NAME_LEN;
4196 warning("root %llu INODE %s[%llu %llu] name too long",
4198 key->type == BTRFS_INODE_REF_KEY ?
4200 key->objectid, key->offset);
4202 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
/* Both the (clamped) length and the bytes must match the requested name. */
4205 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step to the next packed entry: fixed header plus the unclamped name. */
4211 len = sizeof(*ref) + ref_namelen;
4212 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4217 /* Skip the extref pass if the EXTENDED_IREF feature is not supported */
4221 btrfs_release_path(&path);
4222 btrfs_init_path(&path);
/*
 * Second pass: rebuild the key as an INODE_EXTREF key.
 * NOTE(review): this rewrites the caller's key in place (type and offset);
 * callers must not rely on *key being unchanged after the call.
 */
4224 dir_id = key->offset;
4225 key->type = BTRFS_INODE_EXTREF_KEY;
4226 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4228 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4230 ret = INODE_REF_MISSING;
4234 node = path.nodes[0];
4235 slot = path.slots[0];
4237 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4239 total = btrfs_item_size_nr(node, slot);
4241 /* Iterate over all entries of INODE_EXTREF */
4242 while (cur < total) {
4243 ret = INODE_REF_MISSING;
4245 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4246 ref_index = btrfs_inode_extref_index(node, extref);
4247 parent = btrfs_inode_extref_parent(node, extref);
4248 if (index != (u64)-1 && index != ref_index)
/* Entries sharing the hash but belonging to another directory are ignored. */
4251 if (parent != dir_id)
4254 if (ref_namelen <= BTRFS_NAME_LEN) {
4257 len = BTRFS_NAME_LEN;
/*
 * NOTE(review): unlike the equivalent message in the INODE_REF loop, this
 * format string carries its own "Warning: " prefix and trailing newline;
 * the two calls look like they should be consistent - confirm against the
 * warning() helper. key->type was set to BTRFS_INODE_EXTREF_KEY above, so
 * the conditional below presumably always selects its second operand.
 */
4258 warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4260 key->type == BTRFS_INODE_REF_KEY ?
4262 key->objectid, key->offset);
4264 read_extent_buffer(node, ref_namebuf,
4265 (unsigned long)(extref + 1), len);
4267 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step to the next packed entry: fixed header plus the unclamped name. */
4274 len = sizeof(*extref) + ref_namelen;
4275 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4280 btrfs_release_path(&path);
4285 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4286 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4288 * @root: the root of the fs/file tree
4289 * @key: the key of the DIR_ITEM/DIR_INDEX
4290 * @size: running sum of the entry name lengths, later compared with the
 * st_size of the directory INODE_ITEM
4291 * @ext_ref: the EXTENDED_IREF feature
4293 * Return 0 if no error occurred.
/*
 * Walk every entry packed into one DIR_ITEM/DIR_INDEX item. For each entry
 * the target INODE_ITEM is looked up and its file type compared with the
 * entry's type, then find_inode_ref() is asked for the matching back
 * reference. The name length of each entry is added to *size.
 */
4295 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4296 struct extent_buffer *node, int slot, u64 *size,
4297 unsigned int ext_ref)
4299 struct btrfs_dir_item *di;
4300 struct btrfs_inode_item *ii;
4301 struct btrfs_path path;
4302 struct btrfs_key location;
4303 char namebuf[BTRFS_NAME_LEN] = {0};
4316 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4317 * ignore index check.
4319 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
/* 'di' starts at the first packed entry; 'total' is the item size in bytes. */
4321 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4322 total = btrfs_item_size_nr(node, slot);
4324 while (cur < total) {
4325 data_len = btrfs_dir_data_len(node, di);
4327 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4328 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4329 "DIR_ITEM" : "DIR_INDEX",
4330 key->objectid, key->offset, data_len);
4332 name_len = btrfs_dir_name_len(node, di);
/* A name longer than BTRFS_NAME_LEN is clamped to the size of namebuf. */
4333 if (name_len <= BTRFS_NAME_LEN) {
4336 len = BTRFS_NAME_LEN;
4337 warning("root %llu %s[%llu %llu] name too long",
4339 key->type == BTRFS_DIR_ITEM_KEY ?
4340 "DIR_ITEM" : "DIR_INDEX",
4341 key->objectid, key->offset);
/* The unclamped name length is what gets accumulated for the caller. */
4343 (*size) += name_len;
/* The name bytes are stored directly after the fixed-size dir item header. */
4345 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4346 filetype = btrfs_dir_type(node, di);
4348 btrfs_init_path(&path);
/* 'location' is the key the directory entry points at. */
4349 btrfs_dir_item_key_to_cpu(node, di, &location);
4351 /* Ignore related ROOT_ITEM check */
4352 if (location.type == BTRFS_ROOT_ITEM_KEY)
4355 /* Check relative INODE_ITEM(existence/filetype) */
4356 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4358 err |= INODE_ITEM_MISSING;
4359 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4360 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4361 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4362 key->offset, location.objectid, name_len,
4367 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4368 struct btrfs_inode_item);
4369 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The type stored in the entry must agree with the inode's mode bits. */
4371 if (imode_to_type(mode) != filetype) {
4372 err |= INODE_ITEM_MISMATCH;
4373 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4374 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4375 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4376 key->offset, name_len, namebuf, filetype);
4379 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * Reuse 'location' as the INODE_REF key: the objectid stays the target
 * inode and the offset becomes this directory's objectid.
 */
4380 location.type = BTRFS_INODE_REF_KEY;
4381 location.offset = key->objectid;
4382 ret = find_inode_ref(root, &location, namebuf, len,
4385 if (ret & INODE_REF_MISSING)
4386 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4387 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4388 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4389 key->offset, name_len, namebuf, filetype);
4392 btrfs_release_path(&path);
/* Step to the next packed entry: header, unclamped name and data payload. */
4393 len = sizeof(*di) + name_len + data_len;
4394 di = (struct btrfs_dir_item *)((char *)di + len);
/* Bytes left over after an entry of a DIR_INDEX item are reported as an error. */
4397 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4398 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4399 root->objectid, key->objectid, key->offset);
4408 * Check file extent datasum/hole, update the size of the file extents,
4409 * check and update the last offset of the file extent.
4411 * @root: the root of fs/file tree.
4412 * @fkey: the key of the file extent.
4413 * @nodatasum: INODE_NODATASUM feature.
4414 * @size: the sum of all EXTENT_DATA items size for this inode.
4415 * @end: the offset of the last extent.
4417 * Return 0 if no error occurred.
/*
 * Validate one EXTENT_DATA item: inline extents are length-checked, regular
 * and preallocated extents are checked for checksum presence and for
 * hole/continuity rules. *size and *end are advanced by the extent length.
 */
4419 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4420 struct extent_buffer *node, int slot,
4421 unsigned int nodatasum, u64 *size, u64 *end)
4423 struct btrfs_file_extent_item *fi;
4426 u64 extent_num_bytes;
4428 unsigned int extent_type;
4429 unsigned int is_hole;
4433 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4435 extent_type = btrfs_file_extent_type(node, fi);
4436 /* Inline extents only get a length sanity check */
4437 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4438 struct btrfs_item *e = btrfs_item_nr(slot);
4439 u32 item_inline_len;
4441 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4442 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
/* An inline extent must be non-empty and match the item's inline length. */
4443 if (extent_num_bytes == 0 ||
4444 extent_num_bytes != item_inline_len)
4445 err |= FILE_EXTENT_ERROR;
4446 *size += extent_num_bytes;
4450 /* Check extent type */
4451 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4452 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4453 err |= FILE_EXTENT_ERROR;
4454 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4455 root->objectid, fkey->objectid, fkey->offset);
4459 /* Check REG_EXTENT/PREALLOC_EXTENT */
4460 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4461 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4462 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
/* A hole is an extent with neither a disk address nor a disk length. */
4463 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4465 /* Check EXTENT_DATA datasum */
/* 'found' receives the amount of checksummed data in the on-disk range. */
4466 ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4467 if (found > 0 && nodatasum) {
4468 err |= ODD_CSUM_ITEM;
4469 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4470 root->objectid, fkey->objectid, fkey->offset);
4471 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4473 (ret < 0 || found == 0 || found < disk_num_bytes)) {
4474 err |= CSUM_ITEM_MISSING;
4475 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4476 root->objectid, fkey->objectid, fkey->offset);
4477 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4478 err |= ODD_CSUM_ITEM;
4479 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4480 root->objectid, fkey->objectid, fkey->offset);
4483 /* Check EXTENT_DATA hole */
/*
 * With no_holes set, explicit hole extents are an error; without it, each
 * extent must start exactly where the previous one ended (*end).
 */
4484 if (no_holes && is_hole) {
4485 err |= FILE_EXTENT_ERROR;
4486 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4487 root->objectid, fkey->objectid, fkey->offset);
4488 } else if (!no_holes && *end != fkey->offset) {
4489 err |= FILE_EXTENT_ERROR;
4490 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4491 root->objectid, fkey->objectid, fkey->offset);
4494 *end += extent_num_bytes;
4496 *size += extent_num_bytes;
4502 * Check INODE_ITEM and related ITEMs (the same inode number)
4503 * 1. check link count
4504 * 2. check inode ref/extref
4505 * 3. check dir item/index
4507 * @ext_ref: the EXTENDED_IREF feature
4509 * Return 0 if no error occurred.
4510 * Return >0 (an error bitmap) if an error occurred or the end of the traversal was hit
/*
 * Check one INODE_ITEM together with the items that follow it and share its
 * objectid: each following item is dispatched by key type to the matching
 * checker, then the inode's nlink/size/nbytes fields are compared with the
 * values gathered while walking those items.
 */
4512 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4513 unsigned int ext_ref)
4515 struct extent_buffer *node;
4516 struct btrfs_inode_item *ii;
4517 struct btrfs_key key;
4526 u64 extent_size = 0;
4528 unsigned int nodatasum;
4533 node = path->nodes[0];
4534 slot = path->slots[0];
4536 btrfs_item_key_to_cpu(node, &key, slot);
4537 inode_id = key.objectid;
/* The orphan objectid is not a real inode; just advance past it. */
4539 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4540 ret = btrfs_next_item(root, path);
/* Cache the inode fields needed for the final verification. */
4546 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4547 isize = btrfs_inode_size(node, ii);
4548 nbytes = btrfs_inode_nbytes(node, ii);
4549 mode = btrfs_inode_mode(node, ii);
4550 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4551 nlink = btrfs_inode_nlink(node, ii);
4552 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4555 ret = btrfs_next_item(root, path);
4557 /* out will fill 'err' using current statistics */
4559 } else if (ret > 0) {
4564 node = path->nodes[0];
4565 slot = path->slots[0];
4566 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
4567 if (key.objectid != inode_id)
4571 case BTRFS_INODE_REF_KEY:
4572 ret = check_inode_ref(root, &key, node, slot, &refs,
4576 case BTRFS_INODE_EXTREF_KEY:
/* An extref without the feature flag is only warned about, then still checked. */
4577 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4578 warning("root %llu EXTREF[%llu %llu] isn't supported",
4579 root->objectid, key.objectid,
4581 ret = check_inode_extref(root, &key, node, slot, &refs,
4585 case BTRFS_DIR_ITEM_KEY:
4586 case BTRFS_DIR_INDEX_KEY:
4588 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4589 root->objectid, inode_id,
4590 imode_to_type(mode), key.objectid,
4593 ret = check_dir_item(root, &key, node, slot, &size,
4597 case BTRFS_EXTENT_DATA_KEY:
4599 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4600 root->objectid, inode_id, key.objectid,
4603 ret = check_file_extent(root, &key, node, slot,
4604 nodatasum, &extent_size,
4608 case BTRFS_XATTR_ITEM_KEY:
4611 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4612 key.objectid, key.type, key.offset);
4617 /* verify INODE_ITEM nlink/isize/nbytes */
4620 err |= LINK_COUNT_ERROR;
4621 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4622 root->objectid, inode_id, nlink);
4626 * Just a warning, as dir inode nbytes is just an
4627 * instructive value.
4629 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4630 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4631 root->objectid, inode_id, root->nodesize);
/* 'size' was accumulated by check_dir_item() from the entry name lengths. */
4634 if (isize != size) {
4636 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4637 root->objectid, inode_id, isize, size);
/* 'refs' was accumulated by the INODE_REF/INODE_EXTREF checkers. */
4640 if (nlink != refs) {
4641 err |= LINK_COUNT_ERROR;
4642 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4643 root->objectid, inode_id, nlink, refs);
4644 } else if (!nlink) {
4648 if (!nbytes && !no_holes && extent_end < isize) {
4649 err |= NBYTES_ERROR;
4650 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4651 root->objectid, inode_id, isize);
/* 'extent_size' was accumulated by check_file_extent(). */
4654 if (nbytes != extent_size) {
4655 err |= NBYTES_ERROR;
4656 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4657 root->objectid, inode_id, nbytes, extent_size);
4665 * Iterate all item on the tree and call check_inode_item() to check.
4667 * @root: the root of the tree to be checked.
4668 * @ext_ref: the EXTENDED_IREF feature
4670 * Return 0 if no error found.
4671 * Return <0 for error.
4672 * All internal error bitmap will be converted to -EIO, to avoid
4673 * mixing negative and positive return values.
/*
 * Walk the items of one fs/file tree. Every group of items is expected to
 * start with an INODE_ITEM, which is handed to check_inode_item(); items
 * without a leading INODE_ITEM are reported and skipped up to the next
 * objectid.
 */
4675 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4677 struct btrfs_path *path;
4678 struct btrfs_key key;
4682 path = btrfs_alloc_path();
4690 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4695 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4698 * All check must start with inode item, skip if not
4700 if (key.type == BTRFS_INODE_ITEM_KEY) {
4701 ret = check_inode_item(root, path, ext_ref);
/* LAST_ITEM in the error bitmap signals that the traversal is finished. */
4703 if (err & LAST_ITEM)
4707 error("root %llu ITEM[%llu %u %llu] isn't INODE_ITEM, skip to next inode",
4708 root->objectid, key.objectid, key.type,
4711 err |= NO_INODE_ITEM;
/* Remember the stray objectid so every item carrying it can be skipped. */
4712 inode_id = key.objectid;
4715 * skip to next inode
4716 * TODO: Maybe search_slot() will be faster?
4719 ret = btrfs_next_item(root, path);
4722 } else if (ret < 0) {
4726 btrfs_item_key_to_cpu(path->nodes[0], &key,
4728 } while (inode_id == key.objectid);
4735 btrfs_free_path(path);
4740 * Find the relative ref for root_ref and root_backref.
4742 * @root: the root of the root tree.
4743 * @ref_key: the key of the root ref.
4745 * Return 0 if no error occurred.
/*
 * Given a ROOT_REF or ROOT_BACKREF item, look up the item of the opposite
 * type with swapped objectid/offset and verify that both carry the same
 * dirid, sequence, name length and name.
 */
4747 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4748 struct extent_buffer *node, int slot)
4750 struct btrfs_path *path;
4751 struct btrfs_key key;
4752 struct btrfs_root_ref *ref;
4753 struct btrfs_root_ref *backref;
4754 char ref_name[BTRFS_NAME_LEN] = {0};
4755 char backref_name[BTRFS_NAME_LEN] = {0};
4761 u32 backref_namelen;
4766 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
4767 ref_dirid = btrfs_root_ref_dirid(node, ref);
4768 ref_seq = btrfs_root_ref_sequence(node, ref);
4769 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* A name longer than BTRFS_NAME_LEN is clamped to the size of ref_name. */
4771 if (ref_namelen <= BTRFS_NAME_LEN) {
4774 len = BTRFS_NAME_LEN;
4775 warning("%s[%llu %llu] ref_name too long",
4776 ref_key->type == BTRFS_ROOT_REF_KEY ?
4777 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are stored directly after the fixed-size root ref header. */
4780 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
4782 /* Find relative root_ref */
4783 key.objectid = ref_key->offset;
/* BACKREF + REF - type yields BACKREF for a REF key and REF for a BACKREF key. */
4784 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
4785 key.offset = ref_key->objectid;
/*
 * NOTE(review): the allocation result is passed straight to the search with
 * no NULL check in between; other helpers in this file use an on-stack
 * struct btrfs_path with btrfs_init_path(), which would avoid the problem.
 */
4787 path = btrfs_alloc_path();
4788 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4790 err |= ROOT_REF_MISSING;
4791 error("%s[%llu %llu] couldn't find relative ref",
4792 ref_key->type == BTRFS_ROOT_REF_KEY ?
4793 "ROOT_REF" : "ROOT_BACKREF",
4794 ref_key->objectid, ref_key->offset);
4798 backref = btrfs_item_ptr(path->nodes[0], path->slots[0],
4799 struct btrfs_root_ref);
4800 backref_dirid = btrfs_root_ref_dirid(path->nodes[0], backref);
4801 backref_seq = btrfs_root_ref_sequence(path->nodes[0], backref);
4802 backref_namelen = btrfs_root_ref_name_len(path->nodes[0], backref);
4804 if (backref_namelen <= BTRFS_NAME_LEN) {
4805 len = backref_namelen;
4807 len = BTRFS_NAME_LEN;
4808 warning("%s[%llu %llu] ref_name too long",
4809 key.type == BTRFS_ROOT_REF_KEY ?
4810 "ROOT_REF" : "ROOT_BACKREF",
4811 key.objectid, key.offset);
4813 read_extent_buffer(path->nodes[0], backref_name,
4814 (unsigned long)(backref + 1), len);
/* Name lengths are compared unclamped; the bytes are compared over 'len'. */
4816 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
4817 ref_namelen != backref_namelen ||
4818 strncmp(ref_name, backref_name, len)) {
4819 err |= ROOT_REF_MISMATCH;
4820 error("%s[%llu %llu] mismatch relative ref",
4821 ref_key->type == BTRFS_ROOT_REF_KEY ?
4822 "ROOT_REF" : "ROOT_BACKREF",
4823 ref_key->objectid, ref_key->offset);
4826 btrfs_free_path(path);
4831 * Check all fs/file tree in low_memory mode.
4833 * 1. for fs tree root item, call check_fs_root_v2()
4834 * 2. for fs tree root ref/backref, call check_root_ref()
4836 * Return 0 if no error occurred.
/*
 * Walk the tree of tree roots starting at BTRFS_FS_TREE_OBJECTID. ROOT_ITEMs
 * of fs trees are read and passed to check_fs_root_v2(); ROOT_REF and
 * ROOT_BACKREF items are passed to check_root_ref().
 */
4838 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
4840 struct btrfs_root *tree_root = fs_info->tree_root;
4841 struct btrfs_root *cur_root = NULL;
4842 struct btrfs_path *path;
4843 struct btrfs_key key;
4844 struct extent_buffer *node;
4845 unsigned int ext_ref;
/* Whether the filesystem advertises the EXTENDED_IREF incompat feature. */
4850 ext_ref = btrfs_fs_incompat(fs_info,
4851 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF);
4853 path = btrfs_alloc_path();
4857 key.objectid = BTRFS_FS_TREE_OBJECTID;
4859 key.type = BTRFS_ROOT_ITEM_KEY;
4861 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
4865 } else if (ret > 0) {
4871 node = path->nodes[0];
4872 slot = path->slots[0];
4873 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids beyond BTRFS_LAST_FREE_OBJECTID are outside the range checked here. */
4874 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
4876 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4877 fs_root_objectid(key.objectid)) {
/* Reloc tree roots are read without the cache and freed again below. */
4878 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4879 cur_root = btrfs_read_fs_root_no_cache(fs_info,
4882 key.offset = (u64)-1;
4883 cur_root = btrfs_read_fs_root(fs_info, &key);
4886 if (IS_ERR(cur_root)) {
4887 error("Fail to read fs/subvol tree: %lld",
4893 ret = check_fs_root_v2(cur_root, ext_ref);
4896 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4897 btrfs_free_fs_root(cur_root);
4898 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4899 key.type == BTRFS_ROOT_BACKREF_KEY) {
4900 ret = check_root_ref(tree_root, &key, node, slot);
4904 ret = btrfs_next_item(tree_root, path);
4914 btrfs_free_path(path);
/*
 * Walk rec->backrefs and cross-check every backref: presence in the extent
 * tree, being referenced back, per-backref reference counts, and agreement of
 * bytenr/length with the extent record. The per-backref found counts are
 * summed and compared with rec->refs at the end. Diagnostics go to stderr.
 */
4918 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4920 struct list_head *cur = rec->backrefs.next;
4921 struct extent_backref *back;
4922 struct tree_backref *tback;
4923 struct data_backref *dback;
4927 while(cur != &rec->backrefs) {
4928 back = to_extent_backref(cur);
/* Case 1: the backref was never seen in the extent tree. */
4930 if (!back->found_extent_tree) {
4934 if (back->is_data) {
4935 dback = to_data_backref(back);
4936 fprintf(stderr, "Backref %llu %s %llu"
4937 " owner %llu offset %llu num_refs %lu"
4938 " not found in extent tree\n",
4939 (unsigned long long)rec->start,
4940 back->full_backref ?
4942 back->full_backref ?
4943 (unsigned long long)dback->parent:
4944 (unsigned long long)dback->root,
4945 (unsigned long long)dback->owner,
4946 (unsigned long long)dback->offset,
4947 (unsigned long)dback->num_refs);
4949 tback = to_tree_backref(back);
4950 fprintf(stderr, "Backref %llu parent %llu"
4951 " root %llu not found in extent tree\n",
4952 (unsigned long long)rec->start,
4953 (unsigned long long)tback->parent,
4954 (unsigned long long)tback->root);
/* Case 2: a tree backref that nothing referenced back. */
4957 if (!back->is_data && !back->found_ref) {
4961 tback = to_tree_backref(back);
4962 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4963 (unsigned long long)rec->start,
4964 back->full_backref ? "parent" : "root",
4965 back->full_backref ?
4966 (unsigned long long)tback->parent :
4967 (unsigned long long)tback->root, back);
/* Case 3: data backref consistency (count, bytenr, length). */
4969 if (back->is_data) {
4970 dback = to_data_backref(back);
4971 if (dback->found_ref != dback->num_refs) {
4975 fprintf(stderr, "Incorrect local backref count"
4976 " on %llu %s %llu owner %llu"
4977 " offset %llu found %u wanted %u back %p\n",
4978 (unsigned long long)rec->start,
4979 back->full_backref ?
4981 back->full_backref ?
4982 (unsigned long long)dback->parent:
4983 (unsigned long long)dback->root,
4984 (unsigned long long)dback->owner,
4985 (unsigned long long)dback->offset,
4986 dback->found_ref, dback->num_refs, back);
4988 if (dback->disk_bytenr != rec->start) {
4992 fprintf(stderr, "Backref disk bytenr does not"
4993 " match extent record, bytenr=%llu, "
4994 "ref bytenr=%llu\n",
4995 (unsigned long long)rec->start,
4996 (unsigned long long)dback->disk_bytenr);
4999 if (dback->bytes != rec->nr) {
5003 fprintf(stderr, "Backref bytes do not match "
5004 "extent backref, bytenr=%llu, ref "
5005 "bytes=%llu, backref bytes=%llu\n",
5006 (unsigned long long)rec->start,
5007 (unsigned long long)rec->nr,
5008 (unsigned long long)dback->bytes);
/* Accumulate the global count; data backrefs contribute their found_ref. */
5011 if (!back->is_data) {
5014 dback = to_data_backref(back);
5015 found += dback->found_ref;
/* The summed count must equal the reference count stored in the record. */
5018 if (found != rec->refs) {
5022 fprintf(stderr, "Incorrect global backref count "
5023 "on %llu found %llu wanted %llu\n",
5024 (unsigned long long)rec->start,
5025 (unsigned long long)found,
5026 (unsigned long long)rec->refs);
/*
 * Drain rec->backrefs: repeatedly take the first list entry and convert it
 * to its extent_backref until the list is empty.
 */
5032 static int free_all_extent_backrefs(struct extent_record *rec)
5034 struct extent_backref *back;
5035 struct list_head *cur;
5036 while (!list_empty(&rec->backrefs)) {
/* Always operate on the current head of the list. */
5037 cur = rec->backrefs.next;
5038 back = to_extent_backref(cur);
/*
 * Empty the extent record cache: each cache entry is mapped back to its
 * extent_record, removed from the tree, and its backrefs are released.
 */
5045 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5046 struct cache_tree *extent_cache)
5048 struct cache_extent *cache;
5049 struct extent_record *rec;
5052 cache = first_cache_extent(extent_cache);
/* The cache_extent is embedded in the extent_record as its 'cache' member. */
5055 rec = container_of(cache, struct extent_record, cache);
5056 remove_cache_extent(extent_cache, cache);
5057 free_all_extent_backrefs(rec);
/*
 * Release an extent record early once nothing is left to verify: content and
 * owner refs checked, reference counts consistent and non-zero, no
 * duplicates, all backpointers verified, and no error flags set.
 */
5062 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5063 struct extent_record *rec)
/* all_backpointers_checked() is treated as "clean" when it returns 0. */
5065 if (rec->content_checked && rec->owner_ref_checked &&
5066 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5067 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5068 !rec->bad_full_backref && !rec->crossing_stripes &&
5069 !rec->wrong_chunk_type) {
5070 remove_cache_extent(extent_cache, &rec->cache);
5071 free_all_extent_backrefs(rec);
5072 list_del_init(&rec->list);
/*
 * Verify that the owner recorded in the tree block header is backed by a
 * reference. First the record's backrefs are scanned for a root matching the
 * header owner; failing that, the owner's fs tree is searched for a parent
 * node whose block pointer equals buf->start.
 */
5078 static int check_owner_ref(struct btrfs_root *root,
5079 struct extent_record *rec,
5080 struct extent_buffer *buf)
5082 struct extent_backref *node;
5083 struct tree_backref *back;
5084 struct btrfs_root *ref_root;
5085 struct btrfs_key key;
5086 struct btrfs_path path;
5087 struct extent_buffer *parent;
5092 list_for_each_entry(node, &rec->backrefs, list) {
5095 if (!node->found_ref)
5097 if (node->full_backref)
5099 back = to_tree_backref(node);
5100 if (btrfs_header_owner(buf) == back->root)
5103 BUG_ON(rec->is_root);
5105 /* try to find the block by search corresponding fs tree */
5106 key.objectid = btrfs_header_owner(buf);
5107 key.type = BTRFS_ROOT_ITEM_KEY;
5108 key.offset = (u64)-1;
5110 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5111 if (IS_ERR(ref_root))
/* Use the first key stored in the block as the search target. */
5114 level = btrfs_header_level(buf);
5116 btrfs_item_key_to_cpu(buf, &key, 0);
5118 btrfs_node_key_to_cpu(buf, &key, 0);
5120 btrfs_init_path(&path);
/* Stop the search one level above the block so its parent is in the path. */
5121 path.lowest_level = level + 1;
5122 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5126 parent = path.nodes[level + 1];
5127 if (parent && buf->start == btrfs_node_blockptr(parent,
5128 path.slots[level + 1]))
5131 btrfs_release_path(&path);
/* 0 means the owner reference was found, 1 means it was not. */
5132 return found ? 0 : 1;
/*
 * Scan the record's backrefs for a tree backref whose root is the extent
 * tree (BTRFS_EXTENT_TREE_OBJECTID).
 */
5135 static int is_extent_tree_record(struct extent_record *rec)
5137 struct list_head *cur = rec->backrefs.next;
5138 struct extent_backref *node;
5139 struct tree_backref *back;
5142 while(cur != &rec->backrefs) {
5143 node = to_extent_backref(cur);
5147 back = to_tree_backref(node);
5148 if (node->full_backref)
5150 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, start + len) and register it
 * as a corrupt extent via btrfs_add_corrupt_extent_record(), keyed by the
 * record's parent key. The is_extent_tree_record() test decides whether the
 * record qualifies.
 */
5157 static int record_bad_block_io(struct btrfs_fs_info *info,
5158 struct cache_tree *extent_cache,
5161 struct extent_record *rec;
5162 struct cache_extent *cache;
5163 struct btrfs_key key;
5165 cache = lookup_cache_extent(extent_cache, start, len);
5169 rec = container_of(cache, struct extent_record, cache);
5170 if (!is_extent_tree_record(rec))
/* The parent key is stored in disk format and must be converted first. */
5173 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5174 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at 'slot' and 'slot + 1' of a tree block. For an internal
 * node the two key pointers are exchanged; for a leaf the item data, item
 * offsets/sizes and keys are exchanged.
 */
5177 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5178 struct extent_buffer *buf, int slot)
/* A non-zero header level means 'buf' is an internal node. */
5180 if (btrfs_header_level(buf)) {
5181 struct btrfs_key_ptr ptr1, ptr2;
5183 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5184 sizeof(struct btrfs_key_ptr));
5185 read_extent_buffer(buf, &ptr2,
5186 btrfs_node_key_ptr_offset(slot + 1),
5187 sizeof(struct btrfs_key_ptr));
5188 write_extent_buffer(buf, &ptr1,
5189 btrfs_node_key_ptr_offset(slot + 1),
5190 sizeof(struct btrfs_key_ptr));
5191 write_extent_buffer(buf, &ptr2,
5192 btrfs_node_key_ptr_offset(slot),
5193 sizeof(struct btrfs_key_ptr));
/* Propagate the block's first key to the levels above it. */
5195 struct btrfs_disk_key key;
5196 btrfs_node_key(buf, &key, 0);
5197 btrfs_fixup_low_keys(root, path, &key,
5198 btrfs_header_level(buf) + 1);
5201 struct btrfs_item *item1, *item2;
5202 struct btrfs_key k1, k2;
5203 char *item1_data, *item2_data;
5204 u32 item1_offset, item2_offset, item1_size, item2_size;
5206 item1 = btrfs_item_nr(slot);
5207 item2 = btrfs_item_nr(slot + 1);
5208 btrfs_item_key_to_cpu(buf, &k1, slot);
5209 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5210 item1_offset = btrfs_item_offset(buf, item1);
5211 item2_offset = btrfs_item_offset(buf, item2);
5212 item1_size = btrfs_item_size(buf, item1);
5213 item2_size = btrfs_item_size(buf, item2);
/* Temporary copies of both payloads, sized to their respective items. */
5215 item1_data = malloc(item1_size);
5218 item2_data = malloc(item2_size);
5224 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5225 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write copies from one item's buffer using the OTHER
 * item's size. When the two sizes differ, the larger length reads past the
 * end of the smaller malloc'd buffer - confirm whether equal sizes are
 * guaranteed by the callers.
 */
5227 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5228 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange the offset/size bookkeeping of the two item headers. */
5232 btrfs_set_item_offset(buf, item1, item2_offset);
5233 btrfs_set_item_offset(buf, item2, item1_offset);
5234 btrfs_set_item_size(buf, item1, item2_size);
5235 btrfs_set_item_size(buf, item2, item1_size);
/* Exchange the keys; path->slots[0] selects which slot gets rewritten. */
5237 path->slots[0] = slot;
5238 btrfs_set_item_key_unsafe(root, path, &k2);
5239 path->slots[0] = slot + 1;
5240 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Scan adjacent key pairs in the block at path->lowest_level and call
 * swap_values() on pairs that are not in strictly ascending order, marking
 * the buffer dirty afterwards.
 */
5245 static int fix_key_order(struct btrfs_trans_handle *trans,
5246 struct btrfs_root *root,
5247 struct btrfs_path *path)
5249 struct extent_buffer *buf;
5250 struct btrfs_key k1, k2;
5252 int level = path->lowest_level;
5255 buf = path->nodes[level];
/*
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type, the
 * "- 1" wraps around for a block with zero items and the loop would run
 * far past the block - confirm empty blocks cannot reach this point.
 */
5256 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5258 btrfs_node_key_to_cpu(buf, &k1, i);
5259 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5261 btrfs_item_key_to_cpu(buf, &k1, i);
5262 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Pairs already in strictly ascending order need no fix. */
5264 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5266 ret = swap_values(root, path, buf, i);
5269 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at 'slot' from a leaf, but only for the key types
 * listed below. The following item headers are shifted down by one and the
 * header item count is decremented.
 */
5275 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5276 struct btrfs_root *root,
5277 struct btrfs_path *path,
5278 struct extent_buffer *buf, int slot)
5280 struct btrfs_key key;
5281 int nritems = btrfs_header_nritems(buf);
5283 btrfs_item_key_to_cpu(buf, &key, slot);
5285 /* These are all the keys we can deal with missing. */
5286 if (key.type != BTRFS_DIR_INDEX_KEY &&
5287 key.type != BTRFS_EXTENT_ITEM_KEY &&
5288 key.type != BTRFS_METADATA_ITEM_KEY &&
5289 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5290 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5293 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5294 (unsigned long long)key.objectid, key.type,
5295 (unsigned long long)key.offset, slot, buf->start);
/* Only the item header array is compacted here; item data is not moved. */
5296 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5297 btrfs_item_nr_offset(slot + 1),
5298 sizeof(struct btrfs_item) *
5299 (nritems - slot - 1));
5300 btrfs_set_header_nritems(buf, nritems - 1);
/* Propagate the leaf's (possibly new) first key to the parent level. */
5302 struct btrfs_disk_key disk_key;
5304 btrfs_item_key(buf, &disk_key, 0);
5305 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5307 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in a leaf so that the first item ends at
 * BTRFS_LEAF_DATA_SIZE(root) and every later item ends where the previous
 * item begins. Items that end too far right are handed to
 * delete_bogus_item(); gaps are closed by shifting the item's data.
 */
5311 static int fix_item_offset(struct btrfs_trans_handle *trans,
5312 struct btrfs_root *root,
5313 struct btrfs_path *path)
5315 struct extent_buffer *buf;
5319 /* We should only get this for leaves */
5320 BUG_ON(path->lowest_level);
5321 buf = path->nodes[0];
5323 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5324 unsigned int shift = 0, offset;
/* First item: its data must end exactly at the end of the leaf data area. */
5326 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5327 BTRFS_LEAF_DATA_SIZE(root)) {
5328 if (btrfs_item_end_nr(buf, i) >
5329 BTRFS_LEAF_DATA_SIZE(root)) {
5330 ret = delete_bogus_item(trans, root, path,
5334 fprintf(stderr, "item is off the end of the "
5335 "leaf, can't fix\n");
5339 shift = BTRFS_LEAF_DATA_SIZE(root) -
5340 btrfs_item_end_nr(buf, i);
/* Later items: data must end exactly where the previous item's data starts. */
5341 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5342 btrfs_item_offset_nr(buf, i - 1)) {
5343 if (btrfs_item_end_nr(buf, i) >
5344 btrfs_item_offset_nr(buf, i - 1)) {
5345 ret = delete_bogus_item(trans, root, path,
5349 fprintf(stderr, "items overlap, can't fix\n");
5353 shift = btrfs_item_offset_nr(buf, i - 1) -
5354 btrfs_item_end_nr(buf, i);
5359 printf("Shifting item nr %d by %u bytes in block %llu\n",
5360 i, shift, (unsigned long long)buf->start);
/* Move the item's data towards the end of the leaf by 'shift' bytes. */
5361 offset = btrfs_item_offset_nr(buf, i);
5362 memmove_extent_buffer(buf,
5363 btrfs_leaf_data(buf) + offset + shift,
5364 btrfs_leaf_data(buf) + offset,
5365 btrfs_item_size_nr(buf, i));
5366 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5368 btrfs_mark_buffer_dirty(buf);
5372 * We may have moved things, in which case we want to exit so we don't
5373 * write those changes out. Once we have proper abort functionality in
5374 * progs this can be changed to something nicer.
5381 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5382 * then just return -EIO.
5384 static int try_to_fix_bad_block(struct btrfs_root *root,
5385 struct extent_buffer *buf,
5386 enum btrfs_tree_block_status status)
5388 struct btrfs_trans_handle *trans;
5389 struct ulist *roots;
5390 struct ulist_node *node;
5391 struct btrfs_root *search_root;
5392 struct btrfs_path path;
5393 struct ulist_iterator iter;
5394 struct btrfs_key root_key, key;
/* Only bad key order and invalid item offsets have a repair routine below. */
5397 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5398 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
/* Ask btrfs_find_all_roots() for the roots associated with buf->start. */
5401 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5405 btrfs_init_path(&path);
5406 ULIST_ITER_INIT(&iter);
/* One repair attempt per root id stored in the returned ulist. */
5407 while ((node = ulist_next(roots, &iter))) {
5408 root_key.objectid = node->val;
5409 root_key.type = BTRFS_ROOT_ITEM_KEY;
5410 root_key.offset = (u64)-1;
5412 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5419 trans = btrfs_start_transaction(search_root, 0);
5420 if (IS_ERR(trans)) {
5421 ret = PTR_ERR(trans);
/*
 * Search down to the level of the bad block using its first key.
 * NOTE(review): skip_check_block presumably keeps the search from
 * rejecting the damaged block itself -- confirm in btrfs_search_slot().
 */
5425 path.lowest_level = btrfs_header_level(buf);
5426 path.skip_check_block = 1;
5427 if (path.lowest_level)
5428 btrfs_node_key_to_cpu(buf, &key, 0);
5430 btrfs_item_key_to_cpu(buf, &key, 0);
5431 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5434 btrfs_commit_transaction(trans, search_root);
/* Dispatch to the repair routine that matches the reported status. */
5437 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5438 ret = fix_key_order(trans, search_root, &path);
5439 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5440 ret = fix_item_offset(trans, search_root, &path);
5442 btrfs_commit_transaction(trans, search_root);
5445 btrfs_release_path(&path);
5446 btrfs_commit_transaction(trans, search_root);
5449 btrfs_release_path(&path);
/*
 * Run the structural checks on tree block @buf and update its extent record
 * in @extent_cache.
 *
 * The record's generation, info_objectid (objectid of the block's first key,
 * read only when the block has items) and info_level are refreshed from
 * @buf.  Leaves are checked with btrfs_check_leaf() and other blocks with
 * btrfs_check_node(), both against rec->parent_key.  A status other than
 * BTRFS_TREE_BLOCK_CLEAN leads to the try_to_fix_bad_block() call below.
 */
5453 static int check_block(struct btrfs_root *root,
5454 struct cache_tree *extent_cache,
5455 struct extent_buffer *buf, u64 flags)
5457 struct extent_record *rec;
5458 struct cache_extent *cache;
5459 struct btrfs_key key;
5460 enum btrfs_tree_block_status status;
5464 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5467 rec = container_of(cache, struct extent_record, cache);
5468 rec->generation = btrfs_header_generation(buf);
5470 level = btrfs_header_level(buf);
/* The first key is read with the leaf or the node accessor. */
5471 if (btrfs_header_nritems(buf) > 0) {
5474 btrfs_item_key_to_cpu(buf, &key, 0);
5476 btrfs_node_key_to_cpu(buf, &key, 0);
5478 rec->info_objectid = key.objectid;
5480 rec->info_level = level;
5482 if (btrfs_is_leaf(buf))
5483 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5485 status = btrfs_check_node(root, &rec->parent_key, buf);
5487 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5489 status = try_to_fix_bad_block(root, buf, status);
5490 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5492 fprintf(stderr, "bad block %llu\n",
5493 (unsigned long long)buf->start);
5496 * Signal to callers we need to start the scan over
5497 * again since we'll have cowed blocks.
5502 rec->content_checked = 1;
/* With FULL_BACKREF set, the owner reference is marked checked directly. */
5503 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5504 rec->owner_ref_checked = 1;
5506 ret = check_owner_ref(root, rec, buf);
5508 rec->owner_ref_checked = 1;
5512 maybe_free_extent_rec(extent_cache, rec);
/*
 * Scan rec->backrefs for a tree backref matching @parent or @root.
 *
 * Two comparisons are visible: back->parent against @parent, preceded by a
 * !full_backref test, and back->root against @root, preceded by a
 * full_backref test.  NOTE(review): the lines that select between the two
 * are not visible here; presumably the choice depends on whether @parent is
 * non-zero -- confirm.
 */
5516 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5517 u64 parent, u64 root)
5519 struct list_head *cur = rec->backrefs.next;
5520 struct extent_backref *node;
5521 struct tree_backref *back;
/* Walk the circular list until it returns to the list head. */
5523 while(cur != &rec->backrefs) {
5524 node = to_extent_backref(cur);
5528 back = to_tree_backref(node);
5530 if (!node->full_backref)
5532 if (parent == back->parent)
5535 if (node->full_backref)
5537 if (back->root == root)
/*
 * Allocate a tree_backref with malloc(), zero its embedded node and append
 * it to the tail of rec->backrefs.
 *
 * full_backref is set to 1 on the path that stores @parent and to 0 on the
 * other path.
 */
5544 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5545 u64 parent, u64 root)
5547 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared; other fields are assigned explicitly. */
5551 memset(&ref->node, 0, sizeof(ref->node));
5553 ref->parent = parent;
5554 ref->node.full_backref = 1;
5557 ref->node.full_backref = 0;
5559 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Scan rec->backrefs for a data backref matching either @parent or the
 * (@root, @owner, @offset) triple.
 *
 * For the triple match there is an extra test: when found_ref is set and the
 * candidate already has node->found_ref, a differing bytes or disk_bytenr
 * value is detected (the action taken is on a line not visible here).
 */
5564 static struct data_backref *find_data_backref(struct extent_record *rec,
5565 u64 parent, u64 root,
5566 u64 owner, u64 offset,
5568 u64 disk_bytenr, u64 bytes)
5570 struct list_head *cur = rec->backrefs.next;
5571 struct extent_backref *node;
5572 struct data_backref *back;
/* Walk the circular list until it returns to the list head. */
5574 while(cur != &rec->backrefs) {
5575 node = to_extent_backref(cur);
5579 back = to_data_backref(node);
5581 if (!node->full_backref)
5583 if (parent == back->parent)
5586 if (node->full_backref)
5588 if (back->root == root && back->owner == owner &&
5589 back->offset == offset) {
5590 if (found_ref && node->found_ref &&
5591 (back->bytes != bytes ||
5592 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node, mark it as
 * a data backref and append it to the tail of rec->backrefs.
 *
 * full_backref is 1 on the path that stores @parent and 0 on the path that
 * stores @offset.  ref->bytes is set from max_size, and rec->max_size is
 * raised when max_size is larger.
 */
5601 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5602 u64 parent, u64 root,
5603 u64 owner, u64 offset,
5606 struct data_backref *ref = malloc(sizeof(*ref));
5610 memset(&ref->node, 0, sizeof(ref->node));
5611 ref->node.is_data = 1;
5614 ref->parent = parent;
5617 ref->node.full_backref = 1;
5621 ref->offset = offset;
5622 ref->node.full_backref = 0;
5624 ref->bytes = max_size;
5627 list_add_tail(&ref->node.list, &rec->backrefs);
/* Keep the record's max_size as the largest size seen. */
5628 if (max_size > rec->max_size)
5629 rec->max_size = max_size;
5633 /* Check if the type of extent matches with its chunk */
/*
 * The block group is looked up from rec->start in global_info.
 * rec->wrong_chunk_type is set to 1 when:
 *  - a data extent sits in a block group without BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent sits in a block group with neither the SYSTEM nor the
 *    METADATA flag;
 *  - the first backref of a metadata extent is a data backref;
 *  - the first backref's root is the chunk tree but the block group is not
 *    SYSTEM, or it is another root but the block group is not METADATA.
 */
5634 static void check_extent_type(struct extent_record *rec)
5636 struct btrfs_block_group_cache *bg_cache;
5638 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5642 /* data extent, check chunk directly*/
5643 if (!rec->metadata) {
5644 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5645 rec->wrong_chunk_type = 1;
5649 /* metadata extent, check the obvious case first */
5650 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5651 BTRFS_BLOCK_GROUP_METADATA))) {
5652 rec->wrong_chunk_type = 1;
5657 * Check SYSTEM extent, as it's also marked as metadata, we can only
5658 * make sure it's a SYSTEM extent by its backref
5660 if (!list_empty(&rec->backrefs)) {
5661 struct extent_backref *node;
5662 struct tree_backref *tback;
/* Only the first backref on the list is inspected. */
5665 node = to_extent_backref(rec->backrefs.next);
5666 if (node->is_data) {
5667 /* tree block shouldn't have data backref */
5668 rec->wrong_chunk_type = 1;
5671 tback = container_of(node, struct tree_backref, node);
/* Chunk tree blocks are expected in SYSTEM block groups, others in METADATA. */
5673 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5674 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5676 bg_type = BTRFS_BLOCK_GROUP_METADATA;
5677 if (!(bg_cache->flags & bg_type))
5678 rec->wrong_chunk_type = 1;
5683 * Allocate a new extent record, fill default values from @tmpl and insert into
5684 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5685 * the cache, otherwise it fails.
5687 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5688 struct extent_record *tmpl)
5690 struct extent_record *rec;
/* rec comes from malloc(), so every field used later is assigned below. */
5693 rec = malloc(sizeof(*rec));
5696 rec->start = tmpl->start;
5697 rec->max_size = tmpl->max_size;
/* nr is never smaller than max_size. */
5698 rec->nr = max(tmpl->nr, tmpl->max_size);
5699 rec->found_rec = tmpl->found_rec;
5700 rec->content_checked = tmpl->content_checked;
5701 rec->owner_ref_checked = tmpl->owner_ref_checked;
5702 rec->num_duplicates = 0;
5703 rec->metadata = tmpl->metadata;
5704 rec->flag_block_full_backref = FLAG_UNSET;
5705 rec->bad_full_backref = 0;
5706 rec->crossing_stripes = 0;
5707 rec->wrong_chunk_type = 0;
5708 rec->is_root = tmpl->is_root;
5709 rec->refs = tmpl->refs;
5710 rec->extent_item_refs = tmpl->extent_item_refs;
5711 rec->parent_generation = tmpl->parent_generation;
5712 INIT_LIST_HEAD(&rec->backrefs);
5713 INIT_LIST_HEAD(&rec->dups);
5714 INIT_LIST_HEAD(&rec->list);
5715 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache entry uses tmpl->nr as its size, not the max() kept in rec->nr. */
5716 rec->cache.start = tmpl->start;
5717 rec->cache.size = tmpl->nr;
5718 ret = insert_cache_extent(extent_cache, &rec->cache);
/* Account the new extent in the file-wide bytes_used counter. */
5723 bytes_used += rec->nr;
/* The stripe-crossing check is given the tree root's nodesize as length. */
5726 rec->crossing_stripes = check_crossing_stripes(global_info,
5727 rec->start, global_info->tree_root->nodesize);
5728 check_extent_type(rec);
5733 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5735 * - refs - if found, increase refs
5736 * - is_root - if found, set
5737 * - content_checked - if found, set
5738 * - owner_ref_checked - if found, set
5740 * If not found, create a new one, initialize and insert.
5742 static int add_extent_rec(struct cache_tree *extent_cache,
5743 struct extent_record *tmpl)
5745 struct extent_record *rec;
5746 struct cache_extent *cache;
/* Look up an existing cache entry using tmpl->start and tmpl->nr. */
5750 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5752 rec = container_of(cache, struct extent_record, cache);
5756 rec->nr = max(tmpl->nr, tmpl->max_size);
5759 * We need to make sure to reset nr to whatever the extent
5760 * record says was the real size, this way we can compare it to
5763 if (tmpl->found_rec) {
/*
 * Either the template starts elsewhere than the record or the record
 * has already been found once: track the template as a duplicate.
 */
5764 if (tmpl->start != rec->start || rec->found_rec) {
5765 struct extent_record *tmp;
/* Put the record on the global duplicate list only once. */
5768 if (list_empty(&rec->list))
5769 list_add_tail(&rec->list,
5770 &duplicate_extents);
5773 * We have to do this song and dance in case we
5774 * find an extent record that falls inside of
5775 * our current extent record but does not have
5776 * the same objectid.
5778 tmp = malloc(sizeof(*tmp));
5781 tmp->start = tmpl->start;
5782 tmp->max_size = tmpl->max_size;
5785 tmp->metadata = tmpl->metadata;
5786 tmp->extent_item_refs = tmpl->extent_item_refs;
5787 INIT_LIST_HEAD(&tmp->list);
5788 list_add_tail(&tmp->list, &rec->dups);
5789 rec->num_duplicates++;
5796 if (tmpl->extent_item_refs && !dup) {
/* A value is already recorded: report both the old and the new one. */
5797 if (rec->extent_item_refs) {
5798 fprintf(stderr, "block %llu rec "
5799 "extent_item_refs %llu, passed %llu\n",
5800 (unsigned long long)tmpl->start,
5801 (unsigned long long)
5802 rec->extent_item_refs,
5803 (unsigned long long)tmpl->extent_item_refs);
5805 rec->extent_item_refs = tmpl->extent_item_refs;
/* These flags are only ever raised here, never cleared. */
5809 if (tmpl->content_checked)
5810 rec->content_checked = 1;
5811 if (tmpl->owner_ref_checked)
5812 rec->owner_ref_checked = 1;
5813 memcpy(&rec->parent_key, &tmpl->parent_key,
5814 sizeof(tmpl->parent_key));
5815 if (tmpl->parent_generation)
5816 rec->parent_generation = tmpl->parent_generation;
5817 if (rec->max_size < tmpl->max_size)
5818 rec->max_size = tmpl->max_size;
5821 * A metadata extent can't cross stripe_len boundary, otherwise
5822 * kernel scrub won't be able to handle it.
5823 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5827 rec->crossing_stripes = check_crossing_stripes(
5828 global_info, rec->start,
5829 global_info->tree_root->nodesize);
5830 check_extent_type(rec);
5831 maybe_free_extent_rec(extent_cache, rec);
/* Creation path: build a fresh record from the template. */
5835 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * A record for @bytenr is created through add_extent_rec_nolookup() on the
 * path that builds a template; the backref is then looked up with
 * find_tree_backref() and allocated with alloc_tree_backref() as needed.
 * Two flags can be set on the backref, node.found_ref and
 * node.found_extent_tree; in both cases a message is printed if the flag was
 * already set.  NOTE(review): which of the two is used is presumably decided
 * by @found_ref -- the selecting line is not visible here.
 */
5840 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5841 u64 parent, u64 root, int found_ref)
5843 struct extent_record *rec;
5844 struct tree_backref *back;
5845 struct cache_extent *cache;
5848 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5850 struct extent_record tmpl;
5852 memset(&tmpl, 0, sizeof(tmpl));
5853 tmpl.start = bytenr;
5857 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5861 /* really a bug in the cache_extent implementation now */
5862 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5867 rec = container_of(cache, struct extent_record, cache);
/* The entry found may start at a different bytenr than the one asked for. */
5868 if (rec->start != bytenr) {
5870 * Several cause, from unaligned bytenr to over lapping extents
5875 back = find_tree_backref(rec, parent, root);
5877 back = alloc_tree_backref(rec, parent, root);
5883 if (back->node.found_ref) {
5884 fprintf(stderr, "Extent back ref already exists "
5885 "for %llu parent %llu root %llu \n",
5886 (unsigned long long)bytenr,
5887 (unsigned long long)parent,
5888 (unsigned long long)root);
5890 back->node.found_ref = 1;
5892 if (back->node.found_extent_tree) {
5893 fprintf(stderr, "Extent back ref already exists "
5894 "for %llu parent %llu root %llu \n",
5895 (unsigned long long)bytenr,
5896 (unsigned long long)parent,
5897 (unsigned long long)root);
5899 back->node.found_extent_tree = 1;
5901 check_extent_type(rec);
5902 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * A record for @bytenr is created through add_extent_rec_nolookup() on the
 * path that builds a template, and rec->max_size is raised to @max_size when
 * that is larger.  The backref is looked up with find_data_backref() and
 * allocated with alloc_data_backref() as needed.
 *
 * Two update paths are visible:
 *  - one requires num_refs == 1 (BUG_ON), counts the reference in
 *    back->found_ref, stores bytes/disk_bytenr and marks the record's
 *    content and owner ref as checked;
 *  - the other stores @num_refs and sets node.found_extent_tree, printing a
 *    message if that flag was already set.
 * NOTE(review): the selection between them is presumably made on @found_ref;
 * the selecting line is not visible here.
 */
5906 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5907 u64 parent, u64 root, u64 owner, u64 offset,
5908 u32 num_refs, int found_ref, u64 max_size)
5910 struct extent_record *rec;
5911 struct data_backref *back;
5912 struct cache_extent *cache;
5915 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5917 struct extent_record tmpl;
5919 memset(&tmpl, 0, sizeof(tmpl));
5920 tmpl.start = bytenr;
5922 tmpl.max_size = max_size;
5924 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5928 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5933 rec = container_of(cache, struct extent_record, cache);
5934 if (rec->max_size < max_size)
5935 rec->max_size = max_size;
5938 * If found_ref is set then max_size is the real size and must match the
5939 * existing refs. So if we have already found a ref then we need to
5940 * make sure that this ref matches the existing one, otherwise we need
5941 * to add a new backref so we can notice that the backrefs don't match
5942 * and we need to figure out who is telling the truth. This is to
5943 * account for that awful fsync bug I introduced where we'd end up with
5944 * a btrfs_file_extent_item that would have its length include multiple
5945 * prealloc extents or point inside of a prealloc extent.
5947 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5950 back = alloc_data_backref(rec, parent, root, owner, offset,
5956 BUG_ON(num_refs != 1);
/* A backref seen before must carry the same size. */
5957 if (back->node.found_ref)
5958 BUG_ON(back->bytes != max_size);
5959 back->node.found_ref = 1;
5960 back->found_ref += 1;
5961 back->bytes = max_size;
5962 back->disk_bytenr = bytenr;
5964 rec->content_checked = 1;
5965 rec->owner_ref_checked = 1;
5967 if (back->node.found_extent_tree) {
5968 fprintf(stderr, "Extent back ref already exists "
5969 "for %llu parent %llu root %llu "
5970 "owner %llu offset %llu num_refs %lu\n",
5971 (unsigned long long)bytenr,
5972 (unsigned long long)parent,
5973 (unsigned long long)root,
5974 (unsigned long long)owner,
5975 (unsigned long long)offset,
5976 (unsigned long)num_refs);
5978 back->num_refs = num_refs;
5979 back->node.found_extent_tree = 1;
5981 maybe_free_extent_rec(extent_cache, rec);
/*
 * Add the range (@bytenr, @size) to the @seen tree and then to the @pending
 * tree.  Only the result of the insertion into @seen is captured in ret; the
 * result of the insertion into @pending is not.
 */
5985 static int add_pending(struct cache_tree *pending,
5986 struct cache_tree *seen, u64 bytenr, u32 size)
5989 ret = add_cache_extent(seen, bytenr, size);
5992 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the start/size of the next blocks to
 * process, drawing on the @reada, @nodes and @pending trees.
 */
5996 static int pick_next_pending(struct cache_tree *pending,
5997 struct cache_tree *reada,
5998 struct cache_tree *nodes,
5999 u64 last, struct block_info *bits, int bits_nr,
/* NOTE(review): @last is u64 while node_start is unsigned long. */
6002 unsigned long node_start = last;
6003 struct cache_extent *cache;
/* The first entry of @reada, if any, is placed in bits[0]. */
6006 cache = search_cache_extent(reada, 0);
6008 bits[0].start = cache->start;
6009 bits[0].size = cache->size;
/* Begin the node search up to 32768 bytes before @last. */
6014 if (node_start > 32768)
6015 node_start -= 32768;
6017 cache = search_cache_extent(nodes, node_start);
6019 cache = search_cache_extent(nodes, 0);
6022 cache = search_cache_extent(pending, 0);
/* Copy consecutive cache entries until the tree ends or @bits is full. */
6027 bits[ret].start = cache->start;
6028 bits[ret].size = cache->size;
6029 cache = next_cache_extent(cache);
6031 } while (cache && ret < bits_nr);
6037 bits[ret].start = cache->start;
6038 bits[ret].size = cache->size;
6039 cache = next_cache_extent(cache);
6041 } while (cache && ret < bits_nr);
/*
 * With more than 8 free slots left, also take pending extents that
 * follow bits[0]; a gap larger than 32768 bytes is tested for below.
 */
6043 if (bits_nr - ret > 8) {
6044 u64 lookup = bits[0].start + bits[0].size;
6045 struct cache_extent *next;
6046 next = search_cache_extent(pending, lookup);
6048 if (next->start - lookup > 32768)
6050 bits[ret].start = next->start;
6051 bits[ret].size = next->size;
6052 lookup = next->start + next->size;
6056 next = next_cache_extent(next);
/*
 * Per-entry callback for a chunk cache tree: recover the chunk_record that
 * embeds @cache and unlink it from its 'list' and 'dextents' lists.
 */
6064 static void free_chunk_record(struct cache_extent *cache)
6066 struct chunk_record *rec;
6068 rec = container_of(cache, struct chunk_record, cache);
6069 list_del_init(&rec->list);
6070 list_del_init(&rec->dextents);
/* Release every entry of @chunk_cache, using free_chunk_record() per entry. */
6074 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6076 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* Per-node callback: recover the device_record that embeds @node. */
6079 static void free_device_record(struct rb_node *node)
6081 struct device_record *rec;
6083 rec = container_of(node, struct device_record, node);
/*
 * Macro expansion taking the name device_cache and the callback above.
 * NOTE(review): presumably generates the function that frees the whole
 * device rb-tree -- confirm against the macro definition.
 */
6087 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 */
6089 int insert_block_group_record(struct block_group_tree *tree,
6090 struct block_group_record *bg_rec)
6094 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6098 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry callback for a block group tree: recover the block_group_record
 * that embeds @cache and unlink it from its list.
 */
6102 static void free_block_group_record(struct cache_extent *cache)
6104 struct block_group_record *rec;
6106 rec = container_of(cache, struct block_group_record, cache);
6107 list_del_init(&rec->list);
/* Release every entry of @tree, using free_block_group_record() per entry. */
6111 void free_block_group_tree(struct block_group_tree *tree)
6113 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree and queue it on both orphan
 * lists of @tree (no_chunk_orphans and no_device_orphans).
 */
6116 int insert_device_extent_record(struct device_extent_tree *tree,
6117 struct device_extent_record *de_rec)
6122 * Device extent is a bit different from the other extents, because
6123 * the extents which belong to the different devices may have the
6124 * same start and size, so we need use the special extent cache
6125 * search/insert functions.
6127 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6131 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6132 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry callback for a device extent tree: recover the
 * device_extent_record that embeds @cache and unlink it from whichever of
 * its chunk_list / device_list it is still queued on.
 */
6136 static void free_device_extent_record(struct cache_extent *cache)
6138 struct device_extent_record *rec;
6140 rec = container_of(cache, struct device_extent_record, cache);
6141 if (!list_empty(&rec->chunk_list))
6142 list_del_init(&rec->chunk_list);
6143 if (!list_empty(&rec->device_list))
6144 list_del_init(&rec->device_list);
/* Release every entry of @tree, using free_device_extent_record() per entry. */
6148 void free_device_extent_tree(struct device_extent_tree *tree)
6150 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Compiled only with BTRFS_COMPAT_EXTENT_TREE_V0: turn a v0 extent ref item
 * at @slot of @leaf into a backref.  key.objectid is passed as the bytenr
 * and key.offset as the parent.  A ref whose objectid is below
 * BTRFS_FIRST_FREE_OBJECTID is tested for first, with add_tree_backref()
 * called on that path; add_data_backref() receives the v0 ref count.
 */
6153 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6154 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6155 struct extent_buffer *leaf, int slot)
6157 struct btrfs_extent_ref_v0 *ref0;
6158 struct btrfs_key key;
6161 btrfs_item_key_to_cpu(leaf, &key, slot);
6162 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6163 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6164 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6167 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6168 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the btrfs_chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(), sized by
 * btrfs_chunk_record_size(num_stripes).  cache.start and offset come from
 * key->offset, cache.size and length from the chunk item's length, and the
 * devid, offset and dev_uuid of every stripe are copied out of the item.
 */
6174 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6175 struct btrfs_key *key,
6178 struct btrfs_chunk *ptr;
6179 struct chunk_record *rec;
6182 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6183 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6185 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6187 fprintf(stderr, "memory allocation failed\n");
6191 INIT_LIST_HEAD(&rec->list);
6192 INIT_LIST_HEAD(&rec->dextents);
/* The chunk's position comes from the key, its length from the item. */
6195 rec->cache.start = key->offset;
6196 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6198 rec->generation = btrfs_header_generation(leaf);
6200 rec->objectid = key->objectid;
6201 rec->type = key->type;
6202 rec->offset = key->offset;
6204 rec->length = rec->cache.size;
6205 rec->owner = btrfs_chunk_owner(leaf, ptr);
6206 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6207 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6208 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6209 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6210 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6211 rec->num_stripes = num_stripes;
6212 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe fields out of the on-disk item. */
6214 for (i = 0; i < rec->num_stripes; ++i) {
6215 rec->stripes[i].devid =
6216 btrfs_stripe_devid_nr(leaf, ptr, i);
6217 rec->stripes[i].offset =
6218 btrfs_stripe_offset_nr(leaf, ptr, i);
6219 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6220 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb with btrfs_check_chunk_valid()
 * and, on the path that continues, build a chunk_record for it and insert
 * it into @chunk_cache.  A message is printed for an invalid chunk and for
 * one that already exists in the cache.
 */
6227 static int process_chunk_item(struct cache_tree *chunk_cache,
6228 struct btrfs_key *key, struct extent_buffer *eb,
6231 struct chunk_record *rec;
6232 struct btrfs_chunk *chunk;
6235 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6237 * Do extra check for this chunk item,
6239 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6240 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6241 * and owner<->key_type check.
6243 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6246 error("chunk(%llu, %llu) is not valid, ignore it",
6247 key->offset, btrfs_chunk_length(eb, chunk));
6250 rec = btrfs_new_chunk_record(eb, key, slot);
6251 ret = insert_cache_extent(chunk_cache, &rec->cache);
6253 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6254 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree, ordered by device_record_compare.  A message is
 * printed on allocation failure and when the device already exists.
 */
6261 static int process_device_item(struct rb_root *dev_cache,
6262 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6264 struct btrfs_dev_item *ptr;
6265 struct device_record *rec;
6268 ptr = btrfs_item_ptr(eb,
6269 slot, struct btrfs_dev_item);
6271 rec = malloc(sizeof(*rec));
6273 fprintf(stderr, "memory allocation failed\n");
6277 rec->devid = key->offset;
6278 rec->generation = btrfs_header_generation(eb);
6280 rec->objectid = key->objectid;
6281 rec->type = key->type;
6282 rec->offset = key->offset;
/* Overwrites the devid taken from key->offset above. */
6284 rec->devid = btrfs_device_id(eb, ptr);
6285 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6286 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6288 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6290 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc().  cache.start is
 * key->objectid and cache.size is key->offset; the flags are read from the
 * on-disk item.
 */
6297 struct block_group_record *
6298 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6301 struct btrfs_block_group_item *ptr;
6302 struct block_group_record *rec;
6304 rec = calloc(1, sizeof(*rec));
6306 fprintf(stderr, "memory allocation failed\n");
6310 rec->cache.start = key->objectid;
6311 rec->cache.size = key->offset;
6313 rec->generation = btrfs_header_generation(leaf);
6315 rec->objectid = key->objectid;
6316 rec->type = key->type;
6317 rec->offset = key->offset;
6319 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6320 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6322 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it into
 * @block_group_cache; a message is printed when the block group already
 * exists.
 */
6327 static int process_block_group_item(struct block_group_tree *block_group_cache,
6328 struct btrfs_key *key,
6329 struct extent_buffer *eb, int slot)
6331 struct block_group_record *rec;
6334 rec = btrfs_new_block_group_record(eb, key, slot);
6335 ret = insert_block_group_record(block_group_cache, rec);
6337 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6338 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc().  The cache entry is keyed by
 * key->objectid (cache.objectid) and key->offset (cache.start), and its size
 * is the dev extent length read from the item.
 */
6345 struct device_extent_record *
6346 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6347 struct btrfs_key *key, int slot)
6349 struct device_extent_record *rec;
6350 struct btrfs_dev_extent *ptr;
6352 rec = calloc(1, sizeof(*rec));
6354 fprintf(stderr, "memory allocation failed\n");
6358 rec->cache.objectid = key->objectid;
6359 rec->cache.start = key->offset;
6361 rec->generation = btrfs_header_generation(leaf);
6363 rec->objectid = key->objectid;
6364 rec->type = key->type;
6365 rec->offset = key->offset;
6367 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
/* 'chunk_objecteid' (sic) is the field name as spelled in this code. */
6368 rec->chunk_objecteid =
6369 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6371 btrfs_dev_extent_chunk_offset(leaf, ptr);
6372 rec->length = btrfs_dev_extent_length(leaf, ptr);
6373 rec->cache.size = rec->length;
6375 INIT_LIST_HEAD(&rec->chunk_list);
6376 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache; a message is printed when the device extent
 * already exists.
 */
6382 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6383 struct btrfs_key *key, struct extent_buffer *eb,
6386 struct device_extent_record *rec;
6389 rec = btrfs_new_device_extent_record(eb, key, slot);
6390 ret = insert_device_extent_record(dev_extent_cache, rec);
6393 "Device extent[%llu, %llu, %llu] existed.\n",
6394 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item at @slot of @eb: register the extent in
 * @extent_cache through add_extent_rec() and add one backref per inline
 * reference that follows the extent item.
 *
 * Extents whose bytenr is not sectorsize-aligned, metadata extents whose
 * length differs from nodesize, and data extents whose length is not
 * sectorsize-aligned are reported with error().
 */
6401 static int process_extent_item(struct btrfs_root *root,
6402 struct cache_tree *extent_cache,
6403 struct extent_buffer *eb, int slot)
6405 struct btrfs_extent_item *ei;
6406 struct btrfs_extent_inline_ref *iref;
6407 struct btrfs_extent_data_ref *dref;
6408 struct btrfs_shared_data_ref *sref;
6409 struct btrfs_key key;
6410 struct extent_record tmpl;
6415 u32 item_size = btrfs_item_size_nr(eb, slot);
6421 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys imply a nodesize length; the other path uses key.offset. */
6423 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6425 num_bytes = root->nodesize;
6427 num_bytes = key.offset;
6430 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6431 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6432 key.objectid, root->sectorsize);
/* An item smaller than btrfs_extent_item is handled as a v0 extent item. */
6435 if (item_size < sizeof(*ei)) {
6436 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6437 struct btrfs_extent_item_v0 *ei0;
6438 BUG_ON(item_size != sizeof(*ei0));
6439 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6440 refs = btrfs_extent_refs_v0(eb, ei0);
6444 memset(&tmpl, 0, sizeof(tmpl));
6445 tmpl.start = key.objectid;
6446 tmpl.nr = num_bytes;
6447 tmpl.extent_item_refs = refs;
6448 tmpl.metadata = metadata;
6450 tmpl.max_size = num_bytes;
6452 return add_extent_rec(extent_cache, &tmpl);
6455 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6456 refs = btrfs_extent_refs(eb, ei);
6457 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6461 if (metadata && num_bytes != root->nodesize) {
6462 error("ignore invalid metadata extent, length %llu does not equal to %u",
6463 num_bytes, root->nodesize);
6466 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6467 error("ignore invalid data extent, length %llu is not aligned to %u",
6468 num_bytes, root->sectorsize);
6472 memset(&tmpl, 0, sizeof(tmpl));
6473 tmpl.start = key.objectid;
6474 tmpl.nr = num_bytes;
6475 tmpl.extent_item_refs = refs;
6476 tmpl.metadata = metadata;
6478 tmpl.max_size = num_bytes;
/* NOTE(review): the return value of add_extent_rec() is not captured here. */
6479 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; for a tree block under
 * an EXTENT_ITEM key a btrfs_tree_block_info is skipped first.
 */
6481 ptr = (unsigned long)(ei + 1);
6482 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6483 key.type == BTRFS_EXTENT_ITEM_KEY)
6484 ptr += sizeof(struct btrfs_tree_block_info);
6486 end = (unsigned long)ei + item_size;
6488 iref = (struct btrfs_extent_inline_ref *)ptr;
6489 type = btrfs_extent_inline_ref_type(eb, iref);
6490 offset = btrfs_extent_inline_ref_offset(eb, iref);
6492 case BTRFS_TREE_BLOCK_REF_KEY:
6493 ret = add_tree_backref(extent_cache, key.objectid,
6496 error("add_tree_backref failed: %s",
6499 case BTRFS_SHARED_BLOCK_REF_KEY:
6500 ret = add_tree_backref(extent_cache, key.objectid,
6503 error("add_tree_backref failed: %s",
6506 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref overlays the inline ref starting at its offset field. */
6507 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6508 add_data_backref(extent_cache, key.objectid, 0,
6509 btrfs_extent_data_ref_root(eb, dref),
6510 btrfs_extent_data_ref_objectid(eb,
6512 btrfs_extent_data_ref_offset(eb, dref),
6513 btrfs_extent_data_ref_count(eb, dref),
6516 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref follows the inline ref structure. */
6517 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6518 add_data_backref(extent_cache, key.objectid, offset,
6520 btrfs_shared_data_ref_count(eb, sref),
6524 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6525 key.objectid, key.type, num_bytes);
/* Advance by the size of this ref type to reach the next inline ref. */
6528 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range (@offset, @bytes) of block group @cache is described
 * by one free space entry with exactly that offset and length.
 *
 * Before the lookup, the range is adjusted around every superblock mirror:
 * btrfs_rmap_block() translates each btrfs_sb_offset(i) into logical
 * addresses, and the part of the range covered by such a stripe is trimmed
 * off the front, the end, or handled by a recursive call for the left side.
 * A matching entry is unlinked from cache->free_space_ctl.
 */
6535 static int check_cache_range(struct btrfs_root *root,
6536 struct btrfs_block_group_cache *cache,
6537 u64 offset, u64 bytes)
6539 struct btrfs_free_space *entry;
6545 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6546 bytenr = btrfs_sb_offset(i);
6547 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6548 cache->key.objectid, bytenr, 0,
6549 &logical, &nr, &stripe_len);
/* Stripe entirely before, or entirely after, the range being checked. */
6554 if (logical[nr] + stripe_len <= offset)
6556 if (offset + bytes <= logical[nr])
/* Stripe begins exactly at the start of the range. */
6558 if (logical[nr] == offset) {
6559 if (stripe_len >= bytes) {
6563 bytes -= stripe_len;
6564 offset += stripe_len;
/* Stripe begins before the range and overlaps its front. */
6565 } else if (logical[nr] < offset) {
6566 if (logical[nr] + stripe_len >=
6571 bytes = (offset + bytes) -
6572 (logical[nr] + stripe_len);
6573 offset = logical[nr] + stripe_len;
6576 * Could be tricky, the super may land in the
6577 * middle of the area we're checking. First
6578 * check the easiest case, it's at the end.
6580 if (logical[nr] + stripe_len >=
6582 bytes = logical[nr] - offset;
6586 /* Check the left side */
6587 ret = check_cache_range(root, cache,
6589 logical[nr] - offset);
6595 /* Now we continue with the right side */
6596 bytes = (offset + bytes) -
6597 (logical[nr] + stripe_len);
6598 offset = logical[nr] + stripe_len;
6605 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6607 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6608 offset, offset+bytes);
6612 if (entry->offset != offset) {
6613 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6618 if (entry->bytes != bytes) {
6619 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6620 bytes, entry->bytes, offset);
/* Remove the verified entry from the free space ctl. */
6624 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Walk the extent tree over block group @cache and check, through
 * check_cache_range(), that every gap between allocated extents (and the
 * tail after the last one) has a free space entry.  A message is printed if
 * entries remain in the free space ctl afterwards.
 */
6629 static int verify_space_cache(struct btrfs_root *root,
6630 struct btrfs_block_group_cache *cache)
6632 struct btrfs_path path;
6633 struct extent_buffer *leaf;
6634 struct btrfs_key key;
/* All searches below run against the extent root. */
6638 root = root->fs_info->extent_root;
/* Start no lower than BTRFS_SUPER_INFO_OFFSET. */
6640 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6642 btrfs_init_path(&path);
6643 key.objectid = last;
6645 key.type = BTRFS_EXTENT_ITEM_KEY;
6646 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6651 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6652 ret = btrfs_next_leaf(root, &path);
6660 leaf = path.nodes[0];
6661 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Key at or past the end of this block group. */
6662 if (key.objectid >= cache->key.offset + cache->key.objectid)
6664 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6665 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts right at 'last': no gap, just advance past the extent. */
6670 if (last == key.objectid) {
/* EXTENT_ITEM length is key.offset; the other key type uses nodesize. */
6671 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6672 last = key.objectid + key.offset;
6674 last = key.objectid + root->nodesize;
/* Gap between 'last' and this extent: it must be recorded as free space. */
6679 ret = check_cache_range(root, cache, last,
6680 key.objectid - last);
6683 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6684 last = key.objectid + key.offset;
6686 last = key.objectid + root->nodesize;
/* Tail of the block group after the last extent seen. */
6690 if (last < cache->key.objectid + cache->key.offset)
6691 ret = check_cache_range(root, cache, last,
6692 cache->key.objectid +
6693 cache->key.offset - last);
6696 btrfs_release_path(&path);
6699 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6700 fprintf(stderr, "There are still entries left in the space "
/*
 * Check the free space information of the block groups, starting the lookup
 * just past the primary superblock.
 *
 * For each block group the free space ctl is initialised or emptied, the
 * free space is loaded from the free space tree (when the
 * FREE_SPACE_TREE compat_ro feature is set) or from the free space cache,
 * and the result is verified with verify_space_cache().  Returns -EINVAL if
 * the local error indicator is non-zero, otherwise 0.
 */
6708 static int check_space_cache(struct btrfs_root *root)
6710 struct btrfs_block_group_cache *cache;
6711 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* A cache generation other than -1 that differs from the super generation. */
6715 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6716 btrfs_super_generation(root->fs_info->super_copy) !=
6717 btrfs_super_cache_generation(root->fs_info->super_copy)) {
6718 printf("cache and super generation don't match, space cache "
6719 "will be invalidated\n");
6723 if (ctx.progress_enabled) {
6724 ctx.tp = TASK_FREE_SPACE;
6725 task_start(ctx.info);
6729 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup starts at the end of this block group. */
6733 start = cache->key.objectid + cache->key.offset;
6734 if (!cache->free_space_ctl) {
6735 if (btrfs_init_free_space_ctl(cache,
6736 root->sectorsize)) {
6741 btrfs_remove_free_space_cache(cache);
6744 if (btrfs_fs_compat_ro(root->fs_info,
6745 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
/* Super stripes are excluded only for the duration of the tree load. */
6746 ret = exclude_super_stripes(root, cache);
6748 fprintf(stderr, "could not exclude super stripes: %s\n",
6753 ret = load_free_space_tree(root->fs_info, cache);
6754 free_excluded_extents(root, cache);
6756 fprintf(stderr, "could not load free space tree: %s\n",
6763 ret = load_free_space_cache(root->fs_info, cache);
6768 ret = verify_space_cache(root, cache);
6770 fprintf(stderr, "cache appears valid but isn't %Lu\n",
6771 cache->key.objectid);
6776 task_stop(ctx.info);
6778 return error ? -EINVAL : 0;
/*
 * Read the data extent (@bytenr, @num_bytes) and compare the checksum of
 * every sectorsize-sized piece with the expected value stored in @eb.
 *
 * The expected checksum of the piece at byte position tmp is read from @eb
 * at leaf_offset + (tmp / sectorsize) * csum_size.  On a mismatch a message
 * is printed and the number of copies is queried so that another mirror can
 * be tried.  @num_bytes is tested for being a multiple of the sector size
 * before anything is read.
 */
6781 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6782 u64 num_bytes, unsigned long leaf_offset,
6783 struct extent_buffer *eb) {
6786 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6788 unsigned long csum_offset;
6792 u64 data_checked = 0;
6798 if (num_bytes % root->sectorsize)
/* One buffer large enough for the whole extent. */
6801 data = malloc(num_bytes);
6805 while (offset < num_bytes) {
6808 read_len = num_bytes - offset;
6809 /* ask for everything that is left in a single read */
6810 ret = read_extent_data(root, data + offset,
6811 bytenr + offset, &read_len, mirror);
6815 /* verify the checksum of every sectorsize-sized piece */
6816 while (data_checked < read_len) {
6818 tmp = offset + data_checked;
6820 csum = btrfs_csum_data(NULL, (char *)data + tmp,
6821 csum, root->sectorsize);
6822 btrfs_csum_final(csum, (u8 *)&csum);
6824 csum_offset = leaf_offset +
6825 tmp / root->sectorsize * csum_size;
6826 read_extent_buffer(eb, (char *)&csum_expected,
6827 csum_offset, csum_size);
6828 /* try another mirror */
6829 if (csum != csum_expected) {
6830 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6831 mirror, bytenr + tmp,
6832 csum, csum_expected);
6833 num_copies = btrfs_num_copies(
6834 &root->fs_info->mapping_tree,
6836 if (mirror < num_copies - 1) {
6841 data_checked += root->sectorsize;
/*
 * Check that the extent tree has EXTENT_ITEMs covering the whole range
 * [bytenr, bytenr + num_bytes).
 *
 * The extent tree is searched for (bytenr, EXTENT_ITEM, -1) and the path is
 * stepped backwards (crossing into the previous leaf when slot 0 is
 * reached) while the key type is greater than EXTENT_ITEM.  The items are
 * then walked forward and the range is trimmed by every extent item that
 * overlaps it:
 *  - an extent starting at bytenr shortens the range from the front;
 *  - an extent starting before bytenr does the same using its end;
 *  - an extent starting inside the range either cuts the tail off or, if
 *    it also ends inside the range, causes a recursive call for the part
 *    to its right while this invocation keeps the part to its left.
 *
 * If bytes remain uncovered and no error occurred, the missing range is
 * reported on stderr.
 */
6850 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6853 struct btrfs_path path;
6854 struct extent_buffer *leaf;
6855 struct btrfs_key key;
6858 btrfs_init_path(&path);
6859 key.objectid = bytenr;
6860 key.type = BTRFS_EXTENT_ITEM_KEY;
6861 key.offset = (u64)-1;
6864 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6867 fprintf(stderr, "Error looking up extent record %d\n", ret);
6868 btrfs_release_path(&path);
6871 if (path.slots[0] > 0) {
6874 ret = btrfs_prev_leaf(root, &path);
6877 } else if (ret > 0) {
6884 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6887 * Block group items come before extent items if they have the same
6888 * bytenr, so walk back one more just in case. Dear future traveller,
6889 * first congrats on mastering time travel. Now if it's not too much
6890 * trouble could you go back to 2006 and tell Chris to make the
6891 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6892 * EXTENT_ITEM_KEY please?
6894 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6895 if (path.slots[0] > 0) {
6898 ret = btrfs_prev_leaf(root, &path);
6901 } else if (ret > 0) {
6906 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6910 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6911 ret = btrfs_next_leaf(root, &path);
6913 fprintf(stderr, "Error going to next leaf "
6915 btrfs_release_path(&path);
6921 leaf = path.nodes[0];
6922 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6923 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6927 if (key.objectid + key.offset < bytenr) {
6931 if (key.objectid > bytenr + num_bytes)
6934 if (key.objectid == bytenr) {
6935 if (key.offset >= num_bytes) {
6939 num_bytes -= key.offset;
6940 bytenr += key.offset;
6941 } else if (key.objectid < bytenr) {
6942 if (key.objectid + key.offset >= bytenr + num_bytes) {
6946 num_bytes = (bytenr + num_bytes) -
6947 (key.objectid + key.offset);
6948 bytenr = key.objectid + key.offset;
6950 if (key.objectid + key.offset < bytenr + num_bytes) {
6951 u64 new_start = key.objectid + key.offset;
6952 u64 new_bytes = bytenr + num_bytes - new_start;
6955 * Weird case, the extent is in the middle of
6956 * our range, we'll have to search one side
6957 * and then the other. Not sure if this happens
6958 * in real life, but no harm in coding it up
6959 * anyway just in case.
6961 btrfs_release_path(&path);
6962 ret = check_extent_exists(root, new_start,
6965 fprintf(stderr, "Right section didn't "
6969 num_bytes = key.objectid - bytenr;
6972 num_bytes = key.objectid - bytenr;
6979 if (num_bytes && !ret) {
6980 fprintf(stderr, "There are no extents for csum range "
6981 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6985 btrfs_release_path(&path);
/*
 * Walk the csum tree and make sure checksummed ranges are backed by extent
 * records.
 *
 * @root is only used to reach root->fs_info->csum_root, which replaces it
 * for the rest of the function.  The function reports "No valid csum tree
 * found" when that root node is not uptodate.
 *
 * Every EXTENT_CSUM item covers
 *     (item size / csum_size) * sectorsize
 * bytes starting at key.offset.  Items that continue the current run
 * (key.offset == offset + num_bytes) simply extend it; when an item does
 * not, the finished run [offset, offset + num_bytes) is handed to
 * check_extent_exists() and a new run is started.
 *
 * When the global check_data_csum is set, the data itself is verified as
 * well through check_extent_csums(); otherwise that step is skipped.
 */
6989 static int check_csums(struct btrfs_root *root)
6991 struct btrfs_path path;
6992 struct extent_buffer *leaf;
6993 struct btrfs_key key;
6994 u64 offset = 0, num_bytes = 0;
6995 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6999 unsigned long leaf_offset;
7001 root = root->fs_info->csum_root;
7002 if (!extent_buffer_uptodate(root->node)) {
7003 fprintf(stderr, "No valid csum tree found\n");
7007 btrfs_init_path(&path);
7008 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7009 key.type = BTRFS_EXTENT_CSUM_KEY;
7011 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7013 fprintf(stderr, "Error searching csum tree %d\n", ret);
7014 btrfs_release_path(&path);
7018 if (ret > 0 && path.slots[0])
7023 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7024 ret = btrfs_next_leaf(root, &path);
7026 fprintf(stderr, "Error going to next leaf "
7033 leaf = path.nodes[0];
7035 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7036 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* number of data bytes described by the checksums in this item */
7041 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7042 csum_size) * root->sectorsize;
7043 if (!check_data_csum)
7044 goto skip_csum_check;
7045 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7046 ret = check_extent_csums(root, key.offset, data_len,
7052 offset = key.offset;
7053 } else if (key.offset != offset + num_bytes) {
7054 ret = check_extent_exists(root, offset, num_bytes);
7056 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7057 "there is no extent record\n",
7058 offset, offset+num_bytes);
7061 offset = key.offset;
7064 num_bytes += data_len;
7068 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field - objectid first, then type,
 * then offset - testing at each level whether @key is strictly smaller.
 *
 * run_next_block() uses this on node keys at ri->drop_level to decide
 * whether a child pointer should be left alone.
 *
 * Presumably returns non-zero when @key sorts strictly before @drop_key
 * and 0 otherwise - TODO confirm.
 */
7072 static int is_dropped_key(struct btrfs_key *key,
7073 struct btrfs_key *drop_key) {
7074 if (key->objectid < drop_key->objectid)
7076 else if (key->objectid == drop_key->objectid) {
7077 if (key->type < drop_key->type)
7079 else if (key->type == drop_key->type) {
7080 if (key->offset < drop_key->offset)
7088 * Here are the rules for FULL_BACKREF.
7090 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7091 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7093 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7094 * if it happened after the relocation occurred since we'll have dropped the
7095 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7096 * have no real way to know for sure.
7098 * We process the blocks one root at a time, and we start from the lowest root
7099 * objectid and go to the highest. So we can just lookup the owner backref for
7100 * the record and if we don't find it then we know it doesn't exist and we have
7103 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7104 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7105 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether @buf should carry BTRFS_BLOCK_FLAG_FULL_BACKREF when it
 * is reached from the root described by @ri; the flag is OR-ed into *flags
 * when it applies.
 *
 * The extent_record for buf->start is looked up in @extent_cache.  The
 * tests made, in order, are:
 *  - ri->objectid below BTRFS_FIRST_FREE_OBJECTID;
 *  - @buf being the root node itself (buf->start == ri->bytenr);
 *  - BTRFS_HEADER_FLAG_RELOC set in the header;
 *  - the header owner being ri->objectid;
 *  - a tree backref for the owner found by find_tree_backref(rec, 0, owner).
 *
 * In both outcomes rec->bad_full_backref is set when a previously recorded
 * rec->flag_block_full_backref (i.e. not FLAG_UNSET) disagrees with the
 * result computed here.
 */
7107 static int calc_extent_flag(struct btrfs_root *root,
7108 struct cache_tree *extent_cache,
7109 struct extent_buffer *buf,
7110 struct root_item_record *ri,
7113 struct extent_record *rec;
7114 struct cache_extent *cache;
7115 struct tree_backref *tback;
7118 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7119 /* we have added this extent before */
7123 rec = container_of(cache, struct extent_record, cache);
7126 * Except file/reloc tree, we can not have
7129 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7134 if (buf->start == ri->bytenr)
7137 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7140 owner = btrfs_header_owner(buf);
7141 if (owner == ri->objectid)
7144 tback = find_tree_backref(rec, 0, owner);
7149 if (rec->flag_block_full_backref != FLAG_UNSET &&
7150 rec->flag_block_full_backref != 0)
7151 rec->bad_full_backref = 1;
7154 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7155 if (rec->flag_block_full_backref != FLAG_UNSET &&
7156 rec->flag_block_full_backref != 1)
7157 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 *     Invalid key type(<type>) found in root(<root>)
 * where <type> is rendered by print_key_type() and <root> by
 * print_objectid().
 */
7161 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7163 fprintf(stderr, "Invalid key type(");
7164 print_key_type(stderr, 0, key_type);
7165 fprintf(stderr, ") found in root(");
7166 print_objectid(stderr, rootid, 0);
7167 fprintf(stderr, ")\n");
7171 * Check if the key is valid with its extent buffer.
7173 * This is an early check in case an invalid key exists in an extent buffer
7174 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Validate that an item of @key_type may appear in the tree @rootid.
 *
 *  - DEV_ITEM, CHUNK_ITEM:                       chunk tree only
 *  - EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP_ITEM: extent tree only
 *  - ROOT_ITEM:                                  root tree only
 *  - DEV_EXTENT:                                 device tree only
 *
 * A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the case under "valid in csum and log tree" is labelled
 * BTRFS_CSUM_TREE_OBJECTID, which is a tree objectid, while every other
 * label is a key type.  Confirm whether BTRFS_EXTENT_CSUM_KEY was intended
 * and, if so, whether the rootid test below it still allows the csum tree.
 */
7177 static int check_type_with_root(u64 rootid, u8 key_type)
7180 /* Only valid in chunk tree */
7181 case BTRFS_DEV_ITEM_KEY:
7182 case BTRFS_CHUNK_ITEM_KEY:
7183 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7186 /* valid in csum and log tree */
7187 case BTRFS_CSUM_TREE_OBJECTID:
7188 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7192 case BTRFS_EXTENT_ITEM_KEY:
7193 case BTRFS_METADATA_ITEM_KEY:
7194 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7195 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7198 case BTRFS_ROOT_ITEM_KEY:
7199 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7202 case BTRFS_DEV_EXTENT_KEY:
7203 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7209 report_mismatch_key_root(key_type, rootid);
/*
 * Read and process the next tree block chosen from the work queues.
 *
 * pick_next_pending() fills @bits with a batch of blocks.  The batch is
 * recorded in @reada and readahead is issued for it; bits[0] is then the
 * block processed by this call and *last is updated to its start.
 *
 * Processing of that block:
 *  - it is removed from @pending, @reada and @nodes;
 *  - the parent generation recorded in its extent_record (looked up in
 *    @extent_cache) is used for read_tree_block(); a block that is not
 *    uptodate is recorded through record_bad_block_io();
 *  - the FULL_BACKREF flag comes from btrfs_lookup_extent_info() unless
 *    init_extent_tree is set, with calc_extent_flag() as the other source,
 *    and is then adjusted against @ri, the header owner, the RELOC header
 *    flag and ri->last_snapshot, marking rec->bad_full_backref on
 *    disagreement;
 *  - check_block() is run on the buffer;
 *  - for a leaf, each item is first screened by check_type_with_root() and
 *    then dispatched on key.type: extent/metadata items, csum items, chunk,
 *    device, block group and device extent items, the various backref keys,
 *    orphan items (queued on delete_items) and finally EXTENT_DATA items,
 *    which update data_bytes_allocated/data_bytes_referenced and add a data
 *    backref;
 *  - for a node, each child pointer (unless skipped by the drop_level /
 *    is_dropped_key() test) gets an extent record and a tree backref and is
 *    queued through add_pending() on @nodes or @pending.
 *
 * The global byte counters (btree_space_waste, total_btree_bytes,
 * total_fs_tree_bytes, total_extent_tree_bytes) and found_old_backref are
 * updated before the buffer is released with free_extent_buffer().
 *
 * NOTE(review): 'rec' starts out NULL and is assigned from the
 * @extent_cache lookup result, but it is dereferenced without a check
 * where bad_full_backref / flag_block_full_backref are written - confirm
 * that the lookup cannot miss for a block taken from the queues.
 */
7213 static int run_next_block(struct btrfs_root *root,
7214 struct block_info *bits,
7217 struct cache_tree *pending,
7218 struct cache_tree *seen,
7219 struct cache_tree *reada,
7220 struct cache_tree *nodes,
7221 struct cache_tree *extent_cache,
7222 struct cache_tree *chunk_cache,
7223 struct rb_root *dev_cache,
7224 struct block_group_tree *block_group_cache,
7225 struct device_extent_tree *dev_extent_cache,
7226 struct root_item_record *ri)
7228 struct extent_buffer *buf;
7229 struct extent_record *rec = NULL;
7240 struct btrfs_key key;
7241 struct cache_extent *cache;
7244 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7245 bits_nr, &reada_bits);
7250 for(i = 0; i < nritems; i++) {
7251 ret = add_cache_extent(reada, bits[i].start,
7256 /* fixme, get the parent transid */
7257 readahead_tree_block(root, bits[i].start,
7261 *last = bits[0].start;
7262 bytenr = bits[0].start;
7263 size = bits[0].size;
7265 cache = lookup_cache_extent(pending, bytenr, size);
7267 remove_cache_extent(pending, cache);
7270 cache = lookup_cache_extent(reada, bytenr, size);
7272 remove_cache_extent(reada, cache);
7275 cache = lookup_cache_extent(nodes, bytenr, size);
7277 remove_cache_extent(nodes, cache);
7280 cache = lookup_cache_extent(extent_cache, bytenr, size);
7282 rec = container_of(cache, struct extent_record, cache);
7283 gen = rec->parent_generation;
7286 /* fixme, get the real parent transid */
7287 buf = read_tree_block(root, bytenr, size, gen);
7288 if (!extent_buffer_uptodate(buf)) {
7289 record_bad_block_io(root->fs_info,
7290 extent_cache, bytenr, size);
7294 nritems = btrfs_header_nritems(buf);
7297 if (!init_extent_tree) {
7298 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7299 btrfs_header_level(buf), 1, NULL,
7302 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7304 fprintf(stderr, "Couldn't calc extent flags\n");
7305 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7310 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7312 fprintf(stderr, "Couldn't calc extent flags\n");
7313 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7317 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7319 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7320 ri->objectid == btrfs_header_owner(buf)) {
7322 * Ok we got to this block from it's original owner and
7323 * we have FULL_BACKREF set. Relocation can leave
7324 * converted blocks over so this is altogether possible,
7325 * however it's not possible if the generation > the
7326 * last snapshot, so check for this case.
7328 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7329 btrfs_header_generation(buf) > ri->last_snapshot) {
7330 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7331 rec->bad_full_backref = 1;
7336 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7337 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7338 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7339 rec->bad_full_backref = 1;
7343 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7344 rec->flag_block_full_backref = 1;
7348 rec->flag_block_full_backref = 0;
7350 owner = btrfs_header_owner(buf);
7353 ret = check_block(root, extent_cache, buf, flags);
7357 if (btrfs_is_leaf(buf)) {
7358 btree_space_waste += btrfs_leaf_free_space(root, buf);
7359 for (i = 0; i < nritems; i++) {
7360 struct btrfs_file_extent_item *fi;
7361 btrfs_item_key_to_cpu(buf, &key, i);
7363 * Check key type against the leaf owner.
7364 * Could filter quite a lot of early error if
7367 if (check_type_with_root(btrfs_header_owner(buf),
7369 fprintf(stderr, "ignoring invalid key\n");
7372 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7373 process_extent_item(root, extent_cache, buf,
7377 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7378 process_extent_item(root, extent_cache, buf,
7382 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7384 btrfs_item_size_nr(buf, i);
7387 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7388 process_chunk_item(chunk_cache, &key, buf, i);
7391 if (key.type == BTRFS_DEV_ITEM_KEY) {
7392 process_device_item(dev_cache, &key, buf, i);
7395 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7396 process_block_group_item(block_group_cache,
7400 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7401 process_device_extent_item(dev_extent_cache,
7406 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7407 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7408 process_extent_ref_v0(extent_cache, buf, i);
7415 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7416 ret = add_tree_backref(extent_cache,
7417 key.objectid, 0, key.offset, 0);
7419 error("add_tree_backref failed: %s",
7423 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7424 ret = add_tree_backref(extent_cache,
7425 key.objectid, key.offset, 0, 0);
7427 error("add_tree_backref failed: %s",
7431 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7432 struct btrfs_extent_data_ref *ref;
7433 ref = btrfs_item_ptr(buf, i,
7434 struct btrfs_extent_data_ref);
7435 add_data_backref(extent_cache,
7437 btrfs_extent_data_ref_root(buf, ref),
7438 btrfs_extent_data_ref_objectid(buf,
7440 btrfs_extent_data_ref_offset(buf, ref),
7441 btrfs_extent_data_ref_count(buf, ref),
7442 0, root->sectorsize);
7445 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7446 struct btrfs_shared_data_ref *ref;
7447 ref = btrfs_item_ptr(buf, i,
7448 struct btrfs_shared_data_ref);
7449 add_data_backref(extent_cache,
7450 key.objectid, key.offset, 0, 0, 0,
7451 btrfs_shared_data_ref_count(buf, ref),
7452 0, root->sectorsize);
7455 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7456 struct bad_item *bad;
7458 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7462 bad = malloc(sizeof(struct bad_item));
7465 INIT_LIST_HEAD(&bad->list);
7466 memcpy(&bad->key, &key,
7467 sizeof(struct btrfs_key));
7468 bad->root_id = owner;
7469 list_add_tail(&bad->list, &delete_items);
7472 if (key.type != BTRFS_EXTENT_DATA_KEY)
7474 fi = btrfs_item_ptr(buf, i,
7475 struct btrfs_file_extent_item);
7476 if (btrfs_file_extent_type(buf, fi) ==
7477 BTRFS_FILE_EXTENT_INLINE)
7479 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7482 data_bytes_allocated +=
7483 btrfs_file_extent_disk_num_bytes(buf, fi);
7484 if (data_bytes_allocated < root->sectorsize) {
7487 data_bytes_referenced +=
7488 btrfs_file_extent_num_bytes(buf, fi);
7489 add_data_backref(extent_cache,
7490 btrfs_file_extent_disk_bytenr(buf, fi),
7491 parent, owner, key.objectid, key.offset -
7492 btrfs_file_extent_offset(buf, fi), 1, 1,
7493 btrfs_file_extent_disk_num_bytes(buf, fi));
7497 struct btrfs_key first_key;
7499 first_key.objectid = 0;
7502 btrfs_item_key_to_cpu(buf, &first_key, 0);
7503 level = btrfs_header_level(buf);
7504 for (i = 0; i < nritems; i++) {
7505 struct extent_record tmpl;
7507 ptr = btrfs_node_blockptr(buf, i);
7508 size = root->nodesize;
7509 btrfs_node_key_to_cpu(buf, &key, i);
7511 if ((level == ri->drop_level)
7512 && is_dropped_key(&key, &ri->drop_key)) {
7517 memset(&tmpl, 0, sizeof(tmpl));
7518 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7519 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7524 tmpl.max_size = size;
7525 ret = add_extent_rec(extent_cache, &tmpl);
7529 ret = add_tree_backref(extent_cache, ptr, parent,
7532 error("add_tree_backref failed: %s",
7538 add_pending(nodes, seen, ptr, size);
7540 add_pending(pending, seen, ptr, size);
7543 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7544 nritems) * sizeof(struct btrfs_key_ptr);
7546 total_btree_bytes += buf->len;
7547 if (fs_root_objectid(btrfs_header_owner(buf)))
7548 total_fs_tree_bytes += buf->len;
7549 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7550 total_extent_tree_bytes += buf->len;
7551 if (!found_old_backref &&
7552 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7553 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7554 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7555 found_old_backref = 1;
7557 free_extent_buffer(buf);
/*
 * Queue a tree root block for scanning.
 *
 * @buf is handed to add_pending() together with @seen: on @nodes when its
 * header level is above 0, @pending being the other queue used.  An
 * extent_record template starting at buf->start with max_size buf->len is
 * then passed to add_extent_rec(), and a tree backref is added for the
 * block: with parent == buf->start when @objectid is the reloc tree or the
 * header backref revision predates BTRFS_MIXED_BACKREF_REV, and with
 * parent 0 and root @objectid in the other call.
 *
 * NOTE(review): the return value of add_extent_rec() is not checked here.
 */
7561 static int add_root_to_pending(struct extent_buffer *buf,
7562 struct cache_tree *extent_cache,
7563 struct cache_tree *pending,
7564 struct cache_tree *seen,
7565 struct cache_tree *nodes,
7568 struct extent_record tmpl;
7571 if (btrfs_header_level(buf) > 0)
7572 add_pending(nodes, seen, buf->start, buf->len);
7574 add_pending(pending, seen, buf->start, buf->len);
7576 memset(&tmpl, 0, sizeof(tmpl));
7577 tmpl.start = buf->start;
7582 tmpl.max_size = buf->len;
7583 add_extent_rec(extent_cache, &tmpl);
7585 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7586 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7587 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7590 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7595 /* as we fix the tree, we might be deleting blocks that
7596 * we're tracking for repair. This hook makes sure we
7597 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in step with extent references that are
 * dropped while the tree is being repaired.
 *
 * The extent_record covering [bytenr, bytenr + num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache.  Depending on whether @owner is at
 * least BTRFS_FIRST_FREE_OBJECTID, the matching data backref
 * (find_data_backref()) or tree backref (find_tree_backref()) is adjusted:
 *  - data: found_ref / num_refs and the record's refs / extent_item_refs
 *    are reduced by refs_to_drop, and the found_ref / found_extent_tree
 *    bits are cleared once the respective counter reaches 0;
 *  - tree: found_ref and found_extent_tree are cleared and
 *    extent_item_refs is decremented.
 * Finally maybe_free_extent_rec() is called on the record.
 *
 * NOTE(review): both branches unlink the backref when
 * "!found_extent_tree && found_ref".  In the tree branch found_ref has
 * just been cleared, so the test looks like it can never be true there;
 * verify that "!found_ref" was not intended.
 */
7599 static int free_extent_hook(struct btrfs_trans_handle *trans,
7600 struct btrfs_root *root,
7601 u64 bytenr, u64 num_bytes, u64 parent,
7602 u64 root_objectid, u64 owner, u64 offset,
7605 struct extent_record *rec;
7606 struct cache_extent *cache;
7608 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7610 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7611 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7615 rec = container_of(cache, struct extent_record, cache);
7617 struct data_backref *back;
7618 back = find_data_backref(rec, parent, root_objectid, owner,
7619 offset, 1, bytenr, num_bytes);
7622 if (back->node.found_ref) {
7623 back->found_ref -= refs_to_drop;
7625 rec->refs -= refs_to_drop;
7627 if (back->node.found_extent_tree) {
7628 back->num_refs -= refs_to_drop;
7629 if (rec->extent_item_refs)
7630 rec->extent_item_refs -= refs_to_drop;
7632 if (back->found_ref == 0)
7633 back->node.found_ref = 0;
7634 if (back->num_refs == 0)
7635 back->node.found_extent_tree = 0;
7637 if (!back->node.found_extent_tree && back->node.found_ref) {
7638 list_del(&back->node.list);
7642 struct tree_backref *back;
7643 back = find_tree_backref(rec, parent, root_objectid);
7646 if (back->node.found_ref) {
7649 back->node.found_ref = 0;
7651 if (back->node.found_extent_tree) {
7652 if (rec->extent_item_refs)
7653 rec->extent_item_refs--;
7654 back->node.found_extent_tree = 0;
7656 if (!back->node.found_extent_tree && back->node.found_ref) {
7657 list_del(&back->node.list);
7661 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that belong to @bytenr.
 *
 * The extent tree is searched repeatedly with a key whose objectid is
 * @bytenr and whose offset starts at (u64)-1.  The item found is examined:
 *  - a different objectid ends the walk;
 *  - a key type other than EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF,
 *    EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or SHARED_DATA_REF is
 *    skipped by releasing the path and lowering the search key to just
 *    below the item found;
 *  - anything else is announced on stderr and removed with
 *    btrfs_del_item().
 *
 * After removing an EXTENT_ITEM or METADATA_ITEM the block group
 * accounting is updated through btrfs_update_block_group(), using
 * found_key.offset as the size for an EXTENT_ITEM and root->nodesize for a
 * METADATA_ITEM.
 */
7666 static int delete_extent_records(struct btrfs_trans_handle *trans,
7667 struct btrfs_root *root,
7668 struct btrfs_path *path,
7669 u64 bytenr, u64 new_len)
7671 struct btrfs_key key;
7672 struct btrfs_key found_key;
7673 struct extent_buffer *leaf;
7678 key.objectid = bytenr;
7680 key.offset = (u64)-1;
7683 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7690 if (path->slots[0] == 0)
7696 leaf = path->nodes[0];
7697 slot = path->slots[0];
7699 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7700 if (found_key.objectid != bytenr)
7703 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7704 found_key.type != BTRFS_METADATA_ITEM_KEY &&
7705 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7706 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7707 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7708 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7709 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7710 btrfs_release_path(path);
7711 if (found_key.type == 0) {
7712 if (found_key.offset == 0)
7714 key.offset = found_key.offset - 1;
7715 key.type = found_key.type;
7717 key.type = found_key.type - 1;
7718 key.offset = (u64)-1;
7722 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7723 found_key.objectid, found_key.type, found_key.offset);
7725 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7728 btrfs_release_path(path);
7730 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7731 found_key.type == BTRFS_METADATA_ITEM_KEY) {
7732 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7733 found_key.offset : root->nodesize;
7735 ret = btrfs_update_block_group(trans, root, bytenr,
7742 btrfs_release_path(path);
7747 * for a single backref, this will allocate a new extent
7748 * and add the backref to it.
/*
 * Create the extent item for @rec and add the reference described by
 * @back to it.
 *
 * An EXTENT_ITEM keyed (rec->start, EXTENT_ITEM, rec->max_size) is
 * inserted into the extent tree with a reference count of 0 and
 * rec->generation.  Data extents get BTRFS_EXTENT_FLAG_DATA; tree blocks
 * get a trailing btrfs_tree_block_info filled with rec->info_level and the
 * key (rec->info_objectid, 0, 0), plus BTRFS_EXTENT_FLAG_TREE_BLOCK OR-ed
 * with @flags.  The block group accounting is updated afterwards.
 *
 * The reference itself is then added through btrfs_inc_extent_ref(): once
 * per dback->found_ref for a data backref, once for a tree backref.  For a
 * full backref the parent recorded in the backref is passed along.
 *
 * Fixed here: the address-of expressions on copy_key had been mangled into
 * a copyright sign by an HTML entity decode, and a stray second semicolon
 * followed the copy_key declaration.
 */
7750 static int record_extent(struct btrfs_trans_handle *trans,
7751 struct btrfs_fs_info *info,
7752 struct btrfs_path *path,
7753 struct extent_record *rec,
7754 struct extent_backref *back,
7755 int allocated, u64 flags)
7758 struct btrfs_root *extent_root = info->extent_root;
7759 struct extent_buffer *leaf;
7760 struct btrfs_key ins_key;
7761 struct btrfs_extent_item *ei;
7762 struct data_backref *dback;
7763 struct btrfs_tree_block_info *bi;
7766 rec->max_size = max_t(u64, rec->max_size,
7767 info->extent_root->nodesize);
7770 u32 item_size = sizeof(*ei);
7773 item_size += sizeof(*bi);
7775 ins_key.objectid = rec->start;
7776 ins_key.offset = rec->max_size;
7777 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7779 ret = btrfs_insert_empty_item(trans, extent_root, path,
7780 &ins_key, item_size);
7784 leaf = path->nodes[0];
7785 ei = btrfs_item_ptr(leaf, path->slots[0],
7786 struct btrfs_extent_item);
7788 btrfs_set_extent_refs(leaf, ei, 0);
7789 btrfs_set_extent_generation(leaf, ei, rec->generation);
7791 if (back->is_data) {
7792 btrfs_set_extent_flags(leaf, ei,
7793 BTRFS_EXTENT_FLAG_DATA);
7795 struct btrfs_disk_key copy_key;
/* the tree block info sits directly behind the extent item */
7797 bi = (struct btrfs_tree_block_info *)(ei + 1);
7798 memset_extent_buffer(leaf, 0, (unsigned long)bi,
7801 btrfs_set_disk_key_objectid(&copy_key,
7802 rec->info_objectid);
7803 btrfs_set_disk_key_type(&copy_key, 0);
7804 btrfs_set_disk_key_offset(&copy_key, 0);
7806 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7807 btrfs_set_tree_block_key(leaf, bi, &copy_key);
7809 btrfs_set_extent_flags(leaf, ei,
7810 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7813 btrfs_mark_buffer_dirty(leaf);
7814 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7815 rec->max_size, 1, 0);
7818 btrfs_release_path(path);
7821 if (back->is_data) {
7825 dback = to_data_backref(back);
7826 if (back->full_backref)
7827 parent = dback->parent;
7831 for (i = 0; i < dback->found_ref; i++) {
7832 /* if parent != 0, we're doing a full backref
7833 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7834 * just makes the backref allocator create a data
7837 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7838 rec->start, rec->max_size,
7842 BTRFS_FIRST_FREE_OBJECTID :
7848 fprintf(stderr, "adding new data backref"
7849 " on %llu %s %llu owner %llu"
7850 " offset %llu found %d\n",
7851 (unsigned long long)rec->start,
7852 back->full_backref ?
7854 back->full_backref ?
7855 (unsigned long long)parent :
7856 (unsigned long long)dback->root,
7857 (unsigned long long)dback->owner,
7858 (unsigned long long)dback->offset,
7862 struct tree_backref *tback;
7864 tback = to_tree_backref(back);
7865 if (back->full_backref)
7866 parent = tback->parent;
7870 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7871 rec->start, rec->max_size,
7872 parent, tback->root, 0, 0);
7873 fprintf(stderr, "adding new tree backref on "
7874 "start %llu len %llu parent %llu root %llu\n",
7875 rec->start, rec->max_size, parent, tback->root);
7878 btrfs_release_path(path);
/*
 * Scan the list @entries for an extent_entry whose bytenr and bytes both
 * equal the given values.
 *
 * Presumably returns the matching entry, or NULL when there is none -
 * TODO confirm.
 */
7882 static struct extent_entry *find_entry(struct list_head *entries,
7883 u64 bytenr, u64 bytes)
7885 struct extent_entry *entry = NULL;
7887 list_for_each_entry(entry, entries, list) {
7888 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry in @entries that is most likely to be correct, judged by
 * its 'count'.
 *
 * Entries whose 'broken' counter equals their 'count' are not trusted.
 * While walking the list, 'prev' and 'best' are compared with the current
 * entry: a tie in count (between best and entry, or between an unbroken
 * prev and entry) does not settle the question, otherwise the entry with
 * the higher count becomes the new best.
 *
 * Presumably returns 'best', which stays NULL when no entry stands out -
 * TODO confirm.
 */
7895 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7897 struct extent_entry *entry, *best = NULL, *prev = NULL;
7899 list_for_each_entry(entry, entries, list) {
7901 * If there are as many broken entries as entries then we know
7902 * not to trust this particular entry.
7904 if (entry->broken == entry->count)
7908 * Special case, when there are only two entries and 'best' is
7918 * If our current entry == best then we can't be sure our best
7919 * is really the best, so we need to keep searching.
7921 if (best && best->count == entry->count) {
7927 /* Prev == entry, not good enough, have to keep searching */
7928 if (!prev->broken && prev->count == entry->count)
7932 best = (prev->count > entry->count) ? prev : entry;
7933 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind @dback so that it agrees with
 * @entry.
 *
 * Step 1 (read only): the fs root dback->root is read, and starting at
 * (dback->owner, EXTENT_DATA, dback->offset) the items are walked until
 * one is found whose disk_bytenr and disk_num_bytes equal
 * dback->disk_bytenr and dback->bytes.  Leaving the inode's EXTENT_DATA
 * items without a match is reported as "Couldn't find our ref".
 *
 * Step 2 (in a new transaction): the item is searched again with COW and
 * updated:
 *  - a broken backref with a different start just gets entry->bytenr;
 *  - a ref starting after the entry start, or before it, gets
 *    entry->bytenr and an adjusted file extent offset, provided it stays
 *    inside the entry;
 *  - disk_num_bytes is set to entry->bytes, and so is ram_bytes for
 *    uncompressed extents.
 * Compressed extents whose start differs from the entry, and refs that
 * fall outside the entry, are not handled and only produce a message
 * asking for a btrfs-image.
 *
 * The final return yields ret when it is non-zero and otherwise the result
 * of btrfs_commit_transaction(); a failed btrfs_start_transaction() is
 * returned as PTR_ERR(trans).
 */
7941 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7942 struct data_backref *dback, struct extent_entry *entry)
7944 struct btrfs_trans_handle *trans;
7945 struct btrfs_root *root;
7946 struct btrfs_file_extent_item *fi;
7947 struct extent_buffer *leaf;
7948 struct btrfs_key key;
7952 key.objectid = dback->root;
7953 key.type = BTRFS_ROOT_ITEM_KEY;
7954 key.offset = (u64)-1;
7955 root = btrfs_read_fs_root(info, &key);
7957 fprintf(stderr, "Couldn't find root for our ref\n");
7962 * The backref points to the original offset of the extent if it was
7963 * split, so we need to search down to the offset we have and then walk
7964 * forward until we find the backref we're looking for.
7966 key.objectid = dback->owner;
7967 key.type = BTRFS_EXTENT_DATA_KEY;
7968 key.offset = dback->offset;
7969 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7971 fprintf(stderr, "Error looking up ref %d\n", ret);
7976 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7977 ret = btrfs_next_leaf(root, path);
7979 fprintf(stderr, "Couldn't find our ref, next\n");
7983 leaf = path->nodes[0];
7984 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7985 if (key.objectid != dback->owner ||
7986 key.type != BTRFS_EXTENT_DATA_KEY) {
7987 fprintf(stderr, "Couldn't find our ref, search\n");
7990 fi = btrfs_item_ptr(leaf, path->slots[0],
7991 struct btrfs_file_extent_item);
7992 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7993 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7995 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8000 btrfs_release_path(path);
8002 trans = btrfs_start_transaction(root, 1);
8004 return PTR_ERR(trans);
8007 * Ok we have the key of the file extent we want to fix, now we can cow
8008 * down to the thing and fix it.
8010 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8012 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8013 key.objectid, key.type, key.offset, ret);
8017 fprintf(stderr, "Well that's odd, we just found this key "
8018 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8023 leaf = path->nodes[0];
8024 fi = btrfs_item_ptr(leaf, path->slots[0],
8025 struct btrfs_file_extent_item);
8027 if (btrfs_file_extent_compression(leaf, fi) &&
8028 dback->disk_bytenr != entry->bytenr) {
8029 fprintf(stderr, "Ref doesn't match the record start and is "
8030 "compressed, please take a btrfs-image of this file "
8031 "system and send it to a btrfs developer so they can "
8032 "complete this functionality for bytenr %Lu\n",
8033 dback->disk_bytenr);
8038 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8039 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8040 } else if (dback->disk_bytenr > entry->bytenr) {
8041 u64 off_diff, offset;
8043 off_diff = dback->disk_bytenr - entry->bytenr;
8044 offset = btrfs_file_extent_offset(leaf, fi);
8045 if (dback->disk_bytenr + offset +
8046 btrfs_file_extent_num_bytes(leaf, fi) >
8047 entry->bytenr + entry->bytes) {
8048 fprintf(stderr, "Ref is past the entry end, please "
8049 "take a btrfs-image of this file system and "
8050 "send it to a btrfs developer, ref %Lu\n",
8051 dback->disk_bytenr);
8056 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8057 btrfs_set_file_extent_offset(leaf, fi, offset);
8058 } else if (dback->disk_bytenr < entry->bytenr) {
8061 offset = btrfs_file_extent_offset(leaf, fi);
8062 if (dback->disk_bytenr + offset < entry->bytenr) {
8063 fprintf(stderr, "Ref is before the entry start, please"
8064 " take a btrfs-image of this file system and "
8065 "send it to a btrfs developer, ref %Lu\n",
8066 dback->disk_bytenr);
8071 offset += dback->disk_bytenr;
8072 offset -= entry->bytenr;
8073 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8074 btrfs_set_file_extent_offset(leaf, fi, offset);
8077 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8080 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8081 * only do this if we aren't using compression, otherwise it's a
8084 if (!btrfs_file_extent_compression(leaf, fi))
8085 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8087 printf("ram bytes may be wrong?\n");
8088 btrfs_mark_buffer_dirty(leaf);
8090 err = btrfs_commit_transaction(trans, root);
8091 btrfs_release_path(path);
8092 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on the extent's start and length,
 * repairing the file extent items that disagree.
 *
 * Pass 1: every data backref that is not a full backref and has a real
 * ref attached (dback->found_ref != 0) is grouped by
 * (disk_bytenr, bytes) into a temporary list of extent_entry objects.  A
 * backref that differs from (rec->start, rec->nr) or is marked broken is
 * noted as a mismatch.  With at most one entry and no mismatch there is
 * nothing to do.
 *
 * Pass 2: find_most_right_entry() chooses the entry to trust.  If the
 * backrefs cannot settle it, the extent record's own (start, nr) is
 * brought in - looked up among the entries or, when the backrefs were
 * broken and the record was found, added as a new entry - and the choice
 * is made again.  Cases that cannot be decided, or where the chosen start
 * differs from rec->start, are reported and left alone.
 *
 * Pass 3: every qualifying backref that differs from the chosen entry is
 * handed to repair_ref().
 *
 * The temporary entry list is emptied before returning.
 */
8095 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8096 struct extent_record *rec)
8098 struct extent_backref *back;
8099 struct data_backref *dback;
8100 struct extent_entry *entry, *best = NULL;
8103 int broken_entries = 0;
8108 * Metadata is easy and the backrefs should always agree on bytenr and
8109 * size, if not we've got bigger issues.
8114 list_for_each_entry(back, &rec->backrefs, list) {
8115 if (back->full_backref || !back->is_data)
8118 dback = to_data_backref(back);
8121 * We only pay attention to backrefs that we found a real
8124 if (dback->found_ref == 0)
8128 * For now we only catch when the bytes don't match, not the
8129 * bytenr. We can easily do this at the same time, but I want
8130 * to have a fs image to test on before we just add repair
8131 * functionality willy-nilly so we know we won't screw up the
8135 entry = find_entry(&entries, dback->disk_bytenr,
8138 entry = malloc(sizeof(struct extent_entry));
8143 memset(entry, 0, sizeof(*entry));
8144 entry->bytenr = dback->disk_bytenr;
8145 entry->bytes = dback->bytes;
8146 list_add_tail(&entry->list, &entries);
8151 * If we only have on entry we may think the entries agree when
8152 * in reality they don't so we have to do some extra checking.
8154 if (dback->disk_bytenr != rec->start ||
8155 dback->bytes != rec->nr || back->broken)
8166 /* Yay all the backrefs agree, carry on good sir */
8167 if (nr_entries <= 1 && !mismatch)
8170 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8171 "%Lu\n", rec->start);
8174 * First we want to see if the backrefs can agree amongst themselves who
8175 * is right, so figure out which one of the entries has the highest
8178 best = find_most_right_entry(&entries);
8181 * Ok so we may have an even split between what the backrefs think, so
8182 * this is where we use the extent ref to see what it thinks.
8185 entry = find_entry(&entries, rec->start, rec->nr);
8186 if (!entry && (!broken_entries || !rec->found_rec)) {
8187 fprintf(stderr, "Backrefs don't agree with each other "
8188 "and extent record doesn't agree with anybody,"
8189 " so we can't fix bytenr %Lu bytes %Lu\n",
8190 rec->start, rec->nr);
8193 } else if (!entry) {
8195 * Ok our backrefs were broken, we'll assume this is the
8196 * correct value and add an entry for this range.
8198 entry = malloc(sizeof(struct extent_entry));
8203 memset(entry, 0, sizeof(*entry));
8204 entry->bytenr = rec->start;
8205 entry->bytes = rec->nr;
8206 list_add_tail(&entry->list, &entries);
8210 best = find_most_right_entry(&entries);
8212 fprintf(stderr, "Backrefs and extent record evenly "
8213 "split on who is right, this is going to "
8214 "require user input to fix bytenr %Lu bytes "
8215 "%Lu\n", rec->start, rec->nr);
8222 * I don't think this can happen currently as we'll abort() if we catch
8223 * this case higher up, but in case somebody removes that we still can't
8224 * deal with it properly here yet, so just bail out of that's the case.
8226 if (best->bytenr != rec->start) {
8227 fprintf(stderr, "Extent start and backref starts don't match, "
8228 "please use btrfs-image on this file system and send "
8229 "it to a btrfs developer so they can make fsck fix "
8230 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8231 rec->start, rec->nr);
8237 * Ok great we all agreed on an extent record, let's go find the real
8238 * references and fix up the ones that don't match.
8240 list_for_each_entry(back, &rec->backrefs, list) {
8241 if (back->full_backref || !back->is_data)
8244 dback = to_data_backref(back);
8247 * Still ignoring backrefs that don't have a real ref attached
8250 if (dback->found_ref == 0)
8253 if (dback->bytes == best->bytes &&
8254 dback->disk_bytenr == best->bytenr)
8257 ret = repair_ref(info, path, dback, best);
8263 * Ok we messed with the actual refs, which means we need to drop our
8264 * entire cache and go back and rescan. I know this is a huge pain and
8265 * adds a lot of extra work, but it's the only way to be safe. Once all
8266 * the backrefs agree we may not need to do anything to the extent
8271 while (!list_empty(&entries)) {
8272 entry = list_entry(entries.next, struct extent_entry, list);
8273 list_del_init(&entry->list);
/*
 * Replace @rec in @extent_cache with the first record queued on rec->dups.
 *
 * @extent_cache: cache of extent records; @rec is removed from it and the
 *                promoted record is inserted in its place
 * @rec:          record that has no extent item of its own (found_rec clear)
 *                and, past the early checks, exactly one duplicate
 *
 * The promoted record (good) is unlinked from the dups list, gets fresh
 * backref and dups lists, has its cache range set from its own start/nr,
 * has its checked flags and duplicate count cleared, and inherits the ref
 * count and the backref list of @rec. Records that lookup_cache_extent()
 * returns for the range of good are then either attached to good as
 * duplicates or merged into it.
 *
 * Returns 0 when good ends up with duplicates recorded against it and 1
 * when it has none.
 */
8279 static int process_duplicates(struct btrfs_root *root,
8280 struct cache_tree *extent_cache,
8281 struct extent_record *rec)
8283 struct extent_record *good, *tmp;
8284 struct cache_extent *cache;
8288 * If we found a extent record for this extent then return, or if we
8289 * have more than one duplicate we are likely going to need to delete
8292 if (rec->found_rec || rec->num_duplicates > 1)
8295 /* Shouldn't happen but just in case */
8296 BUG_ON(!rec->num_duplicates);
8299 * So this happens if we end up with a backref that doesn't match the
8300 * actual extent entry. So either the backref is bad or the extent
8301 * entry is bad. Either way we want to have the extent_record actually
8302 * reflect what we found in the extent_tree, so we need to take the
8303 * duplicate out and use that as the extent_record since the only way we
8304 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8306 remove_cache_extent(extent_cache, &rec->cache);
8308 good = to_extent_record(rec->dups.next);
8309 list_del_init(&good->list);
8310 INIT_LIST_HEAD(&good->backrefs);
8311 INIT_LIST_HEAD(&good->dups);
8312 good->cache.start = good->start;
8313 good->cache.size = good->nr;
8314 good->content_checked = 0;
8315 good->owner_ref_checked = 0;
8316 good->num_duplicates = 0;
8317 good->refs = rec->refs;
8318 list_splice_init(&rec->backrefs, &good->backrefs);
8320 cache = lookup_cache_extent(extent_cache, good->start,
8324 tmp = container_of(cache, struct extent_record, cache);
8327 * If we find another overlapping extent and it's found_rec is
8328 * set then it's a duplicate and we need to try and delete
8331 if (tmp->found_rec || tmp->num_duplicates > 0) {
/*
 * Queue good on the global duplicate_extents list only once; tmp and
 * everything already on tmp->dups move onto good->dups.
 */
8332 if (list_empty(&good->list))
8333 list_add_tail(&good->list,
8334 &duplicate_extents);
8335 good->num_duplicates += tmp->num_duplicates + 1;
8336 list_splice_init(&tmp->dups, &good->dups);
8337 list_del_init(&tmp->list);
8338 list_add_tail(&tmp->list, &good->dups);
8339 remove_cache_extent(extent_cache, &tmp->cache);
8344 * Ok we have another non extent item backed extent rec, so lets
8345 * just add it to this extent and carry on like we did above.
/* Fold the ref count and backrefs of tmp into good, then drop tmp. */
8347 good->refs += tmp->refs;
8348 list_splice_init(&tmp->backrefs, &good->backrefs);
8349 remove_cache_extent(extent_cache, &tmp->cache);
/* Publish the merged record in the cache. */
8352 ret = insert_cache_extent(extent_cache, &good->cache);
8355 return good->num_duplicates ? 0 : 1;
/*
 * Remove from the extent tree the extent items of the duplicates of @rec.
 *
 * @root: any root of the filesystem; it is replaced by fs_info->extent_root
 *        before the transaction is started
 * @rec:  extent record whose dups list holds the overlapping records
 *
 * Records are collected on a local delete_list (list_add_tail of @rec,
 * list_move_tail of entries of rec->dups). Every collected record that has
 * found_rec set is looked up under the key
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and removed with btrfs_del_item(),
 * all inside one transaction.
 *
 * Returns ret when it is non-zero, otherwise nr_del. The caller in
 * check_extent_refs() documents the result as the number of entries
 * deleted.
 */
8358 static int delete_duplicate_records(struct btrfs_root *root,
8359 struct extent_record *rec)
8361 struct btrfs_trans_handle *trans;
8362 LIST_HEAD(delete_list);
8363 struct btrfs_path path;
8364 struct extent_record *tmp, *good, *n;
8367 struct btrfs_key key;
8369 btrfs_init_path(&path);
8372 /* Find the record that covers all of the duplicates. */
8373 list_for_each_entry(tmp, &rec->dups, list) {
8374 if (good->start < tmp->start)
8376 if (good->nr > tmp->nr)
8379 if (tmp->start + tmp->nr < good->start + good->nr) {
/*
 * NOTE(review): the message formats each pair as a range, but the values
 * passed are start and length (nr), not start and end - confirm intent.
 */
8380 fprintf(stderr, "Ok we have overlapping extents that "
8381 "aren't completely covered by each other, this "
8382 "is going to require more careful thought. "
8383 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8384 tmp->start, tmp->nr, good->start, good->nr);
8391 list_add_tail(&rec->list, &delete_list);
8393 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8396 list_move_tail(&tmp->list, &delete_list);
/* All deletions are done against the extent root. */
8399 root = root->fs_info->extent_root;
8400 trans = btrfs_start_transaction(root, 1);
8401 if (IS_ERR(trans)) {
8402 ret = PTR_ERR(trans);
8406 list_for_each_entry(tmp, &delete_list, list) {
8407 if (tmp->found_rec == 0)
/* Key of the extent item that backs this record. */
8409 key.objectid = tmp->start;
8410 key.type = BTRFS_EXTENT_ITEM_KEY;
8411 key.offset = tmp->nr;
8413 /* Shouldn't happen but just in case */
8414 if (tmp->metadata) {
8415 fprintf(stderr, "Well this shouldn't happen, extent "
8416 "record overlaps but is metadata? "
8417 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
/*
 * Search with ins_len -1 and cow 1 - presumably the form used ahead of an
 * item deletion; confirm against btrfs_search_slot().
 */
8421 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8427 ret = btrfs_del_item(trans, root, &path);
8430 btrfs_release_path(&path);
8433 err = btrfs_commit_transaction(trans, root);
/* Drain delete_list, resetting the list linkage of each record. */
8437 while (!list_empty(&delete_list)) {
8438 tmp = to_extent_record(delete_list.next);
8439 list_del_init(&tmp->list);
/* Same for whatever is still left on rec->dups. */
8445 while (!list_empty(&rec->dups)) {
8446 tmp = to_extent_record(rec->dups.next);
8447 list_del_init(&tmp->list);
8451 btrfs_release_path(&path);
/* Nothing failed and nothing was deleted: clear the duplicate count. */
8453 if (!ret && !nr_del)
8454 rec->num_duplicates = 0;
8456 return ret ? ret : nr_del;
/*
 * Try to resolve the data backrefs of @rec that have no matching ref yet.
 *
 * @info:         filesystem; used to read the root named by each backref
 * @path:         caller-provided path, released after every search
 * @extent_cache: cache of extent records, consulted for the bytenr that the
 *                file extent item points at
 * @rec:          extent record whose backref list is walked
 *
 * Full backrefs and non-data backrefs are not considered. For the others
 * the root dback->root is read and the file extent item
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched for.
 * The disk bytenr and disk num_bytes stored in that item are copied onto
 * the backref and found_ref is incremented.
 *
 * Returns PTR_ERR(root) when reading a root fails with anything other than
 * -ENOENT; a root that does not exist (-ENOENT) only causes that backref to
 * be skipped.
 */
8459 static int find_possible_backrefs(struct btrfs_fs_info *info,
8460 struct btrfs_path *path,
8461 struct cache_tree *extent_cache,
8462 struct extent_record *rec)
8464 struct btrfs_root *root;
8465 struct extent_backref *back;
8466 struct data_backref *dback;
8467 struct cache_extent *cache;
8468 struct btrfs_file_extent_item *fi;
8469 struct btrfs_key key;
8473 list_for_each_entry(back, &rec->backrefs, list) {
8474 /* Don't care about full backrefs (poor unloved backrefs) */
8475 if (back->full_backref || !back->is_data)
8478 dback = to_data_backref(back);
8480 /* We found this one, we don't need to do a lookup */
8481 if (dback->found_ref)
/* Key of the root the backref names. */
8484 key.objectid = dback->root;
8485 key.type = BTRFS_ROOT_ITEM_KEY;
8486 key.offset = (u64)-1;
8488 root = btrfs_read_fs_root(info, &key);
8490 /* No root, definitely a bad ref, skip */
8491 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8493 /* Other err, exit */
8495 return PTR_ERR(root);
/* Key of the file extent item the backref refers to. */
8497 key.objectid = dback->owner;
8498 key.type = BTRFS_EXTENT_DATA_KEY;
8499 key.offset = dback->offset;
8500 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8502 btrfs_release_path(path);
8505 /* Didn't find it, we can carry on */
/* Read the disk location recorded in the file extent item. */
8510 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8511 struct btrfs_file_extent_item);
8512 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8513 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8514 btrfs_release_path(path);
/* Is there already a cached extent record at that bytenr? */
8515 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8517 struct extent_record *tmp;
8518 tmp = container_of(cache, struct extent_record, cache);
8521 * If we found an extent record for the bytenr for this
8522 * particular backref then we can't add it to our
8523 * current extent record. We only want to add backrefs
8524 * that don't have a corresponding extent item in the
8525 * extent tree since they likely belong to this record
8526 * and we need to fix it if it doesn't match bytenrs.
8532 dback->found_ref += 1;
8533 dback->disk_bytenr = bytenr;
8534 dback->bytes = bytes;
8537 * Set this so the verify backref code knows not to trust the
8538 * values in this backref.
8547 * Record orphan data refs into the corresponding root.
8549 * Return 0 if the extent item contains a data ref and it was recorded.
8550 * Return 1 if the extent item contains no useful data ref.
8551 * In that case, it may contain only shared_dataref or metadata backrefs,
8552 * or the file extent exists (this should be handled by the extent bytenr
8554 * Return <0 if something goes wrong.
/*
 * @fs_info: filesystem; used to read the root named by each data backref
 * @rec:     extent record whose backrefs are examined; rec->cache.start and
 *           rec->cache.size become the disk range of every recorded orphan
 *
 * Backrefs that are full backrefs, are not data backrefs, were not seen in
 * the extent tree, or already have found_ref set are not recorded.
 *
 * The final return yields 0 when at least one orphan was recorded and 1
 * otherwise.
 */
8556 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8557 struct extent_record *rec)
8559 struct btrfs_key key;
8560 struct btrfs_root *dest_root;
8561 struct extent_backref *back;
8562 struct data_backref *dback;
8563 struct orphan_data_extent *orphan;
8564 struct btrfs_path path;
8565 int recorded_data_ref = 0;
8570 btrfs_init_path(&path);
8571 list_for_each_entry(back, &rec->backrefs, list) {
8572 if (back->full_backref || !back->is_data ||
8573 !back->found_extent_tree)
8575 dback = to_data_backref(back);
8576 if (dback->found_ref)
/* Root that the backref says owns the file extent. */
8578 key.objectid = dback->root;
8579 key.type = BTRFS_ROOT_ITEM_KEY;
8580 key.offset = (u64)-1;
8582 dest_root = btrfs_read_fs_root(fs_info, &key);
8584 /* For non-exist root we just skip it */
8585 if (IS_ERR(dest_root) || !dest_root)
/* File extent item the backref refers to. */
8588 key.objectid = dback->owner;
8589 key.type = BTRFS_EXTENT_DATA_KEY;
8590 key.offset = dback->offset;
/* Only the search result is needed, so the path is dropped right away. */
8592 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8593 btrfs_release_path(&path);
8595 * For ret < 0, it's OK since the fs-tree may be corrupted,
8596 * we need to record it for inode/file extent rebuild.
8597 * For ret > 0, we record it only for file extent rebuild.
8598 * For ret == 0, the file extent exists but only bytenr
8599 * mismatch, let the original bytenr fix routine to handle,
8605 orphan = malloc(sizeof(*orphan));
/* Describe the orphan: owning root, inode, file offset and disk range. */
8610 INIT_LIST_HEAD(&orphan->list);
8611 orphan->root = dback->root;
8612 orphan->objectid = dback->owner;
8613 orphan->offset = dback->offset;
8614 orphan->disk_bytenr = rec->cache.start;
8615 orphan->disk_len = rec->cache.size;
/*
 * NOTE(review): every other list_add_tail() call in this file passes
 * (new node, list head). Here the list head of the root comes first and the
 * new orphan second, which looks reversed and would re-link the head of
 * dest_root->orphan_data_extents around this single orphan - confirm.
 */
8616 list_add(&dest_root->orphan_data_extents, &orphan->list);
8617 recorded_data_ref = 1;
8620 btrfs_release_path(&path);
8622 return !recorded_data_ref;
8628 * when an incorrect extent item is found, this will delete
8629 * all of the existing entries for it and recreate them
8630 * based on what the tree scan found.
/*
 * @info:         filesystem being repaired; the extent root and the
 *                corrupt_blocks cache are taken from it
 * @extent_cache: cache of extent records, handed to find_possible_backrefs()
 * @rec:          extent record whose extent tree entries are rebuilt
 *
 * For a non-metadata record whose counted refs differ from the refs stored
 * in the extent item, find_possible_backrefs() and verify_backrefs() run
 * first. Inside a transaction on the extent root the existing records for
 * rec->start / rec->max_size are deleted, the corrupt_blocks cache is
 * consulted for the same range, and record_extent() is called while walking
 * rec->backrefs; backrefs without found_ref are not re-created.
 *
 * The FULL_BACKREF flag is carried over into the flags passed to
 * record_extent() when rec->flag_block_full_backref is set.
 */
8632 static int fixup_extent_refs(struct btrfs_fs_info *info,
8633 struct cache_tree *extent_cache,
8634 struct extent_record *rec)
8636 struct btrfs_trans_handle *trans = NULL;
8638 struct btrfs_path path;
8639 struct list_head *cur = rec->backrefs.next;
8640 struct cache_extent *cache;
8641 struct extent_backref *back;
8645 if (rec->flag_block_full_backref)
8646 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8648 btrfs_init_path(&path);
8649 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8651 * Sometimes the backrefs themselves are so broken they don't
8652 * get attached to any meaningful rec, so first go back and
8653 * check any of our backrefs that we couldn't find and throw
8654 * them into the list if we find the backref so that
8655 * verify_backrefs can figure out what to do.
8657 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8662 /* step one, make sure all of the backrefs agree */
8663 ret = verify_backrefs(info, &path, rec);
8667 trans = btrfs_start_transaction(info->extent_root, 1);
8668 if (IS_ERR(trans)) {
8669 ret = PTR_ERR(trans);
8673 /* step two, delete all the existing records */
8674 ret = delete_extent_records(trans, info->extent_root, &path,
8675 rec->start, rec->max_size);
8680 /* was this block corrupt? If so, don't add references to it */
8681 cache = lookup_cache_extent(info->corrupt_blocks,
8682 rec->start, rec->max_size);
8688 /* step three, recreate all the refs we did find */
8689 while(cur != &rec->backrefs) {
8690 back = to_extent_backref(cur);
8694 * if we didn't find any references, don't create a
8697 if (!back->found_ref)
/* The mark is cleared before the extent is recorded again. */
8700 rec->bad_full_backref = 0;
8701 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8709 int err = btrfs_commit_transaction(trans, info->extent_root);
8714 btrfs_release_path(&path);
/*
 * Make the FULL_BACKREF flag of the extent item of @rec match
 * rec->flag_block_full_backref.
 *
 * @fs_info: filesystem; the item is looked up in fs_info->extent_root
 * @rec:     extent record describing the item to fix
 *
 * The item key is (rec->start, BTRFS_METADATA_ITEM_KEY, rec->info_level)
 * for metadata records and (rec->start, BTRFS_EXTENT_ITEM_KEY,
 * rec->max_size) otherwise. The flag is set or cleared in the leaf, the
 * buffer is marked dirty and the transaction is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started and the
 * result of btrfs_commit_transaction() after a successful update.
 */
8718 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8719 struct extent_record *rec)
8721 struct btrfs_trans_handle *trans;
8722 struct btrfs_root *root = fs_info->extent_root;
8723 struct btrfs_path path;
8724 struct btrfs_extent_item *ei;
8725 struct btrfs_key key;
8729 key.objectid = rec->start;
8730 if (rec->metadata) {
8731 key.type = BTRFS_METADATA_ITEM_KEY;
8732 key.offset = rec->info_level;
8734 key.type = BTRFS_EXTENT_ITEM_KEY;
8735 key.offset = rec->max_size;
8738 trans = btrfs_start_transaction(root, 0);
8740 return PTR_ERR(trans);
8742 btrfs_init_path(&path);
/* ins_len 0, cow 1: the item found here is modified in place below. */
8743 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/*
 * NOTE(review): on both failure paths below the transaction is committed
 * and the result of the commit is ignored.
 */
8745 btrfs_release_path(&path);
8746 btrfs_commit_transaction(trans, root);
8749 fprintf(stderr, "Didn't find extent for %llu\n",
8750 (unsigned long long)rec->start);
8751 btrfs_release_path(&path);
8752 btrfs_commit_transaction(trans, root);
8756 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8757 struct btrfs_extent_item);
8758 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Set or clear the bit so the item follows the decision stored in rec. */
8759 if (rec->flag_block_full_backref) {
8760 fprintf(stderr, "setting full backref on %llu\n",
8761 (unsigned long long)key.objectid);
8762 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8764 fprintf(stderr, "clearing full backref on %llu\n",
8765 (unsigned long long)key.objectid);
8766 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8768 btrfs_set_extent_flags(path.nodes[0], ei, flags);
8769 btrfs_mark_buffer_dirty(path.nodes[0]);
8770 btrfs_release_path(&path);
8771 return btrfs_commit_transaction(trans, root);
8774 /* right now we only prune from the extent allocation tree */
/*
 * Delete from the extent tree the node pointer that refers to @corrupt.
 *
 * @trans:   running transaction
 * @info:    filesystem; the search and the deletion use info->extent_root
 * @corrupt: corrupt block; its key drives the search and cache.start is the
 *           block pointer value that is looked for
 *
 * The search stops one level above corrupt->level (path.lowest_level), so
 * path.nodes[level] is the node expected to hold the pointer. The slot the
 * search landed on is tried first, then every slot of that node is scanned.
 * The matching slot is removed with btrfs_del_ptr().
 */
8775 static int prune_one_block(struct btrfs_trans_handle *trans,
8776 struct btrfs_fs_info *info,
8777 struct btrfs_corrupt_block *corrupt)
8780 struct btrfs_path path;
8781 struct extent_buffer *eb;
8785 int level = corrupt->level + 1;
8787 btrfs_init_path(&path);
8789 /* we want to stop at the parent to our busted block */
8790 path.lowest_level = level;
8792 ret = btrfs_search_slot(trans, info->extent_root,
8793 &corrupt->key, &path, -1, 1);
8798 eb = path.nodes[level];
8805 * hopefully the search gave us the block we want to prune,
8806 * lets try that first
8808 slot = path.slots[level];
8809 found = btrfs_node_blockptr(eb, slot);
8810 if (found == corrupt->cache.start)
8813 nritems = btrfs_header_nritems(eb);
8815 /* the search failed, lets scan this node and hope we find it */
8816 for (slot = 0; slot < nritems; slot++) {
8817 found = btrfs_node_blockptr(eb, slot);
8818 if (found == corrupt->cache.start)
8822 * we couldn't find the bad block. TODO, search all the nodes for pointers
8825 if (eb == info->extent_root->node) {
8830 btrfs_release_path(&path);
8835 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8836 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8839 btrfs_release_path(&path);
/*
 * Prune the entries of info->corrupt_blocks from the extent tree.
 *
 * Each entry returned by search_cache_extent() is handed to
 * prune_one_block() and then removed from the cache. A transaction on the
 * extent root is started for the work and committed at the end.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started, otherwise
 * the result of btrfs_commit_transaction().
 *
 * NOTE(review): the return value of prune_one_block() is not checked.
 */
8843 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8845 struct btrfs_trans_handle *trans = NULL;
8846 struct cache_extent *cache;
8847 struct btrfs_corrupt_block *corrupt;
8850 cache = search_cache_extent(info->corrupt_blocks, 0);
8854 trans = btrfs_start_transaction(info->extent_root, 1);
8856 return PTR_ERR(trans);
8858 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8859 prune_one_block(trans, info, corrupt);
8860 remove_cache_extent(info->corrupt_blocks, cache);
8863 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges of fs_info->free_space_cache and walk the
 * block groups.
 *
 * Dirty ranges are located with find_first_extent_bit() and cleared with
 * clear_extent_dirty(). Block groups are visited with
 * btrfs_lookup_first_block_group(), advancing start to the end of each
 * block group (key.objectid + key.offset).
 */
8867 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8869 struct btrfs_block_group_cache *cache;
8874 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8875 &start, &end, EXTENT_DIRTY);
8878 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8884 cache = btrfs_lookup_first_block_group(fs_info, start);
8889 start = cache->key.objectid + cache->key.offset;
/*
 * Check every extent record in @extent_cache, report inconsistencies and,
 * in repair mode, try to fix them.
 *
 * @root:         any root of the filesystem; root->fs_info is used
 *                throughout and root is switched to the extent root for the
 *                final accounting step
 * @extent_cache: cache of extent records built by the tree scan; each
 *                record is removed from it and has its backrefs freed once
 *                it has been handled
 *
 * Preparation (see the repair comment below): the range of every cached
 * record and of every corrupt block is marked dirty in
 * fs_info->excluded_extents, corrupt blocks are pruned and the cached block
 * groups are reset. While repair is set, entries of the global
 * duplicate_extents list go through process_duplicates() and
 * delete_duplicate_records().
 *
 * For each record these problems are printed to stderr: multiple extent
 * items, ref count mismatch, backpointer mismatch, failed owner ref check,
 * bad full backref, metadata crossing a stripe boundary and extent type
 * mismatch with its chunk. Ref count mismatches are first offered to
 * record_orphan_data_extents(); with repair set, fixup_extent_refs() or
 * fixup_extent_flags() is called for the repairable cases. The excluded
 * range of a record is cleared again when init_extent_tree is not set,
 * repair is set and the record had no error or was fixed.
 *
 * At the end a result that is neither 0 nor -EAGAIN is reported as a failed
 * repair. The success path shown starts a transaction on the extent root,
 * calls btrfs_fix_block_accounting(), commits and prints a repair message.
 */
8893 static int check_extent_refs(struct btrfs_root *root,
8894 struct cache_tree *extent_cache)
8896 struct extent_record *rec;
8897 struct cache_extent *cache;
8906 * if we're doing a repair, we have to make sure
8907 * we don't allocate from the problem extents.
8908 * In the worst case, this will be all the
8911 cache = search_cache_extent(extent_cache, 0);
8913 rec = container_of(cache, struct extent_record, cache);
8914 set_extent_dirty(root->fs_info->excluded_extents,
8916 rec->start + rec->max_size - 1,
8918 cache = next_cache_extent(cache);
8921 /* pin down all the corrupted blocks too */
8922 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8924 set_extent_dirty(root->fs_info->excluded_extents,
8926 cache->start + cache->size - 1,
8928 cache = next_cache_extent(cache);
/* Drop tree pointers to corrupt blocks, then reset cached block groups. */
8930 prune_corrupt_blocks(root->fs_info);
8931 reset_cached_block_groups(root->fs_info);
8934 reset_cached_block_groups(root->fs_info);
8937 * We need to delete any duplicate entries we find first otherwise we
8938 * could mess up the extent tree when we have backrefs that actually
8939 * belong to a different extent item and not the weird duplicate one.
8941 while (repair && !list_empty(&duplicate_extents)) {
8942 rec = to_extent_record(duplicate_extents.next);
8943 list_del_init(&rec->list);
8945 /* Sometimes we can find a backref before we find an actual
8946 * extent, so we need to process it a little bit to see if there
8947 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8948 * if this is a backref screwup. If we need to delete stuff
8949 * process_duplicates() will return 0, otherwise it will return
8952 if (process_duplicates(root, extent_cache, rec))
8954 ret = delete_duplicate_records(root, rec);
8958 * delete_duplicate_records will return the number of entries
8959 * deleted, so if it's greater than 0 then we know we actually
8960 * did something and we need to remove.
8974 cache = search_cache_extent(extent_cache, 0);
8977 rec = container_of(cache, struct extent_record, cache);
8978 if (rec->num_duplicates) {
8979 fprintf(stderr, "extent item %llu has multiple extent "
8980 "items\n", (unsigned long long)rec->start);
8985 if (rec->refs != rec->extent_item_refs) {
8986 fprintf(stderr, "ref mismatch on [%llu %llu] ",
8987 (unsigned long long)rec->start,
8988 (unsigned long long)rec->nr);
8989 fprintf(stderr, "extent item %llu, found %llu\n",
8990 (unsigned long long)rec->extent_item_refs,
8991 (unsigned long long)rec->refs);
8992 ret = record_orphan_data_extents(root->fs_info, rec);
8999 * we can't use the extent to repair file
9000 * extent, let the fallback method handle it.
9002 if (!fixed && repair) {
9003 ret = fixup_extent_refs(
9014 if (all_backpointers_checked(rec, 1)) {
9015 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9016 (unsigned long long)rec->start,
9017 (unsigned long long)rec->nr);
9019 if (!fixed && !recorded && repair) {
9020 ret = fixup_extent_refs(root->fs_info,
9029 if (!rec->owner_ref_checked) {
9030 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9031 (unsigned long long)rec->start,
9032 (unsigned long long)rec->nr);
9033 if (!fixed && !recorded && repair) {
9034 ret = fixup_extent_refs(root->fs_info,
9043 if (rec->bad_full_backref) {
9044 fprintf(stderr, "bad full backref, on [%llu]\n",
9045 (unsigned long long)rec->start);
9047 ret = fixup_extent_flags(root->fs_info, rec);
9056 * Although it's not a extent ref's problem, we reuse this
9057 * routine for error reporting.
9058 * No repair function yet.
9060 if (rec->crossing_stripes) {
9062 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9063 rec->start, rec->start + rec->max_size);
9068 if (rec->wrong_chunk_type) {
9070 "bad extent [%llu, %llu), type mismatch with chunk\n",
9071 rec->start, rec->start + rec->max_size);
9076 remove_cache_extent(extent_cache, cache);
9077 free_all_extent_backrefs(rec);
9078 if (!init_extent_tree && repair && (!cur_err || fixed))
9079 clear_extent_dirty(root->fs_info->excluded_extents,
9081 rec->start + rec->max_size - 1,
9087 if (ret && ret != -EAGAIN) {
9088 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9091 struct btrfs_trans_handle *trans;
9093 root = root->fs_info->extent_root;
9094 trans = btrfs_start_transaction(root, 1);
9095 if (IS_ERR(trans)) {
9096 ret = PTR_ERR(trans);
9100 btrfs_fix_block_accounting(trans, root);
9101 ret = btrfs_commit_transaction(trans, root);
9106 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the length of one stripe of a chunk.
 *
 * @type:        block group profile bits of the chunk
 * @length:      length of the chunk
 * @num_stripes: number of stripes of the chunk
 *
 * Result by profile bit, tested in this order:
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   other:  length
 *
 * check_chunk_refs() compares the result with the length of each device
 * extent of the chunk.
 *
 * NOTE(review): there is no guard on the divisor; num_stripes of 0 (RAID0,
 * RAID10), 1 (RAID5) or 2 (RAID6) divides by zero - confirm that callers
 * validate num_stripes for chunk items read from a damaged filesystem.
 */
9112 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9116 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9117 stripe_size = length;
9118 stripe_size /= num_stripes;
9119 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9120 stripe_size = length * 2;
9121 stripe_size /= num_stripes;
9122 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9123 stripe_size = length;
9124 stripe_size /= (num_stripes - 1);
9125 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9126 stripe_size = length;
9127 stripe_size /= (num_stripes - 2);
9129 stripe_size = length;
9135 * Check the chunk against its block group and device extent refs:
9136 * Return 0 if all refs seem valid.
9137 * Return 1 if only part of the refs seem valid and a later pass must rebuild
9138 * the rest, e.g. a missing block group that needs an extent tree search.
9139 * Return -1 if essential refs are missing and cannot be rebuilt.
/*
 * @chunk_rec:         chunk being checked; on a match its bg_rec is set and
 *                     matching device extents are moved onto its dextents
 *                     list
 * @block_group_cache: block groups found by the scan
 * @dev_extent_cache:  device extents found by the scan
 *
 * check_chunks() passes its silent argument as the fourth parameter;
 * presumably it suppresses the messages below - confirm.
 *
 * A block group matches when its key offset equals the chunk length and its
 * key objectid equals the chunk offset; the type flags are part of the same
 * comparison. A device extent matches a stripe when devid, offset, chunk
 * offset and the length computed by calc_stripe_length() all agree.
 */
9141 static int check_chunk_refs(struct chunk_record *chunk_rec,
9142 struct block_group_tree *block_group_cache,
9143 struct device_extent_tree *dev_extent_cache,
9146 struct cache_extent *block_group_item;
9147 struct block_group_record *block_group_rec;
9148 struct cache_extent *dev_extent_item;
9149 struct device_extent_record *dev_extent_rec;
9153 int metadump_v2 = 0;
9157 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9160 if (block_group_item) {
9161 block_group_rec = container_of(block_group_item,
9162 struct block_group_record,
9164 if (chunk_rec->length != block_group_rec->offset ||
9165 chunk_rec->offset != block_group_rec->objectid ||
9167 chunk_rec->type_flags != block_group_rec->flags)) {
9170 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9171 chunk_rec->objectid,
9176 chunk_rec->type_flags,
9177 block_group_rec->objectid,
9178 block_group_rec->type,
9179 block_group_rec->offset,
9180 block_group_rec->offset,
9181 block_group_rec->objectid,
9182 block_group_rec->flags);
/*
 * Matched: take the block group off its list (check_chunks() reports what
 * is left on block_group_cache->block_groups) and link it to the chunk.
 */
9185 list_del_init(&block_group_rec->list);
9186 chunk_rec->bg_rec = block_group_rec;
9191 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9192 chunk_rec->objectid,
9197 chunk_rec->type_flags);
/* Length every device extent of this chunk is expected to have. */
9204 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9205 chunk_rec->num_stripes);
9206 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9207 devid = chunk_rec->stripes[i].devid;
9208 offset = chunk_rec->stripes[i].offset;
9209 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9210 devid, offset, length);
9211 if (dev_extent_item) {
9212 dev_extent_rec = container_of(dev_extent_item,
9213 struct device_extent_record,
9215 if (dev_extent_rec->objectid != devid ||
9216 dev_extent_rec->offset != offset ||
9217 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9218 dev_extent_rec->length != length) {
9221 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9222 chunk_rec->objectid,
9225 chunk_rec->stripes[i].devid,
9226 chunk_rec->stripes[i].offset,
9227 dev_extent_rec->objectid,
9228 dev_extent_rec->offset,
9229 dev_extent_rec->length);
/* Matched: the device extent now hangs off the chunk. */
9232 list_move(&dev_extent_rec->chunk_list,
9233 &chunk_rec->dextents);
9238 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9239 chunk_rec->objectid,
9242 chunk_rec->stripes[i].devid,
9243 chunk_rec->stripes[i].offset);
9250 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * @chunk_cache:       chunks found by the scan; every entry is checked with
 *                     check_chunk_refs()
 * @block_group_cache: block groups found by the scan
 * @dev_extent_cache:  device extents found by the scan
 * @good:              optional list; receives chunks whose check returned 0
 * @bad:               list for chunks added by the remaining branch;
 *                     check_chunks_and_extents() passes NULL for all three
 *                     lists
 * @rebuild:           optional list; receives chunks whose check returned a
 *                     positive value
 * @silent:            forwarded to check_chunk_refs()
 *
 * After the chunks have been checked, block groups still linked on
 * block_group_cache->block_groups and device extents still linked on
 * dev_extent_cache->no_chunk_orphans are reported as having no chunk.
 */
9251 int check_chunks(struct cache_tree *chunk_cache,
9252 struct block_group_tree *block_group_cache,
9253 struct device_extent_tree *dev_extent_cache,
9254 struct list_head *good, struct list_head *bad,
9255 struct list_head *rebuild, int silent)
9257 struct cache_extent *chunk_item;
9258 struct chunk_record *chunk_rec;
9259 struct block_group_record *bg_rec;
9260 struct device_extent_record *dext_rec;
9264 chunk_item = first_cache_extent(chunk_cache);
9265 while (chunk_item) {
9266 chunk_rec = container_of(chunk_item, struct chunk_record,
9268 err = check_chunk_refs(chunk_rec, block_group_cache,
9269 dev_extent_cache, silent);
/* Sort the chunk onto the list matching the result of its check. */
9272 if (err == 0 && good)
9273 list_add_tail(&chunk_rec->list, good);
9274 if (err > 0 && rebuild)
9275 list_add_tail(&chunk_rec->list, rebuild);
9277 list_add_tail(&chunk_rec->list, bad);
9278 chunk_item = next_cache_extent(chunk_item);
/* Whatever check_chunk_refs() did not claim has no chunk. */
9281 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9284 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9292 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9296 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Compare the bytes used recorded in a device item with the sum of the
 * lengths of its device extents.
 *
 * @dev_rec:    device record; devid selects the extents and byte_used is
 *              the value that is verified
 * @dext_cache: device extents found by the scan
 *
 * The walk starts at the first cached extent at or after (devid, 0) and
 * tests each entry for a different objectid, which marks the end of the
 * extents of this device. Each counted extent is unlinked from its
 * device_list; check_devices() reports what stays on no_device_orphans. A
 * mismatch between the sum and byte_used is printed.
 */
9307 static int check_device_used(struct device_record *dev_rec,
9308 struct device_extent_tree *dext_cache)
9310 struct cache_extent *cache;
9311 struct device_extent_record *dev_extent_rec;
9314 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9316 dev_extent_rec = container_of(cache,
9317 struct device_extent_record,
9319 if (dev_extent_rec->objectid != dev_rec->devid)
9322 list_del_init(&dev_extent_rec->device_list);
9323 total_byte += dev_extent_rec->length;
9324 cache = next_cache_extent(cache);
9327 if (total_byte != dev_rec->byte_used) {
9329 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9330 total_byte, dev_rec->byte_used, dev_rec->objectid,
9331 dev_rec->type, dev_rec->offset);
9338 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * @dev_cache:        rb-tree of device records, walked from rb_first() with
 *                    rb_next()
 * @dev_extent_cache: device extents found by the scan
 *
 * Every device record is passed to check_device_used(). Device extents that
 * are still linked on dev_extent_cache->no_device_orphans afterwards are
 * reported as having no device.
 */
9339 static int check_devices(struct rb_root *dev_cache,
9340 struct device_extent_tree *dev_extent_cache)
9342 struct rb_node *dev_node;
9343 struct device_record *dev_rec;
9344 struct device_extent_record *dext_rec;
9348 dev_node = rb_first(dev_cache);
9350 dev_rec = container_of(dev_node, struct device_record, node);
9351 err = check_device_used(dev_rec, dev_extent_cache);
9355 dev_node = rb_next(dev_node);
9357 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9360 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9361 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root and append it to
 * @head.
 *
 * @head:          list the new record is appended to
 * @objectid:      objectid of the root
 * @bytenr:        start of the root node
 * @last_snapshot: last_snapshot value of the root item
 * @level:         level of the root node
 * @drop_level:    drop level of the root
 * @level_size:    size of a tree block (callers pass the nodesize)
 * @drop_key:      drop progress key copied into the record; callers in this
 *                 file pass NULL for roots without drop progress
 */
9368 static int add_root_item_to_list(struct list_head *head,
9369 u64 objectid, u64 bytenr, u64 last_snapshot,
9370 u8 level, u8 drop_level,
9371 int level_size, struct btrfs_key *drop_key)
9374 struct root_item_record *ri_rec;
9375 ri_rec = malloc(sizeof(*ri_rec));
9378 ri_rec->bytenr = bytenr;
9379 ri_rec->objectid = objectid;
9380 ri_rec->level = level;
9381 ri_rec->level_size = level_size;
9382 ri_rec->drop_level = drop_level;
9383 ri_rec->last_snapshot = last_snapshot;
9385 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9386 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list of root_item_record entries.
 *
 * The first entry is taken and unlinked with list_del_init() until the list
 * is empty. NOTE(review): the release of each record is expected to follow
 * the unlink - confirm.
 */
9391 static void free_root_item_list(struct list_head *list)
9393 struct root_item_record *ri_rec;
9395 while (!list_empty(list)) {
9396 ri_rec = list_first_entry(list, struct root_item_record,
9398 list_del_init(&ri_rec->list);
/*
 * Scan the trees whose roots are queued on @list.
 *
 * @list: list of root_item_record entries; each entry is unlinked once it
 *        has been handled
 * @root: any root of the filesystem; root->fs_info->tree_root is used to
 *        read the root nodes
 * @bits: block_info array handed to run_next_block() together with its
 *        element count
 *
 * The remaining parameters are the caches filled in by the scan; they are
 * passed through unchanged to add_root_to_pending() and run_next_block().
 *
 * For every queued record the root node is read with read_tree_block(); a
 * buffer that is not uptodate is released again. The node is registered
 * with add_root_to_pending(), and run_next_block() is invoked with the
 * record. A final run_next_block() call without a record follows the list
 * walk.
 */
9403 static int deal_root_from_list(struct list_head *list,
9404 struct btrfs_root *root,
9405 struct block_info *bits,
9407 struct cache_tree *pending,
9408 struct cache_tree *seen,
9409 struct cache_tree *reada,
9410 struct cache_tree *nodes,
9411 struct cache_tree *extent_cache,
9412 struct cache_tree *chunk_cache,
9413 struct rb_root *dev_cache,
9414 struct block_group_tree *block_group_cache,
9415 struct device_extent_tree *dev_extent_cache)
9420 while (!list_empty(list)) {
9421 struct root_item_record *rec;
9422 struct extent_buffer *buf;
9423 rec = list_entry(list->next,
9424 struct root_item_record, list);
9426 buf = read_tree_block(root->fs_info->tree_root,
9427 rec->bytenr, rec->level_size, 0);
9428 if (!extent_buffer_uptodate(buf)) {
9429 free_extent_buffer(buf);
9433 ret = add_root_to_pending(buf, extent_cache, pending,
9434 seen, nodes, rec->objectid);
9438 * To rebuild extent tree, we need deal with snapshot
9439 * one by one, otherwise we deal with node firstly which
9440 * can maximize readahead.
9443 ret = run_next_block(root, bits, bits_nr, &last,
9444 pending, seen, reada, nodes,
9445 extent_cache, chunk_cache,
9446 dev_cache, block_group_cache,
9447 dev_extent_cache, rec);
/* Done with this root: drop the buffer and unlink the record. */
9451 free_extent_buffer(buf);
9452 list_del(&rec->list);
9458 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9459 reada, nodes, extent_cache, chunk_cache,
9460 dev_cache, block_group_cache,
9461 dev_extent_cache, NULL);
/*
 * Scan all trees of the filesystem and cross-check chunks, block groups,
 * device extents, devices and extent references.
 *
 * @root: any root of the filesystem; root->fs_info gives access to the tree
 *        root and the chunk root
 *
 * Steps:
 *  - initialise the local caches and hook excluded_extents,
 *    fsck_extent_cache, free_extent_hook and corrupt_blocks into fs_info;
 *  - queue the tree root and the chunk root on normal_trees;
 *  - walk the ROOT_ITEM keys of the tree root: roots whose drop_progress
 *    objectid is 0 go onto normal_trees, the others onto dropping_trees
 *    together with their drop key;
 *  - scan both lists with deal_root_from_list();
 *  - run check_chunks(), check_extent_refs() and check_devices();
 *  - unhook the fs_info pointers and free the caches.
 */
9471 static int check_chunks_and_extents(struct btrfs_root *root)
9473 struct rb_root dev_cache;
9474 struct cache_tree chunk_cache;
9475 struct block_group_tree block_group_cache;
9476 struct device_extent_tree dev_extent_cache;
9477 struct cache_tree extent_cache;
9478 struct cache_tree seen;
9479 struct cache_tree pending;
9480 struct cache_tree reada;
9481 struct cache_tree nodes;
9482 struct extent_io_tree excluded_extents;
9483 struct cache_tree corrupt_blocks;
9484 struct btrfs_path path;
9485 struct btrfs_key key;
9486 struct btrfs_key found_key;
9488 struct block_info *bits;
9490 struct extent_buffer *leaf;
9492 struct btrfs_root_item ri;
9493 struct list_head dropping_trees;
9494 struct list_head normal_trees;
9495 struct btrfs_root *root1;
9500 dev_cache = RB_ROOT;
9501 cache_tree_init(&chunk_cache);
9502 block_group_tree_init(&block_group_cache);
9503 device_extent_tree_init(&dev_extent_cache);
9505 cache_tree_init(&extent_cache);
9506 cache_tree_init(&seen);
9507 cache_tree_init(&pending);
9508 cache_tree_init(&nodes);
9509 cache_tree_init(&reada);
9510 cache_tree_init(&corrupt_blocks);
9511 extent_io_tree_init(&excluded_extents);
9512 INIT_LIST_HEAD(&dropping_trees);
9513 INIT_LIST_HEAD(&normal_trees);
/* Hook the per-run caches into fs_info; they are unhooked before return. */
9516 root->fs_info->excluded_extents = &excluded_extents;
9517 root->fs_info->fsck_extent_cache = &extent_cache;
9518 root->fs_info->free_extent_hook = free_extent_hook;
9519 root->fs_info->corrupt_blocks = &corrupt_blocks;
9523 bits = malloc(bits_nr * sizeof(struct block_info));
9529 if (ctx.progress_enabled) {
9530 ctx.tp = TASK_EXTENTS;
9531 task_start(ctx.info);
/* Seed the normal list with the tree root and the chunk root. */
9535 root1 = root->fs_info->tree_root;
9536 level = btrfs_header_level(root1->node);
9537 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9538 root1->node->start, 0, level, 0,
9539 root1->nodesize, NULL);
9542 root1 = root->fs_info->chunk_root;
9543 level = btrfs_header_level(root1->node);
9544 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9545 root1->node->start, 0, level, 0,
9546 root1->nodesize, NULL);
/* Walk the root items stored in the tree root. */
9549 btrfs_init_path(&path);
9552 key.type = BTRFS_ROOT_ITEM_KEY;
9553 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9558 leaf = path.nodes[0];
9559 slot = path.slots[0];
9560 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9561 ret = btrfs_next_leaf(root, &path);
9564 leaf = path.nodes[0];
9565 slot = path.slots[0];
9567 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9568 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9569 unsigned long offset;
9572 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9573 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9574 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A drop_progress objectid of 0 puts the root on normal_trees; any other
 * value puts it on dropping_trees with the drop key converted to cpu order.
 */
9575 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9576 level = btrfs_root_level(&ri);
9577 level_size = root->nodesize;
9578 ret = add_root_item_to_list(&normal_trees,
9580 btrfs_root_bytenr(&ri),
9581 last_snapshot, level,
9582 0, level_size, NULL);
9586 level = btrfs_root_level(&ri);
9587 level_size = root->nodesize;
9588 objectid = found_key.objectid;
9589 btrfs_disk_key_to_cpu(&found_key,
9591 ret = add_root_item_to_list(&dropping_trees,
9593 btrfs_root_bytenr(&ri),
9594 last_snapshot, level,
9596 level_size, &found_key);
9603 btrfs_release_path(&path);
9606 * check_block can return -EAGAIN if it fixes something, please keep
9607 * this in mind when dealing with return values from these functions, if
9608 * we get -EAGAIN we want to fall through and restart the loop.
/* Trees on normal_trees are scanned first, then those on dropping_trees. */
9610 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9611 &seen, &reada, &nodes, &extent_cache,
9612 &chunk_cache, &dev_cache, &block_group_cache,
9619 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9620 &pending, &seen, &reada, &nodes,
9621 &extent_cache, &chunk_cache, &dev_cache,
9622 &block_group_cache, &dev_extent_cache);
/* Cross-check what the scan collected. */
9629 ret = check_chunks(&chunk_cache, &block_group_cache,
9630 &dev_extent_cache, NULL, NULL, NULL, 0);
9637 ret = check_extent_refs(root, &extent_cache);
9644 ret = check_devices(&dev_cache, &dev_extent_cache);
9649 task_stop(ctx.info);
/* Unhook the per-run state from fs_info and free the caches. */
9651 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9652 extent_io_tree_cleanup(&excluded_extents);
9653 root->fs_info->fsck_extent_cache = NULL;
9654 root->fs_info->free_extent_hook = NULL;
9655 root->fs_info->corrupt_blocks = NULL;
9656 root->fs_info->excluded_extents = NULL;
9659 free_chunk_cache_tree(&chunk_cache);
9660 free_device_cache_tree(&dev_cache);
9661 free_block_group_tree(&block_group_cache);
9662 free_device_extent_tree(&dev_extent_cache);
9663 free_extent_cache_tree(&seen);
9664 free_extent_cache_tree(&pending);
9665 free_extent_cache_tree(&reada);
9666 free_extent_cache_tree(&nodes);
/*
 * NOTE(review): second cleanup sequence. Unlike the one above it also frees
 * the extent record cache and both root item lists and keeps the fs_info
 * hooks in place - presumably the restart path described in the -EAGAIN
 * comment; confirm.
 */
9669 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9670 free_extent_cache_tree(&seen);
9671 free_extent_cache_tree(&pending);
9672 free_extent_cache_tree(&reada);
9673 free_extent_cache_tree(&nodes);
9674 free_chunk_cache_tree(&chunk_cache);
9675 free_block_group_tree(&block_group_cache);
9676 free_device_cache_tree(&dev_cache);
9677 free_device_extent_tree(&dev_extent_cache);
9678 free_extent_record_cache(root->fs_info, &extent_cache);
9679 free_root_item_list(&normal_trees);
9680 free_root_item_list(&dropping_trees);
9681 extent_io_tree_cleanup(&excluded_extents);
9686 * Check backrefs of a tree block given by @bytenr or @eb.
9688 * @root: the root containing the @bytenr or @eb
9689 * @eb: tree block extent buffer, can be NULL
9690 * @bytenr: bytenr of the tree block to search
9691 * @level: tree level of the tree block
9692 * @owner: owner of the tree block
9694 * Return >0 for any error found and output error message
9695 * Return 0 for no error found
/*
 * Cross-check one tree block against the extent tree: look up its
 * METADATA_ITEM/EXTENT_ITEM, compare flags, generation and level, then
 * search the inline refs (and finally a keyed TREE_BLOCK_REF item) for a
 * backref matching root->objectid or @owner.
 * NOTE(review): this listing omits some lines of the function (braces,
 * short declarations and branches), so the comments below describe only
 * the statements that are visible.
 */
9697 static int check_tree_block_ref(struct btrfs_root *root,
9698 struct extent_buffer *eb, u64 bytenr,
9699 int level, u64 owner)
9701 struct btrfs_key key;
9702 struct btrfs_root *extent_root = root->fs_info->extent_root;
9703 struct btrfs_path path;
9704 struct btrfs_extent_item *ei;
9705 struct btrfs_extent_inline_ref *iref;
9706 struct extent_buffer *leaf;
9712 u32 nodesize = root->nodesize;
9719 btrfs_init_path(&path);
9720 key.objectid = bytenr;
/* The key type depends on whether the skinny-metadata incompat flag is set */
9721 if (btrfs_fs_incompat(root->fs_info,
9722 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9723 key.type = BTRFS_METADATA_ITEM_KEY;
9725 key.type = BTRFS_EXTENT_ITEM_KEY;
/*
 * Search with the largest offset; btrfs_previous_extent_item() is then
 * used to step to the extent item for @bytenr.
 */
9726 key.offset = (u64)-1;
9728 /* Search for the backref in extent tree */
9729 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9731 err |= BACKREF_MISSING;
9734 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9736 err |= BACKREF_MISSING;
9740 leaf = path.nodes[0];
9741 slot = path.slots[0];
9742 btrfs_item_key_to_cpu(leaf, &key, slot);
9744 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the level is read from key.offset and the inline
 * refs start right after the extent item; otherwise the level comes from
 * the btrfs_tree_block_info that follows the extent item and the inline
 * refs start after that structure.
 */
9746 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9747 skinny_level = (int)key.offset;
9748 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9750 struct btrfs_tree_block_info *info;
9752 info = (struct btrfs_tree_block_info *)(ei + 1);
9753 skinny_level = btrfs_tree_block_level(leaf, info);
9754 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9761 if (!(btrfs_extent_flags(leaf, ei) &
9762 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9764 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9765 key.objectid, nodesize,
9766 BTRFS_EXTENT_FLAG_TREE_BLOCK);
/*
 * NOTE(review): err is assigned here and in the three checks below, while
 * the rest of this function ORs bits into err. Any bit already set would
 * be overwritten - confirm this is intended.
 */
9767 err = BACKREF_MISMATCH;
/* Generation stored in the block header must equal the extent item generation */
9769 header_gen = btrfs_header_generation(eb);
9770 extent_gen = btrfs_extent_generation(leaf, ei);
9771 if (header_gen != extent_gen) {
9773 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9774 key.objectid, nodesize, header_gen,
9776 err = BACKREF_MISMATCH;
9778 if (level != skinny_level) {
9780 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9781 key.objectid, nodesize, level, skinny_level);
9782 err = BACKREF_MISMATCH;
/* A block whose owner is not an fs tree is expected to have exactly one ref */
9784 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9786 "extent[%llu %u] is referred by other roots than %llu",
9787 key.objectid, nodesize, root->objectid);
9788 err = BACKREF_MISMATCH;
9793 * Iterate the extent/metadata item to find the exact backref
9795 item_size = btrfs_item_size_nr(leaf, slot);
9796 ptr = (unsigned long)iref;
9797 end = (unsigned long)ei + item_size;
9799 iref = (struct btrfs_extent_inline_ref *)ptr;
9800 type = btrfs_extent_inline_ref_type(leaf, iref);
9801 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/* A keyed ref matches when its offset is this root id or the block owner */
9803 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9804 (offset == root->objectid || offset == owner)) {
9806 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9807 /* Check if the backref points to valid referencer */
/*
 * The ref offset is passed as @bytenr to a recursive check with
 * eb == NULL; a zero (no error) result counts as a found ref.
 */
9808 found_ref = !check_tree_block_ref(root, NULL, offset,
/* Advance by the size of this ref type to reach the next inline ref */
9814 ptr += btrfs_extent_inline_ref_size(type);
9818 * Inlined extent item doesn't have what we need, check
9819 * TREE_BLOCK_REF_KEY
9822 btrfs_release_path(&path);
9823 key.objectid = bytenr;
9824 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9825 key.offset = root->objectid;
9827 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9832 err |= BACKREF_MISSING;
9834 btrfs_release_path(&path);
/* Only report when an extent buffer was supplied; the recursive call passes NULL */
9835 if (eb && (err & BACKREF_MISSING))
9836 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9837 bytenr, nodesize, owner, level);
9842 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9844 * Return >0 for any error found and output error message
9845 * Return 0 for no error found
/*
 * Check one EXTENT_DATA item of a file: validate alignment of its sizes,
 * update the global data byte counters, then look up the matching
 * EXTENT_ITEM in the extent tree and search it (inline refs first, then a
 * keyed EXTENT_DATA_REF item) for a data backref pointing at this root.
 * NOTE(review): this listing omits some lines of the function, so only
 * the visible statements are described.
 */
9847 static int check_extent_data_item(struct btrfs_root *root,
9848 struct extent_buffer *eb, int slot)
9850 struct btrfs_file_extent_item *fi;
9851 struct btrfs_path path;
9852 struct btrfs_root *extent_root = root->fs_info->extent_root;
9853 struct btrfs_key fi_key;
9854 struct btrfs_key dbref_key;
9855 struct extent_buffer *leaf;
9856 struct btrfs_extent_item *ei;
9857 struct btrfs_extent_inline_ref *iref;
9858 struct btrfs_extent_data_ref *dref;
9860 u64 file_extent_gen;
9863 u64 extent_num_bytes;
9871 int found_dbackref = 0;
9875 btrfs_item_key_to_cpu(eb, &fi_key, slot);
9876 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9877 file_extent_gen = btrfs_file_extent_generation(eb, fi);
9879 /* Nothing to check for hole and inline data extents */
/* A disk_bytenr of 0 is what this test treats as a hole */
9880 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9881 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9884 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9885 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9886 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9888 /* Check unaligned disk_num_bytes and num_bytes */
9889 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9891 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9892 fi_key.objectid, fi_key.offset, disk_num_bytes,
9894 err |= BYTES_UNALIGNED;
/* File-scope counter of on-disk bytes occupied by data extents */
9896 data_bytes_allocated += disk_num_bytes;
9898 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9900 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9901 fi_key.objectid, fi_key.offset, extent_num_bytes,
9903 err |= BYTES_UNALIGNED;
/* File-scope counter of bytes referenced by file extents */
9905 data_bytes_referenced += extent_num_bytes;
9907 owner = btrfs_header_owner(eb);
9909 /* Check the extent item of the file extent in extent tree */
9910 btrfs_init_path(&path);
/* Exact-match key: (disk bytenr, EXTENT_ITEM, disk num bytes) */
9911 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9912 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9913 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9915 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9917 err |= BACKREF_MISSING;
9921 leaf = path.nodes[0];
/* From here on @slot refers to the extent tree leaf, not to @eb */
9922 slot = path.slots[0];
9923 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9925 extent_flags = btrfs_extent_flags(leaf, ei);
9926 extent_gen = btrfs_extent_generation(leaf, ei);
9928 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9930 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9931 disk_bytenr, disk_num_bytes,
9932 BTRFS_EXTENT_FLAG_DATA);
9933 err |= BACKREF_MISMATCH;
/* A file extent generation older than the extent item generation is an error */
9936 if (file_extent_gen < extent_gen) {
9938 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9939 disk_bytenr, disk_num_bytes, file_extent_gen,
9941 err |= BACKREF_MISMATCH;
9944 /* Check data backref inside that extent item */
9945 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9946 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9947 ptr = (unsigned long)iref;
9948 end = (unsigned long)ei + item_size;
9950 iref = (struct btrfs_extent_inline_ref *)ptr;
9951 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref payload is read starting at the offset field of the inline ref */
9952 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9954 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9955 ref_root = btrfs_extent_data_ref_root(leaf, dref);
9956 if (ref_root == owner || ref_root == root->objectid)
9958 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/*
 * For a shared data ref the inline ref offset is handed to
 * check_tree_block_ref() as a tree block bytenr (with eb == NULL); a zero
 * result counts as a found backref.
 */
9959 found_dbackref = !check_tree_block_ref(root, NULL,
9960 btrfs_extent_inline_ref_offset(leaf, iref),
9966 ptr += btrfs_extent_inline_ref_size(type);
9969 /* Didn't find an inlined data backref, try EXTENT_DATA_REF_KEY */
9970 if (!found_dbackref) {
9971 btrfs_release_path(&path);
9973 btrfs_init_path(&path);
9974 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9975 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/*
 * NOTE(review): the hash is computed from fi_key.offset directly, whereas
 * check_extent_data_backref() treats the data backref offset as file
 * offset minus file extent offset - confirm this lookup is right for
 * extents with a non-zero file extent offset.
 */
9976 dbref_key.offset = hash_extent_data_ref(root->objectid,
9977 fi_key.objectid, fi_key.offset);
9979 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9980 &dbref_key, &path, 0, 0);
9985 if (!found_dbackref)
9986 err |= BACKREF_MISSING;
9988 btrfs_release_path(&path);
9989 if (err & BACKREF_MISSING) {
9990 error("data extent[%llu %llu] backref lost",
9991 disk_bytenr, disk_num_bytes);
9997 * Get the real tree block level, for cases like shared blocks
9998 * Return >= 0 as tree level
9999 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources: the
 * extent tree backref (key.offset of a METADATA_ITEM, or the
 * btrfs_tree_block_info of an EXTENT_ITEM) and the header of the block
 * itself. header_level is returned when the function reaches the final
 * return shown below.
 * NOTE(review): this listing omits some lines of the function (error
 * branches and returns), so the error codes are not visible here.
 */
10001 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10003 struct extent_buffer *eb;
10004 struct btrfs_path path;
10005 struct btrfs_key key;
10006 struct btrfs_extent_item *ei;
10009 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10014 /* Search extent tree for extent generation and level */
/*
 * The search key always uses METADATA_ITEM with the largest offset;
 * btrfs_previous_extent_item() is then used to step to the item for
 * @bytenr, whose real key type is read back below.
 */
10015 key.objectid = bytenr;
10016 key.type = BTRFS_METADATA_ITEM_KEY;
10017 key.offset = (u64)-1;
10019 btrfs_init_path(&path);
10020 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10023 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10031 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10032 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10033 struct btrfs_extent_item);
10034 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only extent items flagged as tree blocks carry a level */
10035 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10040 /* Get transid for later read_tree_block() check */
10041 transid = btrfs_extent_generation(path.nodes[0], ei);
10043 /* Get backref level as one source */
10044 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10045 backref_level = key.offset;
10047 struct btrfs_tree_block_info *info;
10049 info = (struct btrfs_tree_block_info *)(ei + 1);
10050 backref_level = btrfs_tree_block_level(path.nodes[0], info);
/* The path is released before the tree block itself is read */
10052 btrfs_release_path(&path);
10054 /* Get level from tree block as an alternative source */
10055 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10056 if (!extent_buffer_uptodate(eb)) {
10057 free_extent_buffer(eb);
10060 header_level = btrfs_header_level(eb);
10061 free_extent_buffer(eb);
/*
 * Both sources are compared; what is returned when they disagree is not
 * visible in this listing.
 */
10063 if (header_level != backref_level)
10065 return header_level;
10068 btrfs_release_path(&path);
10073 * Check if a tree block backref is valid (points to a valid tree block)
10074 * if level == -1, level will be resolved
10075 * Return >0 for any error found and print error message
/*
 * Verify a keyed tree block backref: read the root @root_id, read the
 * block at @bytenr to obtain its first key, then search that root for the
 * key at the given level and confirm that the node found there has the
 * expected bytenr and level.
 * NOTE(review): this listing omits some lines of the function (conditions,
 * gotos and returns), so only the visible statements are described.
 */
10077 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10078 u64 bytenr, int level)
10080 struct btrfs_root *root;
10081 struct btrfs_key key;
10082 struct btrfs_path path;
10083 struct extent_buffer *eb;
10084 struct extent_buffer *node;
10085 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10089 /* Query level for level == -1 special case */
10091 level = query_tree_block_level(fs_info, bytenr);
10093 err |= REFERENCER_MISSING;
/* Key used to load the referencing root by id */
10097 key.objectid = root_id;
10098 key.type = BTRFS_ROOT_ITEM_KEY;
10099 key.offset = (u64)-1;
10101 root = btrfs_read_fs_root(fs_info, &key);
10102 if (IS_ERR(root)) {
10103 err |= REFERENCER_MISSING;
10107 /* Read out the tree block to get item/node key */
10108 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10109 if (!extent_buffer_uptodate(eb)) {
10110 err |= REFERENCER_MISSING;
10111 free_extent_buffer(eb);
10115 /* Empty tree, no need to check key */
/* Applies only to a level 0 block with zero items */
10116 if (!btrfs_header_nritems(eb) && !level) {
10117 free_extent_buffer(eb);
/*
 * The first key is taken either as a node key or as an item key; the
 * condition choosing between the two calls is not visible in this listing.
 */
10122 btrfs_node_key_to_cpu(eb, &key, 0);
10124 btrfs_item_key_to_cpu(eb, &key, 0);
10126 free_extent_buffer(eb);
10128 btrfs_init_path(&path);
/*
 * lowest_level is set to @level so that path.nodes[level] can be inspected
 * after the search (presumably the descent stops at that level - confirm
 * against btrfs_search_slot()).
 */
10129 path.lowest_level = level;
10130 /* Search with the first key, to ensure we can reach it */
10131 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10133 err |= REFERENCER_MISSING;
/* The node reached at @level must be the very block the backref names */
10137 node = path.nodes[level];
10138 if (btrfs_header_bytenr(node) != bytenr) {
10140 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10141 bytenr, nodesize, bytenr,
10142 btrfs_header_bytenr(node));
10143 err |= REFERENCER_MISMATCH;
10145 if (btrfs_header_level(node) != level) {
10147 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10148 bytenr, nodesize, level,
10149 btrfs_header_level(node));
10150 err |= REFERENCER_MISMATCH;
10154 btrfs_release_path(&path);
/*
 * Two message variants follow, one without and one with the level; the
 * condition selecting between them is not visible in this listing.
 */
10156 if (err & REFERENCER_MISSING) {
10158 error("extent [%llu %d] lost referencer (owner: %llu)",
10159 bytenr, nodesize, root_id);
10162 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10163 bytenr, nodesize, root_id, level);
10170 * Check referencer for shared block backref
10171 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: read the block at @parent and look for a
 * child pointer equal to @bytenr among its node block pointers. Returns
 * REFERENCER_MISSING when found_parent stays 0.
 * NOTE(review): this listing omits some lines of the function, including
 * where found_parent is set and the success return.
 */
10173 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10174 u64 parent, u64 bytenr, int level)
10176 struct extent_buffer *eb;
10177 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10179 int found_parent = 0;
10182 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10183 if (!extent_buffer_uptodate(eb))
/* Level resolved from the extent tree and the block header */
10187 level = query_tree_block_level(fs_info, bytenr);
/* The parent is expected to sit exactly one level above the child */
10191 if (level + 1 != btrfs_header_level(eb))
10194 nr = btrfs_header_nritems(eb);
10195 for (i = 0; i < nr; i++) {
10196 if (bytenr == btrfs_node_blockptr(eb, i)) {
10202 free_extent_buffer(eb);
10203 if (!found_parent) {
10205 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10206 bytenr, nodesize, parent, level);
10207 return REFERENCER_MISSING;
10213 * Check referencer for normal (inlined) data ref
10214 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed data backref: load the root @root_id and walk the
 * EXTENT_DATA items of inode @objectid, counting file extents that point
 * at (@bytenr, @len) with a matching backref offset. The count is compared
 * with @count and REFERENCER_MISSING is returned on a mismatch.
 * NOTE(review): this listing omits some lines of the function (conditions,
 * the loop header, the found_count increment and returns).
 */
10216 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10217 u64 root_id, u64 objectid, u64 offset,
10218 u64 bytenr, u64 len, u32 count)
10220 struct btrfs_root *root;
10221 struct btrfs_root *extent_root = fs_info->extent_root;
10222 struct btrfs_key key;
10223 struct btrfs_path path;
10224 struct extent_buffer *leaf;
10225 struct btrfs_file_extent_item *fi;
10226 u32 found_count = 0;
/*
 * Extent tree lookup for @bytenr; per the header comment this is how a
 * zero @len gets resolved, but the statement storing the length is not
 * visible in this listing.
 */
10231 key.objectid = bytenr;
10232 key.type = BTRFS_EXTENT_ITEM_KEY;
10233 key.offset = (u64)-1;
10235 btrfs_init_path(&path);
10236 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10239 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10242 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10243 if (key.objectid != bytenr ||
10244 key.type != BTRFS_EXTENT_ITEM_KEY)
10247 btrfs_release_path(&path);
/* Key used to load the referencing root by id */
10249 key.objectid = root_id;
10250 key.type = BTRFS_ROOT_ITEM_KEY;
10251 key.offset = (u64)-1;
10252 btrfs_init_path(&path);
10254 root = btrfs_read_fs_root(fs_info, &key);
10258 key.objectid = objectid;
10259 key.type = BTRFS_EXTENT_DATA_KEY;
10261 * It can be nasty as data backref offset is
10262 * file offset - file extent offset, which is smaller or
10263 * equal to original backref offset. The only special case is
10264 * overflow. So we need to special check and do further search.
/* An offset with bit 63 set is treated as overflowed: start from file offset 0 */
10266 key.offset = offset & (1ULL << 63) ? 0 : offset;
10268 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10273 * Search afterwards to get correct one
10274 * NOTE: As we must do a comprehensive check on the data backref to
10275 * make sure the dref count also matches, we must iterate all file
10276 * extents for that inode.
10279 leaf = path.nodes[0];
10280 slot = path.slots[0];
10282 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Only EXTENT_DATA items of this inode are of interest */
10283 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10285 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10287 * Except normal disk bytenr and disk num bytes, we still
10288 * need to do extra check on dbackref offset as
10289 * dbackref offset = file_offset - file_extent_offset
10291 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10292 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10293 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10297 ret = btrfs_next_item(root, &path);
10302 btrfs_release_path(&path);
10303 if (found_count != count) {
10305 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10306 bytenr, len, root_id, objectid, offset, count, found_count);
10307 return REFERENCER_MISSING;
10313 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: read the block at @parent and scan its
 * items for a non-inline EXTENT_DATA item whose disk bytenr equals
 * @bytenr. Returns REFERENCER_MISSING when found_parent stays 0.
 * NOTE(review): this listing omits some lines of the function, including
 * where found_parent is set and the success return.
 */
10315 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10316 u64 parent, u64 bytenr)
10318 struct extent_buffer *eb;
10319 struct btrfs_key key;
10320 struct btrfs_file_extent_item *fi;
10321 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10323 int found_parent = 0;
10326 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10327 if (!extent_buffer_uptodate(eb))
10330 nr = btrfs_header_nritems(eb);
10331 for (i = 0; i < nr; i++) {
10332 btrfs_item_key_to_cpu(eb, &key, i);
/* Items of other key types are tested for and presumably skipped */
10333 if (key.type != BTRFS_EXTENT_DATA_KEY)
10336 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline file extents are tested for before the disk bytenr comparison */
10337 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10340 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10347 free_extent_buffer(eb);
10348 if (!found_parent) {
10349 error("shared extent %llu referencer lost (parent: %llu)",
10351 return REFERENCER_MISSING;
10357 * This function will check a given extent item, including its backref and
10358 * itself (like crossing stripe boundary and type)
10360 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one EXTENT_ITEM/METADATA_ITEM: account its size in bytes_used,
 * reject items smaller than struct btrfs_extent_item, test metadata for
 * crossing a stripe boundary, then walk every inline backref and dispatch
 * it to the matching referencer check.
 * NOTE(review): this listing omits some lines of the function (braces,
 * the loop and switch headers, breaks and returns).
 */
10362 static int check_extent_item(struct btrfs_fs_info *fs_info,
10363 struct extent_buffer *eb, int slot)
10365 struct btrfs_extent_item *ei;
10366 struct btrfs_extent_inline_ref *iref;
10367 struct btrfs_extent_data_ref *dref;
10371 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10372 u32 item_size = btrfs_item_size_nr(eb, slot);
10377 struct btrfs_key key;
10381 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * File-scope usage counter: an EXTENT_ITEM adds key.offset, the other
 * case adds nodesize.
 */
10382 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10383 bytes_used += key.offset;
10385 bytes_used += nodesize;
10387 if (item_size < sizeof(*ei)) {
10389 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10390 * old thing when on disk format is still un-determined.
10391 * No need to care about it anymore
10393 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10397 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10398 flags = btrfs_extent_flags(eb, ei);
10400 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/*
 * NOTE(review): this call uses the file-scope global_info instead of the
 * fs_info parameter used everywhere else in this function - confirm both
 * always refer to the same filesystem.
 */
10402 if (metadata && check_crossing_stripes(global_info, key.objectid,
10404 error("bad metadata [%llu, %llu) crossing stripe boundary",
10405 key.objectid, key.objectid + nodesize);
10406 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline refs start right after the extent item unless a tree block info follows */
10409 ptr = (unsigned long)(ei + 1);
10411 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10412 /* Old EXTENT_ITEM metadata */
10413 struct btrfs_tree_block_info *info;
10415 info = (struct btrfs_tree_block_info *)ptr;
10416 level = btrfs_tree_block_level(eb, info);
10417 ptr += sizeof(struct btrfs_tree_block_info);
10419 /* New METADATA_ITEM */
10420 level = key.offset;
10422 end = (unsigned long)ei + item_size;
10425 err |= ITEM_SIZE_MISMATCH;
10429 /* Now check every backref in this extent item */
10431 iref = (struct btrfs_extent_inline_ref *)ptr;
10432 type = btrfs_extent_inline_ref_type(eb, iref);
10433 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Keyed tree block ref: the ref offset is passed as the root id */
10435 case BTRFS_TREE_BLOCK_REF_KEY:
10436 ret = check_tree_block_backref(fs_info, offset, key.objectid,
/* Shared tree block ref: the ref offset is passed as the parent bytenr */
10440 case BTRFS_SHARED_BLOCK_REF_KEY:
10441 ret = check_shared_block_backref(fs_info, offset, key.objectid,
/* Keyed data ref: root, objectid, offset and count come from the data ref payload */
10445 case BTRFS_EXTENT_DATA_REF_KEY:
10446 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10447 ret = check_extent_data_backref(fs_info,
10448 btrfs_extent_data_ref_root(eb, dref),
10449 btrfs_extent_data_ref_objectid(eb, dref),
10450 btrfs_extent_data_ref_offset(eb, dref),
10451 key.objectid, key.offset,
10452 btrfs_extent_data_ref_count(eb, dref));
/* Shared data ref: the ref offset is passed as the parent bytenr */
10455 case BTRFS_SHARED_DATA_REF_KEY:
10456 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10460 error("extent[%llu %d %llu] has unknown ref type: %d",
10461 key.objectid, key.type, key.offset, type);
10462 err |= UNKNOWN_TYPE;
/* Advance by the size of this ref type to reach the next inline ref */
10466 ptr += btrfs_extent_inline_ref_size(type);
10475 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check one DEV_EXTENT item: look up the chunk it names in the chunk tree,
 * compare the chunk length with the dev extent length, and search the
 * chunk stripes for one whose devid and offset equal the dev extent key.
 * Returns REFERENCER_MISSING when found_chunk stays 0.
 * NOTE(review): this listing omits some lines of the function, including
 * the assignment of l, where found_chunk is set, and the success return.
 */
10477 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10478 struct extent_buffer *eb, int slot)
10480 struct btrfs_root *chunk_root = fs_info->chunk_root;
10481 struct btrfs_dev_extent *ptr;
10482 struct btrfs_path path;
10483 struct btrfs_key chunk_key;
10484 struct btrfs_key devext_key;
10485 struct btrfs_chunk *chunk;
10486 struct extent_buffer *l;
10490 int found_chunk = 0;
10493 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10494 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10495 length = btrfs_dev_extent_length(eb, ptr);
/* The chunk key is rebuilt from the chunk objectid and offset stored in the dev extent */
10497 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10498 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10499 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10501 btrfs_init_path(&path);
10502 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10507 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10508 if (btrfs_chunk_length(l, chunk) != length)
10511 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10512 for (i = 0; i < num_stripes; i++) {
10513 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10514 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
/* The dev extent key carries the devid in objectid and the device offset in offset */
10516 if (devid == devext_key.objectid &&
10517 offset == devext_key.offset) {
10523 btrfs_release_path(&path);
10524 if (!found_chunk) {
10526 "device extent[%llu, %llu, %llu] did not find the related chunk",
10527 devext_key.objectid, devext_key.offset, length);
10528 return REFERENCER_MISSING;
10534 * Check if the used space is correct with the dev item
/*
 * Check one DEV_ITEM: sum the lengths of all DEV_EXTENT items of the
 * device in the dev tree and compare the total with the bytes_used value
 * stored in the dev item. Returns REFERENCER_MISSING when the initial
 * search fails and ACCOUNTING_MISMATCH when the totals differ.
 * NOTE(review): this listing omits some lines of the function (the key
 * offset assignment, the loop header, breaks and the success return).
 */
10536 static int check_dev_item(struct btrfs_fs_info *fs_info,
10537 struct extent_buffer *eb, int slot)
10539 struct btrfs_root *dev_root = fs_info->dev_root;
10540 struct btrfs_dev_item *dev_item;
10541 struct btrfs_path path;
10542 struct btrfs_key key;
10543 struct btrfs_dev_extent *ptr;
10549 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10550 dev_id = btrfs_device_id(eb, dev_item);
10551 used = btrfs_device_bytes_used(eb, dev_item);
10553 key.objectid = dev_id;
10554 key.type = BTRFS_DEV_EXTENT_KEY;
10557 btrfs_init_path(&path);
10558 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* On this error path the dev item key is re-read from @eb for the message */
10560 btrfs_item_key_to_cpu(eb, &key, slot);
10561 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10562 key.objectid, key.type, key.offset);
10563 btrfs_release_path(&path);
10564 return REFERENCER_MISSING;
10567 /* Iterate dev_extents to calculate the used space of a device */
10569 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Keys with a larger objectid belong to another device */
10571 if (key.objectid > dev_id)
10573 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10576 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10577 struct btrfs_dev_extent);
10578 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10580 ret = btrfs_next_item(dev_root, &path);
10584 btrfs_release_path(&path);
10586 if (used != total) {
/*
 * NOTE(review): the key is read back from @eb here, but the message below
 * prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id instead
 * of key.objectid, key.type and key.offset as the earlier message does -
 * confirm which values are intended.
 */
10587 btrfs_item_key_to_cpu(eb, &key, slot);
10589 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10590 total, used, BTRFS_ROOT_TREE_OBJECTID,
10591 BTRFS_DEV_EXTENT_KEY, dev_id);
10592 return ACCOUNTING_MISMATCH;
10598 * Check a block group item with its referencer (chunk) and its used space
10599 * with extent/metadata item
/*
 * Check one BLOCK_GROUP_ITEM: find the chunk item keyed at the block group
 * start and compare its length, then walk the extent tree across the block
 * group range, summing the space used by extent/metadata items and
 * checking that each extent type matches the block group flags. The sum is
 * compared with the used value stored in the block group item.
 * NOTE(review): this listing omits some lines of the function (conditions,
 * the loop header, gotos and the return).
 */
10601 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10602 struct extent_buffer *eb, int slot)
10604 struct btrfs_root *extent_root = fs_info->extent_root;
10605 struct btrfs_root *chunk_root = fs_info->chunk_root;
10606 struct btrfs_block_group_item *bi;
10607 struct btrfs_block_group_item bg_item;
10608 struct btrfs_path path;
10609 struct btrfs_key bg_key;
10610 struct btrfs_key chunk_key;
10611 struct btrfs_key extent_key;
10612 struct btrfs_chunk *chunk;
10613 struct extent_buffer *leaf;
10614 struct btrfs_extent_item *ei;
10615 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10623 btrfs_item_key_to_cpu(eb, &bg_key, slot);
10624 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* The item is copied out of the extent buffer into a local struct before use */
10625 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10626 used = btrfs_block_group_used(&bg_item);
10627 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is looked up with the block group start as the key offset */
10629 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10630 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10631 chunk_key.offset = bg_key.objectid;
10633 btrfs_init_path(&path);
10634 /* Search for the referencer chunk */
10635 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10638 "block group[%llu %llu] did not find the related chunk item",
10639 bg_key.objectid, bg_key.offset);
10640 err |= REFERENCER_MISSING;
10642 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10643 struct btrfs_chunk);
10644 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10647 "block group[%llu %llu] related chunk item length does not match",
10648 bg_key.objectid, bg_key.offset);
10649 err |= REFERENCER_MISMATCH;
10652 btrfs_release_path(&path);
10654 /* Search from the block group bytenr */
10655 extent_key.objectid = bg_key.objectid;
10656 extent_key.type = 0;
10657 extent_key.offset = 0;
10659 btrfs_init_path(&path);
10660 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10664 /* Iterate extent tree to account used space */
10666 leaf = path.nodes[0];
10667 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the end of the block group range */
10668 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10671 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10672 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10674 if (extent_key.objectid < bg_key.objectid)
/*
 * Space accounting: the METADATA_ITEM branch body is not visible in this
 * listing; the other branch adds key.offset.
 */
10677 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10680 total += extent_key.offset;
10682 ei = btrfs_item_ptr(leaf, path.slots[0],
10683 struct btrfs_extent_item);
10684 flags = btrfs_extent_flags(leaf, ei);
/* Data extents must live in a block group with the DATA flag */
10685 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10686 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10688 "bad extent[%llu, %llu) type mismatch with chunk",
10689 extent_key.objectid,
10690 extent_key.objectid + extent_key.offset);
10691 err |= CHUNK_TYPE_MISMATCH;
/* Tree blocks must live in a SYSTEM or METADATA block group */
10693 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10694 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10695 BTRFS_BLOCK_GROUP_METADATA))) {
10697 "bad extent[%llu, %llu) type mismatch with chunk",
10698 extent_key.objectid,
10699 extent_key.objectid + nodesize);
10700 err |= CHUNK_TYPE_MISMATCH;
10704 ret = btrfs_next_item(extent_root, &path);
10710 btrfs_release_path(&path);
10712 if (total != used) {
10714 "block group[%llu %llu] used %llu but extent items used %llu",
10715 bg_key.objectid, bg_key.offset, used, total);
10716 err |= ACCOUNTING_MISMATCH;
10722 * Check a chunk item.
10723 * Including checking all referred dev_extents and block group
/*
 * Check one CHUNK_ITEM: validate length alignment and the type/profile
 * bits, find the block group item keyed by (chunk offset, length) and
 * compare its flags, then for every stripe look up the dev extent keyed by
 * (devid, stripe offset) and verify it points back at this chunk with the
 * same length.
 * NOTE(review): this listing omits some lines of the function (conditions,
 * labels, continue statements and the return).
 */
10725 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10726 struct extent_buffer *eb, int slot)
10728 struct btrfs_root *extent_root = fs_info->extent_root;
10729 struct btrfs_root *dev_root = fs_info->dev_root;
10730 struct btrfs_path path;
10731 struct btrfs_key chunk_key;
10732 struct btrfs_key bg_key;
10733 struct btrfs_key devext_key;
10734 struct btrfs_chunk *chunk;
10735 struct extent_buffer *leaf;
10736 struct btrfs_block_group_item *bi;
10737 struct btrfs_block_group_item bg_item;
10738 struct btrfs_dev_extent *ptr;
10739 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10751 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10752 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10753 length = btrfs_chunk_length(eb, chunk);
/* The chunk start is chunk_key.offset, so this is the exclusive end */
10754 chunk_end = chunk_key.offset + length;
10755 if (!IS_ALIGNED(length, sectorsize)) {
10756 error("chunk[%llu %llu) not aligned to %u",
10757 chunk_key.offset, chunk_end, sectorsize);
10758 err |= BYTES_UNALIGNED;
10762 type = btrfs_chunk_type(eb, chunk);
10763 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10764 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10765 error("chunk[%llu %llu) has no chunk type",
10766 chunk_key.offset, chunk_end);
10767 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one profile bit is set */
10769 if (profile && (profile & (profile - 1))) {
10770 error("chunk[%llu %llu) multiple profiles detected: %llx",
10771 chunk_key.offset, chunk_end, profile);
10772 err |= UNKNOWN_TYPE;
/* Exact-match key for the block group: (chunk start, BLOCK_GROUP_ITEM, length) */
10775 bg_key.objectid = chunk_key.offset;
10776 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10777 bg_key.offset = length;
10779 btrfs_init_path(&path);
10780 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10783 "chunk[%llu %llu) did not find the related block group item",
10784 chunk_key.offset, chunk_end);
10785 err |= REFERENCER_MISSING;
10787 leaf = path.nodes[0];
10788 bi = btrfs_item_ptr(leaf, path.slots[0],
10789 struct btrfs_block_group_item);
10790 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
10792 if (btrfs_block_group_flags(&bg_item) != type) {
10794 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10795 chunk_key.offset, chunk_end, type,
10796 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is recorded as REFERENCER_MISSING, while
 * check_block_group_item() records its length mismatch as
 * REFERENCER_MISMATCH - confirm which bit is intended here.
 */
10797 err |= REFERENCER_MISSING;
10801 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10802 for (i = 0; i < num_stripes; i++) {
/* The path left over from the previous lookup is released at the top of each pass */
10803 btrfs_release_path(&path);
10804 btrfs_init_path(&path);
10805 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10806 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10807 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10809 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10812 goto not_match_dev;
10814 leaf = path.nodes[0];
10815 ptr = btrfs_item_ptr(leaf, path.slots[0],
10816 struct btrfs_dev_extent);
10817 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10818 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must name this chunk and have the same length */
10819 if (objectid != chunk_key.objectid ||
10820 offset != chunk_key.offset ||
10821 btrfs_dev_extent_length(leaf, ptr) != length)
10822 goto not_match_dev;
10825 err |= BACKREF_MISSING;
/*
 * NOTE(review): this message prints chunk_key.objectid as the range start,
 * while every other message in this function prints chunk_key.offset -
 * confirm which value is intended.
 */
10827 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10828 chunk_key.objectid, chunk_end, i);
10831 btrfs_release_path(&path);
10837 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and dispatch each one, by key type, to the
 * matching checker above. EXTENT_CSUM items are not checked; their item
 * size is only added to total_csum_bytes.
 * NOTE(review): this listing omits some lines of the function (the switch
 * header, err accumulation, breaks, the loop construct and the return).
 */
10839 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10841 struct btrfs_fs_info *fs_info = root->fs_info;
10842 struct btrfs_key key;
10845 struct btrfs_extent_data_ref *dref;
10850 btrfs_item_key_to_cpu(eb, &key, slot);
/* File extent items are the only case that is passed @root instead of fs_info */
10854 case BTRFS_EXTENT_DATA_KEY:
10855 ret = check_extent_data_item(root, eb, slot);
10858 case BTRFS_BLOCK_GROUP_ITEM_KEY:
10859 ret = check_block_group_item(fs_info, eb, slot);
10862 case BTRFS_DEV_ITEM_KEY:
10863 ret = check_dev_item(fs_info, eb, slot);
10866 case BTRFS_CHUNK_ITEM_KEY:
10867 ret = check_chunk_item(fs_info, eb, slot);
10870 case BTRFS_DEV_EXTENT_KEY:
10871 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
10874 case BTRFS_EXTENT_ITEM_KEY:
10875 case BTRFS_METADATA_ITEM_KEY:
10876 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items only feed the file-scope csum byte counter */
10879 case BTRFS_EXTENT_CSUM_KEY:
10880 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items: key.offset is passed as root id or parent */
10882 case BTRFS_TREE_BLOCK_REF_KEY:
10883 ret = check_tree_block_backref(fs_info, key.offset,
10887 case BTRFS_EXTENT_DATA_REF_KEY:
10888 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10889 ret = check_extent_data_backref(fs_info,
10890 btrfs_extent_data_ref_root(eb, dref),
10891 btrfs_extent_data_ref_objectid(eb, dref),
10892 btrfs_extent_data_ref_offset(eb, dref),
10894 btrfs_extent_data_ref_count(eb, dref));
10897 case BTRFS_SHARED_BLOCK_REF_KEY:
10898 ret = check_shared_block_backref(fs_info, key.offset,
10902 case BTRFS_SHARED_DATA_REF_KEY:
10903 ret = check_shared_data_backref(fs_info, key.offset,
/* Move on to the next slot while it is still inside the leaf */
10911 if (++slot < btrfs_header_nritems(eb))
10918 * Helper function for later fs/subvol tree check. To determine if a tree
10919 * block should be checked.
10920 * This function ensures that only the direct referencer with the lowest
10921 * rootid checks a fs/subvolume tree block.
10923 * Backref check at extent tree would detect errors like missing subvolume
10924 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether tree block @eb should be checked on behalf of @root.
 *
 * The extent item for @eb is looked up in the extent tree and its inline
 * refs are walked; an inline BTRFS_TREE_BLOCK_REF_KEY whose offset is lower
 * than root->objectid marks a referencer with a lower root id.
 *
 * @root: fs/subvolume root currently being traversed
 * @eb:   tree block in question
 *
 * NOTE(review): the concrete return values are not visible in this excerpt;
 * the caller in traverse_tree_block() treats a zero result as "skip".
 */
10926 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10928 struct btrfs_root *extent_root = root->fs_info->extent_root;
10929 struct btrfs_key key;
10930 struct btrfs_path path;
10931 struct extent_buffer *leaf;
10933 struct btrfs_extent_item *ei;
10939 struct btrfs_extent_inline_ref *iref;
10942 btrfs_init_path(&path);
/* Search key: bytenr of the block with the maximum possible offset */
10943 key.objectid = btrfs_header_bytenr(eb);
10944 key.type = BTRFS_METADATA_ITEM_KEY;
10945 key.offset = (u64)-1;
10948 * Any failure in backref resolving means we can't determine
10949 * whom the tree block belongs to.
10950 * So in that case, we need to check that tree block
10952 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10956 ret = btrfs_previous_extent_item(extent_root, &path,
10957 btrfs_header_bytenr(eb));
10961 leaf = path.nodes[0];
10962 slot = path.slots[0];
10963 btrfs_item_key_to_cpu(leaf, &key, slot);
10964 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Locate the first inline ref: directly behind the extent item for
 * METADATA_ITEM keys, behind the btrfs_tree_block_info in the other branch.
 */
10966 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10967 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10969 struct btrfs_tree_block_info *info;
10971 info = (struct btrfs_tree_block_info *)(ei + 1);
10972 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk the inline refs up to the end of the item */
10975 item_size = btrfs_item_size_nr(leaf, slot);
10976 ptr = (unsigned long)iref;
10977 end = (unsigned long)ei + item_size;
10978 while (ptr < end) {
10979 iref = (struct btrfs_extent_inline_ref *)ptr;
10980 type = btrfs_extent_inline_ref_type(leaf, iref);
10981 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10984 * We only check the tree block if current root is
10985 * the lowest referencer of it.
10987 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10988 offset < root->objectid) {
10989 btrfs_release_path(&path);
/* Step to the next inline ref; its size depends on the ref type */
10993 ptr += btrfs_extent_inline_ref_size(type);
10996 * Normally we should also check keyed tree block ref, but that may be
10997 * very time consuming. Inlined ref should already make us skip a lot
10998 * of refs now. So skip search keyed tree block ref.
11002 btrfs_release_path(&path);
11007 * Traversal function for tree block. We will do:
11008 * 1) Skip shared fs/subvolume tree blocks
11009 * 2) Update related bytes accounting
11010 * 3) Pre-order traversal
/*
 * Check tree block @node of @root and then recurse into its children.
 *
 * @root: tree being traversed
 * @node: tree block to check; its header supplies bytenr, level and owner
 *
 * Global byte counters (total_btree_bytes, total_fs_tree_bytes,
 * total_extent_tree_bytes, btree_space_waste) are updated along the way.
 *
 * NOTE(review): the way child results are merged into the return value is not
 * visible in this excerpt.
 */
11012 static int traverse_tree_block(struct btrfs_root *root,
11013 struct extent_buffer *node)
11015 struct extent_buffer *eb;
11016 struct btrfs_key key;
11017 struct btrfs_key drop_key;
11025 * Skip shared fs/subvolume tree block, in that case they will
11026 * be checked by referencer with lowest rootid
11028 if (is_fstree(root->objectid) && !should_check(root, node))
11031 /* Update bytes accounting */
11032 total_btree_bytes += node->len;
11033 if (fs_root_objectid(btrfs_header_owner(node)))
11034 total_fs_tree_bytes += node->len;
11035 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11036 total_extent_tree_bytes += node->len;
/*
 * Latch found_old_backref once for a reloc-tree-owned block that has the
 * mixed backref revision but lacks the RELOC header flag.
 */
11037 if (!found_old_backref &&
11038 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11039 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11040 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11041 found_old_backref = 1;
11043 /* pre-order traversal, check itself first */
11044 level = btrfs_header_level(node);
11045 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11046 btrfs_header_level(node),
11047 btrfs_header_owner(node));
11051 "check %s failed root %llu bytenr %llu level %d, force continue check",
11052 level ? "node":"leaf", root->objectid,
11053 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf handling: account the unused leaf space, then check its items */
11056 btree_space_waste += btrfs_leaf_free_space(root, node);
11057 ret = check_leaf_items(root, node);
/* Node handling: account the unused key pointer slots */
11062 nr = btrfs_header_nritems(node);
11063 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11064 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11065 sizeof(struct btrfs_key_ptr);
11067 /* Then check all its children */
11068 for (i = 0; i < nr; i++) {
11069 u64 blocknr = btrfs_node_blockptr(node, i);
11071 btrfs_node_key_to_cpu(node, &key, i);
/*
 * Children at the root item's drop level are tested against drop_progress.
 * NOTE(review): the action taken for a dropped key is not visible here.
 */
11072 if (level == root->root_item.drop_level &&
11073 is_dropped_key(&key, &drop_key))
11077 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11078 * to call the function itself.
11080 eb = read_tree_block(root, blocknr, root->nodesize, 0);
/* Only recurse into children that were read successfully */
11081 if (extent_buffer_uptodate(eb)) {
11082 ret = traverse_tree_block(root, eb);
11085 free_extent_buffer(eb);
11092 * Low memory usage version check_chunks_and_extents.
11094 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11096 struct btrfs_path path;
11097 struct btrfs_key key;
11098 struct btrfs_root *root1;
11099 struct btrfs_root *cur_root;
11103 root1 = root->fs_info->chunk_root;
11104 ret = traverse_tree_block(root1, root1->node);
11107 root1 = root->fs_info->tree_root;
11108 ret = traverse_tree_block(root1, root1->node);
11111 btrfs_init_path(&path);
11112 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11114 key.type = BTRFS_ROOT_ITEM_KEY;
11116 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11118 error("cannot find extent treet in tree_root");
11123 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11124 if (key.type != BTRFS_ROOT_ITEM_KEY)
11126 key.offset = (u64)-1;
11128 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11129 if (IS_ERR(cur_root) || !cur_root) {
11130 error("failed to read tree: %lld", key.objectid);
11134 ret = traverse_tree_block(cur_root, cur_root->node);
11138 ret = btrfs_next_item(root1, &path);
11144 btrfs_release_path(&path);
/*
 * Re-initialize the root node of @root as an empty tree block.
 *
 * The block header is zeroed and rewritten with level, bytenr, the current
 * transaction id as generation, the mixed backref revision, the owner root
 * id, the fsid and the chunk tree uuid; the buffer is then marked dirty.
 *
 * @trans:     running transaction
 * @root:      root to reset
 * @overwrite: NOTE(review): its use is not visible in this excerpt;
 *             presumably selects between reusing the current root node and
 *             allocating a new block - confirm
 */
11148 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11149 struct btrfs_root *root, int overwrite)
11151 struct extent_buffer *c;
11152 struct extent_buffer *old = root->node;
11155 struct btrfs_disk_key disk_key = {0,0,0};
11161 extent_buffer_get(c);
11164 c = btrfs_alloc_free_block(trans, root,
11166 root->root_key.objectid,
11167 &disk_key, level, 0, 0);
11170 extent_buffer_get(c);
/* Wipe the header and fill in the fields of a fresh tree block */
11174 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11175 btrfs_set_header_level(c, level);
11176 btrfs_set_header_bytenr(c, c->start);
11177 btrfs_set_header_generation(c, trans->transid);
11178 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11179 btrfs_set_header_owner(c, root->root_key.objectid);
11181 write_extent_buffer(c, root->fs_info->fsid,
11182 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11184 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11185 btrfs_header_chunk_tree_uuid(c),
11188 btrfs_mark_buffer_dirty(c);
11190 * this case can happen in the following case:
11192 * 1.overwrite previous root.
11194 * 2.reinit reloc data root, this is because we skip pin
11195 * down reloc data tree before which means we can allocate
11196 * same block bytenr here.
/* Same bytenr as before: refresh the root item in the tree root directly */
11198 if (old->start == c->start) {
11199 btrfs_set_root_generation(&root->root_item,
11201 root->root_item.level = btrfs_header_level(root->node);
11202 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11203 &root->root_key, &root->root_item);
11205 free_extent_buffer(c);
11209 free_extent_buffer(old);
11211 add_root_to_dirty_list(root);
/*
 * Pin tree block @eb and, recursively, the tree blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents tree is updated
 * @eb:        tree block to start from
 * @tree_root: non-zero when @eb belongs to the tree root; root items found in
 *             it are then followed into the trees they point to (the
 *             recursive call for those passes 0)
 */
11215 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11216 struct extent_buffer *eb, int tree_root)
11218 struct extent_buffer *tmp;
11219 struct btrfs_root_item *ri;
11220 struct btrfs_key key;
11223 int level = btrfs_header_level(eb);
11229 * If we have pinned this block before, don't pin it again.
11230 * This can not only avoid forever loop with broken filesystem
11231 * but also give us some speedups.
11233 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11234 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11237 btrfs_pin_extent(fs_info, eb->start, eb->len);
11239 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11240 nritems = btrfs_header_nritems(eb);
11241 for (i = 0; i < nritems; i++) {
/* Item-key path: look for root items and follow them */
11243 btrfs_item_key_to_cpu(eb, &key, i);
11244 if (key.type != BTRFS_ROOT_ITEM_KEY)
11246 /* Skip the extent root and reloc roots */
11247 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11248 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11249 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11251 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11252 bytenr = btrfs_disk_root_bytenr(eb, ri);
11255 * If at any point we start needing the real root we
11256 * will have to build a stump root for the root we are
11257 * in, but for now this doesn't actually use the root so
11258 * just pass in extent_root.
11260 tmp = read_tree_block(fs_info->extent_root, bytenr,
11262 if (!extent_buffer_uptodate(tmp)) {
11263 fprintf(stderr, "Error reading root block\n");
11266 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11267 free_extent_buffer(tmp);
/* Node-pointer path: descend through the child block pointer */
11271 bytenr = btrfs_node_blockptr(eb, i);
11273 /* If we aren't the tree root don't read the block */
11274 if (level == 1 && !tree_root) {
11275 btrfs_pin_extent(fs_info, bytenr, nodesize);
11279 tmp = read_tree_block(fs_info->extent_root, bytenr,
11281 if (!extent_buffer_uptodate(tmp)) {
11282 fprintf(stderr, "Error reading tree block\n");
11285 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11286 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root and then from the tree
 * root. Only the tree root walk is run with the tree_root flag set.
 *
 * NOTE(review): the check between the two calls is not visible in this
 * excerpt; presumably a failure of the first walk is returned early.
 */
11295 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11299 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11303 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk items of the chunk tree.
 *
 * The avail_*_alloc_bits of @fs_info are cleared first; every chunk item then
 * yields one btrfs_add_block_group() call, and the chunk's range is marked
 * with set_extent_dirty() in fs_info->free_space_cache.
 */
11306 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11308 struct btrfs_block_group_cache *cache;
11309 struct btrfs_path path;
11310 struct extent_buffer *leaf;
11311 struct btrfs_chunk *chunk;
11312 struct btrfs_key key;
11316 btrfs_init_path(&path);
11318 key.type = BTRFS_CHUNK_ITEM_KEY;
11320 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11322 btrfs_release_path(&path);
11327 * We do this in case the block groups were screwed up and had alloc
11328 * bits that aren't actually set on the chunks. This happens with
11329 * restored images every time and could happen in real life I guess.
11331 fs_info->avail_data_alloc_bits = 0;
11332 fs_info->avail_metadata_alloc_bits = 0;
11333 fs_info->avail_system_alloc_bits = 0;
11335 /* First we need to create the in-memory block groups */
/* Move on to the next leaf once the current one is exhausted */
11337 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11338 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11340 btrfs_release_path(&path);
11348 leaf = path.nodes[0];
11349 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11350 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One block group per chunk: type and length come from the chunk item */
11355 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11356 btrfs_add_block_group(fs_info, 0,
11357 btrfs_chunk_type(leaf, chunk),
11358 key.objectid, key.offset,
11359 btrfs_chunk_length(leaf, chunk));
11360 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11361 key.offset + btrfs_chunk_length(leaf, chunk),
/* Second pass: iterate the block groups by advancing start past each one */
11367 cache = btrfs_lookup_first_block_group(fs_info, start);
11371 start = cache->key.objectid + cache->key.offset;
11374 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance from the tree root and reset the
 * data reloc tree.
 *
 * Visible steps: search for and delete the balance item, delete items with
 * objectid BTRFS_TREE_RELOC_OBJECTID in batches, then re-read the data reloc
 * tree, reinitialize its root and recreate its root directory.
 *
 * @trans:   running transaction
 * @fs_info: filesystem to operate on
 */
11378 static int reset_balance(struct btrfs_trans_handle *trans,
11379 struct btrfs_fs_info *fs_info)
11381 struct btrfs_root *root = fs_info->tree_root;
11382 struct btrfs_path path;
11383 struct extent_buffer *leaf;
11384 struct btrfs_key key;
11385 int del_slot, del_nr = 0;
11389 btrfs_init_path(&path);
/* Step 1: the balance item itself */
11390 key.objectid = BTRFS_BALANCE_OBJECTID;
11391 key.type = BTRFS_BALANCE_ITEM_KEY;
11393 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11398 goto reinit_data_reloc;
11403 ret = btrfs_del_item(trans, root, &path);
11406 btrfs_release_path(&path);
/* Step 2: root items of the tree reloc trees */
11408 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11409 key.type = BTRFS_ROOT_ITEM_KEY;
11411 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11415 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11420 ret = btrfs_del_items(trans, root, &path,
11427 btrfs_release_path(&path);
11430 ret = btrfs_search_slot(trans, root, &key, &path,
11437 leaf = path.nodes[0];
11438 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11439 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11441 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
/* del_slot marks the first slot of the batch handed to btrfs_del_items() */
11446 del_slot = path.slots[0];
11455 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11459 btrfs_release_path(&path);
/* Step 3: reinitialize the data reloc tree */
11462 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11463 key.type = BTRFS_ROOT_ITEM_KEY;
11464 key.offset = (u64)-1;
11465 root = btrfs_read_fs_root(fs_info, &key);
11466 if (IS_ERR(root)) {
11467 fprintf(stderr, "Error reading data reloc tree\n");
11468 ret = PTR_ERR(root);
11471 record_root_in_trans(trans, root);
11472 ret = btrfs_fsck_reinit_root(trans, root, 0);
11475 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11477 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch.
 *
 * Visible steps, in order: refuse filesystems with mixed block groups, pin
 * all metadata blocks, drop and recreate the in-memory block groups,
 * reinitialize the extent root, insert one block group item per in-memory
 * block group, and finally reset any pending balance.
 *
 * @trans:   running transaction
 * @fs_info: filesystem to operate on
 */
11481 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11482 struct btrfs_fs_info *fs_info)
11488 * The only reason we don't do this is because right now we're just
11489 * walking the trees we find and pinning down their bytes, we don't look
11490 * at any of the leaves. In order to do mixed groups we'd have to check
11491 * the leaves of any fs roots and pin down the bytes for any file
11492 * extents we find. Not hard but why do it if we don't have to?
11494 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11495 fprintf(stderr, "We don't support re-initing the extent tree "
11496 "for mixed block groups yet, please notify a btrfs "
11497 "developer you want to do this so they can add this "
11498 "functionality.\n");
11503 * first we need to walk all of the trees except the extent tree and pin
11504 * down the bytes that are in use so we don't overwrite any existing
11507 ret = pin_metadata_blocks(fs_info);
11509 fprintf(stderr, "error pinning down used bytes\n");
11514 * Need to drop all the block groups since we're going to recreate all
11517 btrfs_free_block_groups(fs_info);
11518 ret = reset_block_groups(fs_info);
11520 fprintf(stderr, "error resetting the block groups\n");
11524 /* Ok we can allocate now, reinit the extent root */
11525 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11527 fprintf(stderr, "extent root initialization failed\n");
11529 * When the transaction code is updated we should end the
11530 * transaction, but for now progs only knows about commit so
11531 * just return an error.
11537 * Now we have all the in-memory block groups setup so we can make
11538 * allocations properly, and the metadata we care about is safe since we
11539 * pinned all of it above.
11542 struct btrfs_block_group_cache *cache;
/* Iterate the block groups by advancing start past each one found */
11544 cache = btrfs_lookup_first_block_group(fs_info, start);
11547 start = cache->key.objectid + cache->key.offset;
11548 ret = btrfs_insert_item(trans, fs_info->extent_root,
11549 &cache->key, &cache->item,
11550 sizeof(cache->item));
11552 fprintf(stderr, "Error adding block group\n");
11555 btrfs_extent_post_op(trans, fs_info->extent_root);
11558 ret = reset_balance(trans, fs_info);
11560 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force tree block @eb to be COWed.
 *
 * The owner root is looked up from the block header, a transaction is started
 * on it and a COW search down to the level of @eb is done for the block's
 * first key; the transaction is then committed.
 *
 * @root: any root of the filesystem, used to reach fs_info; replaced by the
 *        owner root of @eb inside the function
 * @eb:   tree block to recow
 */
11565 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11567 struct btrfs_path path;
11568 struct btrfs_trans_handle *trans;
11569 struct btrfs_key key;
11572 printf("Recowing metadata block %llu\n", eb->start);
/* Resolve the root that owns this block */
11573 key.objectid = btrfs_header_owner(eb);
11574 key.type = BTRFS_ROOT_ITEM_KEY;
11575 key.offset = (u64)-1;
11577 root = btrfs_read_fs_root(root->fs_info, &key);
11578 if (IS_ERR(root)) {
11579 fprintf(stderr, "Couldn't find owner root %llu\n",
11581 return PTR_ERR(root);
11584 trans = btrfs_start_transaction(root, 1);
11586 return PTR_ERR(trans);
11588 btrfs_init_path(&path);
/* Stop the search at the level of @eb and use its first key as the target */
11589 path.lowest_level = btrfs_header_level(eb);
11590 if (path.lowest_level)
11591 btrfs_node_key_to_cpu(eb, &key, 0);
11593 btrfs_item_key_to_cpu(eb, &key, 0);
/* cow=1: the search itself performs the COW along the path */
11595 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11596 btrfs_commit_transaction(trans, root);
11597 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree it was recorded in.
 *
 * The tree is resolved from bad->root_id, the item is searched by bad->key
 * inside a new transaction and removed with btrfs_del_item(); the transaction
 * is committed afterwards.
 *
 * @root: any root of the filesystem, used to reach fs_info; replaced by the
 *        root named in @bad inside the function
 * @bad:  root id and key of the item to delete
 */
11601 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11603 struct btrfs_path path;
11604 struct btrfs_trans_handle *trans;
11605 struct btrfs_key key;
11608 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11609 bad->key.type, bad->key.offset);
/* Resolve the root that holds the bad item */
11610 key.objectid = bad->root_id;
11611 key.type = BTRFS_ROOT_ITEM_KEY;
11612 key.offset = (u64)-1;
11614 root = btrfs_read_fs_root(root->fs_info, &key);
11615 if (IS_ERR(root)) {
11616 fprintf(stderr, "Couldn't find owner root %llu\n",
11618 return PTR_ERR(root);
11621 trans = btrfs_start_transaction(root, 1);
11623 return PTR_ERR(trans);
11625 btrfs_init_path(&path);
/* ins_len=-1, cow=1: search prepared for a deletion */
11626 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11632 ret = btrfs_del_item(trans, root, &path);
11634 btrfs_commit_transaction(trans, root);
11635 btrfs_release_path(&path);
11639 static int zero_log_tree(struct btrfs_root *root)
11641 struct btrfs_trans_handle *trans;
11644 trans = btrfs_start_transaction(root, 1);
11645 if (IS_ERR(trans)) {
11646 ret = PTR_ERR(trans);
11649 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11650 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11651 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range starting at @start.
 *
 * The range is processed one sector (csum_root->sectorsize) at a time: the
 * sector is read into @buf with read_extent_data() and its checksum is added
 * with btrfs_csum_file_block().
 *
 * @buf must be able to hold one sector.
 */
11655 static int populate_csum(struct btrfs_trans_handle *trans,
11656 struct btrfs_root *csum_root, char *buf, u64 start,
11663 while (offset < len) {
11664 sectorsize = csum_root->sectorsize;
11665 ret = read_extent_data(csum_root, buf, start + offset,
11669 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11670 start + offset, buf, sectorsize);
11673 offset += sectorsize;
/*
 * Fill the csum tree with checksums for the regular file extents found in
 * @cur_root.
 *
 * @trans:     running transaction
 * @csum_root: csum tree to populate
 * @cur_root:  fs/subvolume tree whose file extent items are iterated
 */
11678 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11679 struct btrfs_root *csum_root,
11680 struct btrfs_root *cur_root)
11682 struct btrfs_path path;
11683 struct btrfs_key key;
11684 struct extent_buffer *node;
11685 struct btrfs_file_extent_item *fi;
/* One-sector scratch buffer handed to populate_csum() */
11692 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11696 btrfs_init_path(&path);
11700 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11703 /* Iterate all regular file extents and fill its csum */
11705 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11707 if (key.type != BTRFS_EXTENT_DATA_KEY)
11709 node = path.nodes[0];
11710 slot = path.slots[0];
11711 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11712 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent: disk bytenr and disk length */
11714 start = btrfs_file_extent_disk_bytenr(node, fi);
11715 len = btrfs_file_extent_disk_num_bytes(node, fi);
11717 ret = populate_csum(trans, csum_root, buf, start, len);
/* NOTE(review): -EEXIST gets special handling; the action is not visible here */
11718 if (ret == -EEXIST)
11724 * TODO: if next leaf is corrupted, jump to nearest next valid
11727 ret = btrfs_next_item(cur_root, &path);
11737 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking every fs/subvolume tree listed in the tree
 * root and calling fill_csum_tree_from_one_fs_root() on each.
 *
 * @trans:     running transaction
 * @csum_root: csum tree to populate
 */
11742 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11743 struct btrfs_root *csum_root)
11745 struct btrfs_fs_info *fs_info = csum_root->fs_info;
11746 struct btrfs_path path;
11747 struct btrfs_root *tree_root = fs_info->tree_root;
11748 struct btrfs_root *cur_root;
11749 struct extent_buffer *node;
11750 struct btrfs_key key;
11754 btrfs_init_path(&path);
/* Start the scan of root items at the default fs tree */
11755 key.objectid = BTRFS_FS_TREE_OBJECTID;
11757 key.type = BTRFS_ROOT_ITEM_KEY;
11758 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11767 node = path.nodes[0];
11768 slot = path.slots[0];
11769 btrfs_item_key_to_cpu(node, &key, slot);
/* Only root items of fs trees within the free objectid range are of interest */
11770 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11772 if (key.type != BTRFS_ROOT_ITEM_KEY)
11774 if (!is_fstree(key.objectid))
11776 key.offset = (u64)-1;
11778 cur_root = btrfs_read_fs_root(fs_info, &key);
11779 if (IS_ERR(cur_root) || !cur_root) {
11780 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11784 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11789 ret = btrfs_next_item(tree_root, &path);
11799 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking the extent tree and checksumming every
 * extent item that carries BTRFS_EXTENT_FLAG_DATA.
 *
 * @trans:     running transaction
 * @csum_root: csum tree to populate; its fs_info supplies the extent root
 */
11803 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11804 struct btrfs_root *csum_root)
11806 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11807 struct btrfs_path path;
11808 struct btrfs_extent_item *ei;
11809 struct extent_buffer *leaf;
11811 struct btrfs_key key;
11814 btrfs_init_path(&path);
11816 key.type = BTRFS_EXTENT_ITEM_KEY;
11818 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11820 btrfs_release_path(&path);
/* One-sector scratch buffer handed to populate_csum() */
11824 buf = malloc(csum_root->sectorsize);
11826 btrfs_release_path(&path);
/* Move on to the next leaf once the current one is exhausted */
11831 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11832 ret = btrfs_next_leaf(extent_root, &path);
11840 leaf = path.nodes[0];
11842 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11843 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11848 ei = btrfs_item_ptr(leaf, path.slots[0],
11849 struct btrfs_extent_item);
/* Extents without the DATA flag are filtered out by this test */
11850 if (!(btrfs_extent_flags(leaf, ei) &
11851 BTRFS_EXTENT_FLAG_DATA)) {
11856 ret = populate_csum(trans, csum_root, buf, key.objectid,
11863 btrfs_release_path(&path);
11869 * Recalculate the csum and put it into the csum tree.
11871 * Extent tree init will wipe out all the extent info, so in that case, we
11872 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
11873 * will use fs/subvol trees to init the csum tree.
/*
 * Rebuild the contents of the csum tree.
 *
 * @trans:          running transaction
 * @csum_root:      csum tree to populate
 * @search_fs_tree: non-zero to collect the data extents from the fs/subvolume
 *                  trees, zero to collect them from the extent tree
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
/*
 * Tear down the global roots_info_cache: every entry is removed from the
 * cache tree, then the tree itself is freed and the pointer reset to NULL.
 * Does nothing when the cache was never allocated.
 */
11885 static void free_roots_info_cache(void)
11887 if (!roots_info_cache)
11890 while (!cache_tree_empty(roots_info_cache)) {
11891 struct cache_extent *entry;
11892 struct root_item_info *rii;
11894 entry = first_cache_extent(roots_info_cache);
11897 remove_cache_extent(roots_info_cache, entry);
/* Recover the enclosing root_item_info from its embedded cache_extent */
11898 rii = container_of(entry, struct root_item_info, cache_extent);
11902 free(roots_info_cache);
11903 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the highest-level tree block
 * found for that root in the global roots_info_cache.
 *
 * Each cache entry is keyed by root id (start = root id, size = 1) and stores
 * level, bytenr, generation and a node count for the candidate root node.
 *
 * @info: filesystem whose extent root is scanned
 */
11906 static int build_roots_info_cache(struct btrfs_fs_info *info)
11909 struct btrfs_key key;
11910 struct extent_buffer *leaf;
11911 struct btrfs_path path;
/* Allocate the cache tree on first use */
11913 if (!roots_info_cache) {
11914 roots_info_cache = malloc(sizeof(*roots_info_cache));
11915 if (!roots_info_cache)
11917 cache_tree_init(roots_info_cache);
11920 btrfs_init_path(&path);
11922 key.type = BTRFS_EXTENT_ITEM_KEY;
11924 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11927 leaf = path.nodes[0];
11930 struct btrfs_key found_key;
11931 struct btrfs_extent_item *ei;
11932 struct btrfs_extent_inline_ref *iref;
11933 int slot = path.slots[0];
11938 struct cache_extent *entry;
11939 struct root_item_info *rii;
/* Move on to the next leaf once the current one is exhausted */
11941 if (slot >= btrfs_header_nritems(leaf)) {
11942 ret = btrfs_next_leaf(info->extent_root, &path);
11949 leaf = path.nodes[0];
11950 slot = path.slots[0];
11953 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only extent items that describe tree blocks are relevant */
11955 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11956 found_key.type != BTRFS_METADATA_ITEM_KEY)
11959 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11960 flags = btrfs_extent_flags(leaf, ei);
11962 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11963 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM keys carry the level in key.offset; the other branch reads
 * it from the btrfs_tree_block_info that precedes the inline refs.
 */
11966 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11967 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11968 level = found_key.offset;
11970 struct btrfs_tree_block_info *binfo;
11972 binfo = (struct btrfs_tree_block_info *)(ei + 1);
11973 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11974 level = btrfs_tree_block_level(leaf, binfo);
11978 * For a root extent, it must be of the following type and the
11979 * first (and only one) iref in the item.
11981 type = btrfs_extent_inline_ref_type(leaf, iref);
11982 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The ref offset is the id of the root that owns the block */
11985 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11986 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entries start with level (u8)-1 as the "nothing recorded yet" marker */
11988 rii = malloc(sizeof(struct root_item_info));
11993 rii->cache_extent.start = root_id;
11994 rii->cache_extent.size = 1;
11995 rii->level = (u8)-1;
11996 entry = &rii->cache_extent;
11997 ret = insert_cache_extent(roots_info_cache, entry);
12000 rii = container_of(entry, struct root_item_info,
12004 ASSERT(rii->cache_extent.start == root_id);
12005 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first block seen) replaces the recorded candidate */
12007 if (level > rii->level || rii->level == (u8)-1) {
12008 rii->level = level;
12009 rii->bytenr = found_key.objectid;
12010 rii->gen = btrfs_extent_generation(leaf, ei);
12011 rii->node_count = 1;
12012 } else if (level == rii->level) {
12020 btrfs_release_path(&path);
/*
 * Compare the root item at @path with the root node recorded for the same
 * root in roots_info_cache and, unless @read_only_mode is set, rewrite the
 * item's bytenr, level and generation from the cache.
 *
 * @info:           filesystem being checked
 * @path:           path pointing at the root item in the tree root leaf
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero to only detect a mismatch without modifying the
 *                  leaf
 *
 * NOTE(review): the return values are not visible in this excerpt.
 */
12025 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12026 struct btrfs_path *path,
12027 const struct btrfs_key *root_key,
12028 const int read_only_mode)
12030 const u64 root_id = root_key->objectid;
12031 struct cache_extent *entry;
12032 struct root_item_info *rii;
12033 struct btrfs_root_item ri;
12034 unsigned long offset;
12036 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12039 "Error: could not find extent items for root %llu\n",
12040 root_key->objectid);
12044 rii = container_of(entry, struct root_item_info, cache_extent);
12045 ASSERT(rii->cache_extent.start == root_id);
12046 ASSERT(rii->cache_extent.size == 1);
/* The cache must hold exactly one candidate root node for this root */
12048 if (rii->node_count != 1) {
12050 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item */
12055 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12056 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12058 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12059 btrfs_root_level(&ri) != rii->level ||
12060 btrfs_root_generation(&ri) != rii->gen) {
12063 * If we're in repair mode but our caller told us to not update
12064 * the root item, i.e. just check if it needs to be updated, don't
12065 * print this message, since the caller will call us again shortly
12066 * for the same root item without read only mode (the caller will
12067 * open a transaction first).
12069 if (!(read_only_mode && repair))
12071 "%sroot item for root %llu,"
12072 " current bytenr %llu, current gen %llu, current level %u,"
12073 " new bytenr %llu, new gen %llu, new level %u\n",
12074 (read_only_mode ? "" : "fixing "),
12076 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12077 btrfs_root_level(&ri),
12078 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the node that was found is reported separately */
12080 if (btrfs_root_generation(&ri) > rii->gen) {
12082 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12083 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write the corrected copy back into the leaf */
12087 if (!read_only_mode) {
12088 btrfs_set_root_bytenr(&ri, rii->bytenr);
12089 btrfs_set_root_level(&ri, rii->level);
12090 btrfs_set_root_generation(&ri, rii->gen);
12091 write_extent_buffer(path->nodes[0], &ri,
12092 offset, sizeof(ri));
12102 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12103 * caused read-only snapshots to be corrupted if they were created at a moment
12104 * when the source subvolume/snapshot had orphan items. The issue was that the
12105 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12106 * node instead of the post orphan cleanup root node.
12107 * So this function, and its callees, just detects and fixes those cases. Even
12108 * though the regression was for read-only snapshots, this function applies to
12109 * any snapshot/subvolume root.
12110 * This must be run before any other repair code - not doing so makes other
12111 * repair code delete or modify backrefs in the extent tree, for example, which
12112 * will result in an inconsistent fs after repairing the root items.
/*
 * Check, and in repair mode fix, the root items stored in the tree root.
 *
 * Builds roots_info_cache from the extent tree, then iterates the root items
 * starting at BTRFS_FIRST_FREE_OBJECTID and runs maybe_repair_root_item() on
 * each. The cache is freed before returning.
 *
 * @info: filesystem to operate on
 *
 * NOTE(review): the return values are not visible in this excerpt.
 */
12114 static int repair_root_items(struct btrfs_fs_info *info)
12116 struct btrfs_path path;
12117 struct btrfs_key key;
12118 struct extent_buffer *leaf;
12119 struct btrfs_trans_handle *trans = NULL;
12122 int need_trans = 0;
12124 btrfs_init_path(&path);
12126 ret = build_roots_info_cache(info);
12130 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12131 key.type = BTRFS_ROOT_ITEM_KEY;
12136 * Avoid opening and committing transactions if a leaf doesn't have
12137 * any root items that need to be fixed, so that we avoid rotating
12138 * backup roots unnecessarily.
12141 trans = btrfs_start_transaction(info->tree_root, 1);
12142 if (IS_ERR(trans)) {
12143 ret = PTR_ERR(trans);
12148 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12152 leaf = path.nodes[0];
12155 struct btrfs_key found_key;
/* End of leaf: find the key to resume from, then drop the path */
12157 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12158 int no_more_keys = find_next_key(&path, &key);
12160 btrfs_release_path(&path);
12162 ret = btrfs_commit_transaction(trans,
12174 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Both tests filter on the found key: item type, and the tree reloc objectid */
12176 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12178 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12181 ret = maybe_repair_root_item(info, &path, &found_key,
/* Repair mode without an open transaction is handled in this branch */
12186 if (!trans && repair) {
12189 btrfs_release_path(&path);
12199 free_roots_info_cache();
12200 btrfs_release_path(&path);
12202 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and then set the
 * superblock cache generation to (u64)-1 inside a committed transaction.
 *
 * @fs_info: filesystem to operate on
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not assigned on
 * the visible call - confirm whether a failed commit should be reported.
 */
12209 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12211 struct btrfs_trans_handle *trans;
12212 struct btrfs_block_group_cache *bg_cache;
12216 /* Clear all free space cache inodes and its extent data */
12218 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12221 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup right after the block group just handled */
12224 current = bg_cache->key.objectid + bg_cache->key.offset;
12227 /* Don't forget to set cache_generation to -1 */
12228 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12229 if (IS_ERR(trans)) {
12230 error("failed to update super block cache generation");
12231 return PTR_ERR(trans);
12233 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12234 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage/help text of "btrfs check", one string per output line: synopsis,
 * short description, long description, then the option list.
 */
12239 const char * const cmd_check_usage[] = {
12240 "btrfs check [options] <device>",
12241 "Check structural integrity of a filesystem (unmounted).",
12242 "Check structural integrity of an unmounted filesystem. Verify internal",
12243 "trees' consistency and item connectivity. In the repair mode try to",
12244 "fix the problems found. ",
12245 "WARNING: the repair mode is considered dangerous",
12247 "-s|--super <superblock> use this superblock copy",
12248 "-b|--backup use the first valid backup root copy",
12249 "--repair try to repair the filesystem",
12250 "--readonly run in read-only mode (default)",
12251 "--init-csum-tree create a new CRC tree",
12252 "--init-extent-tree create a new extent tree",
12253 "--mode <MODE> allows choice of memory/IO trade-offs",
12254 " where MODE is one of:",
12255 " original - read inodes and extents to memory (requires",
12256 " more memory, does less IO)",
12257 " lowmem - try to use less memory but read blocks again",
12259 "--check-data-csum verify checksums of data blocks",
12260 "-Q|--qgroup-report print a report on qgroup consistency",
12261 "-E|--subvol-extents <subvolid>",
12262 " print subvolume extents and sharing state",
12263 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12264 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12265 "-p|--progress indicate progress",
12266 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the "btrfs check" command (help text: cmd_check_usage).
 *
 * Parses the options with getopt_long(), rejects incompatible option
 * combinations, aborts when the device is mounted and opens the filesystem
 * with open_ctree_fs_info().  Depending on the options it then clears the
 * free space cache (v1 or v2), prints a qgroup or subvolume extent report,
 * or runs the checks in this order: chunks/extents, root items, free space
 * cache or tree, fs roots, csums, root refs and quota groups.  A summary of
 * the collected byte counters is printed at the end.
 */
12270 int cmd_check(int argc, char **argv)
12272 struct cache_tree root_cache;
12273 struct btrfs_root *root;
12274 struct btrfs_fs_info *info;
12277 u64 tree_root_bytenr = 0;
12278 u64 chunk_root_bytenr = 0;
12279 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12282 int init_csum_tree = 0;
/* 0: not requested, 1: clear the v1 cache, 2: clear the v2 cache (set while parsing --clear-space-cache) */
12284 int clear_space_cache = 0;
12285 int qgroup_report = 0;
12286 int qgroups_repaired = 0;
/* The filesystem is opened with OPEN_CTREE_EXCLUSIVE; further flags are OR-ed in by the options below */
12287 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Codes for the long-only options; they start at 257, above any single-character option code */
12291 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12292 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12293 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12294 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12295 static const struct option long_options[] = {
12296 { "super", required_argument, NULL, 's' },
12297 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12298 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12299 { "init-csum-tree", no_argument, NULL,
12300 GETOPT_VAL_INIT_CSUM },
12301 { "init-extent-tree", no_argument, NULL,
12302 GETOPT_VAL_INIT_EXTENT },
12303 { "check-data-csum", no_argument, NULL,
12304 GETOPT_VAL_CHECK_CSUM },
12305 { "backup", no_argument, NULL, 'b' },
12306 { "subvol-extents", required_argument, NULL, 'E' },
12307 { "qgroup-report", no_argument, NULL, 'Q' },
12308 { "tree-root", required_argument, NULL, 'r' },
12309 { "chunk-root", required_argument, NULL,
12310 GETOPT_VAL_CHUNK_TREE },
12311 { "progress", no_argument, NULL, 'p' },
12312 { "mode", required_argument, NULL,
12314 { "clear-space-cache", required_argument, NULL,
12315 GETOPT_VAL_CLEAR_SPACE_CACHE},
12316 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the usage text advertises "-Q" and "-E", and long_options
 * maps --qgroup-report/--subvol-extents to 'Q'/'E', but neither letter is
 * part of this optstring, so presumably only the long forms are accepted.
 * Confirm whether "as:br:p" should also contain "E:" and "Q".
 */
12319 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12323 case 'a': /* ignored */ break;
12325 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Superblock copy selection: the mirror index must be below BTRFS_SUPER_MIRROR_MAX */
12328 num = arg_strtou64(optarg);
12329 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12331 "super mirror should be less than %d",
12332 BTRFS_SUPER_MIRROR_MAX);
12335 bytenr = btrfs_sb_offset(((int)num));
/* NOTE(review): num is passed to %llu without the cast that bytenr gets - confirm its type matches */
12336 printf("using SB copy %llu, bytenr %llu\n", num,
12337 (unsigned long long)bytenr);
12343 subvolid = arg_strtou64(optarg);
12346 tree_root_bytenr = arg_strtou64(optarg);
12348 case GETOPT_VAL_CHUNK_TREE:
12349 chunk_root_bytenr = arg_strtou64(optarg);
12352 ctx.progress_enabled = true;
12356 usage(cmd_check_usage);
/* --repair: the filesystem has to be opened for writing */
12357 case GETOPT_VAL_REPAIR:
12358 printf("enabling repair mode\n");
12360 ctree_flags |= OPEN_CTREE_WRITES;
12362 case GETOPT_VAL_READONLY:
/* --init-csum-tree: rebuilding the checksum tree also needs write access */
12365 case GETOPT_VAL_INIT_CSUM:
12366 printf("Creating a new CRC tree\n");
12367 init_csum_tree = 1;
12369 ctree_flags |= OPEN_CTREE_WRITES;
/* --init-extent-tree: open writable and with OPEN_CTREE_NO_BLOCK_GROUPS */
12371 case GETOPT_VAL_INIT_EXTENT:
12372 init_extent_tree = 1;
12373 ctree_flags |= (OPEN_CTREE_WRITES |
12374 OPEN_CTREE_NO_BLOCK_GROUPS);
12377 case GETOPT_VAL_CHECK_CSUM:
12378 check_data_csum = 1;
/* --mode: parse_check_mode() yields CHECK_MODE_UNKNOWN for an unrecognized name */
12380 case GETOPT_VAL_MODE:
12381 check_mode = parse_check_mode(optarg);
12382 if (check_mode == CHECK_MODE_UNKNOWN) {
12383 error("unknown mode: %s", optarg);
/* --clear-space-cache: only "v1" and "v2" are accepted; v2 additionally sets OPEN_CTREE_INVALIDATE_FST */
12387 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12388 if (strcmp(optarg, "v1") == 0) {
12389 clear_space_cache = 1;
12390 } else if (strcmp(optarg, "v2") == 0) {
12391 clear_space_cache = 2;
12392 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12395 "invalid argument to --clear-space-cache, must be v1 or v2");
12398 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one non-option argument must remain; it is used as argv[optind] below */
12403 if (check_argc_exact(argc - optind, 1))
12404 usage(cmd_check_usage);
/* With progress reporting enabled, set up the status task with TASK_NOTHING as the current position */
12406 if (ctx.progress_enabled) {
12407 ctx.tp = TASK_NOTHING;
12408 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12411 /* This check is the only reason for --readonly to exist */
12412 if (readonly && repair) {
12413 error("repair options are not compatible with --readonly");
12418 * Not supported yet
12420 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12421 error("low memory mode doesn't support repair yet");
12426 cache_tree_init(&root_cache);
/* Refuse to work on a mounted filesystem; a negative result means the mount status could not be determined */
12428 if((ret = check_mounted(argv[optind])) < 0) {
12429 error("could not check mount status: %s", strerror(-ret));
12432 error("%s is currently mounted, aborting", argv[optind]);
12437 /* only allow partial opening under repair mode */
12439 ctree_flags |= OPEN_CTREE_PARTIAL;
/* Open the filesystem using the superblock, tree root and chunk root locations chosen by the options */
12441 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12442 chunk_root_bytenr, ctree_flags);
12444 error("cannot open file system");
12449 global_info = info;
12450 root = info->fs_root;
/* Clearing the v1 cache is refused when the FREE_SPACE_TREE compat_ro bit is set */
12451 if (clear_space_cache == 1) {
12452 if (btrfs_fs_compat_ro(info,
12453 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12455 "free space cache v2 detected, use --clear-space-cache v2");
12459 printf("Clearing free space cache\n");
12460 ret = clear_free_space_cache(info);
12462 error("failed to clear free space cache");
12465 printf("Free space cache cleared\n");
/* Clearing the v2 cache only applies when the FREE_SPACE_TREE compat_ro bit is set */
12468 } else if (clear_space_cache == 2) {
12469 if (!btrfs_fs_compat_ro(info,
12470 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12471 printf("no free space cache v2 to clear\n");
12475 printf("Clear free space cache v2\n");
12476 ret = btrfs_clear_free_space_tree(info);
12478 error("failed to clear free space cache v2: %d", ret);
12481 printf("free space cache v2 cleared\n");
12487 * repair mode will force us to commit transaction which
12488 * will make us fail to load log tree when mounting.
12490 if (repair && btrfs_super_log_root(info->super_copy)) {
12491 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12496 ret = zero_log_tree(root);
12498 error("failed to zero log tree: %d", ret);
/* Textual form of the filesystem UUID, used in the banners printed below */
12503 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* Report-only mode: quota group report */
12504 if (qgroup_report) {
12505 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12507 ret = qgroup_verify_all(info);
/* Report-only mode: extent state of a single subvolume */
12513 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12514 subvolid, argv[optind], uuidbuf);
12515 ret = print_extent_state(info, subvolid);
12518 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree root, device root and chunk root must have been read successfully */
12520 if (!extent_buffer_uptodate(info->tree_root->node) ||
12521 !extent_buffer_uptodate(info->dev_root->node) ||
12522 !extent_buffer_uptodate(info->chunk_root->node)) {
12523 error("critical roots corrupted, unable to check the filesystem");
/* Rebuild the extent tree and/or checksum tree inside one transaction before the regular checks run */
12528 if (init_extent_tree || init_csum_tree) {
12529 struct btrfs_trans_handle *trans;
12531 trans = btrfs_start_transaction(info->extent_root, 0);
12532 if (IS_ERR(trans)) {
12533 error("error starting transaction");
12534 ret = PTR_ERR(trans);
12538 if (init_extent_tree) {
12539 printf("Creating a new extent tree\n");
12540 ret = reinit_extent_tree(trans, info);
/* The checksum tree is first reinitialized and then refilled */
12545 if (init_csum_tree) {
12546 printf("Reinitialize checksum tree\n");
12547 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12549 error("checksum tree initialization failed: %d",
12555 ret = fill_csum_tree(trans, info->csum_root,
12558 error("checksum tree refilling failed: %d", ret);
12563 * Ok now we commit and run the normal fsck, which will add
12564 * extent entries for all of the items it finds.
12566 ret = btrfs_commit_transaction(trans, info->extent_root);
/* The extent root and csum root must be readable for the checks that follow */
12570 if (!extent_buffer_uptodate(info->extent_root->node)) {
12571 error("critical: extent_root, unable to check the filesystem");
12575 if (!extent_buffer_uptodate(info->csum_root->node)) {
12576 error("critical: csum_root, unable to check the filesystem");
/* Phase banners go to stderr only when the progress task is not active */
12581 if (!ctx.progress_enabled)
12582 fprintf(stderr, "checking extents\n");
/* Low memory mode uses the _v2 implementation of the chunk/extent check */
12583 if (check_mode == CHECK_MODE_LOWMEM)
12584 ret = check_chunks_and_extents_v2(root);
12586 ret = check_chunks_and_extents(root);
12589 "errors found in extent allocation tree or chunk allocation");
/* A positive result of repair_root_items() is reported below as a number of roots */
12591 ret = repair_root_items(info);
12595 fprintf(stderr, "Fixed %d roots.\n", ret);
12597 } else if (ret > 0) {
12599 "Found %d roots with an outdated root item.\n",
12602 "Please run a filesystem check with the option --repair to fix them.\n");
/* The banner names the free space tree or the free space cache depending on the FREE_SPACE_TREE compat_ro bit */
12607 if (!ctx.progress_enabled) {
12608 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12609 fprintf(stderr, "checking free space tree\n");
12611 fprintf(stderr, "checking free space cache\n");
12613 ret = check_space_cache(root);
12618 * We used to have to have these hole extents in between our real
12619 * extents so if we don't have this flag set we need to make sure there
12620 * are no gaps in the file extents for inodes, otherwise we can just
12621 * ignore it when this happens.
12623 no_holes = btrfs_fs_incompat(root->fs_info,
12624 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12625 if (!ctx.progress_enabled)
12626 fprintf(stderr, "checking fs roots\n");
/* Low memory mode uses the _v2 implementation of the fs roots check */
12627 if (check_mode == CHECK_MODE_LOWMEM)
12628 ret = check_fs_roots_v2(root->fs_info);
12630 ret = check_fs_roots(root, &root_cache);
12634 fprintf(stderr, "checking csums\n");
12635 ret = check_csums(root);
12639 fprintf(stderr, "checking root refs\n");
12640 /* For low memory mode, check_fs_roots_v2 handles root refs */
12641 if (check_mode != CHECK_MODE_LOWMEM) {
12642 ret = check_root_refs(root, &root_cache);
/* In repair mode, drain the recow_ebs list by re-COWing every queued extent buffer */
12647 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12648 struct extent_buffer *eb;
12650 eb = list_first_entry(&root->fs_info->recow_ebs,
12651 struct extent_buffer, recow);
12652 list_del_init(&eb->recow);
12653 ret = recow_extent_buffer(root, eb);
/* Process every entry queued on the delete_items list */
12658 while (!list_empty(&delete_items)) {
12659 struct bad_item *bad;
12661 bad = list_first_entry(&delete_items, struct bad_item, list);
12662 list_del_init(&bad->list);
12664 ret = delete_bad_item(root, bad);
/* Quota groups are verified only when quotas are enabled; results go to err, not ret */
12668 if (info->quota_enabled) {
12670 fprintf(stderr, "checking quota groups\n");
12671 err = qgroup_verify_all(info);
12675 err = repair_qgroups(info, &qgroups_repaired);
/* Extent buffers still queued for re-COW at this point are reported as transid errors */
12680 if (!list_empty(&root->fs_info->recow_ebs)) {
12681 error("transid errors in file system");
12685 /* Don't override original ret */
12686 if (!ret && qgroups_repaired)
12687 ret = qgroups_repaired;
12689 if (found_old_backref) { /*
12690 * there was a disk format change when mixed
12691 * backref was in testing tree. The old format
12692 * existed about one week.
12694 printf("\n * Found old mixed backref format. "
12695 "The old format is not supported! *"
12696 "\n * Please mount the FS in readonly mode, "
12697 "backup data and re-format the FS. *\n\n");
/* Final summary of the file-scope byte counters */
12700 printf("found %llu bytes used err is %d\n",
12701 (unsigned long long)bytes_used, ret);
12702 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12703 printf("total tree bytes: %llu\n",
12704 (unsigned long long)total_btree_bytes);
12705 printf("total fs tree bytes: %llu\n",
12706 (unsigned long long)total_fs_tree_bytes);
12707 printf("total extent tree bytes: %llu\n",
12708 (unsigned long long)total_extent_tree_bytes);
12709 printf("btree space waste bytes: %llu\n",
12710 (unsigned long long)btree_space_waste);
12711 printf("file data blocks allocated: %llu\n referenced %llu\n",
12712 (unsigned long long)data_bytes_allocated,
12713 (unsigned long long)data_bytes_referenced);
/* Release the qgroup counters and the root record cache */
12715 free_qgroup_counts();
12716 free_root_recs_tree(&root_cache);
/* Tear down the progress task if one was started */
12720 if (ctx.progress_enabled)
12721 task_deinit(ctx.info);