2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
50 TASK_NOTHING, /* have to be the last element */
55 enum task_position tp;
57 struct task_info *info;
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
88 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 struct data_backref {
102 struct extent_backref node;
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
118 return container_of(back, struct data_backref, node);
122 * Much like data_backref, but with the undetermined members removed
123 * and changed to use list_head.
124 * During extent scan, it is stored in root->orphan_data_extent.
125 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
127 struct orphan_data_extent {
128 struct list_head list;
136 struct tree_backref {
137 struct extent_backref node;
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
146 return container_of(back, struct tree_backref, node);
149 /* Explicit initialization for extent_record::flag_block_full_backref */
150 enum { FLAG_UNSET = 2 };
152 struct extent_record {
153 struct list_head backrefs;
154 struct list_head dups;
155 struct list_head list;
156 struct cache_extent cache;
157 struct btrfs_disk_key parent_key;
162 u64 extent_item_refs;
164 u64 parent_generation;
168 unsigned int flag_block_full_backref:2;
169 unsigned int found_rec:1;
170 unsigned int content_checked:1;
171 unsigned int owner_ref_checked:1;
172 unsigned int is_root:1;
173 unsigned int metadata:1;
174 unsigned int bad_full_backref:1;
175 unsigned int crossing_stripes:1;
176 unsigned int wrong_chunk_type:1;
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
181 return container_of(entry, struct extent_record, list);
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
200 return list_entry(entry, struct inode_backref, list);
203 struct root_item_record {
204 struct list_head list;
211 struct btrfs_key drop_key;
/*
 * Error bits OR-ed into inode_backref::errors (see add_inode_backref() and
 * maybe_free_inode_rec()).  print_ref_error() prints one short message for
 * every bit that is set.
 */
214 #define REF_ERR_NO_DIR_ITEM (1 << 0)
215 #define REF_ERR_NO_DIR_INDEX (1 << 1)
216 #define REF_ERR_NO_INODE_REF (1 << 2)
217 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
218 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
219 #define REF_ERR_DUP_INODE_REF (1 << 5)
220 #define REF_ERR_INDEX_UNMATCH (1 << 6)
221 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
222 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
223 #define REF_ERR_NO_ROOT_REF (1 << 9)
224 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
225 #define REF_ERR_DUP_ROOT_REF (1 << 11)
226 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
228 struct file_extent_hole {
234 struct inode_record {
235 struct list_head backrefs;
236 unsigned int checked:1;
237 unsigned int merging:1;
238 unsigned int found_inode_item:1;
239 unsigned int found_dir_item:1;
240 unsigned int found_file_extent:1;
241 unsigned int found_csum_item:1;
242 unsigned int some_csum_missing:1;
243 unsigned int nodatasum:1;
256 struct rb_root holes;
257 struct list_head orphan_extents;
/*
 * Error bits OR-ed into inode_record::errors while inode items are processed
 * and records are finalized.  print_inode_error() prints one short message
 * for every bit that is set.
 */
262 #define I_ERR_NO_INODE_ITEM (1 << 0)
263 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
264 #define I_ERR_DUP_INODE_ITEM (1 << 2)
265 #define I_ERR_DUP_DIR_INDEX (1 << 3)
266 #define I_ERR_ODD_DIR_ITEM (1 << 4)
267 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
268 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
269 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
270 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
271 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
272 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
273 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
274 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
275 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
276 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
278 struct root_backref {
279 struct list_head list;
280 unsigned int found_dir_item:1;
281 unsigned int found_dir_index:1;
282 unsigned int found_back_ref:1;
283 unsigned int found_forward_ref:1;
284 unsigned int reachable:1;
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
295 return list_entry(entry, struct root_backref, list);
299 struct list_head backrefs;
300 struct cache_extent cache;
301 unsigned int found_root_item:1;
307 struct cache_extent cache;
312 struct cache_extent cache;
313 struct cache_tree root_cache;
314 struct cache_tree inode_cache;
315 struct inode_record *current;
324 struct walk_control {
325 struct cache_tree shared;
326 struct shared_node *nodes[BTRFS_MAX_LEVEL];
332 struct btrfs_key key;
334 struct list_head list;
337 struct extent_entry {
342 struct list_head list;
345 struct root_item_info {
346 /* level of the root */
348 /* number of nodes at this level, must be 1 for a root */
352 struct cache_extent cache_extent;
356 * Error bit for low memory mode check.
358 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * Kept off bit 4: that bit belongs to REFERENCER_MISMATCH, and sharing it
 * made the two errors indistinguishable.  Bit 9 is the first unused bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
372 static void *print_status_check(void *p)
374 struct task_ctx *priv = p;
375 const char work_indicator[] = { '.', 'o', 'O', 'o' };
377 static char *task_position_string[] = {
379 "checking free space cache",
383 task_period_start(priv->info, 1000 /* 1s */);
385 if (priv->tp == TASK_NOTHING)
389 printf("%s [%c]\r", task_position_string[priv->tp],
390 work_indicator[count % 4]);
393 task_period_wait(priv->info);
398 static int print_status_return(void *p)
406 static enum btrfs_check_mode parse_check_mode(const char *str)
408 if (strcmp(str, "lowmem") == 0)
409 return CHECK_MODE_LOWMEM;
410 if (strcmp(str, "orig") == 0)
411 return CHECK_MODE_ORIGINAL;
412 if (strcmp(str, "original") == 0)
413 return CHECK_MODE_ORIGINAL;
415 return CHECK_MODE_UNKNOWN;
418 /* Compatibility function to allow reuse of old code */
419 static u64 first_extent_gap(struct rb_root *holes)
421 struct file_extent_hole *hole;
423 if (RB_EMPTY_ROOT(holes))
426 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
432 struct file_extent_hole *hole1;
433 struct file_extent_hole *hole2;
435 hole1 = rb_entry(node1, struct file_extent_hole, node);
436 hole2 = rb_entry(node2, struct file_extent_hole, node);
438 if (hole1->start > hole2->start)
440 if (hole1->start < hole2->start)
442 /* Now hole1->start == hole2->start */
443 if (hole1->len >= hole2->len)
445 * Hole 1 will be merge center
446 * Same hole will be merged later
449 /* Hole 2 will be merge center */
454 * Add a hole to the record
456 * This also merges holes, on behalf of copy_file_extent_holes(),
457 * which ensures that no two holes in the tree are contiguous.
459 static int add_file_extent_hole(struct rb_root *holes,
462 struct file_extent_hole *hole;
463 struct file_extent_hole *prev = NULL;
464 struct file_extent_hole *next = NULL;
466 hole = malloc(sizeof(*hole));
471 /* Since compare will not return 0, no -EEXIST will happen */
472 rb_insert(holes, &hole->node, compare_hole);
474 /* simple merge with previous hole */
475 if (rb_prev(&hole->node))
476 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
478 if (prev && prev->start + prev->len >= hole->start) {
479 hole->len = hole->start + hole->len - prev->start;
480 hole->start = prev->start;
481 rb_erase(&prev->node, holes);
486 /* iterate merge with next holes */
488 if (!rb_next(&hole->node))
490 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
492 if (hole->start + hole->len >= next->start) {
493 if (hole->start + hole->len <= next->start + next->len)
494 hole->len = next->start + next->len -
496 rb_erase(&next->node, holes);
505 static int compare_hole_range(struct rb_node *node, void *data)
507 struct file_extent_hole *hole;
510 hole = (struct file_extent_hole *)data;
513 hole = rb_entry(node, struct file_extent_hole, node);
514 if (start < hole->start)
516 if (start >= hole->start && start < hole->start + hole->len)
522 * Delete a hole in the record
524 * This will split the hole if needed and is much stricter than add.
526 static int del_file_extent_hole(struct rb_root *holes,
529 struct file_extent_hole *hole;
530 struct file_extent_hole tmp;
535 struct rb_node *node;
542 node = rb_search(holes, &tmp, compare_hole_range, NULL);
545 hole = rb_entry(node, struct file_extent_hole, node);
546 if (start + len > hole->start + hole->len)
550 * Now there will be no overlap, delete the hole and re-add the
551 * split(s) if they exist.
553 if (start > hole->start) {
554 prev_start = hole->start;
555 prev_len = start - hole->start;
558 if (hole->start + hole->len > start + len) {
559 next_start = start + len;
560 next_len = hole->start + hole->len - start - len;
563 rb_erase(node, holes);
566 ret = add_file_extent_hole(holes, prev_start, prev_len);
571 ret = add_file_extent_hole(holes, next_start, next_len);
578 static int copy_file_extent_holes(struct rb_root *dst,
581 struct file_extent_hole *hole;
582 struct rb_node *node;
585 node = rb_first(src);
587 hole = rb_entry(node, struct file_extent_hole, node);
588 ret = add_file_extent_hole(dst, hole->start, hole->len);
591 node = rb_next(node);
596 static void free_file_extent_holes(struct rb_root *holes)
598 struct rb_node *node;
599 struct file_extent_hole *hole;
601 node = rb_first(holes);
603 hole = rb_entry(node, struct file_extent_hole, node);
604 rb_erase(node, holes);
606 node = rb_first(holes);
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613 struct btrfs_root *root)
615 if (root->last_trans != trans->transid) {
616 root->track_dirty = 1;
617 root->last_trans = trans->transid;
618 root->commit_root = root->node;
619 extent_buffer_get(root->node);
623 static u8 imode_to_type(u32 imode)
626 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
628 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
629 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
630 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
631 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
632 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
633 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
636 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
642 struct device_record *rec1;
643 struct device_record *rec2;
645 rec1 = rb_entry(node1, struct device_record, node);
646 rec2 = rb_entry(node2, struct device_record, node);
647 if (rec1->devid > rec2->devid)
649 else if (rec1->devid < rec2->devid)
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
657 struct inode_record *rec;
658 struct inode_backref *backref;
659 struct inode_backref *orig;
660 struct inode_backref *tmp;
661 struct orphan_data_extent *src_orphan;
662 struct orphan_data_extent *dst_orphan;
667 rec = malloc(sizeof(*rec));
669 return ERR_PTR(-ENOMEM);
670 memcpy(rec, orig_rec, sizeof(*rec));
672 INIT_LIST_HEAD(&rec->backrefs);
673 INIT_LIST_HEAD(&rec->orphan_extents);
674 rec->holes = RB_ROOT;
676 list_for_each_entry(orig, &orig_rec->backrefs, list) {
677 size = sizeof(*orig) + orig->namelen + 1;
678 backref = malloc(size);
683 memcpy(backref, orig, size);
684 list_add_tail(&backref->list, &rec->backrefs);
686 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687 dst_orphan = malloc(sizeof(*dst_orphan));
692 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
695 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
702 rb = rb_first(&rec->holes);
704 struct file_extent_hole *hole;
706 hole = rb_entry(rb, struct file_extent_hole, node);
712 if (!list_empty(&rec->backrefs))
713 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714 list_del(&orig->list);
718 if (!list_empty(&rec->orphan_extents))
719 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720 list_del(&orig->list);
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
732 struct orphan_data_extent *orphan;
734 if (list_empty(orphan_extents))
736 printf("The following data extent is lost in tree %llu:\n",
738 list_for_each_entry(orphan, orphan_extents, list) {
739 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740 orphan->objectid, orphan->offset, orphan->disk_bytenr,
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
747 u64 root_objectid = root->root_key.objectid;
748 int errors = rec->errors;
752 /* reloc root errors, we print its corresponding fs root objectid*/
753 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754 root_objectid = root->root_key.offset;
755 fprintf(stderr, "reloc");
757 fprintf(stderr, "root %llu inode %llu errors %x",
758 (unsigned long long) root_objectid,
759 (unsigned long long) rec->ino, rec->errors);
761 if (errors & I_ERR_NO_INODE_ITEM)
762 fprintf(stderr, ", no inode item");
763 if (errors & I_ERR_NO_ORPHAN_ITEM)
764 fprintf(stderr, ", no orphan item");
765 if (errors & I_ERR_DUP_INODE_ITEM)
766 fprintf(stderr, ", dup inode item");
767 if (errors & I_ERR_DUP_DIR_INDEX)
768 fprintf(stderr, ", dup dir index");
769 if (errors & I_ERR_ODD_DIR_ITEM)
770 fprintf(stderr, ", odd dir item");
771 if (errors & I_ERR_ODD_FILE_EXTENT)
772 fprintf(stderr, ", odd file extent");
773 if (errors & I_ERR_BAD_FILE_EXTENT)
774 fprintf(stderr, ", bad file extent");
775 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776 fprintf(stderr, ", file extent overlap");
777 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778 fprintf(stderr, ", file extent discount");
779 if (errors & I_ERR_DIR_ISIZE_WRONG)
780 fprintf(stderr, ", dir isize wrong");
781 if (errors & I_ERR_FILE_NBYTES_WRONG)
782 fprintf(stderr, ", nbytes wrong");
783 if (errors & I_ERR_ODD_CSUM_ITEM)
784 fprintf(stderr, ", odd csum item");
785 if (errors & I_ERR_SOME_CSUM_MISSING)
786 fprintf(stderr, ", some csum missing");
787 if (errors & I_ERR_LINK_COUNT_WRONG)
788 fprintf(stderr, ", link count wrong");
789 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790 fprintf(stderr, ", orphan file extent");
791 fprintf(stderr, "\n");
792 /* Print the orphan extents if needed */
793 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
796 /* Print the holes if needed */
797 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798 struct file_extent_hole *hole;
799 struct rb_node *node;
802 node = rb_first(&rec->holes);
803 fprintf(stderr, "Found file extent holes:\n");
806 hole = rb_entry(node, struct file_extent_hole, node);
807 fprintf(stderr, "\tstart: %llu, len: %llu\n",
808 hole->start, hole->len);
809 node = rb_next(node);
812 fprintf(stderr, "\tstart: 0, len: %llu\n",
813 round_up(rec->isize, root->sectorsize));
817 static void print_ref_error(int errors)
819 if (errors & REF_ERR_NO_DIR_ITEM)
820 fprintf(stderr, ", no dir item");
821 if (errors & REF_ERR_NO_DIR_INDEX)
822 fprintf(stderr, ", no dir index");
823 if (errors & REF_ERR_NO_INODE_REF)
824 fprintf(stderr, ", no inode ref");
825 if (errors & REF_ERR_DUP_DIR_ITEM)
826 fprintf(stderr, ", dup dir item");
827 if (errors & REF_ERR_DUP_DIR_INDEX)
828 fprintf(stderr, ", dup dir index");
829 if (errors & REF_ERR_DUP_INODE_REF)
830 fprintf(stderr, ", dup inode ref");
831 if (errors & REF_ERR_INDEX_UNMATCH)
832 fprintf(stderr, ", index mismatch");
833 if (errors & REF_ERR_FILETYPE_UNMATCH)
834 fprintf(stderr, ", filetype mismatch");
835 if (errors & REF_ERR_NAME_TOO_LONG)
836 fprintf(stderr, ", name too long");
837 if (errors & REF_ERR_NO_ROOT_REF)
838 fprintf(stderr, ", no root ref");
839 if (errors & REF_ERR_NO_ROOT_BACKREF)
840 fprintf(stderr, ", no root backref");
841 if (errors & REF_ERR_DUP_ROOT_REF)
842 fprintf(stderr, ", dup root ref");
843 if (errors & REF_ERR_DUP_ROOT_BACKREF)
844 fprintf(stderr, ", dup root backref");
845 fprintf(stderr, "\n");
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
851 struct ptr_node *node;
852 struct cache_extent *cache;
853 struct inode_record *rec = NULL;
856 cache = lookup_cache_extent(inode_cache, ino, 1);
858 node = container_of(cache, struct ptr_node, cache);
860 if (mod && rec->refs > 1) {
861 node->data = clone_inode_rec(rec);
862 if (IS_ERR(node->data))
868 rec = calloc(1, sizeof(*rec));
870 return ERR_PTR(-ENOMEM);
872 rec->extent_start = (u64)-1;
874 INIT_LIST_HEAD(&rec->backrefs);
875 INIT_LIST_HEAD(&rec->orphan_extents);
876 rec->holes = RB_ROOT;
878 node = malloc(sizeof(*node));
881 return ERR_PTR(-ENOMEM);
883 node->cache.start = ino;
884 node->cache.size = 1;
887 if (ino == BTRFS_FREE_INO_OBJECTID)
890 ret = insert_cache_extent(inode_cache, &node->cache);
892 return ERR_PTR(-EEXIST);
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
899 struct orphan_data_extent *orphan;
901 while (!list_empty(orphan_extents)) {
902 orphan = list_entry(orphan_extents->next,
903 struct orphan_data_extent, list);
904 list_del(&orphan->list);
909 static void free_inode_rec(struct inode_record *rec)
911 struct inode_backref *backref;
916 while (!list_empty(&rec->backrefs)) {
917 backref = to_inode_backref(rec->backrefs.next);
918 list_del(&backref->list);
921 free_orphan_data_extents(&rec->orphan_extents);
922 free_file_extent_holes(&rec->holes);
926 static int can_free_inode_rec(struct inode_record *rec)
928 if (!rec->errors && rec->checked && rec->found_inode_item &&
929 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935 struct inode_record *rec)
937 struct cache_extent *cache;
938 struct inode_backref *tmp, *backref;
939 struct ptr_node *node;
942 if (!rec->found_inode_item)
945 filetype = imode_to_type(rec->imode);
946 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947 if (backref->found_dir_item && backref->found_dir_index) {
948 if (backref->filetype != filetype)
949 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950 if (!backref->errors && backref->found_inode_ref &&
951 rec->nlink == rec->found_link) {
952 list_del(&backref->list);
958 if (!rec->checked || rec->merging)
961 if (S_ISDIR(rec->imode)) {
962 if (rec->found_size != rec->isize)
963 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964 if (rec->found_file_extent)
965 rec->errors |= I_ERR_ODD_FILE_EXTENT;
966 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967 if (rec->found_dir_item)
968 rec->errors |= I_ERR_ODD_DIR_ITEM;
969 if (rec->found_size != rec->nbytes)
970 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971 if (rec->nlink > 0 && !no_holes &&
972 (rec->extent_end < rec->isize ||
973 first_extent_gap(&rec->holes) < rec->isize))
974 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
977 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978 if (rec->found_csum_item && rec->nodatasum)
979 rec->errors |= I_ERR_ODD_CSUM_ITEM;
980 if (rec->some_csum_missing && !rec->nodatasum)
981 rec->errors |= I_ERR_SOME_CSUM_MISSING;
984 BUG_ON(rec->refs != 1);
985 if (can_free_inode_rec(rec)) {
986 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987 node = container_of(cache, struct ptr_node, cache);
988 BUG_ON(node->data != rec);
989 remove_cache_extent(inode_cache, &node->cache);
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
997 struct btrfs_path path;
998 struct btrfs_key key;
1001 key.objectid = BTRFS_ORPHAN_OBJECTID;
1002 key.type = BTRFS_ORPHAN_ITEM_KEY;
1005 btrfs_init_path(&path);
1006 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007 btrfs_release_path(&path);
1013 static int process_inode_item(struct extent_buffer *eb,
1014 int slot, struct btrfs_key *key,
1015 struct shared_node *active_node)
1017 struct inode_record *rec;
1018 struct btrfs_inode_item *item;
1020 rec = active_node->current;
1021 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022 if (rec->found_inode_item) {
1023 rec->errors |= I_ERR_DUP_INODE_ITEM;
1026 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027 rec->nlink = btrfs_inode_nlink(eb, item);
1028 rec->isize = btrfs_inode_size(eb, item);
1029 rec->nbytes = btrfs_inode_nbytes(eb, item);
1030 rec->imode = btrfs_inode_mode(eb, item);
1031 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1033 rec->found_inode_item = 1;
1034 if (rec->nlink == 0)
1035 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036 maybe_free_inode_rec(&active_node->inode_cache, rec);
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1042 int namelen, u64 dir)
1044 struct inode_backref *backref;
1046 list_for_each_entry(backref, &rec->backrefs, list) {
1047 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1049 if (backref->dir != dir || backref->namelen != namelen)
1051 if (memcmp(name, backref->name, namelen))
1056 backref = malloc(sizeof(*backref) + namelen + 1);
1059 memset(backref, 0, sizeof(*backref));
1061 backref->namelen = namelen;
1062 memcpy(backref->name, name, namelen);
1063 backref->name[namelen] = '\0';
1064 list_add_tail(&backref->list, &rec->backrefs);
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069 u64 ino, u64 dir, u64 index,
1070 const char *name, int namelen,
1071 u8 filetype, u8 itemtype, int errors)
1073 struct inode_record *rec;
1074 struct inode_backref *backref;
1076 rec = get_inode_rec(inode_cache, ino, 1);
1077 BUG_ON(IS_ERR(rec));
1078 backref = get_inode_backref(rec, name, namelen, dir);
1081 backref->errors |= errors;
1082 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083 if (backref->found_dir_index)
1084 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085 if (backref->found_inode_ref && backref->index != index)
1086 backref->errors |= REF_ERR_INDEX_UNMATCH;
1087 if (backref->found_dir_item && backref->filetype != filetype)
1088 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1090 backref->index = index;
1091 backref->filetype = filetype;
1092 backref->found_dir_index = 1;
1093 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1095 if (backref->found_dir_item)
1096 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097 if (backref->found_dir_index && backref->filetype != filetype)
1098 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1100 backref->filetype = filetype;
1101 backref->found_dir_item = 1;
1102 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104 if (backref->found_inode_ref)
1105 backref->errors |= REF_ERR_DUP_INODE_REF;
1106 if (backref->found_dir_index && backref->index != index)
1107 backref->errors |= REF_ERR_INDEX_UNMATCH;
1109 backref->index = index;
1111 backref->ref_type = itemtype;
1112 backref->found_inode_ref = 1;
1117 maybe_free_inode_rec(inode_cache, rec);
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122 struct cache_tree *dst_cache)
1124 struct inode_backref *backref;
1129 list_for_each_entry(backref, &src->backrefs, list) {
1130 if (backref->found_dir_index) {
1131 add_inode_backref(dst_cache, dst->ino, backref->dir,
1132 backref->index, backref->name,
1133 backref->namelen, backref->filetype,
1134 BTRFS_DIR_INDEX_KEY, backref->errors);
1136 if (backref->found_dir_item) {
1138 add_inode_backref(dst_cache, dst->ino,
1139 backref->dir, 0, backref->name,
1140 backref->namelen, backref->filetype,
1141 BTRFS_DIR_ITEM_KEY, backref->errors);
1143 if (backref->found_inode_ref) {
1144 add_inode_backref(dst_cache, dst->ino,
1145 backref->dir, backref->index,
1146 backref->name, backref->namelen, 0,
1147 backref->ref_type, backref->errors);
1151 if (src->found_dir_item)
1152 dst->found_dir_item = 1;
1153 if (src->found_file_extent)
1154 dst->found_file_extent = 1;
1155 if (src->found_csum_item)
1156 dst->found_csum_item = 1;
1157 if (src->some_csum_missing)
1158 dst->some_csum_missing = 1;
1159 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1165 BUG_ON(src->found_link < dir_count);
1166 dst->found_link += src->found_link - dir_count;
1167 dst->found_size += src->found_size;
1168 if (src->extent_start != (u64)-1) {
1169 if (dst->extent_start == (u64)-1) {
1170 dst->extent_start = src->extent_start;
1171 dst->extent_end = src->extent_end;
1173 if (dst->extent_end > src->extent_start)
1174 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175 else if (dst->extent_end < src->extent_start) {
1176 ret = add_file_extent_hole(&dst->holes,
1178 src->extent_start - dst->extent_end);
1180 if (dst->extent_end < src->extent_end)
1181 dst->extent_end = src->extent_end;
1185 dst->errors |= src->errors;
1186 if (src->found_inode_item) {
1187 if (!dst->found_inode_item) {
1188 dst->nlink = src->nlink;
1189 dst->isize = src->isize;
1190 dst->nbytes = src->nbytes;
1191 dst->imode = src->imode;
1192 dst->nodatasum = src->nodatasum;
1193 dst->found_inode_item = 1;
1195 dst->errors |= I_ERR_DUP_INODE_ITEM;
1203 static int splice_shared_node(struct shared_node *src_node,
1204 struct shared_node *dst_node)
1206 struct cache_extent *cache;
1207 struct ptr_node *node, *ins;
1208 struct cache_tree *src, *dst;
1209 struct inode_record *rec, *conflict;
1210 u64 current_ino = 0;
1214 if (--src_node->refs == 0)
1216 if (src_node->current)
1217 current_ino = src_node->current->ino;
1219 src = &src_node->root_cache;
1220 dst = &dst_node->root_cache;
1222 cache = search_cache_extent(src, 0);
1224 node = container_of(cache, struct ptr_node, cache);
1226 cache = next_cache_extent(cache);
1229 remove_cache_extent(src, &node->cache);
1232 ins = malloc(sizeof(*ins));
1234 ins->cache.start = node->cache.start;
1235 ins->cache.size = node->cache.size;
1239 ret = insert_cache_extent(dst, &ins->cache);
1240 if (ret == -EEXIST) {
1241 conflict = get_inode_rec(dst, rec->ino, 1);
1242 BUG_ON(IS_ERR(conflict));
1243 merge_inode_recs(rec, conflict, dst);
1245 conflict->checked = 1;
1246 if (dst_node->current == conflict)
1247 dst_node->current = NULL;
1249 maybe_free_inode_rec(dst, conflict);
1250 free_inode_rec(rec);
1257 if (src == &src_node->root_cache) {
1258 src = &src_node->inode_cache;
1259 dst = &dst_node->inode_cache;
1263 if (current_ino > 0 && (!dst_node->current ||
1264 current_ino > dst_node->current->ino)) {
1265 if (dst_node->current) {
1266 dst_node->current->checked = 1;
1267 maybe_free_inode_rec(dst, dst_node->current);
1269 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270 BUG_ON(IS_ERR(dst_node->current));
1275 static void free_inode_ptr(struct cache_extent *cache)
1277 struct ptr_node *node;
1278 struct inode_record *rec;
1280 node = container_of(cache, struct ptr_node, cache);
1282 free_inode_rec(rec);
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1291 struct cache_extent *cache;
1292 struct shared_node *node;
1294 cache = lookup_cache_extent(shared, bytenr, 1);
1296 node = container_of(cache, struct shared_node, cache);
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1305 struct shared_node *node;
1307 node = calloc(1, sizeof(*node));
1310 node->cache.start = bytenr;
1311 node->cache.size = 1;
1312 cache_tree_init(&node->root_cache);
1313 cache_tree_init(&node->inode_cache);
1316 ret = insert_cache_extent(shared, &node->cache);
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322 struct walk_control *wc, int level)
1324 struct shared_node *node;
1325 struct shared_node *dest;
1328 if (level == wc->active_node)
1331 BUG_ON(wc->active_node <= level);
1332 node = find_shared_node(&wc->shared, bytenr);
1334 ret = add_shared_node(&wc->shared, bytenr, refs);
1336 node = find_shared_node(&wc->shared, bytenr);
1337 wc->nodes[level] = node;
1338 wc->active_node = level;
1342 if (wc->root_level == wc->active_node &&
1343 btrfs_root_refs(&root->root_item) == 0) {
1344 if (--node->refs == 0) {
1345 free_inode_recs_tree(&node->root_cache);
1346 free_inode_recs_tree(&node->inode_cache);
1347 remove_cache_extent(&wc->shared, &node->cache);
1353 dest = wc->nodes[wc->active_node];
1354 splice_shared_node(node, dest);
1355 if (node->refs == 0) {
1356 remove_cache_extent(&wc->shared, &node->cache);
1362 static int leave_shared_node(struct btrfs_root *root,
1363 struct walk_control *wc, int level)
1365 struct shared_node *node;
1366 struct shared_node *dest;
1369 if (level == wc->root_level)
1372 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1376 BUG_ON(i >= BTRFS_MAX_LEVEL);
1378 node = wc->nodes[wc->active_node];
1379 wc->nodes[wc->active_node] = NULL;
1380 wc->active_node = i;
1382 dest = wc->nodes[wc->active_node];
1383 if (wc->active_node < wc->root_level ||
1384 btrfs_root_refs(&root->root_item) > 0) {
1385 BUG_ON(node->refs <= 1);
1386 splice_shared_node(node, dest);
1388 BUG_ON(node->refs < 2);
1397 * 1 - if the root with id child_root_id is a child of root parent_root_id
1398 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1399 * has other root(s) as parent(s)
1400 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine the parent/child relation between two roots by looking at the
 * tree of tree roots (root->fs_info->tree_root).
 *
 * Two lookups are made:
 *  - a search for the key (parent_root_id, BTRFS_ROOT_REF_KEY,
 *    child_root_id);
 *  - a scan of the BTRFS_ROOT_BACKREF_KEY items whose objectid is
 *    child_root_id, comparing each key offset with parent_root_id.
 *
 * The final return statement yields 0 when has_parent is set and 2
 * otherwise. NOTE(review): the places where has_parent is set and where 1
 * or an error is returned are not visible in this excerpt.
 */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1405 struct btrfs_path path;
1406 struct btrfs_key key;
1407 struct extent_buffer *leaf;
1411 btrfs_init_path(&path);
1413 key.objectid = parent_root_id;
1414 key.type = BTRFS_ROOT_REF_KEY;
1415 key.offset = child_root_id;
1416 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1420 btrfs_release_path(&path);
1424 key.objectid = child_root_id;
1425 key.type = BTRFS_ROOT_BACKREF_KEY;
1427 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1433 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: continue in the next leaf */
1434 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1438 leaf = path.nodes[0];
1441 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the run of backref items that belong to child_root_id */
1442 if (key.objectid != child_root_id ||
1443 key.type != BTRFS_ROOT_BACKREF_KEY)
1448 if (key.offset == parent_root_id) {
1449 btrfs_release_path(&path);
1456 btrfs_release_path(&path);
1459 return has_parent ? 0 : 2;
/*
 * Record every directory entry stored in the item at @slot of @eb.
 *
 * @root:        not referenced on the lines visible in this excerpt
 * @eb:          leaf that holds the item
 * @slot:        index of the item inside @eb
 * @key:         key of the item; objectid, offset and type are forwarded to
 *               add_inode_backref()
 * @active_node: shared node whose current inode record and caches are
 *               updated
 *
 * The current inode record is marked with found_dir_item and its found_size
 * grows by the name length of each entry. Each entry is turned into a
 * backref: into the inode cache when its location is an inode item, into
 * the root cache when it is a root item, and otherwise a message is printed
 * and a backref for BTRFS_MULTIPLE_OBJECTIDS is added to the inode cache.
 *
 * After the loop, a BTRFS_DIR_INDEX_KEY item with nritems > 1 sets
 * I_ERR_DUP_DIR_INDEX on the record. NOTE(review): the code that counts
 * nritems is not visible here; presumably it counts the entries of the item.
 */
1462 static int process_dir_item(struct btrfs_root *root,
1463 struct extent_buffer *eb,
1464 int slot, struct btrfs_key *key,
1465 struct shared_node *active_node)
1475 struct btrfs_dir_item *di;
1476 struct inode_record *rec;
1477 struct cache_tree *root_cache;
1478 struct cache_tree *inode_cache;
1479 struct btrfs_key location;
1480 char namebuf[BTRFS_NAME_LEN];
1482 root_cache = &active_node->root_cache;
1483 inode_cache = &active_node->inode_cache;
1484 rec = active_node->current;
1485 rec->found_dir_item = 1;
1487 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488 total = btrfs_item_size_nr(eb, slot);
1489 while (cur < total) {
1491 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492 name_len = btrfs_dir_name_len(eb, di);
1493 data_len = btrfs_dir_data_len(eb, di);
1494 filetype = btrfs_dir_type(eb, di);
1496 rec->found_size += name_len;
/* Over-long names are cut to the size of namebuf and flagged as an error */
1497 if (name_len <= BTRFS_NAME_LEN) {
1501 len = BTRFS_NAME_LEN;
1502 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored directly after the dir item header */
1504 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1506 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507 add_inode_backref(inode_cache, location.objectid,
1508 key->objectid, key->offset, namebuf,
1509 len, filetype, key->type, error);
1510 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511 add_inode_backref(root_cache, location.objectid,
1512 key->objectid, key->offset,
1513 namebuf, len, filetype,
1516 fprintf(stderr, "invalid location in dir item %u\n",
1518 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519 key->objectid, key->offset, namebuf,
1520 len, filetype, key->type, error);
/* Step over header, name and data to reach the next packed entry */
1523 len = sizeof(*di) + name_len + data_len;
1524 di = (struct btrfs_dir_item *)((char *)di + len);
1527 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every inode ref stored in the item at @slot of @eb as a backref in
 * the inode cache of @active_node.
 *
 * For each ref the name length and the index are read from the leaf. The
 * backref is added for inode key->objectid with key->offset passed as the
 * directory argument, together with the index, the name and key->type.
 * Names longer than BTRFS_NAME_LEN are cut to the size of namebuf and the
 * backref carries REF_ERR_NAME_TOO_LONG.
 */
1533 static int process_inode_ref(struct extent_buffer *eb,
1534 int slot, struct btrfs_key *key,
1535 struct shared_node *active_node)
1543 struct cache_tree *inode_cache;
1544 struct btrfs_inode_ref *ref;
1545 char namebuf[BTRFS_NAME_LEN];
1547 inode_cache = &active_node->inode_cache;
1549 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550 total = btrfs_item_size_nr(eb, slot);
1551 while (cur < total) {
1552 name_len = btrfs_inode_ref_name_len(eb, ref);
1553 index = btrfs_inode_ref_index(eb, ref);
1554 if (name_len <= BTRFS_NAME_LEN) {
1558 len = BTRFS_NAME_LEN;
1559 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored directly after the ref header */
1561 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562 add_inode_backref(inode_cache, key->objectid, key->offset,
1563 index, namebuf, len, 0, key->type, error);
/* Advance by header plus the full on-disk name length, not the cut one */
1565 len = sizeof(*ref) + name_len;
1566 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every extended inode ref stored in the item at @slot of @eb as a
 * backref in the inode cache of @active_node.
 *
 * Unlike process_inode_ref(), the directory passed to add_inode_backref()
 * is the parent field read from each extref rather than key->offset.
 * Names longer than BTRFS_NAME_LEN are cut to the size of namebuf and the
 * backref carries REF_ERR_NAME_TOO_LONG.
 */
1572 static int process_inode_extref(struct extent_buffer *eb,
1573 int slot, struct btrfs_key *key,
1574 struct shared_node *active_node)
1583 struct cache_tree *inode_cache;
1584 struct btrfs_inode_extref *extref;
1585 char namebuf[BTRFS_NAME_LEN];
1587 inode_cache = &active_node->inode_cache;
1589 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590 total = btrfs_item_size_nr(eb, slot);
1591 while (cur < total) {
1592 name_len = btrfs_inode_extref_name_len(eb, extref);
1593 index = btrfs_inode_extref_index(eb, extref);
1594 parent = btrfs_inode_extref_parent(eb, extref);
1595 if (name_len <= BTRFS_NAME_LEN) {
1599 len = BTRFS_NAME_LEN;
1600 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored directly after the extref header */
1602 read_extent_buffer(eb, namebuf,
1603 (unsigned long)(extref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, parent,
1605 index, namebuf, len, 0, key->type, error);
/* Advance by header plus the full on-disk name length, not the cut one */
1607 len = sizeof(*extref) + name_len;
1608 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree (root->fs_info->csum_root) to measure how much of
 * the byte range [start, start + len) is covered by checksum items.
 *
 * The walk begins with a search for a BTRFS_EXTENT_CSUM_KEY key and, when
 * the search lands after an existing checksum item, also looks at the item
 * in the previous slot. Each item at key.offset covers
 * (item size / csum_size) blocks of root->sectorsize bytes. The walk stops
 * at the first key that is not a checksum key or that starts at or beyond
 * start + len.
 *
 * NOTE(review): the statements that add to *found, advance start/len and
 * return are not visible in this excerpt; presumably the covered byte count
 * is accumulated into *found - confirm.
 */
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616 u64 len, u64 *found)
1618 struct btrfs_key key;
1619 struct btrfs_path path;
1620 struct extent_buffer *leaf;
1625 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1627 btrfs_init_path(&path);
1629 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1631 key.type = BTRFS_EXTENT_CSUM_KEY;
1633 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: the preceding item may still overlap the range */
1637 if (ret > 0 && path.slots[0] > 0) {
1638 leaf = path.nodes[0];
1639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641 key.type == BTRFS_EXTENT_CSUM_KEY)
1646 leaf = path.nodes[0];
1647 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1653 leaf = path.nodes[0];
1656 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1657 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658 key.type != BTRFS_EXTENT_CSUM_KEY)
1661 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1662 if (key.offset >= start + len)
1665 if (key.offset > start)
1668 size = btrfs_item_size_nr(leaf, path.slots[0]);
/* End of the data range described by the checksums in this item */
1669 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670 if (csum_end > start) {
1671 size = min(csum_end - start, len);
1680 btrfs_release_path(&path);
/*
 * Account one file extent item (at @slot of @eb, keyed by @key) in the
 * current inode record of @active_node.
 *
 * The record must belong to inode key->objectid and must not have more than
 * one ref (BUG_ON otherwise). The function:
 *  - tracks extent_start/extent_end of the record; an item that starts
 *    before the previous extent_end sets I_ERR_FILE_EXTENT_OVERLAP, and a
 *    gap is registered with add_file_extent_hole();
 *  - for inline extents adds the inline length to found_size and rounds
 *    num_bytes up to a multiple of root->sectorsize (mask arithmetic, which
 *    assumes the sector size is a power of two);
 *  - for regular and preallocated extents sets I_ERR_BAD_FILE_EXTENT when
 *    num_bytes is zero or unaligned, when num_bytes + offset exceeds
 *    ram_bytes, or when a preallocated extent has compression, encryption
 *    or other encoding set; found_size grows by num_bytes only when
 *    disk_bytenr is non-zero;
 *  - for extents with a non-zero disk_bytenr that are not owned by the data
 *    relocation tree, asks count_csum_range() how much of the on-disk range
 *    has checksums (using disk_num_bytes for compressed extents, otherwise
 *    disk_bytenr shifted by the extent offset) and records
 *    found_csum_item / some_csum_missing for regular extents or
 *    I_ERR_ODD_CSUM_ITEM for preallocated ones.
 *
 * NOTE(review): several conditions and all return statements are missing
 * from this excerpt, so the exact guards around some of the flag updates
 * cannot be confirmed here.
 */
1686 static int process_file_extent(struct btrfs_root *root,
1687 struct extent_buffer *eb,
1688 int slot, struct btrfs_key *key,
1689 struct shared_node *active_node)
1691 struct inode_record *rec;
1692 struct btrfs_file_extent_item *fi;
1694 u64 disk_bytenr = 0;
1695 u64 extent_offset = 0;
1696 u64 mask = root->sectorsize - 1;
1700 rec = active_node->current;
1701 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any file extent yet */
1704 if (rec->extent_start == (u64)-1) {
1705 rec->extent_start = key->offset;
1706 rec->extent_end = key->offset;
1709 if (rec->extent_end > key->offset)
1710 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711 else if (rec->extent_end < key->offset) {
1712 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713 key->offset - rec->extent_end);
1718 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719 extent_type = btrfs_file_extent_type(eb, fi);
1721 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1724 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725 rec->found_size += num_bytes;
1726 num_bytes = (num_bytes + mask) & ~mask;
1727 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731 extent_offset = btrfs_file_extent_offset(eb, fi);
1732 if (num_bytes == 0 || (num_bytes & mask))
1733 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734 if (num_bytes + extent_offset >
1735 btrfs_file_extent_ram_bytes(eb, fi))
1736 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1737 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738 (btrfs_file_extent_compression(eb, fi) ||
1739 btrfs_file_extent_encryption(eb, fi) ||
1740 btrfs_file_extent_other_encoding(eb, fi)))
1741 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1742 if (disk_bytenr > 0)
1743 rec->found_size += num_bytes;
1745 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1747 rec->extent_end = key->offset + num_bytes;
1750 * The data reloc tree will copy full extents into its inode and then
1751 * copy the corresponding csums. Because the extent it copied could be
1752 * a preallocated extent that hasn't been written to yet there may be no
1753 * csums to copy, ergo we won't have csums for our file extent. This is
1754 * ok so just don't bother checking csums if the inode belongs to the
1757 if (disk_bytenr > 0 &&
1758 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1760 if (btrfs_file_extent_compression(eb, fi))
1761 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1763 disk_bytenr += extent_offset;
1765 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1768 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1770 rec->found_csum_item = 1;
1771 if (found < num_bytes)
1772 rec->some_csum_missing = 1;
1773 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1775 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb to the matching per-item handler and keep the
 * current inode record of the active shared node in step with the keys.
 *
 * There is a special case when the active node is the root level and the
 * root item has no references (the action taken is not visible in this
 * excerpt). Items whose objectid is BTRFS_FREE_SPACE_OBJECTID or whose type
 * is BTRFS_ORPHAN_ITEM_KEY are tested for explicitly; presumably they are
 * skipped - confirm.
 *
 * Whenever an item belongs to a higher inode number than the current
 * record, the previous record is marked checked and offered to
 * maybe_free_inode_rec(), and a record for the new objectid is fetched with
 * get_inode_rec(). Dispatch by key type:
 *   DIR_ITEM / DIR_INDEX -> process_dir_item()
 *   INODE_REF            -> process_inode_ref()
 *   INODE_EXTREF         -> process_inode_extref()
 *   INODE_ITEM           -> process_inode_item()
 *   EXTENT_DATA          -> process_file_extent()
 */
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782 struct walk_control *wc)
1784 struct btrfs_key key;
1788 struct cache_tree *inode_cache;
1789 struct shared_node *active_node;
1791 if (wc->root_level == wc->active_node &&
1792 btrfs_root_refs(&root->root_item) == 0)
1795 active_node = wc->nodes[wc->active_node];
1796 inode_cache = &active_node->inode_cache;
1797 nritems = btrfs_header_nritems(eb);
1798 for (i = 0; i < nritems; i++) {
1799 btrfs_item_key_to_cpu(eb, &key, i);
1801 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1803 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Items are sorted by key, so a larger objectid starts a new inode */
1806 if (active_node->current == NULL ||
1807 active_node->current->ino < key.objectid) {
1808 if (active_node->current) {
1809 active_node->current->checked = 1;
1810 maybe_free_inode_rec(inode_cache,
1811 active_node->current);
1813 active_node->current = get_inode_rec(inode_cache,
1815 BUG_ON(IS_ERR(active_node->current));
1818 case BTRFS_DIR_ITEM_KEY:
1819 case BTRFS_DIR_INDEX_KEY:
1820 ret = process_dir_item(root, eb, i, &key, active_node);
1822 case BTRFS_INODE_REF_KEY:
1823 ret = process_inode_ref(eb, i, &key, active_node);
1825 case BTRFS_INODE_EXTREF_KEY:
1826 ret = process_inode_extref(eb, i, &key, active_node);
1828 case BTRFS_INODE_ITEM_KEY:
1829 ret = process_inode_item(eb, i, &key, active_node);
1831 case BTRFS_EXTENT_DATA_KEY:
1832 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the children of @node, starting at pointer @slot and
 * continuing to the last pointer of the node.
 *
 * Each child is requested with readahead_tree_block() using its block
 * pointer, root->nodesize as the block size and the generation stored in
 * the parent pointer. NOTE(review): the header level of @node is read, but
 * the check made on it is not visible in this excerpt.
 */
1842 static void reada_walk_down(struct btrfs_root *root,
1843 struct extent_buffer *node, int slot)
1852 level = btrfs_header_level(node);
1856 nritems = btrfs_header_nritems(node);
1857 blocksize = root->nodesize;
1858 for (i = slot; i < nritems; i++) {
1859 bytenr = btrfs_node_blockptr(node, i);
1860 ptr_gen = btrfs_node_ptr_generation(node, i);
1861 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1866 * Check the child node/leaf by the following condition:
1867 * 1. the first item key of the node/leaf should be the same with the one
1869 * 2. block in parent node should match the child node/leaf.
1870 * 3. generation of parent node and child's header should be consistent.
1872 * Or the child node/leaf pointed by the key in parent is not valid.
1874 * We hope to check leaf owner too, but since subvol may share leaves,
1875 * which makes leaf owner check not so strong, key check should be
1876 * sufficient enough for that case.
/*
 * Compare @child with the pointer at @slot of @parent and print a message
 * to stderr for every mismatch found:
 *  - the key stored in the parent slot versus the first key of the child
 *    (item key for a level 0 child, node key otherwise);
 *  - the block pointer in the parent versus the bytenr in the child header;
 *  - the pointer generation in the parent versus the generation in the
 *    child header.
 *
 * NOTE(review): in the key and block messages the value taken from the
 * parent is printed as the wanted value and the value from the child as the
 * value found. The generation message passes its arguments the other way
 * round (child header generation first), so its wanted/have labels look
 * swapped - confirm and fix.
 *
 * NOTE(review): the assignments to the return value are not visible in this
 * excerpt.
 */
1878 static int check_child_node(struct btrfs_root *root,
1879 struct extent_buffer *parent, int slot,
1880 struct extent_buffer *child)
1882 struct btrfs_key parent_key;
1883 struct btrfs_key child_key;
1886 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887 if (btrfs_header_level(child) == 0)
1888 btrfs_item_key_to_cpu(child, &child_key, 0);
1890 btrfs_node_key_to_cpu(child, &child_key, 0);
1892 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1895 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896 parent_key.objectid, parent_key.type, parent_key.offset,
1897 child_key.objectid, child_key.type, child_key.offset);
1899 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1901 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902 btrfs_node_blockptr(parent, slot),
1903 btrfs_header_bytenr(child));
1905 if (btrfs_node_ptr_generation(parent, slot) !=
1906 btrfs_header_generation(child)) {
1908 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909 btrfs_header_generation(child),
1910 btrfs_node_ptr_generation(parent, slot));
/*
 * Two arrays with one entry per tree level. walk_down_tree() accesses them
 * through its struct node_refs argument as a cache: bytenr[] holds the start
 * of the tree block last looked up at a level and refs[] the reference
 * count that lookup returned.
 * NOTE(review): the enclosing struct header is not visible in this excerpt;
 * presumably these are the members of struct node_refs.
 */
1916 u64 bytenr[BTRFS_MAX_LEVEL];
1917 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend from path->nodes[*level] towards the leaves, processing leaves and
 * validating every child block before stepping into it.
 *
 * @root:  tree being walked
 * @path:  current position; nodes[] and slots[] are updated while descending
 * @wc:    walk state handed to enter_shared_node() and process_one_leaf()
 * @level: in/out, level of the node currently pointed to by @path
 * @nrefs: per-level cache of (block start, reference count) pairs that
 *         avoids repeating btrfs_lookup_extent_info() for the same block
 *
 * For each child pointer the function obtains the reference count of the
 * child (from @nrefs or by lookup), may call enter_shared_node() for it,
 * fetches the block (from cache, or with readahead plus read_tree_block()),
 * records a corrupt extent when the block cannot be read, then runs
 * check_child_node() and btrfs_check_leaf() / btrfs_check_node() on it.
 * A clean child replaces path->nodes[] one level down with slot 0.
 *
 * On the way out the slot of the current level is set to the number of
 * items of the node, which marks that node as fully consumed.
 *
 * NOTE(review): the conditions guarding the enter_shared_node() calls and
 * all error paths are not visible in this excerpt.
 */
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921 struct walk_control *wc, int *level,
1922 struct node_refs *nrefs)
1924 enum btrfs_tree_block_status status;
1927 struct extent_buffer *next;
1928 struct extent_buffer *cur;
1933 WARN_ON(*level < 0);
1934 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached reference count when it was taken for this very block */
1936 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937 refs = nrefs->refs[*level];
1940 ret = btrfs_lookup_extent_info(NULL, root,
1941 path->nodes[*level]->start,
1942 *level, 1, &refs, NULL);
1947 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948 nrefs->refs[*level] = refs;
1952 ret = enter_shared_node(root, path->nodes[*level]->start,
1960 while (*level >= 0) {
1961 WARN_ON(*level < 0);
1962 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963 cur = path->nodes[*level];
1965 if (btrfs_header_level(cur) != *level)
1968 if (path->slots[*level] >= btrfs_header_nritems(cur))
1971 ret = process_one_leaf(root, cur, wc);
1976 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978 blocksize = root->nodesize;
/* Same caching scheme as above, applied to the child one level down */
1980 if (bytenr == nrefs->bytenr[*level - 1]) {
1981 refs = nrefs->refs[*level - 1];
1983 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984 *level - 1, 1, &refs, NULL);
1988 nrefs->bytenr[*level - 1] = bytenr;
1989 nrefs->refs[*level - 1] = refs;
1994 ret = enter_shared_node(root, bytenr, refs,
1997 path->slots[*level]++;
2002 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004 free_extent_buffer(next);
2005 reada_walk_down(root, cur, path->slots[*level]);
2006 next = read_tree_block(root, bytenr, blocksize,
2008 if (!extent_buffer_uptodate(next)) {
2009 struct btrfs_key node_key;
2011 btrfs_node_key_to_cpu(path->nodes[*level],
2013 path->slots[*level]);
2014 btrfs_add_corrupt_extent_record(root->fs_info,
2016 path->nodes[*level]->start,
2017 root->nodesize, *level);
2023 ret = check_child_node(root, cur, path->slots[*level], next);
2029 if (btrfs_is_leaf(next))
2030 status = btrfs_check_leaf(root, NULL, next);
2032 status = btrfs_check_node(root, NULL, next);
2033 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034 free_extent_buffer(next);
/* Step down: the validated child becomes the current node of the path */
2039 *level = *level - 1;
2040 free_extent_buffer(path->nodes[*level]);
2041 path->nodes[*level] = next;
2042 path->slots[*level] = 0;
2045 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb from *level towards the root looking for a node that still has an
 * unvisited slot.
 *
 * For each populated level of @path the function tests whether another slot
 * follows the current one. NOTE(review): the body of that branch is not
 * visible here; presumably it advances the slot and returns so that
 * walk_down_tree() can continue - confirm. When a level is exhausted, the
 * node at path->nodes[*level] is released and cleared, and
 * leave_shared_node() is called if that level is the active shared node.
 * The BUG_ON() requires *level not to exceed wc->active_node.
 *
 * Despite its name, the local variable leaf holds the node of whatever
 * level is being examined.
 */
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050 struct walk_control *wc, int *level)
2053 struct extent_buffer *leaf;
2055 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056 leaf = path->nodes[i];
2057 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2062 free_extent_buffer(path->nodes[*level]);
2063 path->nodes[*level] = NULL;
2064 BUG_ON(*level > wc->active_node);
2065 if (*level == wc->active_node)
2066 leave_shared_node(root, wc, *level);
/*
 * Run the sanity checks specific to the root directory inode record @rec.
 *
 * Conditions tested, in order (each one is a failure condition):
 *  - no inode item found, or errors already recorded;
 *  - nlink is not 1, or found_link is not 0;
 *  - the backref list is empty;
 *  - the first backref has no inode ref;
 *  - the first backref does not have index 0 and the two-byte name ..
 *  - the first backref has a dir index or a dir item.
 *
 * NOTE(review): the statements executed on failure and the return values
 * are not visible in this excerpt.
 */
2073 static int check_root_dir(struct inode_record *rec)
2075 struct inode_backref *backref;
2078 if (!rec->found_inode_item || rec->errors)
2080 if (rec->nlink != 1 || rec->found_link != 0)
2082 if (list_empty(&rec->backrefs))
2084 backref = to_inode_backref(rec->backrefs.next);
2085 if (!backref->found_inode_ref)
2087 if (backref->index != 0 || backref->namelen != 2 ||
2088 memcmp(backref->name, "..", 2))
2090 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The inode item is located by searching for the key
 * (rec->ino, BTRFS_INODE_ITEM_KEY, (u64)-1), which cannot match exactly, and
 * then inspecting the key at the resulting position. A slot of 0 and a key
 * whose objectid differs from rec->ino are both tested for.
 * NOTE(review): the slot adjustment and the error handling between these
 * checks are not visible in this excerpt.
 *
 * The leaf is marked dirty, a message is printed and @path is released
 * before returning.
 */
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098 struct btrfs_root *root, struct btrfs_path *path,
2099 struct inode_record *rec)
2101 struct btrfs_inode_item *ei;
2102 struct btrfs_key key;
2105 key.objectid = rec->ino;
2106 key.type = BTRFS_INODE_ITEM_KEY;
2107 key.offset = (u64)-1;
2109 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2113 if (!path->slots[0]) {
2120 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121 if (key.objectid != rec->ino) {
2126 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127 struct btrfs_inode_item);
2128 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129 btrfs_mark_buffer_dirty(path->nodes[0]);
2130 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132 root->root_key.objectid);
2134 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino with btrfs_add_orphan_item(), release
 * @path and clear I_ERR_NO_ORPHAN_ITEM on the record.
 * NOTE(review): the condition under which the flag is cleared is not
 * visible in this excerpt; presumably only on success - confirm.
 */
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139 struct btrfs_root *root,
2140 struct btrfs_path *path,
2141 struct inode_record *rec)
2145 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146 btrfs_release_path(path);
2148 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 *
 * The inode item is searched with the key (rec->ino, BTRFS_INODE_ITEM_KEY,
 * offset). NOTE(review): the line that sets key.offset and the handling of
 * the search result are not visible in this excerpt; the existing comment
 * states that the update is reached only when the search returned 0.
 *
 * The leaf is marked dirty, a message is printed and @path is released
 * before returning.
 */
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153 struct btrfs_root *root,
2154 struct btrfs_path *path,
2155 struct inode_record *rec)
2157 struct btrfs_inode_item *ei;
2158 struct btrfs_key key;
2161 key.objectid = rec->ino;
2162 key.type = BTRFS_INODE_ITEM_KEY;
2165 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2172 /* Since ret == 0, no need to check anything */
2173 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174 struct btrfs_inode_item);
2175 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176 btrfs_mark_buffer_dirty(path->nodes[0]);
2177 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178 printf("reset nbytes for ino %llu root %llu\n",
2179 rec->ino, root->root_key.objectid);
2181 btrfs_release_path(path);
/*
 * Insert the BTRFS_DIR_INDEX_KEY item that is missing for @backref and
 * update the in-memory records to match.
 *
 * A transaction is started on @root, an empty item keyed
 * (backref->dir, BTRFS_DIR_INDEX_KEY, backref->index) is inserted and filled
 * with a dir item that points at the inode item of rec->ino, has the type
 * derived from rec->imode, no data and the name stored in @backref. The
 * transaction is then committed.
 *
 * Afterwards the backref is marked with found_dir_index, and the record of
 * the directory gets the name length added to found_size; its
 * I_ERR_DIR_ISIZE_WRONG flag is cleared or set depending on whether
 * found_size now equals isize.
 *
 * NOTE(review): on the visible line the result of
 * btrfs_commit_transaction() is not assigned or tested - confirm whether a
 * failed commit should be reported.
 */
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186 struct cache_tree *inode_cache,
2187 struct inode_record *rec,
2188 struct inode_backref *backref)
2190 struct btrfs_path path;
2191 struct btrfs_trans_handle *trans;
2192 struct btrfs_dir_item *dir_item;
2193 struct extent_buffer *leaf;
2194 struct btrfs_key key;
2195 struct btrfs_disk_key disk_key;
2196 struct inode_record *dir_rec;
2197 unsigned long name_ptr;
/* Item payload: dir item header immediately followed by the name */
2198 u32 data_size = sizeof(*dir_item) + backref->namelen;
2201 trans = btrfs_start_transaction(root, 1);
2203 return PTR_ERR(trans);
2205 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206 (unsigned long long)rec->ino);
2208 btrfs_init_path(&path);
2209 key.objectid = backref->dir;
2210 key.type = BTRFS_DIR_INDEX_KEY;
2211 key.offset = backref->index;
2212 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2215 leaf = path.nodes[0];
2216 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location of the entry: the inode item of the inode being repaired */
2218 disk_key.objectid = cpu_to_le64(rec->ino);
2219 disk_key.type = BTRFS_INODE_ITEM_KEY;
2220 disk_key.offset = 0;
2222 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224 btrfs_set_dir_data_len(leaf, dir_item, 0);
2225 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226 name_ptr = (unsigned long)(dir_item + 1);
2227 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228 btrfs_mark_buffer_dirty(leaf);
2229 btrfs_release_path(&path);
2230 btrfs_commit_transaction(trans, root);
2232 backref->found_dir_index = 1;
2233 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234 BUG_ON(IS_ERR(dir_rec));
/* The new index entry contributes its name length to the directory size */
2237 dir_rec->found_size += backref->namelen;
2238 if (dir_rec->found_size == dir_rec->isize &&
2239 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241 if (dir_rec->found_size != dir_rec->isize)
2242 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the dir index entry described by @backref from @root inside its own
 * transaction.
 *
 * The entry is looked up with btrfs_lookup_dir_index() using the directory,
 * name and index stored in @backref. One visible path releases the path and
 * commits without deleting. Otherwise the entry is removed either with
 * btrfs_del_item() or with btrfs_delete_one_dir_name(), after which the path
 * is released and the transaction committed.
 *
 * NOTE(review): the conditions selecting between these paths are not
 * visible in this excerpt. @inode_cache and @rec are not referenced on the
 * visible lines.
 */
2247 static int delete_dir_index(struct btrfs_root *root,
2248 struct cache_tree *inode_cache,
2249 struct inode_record *rec,
2250 struct inode_backref *backref)
2252 struct btrfs_trans_handle *trans;
2253 struct btrfs_dir_item *di;
2254 struct btrfs_path path;
2257 trans = btrfs_start_transaction(root, 1);
2259 return PTR_ERR(trans);
2261 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262 (unsigned long long)backref->dir,
2263 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264 (unsigned long long)root->objectid);
2266 btrfs_init_path(&path);
2267 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268 backref->name, backref->namelen,
2269 backref->index, -1);
2272 btrfs_release_path(&path);
2273 btrfs_commit_transaction(trans, root);
2280 ret = btrfs_del_item(trans, root, &path);
2282 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2284 btrfs_release_path(&path);
2285 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino from the information gathered
 * in @rec, inside its own transaction.
 *
 * The new item is zeroed and then filled with: the generation of the
 * transaction, nlink (either 1 or rec->found_link; the selecting condition
 * is not visible in this excerpt, presumably @root_dir), nbytes from
 * rec->found_size, and atime/ctime/mtime set to the current time with otime
 * left at zero. When the record has a dir item the inode becomes a directory
 * (S_IFDIR | 0755, size rec->found_size); otherwise it becomes a regular
 * file (S_IFREG | 0755, size rec->extent_end). A warning is printed when a
 * record has both a dir item and file extents.
 *
 * NOTE(review): the else-if branch tests !rec->found_dir_item directly after
 * an if on rec->found_dir_item, so the second test is always true there and
 * could be a plain else.
 *
 * @backref is not referenced on the visible lines.
 */
2289 static int create_inode_item(struct btrfs_root *root,
2290 struct inode_record *rec,
2291 struct inode_backref *backref, int root_dir)
2293 struct btrfs_trans_handle *trans;
2294 struct btrfs_inode_item inode_item;
2295 time_t now = time(NULL);
2298 trans = btrfs_start_transaction(root, 1);
2299 if (IS_ERR(trans)) {
2300 ret = PTR_ERR(trans);
2304 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305 "be incomplete, please check permissions and content after "
2306 "the fsck completes.\n", (unsigned long long)root->objectid,
2307 (unsigned long long)rec->ino);
2309 memset(&inode_item, 0, sizeof(inode_item));
2310 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2312 btrfs_set_stack_inode_nlink(&inode_item, 1);
2314 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316 if (rec->found_dir_item) {
2317 if (rec->found_file_extent)
2318 fprintf(stderr, "root %llu inode %llu has both a dir "
2319 "item and extents, unsure if it is a dir or a "
2320 "regular file so setting it as a directory\n",
2321 (unsigned long long)root->objectid,
2322 (unsigned long long)rec->ino);
2323 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2325 } else if (!rec->found_dir_item) {
2326 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2329 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2338 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2340 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of @rec and repair the inconsistencies that can be
 * fixed from the backref state alone.
 *
 * Cases visible in this excerpt, per backref:
 *  - root directory without an inode item (only when not deleting):
 *    create_inode_item() is called with root_dir set to 1;
 *  - index 0 of the root directory is tested for explicitly and, per the
 *    existing comment, left alone;
 *  - a dir index without an inode ref, or with an inode ref but flagged
 *    REF_ERR_INDEX_UNMATCH: delete_dir_index(), then the backref is
 *    unlinked from the list;
 *  - dir item and inode ref present but dir index missing (not deleting):
 *    add_missing_dir_index();
 *  - dir item, dir index and inode ref all present without errors: the
 *    backref is unlinked from the list;
 *  - only the inode ref present (not deleting): after check_dir_conflict(),
 *    the dir index/item pair is inserted with btrfs_insert_dir_item() in a
 *    new transaction;
 *  - all three present, no index mismatch, but no inode item (not
 *    deleting): create_inode_item() with root_dir set to 0.
 *
 * Returns ret when it is non-zero, otherwise the value of repaired.
 *
 * NOTE(review): the condition of the fully-consistent case tests
 * backref->found_dir_index twice; the second occurrence is redundant and
 * may have been meant to be a different field - confirm.
 */
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345 struct inode_record *rec,
2346 struct cache_tree *inode_cache,
2349 struct inode_backref *tmp, *backref;
2350 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2354 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355 if (!delete && rec->ino == root_dirid) {
2356 if (!rec->found_inode_item) {
2357 ret = create_inode_item(root, rec, backref, 1);
2364 /* Index 0 for root dir's are special, don't mess with it */
2365 if (rec->ino == root_dirid && backref->index == 0)
2369 ((backref->found_dir_index && !backref->found_inode_ref) ||
2370 (backref->found_dir_index && backref->found_inode_ref &&
2371 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372 ret = delete_dir_index(root, inode_cache, rec, backref);
2376 list_del(&backref->list);
2380 if (!delete && !backref->found_dir_index &&
2381 backref->found_dir_item && backref->found_inode_ref) {
2382 ret = add_missing_dir_index(root, inode_cache, rec,
2387 if (backref->found_dir_item &&
2388 backref->found_dir_index &&
2389 backref->found_dir_index) {
2390 if (!backref->errors &&
2391 backref->found_inode_ref) {
2392 list_del(&backref->list);
2398 if (!delete && (!backref->found_dir_index &&
2399 !backref->found_dir_item &&
2400 backref->found_inode_ref)) {
2401 struct btrfs_trans_handle *trans;
2402 struct btrfs_key location;
2404 ret = check_dir_conflict(root, backref->name,
2410 * let nlink fixing routine to handle it,
2411 * which can do it better.
2416 location.objectid = rec->ino;
2417 location.type = BTRFS_INODE_ITEM_KEY;
2418 location.offset = 0;
2420 trans = btrfs_start_transaction(root, 1);
2421 if (IS_ERR(trans)) {
2422 ret = PTR_ERR(trans);
2425 fprintf(stderr, "adding missing dir index/item pair "
2427 (unsigned long long)rec->ino);
2428 ret = btrfs_insert_dir_item(trans, root, backref->name,
2430 backref->dir, &location,
2431 imode_to_type(rec->imode),
2434 btrfs_commit_transaction(trans, root);
2438 if (!delete && (backref->found_inode_ref &&
2439 backref->found_dir_index &&
2440 backref->found_dir_item &&
2441 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442 !rec->found_inode_item)) {
2443 ret = create_inode_item(root, rec, backref, 0);
2450 return ret ? ret : repaired;
2454 * To determine the file type for nlink/inode_item repair
2456 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457 * Return -ENOENT if file type is not found.
/*
 * Work out the BTRFS_FT_* type of the inode described by @rec and store it
 * in *type.
 *
 * When the inode item was found, the type is derived from rec->imode.
 * Otherwise the backrefs are scanned and the filetype of a backref that has
 * a dir index or a dir item is used.
 */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2461 struct inode_backref *backref;
2463 /* For inode item recovered case */
2464 if (rec->found_inode_item) {
2465 *type = imode_to_type(rec->imode);
/* No inode item: fall back to the type recorded in a directory entry */
2469 list_for_each_entry(backref, &rec->backrefs, list) {
2470 if (backref->found_dir_index || backref->found_dir_item) {
2471 *type = backref->filetype;
2479 * To determine the file name for nlink repair
2481 * Return 0 if file name is found, set name and namelen.
2482 * Return -ENOENT if file name is not found.
/*
 * Copy the name of a backref of @rec into @name and store its length in
 * *namelen.
 *
 * A backref qualifies when it has a dir index, a dir item or an inode ref.
 * The name is copied with memcpy(), so no terminating NUL byte is written
 * by this function.
 *
 * NOTE(review): the copy length is backref->namelen with no bound visible
 * in this excerpt; the caller must pass a buffer large enough for the
 * longest stored name - confirm against add_inode_backref().
 */
2484 static int find_file_name(struct inode_record *rec,
2485 char *name, int *namelen)
2487 struct inode_backref *backref;
2489 list_for_each_entry(backref, &rec->backrefs, list) {
2490 if (backref->found_dir_index || backref->found_dir_item ||
2491 backref->found_inode_ref) {
2492 memcpy(name, backref->name, backref->namelen);
2493 *namelen = backref->namelen;
2500 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino from scratch in three steps:
 *  1. call btrfs_unlink() for every backref of @rec, and drop from the list
 *     each backref that lacks a dir index, a dir item or an inode ref;
 *  2. set nlink of the inode item to 0 and mark the leaf dirty;
 *  3. call btrfs_add_link() for every backref that is still on the list.
 *
 * rec->found_link is reset to 0 at the start. NOTE(review): the error
 * handling after each call and any later update of rec->found_link are not
 * visible in this excerpt. @path is released before returning.
 */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502 struct btrfs_root *root,
2503 struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct inode_backref *backref;
2507 struct inode_backref *tmp;
2508 struct btrfs_key key;
2509 struct btrfs_inode_item *inode_item;
2512 /* We don't believe this either, reset it and iterate backref */
2513 rec->found_link = 0;
2515 /* Remove all backref including the valid ones */
2516 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518 backref->index, backref->name,
2519 backref->namelen, 0);
2523 /* remove invalid backref, so it won't be added back */
2524 if (!(backref->found_dir_index &&
2525 backref->found_dir_item &&
2526 backref->found_inode_ref)) {
2527 list_del(&backref->list);
2534 /* Set nlink to 0 */
2535 key.objectid = rec->ino;
2536 key.type = BTRFS_INODE_ITEM_KEY;
2538 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2545 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546 struct btrfs_inode_item);
2547 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548 btrfs_mark_buffer_dirty(path->nodes[0]);
2549 btrfs_release_path(path);
2552 * Add back valid inode_ref/dir_item/dir_index,
2553 * add_link() will handle the nlink inc, so new nlink must be correct
2555 list_for_each_entry(backref, &rec->backrefs, list) {
2556 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557 backref->name, backref->namelen,
2558 backref->filetype, &backref->index, 1);
2563 btrfs_release_path(path);
/*
 * Repair a wrong link count of rec->ino and, when the inode ends up with no
 * link at all, move it into a lost+found directory.
 *
 * Steps visible in this excerpt:
 *  - a name and a file type are recovered from the backrefs before they are
 *    touched; the fallbacks are the decimal inode number as name and
 *    BTRFS_FT_REG_FILE as type;
 *  - reset_nlink() rebuilds the links from the valid backrefs;
 *  - when rec->found_link is 0 afterwards, the lost+found directory is
 *    created with btrfs_mkdir() under BTRFS_FIRST_FREE_OBJECTID and the
 *    inode is linked into it; while the link fails with -EEXIST a suffix
 *    containing the inode number is appended to the name, as long as the
 *    result fits the name length limit;
 *  - I_ERR_LINK_COUNT_WRONG is always cleared so that the inode is not
 *    processed again, and @path is released.
 *
 * NOTE(review): the start value of the lost+found inode number is taken
 * from root->highest_inode and compared with BTRFS_LAST_FREE_OBJECTID; the
 * statements around that check are not visible in this excerpt.
 */
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568 struct btrfs_root *root,
2569 struct btrfs_path *path,
2570 struct inode_record *rec)
2572 char *dir_name = "lost+found";
2573 char namebuf[BTRFS_NAME_LEN] = {0};
2578 int name_recovered = 0;
2579 int type_recovered = 0;
2583 * Get file name and type first before these invalid inode ref
2584 * are deleted by remove_all_invalid_backref()
2586 name_recovered = !find_file_name(rec, namebuf, &namelen);
2587 type_recovered = !find_file_type(rec, &type);
2589 if (!name_recovered) {
2590 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591 rec->ino, rec->ino);
2592 namelen = count_digits(rec->ino);
2593 sprintf(namebuf, "%llu", rec->ino);
2596 if (!type_recovered) {
2597 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2599 type = BTRFS_FT_REG_FILE;
2603 ret = reset_nlink(trans, root, path, rec);
2606 "Failed to reset nlink for inode %llu: %s\n",
2607 rec->ino, strerror(-ret));
2611 if (rec->found_link == 0) {
2612 lost_found_ino = root->highest_inode;
2613 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2618 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2622 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623 dir_name, strerror(-ret));
2626 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627 namebuf, namelen, type, NULL, 1);
2629 * Add ".INO" suffix several times to handle case where
2630 * "FILENAME.INO" is already taken by another file.
2632 while (ret == -EEXIST) {
2634 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2636 if (namelen + count_digits(rec->ino) + 1 >
2641 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2643 namelen += count_digits(rec->ino) + 1;
2644 ret = btrfs_add_link(trans, root, rec->ino,
2645 lost_found_ino, namebuf,
2646 namelen, type, NULL, 1);
2650 "Failed to link the inode %llu to %s dir: %s\n",
2651 rec->ino, dir_name, strerror(-ret));
2655 * Just increase the found_link, don't actually add the
2656 * backref. This will make things easier and this inode
2657 * record will be freed after the repair is done.
2658 * So fsck will not report problem about this inode.
2661 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662 namelen, namebuf, dir_name);
2664 printf("Fixed the nlink of inode %llu\n", rec->ino);
2667 * Clear the flag anyway, or we will loop forever for the same inode
2668 * as it will not be removed from the bad inode list and the dead loop
2671 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672 btrfs_release_path(path);
2677 * Check if there is any normal(reg or prealloc) file extent for given
2679 * This is used to determine the file type when neither its dir_index/item or
2680 * inode_item exists.
2682 * This will *NOT* report error, if any error happens, just consider it does
2683 * not have any normal file extent.
/*
 * Look in @root for a file extent item of inode @ino that is not an inline
 * extent.
 *
 * The tree is searched read-only (no transaction, no insert, no COW) for a
 * BTRFS_EXTENT_DATA_KEY key and the items of @ino are then inspected; an
 * item whose extent type differs from BTRFS_FILE_EXTENT_INLINE is the one
 * being looked for. The path is released before returning.
 *
 * NOTE(review): the loop construct, the assignments to the result and the
 * return statement are not visible in this excerpt.
 */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2687 struct btrfs_path path;
2688 struct btrfs_key key;
2689 struct btrfs_key found_key;
2690 struct btrfs_file_extent_item *fi;
2694 btrfs_init_path(&path);
2696 key.type = BTRFS_EXTENT_DATA_KEY;
2699 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search may end one slot past the last item of a leaf */
2704 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705 ret = btrfs_next_leaf(root, &path);
2712 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2714 if (found_key.objectid != ino ||
2715 found_key.type != BTRFS_EXTENT_DATA_KEY)
2717 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718 struct btrfs_file_extent_item);
2719 type = btrfs_file_extent_type(path.nodes[0], fi);
2720 if (type != BTRFS_FILE_EXTENT_INLINE) {
2726 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF* file
 * type bits by table lookup.
 *
 * NOTE(review): no range check on @type is visible in this excerpt. A value
 * above the largest initialised index would read past the table, and an
 * index without an initialiser yields 0 - confirm that callers only pass
 * the types listed here.
 */
2730 static u32 btrfs_type_to_imode(u8 type)
2732 static u32 imode_by_btrfs_type[] = {
2733 [BTRFS_FT_REG_FILE] = S_IFREG,
2734 [BTRFS_FT_DIR] = S_IFDIR,
2735 [BTRFS_FT_CHRDEV] = S_IFCHR,
2736 [BTRFS_FT_BLKDEV] = S_IFBLK,
2737 [BTRFS_FT_FIFO] = S_IFIFO,
2738 [BTRFS_FT_SOCK] = S_IFSOCK,
2739 [BTRFS_FT_SYMLINK] = S_IFLNK,
2742 return imode_by_btrfs_type[(type)];
/*
 * Recreate the inode item of rec->ino when it is missing.
 *
 * The file type comes from find_file_type() when possible. Otherwise it is
 * guessed in this order: regular file when the record has file extents and
 * find_normal_file_extent() confirms a non-inline one, directory when the
 * record has a dir item, regular file when the record has orphan extents,
 * and finally regular file as the fallback with a message.
 *
 * The inode is created with btrfs_new_inode() using mode combined with the
 * S_IF* bits of the chosen type. Afterwards the record is updated: imode is
 * set, I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so
 * that the link count repair runs later.
 *
 * NOTE(review): the record gets found_dir_item set to 1 regardless of the
 * chosen type on the visible lines - confirm that a different field was not
 * intended. The initial value of mode and the error handling are not
 * visible in this excerpt. @path is not referenced on the visible lines.
 */
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746 struct btrfs_root *root,
2747 struct btrfs_path *path,
2748 struct inode_record *rec)
2752 int type_recovered = 0;
2755 printf("Trying to rebuild inode:%llu\n", rec->ino);
2757 type_recovered = !find_file_type(rec, &filetype);
2760 * Try to determine inode type if type not found.
2762 * For found regular file extent, it must be FILE.
2763 * For found dir_item/index, it must be DIR.
2765 * For undetermined one, use FILE as fallback.
2768 * 1. If found backref(inode_index/item is already handled) to it,
2770 * Need new inode-inode ref structure to allow search for that.
2772 if (!type_recovered) {
2773 if (rec->found_file_extent &&
2774 find_normal_file_extent(root, rec->ino)) {
2776 filetype = BTRFS_FT_REG_FILE;
2777 } else if (rec->found_dir_item) {
2779 filetype = BTRFS_FT_DIR;
2780 } else if (!list_empty(&rec->orphan_extents)) {
2782 filetype = BTRFS_FT_REG_FILE;
2784 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2787 filetype = BTRFS_FT_REG_FILE;
2791 ret = btrfs_new_inode(trans, root, rec->ino,
2792 mode | btrfs_type_to_imode(filetype));
2797 * Here inode rebuild is done, we only rebuild the inode item,
2798 * don't repair the nlink(like move to lost+found).
2799 * That is the job of nlink repair.
2801 * We just fill the record and return
2803 rec->found_dir_item = 1;
2804 rec->imode = mode | btrfs_type_to_imode(filetype);
2806 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807 /* Ensure the inode_nlinks repair function will be called */
2808 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Deal with the orphan data extents queued on @rec->orphan_extents.
 *
 * For every orphan, btrfs_get_extent() probes the file range the extent
 * would cover (offset .. offset + disk_len). The visible code then contains
 * two actions: releasing the extent with btrfs_free_extent() after printing
 * a "conflicts" message, and inserting a file extent item with
 * btrfs_insert_file_extent(), after which rec->found_size grows by disk_len
 * and the range is removed from rec->holes. Each orphan is unlinked from the
 * list, and I_ERR_FILE_EXTENT_ORPHAN is cleared after the loop.
 *
 * I_ERR_FILE_NBYTES_WRONG is cleared once found_size equals nbytes, and
 * I_ERR_FILE_EXTENT_DISCOUNT once the hole tree is empty.
 *
 * NOTE(review): the conditions selecting between the two actions and the
 * error exits are not on the lines shown here -- confirm against the full
 * function.
 */
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814 struct btrfs_root *root,
2815 struct btrfs_path *path,
2816 struct inode_record *rec)
2818 struct orphan_data_extent *orphan;
2819 struct orphan_data_extent *tmp;
2822 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2824 * Check for conflicting file extents
2826 * Here we don't know whether the extents is compressed or not,
2827 * so we can only assume it not compressed nor data offset,
2828 * and use its disk_len as extent length.
2830 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831 orphan->offset, orphan->disk_len, 0);
2832 btrfs_release_path(path);
2837 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838 orphan->disk_bytenr, orphan->disk_len);
2839 ret = btrfs_free_extent(trans,
2840 root->fs_info->extent_root,
2841 orphan->disk_bytenr, orphan->disk_len,
2842 0, root->objectid, orphan->objectid,
2847 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848 orphan->offset, orphan->disk_bytenr,
2849 orphan->disk_len, orphan->disk_len);
2853 /* Update file size info */
2854 rec->found_size += orphan->disk_len;
2855 if (rec->found_size == rec->nbytes)
2856 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2858 /* Update the file extent hole info too */
2859 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2863 if (RB_EMPTY_ROOT(&rec->holes))
2864 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2866 list_del(&orphan->list);
2869 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Repair the gaps ("discount" file extents) recorded in @rec->holes.
 *
 * The hole tree is consumed from rb_first(): each entry is passed to
 * btrfs_punch_hole() and then dropped with del_file_extent_hole(), and
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.
 *
 * A file that lost all of its file extents is handled as a special case by
 * punching one hole from offset 0 to rec->isize rounded up to the sector
 * size. A success message naming the inode and root is printed.
 *
 * NOTE(review): loop condition and error exits are not visible here.
 */
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875 struct btrfs_root *root,
2876 struct btrfs_path *path,
2877 struct inode_record *rec)
2879 struct rb_node *node;
2880 struct file_extent_hole *hole;
2884 node = rb_first(&rec->holes);
2888 hole = rb_entry(node, struct file_extent_hole, node);
2889 ret = btrfs_punch_hole(trans, root, rec->ino,
2890 hole->start, hole->len);
2893 ret = del_file_extent_hole(&rec->holes, hole->start,
2897 if (RB_EMPTY_ROOT(&rec->holes))
2898 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Always restart from the first node, the tree was just modified. */
2899 node = rb_first(&rec->holes);
2901 /* special case for a file losing all its file extent */
2903 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904 round_up(rec->isize, root->sectorsize));
2908 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909 rec->ino, root->objectid);
/*
 * Run the repair helper for every repairable error bit set in @rec->errors.
 *
 * The first check tests whether any of the bits this function knows how to
 * repair is set. All repairs share one transaction with 7 items reserved
 * (see the breakdown in the comment below). The helpers run in a fixed
 * order -- missing inode item, orphan extents, discount extents, dir isize,
 * orphan item, nlink, nbytes -- and each one after the first is skipped as
 * soon as an earlier helper returned non-zero. The error bits are re-read
 * before each step, so a helper can schedule or cancel a later step by
 * changing @rec->errors.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 *
 * NOTE(review): btrfs_commit_transaction() is called unconditionally and its
 * return value is not checked on the line shown -- confirm this is intended.
 */
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2916 struct btrfs_trans_handle *trans;
2917 struct btrfs_path path;
2920 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921 I_ERR_NO_ORPHAN_ITEM |
2922 I_ERR_LINK_COUNT_WRONG |
2923 I_ERR_NO_INODE_ITEM |
2924 I_ERR_FILE_EXTENT_ORPHAN |
2925 I_ERR_FILE_EXTENT_DISCOUNT|
2926 I_ERR_FILE_NBYTES_WRONG)))
2930 * For nlink repair, it may create a dir and add link, so
2931 * 2 for parent(256)'s dir_index and dir_item
2932 * 2 for lost+found dir's inode_item and inode_ref
2933 * 1 for the new inode_ref of the file
2934 * 2 for lost+found dir's dir_index and dir_item for the file
2936 trans = btrfs_start_transaction(root, 7);
2938 return PTR_ERR(trans);
2940 btrfs_init_path(&path);
2941 if (rec->errors & I_ERR_NO_INODE_ITEM)
2942 ret = repair_inode_no_item(trans, root, &path, rec);
2943 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948 ret = repair_inode_isize(trans, root, &path, rec);
2949 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952 ret = repair_inode_nlinks(trans, root, &path, rec);
2953 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954 ret = repair_inode_nbytes(trans, root, &path, rec);
2955 btrfs_commit_transaction(trans, root);
2956 btrfs_release_path(&path);
/*
 * Verify all inode records collected for @root in @inode_cache, repair them
 * where possible, and report whatever is still wrong.
 *
 * Steps visible in this function:
 *  - for a root whose root item has zero refs, only a warning is printed if
 *    @inode_cache is not empty;
 *  - root->highest_inode is raised to the ino of the last cache entry, so a
 *    later lost+found directory gets an unused inode number;
 *  - while the global repair flag is set, records with backrefs are passed
 *    to repair_inode_backrefs() (staged, see the comment below);
 *  - the root directory record is fetched with get_inode_rec() and checked
 *    with check_root_dir(); a missing root directory is recreated with
 *    btrfs_make_root_dir() in its own transaction, or reported;
 *  - every remaining record is removed from the cache; the root dir and the
 *    orphan objectid are skipped; I_ERR_NO_ORPHAN_ITEM is cleared when
 *    check_orphan_item() finds the item; I_ERR_NO_INODE_ITEM and
 *    I_ERR_LINK_COUNT_WRONG are set from the record state;
 *    try_repair_inode() is called; records that still have problems are
 *    printed together with their unresolved backrefs.
 *
 * Returns -1 when at least one error was counted, 0 otherwise.
 *
 * NOTE(review): several conditions (for example which branches depend on
 * the repair flag) are on lines not shown here -- confirm before relying on
 * the exact control flow described above.
 */
2960 static int check_inode_recs(struct btrfs_root *root,
2961 struct cache_tree *inode_cache)
2963 struct cache_extent *cache;
2964 struct ptr_node *node;
2965 struct inode_record *rec;
2966 struct inode_backref *backref;
2971 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2973 if (btrfs_root_refs(&root->root_item) == 0) {
2974 if (!cache_tree_empty(inode_cache))
2975 fprintf(stderr, "warning line %d\n", __LINE__);
2980 * We need to record the highest inode number for later 'lost+found'
2982 * We must select an ino not used/referred by any existing inode, or
2983 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984 * this may cause 'lost+found' dir has wrong nlinks.
2986 cache = last_cache_extent(inode_cache);
2988 node = container_of(cache, struct ptr_node, cache);
2990 if (rec->ino > root->highest_inode)
2991 root->highest_inode = rec->ino;
2995 * We need to repair backrefs first because we could change some of the
2996 * errors in the inode recs.
2998 * We also need to go through and delete invalid backrefs first and then
2999 * add the correct ones second. We do this because we may get EEXIST
3000 * when adding back the correct index because we hadn't yet deleted the
3003 * For example, if we were missing a dir index then the directories
3004 * isize would be wrong, so if we fixed the isize to what we thought it
3005 * would be and then fixed the backref we'd still have a invalid fs, so
3006 * we need to add back the dir index and then check to see if the isize
3011 if (stage == 3 && !err)
3014 cache = search_cache_extent(inode_cache, 0);
3015 while (repair && cache) {
3016 node = container_of(cache, struct ptr_node, cache);
3018 cache = next_cache_extent(cache);
3020 /* Need to free everything up and rescan */
3022 remove_cache_extent(inode_cache, &node->cache);
3024 free_inode_rec(rec);
3028 if (list_empty(&rec->backrefs))
3031 ret = repair_inode_backrefs(root, rec, inode_cache,
3045 rec = get_inode_rec(inode_cache, root_dirid, 0);
3046 BUG_ON(IS_ERR(rec));
3048 ret = check_root_dir(rec);
3050 fprintf(stderr, "root %llu root dir %llu error\n",
3051 (unsigned long long)root->root_key.objectid,
3052 (unsigned long long)root_dirid);
3053 print_inode_error(root, rec);
3058 struct btrfs_trans_handle *trans;
3060 trans = btrfs_start_transaction(root, 1);
3061 if (IS_ERR(trans)) {
3062 err = PTR_ERR(trans);
3067 "root %llu missing its root dir, recreating\n",
3068 (unsigned long long)root->objectid);
3070 ret = btrfs_make_root_dir(trans, root, root_dirid);
3073 btrfs_commit_transaction(trans, root);
3077 fprintf(stderr, "root %llu root dir %llu not found\n",
3078 (unsigned long long)root->root_key.objectid,
3079 (unsigned long long)root_dirid);
3083 cache = search_cache_extent(inode_cache, 0);
3086 node = container_of(cache, struct ptr_node, cache);
3088 remove_cache_extent(inode_cache, &node->cache);
3090 if (rec->ino == root_dirid ||
3091 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092 free_inode_rec(rec);
3096 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097 ret = check_orphan_item(root, rec->ino);
3099 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100 if (can_free_inode_rec(rec)) {
3101 free_inode_rec(rec);
3106 if (!rec->found_inode_item)
3107 rec->errors |= I_ERR_NO_INODE_ITEM;
3108 if (rec->found_link != rec->nlink)
3109 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3111 ret = try_repair_inode(root, rec);
3112 if (ret == 0 && can_free_inode_rec(rec)) {
3113 free_inode_rec(rec);
3119 if (!(repair && ret == 0))
3121 print_inode_error(root, rec);
3122 list_for_each_entry(backref, &rec->backrefs, list) {
3123 if (!backref->found_dir_item)
3124 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125 if (!backref->found_dir_index)
3126 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127 if (!backref->found_inode_ref)
3128 backref->errors |= REF_ERR_NO_INODE_REF;
3129 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130 " namelen %u name %s filetype %d errors %x",
3131 (unsigned long long)backref->dir,
3132 (unsigned long long)backref->index,
3133 backref->namelen, backref->name,
3134 backref->filetype, backref->errors);
3135 print_ref_error(backref->errors);
3137 free_inode_rec(rec);
3139 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for @objectid stored in @root_cache.
 *
 * An existing record is found with lookup_cache_extent(). The visible code
 * also allocates a zeroed record, initialises its backref list, keys it as
 * a size-1 cache extent starting at @objectid and inserts it into the tree;
 * presumably this happens only when the lookup found nothing.
 *
 * Returns ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST) on the failure paths shown
 * below; callers in this file test the result with IS_ERR().
 */
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3145 struct cache_extent *cache;
3146 struct root_record *rec = NULL;
3149 cache = lookup_cache_extent(root_cache, objectid, 1);
3151 rec = container_of(cache, struct root_record, cache);
3153 rec = calloc(1, sizeof(*rec));
3155 return ERR_PTR(-ENOMEM);
3156 rec->objectid = objectid;
3157 INIT_LIST_HEAD(&rec->backrefs);
3158 rec->cache.start = objectid;
3159 rec->cache.size = 1;
3161 ret = insert_cache_extent(root_cache, &rec->cache);
3163 return ERR_PTR(-EEXIST);
/*
 * Find or create the backref of @rec that describes the link
 * (@ref_root, @dir, @name).
 *
 * Existing backrefs are compared on ref_root, dir, namelen and the name
 * bytes; @index is not part of the comparison shown here. A new backref is
 * allocated zeroed with room for the name plus a terminating NUL, filled in
 * (including @index) and appended to @rec->backrefs.
 *
 * NOTE(review): the return statements and the allocation failure check are
 * on lines not shown here.
 */
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169 u64 ref_root, u64 dir, u64 index,
3170 const char *name, int namelen)
3172 struct root_backref *backref;
3174 list_for_each_entry(backref, &rec->backrefs, list) {
3175 if (backref->ref_root != ref_root || backref->dir != dir ||
3176 backref->namelen != namelen)
3178 if (memcmp(name, backref->name, namelen))
/* One extra byte so that the stored name can be NUL-terminated below. */
3183 backref = calloc(1, sizeof(*backref) + namelen + 1);
3186 backref->ref_root = ref_root;
3188 backref->index = index;
3189 backref->namelen = namelen;
3190 memcpy(backref->name, name, namelen);
3191 backref->name[namelen] = '\0';
3192 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down the root_record that embeds @cache: every entry of
 * rec->backrefs is unlinked from the list in turn.
 *
 * NOTE(review): the calls that release the memory are presumably on lines
 * not shown here -- confirm.
 */
3196 static void free_root_record(struct cache_extent *cache)
3198 struct root_record *rec;
3199 struct root_backref *backref;
3201 rec = container_of(cache, struct root_record, cache);
3202 while (!list_empty(&rec->backrefs)) {
3203 backref = to_root_backref(rec->backrefs.next);
3204 list_del(&backref->list);
/*
 * Presumably expands to free_root_recs_tree(), which check_fs_roots() calls,
 * using free_root_record() as the per-entry destructor.
 */
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence for the link between root @root_id and the
 * root @ref_root that references it.
 *
 * @item_type tells where the evidence came from and selects the found_*
 * flag to set: BTRFS_DIR_ITEM_KEY, BTRFS_DIR_INDEX_KEY, BTRFS_ROOT_REF_KEY
 * or BTRFS_ROOT_BACKREF_KEY. @errors is OR-ed into the backref.
 *
 * For every type except DIR_ITEM the index is handled as well: the visible
 * code flags REF_ERR_INDEX_UNMATCH when it differs from the recorded one and
 * also stores @index in the backref. A second ROOT_REF or ROOT_BACKREF sets
 * the matching REF_ERR_DUP_* bit. A backref that has both a forward ref and
 * a dir item is marked reachable.
 *
 * NOTE(review): statements on the lines following the found_forward_ref /
 * found_dir_item tests are not shown here.
 */
3213 static int add_root_backref(struct cache_tree *root_cache,
3214 u64 root_id, u64 ref_root, u64 dir, u64 index,
3215 const char *name, int namelen,
3216 int item_type, int errors)
3218 struct root_record *rec;
3219 struct root_backref *backref;
3221 rec = get_root_rec(root_cache, root_id);
3222 BUG_ON(IS_ERR(rec));
3223 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3226 backref->errors |= errors;
3228 if (item_type != BTRFS_DIR_ITEM_KEY) {
3229 if (backref->found_dir_index || backref->found_back_ref ||
3230 backref->found_forward_ref) {
3231 if (backref->index != index)
3232 backref->errors |= REF_ERR_INDEX_UNMATCH;
3234 backref->index = index;
3238 if (item_type == BTRFS_DIR_ITEM_KEY) {
3239 if (backref->found_forward_ref)
3241 backref->found_dir_item = 1;
3242 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243 backref->found_dir_index = 1;
3244 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245 if (backref->found_forward_ref)
3246 backref->errors |= REF_ERR_DUP_ROOT_REF;
3247 else if (backref->found_dir_item)
3249 backref->found_forward_ref = 1;
3250 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251 if (backref->found_back_ref)
3252 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253 backref->found_back_ref = 1;
3258 if (backref->found_forward_ref && backref->found_dir_item)
3259 backref->reachable = 1;
/*
 * Drain the inode records in @src_cache and turn them into root backrefs in
 * @dst_cache.
 *
 * For the tree reloc root the source records are just freed. Otherwise each
 * record is removed from @src_cache, is_child_root() is asked about
 * (root->objectid, rec->ino), and the record's backrefs are forwarded to
 * add_root_backref() with rec->ino as the root id: once as DIR_ITEM when a
 * dir item was found and once as DIR_INDEX when a dir index was found.
 * A backref with found_inode_ref set triggers BUG_ON(). The record is freed
 * afterwards.
 *
 * NOTE(review): how the is_child_root() result steers the loop is on lines
 * not shown here.
 */
3263 static int merge_root_recs(struct btrfs_root *root,
3264 struct cache_tree *src_cache,
3265 struct cache_tree *dst_cache)
3267 struct cache_extent *cache;
3268 struct ptr_node *node;
3269 struct inode_record *rec;
3270 struct inode_backref *backref;
3273 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274 free_inode_recs_tree(src_cache);
3279 cache = search_cache_extent(src_cache, 0);
3282 node = container_of(cache, struct ptr_node, cache);
3284 remove_cache_extent(src_cache, &node->cache);
3287 ret = is_child_root(root, root->objectid, rec->ino);
3293 list_for_each_entry(backref, &rec->backrefs, list) {
3294 BUG_ON(backref->found_inode_ref);
3295 if (backref->found_dir_item)
3296 add_root_backref(dst_cache, rec->ino,
3297 root->root_key.objectid, backref->dir,
3298 backref->index, backref->name,
3299 backref->namelen, BTRFS_DIR_ITEM_KEY,
3301 if (backref->found_dir_index)
3302 add_root_backref(dst_cache, rec->ino,
3303 root->root_key.objectid, backref->dir,
3304 backref->index, backref->name,
3305 backref->namelen, BTRFS_DIR_INDEX_KEY,
3309 free_inode_rec(rec);
/*
 * Work out which roots in @root_cache are referenced and report broken or
 * missing root references.
 *
 * Phase 1 starts from the record of BTRFS_FS_TREE_OBJECTID and walks all
 * records: for a record whose found_ref is 0, every reachable backref has
 * its referencing root looked up, and the backref is marked unreachable
 * unless that root has found_ref > 0. As the fixme below says, circular
 * references are not detected.
 *
 * Phase 2 walks all records again. Unreferenced roots in the
 * BTRFS_FIRST_FREE_OBJECTID..BTRFS_LAST_FREE_OBJECTID range are checked for
 * an orphan item in the tree root and reported as "not referenced" (roots
 * without a root item are treated as meaningless). Missing DIR_ITEM,
 * DIR_INDEX, ROOT_BACKREF and ROOT_REF pieces become REF_ERR_* bits, and
 * records with problems are printed together with their unresolved refs.
 *
 * Returns 1 when at least one error was counted, 0 otherwise.
 *
 * NOTE(review): loop headers, counters and continue/goto statements are on
 * lines not shown here -- confirm details against the full function.
 */
3316 static int check_root_refs(struct btrfs_root *root,
3317 struct cache_tree *root_cache)
3319 struct root_record *rec;
3320 struct root_record *ref_root;
3321 struct root_backref *backref;
3322 struct cache_extent *cache;
3328 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329 BUG_ON(IS_ERR(rec));
3332 /* fixme: this can not detect circular references */
3335 cache = search_cache_extent(root_cache, 0);
3339 rec = container_of(cache, struct root_record, cache);
3340 cache = next_cache_extent(cache);
3342 if (rec->found_ref == 0)
3345 list_for_each_entry(backref, &rec->backrefs, list) {
3346 if (!backref->reachable)
3349 ref_root = get_root_rec(root_cache,
3351 BUG_ON(IS_ERR(ref_root));
3352 if (ref_root->found_ref > 0)
3355 backref->reachable = 0;
3357 if (rec->found_ref == 0)
3363 cache = search_cache_extent(root_cache, 0);
3367 rec = container_of(cache, struct root_record, cache);
3368 cache = next_cache_extent(cache);
3370 if (rec->found_ref == 0 &&
3371 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373 ret = check_orphan_item(root->fs_info->tree_root,
3379 * If we don't have a root item then we likely just have
3380 * a dir item in a snapshot for this root but no actual
3381 * ref key or anything so it's meaningless.
3383 if (!rec->found_root_item)
3386 fprintf(stderr, "fs tree %llu not referenced\n",
3387 (unsigned long long)rec->objectid);
3391 if (rec->found_ref > 0 && !rec->found_root_item)
3393 list_for_each_entry(backref, &rec->backrefs, list) {
3394 if (!backref->found_dir_item)
3395 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396 if (!backref->found_dir_index)
3397 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398 if (!backref->found_back_ref)
3399 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400 if (!backref->found_forward_ref)
3401 backref->errors |= REF_ERR_NO_ROOT_REF;
3402 if (backref->reachable && backref->errors)
3409 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410 (unsigned long long)rec->objectid, rec->found_ref,
3411 rec->found_root_item ? "" : "not found");
3413 list_for_each_entry(backref, &rec->backrefs, list) {
3414 if (!backref->reachable)
3416 if (!backref->errors && rec->found_root_item)
3418 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419 " index %llu namelen %u name %s errors %x\n",
3420 (unsigned long long)backref->ref_root,
3421 (unsigned long long)backref->dir,
3422 (unsigned long long)backref->index,
3423 backref->namelen, backref->name,
3425 print_ref_error(backref->errors);
3428 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and register it with
 * add_root_backref().
 *
 * The directory id, sequence (index) and name length are read from the item
 * and the name bytes that follow the structure are copied into a local
 * buffer. A name longer than BTRFS_NAME_LEN is cut to BTRFS_NAME_LEN and
 * flagged with REF_ERR_NAME_TOO_LONG.
 *
 * For a BTRFS_ROOT_REF_KEY key, key->offset is passed as the root id and
 * key->objectid as the referencing root; in the other call the two are
 * swapped. The return values of add_root_backref() are not used on the
 * lines shown.
 */
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432 struct btrfs_key *key,
3433 struct cache_tree *root_cache)
3439 struct btrfs_root_ref *ref;
3440 char namebuf[BTRFS_NAME_LEN];
3443 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3445 dirid = btrfs_root_ref_dirid(eb, ref);
3446 index = btrfs_root_ref_sequence(eb, ref);
3447 name_len = btrfs_root_ref_name_len(eb, ref);
3449 if (name_len <= BTRFS_NAME_LEN) {
3453 len = BTRFS_NAME_LEN;
3454 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the fixed-size root ref structure. */
3456 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3458 if (key->type == BTRFS_ROOT_REF_KEY) {
3459 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460 index, namebuf, len, key->type, error);
3462 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for a corrupt block cache tree: maps @cache back to
 * the btrfs_corrupt_block that embeds it.
 * NOTE(review): the statement releasing it is on a line not shown here.
 */
3468 static void free_corrupt_block(struct cache_extent *cache)
3470 struct btrfs_corrupt_block *corrupt;
3472 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Presumably expands to free_corrupt_blocks_tree(), which check_fs_root()
 * calls once it is done with its per-root corrupt block cache.
 */
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3479 * Repair the btree of the given root.
3481 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3482 * and then rebalance the tree.
3483 * After the fix, the btree should be writeable.
/*
 * Implementation outline (all inside one transaction with 1 item reserved):
 *
 *  Pass 1 - for every entry of @corrupt_blocks: search for the recorded key
 *           with path.lowest_level set to the block's level, read the block
 *           pointer with btrfs_node_blockptr(), delete that pointer with
 *           btrfs_del_ptr() and free the extent it referred to.
 *  Pass 2 - for every entry again: search for the key with ins_len -1 so
 *           that btrfs_search_slot() balances the tree.
 *
 * An early check handles an empty @corrupt_blocks before any transaction is
 * started. The transaction is committed and the path released at the end.
 *
 * NOTE(review): error checks after the individual calls are on lines not
 * shown here.
 */
3485 static int repair_btree(struct btrfs_root *root,
3486 struct cache_tree *corrupt_blocks)
3488 struct btrfs_trans_handle *trans;
3489 struct btrfs_path path;
3490 struct btrfs_corrupt_block *corrupt;
3491 struct cache_extent *cache;
3492 struct btrfs_key key;
3497 if (cache_tree_empty(corrupt_blocks))
3500 trans = btrfs_start_transaction(root, 1);
3501 if (IS_ERR(trans)) {
3502 ret = PTR_ERR(trans);
3503 fprintf(stderr, "Error starting transaction: %s\n",
3507 btrfs_init_path(&path);
3508 cache = first_cache_extent(corrupt_blocks);
3510 corrupt = container_of(cache, struct btrfs_corrupt_block,
3512 level = corrupt->level;
3513 path.lowest_level = level;
3514 key.objectid = corrupt->key.objectid;
3515 key.type = corrupt->key.type;
3516 key.offset = corrupt->key.offset;
3519 * Here we don't want to do any tree balance, since it may
3520 * cause a balance with corrupted brother leaf/node,
3521 * so ins_len set to 0 here.
3522 * Balance will be done after all corrupt node/leaf is deleted.
3524 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3527 offset = btrfs_node_blockptr(path.nodes[level],
3530 /* Remove the ptr */
3531 ret = btrfs_del_ptr(trans, root, &path, level,
3536 * Remove the corresponding extent
3537 * return value is not concerned.
3539 btrfs_release_path(&path);
3540 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541 0, root->root_key.objectid,
3543 cache = next_cache_extent(cache);
3546 /* Balance the btree using btrfs_search_slot() */
3547 cache = first_cache_extent(corrupt_blocks);
3549 corrupt = container_of(cache, struct btrfs_corrupt_block,
3551 memcpy(&key, &corrupt->key, sizeof(key));
3552 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3555 /* return will always >0 since it won't find the item */
3557 btrfs_release_path(&path);
3558 cache = next_cache_extent(cache);
3561 btrfs_commit_transaction(trans, root);
3562 btrfs_release_path(&path);
/*
 * Check a single fs/subvolume tree @root.
 *
 * Steps visible in this function:
 *  - a local corrupt block cache is installed in root->fs_info for the
 *    duration of the check and removed again at the end;
 *  - unless @root is the tree reloc root, its root_record in @root_cache is
 *    fetched and found_root_item is set when the root item has refs > 0;
 *  - orphan data extents queued on the root are moved to the matching inode
 *    records, which get I_ERR_FILE_EXTENT_ORPHAN;
 *  - the root block is validated with btrfs_check_leaf()/btrfs_check_node();
 *  - the tree walk starts at the root node, or -- for a root with zero refs
 *    and a non-zero drop_progress objectid -- at the position found by
 *    searching for drop_progress at drop_level; an out-of-range drop level
 *    is reported with error();
 *  - walk_down_tree() and walk_up_tree() traverse the tree via @wc;
 *  - corrupted tree blocks collected during the walk are printed and
 *    repair_btree() is called for them;
 *  - merge_root_recs() and check_inode_recs() process the records gathered
 *    in the local shared_node.
 *
 * NOTE(review): the conditions guarding the repair and the way the partial
 * results are combined into the return value are on lines not shown here.
 */
3566 static int check_fs_root(struct btrfs_root *root,
3567 struct cache_tree *root_cache,
3568 struct walk_control *wc)
3574 struct btrfs_path path;
3575 struct shared_node root_node;
3576 struct root_record *rec;
3577 struct btrfs_root_item *root_item = &root->root_item;
3578 struct cache_tree corrupt_blocks;
3579 struct orphan_data_extent *orphan;
3580 struct orphan_data_extent *tmp;
3581 enum btrfs_tree_block_status status;
3582 struct node_refs nrefs;
3585 * Reuse the corrupt_block cache tree to record corrupted tree block
3587 * Unlike the usage in extent tree check, here we do it in a per
3588 * fs/subvol tree base.
3590 cache_tree_init(&corrupt_blocks);
3591 root->fs_info->corrupt_blocks = &corrupt_blocks;
3593 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594 rec = get_root_rec(root_cache, root->root_key.objectid);
3595 BUG_ON(IS_ERR(rec));
3596 if (btrfs_root_refs(root_item) > 0)
3597 rec->found_root_item = 1;
3600 btrfs_init_path(&path);
3601 memset(&root_node, 0, sizeof(root_node));
3602 cache_tree_init(&root_node.root_cache);
3603 cache_tree_init(&root_node.inode_cache);
3604 memset(&nrefs, 0, sizeof(nrefs));
3606 /* Move the orphan extent record to corresponding inode_record */
3607 list_for_each_entry_safe(orphan, tmp,
3608 &root->orphan_data_extents, list) {
3609 struct inode_record *inode;
3611 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3613 BUG_ON(IS_ERR(inode));
3614 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615 list_move(&orphan->list, &inode->orphan_extents);
3618 level = btrfs_header_level(root->node);
3619 memset(wc->nodes, 0, sizeof(wc->nodes));
3620 wc->nodes[level] = &root_node;
3621 wc->active_node = level;
3622 wc->root_level = level;
3624 /* We may not have checked the root block, lets do that now */
3625 if (btrfs_is_leaf(root->node))
3626 status = btrfs_check_leaf(root, NULL, root->node);
3628 status = btrfs_check_node(root, NULL, root->node);
3629 if (status != BTRFS_TREE_BLOCK_CLEAN)
3632 if (btrfs_root_refs(root_item) > 0 ||
3633 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634 path.nodes[level] = root->node;
3635 extent_buffer_get(root->node);
3636 path.slots[level] = 0;
3638 struct btrfs_key key;
3639 struct btrfs_disk_key found_key;
3641 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642 level = root_item->drop_level;
3643 path.lowest_level = level;
3644 if (level > btrfs_header_level(root->node) ||
3645 level >= BTRFS_MAX_LEVEL) {
3646 error("ignoring invalid drop level: %u", level);
3649 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3652 btrfs_node_key(path.nodes[level], &found_key,
3654 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655 sizeof(found_key)));
3659 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3665 wret = walk_up_tree(root, &path, wc, &level);
3672 btrfs_release_path(&path);
3674 if (!cache_tree_empty(&corrupt_blocks)) {
3675 struct cache_extent *cache;
3676 struct btrfs_corrupt_block *corrupt;
3678 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679 root->root_key.objectid);
3680 cache = first_cache_extent(&corrupt_blocks);
3682 corrupt = container_of(cache,
3683 struct btrfs_corrupt_block,
3685 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686 cache->start, corrupt->level,
3687 corrupt->key.objectid, corrupt->key.type,
3688 corrupt->key.offset);
3689 cache = next_cache_extent(cache);
3692 printf("Try to repair the btree for root %llu\n",
3693 root->root_key.objectid);
3694 ret = repair_btree(root, &corrupt_blocks);
3696 fprintf(stderr, "Failed to repair btree: %s\n",
3699 printf("Btree for root %llu is fixed\n",
3700 root->root_key.objectid);
3704 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3708 if (root_node.current) {
3709 root_node.current->checked = 1;
3710 maybe_free_inode_rec(&root_node.inode_cache,
3714 err = check_inode_recs(root, &root_node.inode_cache);
3718 free_corrupt_blocks_tree(&corrupt_blocks);
3719 root->fs_info->corrupt_blocks = NULL;
3720 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid identifies a root that check_fs_roots() should
 * treat as an fs tree. The tree reloc and data reloc tree objectids are
 * tested explicitly; every other objectid is decided by is_fstree().
 *
 * NOTE(review): the value returned for the two reloc trees is on a line not
 * shown here -- presumably non-zero, confirm.
 */
3724 static int fs_root_objectid(u64 objectid)
3726 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3729 return is_fstree(objectid);
/*
 * Walk the root tree and run check_fs_root() on every fs/subvolume root,
 * collecting root reference information in @root_cache on the way.
 *
 * Steps visible in this function:
 *  - when progress reporting is enabled, the task position is set to
 *    TASK_FS_ROOTS and the task is started; it is stopped at the end;
 *  - cached block groups are reset and a walk_control with an empty shared
 *    cache is prepared;
 *  - the root tree is searched for ROOT_ITEM keys and iterated leaf by leaf;
 *  - ROOT_ITEM keys accepted by fs_root_objectid() are read (the tree reloc
 *    root without caching, and freed again after the check) and passed to
 *    check_fs_root();
 *  - when the root node of the tree root has changed, or check_fs_root()
 *    returns -EAGAIN, @root_cache is emptied and the path released,
 *    presumably to restart the scan;
 *  - ROOT_REF and ROOT_BACKREF keys are handed to process_root_ref();
 *  - finally the shared cache is freed and a warning is printed if it is
 *    still not empty.
 *
 * NOTE(review): loop structure, restart labels and the returned value are on
 * lines not shown here.
 */
3732 static int check_fs_roots(struct btrfs_root *root,
3733 struct cache_tree *root_cache)
3735 struct btrfs_path path;
3736 struct btrfs_key key;
3737 struct walk_control wc;
3738 struct extent_buffer *leaf, *tree_node;
3739 struct btrfs_root *tmp_root;
3740 struct btrfs_root *tree_root = root->fs_info->tree_root;
3744 if (ctx.progress_enabled) {
3745 ctx.tp = TASK_FS_ROOTS;
3746 task_start(ctx.info);
3750 * Just in case we made any changes to the extent tree that weren't
3751 * reflected into the free space cache yet.
3754 reset_cached_block_groups(root->fs_info);
3755 memset(&wc, 0, sizeof(wc));
3756 cache_tree_init(&wc.shared);
3757 btrfs_init_path(&path);
3762 key.type = BTRFS_ROOT_ITEM_KEY;
3763 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3768 tree_node = tree_root->node;
3770 if (tree_node != tree_root->node) {
3771 free_root_recs_tree(root_cache);
3772 btrfs_release_path(&path);
3775 leaf = path.nodes[0];
3776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777 ret = btrfs_next_leaf(tree_root, &path);
3783 leaf = path.nodes[0];
3785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787 fs_root_objectid(key.objectid)) {
3788 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789 tmp_root = btrfs_read_fs_root_no_cache(
3790 root->fs_info, &key);
3792 key.offset = (u64)-1;
3793 tmp_root = btrfs_read_fs_root(
3794 root->fs_info, &key);
3796 if (IS_ERR(tmp_root)) {
3800 ret = check_fs_root(tmp_root, root_cache, &wc);
3801 if (ret == -EAGAIN) {
3802 free_root_recs_tree(root_cache);
3803 btrfs_release_path(&path);
3808 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809 btrfs_free_fs_root(tmp_root);
3810 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811 key.type == BTRFS_ROOT_BACKREF_KEY) {
3812 process_root_ref(leaf, path.slots[0], &key,
3819 btrfs_release_path(&path);
3821 free_extent_cache_tree(&wc.shared);
3822 if (!cache_tree_empty(&wc.shared))
3823 fprintf(stderr, "warning line %d\n", __LINE__);
3825 task_stop(ctx.info);
/*
 * Error bits used by the checks that follow (find_dir_item(),
 * check_inode_ref(), check_inode_extref(), ...). Each value is a distinct
 * bit so that several results can be OR-ed into one error word. Bit 0 is
 * not defined on the lines shown here.
 */
3830 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
3831 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
3832 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
3833 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
3834 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
3835 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
3836 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
3837 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
3838 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
3839 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
3840 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
3841 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
3842 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
3843 #define NO_INODE_ITEM (1<<14) /* no inode_item */
3844 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
3845 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
3846 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
3849 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3850 * INODE_REF/INODE_EXTREF match.
3852 * @root: the root of the fs/file tree
3853 * @ref_key: the key of the INODE_REF/INODE_EXTREF
3854 * @key: the key of the DIR_ITEM/DIR_INDEX
3855 * @index: the index in the INODE_REF/INODE_EXTREF, used to
3856 * distinguish root_dir from a normal dir/file
3857 * @name: the name in the INODE_REF/INODE_EXTREF
3858 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
3859 * @mode: the st_mode of INODE_ITEM
3861 * Return 0 if no error occurred.
3862 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3863 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3865 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3866 * do not match for a normal dir/file.
/*
 * Implementation notes:
 *  - @key is looked up with a btrfs_search_slot() call that passes no
 *    transaction handle;
 *  - one branch handles the root directory and reports ROOT_DIR_ERROR with
 *    a message; another reports DIR_ITEM_MISSING for a normal file/dir;
 *  - otherwise every btrfs_dir_item packed into the found item is compared
 *    in turn: its location must be (ref_key->objectid, INODE_ITEM, 0), its
 *    file type must equal imode_to_type(@mode), and its name must match
 *    @name/@namelen. The result stays DIR_ITEM_MISMATCH until an entry
 *    passes, and a mismatch message is printed in that case;
 *  - an on-disk name longer than BTRFS_NAME_LEN is trimmed to
 *    BTRFS_NAME_LEN before comparing, with a warning.
 *
 * NOTE(review): @name is printed with %s while the callers in this file
 * pass a fixed-size buffer filled by read_extent_buffer() -- confirm that it
 * is always NUL-terminated.
 */
3868 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3869 struct btrfs_key *key, u64 index, char *name,
3870 u32 namelen, u32 mode)
3872 struct btrfs_path path;
3873 struct extent_buffer *node;
3874 struct btrfs_dir_item *di;
3875 struct btrfs_key location;
3876 char namebuf[BTRFS_NAME_LEN] = {0};
3886 btrfs_init_path(&path);
3887 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3889 ret = DIR_ITEM_MISSING;
3893 /* Process root dir and goto out*/
3896 ret = ROOT_DIR_ERROR;
3898 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3900 ref_key->type == BTRFS_INODE_REF_KEY ?
3902 ref_key->objectid, ref_key->offset,
3903 key->type == BTRFS_DIR_ITEM_KEY ?
3904 "DIR_ITEM" : "DIR_INDEX");
3912 /* Process normal file/dir */
3914 ret = DIR_ITEM_MISSING;
3916 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3918 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3919 ref_key->objectid, ref_key->offset,
3920 key->type == BTRFS_DIR_ITEM_KEY ?
3921 "DIR_ITEM" : "DIR_INDEX",
3922 key->objectid, key->offset, namelen, name,
3923 imode_to_type(mode));
3927 /* Check whether inode_id/filetype/name match */
3928 node = path.nodes[0];
3929 slot = path.slots[0];
3930 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3931 total = btrfs_item_size_nr(node, slot);
3932 while (cur < total) {
3933 ret = DIR_ITEM_MISMATCH;
3934 name_len = btrfs_dir_name_len(node, di);
3935 data_len = btrfs_dir_data_len(node, di);
3937 btrfs_dir_item_key_to_cpu(node, di, &location);
3938 if (location.objectid != ref_key->objectid ||
3939 location.type != BTRFS_INODE_ITEM_KEY ||
3940 location.offset != 0)
3943 filetype = btrfs_dir_type(node, di);
3944 if (imode_to_type(mode) != filetype)
3947 if (name_len <= BTRFS_NAME_LEN) {
3950 len = BTRFS_NAME_LEN;
3951 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3953 key->type == BTRFS_DIR_ITEM_KEY ?
3954 "DIR_ITEM" : "DIR_INDEX",
3955 key->objectid, key->offset, name_len);
3957 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3958 if (len != namelen || strncmp(namebuf, name, len))
/* Step over the header, the name and the data to reach the next entry. */
3964 len = sizeof(*di) + name_len + data_len;
3965 di = (struct btrfs_dir_item *)((char *)di + len);
3968 if (ret == DIR_ITEM_MISMATCH)
3970 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3972 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3973 ref_key->objectid, ref_key->offset,
3974 key->type == BTRFS_DIR_ITEM_KEY ?
3975 "DIR_ITEM" : "DIR_INDEX",
3976 key->objectid, key->offset, namelen, name,
3977 imode_to_type(mode));
3979 btrfs_release_path(&path);
3984 * Traverse the given INODE_REF and call find_dir_item() to find related
3985 * DIR_ITEM/DIR_INDEX.
3987 * @root: the root of the fs/file tree
3988 * @ref_key: the key of the INODE_REF
3989 * @refs: the count of INODE_REF
3990 * @mode: the st_mode of INODE_ITEM
3992 * Return 0 if no error occurred.
/*
 * Implementation notes:
 *  - an INODE_REF item can pack several refs; the pointer is advanced by
 *    sizeof(*ref) + name_len after each one;
 *  - for each ref the index and name are read, and a name longer than
 *    BTRFS_NAME_LEN is cut to BTRFS_NAME_LEN with a warning;
 *  - a ref with index 0 whose name differs from ".." is reported as
 *    ROOT_DIR_ERROR;
 *  - find_dir_item() is called twice per ref, once for the DIR_INDEX key and
 *    once for the DIR_ITEM key (offset = btrfs_name_hash() of the name),
 *    both under the parent directory ref_key->offset.
 *
 * NOTE(review): namebuf is exactly BTRFS_NAME_LEN bytes and is reused for
 * every ref in the item, so it may lack a terminating NUL or still hold
 * bytes of a previous, longer name when it is printed with %s -- confirm.
 */
3994 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3995 struct extent_buffer *node, int slot, u64 *refs,
3998 struct btrfs_key key;
3999 struct btrfs_inode_ref *ref;
4000 char namebuf[BTRFS_NAME_LEN] = {0};
4008 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4009 total = btrfs_item_size_nr(node, slot);
4012 /* Update inode ref count */
4015 index = btrfs_inode_ref_index(node, ref);
4016 name_len = btrfs_inode_ref_name_len(node, ref);
4017 if (name_len <= BTRFS_NAME_LEN) {
4020 len = BTRFS_NAME_LEN;
4021 warning("root %llu INODE_REF[%llu %llu] name too long",
4022 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the fixed-size inode ref structure. */
4025 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4027 /* Check root dir ref name */
4028 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4029 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4030 root->objectid, ref_key->objectid, ref_key->offset,
4032 err |= ROOT_DIR_ERROR;
4035 /* Find related DIR_INDEX */
4036 key.objectid = ref_key->offset;
4037 key.type = BTRFS_DIR_INDEX_KEY;
4039 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4042 /* Find related dir_item */
4043 key.objectid = ref_key->offset;
4044 key.type = BTRFS_DIR_ITEM_KEY;
4045 key.offset = btrfs_name_hash(namebuf, len);
4046 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance by the untrimmed on-disk name length to reach the next ref. */
4049 len = sizeof(*ref) + name_len;
4050 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4059 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4060 * DIR_ITEM/DIR_INDEX.
4062 * @root: the root of the fs/file tree
4063 * @ref_key: the key of the INODE_EXTREF
4064 * @refs: the count of INODE_EXTREF
4065 * @mode: the st_mode of INODE_ITEM
4067 * Return 0 if no error occurred.
/*
 * INODE_EXTREF counterpart of check_inode_ref().  Each entry carries its own
 * parent directory id, which is used as the objectid of the DIR_INDEX and
 * DIR_ITEM keys handed to find_dir_item().
 */
4069 static int check_inode_extref(struct btrfs_root *root,
4070 struct btrfs_key *ref_key,
4071 struct extent_buffer *node, int slot, u64 *refs,
4074 struct btrfs_key key;
4075 struct btrfs_inode_extref *extref;
4076 char namebuf[BTRFS_NAME_LEN] = {0};
4086 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4087 total = btrfs_item_size_nr(node, slot);
4090 /* update inode ref count */
4092 name_len = btrfs_inode_extref_name_len(node, extref);
4093 index = btrfs_inode_extref_index(node, extref);
4094 parent = btrfs_inode_extref_parent(node, extref);
/* The copy length is capped so read_extent_buffer() cannot overrun namebuf. */
4095 if (name_len <= BTRFS_NAME_LEN) {
4098 len = BTRFS_NAME_LEN;
4099 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4100 root->objectid, ref_key->objectid, ref_key->offset);
4102 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4104 /* Check root dir ref name */
/*
 * NOTE(review): the comparison below is bounded by name_len, not the clamped
 * len, and namebuf has no room for a NUL when the name is BTRFS_NAME_LEN
 * bytes long, so the %s in the error message may read past it - confirm.
 */
4105 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4106 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4107 root->objectid, ref_key->objectid, ref_key->offset,
4109 err |= ROOT_DIR_ERROR;
4112 /* find related dir_index */
4113 key.objectid = parent;
4114 key.type = BTRFS_DIR_INDEX_KEY;
4116 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4119 /* find related dir_item */
4120 key.objectid = parent;
4121 key.type = BTRFS_DIR_ITEM_KEY;
4122 key.offset = btrfs_name_hash(namebuf, len);
4123 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next packed entry: fixed header plus the on-disk name length. */
4126 len = sizeof(*extref) + name_len;
4127 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4137 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4138 * DIR_ITEM/DIR_INDEX match.
4140 * @root: the root of the fs/file tree
4141 * @key: the key of the INODE_REF/INODE_EXTREF
4142 * @name: the name in the INODE_REF/INODE_EXTREF
4143 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4144 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4146 * @ext_ref: the EXTENDED_IREF feature
4148 * Return 0 if no error occurred.
4149 * Return >0 for error bitmap
/*
 * Look for a back reference matching (name, namelen, index).  The first half
 * scans the INODE_REF item found at *key; the second half re-keys the search
 * as INODE_EXTREF and scans that item, additionally requiring the entry's
 * parent to equal the directory id taken from key->offset.  An index of
 * (u64)-1 disables the index comparison.
 *
 * Note that *key is modified: its type and offset are overwritten before the
 * INODE_EXTREF search.
 */
4151 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4152 char *name, int namelen, u64 index,
4153 unsigned int ext_ref)
4155 struct btrfs_path path;
4156 struct btrfs_inode_ref *ref;
4157 struct btrfs_inode_extref *extref;
4158 struct extent_buffer *node;
4159 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4170 btrfs_init_path(&path);
4171 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4173 ret = INODE_REF_MISSING;
4177 node = path.nodes[0];
4178 slot = path.slots[0];
4180 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4181 total = btrfs_item_size_nr(node, slot);
4183 /* Iterate all entry of INODE_REF */
4184 while (cur < total) {
/* Assume "missing" until an entry matches. */
4185 ret = INODE_REF_MISSING;
4187 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4188 ref_index = btrfs_inode_ref_index(node, ref);
4189 if (index != (u64)-1 && index != ref_index)
4192 if (ref_namelen <= BTRFS_NAME_LEN) {
4195 len = BTRFS_NAME_LEN;
4196 warning("root %llu INODE %s[%llu %llu] name too long",
4198 key->type == BTRFS_INODE_REF_KEY ?
4200 key->objectid, key->offset);
4202 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
/* Names match only when both the clamped length and the bytes agree. */
4205 if (len != namelen || strncmp(ref_namebuf, name, len))
4211 len = sizeof(*ref) + ref_namelen;
4212 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4217 /* Skip if not support EXTENDED_IREF feature */
4221 btrfs_release_path(&path);
4222 btrfs_init_path(&path);
/* Remember the directory id before key->offset is replaced by the hash. */
4224 dir_id = key->offset;
4225 key->type = BTRFS_INODE_EXTREF_KEY;
4226 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4228 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4230 ret = INODE_REF_MISSING;
4234 node = path.nodes[0];
4235 slot = path.slots[0];
4237 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4239 total = btrfs_item_size_nr(node, slot);
4241 /* Iterate all entry of INODE_EXTREF */
4242 while (cur < total) {
4243 ret = INODE_REF_MISSING;
4245 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4246 ref_index = btrfs_inode_extref_index(node, extref);
4247 parent = btrfs_inode_extref_parent(node, extref);
4248 if (index != (u64)-1 && index != ref_index)
4251 if (parent != dir_id)
4254 if (ref_namelen <= BTRFS_NAME_LEN) {
4257 len = BTRFS_NAME_LEN;
/*
 * NOTE(review): unlike the INODE_REF warning above, this message carries its
 * own "Warning: " prefix and a trailing newline; key->type was set to
 * BTRFS_INODE_EXTREF_KEY earlier, so the REF_KEY test looks constant - confirm.
 */
4258 warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4260 key->type == BTRFS_INODE_REF_KEY ?
4262 key->objectid, key->offset);
4264 read_extent_buffer(node, ref_namebuf,
4265 (unsigned long)(extref + 1), len);
4267 if (len != namelen || strncmp(ref_namebuf, name, len))
4274 len = sizeof(*extref) + ref_namelen;
4275 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4280 btrfs_release_path(&path);
4285 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4286 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4288 * @root: the root of the fs/file tree
4289 * @key: the key of the DIR_ITEM/DIR_INDEX
4290 * @size: the st_size of the INODE_ITEM
4291 * @ext_ref: the EXTENDED_IREF feature
4293 * Return 0 if no error occurred.
/*
 * Walk the entries of one DIR_ITEM/DIR_INDEX item.  For each entry the
 * visible code adds the entry's name_len to *size, looks up the INODE_ITEM
 * named by the entry's location key, compares the inode's file type with the
 * type stored in the entry, and calls find_inode_ref() to look for the
 * matching INODE_REF/INODE_EXTREF.  Entries whose location is a ROOT_ITEM
 * key are treated specially (see the "Ignore related ROOT_ITEM check" test).
 */
4295 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4296 struct extent_buffer *node, int slot, u64 *size,
4297 unsigned int ext_ref)
4299 struct btrfs_dir_item *di;
4300 struct btrfs_inode_item *ii;
4301 struct btrfs_path path;
4302 struct btrfs_key location;
4303 char namebuf[BTRFS_NAME_LEN] = {0};
4316 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4317 * ignore index check.
4319 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4321 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4322 total = btrfs_item_size_nr(node, slot);
4324 while (cur < total) {
4325 data_len = btrfs_dir_data_len(node, di);
4327 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4328 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4329 "DIR_ITEM" : "DIR_INDEX",
4330 key->objectid, key->offset, data_len);
4332 name_len = btrfs_dir_name_len(node, di);
/* The copy length is capped so read_extent_buffer() cannot overrun namebuf. */
4333 if (name_len <= BTRFS_NAME_LEN) {
4336 len = BTRFS_NAME_LEN;
4337 warning("root %llu %s[%llu %llu] name too long",
4339 key->type == BTRFS_DIR_ITEM_KEY ?
4340 "DIR_ITEM" : "DIR_INDEX",
4341 key->objectid, key->offset);
/* The unclamped on-disk name length is what gets accumulated. */
4343 (*size) += name_len;
4345 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4346 filetype = btrfs_dir_type(node, di);
4348 btrfs_init_path(&path);
4349 btrfs_dir_item_key_to_cpu(node, di, &location);
4351 /* Ignore related ROOT_ITEM check */
4352 if (location.type == BTRFS_ROOT_ITEM_KEY)
4355 /* Check relative INODE_ITEM(existence/filetype) */
4356 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4358 err |= INODE_ITEM_MISSING;
4359 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4360 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4361 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4362 key->offset, location.objectid, name_len,
4367 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4368 struct btrfs_inode_item);
4369 mode = btrfs_inode_mode(path.nodes[0], ii);
/*
 * NOTE(review): namebuf is printed with %s in the messages of this function
 * but has no spare byte for a NUL when the name is BTRFS_NAME_LEN bytes
 * long - confirm.
 */
4371 if (imode_to_type(mode) != filetype) {
4372 err |= INODE_ITEM_MISMATCH;
4373 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4374 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4375 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4376 key->offset, name_len, namebuf, filetype);
4379 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse the location key: same objectid, offset becomes the directory id. */
4380 location.type = BTRFS_INODE_REF_KEY;
4381 location.offset = key->objectid;
4382 ret = find_inode_ref(root, &location, namebuf, len,
4385 if (ret & INODE_REF_MISSING)
4386 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4387 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4388 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4389 key->offset, name_len, namebuf, filetype);
4392 btrfs_release_path(&path);
/* Step to the next entry: header, name and any trailing data. */
4393 len = sizeof(*di) + name_len + data_len;
4394 di = (struct btrfs_dir_item *)((char *)di + len);
4397 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4398 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4399 root->objectid, key->objectid, key->offset);
4408 * Check file extent datasum/hole, update the size of the file extents,
4409 * check and update the last offset of the file extent.
4411 * @root: the root of fs/file tree.
4412 * @fkey: the key of the file extent.
4413 * @nodatasum: INODE_NODATASUM feature.
4414 * @size: the sum of all EXTENT_DATA items size for this inode.
4415 * @end: the offset of the last extent.
4417 * Return 0 if no error occurred.
/*
 * Validate one EXTENT_DATA item.  Inline extents are checked for a non-zero
 * length that agrees with the item's inline length.  Regular and prealloc
 * extents are checked against the csum tree via count_csum_range() and
 * against the hole rules selected by the global no_holes flag.  The extent
 * length is added to *size, and *end is advanced by it.
 */
4419 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4420 struct extent_buffer *node, int slot,
4421 unsigned int nodatasum, u64 *size, u64 *end)
4423 struct btrfs_file_extent_item *fi;
4426 u64 extent_num_bytes;
4428 unsigned int extent_type;
4429 unsigned int is_hole;
4433 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4435 extent_type = btrfs_file_extent_type(node, fi);
4436 /* Skip if file extent is inline */
4437 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4438 struct btrfs_item *e = btrfs_item_nr(slot);
4439 u32 item_inline_len;
4441 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4442 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
/* An inline extent must be non-empty and match the item's inline length. */
4443 if (extent_num_bytes == 0 ||
4444 extent_num_bytes != item_inline_len)
4445 err |= FILE_EXTENT_ERROR;
4446 *size += extent_num_bytes;
4450 /* Check extent type */
4451 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4452 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4453 err |= FILE_EXTENT_ERROR;
4454 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4455 root->objectid, fkey->objectid, fkey->offset);
4459 /* Check REG_EXTENT/PREALLOC_EXTENT */
4460 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4461 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4462 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
/* A hole is an extent with neither a disk address nor a disk length. */
4463 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4465 /* Check EXTENT_DATA datasum */
4466 ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4467 if (found > 0 && nodatasum) {
4468 err |= ODD_CSUM_ITEM;
4469 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4470 root->objectid, fkey->objectid, fkey->offset);
4471 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4473 (ret < 0 || found == 0 || found < disk_num_bytes)) {
4474 err |= CSUM_ITEM_MISSING;
4475 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4476 root->objectid, fkey->objectid, fkey->offset);
4477 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4478 err |= ODD_CSUM_ITEM;
4479 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4480 root->objectid, fkey->objectid, fkey->offset);
4483 /* Check EXTENT_DATA hole */
/*
 * With no_holes set an explicit hole extent is an error; without it, each
 * extent must start exactly where the previous one ended (*end).
 */
4484 if (no_holes && is_hole) {
4485 err |= FILE_EXTENT_ERROR;
4486 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4487 root->objectid, fkey->objectid, fkey->offset);
4488 } else if (!no_holes && *end != fkey->offset) {
4489 err |= FILE_EXTENT_ERROR;
4490 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4491 root->objectid, fkey->objectid, fkey->offset);
4494 *end += extent_num_bytes;
4496 *size += extent_num_bytes;
4502 * Check INODE_ITEM and related ITEMs (the same inode number)
4503 * 1. check link count
4504 * 2. check inode ref/extref
4505 * 3. check dir item/index
4507 * @ext_ref: the EXTENDED_IREF feature
4509 * Return 0 if no error occurred.
4510 * Return >0 (an error bitmap) if an error occurred or the traversal is done
/*
 * Check the INODE_ITEM at the current path position together with the items
 * that follow it and share its objectid.  Each following item is dispatched
 * by key type to check_inode_ref(), check_inode_extref(), check_dir_item()
 * or check_file_extent(); the collected ref count, directory size and extent
 * totals are then compared with nlink, isize and nbytes from the inode item.
 * The path is advanced with btrfs_next_item() as items are consumed.
 */
4512 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4513 unsigned int ext_ref)
4515 struct extent_buffer *node;
4516 struct btrfs_inode_item *ii;
4517 struct btrfs_key key;
4526 u64 extent_size = 0;
4528 unsigned int nodatasum;
4533 node = path->nodes[0];
4534 slot = path->slots[0];
4536 btrfs_item_key_to_cpu(node, &key, slot);
4537 inode_id = key.objectid;
/* The orphan objectid gets no inode checks here; just move to the next item. */
4539 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4540 ret = btrfs_next_item(root, path);
4546 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4547 isize = btrfs_inode_size(node, ii);
4548 nbytes = btrfs_inode_nbytes(node, ii);
4549 mode = btrfs_inode_mode(node, ii);
4550 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4551 nlink = btrfs_inode_nlink(node, ii);
4552 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4555 ret = btrfs_next_item(root, path);
4557 /* out will fill 'err' using current statistics */
4559 } else if (ret > 0) {
/* btrfs_next_item() may have moved to another leaf; reload node and slot. */
4564 node = path->nodes[0];
4565 slot = path->slots[0];
4566 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
4567 if (key.objectid != inode_id)
4571 case BTRFS_INODE_REF_KEY:
4572 ret = check_inode_ref(root, &key, node, slot, &refs,
4576 case BTRFS_INODE_EXTREF_KEY:
4577 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4578 warning("root %llu EXTREF[%llu %llu] isn't supported",
4579 root->objectid, key.objectid,
4581 ret = check_inode_extref(root, &key, node, slot, &refs,
4585 case BTRFS_DIR_ITEM_KEY:
4586 case BTRFS_DIR_INDEX_KEY:
4588 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4589 root->objectid, inode_id,
4590 imode_to_type(mode), key.objectid,
4593 ret = check_dir_item(root, &key, node, slot, &size,
4597 case BTRFS_EXTENT_DATA_KEY:
4599 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4600 root->objectid, inode_id, key.objectid,
4603 ret = check_file_extent(root, &key, node, slot,
4604 nodatasum, &extent_size,
4608 case BTRFS_XATTR_ITEM_KEY:
4611 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4612 key.objectid, key.type, key.offset);
4617 /* verify INODE_ITEM nlink/isize/nbytes */
4620 err |= LINK_COUNT_ERROR;
4621 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4622 root->objectid, inode_id, nlink);
4626 * Just a warning, as dir inode nbytes is just an
4627 * instructive value.
4629 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4630 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4631 root->objectid, inode_id, root->nodesize);
/* size was accumulated by check_dir_item() from the entry name lengths. */
4634 if (isize != size) {
4636 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4637 root->objectid, inode_id, isize, size);
4640 if (nlink != refs) {
4641 err |= LINK_COUNT_ERROR;
4642 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4643 root->objectid, inode_id, nlink, refs);
4644 } else if (!nlink) {
4648 if (!nbytes && !no_holes && extent_end < isize) {
4649 err |= NBYTES_ERROR;
4650 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4651 root->objectid, inode_id, isize);
/* extent_size was accumulated by check_file_extent(). */
4654 if (nbytes != extent_size) {
4655 err |= NBYTES_ERROR;
4656 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4657 root->objectid, inode_id, nbytes, extent_size);
4665 * Iterate all item on the tree and call check_inode_item() to check.
4667 * @root: the root of the tree to be checked.
4668 * @ext_ref: the EXTENDED_IREF feature
4670 * Return 0 if no error found.
4671 * Return <0 for error.
4672 * All internal error bitmap will be converted to -EIO, to avoid
4673 * mixing negative and positive return value.
/*
 * Drive check_inode_item() over one fs/file tree.  When the item at the
 * current position is not an INODE_ITEM, an error is reported,
 * NO_INODE_ITEM is recorded, and items are skipped with btrfs_next_item()
 * until the objectid changes.  The allocated path is freed before returning.
 */
4675 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4677 struct btrfs_path *path;
4678 struct btrfs_key key;
4682 path = btrfs_alloc_path();
4690 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4695 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4698 * All check must start with inode item, skip if not
4700 if (key.type == BTRFS_INODE_ITEM_KEY) {
4701 ret = check_inode_item(root, path, ext_ref);
/* LAST_ITEM in the error bitmap signals that the tree has been exhausted. */
4703 if (err & LAST_ITEM)
4707 error("root %llu ITEM[%llu %u %llu] isn't INODE_ITEM, skip to next inode",
4708 root->objectid, key.objectid, key.type,
4711 err |= NO_INODE_ITEM;
4712 inode_id = key.objectid;
4715 * skip to next inode
4716 * TODO: Maybe search_slot() will be faster?
4719 ret = btrfs_next_item(root, path);
4722 } else if (ret < 0) {
4726 btrfs_item_key_to_cpu(path->nodes[0], &key,
4728 } while (inode_id == key.objectid);
4735 btrfs_free_path(path);
4740 * Find the relative ref for root_ref and root_backref.
4742 * @root: the root of the root tree.
4743 * @ref_key: the key of the root ref.
4745 * Return 0 if no error occurred.
/*
 * Cross-check a ROOT_REF against its ROOT_BACKREF (or the reverse).  The
 * counterpart key is built by swapping objectid and offset and flipping the
 * key type; the two items must then agree on dirid, sequence, name length
 * and name bytes.  ROOT_REF_MISSING or ROOT_REF_MISMATCH is recorded in the
 * error bitmap on failure.
 */
4747 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4748 struct extent_buffer *node, int slot)
4750 struct btrfs_path *path;
4751 struct btrfs_key key;
4752 struct btrfs_root_ref *ref;
4753 struct btrfs_root_ref *backref;
4754 char ref_name[BTRFS_NAME_LEN];
4755 char backref_name[BTRFS_NAME_LEN];
4761 u32 backref_namelen;
4766 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
4767 ref_dirid = btrfs_root_ref_dirid(node, ref);
4768 ref_seq = btrfs_root_ref_sequence(node, ref);
4769 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* The copy length is capped so read_extent_buffer() cannot overrun ref_name. */
4771 if (ref_namelen <= BTRFS_NAME_LEN) {
4774 len = BTRFS_NAME_LEN;
4775 warning("%s[%llu %llu] ref_name too long",
4776 ref_key->type == BTRFS_ROOT_REF_KEY ?
4777 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
4780 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
4782 /* Find relative root_ref */
4783 key.objectid = ref_key->offset;
/* Sum-minus-self flips the type: ROOT_REF becomes ROOT_BACKREF and vice versa. */
4784 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
4785 key.offset = ref_key->objectid;
/*
 * NOTE(review): the freshly allocated path is handed straight to
 * btrfs_search_slot(); confirm whether btrfs_alloc_path() can return NULL
 * here and whether that needs handling.
 */
4787 path = btrfs_alloc_path();
4788 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4790 err |= ROOT_REF_MISSING;
4791 error("%s[%llu %llu] couldn't find relative ref",
4792 ref_key->type == BTRFS_ROOT_REF_KEY ?
4793 "ROOT_REF" : "ROOT_BACKREF",
4794 ref_key->objectid, ref_key->offset);
4798 backref = btrfs_item_ptr(path->nodes[0], path->slots[0],
4799 struct btrfs_root_ref);
4800 backref_dirid = btrfs_root_ref_dirid(path->nodes[0], backref);
4801 backref_seq = btrfs_root_ref_sequence(path->nodes[0], backref);
4802 backref_namelen = btrfs_root_ref_name_len(path->nodes[0], backref);
4804 if (backref_namelen <= BTRFS_NAME_LEN) {
4805 len = backref_namelen;
4807 len = BTRFS_NAME_LEN;
4808 warning("%s[%llu %llu] ref_name too long",
4809 key.type == BTRFS_ROOT_REF_KEY ?
4810 "ROOT_REF" : "ROOT_BACKREF",
4811 key.objectid, key.offset);
4813 read_extent_buffer(path->nodes[0], backref_name,
4814 (unsigned long)(backref + 1), len);
/*
 * The name buffers are not zero-initialised; the length comparison runs
 * first, so strncmp() is only reached when both name lengths are equal.
 */
4816 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
4817 ref_namelen != backref_namelen ||
4818 strncmp(ref_name, backref_name, len)) {
4819 err |= ROOT_REF_MISMATCH;
4820 error("%s[%llu %llu] mismatch relative ref",
4821 ref_key->type == BTRFS_ROOT_REF_KEY ?
4822 "ROOT_REF" : "ROOT_BACKREF",
4823 ref_key->objectid, ref_key->offset);
4826 btrfs_free_path(path);
/*
 * Walk every backref attached to an extent record and look for
 * inconsistencies: backrefs not found in the extent tree, tree backrefs that
 * were never referenced, data backrefs whose found/expected counts, disk
 * bytenr or byte length disagree with the record, and finally a mismatch
 * between the summed reference count and rec->refs.  Each problem has a
 * diagnostic written to stderr (presumably gated by print_errs - confirm).
 */
4830 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4832 struct list_head *cur = rec->backrefs.next;
4833 struct extent_backref *back;
4834 struct tree_backref *tback;
4835 struct data_backref *dback;
4839 while(cur != &rec->backrefs) {
4840 back = to_extent_backref(cur);
4842 if (!back->found_extent_tree) {
4846 if (back->is_data) {
4847 dback = to_data_backref(back);
4848 fprintf(stderr, "Backref %llu %s %llu"
4849 " owner %llu offset %llu num_refs %lu"
4850 " not found in extent tree\n",
4851 (unsigned long long)rec->start,
4852 back->full_backref ?
4854 back->full_backref ?
4855 (unsigned long long)dback->parent:
4856 (unsigned long long)dback->root,
4857 (unsigned long long)dback->owner,
4858 (unsigned long long)dback->offset,
4859 (unsigned long)dback->num_refs);
4861 tback = to_tree_backref(back);
4862 fprintf(stderr, "Backref %llu parent %llu"
4863 " root %llu not found in extent tree\n",
4864 (unsigned long long)rec->start,
4865 (unsigned long long)tback->parent,
4866 (unsigned long long)tback->root);
/* Tree backrefs carry a single found_ref flag rather than a count. */
4869 if (!back->is_data && !back->found_ref) {
4873 tback = to_tree_backref(back);
4874 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4875 (unsigned long long)rec->start,
4876 back->full_backref ? "parent" : "root",
4877 back->full_backref ?
4878 (unsigned long long)tback->parent :
4879 (unsigned long long)tback->root, back);
4881 if (back->is_data) {
4882 dback = to_data_backref(back);
4883 if (dback->found_ref != dback->num_refs) {
4887 fprintf(stderr, "Incorrect local backref count"
4888 " on %llu %s %llu owner %llu"
4889 " offset %llu found %u wanted %u back %p\n",
4890 (unsigned long long)rec->start,
4891 back->full_backref ?
4893 back->full_backref ?
4894 (unsigned long long)dback->parent:
4895 (unsigned long long)dback->root,
4896 (unsigned long long)dback->owner,
4897 (unsigned long long)dback->offset,
4898 dback->found_ref, dback->num_refs, back);
4900 if (dback->disk_bytenr != rec->start) {
4904 fprintf(stderr, "Backref disk bytenr does not"
4905 " match extent record, bytenr=%llu, "
4906 "ref bytenr=%llu\n",
4907 (unsigned long long)rec->start,
4908 (unsigned long long)dback->disk_bytenr);
4911 if (dback->bytes != rec->nr) {
4915 fprintf(stderr, "Backref bytes do not match "
4916 "extent backref, bytenr=%llu, ref "
4917 "bytes=%llu, backref bytes=%llu\n",
4918 (unsigned long long)rec->start,
4919 (unsigned long long)rec->nr,
4920 (unsigned long long)dback->bytes);
/* Accumulate the global reference count; data backrefs add found_ref. */
4923 if (!back->is_data) {
4926 dback = to_data_backref(back);
4927 found += dback->found_ref;
4930 if (found != rec->refs) {
4934 fprintf(stderr, "Incorrect global backref count "
4935 "on %llu found %llu wanted %llu\n",
4936 (unsigned long long)rec->start,
4937 (unsigned long long)found,
4938 (unsigned long long)rec->refs);
/*
 * Drain rec->backrefs: repeatedly take the first list entry and convert it
 * to its extent_backref until the list is empty.
 */
4944 static int free_all_extent_backrefs(struct extent_record *rec)
4946 struct extent_backref *back;
4947 struct list_head *cur;
4948 while (!list_empty(&rec->backrefs)) {
4949 cur = rec->backrefs.next;
4950 back = to_extent_backref(cur);
/*
 * Tear down an extent record cache: take the first cache extent, remove it
 * from the tree and release the backrefs of the extent_record embedding it.
 */
4957 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4958 struct cache_tree *extent_cache)
4960 struct cache_extent *cache;
4961 struct extent_record *rec;
4964 cache = first_cache_extent(extent_cache);
4967 rec = container_of(cache, struct extent_record, cache);
4968 remove_cache_extent(extent_cache, cache);
4969 free_all_extent_backrefs(rec);
/*
 * Drop an extent record from the cache once nothing is left to verify: its
 * content and owner ref are checked, the extent item ref count equals the
 * found refs (and is non-zero), there are no duplicates,
 * all_backpointers_checked() reports no problem, and none of the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags is set.
 */
4974 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4975 struct extent_record *rec)
4977 if (rec->content_checked && rec->owner_ref_checked &&
4978 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4979 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4980 !rec->bad_full_backref && !rec->crossing_stripes &&
4981 !rec->wrong_chunk_type) {
4982 remove_cache_extent(extent_cache, &rec->cache);
4983 free_all_extent_backrefs(rec);
4984 list_del_init(&rec->list);
/*
 * Verify that the tree block buf is referenced by the root named in its
 * header.  The record's backrefs are scanned first for a non-full tree
 * backref whose root equals the header owner.  Failing that, the owner's
 * fs root is read and searched for the block's first key with lowest_level
 * set one above the block's level; the block counts as found when the parent
 * node's block pointer at that slot equals buf->start.
 *
 * Returns 0 when found, 1 otherwise (final return statement).
 */
4990 static int check_owner_ref(struct btrfs_root *root,
4991 struct extent_record *rec,
4992 struct extent_buffer *buf)
4994 struct extent_backref *node;
4995 struct tree_backref *back;
4996 struct btrfs_root *ref_root;
4997 struct btrfs_key key;
4998 struct btrfs_path path;
4999 struct extent_buffer *parent;
5004 list_for_each_entry(node, &rec->backrefs, list) {
5007 if (!node->found_ref)
5009 if (node->full_backref)
5011 back = to_tree_backref(node);
5012 if (btrfs_header_owner(buf) == back->root)
5015 BUG_ON(rec->is_root);
5017 /* try to find the block by search corresponding fs tree */
5018 key.objectid = btrfs_header_owner(buf);
5019 key.type = BTRFS_ROOT_ITEM_KEY;
5020 key.offset = (u64)-1;
5022 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5023 if (IS_ERR(ref_root))
5026 level = btrfs_header_level(buf);
5028 btrfs_item_key_to_cpu(buf, &key, 0);
5030 btrfs_node_key_to_cpu(buf, &key, 0);
5032 btrfs_init_path(&path);
/* Stop the search one level above buf so path.nodes[level + 1] is its parent. */
5033 path.lowest_level = level + 1;
5034 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5038 parent = path.nodes[level + 1];
5039 if (parent && buf->start == btrfs_node_blockptr(parent,
5040 path.slots[level + 1]))
5043 btrfs_release_path(&path);
5044 return found ? 0 : 1;
/*
 * Scan the record's backrefs for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID; full backrefs are tested separately first.
 */
5047 static int is_extent_tree_record(struct extent_record *rec)
5049 struct list_head *cur = rec->backrefs.next;
5050 struct extent_backref *node;
5051 struct tree_backref *back;
5054 while(cur != &rec->backrefs) {
5055 node = to_extent_backref(cur);
5059 back = to_tree_backref(node);
5060 if (node->full_backref)
5062 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable block as a corrupt extent.  The extent record
 * covering [start, start + len) is looked up and tested with
 * is_extent_tree_record(); the record's parent key is then converted to CPU
 * order and passed to btrfs_add_corrupt_extent_record(), whose result is
 * returned.
 */
5069 static int record_bad_block_io(struct btrfs_fs_info *info,
5070 struct cache_tree *extent_cache,
5073 struct extent_record *rec;
5074 struct cache_extent *cache;
5075 struct btrfs_key key;
5077 cache = lookup_cache_extent(extent_cache, start, len);
5081 rec = container_of(cache, struct extent_record, cache);
5082 if (!is_extent_tree_record(rec))
5085 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5086 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at slot and slot + 1 of a tree block.  In a node the
 * two key pointers are swapped wholesale and the low keys in the path are
 * fixed up.  In a leaf the item payloads, offsets, sizes and keys are
 * exchanged, going through two heap buffers for the payloads.
 */
5089 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5090 struct extent_buffer *buf, int slot)
5092 if (btrfs_header_level(buf)) {
5093 struct btrfs_key_ptr ptr1, ptr2;
5095 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5096 sizeof(struct btrfs_key_ptr));
5097 read_extent_buffer(buf, &ptr2,
5098 btrfs_node_key_ptr_offset(slot + 1),
5099 sizeof(struct btrfs_key_ptr));
5100 write_extent_buffer(buf, &ptr1,
5101 btrfs_node_key_ptr_offset(slot + 1),
5102 sizeof(struct btrfs_key_ptr));
5103 write_extent_buffer(buf, &ptr2,
5104 btrfs_node_key_ptr_offset(slot),
5105 sizeof(struct btrfs_key_ptr));
5107 struct btrfs_disk_key key;
5108 btrfs_node_key(buf, &key, 0);
5109 btrfs_fixup_low_keys(root, path, &key,
5110 btrfs_header_level(buf) + 1);
5113 struct btrfs_item *item1, *item2;
5114 struct btrfs_key k1, k2;
5115 char *item1_data, *item2_data;
5116 u32 item1_offset, item2_offset, item1_size, item2_size;
5118 item1 = btrfs_item_nr(slot);
5119 item2 = btrfs_item_nr(slot + 1);
5120 btrfs_item_key_to_cpu(buf, &k1, slot);
5121 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5122 item1_offset = btrfs_item_offset(buf, item1);
5123 item2_offset = btrfs_item_offset(buf, item2);
5124 item1_size = btrfs_item_size(buf, item1);
5125 item2_size = btrfs_item_size(buf, item2);
5127 item1_data = malloc(item1_size);
5130 item2_data = malloc(item2_size);
5136 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5137 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write copies the *other* item's size out of a buffer
 * that was allocated with this item's size; when the two sizes differ one of
 * these reads past its allocation - confirm the intended lengths.
 */
5139 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5140 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5144 btrfs_set_item_offset(buf, item1, item2_offset);
5145 btrfs_set_item_offset(buf, item2, item1_offset);
5146 btrfs_set_item_size(buf, item1, item2_size);
5147 btrfs_set_item_size(buf, item2, item1_size);
/* path->slots[0] is repointed so each key update lands on the intended slot. */
5149 path->slots[0] = slot;
5150 btrfs_set_item_key_unsafe(root, path, &k2);
5151 path->slots[0] = slot + 1;
5152 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->lowest_level: adjacent keys are
 * compared pairwise and swap_values() is called for a pair that is not in
 * strictly ascending order; the buffer is then marked dirty.
 */
5157 static int fix_key_order(struct btrfs_trans_handle *trans,
5158 struct btrfs_root *root,
5159 struct btrfs_path *path)
5161 struct extent_buffer *buf;
5162 struct btrfs_key k1, k2;
5164 int level = path->lowest_level;
5167 buf = path->nodes[level];
/*
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type and the
 * block is empty, "nritems - 1" wraps to a huge bound - confirm.
 */
5168 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5170 btrfs_node_key_to_cpu(buf, &k1, i);
5171 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5173 btrfs_item_key_to_cpu(buf, &k1, i);
5174 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Already in order: nothing to do for this pair. */
5176 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5178 ret = swap_values(root, path, buf, i);
5181 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at slot from a leaf.  Only a fixed set of key types
 * is eligible (DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF,
 * EXTENT_DATA_REF).  The following item headers are shifted down by one, the
 * header item count is decremented and the buffer is marked dirty.  Only
 * item headers are moved here; the item payload bytes are left in place.
 */
5187 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5188 struct btrfs_root *root,
5189 struct btrfs_path *path,
5190 struct extent_buffer *buf, int slot)
5192 struct btrfs_key key;
5193 int nritems = btrfs_header_nritems(buf);
5195 btrfs_item_key_to_cpu(buf, &key, slot);
5197 /* These are all the keys we can deal with missing. */
5198 if (key.type != BTRFS_DIR_INDEX_KEY &&
5199 key.type != BTRFS_EXTENT_ITEM_KEY &&
5200 key.type != BTRFS_METADATA_ITEM_KEY &&
5201 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5202 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5205 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5206 (unsigned long long)key.objectid, key.type,
5207 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the removed slot. */
5208 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5209 btrfs_item_nr_offset(slot + 1),
5210 sizeof(struct btrfs_item) *
5211 (nritems - slot - 1));
5212 btrfs_set_header_nritems(buf, nritems - 1);
5214 struct btrfs_disk_key disk_key;
5216 btrfs_item_key(buf, &disk_key, 0);
5217 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5219 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in a leaf.  Item 0 must end at
 * BTRFS_LEAF_DATA_SIZE(root), and every later item must end where the
 * previous item's data begins.  An item that ends too high is handed to
 * delete_bogus_item(); an item that ends too low is moved up by the size of
 * the gap and its offset is rewritten.
 */
5223 static int fix_item_offset(struct btrfs_trans_handle *trans,
5224 struct btrfs_root *root,
5225 struct btrfs_path *path)
5227 struct extent_buffer *buf;
5231 /* We should only get this for leaves */
5232 BUG_ON(path->lowest_level);
5233 buf = path->nodes[0];
5235 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5236 unsigned int shift = 0, offset;
5238 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5239 BTRFS_LEAF_DATA_SIZE(root)) {
5240 if (btrfs_item_end_nr(buf, i) >
5241 BTRFS_LEAF_DATA_SIZE(root)) {
5242 ret = delete_bogus_item(trans, root, path,
5246 fprintf(stderr, "item is off the end of the "
5247 "leaf, can't fix\n");
/* Gap between the end of item 0 and the end of the leaf data area. */
5251 shift = BTRFS_LEAF_DATA_SIZE(root) -
5252 btrfs_item_end_nr(buf, i);
5253 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5254 btrfs_item_offset_nr(buf, i - 1)) {
5255 if (btrfs_item_end_nr(buf, i) >
5256 btrfs_item_offset_nr(buf, i - 1)) {
5257 ret = delete_bogus_item(trans, root, path,
5261 fprintf(stderr, "items overlap, can't fix\n");
/* Gap between this item's end and the previous item's start. */
5265 shift = btrfs_item_offset_nr(buf, i - 1) -
5266 btrfs_item_end_nr(buf, i);
5271 printf("Shifting item nr %d by %u bytes in block %llu\n",
5272 i, shift, (unsigned long long)buf->start);
5273 offset = btrfs_item_offset_nr(buf, i);
5274 memmove_extent_buffer(buf,
5275 btrfs_leaf_data(buf) + offset + shift,
5276 btrfs_leaf_data(buf) + offset,
5277 btrfs_item_size_nr(buf, i));
5278 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5280 btrfs_mark_buffer_dirty(buf);
5284 * We may have moved things, in which case we want to exit so we don't
5285 * write those changes out. Once we have proper abort functionality in
5286 * progs this can be changed to something nicer.
5293 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5294 * then just return -EIO.
/*
 * Attempt an in-place repair of a tree block that failed the basic checks.
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are handled.  Every root that
 * references the block (per btrfs_find_all_roots()) is visited: a
 * transaction is started on it, the block is located with a COW search that
 * has block checking disabled, and fix_key_order() or fix_item_offset() is
 * applied before the transaction is committed.
 */
5296 static int try_to_fix_bad_block(struct btrfs_root *root,
5297 struct extent_buffer *buf,
5298 enum btrfs_tree_block_status status)
5300 struct btrfs_trans_handle *trans;
5301 struct ulist *roots;
5302 struct ulist_node *node;
5303 struct btrfs_root *search_root;
5304 struct btrfs_path path;
5305 struct ulist_iterator iter;
5306 struct btrfs_key root_key, key;
5309 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5310 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5313 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5317 btrfs_init_path(&path);
5318 ULIST_ITER_INIT(&iter);
5319 while ((node = ulist_next(roots, &iter))) {
5320 root_key.objectid = node->val;
5321 root_key.type = BTRFS_ROOT_ITEM_KEY;
5322 root_key.offset = (u64)-1;
5324 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5331 trans = btrfs_start_transaction(search_root, 0);
5332 if (IS_ERR(trans)) {
5333 ret = PTR_ERR(trans);
/*
 * Search down to the bad block's own level, with block checking skipped so
 * the search does not trip over the very corruption being repaired.
 */
5337 path.lowest_level = btrfs_header_level(buf);
5338 path.skip_check_block = 1;
5339 if (path.lowest_level)
5340 btrfs_node_key_to_cpu(buf, &key, 0);
5342 btrfs_item_key_to_cpu(buf, &key, 0);
5343 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5346 btrfs_commit_transaction(trans, search_root);
5349 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5350 ret = fix_key_order(trans, search_root, &path);
5351 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5352 ret = fix_item_offset(trans, search_root, &path);
5354 btrfs_commit_transaction(trans, search_root);
5357 btrfs_release_path(&path);
5358 btrfs_commit_transaction(trans, search_root);
5361 btrfs_release_path(&path);
/*
 * Run the basic structural checks on tree block @buf and update its
 * extent record.
 *
 * The record covering [buf->start, buf->len) is looked up in @extent_cache
 * and receives the header generation, the objectid of the block's first key
 * (only when the block has items) and the block level.  The block is then
 * verified with btrfs_check_leaf() or btrfs_check_node() against the
 * recorded parent key.  A status other than BTRFS_TREE_BLOCK_CLEAN leads to
 * try_to_fix_bad_block() and, if the result still is not
 * BTRFS_TREE_BLOCK_CLEAN, to a "bad block" message on stderr.
 *
 * On the path that reaches the end, content_checked is set and
 * owner_ref_checked is set either because @flags carries
 * BTRFS_BLOCK_FLAG_FULL_BACKREF or in connection with check_owner_ref()
 * (the condition on its result is not visible here), before the record is
 * passed to maybe_free_extent_rec().
 *
 * NOTE(review): try_to_fix_bad_block() is declared as returning int, but
 * its result is stored in the enum-typed 'status' and compared against
 * BTRFS_TREE_BLOCK_CLEAN.
 */
5365 static int check_block(struct btrfs_root *root,
5366 struct cache_tree *extent_cache,
5367 struct extent_buffer *buf, u64 flags)
5369 struct extent_record *rec;
5370 struct cache_extent *cache;
5371 struct btrfs_key key;
5372 enum btrfs_tree_block_status status;
5376 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5379 rec = container_of(cache, struct extent_record, cache);
5380 rec->generation = btrfs_header_generation(buf);
5382 level = btrfs_header_level(buf);
5383 if (btrfs_header_nritems(buf) > 0) {
5386 btrfs_item_key_to_cpu(buf, &key, 0);
5388 btrfs_node_key_to_cpu(buf, &key, 0);
5390 rec->info_objectid = key.objectid;
5392 rec->info_level = level;
5394 if (btrfs_is_leaf(buf))
5395 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5397 status = btrfs_check_node(root, &rec->parent_key, buf);
5399 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5401 status = try_to_fix_bad_block(root, buf, status);
5402 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5404 fprintf(stderr, "bad block %llu\n",
5405 (unsigned long long)buf->start);
5408 * Signal to callers we need to start the scan over
5409 * again since we'll have cowed blocks.
5414 rec->content_checked = 1;
5415 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5416 rec->owner_ref_checked = 1;
5418 ret = check_owner_ref(root, rec, buf);
5420 rec->owner_ref_checked = 1;
5424 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search rec->backrefs for a tree backref matching @parent / @root.
 *
 * The list is walked from rec->backrefs.next until the list head is reached
 * again; each entry is converted with to_extent_backref() and
 * to_tree_backref().  Two comparisons are visible: back->parent against
 * @parent, guarded by a test of node->full_backref being clear, and
 * back->root against @root, guarded by a test of node->full_backref being
 * set.  What each guard does (presumably skip the entry) and which
 * branch applies for a given @parent are not visible in this listing -
 * TODO confirm, together with the value returned when nothing matches.
 */
5428 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5429 u64 parent, u64 root)
5431 struct list_head *cur = rec->backrefs.next;
5432 struct extent_backref *node;
5433 struct tree_backref *back;
5435 while(cur != &rec->backrefs) {
5436 node = to_extent_backref(cur);
5440 back = to_tree_backref(node);
5442 if (!node->full_backref)
5444 if (parent == back->parent)
5447 if (node->full_backref)
5449 if (back->root == root)
/*
 * Allocate a tree backref for @rec and queue it on rec->backrefs.
 *
 * The structure comes from malloc(); only the embedded 'node' member is
 * zeroed with memset().  One branch stores @parent and sets
 * node.full_backref to 1, the other clears node.full_backref (the branch
 * condition and the assignment made in the second branch are not visible in
 * this listing).  The new entry is appended with list_add_tail().
 */
5456 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5457 u64 parent, u64 root)
5459 struct tree_backref *ref = malloc(sizeof(*ref));
5463 memset(&ref->node, 0, sizeof(ref->node));
5465 ref->parent = parent;
5466 ref->node.full_backref = 1;
5469 ref->node.full_backref = 0;
5471 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search rec->backrefs for a data backref matching the given identity.
 *
 * Entries are converted with to_extent_backref() and to_data_backref().
 * Two comparisons are visible: back->parent against @parent, guarded by
 * a test of node->full_backref being clear, and the triple
 * (back->root, back->owner, back->offset) against (@root, @owner,
 * @offset), guarded by a test of node->full_backref being set.  For a
 * triple match there is an extra test: when found_ref is set, the entry
 * already has node->found_ref set, and its bytes or disk_bytenr differ from
 * @bytes / @disk_bytenr.  What happens on that test (presumably the entry
 * is skipped) is not visible here - TODO confirm.
 */
5476 static struct data_backref *find_data_backref(struct extent_record *rec,
5477 u64 parent, u64 root,
5478 u64 owner, u64 offset,
5480 u64 disk_bytenr, u64 bytes)
5482 struct list_head *cur = rec->backrefs.next;
5483 struct extent_backref *node;
5484 struct data_backref *back;
5486 while(cur != &rec->backrefs) {
5487 node = to_extent_backref(cur);
5491 back = to_data_backref(node);
5493 if (!node->full_backref)
5495 if (parent == back->parent)
5498 if (node->full_backref)
5500 if (back->root == root && back->owner == owner &&
5501 back->offset == offset) {
5502 if (found_ref && node->found_ref &&
5503 (back->bytes != bytes ||
5504 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref for @rec and queue it on rec->backrefs.
 *
 * The structure comes from malloc(); the embedded 'node' is zeroed and
 * flagged with is_data = 1.  One branch stores @parent and sets
 * node.full_backref, the other stores @offset (further assignments of that
 * branch are not visible here) and clears node.full_backref.  'bytes' is
 * initialised from max_size, the entry is appended with list_add_tail(),
 * and rec->max_size is raised to max_size when it was smaller.
 */
5513 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5514 u64 parent, u64 root,
5515 u64 owner, u64 offset,
5518 struct data_backref *ref = malloc(sizeof(*ref));
5522 memset(&ref->node, 0, sizeof(ref->node));
5523 ref->node.is_data = 1;
5526 ref->parent = parent;
5529 ref->node.full_backref = 1;
5533 ref->offset = offset;
5534 ref->node.full_backref = 0;
5536 ref->bytes = max_size;
5539 list_add_tail(&ref->node.list, &rec->backrefs);
5540 if (max_size > rec->max_size)
5541 rec->max_size = max_size;
5545 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the block group found for rec->start
 * (via btrfs_lookup_first_block_group() on global_info) has flags that do
 * not fit the extent:
 *  - a non-metadata record needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata record needs SYSTEM or METADATA;
 *  - when the record has backrefs, only the first one is inspected: a data
 *    backref on a tree block is flagged, otherwise a backref whose root is
 *    BTRFS_CHUNK_TREE_OBJECTID requires a SYSTEM block group and any other
 *    root requires a METADATA block group.
 * The flag is only ever set here, never cleared.
 */
5546 static void check_extent_type(struct extent_record *rec)
5548 struct btrfs_block_group_cache *bg_cache;
5550 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5554 /* data extent, check chunk directly*/
5555 if (!rec->metadata) {
5556 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5557 rec->wrong_chunk_type = 1;
5561 /* metadata extent, check the obvious case first */
5562 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5563 BTRFS_BLOCK_GROUP_METADATA))) {
5564 rec->wrong_chunk_type = 1;
5569 * Check SYSTEM extent, as it's also marked as metadata, we can only
5570 * make sure it's a SYSTEM extent by its backref
5572 if (!list_empty(&rec->backrefs)) {
5573 struct extent_backref *node;
5574 struct tree_backref *tback;
5577 node = to_extent_backref(rec->backrefs.next);
5578 if (node->is_data) {
5579 /* tree block shouldn't have data backref */
5580 rec->wrong_chunk_type = 1;
5583 tback = container_of(node, struct tree_backref, node);
5585 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5586 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5588 bg_type = BTRFS_BLOCK_GROUP_METADATA;
5589 if (!(bg_cache->flags & bg_type))
5590 rec->wrong_chunk_type = 1;
5595 * Allocate a new extent record, fill default values from @tmpl and insert into
5596 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5597 * the cache, otherwise it fails.
/*
 * Copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key.  Reset to defaults: num_duplicates,
 * flag_block_full_backref (FLAG_UNSET), bad_full_backref, crossing_stripes
 * and wrong_chunk_type.  The three list heads are initialised empty.
 *
 * After insert_cache_extent() the global bytes_used counter grows by
 * rec->nr, crossing_stripes is computed with check_crossing_stripes() using
 * the tree root's nodesize (the guarding condition is not visible in this
 * listing) and check_extent_type() is run.
 *
 * NOTE(review): rec->nr is max(tmpl->nr, tmpl->max_size) while the cache
 * entry size is tmpl->nr, so the two can differ when max_size > nr.
 */
5599 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5600 struct extent_record *tmpl)
5602 struct extent_record *rec;
5605 rec = malloc(sizeof(*rec));
5608 rec->start = tmpl->start;
5609 rec->max_size = tmpl->max_size;
5610 rec->nr = max(tmpl->nr, tmpl->max_size);
5611 rec->found_rec = tmpl->found_rec;
5612 rec->content_checked = tmpl->content_checked;
5613 rec->owner_ref_checked = tmpl->owner_ref_checked;
5614 rec->num_duplicates = 0;
5615 rec->metadata = tmpl->metadata;
5616 rec->flag_block_full_backref = FLAG_UNSET;
5617 rec->bad_full_backref = 0;
5618 rec->crossing_stripes = 0;
5619 rec->wrong_chunk_type = 0;
5620 rec->is_root = tmpl->is_root;
5621 rec->refs = tmpl->refs;
5622 rec->extent_item_refs = tmpl->extent_item_refs;
5623 rec->parent_generation = tmpl->parent_generation;
5624 INIT_LIST_HEAD(&rec->backrefs);
5625 INIT_LIST_HEAD(&rec->dups);
5626 INIT_LIST_HEAD(&rec->list);
5627 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
5628 rec->cache.start = tmpl->start;
5629 rec->cache.size = tmpl->nr;
5630 ret = insert_cache_extent(extent_cache, &rec->cache);
5635 bytes_used += rec->nr;
5638 rec->crossing_stripes = check_crossing_stripes(global_info,
5639 rec->start, global_info->tree_root->nodesize);
5640 check_extent_type(rec);
5645 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5647 * - refs - if found, increase refs
5648 * - is_root - if found, set
5649 * - content_checked - if found, set
5650 * - owner_ref_checked - if found, set
5652 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into an existing extent record or create a new one.
 *
 * The cache is searched for [tmpl->start, tmpl->nr).  For an existing
 * record:
 *  - when tmpl->found_rec is set and either the start differs or the record
 *    was already found, the record is put on the global duplicate_extents
 *    list (once) and a freshly malloc()ed copy of the template's start,
 *    max_size, metadata and extent_item_refs is appended to rec->dups,
 *    bumping rec->num_duplicates;
 *  - a non-zero tmpl->extent_item_refs (and no duplicate) replaces
 *    rec->extent_item_refs, printing a message when one was already set;
 *  - content_checked / owner_ref_checked are only ever raised, parent_key
 *    is overwritten, parent_generation is taken when non-zero, and max_size
 *    only grows;
 *  - crossing_stripes and the chunk type are re-evaluated and the record is
 *    handed to maybe_free_extent_rec().
 * When nothing is found the work is delegated to add_extent_rec_nolookup().
 *
 * NOTE(review): several conditions and the return statements are not
 * visible in this listing; the summary covers only the visible statements.
 */
5654 static int add_extent_rec(struct cache_tree *extent_cache,
5655 struct extent_record *tmpl)
5657 struct extent_record *rec;
5658 struct cache_extent *cache;
5662 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5664 rec = container_of(cache, struct extent_record, cache);
5668 rec->nr = max(tmpl->nr, tmpl->max_size);
5671 * We need to make sure to reset nr to whatever the extent
5672 * record says was the real size, this way we can compare it to
5675 if (tmpl->found_rec) {
5676 if (tmpl->start != rec->start || rec->found_rec) {
5677 struct extent_record *tmp;
5680 if (list_empty(&rec->list))
5681 list_add_tail(&rec->list,
5682 &duplicate_extents);
5685 * We have to do this song and dance in case we
5686 * find an extent record that falls inside of
5687 * our current extent record but does not have
5688 * the same objectid.
5690 tmp = malloc(sizeof(*tmp));
5693 tmp->start = tmpl->start;
5694 tmp->max_size = tmpl->max_size;
5697 tmp->metadata = tmpl->metadata;
5698 tmp->extent_item_refs = tmpl->extent_item_refs;
5699 INIT_LIST_HEAD(&tmp->list);
5700 list_add_tail(&tmp->list, &rec->dups);
5701 rec->num_duplicates++;
5708 if (tmpl->extent_item_refs && !dup) {
5709 if (rec->extent_item_refs) {
5710 fprintf(stderr, "block %llu rec "
5711 "extent_item_refs %llu, passed %llu\n",
5712 (unsigned long long)tmpl->start,
5713 (unsigned long long)
5714 rec->extent_item_refs,
5715 (unsigned long long)tmpl->extent_item_refs);
5717 rec->extent_item_refs = tmpl->extent_item_refs;
5721 if (tmpl->content_checked)
5722 rec->content_checked = 1;
5723 if (tmpl->owner_ref_checked)
5724 rec->owner_ref_checked = 1;
5725 memcpy(&rec->parent_key, &tmpl->parent_key,
5726 sizeof(tmpl->parent_key));
5727 if (tmpl->parent_generation)
5728 rec->parent_generation = tmpl->parent_generation;
5729 if (rec->max_size < tmpl->max_size)
5730 rec->max_size = tmpl->max_size;
5733 * A metadata extent can't cross stripe_len boundary, otherwise
5734 * kernel scrub won't be able to handle it.
5735 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5739 rec->crossing_stripes = check_crossing_stripes(
5740 global_info, rec->start,
5741 global_info->tree_root->nodesize);
5742 check_extent_type(rec);
5743 maybe_free_extent_rec(extent_cache, rec);
5747 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for the extent at @bytenr.
 *
 * The extent record is looked up with a one-byte range at @bytenr; when it
 * is missing a zeroed template with start = bytenr is inserted through
 * add_extent_rec_nolookup() and the lookup is repeated.  A record whose
 * start differs from @bytenr is treated specially (see the comment in the
 * body; the action taken is not visible in this listing).
 *
 * The matching backref is fetched with find_tree_backref() or created with
 * alloc_tree_backref().  Two flag updates are visible: node.found_ref and
 * node.found_extent_tree, each preceded by an "already exists" message on
 * stderr when the flag was set before.  Which of the two runs presumably
 * depends on @found_ref - TODO confirm.  Finally the chunk type is
 * re-checked and the record is passed to maybe_free_extent_rec().
 */
5752 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5753 u64 parent, u64 root, int found_ref)
5755 struct extent_record *rec;
5756 struct tree_backref *back;
5757 struct cache_extent *cache;
5760 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5762 struct extent_record tmpl;
5764 memset(&tmpl, 0, sizeof(tmpl));
5765 tmpl.start = bytenr;
5769 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5773 /* really a bug in cache_extent implement now */
5774 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5779 rec = container_of(cache, struct extent_record, cache);
5780 if (rec->start != bytenr) {
5782 * Several cause, from unaligned bytenr to over lapping extents
5787 back = find_tree_backref(rec, parent, root);
5789 back = alloc_tree_backref(rec, parent, root);
5795 if (back->node.found_ref) {
5796 fprintf(stderr, "Extent back ref already exists "
5797 "for %llu parent %llu root %llu \n",
5798 (unsigned long long)bytenr,
5799 (unsigned long long)parent,
5800 (unsigned long long)root);
5802 back->node.found_ref = 1;
5804 if (back->node.found_extent_tree) {
5805 fprintf(stderr, "Extent back ref already exists "
5806 "for %llu parent %llu root %llu \n",
5807 (unsigned long long)bytenr,
5808 (unsigned long long)parent,
5809 (unsigned long long)root);
5811 back->node.found_extent_tree = 1;
5813 check_extent_type(rec);
5814 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up with a one-byte range at @bytenr; when it
 * is missing a zeroed template (start = bytenr, max_size = max_size) is
 * inserted through add_extent_rec_nolookup() and the lookup is repeated.
 * rec->max_size only grows.
 *
 * The backref is fetched with find_data_backref() or created with
 * alloc_data_backref().  Two update paths are visible:
 *  - one asserts num_refs == 1 (BUG_ON), asserts that an already found
 *    backref has the same byte count, then sets node.found_ref, increments
 *    back->found_ref, stores bytes/disk_bytenr and marks the record as
 *    content- and owner-checked;
 *  - the other reports a duplicate when node.found_extent_tree was already
 *    set, stores num_refs and sets node.found_extent_tree.
 * Which path runs presumably depends on @found_ref - TODO confirm.
 */
5818 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5819 u64 parent, u64 root, u64 owner, u64 offset,
5820 u32 num_refs, int found_ref, u64 max_size)
5822 struct extent_record *rec;
5823 struct data_backref *back;
5824 struct cache_extent *cache;
5827 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5829 struct extent_record tmpl;
5831 memset(&tmpl, 0, sizeof(tmpl));
5832 tmpl.start = bytenr;
5834 tmpl.max_size = max_size;
5836 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5840 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5845 rec = container_of(cache, struct extent_record, cache);
5846 if (rec->max_size < max_size)
5847 rec->max_size = max_size;
5850 * If found_ref is set then max_size is the real size and must match the
5851 * existing refs. So if we have already found a ref then we need to
5852 * make sure that this ref matches the existing one, otherwise we need
5853 * to add a new backref so we can notice that the backrefs don't match
5854 * and we need to figure out who is telling the truth. This is to
5855 * account for that awful fsync bug I introduced where we'd end up with
5856 * a btrfs_file_extent_item that would have its length include multiple
5857 * prealloc extents or point inside of a prealloc extent.
5859 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5862 back = alloc_data_backref(rec, parent, root, owner, offset,
5868 BUG_ON(num_refs != 1);
5869 if (back->node.found_ref)
5870 BUG_ON(back->bytes != max_size);
5871 back->node.found_ref = 1;
5872 back->found_ref += 1;
5873 back->bytes = max_size;
5874 back->disk_bytenr = bytenr;
5876 rec->content_checked = 1;
5877 rec->owner_ref_checked = 1;
5879 if (back->node.found_extent_tree) {
5880 fprintf(stderr, "Extent back ref already exists "
5881 "for %llu parent %llu root %llu "
5882 "owner %llu offset %llu num_refs %lu\n",
5883 (unsigned long long)bytenr,
5884 (unsigned long long)parent,
5885 (unsigned long long)root,
5886 (unsigned long long)owner,
5887 (unsigned long long)offset,
5888 (unsigned long)num_refs);
5890 back->num_refs = num_refs;
5891 back->node.found_extent_tree = 1;
5893 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [bytenr, bytenr + size) for processing.
 *
 * The range is first added to @seen and then to @pending.  The result of
 * the @pending insertion is not checked; how a failure of the @seen
 * insertion is handled is not visible in this listing (presumably the
 * function returns early so that a block is queued only once - TODO
 * confirm).
 */
5897 static int add_pending(struct cache_tree *pending,
5898 struct cache_tree *seen, u64 bytenr, u32 size)
5901 ret = add_cache_extent(seen, bytenr, size);
5904 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store their start/size
 * pairs in @bits (at most @bits_nr entries).
 *
 * Visible selection order:
 *  1. the first entry of @reada, stored in bits[0];
 *  2. entries of @nodes, searched from @last minus 32768 (when @last is
 *     larger than that), with a second search from offset 0;
 *  3. entries of @pending, searched from offset 0.
 * Entries are copied while walking with next_cache_extent() until the tree
 * ends or @bits is full.  When more than 8 slots remain free, @pending is
 * additionally scanned from the end of bits[0] and entries are appended as
 * long as the gap to the previous range does not exceed 32768 bytes.
 *
 * NOTE(review): 'node_start' is an unsigned long initialised from the u64
 * @last, which narrows the value where unsigned long is 32 bits wide.
 * NOTE(review): the branch conditions between the steps and the return
 * values are not visible in this listing - confirm in the full source.
 */
5908 static int pick_next_pending(struct cache_tree *pending,
5909 struct cache_tree *reada,
5910 struct cache_tree *nodes,
5911 u64 last, struct block_info *bits, int bits_nr,
5914 unsigned long node_start = last;
5915 struct cache_extent *cache;
5918 cache = search_cache_extent(reada, 0);
5920 bits[0].start = cache->start;
5921 bits[0].size = cache->size;
5926 if (node_start > 32768)
5927 node_start -= 32768;
5929 cache = search_cache_extent(nodes, node_start);
5931 cache = search_cache_extent(nodes, 0);
5934 cache = search_cache_extent(pending, 0);
5939 bits[ret].start = cache->start;
5940 bits[ret].size = cache->size;
5941 cache = next_cache_extent(cache);
5943 } while (cache && ret < bits_nr);
5949 bits[ret].start = cache->start;
5950 bits[ret].size = cache->size;
5951 cache = next_cache_extent(cache);
5953 } while (cache && ret < bits_nr);
5955 if (bits_nr - ret > 8) {
5956 u64 lookup = bits[0].start + bits[0].size;
5957 struct cache_extent *next;
5958 next = search_cache_extent(pending, lookup);
5960 if (next->start - lookup > 32768)
5962 bits[ret].start = next->start;
5963 bits[ret].size = next->size;
5964 lookup = next->start + next->size;
5968 next = next_cache_extent(next);
/*
 * Destructor callback for one chunk cache entry: converts @cache back to
 * its chunk_record and unlinks the record from both of its lists ('list'
 * and 'dextents').  The release of the memory itself is not visible in this
 * listing (presumably a free() follows - TODO confirm).
 */
5976 static void free_chunk_record(struct cache_extent *cache)
5978 struct chunk_record *rec;
5980 rec = container_of(cache, struct chunk_record, cache);
5981 list_del_init(&rec->list);
5982 list_del_init(&rec->dextents);
/*
 * Tear down @chunk_cache by running free_chunk_record() on every entry via
 * cache_tree_free_extents().
 */
5986 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5988 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Destructor callback for one rb-tree node of the device cache: recovers
 * the enclosing device_record from @node.  What is done with the record
 * afterwards is not visible in this listing (presumably it is freed - TODO
 * confirm).
 */
5991 static void free_device_record(struct rb_node *node)
5993 struct device_record *rec;
5995 rec = container_of(node, struct device_record, node);
/*
 * Macro instantiation that ties free_device_record() to the "device_cache"
 * name.  NOTE(review): presumably this expands to the tree-wide free
 * routine for the device cache - confirm against the macro definition in
 * rbtree-utils.h.
 */
5999 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.  The handling of a failed
 * insert_cache_extent() call is not visible in this listing (presumably
 * the error is returned before the list is touched - TODO confirm).
 */
6001 int insert_block_group_record(struct block_group_tree *tree,
6002 struct block_group_record *bg_rec)
6006 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6010 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Destructor callback for one block group cache entry: converts @cache back
 * to its block_group_record and unlinks it from its list.  The release of
 * the memory is not visible in this listing (presumably a free() follows -
 * TODO confirm).
 */
6014 static void free_block_group_record(struct cache_extent *cache)
6016 struct block_group_record *rec;
6018 rec = container_of(cache, struct block_group_record, cache);
6019 list_del_init(&rec->list);
/*
 * Tear down the cache tree of @tree by running free_block_group_record()
 * on every entry via cache_tree_free_extents().
 */
6023 void free_block_group_tree(struct block_group_tree *tree)
6025 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree with insert_cache_extent2()
 * and queue it on both orphan lists of @tree (no_chunk_orphans through
 * chunk_list, no_device_orphans through device_list).  The handling of a
 * failed insertion is not visible in this listing (presumably the error is
 * returned before the lists are touched - TODO confirm).
 */
6028 int insert_device_extent_record(struct device_extent_tree *tree,
6029 struct device_extent_record *de_rec)
6034 * Device extent is a bit different from the other extents, because
6035 * the extents which belong to the different devices may have the
6036 * same start and size, so we need use the special extent cache
6037 * search/insert functions.
6039 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6043 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6044 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Destructor callback for one device extent cache entry: converts @cache
 * back to its device_extent_record and unlinks it from chunk_list and
 * device_list when those are non-empty.  The release of the memory is not
 * visible in this listing (presumably a free() follows - TODO confirm).
 */
6048 static void free_device_extent_record(struct cache_extent *cache)
6050 struct device_extent_record *rec;
6052 rec = container_of(cache, struct device_extent_record, cache);
6053 if (!list_empty(&rec->chunk_list))
6054 list_del_init(&rec->chunk_list);
6055 if (!list_empty(&rec->device_list))
6056 list_del_init(&rec->device_list);
/*
 * Tear down the cache tree of @tree by running free_device_extent_record()
 * on every entry via cache_tree_free_extents().
 */
6060 void free_device_extent_tree(struct device_extent_tree *tree)
6062 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6065 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref record (only
 * built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined).
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, with key.objectid as bytenr and key.offset as parent.  The
 * data backref call, whose branch condition is not visible here
 * (presumably the else case), passes the v0 ref count as num_refs and zero
 * for root, owner, offset, found_ref and max_size.
 */
6066 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6067 struct extent_buffer *leaf, int slot)
6069 struct btrfs_extent_ref_v0 *ref0;
6070 struct btrfs_key key;
6073 btrfs_item_key_to_cpu(leaf, &key, slot);
6074 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6075 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6076 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6079 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6080 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is calloc()ed with the size given by
 * btrfs_chunk_record_size(num_stripes); an allocation failure prints
 * "memory allocation failed" (what follows is not visible in this listing).
 * The cache entry spans [key->offset, chunk length); the key fields, the
 * leaf generation and the chunk attributes (owner, stripe_len, type,
 * io_width, io_align, sector_size, num_stripes, sub_stripes) are copied,
 * followed by devid, offset and device uuid of every stripe.
 */
6086 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6087 struct btrfs_key *key,
6090 struct btrfs_chunk *ptr;
6091 struct chunk_record *rec;
6094 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6095 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6097 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6099 fprintf(stderr, "memory allocation failed\n");
6103 INIT_LIST_HEAD(&rec->list);
6104 INIT_LIST_HEAD(&rec->dextents);
6107 rec->cache.start = key->offset;
6108 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6110 rec->generation = btrfs_header_generation(leaf);
6112 rec->objectid = key->objectid;
6113 rec->type = key->type;
6114 rec->offset = key->offset;
6116 rec->length = rec->cache.size;
6117 rec->owner = btrfs_chunk_owner(leaf, ptr);
6118 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6119 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6120 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6121 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6122 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6123 rec->num_stripes = num_stripes;
6124 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6126 for (i = 0; i < rec->num_stripes; ++i) {
6127 rec->stripes[i].devid =
6128 btrfs_stripe_devid_nr(leaf, ptr, i);
6129 rec->stripes[i].offset =
6130 btrfs_stripe_offset_nr(leaf, ptr, i);
6131 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6132 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid() against the
 * tree root of global_info; an invalid chunk is reported with error() (the
 * message says it is ignored).  Otherwise a record is built with
 * btrfs_new_chunk_record() and inserted with insert_cache_extent(); a
 * failed insertion prints "Chunk[...] existed." on stderr.  The return
 * values of the individual paths are not visible in this listing.
 */
6139 static int process_chunk_item(struct cache_tree *chunk_cache,
6140 struct btrfs_key *key, struct extent_buffer *eb,
6143 struct chunk_record *rec;
6144 struct btrfs_chunk *chunk;
6147 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6149 * Do extra check for this chunk item,
6151 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6152 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6153 * and owner<->key_type check.
6155 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6158 error("chunk(%llu, %llu) is not valid, ignore it",
6159 key->offset, btrfs_chunk_length(eb, chunk));
6162 rec = btrfs_new_chunk_record(eb, key, slot);
6163 ret = insert_cache_extent(chunk_cache, &rec->cache);
6165 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6166 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the rb-tree @dev_cache using device_record_compare.
 *
 * The record is malloc()ed; an allocation failure prints "memory allocation
 * failed" (what follows is not visible in this listing).  Key fields, the
 * leaf generation and the item's total/used byte counts are copied.  A
 * failed rb_insert() prints "Device[...] existed." on stderr.
 *
 * NOTE(review): rec->devid is assigned twice - first from key->offset and
 * later from btrfs_device_id(); the second assignment wins.
 */
6173 static int process_device_item(struct rb_root *dev_cache,
6174 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6176 struct btrfs_dev_item *ptr;
6177 struct device_record *rec;
6180 ptr = btrfs_item_ptr(eb,
6181 slot, struct btrfs_dev_item);
6183 rec = malloc(sizeof(*rec));
6185 fprintf(stderr, "memory allocation failed\n");
6189 rec->devid = key->offset;
6190 rec->generation = btrfs_header_generation(eb);
6192 rec->objectid = key->objectid;
6193 rec->type = key->type;
6194 rec->offset = key->offset;
6196 rec->devid = btrfs_device_id(eb, ptr);
6197 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6198 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6200 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6202 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the block group item at @slot
 * of @leaf.
 *
 * The record is calloc()ed; an allocation failure prints "memory allocation
 * failed" (what follows is not visible in this listing).  The cache entry
 * spans [key->objectid, key->offset); key fields, the leaf generation and
 * the on-disk block group flags are copied and the list head is
 * initialised.
 */
6209 struct block_group_record *
6210 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6213 struct btrfs_block_group_item *ptr;
6214 struct block_group_record *rec;
6216 rec = calloc(1, sizeof(*rec));
6218 fprintf(stderr, "memory allocation failed\n");
6222 rec->cache.start = key->objectid;
6223 rec->cache.size = key->offset;
6225 rec->generation = btrfs_header_generation(leaf);
6227 rec->objectid = key->objectid;
6228 rec->type = key->type;
6229 rec->offset = key->offset;
6231 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6232 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6234 INIT_LIST_HEAD(&rec->list);
/*
 * Create a block group record for the item at @slot of @eb and insert it
 * into @block_group_cache.  A failed insertion prints
 * "Block Group[...] existed." on stderr; cleanup and return values are not
 * visible in this listing.
 */
6239 static int process_block_group_item(struct block_group_tree *block_group_cache,
6240 struct btrfs_key *key,
6241 struct extent_buffer *eb, int slot)
6243 struct block_group_record *rec;
6246 rec = btrfs_new_block_group_record(eb, key, slot);
6247 ret = insert_block_group_record(block_group_cache, rec);
6249 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6250 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the dev extent item at @slot
 * of @leaf.
 *
 * The record is calloc()ed; an allocation failure prints "memory allocation
 * failed" (what follows is not visible in this listing).  The cache entry
 * is keyed by key->objectid (cache.objectid) and key->offset (cache.start)
 * and sized by the extent length.  Key fields, the leaf generation, the
 * chunk objectid/offset and the length are copied and both list heads are
 * initialised.
 */
6257 struct device_extent_record *
6258 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6259 struct btrfs_key *key, int slot)
6261 struct device_extent_record *rec;
6262 struct btrfs_dev_extent *ptr;
6264 rec = calloc(1, sizeof(*rec));
6266 fprintf(stderr, "memory allocation failed\n");
6270 rec->cache.objectid = key->objectid;
6271 rec->cache.start = key->offset;
6273 rec->generation = btrfs_header_generation(leaf);
6275 rec->objectid = key->objectid;
6276 rec->type = key->type;
6277 rec->offset = key->offset;
6279 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6280 rec->chunk_objecteid =
6281 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6283 btrfs_dev_extent_chunk_offset(leaf, ptr);
6284 rec->length = btrfs_dev_extent_length(leaf, ptr);
6285 rec->cache.size = rec->length;
6287 INIT_LIST_HEAD(&rec->chunk_list);
6288 INIT_LIST_HEAD(&rec->device_list);
/*
 * Create a device extent record for the item at @slot of @eb and insert it
 * into @dev_extent_cache.  A failed insertion prints
 * "Device extent[...] existed." ; cleanup and return values are not visible
 * in this listing.
 */
6294 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6295 struct btrfs_key *key, struct extent_buffer *eb,
6298 struct device_extent_record *rec;
6301 rec = btrfs_new_device_extent_record(eb, key, slot);
6302 ret = insert_device_extent_record(dev_extent_cache, rec);
6305 "Device extent[%llu, %llu, %llu] existed.\n",
6306 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item at @slot of @eb: register the extent record and
 * every inline backref it carries.
 *
 * Size and sanity checks visible here:
 *  - num_bytes is root->nodesize for BTRFS_METADATA_ITEM_KEY items and
 *    key.offset otherwise;
 *  - key.objectid must be aligned to root->sectorsize;
 *  - an item smaller than struct btrfs_extent_item is handled as a v0 item
 *    when BTRFS_COMPAT_EXTENT_TREE_V0 is defined;
 *  - a metadata extent must be exactly root->nodesize long, a data extent
 *    must be sectorsize aligned.
 * Violations are reported with error() ("ignoring"/"ignore" messages).
 *
 * A template (start, nr, extent_item_refs, metadata, max_size) is passed to
 * add_extent_rec().  The inline refs start right after the extent item,
 * skipping a btrfs_tree_block_info for tree blocks keyed with
 * BTRFS_EXTENT_ITEM_KEY, and are dispatched by type to add_tree_backref()
 * or add_data_backref(); an unknown type prints "corrupt extent record".
 * The cursor advances by btrfs_extent_inline_ref_size(type).
 *
 * NOTE(review): in the non-v0 path the results of add_extent_rec() and of
 * both add_data_backref() calls are not assigned in the visible code, so
 * failures there appear to go unnoticed - confirm in the full source.
 */
6313 static int process_extent_item(struct btrfs_root *root,
6314 struct cache_tree *extent_cache,
6315 struct extent_buffer *eb, int slot)
6317 struct btrfs_extent_item *ei;
6318 struct btrfs_extent_inline_ref *iref;
6319 struct btrfs_extent_data_ref *dref;
6320 struct btrfs_shared_data_ref *sref;
6321 struct btrfs_key key;
6322 struct extent_record tmpl;
6327 u32 item_size = btrfs_item_size_nr(eb, slot);
6333 btrfs_item_key_to_cpu(eb, &key, slot);
6335 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6337 num_bytes = root->nodesize;
6339 num_bytes = key.offset;
6342 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6343 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6344 key.objectid, root->sectorsize);
6347 if (item_size < sizeof(*ei)) {
6348 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6349 struct btrfs_extent_item_v0 *ei0;
6350 BUG_ON(item_size != sizeof(*ei0));
6351 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6352 refs = btrfs_extent_refs_v0(eb, ei0);
6356 memset(&tmpl, 0, sizeof(tmpl));
6357 tmpl.start = key.objectid;
6358 tmpl.nr = num_bytes;
6359 tmpl.extent_item_refs = refs;
6360 tmpl.metadata = metadata;
6362 tmpl.max_size = num_bytes;
6364 return add_extent_rec(extent_cache, &tmpl);
6367 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6368 refs = btrfs_extent_refs(eb, ei);
6369 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6373 if (metadata && num_bytes != root->nodesize) {
6374 error("ignore invalid metadata extent, length %llu does not equal to %u",
6375 num_bytes, root->nodesize);
6378 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6379 error("ignore invalid data extent, length %llu is not aligned to %u",
6380 num_bytes, root->sectorsize);
6384 memset(&tmpl, 0, sizeof(tmpl));
6385 tmpl.start = key.objectid;
6386 tmpl.nr = num_bytes;
6387 tmpl.extent_item_refs = refs;
6388 tmpl.metadata = metadata;
6390 tmpl.max_size = num_bytes;
6391 add_extent_rec(extent_cache, &tmpl);
6393 ptr = (unsigned long)(ei + 1);
6394 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6395 key.type == BTRFS_EXTENT_ITEM_KEY)
6396 ptr += sizeof(struct btrfs_tree_block_info);
6398 end = (unsigned long)ei + item_size;
6400 iref = (struct btrfs_extent_inline_ref *)ptr;
6401 type = btrfs_extent_inline_ref_type(eb, iref);
6402 offset = btrfs_extent_inline_ref_offset(eb, iref);
6404 case BTRFS_TREE_BLOCK_REF_KEY:
6405 ret = add_tree_backref(extent_cache, key.objectid,
6408 error("add_tree_backref failed: %s",
6411 case BTRFS_SHARED_BLOCK_REF_KEY:
6412 ret = add_tree_backref(extent_cache, key.objectid,
6415 error("add_tree_backref failed: %s",
6418 case BTRFS_EXTENT_DATA_REF_KEY:
6419 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6420 add_data_backref(extent_cache, key.objectid, 0,
6421 btrfs_extent_data_ref_root(eb, dref),
6422 btrfs_extent_data_ref_objectid(eb,
6424 btrfs_extent_data_ref_offset(eb, dref),
6425 btrfs_extent_data_ref_count(eb, dref),
6428 case BTRFS_SHARED_DATA_REF_KEY:
6429 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6430 add_data_backref(extent_cache, key.objectid, offset,
6432 btrfs_shared_data_ref_count(eb, sref),
6436 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6437 key.objectid, key.type, num_bytes);
6440 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the free range [@offset, @offset + @bytes) of block group
 * @cache is represented by exactly one matching free space entry.
 *
 * Before the lookup the range is trimmed around superblock copies: for each
 * of the BTRFS_SUPER_MIRROR_MAX mirrors, btrfs_rmap_block() maps the
 * superblock offset into logical addresses of this block group and every
 * returned stripe that overlaps the range is cut out.  A stripe at the
 * start or covering the start shifts @offset forward, a stripe at the end
 * shortens @bytes, and a stripe in the middle makes the function recurse on
 * the left part and continue with the right part.
 *
 * The remaining range is looked up with btrfs_find_free_space(); a missing
 * entry or one whose offset or byte count differs is reported on stderr.  A
 * matching entry is removed with unlink_free_space() (freeing the entry
 * is not visible in this listing).
 */
6447 static int check_cache_range(struct btrfs_root *root,
6448 struct btrfs_block_group_cache *cache,
6449 u64 offset, u64 bytes)
6451 struct btrfs_free_space *entry;
6457 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6458 bytenr = btrfs_sb_offset(i);
6459 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6460 cache->key.objectid, bytenr, 0,
6461 &logical, &nr, &stripe_len);
6466 if (logical[nr] + stripe_len <= offset)
6468 if (offset + bytes <= logical[nr])
6470 if (logical[nr] == offset) {
6471 if (stripe_len >= bytes) {
6475 bytes -= stripe_len;
6476 offset += stripe_len;
6477 } else if (logical[nr] < offset) {
6478 if (logical[nr] + stripe_len >=
6483 bytes = (offset + bytes) -
6484 (logical[nr] + stripe_len);
6485 offset = logical[nr] + stripe_len;
6488 * Could be tricky, the super may land in the
6489 * middle of the area we're checking. First
6490 * check the easiest case, it's at the end.
6492 if (logical[nr] + stripe_len >=
6494 bytes = logical[nr] - offset;
6498 /* Check the left side */
6499 ret = check_cache_range(root, cache,
6501 logical[nr] - offset);
6507 /* Now we continue with the right side */
6508 bytes = (offset + bytes) -
6509 (logical[nr] + stripe_len);
6510 offset = logical[nr] + stripe_len;
6517 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6519 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6520 offset, offset+bytes);
6524 if (entry->offset != offset) {
6525 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6530 if (entry->bytes != bytes) {
6531 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6532 bytes, entry->bytes, offset);
6536 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the
 * extent tree.
 *
 * The extent root is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET) up to the end of the block group, looking only
 * at EXTENT_ITEM and METADATA_ITEM keys.  'last' tracks the end of the
 * previous extent (key.offset for extent items, root->nodesize for metadata
 * items); every gap between 'last' and the next extent is verified with
 * check_cache_range(), as is the tail between the last extent and the end
 * of the block group.  Finally a message is printed when the free space
 * rb-tree still holds entries (the first half of that condition is not
 * visible in this listing).
 */
6541 static int verify_space_cache(struct btrfs_root *root,
6542 struct btrfs_block_group_cache *cache)
6544 struct btrfs_path path;
6545 struct extent_buffer *leaf;
6546 struct btrfs_key key;
6550 root = root->fs_info->extent_root;
6552 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6554 btrfs_init_path(&path);
6555 key.objectid = last;
6557 key.type = BTRFS_EXTENT_ITEM_KEY;
6558 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6563 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6564 ret = btrfs_next_leaf(root, &path);
6572 leaf = path.nodes[0];
6573 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6574 if (key.objectid >= cache->key.offset + cache->key.objectid)
6576 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6577 key.type != BTRFS_METADATA_ITEM_KEY) {
6582 if (last == key.objectid) {
6583 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6584 last = key.objectid + key.offset;
6586 last = key.objectid + root->nodesize;
6591 ret = check_cache_range(root, cache, last,
6592 key.objectid - last);
6595 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6596 last = key.objectid + key.offset;
6598 last = key.objectid + root->nodesize;
6602 if (last < cache->key.objectid + cache->key.offset)
6603 ret = check_cache_range(root, cache, last,
6604 cache->key.objectid +
6605 cache->key.offset - last);
6608 btrfs_release_path(&path);
6611 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6612 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * When the superblock's cache generation is neither -1ULL nor equal to the
 * superblock generation, a message announces that the space cache will be
 * invalidated (what follows is not visible in this listing).  With progress
 * reporting enabled the task position is set to TASK_FREE_SPACE and the
 * task is started.
 *
 * Block groups are visited in address order, starting right after the
 * primary superblock.  For each one the free space ctl is initialised if
 * missing (the call that drops a previously loaded cache is visible too),
 * then the free space is loaded either from the free space tree - with
 * super stripes excluded around the load - when the FREE_SPACE_TREE
 * compat_ro bit is set, or from the free space cache otherwise, and checked
 * with verify_space_cache().  Returns -EINVAL when the local 'error'
 * counter is non-zero, 0 otherwise.
 */
6620 static int check_space_cache(struct btrfs_root *root)
6622 struct btrfs_block_group_cache *cache;
6623 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6627 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6628 btrfs_super_generation(root->fs_info->super_copy) !=
6629 btrfs_super_cache_generation(root->fs_info->super_copy)) {
6630 printf("cache and super generation don't match, space cache "
6631 "will be invalidated\n");
6635 if (ctx.progress_enabled) {
6636 ctx.tp = TASK_FREE_SPACE;
6637 task_start(ctx.info);
6641 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6645 start = cache->key.objectid + cache->key.offset;
6646 if (!cache->free_space_ctl) {
6647 if (btrfs_init_free_space_ctl(cache,
6648 root->sectorsize)) {
6653 btrfs_remove_free_space_cache(cache);
6656 if (btrfs_fs_compat_ro(root->fs_info,
6657 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6658 ret = exclude_super_stripes(root, cache);
6660 fprintf(stderr, "could not exclude super stripes: %s\n",
6665 ret = load_free_space_tree(root->fs_info, cache);
6666 free_excluded_extents(root, cache);
6668 fprintf(stderr, "could not load free space tree: %s\n",
6675 ret = load_free_space_cache(root->fs_info, cache);
6680 ret = verify_space_cache(root, cache);
6682 fprintf(stderr, "cache appears valid but isn't %Lu\n",
6683 cache->key.objectid);
6688 task_stop(ctx.info);
6690 return error ? -EINVAL : 0;
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare each
 * sector's checksum with the values stored in @eb at @leaf_offset.
 *
 * @num_bytes is tested against root->sectorsize for alignment (the action
 * on a misaligned length is not visible in this listing).  The data is
 * read into a malloc()ed buffer of @num_bytes with read_extent_data(),
 * which may return less than requested via read_len.  Every sectorsize
 * chunk is checksummed with btrfs_csum_data()/btrfs_csum_final() and
 * compared with the csum_size bytes read from the leaf at
 * leaf_offset + (byte offset / sectorsize) * csum_size.  A mismatch is
 * reported on stderr together with the mirror number, and the number of
 * copies is queried so that another mirror can be tried while
 * mirror < num_copies - 1.
 */
6693 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6694 u64 num_bytes, unsigned long leaf_offset,
6695 struct extent_buffer *eb) {
6698 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6700 unsigned long csum_offset;
6704 u64 data_checked = 0;
6710 if (num_bytes % root->sectorsize)
6713 data = malloc(num_bytes);
6717 while (offset < num_bytes) {
6720 read_len = num_bytes - offset;
6721 /* read as much space once a time */
6722 ret = read_extent_data(root, data + offset,
6723 bytenr + offset, &read_len, mirror);
6727 /* verify every 4k data's checksum */
6728 while (data_checked < read_len) {
6730 tmp = offset + data_checked;
6732 csum = btrfs_csum_data(NULL, (char *)data + tmp,
6733 csum, root->sectorsize);
6734 btrfs_csum_final(csum, (u8 *)&csum);
6736 csum_offset = leaf_offset +
6737 tmp / root->sectorsize * csum_size;
6738 read_extent_buffer(eb, (char *)&csum_expected,
6739 csum_offset, csum_size);
6740 /* try another mirror */
6741 if (csum != csum_expected) {
6742 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6743 mirror, bytenr + tmp,
6744 csum, csum_expected);
6745 num_copies = btrfs_num_copies(
6746 &root->fs_info->mapping_tree,
6748 if (mirror < num_copies - 1) {
6753 data_checked += root->sectorsize;
/*
 * Check that the range [@bytenr, @bytenr + @num_bytes) is fully covered by
 * EXTENT_ITEM entries of the extent tree.
 *
 * The search starts at key (bytenr, BTRFS_EXTENT_ITEM_KEY, (u64)-1) and
 * steps back one slot or leaf, and further back while the key type is
 * greater than BTRFS_EXTENT_ITEM_KEY.  The items are then walked forward;
 * only EXTENT_ITEM keys are considered and each overlapping extent trims
 * the outstanding range:
 *  - an extent starting at @bytenr removes key.offset bytes from the front;
 *  - an extent starting before @bytenr moves the start to the extent's end;
 *  - an extent starting inside the range shortens it to the part before
 *    the extent, after recursing on the part behind the extent when the
 *    extent ends before the range does.
 * If bytes remain uncovered and no error occurred, "There are no extents
 * for csum range" is printed.
 *
 * NOTE(review): loop exits and return values are not visible in this
 * listing; the summary covers only the visible statements.
 */
6762 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6765 struct btrfs_path path;
6766 struct extent_buffer *leaf;
6767 struct btrfs_key key;
6770 btrfs_init_path(&path);
6771 key.objectid = bytenr;
6772 key.type = BTRFS_EXTENT_ITEM_KEY;
6773 key.offset = (u64)-1;
6776 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6779 fprintf(stderr, "Error looking up extent record %d\n", ret);
6780 btrfs_release_path(&path);
6783 if (path.slots[0] > 0) {
6786 ret = btrfs_prev_leaf(root, &path);
6789 } else if (ret > 0) {
6796 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6799 * Block group items come before extent items if they have the same
6800 * bytenr, so walk back one more just in case. Dear future traveller,
6801 * first congrats on mastering time travel. Now if it's not too much
6802 * trouble could you go back to 2006 and tell Chris to make the
6803 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6804 * EXTENT_ITEM_KEY please?
6806 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6807 if (path.slots[0] > 0) {
6810 ret = btrfs_prev_leaf(root, &path);
6813 } else if (ret > 0) {
6818 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6822 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6823 ret = btrfs_next_leaf(root, &path);
6825 fprintf(stderr, "Error going to next leaf "
6827 btrfs_release_path(&path);
6833 leaf = path.nodes[0];
6834 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6835 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6839 if (key.objectid + key.offset < bytenr) {
6843 if (key.objectid > bytenr + num_bytes)
6846 if (key.objectid == bytenr) {
6847 if (key.offset >= num_bytes) {
6851 num_bytes -= key.offset;
6852 bytenr += key.offset;
6853 } else if (key.objectid < bytenr) {
6854 if (key.objectid + key.offset >= bytenr + num_bytes) {
6858 num_bytes = (bytenr + num_bytes) -
6859 (key.objectid + key.offset);
6860 bytenr = key.objectid + key.offset;
6862 if (key.objectid + key.offset < bytenr + num_bytes) {
6863 u64 new_start = key.objectid + key.offset;
6864 u64 new_bytes = bytenr + num_bytes - new_start;
6867 * Weird case, the extent is in the middle of
6868 * our range, we'll have to search one side
6869 * and then the other. Not sure if this happens
6870 * in real life, but no harm in coding it up
6871 * anyway just in case.
6873 btrfs_release_path(&path);
6874 ret = check_extent_exists(root, new_start,
6877 fprintf(stderr, "Right section didn't "
6881 num_bytes = key.objectid - bytenr;
6884 num_bytes = key.objectid - bytenr;
6891 if (num_bytes && !ret) {
6892 fprintf(stderr, "There are no extents for csum range "
6893 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6897 btrfs_release_path(&path);
/*
 * check_csums - walk the csum tree and cross-check it against the extents.
 *
 * Visible behaviour: root is replaced by root->fs_info->csum_root and
 * "No valid csum tree found" is reported when its node is not uptodate.
 * The tree is then searched for EXTENT_CSUM items.  For each item, data_len
 * is the number of checksums in the item (item size / csum_size) multiplied
 * by root->sectorsize.  When the global check_data_csum is set,
 * check_extent_csums() verifies the data itself; otherwise that step is
 * skipped via the skip_csum_check label.  Items that continue the current
 * run extend num_bytes; when key.offset does not equal offset + num_bytes
 * the finished run is passed to check_extent_exists() and a new run starts
 * at key.offset.
 *
 * NOTE(review): this listing omits several of the function's lines (loop
 * header, error accounting, returns), so the return value cannot be
 * confirmed from here.
 */
6901 static int check_csums(struct btrfs_root *root)
6903 struct btrfs_path path;
6904 struct extent_buffer *leaf;
6905 struct btrfs_key key;
6906 u64 offset = 0, num_bytes = 0;
6907 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6911 unsigned long leaf_offset;
6913 root = root->fs_info->csum_root;
6914 if (!extent_buffer_uptodate(root->node)) {
6915 fprintf(stderr, "No valid csum tree found\n");
6919 btrfs_init_path(&path);
6920 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6921 key.type = BTRFS_EXTENT_CSUM_KEY;
6923 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6925 fprintf(stderr, "Error searching csum tree %d\n", ret);
6926 btrfs_release_path(&path);
6930 if (ret > 0 && path.slots[0])
6935 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6936 ret = btrfs_next_leaf(root, &path);
6938 fprintf(stderr, "Error going to next leaf "
6945 leaf = path.nodes[0];
6947 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6948 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6953 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6954 csum_size) * root->sectorsize;
6955 if (!check_data_csum)
6956 goto skip_csum_check;
6957 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6958 ret = check_extent_csums(root, key.offset, data_len,
6964 offset = key.offset;
6965 } else if (key.offset != offset + num_bytes) {
6966 ret = check_extent_exists(root, offset, num_bytes);
6968 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6969 "there is no extent record\n",
6970 offset, offset+num_bytes);
6973 offset = key.offset;
6976 num_bytes += data_len;
6980 btrfs_release_path(&path);
/*
 * is_dropped_key - compare key with drop_key field by field, in the order
 * objectid, type, offset.
 *
 * NOTE(review): the return statements are not present in this listing;
 * presumably a key that sorts before drop_key is reported as dropped.
 * Confirm against the full source.
 */
6984 static int is_dropped_key(struct btrfs_key *key,
6985 struct btrfs_key *drop_key) {
6986 if (key->objectid < drop_key->objectid)
6988 else if (key->objectid == drop_key->objectid) {
6989 if (key->type < drop_key->type)
6991 else if (key->type == drop_key->type) {
6992 if (key->offset < drop_key->offset)
7000 * Here are the rules for FULL_BACKREF.
7002 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7003 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7005 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7006 * if it happened after the relocation occurred since we'll have dropped the
7007 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7008 * have no real way to know for sure.
7010 * We process the blocks one root at a time, and we start from the lowest root
7011 * objectid and go to the highest. So we can just lookup the owner backref for
7012 * the record and if we don't find it then we know it doesn't exist and we have
7015 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7016 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7017 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - work out whether buf should carry FULL_BACKREF.
 *
 * Visible behaviour: the extent record for buf->start is looked up in
 * extent_cache, and the following are tested in order: ri->objectid below
 * BTRFS_FIRST_FREE_OBJECTID, buf being the block at ri->bytenr, the RELOC
 * header flag, the header owner being ri->objectid, and the presence of a
 * tree backref for the header owner.  One visible outcome ORs
 * BTRFS_BLOCK_FLAG_FULL_BACKREF into *flags.  In both outcomes
 * rec->bad_full_backref is set when rec->flag_block_full_backref was
 * already recorded (not FLAG_UNSET) with the opposite value.
 *
 * NOTE(review): the jump targets taken by each test and the return
 * statements are not present in this listing.
 */
7019 static int calc_extent_flag(struct btrfs_root *root,
7020 struct cache_tree *extent_cache,
7021 struct extent_buffer *buf,
7022 struct root_item_record *ri,
7025 struct extent_record *rec;
7026 struct cache_extent *cache;
7027 struct tree_backref *tback;
7030 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7031 /* we have added this extent before */
7035 rec = container_of(cache, struct extent_record, cache);
7038 * Except file/reloc tree, we can not have
7041 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7046 if (buf->start == ri->bytenr)
7049 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7052 owner = btrfs_header_owner(buf);
7053 if (owner == ri->objectid)
7056 tback = find_tree_backref(rec, 0, owner);
7061 if (rec->flag_block_full_backref != FLAG_UNSET &&
7062 rec->flag_block_full_backref != 0)
7063 rec->bad_full_backref = 1;
7066 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7067 if (rec->flag_block_full_backref != FLAG_UNSET &&
7068 rec->flag_block_full_backref != 1)
7069 rec->bad_full_backref = 1;
/*
 * report_mismatch_key_root - print a one-line diagnostic to stderr of the
 * form "Invalid key type(<type>) found in root(<root>)", using
 * print_key_type() and print_objectid() to render the two values.
 */
7073 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7075 fprintf(stderr, "Invalid key type(");
7076 print_key_type(stderr, 0, key_type);
7077 fprintf(stderr, ") found in root(");
7078 print_objectid(stderr, rootid, 0);
7079 fprintf(stderr, ")\n");
7083 * Check if the key is valid with its extent buffer.
7085 * This is an early check in case an invalid key exists in an extent buffer
7086 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * check_type_with_root - check that a key type is allowed in a given tree.
 *
 * rootid is the objectid of the tree the item was found in and key_type is
 * the item's key type.  Each group of case labels lists key types and the
 * test below it names the tree(s) in which those types may appear; a
 * mismatch is reported through report_mismatch_key_root().
 *
 * The csum group must be selected by the key type BTRFS_EXTENT_CSUM_KEY.
 * It previously used BTRFS_CSUM_TREE_OBJECTID, which is a tree objectid and
 * not a key type, so csum items were never matched by that label.
 *
 * NOTE(review): the switch header, the jumps to the error path and the
 * return statements are not present in this listing.
 */
7089 static int check_type_with_root(u64 rootid, u8 key_type)
7092 /* Only valid in chunk tree */
7093 case BTRFS_DEV_ITEM_KEY:
7094 case BTRFS_CHUNK_ITEM_KEY:
7095 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7098 /* valid in csum and log tree */
7099 case BTRFS_EXTENT_CSUM_KEY:
7100 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7104 case BTRFS_EXTENT_ITEM_KEY:
7105 case BTRFS_METADATA_ITEM_KEY:
7106 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7107 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7110 case BTRFS_ROOT_ITEM_KEY:
7111 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7114 case BTRFS_DEV_EXTENT_KEY:
7115 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7121 report_mismatch_key_root(key_type, rootid);
/*
 * run_next_block - process one pending tree block for the extent check.
 *
 * Visible behaviour:
 *  - pick_next_pending() fills bits; each returned block is added to reada
 *    and readahead_tree_block() is issued for it.
 *  - bits[0] is then processed: it is removed from pending, reada and
 *    nodes, the cached extent record supplies the parent generation, and
 *    the block is read with read_tree_block().  A block that is not
 *    uptodate is handed to record_bad_block_io().
 *  - Unless init_extent_tree is set, btrfs_lookup_extent_info() is asked for
 *    the block flags; calc_extent_flag() is called on the other visible
 *    paths, and FULL_BACKREF is assumed when it fails.
 *  - The FULL_BACKREF decision is checked against ri->objectid, the header
 *    owner, the RELOC header flag and ri->last_snapshot; a disagreement
 *    flips the flag and sets rec->bad_full_backref.
 *  - For a leaf, each item is checked with check_type_with_root() and then
 *    dispatched on key.type to the matching process_* or add_*_backref
 *    helper.  EXTENT_DATA items that are neither inline nor holes update
 *    data_bytes_allocated and data_bytes_referenced and add a data backref.
 *  - For a node, each child pointer gets an extent record and a tree
 *    backref and is queued with add_pending().
 *  - The btree byte and space-waste totals are updated and the buffer is
 *    released with free_extent_buffer().
 *
 * NOTE(review): rec starts as NULL and the only visible assignment follows
 * the extent_cache lookup, yet rec is dereferenced later without a visible
 * check.  Confirm against the full source that a missing record is handled.
 * NOTE(review): this listing omits many of the function's lines (parameters,
 * declarations, else branches, continue and return statements).
 */
7125 static int run_next_block(struct btrfs_root *root,
7126 struct block_info *bits,
7129 struct cache_tree *pending,
7130 struct cache_tree *seen,
7131 struct cache_tree *reada,
7132 struct cache_tree *nodes,
7133 struct cache_tree *extent_cache,
7134 struct cache_tree *chunk_cache,
7135 struct rb_root *dev_cache,
7136 struct block_group_tree *block_group_cache,
7137 struct device_extent_tree *dev_extent_cache,
7138 struct root_item_record *ri)
7140 struct extent_buffer *buf;
7141 struct extent_record *rec = NULL;
7152 struct btrfs_key key;
7153 struct cache_extent *cache;
7156 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7157 bits_nr, &reada_bits);
7162 for(i = 0; i < nritems; i++) {
7163 ret = add_cache_extent(reada, bits[i].start,
7168 /* fixme, get the parent transid */
7169 readahead_tree_block(root, bits[i].start,
7173 *last = bits[0].start;
7174 bytenr = bits[0].start;
7175 size = bits[0].size;
7177 cache = lookup_cache_extent(pending, bytenr, size);
7179 remove_cache_extent(pending, cache);
7182 cache = lookup_cache_extent(reada, bytenr, size);
7184 remove_cache_extent(reada, cache);
7187 cache = lookup_cache_extent(nodes, bytenr, size);
7189 remove_cache_extent(nodes, cache);
7192 cache = lookup_cache_extent(extent_cache, bytenr, size);
7194 rec = container_of(cache, struct extent_record, cache);
7195 gen = rec->parent_generation;
7198 /* fixme, get the real parent transid */
7199 buf = read_tree_block(root, bytenr, size, gen);
7200 if (!extent_buffer_uptodate(buf)) {
7201 record_bad_block_io(root->fs_info,
7202 extent_cache, bytenr, size);
7206 nritems = btrfs_header_nritems(buf);
7209 if (!init_extent_tree) {
7210 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7211 btrfs_header_level(buf), 1, NULL,
7214 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7216 fprintf(stderr, "Couldn't calc extent flags\n");
7217 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7222 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7224 fprintf(stderr, "Couldn't calc extent flags\n");
7225 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7229 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7231 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7232 ri->objectid == btrfs_header_owner(buf)) {
7234 * Ok we got to this block from it's original owner and
7235 * we have FULL_BACKREF set. Relocation can leave
7236 * converted blocks over so this is altogether possible,
7237 * however it's not possible if the generation > the
7238 * last snapshot, so check for this case.
7240 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7241 btrfs_header_generation(buf) > ri->last_snapshot) {
7242 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7243 rec->bad_full_backref = 1;
7248 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7249 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7250 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7251 rec->bad_full_backref = 1;
7255 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7256 rec->flag_block_full_backref = 1;
7260 rec->flag_block_full_backref = 0;
7262 owner = btrfs_header_owner(buf);
7265 ret = check_block(root, extent_cache, buf, flags);
7269 if (btrfs_is_leaf(buf)) {
7270 btree_space_waste += btrfs_leaf_free_space(root, buf);
7271 for (i = 0; i < nritems; i++) {
7272 struct btrfs_file_extent_item *fi;
7273 btrfs_item_key_to_cpu(buf, &key, i);
7275 * Check key type against the leaf owner.
7276 * Could filter quite a lot of early error if
7279 if (check_type_with_root(btrfs_header_owner(buf),
7281 fprintf(stderr, "ignoring invalid key\n");
7284 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7285 process_extent_item(root, extent_cache, buf,
7289 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7290 process_extent_item(root, extent_cache, buf,
7294 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7296 btrfs_item_size_nr(buf, i);
7299 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7300 process_chunk_item(chunk_cache, &key, buf, i);
7303 if (key.type == BTRFS_DEV_ITEM_KEY) {
7304 process_device_item(dev_cache, &key, buf, i);
7307 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7308 process_block_group_item(block_group_cache,
7312 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7313 process_device_extent_item(dev_extent_cache,
7318 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7319 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7320 process_extent_ref_v0(extent_cache, buf, i);
7327 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7328 ret = add_tree_backref(extent_cache,
7329 key.objectid, 0, key.offset, 0);
7331 error("add_tree_backref failed: %s",
7335 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7336 ret = add_tree_backref(extent_cache,
7337 key.objectid, key.offset, 0, 0);
7339 error("add_tree_backref failed: %s",
7343 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7344 struct btrfs_extent_data_ref *ref;
7345 ref = btrfs_item_ptr(buf, i,
7346 struct btrfs_extent_data_ref);
7347 add_data_backref(extent_cache,
7349 btrfs_extent_data_ref_root(buf, ref),
7350 btrfs_extent_data_ref_objectid(buf,
7352 btrfs_extent_data_ref_offset(buf, ref),
7353 btrfs_extent_data_ref_count(buf, ref),
7354 0, root->sectorsize);
7357 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7358 struct btrfs_shared_data_ref *ref;
7359 ref = btrfs_item_ptr(buf, i,
7360 struct btrfs_shared_data_ref);
7361 add_data_backref(extent_cache,
7362 key.objectid, key.offset, 0, 0, 0,
7363 btrfs_shared_data_ref_count(buf, ref),
7364 0, root->sectorsize);
7367 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7368 struct bad_item *bad;
7370 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7374 bad = malloc(sizeof(struct bad_item));
7377 INIT_LIST_HEAD(&bad->list);
7378 memcpy(&bad->key, &key,
7379 sizeof(struct btrfs_key));
7380 bad->root_id = owner;
7381 list_add_tail(&bad->list, &delete_items);
7384 if (key.type != BTRFS_EXTENT_DATA_KEY)
7386 fi = btrfs_item_ptr(buf, i,
7387 struct btrfs_file_extent_item);
7388 if (btrfs_file_extent_type(buf, fi) ==
7389 BTRFS_FILE_EXTENT_INLINE)
7391 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7394 data_bytes_allocated +=
7395 btrfs_file_extent_disk_num_bytes(buf, fi);
7396 if (data_bytes_allocated < root->sectorsize) {
7399 data_bytes_referenced +=
7400 btrfs_file_extent_num_bytes(buf, fi);
7401 add_data_backref(extent_cache,
7402 btrfs_file_extent_disk_bytenr(buf, fi),
7403 parent, owner, key.objectid, key.offset -
7404 btrfs_file_extent_offset(buf, fi), 1, 1,
7405 btrfs_file_extent_disk_num_bytes(buf, fi));
7409 struct btrfs_key first_key;
7411 first_key.objectid = 0;
7414 btrfs_item_key_to_cpu(buf, &first_key, 0);
7415 level = btrfs_header_level(buf);
7416 for (i = 0; i < nritems; i++) {
7417 struct extent_record tmpl;
7419 ptr = btrfs_node_blockptr(buf, i);
7420 size = root->nodesize;
7421 btrfs_node_key_to_cpu(buf, &key, i);
7423 if ((level == ri->drop_level)
7424 && is_dropped_key(&key, &ri->drop_key)) {
7429 memset(&tmpl, 0, sizeof(tmpl));
7430 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7431 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7436 tmpl.max_size = size;
7437 ret = add_extent_rec(extent_cache, &tmpl);
7441 ret = add_tree_backref(extent_cache, ptr, parent,
7444 error("add_tree_backref failed: %s",
7450 add_pending(nodes, seen, ptr, size);
7452 add_pending(pending, seen, ptr, size);
7455 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7456 nritems) * sizeof(struct btrfs_key_ptr);
7458 total_btree_bytes += buf->len;
7459 if (fs_root_objectid(btrfs_header_owner(buf)))
7460 total_fs_tree_bytes += buf->len;
7461 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7462 total_extent_tree_bytes += buf->len;
7463 if (!found_old_backref &&
7464 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7465 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7466 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7467 found_old_backref = 1;
7469 free_extent_buffer(buf);
/*
 * add_root_to_pending - queue a tree root block for scanning.
 *
 * Visible behaviour: the block is passed to add_pending() with the nodes
 * tree when its header level is above 0 and with the pending tree on the
 * other path.  An extent record template starting at buf->start with
 * max_size buf->len is handed to add_extent_rec().  A tree backref is then
 * added: with buf->start as the parent when objectid is the reloc tree or
 * the block predates BTRFS_MIXED_BACKREF_REV, otherwise with parent 0 and
 * objectid as the root.
 *
 * NOTE(review): the return value of add_extent_rec() is not checked on the
 * visible line.  Remaining template fields and the return statement are not
 * present in this listing.
 */
7473 static int add_root_to_pending(struct extent_buffer *buf,
7474 struct cache_tree *extent_cache,
7475 struct cache_tree *pending,
7476 struct cache_tree *seen,
7477 struct cache_tree *nodes,
7480 struct extent_record tmpl;
7483 if (btrfs_header_level(buf) > 0)
7484 add_pending(nodes, seen, buf->start, buf->len);
7486 add_pending(pending, seen, buf->start, buf->len);
7488 memset(&tmpl, 0, sizeof(tmpl));
7489 tmpl.start = buf->start;
7494 tmpl.max_size = buf->len;
7495 add_extent_rec(extent_cache, &tmpl);
7497 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7498 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7499 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7502 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7507 /* as we fix the tree, we might be deleting blocks that
7508 * we're tracking for repair. This hook makes sure we
7509 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook - keep the fsck extent cache consistent when an extent
 * reference is dropped.
 *
 * Visible behaviour: the extent record covering bytenr / num_bytes is looked
 * up in root->fs_info->fsck_extent_cache.  An owner at or above
 * BTRFS_FIRST_FREE_OBJECTID is treated as data: the matching data backref's
 * found_ref and num_refs counters, and the record's refs and
 * extent_item_refs, are reduced by refs_to_drop.  Otherwise the matching
 * tree backref has its found_ref and found_extent_tree bits cleared and
 * extent_item_refs is decremented.  maybe_free_extent_rec() is called on the
 * record at the end.
 *
 * NOTE(review): both branches unlink the backref when found_extent_tree is
 * clear and found_ref is still set.  In the tree branch found_ref has just
 * been cleared on the visible path, so that condition cannot hold there;
 * confirm whether the test was meant to be on found_ref being clear.
 * NOTE(review): the last parameter, early exits and the return statement
 * are not present in this listing.
 */
7511 static int free_extent_hook(struct btrfs_trans_handle *trans,
7512 struct btrfs_root *root,
7513 u64 bytenr, u64 num_bytes, u64 parent,
7514 u64 root_objectid, u64 owner, u64 offset,
7517 struct extent_record *rec;
7518 struct cache_extent *cache;
7520 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7522 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7523 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7527 rec = container_of(cache, struct extent_record, cache);
7529 struct data_backref *back;
7530 back = find_data_backref(rec, parent, root_objectid, owner,
7531 offset, 1, bytenr, num_bytes);
7534 if (back->node.found_ref) {
7535 back->found_ref -= refs_to_drop;
7537 rec->refs -= refs_to_drop;
7539 if (back->node.found_extent_tree) {
7540 back->num_refs -= refs_to_drop;
7541 if (rec->extent_item_refs)
7542 rec->extent_item_refs -= refs_to_drop;
7544 if (back->found_ref == 0)
7545 back->node.found_ref = 0;
7546 if (back->num_refs == 0)
7547 back->node.found_extent_tree = 0;
7549 if (!back->node.found_extent_tree && back->node.found_ref) {
7550 list_del(&back->node.list);
7554 struct tree_backref *back;
7555 back = find_tree_backref(rec, parent, root_objectid);
7558 if (back->node.found_ref) {
7561 back->node.found_ref = 0;
7563 if (back->node.found_extent_tree) {
7564 if (rec->extent_item_refs)
7565 rec->extent_item_refs--;
7566 back->node.found_extent_tree = 0;
7568 if (!back->node.found_extent_tree && back->node.found_ref) {
7569 list_del(&back->node.list);
7573 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records - remove the extent-tree items recorded for bytenr.
 *
 * Visible behaviour: root->fs_info->extent_root is searched with a key whose
 * objectid is bytenr and whose offset is -1.  An item whose objectid is not
 * bytenr ends the handling of that slot.  An item whose type is none of
 * EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF, EXTENT_DATA_REF,
 * EXTENT_REF_V0, SHARED_BLOCK_REF or SHARED_DATA_REF is skipped by moving
 * the search key just below it (type - 1 with offset -1, or offset - 1 when
 * the type is already 0).  Any other item is announced on stderr and
 * removed with btrfs_del_item().  After removing an EXTENT_ITEM or
 * METADATA_ITEM, btrfs_update_block_group() is called with the item's size:
 * the key offset for EXTENT_ITEM, root->nodesize for METADATA_ITEM.
 *
 * NOTE(review): the loop header, the initial key type, any use of new_len
 * and the return statements are not present in this listing.
 */
7578 static int delete_extent_records(struct btrfs_trans_handle *trans,
7579 struct btrfs_root *root,
7580 struct btrfs_path *path,
7581 u64 bytenr, u64 new_len)
7583 struct btrfs_key key;
7584 struct btrfs_key found_key;
7585 struct extent_buffer *leaf;
7590 key.objectid = bytenr;
7592 key.offset = (u64)-1;
7595 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7602 if (path->slots[0] == 0)
7608 leaf = path->nodes[0];
7609 slot = path->slots[0];
7611 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7612 if (found_key.objectid != bytenr)
7615 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7616 found_key.type != BTRFS_METADATA_ITEM_KEY &&
7617 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7618 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7619 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7620 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7621 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7622 btrfs_release_path(path);
7623 if (found_key.type == 0) {
7624 if (found_key.offset == 0)
7626 key.offset = found_key.offset - 1;
7627 key.type = found_key.type;
7629 key.type = found_key.type - 1;
7630 key.offset = (u64)-1;
7634 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7635 found_key.objectid, found_key.type, found_key.offset);
7637 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7640 btrfs_release_path(path);
7642 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7643 found_key.type == BTRFS_METADATA_ITEM_KEY) {
7644 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7645 found_key.offset : root->nodesize;
7647 ret = btrfs_update_block_group(trans, root, bytenr,
7654 btrfs_release_path(path);
7659 * for a single backref, this will allocate a new extent
7660 * and add the backref to it.
/*
 * record_extent - insert an extent item for rec and add the reference
 * described by back.
 *
 * Visible behaviour: rec->max_size is raised to at least the extent root's
 * nodesize.  An EXTENT_ITEM keyed (rec->start, rec->max_size) is inserted
 * into the extent tree with zero refs and rec->generation.  Data extents
 * get BTRFS_EXTENT_FLAG_DATA.  Tree blocks get a btrfs_tree_block_info
 * placed right after the extent item, whose key is built in the local
 * copy_key (objectid rec->info_objectid, type 0, offset 0) and whose level
 * is rec->info_level, plus BTRFS_EXTENT_FLAG_TREE_BLOCK ORed with flags.
 * The block group accounting is updated, and btrfs_inc_extent_ref() then
 * adds the reference: once per dback->found_ref for data, once for a tree
 * block.  A summary of the added backref is printed to stderr.
 *
 * The setters take the address of copy_key; the listing had that operand
 * corrupted into a copyright sign, which is restored to &copy_key here.
 *
 * NOTE(review): the conditions guarding the insertion (presumably on
 * allocated and back->is_data), the error jumps and the return statement
 * are not present in this listing.
 */
7662 static int record_extent(struct btrfs_trans_handle *trans,
7663 struct btrfs_fs_info *info,
7664 struct btrfs_path *path,
7665 struct extent_record *rec,
7666 struct extent_backref *back,
7667 int allocated, u64 flags)
7670 struct btrfs_root *extent_root = info->extent_root;
7671 struct extent_buffer *leaf;
7672 struct btrfs_key ins_key;
7673 struct btrfs_extent_item *ei;
7674 struct data_backref *dback;
7675 struct btrfs_tree_block_info *bi;
7678 rec->max_size = max_t(u64, rec->max_size,
7679 info->extent_root->nodesize);
7682 u32 item_size = sizeof(*ei);
7685 item_size += sizeof(*bi);
7687 ins_key.objectid = rec->start;
7688 ins_key.offset = rec->max_size;
7689 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7691 ret = btrfs_insert_empty_item(trans, extent_root, path,
7692 &ins_key, item_size);
7696 leaf = path->nodes[0];
7697 ei = btrfs_item_ptr(leaf, path->slots[0],
7698 struct btrfs_extent_item);
7700 btrfs_set_extent_refs(leaf, ei, 0);
7701 btrfs_set_extent_generation(leaf, ei, rec->generation);
7703 if (back->is_data) {
7704 btrfs_set_extent_flags(leaf, ei,
7705 BTRFS_EXTENT_FLAG_DATA);
7707 struct btrfs_disk_key copy_key;
7709 bi = (struct btrfs_tree_block_info *)(ei + 1);
7710 memset_extent_buffer(leaf, 0, (unsigned long)bi,
7713 btrfs_set_disk_key_objectid(&copy_key,
7714 rec->info_objectid);
7715 btrfs_set_disk_key_type(&copy_key, 0);
7716 btrfs_set_disk_key_offset(&copy_key, 0);
7718 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7719 btrfs_set_tree_block_key(leaf, bi, &copy_key);
7721 btrfs_set_extent_flags(leaf, ei,
7722 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7725 btrfs_mark_buffer_dirty(leaf);
7726 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7727 rec->max_size, 1, 0);
7730 btrfs_release_path(path);
7733 if (back->is_data) {
7737 dback = to_data_backref(back);
7738 if (back->full_backref)
7739 parent = dback->parent;
7743 for (i = 0; i < dback->found_ref; i++) {
7744 /* if parent != 0, we're doing a full backref
7745 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7746 * just makes the backref allocator create a data
7749 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7750 rec->start, rec->max_size,
7754 BTRFS_FIRST_FREE_OBJECTID :
7760 fprintf(stderr, "adding new data backref"
7761 " on %llu %s %llu owner %llu"
7762 " offset %llu found %d\n",
7763 (unsigned long long)rec->start,
7764 back->full_backref ?
7766 back->full_backref ?
7767 (unsigned long long)parent :
7768 (unsigned long long)dback->root,
7769 (unsigned long long)dback->owner,
7770 (unsigned long long)dback->offset,
7774 struct tree_backref *tback;
7776 tback = to_tree_backref(back);
7777 if (back->full_backref)
7778 parent = tback->parent;
7782 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7783 rec->start, rec->max_size,
7784 parent, tback->root, 0, 0);
7785 fprintf(stderr, "adding new tree backref on "
7786 "start %llu len %llu parent %llu root %llu\n",
7787 rec->start, rec->max_size, parent, tback->root);
7790 btrfs_release_path(path);
/*
 * find_entry - walk the entries list looking for an extent_entry whose
 * bytenr and bytes both equal the given values.
 *
 * NOTE(review): the return statements are not present in this listing;
 * presumably the matching entry is returned, or NULL when none matches.
 */
7794 static struct extent_entry *find_entry(struct list_head *entries,
7795 u64 bytenr, u64 bytes)
7797 struct extent_entry *entry = NULL;
7799 list_for_each_entry(entry, entries, list) {
7800 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - choose the entry that the backrefs most agree on.
 *
 * Visible behaviour: the entries list is walked while tracking the current
 * best and the previous entry.  An entry whose broken count equals its
 * total count is not trusted.  A tie in count with the current best, or
 * with an unbroken previous entry, means the choice is not yet settled.
 * Otherwise the candidate with the higher count is preferred.
 *
 * NOTE(review): most statements executed after each test, and the return
 * statement, are not present in this listing; the description above covers
 * only the visible comparisons.
 */
7807 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7809 struct extent_entry *entry, *best = NULL, *prev = NULL;
7811 list_for_each_entry(entry, entries, list) {
7813 * If there are as many broken entries as entries then we know
7814 * not to trust this particular entry.
7816 if (entry->broken == entry->count)
7820 * Special case, when there are only two entries and 'best' is
7830 * If our current entry == best then we can't be sure our best
7831 * is really the best, so we need to keep searching.
7833 if (best && best->count == entry->count) {
7839 /* Prev == entry, not good enough, have to keep searching */
7840 if (!prev->broken && prev->count == entry->count)
7844 best = (prev->count > entry->count) ? prev : entry;
7845 else if (best->count < entry->count)
/*
 * repair_ref - rewrite one file extent item so that it agrees with entry.
 *
 * Visible behaviour:
 *  1. The fs root named by dback->root is read with btrfs_read_fs_root().
 *  2. The root is searched from (dback->owner, EXTENT_DATA, dback->offset)
 *     for a file extent item whose disk_bytenr and disk_num_bytes equal
 *     dback->disk_bytenr and dback->bytes.
 *  3. A transaction is started and the key that was found is searched again
 *     with cow enabled.
 *  4. A compressed extent whose start differs from entry->bytenr is
 *     refused, as is a ref that would end past entry->bytenr + entry->bytes
 *     or start before entry->bytenr; each case prints a request for a
 *     btrfs-image.
 *  5. Otherwise disk_bytenr is set to entry->bytenr (with the file extent
 *     offset adjusted in the branches that need it), disk_num_bytes is set
 *     to entry->bytes, and ram_bytes is set as well for uncompressed
 *     extents.
 *  6. The transaction is committed; the function returns ret when it is
 *     non-zero and the commit status otherwise.
 *
 * NOTE(review): this listing omits several of the function's lines (loop
 * header, slot stepping, error jumps), so the exact search loop and the
 * early-exit paths cannot be confirmed from here.
 */
7853 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7854 struct data_backref *dback, struct extent_entry *entry)
7856 struct btrfs_trans_handle *trans;
7857 struct btrfs_root *root;
7858 struct btrfs_file_extent_item *fi;
7859 struct extent_buffer *leaf;
7860 struct btrfs_key key;
7864 key.objectid = dback->root;
7865 key.type = BTRFS_ROOT_ITEM_KEY;
7866 key.offset = (u64)-1;
7867 root = btrfs_read_fs_root(info, &key);
7869 fprintf(stderr, "Couldn't find root for our ref\n");
7874 * The backref points to the original offset of the extent if it was
7875 * split, so we need to search down to the offset we have and then walk
7876 * forward until we find the backref we're looking for.
7878 key.objectid = dback->owner;
7879 key.type = BTRFS_EXTENT_DATA_KEY;
7880 key.offset = dback->offset;
7881 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7883 fprintf(stderr, "Error looking up ref %d\n", ret);
7888 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7889 ret = btrfs_next_leaf(root, path);
7891 fprintf(stderr, "Couldn't find our ref, next\n");
7895 leaf = path->nodes[0];
7896 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7897 if (key.objectid != dback->owner ||
7898 key.type != BTRFS_EXTENT_DATA_KEY) {
7899 fprintf(stderr, "Couldn't find our ref, search\n");
7902 fi = btrfs_item_ptr(leaf, path->slots[0],
7903 struct btrfs_file_extent_item);
7904 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7905 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7907 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7912 btrfs_release_path(path);
7914 trans = btrfs_start_transaction(root, 1);
7916 return PTR_ERR(trans);
7919 * Ok we have the key of the file extent we want to fix, now we can cow
7920 * down to the thing and fix it.
7922 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7924 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7925 key.objectid, key.type, key.offset, ret);
7929 fprintf(stderr, "Well that's odd, we just found this key "
7930 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7935 leaf = path->nodes[0];
7936 fi = btrfs_item_ptr(leaf, path->slots[0],
7937 struct btrfs_file_extent_item);
7939 if (btrfs_file_extent_compression(leaf, fi) &&
7940 dback->disk_bytenr != entry->bytenr) {
7941 fprintf(stderr, "Ref doesn't match the record start and is "
7942 "compressed, please take a btrfs-image of this file "
7943 "system and send it to a btrfs developer so they can "
7944 "complete this functionality for bytenr %Lu\n",
7945 dback->disk_bytenr);
7950 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7951 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7952 } else if (dback->disk_bytenr > entry->bytenr) {
7953 u64 off_diff, offset;
7955 off_diff = dback->disk_bytenr - entry->bytenr;
7956 offset = btrfs_file_extent_offset(leaf, fi);
7957 if (dback->disk_bytenr + offset +
7958 btrfs_file_extent_num_bytes(leaf, fi) >
7959 entry->bytenr + entry->bytes) {
7960 fprintf(stderr, "Ref is past the entry end, please "
7961 "take a btrfs-image of this file system and "
7962 "send it to a btrfs developer, ref %Lu\n",
7963 dback->disk_bytenr);
7968 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7969 btrfs_set_file_extent_offset(leaf, fi, offset);
7970 } else if (dback->disk_bytenr < entry->bytenr) {
7973 offset = btrfs_file_extent_offset(leaf, fi);
7974 if (dback->disk_bytenr + offset < entry->bytenr) {
7975 fprintf(stderr, "Ref is before the entry start, please"
7976 " take a btrfs-image of this file system and "
7977 "send it to a btrfs developer, ref %Lu\n",
7978 dback->disk_bytenr);
7983 offset += dback->disk_bytenr;
7984 offset -= entry->bytenr;
7985 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7986 btrfs_set_file_extent_offset(leaf, fi, offset);
7989 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7992 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7993 * only do this if we aren't using compression, otherwise it's a
7996 if (!btrfs_file_extent_compression(leaf, fi))
7997 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7999 printf("ram bytes may be wrong?\n");
8000 btrfs_mark_buffer_dirty(leaf);
8002 err = btrfs_commit_transaction(trans, root);
8003 btrfs_release_path(path);
8004 return ret ? ret : err;
/*
 * verify_backrefs - make the data backrefs of rec agree on bytenr and size.
 *
 * Visible behaviour: a temporary list of distinct (disk_bytenr, bytes)
 * pairs is built from the backrefs of rec that are data backrefs, are not
 * full backrefs, and have a non-zero found_ref.  A backref that differs
 * from (rec->start, rec->nr) or is marked broken counts as a mismatch.
 * With at most one pair and no mismatch nothing needs repairing.
 * Otherwise find_most_right_entry() picks the pair to trust.  When it
 * cannot, the extent record's own (rec->start, rec->nr) is looked up in the
 * list, or added to it when the backrefs were broken and the record was
 * found, and the choice is retried.  A winner whose bytenr differs from
 * rec->start is rejected with a request for a btrfs-image.  Every remaining
 * backref that disagrees with the winner is passed to repair_ref().  The
 * temporary list is emptied before the function ends.
 *
 * NOTE(review): this listing omits several of the function's lines
 * (declarations, counters, error jumps, returns), so the return values
 * cannot be confirmed from here.
 */
8007 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8008 struct extent_record *rec)
8010 struct extent_backref *back;
8011 struct data_backref *dback;
8012 struct extent_entry *entry, *best = NULL;
8015 int broken_entries = 0;
8020 * Metadata is easy and the backrefs should always agree on bytenr and
8021 * size, if not we've got bigger issues.
8026 list_for_each_entry(back, &rec->backrefs, list) {
8027 if (back->full_backref || !back->is_data)
8030 dback = to_data_backref(back);
8033 * We only pay attention to backrefs that we found a real
8036 if (dback->found_ref == 0)
8040 * For now we only catch when the bytes don't match, not the
8041 * bytenr. We can easily do this at the same time, but I want
8042 * to have a fs image to test on before we just add repair
8043 * functionality willy-nilly so we know we won't screw up the
8047 entry = find_entry(&entries, dback->disk_bytenr,
8050 entry = malloc(sizeof(struct extent_entry));
8055 memset(entry, 0, sizeof(*entry));
8056 entry->bytenr = dback->disk_bytenr;
8057 entry->bytes = dback->bytes;
8058 list_add_tail(&entry->list, &entries);
8063 * If we only have on entry we may think the entries agree when
8064 * in reality they don't so we have to do some extra checking.
8066 if (dback->disk_bytenr != rec->start ||
8067 dback->bytes != rec->nr || back->broken)
8078 /* Yay all the backrefs agree, carry on good sir */
8079 if (nr_entries <= 1 && !mismatch)
8082 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8083 "%Lu\n", rec->start);
8086 * First we want to see if the backrefs can agree amongst themselves who
8087 * is right, so figure out which one of the entries has the highest
8090 best = find_most_right_entry(&entries);
8093 * Ok so we may have an even split between what the backrefs think, so
8094 * this is where we use the extent ref to see what it thinks.
8097 entry = find_entry(&entries, rec->start, rec->nr);
8098 if (!entry && (!broken_entries || !rec->found_rec)) {
8099 fprintf(stderr, "Backrefs don't agree with each other "
8100 "and extent record doesn't agree with anybody,"
8101 " so we can't fix bytenr %Lu bytes %Lu\n",
8102 rec->start, rec->nr);
8105 } else if (!entry) {
8107 * Ok our backrefs were broken, we'll assume this is the
8108 * correct value and add an entry for this range.
8110 entry = malloc(sizeof(struct extent_entry));
8115 memset(entry, 0, sizeof(*entry));
8116 entry->bytenr = rec->start;
8117 entry->bytes = rec->nr;
8118 list_add_tail(&entry->list, &entries);
8122 best = find_most_right_entry(&entries);
8124 fprintf(stderr, "Backrefs and extent record evenly "
8125 "split on who is right, this is going to "
8126 "require user input to fix bytenr %Lu bytes "
8127 "%Lu\n", rec->start, rec->nr);
8134 * I don't think this can happen currently as we'll abort() if we catch
8135 * this case higher up, but in case somebody removes that we still can't
8136 * deal with it properly here yet, so just bail out of that's the case.
8138 if (best->bytenr != rec->start) {
8139 fprintf(stderr, "Extent start and backref starts don't match, "
8140 "please use btrfs-image on this file system and send "
8141 "it to a btrfs developer so they can make fsck fix "
8142 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8143 rec->start, rec->nr);
8149 * Ok great we all agreed on an extent record, let's go find the real
8150 * references and fix up the ones that don't match.
8152 list_for_each_entry(back, &rec->backrefs, list) {
8153 if (back->full_backref || !back->is_data)
8156 dback = to_data_backref(back);
8159 * Still ignoring backrefs that don't have a real ref attached
8162 if (dback->found_ref == 0)
8165 if (dback->bytes == best->bytes &&
8166 dback->disk_bytenr == best->bytenr)
8169 ret = repair_ref(info, path, dback, best);
8175 * Ok we messed with the actual refs, which means we need to drop our
8176 * entire cache and go back and rescan. I know this is a huge pain and
8177 * adds a lot of extra work, but it's the only way to be safe. Once all
8178 * the backrefs agree we may not need to do anything to the extent
8183 while (!list_empty(&entries)) {
8184 entry = list_entry(entries.next, struct extent_entry, list);
8185 list_del_init(&entry->list);
/*
 * process_duplicates - promote the duplicate of @rec to be the cached record.
 *
 * Stops early when @rec already has found_rec set or has more than one
 * duplicate (the value returned on that path is not shown in this listing).
 * Otherwise @rec is dropped from @extent_cache and the first entry of
 * rec->dups ("good") is re-initialised, inherits rec->refs and
 * rec->backrefs, and is inserted into the cache.  Cached records returned by
 * the lookup at good->start are either chained onto good->dups (when they
 * have found_rec set or carry duplicates of their own) or have their refs
 * and backrefs merged into good.
 *
 * Returns 0 if good ended up with duplicates, 1 if it did not.
 *
 * NOTE(review): brace-only and other short lines are absent from this
 * listing, so loop bounds and error checks must be confirmed against the
 * complete function.
 */
8191 static int process_duplicates(struct btrfs_root *root,
8192 struct cache_tree *extent_cache,
8193 struct extent_record *rec)
8195 struct extent_record *good, *tmp;
8196 struct cache_extent *cache;
8200 * If we found a extent record for this extent then return, or if we
8201 * have more than one duplicate we are likely going to need to delete
8204 if (rec->found_rec || rec->num_duplicates > 1)
8207 /* Shouldn't happen but just in case */
8208 BUG_ON(!rec->num_duplicates);
8211 * So this happens if we end up with a backref that doesn't match the
8212 * actual extent entry. So either the backref is bad or the extent
8213 * entry is bad. Either way we want to have the extent_record actually
8214 * reflect what we found in the extent_tree, so we need to take the
8215 * duplicate out and use that as the extent_record since the only way we
8216 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8218 remove_cache_extent(extent_cache, &rec->cache);
/* Re-initialise the duplicate so it can stand in for rec. */
8220 good = to_extent_record(rec->dups.next);
8221 list_del_init(&good->list);
8222 INIT_LIST_HEAD(&good->backrefs);
8223 INIT_LIST_HEAD(&good->dups);
8224 good->cache.start = good->start;
8225 good->cache.size = good->nr;
8226 good->content_checked = 0;
8227 good->owner_ref_checked = 0;
8228 good->num_duplicates = 0;
8229 good->refs = rec->refs;
8230 list_splice_init(&rec->backrefs, &good->backrefs);
8232 cache = lookup_cache_extent(extent_cache, good->start,
8236 tmp = container_of(cache, struct extent_record, cache);
8239 * If we find another overlapping extent and it's found_rec is
8240 * set then it's a duplicate and we need to try and delete
8243 if (tmp->found_rec || tmp->num_duplicates > 0) {
8244 if (list_empty(&good->list))
8245 list_add_tail(&good->list,
8246 &duplicate_extents);
/* tmp and everything already hanging off tmp become duplicates of good. */
8247 good->num_duplicates += tmp->num_duplicates + 1;
8248 list_splice_init(&tmp->dups, &good->dups);
8249 list_del_init(&tmp->list);
8250 list_add_tail(&tmp->list, &good->dups);
8251 remove_cache_extent(extent_cache, &tmp->cache);
8256 * Ok we have another non extent item backed extent rec, so lets
8257 * just add it to this extent and carry on like we did above.
8259 good->refs += tmp->refs;
8260 list_splice_init(&tmp->backrefs, &good->backrefs);
8261 remove_cache_extent(extent_cache, &tmp->cache);
8264 ret = insert_cache_extent(extent_cache, &good->cache);
8267 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - delete redundant EXTENT_ITEMs recorded for @rec.
 *
 * Compares @rec's duplicates against the current best candidate ("good"),
 * complaining about overlapping extents that do not fully cover each other.
 * @rec and entries of rec->dups are then queued on a local delete list (the
 * conditions guarding the queueing are not shown in this listing).  Inside
 * one transaction on the extent root, the
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) item of every queued record that has
 * found_rec set is searched for and deleted.  Both lists are drained before
 * returning.
 *
 * Returns ret when it is non-zero, otherwise nr_del.  The caller's comment
 * describes nr_del as the number of entries deleted; its updates are not
 * visible here -- TODO confirm.
 */
8270 static int delete_duplicate_records(struct btrfs_root *root,
8271 struct extent_record *rec)
8273 struct btrfs_trans_handle *trans;
8274 LIST_HEAD(delete_list);
8275 struct btrfs_path path;
8276 struct extent_record *tmp, *good, *n;
8279 struct btrfs_key key;
8281 btrfs_init_path(&path);
8284 /* Find the record that covers all of the duplicates. */
8285 list_for_each_entry(tmp, &rec->dups, list) {
8286 if (good->start < tmp->start)
8288 if (good->nr > tmp->nr)
8291 if (tmp->start + tmp->nr < good->start + good->nr) {
8292 fprintf(stderr, "Ok we have overlapping extents that "
8293 "aren't completely covered by each other, this "
8294 "is going to require more careful thought. "
8295 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8296 tmp->start, tmp->nr, good->start, good->nr);
8303 list_add_tail(&rec->list, &delete_list);
8305 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8308 list_move_tail(&tmp->list, &delete_list);
/* All deletions below go through the extent root. */
8311 root = root->fs_info->extent_root;
8312 trans = btrfs_start_transaction(root, 1);
8313 if (IS_ERR(trans)) {
8314 ret = PTR_ERR(trans);
8318 list_for_each_entry(tmp, &delete_list, list) {
8319 if (tmp->found_rec == 0)
8321 key.objectid = tmp->start;
8322 key.type = BTRFS_EXTENT_ITEM_KEY;
8323 key.offset = tmp->nr;
8325 /* Shouldn't happen but just in case */
8326 if (tmp->metadata) {
8327 fprintf(stderr, "Well this shouldn't happen, extent "
8328 "record overlaps but is metadata? "
8329 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8333 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8339 ret = btrfs_del_item(trans, root, &path);
8342 btrfs_release_path(&path);
8345 err = btrfs_commit_transaction(trans, root);
8349 while (!list_empty(&delete_list)) {
8350 tmp = to_extent_record(delete_list.next);
8351 list_del_init(&tmp->list);
8357 while (!list_empty(&rec->dups)) {
8358 tmp = to_extent_record(rec->dups.next);
8359 list_del_init(&tmp->list);
8363 btrfs_release_path(&path);
8365 if (!ret && !nr_del)
8366 rec->num_duplicates = 0;
8368 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - look up file extents for unresolved data backrefs.
 *
 * For every backref on @rec that is a data backref, is not a full backref
 * and does not yet have found_ref set, the owning fs root is read and the
 * (owner, BTRFS_EXTENT_DATA_KEY, offset) item is searched for in it.  The
 * file extent's disk bytenr and disk num bytes are then stored in the
 * backref and its found_ref count is incremented, subject to the checks
 * around the @extent_cache lookup (partly hidden in this listing).
 *
 * A root whose read fails with -ENOENT is skipped; the PTR_ERR() of any
 * other root read failure is returned to the caller.
 */
8371 static int find_possible_backrefs(struct btrfs_fs_info *info,
8372 struct btrfs_path *path,
8373 struct cache_tree *extent_cache,
8374 struct extent_record *rec)
8376 struct btrfs_root *root;
8377 struct extent_backref *back;
8378 struct data_backref *dback;
8379 struct cache_extent *cache;
8380 struct btrfs_file_extent_item *fi;
8381 struct btrfs_key key;
8385 list_for_each_entry(back, &rec->backrefs, list) {
8386 /* Don't care about full backrefs (poor unloved backrefs) */
8387 if (back->full_backref || !back->is_data)
8390 dback = to_data_backref(back);
8392 /* We found this one, we don't need to do a lookup */
8393 if (dback->found_ref)
8396 key.objectid = dback->root;
8397 key.type = BTRFS_ROOT_ITEM_KEY;
8398 key.offset = (u64)-1;
8400 root = btrfs_read_fs_root(info, &key);
8402 /* No root, definitely a bad ref, skip */
8403 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8405 /* Other err, exit */
8407 return PTR_ERR(root);
/* Look for the file extent item this backref claims to come from. */
8409 key.objectid = dback->owner;
8410 key.type = BTRFS_EXTENT_DATA_KEY;
8411 key.offset = dback->offset;
8412 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8414 btrfs_release_path(path);
8417 /* Didn't find it, we can carry on */
8422 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8423 struct btrfs_file_extent_item);
8424 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8425 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8426 btrfs_release_path(path);
8427 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8429 struct extent_record *tmp;
8430 tmp = container_of(cache, struct extent_record, cache);
8433 * If we found an extent record for the bytenr for this
8434 * particular backref then we can't add it to our
8435 * current extent record. We only want to add backrefs
8436 * that don't have a corresponding extent item in the
8437 * extent tree since they likely belong to this record
8438 * and we need to fix it if it doesn't match bytenrs.
8444 dback->found_ref += 1;
8445 dback->disk_bytenr = bytenr;
8446 dback->bytes = bytes;
8449 * Set this so the verify backref code knows not to trust the
8450 * values in this backref.
8459 * Record orphan data ref into corresponding root.
8461 * Return 0 if the extent item contains data ref and recorded.
8462 * Return 1 if the extent item contains no useful data ref
8463 * On that case, it may contains only shared_dataref or metadata backref
8464 * or the file extent exists(this should be handled by the extent bytenr
8466 * Return <0 if something goes wrong.
8468 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8469 struct extent_record *rec)
8471 struct btrfs_key key;
8472 struct btrfs_root *dest_root;
8473 struct extent_backref *back;
8474 struct data_backref *dback;
8475 struct orphan_data_extent *orphan;
8476 struct btrfs_path path;
8477 int recorded_data_ref = 0;
8482 btrfs_init_path(&path);
8483 list_for_each_entry(back, &rec->backrefs, list) {
8484 if (back->full_backref || !back->is_data ||
8485 !back->found_extent_tree)
8487 dback = to_data_backref(back);
8488 if (dback->found_ref)
8490 key.objectid = dback->root;
8491 key.type = BTRFS_ROOT_ITEM_KEY;
8492 key.offset = (u64)-1;
8494 dest_root = btrfs_read_fs_root(fs_info, &key);
8496 /* For non-exist root we just skip it */
8497 if (IS_ERR(dest_root) || !dest_root)
8500 key.objectid = dback->owner;
8501 key.type = BTRFS_EXTENT_DATA_KEY;
8502 key.offset = dback->offset;
8504 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8505 btrfs_release_path(&path);
8507 * For ret < 0, it's OK since the fs-tree may be corrupted,
8508 * we need to record it for inode/file extent rebuild.
8509 * For ret > 0, we record it only for file extent rebuild.
8510 * For ret == 0, the file extent exists but only bytenr
8511 * mismatch, let the original bytenr fix routine to handle,
8517 orphan = malloc(sizeof(*orphan));
8522 INIT_LIST_HEAD(&orphan->list);
8523 orphan->root = dback->root;
8524 orphan->objectid = dback->owner;
8525 orphan->offset = dback->offset;
8526 orphan->disk_bytenr = rec->cache.start;
8527 orphan->disk_len = rec->cache.size;
8528 list_add(&dest_root->orphan_data_extents, &orphan->list);
8529 recorded_data_ref = 1;
8532 btrfs_release_path(&path);
8534 return !recorded_data_ref;
8540 * when an incorrect extent item is found, this will delete
8541 * all of the existing entries for it and recreate them
8542 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree entries of @rec.
 *
 * Visible sequence: for a non-metadata record whose ref count disagrees with
 * the extent item, gather stray backrefs (find_possible_backrefs) and make
 * the backrefs agree (verify_backrefs).  Then, in one transaction on the
 * extent root, delete the existing records for rec->start / rec->max_size,
 * look the range up in info->corrupt_blocks, and call record_extent() for
 * the backrefs that have found_ref set.
 *
 * NOTE(review): the error checks, the handling of a corrupt-block hit, the
 * declarations of `flags`/`allocated` and the final return are not shown in
 * this listing.
 */
8544 static int fixup_extent_refs(struct btrfs_fs_info *info,
8545 struct cache_tree *extent_cache,
8546 struct extent_record *rec)
8548 struct btrfs_trans_handle *trans = NULL;
8550 struct btrfs_path path;
8551 struct list_head *cur = rec->backrefs.next;
8552 struct cache_extent *cache;
8553 struct extent_backref *back;
8557 if (rec->flag_block_full_backref)
8558 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8560 btrfs_init_path(&path);
8561 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8563 * Sometimes the backrefs themselves are so broken they don't
8564 * get attached to any meaningful rec, so first go back and
8565 * check any of our backrefs that we couldn't find and throw
8566 * them into the list if we find the backref so that
8567 * verify_backrefs can figure out what to do.
8569 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8574 /* step one, make sure all of the backrefs agree */
8575 ret = verify_backrefs(info, &path, rec);
8579 trans = btrfs_start_transaction(info->extent_root, 1);
8580 if (IS_ERR(trans)) {
8581 ret = PTR_ERR(trans);
8585 /* step two, delete all the existing records */
8586 ret = delete_extent_records(trans, info->extent_root, &path,
8587 rec->start, rec->max_size);
8592 /* was this block corrupt? If so, don't add references to it */
8593 cache = lookup_cache_extent(info->corrupt_blocks,
8594 rec->start, rec->max_size);
8600 /* step three, recreate all the refs we did find */
8601 while(cur != &rec->backrefs) {
8602 back = to_extent_backref(cur);
8606 * if we didn't find any references, don't create a
8609 if (!back->found_ref)
8612 rec->bad_full_backref = 0;
8613 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8621 int err = btrfs_commit_transaction(trans, info->extent_root);
8626 btrfs_release_path(&path);
/*
 * fixup_extent_flags - rewrite the FULL_BACKREF flag of @rec's extent item.
 *
 * Looks the extent item up in the extent root (BTRFS_METADATA_ITEM_KEY keyed
 * by rec->info_level for metadata, BTRFS_EXTENT_ITEM_KEY keyed by
 * rec->max_size otherwise), sets or clears BTRFS_BLOCK_FLAG_FULL_BACKREF to
 * match rec->flag_block_full_backref, marks the leaf dirty and commits.
 *
 * Returns PTR_ERR() when the transaction cannot be started, or the result of
 * the final commit.  Both lookup-failure paths release the path and commit
 * the transaction; the values they return are not visible in this listing.
 */
8630 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8631 struct extent_record *rec)
8633 struct btrfs_trans_handle *trans;
8634 struct btrfs_root *root = fs_info->extent_root;
8635 struct btrfs_path path;
8636 struct btrfs_extent_item *ei;
8637 struct btrfs_key key;
8641 key.objectid = rec->start;
8642 if (rec->metadata) {
8643 key.type = BTRFS_METADATA_ITEM_KEY;
8644 key.offset = rec->info_level;
8646 key.type = BTRFS_EXTENT_ITEM_KEY;
8647 key.offset = rec->max_size;
8650 trans = btrfs_start_transaction(root, 0);
8652 return PTR_ERR(trans);
8654 btrfs_init_path(&path);
8655 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8657 btrfs_release_path(&path);
8658 btrfs_commit_transaction(trans, root);
8661 fprintf(stderr, "Didn't find extent for %llu\n",
8662 (unsigned long long)rec->start);
8663 btrfs_release_path(&path);
8664 btrfs_commit_transaction(trans, root);
/* Item found: flip only the FULL_BACKREF bit and write the flags back. */
8668 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8669 struct btrfs_extent_item);
8670 flags = btrfs_extent_flags(path.nodes[0], ei);
8671 if (rec->flag_block_full_backref) {
8672 fprintf(stderr, "setting full backref on %llu\n",
8673 (unsigned long long)key.objectid);
8674 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8676 fprintf(stderr, "clearing full backref on %llu\n",
8677 (unsigned long long)key.objectid);
8678 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8680 btrfs_set_extent_flags(path.nodes[0], ei, flags);
8681 btrfs_mark_buffer_dirty(path.nodes[0]);
8682 btrfs_release_path(&path);
8683 return btrfs_commit_transaction(trans, root);
8686 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - drop the parent pointer to one corrupt extent-tree block.
 *
 * Searches the extent root for corrupt->key with path.lowest_level set to
 * corrupt->level + 1, so the search stops at the parent of the bad block.
 * The slot the search landed on is tried first; if its block pointer is not
 * corrupt->cache.start the whole node is scanned for a matching pointer.
 * A matching pointer is removed with btrfs_del_ptr().
 *
 * NOTE(review): the jumps between the lookup, the "not found" handling
 * (including the test against the extent root node) and the final return
 * are not visible in this listing.
 */
8687 static int prune_one_block(struct btrfs_trans_handle *trans,
8688 struct btrfs_fs_info *info,
8689 struct btrfs_corrupt_block *corrupt)
8692 struct btrfs_path path;
8693 struct extent_buffer *eb;
8697 int level = corrupt->level + 1;
8699 btrfs_init_path(&path);
8701 /* we want to stop at the parent to our busted block */
8702 path.lowest_level = level;
8704 ret = btrfs_search_slot(trans, info->extent_root,
8705 &corrupt->key, &path, -1, 1);
8710 eb = path.nodes[level];
8717 * hopefully the search gave us the block we want to prune,
8718 * lets try that first
8720 slot = path.slots[level];
8721 found = btrfs_node_blockptr(eb, slot);
8722 if (found == corrupt->cache.start)
8725 nritems = btrfs_header_nritems(eb);
8727 /* the search failed, lets scan this node and hope we find it */
8728 for (slot = 0; slot < nritems; slot++) {
8729 found = btrfs_node_blockptr(eb, slot);
8730 if (found == corrupt->cache.start)
8734 * we couldn't find the bad block. TODO, search all the nodes for pointers
8737 if (eb == info->extent_root->node) {
8742 btrfs_release_path(&path);
8747 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8748 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8751 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - prune the blocks recorded in info->corrupt_blocks.
 *
 * Takes entries from info->corrupt_blocks starting at offset 0, hands each
 * one to prune_one_block() under a transaction on the extent root and then
 * removes it from the cache.  The result of prune_one_block() is not
 * examined here.
 *
 * Returns PTR_ERR() when the transaction cannot be started, or the result of
 * committing it.  The loop structure and the return taken when nothing was
 * pruned are not visible in this listing -- TODO confirm.
 */
8755 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8757 struct btrfs_trans_handle *trans = NULL;
8758 struct cache_extent *cache;
8759 struct btrfs_corrupt_block *corrupt;
8762 cache = search_cache_extent(info->corrupt_blocks, 0);
8766 trans = btrfs_start_transaction(info->extent_root, 1);
8768 return PTR_ERR(trans);
8770 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8771 prune_one_block(trans, info, corrupt);
8772 remove_cache_extent(info->corrupt_blocks, cache);
8775 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - discard cached free-space state.
 *
 * Clears the EXTENT_DIRTY ranges found in fs_info->free_space_cache, then
 * walks the block groups by looking up the first block group at `start` and
 * advancing `start` past its end (key.objectid + key.offset).
 *
 * NOTE(review): what is reset on each block group, and the loop/termination
 * conditions, are not visible in this listing.
 */
8779 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8781 struct btrfs_block_group_cache *cache;
8786 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8787 &start, &end, EXTENT_DIRTY);
8790 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8796 cache = btrfs_lookup_first_block_group(fs_info, start);
8801 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - verify (and optionally repair) every extent record.
 *
 * Phases visible in this listing:
 *  1. Mark every cached extent record and every corrupt block dirty in
 *     fs_info->excluded_extents, prune the corrupt blocks and reset the
 *     cached block groups (per the in-line comment this is the repair
 *     set-up, so repairs do not allocate from problem extents).
 *  2. While repairing, drain the global duplicate_extents list through
 *     process_duplicates() / delete_duplicate_records().
 *  3. For each record in @extent_cache report: multiple extent items, ref
 *     count mismatch, backpointer mismatch, failed owner ref check, bad full
 *     backref, metadata crossing a stripe boundary, and chunk type mismatch.
 *     In repair mode the first four are handed to fixup_extent_refs() or
 *     fixup_extent_flags().  The record is then removed from the cache and
 *     its backrefs freed.
 *  4. On a failure other than -EAGAIN print an abort message; otherwise
 *     fix block accounting in a transaction and commit it.
 *
 * NOTE(review): the conditions selecting these phases, the bookkeeping of
 * `fixed`, `recorded`, `cur_err` and the return values are not shown here.
 */
8805 static int check_extent_refs(struct btrfs_root *root,
8806 struct cache_tree *extent_cache)
8808 struct extent_record *rec;
8809 struct cache_extent *cache;
8818 * if we're doing a repair, we have to make sure
8819 * we don't allocate from the problem extents.
8820 * In the worst case, this will be all the
8823 cache = search_cache_extent(extent_cache, 0);
8825 rec = container_of(cache, struct extent_record, cache);
8826 set_extent_dirty(root->fs_info->excluded_extents,
8828 rec->start + rec->max_size - 1,
8830 cache = next_cache_extent(cache);
8833 /* pin down all the corrupted blocks too */
8834 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8836 set_extent_dirty(root->fs_info->excluded_extents,
8838 cache->start + cache->size - 1,
8840 cache = next_cache_extent(cache);
8842 prune_corrupt_blocks(root->fs_info);
8843 reset_cached_block_groups(root->fs_info);
8846 reset_cached_block_groups(root->fs_info);
8849 * We need to delete any duplicate entries we find first otherwise we
8850 * could mess up the extent tree when we have backrefs that actually
8851 * belong to a different extent item and not the weird duplicate one.
8853 while (repair && !list_empty(&duplicate_extents)) {
8854 rec = to_extent_record(duplicate_extents.next);
8855 list_del_init(&rec->list);
8857 /* Sometimes we can find a backref before we find an actual
8858 * extent, so we need to process it a little bit to see if there
8859 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8860 * if this is a backref screwup. If we need to delete stuff
8861 * process_duplicates() will return 0, otherwise it will return
8864 if (process_duplicates(root, extent_cache, rec))
8866 ret = delete_duplicate_records(root, rec);
8870 * delete_duplicate_records will return the number of entries
8871 * deleted, so if it's greater than 0 then we know we actually
8872 * did something and we need to remove.
/* Main pass: report (and in repair mode fix) each cached extent record. */
8886 cache = search_cache_extent(extent_cache, 0);
8889 rec = container_of(cache, struct extent_record, cache);
8890 if (rec->num_duplicates) {
8891 fprintf(stderr, "extent item %llu has multiple extent "
8892 "items\n", (unsigned long long)rec->start);
8897 if (rec->refs != rec->extent_item_refs) {
8898 fprintf(stderr, "ref mismatch on [%llu %llu] ",
8899 (unsigned long long)rec->start,
8900 (unsigned long long)rec->nr);
8901 fprintf(stderr, "extent item %llu, found %llu\n",
8902 (unsigned long long)rec->extent_item_refs,
8903 (unsigned long long)rec->refs);
8904 ret = record_orphan_data_extents(root->fs_info, rec);
8911 * we can't use the extent to repair file
8912 * extent, let the fallback method handle it.
8914 if (!fixed && repair) {
8915 ret = fixup_extent_refs(
8926 if (all_backpointers_checked(rec, 1)) {
8927 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8928 (unsigned long long)rec->start,
8929 (unsigned long long)rec->nr);
8931 if (!fixed && !recorded && repair) {
8932 ret = fixup_extent_refs(root->fs_info,
8941 if (!rec->owner_ref_checked) {
8942 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8943 (unsigned long long)rec->start,
8944 (unsigned long long)rec->nr);
8945 if (!fixed && !recorded && repair) {
8946 ret = fixup_extent_refs(root->fs_info,
8955 if (rec->bad_full_backref) {
8956 fprintf(stderr, "bad full backref, on [%llu]\n",
8957 (unsigned long long)rec->start);
8959 ret = fixup_extent_flags(root->fs_info, rec);
8968 * Although it's not a extent ref's problem, we reuse this
8969 * routine for error reporting.
8970 * No repair function yet.
8972 if (rec->crossing_stripes) {
8974 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8975 rec->start, rec->start + rec->max_size);
8980 if (rec->wrong_chunk_type) {
8982 "bad extent [%llu, %llu), type mismatch with chunk\n",
8983 rec->start, rec->start + rec->max_size);
8988 remove_cache_extent(extent_cache, cache);
8989 free_all_extent_backrefs(rec);
/* Un-exclude the range only when it was clean or has been repaired. */
8990 if (!init_extent_tree && repair && (!cur_err || fixed))
8991 clear_extent_dirty(root->fs_info->excluded_extents,
8993 rec->start + rec->max_size - 1,
8999 if (ret && ret != -EAGAIN) {
9000 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9003 struct btrfs_trans_handle *trans;
9005 root = root->fs_info->extent_root;
9006 trans = btrfs_start_transaction(root, 1);
9007 if (IS_ERR(trans)) {
9008 ret = PTR_ERR(trans);
9012 btrfs_fix_block_accounting(trans, root);
9013 ret = btrfs_commit_transaction(trans, root);
9018 fprintf(stderr, "repaired damaged extent references\n");
9024 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9028 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9029 stripe_size = length;
9030 stripe_size /= num_stripes;
9031 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9032 stripe_size = length * 2;
9033 stripe_size /= num_stripes;
9034 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9035 stripe_size = length;
9036 stripe_size /= (num_stripes - 1);
9037 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9038 stripe_size = length;
9039 stripe_size /= (num_stripes - 2);
9041 stripe_size = length;
9047 * Check the chunk with its block group/dev list ref:
9048 * Return 0 if all refs seem valid.
9049 * Return 1 if only part of the refs seem valid and a later check must rebuild
9050 * the rest, e.g. a missing block group rebuilt by searching the extent tree.
9051 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs - cross-check one chunk against its block group and its
 * device extents.
 *
 * Block group: the matching entry is looked up in @block_group_cache and its
 * offset, objectid and flags are compared with the chunk's length, offset and
 * type_flags.  A match unlinks the block group record from its list and
 * stores it in chunk_rec->bg_rec; a mismatch or a missing entry is reported.
 *
 * Device extents: for every stripe the extent at (devid, offset) with the
 * length computed by calc_stripe_length() is looked up in @dev_extent_cache
 * and compared field by field.  A match moves the device extent onto
 * chunk_rec->dextents; a mismatch or a missing extent is reported.
 *
 * NOTE(review): the fourth parameter, the conditions guarding the messages
 * and the assignments of the return value are not shown in this listing.
 */
9053 static int check_chunk_refs(struct chunk_record *chunk_rec,
9054 struct block_group_tree *block_group_cache,
9055 struct device_extent_tree *dev_extent_cache,
9058 struct cache_extent *block_group_item;
9059 struct block_group_record *block_group_rec;
9060 struct cache_extent *dev_extent_item;
9061 struct device_extent_record *dev_extent_rec;
9065 int metadump_v2 = 0;
9069 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9072 if (block_group_item) {
9073 block_group_rec = container_of(block_group_item,
9074 struct block_group_record,
9076 if (chunk_rec->length != block_group_rec->offset ||
9077 chunk_rec->offset != block_group_rec->objectid ||
9079 chunk_rec->type_flags != block_group_rec->flags)) {
9082 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9083 chunk_rec->objectid,
9088 chunk_rec->type_flags,
9089 block_group_rec->objectid,
9090 block_group_rec->type,
9091 block_group_rec->offset,
9092 block_group_rec->offset,
9093 block_group_rec->objectid,
9094 block_group_rec->flags);
9097 list_del_init(&block_group_rec->list);
9098 chunk_rec->bg_rec = block_group_rec;
9103 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9104 chunk_rec->objectid,
9109 chunk_rec->type_flags);
/* Every stripe must be backed by a device extent of the per-stripe length. */
9116 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9117 chunk_rec->num_stripes);
9118 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9119 devid = chunk_rec->stripes[i].devid;
9120 offset = chunk_rec->stripes[i].offset;
9121 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9122 devid, offset, length);
9123 if (dev_extent_item) {
9124 dev_extent_rec = container_of(dev_extent_item,
9125 struct device_extent_record,
9127 if (dev_extent_rec->objectid != devid ||
9128 dev_extent_rec->offset != offset ||
9129 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9130 dev_extent_rec->length != length) {
9133 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9134 chunk_rec->objectid,
9137 chunk_rec->stripes[i].devid,
9138 chunk_rec->stripes[i].offset,
9139 dev_extent_rec->objectid,
9140 dev_extent_rec->offset,
9141 dev_extent_rec->length);
9144 list_move(&dev_extent_rec->chunk_list,
9145 &chunk_rec->dextents);
9150 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9151 chunk_rec->objectid,
9154 chunk_rec->stripes[i].devid,
9155 chunk_rec->stripes[i].offset);
9162 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - run check_chunk_refs() over every chunk in @chunk_cache.
 *
 * Each chunk record is appended to one of the optional result lists: @good
 * when check_chunk_refs() returned 0, @rebuild when it returned a positive
 * value, and @bad otherwise (the condition guarding the @bad insertion is
 * not shown in this listing).  Afterwards the block groups still linked on
 * block_group_cache->block_groups and the device extents on
 * dev_extent_cache->no_chunk_orphans are reported as having no chunk.
 *
 * @silent is forwarded to check_chunk_refs(); how it gates the messages
 * printed here, and the return value, are not visible -- TODO confirm.
 */
9163 int check_chunks(struct cache_tree *chunk_cache,
9164 struct block_group_tree *block_group_cache,
9165 struct device_extent_tree *dev_extent_cache,
9166 struct list_head *good, struct list_head *bad,
9167 struct list_head *rebuild, int silent)
9169 struct cache_extent *chunk_item;
9170 struct chunk_record *chunk_rec;
9171 struct block_group_record *bg_rec;
9172 struct device_extent_record *dext_rec;
9176 chunk_item = first_cache_extent(chunk_cache);
9177 while (chunk_item) {
9178 chunk_rec = container_of(chunk_item, struct chunk_record,
9180 err = check_chunk_refs(chunk_rec, block_group_cache,
9181 dev_extent_cache, silent);
9184 if (err == 0 && good)
9185 list_add_tail(&chunk_rec->list, good);
9186 if (err > 0 && rebuild)
9187 list_add_tail(&chunk_rec->list, rebuild);
9189 list_add_tail(&chunk_rec->list, bad);
9190 chunk_item = next_cache_extent(chunk_item);
9193 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9196 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9204 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9208 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used - compare a device's byte_used with its device extents.
 *
 * Walks @dext_cache starting at (dev_rec->devid, 0), adding up the length of
 * each device extent and unlinking it from its device_list.  The walk tests
 * each extent's objectid against dev_rec->devid (the action taken on a
 * mismatch is not shown in this listing).  A total that differs from
 * dev_rec->byte_used is reported.
 *
 * NOTE(review): return values are not visible here.
 */
9219 static int check_device_used(struct device_record *dev_rec,
9220 struct device_extent_tree *dext_cache)
9222 struct cache_extent *cache;
9223 struct device_extent_record *dev_extent_rec;
9226 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9228 dev_extent_rec = container_of(cache,
9229 struct device_extent_record,
9231 if (dev_extent_rec->objectid != dev_rec->devid)
9234 list_del_init(&dev_extent_rec->device_list);
9235 total_byte += dev_extent_rec->length;
9236 cache = next_cache_extent(cache);
9239 if (total_byte != dev_rec->byte_used) {
9241 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9242 total_byte, dev_rec->byte_used, dev_rec->objectid,
9243 dev_rec->type, dev_rec->offset);
9250 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * check_devices - run check_device_used() on every device in @dev_cache.
 *
 * Iterates the rb-tree of device records in order, then reports every device
 * extent still linked on dev_extent_cache->no_device_orphans as having no
 * device.
 *
 * NOTE(review): how the per-device results are accumulated and what is
 * returned is not visible in this listing.
 */
9251 static int check_devices(struct rb_root *dev_cache,
9252 struct device_extent_tree *dev_extent_cache)
9254 struct rb_node *dev_node;
9255 struct device_record *dev_rec;
9256 struct device_extent_record *dext_rec;
9260 dev_node = rb_first(dev_cache);
9262 dev_rec = container_of(dev_node, struct device_record, node);
9263 err = check_device_used(dev_rec, dev_extent_cache);
9267 dev_node = rb_next(dev_node);
9269 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9272 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9273 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - queue one tree root for later scanning.
 *
 * Allocates a root_item_record, fills it from the arguments and appends it
 * to @head.  @drop_key is copied into the record; callers in this file also
 * pass NULL for it, so the copy is presumably guarded by a check that is not
 * shown in this listing -- TODO confirm.
 *
 * NOTE(review): the allocation-failure handling and the return value are
 * not visible here.
 */
9280 static int add_root_item_to_list(struct list_head *head,
9281 u64 objectid, u64 bytenr, u64 last_snapshot,
9282 u8 level, u8 drop_level,
9283 int level_size, struct btrfs_key *drop_key)
9286 struct root_item_record *ri_rec;
9287 ri_rec = malloc(sizeof(*ri_rec));
9290 ri_rec->bytenr = bytenr;
9291 ri_rec->objectid = objectid;
9292 ri_rec->level = level;
9293 ri_rec->level_size = level_size;
9294 ri_rec->drop_level = drop_level;
9295 ri_rec->last_snapshot = last_snapshot;
9297 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9298 list_add_tail(&ri_rec->list, head);
/*
 * free_root_item_list - empty a list built by add_root_item_to_list().
 *
 * Unlinks the first root_item_record until @list is empty.  The release of
 * each unlinked record is not visible in this listing -- TODO confirm it is
 * freed.
 */
9303 static void free_root_item_list(struct list_head *list)
9305 struct root_item_record *ri_rec;
9307 while (!list_empty(list)) {
9308 ri_rec = list_first_entry(list, struct root_item_record,
9310 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - scan every tree root queued on @list.
 *
 * For each queued root_item_record the root block is read with
 * read_tree_block() and registered through add_root_to_pending(); the record
 * is unlinked from @list once handled.  Blocks are processed by
 * run_next_block(), called once with the record inside the per-root loop and
 * once with NULL after it.  Per the in-line comment, snapshots are handled
 * one at a time when rebuilding the extent tree, and otherwise nodes are
 * handled first to maximise readahead.
 *
 * NOTE(review): the loop conditions around both run_next_block() calls, the
 * error paths and the return value are not shown in this listing.
 */
9315 static int deal_root_from_list(struct list_head *list,
9316 struct btrfs_root *root,
9317 struct block_info *bits,
9319 struct cache_tree *pending,
9320 struct cache_tree *seen,
9321 struct cache_tree *reada,
9322 struct cache_tree *nodes,
9323 struct cache_tree *extent_cache,
9324 struct cache_tree *chunk_cache,
9325 struct rb_root *dev_cache,
9326 struct block_group_tree *block_group_cache,
9327 struct device_extent_tree *dev_extent_cache)
9332 while (!list_empty(list)) {
9333 struct root_item_record *rec;
9334 struct extent_buffer *buf;
9335 rec = list_entry(list->next,
9336 struct root_item_record, list);
9338 buf = read_tree_block(root->fs_info->tree_root,
9339 rec->bytenr, rec->level_size, 0);
9340 if (!extent_buffer_uptodate(buf)) {
9341 free_extent_buffer(buf);
9345 ret = add_root_to_pending(buf, extent_cache, pending,
9346 seen, nodes, rec->objectid);
9350 * To rebuild extent tree, we need deal with snapshot
9351 * one by one, otherwise we deal with node firstly which
9352 * can maximize readahead.
9355 ret = run_next_block(root, bits, bits_nr, &last,
9356 pending, seen, reada, nodes,
9357 extent_cache, chunk_cache,
9358 dev_cache, block_group_cache,
9359 dev_extent_cache, rec);
9363 free_extent_buffer(buf);
9364 list_del(&rec->list);
9370 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9371 reada, nodes, extent_cache, chunk_cache,
9372 dev_cache, block_group_cache,
9373 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents - top-level chunk/extent consistency check.
 *
 * Visible sequence:
 *  1. Initialise the local caches and publish excluded_extents,
 *     fsck_extent_cache, free_extent_hook and corrupt_blocks through
 *     root->fs_info.
 *  2. Queue the tree root and the chunk root on normal_trees, then walk the
 *     ROOT_ITEMs of the tree root: a root whose drop_progress objectid is 0
 *     goes onto normal_trees, any other root onto dropping_trees together
 *     with its drop key.
 *  3. Scan normal_trees, then dropping_trees, with deal_root_from_list().
 *  4. Run check_chunks(), check_extent_refs() and check_devices().
 *  5. Tear the caches down and clear the fs_info hooks.  Two teardown
 *     sequences are present; per the in-line comment an -EAGAIN result means
 *     something was fixed and the scan is restarted, so one of them
 *     presumably belongs to that restart path -- TODO confirm.
 *
 * NOTE(review): labels, gotos, error checks and the return value are not
 * shown in this listing.
 */
9383 static int check_chunks_and_extents(struct btrfs_root *root)
9385 struct rb_root dev_cache;
9386 struct cache_tree chunk_cache;
9387 struct block_group_tree block_group_cache;
9388 struct device_extent_tree dev_extent_cache;
9389 struct cache_tree extent_cache;
9390 struct cache_tree seen;
9391 struct cache_tree pending;
9392 struct cache_tree reada;
9393 struct cache_tree nodes;
9394 struct extent_io_tree excluded_extents;
9395 struct cache_tree corrupt_blocks;
9396 struct btrfs_path path;
9397 struct btrfs_key key;
9398 struct btrfs_key found_key;
9400 struct block_info *bits;
9402 struct extent_buffer *leaf;
9404 struct btrfs_root_item ri;
9405 struct list_head dropping_trees;
9406 struct list_head normal_trees;
9407 struct btrfs_root *root1;
9412 dev_cache = RB_ROOT;
9413 cache_tree_init(&chunk_cache);
9414 block_group_tree_init(&block_group_cache);
9415 device_extent_tree_init(&dev_extent_cache);
9417 cache_tree_init(&extent_cache);
9418 cache_tree_init(&seen);
9419 cache_tree_init(&pending);
9420 cache_tree_init(&nodes);
9421 cache_tree_init(&reada);
9422 cache_tree_init(&corrupt_blocks);
9423 extent_io_tree_init(&excluded_extents);
9424 INIT_LIST_HEAD(&dropping_trees);
9425 INIT_LIST_HEAD(&normal_trees);
/* Publish the local state through fs_info for the duration of the check. */
9428 root->fs_info->excluded_extents = &excluded_extents;
9429 root->fs_info->fsck_extent_cache = &extent_cache;
9430 root->fs_info->free_extent_hook = free_extent_hook;
9431 root->fs_info->corrupt_blocks = &corrupt_blocks;
9435 bits = malloc(bits_nr * sizeof(struct block_info));
9441 if (ctx.progress_enabled) {
9442 ctx.tp = TASK_EXTENTS;
9443 task_start(ctx.info);
9447 root1 = root->fs_info->tree_root;
9448 level = btrfs_header_level(root1->node);
9449 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9450 root1->node->start, 0, level, 0,
9451 root1->nodesize, NULL);
9454 root1 = root->fs_info->chunk_root;
9455 level = btrfs_header_level(root1->node);
9456 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9457 root1->node->start, 0, level, 0,
9458 root1->nodesize, NULL);
9461 btrfs_init_path(&path);
9464 key.type = BTRFS_ROOT_ITEM_KEY;
9465 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9470 leaf = path.nodes[0];
9471 slot = path.slots[0];
9472 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9473 ret = btrfs_next_leaf(root, &path);
9476 leaf = path.nodes[0];
9477 slot = path.slots[0];
9479 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9480 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9481 unsigned long offset;
9484 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9485 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9486 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid selects normal_trees; otherwise dropping_trees. */
9487 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9488 level = btrfs_root_level(&ri);
9489 level_size = root->nodesize;
9490 ret = add_root_item_to_list(&normal_trees,
9492 btrfs_root_bytenr(&ri),
9493 last_snapshot, level,
9494 0, level_size, NULL);
9498 level = btrfs_root_level(&ri);
9499 level_size = root->nodesize;
9500 objectid = found_key.objectid;
9501 btrfs_disk_key_to_cpu(&found_key,
9503 ret = add_root_item_to_list(&dropping_trees,
9505 btrfs_root_bytenr(&ri),
9506 last_snapshot, level,
9508 level_size, &found_key);
9515 btrfs_release_path(&path);
9518 * check_block can return -EAGAIN if it fixes something, please keep
9519 * this in mind when dealing with return values from these functions, if
9520 * we get -EAGAIN we want to fall through and restart the loop.
9522 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9523 &seen, &reada, &nodes, &extent_cache,
9524 &chunk_cache, &dev_cache, &block_group_cache,
9531 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9532 &pending, &seen, &reada, &nodes,
9533 &extent_cache, &chunk_cache, &dev_cache,
9534 &block_group_cache, &dev_extent_cache);
9541 ret = check_chunks(&chunk_cache, &block_group_cache,
9542 &dev_extent_cache, NULL, NULL, NULL, 0);
9549 ret = check_extent_refs(root, &extent_cache);
9556 ret = check_devices(&dev_cache, &dev_extent_cache);
9561 task_stop(ctx.info);
9563 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9564 extent_io_tree_cleanup(&excluded_extents);
9565 root->fs_info->fsck_extent_cache = NULL;
9566 root->fs_info->free_extent_hook = NULL;
9567 root->fs_info->corrupt_blocks = NULL;
9568 root->fs_info->excluded_extents = NULL;
9571 free_chunk_cache_tree(&chunk_cache);
9572 free_device_cache_tree(&dev_cache);
9573 free_block_group_tree(&block_group_cache);
9574 free_device_extent_tree(&dev_extent_cache);
9575 free_extent_cache_tree(&seen);
9576 free_extent_cache_tree(&pending);
9577 free_extent_cache_tree(&reada);
9578 free_extent_cache_tree(&nodes);
/* Second teardown sequence; also frees the extent records and root lists. */
9581 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9582 free_extent_cache_tree(&seen);
9583 free_extent_cache_tree(&pending);
9584 free_extent_cache_tree(&reada);
9585 free_extent_cache_tree(&nodes);
9586 free_chunk_cache_tree(&chunk_cache);
9587 free_block_group_tree(&block_group_cache);
9588 free_device_cache_tree(&dev_cache);
9589 free_device_extent_tree(&dev_extent_cache);
9590 free_extent_record_cache(root->fs_info, &extent_cache);
9591 free_root_item_list(&normal_trees);
9592 free_root_item_list(&dropping_trees);
9593 extent_io_tree_cleanup(&excluded_extents);
9598 * Check backrefs of a tree block given by @bytenr or @eb.
9600 * @root: the root containing the @bytenr or @eb
9601 * @eb: tree block extent buffer, can be NULL
9602 * @bytenr: bytenr of the tree block to search
9603 * @level: tree level of the tree block
9604 * @owner: owner of the tree block
9606 * Return >0 for any error found and output error message
9607 * Return 0 for no error found
/*
 * check_tree_block_ref() - check that the tree block at @bytenr has a
 * matching backref in the extent tree.
 *
 * The extent tree is searched for the METADATA_ITEM (when the filesystem has
 * the SKINNY_METADATA incompat flag) or the EXTENT_ITEM of @bytenr.  The item
 * is compared against the TREE_BLOCK flag, the generation stored in the header
 * of @eb, @level and the reference count; then its inline refs are walked for
 * a TREE_BLOCK_REF or SHARED_BLOCK_REF that matches.  A keyed TREE_BLOCK_REF
 * lookup is the fallback.  Problems are recorded in err as BACKREF_MISSING and
 * BACKREF_MISMATCH bits.
 */
9609 static int check_tree_block_ref(struct btrfs_root *root,
9610 struct extent_buffer *eb, u64 bytenr,
9611 int level, u64 owner)
9613 struct btrfs_key key;
9614 struct btrfs_root *extent_root = root->fs_info->extent_root;
9615 struct btrfs_path path;
9616 struct btrfs_extent_item *ei;
9617 struct btrfs_extent_inline_ref *iref;
9618 struct extent_buffer *leaf;
9624 u32 nodesize = root->nodesize;
9631 btrfs_init_path(&path);
/*
 * Build the search key: objectid is the block address, the type depends on
 * the SKINNY_METADATA incompat flag and the offset is the largest u64 value.
 */
9632 key.objectid = bytenr;
9633 if (btrfs_fs_incompat(root->fs_info,
9634 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9635 key.type = BTRFS_METADATA_ITEM_KEY;
9637 key.type = BTRFS_EXTENT_ITEM_KEY;
9638 key.offset = (u64)-1;
9640 /* Search for the backref in extent tree */
9641 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9643 err |= BACKREF_MISSING;
/*
 * Presumably steps the path back to the extent/metadata item of bytenr -
 * TODO confirm against btrfs_previous_extent_item().
 */
9646 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9648 err |= BACKREF_MISSING;
9652 leaf = path.nodes[0];
9653 slot = path.slots[0];
9654 btrfs_item_key_to_cpu(leaf, &key, slot);
9656 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * A METADATA_ITEM stores the level in key.offset and its inline refs follow
 * the extent item directly.  Otherwise a btrfs_tree_block_info sits between
 * the extent item and the inline refs, and the level is read from it.
 */
9658 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9659 skinny_level = (int)key.offset;
9660 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9662 struct btrfs_tree_block_info *info;
9664 info = (struct btrfs_tree_block_info *)(ei + 1);
9665 skinny_level = btrfs_tree_block_level(leaf, info);
9666 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/*
 * NOTE(review): eb can be NULL (the recursive call further down passes NULL),
 * so the header generation read below must only run when eb is set - confirm.
 * NOTE(review): the BACKREF_MISMATCH cases use plain assignment to err while
 * BACKREF_MISSING is OR-ed in - confirm no earlier bit can be overwritten.
 */
9673 if (!(btrfs_extent_flags(leaf, ei) &
9674 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9676 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9677 key.objectid, nodesize,
9678 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9679 err = BACKREF_MISMATCH;
9681 header_gen = btrfs_header_generation(eb);
9682 extent_gen = btrfs_extent_generation(leaf, ei);
9683 if (header_gen != extent_gen) {
9685 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9686 key.objectid, nodesize, header_gen,
9688 err = BACKREF_MISMATCH;
9690 if (level != skinny_level) {
9692 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9693 key.objectid, nodesize, level, skinny_level);
9694 err = BACKREF_MISMATCH;
/* A block owned by a non-fs tree is expected to have exactly one reference. */
9696 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9698 "extent[%llu %u] is referred by other roots than %llu",
9699 key.objectid, nodesize, root->objectid);
9700 err = BACKREF_MISMATCH;
9705 * Iterate the extent/metadata item to find the exact backref
9707 item_size = btrfs_item_size_nr(leaf, slot);
9708 ptr = (unsigned long)iref;
9709 end = (unsigned long)ei + item_size;
9711 iref = (struct btrfs_extent_inline_ref *)ptr;
9712 type = btrfs_extent_inline_ref_type(leaf, iref);
9713 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/*
 * A TREE_BLOCK_REF matches when its offset equals either the objectid of
 * root or owner.  For a SHARED_BLOCK_REF the offset is itself a block
 * address, which is checked recursively with a NULL eb; the ref counts as
 * found when that call reports no error.
 */
9715 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9716 (offset == root->objectid || offset == owner)) {
9718 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9719 /* Check if the backref points to valid referencer */
9720 found_ref = !check_tree_block_ref(root, NULL, offset,
9726 ptr += btrfs_extent_inline_ref_size(type);
9730 * Inlined extent item doesn't have what we need, check
9731 * TREE_BLOCK_REF_KEY
9734 btrfs_release_path(&path);
9735 key.objectid = bytenr;
9736 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9737 key.offset = root->objectid;
9739 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9744 err |= BACKREF_MISSING;
9746 btrfs_release_path(&path);
9747 if (eb && (err & BACKREF_MISSING))
9748 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9749 bytenr, nodesize, owner, level);
9754 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9756 * Return >0 for any error found and output error message
9757 * Return 0 for no error found
/*
 * check_extent_data_item() - check one EXTENT_DATA item of a fs tree leaf.
 *
 * @root: tree the leaf belongs to
 * @eb:   leaf holding the item
 * @slot: slot of the EXTENT_DATA item inside @eb
 *
 * Inline extents and holes (disk bytenr 0) are not checked.  For regular
 * extents the function checks the sector alignment of disk_num_bytes and
 * num_bytes, adds them to the file-scope counters data_bytes_allocated and
 * data_bytes_referenced, looks up the EXTENT_ITEM in the extent tree and then
 * searches for a data backref: first inline in that item, then as a keyed
 * EXTENT_DATA_REF item.  Error bits are accumulated in err.
 */
9759 static int check_extent_data_item(struct btrfs_root *root,
9760 struct extent_buffer *eb, int slot)
9762 struct btrfs_file_extent_item *fi;
9763 struct btrfs_path path;
9764 struct btrfs_root *extent_root = root->fs_info->extent_root;
9765 struct btrfs_key fi_key;
9766 struct btrfs_key dbref_key;
9767 struct extent_buffer *leaf;
9768 struct btrfs_extent_item *ei;
9769 struct btrfs_extent_inline_ref *iref;
9770 struct btrfs_extent_data_ref *dref;
9772 u64 file_extent_gen;
9775 u64 extent_num_bytes;
9783 int found_dbackref = 0;
9787 btrfs_item_key_to_cpu(eb, &fi_key, slot);
9788 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9789 file_extent_gen = btrfs_file_extent_generation(eb, fi);
9791 /* Nothing to check for hole and inline data extents */
9792 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9793 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9796 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9797 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9798 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9800 /* Check unaligned disk_num_bytes and num_bytes */
9801 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9803 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9804 fi_key.objectid, fi_key.offset, disk_num_bytes,
9806 err |= BYTES_UNALIGNED;
9808 data_bytes_allocated += disk_num_bytes;
9810 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9812 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9813 fi_key.objectid, fi_key.offset, extent_num_bytes,
9815 err |= BYTES_UNALIGNED;
9817 data_bytes_referenced += extent_num_bytes;
/* owner is the tree id recorded in the leaf header. */
9819 owner = btrfs_header_owner(eb);
9821 /* Check the extent item of the file extent in extent tree */
/* Exact-key lookup: (disk bytenr, EXTENT_ITEM, disk num bytes). */
9822 btrfs_init_path(&path);
9823 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9824 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9825 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9827 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9829 err |= BACKREF_MISSING;
/* From here on slot refers to the extent tree leaf, not to eb. */
9833 leaf = path.nodes[0];
9834 slot = path.slots[0];
9835 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9837 extent_flags = btrfs_extent_flags(leaf, ei);
9838 extent_gen = btrfs_extent_generation(leaf, ei);
9840 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9842 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9843 disk_bytenr, disk_num_bytes,
9844 BTRFS_EXTENT_FLAG_DATA);
9845 err |= BACKREF_MISMATCH;
/* The extent item generation must not be newer than the file extent. */
9848 if (file_extent_gen < extent_gen) {
9850 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9851 disk_bytenr, disk_num_bytes, file_extent_gen,
9853 err |= BACKREF_MISMATCH;
9856 /* Check data backref inside that extent item */
9857 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9858 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9859 ptr = (unsigned long)iref;
9860 end = (unsigned long)ei + item_size;
9862 iref = (struct btrfs_extent_inline_ref *)ptr;
9863 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref payload starts at the offset field of the inline ref. */
9864 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/*
 * EXTENT_DATA_REF: compare the recorded root with owner and root->objectid.
 * SHARED_DATA_REF: the inline ref offset is a tree block address; the ref is
 * accepted when check_tree_block_ref() reports no error for that block.
 */
9866 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9867 ref_root = btrfs_extent_data_ref_root(leaf, dref);
9868 if (ref_root == owner || ref_root == root->objectid)
9870 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9871 found_dbackref = !check_tree_block_ref(root, NULL,
9872 btrfs_extent_inline_ref_offset(leaf, iref),
9878 ptr += btrfs_extent_inline_ref_size(type);
9881 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
9882 if (!found_dbackref) {
9883 btrfs_release_path(&path);
9885 btrfs_init_path(&path);
/*
 * The keyed ref offset is a hash of root->objectid, fi_key.objectid and
 * fi_key.offset.
 * NOTE(review): check_extent_data_backref() documents the data backref offset
 * as file offset minus file extent offset, while fi_key.offset is hashed
 * unadjusted here - confirm this cannot miss a valid keyed ref.
 */
9886 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9887 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9888 dbref_key.offset = hash_extent_data_ref(root->objectid,
9889 fi_key.objectid, fi_key.offset);
9891 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9892 &dbref_key, &path, 0, 0);
9897 if (!found_dbackref)
9898 err |= BACKREF_MISSING;
9900 btrfs_release_path(&path);
9901 if (err & BACKREF_MISSING) {
9902 error("data extent[%llu %llu] backref lost",
9903 disk_bytenr, disk_num_bytes);
9909 * Get real tree block level for the case like shared block
9910 * Return >= 0 as tree level
9911 * Return <0 for error
/*
 * query_tree_block_level() - determine the level of the tree block at
 * @bytenr.
 *
 * Two sources are read: the backref in the extent tree (key.offset of a
 * METADATA_ITEM, or the btrfs_tree_block_info of an EXTENT_ITEM) and the
 * header of the block itself, read with the generation taken from the extent
 * item.  header_level is returned once both have been read and compared.
 */
9913 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9915 struct extent_buffer *eb;
9916 struct btrfs_path path;
9917 struct btrfs_key key;
9918 struct btrfs_extent_item *ei;
9921 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9926 /* Search extent tree for extent generation and level */
/*
 * The search key always uses METADATA_ITEM with the maximum offset; the type
 * actually found is re-read from the leaf below.
 */
9927 key.objectid = bytenr;
9928 key.type = BTRFS_METADATA_ITEM_KEY;
9929 key.offset = (u64)-1;
9931 btrfs_init_path(&path);
9932 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9935 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9943 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9944 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9945 struct btrfs_extent_item);
/* Only extents flagged as tree blocks have a level. */
9946 flags = btrfs_extent_flags(path.nodes[0], ei);
9947 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9952 /* Get transid for later read_tree_block() check */
9953 transid = btrfs_extent_generation(path.nodes[0], ei);
9955 /* Get backref level as one source */
9956 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9957 backref_level = key.offset;
9959 struct btrfs_tree_block_info *info;
9961 info = (struct btrfs_tree_block_info *)(ei + 1);
9962 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9964 btrfs_release_path(&path);
9966 /* Get level from tree block as an alternative source */
9967 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9968 if (!extent_buffer_uptodate(eb)) {
9969 free_extent_buffer(eb);
9972 header_level = btrfs_header_level(eb);
9973 free_extent_buffer(eb);
/* Both sources are expected to agree. */
9975 if (header_level != backref_level)
9977 return header_level;
9980 btrfs_release_path(&path);
9985 * Check if a tree block backref is valid (points to a valid tree block)
9986 * if level == -1, level will be resolved
9987 * Return >0 for any error found and print error message
/*
 * check_tree_block_backref() - check that a TREE_BLOCK_REF points to a block
 * that is reachable in the referencing tree.
 *
 * @fs_info: filesystem being checked
 * @root_id: objectid of the tree named by the backref
 * @bytenr:  address of the referenced tree block
 * @level:   level of the block; resolved with query_tree_block_level() for
 *           the level == -1 special case
 *
 * The block is read to obtain its first key, the tree @root_id is searched
 * for that key with path.lowest_level set to @level, and the node found at
 * that level is compared with @bytenr and @level.  REFERENCER_MISSING and
 * REFERENCER_MISMATCH bits are accumulated in err.
 */
9989 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9990 u64 bytenr, int level)
9992 struct btrfs_root *root;
9993 struct btrfs_key key;
9994 struct btrfs_path path;
9995 struct extent_buffer *eb;
9996 struct extent_buffer *node;
9997 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10001 /* Query level for level == -1 special case */
10003 level = query_tree_block_level(fs_info, bytenr);
10005 err |= REFERENCER_MISSING;
/* Look up the referencing tree by its ROOT_ITEM key. */
10009 key.objectid = root_id;
10010 key.type = BTRFS_ROOT_ITEM_KEY;
10011 key.offset = (u64)-1;
10013 root = btrfs_read_fs_root(fs_info, &key);
10014 if (IS_ERR(root)) {
10015 err |= REFERENCER_MISSING;
10019 /* Read out the tree block to get item/node key */
10020 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10021 if (!extent_buffer_uptodate(eb)) {
10022 err |= REFERENCER_MISSING;
10023 free_extent_buffer(eb);
10027 /* Empty tree, no need to check key */
10028 if (!btrfs_header_nritems(eb) && !level) {
10029 free_extent_buffer(eb);
/* key is reused: it now receives the first key stored in the block. */
10034 btrfs_node_key_to_cpu(eb, &key, 0);
10036 btrfs_item_key_to_cpu(eb, &key, 0);
10038 free_extent_buffer(eb);
10040 btrfs_init_path(&path);
10041 path.lowest_level = level;
10042 /* Search with the first key, to ensure we can reach it */
10043 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10045 err |= REFERENCER_MISSING;
10049 node = path.nodes[level];
/*
 * NOTE(review): nodesize is a u32 but the messages in this function format it
 * with %d, whereas sibling functions use %u - confirm and align.
 */
10050 if (btrfs_header_bytenr(node) != bytenr) {
10052 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10053 bytenr, nodesize, bytenr,
10054 btrfs_header_bytenr(node));
10055 err |= REFERENCER_MISMATCH;
10057 if (btrfs_header_level(node) != level) {
10059 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10060 bytenr, nodesize, level,
10061 btrfs_header_level(node));
10062 err |= REFERENCER_MISMATCH;
10066 btrfs_release_path(&path);
/* Two message variants exist: one without and one with the level. */
10068 if (err & REFERENCER_MISSING) {
10070 error("extent [%llu %d] lost referencer (owner: %llu)",
10071 bytenr, nodesize, root_id);
10074 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10075 bytenr, nodesize, root_id, level);
10082 * Check referencer for shared block backref
10083 * If level == -1, this function will resolve the level.
/*
 * check_shared_block_backref() - check that the parent named by a
 * SHARED_BLOCK_REF really points to the tree block at @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  address of the parent tree block
 * @bytenr:  address of the referenced tree block
 * @level:   level of the referenced block; query_tree_block_level() is used
 *           to resolve it (per the description above, when it is -1)
 *
 * The parent block is read, its header level is compared with @level + 1 and
 * its block pointers are scanned for @bytenr.  REFERENCER_MISSING is returned
 * when no matching parent pointer is found.
 */
10085 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10086 u64 parent, u64 bytenr, int level)
10088 struct extent_buffer *eb;
10089 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10091 int found_parent = 0;
10094 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10095 if (!extent_buffer_uptodate(eb))
10099 level = query_tree_block_level(fs_info, bytenr);
/* A parent sits exactly one level above its child. */
10103 if (level + 1 != btrfs_header_level(eb))
10106 nr = btrfs_header_nritems(eb);
10107 for (i = 0; i < nr; i++) {
10108 if (bytenr == btrfs_node_blockptr(eb, i)) {
10114 free_extent_buffer(eb);
10115 if (!found_parent) {
10117 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10118 bytenr, nodesize, parent, level);
10119 return REFERENCER_MISSING;
10125 * Check referencer for normal (inlined) data ref
10126 * If len == 0, it will be resolved by searching in extent tree
/*
 * check_extent_data_backref() - check that an EXTENT_DATA_REF is backed by
 * the expected number of file extents.
 *
 * @fs_info:  filesystem being checked
 * @root_id:  objectid of the fs tree named by the backref
 * @objectid: objectid used for the EXTENT_DATA keys that are searched
 * @offset:   offset recorded in the backref
 * @bytenr:   start of the data extent
 * @len:      length of the data extent; 0 means resolve it from the extent
 *            tree (see the description above)
 * @count:    reference count recorded in the backref
 *
 * All EXTENT_DATA items of @objectid in tree @root_id are iterated; those
 * whose disk bytenr, disk num bytes and (key offset - file extent offset)
 * match are counted.  REFERENCER_MISSING is returned when the count found
 * differs from @count.
 */
10128 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10129 u64 root_id, u64 objectid, u64 offset,
10130 u64 bytenr, u64 len, u32 count)
10132 struct btrfs_root *root;
10133 struct btrfs_root *extent_root = fs_info->extent_root;
10134 struct btrfs_key key;
10135 struct btrfs_path path;
10136 struct extent_buffer *leaf;
10137 struct btrfs_file_extent_item *fi;
10138 u32 found_count = 0;
/*
 * Extent tree lookup for the EXTENT_ITEM that starts at bytenr; presumably
 * this is the len == 0 resolution path - TODO confirm.
 */
10143 key.objectid = bytenr;
10144 key.type = BTRFS_EXTENT_ITEM_KEY;
10145 key.offset = (u64)-1;
10147 btrfs_init_path(&path);
10148 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10151 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10154 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10155 if (key.objectid != bytenr ||
10156 key.type != BTRFS_EXTENT_ITEM_KEY)
10159 btrfs_release_path(&path);
/* Open the fs tree named by the backref. */
10161 key.objectid = root_id;
10162 key.type = BTRFS_ROOT_ITEM_KEY;
10163 key.offset = (u64)-1;
10164 btrfs_init_path(&path);
10166 root = btrfs_read_fs_root(fs_info, &key);
10170 key.objectid = objectid;
10171 key.type = BTRFS_EXTENT_DATA_KEY;
10173 * It can be nasty as data backref offset is
10174 * file offset - file extent offset, which is smaller or
10175 * equal to original backref offset. The only special case is
10176 * overflow. So we need to special check and do further search.
/* When bit 63 of offset is set the search starts from file offset 0. */
10178 key.offset = offset & (1ULL << 63) ? 0 : offset;
10180 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10185 * Search afterwards to get correct one
10186 * NOTE: As we must do a comprehensive check on the data backref to
10187 * make sure the dref count also matches, we must iterate all file
10188 * extents for that inode.
10191 leaf = path.nodes[0];
10192 slot = path.slots[0];
10194 btrfs_item_key_to_cpu(leaf, &key, slot);
10195 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10197 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10199 * Except normal disk bytenr and disk num bytes, we still
10200 * need to do extra check on dbackref offset as
10201 * dbackref offset = file_offset - file_extent_offset
10203 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10204 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10205 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10209 ret = btrfs_next_item(root, &path);
10214 btrfs_release_path(&path);
10215 if (found_count != count) {
10217 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10218 bytenr, len, root_id, objectid, offset, count, found_count);
10219 return REFERENCER_MISSING;
10225 * Check if the referencer of a shared data backref exists
/*
 * check_shared_data_backref() - check that the leaf named by a
 * SHARED_DATA_REF contains a file extent pointing to @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  address of the leaf recorded in the backref
 * @bytenr:  start of the data extent
 *
 * The leaf is read and every EXTENT_DATA item that is not inline is compared
 * with @bytenr.  REFERENCER_MISSING is returned when none matches.
 */
10227 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10228 u64 parent, u64 bytenr)
10230 struct extent_buffer *eb;
10231 struct btrfs_key key;
10232 struct btrfs_file_extent_item *fi;
10233 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10235 int found_parent = 0;
10238 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10239 if (!extent_buffer_uptodate(eb))
10242 nr = btrfs_header_nritems(eb);
10243 for (i = 0; i < nr; i++) {
10244 btrfs_item_key_to_cpu(eb, &key, i);
10245 if (key.type != BTRFS_EXTENT_DATA_KEY)
10248 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents are filtered out before the disk bytenr is compared. */
10249 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10252 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10259 free_extent_buffer(eb);
10260 if (!found_parent) {
10261 error("shared extent %llu referencer lost (parent: %llu)",
10263 return REFERENCER_MISSING;
10269 * This function will check a given extent item, including its backref and
10270 * itself (like crossing stripe boundary and type)
10272 * Since we don't use extent_record anymore, introduce new error bit
/*
 * check_extent_item() - check one EXTENT_ITEM / METADATA_ITEM and each of its
 * inline backrefs.
 *
 * @fs_info: filesystem being checked
 * @eb:      extent tree leaf holding the item
 * @slot:    slot of the item inside @eb
 *
 * The file-scope counter bytes_used is increased by key.offset for an
 * EXTENT_ITEM and by nodesize otherwise.  Metadata extents are tested with
 * check_crossing_stripes().  Every inline ref is then dispatched by type to
 * the matching check_*_backref() helper; an unknown type sets UNKNOWN_TYPE.
 */
10274 static int check_extent_item(struct btrfs_fs_info *fs_info,
10275 struct extent_buffer *eb, int slot)
10277 struct btrfs_extent_item *ei;
10278 struct btrfs_extent_inline_ref *iref;
10279 struct btrfs_extent_data_ref *dref;
10283 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10284 u32 item_size = btrfs_item_size_nr(eb, slot);
10289 struct btrfs_key key;
10293 btrfs_item_key_to_cpu(eb, &key, slot);
10294 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10295 bytes_used += key.offset;
10297 bytes_used += nodesize;
10299 if (item_size < sizeof(*ei)) {
10301 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10302 * old thing when on disk format is still un-determined.
10303 * No need to care about it anymore
10305 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10309 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10310 flags = btrfs_extent_flags(eb, ei);
10312 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/*
 * NOTE(review): the file-scope global_info is passed here instead of the
 * fs_info parameter - confirm both always refer to the same filesystem.
 */
10314 if (metadata && check_crossing_stripes(global_info, key.objectid,
10316 error("bad metadata [%llu, %llu) crossing stripe boundary",
10317 key.objectid, key.objectid + nodesize);
10318 err |= CROSSING_STRIPE_BOUNDARY;
10321 ptr = (unsigned long)(ei + 1);
10323 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10324 /* Old EXTENT_ITEM metadata */
10325 struct btrfs_tree_block_info *info;
10327 info = (struct btrfs_tree_block_info *)ptr;
10328 level = btrfs_tree_block_level(eb, info);
10329 ptr += sizeof(struct btrfs_tree_block_info);
10331 /* New METADATA_ITEM */
10332 level = key.offset;
10334 end = (unsigned long)ei + item_size;
10337 err |= ITEM_SIZE_MISMATCH;
10341 /* Now check every backref in this extent item */
10343 iref = (struct btrfs_extent_inline_ref *)ptr;
10344 type = btrfs_extent_inline_ref_type(eb, iref);
10345 offset = btrfs_extent_inline_ref_offset(eb, iref);
10347 case BTRFS_TREE_BLOCK_REF_KEY:
10348 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10352 case BTRFS_SHARED_BLOCK_REF_KEY:
10353 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10357 case BTRFS_EXTENT_DATA_REF_KEY:
10358 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10359 ret = check_extent_data_backref(fs_info,
10360 btrfs_extent_data_ref_root(eb, dref),
10361 btrfs_extent_data_ref_objectid(eb, dref),
10362 btrfs_extent_data_ref_offset(eb, dref),
10363 key.objectid, key.offset,
10364 btrfs_extent_data_ref_count(eb, dref));
10367 case BTRFS_SHARED_DATA_REF_KEY:
10368 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10372 error("extent[%llu %d %llu] has unknown ref type: %d",
10373 key.objectid, key.type, key.offset, type);
10374 err |= UNKNOWN_TYPE;
10378 ptr += btrfs_extent_inline_ref_size(type);
10387 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item() - check that a DEV_EXTENT is referenced by the
 * chunk it names.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev extent
 * @slot:    slot of the dev extent inside @eb
 *
 * The chunk tree is searched for the CHUNK_ITEM built from the chunk objectid
 * and chunk offset stored in the dev extent.  The chunk length is compared
 * with the dev extent length, and the chunk stripes are scanned for one whose
 * devid and offset equal the objectid and offset of the dev extent key.
 * REFERENCER_MISSING is returned when no such chunk stripe is found.
 */
10389 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10390 struct extent_buffer *eb, int slot)
10392 struct btrfs_root *chunk_root = fs_info->chunk_root;
10393 struct btrfs_dev_extent *ptr;
10394 struct btrfs_path path;
10395 struct btrfs_key chunk_key;
10396 struct btrfs_key devext_key;
10397 struct btrfs_chunk *chunk;
10398 struct extent_buffer *l;
10402 int found_chunk = 0;
10405 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10406 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10407 length = btrfs_dev_extent_length(eb, ptr);
10409 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10410 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10411 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10413 btrfs_init_path(&path);
10414 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10419 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
/* The whole chunk length is compared with the dev extent length. */
10420 if (btrfs_chunk_length(l, chunk) != length)
10423 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10424 for (i = 0; i < num_stripes; i++) {
10425 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10426 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10428 if (devid == devext_key.objectid &&
10429 offset == devext_key.offset) {
10435 btrfs_release_path(&path);
10436 if (!found_chunk) {
10438 "device extent[%llu, %llu, %llu] did not find the related chunk",
10439 devext_key.objectid, devext_key.offset, length);
10440 return REFERENCER_MISSING;
10446 * Check if the used space is correct with the dev item
/*
 * check_dev_item() - check the bytes_used field of a DEV_ITEM against the sum
 * of the dev extents of that device.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * The dev tree is searched from the first DEV_EXTENT key of the device and
 * the lengths of its dev extents are summed in total.  REFERENCER_MISSING is
 * returned by the failed-search path and ACCOUNTING_MISMATCH when total
 * differs from bytes_used.
 */
10448 static int check_dev_item(struct btrfs_fs_info *fs_info,
10449 struct extent_buffer *eb, int slot)
10451 struct btrfs_root *dev_root = fs_info->dev_root;
10452 struct btrfs_dev_item *dev_item;
10453 struct btrfs_path path;
10454 struct btrfs_key key;
10455 struct btrfs_dev_extent *ptr;
10461 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10462 dev_id = btrfs_device_id(eb, dev_item);
10463 used = btrfs_device_bytes_used(eb, dev_item);
10465 key.objectid = dev_id;
10466 key.type = BTRFS_DEV_EXTENT_KEY;
10469 btrfs_init_path(&path);
10470 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded with the dev item key so the message can print it. */
10472 btrfs_item_key_to_cpu(eb, &key, slot);
10473 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10474 key.objectid, key.type, key.offset);
10475 btrfs_release_path(&path);
10476 return REFERENCER_MISSING;
10479 /* Iterate dev_extents to calculate the used space of a device */
10481 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10483 if (key.objectid > dev_id)
10485 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10488 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10489 struct btrfs_dev_extent);
10490 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10492 ret = btrfs_next_item(dev_root, &path);
10496 btrfs_release_path(&path);
/*
 * NOTE(review): key is reloaded from the dev item but the message prints
 * BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id instead of
 * key.objectid, key.type and key.offset as the message above does - confirm
 * which triple is intended.
 */
10498 if (used != total) {
10499 btrfs_item_key_to_cpu(eb, &key, slot);
10501 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10502 total, used, BTRFS_ROOT_TREE_OBJECTID,
10503 BTRFS_DEV_EXTENT_KEY, dev_id);
10504 return ACCOUNTING_MISMATCH;
10510 * Check a block group item with its referencer (chunk) and its used space
10511 * with extent/metadata item
/*
 * check_block_group_item() - check a BLOCK_GROUP_ITEM against its chunk and
 * against the extents that live inside it.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * First the chunk tree is searched for the CHUNK_ITEM whose offset equals the
 * block group start, and the chunk length is compared.  Then the extent tree
 * is iterated from the block group start; EXTENT_ITEM / METADATA_ITEM sizes
 * are summed in total and the DATA / TREE_BLOCK flag of each extent is
 * compared with the block group flags.  Finally total is compared with the
 * used value of the block group.  Error bits are accumulated in err.
 */
10513 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10514 struct extent_buffer *eb, int slot)
10516 struct btrfs_root *extent_root = fs_info->extent_root;
10517 struct btrfs_root *chunk_root = fs_info->chunk_root;
10518 struct btrfs_block_group_item *bi;
10519 struct btrfs_block_group_item bg_item;
10520 struct btrfs_path path;
10521 struct btrfs_key bg_key;
10522 struct btrfs_key chunk_key;
10523 struct btrfs_key extent_key;
10524 struct btrfs_chunk *chunk;
10525 struct extent_buffer *leaf;
10526 struct btrfs_extent_item *ei;
10527 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10535 btrfs_item_key_to_cpu(eb, &bg_key, slot);
10536 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* The item is copied out of the extent buffer into the local bg_item. */
10537 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10538 used = btrfs_block_group_used(&bg_item);
10539 bg_flags = btrfs_block_group_flags(&bg_item);
10541 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10542 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10543 chunk_key.offset = bg_key.objectid;
10545 btrfs_init_path(&path);
10546 /* Search for the referencer chunk */
10547 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10550 "block group[%llu %llu] did not find the related chunk item",
10551 bg_key.objectid, bg_key.offset);
10552 err |= REFERENCER_MISSING;
10554 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10555 struct btrfs_chunk);
10556 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10559 "block group[%llu %llu] related chunk item length does not match",
10560 bg_key.objectid, bg_key.offset);
10561 err |= REFERENCER_MISMATCH;
10564 btrfs_release_path(&path);
10566 /* Search from the block group bytenr */
/* Type 0 and offset 0 give the smallest key with this objectid. */
10567 extent_key.objectid = bg_key.objectid;
10568 extent_key.type = 0;
10569 extent_key.offset = 0;
10571 btrfs_init_path(&path);
10572 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10576 /* Iterate extent tree to account used space */
10578 leaf = path.nodes[0];
10579 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the first byte past the block group. */
10580 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10583 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10584 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10586 if (extent_key.objectid < bg_key.objectid)
/*
 * For a METADATA_ITEM key.offset is not a byte count, so that case is sized
 * separately (presumably with nodesize - TODO confirm); otherwise key.offset
 * is added.
 */
10589 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10592 total += extent_key.offset;
10594 ei = btrfs_item_ptr(leaf, path.slots[0],
10595 struct btrfs_extent_item);
10596 flags = btrfs_extent_flags(leaf, ei);
10597 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10598 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10600 "bad extent[%llu, %llu) type mismatch with chunk",
10601 extent_key.objectid,
10602 extent_key.objectid + extent_key.offset);
10603 err |= CHUNK_TYPE_MISMATCH;
10605 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10606 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10607 BTRFS_BLOCK_GROUP_METADATA))) {
10609 "bad extent[%llu, %llu) type mismatch with chunk",
10610 extent_key.objectid,
10611 extent_key.objectid + nodesize);
10612 err |= CHUNK_TYPE_MISMATCH;
10616 ret = btrfs_next_item(extent_root, &path);
10622 btrfs_release_path(&path);
10624 if (total != used) {
10626 "block group[%llu %llu] used %llu but extent items used %llu",
10627 bg_key.objectid, bg_key.offset, used, total);
10628 err |= ACCOUNTING_MISMATCH;
10634 * Check a chunk item.
10635 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item() - check a CHUNK_ITEM, its block group item and the dev
 * extent of each stripe.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Checks, in order: chunk length alignment to the sector size, presence of a
 * type bit and at most one profile bit, existence and flags of the
 * BLOCK_GROUP_ITEM keyed (chunk offset, length), and for every stripe a
 * DEV_EXTENT keyed (devid, stripe offset) that points back to this chunk with
 * the same length.  Error bits are accumulated in err.
 */
10637 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10638 struct extent_buffer *eb, int slot)
10640 struct btrfs_root *extent_root = fs_info->extent_root;
10641 struct btrfs_root *dev_root = fs_info->dev_root;
10642 struct btrfs_path path;
10643 struct btrfs_key chunk_key;
10644 struct btrfs_key bg_key;
10645 struct btrfs_key devext_key;
10646 struct btrfs_chunk *chunk;
10647 struct extent_buffer *leaf;
10648 struct btrfs_block_group_item *bi;
10649 struct btrfs_block_group_item bg_item;
10650 struct btrfs_dev_extent *ptr;
10651 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10663 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10664 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10665 length = btrfs_chunk_length(eb, chunk);
/* The chunk covers the logical range [chunk_key.offset, chunk_end). */
10666 chunk_end = chunk_key.offset + length;
10667 if (!IS_ALIGNED(length, sectorsize)) {
10668 error("chunk[%llu %llu) not aligned to %u",
10669 chunk_key.offset, chunk_end, sectorsize);
10670 err |= BYTES_UNALIGNED;
10674 type = btrfs_chunk_type(eb, chunk);
10675 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10676 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10677 error("chunk[%llu %llu) has no chunk type",
10678 chunk_key.offset, chunk_end);
10679 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one bit is set. */
10681 if (profile && (profile & (profile - 1))) {
10682 error("chunk[%llu %llu) multiple profiles detected: %llx",
10683 chunk_key.offset, chunk_end, profile);
10684 err |= UNKNOWN_TYPE;
10687 bg_key.objectid = chunk_key.offset;
10688 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10689 bg_key.offset = length;
10691 btrfs_init_path(&path);
10692 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10695 "chunk[%llu %llu) did not find the related block group item",
10696 chunk_key.offset, chunk_end);
10697 err |= REFERENCER_MISSING;
10699 leaf = path.nodes[0];
10700 bi = btrfs_item_ptr(leaf, path.slots[0],
10701 struct btrfs_block_group_item);
10702 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/*
 * NOTE(review): a flags mismatch is reported with REFERENCER_MISSING although
 * the block group item was found; check_block_group_item() uses
 * REFERENCER_MISMATCH for its comparable length check - confirm the bit.
 */
10704 if (btrfs_block_group_flags(&bg_item) != type) {
10706 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10707 chunk_key.offset, chunk_end, type,
10708 btrfs_block_group_flags(&bg_item));
10709 err |= REFERENCER_MISSING;
10713 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10714 for (i = 0; i < num_stripes; i++) {
/* The path is released and re-initialised for every stripe lookup. */
10715 btrfs_release_path(&path);
10716 btrfs_init_path(&path);
10717 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10718 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10719 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10721 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10724 goto not_match_dev;
10726 leaf = path.nodes[0];
10727 ptr = btrfs_item_ptr(leaf, path.slots[0],
10728 struct btrfs_dev_extent);
10729 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10730 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
10731 if (objectid != chunk_key.objectid ||
10732 offset != chunk_key.offset ||
10733 btrfs_dev_extent_length(leaf, ptr) != length)
10734 goto not_match_dev;
/*
 * NOTE(review): this message prints chunk_key.objectid as the start of the
 * range, while every other message in this function prints chunk_key.offset
 * next to chunk_end - confirm; it looks like the wrong field.
 */
10737 err |= BACKREF_MISSING;
10739 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10740 chunk_key.objectid, chunk_end, i);
10743 btrfs_release_path(&path);
10749 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items() - dispatch every item of leaf @eb to the checker for its
 * key type.
 *
 * @root: tree the leaf belongs to; root->fs_info is passed to most checkers
 * @eb:   leaf to check
 *
 * Slots are visited in increasing order while slot is below the item count of
 * the leaf.  EXTENT_CSUM items are only accounted: their item size is added
 * to the file-scope counter total_csum_bytes.
 */
10751 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10753 struct btrfs_fs_info *fs_info = root->fs_info;
10754 struct btrfs_key key;
10757 struct btrfs_extent_data_ref *dref;
10762 btrfs_item_key_to_cpu(eb, &key, slot);
10766 case BTRFS_EXTENT_DATA_KEY:
10767 ret = check_extent_data_item(root, eb, slot);
10770 case BTRFS_BLOCK_GROUP_ITEM_KEY:
10771 ret = check_block_group_item(fs_info, eb, slot);
10774 case BTRFS_DEV_ITEM_KEY:
10775 ret = check_dev_item(fs_info, eb, slot);
10778 case BTRFS_CHUNK_ITEM_KEY:
10779 ret = check_chunk_item(fs_info, eb, slot);
10782 case BTRFS_DEV_EXTENT_KEY:
10783 ret = check_dev_extent_item(fs_info, eb, slot);
10786 case BTRFS_EXTENT_ITEM_KEY:
10787 case BTRFS_METADATA_ITEM_KEY:
10788 ret = check_extent_item(fs_info, eb, slot);
10791 case BTRFS_EXTENT_CSUM_KEY:
10792 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/*
 * The four cases below handle keyed (non-inline) backref items.  For the
 * block refs key.offset is passed as the root id or parent address.
 */
10794 case BTRFS_TREE_BLOCK_REF_KEY:
10795 ret = check_tree_block_backref(fs_info, key.offset,
10799 case BTRFS_EXTENT_DATA_REF_KEY:
10800 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10801 ret = check_extent_data_backref(fs_info,
10802 btrfs_extent_data_ref_root(eb, dref),
10803 btrfs_extent_data_ref_objectid(eb, dref),
10804 btrfs_extent_data_ref_offset(eb, dref),
10806 btrfs_extent_data_ref_count(eb, dref));
10809 case BTRFS_SHARED_BLOCK_REF_KEY:
10810 ret = check_shared_block_backref(fs_info, key.offset,
10814 case BTRFS_SHARED_DATA_REF_KEY:
10815 ret = check_shared_data_backref(fs_info, key.offset,
10823 if (++slot < btrfs_header_nritems(eb))
10830 * Helper function for later fs/subvol tree check. To determine if a tree
10831 * block should be checked.
10832 * This function ensures that only the direct referencer with the lowest
10833 * root id checks a fs/subvolume tree block.
10835 * Backref check at extent tree would detect errors like missing subvolume
10836 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the referencer that should check tree block @eb.
 *
 * @root: fs/subvolume root currently being traversed
 * @eb:   tree block under consideration
 *
 * The extent item for the bytenr of @eb is looked up in the extent tree:
 * a search with a METADATA_ITEM key at offset (u64)-1 is followed by
 * btrfs_previous_extent_item(). The inline refs of that item are then walked.
 * For a METADATA_ITEM key the inline refs start right after the extent item;
 * in the other visible layout a btrfs_tree_block_info sits in between.
 *
 * When an inline TREE_BLOCK_REF whose offset is lower than root->objectid is
 * found, the path is released early. Going by the surrounding comments that
 * is the case where another root with a lower id does the check.
 * NOTE(review): the actual return statements are not visible in this excerpt.
 */
10838 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10840 struct btrfs_root *extent_root = root->fs_info->extent_root;
10841 struct btrfs_key key;
10842 struct btrfs_path path;
10843 struct extent_buffer *leaf;
10845 struct btrfs_extent_item *ei;
10851 struct btrfs_extent_inline_ref *iref;
10854 btrfs_init_path(&path);
/* Search key: the bytenr of the block with the highest possible offset. */
10855 key.objectid = btrfs_header_bytenr(eb);
10856 key.type = BTRFS_METADATA_ITEM_KEY;
10857 key.offset = (u64)-1;
10860 * Any failure in backref resolving means we can't determine
10861 * whom the tree block belongs to.
10862 * So in that case, we need to check that tree block
10864 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10868 ret = btrfs_previous_extent_item(extent_root, &path,
10869 btrfs_header_bytenr(eb));
10873 leaf = path.nodes[0];
10874 slot = path.slots[0];
10875 btrfs_item_key_to_cpu(leaf, &key, slot);
10876 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10878 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10879 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10881 struct btrfs_tree_block_info *info;
10883 info = (struct btrfs_tree_block_info *)(ei + 1);
10884 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10887 item_size = btrfs_item_size_nr(leaf, slot);
10888 ptr = (unsigned long)iref;
10889 end = (unsigned long)ei + item_size;
10890 while (ptr < end) {
10891 iref = (struct btrfs_extent_inline_ref *)ptr;
10892 type = btrfs_extent_inline_ref_type(leaf, iref);
10893 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10896 * We only check the tree block if current root is
10897 * the lowest referencer of it.
10899 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10900 offset < root->objectid) {
10901 btrfs_release_path(&path);
10905 ptr += btrfs_extent_inline_ref_size(type);
10908 * Normally we should also check keyed tree block ref, but that may be
10909 * very time consuming. Inlined ref should already make us skip a lot
10910 * of refs now. So skip search keyed tree block ref.
10914 btrfs_release_path(&path);
10919 * Traversal function for tree block. We will do:
10920 * 1) Skip shared fs/subvolume tree blocks
10921 * 2) Update related bytes accounting
10922 * 3) Pre-order traversal
/*
 * Check tree block @node of @root and then recurse into its children.
 *
 * @root: tree being traversed
 * @node: block to check; children are read with read_tree_block()
 *
 * Order of work as visible here:
 *  - for fs trees, consult should_check() first
 *  - add node->len to the global byte counters selected by the header owner
 *  - verify the backref of the block itself with check_tree_block_ref()
 *  - leaf: add its free space to btree_space_waste and run check_leaf_items()
 *  - node: add the unused key pointer slots to btree_space_waste, then read
 *    and traverse every child whose buffer is uptodate
 *
 * NOTE(review): return values and the skip paths are not visible in this
 * excerpt.
 */
10924 static int traverse_tree_block(struct btrfs_root *root,
10925 struct extent_buffer *node)
10927 struct extent_buffer *eb;
10928 struct btrfs_key key;
10929 struct btrfs_key drop_key;
10937 * Skip shared fs/subvolume tree block, in that case they will
10938 * be checked by referencer with lowest rootid
10940 if (is_fstree(root->objectid) && !should_check(root, node))
10943 /* Update bytes accounting */
10944 total_btree_bytes += node->len;
10945 if (fs_root_objectid(btrfs_header_owner(node)))
10946 total_fs_tree_bytes += node->len;
10947 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10948 total_extent_tree_bytes += node->len;
/*
 * Latch found_old_backref once a block owned by the tree reloc objectid has
 * the mixed backref revision but lacks the RELOC header flag.
 */
10949 if (!found_old_backref &&
10950 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10951 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10952 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10953 found_old_backref = 1;
10955 /* pre-order traversal, check itself first */
10956 level = btrfs_header_level(node);
10957 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10958 btrfs_header_level(node),
10959 btrfs_header_owner(node));
10963 "check %s failed root %llu bytenr %llu level %d, force continue check",
10964 level ? "node":"leaf", root->objectid,
10965 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf case: unused leaf space counts as waste, then every item is checked. */
10968 btree_space_waste += btrfs_leaf_free_space(root, node);
10969 ret = check_leaf_items(root, node);
/* Node case: every unused key pointer slot counts as waste. */
10974 nr = btrfs_header_nritems(node);
10975 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10976 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10977 sizeof(struct btrfs_key_ptr);
10979 /* Then check all its children */
10980 for (i = 0; i < nr; i++) {
10981 u64 blocknr = btrfs_node_blockptr(node, i);
10983 btrfs_node_key_to_cpu(node, &key, i);
/* Test for a child at the drop level whose key is reported as dropped. */
10984 if (level == root->root_item.drop_level &&
10985 is_dropped_key(&key, &drop_key))
10989 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10990 * to call the function itself.
10992 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10993 if (extent_buffer_uptodate(eb)) {
10994 ret = traverse_tree_block(root, eb);
10997 free_extent_buffer(eb);
11004 * Low memory usage version check_chunks_and_extents.
/*
 * Low memory mode entry point: traverse the chunk tree, the tree root, and
 * then every tree reached through the root items of the tree root.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 *
 * The root item walk starts at the extent tree objectid and moves forward
 * with btrfs_next_item(). For each ROOT_ITEM key the tree is opened with
 * btrfs_read_fs_root() (key.offset forced to (u64)-1) and passed to
 * traverse_tree_block().
 *
 * NOTE(review): the first error message below contains the typo treet
 * (should read tree). The second one prints key.objectid with %lld; confirm
 * whether that format matches the type of the field. Both are runtime
 * strings and are left untouched here.
 */
11006 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11008 struct btrfs_path path;
11009 struct btrfs_key key;
11010 struct btrfs_root *root1;
11011 struct btrfs_root *cur_root;
/* The chunk tree goes first, the tree root second. */
11015 root1 = root->fs_info->chunk_root;
11016 ret = traverse_tree_block(root1, root1->node);
11019 root1 = root->fs_info->tree_root;
11020 ret = traverse_tree_block(root1, root1->node);
/* root1 still points at the tree root: look up root items from here on. */
11023 btrfs_init_path(&path);
11024 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11026 key.type = BTRFS_ROOT_ITEM_KEY;
11028 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11030 error("cannot find extent treet in tree_root");
11035 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11036 if (key.type != BTRFS_ROOT_ITEM_KEY)
11038 key.offset = (u64)-1;
11040 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11041 if (IS_ERR(cur_root) || !cur_root) {
11042 error("failed to read tree: %lld", key.objectid);
11046 ret = traverse_tree_block(cur_root, cur_root->node);
11050 ret = btrfs_next_item(root1, &path);
11056 btrfs_release_path(&path);
/*
 * Give @root a freshly initialised root block.
 *
 * @trans:     running transaction; its transid becomes the block generation
 * @root:      root whose node is being replaced
 * @overwrite: NOTE(review): its use is not visible in this excerpt.
 *             Presumably it selects reusing the current root block (the first
 *             extent_buffer_get() below) over allocating a new one with
 *             btrfs_alloc_free_block(). Confirm against the full source.
 *
 * The header of the chosen block is zeroed and refilled with level, bytenr,
 * generation, backref revision, owner, fsid and chunk tree uuid, and the
 * block is marked dirty. When the new block sits at the same bytenr as the
 * old one, the root item is updated in the tree root right away. The root is
 * finally added to the dirty list.
 */
11060 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11061 struct btrfs_root *root, int overwrite)
11063 struct extent_buffer *c;
11064 struct extent_buffer *old = root->node;
11067 struct btrfs_disk_key disk_key = {0,0,0};
11073 extent_buffer_get(c);
11076 c = btrfs_alloc_free_block(trans, root,
11078 root->root_key.objectid,
11079 &disk_key, level, 0, 0);
11082 extent_buffer_get(c);
/* Wipe the whole header, then set the fields one by one. */
11086 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11087 btrfs_set_header_level(c, level);
11088 btrfs_set_header_bytenr(c, c->start);
11089 btrfs_set_header_generation(c, trans->transid);
11090 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11091 btrfs_set_header_owner(c, root->root_key.objectid);
11093 write_extent_buffer(c, root->fs_info->fsid,
11094 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11096 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11097 btrfs_header_chunk_tree_uuid(c),
11100 btrfs_mark_buffer_dirty(c);
11102 * this case can happen in the following case:
11104 * 1.overwrite previous root.
11106 * 2.reinit reloc data root, this is because we skip pin
11107 * down reloc data tree before which means we can allocate
11108 * same block bytenr here.
/* Same bytenr as before: refresh the root item and write it to the tree root. */
11110 if (old->start == c->start) {
11111 btrfs_set_root_generation(&root->root_item,
11113 root->root_item.level = btrfs_header_level(root->node);
11114 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11115 &root->root_key, &root->root_item);
11117 free_extent_buffer(c);
11121 free_extent_buffer(old);
11123 add_root_to_dirty_list(root);
/*
 * Pin the extent of tree block @eb and, recursively, of the blocks below it.
 *
 * @fs_info:   filesystem whose pinned_extents tree is updated
 * @eb:        block to pin
 * @tree_root: nonzero while walking the tree root; pin_metadata_blocks()
 *             passes 1 for the tree root and 0 for the chunk tree
 *
 * A block whose range already has EXTENT_DIRTY set in pinned_extents is
 * detected up front so it is not processed twice.
 *
 * Two per-slot paths are visible in the loop:
 *  - item path: slots that hold a ROOT_ITEM key, other than the extent root
 *    and the two reloc trees, have the root block they point to read and
 *    pinned recursively with tree_root set to 0
 *  - pointer path: the child bytenr is taken from the node pointer. At
 *    level 1 outside the tree root the child is pinned by bytenr and nodesize
 *    without being read; otherwise it is read and pinned recursively.
 *
 * NOTE(review): the condition that selects between the two paths and the
 * return statements are not visible in this excerpt.
 */
11127 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11128 struct extent_buffer *eb, int tree_root)
11130 struct extent_buffer *tmp;
11131 struct btrfs_root_item *ri;
11132 struct btrfs_key key;
11135 int level = btrfs_header_level(eb);
11141 * If we have pinned this block before, don't pin it again.
11142 * This can not only avoid forever loop with broken filesystem
11143 * but also give us some speedups.
11145 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11146 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11149 btrfs_pin_extent(fs_info, eb->start, eb->len);
11151 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11152 nritems = btrfs_header_nritems(eb);
11153 for (i = 0; i < nritems; i++) {
11155 btrfs_item_key_to_cpu(eb, &key, i);
11156 if (key.type != BTRFS_ROOT_ITEM_KEY)
11158 /* Skip the extent root and reloc roots */
11159 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11160 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11161 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11163 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11164 bytenr = btrfs_disk_root_bytenr(eb, ri);
11167 * If at any point we start needing the real root we
11168 * will have to build a stump root for the root we are
11169 * in, but for now this doesn't actually use the root so
11170 * just pass in extent_root.
11172 tmp = read_tree_block(fs_info->extent_root, bytenr,
11174 if (!extent_buffer_uptodate(tmp)) {
11175 fprintf(stderr, "Error reading root block\n");
11178 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11179 free_extent_buffer(tmp);
11183 bytenr = btrfs_node_blockptr(eb, i);
11185 /* If we aren't the tree root don't read the block */
11186 if (level == 1 && !tree_root) {
11187 btrfs_pin_extent(fs_info, bytenr, nodesize);
11191 tmp = read_tree_block(fs_info->extent_root, bytenr,
11193 if (!extent_buffer_uptodate(tmp)) {
11194 fprintf(stderr, "Error reading tree block\n");
11197 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11198 free_extent_buffer(tmp);
/*
 * Pin the blocks reachable from the chunk tree and then from the tree root.
 *
 * The chunk tree is walked with tree_root set to 0, the tree root with
 * tree_root set to 1. The result of the second walk is returned directly.
 * NOTE(review): handling of the first result is not visible in this excerpt.
 */
11207 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11211 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11215 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk items of the chunk tree.
 *
 * @fs_info: filesystem whose block group state is recreated
 *
 * The three avail_*_alloc_bits fields are cleared first. The chunk tree is
 * then scanned leaf by leaf. For every CHUNK_ITEM a block group is added
 * with the type and length of the chunk, and the range starting at
 * key.offset is marked dirty in fs_info->free_space_cache.
 *
 * A second loop then steps through the block groups with
 * btrfs_lookup_first_block_group(), advancing start past each one.
 * NOTE(review): the remainder of that loop body and the return statements
 * are not visible in this excerpt.
 */
11218 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11220 struct btrfs_block_group_cache *cache;
11221 struct btrfs_path path;
11222 struct extent_buffer *leaf;
11223 struct btrfs_chunk *chunk;
11224 struct btrfs_key key;
11228 btrfs_init_path(&path);
11230 key.type = BTRFS_CHUNK_ITEM_KEY;
11232 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11234 btrfs_release_path(&path);
11239 * We do this in case the block groups were screwed up and had alloc
11240 * bits that aren't actually set on the chunks. This happens with
11241 * restored images every time and could happen in real life I guess.
11243 fs_info->avail_data_alloc_bits = 0;
11244 fs_info->avail_metadata_alloc_bits = 0;
11245 fs_info->avail_system_alloc_bits = 0;
11247 /* First we need to create the in-memory block groups */
11249 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11250 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11252 btrfs_release_path(&path);
11260 leaf = path.nodes[0];
11261 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11262 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11267 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11268 btrfs_add_block_group(fs_info, 0,
11269 btrfs_chunk_type(leaf, chunk),
11270 key.objectid, key.offset,
11271 btrfs_chunk_length(leaf, chunk));
11272 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11273 key.offset + btrfs_chunk_length(leaf, chunk),
11279 cache = btrfs_lookup_first_block_group(fs_info, start);
11283 start = cache->key.objectid + cache->key.offset;
11286 btrfs_release_path(&path);
/*
 * Remove the leftovers of a pending balance and reinitialise the data reloc
 * tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Steps as visible here:
 *  1. search the tree root for the balance item and delete it; one path
 *     jumps straight to the reinit_data_reloc label instead
 *  2. repeatedly search for root items with the tree reloc objectid and
 *     delete them in batches of del_nr items starting at del_slot
 *  3. read the data reloc tree, record it in the transaction, reinitialise
 *     its root with btrfs_fsck_reinit_root() and recreate its root directory
 *
 * NOTE(review): the error checks and return statements between the calls are
 * not visible in this excerpt.
 */
11290 static int reset_balance(struct btrfs_trans_handle *trans,
11291 struct btrfs_fs_info *fs_info)
11293 struct btrfs_root *root = fs_info->tree_root;
11294 struct btrfs_path path;
11295 struct extent_buffer *leaf;
11296 struct btrfs_key key;
11297 int del_slot, del_nr = 0;
/* Step 1: the balance item. */
11301 btrfs_init_path(&path);
11302 key.objectid = BTRFS_BALANCE_OBJECTID;
11303 key.type = BTRFS_BALANCE_ITEM_KEY;
11305 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11310 goto reinit_data_reloc;
11315 ret = btrfs_del_item(trans, root, &path);
11318 btrfs_release_path(&path);
/* Step 2: root items of the tree reloc objectid. */
11320 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11321 key.type = BTRFS_ROOT_ITEM_KEY;
11323 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11327 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11332 ret = btrfs_del_items(trans, root, &path,
11339 btrfs_release_path(&path);
11342 ret = btrfs_search_slot(trans, root, &key, &path,
11349 leaf = path.nodes[0];
11350 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11351 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11353 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11358 del_slot = path.slots[0];
11367 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11371 btrfs_release_path(&path);
/* Step 3: from here on root refers to the data reloc tree, not the tree root. */
11374 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11375 key.type = BTRFS_ROOT_ITEM_KEY;
11376 key.offset = (u64)-1;
11377 root = btrfs_read_fs_root(fs_info, &key);
11378 if (IS_ERR(root)) {
11379 fprintf(stderr, "Error reading data reloc tree\n");
11380 ret = PTR_ERR(root);
11383 record_root_in_trans(trans, root);
11384 ret = btrfs_fsck_reinit_root(trans, root, 0);
11387 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11389 btrfs_release_path(&path);
/*
 * Recreate the extent tree from scratch.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Sequence as visible here:
 *  1. filesystems with the MIXED_GROUPS incompat flag are rejected with a
 *     message on stderr
 *  2. pin_metadata_blocks() pins the metadata currently in use
 *  3. the block groups are freed and rebuilt with reset_block_groups()
 *  4. the extent root is reinitialised with btrfs_fsck_reinit_root()
 *  5. every in-memory block group gets its item inserted into the extent
 *     root, followed by btrfs_extent_post_op()
 *  6. reset_balance() clears a pending balance
 *
 * NOTE(review): the return statements after each failure message are not
 * visible in this excerpt.
 */
11393 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11394 struct btrfs_fs_info *fs_info)
11400 * The only reason we don't do this is because right now we're just
11401 * walking the trees we find and pinning down their bytes, we don't look
11402 * at any of the leaves. In order to do mixed groups we'd have to check
11403 * the leaves of any fs roots and pin down the bytes for any file
11404 * extents we find. Not hard but why do it if we don't have to?
11406 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11407 fprintf(stderr, "We don't support re-initing the extent tree "
11408 "for mixed block groups yet, please notify a btrfs "
11409 "developer you want to do this so they can add this "
11410 "functionality.\n");
11415 * first we need to walk all of the trees except the extent tree and pin
11416 * down the bytes that are in use so we don't overwrite any existing
11419 ret = pin_metadata_blocks(fs_info);
11421 fprintf(stderr, "error pinning down used bytes\n");
11426 * Need to drop all the block groups since we're going to recreate all
11429 btrfs_free_block_groups(fs_info);
11430 ret = reset_block_groups(fs_info);
11432 fprintf(stderr, "error resetting the block groups\n");
11436 /* Ok we can allocate now, reinit the extent root */
11437 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11439 fprintf(stderr, "extent root initialization failed\n");
11441 * When the transaction code is updated we should end the
11442 * transaction, but for now progs only knows about commit so
11443 * just return an error.
11449 * Now we have all the in-memory block groups setup so we can make
11450 * allocations properly, and the metadata we care about is safe since we
11451 * pinned all of it above.
11454 struct btrfs_block_group_cache *cache;
11456 cache = btrfs_lookup_first_block_group(fs_info, start);
11459 start = cache->key.objectid + cache->key.offset;
11460 ret = btrfs_insert_item(trans, fs_info->extent_root,
11461 &cache->key, &cache->item,
11462 sizeof(cache->item));
11464 fprintf(stderr, "Error adding block group\n");
11467 btrfs_extent_post_op(trans, fs_info->extent_root);
11470 ret = reset_balance(trans, fs_info);
11472 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW metadata block @eb in its own transaction.
 *
 * @root: any root of the filesystem; it is replaced below by the owner root
 *        read from the header of @eb
 * @eb:   block to rewrite
 *
 * The path is set up with lowest_level equal to the level of @eb and the
 * first key of the block, and btrfs_search_slot() is called with a
 * transaction and cow set to 1. The transaction is committed afterwards.
 *
 * Returns PTR_ERR() of the failing call when the owner root cannot be read
 * or the transaction cannot be started.
 * NOTE(review): the result of btrfs_commit_transaction() is not checked
 * here, and the final return statement is not visible in this excerpt.
 */
11477 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11479 struct btrfs_path path;
11480 struct btrfs_trans_handle *trans;
11481 struct btrfs_key key;
11484 printf("Recowing metadata block %llu\n", eb->start);
/* Look up the owning root through the owner field of the block header. */
11485 key.objectid = btrfs_header_owner(eb);
11486 key.type = BTRFS_ROOT_ITEM_KEY;
11487 key.offset = (u64)-1;
11489 root = btrfs_read_fs_root(root->fs_info, &key);
11490 if (IS_ERR(root)) {
11491 fprintf(stderr, "Couldn't find owner root %llu\n",
11493 return PTR_ERR(root);
11496 trans = btrfs_start_transaction(root, 1);
11498 return PTR_ERR(trans);
11500 btrfs_init_path(&path);
/* Stop the search at the level of the block; pick a node key or an item key. */
11501 path.lowest_level = btrfs_header_level(eb);
11502 if (path.lowest_level)
11503 btrfs_node_key_to_cpu(eb, &key, 0);
11505 btrfs_item_key_to_cpu(eb, &key, 0);
11507 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11508 btrfs_commit_transaction(trans, root);
11509 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree it lives in.
 *
 * @root: any root of the filesystem; it is replaced below by the root whose
 *        id is bad->root_id
 * @bad:  key of the item and id of the owning root
 *
 * A transaction is started on the owning root, the key is searched with
 * ins_len -1 and cow 1, the item is deleted and the transaction committed.
 *
 * Returns PTR_ERR() of the failing call when the owning root cannot be read
 * or the transaction cannot be started.
 * NOTE(review): the result of btrfs_commit_transaction() is not checked
 * here, and the handling of the search result is not visible in this excerpt.
 */
11513 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11515 struct btrfs_path path;
11516 struct btrfs_trans_handle *trans;
11517 struct btrfs_key key;
11520 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11521 bad->key.type, bad->key.offset);
/* This key only locates the owning root; the item key itself is bad->key. */
11522 key.objectid = bad->root_id;
11523 key.type = BTRFS_ROOT_ITEM_KEY;
11524 key.offset = (u64)-1;
11526 root = btrfs_read_fs_root(root->fs_info, &key);
11527 if (IS_ERR(root)) {
11528 fprintf(stderr, "Couldn't find owner root %llu\n",
11530 return PTR_ERR(root);
11533 trans = btrfs_start_transaction(root, 1);
11535 return PTR_ERR(trans);
11537 btrfs_init_path(&path);
11538 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11544 ret = btrfs_del_item(trans, root, &path);
11546 btrfs_commit_transaction(trans, root);
11547 btrfs_release_path(&path);
/*
 * Clear the log tree pointer in the super block.
 *
 * @root: root used to start and commit the transaction
 *
 * Both the log root bytenr and the log root level in super_copy are set to
 * 0 inside a transaction, which is then committed; ret takes the result of
 * the commit.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
11551 static int zero_log_tree(struct btrfs_root *root)
11553 struct btrfs_trans_handle *trans;
11556 trans = btrfs_start_transaction(root, 1);
11557 if (IS_ERR(trans)) {
11558 ret = PTR_ERR(trans);
11561 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11562 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11563 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range beginning at @start.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums; also supplies sectorsize
 * @buf:       scratch buffer for one sector of data
 * @start:     first byte of the range
 *
 * The range is processed one sector at a time while offset is below len:
 * the sector at start + offset is read into @buf with read_extent_data() and
 * then handed to btrfs_csum_file_block(), with start + len as the third
 * argument.
 * NOTE(review): the rest of the parameter list (len is used below) and the
 * error handling are not visible in this excerpt.
 */
11567 static int populate_csum(struct btrfs_trans_handle *trans,
11568 struct btrfs_root *csum_root, char *buf, u64 start,
11575 while (offset < len) {
11576 sectorsize = csum_root->sectorsize;
11577 ret = read_extent_data(csum_root, buf, start + offset,
11581 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11582 start + offset, buf, sectorsize);
11585 offset += sectorsize;
/*
 * Fill the csum tree from the file extents of one fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 * @cur_root:  fs/subvolume tree whose file extents are scanned
 *
 * The tree is walked item by item with btrfs_next_item(). For each
 * EXTENT_DATA item of type BTRFS_FILE_EXTENT_REG, populate_csum() is called
 * with the disk bytenr and disk length of the extent. A return of -EEXIST
 * from populate_csum() is tested separately.
 * NOTE(review): what that branch does, the starting search key and the
 * return statements are not visible in this excerpt.
 */
11590 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11591 struct btrfs_root *csum_root,
11592 struct btrfs_root *cur_root)
11594 struct btrfs_path path;
11595 struct btrfs_key key;
11596 struct extent_buffer *node;
11597 struct btrfs_file_extent_item *fi;
/* Scratch buffer of one sector, sized from the csum root of the filesystem. */
11604 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11608 btrfs_init_path(&path);
11612 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11615 /* Iterate all regular file extents and fill its csum */
11617 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11619 if (key.type != BTRFS_EXTENT_DATA_KEY)
11621 node = path.nodes[0];
11622 slot = path.slots[0];
11623 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11624 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The on-disk range of the extent is what gets read and checksummed. */
11626 start = btrfs_file_extent_disk_bytenr(node, fi);
11627 len = btrfs_file_extent_disk_num_bytes(node, fi);
11629 ret = populate_csum(trans, csum_root, buf, start, len);
11630 if (ret == -EEXIST)
11636 * TODO: if next leaf is corrupted, jump to nearest next valid
11639 ret = btrfs_next_item(cur_root, &path);
11649 btrfs_release_path(&path);
/*
 * Fill the csum tree by scanning every fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 *
 * Root items in the tree root are walked starting at the fs tree objectid.
 * Keys above BTRFS_LAST_FREE_OBJECTID, keys that are not ROOT_ITEMs and
 * objectids for which is_fstree() is false are each tested for. For the
 * remaining ones the tree is opened with btrfs_read_fs_root() and passed to
 * fill_csum_tree_from_one_fs_root().
 * NOTE(review): the branch bodies and return statements are not visible in
 * this excerpt.
 */
11654 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11655 struct btrfs_root *csum_root)
11657 struct btrfs_fs_info *fs_info = csum_root->fs_info;
11658 struct btrfs_path path;
11659 struct btrfs_root *tree_root = fs_info->tree_root;
11660 struct btrfs_root *cur_root;
11661 struct extent_buffer *node;
11662 struct btrfs_key key;
11666 btrfs_init_path(&path);
11667 key.objectid = BTRFS_FS_TREE_OBJECTID;
11669 key.type = BTRFS_ROOT_ITEM_KEY;
11670 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11679 node = path.nodes[0];
11680 slot = path.slots[0];
11681 btrfs_item_key_to_cpu(node, &key, slot);
11682 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11684 if (key.type != BTRFS_ROOT_ITEM_KEY)
11686 if (!is_fstree(key.objectid))
/* The offset is forced to (u64)-1 before the root is read. */
11688 key.offset = (u64)-1;
11690 cur_root = btrfs_read_fs_root(fs_info, &key);
11691 if (IS_ERR(cur_root) || !cur_root) {
11692 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11696 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11701 ret = btrfs_next_item(tree_root, &path);
11711 btrfs_release_path(&path);
/*
 * Fill the csum tree by scanning the data extents recorded in the extent
 * tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums; also supplies the
 *             sector size used for the scratch buffer
 *
 * The extent tree is walked leaf by leaf. Items whose key type is not
 * EXTENT_ITEM, and extent items without BTRFS_EXTENT_FLAG_DATA, are tested
 * for; for data extents populate_csum() is called with key.objectid as the
 * start of the range.
 * NOTE(review): the branch bodies, the length argument and the return
 * statements are not visible in this excerpt.
 */
11715 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11716 struct btrfs_root *csum_root)
11718 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11719 struct btrfs_path path;
11720 struct btrfs_extent_item *ei;
11721 struct extent_buffer *leaf;
11723 struct btrfs_key key;
11726 btrfs_init_path(&path);
11728 key.type = BTRFS_EXTENT_ITEM_KEY;
11730 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11732 btrfs_release_path(&path);
11736 buf = malloc(csum_root->sectorsize);
11738 btrfs_release_path(&path);
/* Move to the next leaf once the current one is exhausted. */
11743 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11744 ret = btrfs_next_leaf(extent_root, &path);
11752 leaf = path.nodes[0];
11754 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11755 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11760 ei = btrfs_item_ptr(leaf, path.slots[0],
11761 struct btrfs_extent_item);
11762 if (!(btrfs_extent_flags(leaf, ei) &
11763 BTRFS_EXTENT_FLAG_DATA)) {
11768 ret = populate_csum(trans, csum_root, buf, key.objectid,
11775 btrfs_release_path(&path);
11781 * Recalculate the csum and put it into the csum tree.
11783 * Extent tree init will wipe out all the extent info, so in that case we
11784 * can't depend on the extent tree and must use the fs trees instead. If
11785 * search_fs_tree is set, we will use fs/subvol trees to init the csum tree.
/*
 * Select the data source used to rebuild the csum tree.
 *
 * @trans:          running transaction
 * @csum_root:      csum tree that receives the checksums
 * @search_fs_tree: nonzero to scan the fs/subvolume trees, zero to scan the
 *                  extent tree
 *
 * Returns the result of the chosen fill_csum_tree_from_*() helper.
 */
11787 static int fill_csum_tree(struct btrfs_trans_handle *trans,
11788 struct btrfs_root *csum_root,
11789 int search_fs_tree)
11791 if (search_fs_tree)
11792 return fill_csum_tree_from_fs(trans, csum_root);
11794 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Nothing is walked when the cache pointer is NULL. Otherwise entries are
 * removed from the tree one at a time, starting with the first, until it is
 * empty. The tree itself is then freed and the global pointer reset to NULL.
 * NOTE(review): the line that releases each root_item_info is not visible in
 * this excerpt.
 */
11797 static void free_roots_info_cache(void)
11799 if (!roots_info_cache)
11802 while (!cache_tree_empty(roots_info_cache)) {
11803 struct cache_extent *entry;
11804 struct root_item_info *rii;
11806 entry = first_cache_extent(roots_info_cache);
11809 remove_cache_extent(roots_info_cache, entry);
/* Recover the containing root_item_info from its embedded cache_extent. */
11810 rii = container_of(entry, struct root_item_info, cache_extent);
11814 free(roots_info_cache);
11815 roots_info_cache = NULL;
/*
 * Build roots_info_cache: one root_item_info per root id, describing the
 * highest level tree block found for that root in the extent tree.
 *
 * @info: filesystem whose extent tree is scanned
 *
 * The cache tree is allocated on first use. The extent tree is then walked
 * leaf by leaf. Only tree block extents are of interest: METADATA_ITEM keys,
 * and EXTENT_ITEM keys whose flags include BTRFS_EXTENT_FLAG_TREE_BLOCK.
 * The block level comes from key.offset for METADATA_ITEM keys and from the
 * btrfs_tree_block_info in the other visible layout.
 *
 * The first inline ref is tested for being a TREE_BLOCK_REF; its offset is
 * the root id used as the cache key (start = root id, size = 1). A new entry
 * starts with level (u8)-1. An entry is overwritten with the bytenr,
 * generation and a node_count of 1 whenever a block of a higher level is
 * seen, or when the entry is still new.
 * NOTE(review): the body of the equal level branch, the skip branches and
 * the return statements are not visible in this excerpt.
 */
11818 static int build_roots_info_cache(struct btrfs_fs_info *info)
11821 struct btrfs_key key;
11822 struct extent_buffer *leaf;
11823 struct btrfs_path path;
11825 if (!roots_info_cache) {
11826 roots_info_cache = malloc(sizeof(*roots_info_cache));
11827 if (!roots_info_cache)
11829 cache_tree_init(roots_info_cache);
11832 btrfs_init_path(&path);
11834 key.type = BTRFS_EXTENT_ITEM_KEY;
11836 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11839 leaf = path.nodes[0];
11842 struct btrfs_key found_key;
11843 struct btrfs_extent_item *ei;
11844 struct btrfs_extent_inline_ref *iref;
11845 int slot = path.slots[0];
11850 struct cache_extent *entry;
11851 struct root_item_info *rii;
/* Past the last slot: advance to the next leaf and refresh leaf and slot. */
11853 if (slot >= btrfs_header_nritems(leaf)) {
11854 ret = btrfs_next_leaf(info->extent_root, &path);
11861 leaf = path.nodes[0];
11862 slot = path.slots[0];
11865 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11867 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11868 found_key.type != BTRFS_METADATA_ITEM_KEY)
11871 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11872 flags = btrfs_extent_flags(leaf, ei);
11874 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11875 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* Locate the first inline ref and the level for the two item layouts. */
11878 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11879 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11880 level = found_key.offset;
11882 struct btrfs_tree_block_info *binfo;
11884 binfo = (struct btrfs_tree_block_info *)(ei + 1);
11885 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11886 level = btrfs_tree_block_level(leaf, binfo);
11890 * For a root extent, it must be of the following type and the
11891 * first (and only one) iref in the item.
11893 type = btrfs_extent_inline_ref_type(leaf, iref);
11894 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11897 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11898 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entry: level (u8)-1 is the value later tested for as not yet set. */
11900 rii = malloc(sizeof(struct root_item_info));
11905 rii->cache_extent.start = root_id;
11906 rii->cache_extent.size = 1;
11907 rii->level = (u8)-1;
11908 entry = &rii->cache_extent;
11909 ret = insert_cache_extent(roots_info_cache, entry);
11912 rii = container_of(entry, struct root_item_info,
11916 ASSERT(rii->cache_extent.start == root_id);
11917 ASSERT(rii->cache_extent.size == 1);
/* A higher level block, or the first block seen, replaces the candidate. */
11919 if (level > rii->level || rii->level == (u8)-1) {
11920 rii->level = level;
11921 rii->bytenr = found_key.objectid;
11922 rii->gen = btrfs_extent_generation(leaf, ei);
11923 rii->node_count = 1;
11924 } else if (level == rii->level) {
11932 btrfs_release_path(&path);
/*
 * Compare one on-disk root item with the cached root info and optionally
 * rewrite it.
 *
 * @info:           filesystem being checked
 * @path:           path positioned at the root item inside a tree root leaf
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: nonzero to only detect a mismatch, zero to also write the
 *                  corrected values back into the leaf
 *
 * The cache entry for the root id is looked up in roots_info_cache. Messages
 * are prepared for a missing entry and for an entry whose node_count is not
 * 1. The root item is copied out of the leaf and its bytenr, level and
 * generation are compared with the cached values. On a mismatch a message is
 * printed unless both read_only_mode and the global repair flag are set. A
 * root item generation newer than the cached one is reported separately.
 * Outside read only mode the three fields are overwritten with the cached
 * values and the item is written back to the leaf.
 * NOTE(review): the return values of the individual paths are not visible in
 * this excerpt.
 */
11937 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11938 struct btrfs_path *path,
11939 const struct btrfs_key *root_key,
11940 const int read_only_mode)
11942 const u64 root_id = root_key->objectid;
11943 struct cache_extent *entry;
11944 struct root_item_info *rii;
11945 struct btrfs_root_item ri;
11946 unsigned long offset;
11948 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11951 "Error: could not find extent items for root %llu\n",
11952 root_key->objectid);
11956 rii = container_of(entry, struct root_item_info, cache_extent);
11957 ASSERT(rii->cache_extent.start == root_id);
11958 ASSERT(rii->cache_extent.size == 1);
11960 if (rii->node_count != 1) {
11962 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf. */
11967 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11968 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11970 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11971 btrfs_root_level(&ri) != rii->level ||
11972 btrfs_root_generation(&ri) != rii->gen) {
11975 * If we're in repair mode but our caller told us to not update
11976 * the root item, i.e. just check if it needs to be updated, don't
11977 * print this message, since the caller will call us again shortly
11978 * for the same root item without read only mode (the caller will
11979 * open a transaction first).
11981 if (!(read_only_mode && repair))
11983 "%sroot item for root %llu,"
11984 " current bytenr %llu, current gen %llu, current level %u,"
11985 " new bytenr %llu, new gen %llu, new level %u\n",
11986 (read_only_mode ? "" : "fixing "),
11988 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11989 btrfs_root_level(&ri),
11990 rii->bytenr, rii->gen, rii->level);
11992 if (btrfs_root_generation(&ri) > rii->gen) {
11994 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11995 root_id, btrfs_root_generation(&ri), rii->gen);
11999 if (!read_only_mode) {
12000 btrfs_set_root_bytenr(&ri, rii->bytenr);
12001 btrfs_set_root_level(&ri, rii->level);
12002 btrfs_set_root_generation(&ri, rii->gen);
12003 write_extent_buffer(path->nodes[0], &ri,
12004 offset, sizeof(ri));
12014 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12015 * caused read-only snapshots to be corrupted if they were created at a moment
12016 * when the source subvolume/snapshot had orphan items. The issue was that the
12017 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12018 * node instead of the post orphan cleanup root node.
12019 * So this function, and its callees, just detects and fixes those cases. Even
12020 * though the regression was for read-only snapshots, this function applies to
12021 * any snapshot/subvolume root.
12022 * This must be run before any other repair code - not doing so makes other
12023 * repair code delete or modify backrefs in the extent tree for example, which
12024 * will result in an inconsistent fs after repairing the root items.
/*
 * Check every root item in the tree root against the extent tree and fix
 * the ones that are out of date.
 *
 * @info: filesystem being checked
 *
 * build_roots_info_cache() is run first. The tree root is then scanned from
 * BTRFS_FIRST_FREE_OBJECTID onwards. Each ROOT_ITEM is passed to
 * maybe_repair_root_item(); the tree reloc objectid is tested for
 * separately. A transaction is started on the tree root in one branch, and
 * the condition (!trans && repair) releases the path part way through a
 * leaf. When a leaf is exhausted find_next_key() supplies the next search
 * key and the transaction is committed in that branch.
 *
 * The roots info cache is freed before returning.
 * NOTE(review): the result of the final btrfs_commit_transaction() call is
 * not checked, and the loop control and return statements are not visible in
 * this excerpt.
 */
12026 static int repair_root_items(struct btrfs_fs_info *info)
12028 struct btrfs_path path;
12029 struct btrfs_key key;
12030 struct extent_buffer *leaf;
12031 struct btrfs_trans_handle *trans = NULL;
12034 int need_trans = 0;
12036 btrfs_init_path(&path);
12038 ret = build_roots_info_cache(info);
12042 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12043 key.type = BTRFS_ROOT_ITEM_KEY;
12048 * Avoid opening and committing transactions if a leaf doesn't have
12049 * any root items that need to be fixed, so that we avoid rotating
12050 * backup roots unnecessarily.
12053 trans = btrfs_start_transaction(info->tree_root, 1);
12054 if (IS_ERR(trans)) {
12055 ret = PTR_ERR(trans);
12060 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12064 leaf = path.nodes[0];
12067 struct btrfs_key found_key;
12069 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12070 int no_more_keys = find_next_key(&path, &key);
12072 btrfs_release_path(&path);
12074 ret = btrfs_commit_transaction(trans,
12086 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12088 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12090 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12093 ret = maybe_repair_root_item(info, &path, &found_key,
12098 if (!trans && repair) {
12101 btrfs_release_path(&path);
12111 free_roots_info_cache();
12112 btrfs_release_path(&path);
12114 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and invalidate the cache
 * generation in the super block.
 *
 * @fs_info: filesystem whose space cache is cleared
 *
 * Block groups are visited in order with btrfs_lookup_first_block_group(),
 * advancing current past each one after btrfs_clear_free_space_cache() has
 * run on it. A transaction on the tree root then sets the super block cache
 * generation to (u64)-1 and is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 * NOTE(review): the result of btrfs_commit_transaction() is not checked
 * here, and the remaining return statements are not visible in this excerpt.
 */
12121 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12123 struct btrfs_trans_handle *trans;
12124 struct btrfs_block_group_cache *bg_cache;
12128 /* Clear all free space cache inodes and its extent data */
12130 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12133 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* The next lookup starts right after the end of this block group. */
12136 current = bg_cache->key.objectid + bg_cache->key.offset;
12139 /* Don't forget to set cache_generation to -1 */
12140 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12141 if (IS_ERR(trans)) {
12142 error("failed to update super block cache generation");
12143 return PTR_ERR(trans);
12145 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12146 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage text for the check command, one string per output line: the
 * synopsis, a short and a long description, then one entry per option.
 * cmd_check() passes this array to usage().
 */
12151 const char * const cmd_check_usage[] = {
12152 "btrfs check [options] <device>",
12153 "Check structural integrity of a filesystem (unmounted).",
12154 "Check structural integrity of an unmounted filesystem. Verify internal",
12155 "trees' consistency and item connectivity. In the repair mode try to",
12156 "fix the problems found. ",
12157 "WARNING: the repair mode is considered dangerous",
12159 "-s|--super <superblock> use this superblock copy",
12160 "-b|--backup use the first valid backup root copy",
12161 "--repair try to repair the filesystem",
12162 "--readonly run in read-only mode (default)",
12163 "--init-csum-tree create a new CRC tree",
12164 "--init-extent-tree create a new extent tree",
12165 "--mode <MODE> allows choice of memory/IO trade-offs",
12166 " where MODE is one of:",
12167 " original - read inodes and extents to memory (requires",
12168 " more memory, does less IO)",
12169 " lowmem - try to use less memory but read blocks again",
12171 "--check-data-csum verify checksums of data blocks",
12172 "-Q|--qgroup-report print a report on qgroup consistency",
12173 "-E|--subvol-extents <subvolid>",
12174 " print subvolume extents and sharing state",
12175 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12176 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12177 "-p|--progress indicate progress",
12178 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
12182 int cmd_check(int argc, char **argv)
12184 struct cache_tree root_cache;
12185 struct btrfs_root *root;
12186 struct btrfs_fs_info *info;
12189 u64 tree_root_bytenr = 0;
12190 u64 chunk_root_bytenr = 0;
12191 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12194 int init_csum_tree = 0;
12196 int clear_space_cache = 0;
12197 int qgroup_report = 0;
12198 int qgroups_repaired = 0;
12199 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12203 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12204 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12205 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12206 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12207 static const struct option long_options[] = {
12208 { "super", required_argument, NULL, 's' },
12209 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12210 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12211 { "init-csum-tree", no_argument, NULL,
12212 GETOPT_VAL_INIT_CSUM },
12213 { "init-extent-tree", no_argument, NULL,
12214 GETOPT_VAL_INIT_EXTENT },
12215 { "check-data-csum", no_argument, NULL,
12216 GETOPT_VAL_CHECK_CSUM },
12217 { "backup", no_argument, NULL, 'b' },
12218 { "subvol-extents", required_argument, NULL, 'E' },
12219 { "qgroup-report", no_argument, NULL, 'Q' },
12220 { "tree-root", required_argument, NULL, 'r' },
12221 { "chunk-root", required_argument, NULL,
12222 GETOPT_VAL_CHUNK_TREE },
12223 { "progress", no_argument, NULL, 'p' },
12224 { "mode", required_argument, NULL,
12226 { "clear-space-cache", required_argument, NULL,
12227 GETOPT_VAL_CLEAR_SPACE_CACHE},
12228 { NULL, 0, NULL, 0}
12231 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12235 case 'a': /* ignored */ break;
12237 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12240 num = arg_strtou64(optarg);
12241 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12243 "super mirror should be less than %d",
12244 BTRFS_SUPER_MIRROR_MAX);
12247 bytenr = btrfs_sb_offset(((int)num));
12248 printf("using SB copy %llu, bytenr %llu\n", num,
12249 (unsigned long long)bytenr);
12255 subvolid = arg_strtou64(optarg);
12258 tree_root_bytenr = arg_strtou64(optarg);
12260 case GETOPT_VAL_CHUNK_TREE:
12261 chunk_root_bytenr = arg_strtou64(optarg);
12264 ctx.progress_enabled = true;
12268 usage(cmd_check_usage);
12269 case GETOPT_VAL_REPAIR:
12270 printf("enabling repair mode\n");
12272 ctree_flags |= OPEN_CTREE_WRITES;
12274 case GETOPT_VAL_READONLY:
12277 case GETOPT_VAL_INIT_CSUM:
12278 printf("Creating a new CRC tree\n");
12279 init_csum_tree = 1;
12281 ctree_flags |= OPEN_CTREE_WRITES;
12283 case GETOPT_VAL_INIT_EXTENT:
12284 init_extent_tree = 1;
12285 ctree_flags |= (OPEN_CTREE_WRITES |
12286 OPEN_CTREE_NO_BLOCK_GROUPS);
12289 case GETOPT_VAL_CHECK_CSUM:
12290 check_data_csum = 1;
12292 case GETOPT_VAL_MODE:
12293 check_mode = parse_check_mode(optarg);
12294 if (check_mode == CHECK_MODE_UNKNOWN) {
12295 error("unknown mode: %s", optarg);
12299 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12300 if (strcmp(optarg, "v1") == 0) {
12301 clear_space_cache = 1;
12302 } else if (strcmp(optarg, "v2") == 0) {
12303 clear_space_cache = 2;
12304 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12307 "invalid argument to --clear-space-cache, must be v1 or v2");
12310 ctree_flags |= OPEN_CTREE_WRITES;
12315 if (check_argc_exact(argc - optind, 1))
12316 usage(cmd_check_usage);
12318 if (ctx.progress_enabled) {
12319 ctx.tp = TASK_NOTHING;
12320 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12323 /* This check is the only reason for --readonly to exist */
12324 if (readonly && repair) {
12325 error("repair options are not compatible with --readonly");
12330 * Not supported yet
12332 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12333 error("low memory mode doesn't support repair yet");
12338 cache_tree_init(&root_cache);
12340 if((ret = check_mounted(argv[optind])) < 0) {
12341 error("could not check mount status: %s", strerror(-ret));
12344 error("%s is currently mounted, aborting", argv[optind]);
12349 /* only allow partial opening under repair mode */
12351 ctree_flags |= OPEN_CTREE_PARTIAL;
12353 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12354 chunk_root_bytenr, ctree_flags);
12356 error("cannot open file system");
12361 global_info = info;
12362 root = info->fs_root;
12363 if (clear_space_cache == 1) {
12364 if (btrfs_fs_compat_ro(info,
12365 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12367 "free space cache v2 detected, use --clear-space-cache v2");
12371 printf("Clearing free space cache\n");
12372 ret = clear_free_space_cache(info);
12374 error("failed to clear free space cache");
12377 printf("Free space cache cleared\n");
12380 } else if (clear_space_cache == 2) {
12381 if (!btrfs_fs_compat_ro(info,
12382 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12383 printf("no free space cache v2 to clear\n");
12387 printf("Clear free space cache v2\n");
12388 ret = btrfs_clear_free_space_tree(info);
12390 error("failed to clear free space cache v2: %d", ret);
12393 printf("free space cache v2 cleared\n");
12399 * repair mode will force us to commit transaction which
12400 * will make us fail to load log tree when mounting.
12402 if (repair && btrfs_super_log_root(info->super_copy)) {
12403 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12408 ret = zero_log_tree(root);
12410 error("failed to zero log tree: %d", ret);
12415 uuid_unparse(info->super_copy->fsid, uuidbuf);
12416 if (qgroup_report) {
12417 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12419 ret = qgroup_verify_all(info);
12425 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12426 subvolid, argv[optind], uuidbuf);
12427 ret = print_extent_state(info, subvolid);
12430 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12432 if (!extent_buffer_uptodate(info->tree_root->node) ||
12433 !extent_buffer_uptodate(info->dev_root->node) ||
12434 !extent_buffer_uptodate(info->chunk_root->node)) {
12435 error("critical roots corrupted, unable to check the filesystem");
12440 if (init_extent_tree || init_csum_tree) {
12441 struct btrfs_trans_handle *trans;
12443 trans = btrfs_start_transaction(info->extent_root, 0);
12444 if (IS_ERR(trans)) {
12445 error("error starting transaction");
12446 ret = PTR_ERR(trans);
12450 if (init_extent_tree) {
12451 printf("Creating a new extent tree\n");
12452 ret = reinit_extent_tree(trans, info);
12457 if (init_csum_tree) {
12458 printf("Reinitialize checksum tree\n");
12459 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12461 error("checksum tree initialization failed: %d",
12467 ret = fill_csum_tree(trans, info->csum_root,
12470 error("checksum tree refilling failed: %d", ret);
12475 * Ok now we commit and run the normal fsck, which will add
12476 * extent entries for all of the items it finds.
12478 ret = btrfs_commit_transaction(trans, info->extent_root);
12482 if (!extent_buffer_uptodate(info->extent_root->node)) {
12483 error("critical: extent_root, unable to check the filesystem");
12487 if (!extent_buffer_uptodate(info->csum_root->node)) {
12488 error("critical: csum_root, unable to check the filesystem");
12493 if (!ctx.progress_enabled)
12494 fprintf(stderr, "checking extents\n");
12495 if (check_mode == CHECK_MODE_LOWMEM)
12496 ret = check_chunks_and_extents_v2(root);
12498 ret = check_chunks_and_extents(root);
12501 "errors found in extent allocation tree or chunk allocation");
12503 ret = repair_root_items(info);
12507 fprintf(stderr, "Fixed %d roots.\n", ret);
12509 } else if (ret > 0) {
12511 "Found %d roots with an outdated root item.\n",
12514 "Please run a filesystem check with the option --repair to fix them.\n");
12519 if (!ctx.progress_enabled) {
12520 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12521 fprintf(stderr, "checking free space tree\n");
12523 fprintf(stderr, "checking free space cache\n");
12525 ret = check_space_cache(root);
12530 * We used to have to have these hole extents in between our real
12531 * extents so if we don't have this flag set we need to make sure there
12532 * are no gaps in the file extents for inodes, otherwise we can just
12533 * ignore it when this happens.
12535 no_holes = btrfs_fs_incompat(root->fs_info,
12536 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12537 if (!ctx.progress_enabled)
12538 fprintf(stderr, "checking fs roots\n");
12539 ret = check_fs_roots(root, &root_cache);
12543 fprintf(stderr, "checking csums\n");
12544 ret = check_csums(root);
12548 fprintf(stderr, "checking root refs\n");
12549 ret = check_root_refs(root, &root_cache);
12553 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12554 struct extent_buffer *eb;
12556 eb = list_first_entry(&root->fs_info->recow_ebs,
12557 struct extent_buffer, recow);
12558 list_del_init(&eb->recow);
12559 ret = recow_extent_buffer(root, eb);
12564 while (!list_empty(&delete_items)) {
12565 struct bad_item *bad;
12567 bad = list_first_entry(&delete_items, struct bad_item, list);
12568 list_del_init(&bad->list);
12570 ret = delete_bad_item(root, bad);
12574 if (info->quota_enabled) {
12576 fprintf(stderr, "checking quota groups\n");
12577 err = qgroup_verify_all(info);
12581 err = repair_qgroups(info, &qgroups_repaired);
12586 if (!list_empty(&root->fs_info->recow_ebs)) {
12587 error("transid errors in file system");
12591 /* Don't override original ret */
12592 if (!ret && qgroups_repaired)
12593 ret = qgroups_repaired;
12595 if (found_old_backref) { /*
12596 * there was a disk format change when mixed
12597 * backref was in testing tree. The old format
12598 * existed about one week.
12600 printf("\n * Found old mixed backref format. "
12601 "The old format is not supported! *"
12602 "\n * Please mount the FS in readonly mode, "
12603 "backup data and re-format the FS. *\n\n");
12606 printf("found %llu bytes used err is %d\n",
12607 (unsigned long long)bytes_used, ret);
12608 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12609 printf("total tree bytes: %llu\n",
12610 (unsigned long long)total_btree_bytes);
12611 printf("total fs tree bytes: %llu\n",
12612 (unsigned long long)total_fs_tree_bytes);
12613 printf("total extent tree bytes: %llu\n",
12614 (unsigned long long)total_extent_tree_bytes);
12615 printf("btree space waste bytes: %llu\n",
12616 (unsigned long long)btree_space_waste);
12617 printf("file data blocks allocated: %llu\n referenced %llu\n",
12618 (unsigned long long)data_bytes_allocated,
12619 (unsigned long long)data_bytes_referenced);
12621 free_qgroup_counts();
12622 free_root_recs_tree(&root_cache);
12626 if (ctx.progress_enabled)
12627 task_deinit(ctx.info);