2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
50 TASK_NOTHING, /* have to be the last element */
55 enum task_position tp;
57 struct task_info *info;
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
88 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 struct data_backref {
102 struct extent_backref node;
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
118 return container_of(back, struct data_backref, node);
122 * Much like data_backref, but with the undetermined members removed
123 * and changed to use list_head.
124 * During extent scan, it is stored in root->orphan_data_extent.
125 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
127 struct orphan_data_extent {
128 struct list_head list;
136 struct tree_backref {
137 struct extent_backref node;
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
146 return container_of(back, struct tree_backref, node);
149 /* Explicit initialization for extent_record::flag_block_full_backref */
150 enum { FLAG_UNSET = 2 };
152 struct extent_record {
153 struct list_head backrefs;
154 struct list_head dups;
155 struct list_head list;
156 struct cache_extent cache;
157 struct btrfs_disk_key parent_key;
162 u64 extent_item_refs;
164 u64 parent_generation;
168 unsigned int flag_block_full_backref:2;
169 unsigned int found_rec:1;
170 unsigned int content_checked:1;
171 unsigned int owner_ref_checked:1;
172 unsigned int is_root:1;
173 unsigned int metadata:1;
174 unsigned int bad_full_backref:1;
175 unsigned int crossing_stripes:1;
176 unsigned int wrong_chunk_type:1;
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
181 return container_of(entry, struct extent_record, list);
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
200 return list_entry(entry, struct inode_backref, list);
203 struct root_item_record {
204 struct list_head list;
211 struct btrfs_key drop_key;
/* Bit flags OR-ed into a backref's errors field; print_ref_error() turns each set bit into text. */
214 #define REF_ERR_NO_DIR_ITEM (1 << 0)
215 #define REF_ERR_NO_DIR_INDEX (1 << 1)
216 #define REF_ERR_NO_INODE_REF (1 << 2)
217 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
218 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
219 #define REF_ERR_DUP_INODE_REF (1 << 5)
220 #define REF_ERR_INDEX_UNMATCH (1 << 6)
221 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
222 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
223 #define REF_ERR_NO_ROOT_REF (1 << 9)
224 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
225 #define REF_ERR_DUP_ROOT_REF (1 << 11)
226 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
228 struct file_extent_hole {
234 struct inode_record {
235 struct list_head backrefs;
236 unsigned int checked:1;
237 unsigned int merging:1;
238 unsigned int found_inode_item:1;
239 unsigned int found_dir_item:1;
240 unsigned int found_file_extent:1;
241 unsigned int found_csum_item:1;
242 unsigned int some_csum_missing:1;
243 unsigned int nodatasum:1;
256 struct rb_root holes;
257 struct list_head orphan_extents;
/* Bit flags OR-ed into inode_record.errors; print_inode_error() turns each set bit into text. */
262 #define I_ERR_NO_INODE_ITEM (1 << 0)
263 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
264 #define I_ERR_DUP_INODE_ITEM (1 << 2)
265 #define I_ERR_DUP_DIR_INDEX (1 << 3)
266 #define I_ERR_ODD_DIR_ITEM (1 << 4)
267 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
268 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
269 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
270 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
271 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
272 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
273 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
274 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
275 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
276 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
278 struct root_backref {
279 struct list_head list;
280 unsigned int found_dir_item:1;
281 unsigned int found_dir_index:1;
282 unsigned int found_back_ref:1;
283 unsigned int found_forward_ref:1;
284 unsigned int reachable:1;
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
295 return list_entry(entry, struct root_backref, list);
299 struct list_head backrefs;
300 struct cache_extent cache;
301 unsigned int found_root_item:1;
307 struct cache_extent cache;
312 struct cache_extent cache;
313 struct cache_tree root_cache;
314 struct cache_tree inode_cache;
315 struct inode_record *current;
324 struct walk_control {
325 struct cache_tree shared;
326 struct shared_node *nodes[BTRFS_MAX_LEVEL];
332 struct btrfs_key key;
334 struct list_head list;
337 struct extent_entry {
342 struct list_head list;
345 struct root_item_info {
346 /* level of the root */
348 /* number of nodes at this level, must be 1 for a root */
352 struct cache_extent cache_extent;
356 * Error bit for low memory mode check.
358 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Previously shared bit 4 with REFERENCER_MISMATCH, which made the two
 * conditions indistinguishable once OR-ed into one error mask.  It now has
 * its own bit; every other flag keeps its former value.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
372 static void *print_status_check(void *p)
374 struct task_ctx *priv = p;
375 const char work_indicator[] = { '.', 'o', 'O', 'o' };
377 static char *task_position_string[] = {
379 "checking free space cache",
383 task_period_start(priv->info, 1000 /* 1s */);
385 if (priv->tp == TASK_NOTHING)
389 printf("%s [%c]\r", task_position_string[priv->tp],
390 work_indicator[count % 4]);
393 task_period_wait(priv->info);
398 static int print_status_return(void *p)
406 static enum btrfs_check_mode parse_check_mode(const char *str)
408 if (strcmp(str, "lowmem") == 0)
409 return CHECK_MODE_LOWMEM;
410 if (strcmp(str, "orig") == 0)
411 return CHECK_MODE_ORIGINAL;
412 if (strcmp(str, "original") == 0)
413 return CHECK_MODE_ORIGINAL;
415 return CHECK_MODE_UNKNOWN;
418 /* Compatibility function to allow reuse of old code */
419 static u64 first_extent_gap(struct rb_root *holes)
421 struct file_extent_hole *hole;
423 if (RB_EMPTY_ROOT(holes))
426 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
432 struct file_extent_hole *hole1;
433 struct file_extent_hole *hole2;
435 hole1 = rb_entry(node1, struct file_extent_hole, node);
436 hole2 = rb_entry(node2, struct file_extent_hole, node);
438 if (hole1->start > hole2->start)
440 if (hole1->start < hole2->start)
442 /* Now hole1->start == hole2->start */
443 if (hole1->len >= hole2->len)
445 * Hole 1 will be merge center
446 * Same hole will be merged later
449 /* Hole 2 will be merge center */
454 * Add a hole to the record
456 * This will do hole merge for copy_file_extent_holes(),
457 * which will ensure there won't be continuous holes.
459 static int add_file_extent_hole(struct rb_root *holes,
462 struct file_extent_hole *hole;
463 struct file_extent_hole *prev = NULL;
464 struct file_extent_hole *next = NULL;
466 hole = malloc(sizeof(*hole));
471 /* Since compare will not return 0, no -EEXIST will happen */
472 rb_insert(holes, &hole->node, compare_hole);
474 /* simple merge with previous hole */
475 if (rb_prev(&hole->node))
476 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
478 if (prev && prev->start + prev->len >= hole->start) {
479 hole->len = hole->start + hole->len - prev->start;
480 hole->start = prev->start;
481 rb_erase(&prev->node, holes);
486 /* iterate merge with next holes */
488 if (!rb_next(&hole->node))
490 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
492 if (hole->start + hole->len >= next->start) {
493 if (hole->start + hole->len <= next->start + next->len)
494 hole->len = next->start + next->len -
496 rb_erase(&next->node, holes);
505 static int compare_hole_range(struct rb_node *node, void *data)
507 struct file_extent_hole *hole;
510 hole = (struct file_extent_hole *)data;
513 hole = rb_entry(node, struct file_extent_hole, node);
514 if (start < hole->start)
516 if (start >= hole->start && start < hole->start + hole->len)
522 * Delete a hole in the record
524 * This will do the hole split and is much more restrictive than add.
526 static int del_file_extent_hole(struct rb_root *holes,
529 struct file_extent_hole *hole;
530 struct file_extent_hole tmp;
535 struct rb_node *node;
542 node = rb_search(holes, &tmp, compare_hole_range, NULL);
545 hole = rb_entry(node, struct file_extent_hole, node);
546 if (start + len > hole->start + hole->len)
550 * Now there will be no overlap, delete the hole and re-add the
551 * split(s) if they exists.
553 if (start > hole->start) {
554 prev_start = hole->start;
555 prev_len = start - hole->start;
558 if (hole->start + hole->len > start + len) {
559 next_start = start + len;
560 next_len = hole->start + hole->len - start - len;
563 rb_erase(node, holes);
566 ret = add_file_extent_hole(holes, prev_start, prev_len);
571 ret = add_file_extent_hole(holes, next_start, next_len);
578 static int copy_file_extent_holes(struct rb_root *dst,
581 struct file_extent_hole *hole;
582 struct rb_node *node;
585 node = rb_first(src);
587 hole = rb_entry(node, struct file_extent_hole, node);
588 ret = add_file_extent_hole(dst, hole->start, hole->len);
591 node = rb_next(node);
596 static void free_file_extent_holes(struct rb_root *holes)
598 struct rb_node *node;
599 struct file_extent_hole *hole;
601 node = rb_first(holes);
603 hole = rb_entry(node, struct file_extent_hole, node);
604 rb_erase(node, holes);
606 node = rb_first(holes);
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613 struct btrfs_root *root)
615 if (root->last_trans != trans->transid) {
616 root->track_dirty = 1;
617 root->last_trans = trans->transid;
618 root->commit_root = root->node;
619 extent_buffer_get(root->node);
623 static u8 imode_to_type(u32 imode)
626 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
628 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
629 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
630 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
631 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
632 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
633 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
636 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
642 struct device_record *rec1;
643 struct device_record *rec2;
645 rec1 = rb_entry(node1, struct device_record, node);
646 rec2 = rb_entry(node2, struct device_record, node);
647 if (rec1->devid > rec2->devid)
649 else if (rec1->devid < rec2->devid)
/*
 * Duplicate @orig_rec into a newly allocated inode_record.
 *
 * The record is copied with memcpy(), after which its backref list, orphan
 * extent list and hole tree are reset to empty and refilled with freshly
 * allocated copies of the original's entries.  ERR_PTR(-ENOMEM) is returned
 * on the record-allocation failure path.  The tail of the function releases
 * whatever was already linked into the new record.
 */
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
657 struct inode_record *rec;
658 struct inode_backref *backref;
659 struct inode_backref *orig;
660 struct inode_backref *tmp;
661 struct orphan_data_extent *src_orphan;
662 struct orphan_data_extent *dst_orphan;
667 rec = malloc(sizeof(*rec));
669 return ERR_PTR(-ENOMEM);
670 memcpy(rec, orig_rec, sizeof(*rec));
/* memcpy() copied the original's list heads and rb root; make them empty */
672 INIT_LIST_HEAD(&rec->backrefs);
673 INIT_LIST_HEAD(&rec->orphan_extents);
674 rec->holes = RB_ROOT;
/* Copy every backref together with its trailing name (namelen + 1 bytes) */
676 list_for_each_entry(orig, &orig_rec->backrefs, list) {
677 size = sizeof(*orig) + orig->namelen + 1;
678 backref = malloc(size);
683 memcpy(backref, orig, size);
684 list_add_tail(&backref->list, &rec->backrefs);
/* Copy every orphan data extent */
686 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687 dst_orphan = malloc(sizeof(*dst_orphan));
692 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
695 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Cleanup: walk the holes already inserted into the new record */
702 rb = rb_first(&rec->holes);
704 struct file_extent_hole *hole;
706 hole = rb_entry(rb, struct file_extent_hole, node);
/* Cleanup: unlink the backrefs already copied into the new record */
712 if (!list_empty(&rec->backrefs))
713 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714 list_del(&orig->list);
/*
 * NOTE(review): rec->orphan_extents holds struct orphan_data_extent entries
 * (see the list_add_tail() of dst_orphan above), yet this loop iterates with
 * 'orig'/'tmp', which are struct inode_backref pointers.  Presumably this only
 * works because both structs place 'list' at the same offset -- confirm, or
 * switch to orphan_data_extent cursors.
 */
718 if (!list_empty(&rec->orphan_extents))
719 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720 list_del(&orig->list);
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
732 struct orphan_data_extent *orphan;
734 if (list_empty(orphan_extents))
736 printf("The following data extent is lost in tree %llu:\n",
738 list_for_each_entry(orphan, orphan_extents, list) {
739 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740 orphan->objectid, orphan->offset, orphan->disk_bytenr,
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
747 u64 root_objectid = root->root_key.objectid;
748 int errors = rec->errors;
752 /* reloc root errors, we print its corresponding fs root objectid*/
753 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754 root_objectid = root->root_key.offset;
755 fprintf(stderr, "reloc");
757 fprintf(stderr, "root %llu inode %llu errors %x",
758 (unsigned long long) root_objectid,
759 (unsigned long long) rec->ino, rec->errors);
761 if (errors & I_ERR_NO_INODE_ITEM)
762 fprintf(stderr, ", no inode item");
763 if (errors & I_ERR_NO_ORPHAN_ITEM)
764 fprintf(stderr, ", no orphan item");
765 if (errors & I_ERR_DUP_INODE_ITEM)
766 fprintf(stderr, ", dup inode item");
767 if (errors & I_ERR_DUP_DIR_INDEX)
768 fprintf(stderr, ", dup dir index");
769 if (errors & I_ERR_ODD_DIR_ITEM)
770 fprintf(stderr, ", odd dir item");
771 if (errors & I_ERR_ODD_FILE_EXTENT)
772 fprintf(stderr, ", odd file extent");
773 if (errors & I_ERR_BAD_FILE_EXTENT)
774 fprintf(stderr, ", bad file extent");
775 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776 fprintf(stderr, ", file extent overlap");
777 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778 fprintf(stderr, ", file extent discount");
779 if (errors & I_ERR_DIR_ISIZE_WRONG)
780 fprintf(stderr, ", dir isize wrong");
781 if (errors & I_ERR_FILE_NBYTES_WRONG)
782 fprintf(stderr, ", nbytes wrong");
783 if (errors & I_ERR_ODD_CSUM_ITEM)
784 fprintf(stderr, ", odd csum item");
785 if (errors & I_ERR_SOME_CSUM_MISSING)
786 fprintf(stderr, ", some csum missing");
787 if (errors & I_ERR_LINK_COUNT_WRONG)
788 fprintf(stderr, ", link count wrong");
789 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790 fprintf(stderr, ", orphan file extent");
791 fprintf(stderr, "\n");
792 /* Print the orphan extents if needed */
793 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
796 /* Print the holes if needed */
797 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798 struct file_extent_hole *hole;
799 struct rb_node *node;
802 node = rb_first(&rec->holes);
803 fprintf(stderr, "Found file extent holes:\n");
806 hole = rb_entry(node, struct file_extent_hole, node);
807 fprintf(stderr, "\tstart: %llu, len: %llu\n",
808 hole->start, hole->len);
809 node = rb_next(node);
812 fprintf(stderr, "\tstart: 0, len: %llu\n",
813 round_up(rec->isize, root->sectorsize));
817 static void print_ref_error(int errors)
819 if (errors & REF_ERR_NO_DIR_ITEM)
820 fprintf(stderr, ", no dir item");
821 if (errors & REF_ERR_NO_DIR_INDEX)
822 fprintf(stderr, ", no dir index");
823 if (errors & REF_ERR_NO_INODE_REF)
824 fprintf(stderr, ", no inode ref");
825 if (errors & REF_ERR_DUP_DIR_ITEM)
826 fprintf(stderr, ", dup dir item");
827 if (errors & REF_ERR_DUP_DIR_INDEX)
828 fprintf(stderr, ", dup dir index");
829 if (errors & REF_ERR_DUP_INODE_REF)
830 fprintf(stderr, ", dup inode ref");
831 if (errors & REF_ERR_INDEX_UNMATCH)
832 fprintf(stderr, ", index mismatch");
833 if (errors & REF_ERR_FILETYPE_UNMATCH)
834 fprintf(stderr, ", filetype mismatch");
835 if (errors & REF_ERR_NAME_TOO_LONG)
836 fprintf(stderr, ", name too long");
837 if (errors & REF_ERR_NO_ROOT_REF)
838 fprintf(stderr, ", no root ref");
839 if (errors & REF_ERR_NO_ROOT_BACKREF)
840 fprintf(stderr, ", no root backref");
841 if (errors & REF_ERR_DUP_ROOT_REF)
842 fprintf(stderr, ", dup root ref");
843 if (errors & REF_ERR_DUP_ROOT_BACKREF)
844 fprintf(stderr, ", dup root backref");
845 fprintf(stderr, "\n");
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
851 struct ptr_node *node;
852 struct cache_extent *cache;
853 struct inode_record *rec = NULL;
856 cache = lookup_cache_extent(inode_cache, ino, 1);
858 node = container_of(cache, struct ptr_node, cache);
860 if (mod && rec->refs > 1) {
861 node->data = clone_inode_rec(rec);
862 if (IS_ERR(node->data))
868 rec = calloc(1, sizeof(*rec));
870 return ERR_PTR(-ENOMEM);
872 rec->extent_start = (u64)-1;
874 INIT_LIST_HEAD(&rec->backrefs);
875 INIT_LIST_HEAD(&rec->orphan_extents);
876 rec->holes = RB_ROOT;
878 node = malloc(sizeof(*node));
881 return ERR_PTR(-ENOMEM);
883 node->cache.start = ino;
884 node->cache.size = 1;
887 if (ino == BTRFS_FREE_INO_OBJECTID)
890 ret = insert_cache_extent(inode_cache, &node->cache);
892 return ERR_PTR(-EEXIST);
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
899 struct orphan_data_extent *orphan;
901 while (!list_empty(orphan_extents)) {
902 orphan = list_entry(orphan_extents->next,
903 struct orphan_data_extent, list);
904 list_del(&orphan->list);
909 static void free_inode_rec(struct inode_record *rec)
911 struct inode_backref *backref;
916 while (!list_empty(&rec->backrefs)) {
917 backref = to_inode_backref(rec->backrefs.next);
918 list_del(&backref->list);
921 free_orphan_data_extents(&rec->orphan_extents);
922 free_file_extent_holes(&rec->holes);
926 static int can_free_inode_rec(struct inode_record *rec)
928 if (!rec->errors && rec->checked && rec->found_inode_item &&
929 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935 struct inode_record *rec)
937 struct cache_extent *cache;
938 struct inode_backref *tmp, *backref;
939 struct ptr_node *node;
942 if (!rec->found_inode_item)
945 filetype = imode_to_type(rec->imode);
946 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947 if (backref->found_dir_item && backref->found_dir_index) {
948 if (backref->filetype != filetype)
949 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950 if (!backref->errors && backref->found_inode_ref &&
951 rec->nlink == rec->found_link) {
952 list_del(&backref->list);
958 if (!rec->checked || rec->merging)
961 if (S_ISDIR(rec->imode)) {
962 if (rec->found_size != rec->isize)
963 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964 if (rec->found_file_extent)
965 rec->errors |= I_ERR_ODD_FILE_EXTENT;
966 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967 if (rec->found_dir_item)
968 rec->errors |= I_ERR_ODD_DIR_ITEM;
969 if (rec->found_size != rec->nbytes)
970 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971 if (rec->nlink > 0 && !no_holes &&
972 (rec->extent_end < rec->isize ||
973 first_extent_gap(&rec->holes) < rec->isize))
974 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
977 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978 if (rec->found_csum_item && rec->nodatasum)
979 rec->errors |= I_ERR_ODD_CSUM_ITEM;
980 if (rec->some_csum_missing && !rec->nodatasum)
981 rec->errors |= I_ERR_SOME_CSUM_MISSING;
984 BUG_ON(rec->refs != 1);
985 if (can_free_inode_rec(rec)) {
986 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987 node = container_of(cache, struct ptr_node, cache);
988 BUG_ON(node->data != rec);
989 remove_cache_extent(inode_cache, &node->cache);
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
997 struct btrfs_path path;
998 struct btrfs_key key;
1001 key.objectid = BTRFS_ORPHAN_OBJECTID;
1002 key.type = BTRFS_ORPHAN_ITEM_KEY;
1005 btrfs_init_path(&path);
1006 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007 btrfs_release_path(&path);
1013 static int process_inode_item(struct extent_buffer *eb,
1014 int slot, struct btrfs_key *key,
1015 struct shared_node *active_node)
1017 struct inode_record *rec;
1018 struct btrfs_inode_item *item;
1020 rec = active_node->current;
1021 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022 if (rec->found_inode_item) {
1023 rec->errors |= I_ERR_DUP_INODE_ITEM;
1026 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027 rec->nlink = btrfs_inode_nlink(eb, item);
1028 rec->isize = btrfs_inode_size(eb, item);
1029 rec->nbytes = btrfs_inode_nbytes(eb, item);
1030 rec->imode = btrfs_inode_mode(eb, item);
1031 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1033 rec->found_inode_item = 1;
1034 if (rec->nlink == 0)
1035 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036 maybe_free_inode_rec(&active_node->inode_cache, rec);
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1042 int namelen, u64 dir)
1044 struct inode_backref *backref;
1046 list_for_each_entry(backref, &rec->backrefs, list) {
1047 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1049 if (backref->dir != dir || backref->namelen != namelen)
1051 if (memcmp(name, backref->name, namelen))
1056 backref = malloc(sizeof(*backref) + namelen + 1);
1059 memset(backref, 0, sizeof(*backref));
1061 backref->namelen = namelen;
1062 memcpy(backref->name, name, namelen);
1063 backref->name[namelen] = '\0';
1064 list_add_tail(&backref->list, &rec->backrefs);
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069 u64 ino, u64 dir, u64 index,
1070 const char *name, int namelen,
1071 u8 filetype, u8 itemtype, int errors)
1073 struct inode_record *rec;
1074 struct inode_backref *backref;
1076 rec = get_inode_rec(inode_cache, ino, 1);
1077 BUG_ON(IS_ERR(rec));
1078 backref = get_inode_backref(rec, name, namelen, dir);
1081 backref->errors |= errors;
1082 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083 if (backref->found_dir_index)
1084 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085 if (backref->found_inode_ref && backref->index != index)
1086 backref->errors |= REF_ERR_INDEX_UNMATCH;
1087 if (backref->found_dir_item && backref->filetype != filetype)
1088 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1090 backref->index = index;
1091 backref->filetype = filetype;
1092 backref->found_dir_index = 1;
1093 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1095 if (backref->found_dir_item)
1096 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097 if (backref->found_dir_index && backref->filetype != filetype)
1098 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1100 backref->filetype = filetype;
1101 backref->found_dir_item = 1;
1102 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104 if (backref->found_inode_ref)
1105 backref->errors |= REF_ERR_DUP_INODE_REF;
1106 if (backref->found_dir_index && backref->index != index)
1107 backref->errors |= REF_ERR_INDEX_UNMATCH;
1109 backref->index = index;
1111 backref->ref_type = itemtype;
1112 backref->found_inode_ref = 1;
1117 maybe_free_inode_rec(inode_cache, rec);
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122 struct cache_tree *dst_cache)
1124 struct inode_backref *backref;
1129 list_for_each_entry(backref, &src->backrefs, list) {
1130 if (backref->found_dir_index) {
1131 add_inode_backref(dst_cache, dst->ino, backref->dir,
1132 backref->index, backref->name,
1133 backref->namelen, backref->filetype,
1134 BTRFS_DIR_INDEX_KEY, backref->errors);
1136 if (backref->found_dir_item) {
1138 add_inode_backref(dst_cache, dst->ino,
1139 backref->dir, 0, backref->name,
1140 backref->namelen, backref->filetype,
1141 BTRFS_DIR_ITEM_KEY, backref->errors);
1143 if (backref->found_inode_ref) {
1144 add_inode_backref(dst_cache, dst->ino,
1145 backref->dir, backref->index,
1146 backref->name, backref->namelen, 0,
1147 backref->ref_type, backref->errors);
1151 if (src->found_dir_item)
1152 dst->found_dir_item = 1;
1153 if (src->found_file_extent)
1154 dst->found_file_extent = 1;
1155 if (src->found_csum_item)
1156 dst->found_csum_item = 1;
1157 if (src->some_csum_missing)
1158 dst->some_csum_missing = 1;
1159 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1165 BUG_ON(src->found_link < dir_count);
1166 dst->found_link += src->found_link - dir_count;
1167 dst->found_size += src->found_size;
1168 if (src->extent_start != (u64)-1) {
1169 if (dst->extent_start == (u64)-1) {
1170 dst->extent_start = src->extent_start;
1171 dst->extent_end = src->extent_end;
1173 if (dst->extent_end > src->extent_start)
1174 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175 else if (dst->extent_end < src->extent_start) {
1176 ret = add_file_extent_hole(&dst->holes,
1178 src->extent_start - dst->extent_end);
1180 if (dst->extent_end < src->extent_end)
1181 dst->extent_end = src->extent_end;
1185 dst->errors |= src->errors;
1186 if (src->found_inode_item) {
1187 if (!dst->found_inode_item) {
1188 dst->nlink = src->nlink;
1189 dst->isize = src->isize;
1190 dst->nbytes = src->nbytes;
1191 dst->imode = src->imode;
1192 dst->nodatasum = src->nodatasum;
1193 dst->found_inode_item = 1;
1195 dst->errors |= I_ERR_DUP_INODE_ITEM;
1203 static int splice_shared_node(struct shared_node *src_node,
1204 struct shared_node *dst_node)
1206 struct cache_extent *cache;
1207 struct ptr_node *node, *ins;
1208 struct cache_tree *src, *dst;
1209 struct inode_record *rec, *conflict;
1210 u64 current_ino = 0;
1214 if (--src_node->refs == 0)
1216 if (src_node->current)
1217 current_ino = src_node->current->ino;
1219 src = &src_node->root_cache;
1220 dst = &dst_node->root_cache;
1222 cache = search_cache_extent(src, 0);
1224 node = container_of(cache, struct ptr_node, cache);
1226 cache = next_cache_extent(cache);
1229 remove_cache_extent(src, &node->cache);
1232 ins = malloc(sizeof(*ins));
1234 ins->cache.start = node->cache.start;
1235 ins->cache.size = node->cache.size;
1239 ret = insert_cache_extent(dst, &ins->cache);
1240 if (ret == -EEXIST) {
1241 conflict = get_inode_rec(dst, rec->ino, 1);
1242 BUG_ON(IS_ERR(conflict));
1243 merge_inode_recs(rec, conflict, dst);
1245 conflict->checked = 1;
1246 if (dst_node->current == conflict)
1247 dst_node->current = NULL;
1249 maybe_free_inode_rec(dst, conflict);
1250 free_inode_rec(rec);
1257 if (src == &src_node->root_cache) {
1258 src = &src_node->inode_cache;
1259 dst = &dst_node->inode_cache;
1263 if (current_ino > 0 && (!dst_node->current ||
1264 current_ino > dst_node->current->ino)) {
1265 if (dst_node->current) {
1266 dst_node->current->checked = 1;
1267 maybe_free_inode_rec(dst, dst_node->current);
1269 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270 BUG_ON(IS_ERR(dst_node->current));
1275 static void free_inode_ptr(struct cache_extent *cache)
1277 struct ptr_node *node;
1278 struct inode_record *rec;
1280 node = container_of(cache, struct ptr_node, cache);
1282 free_inode_rec(rec);
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1291 struct cache_extent *cache;
1292 struct shared_node *node;
1294 cache = lookup_cache_extent(shared, bytenr, 1);
1296 node = container_of(cache, struct shared_node, cache);
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1305 struct shared_node *node;
1307 node = calloc(1, sizeof(*node));
1310 node->cache.start = bytenr;
1311 node->cache.size = 1;
1312 cache_tree_init(&node->root_cache);
1313 cache_tree_init(&node->inode_cache);
1316 ret = insert_cache_extent(shared, &node->cache);
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322 struct walk_control *wc, int level)
1324 struct shared_node *node;
1325 struct shared_node *dest;
1328 if (level == wc->active_node)
1331 BUG_ON(wc->active_node <= level);
1332 node = find_shared_node(&wc->shared, bytenr);
1334 ret = add_shared_node(&wc->shared, bytenr, refs);
1336 node = find_shared_node(&wc->shared, bytenr);
1337 wc->nodes[level] = node;
1338 wc->active_node = level;
1342 if (wc->root_level == wc->active_node &&
1343 btrfs_root_refs(&root->root_item) == 0) {
1344 if (--node->refs == 0) {
1345 free_inode_recs_tree(&node->root_cache);
1346 free_inode_recs_tree(&node->inode_cache);
1347 remove_cache_extent(&wc->shared, &node->cache);
1353 dest = wc->nodes[wc->active_node];
1354 splice_shared_node(node, dest);
1355 if (node->refs == 0) {
1356 remove_cache_extent(&wc->shared, &node->cache);
1362 static int leave_shared_node(struct btrfs_root *root,
1363 struct walk_control *wc, int level)
1365 struct shared_node *node;
1366 struct shared_node *dest;
1369 if (level == wc->root_level)
1372 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1376 BUG_ON(i >= BTRFS_MAX_LEVEL);
1378 node = wc->nodes[wc->active_node];
1379 wc->nodes[wc->active_node] = NULL;
1380 wc->active_node = i;
1382 dest = wc->nodes[wc->active_node];
1383 if (wc->active_node < wc->root_level ||
1384 btrfs_root_refs(&root->root_item) > 0) {
1385 BUG_ON(node->refs <= 1);
1386 splice_shared_node(node, dest);
1388 BUG_ON(node->refs < 2);
1397 * 1 - if the root with id child_root_id is a child of root parent_root_id
1398 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1399 * has other root(s) as parent(s)
1400 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine how root child_root_id relates to root parent_root_id using
 * the tree root (root->fs_info->tree_root).
 *
 * A (parent_root_id, BTRFS_ROOT_REF_KEY, child_root_id) key is searched
 * first. Then the BTRFS_ROOT_BACKREF_KEY items of child_root_id are walked
 * and each key offset is compared with parent_root_id. When the walk ends
 * without a match, the result is 0 if has_parent was set and 2 otherwise.
 */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1405 struct btrfs_path path;
1406 struct btrfs_key key;
1407 struct extent_buffer *leaf;
1411 btrfs_init_path(&path);
/* Step 1: direct lookup of the ROOT_REF item parent -> child. */
1413 key.objectid = parent_root_id;
1414 key.type = BTRFS_ROOT_REF_KEY;
1415 key.offset = child_root_id;
1416 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1420 btrfs_release_path(&path);
/* Step 2: walk the ROOT_BACKREF items that belong to the child root. */
1424 key.objectid = child_root_id;
1425 key.type = BTRFS_ROOT_BACKREF_KEY;
1427 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1433 leaf = path.nodes[0];
/* Slot past the end of this leaf: continue in the next leaf. */
1434 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1438 leaf = path.nodes[0];
1441 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Check whether the item is still a backref of child_root_id. */
1442 if (key.objectid != child_root_id ||
1443 key.type != BTRFS_ROOT_BACKREF_KEY)
/* For ROOT_BACKREF keys the offset is compared with the parent root id. */
1448 if (key.offset == parent_root_id) {
1449 btrfs_release_path(&path);
1456 btrfs_release_path(&path);
/* No matching backref: 0 when some parent was seen, 2 when none was. */
1459 return has_parent ? 0 : 2;
/*
 * Record every name stored in the DIR_ITEM/DIR_INDEX item at @slot of @eb.
 *
 * Marks the current inode record of @active_node as having a dir item,
 * adds each name length to its found_size and registers one backref per
 * entry: in the inode cache when the entry location is an inode item, in
 * the root cache when it is a root item, and under
 * BTRFS_MULTIPLE_OBJECTIDS in the inode cache for any other location type.
 * Names longer than BTRFS_NAME_LEN are cut to BTRFS_NAME_LEN and carry
 * REF_ERR_NAME_TOO_LONG.
 */
1462 static int process_dir_item(struct btrfs_root *root,
1463 struct extent_buffer *eb,
1464 int slot, struct btrfs_key *key,
1465 struct shared_node *active_node)
1475 struct btrfs_dir_item *di;
1476 struct inode_record *rec;
1477 struct cache_tree *root_cache;
1478 struct cache_tree *inode_cache;
1479 struct btrfs_key location;
1480 char namebuf[BTRFS_NAME_LEN];
1482 root_cache = &active_node->root_cache;
1483 inode_cache = &active_node->inode_cache;
1484 rec = active_node->current;
1485 rec->found_dir_item = 1;
1487 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488 total = btrfs_item_size_nr(eb, slot);
/* One item may pack several entries; walk them until the item size. */
1489 while (cur < total) {
1491 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492 name_len = btrfs_dir_name_len(eb, di);
1493 data_len = btrfs_dir_data_len(eb, di);
1494 filetype = btrfs_dir_type(eb, di);
/* The full on-disk name length is accounted, even for long names. */
1496 rec->found_size += name_len;
1497 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: only BTRFS_NAME_LEN bytes fit into namebuf. */
1501 len = BTRFS_NAME_LEN;
1502 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size dir item header. */
1504 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1506 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507 add_inode_backref(inode_cache, location.objectid,
1508 key->objectid, key->offset, namebuf,
1509 len, filetype, key->type, error);
1510 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511 add_inode_backref(root_cache, location.objectid,
1512 key->objectid, key->offset,
1513 namebuf, len, filetype,
/* Any other location type is reported and filed under a catch-all id. */
1516 fprintf(stderr, "invalid location in dir item %u\n",
1518 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519 key->objectid, key->offset, namebuf,
1520 len, filetype, key->type, error);
/* Advance to the next packed entry: header + name + data. */
1523 len = sizeof(*di) + name_len + data_len;
1524 di = (struct btrfs_dir_item *)((char *)di + len);
/* nritems presumably counts the entries walked above - TODO confirm. */
1527 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Register a backref for every name packed into the INODE_REF item at
 * @slot of @eb.
 *
 * Each entry is added to the inode cache of @active_node with
 * key->objectid, key->offset, the stored index and the name. Names longer
 * than BTRFS_NAME_LEN are cut to BTRFS_NAME_LEN and carry
 * REF_ERR_NAME_TOO_LONG.
 */
1533 static int process_inode_ref(struct extent_buffer *eb,
1534 int slot, struct btrfs_key *key,
1535 struct shared_node *active_node)
1543 struct cache_tree *inode_cache;
1544 struct btrfs_inode_ref *ref;
1545 char namebuf[BTRFS_NAME_LEN];
1547 inode_cache = &active_node->inode_cache;
1549 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550 total = btrfs_item_size_nr(eb, slot);
/* Walk all refs packed into this item. */
1551 while (cur < total) {
1552 name_len = btrfs_inode_ref_name_len(eb, ref);
1553 index = btrfs_inode_ref_index(eb, ref);
1554 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: only BTRFS_NAME_LEN bytes fit into namebuf. */
1558 len = BTRFS_NAME_LEN;
1559 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size ref header. */
1561 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562 add_inode_backref(inode_cache, key->objectid, key->offset,
1563 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed ref: header + name. */
1565 len = sizeof(*ref) + name_len;
1566 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Register a backref for every name packed into the INODE_EXTREF item at
 * @slot of @eb.
 *
 * Works like the INODE_REF variant, except that the parent directory is
 * read from each extref entry instead of being taken from the key offset.
 */
1572 static int process_inode_extref(struct extent_buffer *eb,
1573 int slot, struct btrfs_key *key,
1574 struct shared_node *active_node)
1583 struct cache_tree *inode_cache;
1584 struct btrfs_inode_extref *extref;
1585 char namebuf[BTRFS_NAME_LEN];
1587 inode_cache = &active_node->inode_cache;
1589 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590 total = btrfs_item_size_nr(eb, slot);
/* Walk all extrefs packed into this item. */
1591 while (cur < total) {
1592 name_len = btrfs_inode_extref_name_len(eb, extref);
1593 index = btrfs_inode_extref_index(eb, extref);
1594 parent = btrfs_inode_extref_parent(eb, extref);
1595 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: only BTRFS_NAME_LEN bytes fit into namebuf. */
1599 len = BTRFS_NAME_LEN;
1600 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size extref header. */
1602 read_extent_buffer(eb, namebuf,
1603 (unsigned long)(extref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, parent,
1605 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed extref: header + name. */
1607 len = sizeof(*extref) + name_len;
1608 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum tree (root->fs_info->csum_root)
 * that relate to the byte range starting at @start with length @len.
 *
 * NOTE(review): the lines that update *found are not visible in this
 * excerpt; presumably it receives the number of bytes in the range that
 * are covered by checksums - confirm against the full source.
 */
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616 u64 len, u64 *found)
1618 struct btrfs_key key;
1619 struct btrfs_path path;
1620 struct extent_buffer *leaf;
1625 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1627 btrfs_init_path(&path);
1629 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1631 key.type = BTRFS_EXTENT_CSUM_KEY;
1633 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match and not at the first slot: look at the previous item,
 * which may be a csum item as well.
 */
1637 if (ret > 0 && path.slots[0] > 0) {
1638 leaf = path.nodes[0];
1639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641 key.type == BTRFS_EXTENT_CSUM_KEY)
1646 leaf = path.nodes[0];
/* Slot past the end of this leaf: continue in the next leaf. */
1647 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1653 leaf = path.nodes[0];
1656 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Check whether the item is still a csum item. */
1657 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658 key.type != BTRFS_EXTENT_CSUM_KEY)
1661 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for a csum item that begins at or after the end of the range. */
1662 if (key.offset >= start + len)
/* Test for a csum item that begins after the current start. */
1665 if (key.offset > start)
/* Each csum_size bytes of item data cover one sector of disk data. */
1668 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670 if (csum_end > start) {
/* Overlap with the remaining range, capped at the remaining length. */
1671 size = min(csum_end - start, len);
1680 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item (at @slot of @eb) in the current inode
 * record of @active_node.
 *
 * Tracks the covered file range (extent_start/extent_end), records holes
 * and overlaps between consecutive extents, validates the extent fields,
 * adds to found_size, and, for extents with a disk_bytenr outside the
 * data reloc tree, counts checksums via count_csum_range() to set the
 * csum related flags of the record.
 */
1686 static int process_file_extent(struct btrfs_root *root,
1687 struct extent_buffer *eb,
1688 int slot, struct btrfs_key *key,
1689 struct shared_node *active_node)
1691 struct inode_record *rec;
1692 struct btrfs_file_extent_item *fi;
1694 u64 disk_bytenr = 0;
1695 u64 extent_offset = 0;
/* Sector alignment mask. */
1696 u64 mask = root->sectorsize - 1;
1700 rec = active_node->current;
1701 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702 rec->found_file_extent = 1;
/* (u64)-1 is the value tested for a record with no extent seen yet. */
1704 if (rec->extent_start == (u64)-1) {
1705 rec->extent_start = key->offset;
1706 rec->extent_end = key->offset;
/* Previous extent ends past this offset: overlap. Ends before: hole. */
1709 if (rec->extent_end > key->offset)
1710 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711 else if (rec->extent_end < key->offset) {
1712 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713 key->offset - rec->extent_end);
1718 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719 extent_type = btrfs_file_extent_type(eb, fi);
1721 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1724 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725 rec->found_size += num_bytes;
/* Round the inline length up to a sector multiple. */
1726 num_bytes = (num_bytes + mask) & ~mask;
1727 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1732 if (num_bytes == 0 || (num_bytes & mask))
1733 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced window must fit inside ram_bytes. */
1734 if (num_bytes + extent_offset >
1735 btrfs_file_extent_ram_bytes(eb, fi))
1736 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Prealloc extents must not carry compression or other encodings. */
1737 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738 (btrfs_file_extent_compression(eb, fi) ||
1739 btrfs_file_extent_encryption(eb, fi) ||
1740 btrfs_file_extent_other_encoding(eb, fi)))
1741 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Extents with disk_bytenr 0 do not add to found_size. */
1742 if (disk_bytenr > 0)
1743 rec->found_size += num_bytes;
1745 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1747 rec->extent_end = key->offset + num_bytes;
1750 * The data reloc tree will copy full extents into its inode and then
1751 * copy the corresponding csums. Because the extent it copied could be
1752 * a preallocated extent that hasn't been written to yet there may be no
1753 * csums to copy, ergo we won't have csums for our file extent. This is
1754 * ok so just don't bother checking csums if the inode belongs to the
1757 if (disk_bytenr > 0 &&
1758 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the on-disk length is used for the csum range. */
1760 if (btrfs_file_extent_compression(eb, fi))
1761 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1763 disk_bytenr += extent_offset;
1765 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1768 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1770 rec->found_csum_item = 1;
/* Fewer csum bytes than extent bytes: some checksums are missing. */
1771 if (found < num_bytes)
1772 rec->some_csum_missing = 1;
1773 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1775 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb to the matching per-item handler.
 *
 * Items are grouped by inode: when the key objectid moves past the current
 * inode record, that record is marked checked, offered to
 * maybe_free_inode_rec() and replaced by the record for the new objectid.
 * Dir items/indexes, inode refs, extrefs, inode items and file extents are
 * dispatched to their process_*() helpers.
 */
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782 struct walk_control *wc)
1784 struct btrfs_key key;
1788 struct cache_tree *inode_cache;
1789 struct shared_node *active_node;
/* Special case: active node at the root level of a root with zero refs. */
1791 if (wc->root_level == wc->active_node &&
1792 btrfs_root_refs(&root->root_item) == 0)
1795 active_node = wc->nodes[wc->active_node];
1796 inode_cache = &active_node->inode_cache;
1797 nritems = btrfs_header_nritems(eb);
1798 for (i = 0; i < nritems; i++) {
1799 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get separate treatment here. */
1801 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1803 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Switch to a new inode record once the objectid advances. */
1806 if (active_node->current == NULL ||
1807 active_node->current->ino < key.objectid) {
1808 if (active_node->current) {
1809 active_node->current->checked = 1;
1810 maybe_free_inode_rec(inode_cache,
1811 active_node->current);
1813 active_node->current = get_inode_rec(inode_cache,
1815 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1818 case BTRFS_DIR_ITEM_KEY:
1819 case BTRFS_DIR_INDEX_KEY:
1820 ret = process_dir_item(root, eb, i, &key, active_node);
1822 case BTRFS_INODE_REF_KEY:
1823 ret = process_inode_ref(eb, i, &key, active_node);
1825 case BTRFS_INODE_EXTREF_KEY:
1826 ret = process_inode_extref(eb, i, &key, active_node);
1828 case BTRFS_INODE_ITEM_KEY:
1829 ret = process_inode_item(eb, i, &key, active_node);
1831 case BTRFS_EXTENT_DATA_KEY:
1832 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the child blocks of @node, starting at pointer
 * @slot and running to the last pointer of the node.
 */
1842 static void reada_walk_down(struct btrfs_root *root,
1843 struct extent_buffer *node, int slot)
1852 level = btrfs_header_level(node);
1856 nritems = btrfs_header_nritems(node);
1857 blocksize = root->nodesize;
/* One readahead request per remaining child pointer. */
1858 for (i = slot; i < nritems; i++) {
1859 bytenr = btrfs_node_blockptr(node, i);
1860 ptr_gen = btrfs_node_ptr_generation(node, i);
1861 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1866 * Check the child node/leaf by the following condition:
1867 * 1. the first item key of the node/leaf should be the same with the one
1869 * 2. block in parent node should match the child node/leaf.
1870 * 3. generation of parent node and child's header should be consistent.
1872 * Or the child node/leaf pointed by the key in parent is not valid.
1874 * We hope to check leaf owner too, but since subvol may share leaves,
1875 * which makes leaf owner check not so strong, key check should be
1876 * sufficient enough for that case.
/*
 * Cross-check @child against the pointer at @slot of @parent.
 *
 * Three properties are compared: the first key of the child against the
 * key stored in the parent, the child block address against the parent
 * block pointer, and the child header generation against the parent
 * pointer generation. Each mismatch is reported on stderr, always with
 * the value recorded in the parent as the wanted one and the value found
 * in the child as the one we have.
 */
1878 static int check_child_node(struct btrfs_root *root,
1879 struct extent_buffer *parent, int slot,
1880 struct extent_buffer *child)
1882 struct btrfs_key parent_key;
1883 struct btrfs_key child_key;
1886 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Level 0 is a leaf and stores item keys; other levels store node keys. */
1887 if (btrfs_header_level(child) == 0)
1888 btrfs_item_key_to_cpu(child, &child_key, 0);
1890 btrfs_node_key_to_cpu(child, &child_key, 0);
1892 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1895 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896 parent_key.objectid, parent_key.type, parent_key.offset,
1897 child_key.objectid, child_key.type, child_key.offset);
1899 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1901 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902 btrfs_node_blockptr(parent, slot),
1903 btrfs_header_bytenr(child));
1905 if (btrfs_node_ptr_generation(parent, slot) !=
1906 btrfs_header_generation(child)) {
/* Wanted is the generation in the parent pointer, have is the child header. */
1908 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909 btrfs_node_ptr_generation(parent, slot),
1910 btrfs_header_generation(child));
/*
 * Per-level cache used by the tree walk: the block address last looked up
 * at each level and the reference count that was found for it.
 */
1916 u64 bytenr[BTRFS_MAX_LEVEL];
1917 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend from path->nodes[*level] towards the leaves, processing each
 * leaf that is reached with process_one_leaf().
 *
 * Reference counts of visited blocks are taken from @nrefs when it already
 * describes the block and are looked up with btrfs_lookup_extent_info()
 * and cached there otherwise. Child blocks are read (with readahead for
 * their siblings), cross-checked against the parent pointer and validated
 * with btrfs_check_leaf()/btrfs_check_node() before the path is moved one
 * level down. On the way out the slot of the current level is set to the
 * item count of its node.
 */
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921 struct walk_control *wc, int *level,
1922 struct node_refs *nrefs)
1924 enum btrfs_tree_block_status status;
1927 struct extent_buffer *next;
1928 struct extent_buffer *cur;
1933 WARN_ON(*level < 0);
1934 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refs of the starting block or look them up. */
1936 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937 refs = nrefs->refs[*level];
1940 ret = btrfs_lookup_extent_info(NULL, root,
1941 path->nodes[*level]->start,
1942 *level, 1, &refs, NULL);
1947 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948 nrefs->refs[*level] = refs;
1952 ret = enter_shared_node(root, path->nodes[*level]->start,
1960 while (*level >= 0) {
1961 WARN_ON(*level < 0);
1962 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963 cur = path->nodes[*level];
/* The header level must agree with the level the path claims. */
1965 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
1968 if (path->slots[*level] >= btrfs_header_nritems(cur))
1971 ret = process_one_leaf(root, cur, wc);
/* Interior node: fetch the child pointer at the current slot. */
1976 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978 blocksize = root->nodesize;
/* Same caching scheme for the refs of the child block. */
1980 if (bytenr == nrefs->bytenr[*level - 1]) {
1981 refs = nrefs->refs[*level - 1];
1983 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984 *level - 1, 1, &refs, NULL);
1988 nrefs->bytenr[*level - 1] = bytenr;
1989 nrefs->refs[*level - 1] = refs;
1994 ret = enter_shared_node(root, bytenr, refs,
1997 path->slots[*level]++;
/* Use the cached block when it is up to date, else read it from disk. */
2002 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004 free_extent_buffer(next);
2005 reada_walk_down(root, cur, path->slots[*level]);
2006 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record it as a corrupt extent. */
2008 if (!extent_buffer_uptodate(next)) {
2009 struct btrfs_key node_key;
2011 btrfs_node_key_to_cpu(path->nodes[*level],
2013 path->slots[*level]);
2014 btrfs_add_corrupt_extent_record(root->fs_info,
2016 path->nodes[*level]->start,
2017 root->nodesize, *level);
2023 ret = check_child_node(root, cur, path->slots[*level], next);
2029 if (btrfs_is_leaf(next))
2030 status = btrfs_check_leaf(root, NULL, next);
2032 status = btrfs_check_node(root, NULL, next);
2033 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034 free_extent_buffer(next);
/* Step one level down and install the child in the path. */
2039 *level = *level - 1;
2040 free_extent_buffer(path->nodes[*level]);
2041 path->nodes[*level] = next;
2042 path->slots[*level] = 0;
/* Mark the current level as fully consumed. */
2045 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb from *level towards the root looking for a node that still has an
 * unvisited slot.
 *
 * Levels that are exhausted have their extent buffer released and their
 * path entry cleared; when such a level is the active shared node it is
 * left through leave_shared_node().
 */
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050 struct walk_control *wc, int *level)
2053 struct extent_buffer *leaf;
2055 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name this may be an interior node at level i. */
2056 leaf = path->nodes[i];
/* Another slot is left at this level. */
2057 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Level exhausted: drop its buffer from the path. */
2062 free_extent_buffer(path->nodes[*level]);
2063 path->nodes[*level] = NULL;
2064 BUG_ON(*level > wc->active_node);
2065 if (*level == wc->active_node)
2066 leave_shared_node(root, wc, *level);
/*
 * Sanity-check the inode record of a root directory.
 *
 * The tests below look at: inode item present and no errors, nlink equal
 * to 1 with found_link equal to 0, a non-empty backref list, and a first
 * backref that is an inode ref with index 0 and the two-byte name ..
 * which has neither a dir index nor a dir item.
 * NOTE(review): the action taken when a test fires is not visible in this
 * excerpt.
 */
2073 static int check_root_dir(struct inode_record *rec)
2075 struct inode_backref *backref;
2078 if (!rec->found_inode_item || rec->errors)
2080 if (rec->nlink != 1 || rec->found_link != 0)
2082 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2084 backref = to_inode_backref(rec->backrefs.next);
2085 if (!backref->found_inode_ref)
2087 if (backref->index != 0 || backref->namelen != 2 ||
2088 memcmp(backref->name, "..", 2))
2090 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The inode item is located by searching for (ino, INODE_ITEM, (u64)-1)
 * with copy-on-write enabled and then inspecting the key at the resulting
 * slot. @path is released before returning.
 */
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098 struct btrfs_root *root, struct btrfs_path *path,
2099 struct inode_record *rec)
2101 struct btrfs_inode_item *ei;
2102 struct btrfs_key key;
/* The largest offset is used so the search lands after the inode item. */
2105 key.objectid = rec->ino;
2106 key.type = BTRFS_INODE_ITEM_KEY;
2107 key.offset = (u64)-1;
2109 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 is handled as a special case. */
2113 if (!path->slots[0]) {
2120 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired. */
2121 if (key.objectid != rec->ino) {
2126 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127 struct btrfs_inode_item);
2128 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129 btrfs_mark_buffer_dirty(path->nodes[0]);
2130 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
/* %llu is the standard conversion for these 64-bit values. */
2131 printf("reset isize for dir %llu root %llu\n", rec->ino,
2132 root->root_key.objectid);
2134 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino with btrfs_add_orphan_item(), release
 * @path and clear I_ERR_NO_ORPHAN_ITEM on the record.
 */
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139 struct btrfs_root *root,
2140 struct btrfs_path *path,
2141 struct inode_record *rec)
2145 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146 btrfs_release_path(path);
2148 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size, clear I_ERR_FILE_NBYTES_WRONG on the record and report
 * the change on stdout. @path is released before returning.
 */
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153 struct btrfs_root *root,
2154 struct btrfs_path *path,
2155 struct inode_record *rec)
2157 struct btrfs_inode_item *ei;
2158 struct btrfs_key key;
2161 key.objectid = rec->ino;
2162 key.type = BTRFS_INODE_ITEM_KEY;
/* Search with copy-on-write so the leaf can be modified. */
2165 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2172 /* Since ret == 0, no need to check anything */
2173 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174 struct btrfs_inode_item);
2175 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176 btrfs_mark_buffer_dirty(path->nodes[0]);
2177 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178 printf("reset nbytes for ino %llu root %llu\n",
2179 rec->ino, root->root_key.objectid);
2181 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref says is missing for inode @rec.
 *
 * A new item keyed (backref->dir, DIR_INDEX, backref->index) is created in
 * its own transaction and filled with a location pointing at the inode
 * item of rec->ino, the file type derived from rec->imode and the backref
 * name. Afterwards the backref is marked as having a dir index and the
 * size bookkeeping of the parent directory record is updated.
 */
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186 struct cache_tree *inode_cache,
2187 struct inode_record *rec,
2188 struct inode_backref *backref)
2190 struct btrfs_path path;
2191 struct btrfs_trans_handle *trans;
2192 struct btrfs_dir_item *dir_item;
2193 struct extent_buffer *leaf;
2194 struct btrfs_key key;
2195 struct btrfs_disk_key disk_key;
2196 struct inode_record *dir_rec;
2197 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name bytes. */
2198 u32 data_size = sizeof(*dir_item) + backref->namelen;
2201 trans = btrfs_start_transaction(root, 1);
2203 return PTR_ERR(trans);
2205 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206 (unsigned long long)rec->ino);
2208 btrfs_init_path(&path);
2209 key.objectid = backref->dir;
2210 key.type = BTRFS_DIR_INDEX_KEY;
2211 key.offset = backref->index;
2212 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2215 leaf = path.nodes[0];
2216 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the new entry: the inode item of rec->ino. */
2218 disk_key.objectid = cpu_to_le64(rec->ino);
2219 disk_key.type = BTRFS_INODE_ITEM_KEY;
2220 disk_key.offset = 0;
2222 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224 btrfs_set_dir_data_len(leaf, dir_item, 0);
2225 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right after the dir item header. */
2226 name_ptr = (unsigned long)(dir_item + 1);
2227 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228 btrfs_mark_buffer_dirty(leaf);
2229 btrfs_release_path(&path);
/* NOTE(review): the result of the commit is not used on this line. */
2230 btrfs_commit_transaction(trans, root);
2232 backref->found_dir_index = 1;
2233 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234 BUG_ON(IS_ERR(dir_rec));
/* The new index entry adds its name length to the directory size. */
2237 dir_rec->found_size += backref->namelen;
2238 if (dir_rec->found_size == dir_rec->isize &&
2239 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241 if (dir_rec->found_size != dir_rec->isize)
2242 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref in its own transaction.
 *
 * The entry is looked up with btrfs_lookup_dir_index() using the backref
 * directory, name and index. It is then removed either by deleting the
 * whole item or by deleting just the one name; the condition that selects
 * between the two calls is not visible in this excerpt.
 */
2247 static int delete_dir_index(struct btrfs_root *root,
2248 struct cache_tree *inode_cache,
2249 struct inode_record *rec,
2250 struct inode_backref *backref)
2252 struct btrfs_trans_handle *trans;
2253 struct btrfs_dir_item *di;
2254 struct btrfs_path path;
2257 trans = btrfs_start_transaction(root, 1);
2259 return PTR_ERR(trans);
2261 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262 (unsigned long long)backref->dir,
2263 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264 (unsigned long long)root->objectid);
2266 btrfs_init_path(&path);
2267 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268 backref->name, backref->namelen,
2269 backref->index, -1);
/* Path taken when the lookup does not yield a usable entry. */
2272 btrfs_release_path(&path);
2273 btrfs_commit_transaction(trans, root);
2280 ret = btrfs_del_item(trans, root, &path);
2282 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2284 btrfs_release_path(&path);
2285 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino in its own transaction.
 *
 * The new item gets the transaction id as generation, nbytes from
 * rec->found_size, and atime/ctime/mtime set to the current time. When a
 * dir item was found for the inode it becomes a directory (mode
 * S_IFDIR | 0755, size found_size); otherwise it becomes a regular file
 * (mode S_IFREG | 0755, size extent_end). nlink is either 1 or
 * rec->found_link; the condition that selects between them is not visible
 * in this excerpt.
 */
2289 static int create_inode_item(struct btrfs_root *root,
2290 struct inode_record *rec,
2291 struct inode_backref *backref, int root_dir)
2293 struct btrfs_trans_handle *trans;
2294 struct btrfs_inode_item inode_item;
2295 time_t now = time(NULL);
2298 trans = btrfs_start_transaction(root, 1);
2299 if (IS_ERR(trans)) {
2300 ret = PTR_ERR(trans);
2304 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305 "be incomplete, please check permissions and content after "
2306 "the fsck completes.\n", (unsigned long long)root->objectid,
2307 (unsigned long long)rec->ino);
/* Start from an all-zero item and fill in the known fields. */
2309 memset(&inode_item, 0, sizeof(inode_item));
2310 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2312 btrfs_set_stack_inode_nlink(&inode_item, 1);
2314 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316 if (rec->found_dir_item) {
2317 if (rec->found_file_extent)
2318 fprintf(stderr, "root %llu inode %llu has both a dir "
2319 "item and extents, unsure if it is a dir or a "
2320 "regular file so setting it as a directory\n",
2321 (unsigned long long)root->objectid,
2322 (unsigned long long)rec->ino);
2323 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true here; a plain else would do. */
2325 } else if (!rec->found_dir_item) {
2326 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* atime, ctime and mtime are set to now; otime is set to zero. */
2329 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2338 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2340 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of @rec and repair the ones that are inconsistent.
 *
 * Depending on the delete flag and on which of dir index, dir item and
 * inode ref were found for a backref, this deletes a bad dir index, adds a
 * missing dir index, inserts a missing dir index/item pair, recreates a
 * missing inode item, or drops a fully consistent backref from the list.
 * Returns the error code when one occurred, otherwise the value of
 * repaired.
 */
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345 struct inode_record *rec,
2346 struct cache_tree *inode_cache,
2349 struct inode_backref *tmp, *backref;
2350 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* Safe iteration: entries may be unlinked from the list in the loop. */
2354 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory without an inode item: recreate the item first. */
2355 if (!delete && rec->ino == root_dirid) {
2356 if (!rec->found_inode_item) {
2357 ret = create_inode_item(root, rec, backref, 1);
2364 /* Index 0 for root dir's are special, don't mess with it */
2365 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without inode ref, or with a mismatching index: delete it. */
2369 ((backref->found_dir_index && !backref->found_inode_ref) ||
2370 (backref->found_dir_index && backref->found_inode_ref &&
2371 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372 ret = delete_dir_index(root, inode_cache, rec, backref);
2376 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2380 if (!delete && !backref->found_dir_index &&
2381 backref->found_dir_item && backref->found_inode_ref) {
2382 ret = add_missing_dir_index(root, inode_cache, rec,
/* NOTE(review): found_dir_index is tested twice in this condition. */
2387 if (backref->found_dir_item &&
2388 backref->found_dir_index &&
2389 backref->found_dir_index) {
2390 if (!backref->errors &&
2391 backref->found_inode_ref) {
2392 list_del(&backref->list);
/* Only the inode ref exists: insert the dir index/item pair. */
2398 if (!delete && (!backref->found_dir_index &&
2399 !backref->found_dir_item &&
2400 backref->found_inode_ref)) {
2401 struct btrfs_trans_handle *trans;
2402 struct btrfs_key location;
2404 ret = check_dir_conflict(root, backref->name,
2410 * let nlink fixing routine to handle it,
2411 * which can do it better.
2416 location.objectid = rec->ino;
2417 location.type = BTRFS_INODE_ITEM_KEY;
2418 location.offset = 0;
2420 trans = btrfs_start_transaction(root, 1);
2421 if (IS_ERR(trans)) {
2422 ret = PTR_ERR(trans);
2425 fprintf(stderr, "adding missing dir index/item pair "
2427 (unsigned long long)rec->ino);
2428 ret = btrfs_insert_dir_item(trans, root, backref->name,
2430 backref->dir, &location,
2431 imode_to_type(rec->imode),
2434 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item is missing: recreate it. */
2438 if (!delete && (backref->found_inode_ref &&
2439 backref->found_dir_index &&
2440 backref->found_dir_item &&
2441 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442 !rec->found_inode_item)) {
2443 ret = create_inode_item(root, rec, backref, 0);
2450 return ret ? ret : repaired;
2454 * To determine the file type for nlink/inode_item repair
2456 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457 * Return -ENOENT if file type is not found.
/*
 * Determine the BTRFS_FT_* type of the inode described by @rec.
 *
 * When the inode item was found the type is derived from rec->imode.
 * Otherwise the filetype of the first backref that has a dir index or a
 * dir item is used.
 */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2461 struct inode_backref *backref;
2463 /* For inode item recovered case */
2464 if (rec->found_inode_item) {
2465 *type = imode_to_type(rec->imode);
/* Fall back to the type recorded in a directory entry. */
2469 list_for_each_entry(backref, &rec->backrefs, list) {
2470 if (backref->found_dir_index || backref->found_dir_item) {
2471 *type = backref->filetype;
2479 * To determine the file name for nlink repair
2481 * Return 0 if file name is found, set name and namelen.
2482 * Return -ENOENT if file name is not found.
/*
 * Copy a name for the inode described by @rec into @name and store its
 * length in @namelen.
 *
 * The name comes from the first backref that has a dir index, a dir item
 * or an inode ref. backref->namelen bytes are copied with memcpy(); no
 * terminator is added by the visible code, and the size of @name is not
 * checked here, so the caller must supply a large enough buffer.
 */
2484 static int find_file_name(struct inode_record *rec,
2485 char *name, int *namelen)
2487 struct inode_backref *backref;
2489 list_for_each_entry(backref, &rec->backrefs, list) {
2490 if (backref->found_dir_index || backref->found_dir_item ||
2491 backref->found_inode_ref) {
2492 memcpy(name, backref->name, backref->namelen);
2493 *namelen = backref->namelen;
2500 /* Reset the nlink of the inode to the correct one */
/*
 * Three phases: unlink every backref of @rec (dropping the invalid ones
 * from the list), set nlink of the inode item to 0, then add the remaining
 * backrefs back with btrfs_add_link(). rec->found_link is reset to 0 at
 * the start. @path is released before returning.
 */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502 struct btrfs_root *root,
2503 struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct inode_backref *backref;
2507 struct inode_backref *tmp;
2508 struct btrfs_key key;
2509 struct btrfs_inode_item *inode_item;
2512 /* We don't believe this either, reset it and iterate backref */
2513 rec->found_link = 0;
2515 /* Remove all backref including the valid ones */
2516 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518 backref->index, backref->name,
2519 backref->namelen, 0);
2523 /* remove invalid backref, so it won't be added back */
/* A backref is valid only when dir index, dir item and inode ref all exist. */
2524 if (!(backref->found_dir_index &&
2525 backref->found_dir_item &&
2526 backref->found_inode_ref)) {
2527 list_del(&backref->list);
2534 /* Set nlink to 0 */
2535 key.objectid = rec->ino;
2536 key.type = BTRFS_INODE_ITEM_KEY;
2538 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2545 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546 struct btrfs_inode_item);
2547 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548 btrfs_mark_buffer_dirty(path->nodes[0]);
2549 btrfs_release_path(path);
2552 * Add back valid inode_ref/dir_item/dir_index,
2553 * add_link() will handle the nlink inc, so new nlink must be correct
2555 list_for_each_entry(backref, &rec->backrefs, list) {
2556 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557 backref->name, backref->namelen,
2558 backref->filetype, &backref->index, 1);
2563 btrfs_release_path(path);
/*
 * Repair the link count of the inode described by @rec.
 *
 * A name and a file type are recovered from the backrefs first, with the
 * inode number and BTRFS_FT_REG_FILE as fallbacks. The link count is then
 * rebuilt with reset_nlink(). When no link is left afterwards the inode is
 * linked into a lost+found directory, retrying with a .INO suffix while
 * the name is already taken. I_ERR_LINK_COUNT_WRONG is cleared at the end
 * in every case.
 */
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568 struct btrfs_root *root,
2569 struct btrfs_path *path,
2570 struct inode_record *rec)
2572 char *dir_name = "lost+found";
2573 char namebuf[BTRFS_NAME_LEN] = {0};
2578 int name_recovered = 0;
2579 int type_recovered = 0;
2583 * Get file name and type first before these invalid inode ref
2584 * are deleted by remove_all_invalid_backref()
/* The finders return 0 on success, hence the negation. */
2586 name_recovered = !find_file_name(rec, namebuf, &namelen);
2587 type_recovered = !find_file_type(rec, &type);
2589 if (!name_recovered) {
2590 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591 rec->ino, rec->ino);
/* Fallback name: the decimal inode number. */
2592 namelen = count_digits(rec->ino);
2593 sprintf(namebuf, "%llu", rec->ino);
2596 if (!type_recovered) {
2597 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2599 type = BTRFS_FT_REG_FILE;
2603 ret = reset_nlink(trans, root, path, rec);
2606 "Failed to reset nlink for inode %llu: %s\n",
2607 rec->ino, strerror(-ret));
/* No valid link survived the reset: move the inode to lost+found. */
2611 if (rec->found_link == 0) {
2612 lost_found_ino = root->highest_inode;
2613 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2618 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2622 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623 dir_name, strerror(-ret));
2626 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627 namebuf, namelen, type, NULL, 1);
2629 * Add ".INO" suffix several times to handle case where
2630 * "FILENAME.INO" is already taken by another file.
2632 while (ret == -EEXIST) {
2634 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2636 if (namelen + count_digits(rec->ino) + 1 >
2641 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2643 namelen += count_digits(rec->ino) + 1;
2644 ret = btrfs_add_link(trans, root, rec->ino,
2645 lost_found_ino, namebuf,
2646 namelen, type, NULL, 1);
2650 "Failed to link the inode %llu to %s dir: %s\n",
2651 rec->ino, dir_name, strerror(-ret));
2655 * Just increase the found_link, don't actually add the
2656 * backref. This will make things easier and this inode
2657 * record will be freed after the repair is done.
2658 * So fsck will not report problem about this inode.
2661 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662 namelen, namebuf, dir_name);
2664 printf("Fixed the nlink of inode %llu\n", rec->ino);
2667 * Clear the flag anyway, or we will loop forever for the same inode
2668 * as it will not be removed from the bad inode list and the dead loop
2671 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672 btrfs_release_path(path);
2677 * Check if there is any normal(reg or prealloc) file extent for given
2679 * This is used to determine the file type when neither its dir_index/item or
2680 * inode_item exists.
2682 * This will *NOT* report error, if any error happens, just consider it does
2683 * not have any normal file extent.
/*
 * Look in @root for an EXTENT_DATA item of inode @ino whose extent type is
 * not BTRFS_FILE_EXTENT_INLINE.
 *
 * NOTE(review): the lines that set the return value are not visible in
 * this excerpt; presumably non-zero means such an extent exists - confirm
 * against the full source.
 */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2687 struct btrfs_path path;
2688 struct btrfs_key key;
2689 struct btrfs_key found_key;
2690 struct btrfs_file_extent_item *fi;
2694 btrfs_init_path(&path);
2696 key.type = BTRFS_EXTENT_DATA_KEY;
/* Read-only search: no transaction and no copy-on-write. */
2699 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and slot past the leaf end: continue in the next leaf. */
2704 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705 ret = btrfs_next_leaf(root, &path);
2712 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Check whether the item is still a file extent of @ino. */
2714 if (found_key.objectid != ino ||
2715 found_key.type != BTRFS_EXTENT_DATA_KEY)
2717 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718 struct btrfs_file_extent_item);
2719 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Any type other than inline counts as a normal extent. */
2720 if (type != BTRFS_FILE_EXTENT_INLINE) {
2726 btrfs_release_path(&path);
/*
 * Map a BTRFS_FT_* directory entry type to the matching S_IF* mode bits
 * through a static lookup table.
 *
 * NOTE(review): @type is used as the table index without a range check in
 * the visible code; a value beyond the last table entry would read past
 * the table.
 */
2730 static u32 btrfs_type_to_imode(u8 type)
2732 static u32 imode_by_btrfs_type[] = {
2733 [BTRFS_FT_REG_FILE] = S_IFREG,
2734 [BTRFS_FT_DIR] = S_IFDIR,
2735 [BTRFS_FT_CHRDEV] = S_IFCHR,
2736 [BTRFS_FT_BLKDEV] = S_IFBLK,
2737 [BTRFS_FT_FIFO] = S_IFIFO,
2738 [BTRFS_FT_SOCK] = S_IFSOCK,
2739 [BTRFS_FT_SYMLINK] = S_IFLNK,
2742 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of rec->ino.
 *
 * The file type is taken from find_file_type() when possible. Otherwise it
 * is guessed: regular file when a non-inline file extent exists, directory
 * when a dir item was found, regular file when orphan extents exist, and
 * regular file as the last resort. The inode is then created with
 * btrfs_new_inode() and the record is updated so that the nlink repair
 * runs afterwards.
 */
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746 struct btrfs_root *root,
2747 struct btrfs_path *path,
2748 struct inode_record *rec)
2752 int type_recovered = 0;
2755 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation. */
2757 type_recovered = !find_file_type(rec, &filetype);
2760 * Try to determine inode type if type not found.
2762 * For found regular file extent, it must be FILE.
2763 * For found dir_item/index, it must be DIR.
2765 * For undetermined one, use FILE as fallback.
2768 * 1. If found backref(inode_index/item is already handled) to it,
2770 * Need new inode-inode ref structure to allow search for that.
2772 if (!type_recovered) {
2773 if (rec->found_file_extent &&
2774 find_normal_file_extent(root, rec->ino)) {
2776 filetype = BTRFS_FT_REG_FILE;
2777 } else if (rec->found_dir_item) {
2779 filetype = BTRFS_FT_DIR;
2780 } else if (!list_empty(&rec->orphan_extents)) {
2782 filetype = BTRFS_FT_REG_FILE;
2784 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2787 filetype = BTRFS_FT_REG_FILE;
2791 ret = btrfs_new_inode(trans, root, rec->ino,
2792 mode | btrfs_type_to_imode(filetype));
2797 * Here inode rebuild is done, we only rebuild the inode item,
2798 * don't repair the nlink(like move to lost+found).
2799 * That is the job of nlink repair.
2801 * We just fill the record and return
/*
 * Process the orphan data extents queued on @rec->orphan_extents.
 *
 * Each orphan is probed with btrfs_get_extent() for a conflicting file
 * extent.  The code below contains both a path that frees the orphan's data
 * extent ("conflicts, delete the orphan") and a path that inserts a file
 * extent item for it and updates the size and hole bookkeeping of @rec.
 * Processed orphans are unlinked from the list and I_ERR_FILE_EXTENT_ORPHAN
 * is cleared afterwards.
 */
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814 struct btrfs_root *root,
2815 struct btrfs_path *path,
2816 struct inode_record *rec)
2818 struct orphan_data_extent *orphan;
2819 struct orphan_data_extent *tmp;
/* _safe iteration: entries are removed from the list inside the loop */
2822 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2824 * Check for conflicting file extents
2826 * Here we don't know whether the extents is compressed or not,
2827 * so we can only assume it not compressed nor data offset,
2828 * and use its disk_len as extent length.
2830 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831 orphan->offset, orphan->disk_len, 0);
2832 btrfs_release_path(path);
2837 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838 orphan->disk_bytenr, orphan->disk_len);
2839 ret = btrfs_free_extent(trans,
2840 root->fs_info->extent_root,
2841 orphan->disk_bytenr, orphan->disk_len,
2842 0, root->objectid, orphan->objectid,
/* disk_len is passed for both length arguments, matching the "not compressed" assumption above */
2847 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848 orphan->offset, orphan->disk_bytenr,
2849 orphan->disk_len, orphan->disk_len);
2853 /* Update file size info */
2854 rec->found_size += orphan->disk_len;
2855 if (rec->found_size == rec->nbytes)
2856 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2858 /* Update the file extent hole info too */
2859 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2863 if (RB_EMPTY_ROOT(&rec->holes))
2864 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2866 list_del(&orphan->list);
2869 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Repair the file extent holes recorded in the @rec->holes rb-tree.
 *
 * For every recorded hole btrfs_punch_hole() is called on the hole's range,
 * after which the hole is removed from the tree with del_file_extent_hole().
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.  A separate
 * call covers the range [0, round_up(isize, sectorsize)) for a file that has
 * lost all of its file extents.
 */
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875 struct btrfs_root *root,
2876 struct btrfs_path *path,
2877 struct inode_record *rec)
2879 struct rb_node *node;
2880 struct file_extent_hole *hole;
2884 node = rb_first(&rec->holes);
2888 hole = rb_entry(node, struct file_extent_hole, node);
2889 ret = btrfs_punch_hole(trans, root, rec->ino,
2890 hole->start, hole->len);
2893 ret = del_file_extent_hole(&rec->holes, hole->start,
2897 if (RB_EMPTY_ROOT(&rec->holes))
2898 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree was modified above, so restart from its first node */
2899 node = rb_first(&rec->holes);
2901 /* special case for a file losing all its file extent */
2903 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904 round_up(rec->isize, root->sectorsize));
2908 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909 rec->ino, root->objectid);
/*
 * Run the individual inode repair helpers for @rec inside one transaction.
 *
 * The mask tested first lists the error bits that have a repair helper in
 * this function.  The helpers run in a fixed order and each one is attempted
 * only while all previous ones returned 0.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * checked here.
 */
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2916 struct btrfs_trans_handle *trans;
2917 struct btrfs_path path;
2920 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921 I_ERR_NO_ORPHAN_ITEM |
2922 I_ERR_LINK_COUNT_WRONG |
2923 I_ERR_NO_INODE_ITEM |
2924 I_ERR_FILE_EXTENT_ORPHAN |
2925 I_ERR_FILE_EXTENT_DISCOUNT|
2926 I_ERR_FILE_NBYTES_WRONG)))
2930 * For nlink repair, it may create a dir and add link, so
2931 * 2 for parent(256)'s dir_index and dir_item
2932 * 2 for lost+found dir's inode_item and inode_ref
2933 * 1 for the new inode_ref of the file
2934 * 2 for lost+found dir's dir_index and dir_item for the file
2936 trans = btrfs_start_transaction(root, 7);
2938 return PTR_ERR(trans);
2940 btrfs_init_path(&path);
/*
 * The inode item is rebuilt first.  rec->errors is re-read before every
 * step, so bits set or cleared by an earlier helper affect the later ones.
 */
2941 if (rec->errors & I_ERR_NO_INODE_ITEM)
2942 ret = repair_inode_no_item(trans, root, &path, rec);
2943 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948 ret = repair_inode_isize(trans, root, &path, rec);
2949 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952 ret = repair_inode_nlinks(trans, root, &path, rec);
2953 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954 ret = repair_inode_nbytes(trans, root, &path, rec);
/* The transaction is committed whether or not a helper failed */
2955 btrfs_commit_transaction(trans, root);
2956 btrfs_release_path(&path);
/*
 * Check all inode records collected for @root in @inode_cache and, in repair
 * mode, try to fix them.
 *
 * Visible phases: record the highest inode number of the tree, repair inode
 * backrefs (repair mode only), validate the root directory inode (recreating
 * it with btrfs_make_root_dir() when it is missing), then drain the cache:
 * every remaining record is removed, repaired via try_repair_inode() or
 * reported together with its unresolved backrefs, and freed.
 *
 * Return -1 if any error was counted, 0 otherwise.
 */
2960 static int check_inode_recs(struct btrfs_root *root,
2961 struct cache_tree *inode_cache)
2963 struct cache_extent *cache;
2964 struct ptr_node *node;
2965 struct inode_record *rec;
2966 struct inode_backref *backref;
2971 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* Root item has zero refs: warn if inode records were collected anyway */
2973 if (btrfs_root_refs(&root->root_item) == 0) {
2974 if (!cache_tree_empty(inode_cache))
2975 fprintf(stderr, "warning line %d\n", __LINE__);
2980 * We need to record the highest inode number for later 'lost+found'
2982 * We must select an ino not used/referred by any existing inode, or
2983 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984 * this may cause 'lost+found' dir has wrong nlinks.
2986 cache = last_cache_extent(inode_cache);
2988 node = container_of(cache, struct ptr_node, cache);
2990 if (rec->ino > root->highest_inode)
2991 root->highest_inode = rec->ino;
2995 * We need to repair backrefs first because we could change some of the
2996 * errors in the inode recs.
2998 * We also need to go through and delete invalid backrefs first and then
2999 * add the correct ones second. We do this because we may get EEXIST
3000 * when adding back the correct index because we hadn't yet deleted the
3003 * For example, if we were missing a dir index then the directories
3004 * isize would be wrong, so if we fixed the isize to what we thought it
3005 * would be and then fixed the backref we'd still have a invalid fs, so
3006 * we need to add back the dir index and then check to see if the isize
3011 if (stage == 3 && !err)
/* Backref repair pass over the whole cache; the loop only runs in repair mode */
3014 cache = search_cache_extent(inode_cache, 0);
3015 while (repair && cache) {
3016 node = container_of(cache, struct ptr_node, cache);
3018 cache = next_cache_extent(cache);
3020 /* Need to free everything up and rescan */
3022 remove_cache_extent(inode_cache, &node->cache);
3024 free_inode_rec(rec);
3028 if (list_empty(&rec->backrefs))
3031 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Validate the root directory inode of this tree */
3045 rec = get_inode_rec(inode_cache, root_dirid, 0);
3046 BUG_ON(IS_ERR(rec));
3048 ret = check_root_dir(rec);
3050 fprintf(stderr, "root %llu root dir %llu error\n",
3051 (unsigned long long)root->root_key.objectid,
3052 (unsigned long long)root_dirid);
3053 print_inode_error(root, rec);
3058 struct btrfs_trans_handle *trans;
3060 trans = btrfs_start_transaction(root, 1);
3061 if (IS_ERR(trans)) {
3062 err = PTR_ERR(trans);
3067 "root %llu missing its root dir, recreating\n",
3068 (unsigned long long)root->objectid);
3070 ret = btrfs_make_root_dir(trans, root, root_dirid);
3073 btrfs_commit_transaction(trans, root);
3077 fprintf(stderr, "root %llu root dir %llu not found\n",
3078 (unsigned long long)root->root_key.objectid,
3079 (unsigned long long)root_dirid);
/* Final pass: take each record out of the cache, then repair or report it */
3083 cache = search_cache_extent(inode_cache, 0);
3086 node = container_of(cache, struct ptr_node, cache);
3088 remove_cache_extent(inode_cache, &node->cache);
/* The root dir and the orphan objectid are not checked as ordinary inodes */
3090 if (rec->ino == root_dirid ||
3091 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092 free_inode_rec(rec);
3096 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097 ret = check_orphan_item(root, rec->ino);
3099 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100 if (can_free_inode_rec(rec)) {
3101 free_inode_rec(rec);
3106 if (!rec->found_inode_item)
3107 rec->errors |= I_ERR_NO_INODE_ITEM;
3108 if (rec->found_link != rec->nlink)
3109 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3111 ret = try_repair_inode(root, rec);
3112 if (ret == 0 && can_free_inode_rec(rec)) {
3113 free_inode_rec(rec);
3119 if (!(repair && ret == 0))
3121 print_inode_error(root, rec);
/* Flag and print the items each remaining backref is still missing */
3122 list_for_each_entry(backref, &rec->backrefs, list) {
3123 if (!backref->found_dir_item)
3124 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125 if (!backref->found_dir_index)
3126 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127 if (!backref->found_inode_ref)
3128 backref->errors |= REF_ERR_NO_INODE_REF;
3129 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130 " namelen %u name %s filetype %d errors %x",
3131 (unsigned long long)backref->dir,
3132 (unsigned long long)backref->index,
3133 backref->namelen, backref->name,
3134 backref->filetype, backref->errors);
3135 print_ref_error(backref->errors);
3137 free_inode_rec(rec);
3139 return (error > 0) ? -1 : 0;
/*
 * Get the root_record for @objectid from @root_cache.
 *
 * The cache is probed with lookup_cache_extent() for the one-unit range
 * starting at @objectid.  The code also allocates a zeroed record, keys it by
 * @objectid with size 1 and inserts it into the cache (presumably when the
 * lookup found nothing).
 *
 * Error returns visible here: ERR_PTR(-ENOMEM) and ERR_PTR(-EEXIST), so
 * callers must test the result with IS_ERR().
 */
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3145 struct cache_extent *cache;
3146 struct root_record *rec = NULL;
3149 cache = lookup_cache_extent(root_cache, objectid, 1);
3151 rec = container_of(cache, struct root_record, cache);
3153 rec = calloc(1, sizeof(*rec));
3155 return ERR_PTR(-ENOMEM);
3156 rec->objectid = objectid;
3157 INIT_LIST_HEAD(&rec->backrefs);
3158 rec->cache.start = objectid;
3159 rec->cache.size = 1;
3161 ret = insert_cache_extent(root_cache, &rec->cache);
3163 return ERR_PTR(-EEXIST);
/*
 * Find or create the backref of @rec that is identified by @ref_root, @dir
 * and the name (@name, @namelen).
 *
 * Existing backrefs are compared on ref_root, dir, namelen and the name
 * bytes; @index takes no part in the comparison shown here and is only
 * stored in a newly created backref.  A new backref is allocated with room
 * for the name plus a terminating NUL and appended to @rec->backrefs.
 */
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169 u64 ref_root, u64 dir, u64 index,
3170 const char *name, int namelen)
3172 struct root_backref *backref;
3174 list_for_each_entry(backref, &rec->backrefs, list) {
3175 if (backref->ref_root != ref_root || backref->dir != dir ||
3176 backref->namelen != namelen)
3178 if (memcmp(name, backref->name, namelen))
/* The name is stored inline after the struct, hence the extra namelen + 1 bytes */
3183 backref = calloc(1, sizeof(*backref) + namelen + 1);
3186 backref->ref_root = ref_root;
3188 backref->index = index;
3189 backref->namelen = namelen;
3190 memcpy(backref->name, name, namelen);
3191 backref->name[namelen] = '\0';
3192 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down the root_record embedded around @cache: every backref is taken
 * off the record's backrefs list until the list is empty.
 */
3196 static void free_root_record(struct cache_extent *cache)
3198 struct root_record *rec;
3199 struct root_backref *backref;
3201 rec = container_of(cache, struct root_record, cache);
3202 while (!list_empty(&rec->backrefs)) {
3203 backref = to_root_backref(rec->backrefs.next);
3204 list_del(&backref->list);
/*
 * Instantiate the tree-freeing helper for root records, using
 * free_root_record() per entry (presumably this defines
 * free_root_recs_tree(), which check_fs_roots() calls).
 */
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing root @root_id was found.
 *
 * The matching root record and backref are looked up (or created), @errors is
 * OR-ed into the backref, and the found_* flag that corresponds to
 * @item_type is set.  For item types other than DIR_ITEM the index is
 * cross-checked against one recorded earlier (REF_ERR_INDEX_UNMATCH) or
 * stored.  A backref becomes reachable once both its forward ref and its dir
 * item have been found.
 */
3213 static int add_root_backref(struct cache_tree *root_cache,
3214 u64 root_id, u64 ref_root, u64 dir, u64 index,
3215 const char *name, int namelen,
3216 int item_type, int errors)
3218 struct root_record *rec;
3219 struct root_backref *backref;
3221 rec = get_root_rec(root_cache, root_id);
3222 BUG_ON(IS_ERR(rec));
3223 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3226 backref->errors |= errors;
/* A DIR_ITEM is excluded from the index comparison below */
3228 if (item_type != BTRFS_DIR_ITEM_KEY) {
3229 if (backref->found_dir_index || backref->found_back_ref ||
3230 backref->found_forward_ref) {
3231 if (backref->index != index)
3232 backref->errors |= REF_ERR_INDEX_UNMATCH;
3234 backref->index = index;
3238 if (item_type == BTRFS_DIR_ITEM_KEY) {
3239 if (backref->found_forward_ref)
3241 backref->found_dir_item = 1;
3242 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243 backref->found_dir_index = 1;
3244 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245 if (backref->found_forward_ref)
3246 backref->errors |= REF_ERR_DUP_ROOT_REF;
3247 else if (backref->found_dir_item)
3249 backref->found_forward_ref = 1;
3250 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251 if (backref->found_back_ref)
3252 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253 backref->found_back_ref = 1;
/* Forward ref plus dir item together make the backref reachable */
3258 if (backref->found_forward_ref && backref->found_dir_item)
3259 backref->reachable = 1;
/*
 * Move the per-tree records in @src_cache into the global root record cache
 * @dst_cache.
 *
 * For the tree reloc root the source records are simply freed.  Otherwise
 * each record is removed from @src_cache, checked with is_child_root(), and
 * its backrefs that have a dir item and/or dir index are added to @dst_cache
 * through add_root_backref(), with this tree as the referencing root.  The
 * record itself is freed afterwards.
 */
3263 static int merge_root_recs(struct btrfs_root *root,
3264 struct cache_tree *src_cache,
3265 struct cache_tree *dst_cache)
3267 struct cache_extent *cache;
3268 struct ptr_node *node;
3269 struct inode_record *rec;
3270 struct inode_backref *backref;
3273 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274 free_inode_recs_tree(src_cache);
3279 cache = search_cache_extent(src_cache, 0);
3282 node = container_of(cache, struct ptr_node, cache);
3284 remove_cache_extent(src_cache, &node->cache);
/* rec->ino is used as the objectid of the candidate child root */
3287 ret = is_child_root(root, root->objectid, rec->ino);
3293 list_for_each_entry(backref, &rec->backrefs, list) {
/* These records must never carry an inode ref */
3294 BUG_ON(backref->found_inode_ref);
3295 if (backref->found_dir_item)
3296 add_root_backref(dst_cache, rec->ino,
3297 root->root_key.objectid, backref->dir,
3298 backref->index, backref->name,
3299 backref->namelen, BTRFS_DIR_ITEM_KEY,
3301 if (backref->found_dir_index)
3302 add_root_backref(dst_cache, rec->ino,
3303 root->root_key.objectid, backref->dir,
3304 backref->index, backref->name,
3305 backref->namelen, BTRFS_DIR_INDEX_KEY,
3309 free_inode_rec(rec);
/*
 * Check the references between the fs trees recorded in @root_cache.
 *
 * The record of BTRFS_FS_TREE_OBJECTID is looked up first.  The first walk
 * over the cache marks backrefs as unreachable when the root they come from
 * has no reference itself.  The second walk reports fs trees that are not
 * referenced and backrefs that are missing one of their items (dir item, dir
 * index, root backref, root ref).
 *
 * Return 1 if any error was counted, 0 otherwise.
 */
3316 static int check_root_refs(struct btrfs_root *root,
3317 struct cache_tree *root_cache)
3319 struct root_record *rec;
3320 struct root_record *ref_root;
3321 struct root_backref *backref;
3322 struct cache_extent *cache;
3328 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329 BUG_ON(IS_ERR(rec));
3332 /* fixme: this can not detect circular references */
/* First walk: propagate unreachability from unreferenced roots */
3335 cache = search_cache_extent(root_cache, 0);
3339 rec = container_of(cache, struct root_record, cache);
3340 cache = next_cache_extent(cache);
3342 if (rec->found_ref == 0)
3345 list_for_each_entry(backref, &rec->backrefs, list) {
3346 if (!backref->reachable)
3349 ref_root = get_root_rec(root_cache,
3351 BUG_ON(IS_ERR(ref_root));
3352 if (ref_root->found_ref > 0)
3355 backref->reachable = 0;
3357 if (rec->found_ref == 0)
/* Second walk: report unreferenced trees and incomplete backrefs */
3363 cache = search_cache_extent(root_cache, 0);
3367 rec = container_of(cache, struct root_record, cache);
3368 cache = next_cache_extent(cache);
/* Only objectids in the FIRST_FREE..LAST_FREE range are checked for an orphan item */
3370 if (rec->found_ref == 0 &&
3371 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373 ret = check_orphan_item(root->fs_info->tree_root,
3379 * If we don't have a root item then we likely just have
3380 * a dir item in a snapshot for this root but no actual
3381 * ref key or anything so it's meaningless.
3383 if (!rec->found_root_item)
3386 fprintf(stderr, "fs tree %llu not referenced\n",
3387 (unsigned long long)rec->objectid);
3391 if (rec->found_ref > 0 && !rec->found_root_item)
3393 list_for_each_entry(backref, &rec->backrefs, list) {
3394 if (!backref->found_dir_item)
3395 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396 if (!backref->found_dir_index)
3397 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398 if (!backref->found_back_ref)
3399 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400 if (!backref->found_forward_ref)
3401 backref->errors |= REF_ERR_NO_ROOT_REF;
3402 if (backref->reachable && backref->errors)
3409 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410 (unsigned long long)rec->objectid, rec->found_ref,
3411 rec->found_root_item ? "" : "not found");
3413 list_for_each_entry(backref, &rec->backrefs, list) {
3414 if (!backref->reachable)
3416 if (!backref->errors && rec->found_root_item)
3418 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419 " index %llu namelen %u name %s errors %x\n",
3420 (unsigned long long)backref->ref_root,
3421 (unsigned long long)backref->dir,
3422 (unsigned long long)backref->index,
3423 backref->namelen, backref->name,
3425 print_ref_error(backref->errors);
3428 return errors > 0 ? 1 : 0;
/*
 * Record one ROOT_REF or ROOT_BACKREF item found at @slot of @eb.
 *
 * The dirid, sequence (index) and name are read from the item.  A name longer
 * than BTRFS_NAME_LEN is truncated to fit the local buffer and flagged with
 * REF_ERR_NAME_TOO_LONG.  The result is passed to add_root_backref(): for a
 * ROOT_REF key, key->offset is used as the referenced root and key->objectid
 * as the referencing root; for the other key type the two are swapped.
 */
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432 struct btrfs_key *key,
3433 struct cache_tree *root_cache)
3439 struct btrfs_root_ref *ref;
3440 char namebuf[BTRFS_NAME_LEN];
3443 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3445 dirid = btrfs_root_ref_dirid(eb, ref);
3446 index = btrfs_root_ref_sequence(eb, ref);
3447 name_len = btrfs_root_ref_name_len(eb, ref);
3449 if (name_len <= BTRFS_NAME_LEN) {
3453 len = BTRFS_NAME_LEN;
3454 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored directly after the btrfs_root_ref structure */
3456 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3458 if (key->type == BTRFS_ROOT_REF_KEY) {
3459 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460 index, namebuf, len, key->type, error);
3462 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463 index, namebuf, len, key->type, error);
/* Per-entry destructor: maps @cache back to its enclosing btrfs_corrupt_block */
3468 static void free_corrupt_block(struct cache_extent *cache)
3470 struct btrfs_corrupt_block *corrupt;
3472 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiate the tree-freeing helper for corrupt block records (presumably
 * this defines free_corrupt_blocks_tree(), which check_fs_root() calls).
 */
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3479 * Repair the btree of the given root.
3481 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3482 * and then rebalance the tree.
3483 * After the fix, the btree should be writeable.
/*
 * Works in two walks over @corrupt_blocks inside one transaction: the first
 * deletes the pointer to each corrupt block and frees its extent, the second
 * searches for each key again with ins_len -1 so that the tree is balanced.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * checked here.
 */
3485 static int repair_btree(struct btrfs_root *root,
3486 struct cache_tree *corrupt_blocks)
3488 struct btrfs_trans_handle *trans;
3489 struct btrfs_path path;
3490 struct btrfs_corrupt_block *corrupt;
3491 struct cache_extent *cache;
3492 struct btrfs_key key;
3497 if (cache_tree_empty(corrupt_blocks))
3500 trans = btrfs_start_transaction(root, 1);
3501 if (IS_ERR(trans)) {
3502 ret = PTR_ERR(trans);
3503 fprintf(stderr, "Error starting transaction: %s\n",
3507 btrfs_init_path(&path);
3508 cache = first_cache_extent(corrupt_blocks);
3510 corrupt = container_of(cache, struct btrfs_corrupt_block,
/* Stop the search at the level of the corrupt block */
3512 level = corrupt->level;
3513 path.lowest_level = level;
3514 key.objectid = corrupt->key.objectid;
3515 key.type = corrupt->key.type;
3516 key.offset = corrupt->key.offset;
3519 * Here we don't want to do any tree balance, since it may
3520 * cause a balance with corrupted brother leaf/node,
3521 * so ins_len set to 0 here.
3522 * Balance will be done after all corrupt node/leaf is deleted.
3524 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Remember the block address before its pointer is deleted */
3527 offset = btrfs_node_blockptr(path.nodes[level],
3530 /* Remove the ptr */
3531 ret = btrfs_del_ptr(trans, root, &path, level,
3536 * Remove the corresponding extent
3537 * return value is not concerned.
3539 btrfs_release_path(&path);
3540 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541 0, root->root_key.objectid,
3543 cache = next_cache_extent(cache);
3546 /* Balance the btree using btrfs_search_slot() */
3547 cache = first_cache_extent(corrupt_blocks);
3549 corrupt = container_of(cache, struct btrfs_corrupt_block,
3551 memcpy(&key, &corrupt->key, sizeof(key));
3552 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3555 /* return will always >0 since it won't find the item */
3557 btrfs_release_path(&path);
3558 cache = next_cache_extent(cache);
3561 btrfs_commit_transaction(trans, root);
3562 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Visible steps: install a per-tree corrupt_blocks cache in fs_info, mark the
 * root's record in @root_cache as having a root item (when its refs are > 0),
 * move queued orphan data extents to their inode records, check the root
 * block itself, walk the tree with walk_down_tree()/walk_up_tree() (starting
 * at drop_progress for a root that is being dropped), list corrupted tree
 * blocks and try repair_btree() on them, merge the collected root records
 * into @root_cache and finally check the collected inode records.
 *
 * @wc is reset for this tree: its nodes array is cleared and the root level
 * is recorded as both active_node and root_level.
 */
3566 static int check_fs_root(struct btrfs_root *root,
3567 struct cache_tree *root_cache,
3568 struct walk_control *wc)
3574 struct btrfs_path path;
3575 struct shared_node root_node;
3576 struct root_record *rec;
3577 struct btrfs_root_item *root_item = &root->root_item;
3578 struct cache_tree corrupt_blocks;
3579 struct orphan_data_extent *orphan;
3580 struct orphan_data_extent *tmp;
3581 enum btrfs_tree_block_status status;
3582 struct node_refs nrefs;
3585 * Reuse the corrupt_block cache tree to record corrupted tree block
3587 * Unlike the usage in extent tree check, here we do it in a per
3588 * fs/subvol tree base.
3590 cache_tree_init(&corrupt_blocks);
3591 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* No root record is created for the tree reloc root */
3593 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594 rec = get_root_rec(root_cache, root->root_key.objectid);
3595 BUG_ON(IS_ERR(rec));
3596 if (btrfs_root_refs(root_item) > 0)
3597 rec->found_root_item = 1;
3600 btrfs_init_path(&path);
3601 memset(&root_node, 0, sizeof(root_node));
3602 cache_tree_init(&root_node.root_cache);
3603 cache_tree_init(&root_node.inode_cache);
3604 memset(&nrefs, 0, sizeof(nrefs));
3606 /* Move the orphan extent record to corresponding inode_record */
3607 list_for_each_entry_safe(orphan, tmp,
3608 &root->orphan_data_extents, list) {
3609 struct inode_record *inode;
3611 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3613 BUG_ON(IS_ERR(inode));
3614 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615 list_move(&orphan->list, &inode->orphan_extents);
3618 level = btrfs_header_level(root->node);
3619 memset(wc->nodes, 0, sizeof(wc->nodes));
3620 wc->nodes[level] = &root_node;
3621 wc->active_node = level;
3622 wc->root_level = level;
3624 /* We may not have checked the root block, lets do that now */
3625 if (btrfs_is_leaf(root->node))
3626 status = btrfs_check_leaf(root, NULL, root->node);
3628 status = btrfs_check_node(root, NULL, root->node);
3629 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * Live root, or no drop progress recorded: start the walk at the root node.
 * Otherwise resume from the recorded drop_progress key and drop_level.
 */
3632 if (btrfs_root_refs(root_item) > 0 ||
3633 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634 path.nodes[level] = root->node;
3635 extent_buffer_get(root->node);
3636 path.slots[level] = 0;
3638 struct btrfs_key key;
3639 struct btrfs_disk_key found_key;
3641 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642 level = root_item->drop_level;
3643 path.lowest_level = level;
/* drop_level comes from the on-disk root item, so it is range-checked first */
3644 if (level > btrfs_header_level(root->node) ||
3645 level >= BTRFS_MAX_LEVEL) {
3646 error("ignoring invalid drop level: %u", level);
3649 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3652 btrfs_node_key(path.nodes[level], &found_key,
3654 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655 sizeof(found_key)));
3659 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3665 wret = walk_up_tree(root, &path, wc, &level);
3672 btrfs_release_path(&path);
/* Report the corrupted tree blocks recorded during the walk, then try to repair them */
3674 if (!cache_tree_empty(&corrupt_blocks)) {
3675 struct cache_extent *cache;
3676 struct btrfs_corrupt_block *corrupt;
3678 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679 root->root_key.objectid);
3680 cache = first_cache_extent(&corrupt_blocks);
3682 corrupt = container_of(cache,
3683 struct btrfs_corrupt_block,
3685 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686 cache->start, corrupt->level,
3687 corrupt->key.objectid, corrupt->key.type,
3688 corrupt->key.offset);
3689 cache = next_cache_extent(cache);
3692 printf("Try to repair the btree for root %llu\n",
3693 root->root_key.objectid);
3694 ret = repair_btree(root, &corrupt_blocks);
3696 fprintf(stderr, "Failed to repair btree: %s\n",
3699 printf("Btree for root %llu is fixed\n",
3700 root->root_key.objectid);
3704 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* The inode record still being built is marked as fully checked */
3708 if (root_node.current) {
3709 root_node.current->checked = 1;
3710 maybe_free_inode_rec(&root_node.inode_cache,
3714 err = check_inode_recs(root, &root_node.inode_cache);
/* Detach the on-stack corrupt_blocks cache from fs_info before returning */
3718 free_corrupt_blocks_tree(&corrupt_blocks);
3719 root->fs_info->corrupt_blocks = NULL;
3720 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether a root with @objectid is handled by the fs root checks.
 * The two relocation tree objectids are tested explicitly; every other
 * objectid is decided by is_fstree().
 */
3724 static int fs_root_objectid(u64 objectid)
3726 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3729 return is_fstree(objectid);
/*
 * Iterate over the tree root and check every fs tree found there.
 *
 * ROOT_ITEM keys accepted by fs_root_objectid() are read and passed to
 * check_fs_root(); ROOT_REF and ROOT_BACKREF keys are passed to
 * process_root_ref().  When check_fs_root() returns -EAGAIN, or when the tree
 * root node changed since it was sampled, the collected root records are
 * freed and the path is released (presumably to restart the scan).
 */
3732 static int check_fs_roots(struct btrfs_root *root,
3733 struct cache_tree *root_cache)
3735 struct btrfs_path path;
3736 struct btrfs_key key;
3737 struct walk_control wc;
3738 struct extent_buffer *leaf, *tree_node;
3739 struct btrfs_root *tmp_root;
3740 struct btrfs_root *tree_root = root->fs_info->tree_root;
3744 if (ctx.progress_enabled) {
3745 ctx.tp = TASK_FS_ROOTS;
3746 task_start(ctx.info);
3750 * Just in case we made any changes to the extent tree that weren't
3751 * reflected into the free space cache yet.
3754 reset_cached_block_groups(root->fs_info);
3755 memset(&wc, 0, sizeof(wc));
3756 cache_tree_init(&wc.shared);
3757 btrfs_init_path(&path);
3762 key.type = BTRFS_ROOT_ITEM_KEY;
3763 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Sample the tree root node so that a later change of it can be detected */
3768 tree_node = tree_root->node;
3770 if (tree_node != tree_root->node) {
3771 free_root_recs_tree(root_cache);
3772 btrfs_release_path(&path);
3775 leaf = path.nodes[0];
3776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777 ret = btrfs_next_leaf(tree_root, &path);
3783 leaf = path.nodes[0];
3785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787 fs_root_objectid(key.objectid)) {
/* The tree reloc root is read uncached and is freed again after the check */
3788 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789 tmp_root = btrfs_read_fs_root_no_cache(
3790 root->fs_info, &key);
3792 key.offset = (u64)-1;
3793 tmp_root = btrfs_read_fs_root(
3794 root->fs_info, &key);
3796 if (IS_ERR(tmp_root)) {
3800 ret = check_fs_root(tmp_root, root_cache, &wc);
3801 if (ret == -EAGAIN) {
3802 free_root_recs_tree(root_cache);
3803 btrfs_release_path(&path);
3808 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809 btrfs_free_fs_root(tmp_root);
3810 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811 key.type == BTRFS_ROOT_BACKREF_KEY) {
3812 process_root_ref(leaf, path.slots[0], &key,
3819 btrfs_release_path(&path);
3821 free_extent_cache_tree(&wc.shared);
3822 if (!cache_tree_empty(&wc.shared))
3823 fprintf(stderr, "warning line %d\n", __LINE__);
3825 task_stop(ctx.info);
/*
 * Error flags.  Every flag occupies its own bit, so several of them can be
 * OR-ed into a single error value (check_inode_ref() accumulates
 * ROOT_DIR_ERROR this way).  find_dir_item() returns ROOT_DIR_ERROR,
 * DIR_ITEM_MISSING or DIR_ITEM_MISMATCH.
 */
3830 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
3831 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
3832 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
3833 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
3834 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
3835 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
3836 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
3837 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
3838 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
3839 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
3840 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
3841 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
3842 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
3843 #define NO_INODE_ITEM (1<<14) /* no inode_item */
3844 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
3847 * Find the DIR_ITEM/DIR_INDEX for the given key and check that it matches the
3848 * specified INODE_REF/INODE_EXTREF.
3850 * @root: the root of the fs/file tree
3851 * @ref_key: the key of the INODE_REF/INODE_EXTREF
3852 * @key: the key of the DIR_ITEM/DIR_INDEX
3853 * @index: the index in the INODE_REF/INODE_EXTREF, used to
3854 * distinguish the root dir from a normal dir/file
3855 * @name: the name in the INODE_REF/INODE_EXTREF
3856 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
3857 * @mode: the st_mode of INODE_ITEM
3859 * Return 0 if no error occurred.
3860 * Return ROOT_DIR_ERROR if a DIR_ITEM/DIR_INDEX was found for the root dir.
3861 * Return DIR_ITEM_MISSING if no DIR_ITEM/DIR_INDEX was found for a normal dir/file.
3863 * Return DIR_ITEM_MISMATCH if the INODE_REF/INODE_EXTREF and the DIR_ITEM/DIR_INDEX
3864 * do not match for a normal dir/file.
/*
 * NOTE(review): @name is printed with "%s" below.  The callers visible in
 * this file pass a BTRFS_NAME_LEN sized buffer that is filled without an
 * explicit terminator, so a name of exactly BTRFS_NAME_LEN bytes may not be
 * NUL-terminated -- confirm.
 */
3866 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3867 struct btrfs_key *key, u64 index, char *name,
3868 u32 namelen, u32 mode)
3870 struct btrfs_path path;
3871 struct extent_buffer *node;
3872 struct btrfs_dir_item *di;
3873 struct btrfs_key location;
3874 char namebuf[BTRFS_NAME_LEN] = {0};
3884 btrfs_init_path(&path);
/* Read-only search: no transaction, no insertion, no COW */
3885 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3887 ret = DIR_ITEM_MISSING;
3891 /* Process root dir and goto out*/
3894 ret = ROOT_DIR_ERROR;
3896 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3898 ref_key->type == BTRFS_INODE_REF_KEY ?
3900 ref_key->objectid, ref_key->offset,
3901 key->type == BTRFS_DIR_ITEM_KEY ?
3902 "DIR_ITEM" : "DIR_INDEX");
3910 /* Process normal file/dir */
3912 ret = DIR_ITEM_MISSING;
3914 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3916 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3917 ref_key->objectid, ref_key->offset,
3918 key->type == BTRFS_DIR_ITEM_KEY ?
3919 "DIR_ITEM" : "DIR_INDEX",
3920 key->objectid, key->offset, namelen, name,
3921 imode_to_type(mode));
3925 /* Check whether inode_id/filetype/name match */
3926 node = path.nodes[0];
3927 slot = path.slots[0];
3928 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3929 total = btrfs_item_size_nr(node, slot);
/* One item can hold several dir entries packed back to back; walk them all */
3930 while (cur < total) {
/* Assume a mismatch until an entry passes every comparison below */
3931 ret = DIR_ITEM_MISMATCH;
3932 name_len = btrfs_dir_name_len(node, di);
3933 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM of the inode that owns the ref */
3935 btrfs_dir_item_key_to_cpu(node, di, &location);
3936 if (location.objectid != ref_key->objectid ||
3937 location.type != BTRFS_INODE_ITEM_KEY ||
3938 location.offset != 0)
3941 filetype = btrfs_dir_type(node, di);
3942 if (imode_to_type(mode) != filetype)
3945 if (name_len <= BTRFS_NAME_LEN) {
3948 len = BTRFS_NAME_LEN;
3949 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3951 key->type == BTRFS_DIR_ITEM_KEY ?
3952 "DIR_ITEM" : "DIR_INDEX",
3953 key->objectid, key->offset, name_len);
/* The name bytes are stored directly after the btrfs_dir_item structure */
3955 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3956 if (len != namelen || strncmp(namebuf, name, len))
/* Advance by the untrimmed on-disk entry size: header + name + data */
3962 len = sizeof(*di) + name_len + data_len;
3963 di = (struct btrfs_dir_item *)((char *)di + len);
3966 if (ret == DIR_ITEM_MISMATCH)
3968 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3970 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3971 ref_key->objectid, ref_key->offset,
3972 key->type == BTRFS_DIR_ITEM_KEY ?
3973 "DIR_ITEM" : "DIR_INDEX",
3974 key->objectid, key->offset, namelen, name,
3975 imode_to_type(mode));
3977 btrfs_release_path(&path);
3982 * Traverse the given INODE_REF and call find_dir_item() to find related
3983 * DIR_ITEM/DIR_INDEX.
3985 * @root: the root of the fs/file tree
3986 * @ref_key: the key of the INODE_REF
3987 * @refs: the count of INODE_REF
3988 * @mode: the st_mode of INODE_ITEM
3990 * Return 0 if no error occurred.
/*
 * NOTE(review): namebuf is BTRFS_NAME_LEN bytes and is filled by
 * read_extent_buffer() without an explicit terminator, then handed to
 * find_dir_item(), which prints it with "%s".  A name of exactly
 * BTRFS_NAME_LEN bytes would not be NUL-terminated -- confirm.
 */
3992 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3993 struct extent_buffer *node, int slot, u64 *refs,
3996 struct btrfs_key key;
3997 struct btrfs_inode_ref *ref;
3998 char namebuf[BTRFS_NAME_LEN] = {0};
4006 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4007 total = btrfs_item_size_nr(node, slot);
4010 /* Update inode ref count */
4013 index = btrfs_inode_ref_index(node, ref);
4014 name_len = btrfs_inode_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are cut to the buffer size, with a warning */
4015 if (name_len <= BTRFS_NAME_LEN) {
4018 len = BTRFS_NAME_LEN;
4019 warning("root %llu INODE_REF[%llu %llu] name too long",
4020 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes are stored directly after the btrfs_inode_ref structure */
4023 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4025 /* Check root dir ref name */
4026 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4027 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4028 root->objectid, ref_key->objectid, ref_key->offset,
4030 err |= ROOT_DIR_ERROR;
/* For an INODE_REF the key offset is used as the objectid of the dir keys */
4033 /* Find related DIR_INDEX */
4034 key.objectid = ref_key->offset;
4035 key.type = BTRFS_DIR_INDEX_KEY;
4037 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4040 /* Find related dir_item */
4041 key.objectid = ref_key->offset;
4042 key.type = BTRFS_DIR_ITEM_KEY;
4043 key.offset = btrfs_name_hash(namebuf, len);
4044 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next ref packed in the same item, using the untrimmed name length */
4047 len = sizeof(*ref) + name_len;
4048 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4057 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4058 * DIR_ITEM/DIR_INDEX.
4060 * @root: the root of the fs/file tree
4061 * @ref_key: the key of the INODE_EXTREF
4062 * @refs: the count of INODE_EXTREF
4063 * @mode: the st_mode of INODE_ITEM
4065 * Return 0 if no error occurred.
4067 static int check_inode_extref(struct btrfs_root *root,
4068 struct btrfs_key *ref_key,
4069 struct extent_buffer *node, int slot, u64 *refs,
4072 struct btrfs_key key;
4073 struct btrfs_inode_extref *extref;
4074 char namebuf[BTRFS_NAME_LEN] = {0};
     /* Start at the first btrfs_inode_extref stored in the item; total is the item size. */
4084 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4085 total = btrfs_item_size_nr(node, slot);
4088 /* update inode ref count */
4090 name_len = btrfs_inode_extref_name_len(node, extref);
4091 index = btrfs_inode_extref_index(node, extref);
     /* The parent directory id is stored in each extref entry and keys the lookups below. */
4092 parent = btrfs_inode_extref_parent(node, extref);
     /*
      * len is the number of name bytes copied into namebuf.  An on-disk name
      * longer than BTRFS_NAME_LEN is only warned about and len is clamped to
      * the capacity of namebuf.
      */
4093 if (name_len <= BTRFS_NAME_LEN) {
4096 len = BTRFS_NAME_LEN;
4097 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4098 root->objectid, ref_key->objectid, ref_key->offset);
     /* The name bytes directly follow the fixed-size extref header. */
4100 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4102 /* Check root dir ref name */
     /*
      * NOTE(review): strncmp() is bounded by name_len, so a name_len of 0 or
      * a one-byte name consisting of a single dot also compares equal to the
      * two-dot literal -- confirm whether an exact length check is intended.
      * NOTE(review): namebuf is exactly BTRFS_NAME_LEN bytes, so after a
      * full-length copy it has no terminating NUL; if it is handed to the %s
      * below that read may run past the buffer -- TODO confirm.
      */
4103 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4104 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4105 root->objectid, ref_key->objectid, ref_key->offset,
4107 err |= ROOT_DIR_ERROR;
4110 /* find related dir_index */
4111 key.objectid = parent;
4112 key.type = BTRFS_DIR_INDEX_KEY;
4114 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4117 /* find related dir_item */
4118 key.objectid = parent;
4119 key.type = BTRFS_DIR_ITEM_KEY;
     /* DIR_ITEM keys use the hash of the (possibly clamped) name as offset. */
4120 key.offset = btrfs_name_hash(namebuf, len);
4121 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
     /* Step to the next packed extref using the on-disk name_len, not the clamped len. */
4124 len = sizeof(*extref) + name_len;
4125 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4135 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4136 * DIR_ITEM/DIR_INDEX match.
4138 * @root: the root of the fs/file tree
4139 * @key: the key of the INODE_REF/INODE_EXTREF
4140 * @name: the name in the INODE_REF/INODE_EXTREF
4141 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4142 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4144 * @ext_ref: the EXTENDED_IREF feature
4146 * Return 0 if no error occurred.
4147 * Return >0 for error bitmap
4149 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4150 char *name, int namelen, u64 index,
4151 unsigned int ext_ref)
4153 struct btrfs_path path;
4154 struct btrfs_inode_ref *ref;
4155 struct btrfs_inode_extref *extref;
4156 struct extent_buffer *node;
4157 char ref_namebuf[BTRFS_NAME_LEN] = {0};
     /* First pass: look up the item addressed by @key exactly as passed in. */
4168 btrfs_init_path(&path);
4169 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4171 ret = INODE_REF_MISSING;
4175 node = path.nodes[0];
4176 slot = path.slots[0];
4178 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4179 total = btrfs_item_size_nr(node, slot);
4181 /* Iterate all entry of INODE_REF */
4182 while (cur < total) {
     /* Every entry starts out as "missing"; presumably cleared once it matches. */
4183 ret = INODE_REF_MISSING;
4185 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4186 ref_index = btrfs_inode_ref_index(node, ref);
     /* An index of (u64)-1 acts as a wildcard: the index comparison is bypassed. */
4187 if (index != (u64)-1 && index != ref_index)
4190 if (ref_namelen <= BTRFS_NAME_LEN) {
4193 len = BTRFS_NAME_LEN;
4194 warning("root %llu INODE %s[%llu %llu] name too long",
4196 key->type == BTRFS_INODE_REF_KEY ?
4198 key->objectid, key->offset);
4200 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
     /* Both the (clamped) length and the bytes must agree with the caller's name. */
4203 if (len != namelen || strncmp(ref_namebuf, name, len))
     /* Step to the next packed ref using the on-disk name length. */
4209 len = sizeof(*ref) + ref_namelen;
4210 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4215 /* Skip if not support EXTENDED_IREF feature */
4219 btrfs_release_path(&path);
4220 btrfs_init_path(&path);
     /*
      * Second pass: rewrite the caller's key in place into the matching
      * INODE_EXTREF key.  The old offset is kept as dir_id and the new offset
      * is the extref hash of (dir_id, name).  Note that *key is left modified
      * for the caller.
      */
4222 dir_id = key->offset;
4223 key->type = BTRFS_INODE_EXTREF_KEY;
4224 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4226 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4228 ret = INODE_REF_MISSING;
4232 node = path.nodes[0];
4233 slot = path.slots[0];
4235 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4237 total = btrfs_item_size_nr(node, slot);
4239 /* Iterate all entry of INODE_EXTREF */
4240 while (cur < total) {
4241 ret = INODE_REF_MISSING;
4243 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4244 ref_index = btrfs_inode_extref_index(node, extref);
4245 parent = btrfs_inode_extref_parent(node, extref);
4246 if (index != (u64)-1 && index != ref_index)
     /* Entries are additionally filtered on their parent being dir_id. */
4249 if (parent != dir_id)
     /*
      * NOTE(review): key->type was set to BTRFS_INODE_EXTREF_KEY above, so
      * the key->type test inside the warning below cannot be true on this
      * path.  The format string also carries its own "Warning: " prefix and
      * a trailing newline, unlike the other warning() calls in this function
      * -- presumably warning() adds both already; TODO confirm.
      */
4252 if (ref_namelen <= BTRFS_NAME_LEN) {
4255 len = BTRFS_NAME_LEN;
4256 warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4258 key->type == BTRFS_INODE_REF_KEY ?
4260 key->objectid, key->offset);
4262 read_extent_buffer(node, ref_namebuf,
4263 (unsigned long)(extref + 1), len);
4265 if (len != namelen || strncmp(ref_namebuf, name, len))
     /* Step to the next packed extref using the on-disk name length. */
4272 len = sizeof(*extref) + ref_namelen;
4273 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4278 btrfs_release_path(&path);
4283 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4284 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4286 * @root: the root of the fs/file tree
4287 * @key: the key of the DIR_ITEM/DIR_INDEX
4288 * @size: the st_size of the INODE_ITEM
4289 * @ext_ref: the EXTENDED_IREF feature
4291 * Return 0 if no error occurred.
4293 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4294 struct extent_buffer *node, int slot, u64 *size,
4295 unsigned int ext_ref)
4297 struct btrfs_dir_item *di;
4298 struct btrfs_inode_item *ii;
4299 struct btrfs_path path;
4300 struct btrfs_key location;
4301 char namebuf[BTRFS_NAME_LEN] = {0};
4314 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4315 * ignore index check.
4317 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
     /* Start at the first btrfs_dir_item packed into this item; total is the item size. */
4319 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4320 total = btrfs_item_size_nr(node, slot);
4322 while (cur < total) {
4323 data_len = btrfs_dir_data_len(node, di);
4325 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4326 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4327 "DIR_ITEM" : "DIR_INDEX",
4328 key->objectid, key->offset, data_len);
4330 name_len = btrfs_dir_name_len(node, di);
     /* Over-long names are warned about and len is clamped to the namebuf capacity. */
4331 if (name_len <= BTRFS_NAME_LEN) {
4334 len = BTRFS_NAME_LEN;
4335 warning("root %llu %s[%llu %llu] name too long",
4337 key->type == BTRFS_DIR_ITEM_KEY ?
4338 "DIR_ITEM" : "DIR_INDEX",
4339 key->objectid, key->offset);
     /* The caller's running size grows by the on-disk name_len, not the clamped len. */
4341 (*size) += name_len;
     /*
      * NOTE(review): namebuf is exactly BTRFS_NAME_LEN bytes and is reused
      * for every entry.  After a full-length copy it has no terminating NUL,
      * and a shorter name may keep stale tail bytes from an earlier entry,
      * yet it is printed with %s further down -- TODO confirm.
      */
4343 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4344 filetype = btrfs_dir_type(node, di);
4346 btrfs_init_path(&path);
     /* location is the key of the object this directory entry points at. */
4347 btrfs_dir_item_key_to_cpu(node, di, &location);
4349 /* Ignore related ROOT_ITEM check */
4350 if (location.type == BTRFS_ROOT_ITEM_KEY)
4353 /* Check relative INODE_ITEM(existence/filetype) */
4354 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4356 err |= INODE_ITEM_MISSING;
4357 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4358 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4359 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4360 key->offset, location.objectid, name_len,
4365 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4366 struct btrfs_inode_item);
4367 mode = btrfs_inode_mode(path.nodes[0], ii);
     /* The type recorded in the dir entry must agree with the type derived from the inode mode. */
4369 if (imode_to_type(mode) != filetype) {
4370 err |= INODE_ITEM_MISMATCH;
4371 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4372 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4373 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4374 key->offset, name_len, namebuf, filetype);
4377 /* Check relative INODE_REF/INODE_EXTREF */
     /*
      * location is reused as the INODE_REF key: its objectid is kept while
      * the offset becomes the objectid of this directory key.
      */
4378 location.type = BTRFS_INODE_REF_KEY;
4379 location.offset = key->objectid;
4380 ret = find_inode_ref(root, &location, namebuf, len,
4383 if (ret & INODE_REF_MISSING)
4384 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4385 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4386 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4387 key->offset, name_len, namebuf, filetype);
4390 btrfs_release_path(&path);
     /* Step over header, name and data to reach the next packed entry. */
4391 len = sizeof(*di) + name_len + data_len;
4392 di = (struct btrfs_dir_item *)((char *)di + len);
     /* A DIR_INDEX item with bytes left after one entry is reported as an error. */
4395 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4396 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4397 root->objectid, key->objectid, key->offset);
4406 * Check file extent datasum/hole, update the size of the file extents,
4407 * check and update the last offset of the file extent.
4409 * @root: the root of fs/file tree.
4410 * @fkey: the key of the file extent.
4411 * @nodatasum: INODE_NODATASUM feature.
4412 * @size: the sum of all EXTENT_DATA items size for this inode.
4413 * @end: the offset of the last extent.
4415 * Return 0 if no error occurred.
4417 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4418 struct extent_buffer *node, int slot,
4419 unsigned int nodatasum, u64 *size, u64 *end)
4421 struct btrfs_file_extent_item *fi;
4424 u64 extent_num_bytes;
4426 unsigned int extent_type;
4427 unsigned int is_hole;
4431 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4433 extent_type = btrfs_file_extent_type(node, fi);
4434 /* Skip if file extent is inline */
4435 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4436 struct btrfs_item *e = btrfs_item_nr(slot);
4437 u32 item_inline_len;
     /*
      * An inline extent is flagged when its length is zero or differs from
      * the length derived from the item; the length is added to *size in
      * either case.
      * NOTE(review): unlike the other failures in this function no error()
      * message appears to accompany this FILE_EXTENT_ERROR -- TODO confirm.
      */
4439 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4440 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4441 if (extent_num_bytes == 0 ||
4442 extent_num_bytes != item_inline_len)
4443 err |= FILE_EXTENT_ERROR;
4444 *size += extent_num_bytes;
4448 /* Check extent type */
4449 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4450 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4451 err |= FILE_EXTENT_ERROR;
4452 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4453 root->objectid, fkey->objectid, fkey->offset);
4457 /* Check REG_EXTENT/PREALLOC_EXTENT */
4458 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4459 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4460 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
     /* A hole is an extent whose disk_bytenr and disk_num_bytes are both zero. */
4461 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4463 /* Check EXTENT_DATA datasum */
     /*
      * found is filled in by count_csum_range() for the on-disk range and is
      * compared against disk_num_bytes below, so it is presumably a byte
      * count -- TODO confirm.
      */
4464 ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4465 if (found > 0 && nodatasum) {
4466 err |= ODD_CSUM_ITEM;
4467 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4468 root->objectid, fkey->objectid, fkey->offset);
4469 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4471 (ret < 0 || found == 0 || found < disk_num_bytes)) {
4472 err |= CSUM_ITEM_MISSING;
4473 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4474 root->objectid, fkey->objectid, fkey->offset);
4475 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4476 err |= ODD_CSUM_ITEM;
4477 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4478 root->objectid, fkey->objectid, fkey->offset);
4481 /* Check EXTENT_DATA hole */
     /*
      * With no_holes set a hole extent is an error.  With no_holes unset the
      * extent has to start exactly at *end, the running end offset kept by
      * the caller.
      */
4482 if (no_holes && is_hole) {
4483 err |= FILE_EXTENT_ERROR;
4484 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4485 root->objectid, fkey->objectid, fkey->offset);
4486 } else if (!no_holes && *end != fkey->offset) {
4487 err |= FILE_EXTENT_ERROR;
4488 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4489 root->objectid, fkey->objectid, fkey->offset);
     /* Advance the caller's running totals by the logical length of this extent. */
4492 *end += extent_num_bytes;
4494 *size += extent_num_bytes;
4500 * Check INODE_ITEM and related ITEMs (the same inode number)
4501 * 1. check link count
4502 * 2. check inode ref/extref
4503 * 3. check dir item/index
4505 * @ext_ref: the EXTENDED_IREF feature
4507 * Return 0 if no error occurred.
4508 * Return >0 (error bitmap) on error or when the traversal is done
4510 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4511 unsigned int ext_ref)
4513 struct extent_buffer *node;
4514 struct btrfs_inode_item *ii;
4515 struct btrfs_key key;
4524 u64 extent_size = 0;
4526 unsigned int nodatasum;
4531 node = path->nodes[0];
4532 slot = path->slots[0];
4534 btrfs_item_key_to_cpu(node, &key, slot);
     /* Every item handled below must carry this objectid. */
4535 inode_id = key.objectid;
     /* The orphan objectid is not a real inode: only step the path forward. */
4537 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4538 ret = btrfs_next_item(root, path);
     /* Snapshot the INODE_ITEM fields that are verified after the item walk. */
4544 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4545 isize = btrfs_inode_size(node, ii);
4546 nbytes = btrfs_inode_nbytes(node, ii);
4547 mode = btrfs_inode_mode(node, ii);
4548 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4549 nlink = btrfs_inode_nlink(node, ii);
4550 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4553 ret = btrfs_next_item(root, path);
4555 /* out will fill 'err' using current statistics */
4557 } else if (ret > 0) {
4562 node = path->nodes[0];
4563 slot = path->slots[0];
4564 btrfs_item_key_to_cpu(node, &key, slot);
4565 if (key.objectid != inode_id)
     /* INODE_REF and INODE_EXTREF entries are counted into refs by their checkers. */
4569 case BTRFS_INODE_REF_KEY:
4570 ret = check_inode_ref(root, &key, node, slot, &refs,
4574 case BTRFS_INODE_EXTREF_KEY:
     /* NOTE(review): the key.type test is redundant inside this case label. */
4575 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4576 warning("root %llu EXTREF[%llu %llu] isn't supported",
4577 root->objectid, key.objectid,
4579 ret = check_inode_extref(root, &key, node, slot, &refs,
     /*
      * NOTE(review): the warning below is shared by DIR_ITEM and DIR_INDEX
      * but its text only names DIR_INDEX, and the value printed after
      * "mode" is imode_to_type(mode), not the mode itself.
      */
4583 case BTRFS_DIR_ITEM_KEY:
4584 case BTRFS_DIR_INDEX_KEY:
4586 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4587 root->objectid, inode_id,
4588 imode_to_type(mode), key.objectid,
     /* check_dir_item() accumulates the name lengths of the entries into size. */
4591 ret = check_dir_item(root, &key, node, slot, &size,
     /* NOTE(review): the warning text below lacks a verb ("shouldn't have"). */
4595 case BTRFS_EXTENT_DATA_KEY:
4597 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4598 root->objectid, inode_id, key.objectid,
     /* check_file_extent() accumulates extent lengths into extent_size. */
4601 ret = check_file_extent(root, &key, node, slot,
4602 nodatasum, &extent_size,
4606 case BTRFS_XATTR_ITEM_KEY:
4609 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4610 key.objectid, key.type, key.offset);
4615 /* verify INODE_ITEM nlink/isize/nbytes */
4618 err |= LINK_COUNT_ERROR;
4619 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4620 root->objectid, inode_id, nlink);
4624 * Just a warning, as dir inode nbytes is just an
4625 * instructive value.
4627 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4628 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4629 root->objectid, inode_id, root->nodesize);
     /* The stored inode size is compared with the size accumulated by check_dir_item(). */
4632 if (isize != size) {
4634 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4635 root->objectid, inode_id, isize, size);
     /* The link count has to equal the number of refs counted during the walk. */
4638 if (nlink != refs) {
4639 err |= LINK_COUNT_ERROR;
4640 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4641 root->objectid, inode_id, nlink, refs);
4642 } else if (!nlink) {
4646 if (!nbytes && !no_holes && extent_end < isize) {
4647 err |= NBYTES_ERROR;
4648 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4649 root->objectid, inode_id, isize);
     /* nbytes has to equal the extent bytes accumulated by check_file_extent(). */
4652 if (nbytes != extent_size) {
4653 err |= NBYTES_ERROR;
4654 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4655 root->objectid, inode_id, nbytes, extent_size);
4663 * Iterate all item on the tree and call check_inode_item() to check.
4665 * @root: the root of the tree to be checked.
4666 * @ext_ref: the EXTENDED_IREF feature
4668 * Return 0 if no error found.
4669 * Return <0 for error.
4670 * All internal error bitmap will be converted to -EIO, to avoid
4671 * mixing negative and positive return value.
4673 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4675 struct btrfs_path *path;
4676 struct btrfs_key key;
     /* The path is heap allocated here and released with btrfs_free_path() at the end. */
4680 path = btrfs_alloc_path();
4688 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4693 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4696 * All check must start with inode item, skip if not
4698 if (key.type == BTRFS_INODE_ITEM_KEY) {
     /*
      * check_inode_item() moves the path forward itself through
      * btrfs_next_item() while it consumes the items of the inode.
      */
4699 ret = check_inode_item(root, path, ext_ref);
4701 if (err & LAST_ITEM)
4705 error("root %llu ITEM[%llu %u %llu] isn't INODE_ITEM, skip to next inode",
4706 root->objectid, key.objectid, key.type,
4709 err |= NO_INODE_ITEM;
     /* Remember the objectid so that every item sharing it can be stepped over. */
4710 inode_id = key.objectid;
4713 * skip to next inode
4714 * TODO: Maybe search_slot() will be faster?
4717 ret = btrfs_next_item(root, path);
4720 } else if (ret < 0) {
4724 btrfs_item_key_to_cpu(path->nodes[0], &key,
4726 } while (inode_id == key.objectid);
4733 btrfs_free_path(path);
/*
 * Cross-check every backref queued on rec->backrefs against the extent
 * record itself.
 *
 * Per backref the checks are: it was seen in the extent tree
 * (found_extent_tree), a tree backref was actually referenced (found_ref),
 * and for a data backref that found_ref equals num_refs, disk_bytenr equals
 * rec->start and bytes equals rec->nr.  The found references are also summed
 * and compared with rec->refs.  Diagnostics are written to stderr.
 *
 * @rec:        extent record whose backrefs are verified
 * @print_errs: presumably selects between printing every mismatch and
 *              stopping at the first one -- TODO confirm
 *
 * maybe_free_extent_rec() treats a return value of 0 as "nothing wrong".
 */
4737 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4739 struct list_head *cur = rec->backrefs.next;
4740 struct extent_backref *back;
4741 struct tree_backref *tback;
4742 struct data_backref *dback;
4746 while(cur != &rec->backrefs) {
4747 back = to_extent_backref(cur);
     /* Check 1: the backref must have been seen in the extent tree. */
4749 if (!back->found_extent_tree) {
4753 if (back->is_data) {
4754 dback = to_data_backref(back);
4755 fprintf(stderr, "Backref %llu %s %llu"
4756 " owner %llu offset %llu num_refs %lu"
4757 " not found in extent tree\n",
4758 (unsigned long long)rec->start,
4759 back->full_backref ?
4761 back->full_backref ?
4762 (unsigned long long)dback->parent:
4763 (unsigned long long)dback->root,
4764 (unsigned long long)dback->owner,
4765 (unsigned long long)dback->offset,
4766 (unsigned long)dback->num_refs);
4768 tback = to_tree_backref(back);
4769 fprintf(stderr, "Backref %llu parent %llu"
4770 " root %llu not found in extent tree\n",
4771 (unsigned long long)rec->start,
4772 (unsigned long long)tback->parent,
4773 (unsigned long long)tback->root);
     /* Check 2: a tree backref must have been referenced while walking the trees. */
4776 if (!back->is_data && !back->found_ref) {
4780 tback = to_tree_backref(back);
4781 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4782 (unsigned long long)rec->start,
4783 back->full_backref ? "parent" : "root",
4784 back->full_backref ?
4785 (unsigned long long)tback->parent :
4786 (unsigned long long)tback->root, back);
     /* Check 3: per data backref, the found count, disk bytenr and byte length. */
4788 if (back->is_data) {
4789 dback = to_data_backref(back);
4790 if (dback->found_ref != dback->num_refs) {
4794 fprintf(stderr, "Incorrect local backref count"
4795 " on %llu %s %llu owner %llu"
4796 " offset %llu found %u wanted %u back %p\n",
4797 (unsigned long long)rec->start,
4798 back->full_backref ?
4800 back->full_backref ?
4801 (unsigned long long)dback->parent:
4802 (unsigned long long)dback->root,
4803 (unsigned long long)dback->owner,
4804 (unsigned long long)dback->offset,
4805 dback->found_ref, dback->num_refs, back);
4807 if (dback->disk_bytenr != rec->start) {
4811 fprintf(stderr, "Backref disk bytenr does not"
4812 " match extent record, bytenr=%llu, "
4813 "ref bytenr=%llu\n",
4814 (unsigned long long)rec->start,
4815 (unsigned long long)dback->disk_bytenr);
4818 if (dback->bytes != rec->nr) {
4822 fprintf(stderr, "Backref bytes do not match "
4823 "extent backref, bytenr=%llu, ref "
4824 "bytes=%llu, backref bytes=%llu\n",
4825 (unsigned long long)rec->start,
4826 (unsigned long long)rec->nr,
4827 (unsigned long long)dback->bytes);
     /* Accumulate the global count; a data backref contributes its found_ref. */
4830 if (!back->is_data) {
4833 dback = to_data_backref(back);
4834 found += dback->found_ref;
     /* Check 4: the accumulated count has to equal rec->refs. */
4837 if (found != rec->refs) {
4841 fprintf(stderr, "Incorrect global backref count "
4842 "on %llu found %llu wanted %llu\n",
4843 (unsigned long long)rec->start,
4844 (unsigned long long)found,
4845 (unsigned long long)rec->refs);
/*
 * Drain rec->backrefs: keep taking the first entry of the list until the
 * list is empty.  Each entry is presumably unlinked and freed inside the
 * loop -- TODO confirm.
 */
4851 static int free_all_extent_backrefs(struct extent_record *rec)
4853 struct extent_backref *back;
4854 struct list_head *cur;
4855 while (!list_empty(&rec->backrefs)) {
4856 cur = rec->backrefs.next;
4857 back = to_extent_backref(cur);
/*
 * Empty extent_cache: each cache_extent is mapped back to its owning
 * extent_record, removed from the tree, and its backrefs are released with
 * free_all_extent_backrefs().
 *
 * @fs_info:      not used by the statements visible here -- TODO confirm
 * @extent_cache: the cache tree to empty
 */
4864 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4865 struct cache_tree *extent_cache)
4867 struct cache_extent *cache;
4868 struct extent_record *rec;
4871 cache = first_cache_extent(extent_cache);
4874 rec = container_of(cache, struct extent_record, cache);
4875 remove_cache_extent(extent_cache, cache);
4876 free_all_extent_backrefs(rec);
/*
 * Drop rec from extent_cache once nothing about it is left to verify.
 *
 * The record is released only when all of these hold: content and owner ref
 * were checked, extent_item_refs equals refs and refs is non-zero, there are
 * no duplicates, all_backpointers_checked() returns 0, and none of the
 * bad_full_backref, crossing_stripes and wrong_chunk_type flags is set.
 * Otherwise the record is left in the cache.
 */
4881 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4882 struct extent_record *rec)
4884 if (rec->content_checked && rec->owner_ref_checked &&
4885 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4886 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4887 !rec->bad_full_backref && !rec->crossing_stripes &&
4888 !rec->wrong_chunk_type) {
     /* Unlink from the cache tree, release the backrefs, then unlink from rec->list. */
4889 remove_cache_extent(extent_cache, &rec->cache);
4890 free_all_extent_backrefs(rec);
4891 list_del_init(&rec->list);
/*
 * Verify that the owner recorded in the header of buf really references the
 * block.
 *
 * First the backrefs of rec are scanned for a tree backref whose root equals
 * the header owner (entries failing the found_ref and full_backref tests are
 * presumably skipped -- TODO confirm).  If none matches, the tree of the
 * owner is read and searched down to one level above buf, and the block
 * pointer found there is compared with buf->start.
 *
 * Returns 0 when a reference was found and 1 otherwise.
 */
4897 static int check_owner_ref(struct btrfs_root *root,
4898 struct extent_record *rec,
4899 struct extent_buffer *buf)
4901 struct extent_backref *node;
4902 struct tree_backref *back;
4903 struct btrfs_root *ref_root;
4904 struct btrfs_key key;
4905 struct btrfs_path path;
4906 struct extent_buffer *parent;
4911 list_for_each_entry(node, &rec->backrefs, list) {
4914 if (!node->found_ref)
4916 if (node->full_backref)
4918 back = to_tree_backref(node);
4919 if (btrfs_header_owner(buf) == back->root)
     /* Reaching this point for a root block is treated as a fatal inconsistency. */
4922 BUG_ON(rec->is_root);
4924 /* try to find the block by search corresponding fs tree */
4925 key.objectid = btrfs_header_owner(buf);
4926 key.type = BTRFS_ROOT_ITEM_KEY;
4927 key.offset = (u64)-1;
4929 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4930 if (IS_ERR(ref_root))
     /* The first key of buf (item key or node key) is the search target. */
4933 level = btrfs_header_level(buf);
4935 btrfs_item_key_to_cpu(buf, &key, 0);
4937 btrfs_node_key_to_cpu(buf, &key, 0);
     /* The search is set up with lowest_level one above the level of buf. */
4939 btrfs_init_path(&path);
4940 path.lowest_level = level + 1;
4941 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4945 parent = path.nodes[level + 1];
4946 if (parent && buf->start == btrfs_node_blockptr(parent,
4947 path.slots[level + 1]))
4950 btrfs_release_path(&path);
4951 return found ? 0 : 1;
/*
 * Walk the backrefs of rec looking for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.  Full backrefs are tested separately and are
 * presumably skipped, and the result is presumably non-zero on a match --
 * TODO confirm.
 */
4954 static int is_extent_tree_record(struct extent_record *rec)
4956 struct list_head *cur = rec->backrefs.next;
4957 struct extent_backref *node;
4958 struct tree_backref *back;
4961 while(cur != &rec->backrefs) {
4962 node = to_extent_backref(cur);
4966 back = to_tree_backref(node);
4967 if (node->full_backref)
4969 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable block as a corrupt extent.
 *
 * The extent record covering start/len is looked up in extent_cache and
 * tested with is_extent_tree_record(); for a record that passes, its parent
 * key is converted to CPU order and handed to
 * btrfs_add_corrupt_extent_record(), whose result is returned.
 */
4976 static int record_bad_block_io(struct btrfs_fs_info *info,
4977 struct cache_tree *extent_cache,
4980 struct extent_record *rec;
4981 struct cache_extent *cache;
4982 struct btrfs_key key;
4984 cache = lookup_cache_extent(extent_cache, start, len);
4988 rec = container_of(cache, struct extent_record, cache);
4989 if (!is_extent_tree_record(rec))
4992 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4993 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at slot and slot + 1 of buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * swapped.  For a leaf the item data, item offsets, item sizes and keys are
 * swapped; path->slots[0] is rewritten while the keys are set and is left
 * at slot + 1 by the last visible assignment.
 */
4996 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4997 struct extent_buffer *buf, int slot)
4999 if (btrfs_header_level(buf)) {
5000 struct btrfs_key_ptr ptr1, ptr2;
     /* Read both key pointers, then write them back crosswise. */
5002 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5003 sizeof(struct btrfs_key_ptr));
5004 read_extent_buffer(buf, &ptr2,
5005 btrfs_node_key_ptr_offset(slot + 1),
5006 sizeof(struct btrfs_key_ptr));
5007 write_extent_buffer(buf, &ptr1,
5008 btrfs_node_key_ptr_offset(slot + 1),
5009 sizeof(struct btrfs_key_ptr));
5010 write_extent_buffer(buf, &ptr2,
5011 btrfs_node_key_ptr_offset(slot),
5012 sizeof(struct btrfs_key_ptr));
5014 struct btrfs_disk_key key;
     /* The first key of the node is propagated to the level above. */
5015 btrfs_node_key(buf, &key, 0);
5016 btrfs_fixup_low_keys(root, path, &key,
5017 btrfs_header_level(buf) + 1);
5020 struct btrfs_item *item1, *item2;
5021 struct btrfs_key k1, k2;
5022 char *item1_data, *item2_data;
5023 u32 item1_offset, item2_offset, item1_size, item2_size;
5025 item1 = btrfs_item_nr(slot);
5026 item2 = btrfs_item_nr(slot + 1);
5027 btrfs_item_key_to_cpu(buf, &k1, slot);
5028 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5029 item1_offset = btrfs_item_offset(buf, item1);
5030 item2_offset = btrfs_item_offset(buf, item2);
5031 item1_size = btrfs_item_size(buf, item1);
5032 item2_size = btrfs_item_size(buf, item2);
     /* Temporary copies of both item payloads, sized by their own item size. */
5034 item1_data = malloc(item1_size);
5037 item2_data = malloc(item2_size);
5043 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5044 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
     /*
      * NOTE(review): item1_data holds item1_size bytes but is written out
      * with length item2_size, and the reverse for item2_data.  If the two
      * sizes differ, the larger length reads past the smaller buffer and
      * the bytes written no longer correspond to the offset/size
      * bookkeeping set below -- TODO confirm whether equal sizes are
      * guaranteed here.
      */
5046 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5047 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
     /* Exchange the offset and size recorded in the two item headers. */
5051 btrfs_set_item_offset(buf, item1, item2_offset);
5052 btrfs_set_item_offset(buf, item2, item1_offset);
5053 btrfs_set_item_size(buf, item1, item2_size);
5054 btrfs_set_item_size(buf, item2, item1_size);
     /* Exchange the keys; path->slots[0] selects the item whose key is set. */
5056 path->slots[0] = slot;
5057 btrfs_set_item_key_unsafe(root, path, &k2);
5058 path->slots[0] = slot + 1;
5059 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering inside the block at path->nodes[path->lowest_level].
 *
 * Adjacent key pairs are compared front to back; a pair that does not
 * compare strictly ascending is handed to swap_values(), and the buffer is
 * marked dirty.
 *
 * NOTE(review): this is a single forward pass of adjacent swaps, so it is
 * presumably aimed at one misplaced pair rather than at arbitrary disorder;
 * equal keys are swapped as well.  If btrfs_header_nritems() is unsigned
 * and can be 0, the loop bound "nritems - 1" wraps -- TODO confirm.
 */
5064 static int fix_key_order(struct btrfs_trans_handle *trans,
5065 struct btrfs_root *root,
5066 struct btrfs_path *path)
5068 struct extent_buffer *buf;
5069 struct btrfs_key k1, k2;
5071 int level = path->lowest_level;
5074 buf = path->nodes[level];
5075 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5077 btrfs_node_key_to_cpu(buf, &k1, i);
5078 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5080 btrfs_item_key_to_cpu(buf, &k1, i);
5081 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5083 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5085 ret = swap_values(root, path, buf, i);
5088 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at slot from the leaf buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are eligible.  The item headers after slot are moved
 * down by one, nritems is decremented and the buffer is marked dirty.  Only
 * the header array is moved by the visible code; the item data area is not
 * touched.
 */
5094 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5095 struct btrfs_root *root,
5096 struct btrfs_path *path,
5097 struct extent_buffer *buf, int slot)
5099 struct btrfs_key key;
5100 int nritems = btrfs_header_nritems(buf);
5102 btrfs_item_key_to_cpu(buf, &key, slot);
5104 /* These are all the keys we can deal with missing. */
5105 if (key.type != BTRFS_DIR_INDEX_KEY &&
5106 key.type != BTRFS_EXTENT_ITEM_KEY &&
5107 key.type != BTRFS_METADATA_ITEM_KEY &&
5108 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5109 key.type != BTRFS_EXTENT_DATA_REF_KEY)
     /*
      * NOTE(review): buf->start is passed to %llu without the
      * (unsigned long long) cast used for it elsewhere in this file.
      */
5112 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5113 (unsigned long long)key.objectid, key.type,
5114 (unsigned long long)key.offset, slot, buf->start);
     /* Close the gap in the item header array. */
5115 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5116 btrfs_item_nr_offset(slot + 1),
5117 sizeof(struct btrfs_item) *
5118 (nritems - slot - 1));
5119 btrfs_set_header_nritems(buf, nritems - 1);
5121 struct btrfs_disk_key disk_key;
     /* The new first key of the leaf is propagated to level 1. */
5123 btrfs_item_key(buf, &disk_key, 0);
5124 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5126 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 has to end exactly at BTRFS_LEAF_DATA_SIZE(root) and every later
 * item has to end exactly where the previous item starts.  An item ending
 * beyond that boundary is passed to delete_bogus_item(), with a "can't fix"
 * message for the failure case; an item ending short of it has its data
 * moved up by the size of the gap and its offset updated.
 */
5130 static int fix_item_offset(struct btrfs_trans_handle *trans,
5131 struct btrfs_root *root,
5132 struct btrfs_path *path)
5134 struct extent_buffer *buf;
5138 /* We should only get this for leaves */
5139 BUG_ON(path->lowest_level);
5140 buf = path->nodes[0];
5142 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5143 unsigned int shift = 0, offset;
5145 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5146 BTRFS_LEAF_DATA_SIZE(root)) {
5147 if (btrfs_item_end_nr(buf, i) >
5148 BTRFS_LEAF_DATA_SIZE(root)) {
5149 ret = delete_bogus_item(trans, root, path,
5153 fprintf(stderr, "item is off the end of the "
5154 "leaf, can't fix\n");
5158 shift = BTRFS_LEAF_DATA_SIZE(root) -
5159 btrfs_item_end_nr(buf, i);
5160 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5161 btrfs_item_offset_nr(buf, i - 1)) {
5162 if (btrfs_item_end_nr(buf, i) >
5163 btrfs_item_offset_nr(buf, i - 1)) {
5164 ret = delete_bogus_item(trans, root, path,
5168 fprintf(stderr, "items overlap, can't fix\n");
5172 shift = btrfs_item_offset_nr(buf, i - 1) -
5173 btrfs_item_end_nr(buf, i);
     /* Move the item data up by shift bytes and record the new offset. */
5178 printf("Shifting item nr %d by %u bytes in block %llu\n",
5179 i, shift, (unsigned long long)buf->start);
5180 offset = btrfs_item_offset_nr(buf, i);
5181 memmove_extent_buffer(buf,
5182 btrfs_leaf_data(buf) + offset + shift,
5183 btrfs_leaf_data(buf) + offset,
5184 btrfs_item_size_nr(buf, i));
5185 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5187 btrfs_mark_buffer_dirty(buf);
5191 * We may have moved things, in which case we want to exit so we don't
5192 * write those changes out. Once we have proper abort functionality in
5193 * progs this can be changed to something nicer.
5200 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5201 * then just return -EIO.
5203 static int try_to_fix_bad_block(struct btrfs_root *root,
5204 struct extent_buffer *buf,
5205 enum btrfs_tree_block_status status)
5207 struct btrfs_trans_handle *trans;
5208 struct ulist *roots;
5209 struct ulist_node *node;
5210 struct btrfs_root *search_root;
5211 struct btrfs_path path;
5212 struct ulist_iterator iter;
5213 struct btrfs_key root_key, key;
     /* Only bad key order and invalid item offsets have a fixer below. */
5216 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5217 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
     /* Collect every root that references this block; each one is repaired in turn. */
5220 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5224 btrfs_init_path(&path);
5225 ULIST_ITER_INIT(&iter);
5226 while ((node = ulist_next(roots, &iter))) {
5227 root_key.objectid = node->val;
5228 root_key.type = BTRFS_ROOT_ITEM_KEY;
5229 root_key.offset = (u64)-1;
5231 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5238 trans = btrfs_start_transaction(search_root, 0);
5239 if (IS_ERR(trans)) {
5240 ret = PTR_ERR(trans);
     /*
      * Search for the first key of buf, stopping at the level of buf.
      * skip_check_block is set, presumably so the search does not reject
      * the damaged block -- TODO confirm.  The search is done inside the
      * transaction with its last argument set to 1.
      */
5244 path.lowest_level = btrfs_header_level(buf);
5245 path.skip_check_block = 1;
5246 if (path.lowest_level)
5247 btrfs_node_key_to_cpu(buf, &key, 0);
5249 btrfs_item_key_to_cpu(buf, &key, 0);
5250 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
     /* NOTE(review): the result of btrfs_commit_transaction() is not used at any call site here. */
5253 btrfs_commit_transaction(trans, search_root);
     /* Dispatch to the fixer that matches the reported status. */
5256 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5257 ret = fix_key_order(trans, search_root, &path);
5258 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5259 ret = fix_item_offset(trans, search_root, &path);
5261 btrfs_commit_transaction(trans, search_root);
5264 btrfs_release_path(&path);
5265 btrfs_commit_transaction(trans, search_root);
5268 btrfs_release_path(&path);
/*
 * Validate one tree block and update its extent record.
 *
 * The record covering buf is looked up in extent_cache and receives the
 * generation, the objectid of the first key and the level of the block.
 * The block is then checked with btrfs_check_leaf() or btrfs_check_node();
 * on failure a repair is attempted through try_to_fix_bad_block().  For a
 * block that passes, content_checked is set, and owner_ref_checked is set
 * either directly when flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF or
 * after consulting check_owner_ref().  Finally maybe_free_extent_rec() is
 * given the chance to drop the record.
 */
5272 static int check_block(struct btrfs_root *root,
5273 struct cache_tree *extent_cache,
5274 struct extent_buffer *buf, u64 flags)
5276 struct extent_record *rec;
5277 struct cache_extent *cache;
5278 struct btrfs_key key;
5279 enum btrfs_tree_block_status status;
5283 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5286 rec = container_of(cache, struct extent_record, cache);
5287 rec->generation = btrfs_header_generation(buf);
5289 level = btrfs_header_level(buf);
     /* Remember the objectid of the first key when the block has any items. */
5290 if (btrfs_header_nritems(buf) > 0) {
5293 btrfs_item_key_to_cpu(buf, &key, 0);
5295 btrfs_node_key_to_cpu(buf, &key, 0);
5297 rec->info_objectid = key.objectid;
5299 rec->info_level = level;
5301 if (btrfs_is_leaf(buf))
5302 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5304 status = btrfs_check_node(root, &rec->parent_key, buf);
     /*
      * NOTE(review): try_to_fix_bad_block() returns an int, which is stored
      * in the enum status and compared with BTRFS_TREE_BLOCK_CLEAN.  That
      * only works if BTRFS_TREE_BLOCK_CLEAN is 0 -- TODO confirm.
      */
5306 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5308 status = try_to_fix_bad_block(root, buf, status);
5309 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5311 fprintf(stderr, "bad block %llu\n",
5312 (unsigned long long)buf->start);
5315 * Signal to callers we need to start the scan over
5316 * again since we'll have cowed blocks.
5321 rec->content_checked = 1;
5322 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5323 rec->owner_ref_checked = 1;
5325 ret = check_owner_ref(root, rec, buf);
5327 rec->owner_ref_checked = 1;
5331 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up a tree backref on rec->backrefs.
 *
 * Two matching modes are visible: full backrefs are compared on parent,
 * non-full backrefs are compared on root.  Which mode applies is presumably
 * selected by whether a parent was passed -- TODO confirm.
 */
5335 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5336 u64 parent, u64 root)
5338 struct list_head *cur = rec->backrefs.next;
5339 struct extent_backref *node;
5340 struct tree_backref *back;
5342 while(cur != &rec->backrefs) {
5343 node = to_extent_backref(cur);
5347 back = to_tree_backref(node);
5349 if (!node->full_backref)
5351 if (parent == back->parent)
5354 if (node->full_backref)
5356 if (back->root == root)
/*
 * Allocate a tree backref and append it to the tail of rec->backrefs.
 *
 * The embedded node is zeroed; the backref is then marked either as a full
 * backref carrying parent or as a non-full backref (presumably carrying
 * root -- TODO confirm).
 *
 * NOTE(review): only ref->node is cleared by the memset, so any member
 * outside node that is not assigned here keeps whatever malloc() returned.
 */
5363 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5364 u64 parent, u64 root)
5366 struct tree_backref *ref = malloc(sizeof(*ref));
5370 memset(&ref->node, 0, sizeof(ref->node));
5372 ref->parent = parent;
5373 ref->node.full_backref = 1;
5376 ref->node.full_backref = 0;
5378 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Look up a data backref on rec->backrefs.
 *
 * Full backrefs are compared on parent; non-full backrefs are compared on
 * the (root, owner, offset) triple.  For a triple match with found_ref set
 * on both the request and the node, bytes and disk_bytenr are compared as
 * well, and a candidate that differs is presumably passed over -- TODO
 * confirm.
 */
5383 static struct data_backref *find_data_backref(struct extent_record *rec,
5384 u64 parent, u64 root,
5385 u64 owner, u64 offset,
5387 u64 disk_bytenr, u64 bytes)
5389 struct list_head *cur = rec->backrefs.next;
5390 struct extent_backref *node;
5391 struct data_backref *back;
5393 while(cur != &rec->backrefs) {
5394 node = to_extent_backref(cur);
5398 back = to_data_backref(node);
5400 if (!node->full_backref)
5402 if (parent == back->parent)
5405 if (node->full_backref)
5407 if (back->root == root && back->owner == owner &&
5408 back->offset == offset) {
5409 if (found_ref && node->found_ref &&
5410 (back->bytes != bytes ||
5411 back->disk_bytenr != disk_bytenr))
5420 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5421 u64 parent, u64 root,
5422 u64 owner, u64 offset,
5425 struct data_backref *ref = malloc(sizeof(*ref));
5429 memset(&ref->node, 0, sizeof(ref->node));
5430 ref->node.is_data = 1;
5433 ref->parent = parent;
5436 ref->node.full_backref = 1;
5440 ref->offset = offset;
5441 ref->node.full_backref = 0;
5443 ref->bytes = max_size;
5446 list_add_tail(&ref->node.list, &rec->backrefs);
5447 if (max_size > rec->max_size)
5448 rec->max_size = max_size;
5452 /* Check if the type of extent matches with its chunk */
5453 static void check_extent_type(struct extent_record *rec)
5455 struct btrfs_block_group_cache *bg_cache;
5457 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5461 /* data extent, check chunk directly*/
5462 if (!rec->metadata) {
5463 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5464 rec->wrong_chunk_type = 1;
5468 /* metadata extent, check the obvious case first */
5469 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5470 BTRFS_BLOCK_GROUP_METADATA))) {
5471 rec->wrong_chunk_type = 1;
5476 * Check SYSTEM extent, as it's also marked as metadata, we can only
5477 * make sure it's a SYSTEM extent by its backref
5479 if (!list_empty(&rec->backrefs)) {
5480 struct extent_backref *node;
5481 struct tree_backref *tback;
5484 node = to_extent_backref(rec->backrefs.next);
5485 if (node->is_data) {
5486 /* tree block shouldn't have data backref */
5487 rec->wrong_chunk_type = 1;
5490 tback = container_of(node, struct tree_backref, node);
5492 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5493 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5495 bg_type = BTRFS_BLOCK_GROUP_METADATA;
5496 if (!(bg_cache->flags & bg_type))
5497 rec->wrong_chunk_type = 1;
5502 * Allocate a new extent record, fill default values from @tmpl and insert int
5503 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5504 * the cache, otherwise it fails.
5506 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5507 struct extent_record *tmpl)
5509 struct extent_record *rec;
5512 rec = malloc(sizeof(*rec));
5515 rec->start = tmpl->start;
5516 rec->max_size = tmpl->max_size;
5517 rec->nr = max(tmpl->nr, tmpl->max_size);
5518 rec->found_rec = tmpl->found_rec;
5519 rec->content_checked = tmpl->content_checked;
5520 rec->owner_ref_checked = tmpl->owner_ref_checked;
5521 rec->num_duplicates = 0;
5522 rec->metadata = tmpl->metadata;
5523 rec->flag_block_full_backref = FLAG_UNSET;
5524 rec->bad_full_backref = 0;
5525 rec->crossing_stripes = 0;
5526 rec->wrong_chunk_type = 0;
5527 rec->is_root = tmpl->is_root;
5528 rec->refs = tmpl->refs;
5529 rec->extent_item_refs = tmpl->extent_item_refs;
5530 rec->parent_generation = tmpl->parent_generation;
5531 INIT_LIST_HEAD(&rec->backrefs);
5532 INIT_LIST_HEAD(&rec->dups);
5533 INIT_LIST_HEAD(&rec->list);
5534 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
5535 rec->cache.start = tmpl->start;
5536 rec->cache.size = tmpl->nr;
5537 ret = insert_cache_extent(extent_cache, &rec->cache);
5542 bytes_used += rec->nr;
5545 rec->crossing_stripes = check_crossing_stripes(global_info,
5546 rec->start, global_info->tree_root->nodesize);
5547 check_extent_type(rec);
5552 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5554 * - refs - if found, increase refs
5555 * - is_root - if found, set
5556 * - content_checked - if found, set
5557 * - owner_ref_checked - if found, set
5559 * If not found, create a new one, initialize and insert.
5561 static int add_extent_rec(struct cache_tree *extent_cache,
5562 struct extent_record *tmpl)
5564 struct extent_record *rec;
5565 struct cache_extent *cache;
5569 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5571 rec = container_of(cache, struct extent_record, cache);
5575 rec->nr = max(tmpl->nr, tmpl->max_size);
5578 * We need to make sure to reset nr to whatever the extent
5579 * record says was the real size, this way we can compare it to
5582 if (tmpl->found_rec) {
5583 if (tmpl->start != rec->start || rec->found_rec) {
5584 struct extent_record *tmp;
5587 if (list_empty(&rec->list))
5588 list_add_tail(&rec->list,
5589 &duplicate_extents);
5592 * We have to do this song and dance in case we
5593 * find an extent record that falls inside of
5594 * our current extent record but does not have
5595 * the same objectid.
5597 tmp = malloc(sizeof(*tmp));
5600 tmp->start = tmpl->start;
5601 tmp->max_size = tmpl->max_size;
5604 tmp->metadata = tmpl->metadata;
5605 tmp->extent_item_refs = tmpl->extent_item_refs;
5606 INIT_LIST_HEAD(&tmp->list);
5607 list_add_tail(&tmp->list, &rec->dups);
5608 rec->num_duplicates++;
5615 if (tmpl->extent_item_refs && !dup) {
5616 if (rec->extent_item_refs) {
5617 fprintf(stderr, "block %llu rec "
5618 "extent_item_refs %llu, passed %llu\n",
5619 (unsigned long long)tmpl->start,
5620 (unsigned long long)
5621 rec->extent_item_refs,
5622 (unsigned long long)tmpl->extent_item_refs);
5624 rec->extent_item_refs = tmpl->extent_item_refs;
5628 if (tmpl->content_checked)
5629 rec->content_checked = 1;
5630 if (tmpl->owner_ref_checked)
5631 rec->owner_ref_checked = 1;
5632 memcpy(&rec->parent_key, &tmpl->parent_key,
5633 sizeof(tmpl->parent_key));
5634 if (tmpl->parent_generation)
5635 rec->parent_generation = tmpl->parent_generation;
5636 if (rec->max_size < tmpl->max_size)
5637 rec->max_size = tmpl->max_size;
5640 * A metadata extent can't cross stripe_len boundary, otherwise
5641 * kernel scrub won't be able to handle it.
5642 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5646 rec->crossing_stripes = check_crossing_stripes(
5647 global_info, rec->start,
5648 global_info->tree_root->nodesize);
5649 check_extent_type(rec);
5650 maybe_free_extent_rec(extent_cache, rec);
5654 ret = add_extent_rec_nolookup(extent_cache, tmpl);
5659 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5660 u64 parent, u64 root, int found_ref)
5662 struct extent_record *rec;
5663 struct tree_backref *back;
5664 struct cache_extent *cache;
5667 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5669 struct extent_record tmpl;
5671 memset(&tmpl, 0, sizeof(tmpl));
5672 tmpl.start = bytenr;
5676 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5680 /* really a bug in cache_extent implement now */
5681 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5686 rec = container_of(cache, struct extent_record, cache);
5687 if (rec->start != bytenr) {
5689 * Several cause, from unaligned bytenr to over lapping extents
5694 back = find_tree_backref(rec, parent, root);
5696 back = alloc_tree_backref(rec, parent, root);
5702 if (back->node.found_ref) {
5703 fprintf(stderr, "Extent back ref already exists "
5704 "for %llu parent %llu root %llu \n",
5705 (unsigned long long)bytenr,
5706 (unsigned long long)parent,
5707 (unsigned long long)root);
5709 back->node.found_ref = 1;
5711 if (back->node.found_extent_tree) {
5712 fprintf(stderr, "Extent back ref already exists "
5713 "for %llu parent %llu root %llu \n",
5714 (unsigned long long)bytenr,
5715 (unsigned long long)parent,
5716 (unsigned long long)root);
5718 back->node.found_extent_tree = 1;
5720 check_extent_type(rec);
5721 maybe_free_extent_rec(extent_cache, rec);
5725 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5726 u64 parent, u64 root, u64 owner, u64 offset,
5727 u32 num_refs, int found_ref, u64 max_size)
5729 struct extent_record *rec;
5730 struct data_backref *back;
5731 struct cache_extent *cache;
5734 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5736 struct extent_record tmpl;
5738 memset(&tmpl, 0, sizeof(tmpl));
5739 tmpl.start = bytenr;
5741 tmpl.max_size = max_size;
5743 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5747 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5752 rec = container_of(cache, struct extent_record, cache);
5753 if (rec->max_size < max_size)
5754 rec->max_size = max_size;
5757 * If found_ref is set then max_size is the real size and must match the
5758 * existing refs. So if we have already found a ref then we need to
5759 * make sure that this ref matches the existing one, otherwise we need
5760 * to add a new backref so we can notice that the backrefs don't match
5761 * and we need to figure out who is telling the truth. This is to
5762 * account for that awful fsync bug I introduced where we'd end up with
5763 * a btrfs_file_extent_item that would have its length include multiple
5764 * prealloc extents or point inside of a prealloc extent.
5766 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5769 back = alloc_data_backref(rec, parent, root, owner, offset,
5775 BUG_ON(num_refs != 1);
5776 if (back->node.found_ref)
5777 BUG_ON(back->bytes != max_size);
5778 back->node.found_ref = 1;
5779 back->found_ref += 1;
5780 back->bytes = max_size;
5781 back->disk_bytenr = bytenr;
5783 rec->content_checked = 1;
5784 rec->owner_ref_checked = 1;
5786 if (back->node.found_extent_tree) {
5787 fprintf(stderr, "Extent back ref already exists "
5788 "for %llu parent %llu root %llu "
5789 "owner %llu offset %llu num_refs %lu\n",
5790 (unsigned long long)bytenr,
5791 (unsigned long long)parent,
5792 (unsigned long long)root,
5793 (unsigned long long)owner,
5794 (unsigned long long)offset,
5795 (unsigned long)num_refs);
5797 back->num_refs = num_refs;
5798 back->node.found_extent_tree = 1;
5800 maybe_free_extent_rec(extent_cache, rec);
5804 static int add_pending(struct cache_tree *pending,
5805 struct cache_tree *seen, u64 bytenr, u32 size)
5808 ret = add_cache_extent(seen, bytenr, size);
5811 add_cache_extent(pending, bytenr, size);
5815 static int pick_next_pending(struct cache_tree *pending,
5816 struct cache_tree *reada,
5817 struct cache_tree *nodes,
5818 u64 last, struct block_info *bits, int bits_nr,
5821 unsigned long node_start = last;
5822 struct cache_extent *cache;
5825 cache = search_cache_extent(reada, 0);
5827 bits[0].start = cache->start;
5828 bits[0].size = cache->size;
5833 if (node_start > 32768)
5834 node_start -= 32768;
5836 cache = search_cache_extent(nodes, node_start);
5838 cache = search_cache_extent(nodes, 0);
5841 cache = search_cache_extent(pending, 0);
5846 bits[ret].start = cache->start;
5847 bits[ret].size = cache->size;
5848 cache = next_cache_extent(cache);
5850 } while (cache && ret < bits_nr);
5856 bits[ret].start = cache->start;
5857 bits[ret].size = cache->size;
5858 cache = next_cache_extent(cache);
5860 } while (cache && ret < bits_nr);
5862 if (bits_nr - ret > 8) {
5863 u64 lookup = bits[0].start + bits[0].size;
5864 struct cache_extent *next;
5865 next = search_cache_extent(pending, lookup);
5867 if (next->start - lookup > 32768)
5869 bits[ret].start = next->start;
5870 bits[ret].size = next->size;
5871 lookup = next->start + next->size;
5875 next = next_cache_extent(next);
5883 static void free_chunk_record(struct cache_extent *cache)
5885 struct chunk_record *rec;
5887 rec = container_of(cache, struct chunk_record, cache);
5888 list_del_init(&rec->list);
5889 list_del_init(&rec->dextents);
5893 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5895 cache_tree_free_extents(chunk_cache, free_chunk_record);
5898 static void free_device_record(struct rb_node *node)
5900 struct device_record *rec;
5902 rec = container_of(node, struct device_record, node);
5906 FREE_RB_BASED_TREE(device_cache, free_device_record);
5908 int insert_block_group_record(struct block_group_tree *tree,
5909 struct block_group_record *bg_rec)
5913 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5917 list_add_tail(&bg_rec->list, &tree->block_groups);
5921 static void free_block_group_record(struct cache_extent *cache)
5923 struct block_group_record *rec;
5925 rec = container_of(cache, struct block_group_record, cache);
5926 list_del_init(&rec->list);
5930 void free_block_group_tree(struct block_group_tree *tree)
5932 cache_tree_free_extents(&tree->tree, free_block_group_record);
5935 int insert_device_extent_record(struct device_extent_tree *tree,
5936 struct device_extent_record *de_rec)
5941 * Device extent is a bit different from the other extents, because
5942 * the extents which belong to the different devices may have the
5943 * same start and size, so we need use the special extent cache
5944 * search/insert functions.
5946 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5950 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5951 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5955 static void free_device_extent_record(struct cache_extent *cache)
5957 struct device_extent_record *rec;
5959 rec = container_of(cache, struct device_extent_record, cache);
5960 if (!list_empty(&rec->chunk_list))
5961 list_del_init(&rec->chunk_list);
5962 if (!list_empty(&rec->device_list))
5963 list_del_init(&rec->device_list);
5967 void free_device_extent_tree(struct device_extent_tree *tree)
5969 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent reference item found at @slot of @leaf.
 *
 * The item key supplies the extent bytenr (objectid) and the parent (offset).
 * References whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded
 * as tree backrefs, all others as data backrefs carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;
	int ret;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
		ret = add_tree_backref(extent_cache, key.objectid, key.offset,
				0, 0);
	} else {
		ret = add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	}
	return ret;
}
#endif
5993 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5994 struct btrfs_key *key,
5997 struct btrfs_chunk *ptr;
5998 struct chunk_record *rec;
6001 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6002 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6004 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6006 fprintf(stderr, "memory allocation failed\n");
6010 INIT_LIST_HEAD(&rec->list);
6011 INIT_LIST_HEAD(&rec->dextents);
6014 rec->cache.start = key->offset;
6015 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6017 rec->generation = btrfs_header_generation(leaf);
6019 rec->objectid = key->objectid;
6020 rec->type = key->type;
6021 rec->offset = key->offset;
6023 rec->length = rec->cache.size;
6024 rec->owner = btrfs_chunk_owner(leaf, ptr);
6025 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6026 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6027 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6028 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6029 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6030 rec->num_stripes = num_stripes;
6031 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6033 for (i = 0; i < rec->num_stripes; ++i) {
6034 rec->stripes[i].devid =
6035 btrfs_stripe_devid_nr(leaf, ptr, i);
6036 rec->stripes[i].offset =
6037 btrfs_stripe_offset_nr(leaf, ptr, i);
6038 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6039 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6046 static int process_chunk_item(struct cache_tree *chunk_cache,
6047 struct btrfs_key *key, struct extent_buffer *eb,
6050 struct chunk_record *rec;
6051 struct btrfs_chunk *chunk;
6054 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6056 * Do extra check for this chunk item,
6058 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6059 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6060 * and owner<->key_type check.
6062 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6065 error("chunk(%llu, %llu) is not valid, ignore it",
6066 key->offset, btrfs_chunk_length(eb, chunk));
6069 rec = btrfs_new_chunk_record(eb, key, slot);
6070 ret = insert_cache_extent(chunk_cache, &rec->cache);
6072 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6073 rec->offset, rec->length);
6080 static int process_device_item(struct rb_root *dev_cache,
6081 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6083 struct btrfs_dev_item *ptr;
6084 struct device_record *rec;
6087 ptr = btrfs_item_ptr(eb,
6088 slot, struct btrfs_dev_item);
6090 rec = malloc(sizeof(*rec));
6092 fprintf(stderr, "memory allocation failed\n");
6096 rec->devid = key->offset;
6097 rec->generation = btrfs_header_generation(eb);
6099 rec->objectid = key->objectid;
6100 rec->type = key->type;
6101 rec->offset = key->offset;
6103 rec->devid = btrfs_device_id(eb, ptr);
6104 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6105 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6107 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6109 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6116 struct block_group_record *
6117 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6120 struct btrfs_block_group_item *ptr;
6121 struct block_group_record *rec;
6123 rec = calloc(1, sizeof(*rec));
6125 fprintf(stderr, "memory allocation failed\n");
6129 rec->cache.start = key->objectid;
6130 rec->cache.size = key->offset;
6132 rec->generation = btrfs_header_generation(leaf);
6134 rec->objectid = key->objectid;
6135 rec->type = key->type;
6136 rec->offset = key->offset;
6138 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6139 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6141 INIT_LIST_HEAD(&rec->list);
6146 static int process_block_group_item(struct block_group_tree *block_group_cache,
6147 struct btrfs_key *key,
6148 struct extent_buffer *eb, int slot)
6150 struct block_group_record *rec;
6153 rec = btrfs_new_block_group_record(eb, key, slot);
6154 ret = insert_block_group_record(block_group_cache, rec);
6156 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6157 rec->objectid, rec->offset);
6164 struct device_extent_record *
6165 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6166 struct btrfs_key *key, int slot)
6168 struct device_extent_record *rec;
6169 struct btrfs_dev_extent *ptr;
6171 rec = calloc(1, sizeof(*rec));
6173 fprintf(stderr, "memory allocation failed\n");
6177 rec->cache.objectid = key->objectid;
6178 rec->cache.start = key->offset;
6180 rec->generation = btrfs_header_generation(leaf);
6182 rec->objectid = key->objectid;
6183 rec->type = key->type;
6184 rec->offset = key->offset;
6186 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6187 rec->chunk_objecteid =
6188 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6190 btrfs_dev_extent_chunk_offset(leaf, ptr);
6191 rec->length = btrfs_dev_extent_length(leaf, ptr);
6192 rec->cache.size = rec->length;
6194 INIT_LIST_HEAD(&rec->chunk_list);
6195 INIT_LIST_HEAD(&rec->device_list);
6201 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6202 struct btrfs_key *key, struct extent_buffer *eb,
6205 struct device_extent_record *rec;
6208 rec = btrfs_new_device_extent_record(eb, key, slot);
6209 ret = insert_device_extent_record(dev_extent_cache, rec);
6212 "Device extent[%llu, %llu, %llu] existed.\n",
6213 rec->objectid, rec->offset, rec->length);
6220 static int process_extent_item(struct btrfs_root *root,
6221 struct cache_tree *extent_cache,
6222 struct extent_buffer *eb, int slot)
6224 struct btrfs_extent_item *ei;
6225 struct btrfs_extent_inline_ref *iref;
6226 struct btrfs_extent_data_ref *dref;
6227 struct btrfs_shared_data_ref *sref;
6228 struct btrfs_key key;
6229 struct extent_record tmpl;
6234 u32 item_size = btrfs_item_size_nr(eb, slot);
6240 btrfs_item_key_to_cpu(eb, &key, slot);
6242 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6244 num_bytes = root->nodesize;
6246 num_bytes = key.offset;
6249 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6250 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6251 key.objectid, root->sectorsize);
6254 if (item_size < sizeof(*ei)) {
6255 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6256 struct btrfs_extent_item_v0 *ei0;
6257 BUG_ON(item_size != sizeof(*ei0));
6258 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6259 refs = btrfs_extent_refs_v0(eb, ei0);
6263 memset(&tmpl, 0, sizeof(tmpl));
6264 tmpl.start = key.objectid;
6265 tmpl.nr = num_bytes;
6266 tmpl.extent_item_refs = refs;
6267 tmpl.metadata = metadata;
6269 tmpl.max_size = num_bytes;
6271 return add_extent_rec(extent_cache, &tmpl);
6274 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6275 refs = btrfs_extent_refs(eb, ei);
6276 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6280 if (metadata && num_bytes != root->nodesize) {
6281 error("ignore invalid metadata extent, length %llu does not equal to %u",
6282 num_bytes, root->nodesize);
6285 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6286 error("ignore invalid data extent, length %llu is not aligned to %u",
6287 num_bytes, root->sectorsize);
6291 memset(&tmpl, 0, sizeof(tmpl));
6292 tmpl.start = key.objectid;
6293 tmpl.nr = num_bytes;
6294 tmpl.extent_item_refs = refs;
6295 tmpl.metadata = metadata;
6297 tmpl.max_size = num_bytes;
6298 add_extent_rec(extent_cache, &tmpl);
6300 ptr = (unsigned long)(ei + 1);
6301 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6302 key.type == BTRFS_EXTENT_ITEM_KEY)
6303 ptr += sizeof(struct btrfs_tree_block_info);
6305 end = (unsigned long)ei + item_size;
6307 iref = (struct btrfs_extent_inline_ref *)ptr;
6308 type = btrfs_extent_inline_ref_type(eb, iref);
6309 offset = btrfs_extent_inline_ref_offset(eb, iref);
6311 case BTRFS_TREE_BLOCK_REF_KEY:
6312 ret = add_tree_backref(extent_cache, key.objectid,
6315 error("add_tree_backref failed: %s",
6318 case BTRFS_SHARED_BLOCK_REF_KEY:
6319 ret = add_tree_backref(extent_cache, key.objectid,
6322 error("add_tree_backref failed: %s",
6325 case BTRFS_EXTENT_DATA_REF_KEY:
6326 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6327 add_data_backref(extent_cache, key.objectid, 0,
6328 btrfs_extent_data_ref_root(eb, dref),
6329 btrfs_extent_data_ref_objectid(eb,
6331 btrfs_extent_data_ref_offset(eb, dref),
6332 btrfs_extent_data_ref_count(eb, dref),
6335 case BTRFS_SHARED_DATA_REF_KEY:
6336 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6337 add_data_backref(extent_cache, key.objectid, offset,
6339 btrfs_shared_data_ref_count(eb, sref),
6343 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6344 key.objectid, key.type, num_bytes);
6347 ptr += btrfs_extent_inline_ref_size(type);
6354 static int check_cache_range(struct btrfs_root *root,
6355 struct btrfs_block_group_cache *cache,
6356 u64 offset, u64 bytes)
6358 struct btrfs_free_space *entry;
6364 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6365 bytenr = btrfs_sb_offset(i);
6366 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6367 cache->key.objectid, bytenr, 0,
6368 &logical, &nr, &stripe_len);
6373 if (logical[nr] + stripe_len <= offset)
6375 if (offset + bytes <= logical[nr])
6377 if (logical[nr] == offset) {
6378 if (stripe_len >= bytes) {
6382 bytes -= stripe_len;
6383 offset += stripe_len;
6384 } else if (logical[nr] < offset) {
6385 if (logical[nr] + stripe_len >=
6390 bytes = (offset + bytes) -
6391 (logical[nr] + stripe_len);
6392 offset = logical[nr] + stripe_len;
6395 * Could be tricky, the super may land in the
6396 * middle of the area we're checking. First
6397 * check the easiest case, it's at the end.
6399 if (logical[nr] + stripe_len >=
6401 bytes = logical[nr] - offset;
6405 /* Check the left side */
6406 ret = check_cache_range(root, cache,
6408 logical[nr] - offset);
6414 /* Now we continue with the right side */
6415 bytes = (offset + bytes) -
6416 (logical[nr] + stripe_len);
6417 offset = logical[nr] + stripe_len;
6424 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6426 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6427 offset, offset+bytes);
6431 if (entry->offset != offset) {
6432 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6437 if (entry->bytes != bytes) {
6438 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6439 bytes, entry->bytes, offset);
6443 unlink_free_space(cache->free_space_ctl, entry);
6448 static int verify_space_cache(struct btrfs_root *root,
6449 struct btrfs_block_group_cache *cache)
6451 struct btrfs_path path;
6452 struct extent_buffer *leaf;
6453 struct btrfs_key key;
6457 root = root->fs_info->extent_root;
6459 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6461 btrfs_init_path(&path);
6462 key.objectid = last;
6464 key.type = BTRFS_EXTENT_ITEM_KEY;
6465 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6470 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6471 ret = btrfs_next_leaf(root, &path);
6479 leaf = path.nodes[0];
6480 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6481 if (key.objectid >= cache->key.offset + cache->key.objectid)
6483 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6484 key.type != BTRFS_METADATA_ITEM_KEY) {
6489 if (last == key.objectid) {
6490 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6491 last = key.objectid + key.offset;
6493 last = key.objectid + root->nodesize;
6498 ret = check_cache_range(root, cache, last,
6499 key.objectid - last);
6502 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6503 last = key.objectid + key.offset;
6505 last = key.objectid + root->nodesize;
6509 if (last < cache->key.objectid + cache->key.offset)
6510 ret = check_cache_range(root, cache, last,
6511 cache->key.objectid +
6512 cache->key.offset - last);
6515 btrfs_release_path(&path);
6518 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6519 fprintf(stderr, "There are still entries left in the space "
6527 static int check_space_cache(struct btrfs_root *root)
6529 struct btrfs_block_group_cache *cache;
6530 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6534 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6535 btrfs_super_generation(root->fs_info->super_copy) !=
6536 btrfs_super_cache_generation(root->fs_info->super_copy)) {
6537 printf("cache and super generation don't match, space cache "
6538 "will be invalidated\n");
6542 if (ctx.progress_enabled) {
6543 ctx.tp = TASK_FREE_SPACE;
6544 task_start(ctx.info);
6548 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6552 start = cache->key.objectid + cache->key.offset;
6553 if (!cache->free_space_ctl) {
6554 if (btrfs_init_free_space_ctl(cache,
6555 root->sectorsize)) {
6560 btrfs_remove_free_space_cache(cache);
6563 if (btrfs_fs_compat_ro(root->fs_info,
6564 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6565 ret = exclude_super_stripes(root, cache);
6567 fprintf(stderr, "could not exclude super stripes: %s\n",
6572 ret = load_free_space_tree(root->fs_info, cache);
6573 free_excluded_extents(root, cache);
6575 fprintf(stderr, "could not load free space tree: %s\n",
6582 ret = load_free_space_cache(root->fs_info, cache);
6587 ret = verify_space_cache(root, cache);
6589 fprintf(stderr, "cache appears valid but isn't %Lu\n",
6590 cache->key.objectid);
6595 task_stop(ctx.info);
6597 return error ? -EINVAL : 0;
6600 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6601 u64 num_bytes, unsigned long leaf_offset,
6602 struct extent_buffer *eb) {
6605 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6607 unsigned long csum_offset;
6611 u64 data_checked = 0;
6617 if (num_bytes % root->sectorsize)
6620 data = malloc(num_bytes);
6624 while (offset < num_bytes) {
6627 read_len = num_bytes - offset;
6628 /* read as much space once a time */
6629 ret = read_extent_data(root, data + offset,
6630 bytenr + offset, &read_len, mirror);
6634 /* verify every 4k data's checksum */
6635 while (data_checked < read_len) {
6637 tmp = offset + data_checked;
6639 csum = btrfs_csum_data(NULL, (char *)data + tmp,
6640 csum, root->sectorsize);
6641 btrfs_csum_final(csum, (u8 *)&csum);
6643 csum_offset = leaf_offset +
6644 tmp / root->sectorsize * csum_size;
6645 read_extent_buffer(eb, (char *)&csum_expected,
6646 csum_offset, csum_size);
6647 /* try another mirror */
6648 if (csum != csum_expected) {
6649 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6650 mirror, bytenr + tmp,
6651 csum, csum_expected);
6652 num_copies = btrfs_num_copies(
6653 &root->fs_info->mapping_tree,
6655 if (mirror < num_copies - 1) {
6660 data_checked += root->sectorsize;
6669 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6672 struct btrfs_path path;
6673 struct extent_buffer *leaf;
6674 struct btrfs_key key;
6677 btrfs_init_path(&path);
6678 key.objectid = bytenr;
6679 key.type = BTRFS_EXTENT_ITEM_KEY;
6680 key.offset = (u64)-1;
6683 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6686 fprintf(stderr, "Error looking up extent record %d\n", ret);
6687 btrfs_release_path(&path);
6690 if (path.slots[0] > 0) {
6693 ret = btrfs_prev_leaf(root, &path);
6696 } else if (ret > 0) {
6703 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6706 * Block group items come before extent items if they have the same
6707 * bytenr, so walk back one more just in case. Dear future traveller,
6708 * first congrats on mastering time travel. Now if it's not too much
6709 * trouble could you go back to 2006 and tell Chris to make the
6710 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6711 * EXTENT_ITEM_KEY please?
6713 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6714 if (path.slots[0] > 0) {
6717 ret = btrfs_prev_leaf(root, &path);
6720 } else if (ret > 0) {
6725 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6729 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6730 ret = btrfs_next_leaf(root, &path);
6732 fprintf(stderr, "Error going to next leaf "
6734 btrfs_release_path(&path);
6740 leaf = path.nodes[0];
6741 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6742 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6746 if (key.objectid + key.offset < bytenr) {
6750 if (key.objectid > bytenr + num_bytes)
6753 if (key.objectid == bytenr) {
6754 if (key.offset >= num_bytes) {
6758 num_bytes -= key.offset;
6759 bytenr += key.offset;
6760 } else if (key.objectid < bytenr) {
6761 if (key.objectid + key.offset >= bytenr + num_bytes) {
6765 num_bytes = (bytenr + num_bytes) -
6766 (key.objectid + key.offset);
6767 bytenr = key.objectid + key.offset;
6769 if (key.objectid + key.offset < bytenr + num_bytes) {
6770 u64 new_start = key.objectid + key.offset;
6771 u64 new_bytes = bytenr + num_bytes - new_start;
6774 * Weird case, the extent is in the middle of
6775 * our range, we'll have to search one side
6776 * and then the other. Not sure if this happens
6777 * in real life, but no harm in coding it up
6778 * anyway just in case.
6780 btrfs_release_path(&path);
6781 ret = check_extent_exists(root, new_start,
6784 fprintf(stderr, "Right section didn't "
6788 num_bytes = key.objectid - bytenr;
6791 num_bytes = key.objectid - bytenr;
6798 if (num_bytes && !ret) {
6799 fprintf(stderr, "There are no extents for csum range "
6800 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6804 btrfs_release_path(&path);
/*
 * Scan the checksum tree and cross-check its items.
 *
 * The root argument is only used to reach root->fs_info; the walk itself
 * runs on fs_info->csum_root and starts with a search for a key whose
 * objectid is BTRFS_EXTENT_CSUM_OBJECTID and whose type is
 * BTRFS_EXTENT_CSUM_KEY.  For each item the covered data length is
 * (item size / csum size) * sectorsize.  When the file-scope
 * check_data_csum switch is set, the item is also handed to
 * check_extent_csums().  offset and num_bytes describe the current run of
 * checksummed data; when an item does not start at offset + num_bytes the
 * run collected so far is passed to check_extent_exists() and offset is
 * moved to the new item.
 *
 * NOTE(review): several statements (error returns, loop control, the
 * handling of items with another key type) are not visible in this view,
 * so the return contract is not documented here.
 */
6808 static int check_csums(struct btrfs_root *root)
6810 struct btrfs_path path;
6811 struct extent_buffer *leaf;
6812 struct btrfs_key key;
6813 u64 offset = 0, num_bytes = 0;
6814 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6818 unsigned long leaf_offset;
/* From here on root refers to the checksum tree. */
6820 root = root->fs_info->csum_root;
6821 if (!extent_buffer_uptodate(root->node)) {
6822 fprintf(stderr, "No valid csum tree found\n");
6826 btrfs_init_path(&path);
6827 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6828 key.type = BTRFS_EXTENT_CSUM_KEY;
6830 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6832 fprintf(stderr, "Error searching csum tree %d\n", ret);
6833 btrfs_release_path(&path);
6837 if (ret > 0 && path.slots[0])
/* Step to the next leaf once the slot index runs past the current one. */
6842 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6843 ret = btrfs_next_leaf(root, &path);
6845 fprintf(stderr, "Error going to next leaf "
6852 leaf = path.nodes[0];
6854 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6855 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Every csum_size bytes of item payload account for one sector of data. */
6860 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6861 csum_size) * root->sectorsize;
/* Verifying the data against the checksums is optional. */
6862 if (!check_data_csum)
6863 goto skip_csum_check;
6864 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6865 ret = check_extent_csums(root, key.offset, data_len,
6871 offset = key.offset;
/* This item does not continue the current run: check the run so far. */
6872 } else if (key.offset != offset + num_bytes) {
6873 ret = check_extent_exists(root, offset, num_bytes);
6875 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6876 "there is no extent record\n",
6877 offset, offset+num_bytes);
6880 offset = key.offset;
6883 num_bytes += data_len;
6887 btrfs_release_path(&path);
/*
 * Compare key with drop_key one field at a time: objectid first, then
 * type when the objectids are equal, then offset when the types are
 * equal as well.
 *
 * NOTE(review): the statements executed when a comparison succeeds are
 * not visible in this view; presumably the function reports whether key
 * sorts before drop_key -- confirm against the full file.
 */
6891 static int is_dropped_key(struct btrfs_key *key,
6892 struct btrfs_key *drop_key) {
6893 if (key->objectid < drop_key->objectid)
6895 else if (key->objectid == drop_key->objectid) {
6896 if (key->type < drop_key->type)
6898 else if (key->type == drop_key->type) {
6899 if (key->offset < drop_key->offset)
6907 * Here are the rules for FULL_BACKREF.
6909 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6910 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6912 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6913 * if it happened after the relocation occurred since we'll have dropped the
6914 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6915 * have no real way to know for sure.
6917 * We process the blocks one root at a time, and we start from the lowest root
6918 * objectid and go to the highest. So we can just lookup the owner backref for
6919 * the record and if we don't find it then we know it doesn't exist and we have
6922 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6923 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6924 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to tree block buf
 * while walking the root described by ri.
 *
 * The extent record cached for buf->start is consulted.  The visible tests
 * look at: whether ri->objectid is below BTRFS_FIRST_FREE_OBJECTID, whether
 * buf starts at ri->bytenr, whether buf carries BTRFS_HEADER_FLAG_RELOC,
 * whether the header owner equals ri->objectid, and whether the record has
 * a tree backref for (parent 0, root owner).
 *
 * The tail of the function has two sections.  The first marks the record
 * bad_full_backref when an earlier decision stored in
 * flag_block_full_backref is neither FLAG_UNSET nor 0.  The second ORs
 * FULL_BACKREF into *flags and marks the record bad_full_backref when the
 * earlier decision is neither FLAG_UNSET nor 1.
 *
 * NOTE(review): the jumps connecting the tests to those two sections, and
 * the return statements, are not visible in this view.
 */
6926 static int calc_extent_flag(struct btrfs_root *root,
6927 struct cache_tree *extent_cache,
6928 struct extent_buffer *buf,
6929 struct root_item_record *ri,
6932 struct extent_record *rec;
6933 struct cache_extent *cache;
6934 struct tree_backref *tback;
6937 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6938 /* we have added this extent before */
6942 rec = container_of(cache, struct extent_record, cache);
6945 * Except file/reloc tree, we can not have
6948 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6953 if (buf->start == ri->bytenr)
6956 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* Compare the owner stored in the block header with the root being walked. */
6959 owner = btrfs_header_owner(buf);
6960 if (owner == ri->objectid)
/* Look for a backref recorded against the header owner (parent 0). */
6963 tback = find_tree_backref(rec, 0, owner);
/* An earlier decision other than 0 disagrees with this section. */
6968 if (rec->flag_block_full_backref != FLAG_UNSET &&
6969 rec->flag_block_full_backref != 0)
6970 rec->bad_full_backref = 1;
6973 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* An earlier decision other than 1 disagrees with this section. */
6974 if (rec->flag_block_full_backref != FLAG_UNSET &&
6975 rec->flag_block_full_backref != 1)
6976 rec->bad_full_backref = 1;
/*
 * Write a diagnostic to stderr naming a key type and the root it was found
 * in.  The output is assembled from three fixed fragments with the key
 * type rendered by print_key_type() and the root id rendered by
 * print_objectid() in between, and ends with a newline.
 */
6980 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6982 fprintf(stderr, "Invalid key type(");
6983 print_key_type(stderr, 0, key_type);
6984 fprintf(stderr, ") found in root(");
6985 print_objectid(stderr, rootid, 0);
6986 fprintf(stderr, ")\n");
6990 * Check if the key is valid with its extent buffer.
6992 * This is an early check in case an invalid key exists in an extent buffer
6993 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that a key of type key_type may appear in the tree whose objectid
 * is rootid.  Each group of case labels is followed by a comparison of
 * rootid against one tree:
 *   DEV_ITEM, CHUNK_ITEM                          -> chunk tree
 *   EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP_ITEM  -> extent tree
 *   ROOT_ITEM                                     -> root tree
 *   DEV_EXTENT                                    -> device tree
 * report_mismatch_key_root() is called at the end of the function,
 * presumably on the mismatch path.
 *
 * NOTE(review): the switch header, the branch targets and the return
 * statements are not visible in this view.
 */
6996 static int check_type_with_root(u64 rootid, u8 key_type)
6999 /* Only valid in chunk tree */
7000 case BTRFS_DEV_ITEM_KEY:
7001 case BTRFS_CHUNK_ITEM_KEY:
7002 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7005 /* valid in csum and log tree */
/*
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, while every
 * other label here is a key type; BTRFS_EXTENT_CSUM_KEY may have been
 * intended -- confirm.
 */
7006 case BTRFS_CSUM_TREE_OBJECTID:
7007 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7011 case BTRFS_EXTENT_ITEM_KEY:
7012 case BTRFS_METADATA_ITEM_KEY:
7013 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7014 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7017 case BTRFS_ROOT_ITEM_KEY:
7018 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7021 case BTRFS_DEV_EXTENT_KEY:
7022 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7028 report_mismatch_key_root(key_type, rootid);
/*
 * Process one tree block of the scan and queue the blocks it points to.
 *
 * Visible steps, in order:
 *  - pick_next_pending() fills bits with the next batch of blocks; each
 *    one is added to the reada cache and readahead is started for it.
 *  - bits[0] becomes the block handled by this call (*last is updated to
 *    its start) and it is removed from the pending, reada and nodes caches.
 *  - The block is read with read_tree_block(); a buffer that is not
 *    uptodate is reported through record_bad_block_io().
 *  - The backref flags are obtained from btrfs_lookup_extent_info() unless
 *    init_extent_tree is set, with calc_extent_flag() as the other source;
 *    when calc_extent_flag() fails FULL_BACKREF is assumed.  The result is
 *    cross-checked against the owner, the RELOC header flag and
 *    ri->last_snapshot, and stored in rec->flag_block_full_backref.
 *  - check_block() validates the buffer.
 *  - For a leaf every item is dispatched on its key type to the matching
 *    process_*() or add_*_backref() helper; file extent items also feed the
 *    data_bytes_allocated / data_bytes_referenced totals.
 *  - For a node every child pointer gets an extent record and a tree
 *    backref and is queued through add_pending().
 *  - The file-scope byte counters are updated and the buffer is released.
 *
 * NOTE(review): many short lines (returns, else branches, some arguments)
 * are not visible in this view, so the return values are not documented.
 */
7032 static int run_next_block(struct btrfs_root *root,
7033 struct block_info *bits,
7036 struct cache_tree *pending,
7037 struct cache_tree *seen,
7038 struct cache_tree *reada,
7039 struct cache_tree *nodes,
7040 struct cache_tree *extent_cache,
7041 struct cache_tree *chunk_cache,
7042 struct rb_root *dev_cache,
7043 struct block_group_tree *block_group_cache,
7044 struct device_extent_tree *dev_extent_cache,
7045 struct root_item_record *ri)
7047 struct extent_buffer *buf;
7048 struct extent_record *rec = NULL;
7059 struct btrfs_key key;
7060 struct cache_extent *cache;
7063 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7064 bits_nr, &reada_bits);
/* Register each picked block in the reada cache and start readahead. */
7069 for(i = 0; i < nritems; i++) {
7070 ret = add_cache_extent(reada, bits[i].start,
7075 /* fixme, get the parent transid */
7076 readahead_tree_block(root, bits[i].start,
/* The first picked block is the one processed by this call. */
7080 *last = bits[0].start;
7081 bytenr = bits[0].start;
7082 size = bits[0].size;
/* Take the block off the pending, reada and nodes caches. */
7084 cache = lookup_cache_extent(pending, bytenr, size);
7086 remove_cache_extent(pending, cache);
7089 cache = lookup_cache_extent(reada, bytenr, size);
7091 remove_cache_extent(reada, cache);
7094 cache = lookup_cache_extent(nodes, bytenr, size);
7096 remove_cache_extent(nodes, cache);
/* The cached extent record supplies the generation used for the read. */
7099 cache = lookup_cache_extent(extent_cache, bytenr, size);
7101 rec = container_of(cache, struct extent_record, cache);
7102 gen = rec->parent_generation;
7105 /* fixme, get the real parent transid */
7106 buf = read_tree_block(root, bytenr, size, gen);
7107 if (!extent_buffer_uptodate(buf)) {
7108 record_bad_block_io(root->fs_info,
7109 extent_cache, bytenr, size);
7113 nritems = btrfs_header_nritems(buf);
/* Work out the backref flags for this block. */
7116 if (!init_extent_tree) {
7117 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7118 btrfs_header_level(buf), 1, NULL,
7121 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7123 fprintf(stderr, "Couldn't calc extent flags\n");
7124 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7129 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7131 fprintf(stderr, "Couldn't calc extent flags\n");
7132 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7136 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7138 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7139 ri->objectid == btrfs_header_owner(buf)) {
7141 * Ok we got to this block from it's original owner and
7142 * we have FULL_BACKREF set. Relocation can leave
7143 * converted blocks over so this is altogether possible,
7144 * however it's not possible if the generation > the
7145 * last snapshot, so check for this case.
7147 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7148 btrfs_header_generation(buf) > ri->last_snapshot) {
7149 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7150 rec->bad_full_backref = 1;
7155 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7156 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7157 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7158 rec->bad_full_backref = 1;
7162 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7163 rec->flag_block_full_backref = 1;
7167 rec->flag_block_full_backref = 0;
7169 owner = btrfs_header_owner(buf);
7172 ret = check_block(root, extent_cache, buf, flags);
7176 if (btrfs_is_leaf(buf)) {
7177 btree_space_waste += btrfs_leaf_free_space(root, buf);
7178 for (i = 0; i < nritems; i++) {
7179 struct btrfs_file_extent_item *fi;
7180 btrfs_item_key_to_cpu(buf, &key, i);
7182 * Check key type against the leaf owner.
7183 * Could filter quite a lot of early error if
7186 if (check_type_with_root(btrfs_header_owner(buf),
7188 fprintf(stderr, "ignoring invalid key\n");
7191 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7192 process_extent_item(root, extent_cache, buf,
7196 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7197 process_extent_item(root, extent_cache, buf,
7201 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7203 btrfs_item_size_nr(buf, i);
7206 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7207 process_chunk_item(chunk_cache, &key, buf, i);
7210 if (key.type == BTRFS_DEV_ITEM_KEY) {
7211 process_device_item(dev_cache, &key, buf, i);
7214 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7215 process_block_group_item(block_group_cache,
7219 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7220 process_device_extent_item(dev_extent_cache,
7225 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7226 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7227 process_extent_ref_v0(extent_cache, buf, i);
7234 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7235 ret = add_tree_backref(extent_cache,
7236 key.objectid, 0, key.offset, 0);
7238 error("add_tree_backref failed: %s",
7242 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7243 ret = add_tree_backref(extent_cache,
7244 key.objectid, key.offset, 0, 0);
7246 error("add_tree_backref failed: %s",
7250 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7251 struct btrfs_extent_data_ref *ref;
7252 ref = btrfs_item_ptr(buf, i,
7253 struct btrfs_extent_data_ref);
7254 add_data_backref(extent_cache,
7256 btrfs_extent_data_ref_root(buf, ref),
7257 btrfs_extent_data_ref_objectid(buf,
7259 btrfs_extent_data_ref_offset(buf, ref),
7260 btrfs_extent_data_ref_count(buf, ref),
7261 0, root->sectorsize);
7264 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7265 struct btrfs_shared_data_ref *ref;
7266 ref = btrfs_item_ptr(buf, i,
7267 struct btrfs_shared_data_ref);
7268 add_data_backref(extent_cache,
7269 key.objectid, key.offset, 0, 0, 0,
7270 btrfs_shared_data_ref_count(buf, ref),
7271 0, root->sectorsize);
7274 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7275 struct bad_item *bad;
7277 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7281 bad = malloc(sizeof(struct bad_item));
7284 INIT_LIST_HEAD(&bad->list);
7285 memcpy(&bad->key, &key,
7286 sizeof(struct btrfs_key));
7287 bad->root_id = owner;
7288 list_add_tail(&bad->list, &delete_items);
7291 if (key.type != BTRFS_EXTENT_DATA_KEY)
7293 fi = btrfs_item_ptr(buf, i,
7294 struct btrfs_file_extent_item);
7295 if (btrfs_file_extent_type(buf, fi) ==
7296 BTRFS_FILE_EXTENT_INLINE)
7298 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7301 data_bytes_allocated +=
7302 btrfs_file_extent_disk_num_bytes(buf, fi);
7303 if (data_bytes_allocated < root->sectorsize) {
7306 data_bytes_referenced +=
7307 btrfs_file_extent_num_bytes(buf, fi);
7308 add_data_backref(extent_cache,
7309 btrfs_file_extent_disk_bytenr(buf, fi),
7310 parent, owner, key.objectid, key.offset -
7311 btrfs_file_extent_offset(buf, fi), 1, 1,
7312 btrfs_file_extent_disk_num_bytes(buf, fi));
7316 struct btrfs_key first_key;
7318 first_key.objectid = 0;
7321 btrfs_item_key_to_cpu(buf, &first_key, 0);
7322 level = btrfs_header_level(buf);
7323 for (i = 0; i < nritems; i++) {
7324 struct extent_record tmpl;
7326 ptr = btrfs_node_blockptr(buf, i);
7327 size = root->nodesize;
7328 btrfs_node_key_to_cpu(buf, &key, i);
7330 if ((level == ri->drop_level)
7331 && is_dropped_key(&key, &ri->drop_key)) {
7336 memset(&tmpl, 0, sizeof(tmpl));
7337 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7338 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7343 tmpl.max_size = size;
7344 ret = add_extent_rec(extent_cache, &tmpl);
7348 ret = add_tree_backref(extent_cache, ptr, parent,
7351 error("add_tree_backref failed: %s",
7357 add_pending(nodes, seen, ptr, size);
7359 add_pending(pending, seen, ptr, size);
7362 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7363 nritems) * sizeof(struct btrfs_key_ptr);
7365 total_btree_bytes += buf->len;
7366 if (fs_root_objectid(btrfs_header_owner(buf)))
7367 total_fs_tree_bytes += buf->len;
7368 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7369 total_extent_tree_bytes += buf->len;
7370 if (!found_old_backref &&
7371 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7372 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7373 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7374 found_old_backref = 1;
7376 free_extent_buffer(buf);
/*
 * Queue the root block buf of a tree for scanning and seed the extent
 * cache with it.
 *
 * The block is queued through add_pending(): on the nodes list when its
 * header level is above 0, with the pending list as the alternative.  An
 * extent record template starting at buf->start with max_size buf->len is
 * added through add_extent_rec().  Finally a tree backref is added: with
 * buf->start as the parent when objectid is BTRFS_TREE_RELOC_OBJECTID or
 * the block predates BTRFS_MIXED_BACKREF_REV, otherwise with parent 0 and
 * objectid as the root.
 *
 * NOTE(review): the return statement is not visible in this view;
 * presumably the add_tree_backref() result is returned -- confirm.
 */
7380 static int add_root_to_pending(struct extent_buffer *buf,
7381 struct cache_tree *extent_cache,
7382 struct cache_tree *pending,
7383 struct cache_tree *seen,
7384 struct cache_tree *nodes,
7387 struct extent_record tmpl;
7390 if (btrfs_header_level(buf) > 0)
7391 add_pending(nodes, seen, buf->start, buf->len);
7393 add_pending(pending, seen, buf->start, buf->len);
/* Describe the root block itself as an extent. */
7395 memset(&tmpl, 0, sizeof(tmpl));
7396 tmpl.start = buf->start;
7401 tmpl.max_size = buf->len;
7402 add_extent_rec(extent_cache, &tmpl);
/* Reloc roots and old-format blocks are referenced by parent, not by root. */
7404 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7405 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7406 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7409 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7414 /* as we fix the tree, we might be deleting blocks that
7415 * we're tracking for repair. This hook makes sure we
7416 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory extent cache (fs_info->fsck_extent_cache) in step
 * when an extent reference is dropped.
 *
 * The extent record is looked up by (bytenr, num_bytes).  An owner at or
 * above BTRFS_FIRST_FREE_OBJECTID is treated as data: the matching data
 * backref has refs_to_drop subtracted from found_ref and num_refs, and its
 * found flags are cleared once those counters reach 0.  Otherwise the
 * matching tree backref has its found flags cleared.  The counters on the
 * record (refs, extent_item_refs) are lowered alongside, and
 * maybe_free_extent_rec() is called at the end.
 *
 * NOTE(review): the early exits and the return value are not visible in
 * this view.
 */
7418 static int free_extent_hook(struct btrfs_trans_handle *trans,
7419 struct btrfs_root *root,
7420 u64 bytenr, u64 num_bytes, u64 parent,
7421 u64 root_objectid, u64 owner, u64 offset,
7424 struct extent_record *rec;
7425 struct cache_extent *cache;
7427 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7429 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7430 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7434 rec = container_of(cache, struct extent_record, cache);
7436 struct data_backref *back;
7437 back = find_data_backref(rec, parent, root_objectid, owner,
7438 offset, 1, bytenr, num_bytes);
7441 if (back->node.found_ref) {
7442 back->found_ref -= refs_to_drop;
7444 rec->refs -= refs_to_drop;
7446 if (back->node.found_extent_tree) {
7447 back->num_refs -= refs_to_drop;
7448 if (rec->extent_item_refs)
7449 rec->extent_item_refs -= refs_to_drop;
7451 if (back->found_ref == 0)
7452 back->node.found_ref = 0;
7453 if (back->num_refs == 0)
7454 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked when it is absent from the extent
 * tree while found_ref is still set; confirm that found_ref was not meant
 * to be negated as well.  The tree branch below uses the same condition
 * after found_ref has been cleared whenever it was set, so its list_del()
 * looks unreachable.
 */
7456 if (!back->node.found_extent_tree && back->node.found_ref) {
7457 list_del(&back->node.list);
7461 struct tree_backref *back;
7462 back = find_tree_backref(rec, parent, root_objectid);
7465 if (back->node.found_ref) {
7468 back->node.found_ref = 0;
7470 if (back->node.found_extent_tree) {
7471 if (rec->extent_item_refs)
7472 rec->extent_item_refs--;
7473 back->node.found_extent_tree = 0;
7475 if (!back->node.found_extent_tree && back->node.found_ref) {
7476 list_del(&back->node.list);
/* Let the cache drop the record if nothing references it any more. */
7480 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items whose objectid is bytenr.
 *
 * The extent root is searched with a key whose objectid is bytenr and
 * whose offset is (u64)-1.  A found item whose objectid differs from
 * bytenr ends the matching.  Items of type EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF and
 * SHARED_DATA_REF are logged to stderr and removed with btrfs_del_item();
 * for any other type the path is released and the search key is adjusted
 * from the found key.  After an EXTENT_ITEM or METADATA_ITEM has been
 * removed, btrfs_update_block_group() is called; the length used is the
 * key offset for an EXTENT_ITEM and root->nodesize for a METADATA_ITEM.
 *
 * NOTE(review): loop control, error exits and the return value are not
 * visible in this view, and new_len is not used in any visible line.
 */
7485 static int delete_extent_records(struct btrfs_trans_handle *trans,
7486 struct btrfs_root *root,
7487 struct btrfs_path *path,
7488 u64 bytenr, u64 new_len)
7490 struct btrfs_key key;
7491 struct btrfs_key found_key;
7492 struct extent_buffer *leaf;
7497 key.objectid = bytenr;
7499 key.offset = (u64)-1;
7502 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7509 if (path->slots[0] == 0)
7515 leaf = path->nodes[0];
7516 slot = path->slots[0];
7518 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7519 if (found_key.objectid != bytenr)
/* Anything that is not an extent item or a backref item is stepped over. */
7522 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7523 found_key.type != BTRFS_METADATA_ITEM_KEY &&
7524 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7525 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7526 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7527 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7528 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7529 btrfs_release_path(path);
7530 if (found_key.type == 0) {
7531 if (found_key.offset == 0)
7533 key.offset = found_key.offset - 1;
7534 key.type = found_key.type;
7536 key.type = found_key.type - 1;
7537 key.offset = (u64)-1;
7541 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7542 found_key.objectid, found_key.type, found_key.offset);
7544 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7547 btrfs_release_path(path);
/* Removing the extent item itself also changes block group accounting. */
7549 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7550 found_key.type == BTRFS_METADATA_ITEM_KEY) {
7551 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7552 found_key.offset : root->nodesize;
7554 ret = btrfs_update_block_group(trans, root, bytenr,
7561 btrfs_release_path(path);
7566 * for a single backref, this will allocate a new extent
7567 * and add the backref to it.
/*
 * Write extent record rec and one of its backrefs into the extent tree.
 *
 * Visible steps:
 *  - rec->max_size is raised to at least the extent root nodesize.
 *  - An item keyed (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) is
 *    inserted with a ref count of 0 and generation rec->generation.  A
 *    data backref gets BTRFS_EXTENT_FLAG_DATA; otherwise a
 *    btrfs_tree_block_info is filled from rec->info_level and
 *    rec->info_objectid and the flags are TREE_BLOCK | flags.
 *  - btrfs_update_block_group() accounts for the new extent.
 *  - The reference itself is added with btrfs_inc_extent_ref(): in a loop
 *    bounded by dback->found_ref for a data backref, and as a single call
 *    for a tree backref.  The parent comes from the backref when
 *    back->full_backref is set.  A message describing the added backref is
 *    written to stderr.
 *
 * NOTE(review): the conditions guarding the first two steps (presumably
 * the allocated argument) and the return statements are not visible in
 * this view.
 */
7569 static int record_extent(struct btrfs_trans_handle *trans,
7570 struct btrfs_fs_info *info,
7571 struct btrfs_path *path,
7572 struct extent_record *rec,
7573 struct extent_backref *back,
7574 int allocated, u64 flags)
7577 struct btrfs_root *extent_root = info->extent_root;
7578 struct extent_buffer *leaf;
7579 struct btrfs_key ins_key;
7580 struct btrfs_extent_item *ei;
7581 struct data_backref *dback;
7582 struct btrfs_tree_block_info *bi;
7585 rec->max_size = max_t(u64, rec->max_size,
7586 info->extent_root->nodesize);
7589 u32 item_size = sizeof(*ei);
7592 item_size += sizeof(*bi);
7594 ins_key.objectid = rec->start;
7595 ins_key.offset = rec->max_size;
7596 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7598 ret = btrfs_insert_empty_item(trans, extent_root, path,
7599 &ins_key, item_size);
7603 leaf = path->nodes[0];
7604 ei = btrfs_item_ptr(leaf, path->slots[0],
7605 struct btrfs_extent_item);
/* The item starts with zero refs; they are added further down. */
7607 btrfs_set_extent_refs(leaf, ei, 0);
7608 btrfs_set_extent_generation(leaf, ei, rec->generation);
7610 if (back->is_data) {
7611 btrfs_set_extent_flags(leaf, ei,
7612 BTRFS_EXTENT_FLAG_DATA);
/*
 * NOTE(review): the doubled semicolon below is redundant, and the copy_key
 * arguments further down appear with a copyright sign where an address-of
 * expression (&copy_key) is expected; this looks like an encoding artefact
 * -- confirm against the pristine file.
 */
7614 struct btrfs_disk_key copy_key;;
7616 bi = (struct btrfs_tree_block_info *)(ei + 1);
7617 memset_extent_buffer(leaf, 0, (unsigned long)bi,
7620 btrfs_set_disk_key_objectid(©_key,
7621 rec->info_objectid);
7622 btrfs_set_disk_key_type(©_key, 0);
7623 btrfs_set_disk_key_offset(©_key, 0);
7625 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7626 btrfs_set_tree_block_key(leaf, bi, ©_key);
7628 btrfs_set_extent_flags(leaf, ei,
7629 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7632 btrfs_mark_buffer_dirty(leaf);
7633 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7634 rec->max_size, 1, 0);
7637 btrfs_release_path(path);
7640 if (back->is_data) {
7644 dback = to_data_backref(back);
7645 if (back->full_backref)
7646 parent = dback->parent;
7650 for (i = 0; i < dback->found_ref; i++) {
7651 /* if parent != 0, we're doing a full backref
7652 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7653 * just makes the backref allocator create a data
7656 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7657 rec->start, rec->max_size,
7661 BTRFS_FIRST_FREE_OBJECTID :
7667 fprintf(stderr, "adding new data backref"
7668 " on %llu %s %llu owner %llu"
7669 " offset %llu found %d\n",
7670 (unsigned long long)rec->start,
7671 back->full_backref ?
7673 back->full_backref ?
7674 (unsigned long long)parent :
7675 (unsigned long long)dback->root,
7676 (unsigned long long)dback->owner,
7677 (unsigned long long)dback->offset,
7681 struct tree_backref *tback;
7683 tback = to_tree_backref(back);
7684 if (back->full_backref)
7685 parent = tback->parent;
7689 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7690 rec->start, rec->max_size,
7691 parent, tback->root, 0, 0);
7692 fprintf(stderr, "adding new tree backref on "
7693 "start %llu len %llu parent %llu root %llu\n",
7694 rec->start, rec->max_size, parent, tback->root);
7697 btrfs_release_path(path);
/*
 * Walk the list entries looking for an extent_entry whose bytenr and bytes
 * both equal the arguments.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably the matching entry is returned, or NULL when there is none
 * -- confirm.
 */
7701 static struct extent_entry *find_entry(struct list_head *entries,
7702 u64 bytenr, u64 bytes)
7704 struct extent_entry *entry = NULL;
7706 list_for_each_entry(entry, entries, list) {
7707 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose, from the list entries, the extent_entry that should be trusted,
 * judged by the count fields.
 *
 * An entry whose broken count equals its count gets special treatment (the
 * existing comment says it is not trusted).  best tracks the current
 * favourite and prev an earlier entry; equal counts between best and the
 * current entry, or between an unbroken prev and the current entry, are
 * tested explicitly, and otherwise the entry with the larger count is
 * preferred.
 *
 * NOTE(review): the statements taken on those tests and the return
 * statement are not visible in this view; presumably best is returned and
 * is NULL when no single entry wins -- confirm.
 */
7714 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7716 struct extent_entry *entry, *best = NULL, *prev = NULL;
7718 list_for_each_entry(entry, entries, list) {
7720 * If there are as many broken entries as entries then we know
7721 * not to trust this particular entry.
7723 if (entry->broken == entry->count)
7727 * Special case, when there are only two entries and 'best' is
7737 * If our current entry == best then we can't be sure our best
7738 * is really the best, so we need to keep searching.
7740 if (best && best->count == entry->count) {
7746 /* Prev == entry, not good enough, have to keep searching */
7747 if (!prev->broken && prev->count == entry->count)
7751 best = (prev->count > entry->count) ? prev : entry;
7752 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind data backref dback so that it agrees
 * with entry, the (bytenr, bytes) pair chosen as correct.
 *
 * Visible steps:
 *  - The fs root dback->root is read with btrfs_read_fs_root().
 *  - A read-only search starts at (dback->owner, BTRFS_EXTENT_DATA_KEY,
 *    dback->offset) and walks forward over file extent items of that owner
 *    until one has disk_bytenr == dback->disk_bytenr and disk_num_bytes ==
 *    dback->bytes.
 *  - A transaction is started and the same key is searched again with COW.
 *  - A compressed extent whose disk_bytenr differs from entry->bytenr is
 *    not handled; a message asking for a btrfs-image is printed.
 *  - Otherwise disk_bytenr is set to entry->bytenr, with the file extent
 *    offset adjusted when the old start lies after or before the entry
 *    start, and disk_num_bytes is set to entry->bytes.  ram_bytes is set to
 *    entry->bytes only for uncompressed extents.
 *  - The leaf is marked dirty and the transaction is committed.
 *
 * The final return yields ret when it is non-zero and the commit result
 * otherwise; PTR_ERR(trans) is returned on one earlier path.
 *
 * NOTE(review): the other error exits are not visible in this view.
 */
7760 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7761 struct data_backref *dback, struct extent_entry *entry)
7763 struct btrfs_trans_handle *trans;
7764 struct btrfs_root *root;
7765 struct btrfs_file_extent_item *fi;
7766 struct extent_buffer *leaf;
7767 struct btrfs_key key;
/* Locate the root that owns the reference. */
7771 key.objectid = dback->root;
7772 key.type = BTRFS_ROOT_ITEM_KEY;
7773 key.offset = (u64)-1;
7774 root = btrfs_read_fs_root(info, &key);
7776 fprintf(stderr, "Couldn't find root for our ref\n");
7781 * The backref points to the original offset of the extent if it was
7782 * split, so we need to search down to the offset we have and then walk
7783 * forward until we find the backref we're looking for.
7785 key.objectid = dback->owner;
7786 key.type = BTRFS_EXTENT_DATA_KEY;
7787 key.offset = dback->offset;
7788 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7790 fprintf(stderr, "Error looking up ref %d\n", ret);
7795 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7796 ret = btrfs_next_leaf(root, path);
7798 fprintf(stderr, "Couldn't find our ref, next\n");
7802 leaf = path->nodes[0];
7803 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7804 if (key.objectid != dback->owner ||
7805 key.type != BTRFS_EXTENT_DATA_KEY) {
7806 fprintf(stderr, "Couldn't find our ref, search\n");
7809 fi = btrfs_item_ptr(leaf, path->slots[0],
7810 struct btrfs_file_extent_item);
7811 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7812 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7814 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7819 btrfs_release_path(path);
7821 trans = btrfs_start_transaction(root, 1);
7823 return PTR_ERR(trans);
7826 * Ok we have the key of the file extent we want to fix, now we can cow
7827 * down to the thing and fix it.
7829 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7831 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7832 key.objectid, key.type, key.offset, ret);
7836 fprintf(stderr, "Well that's odd, we just found this key "
7837 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7842 leaf = path->nodes[0];
7843 fi = btrfs_item_ptr(leaf, path->slots[0],
7844 struct btrfs_file_extent_item);
7846 if (btrfs_file_extent_compression(leaf, fi) &&
7847 dback->disk_bytenr != entry->bytenr) {
7848 fprintf(stderr, "Ref doesn't match the record start and is "
7849 "compressed, please take a btrfs-image of this file "
7850 "system and send it to a btrfs developer so they can "
7851 "complete this functionality for bytenr %Lu\n",
7852 dback->disk_bytenr);
7857 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7858 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7859 } else if (dback->disk_bytenr > entry->bytenr) {
7860 u64 off_diff, offset;
7862 off_diff = dback->disk_bytenr - entry->bytenr;
7863 offset = btrfs_file_extent_offset(leaf, fi);
7864 if (dback->disk_bytenr + offset +
7865 btrfs_file_extent_num_bytes(leaf, fi) >
7866 entry->bytenr + entry->bytes) {
7867 fprintf(stderr, "Ref is past the entry end, please "
7868 "take a btrfs-image of this file system and "
7869 "send it to a btrfs developer, ref %Lu\n",
7870 dback->disk_bytenr);
7875 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7876 btrfs_set_file_extent_offset(leaf, fi, offset);
7877 } else if (dback->disk_bytenr < entry->bytenr) {
7880 offset = btrfs_file_extent_offset(leaf, fi);
7881 if (dback->disk_bytenr + offset < entry->bytenr) {
7882 fprintf(stderr, "Ref is before the entry start, please"
7883 " take a btrfs-image of this file system and "
7884 "send it to a btrfs developer, ref %Lu\n",
7885 dback->disk_bytenr);
7890 offset += dback->disk_bytenr;
7891 offset -= entry->bytenr;
7892 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7893 btrfs_set_file_extent_offset(leaf, fi, offset);
7896 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7899 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7900 * only do this if we aren't using compression, otherwise it's a
7903 if (!btrfs_file_extent_compression(leaf, fi))
7904 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7906 printf("ram bytes may be wrong?\n");
7907 btrfs_mark_buffer_dirty(leaf);
7909 err = btrfs_commit_transaction(trans, root);
7910 btrfs_release_path(path);
7911 return ret ? ret : err;
/*
 * Check that the data backrefs of rec agree on (disk_bytenr, bytes) and
 * repair the file extents that disagree.
 *
 * First pass: every data backref that is not a full backref and has
 * found_ref != 0 is matched against a local list of extent_entry items
 * keyed by (disk_bytenr, bytes); a missing entry is allocated and appended.
 * A backref that differs from rec->start / rec->nr, or is marked broken,
 * is tested for separately.
 *
 * With at most one entry and no mismatch nothing more is done.  Otherwise
 * find_most_right_entry() picks the winner.  The extent record itself
 * (rec->start, rec->nr) is also looked up in the list and, in one branch,
 * appended as a new entry before find_most_right_entry() is called again;
 * messages are printed when no winner can be determined.  A winner whose
 * bytenr differs from rec->start is not handled and only reported.
 *
 * Second pass: repair_ref() is called for each qualifying backref whose
 * bytes or disk_bytenr differ from the winner.
 *
 * At the end the local entry list is emptied.
 *
 * NOTE(review): counters such as nr_entries and mismatch are updated on
 * lines that are not visible in this view, as are the error exits and the
 * return value.
 */
7914 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7915 struct extent_record *rec)
7917 struct extent_backref *back;
7918 struct data_backref *dback;
7919 struct extent_entry *entry, *best = NULL;
7922 int broken_entries = 0;
7927 * Metadata is easy and the backrefs should always agree on bytenr and
7928 * size, if not we've got bigger issues.
7933 list_for_each_entry(back, &rec->backrefs, list) {
7934 if (back->full_backref || !back->is_data)
7937 dback = to_data_backref(back);
7940 * We only pay attention to backrefs that we found a real
7943 if (dback->found_ref == 0)
7947 * For now we only catch when the bytes don't match, not the
7948 * bytenr. We can easily do this at the same time, but I want
7949 * to have a fs image to test on before we just add repair
7950 * functionality willy-nilly so we know we won't screw up the
7954 entry = find_entry(&entries, dback->disk_bytenr,
7957 entry = malloc(sizeof(struct extent_entry));
7962 memset(entry, 0, sizeof(*entry));
7963 entry->bytenr = dback->disk_bytenr;
7964 entry->bytes = dback->bytes;
7965 list_add_tail(&entry->list, &entries);
7970 * If we only have on entry we may think the entries agree when
7971 * in reality they don't so we have to do some extra checking.
7973 if (dback->disk_bytenr != rec->start ||
7974 dback->bytes != rec->nr || back->broken)
7985 /* Yay all the backrefs agree, carry on good sir */
7986 if (nr_entries <= 1 && !mismatch)
7989 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7990 "%Lu\n", rec->start);
7993 * First we want to see if the backrefs can agree amongst themselves who
7994 * is right, so figure out which one of the entries has the highest
7997 best = find_most_right_entry(&entries);
8000 * Ok so we may have an even split between what the backrefs think, so
8001 * this is where we use the extent ref to see what it thinks.
8004 entry = find_entry(&entries, rec->start, rec->nr);
8005 if (!entry && (!broken_entries || !rec->found_rec)) {
8006 fprintf(stderr, "Backrefs don't agree with each other "
8007 "and extent record doesn't agree with anybody,"
8008 " so we can't fix bytenr %Lu bytes %Lu\n",
8009 rec->start, rec->nr);
8012 } else if (!entry) {
8014 * Ok our backrefs were broken, we'll assume this is the
8015 * correct value and add an entry for this range.
8017 entry = malloc(sizeof(struct extent_entry));
8022 memset(entry, 0, sizeof(*entry));
8023 entry->bytenr = rec->start;
8024 entry->bytes = rec->nr;
8025 list_add_tail(&entry->list, &entries);
8029 best = find_most_right_entry(&entries);
8031 fprintf(stderr, "Backrefs and extent record evenly "
8032 "split on who is right, this is going to "
8033 "require user input to fix bytenr %Lu bytes "
8034 "%Lu\n", rec->start, rec->nr);
8041 * I don't think this can happen currently as we'll abort() if we catch
8042 * this case higher up, but in case somebody removes that we still can't
8043 * deal with it properly here yet, so just bail out of that's the case.
8045 if (best->bytenr != rec->start) {
8046 fprintf(stderr, "Extent start and backref starts don't match, "
8047 "please use btrfs-image on this file system and send "
8048 "it to a btrfs developer so they can make fsck fix "
8049 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8050 rec->start, rec->nr);
8056 * Ok great we all agreed on an extent record, let's go find the real
8057 * references and fix up the ones that don't match.
8059 list_for_each_entry(back, &rec->backrefs, list) {
8060 if (back->full_backref || !back->is_data)
8063 dback = to_data_backref(back);
8066 * Still ignoring backrefs that don't have a real ref attached
8069 if (dback->found_ref == 0)
8072 if (dback->bytes == best->bytes &&
8073 dback->disk_bytenr == best->bytenr)
8076 ret = repair_ref(info, path, dback, best);
8082 * Ok we messed with the actual refs, which means we need to drop our
8083 * entire cache and go back and rescan. I know this is a huge pain and
8084 * adds a lot of extra work, but it's the only way to be safe. Once all
8085 * the backrefs agree we may not need to do anything to the extent
8090 while (!list_empty(&entries)) {
8091 entry = list_entry(entries.next, struct extent_entry, list);
8092 list_del_init(&entry->list);
/*
 * Replace rec in extent_cache by its single duplicate record.
 *
 * A record with found_rec set, or with more than one duplicate, is tested
 * for up front; having no duplicate at all trips BUG_ON().  Otherwise rec
 * is removed from the cache and the first record on rec->dups (good) is
 * reinitialised: its cache range is set to (good->start, good->nr), its
 * checked flags and duplicate count are cleared, and it takes over the refs
 * and backrefs of rec.
 *
 * Records still in the cache that overlap good are then absorbed.  One
 * with found_rec set or with duplicates of its own becomes a duplicate of
 * good (good is put on the file-scope duplicate_extents list if it is not
 * on a list yet); any other has its refs and backrefs folded into good.
 * Absorbed records are removed from the cache.
 *
 * Finally good is inserted into the cache.  The last return yields 0 when
 * good ended up with duplicates and 1 otherwise.
 *
 * NOTE(review): earlier return statements and the disposal of rec are not
 * visible in this view.
 */
8098 static int process_duplicates(struct btrfs_root *root,
8099 struct cache_tree *extent_cache,
8100 struct extent_record *rec)
8102 struct extent_record *good, *tmp;
8103 struct cache_extent *cache;
8107 * If we found a extent record for this extent then return, or if we
8108 * have more than one duplicate we are likely going to need to delete
8111 if (rec->found_rec || rec->num_duplicates > 1)
8114 /* Shouldn't happen but just in case */
8115 BUG_ON(!rec->num_duplicates);
8118 * So this happens if we end up with a backref that doesn't match the
8119 * actual extent entry. So either the backref is bad or the extent
8120 * entry is bad. Either way we want to have the extent_record actually
8121 * reflect what we found in the extent_tree, so we need to take the
8122 * duplicate out and use that as the extent_record since the only way we
8123 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8125 remove_cache_extent(extent_cache, &rec->cache);
8127 good = to_extent_record(rec->dups.next);
8128 list_del_init(&good->list);
8129 INIT_LIST_HEAD(&good->backrefs);
8130 INIT_LIST_HEAD(&good->dups);
8131 good->cache.start = good->start;
8132 good->cache.size = good->nr;
8133 good->content_checked = 0;
8134 good->owner_ref_checked = 0;
8135 good->num_duplicates = 0;
8136 good->refs = rec->refs;
8137 list_splice_init(&rec->backrefs, &good->backrefs);
8139 cache = lookup_cache_extent(extent_cache, good->start,
8143 tmp = container_of(cache, struct extent_record, cache);
8146 * If we find another overlapping extent and it's found_rec is
8147 * set then it's a duplicate and we need to try and delete
8150 if (tmp->found_rec || tmp->num_duplicates > 0) {
8151 if (list_empty(&good->list))
8152 list_add_tail(&good->list,
8153 &duplicate_extents);
8154 good->num_duplicates += tmp->num_duplicates + 1;
8155 list_splice_init(&tmp->dups, &good->dups);
8156 list_del_init(&tmp->list);
8157 list_add_tail(&tmp->list, &good->dups);
8158 remove_cache_extent(extent_cache, &tmp->cache);
8163 * Ok we have another non extent item backed extent rec, so lets
8164 * just add it to this extent and carry on like we did above.
8166 good->refs += tmp->refs;
8167 list_splice_init(&tmp->backrefs, &good->backrefs);
8168 remove_cache_extent(extent_cache, &tmp->cache);
8171 ret = insert_cache_extent(extent_cache, &good->cache);
8174 return good->num_duplicates ? 0 : 1;
/*
 * Remove duplicate extent items of rec from the extent tree.
 *
 * The duplicates on rec->dups are first compared with the current
 * candidate (good) by start and length; overlapping extents that do not
 * fully cover each other are reported and not handled.  The records to
 * remove are collected on a local delete_list.  Inside one transaction on
 * the extent root, every collected record with found_rec set is looked up
 * by (start, BTRFS_EXTENT_ITEM_KEY, nr) and deleted with btrfs_del_item();
 * a metadata record in that list is reported as unexpected.  After the
 * commit both delete_list and rec->dups are emptied.
 *
 * rec->num_duplicates is reset to 0 when ret and nr_del are both 0.  The
 * return value is ret when it is non-zero and nr_del otherwise.
 *
 * NOTE(review): the lines that choose good, count nr_del and free the
 * records are not visible in this view.
 */
8177 static int delete_duplicate_records(struct btrfs_root *root,
8178 struct extent_record *rec)
8180 struct btrfs_trans_handle *trans;
8181 LIST_HEAD(delete_list);
8182 struct btrfs_path path;
8183 struct extent_record *tmp, *good, *n;
8186 struct btrfs_key key;
8188 btrfs_init_path(&path);
8191 /* Find the record that covers all of the duplicates. */
8192 list_for_each_entry(tmp, &rec->dups, list) {
8193 if (good->start < tmp->start)
8195 if (good->nr > tmp->nr)
/*
 * NOTE(review): the message below prints each extent as start and nr
 * inside a range-style bracket, so the second number is a length rather
 * than an end offset.
 */
8198 if (tmp->start + tmp->nr < good->start + good->nr) {
8199 fprintf(stderr, "Ok we have overlapping extents that "
8200 "aren't completely covered by each other, this "
8201 "is going to require more careful thought. "
8202 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8203 tmp->start, tmp->nr, good->start, good->nr);
8210 list_add_tail(&rec->list, &delete_list);
8212 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8215 list_move_tail(&tmp->list, &delete_list);
8218 root = root->fs_info->extent_root;
8219 trans = btrfs_start_transaction(root, 1);
8220 if (IS_ERR(trans)) {
8221 ret = PTR_ERR(trans);
8225 list_for_each_entry(tmp, &delete_list, list) {
8226 if (tmp->found_rec == 0)
8228 key.objectid = tmp->start;
8229 key.type = BTRFS_EXTENT_ITEM_KEY;
8230 key.offset = tmp->nr;
8232 /* Shouldn't happen but just in case */
8233 if (tmp->metadata) {
8234 fprintf(stderr, "Well this shouldn't happen, extent "
8235 "record overlaps but is metadata? "
8236 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8240 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8246 ret = btrfs_del_item(trans, root, &path);
8249 btrfs_release_path(&path);
8252 err = btrfs_commit_transaction(trans, root);
8256 while (!list_empty(&delete_list)) {
8257 tmp = to_extent_record(delete_list.next);
8258 list_del_init(&tmp->list);
8264 while (!list_empty(&rec->dups)) {
8265 tmp = to_extent_record(rec->dups.next);
8266 list_del_init(&tmp->list);
8270 btrfs_release_path(&path);
8272 if (!ret && !nr_del)
8273 rec->num_duplicates = 0;
8275 return ret ? ret : nr_del;
/*
 * Try to resolve the data backrefs of @rec that have not been found yet.
 *
 * For each candidate backref the fs root named by dback->root is read and
 * searched for the key (dback->owner, EXTENT_DATA, dback->offset). When a
 * file extent item is read, its disk bytenr and disk num bytes are looked up
 * in @extent_cache and, on the path that reaches lines 8351-8353, stored in
 * the backref while found_ref is incremented.
 *
 * A missing root (-ENOENT) is treated as a bad ref per the inline comment;
 * PTR_ERR(root) is returned for the other error case.
 *
 * @path is caller provided and is released after each search.
 *
 * NOTE(review): branch bodies (presumably continue statements), the check
 * applied to the cached record and the final return are not part of this
 * listing; confirm them in the complete file.
 */
8278 static int find_possible_backrefs(struct btrfs_fs_info *info,
8279 struct btrfs_path *path,
8280 struct cache_tree *extent_cache,
8281 struct extent_record *rec)
8283 struct btrfs_root *root;
8284 struct extent_backref *back;
8285 struct data_backref *dback;
8286 struct cache_extent *cache;
8287 struct btrfs_file_extent_item *fi;
8288 struct btrfs_key key;
8292 list_for_each_entry(back, &rec->backrefs, list) {
8293 /* Don't care about full backrefs (poor unloved backrefs) */
8294 if (back->full_backref || !back->is_data)
8297 dback = to_data_backref(back);
8299 /* We found this one, we don't need to do a lookup */
8300 if (dback->found_ref)
/* Read the fs root that the backref claims to live in. */
8303 key.objectid = dback->root;
8304 key.type = BTRFS_ROOT_ITEM_KEY;
8305 key.offset = (u64)-1;
8307 root = btrfs_read_fs_root(info, &key);
8309 /* No root, definitely a bad ref, skip */
8310 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8312 /* Other err, exit */
8314 return PTR_ERR(root);
/* Search that root for the file extent item the backref describes. */
8316 key.objectid = dback->owner;
8317 key.type = BTRFS_EXTENT_DATA_KEY;
8318 key.offset = dback->offset;
8319 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8321 btrfs_release_path(path);
8324 /* Didn't find it, we can carry on */
8329 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8330 struct btrfs_file_extent_item);
8331 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8332 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8333 btrfs_release_path(path);
/* Check whether an extent record already covers this disk bytenr. */
8334 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8336 struct extent_record *tmp;
8337 tmp = container_of(cache, struct extent_record, cache);
8340 * If we found an extent record for the bytenr for this
8341 * particular backref then we can't add it to our
8342 * current extent record. We only want to add backrefs
8343 * that don't have a corresponding extent item in the
8344 * extent tree since they likely belong to this record
8345 * and we need to fix it if it doesn't match bytenrs.
8351 dback->found_ref += 1;
8352 dback->disk_bytenr = bytenr;
8353 dback->bytes = bytes;
8356 * Set this so the verify backref code knows not to trust the
8357 * values in this backref.
8366 * Record orphan data ref into corresponding root.
8368 * Return 0 if the extent item contains data ref and recorded.
8369 * Return 1 if the extent item contains no useful data ref
8370 * In that case, it may contain only shared_dataref or metadata backref
8371 * or the file extent exists (this should be handled by the extent bytenr
8373 * Return <0 if something goes wrong.
/*
 * Record the data backrefs of @rec whose file extent cannot be found as
 * orphan data extents on the owning root.
 *
 * Only backrefs that are data, not full backrefs, present in the extent
 * tree (found_extent_tree) and not yet found (found_ref) are candidates.
 * For each one the root dback->root is read and searched for the key
 * (dback->owner, EXTENT_DATA, dback->offset). On the path that reaches line
 * 8424 a struct orphan_data_extent is allocated, filled from the backref and
 * from rec->cache (start and size), and linked with the root list
 * orphan_data_extents; recorded_data_ref is then set.
 *
 * The visible return statement yields !recorded_data_ref.
 *
 * NOTE(review): at line 8435 list_add() receives the root list head as its
 * first argument and the new entry as its second. Every list_add_tail() call
 * in this listing passes the entry first and the list head second, so the
 * order here looks reversed; confirm whether that is intended.
 *
 * NOTE(review): the malloc() failure check, the branch bodies and the error
 * return path are not part of this listing; confirm them in the complete
 * file.
 */
8375 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8376 struct extent_record *rec)
8378 struct btrfs_key key;
8379 struct btrfs_root *dest_root;
8380 struct extent_backref *back;
8381 struct data_backref *dback;
8382 struct orphan_data_extent *orphan;
8383 struct btrfs_path path;
8384 int recorded_data_ref = 0;
8389 btrfs_init_path(&path);
8390 list_for_each_entry(back, &rec->backrefs, list) {
8391 if (back->full_backref || !back->is_data ||
8392 !back->found_extent_tree)
8394 dback = to_data_backref(back);
8395 if (dback->found_ref)
/* Resolve the root that should own the referencing file extent. */
8397 key.objectid = dback->root;
8398 key.type = BTRFS_ROOT_ITEM_KEY;
8399 key.offset = (u64)-1;
8401 dest_root = btrfs_read_fs_root(fs_info, &key);
8403 /* For non-exist root we just skip it */
8404 if (IS_ERR(dest_root) || !dest_root)
8407 key.objectid = dback->owner;
8408 key.type = BTRFS_EXTENT_DATA_KEY;
8409 key.offset = dback->offset;
/* The path is released right away: only the search result code is used. */
8411 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8412 btrfs_release_path(&path);
8414 * For ret < 0, it's OK since the fs-tree may be corrupted,
8415 * we need to record it for inode/file extent rebuild.
8416 * For ret > 0, we record it only for file extent rebuild.
8417 * For ret == 0, the file extent exists but only bytenr
8418 * mismatch, let the original bytenr fix routine to handle,
8424 orphan = malloc(sizeof(*orphan));
8429 INIT_LIST_HEAD(&orphan->list);
8430 orphan->root = dback->root;
8431 orphan->objectid = dback->owner;
8432 orphan->offset = dback->offset;
8433 orphan->disk_bytenr = rec->cache.start;
8434 orphan->disk_len = rec->cache.size;
8435 list_add(&dest_root->orphan_data_extents, &orphan->list);
8436 recorded_data_ref = 1;
8439 btrfs_release_path(&path);
8441 return !recorded_data_ref;
8447 * when an incorrect extent item is found, this will delete
8448 * all of the existing entries for it and recreate them
8449 * based on what the tree scan found.
/*
 * Rebuild the extent tree entries of @rec from the backrefs collected by
 * the tree scan.
 *
 * Visible steps:
 *  - for data records whose refs differ from extent_item_refs, call
 *    find_possible_backrefs() first;
 *  - verify_backrefs() (step one);
 *  - start a transaction on the extent root and delete the existing
 *    records for [rec->start, rec->max_size] (step two);
 *  - look the range up in info->corrupt_blocks;
 *  - walk rec->backrefs and call record_extent() for the entries that pass
 *    the found_ref check, clearing rec->bad_full_backref before each call
 *    (step three);
 *  - commit the transaction and release the path.
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags passed to
 * record_extent() when rec->flag_block_full_backref is set.
 *
 * NOTE(review): error checks after each call, the loop advance of cur, the
 * handling of a corrupt_blocks hit and the return statement are not part of
 * this listing; confirm them in the complete file.
 */
8451 static int fixup_extent_refs(struct btrfs_fs_info *info,
8452 struct cache_tree *extent_cache,
8453 struct extent_record *rec)
8455 struct btrfs_trans_handle *trans = NULL;
8457 struct btrfs_path path;
8458 struct list_head *cur = rec->backrefs.next;
8459 struct cache_extent *cache;
8460 struct extent_backref *back;
8464 if (rec->flag_block_full_backref)
8465 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8467 btrfs_init_path(&path);
8468 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8470 * Sometimes the backrefs themselves are so broken they don't
8471 * get attached to any meaningful rec, so first go back and
8472 * check any of our backrefs that we couldn't find and throw
8473 * them into the list if we find the backref so that
8474 * verify_backrefs can figure out what to do.
8476 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8481 /* step one, make sure all of the backrefs agree */
8482 ret = verify_backrefs(info, &path, rec);
8486 trans = btrfs_start_transaction(info->extent_root, 1);
8487 if (IS_ERR(trans)) {
8488 ret = PTR_ERR(trans);
8492 /* step two, delete all the existing records */
8493 ret = delete_extent_records(trans, info->extent_root, &path,
8494 rec->start, rec->max_size);
8499 /* was this block corrupt? If so, don't add references to it */
8500 cache = lookup_cache_extent(info->corrupt_blocks,
8501 rec->start, rec->max_size);
8507 /* step three, recreate all the refs we did find */
8508 while(cur != &rec->backrefs) {
8509 back = to_extent_backref(cur);
8513 * if we didn't find any references, don't create a
8516 if (!back->found_ref)
8519 rec->bad_full_backref = 0;
8520 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8528 int err = btrfs_commit_transaction(trans, info->extent_root);
8533 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of @rec so
 * that it matches rec->flag_block_full_backref.
 *
 * The item is searched in the extent root inside its own transaction. On
 * the success path the flags are rewritten in the leaf, the buffer is marked
 * dirty and the result of btrfs_commit_transaction() is returned.
 * PTR_ERR(trans) is returned on the visible transaction error path.
 *
 * NOTE(review): on the two early exit paths (lines 8565 and 8571) the
 * return value of btrfs_commit_transaction() is discarded.
 *
 * NOTE(review): the conditions guarding the early exits and the values they
 * return are not part of this listing; confirm them in the complete file.
 */
8537 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8538 struct extent_record *rec)
8540 struct btrfs_trans_handle *trans;
8541 struct btrfs_root *root = fs_info->extent_root;
8542 struct btrfs_path path;
8543 struct btrfs_extent_item *ei;
8544 struct btrfs_key key;
/*
 * Metadata records use the key (start, METADATA_ITEM, info_level). The
 * (start, EXTENT_ITEM, max_size) form below is presumably the else branch;
 * line 8552 is not shown.
 */
8548 key.objectid = rec->start;
8549 if (rec->metadata) {
8550 key.type = BTRFS_METADATA_ITEM_KEY;
8551 key.offset = rec->info_level;
8553 key.type = BTRFS_EXTENT_ITEM_KEY;
8554 key.offset = rec->max_size;
8557 trans = btrfs_start_transaction(root, 0);
8559 return PTR_ERR(trans);
8561 btrfs_init_path(&path);
8562 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8564 btrfs_release_path(&path);
8565 btrfs_commit_transaction(trans, root);
8568 fprintf(stderr, "Didn't find extent for %llu\n",
8569 (unsigned long long)rec->start);
8570 btrfs_release_path(&path);
8571 btrfs_commit_transaction(trans, root);
8575 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8576 struct btrfs_extent_item);
8577 flags = btrfs_extent_flags(path.nodes[0], ei);
8578 if (rec->flag_block_full_backref) {
8579 fprintf(stderr, "setting full backref on %llu\n",
8580 (unsigned long long)key.objectid);
8581 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8583 fprintf(stderr, "clearing full backref on %llu\n",
8584 (unsigned long long)key.objectid);
8585 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Write the updated flags back into the leaf and mark it dirty. */
8587 btrfs_set_extent_flags(path.nodes[0], ei, flags);
8588 btrfs_mark_buffer_dirty(path.nodes[0]);
8589 btrfs_release_path(&path);
8590 return btrfs_commit_transaction(trans, root);
8593 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from the extent root tree.
 *
 * The search for corrupt->key is stopped at level corrupt->level + 1, the
 * parent of the bad block. The slot returned by the search is tried first;
 * if its block pointer is not corrupt->cache.start, every slot of that node
 * is scanned. The pointer is dropped with btrfs_del_ptr() on the path that
 * reaches line 8655.
 *
 * NOTE(review): the result checks after the search, the handling of the
 * not found case (including the extent root node comparison at line 8644),
 * the labels and the return value are not part of this listing; confirm
 * them in the complete file.
 */
8594 static int prune_one_block(struct btrfs_trans_handle *trans,
8595 struct btrfs_fs_info *info,
8596 struct btrfs_corrupt_block *corrupt)
8599 struct btrfs_path path;
8600 struct extent_buffer *eb;
/* Level of the node that holds the pointer to the corrupt block. */
8604 int level = corrupt->level + 1;
8606 btrfs_init_path(&path);
8608 /* we want to stop at the parent to our busted block */
8609 path.lowest_level = level;
8611 ret = btrfs_search_slot(trans, info->extent_root,
8612 &corrupt->key, &path, -1, 1);
8617 eb = path.nodes[level];
8624 * hopefully the search gave us the block we want to prune,
8625 * lets try that first
8627 slot = path.slots[level];
8628 found = btrfs_node_blockptr(eb, slot);
8629 if (found == corrupt->cache.start)
8632 nritems = btrfs_header_nritems(eb);
8634 /* the search failed, lets scan this node and hope we find it */
8635 for (slot = 0; slot < nritems; slot++) {
8636 found = btrfs_node_blockptr(eb, slot);
8637 if (found == corrupt->cache.start)
8641 * we couldn't find the bad block. TODO, search all the nodes for pointers
8644 if (eb == info->extent_root->node) {
8649 btrfs_release_path(&path);
8654 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8655 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8658 btrfs_release_path(&path);
/*
 * Prune entries of info->corrupt_blocks from the extent root.
 *
 * Entries are fetched with search_cache_extent() starting at offset 0,
 * handed to prune_one_block() and then removed from the cache. A
 * transaction on the extent root is started at line 8673 and the result of
 * its commit is returned at line 8682; PTR_ERR(trans) is returned on the
 * visible error path.
 *
 * NOTE(review): the return value of prune_one_block() is ignored at line
 * 8678.
 *
 * NOTE(review): the loop construct, the condition under which the
 * transaction is started and the return taken when nothing was pruned are
 * not part of this listing; confirm them in the complete file.
 */
8662 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8664 struct btrfs_trans_handle *trans = NULL;
8665 struct cache_extent *cache;
8666 struct btrfs_corrupt_block *corrupt;
8669 cache = search_cache_extent(info->corrupt_blocks, 0);
8673 trans = btrfs_start_transaction(info->extent_root, 1);
8675 return PTR_ERR(trans);
8677 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8678 prune_one_block(trans, info, corrupt);
8679 remove_cache_extent(info->corrupt_blocks, cache);
8682 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Reset cached block group state in @fs_info.
 *
 * Visible operations: find EXTENT_DIRTY ranges in fs_info->free_space_cache
 * and clear them, then look up block groups with
 * btrfs_lookup_first_block_group() and advance start to the end of each
 * group (key.objectid + key.offset).
 *
 * NOTE(review): both loop constructs, their exit conditions and whatever is
 * changed on each block group are not part of this listing; confirm them in
 * the complete file.
 */
8686 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8688 struct btrfs_block_group_cache *cache;
8693 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8694 &start, &end, EXTENT_DIRTY);
8697 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8703 cache = btrfs_lookup_first_block_group(fs_info, start);
8708 start = cache->key.objectid + cache->key.offset;
/*
 * Report, and in repair mode try to fix, inconsistencies in the extent
 * records collected in @extent_cache.
 *
 * Visible phases:
 *  1. Mark the range [start, start + max_size - 1] of every extent record
 *     and the range of every corrupt block as dirty in
 *     fs_info->excluded_extents, then call prune_corrupt_blocks() and
 *     reset_cached_block_groups(). Per the inline comment this is done for
 *     repair so that nothing is allocated from problem extents.
 *  2. While repairing, take records off the global duplicate_extents list
 *     and pass them to process_duplicates() and delete_duplicate_records().
 *  3. For each record in @extent_cache print a message for: multiple extent
 *     items, ref count mismatch, backpointer mismatch (reported when
 *     all_backpointers_checked() returns non-zero), failed owner ref check,
 *     bad full backref, metadata crossing a stripe boundary and chunk type
 *     mismatch. Repair helpers visible here are
 *     record_orphan_data_extents(), fixup_extent_refs() and
 *     fixup_extent_flags(). The record is then removed from the cache, its
 *     backrefs are freed and its excluded range is cleared when
 *     !init_extent_tree && repair && (!cur_err || fixed).
 *  4. After the loop a failure other than -EAGAIN is reported on stderr;
 *     otherwise a transaction on the extent root runs
 *     btrfs_fix_block_accounting() and is committed.
 *
 * NOTE(review): loop constructs, the guards around the repair only parts,
 * the bookkeeping of fixed, recorded and cur_err, and the return value are
 * not part of this listing; confirm them in the complete file.
 */
8712 static int check_extent_refs(struct btrfs_root *root,
8713 struct cache_tree *extent_cache)
8715 struct extent_record *rec;
8716 struct cache_extent *cache;
8725 * if we're doing a repair, we have to make sure
8726 * we don't allocate from the problem extents.
8727 * In the worst case, this will be all the
8730 cache = search_cache_extent(extent_cache, 0);
8732 rec = container_of(cache, struct extent_record, cache);
8733 set_extent_dirty(root->fs_info->excluded_extents,
8735 rec->start + rec->max_size - 1,
8737 cache = next_cache_extent(cache);
8740 /* pin down all the corrupted blocks too */
8741 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8743 set_extent_dirty(root->fs_info->excluded_extents,
8745 cache->start + cache->size - 1,
8747 cache = next_cache_extent(cache);
8749 prune_corrupt_blocks(root->fs_info);
8750 reset_cached_block_groups(root->fs_info);
8753 reset_cached_block_groups(root->fs_info);
8756 * We need to delete any duplicate entries we find first otherwise we
8757 * could mess up the extent tree when we have backrefs that actually
8758 * belong to a different extent item and not the weird duplicate one.
8760 while (repair && !list_empty(&duplicate_extents)) {
8761 rec = to_extent_record(duplicate_extents.next);
8762 list_del_init(&rec->list);
8764 /* Sometimes we can find a backref before we find an actual
8765 * extent, so we need to process it a little bit to see if there
8766 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8767 * if this is a backref screwup. If we need to delete stuff
8768 * process_duplicates() will return 0, otherwise it will return
8771 if (process_duplicates(root, extent_cache, rec))
8773 ret = delete_duplicate_records(root, rec);
8777 * delete_duplicate_records will return the number of entries
8778 * deleted, so if it's greater than 0 then we know we actually
8779 * did something and we need to remove.
8793 cache = search_cache_extent(extent_cache, 0);
8796 rec = container_of(cache, struct extent_record, cache);
8797 if (rec->num_duplicates) {
8798 fprintf(stderr, "extent item %llu has multiple extent "
8799 "items\n", (unsigned long long)rec->start);
8804 if (rec->refs != rec->extent_item_refs) {
8805 fprintf(stderr, "ref mismatch on [%llu %llu] ",
8806 (unsigned long long)rec->start,
8807 (unsigned long long)rec->nr);
8808 fprintf(stderr, "extent item %llu, found %llu\n",
8809 (unsigned long long)rec->extent_item_refs,
8810 (unsigned long long)rec->refs);
8811 ret = record_orphan_data_extents(root->fs_info, rec);
8818 * we can't use the extent to repair file
8819 * extent, let the fallback method handle it.
8821 if (!fixed && repair) {
8822 ret = fixup_extent_refs(
8833 if (all_backpointers_checked(rec, 1)) {
8834 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8835 (unsigned long long)rec->start,
8836 (unsigned long long)rec->nr);
8838 if (!fixed && !recorded && repair) {
8839 ret = fixup_extent_refs(root->fs_info,
8848 if (!rec->owner_ref_checked) {
8849 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8850 (unsigned long long)rec->start,
8851 (unsigned long long)rec->nr);
8852 if (!fixed && !recorded && repair) {
8853 ret = fixup_extent_refs(root->fs_info,
8862 if (rec->bad_full_backref) {
8863 fprintf(stderr, "bad full backref, on [%llu]\n",
8864 (unsigned long long)rec->start);
8866 ret = fixup_extent_flags(root->fs_info, rec);
8875 * Although it's not a extent ref's problem, we reuse this
8876 * routine for error reporting.
8877 * No repair function yet.
8879 if (rec->crossing_stripes) {
8881 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8882 rec->start, rec->start + rec->max_size);
8887 if (rec->wrong_chunk_type) {
8889 "bad extent [%llu, %llu), type mismatch with chunk\n",
8890 rec->start, rec->start + rec->max_size);
8895 remove_cache_extent(extent_cache, cache);
8896 free_all_extent_backrefs(rec);
8897 if (!init_extent_tree && repair && (!cur_err || fixed))
8898 clear_extent_dirty(root->fs_info->excluded_extents,
8900 rec->start + rec->max_size - 1,
8906 if (ret && ret != -EAGAIN) {
8907 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8910 struct btrfs_trans_handle *trans;
8912 root = root->fs_info->extent_root;
8913 trans = btrfs_start_transaction(root, 1);
8914 if (IS_ERR(trans)) {
8915 ret = PTR_ERR(trans);
8919 btrfs_fix_block_accounting(trans, root);
8920 ret = btrfs_commit_transaction(trans, root);
8925 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the stripe length of a chunk from its profile bits in @type.
 *
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   other:  length
 *
 * The profile bits are tested in the order listed, so the first match wins.
 * The result is held in stripe_size and is presumably returned; the return
 * statement is not part of this listing.
 *
 * NOTE(review): no guard on the divisors is visible. A @num_stripes of 0
 * (RAID0, RAID10), 1 (RAID5) or 2 (RAID6) would divide by zero unless it is
 * rejected in lines not shown here or by the callers; confirm.
 */
8931 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8935 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8936 stripe_size = length;
8937 stripe_size /= num_stripes;
8938 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8939 stripe_size = length * 2;
8940 stripe_size /= num_stripes;
8941 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8942 stripe_size = length;
8943 stripe_size /= (num_stripes - 1);
8944 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8945 stripe_size = length;
8946 stripe_size /= (num_stripes - 2);
8948 stripe_size = length;
8954 * Check the chunk with its block group/dev list ref:
8955 * Return 0 if all refs seem valid.
8956 * Return 1 if only part of the refs seem valid and a later check is needed to rebuild refs,
8957 * e.g. a missing block group that requires searching the extent tree to rebuild it.
8958 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross check one chunk record against the block group cache and the
 * device extent cache.
 *
 * Block group: the record found by lookup_cache_extent() must agree with
 * the chunk on length (block group offset), offset (block group objectid)
 * and type flags; line 8985, which is part of that condition, is not shown.
 * A matching record is unlinked from its list and stored in
 * chunk_rec->bg_rec. Mismatches and missing block groups are reported with
 * the messages below.
 *
 * Device extents: the expected length per stripe comes from
 * calc_stripe_length(). For every stripe the device extent found for
 * (devid, offset, length) must match on objectid, offset, chunk_offset and
 * length; a matching record is moved onto chunk_rec->dextents. Mismatches
 * and missing device extents are reported.
 *
 * NOTE(review): the fourth parameter (line 8963), the output calls and
 * their guards, the use of metadump_v2 and the values assigned to the
 * return code are not part of this listing; confirm them in the complete
 * file.
 */
8960 static int check_chunk_refs(struct chunk_record *chunk_rec,
8961 struct block_group_tree *block_group_cache,
8962 struct device_extent_tree *dev_extent_cache,
8965 struct cache_extent *block_group_item;
8966 struct block_group_record *block_group_rec;
8967 struct cache_extent *dev_extent_item;
8968 struct device_extent_record *dev_extent_rec;
8972 int metadump_v2 = 0;
8976 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8979 if (block_group_item) {
8980 block_group_rec = container_of(block_group_item,
8981 struct block_group_record,
8983 if (chunk_rec->length != block_group_rec->offset ||
8984 chunk_rec->offset != block_group_rec->objectid ||
8986 chunk_rec->type_flags != block_group_rec->flags)) {
8989 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8990 chunk_rec->objectid,
8995 chunk_rec->type_flags,
8996 block_group_rec->objectid,
8997 block_group_rec->type,
8998 block_group_rec->offset,
8999 block_group_rec->offset,
9000 block_group_rec->objectid,
9001 block_group_rec->flags);
/* Matching block group: claim it for this chunk. */
9004 list_del_init(&block_group_rec->list);
9005 chunk_rec->bg_rec = block_group_rec;
9010 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9011 chunk_rec->objectid,
9016 chunk_rec->type_flags);
/* Expected length of each device extent backing this chunk. */
9023 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9024 chunk_rec->num_stripes);
9025 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9026 devid = chunk_rec->stripes[i].devid;
9027 offset = chunk_rec->stripes[i].offset;
9028 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9029 devid, offset, length);
9030 if (dev_extent_item) {
9031 dev_extent_rec = container_of(dev_extent_item,
9032 struct device_extent_record,
9034 if (dev_extent_rec->objectid != devid ||
9035 dev_extent_rec->offset != offset ||
9036 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9037 dev_extent_rec->length != length) {
9040 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9041 chunk_rec->objectid,
9044 chunk_rec->stripes[i].devid,
9045 chunk_rec->stripes[i].offset,
9046 dev_extent_rec->objectid,
9047 dev_extent_rec->offset,
9048 dev_extent_rec->length);
9051 list_move(&dev_extent_rec->chunk_list,
9052 &chunk_rec->dextents);
9057 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9058 chunk_rec->objectid,
9061 chunk_rec->stripes[i].devid,
9062 chunk_rec->stripes[i].offset);
9069 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk record in @chunk_cache and sort
 * the records by result.
 *
 * A record with result 0 is appended to @good and one with a positive
 * result to @rebuild, each only when the corresponding list pointer is
 * non-NULL. Records are also appended to @bad at line 9096 under a
 * condition that is not shown.
 *
 * After the chunk walk, the entries left on block_group_cache->block_groups
 * and on dev_extent_cache->no_chunk_orphans are reported as lacking a
 * chunk.
 *
 * @silent is forwarded to check_chunk_refs().
 *
 * NOTE(review): the guard on the @bad list, the output calls and their
 * arguments, and the return value are not part of this listing; confirm
 * them in the complete file.
 */
9070 int check_chunks(struct cache_tree *chunk_cache,
9071 struct block_group_tree *block_group_cache,
9072 struct device_extent_tree *dev_extent_cache,
9073 struct list_head *good, struct list_head *bad,
9074 struct list_head *rebuild, int silent)
9076 struct cache_extent *chunk_item;
9077 struct chunk_record *chunk_rec;
9078 struct block_group_record *bg_rec;
9079 struct device_extent_record *dext_rec;
9083 chunk_item = first_cache_extent(chunk_cache);
9084 while (chunk_item) {
9085 chunk_rec = container_of(chunk_item, struct chunk_record,
9087 err = check_chunk_refs(chunk_rec, block_group_cache,
9088 dev_extent_cache, silent);
9091 if (err == 0 && good)
9092 list_add_tail(&chunk_rec->list, good);
9093 if (err > 0 && rebuild)
9094 list_add_tail(&chunk_rec->list, rebuild);
9096 list_add_tail(&chunk_rec->list, bad);
9097 chunk_item = next_cache_extent(chunk_item);
/* Whatever is still on these lists was not claimed by any chunk. */
9100 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9103 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9111 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9115 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Compare the byte_used field of a device record with the sum of the
 * lengths of its device extents.
 *
 * Device extents are visited starting at (dev_rec->devid, 0) in
 * dext_cache->tree. Each visited record is unlinked from its device_list
 * and its length is added to total_byte. A mismatch between total_byte and
 * dev_rec->byte_used is reported with the message below.
 *
 * NOTE(review): the loop construct, the action taken when an extent belongs
 * to another device (line 9138), the output call and the return values are
 * not part of this listing; confirm them in the complete file.
 */
9126 static int check_device_used(struct device_record *dev_rec,
9127 struct device_extent_tree *dext_cache)
9129 struct cache_extent *cache;
9130 struct device_extent_record *dev_extent_rec;
9133 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9135 dev_extent_rec = container_of(cache,
9136 struct device_extent_record,
9138 if (dev_extent_rec->objectid != dev_rec->devid)
9141 list_del_init(&dev_extent_rec->device_list);
9142 total_byte += dev_extent_rec->length;
9143 cache = next_cache_extent(cache);
9146 if (total_byte != dev_rec->byte_used) {
9148 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9149 total_byte, dev_rec->byte_used, dev_rec->objectid,
9150 dev_rec->type, dev_rec->offset);
9157 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record in the rb-tree
 * @dev_cache, then report the device extents left on
 * dev_extent_cache->no_device_orphans as having no device.
 *
 * NOTE(review): the loop construct, the handling of err, the output call
 * and the return value are not part of this listing; confirm them in the
 * complete file.
 */
9158 static int check_devices(struct rb_root *dev_cache,
9159 struct device_extent_tree *dev_extent_cache)
9161 struct rb_node *dev_node;
9162 struct device_record *dev_rec;
9163 struct device_extent_record *dext_rec;
9167 dev_node = rb_first(dev_cache);
9169 dev_rec = container_of(dev_node, struct device_record, node);
9170 err = check_device_used(dev_rec, dev_extent_cache);
9174 dev_node = rb_next(dev_node);
9176 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9179 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9180 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * @head.
 *
 * @drop_key is copied into the record with memcpy(). Callers in this file
 * pass NULL for it when there is no drop key, so the copy is presumably
 * guarded by a check on line 9203, which is not shown; confirm.
 *
 * NOTE(review): the malloc() failure check and the return values are not
 * part of this listing; confirm them in the complete file.
 */
9187 static int add_root_item_to_list(struct list_head *head,
9188 u64 objectid, u64 bytenr, u64 last_snapshot,
9189 u8 level, u8 drop_level,
9190 int level_size, struct btrfs_key *drop_key)
9193 struct root_item_record *ri_rec;
9194 ri_rec = malloc(sizeof(*ri_rec));
9197 ri_rec->bytenr = bytenr;
9198 ri_rec->objectid = objectid;
9199 ri_rec->level = level;
9200 ri_rec->level_size = level_size;
9201 ri_rec->drop_level = drop_level;
9202 ri_rec->last_snapshot = last_snapshot;
9204 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9205 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list by repeatedly unlinking its first root_item_record.
 *
 * NOTE(review): the statement that releases each record (presumably a
 * free() call on line 9218) is not part of this listing; confirm.
 */
9210 static void free_root_item_list(struct list_head *list)
9212 struct root_item_record *ri_rec;
9214 while (!list_empty(list)) {
9215 ri_rec = list_first_entry(list, struct root_item_record,
9217 list_del_init(&ri_rec->list);
/*
 * Process every root_item_record queued on @list.
 *
 * For each record the root tree block is read with read_tree_block() at
 * rec->bytenr with size rec->level_size. A buffer that is not up to date is
 * freed (the error handling that follows is not shown). Otherwise the block
 * is queued through add_root_to_pending() and run_next_block() is called
 * with the record; afterwards the buffer is freed and the record is
 * unlinked from @list.
 *
 * Once the list is empty run_next_block() is called again with a NULL
 * record (lines 9277-9280).
 *
 * The cache, tree and bits arguments are only forwarded to
 * add_root_to_pending() and run_next_block() in the visible lines.
 *
 * NOTE(review): the bits_nr parameter (line 9225), the loops around both
 * run_next_block() calls, the result checks, the release of the record and
 * the return value are not part of this listing; confirm them in the
 * complete file.
 */
9222 static int deal_root_from_list(struct list_head *list,
9223 struct btrfs_root *root,
9224 struct block_info *bits,
9226 struct cache_tree *pending,
9227 struct cache_tree *seen,
9228 struct cache_tree *reada,
9229 struct cache_tree *nodes,
9230 struct cache_tree *extent_cache,
9231 struct cache_tree *chunk_cache,
9232 struct rb_root *dev_cache,
9233 struct block_group_tree *block_group_cache,
9234 struct device_extent_tree *dev_extent_cache)
9239 while (!list_empty(list)) {
9240 struct root_item_record *rec;
9241 struct extent_buffer *buf;
9242 rec = list_entry(list->next,
9243 struct root_item_record, list);
9245 buf = read_tree_block(root->fs_info->tree_root,
9246 rec->bytenr, rec->level_size, 0);
9247 if (!extent_buffer_uptodate(buf)) {
9248 free_extent_buffer(buf);
9252 ret = add_root_to_pending(buf, extent_cache, pending,
9253 seen, nodes, rec->objectid);
9257 * To rebuild extent tree, we need deal with snapshot
9258 * one by one, otherwise we deal with node firstly which
9259 * can maximize readahead.
9262 ret = run_next_block(root, bits, bits_nr, &last,
9263 pending, seen, reada, nodes,
9264 extent_cache, chunk_cache,
9265 dev_cache, block_group_cache,
9266 dev_extent_cache, rec);
9270 free_extent_buffer(buf);
9271 list_del(&rec->list);
9277 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9278 reada, nodes, extent_cache, chunk_cache,
9279 dev_cache, block_group_cache,
9280 dev_extent_cache, NULL);
/*
 * Top level driver of the chunk, extent and device checks.
 *
 * Visible flow:
 *  - initialise the local caches and lists and publish excluded_extents,
 *    fsck_extent_cache, free_extent_hook and corrupt_blocks through
 *    root->fs_info;
 *  - allocate the bits array and, when progress reporting is enabled,
 *    start the TASK_EXTENTS task;
 *  - queue the tree root and the chunk root on normal_trees;
 *  - walk the ROOT_ITEM entries of the tree root: a root whose
 *    drop_progress objectid is 0 goes to normal_trees, the others go to
 *    dropping_trees together with their drop key;
 *  - run deal_root_from_list() on normal_trees, then on dropping_trees;
 *  - run check_chunks(), check_extent_refs() and check_devices();
 *  - stop the task, clear the fs_info hooks and free the caches.
 *
 * Two cleanup sequences are visible (lines 9470-9485 and 9488-9500). The
 * second one also frees the extent record cache and both root item lists,
 * so it presumably serves a retry path such as the -EAGAIN case described
 * in the inline comment; the labels and jumps are not shown, confirm.
 *
 * NOTE(review): the key setup for the root item search, the loop
 * constructs, every result check and the return value are not part of this
 * listing; confirm them in the complete file.
 */
9290 static int check_chunks_and_extents(struct btrfs_root *root)
9292 struct rb_root dev_cache;
9293 struct cache_tree chunk_cache;
9294 struct block_group_tree block_group_cache;
9295 struct device_extent_tree dev_extent_cache;
9296 struct cache_tree extent_cache;
9297 struct cache_tree seen;
9298 struct cache_tree pending;
9299 struct cache_tree reada;
9300 struct cache_tree nodes;
9301 struct extent_io_tree excluded_extents;
9302 struct cache_tree corrupt_blocks;
9303 struct btrfs_path path;
9304 struct btrfs_key key;
9305 struct btrfs_key found_key;
9307 struct block_info *bits;
9309 struct extent_buffer *leaf;
9311 struct btrfs_root_item ri;
9312 struct list_head dropping_trees;
9313 struct list_head normal_trees;
9314 struct btrfs_root *root1;
9319 dev_cache = RB_ROOT;
9320 cache_tree_init(&chunk_cache);
9321 block_group_tree_init(&block_group_cache);
9322 device_extent_tree_init(&dev_extent_cache);
9324 cache_tree_init(&extent_cache);
9325 cache_tree_init(&seen);
9326 cache_tree_init(&pending);
9327 cache_tree_init(&nodes);
9328 cache_tree_init(&reada);
9329 cache_tree_init(&corrupt_blocks);
9330 extent_io_tree_init(&excluded_extents);
9331 INIT_LIST_HEAD(&dropping_trees);
9332 INIT_LIST_HEAD(&normal_trees);
/* Publish the local trees and the hook through fs_info. */
9335 root->fs_info->excluded_extents = &excluded_extents;
9336 root->fs_info->fsck_extent_cache = &extent_cache;
9337 root->fs_info->free_extent_hook = free_extent_hook;
9338 root->fs_info->corrupt_blocks = &corrupt_blocks;
9342 bits = malloc(bits_nr * sizeof(struct block_info));
9348 if (ctx.progress_enabled) {
9349 ctx.tp = TASK_EXTENTS;
9350 task_start(ctx.info);
/* The tree root and the chunk root are queued first. */
9354 root1 = root->fs_info->tree_root;
9355 level = btrfs_header_level(root1->node);
9356 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9357 root1->node->start, 0, level, 0,
9358 root1->nodesize, NULL);
9361 root1 = root->fs_info->chunk_root;
9362 level = btrfs_header_level(root1->node);
9363 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9364 root1->node->start, 0, level, 0,
9365 root1->nodesize, NULL);
9368 btrfs_init_path(&path);
9371 key.type = BTRFS_ROOT_ITEM_KEY;
9372 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9377 leaf = path.nodes[0];
9378 slot = path.slots[0];
9379 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9380 ret = btrfs_next_leaf(root, &path);
9383 leaf = path.nodes[0];
9384 slot = path.slots[0];
9386 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9387 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9388 unsigned long offset;
9391 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9392 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9393 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A drop_progress objectid of 0 selects the normal_trees list. */
9394 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9395 level = btrfs_root_level(&ri);
9396 level_size = root->nodesize;
9397 ret = add_root_item_to_list(&normal_trees,
9399 btrfs_root_bytenr(&ri),
9400 last_snapshot, level,
9401 0, level_size, NULL);
9405 level = btrfs_root_level(&ri);
9406 level_size = root->nodesize;
9407 objectid = found_key.objectid;
9408 btrfs_disk_key_to_cpu(&found_key,
9410 ret = add_root_item_to_list(&dropping_trees,
9412 btrfs_root_bytenr(&ri),
9413 last_snapshot, level,
9415 level_size, &found_key);
9422 btrfs_release_path(&path);
9425 * check_block can return -EAGAIN if it fixes something, please keep
9426 * this in mind when dealing with return values from these functions, if
9427 * we get -EAGAIN we want to fall through and restart the loop.
9429 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9430 &seen, &reada, &nodes, &extent_cache,
9431 &chunk_cache, &dev_cache, &block_group_cache,
9438 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9439 &pending, &seen, &reada, &nodes,
9440 &extent_cache, &chunk_cache, &dev_cache,
9441 &block_group_cache, &dev_extent_cache);
9448 ret = check_chunks(&chunk_cache, &block_group_cache,
9449 &dev_extent_cache, NULL, NULL, NULL, 0);
9456 ret = check_extent_refs(root, &extent_cache);
9463 ret = check_devices(&dev_cache, &dev_extent_cache);
9468 task_stop(ctx.info);
9470 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9471 extent_io_tree_cleanup(&excluded_extents);
9472 root->fs_info->fsck_extent_cache = NULL;
9473 root->fs_info->free_extent_hook = NULL;
9474 root->fs_info->corrupt_blocks = NULL;
9475 root->fs_info->excluded_extents = NULL;
9478 free_chunk_cache_tree(&chunk_cache);
9479 free_device_cache_tree(&dev_cache);
9480 free_block_group_tree(&block_group_cache);
9481 free_device_extent_tree(&dev_extent_cache);
9482 free_extent_cache_tree(&seen);
9483 free_extent_cache_tree(&pending);
9484 free_extent_cache_tree(&reada);
9485 free_extent_cache_tree(&nodes);
9488 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9489 free_extent_cache_tree(&seen);
9490 free_extent_cache_tree(&pending);
9491 free_extent_cache_tree(&reada);
9492 free_extent_cache_tree(&nodes);
9493 free_chunk_cache_tree(&chunk_cache);
9494 free_block_group_tree(&block_group_cache);
9495 free_device_cache_tree(&dev_cache);
9496 free_device_extent_tree(&dev_extent_cache);
9497 free_extent_record_cache(root->fs_info, &extent_cache);
9498 free_root_item_list(&normal_trees);
9499 free_root_item_list(&dropping_trees);
9500 extent_io_tree_cleanup(&excluded_extents);
9505 * Check backrefs of a tree block given by @bytenr or @eb.
9507 * @root: the root containing the @bytenr or @eb
9508 * @eb: tree block extent buffer, can be NULL
9509 * @bytenr: bytenr of the tree block to search
9510 * @level: tree level of the tree block
9511 * @owner: owner of the tree block
9513 * Return >0 for any error found and output error message
9514 * Return 0 for no error found
/*
 * Verify that the tree block at @bytenr has a matching backref in the
 * extent tree.
 *
 * Visible flow:
 *  - search the extent root for (bytenr, METADATA_ITEM or EXTENT_ITEM, -1),
 *    the key type depending on the SKINNY_METADATA incompat flag, then step
 *    back with btrfs_previous_extent_item();
 *  - read the level from key.offset for a METADATA_ITEM, otherwise from
 *    the btrfs_tree_block_info that follows the extent item;
 *  - report a missing TREE_BLOCK flag, a generation mismatch against the
 *    header of @eb, a level mismatch, and a ref count other than 1 for a
 *    block whose @owner is not an fs tree;
 *  - walk the inline refs: a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner is accepted, a SHARED_BLOCK_REF is checked by
 *    a recursive call on its offset;
 *  - per the inline comment, when the inline refs do not contain what is
 *    needed, search for the keyed item (bytenr, TREE_BLOCK_REF,
 *    root->objectid);
 *  - when @eb is set and BACKREF_MISSING is flagged, report the lost
 *    backref.
 *
 * NOTE(review): BACKREF_MISSING is accumulated with |= while
 * BACKREF_MISMATCH is stored with plain assignment (lines 9586, 9595, 9601
 * and 9607), which would discard bits set earlier; confirm whether that is
 * intended.
 *
 * NOTE(review): the output calls for most messages, the guards around the
 * checks that read @eb, the loop construct over the inline refs, the
 * remaining arguments of the recursive call and the return statement are
 * not part of this listing; confirm them in the complete file.
 */
9516 static int check_tree_block_ref(struct btrfs_root *root,
9517 struct extent_buffer *eb, u64 bytenr,
9518 int level, u64 owner)
9520 struct btrfs_key key;
9521 struct btrfs_root *extent_root = root->fs_info->extent_root;
9522 struct btrfs_path path;
9523 struct btrfs_extent_item *ei;
9524 struct btrfs_extent_inline_ref *iref;
9525 struct extent_buffer *leaf;
9531 u32 nodesize = root->nodesize;
9538 btrfs_init_path(&path);
9539 key.objectid = bytenr;
9540 if (btrfs_fs_incompat(root->fs_info,
9541 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9542 key.type = BTRFS_METADATA_ITEM_KEY;
9544 key.type = BTRFS_EXTENT_ITEM_KEY;
9545 key.offset = (u64)-1;
9547 /* Search for the backref in extent tree */
9548 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9550 err |= BACKREF_MISSING;
9553 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9555 err |= BACKREF_MISSING;
9559 leaf = path.nodes[0];
9560 slot = path.slots[0];
9561 btrfs_item_key_to_cpu(leaf, &key, slot);
9563 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM keeps the level in key.offset and the inline refs follow
 * the extent item directly; otherwise a btrfs_tree_block_info sits between
 * the extent item and the inline refs.
 */
9565 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9566 skinny_level = (int)key.offset;
9567 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9569 struct btrfs_tree_block_info *info;
9571 info = (struct btrfs_tree_block_info *)(ei + 1);
9572 skinny_level = btrfs_tree_block_level(leaf, info);
9573 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9580 if (!(btrfs_extent_flags(leaf, ei) &
9581 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9583 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9584 key.objectid, nodesize,
9585 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9586 err = BACKREF_MISMATCH;
9588 header_gen = btrfs_header_generation(eb);
9589 extent_gen = btrfs_extent_generation(leaf, ei);
9590 if (header_gen != extent_gen) {
9592 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9593 key.objectid, nodesize, header_gen,
9595 err = BACKREF_MISMATCH;
9597 if (level != skinny_level) {
9599 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9600 key.objectid, nodesize, level, skinny_level);
9601 err = BACKREF_MISMATCH;
9603 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9605 "extent[%llu %u] is referred by other roots than %llu",
9606 key.objectid, nodesize, root->objectid);
9607 err = BACKREF_MISMATCH;
9612 * Iterate the extent/metadata item to find the exact backref
9614 item_size = btrfs_item_size_nr(leaf, slot);
9615 ptr = (unsigned long)iref;
9616 end = (unsigned long)ei + item_size;
9618 iref = (struct btrfs_extent_inline_ref *)ptr;
9619 type = btrfs_extent_inline_ref_type(leaf, iref);
9620 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9622 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9623 (offset == root->objectid || offset == owner)) {
9625 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9626 /* Check if the backref points to valid referencer */
9627 found_ref = !check_tree_block_ref(root, NULL, offset,
9633 ptr += btrfs_extent_inline_ref_size(type);
9637 * Inlined extent item doesn't have what we need, check
9638 * TREE_BLOCK_REF_KEY
9641 btrfs_release_path(&path);
9642 key.objectid = bytenr;
9643 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9644 key.offset = root->objectid;
9646 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9651 err |= BACKREF_MISSING;
9653 btrfs_release_path(&path);
9654 if (eb && (err & BACKREF_MISSING))
9655 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9656 bytenr, nodesize, owner, level);
9661 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9663 * Return >0 if any error is found, and output an error message
9664 * Return 0 for no error found
/*
 * Validate the file extent item stored at @slot of leaf @eb against the
 * extent tree of root->fs_info.
 *
 * Visible steps: skip inline extents and holes, check sector alignment of
 * the on-disk and logical lengths, update the data_bytes_allocated and
 * data_bytes_referenced counters, look up the EXTENT_ITEM keyed by
 * (disk_bytenr, EXTENT_ITEM_KEY, disk_num_bytes), compare its flags and
 * generation, then search its inline refs (and, failing that, a keyed
 * EXTENT_DATA_REF item) for a backref to this file extent.
 * Every problem found is OR-ed into err as a separate bit.
 */
9666 static int check_extent_data_item(struct btrfs_root *root,
9667 struct extent_buffer *eb, int slot)
9669 struct btrfs_file_extent_item *fi;
9670 struct btrfs_path path;
9671 struct btrfs_root *extent_root = root->fs_info->extent_root;
9672 struct btrfs_key fi_key;
9673 struct btrfs_key dbref_key;
9674 struct extent_buffer *leaf;
9675 struct btrfs_extent_item *ei;
9676 struct btrfs_extent_inline_ref *iref;
9677 struct btrfs_extent_data_ref *dref;
9679 u64 file_extent_gen;
9682 u64 extent_num_bytes;
9690 int found_dbackref = 0;
9694 btrfs_item_key_to_cpu(eb, &fi_key, slot);
9695 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9696 file_extent_gen = btrfs_file_extent_generation(eb, fi);
9698 /* Nothing to check for hole and inline data extents */
9699 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9700 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9703 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9704 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9705 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9707 /* Check unaligned disk_num_bytes and num_bytes */
9708 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9710 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9711 fi_key.objectid, fi_key.offset, disk_num_bytes,
9713 err |= BYTES_UNALIGNED;
/* File-level accounting of on-disk bytes occupied by data extents */
9715 data_bytes_allocated += disk_num_bytes;
9717 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9719 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9720 fi_key.objectid, fi_key.offset, extent_num_bytes,
9722 err |= BYTES_UNALIGNED;
/* File-level accounting of bytes referenced by file extent items */
9724 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is one of the two accepted backref roots below */
9726 owner = btrfs_header_owner(eb);
9728 /* Check the extent item of the file extent in extent tree */
9729 btrfs_init_path(&path);
9730 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9731 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9732 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9734 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9736 err |= BACKREF_MISSING;
9740 leaf = path.nodes[0];
9741 slot = path.slots[0];
9742 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9744 extent_flags = btrfs_extent_flags(leaf, ei);
9745 extent_gen = btrfs_extent_generation(leaf, ei);
/* An extent referenced by a file extent item must be flagged as data */
9747 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9749 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9750 disk_bytenr, disk_num_bytes,
9751 BTRFS_EXTENT_FLAG_DATA);
9752 err |= BACKREF_MISMATCH;
/* Flag an extent item whose generation is newer than the file extent generation */
9755 if (file_extent_gen < extent_gen) {
9757 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9758 disk_bytenr, disk_num_bytes, file_extent_gen,
9760 err |= BACKREF_MISMATCH;
9763 /* Check data backref inside that extent item */
9764 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
/* Inline refs start right after the fixed-size extent item and run to the end of the item */
9765 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9766 ptr = (unsigned long)iref;
9767 end = (unsigned long)ei + item_size;
9769 iref = (struct btrfs_extent_inline_ref *)ptr;
9770 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref body is read starting at the offset field of the inline ref */
9771 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9773 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9774 ref_root = btrfs_extent_data_ref_root(leaf, dref);
/* Accept a data ref rooted at either the leaf owner or the tree being checked */
9775 if (ref_root == owner || ref_root == root->objectid)
9777 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* A shared data ref stores a tree block bytenr; it counts as found when check_tree_block_ref returns 0 for that block */
9778 found_dbackref = !check_tree_block_ref(root, NULL,
9779 btrfs_extent_inline_ref_offset(leaf, iref),
/* Step over this inline ref; its size depends on the ref type */
9785 ptr += btrfs_extent_inline_ref_size(type);
9788 /* No inline data backref found, try the keyed EXTENT_DATA_REF_KEY item */
9789 if (!found_dbackref) {
9790 btrfs_release_path(&path);
9792 btrfs_init_path(&path);
9793 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9794 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/*
 * NOTE(review): the hash is fed fi_key.offset directly, while
 * check_extent_data_backref in this file describes the backref offset as
 * file_offset - file_extent_offset. Confirm this lookup is correct for file
 * extents with a nonzero extent offset.
 */
9795 dbref_key.offset = hash_extent_data_ref(root->objectid,
9796 fi_key.objectid, fi_key.offset);
9798 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9799 &dbref_key, &path, 0, 0);
9804 if (!found_dbackref)
9805 err |= BACKREF_MISSING;
9807 btrfs_release_path(&path);
9808 if (err & BACKREF_MISSING) {
9809 error("data extent[%llu %llu] backref lost",
9810 disk_bytenr, disk_num_bytes);
9816 * Get real tree block level for the case like shared block
9817 * Return >= 0 as tree level
9818 * Return <0 for error
/*
 * Resolve the level of tree block @bytenr from two sources and cross-check
 * them: the level recorded by its extent tree item and the level stored in
 * the header of the block itself. The visible success path returns
 * header_level.
 */
9820 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9822 struct extent_buffer *eb;
9823 struct btrfs_path path;
9824 struct btrfs_key key;
9825 struct btrfs_extent_item *ei;
9828 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9833 /* Search extent tree for extent generation and level */
9834 key.objectid = bytenr;
9835 key.type = BTRFS_METADATA_ITEM_KEY;
9836 key.offset = (u64)-1;
9838 btrfs_init_path(&path);
9839 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* The search used the largest possible offset, so step back to the previous extent item for @bytenr */
9842 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9850 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9851 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9852 struct btrfs_extent_item);
9853 flags = btrfs_extent_flags(path.nodes[0], ei);
/* The extent must be flagged as a tree block */
9854 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9859 /* Get transid for later read_tree_block() check */
9860 transid = btrfs_extent_generation(path.nodes[0], ei);
9862 /* Get backref level as one source */
9863 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* For a METADATA_ITEM key the level is carried in key.offset */
9864 backref_level = key.offset;
9866 struct btrfs_tree_block_info *info;
/* Otherwise the level is read from the tree block info that follows the extent item */
9868 info = (struct btrfs_tree_block_info *)(ei + 1);
9869 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9871 btrfs_release_path(&path);
9873 /* Get level from tree block as an alternative source */
9874 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9875 if (!extent_buffer_uptodate(eb)) {
9876 free_extent_buffer(eb);
9879 header_level = btrfs_header_level(eb);
9880 free_extent_buffer(eb);
/* Both sources are compared before header_level is returned */
9882 if (header_level != backref_level)
9884 return header_level;
9887 btrfs_release_path(&path);
9892 * Check if a tree block backref is valid (points to a valid tree block)
9893 * if level == -1, level will be resolved
9894 * Return >0 for any error found and print error message
/*
 * Verify that root @root_id really references tree block @bytenr at @level.
 *
 * Visible steps: optionally resolve the level via query_tree_block_level,
 * read the root, read the block to obtain its first key, search the root
 * for that key with path.lowest_level set to @level, and compare the bytenr
 * and level of path.nodes[level] with the expected values.
 * Failures are OR-ed into err as REFERENCER_MISSING or REFERENCER_MISMATCH.
 */
9896 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9897 u64 bytenr, int level)
9899 struct btrfs_root *root;
9900 struct btrfs_key key;
9901 struct btrfs_path path;
9902 struct extent_buffer *eb;
9903 struct extent_buffer *node;
9904 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9908 /* Query level for level == -1 special case */
9910 level = query_tree_block_level(fs_info, bytenr);
9912 err |= REFERENCER_MISSING;
/* Look up the referencing root by its id */
9916 key.objectid = root_id;
9917 key.type = BTRFS_ROOT_ITEM_KEY;
9918 key.offset = (u64)-1;
9920 root = btrfs_read_fs_root(fs_info, &key);
9922 err |= REFERENCER_MISSING;
9926 /* Read out the tree block to get item/node key */
9927 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9928 if (!extent_buffer_uptodate(eb)) {
9929 err |= REFERENCER_MISSING;
9930 free_extent_buffer(eb);
9934 /* Empty tree, no need to check key */
9935 if (!btrfs_header_nritems(eb) && !level) {
9936 free_extent_buffer(eb);
/* key is reused here to hold the first key of the block (node key or item key) */
9941 btrfs_node_key_to_cpu(eb, &key, 0);
9943 btrfs_item_key_to_cpu(eb, &key, 0);
9945 free_extent_buffer(eb);
9947 btrfs_init_path(&path);
/* presumably makes the search stop at @level so that path.nodes[level] is the candidate block; confirm against btrfs_search_slot */
9948 path.lowest_level = level;
9949 /* Search with the first key, to ensure we can reach it */
9950 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9952 err |= REFERENCER_MISSING;
/* The block reached through the root must be the very block the backref describes */
9956 node = path.nodes[level];
9957 if (btrfs_header_bytenr(node) != bytenr) {
9959 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9960 bytenr, nodesize, bytenr,
9961 btrfs_header_bytenr(node));
9962 err |= REFERENCER_MISMATCH;
9964 if (btrfs_header_level(node) != level) {
9966 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9967 bytenr, nodesize, level,
9968 btrfs_header_level(node));
9969 err |= REFERENCER_MISMATCH;
9973 btrfs_release_path(&path);
/* Two message variants exist: one without and one with the level */
9975 if (err & REFERENCER_MISSING) {
9977 error("extent [%llu %d] lost referencer (owner: %llu)",
9978 bytenr, nodesize, root_id);
9981 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9982 bytenr, nodesize, root_id, level);
9989 * Check referencer for shared block backref
9990 * If level == -1, this function will resolve the level.
/*
 * Verify that tree block @parent really points at tree block @bytenr.
 *
 * Visible steps: read @parent, resolve @level via query_tree_block_level
 * when requested, require the parent header level to be level + 1, then scan
 * the block pointers of @parent for @bytenr. When no pointer matches, an
 * error is printed and REFERENCER_MISSING is returned.
 */
9992 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9993 u64 parent, u64 bytenr, int level)
9995 struct extent_buffer *eb;
9996 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9998 int found_parent = 0;
10001 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10002 if (!extent_buffer_uptodate(eb))
10006 level = query_tree_block_level(fs_info, bytenr);
/* A parent sits exactly one level above its child */
10010 if (level + 1 != btrfs_header_level(eb))
10013 nr = btrfs_header_nritems(eb);
/* Scan every block pointer of the parent for the child bytenr */
10014 for (i = 0; i < nr; i++) {
10015 if (bytenr == btrfs_node_blockptr(eb, i)) {
10021 free_extent_buffer(eb);
10022 if (!found_parent) {
10024 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10025 bytenr, nodesize, parent, level);
10026 return REFERENCER_MISSING;
10032 * Check referencer for normal (inlined) data ref
10033 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed or inline EXTENT_DATA_REF: count the file extent items of
 * inode @objectid in root @root_id that point at data extent @bytenr with
 * backref offset @offset, and compare that number with @count.
 * On a count mismatch an error is printed and REFERENCER_MISSING is
 * returned.
 */
10035 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10036 u64 root_id, u64 objectid, u64 offset,
10037 u64 bytenr, u64 len, u32 count)
10039 struct btrfs_root *root;
10040 struct btrfs_root *extent_root = fs_info->extent_root;
10041 struct btrfs_key key;
10042 struct btrfs_path path;
10043 struct extent_buffer *leaf;
10044 struct btrfs_file_extent_item *fi;
10045 u32 found_count = 0;
/* Locate the EXTENT_ITEM for @bytenr in the extent tree */
10050 key.objectid = bytenr;
10051 key.type = BTRFS_EXTENT_ITEM_KEY;
10052 key.offset = (u64)-1;
10054 btrfs_init_path(&path);
10055 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10058 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10061 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* The item found must be an EXTENT_ITEM for exactly this bytenr */
10062 if (key.objectid != bytenr ||
10063 key.type != BTRFS_EXTENT_ITEM_KEY)
10066 btrfs_release_path(&path);
/* Switch to the referencing root named by the backref */
10068 key.objectid = root_id;
10069 key.type = BTRFS_ROOT_ITEM_KEY;
10070 key.offset = (u64)-1;
10071 btrfs_init_path(&path);
10073 root = btrfs_read_fs_root(fs_info, &key);
10077 key.objectid = objectid;
10078 key.type = BTRFS_EXTENT_DATA_KEY;
10080 * It can be nasty as data backref offset is
10081 * file offset - file extent offset, which is smaller or
10082 * equal to original backref offset. The only special case is
10083 * overflow. So we need to special check and do further search.
/* An offset with the top bit set is treated as the overflow case: start from file offset 0 */
10085 key.offset = offset & (1ULL << 63) ? 0 : offset;
10087 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10092 * Search afterwards to get correct one
10093 * NOTE: As we must do a comprehensive check on the data backref to
10094 * make sure the dref count also matches, we must iterate all file
10095 * extents for that inode.
10098 leaf = path.nodes[0];
10099 slot = path.slots[0];
10101 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Only EXTENT_DATA items of the requested inode are of interest */
10102 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10104 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10106 * Except normal disk bytenr and disk num bytes, we still
10107 * need to do extra check on dbackref offset as
10108 * dbackref offset = file_offset - file_extent_offset
/* The subtraction is cast to u64 before it is compared */
10110 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10111 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10112 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10116 ret = btrfs_next_item(root, &path);
10121 btrfs_release_path(&path);
10122 if (found_count != count) {
10124 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10125 bytenr, len, root_id, objectid, offset, count, found_count);
10126 return REFERENCER_MISSING;
10132 * Check if the referencer of a shared data backref exists
/*
 * Verify a SHARED_DATA_REF: tree block @parent must contain a non-inline
 * file extent item whose disk bytenr equals @bytenr. When none is found an
 * error is printed and REFERENCER_MISSING is returned.
 */
10134 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10135 u64 parent, u64 bytenr)
10137 struct extent_buffer *eb;
10138 struct btrfs_key key;
10139 struct btrfs_file_extent_item *fi;
10140 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10142 int found_parent = 0;
10145 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10146 if (!extent_buffer_uptodate(eb))
10149 nr = btrfs_header_nritems(eb);
/* Walk every item of the parent block looking for a matching file extent */
10150 for (i = 0; i < nr; i++) {
10151 btrfs_item_key_to_cpu(eb, &key, i);
10152 if (key.type != BTRFS_EXTENT_DATA_KEY)
10155 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline file extents are tested separately before the disk bytenr comparison */
10156 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10159 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10166 free_extent_buffer(eb);
10167 if (!found_parent) {
10168 error("shared extent %llu referencer lost (parent: %llu)",
10170 return REFERENCER_MISSING;
10176 * This function will check a given extent item, including its backref and
10177 * itself (like crossing stripe boundary and type)
10179 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one EXTENT_ITEM or METADATA_ITEM at @slot of leaf @eb.
 *
 * Visible steps: add the extent to the file-level bytes_used counter,
 * reject items smaller than struct btrfs_extent_item, check metadata
 * extents with check_crossing_stripes, determine the tree block level, then
 * walk every inline ref and dispatch it to the matching backref checker.
 * Problems are OR-ed into err.
 */
10181 static int check_extent_item(struct btrfs_fs_info *fs_info,
10182 struct extent_buffer *eb, int slot)
10184 struct btrfs_extent_item *ei;
10185 struct btrfs_extent_inline_ref *iref;
10186 struct btrfs_extent_data_ref *dref;
10190 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10191 u32 item_size = btrfs_item_size_nr(eb, slot);
10196 struct btrfs_key key;
10200 btrfs_item_key_to_cpu(eb, &key, slot);
/* An EXTENT_ITEM key carries the extent length in key.offset; the other accounted case adds nodesize */
10201 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10202 bytes_used += key.offset;
10204 bytes_used += nodesize;
10206 if (item_size < sizeof(*ei)) {
10208 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10209 * old thing when on disk format is still un-determined.
10210 * No need to care about it anymore
10212 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10216 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10217 flags = btrfs_extent_flags(eb, ei);
10219 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* NOTE(review): this call uses the file-level global_info rather than the fs_info parameter; confirm they always refer to the same filesystem */
10221 if (metadata && check_crossing_stripes(global_info, key.objectid,
10223 error("bad metadata [%llu, %llu) crossing stripe boundary",
10224 key.objectid, key.objectid + nodesize);
10225 err |= CROSSING_STRIPE_BOUNDARY;
/* Variable-size payload starts right after the fixed-size extent item */
10228 ptr = (unsigned long)(ei + 1);
10230 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10231 /* Old EXTENT_ITEM metadata */
10232 struct btrfs_tree_block_info *info;
10234 info = (struct btrfs_tree_block_info *)ptr;
10235 level = btrfs_tree_block_level(eb, info);
/* Skip the tree block info so that ptr lands on the first inline ref */
10236 ptr += sizeof(struct btrfs_tree_block_info);
10238 /* New METADATA_ITEM */
10239 level = key.offset;
10241 end = (unsigned long)ei + item_size;
10244 err |= ITEM_SIZE_MISMATCH;
10248 /* Now check every backref in this extent item */
10250 iref = (struct btrfs_extent_inline_ref *)ptr;
10251 type = btrfs_extent_inline_ref_type(eb, iref);
10252 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* For tree block refs the inline offset is passed as the referencing root id */
10254 case BTRFS_TREE_BLOCK_REF_KEY:
10255 ret = check_tree_block_backref(fs_info, offset, key.objectid,
/* For shared block refs the inline offset is passed as the parent block bytenr */
10259 case BTRFS_SHARED_BLOCK_REF_KEY:
10260 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10264 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref body is read starting at the offset field of the inline ref */
10265 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10266 ret = check_extent_data_backref(fs_info,
10267 btrfs_extent_data_ref_root(eb, dref),
10268 btrfs_extent_data_ref_objectid(eb, dref),
10269 btrfs_extent_data_ref_offset(eb, dref),
10270 key.objectid, key.offset,
10271 btrfs_extent_data_ref_count(eb, dref));
10274 case BTRFS_SHARED_DATA_REF_KEY:
10275 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10279 error("extent[%llu %d %llu] has unknown ref type: %d",
10280 key.objectid, key.type, key.offset, type);
10281 err |= UNKNOWN_TYPE;
/* Step over this inline ref; its size depends on the ref type */
10285 ptr += btrfs_extent_inline_ref_size(type);
10294 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the dev extent at @slot of leaf @eb is referenced by its chunk:
 * the chunk item named by the dev extent is looked up in the chunk tree, its
 * length is compared with the dev extent length, and its stripes are scanned
 * for one whose devid and offset equal the dev extent key.
 * When no such chunk stripe is found an error is printed and
 * REFERENCER_MISSING is returned.
 */
10296 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10297 struct extent_buffer *eb, int slot)
10299 struct btrfs_root *chunk_root = fs_info->chunk_root;
10300 struct btrfs_dev_extent *ptr;
10301 struct btrfs_path path;
10302 struct btrfs_key chunk_key;
10303 struct btrfs_key devext_key;
10304 struct btrfs_chunk *chunk;
10305 struct extent_buffer *l;
10309 int found_chunk = 0;
10312 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10313 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10314 length = btrfs_dev_extent_length(eb, ptr);
/* Build the chunk key from the chunk objectid and offset recorded in the dev extent */
10316 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10317 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10318 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10320 btrfs_init_path(&path);
10321 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10326 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
/* The chunk length is compared with the dev extent length */
10327 if (btrfs_chunk_length(l, chunk) != length)
10330 num_stripes = btrfs_chunk_num_stripes(l, chunk);
/* Look for the stripe that sits on this device at this physical offset */
10331 for (i = 0; i < num_stripes; i++) {
10332 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10333 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10335 if (devid == devext_key.objectid &&
10336 offset == devext_key.offset) {
10342 btrfs_release_path(&path);
10343 if (!found_chunk) {
10345 "device extent[%llu, %llu, %llu] did not find the related chunk",
10346 devext_key.objectid, devext_key.offset, length);
10347 return REFERENCER_MISSING;
10353 * Check if the used space is correct with the dev item
/*
 * Check the bytes-used field of the dev item at @slot of leaf @eb: the
 * lengths of all DEV_EXTENT items of that device in the dev tree are summed
 * and compared with the recorded value.
 * Returns REFERENCER_MISSING when the initial dev extent search fails and
 * ACCOUNTING_MISMATCH when the totals differ; both paths print an error.
 */
10355 static int check_dev_item(struct btrfs_fs_info *fs_info,
10356 struct extent_buffer *eb, int slot)
10358 struct btrfs_root *dev_root = fs_info->dev_root;
10359 struct btrfs_dev_item *dev_item;
10360 struct btrfs_path path;
10361 struct btrfs_key key;
10362 struct btrfs_dev_extent *ptr;
10368 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10369 dev_id = btrfs_device_id(eb, dev_item);
10370 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by device id, so search the dev tree from this device */
10372 key.objectid = dev_id;
10373 key.type = BTRFS_DEV_EXTENT_KEY;
10376 btrfs_init_path(&path);
10377 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item itself so the message names the item being checked */
10379 btrfs_item_key_to_cpu(eb, &key, slot);
10380 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10381 key.objectid, key.type, key.offset);
10382 btrfs_release_path(&path);
10383 return REFERENCER_MISSING;
10386 /* Iterate dev_extents to calculate the used space of a device */
10388 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Keys of a higher device id and keys of other types or devices are tested separately */
10390 if (key.objectid > dev_id)
10392 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10395 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10396 struct btrfs_dev_extent);
10397 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10399 ret = btrfs_next_item(dev_root, &path);
10403 btrfs_release_path(&path);
10405 if (used != total) {
10406 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * NOTE(review): key was just reloaded from the dev item, yet the message
 * prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id instead
 * of key.objectid, key.type and key.offset as the message above does.
 * Confirm which triple is intended.
 */
10408 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10409 total, used, BTRFS_ROOT_TREE_OBJECTID,
10410 BTRFS_DEV_EXTENT_KEY, dev_id);
10411 return ACCOUNTING_MISMATCH;
10417 * Check a block group item with its referencer (chunk) and its used space
10418 * with extent/metadata item
/*
 * Check the block group item at @slot of leaf @eb in two ways:
 *  - the chunk tree must hold a chunk item at the block group start, and the
 *    length of that chunk is compared;
 *  - the extent and metadata items inside the block group range are summed
 *    and compared with the used value of the block group, and each extent
 *    type is checked against the block group flags.
 * Problems are OR-ed into err.
 */
10420 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10421 struct extent_buffer *eb, int slot)
10423 struct btrfs_root *extent_root = fs_info->extent_root;
10424 struct btrfs_root *chunk_root = fs_info->chunk_root;
10425 struct btrfs_block_group_item *bi;
10426 struct btrfs_block_group_item bg_item;
10427 struct btrfs_path path;
10428 struct btrfs_key bg_key;
10429 struct btrfs_key chunk_key;
10430 struct btrfs_key extent_key;
10431 struct btrfs_chunk *chunk;
10432 struct extent_buffer *leaf;
10433 struct btrfs_extent_item *ei;
10434 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10442 btrfs_item_key_to_cpu(eb, &bg_key, slot);
10443 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer so its fields can be read from a local struct */
10444 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10445 used = btrfs_block_group_used(&bg_item);
10446 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is keyed by the logical start of the block group */
10448 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10449 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10450 chunk_key.offset = bg_key.objectid;
10452 btrfs_init_path(&path);
10453 /* Search for the referencer chunk */
10454 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10457 "block group[%llu %llu] did not find the related chunk item",
10458 bg_key.objectid, bg_key.offset);
10459 err |= REFERENCER_MISSING;
10461 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10462 struct btrfs_chunk);
10463 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10466 "block group[%llu %llu] related chunk item length does not match",
10467 bg_key.objectid, bg_key.offset);
10468 err |= REFERENCER_MISMATCH;
10471 btrfs_release_path(&path);
10473 /* Search from the block group bytenr */
10474 extent_key.objectid = bg_key.objectid;
10475 extent_key.type = 0;
10476 extent_key.offset = 0;
10478 btrfs_init_path(&path);
10479 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10483 /* Iterate extent tree to account used space */
10485 leaf = path.nodes[0];
10486 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Keys at or beyond the end of the block group range are tested first */
10487 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only METADATA_ITEM and EXTENT_ITEM keys take part in the accounting */
10490 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10491 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10493 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM keys are accounted separately from keys whose offset is the extent length */
10496 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10499 total += extent_key.offset;
10501 ei = btrfs_item_ptr(leaf, path.slots[0],
10502 struct btrfs_extent_item);
10503 flags = btrfs_extent_flags(leaf, ei);
/* Data extents must live in a DATA block group */
10504 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10505 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10507 "bad extent[%llu, %llu) type mismatch with chunk",
10508 extent_key.objectid,
10509 extent_key.objectid + extent_key.offset);
10510 err |= CHUNK_TYPE_MISMATCH;
/* Tree blocks must live in a SYSTEM or METADATA block group */
10512 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10513 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10514 BTRFS_BLOCK_GROUP_METADATA))) {
10516 "bad extent[%llu, %llu) type mismatch with chunk",
10517 extent_key.objectid,
10518 extent_key.objectid + nodesize);
10519 err |= CHUNK_TYPE_MISMATCH;
10523 ret = btrfs_next_item(extent_root, &path);
10529 btrfs_release_path(&path);
10531 if (total != used) {
10533 "block group[%llu %llu] used %llu but extent items used %llu",
10534 bg_key.objectid, bg_key.offset, used, total);
10535 err |= ACCOUNTING_MISMATCH;
10541 * Check a chunk item.
10542 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of leaf @eb.
 *
 * Visible steps: check that the chunk length is sector aligned, that the
 * chunk type has a type bit and at most one profile bit, that a block group
 * item keyed by (chunk start, BLOCK_GROUP_ITEM_KEY, length) exists with the
 * same flags, and that every stripe has a dev extent pointing back at this
 * chunk with the same length. Problems are OR-ed into err.
 */
10544 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10545 struct extent_buffer *eb, int slot)
10547 struct btrfs_root *extent_root = fs_info->extent_root;
10548 struct btrfs_root *dev_root = fs_info->dev_root;
10549 struct btrfs_path path;
10550 struct btrfs_key chunk_key;
10551 struct btrfs_key bg_key;
10552 struct btrfs_key devext_key;
10553 struct btrfs_chunk *chunk;
10554 struct extent_buffer *leaf;
10555 struct btrfs_block_group_item *bi;
10556 struct btrfs_block_group_item bg_item;
10557 struct btrfs_dev_extent *ptr;
10558 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10570 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10571 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10572 length = btrfs_chunk_length(eb, chunk);
/* The chunk covers the logical range [chunk_key.offset, chunk_end) */
10573 chunk_end = chunk_key.offset + length;
10574 if (!IS_ALIGNED(length, sectorsize)) {
10575 error("chunk[%llu %llu) not aligned to %u",
10576 chunk_key.offset, chunk_end, sectorsize);
10577 err |= BYTES_UNALIGNED;
10581 type = btrfs_chunk_type(eb, chunk);
10582 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10583 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10584 error("chunk[%llu %llu) has no chunk type",
10585 chunk_key.offset, chunk_end);
10586 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is nonzero only when more than one profile bit is set */
10588 if (profile && (profile & (profile - 1))) {
10589 error("chunk[%llu %llu) multiple profiles detected: %llx",
10590 chunk_key.offset, chunk_end, profile);
10591 err |= UNKNOWN_TYPE;
/* The matching block group is keyed by the chunk start and length */
10594 bg_key.objectid = chunk_key.offset;
10595 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10596 bg_key.offset = length;
10598 btrfs_init_path(&path);
10599 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10602 "chunk[%llu %llu) did not find the related block group item",
10603 chunk_key.offset, chunk_end);
10604 err |= REFERENCER_MISSING;
10606 leaf = path.nodes[0];
10607 bi = btrfs_item_ptr(leaf, path.slots[0],
10608 struct btrfs_block_group_item);
10609 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
10611 if (btrfs_block_group_flags(&bg_item) != type) {
10613 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10614 chunk_key.offset, chunk_end, type,
10615 btrfs_block_group_flags(&bg_item));
/* NOTE(review): a flags mismatch sets REFERENCER_MISSING here, while check_block_group_item uses REFERENCER_MISMATCH for a length mismatch; confirm the intended bit */
10616 err |= REFERENCER_MISSING;
10620 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
/* Every stripe must be backed by a dev extent keyed by (devid, DEV_EXTENT_KEY, physical offset) */
10621 for (i = 0; i < num_stripes; i++) {
10622 btrfs_release_path(&path);
10623 btrfs_init_path(&path);
10624 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10625 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10626 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10628 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10631 goto not_match_dev;
10633 leaf = path.nodes[0];
10634 ptr = btrfs_item_ptr(leaf, path.slots[0],
10635 struct btrfs_dev_extent);
10636 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10637 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must name this chunk and have the same length */
10638 if (objectid != chunk_key.objectid ||
10639 offset != chunk_key.offset ||
10640 btrfs_dev_extent_length(leaf, ptr) != length)
10641 goto not_match_dev;
10644 err |= BACKREF_MISSING;
/* NOTE(review): this message prints chunk_key.objectid as the range start while the other chunk messages print chunk_key.offset; confirm */
10646 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10647 chunk_key.objectid, chunk_end, i);
10650 btrfs_release_path(&path);
10656 * Main entry function to check known items and update related accounting info
/*
 * Dispatch the items of leaf @eb to the item-specific checkers according to
 * their key type. EXTENT_CSUM items are not checked here; their item size is
 * only added to the file-level total_csum_bytes counter.
 */
10658 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10660 struct btrfs_fs_info *fs_info = root->fs_info;
10661 struct btrfs_key key;
10664 struct btrfs_extent_data_ref *dref;
10669 btrfs_item_key_to_cpu(eb, &key, slot);
/* File extent items are checked against the extent tree */
10673 case BTRFS_EXTENT_DATA_KEY:
10674 ret = check_extent_data_item(root, eb, slot);
10677 case BTRFS_BLOCK_GROUP_ITEM_KEY:
10678 ret = check_block_group_item(fs_info, eb, slot);
10681 case BTRFS_DEV_ITEM_KEY:
10682 ret = check_dev_item(fs_info, eb, slot);
10685 case BTRFS_CHUNK_ITEM_KEY:
10686 ret = check_chunk_item(fs_info, eb, slot);
10689 case BTRFS_DEV_EXTENT_KEY:
10690 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
10693 case BTRFS_EXTENT_ITEM_KEY:
10694 case BTRFS_METADATA_ITEM_KEY:
10695 ret = check_extent_item(fs_info, eb, slot);
10698 case BTRFS_EXTENT_CSUM_KEY:
10699 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items: key.offset is passed where the inline variants pass the inline ref offset */
10701 case BTRFS_TREE_BLOCK_REF_KEY:
10702 ret = check_tree_block_backref(fs_info, key.offset,
10706 case BTRFS_EXTENT_DATA_REF_KEY:
10707 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10708 ret = check_extent_data_backref(fs_info,
10709 btrfs_extent_data_ref_root(eb, dref),
10710 btrfs_extent_data_ref_objectid(eb, dref),
10711 btrfs_extent_data_ref_offset(eb, dref),
10713 btrfs_extent_data_ref_count(eb, dref));
10716 case BTRFS_SHARED_BLOCK_REF_KEY:
10717 ret = check_shared_block_backref(fs_info, key.offset,
10721 case BTRFS_SHARED_DATA_REF_KEY:
10722 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while items remain in the leaf */
10730 if (++slot < btrfs_header_nritems(eb))
10737 * Helper function for later fs/subvol tree check. To determine if a tree
10738 * block should be checked.
10739 * This function ensures that only the direct referencer with the lowest
10740 * rootid checks a fs/subvolume tree block.
10742 * Backref check at extent tree would detect errors like missing subvolume
10743 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the tree that should check tree block @eb.
 *
 * The extent tree record of the block is looked up and its inline refs are
 * scanned; an inline TREE_BLOCK_REF whose root id is lower than
 * root->objectid means a lower-numbered root also references the block.
 * Only inline refs are examined; keyed refs are deliberately not searched.
 */
10745 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10747 struct btrfs_root *extent_root = root->fs_info->extent_root;
10748 struct btrfs_key key;
10749 struct btrfs_path path;
10750 struct extent_buffer *leaf;
10752 struct btrfs_extent_item *ei;
10758 struct btrfs_extent_inline_ref *iref;
10761 btrfs_init_path(&path);
/* Search with the largest possible offset, then step back to the extent item of this block */
10762 key.objectid = btrfs_header_bytenr(eb);
10763 key.type = BTRFS_METADATA_ITEM_KEY;
10764 key.offset = (u64)-1;
10767 * Any failure in backref resolving means we can't determine
10768 * whom the tree block belongs to.
10769 * So in that case, we need to check that tree block
10771 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10775 ret = btrfs_previous_extent_item(extent_root, &path,
10776 btrfs_header_bytenr(eb));
10780 leaf = path.nodes[0];
10781 slot = path.slots[0];
10782 btrfs_item_key_to_cpu(leaf, &key, slot);
10783 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Inline refs follow the extent item directly for METADATA_ITEM keys, or follow the tree block info otherwise */
10785 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10786 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10788 struct btrfs_tree_block_info *info;
10790 info = (struct btrfs_tree_block_info *)(ei + 1);
10791 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10794 item_size = btrfs_item_size_nr(leaf, slot);
10795 ptr = (unsigned long)iref;
10796 end = (unsigned long)ei + item_size;
10797 while (ptr < end) {
10798 iref = (struct btrfs_extent_inline_ref *)ptr;
10799 type = btrfs_extent_inline_ref_type(leaf, iref);
10800 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10803 * We only check the tree block if current root is
10804 * the lowest referencer of it.
/* For a TREE_BLOCK_REF the inline offset is compared as a root id */
10806 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10807 offset < root->objectid) {
10808 btrfs_release_path(&path);
/* Step over this inline ref; its size depends on the ref type */
10812 ptr += btrfs_extent_inline_ref_size(type);
10815 * Normally we should also check keyed tree block ref, but that may be
10816 * very time consuming. Inlined ref should already make us skip a lot
10817 * of refs now. So skip search keyed tree block ref.
10821 btrfs_release_path(&path);
10826 * Traversal function for tree block. We will do:
10827 * 1) Skip shared fs/subvolume tree blocks
10828 * 2) Update related bytes accounting
10829 * 3) Pre-order traversal
/*
 * Recursively check the subtree of @root that starts at tree block @node.
 *
 * Visible steps: consult should_check for fs/subvolume trees, update the
 * file-level byte counters, verify the backref of @node with
 * check_tree_block_ref, run check_leaf_items on it, and for the children
 * read each child block and recurse into it. Children matched by
 * is_dropped_key at the drop level of the root are tested separately.
 */
10831 static int traverse_tree_block(struct btrfs_root *root,
10832 struct extent_buffer *node)
10834 struct extent_buffer *eb;
10835 struct btrfs_key key;
10836 struct btrfs_key drop_key;
10844 * Skip shared fs/subvolume tree block, in that case they will
10845 * be checked by referencer with lowest rootid
10847 if (is_fstree(root->objectid) && !should_check(root, node))
10850 /* Update bytes accounting */
10851 total_btree_bytes += node->len;
10852 if (fs_root_objectid(btrfs_header_owner(node)))
10853 total_fs_tree_bytes += node->len;
10854 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10855 total_extent_tree_bytes += node->len;
/* Latch found_old_backref for a reloc-tree-owned block with mixed backref revision but no RELOC header flag */
10856 if (!found_old_backref &&
10857 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10858 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10859 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10860 found_old_backref = 1;
10862 /* pre-order traversal, check itself first */
10863 level = btrfs_header_level(node);
10864 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10865 btrfs_header_level(node),
10866 btrfs_header_owner(node));
/* A failed backref check is reported but does not stop the traversal */
10870 "check %s failed root %llu bytenr %llu level %d, force continue check",
10871 level ? "node":"leaf", root->objectid,
10872 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Free space of the block counts as wasted btree space before its items are checked */
10875 btree_space_waste += btrfs_leaf_free_space(root, node);
10876 ret = check_leaf_items(root, node);
10881 nr = btrfs_header_nritems(node);
10882 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
/* Unused key pointer slots count as wasted btree space */
10883 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10884 sizeof(struct btrfs_key_ptr);
10886 /* Then check all its children */
10887 for (i = 0; i < nr; i++) {
10888 u64 blocknr = btrfs_node_blockptr(node, i);
10890 btrfs_node_key_to_cpu(node, &key, i);
10891 if (level == root->root_item.drop_level &&
10892 is_dropped_key(&key, &drop_key))
10896 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10897 * to call the function itself.
10899 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10900 if (extent_buffer_uptodate(eb)) {
10901 ret = traverse_tree_block(root, eb);
10904 free_extent_buffer(eb);
10911 * Low memory usage version check_chunks_and_extents.
/*
 * Traversal order: the chunk root, then the tree root, then every tree
 * whose ROOT_ITEM is found in the tree root when iterating from the extent
 * tree's objectid onwards.
 *
 * Each ROOT_ITEM key is turned into a lookup key with offset (u64)-1 and
 * resolved through btrfs_read_fs_root(); the node of the resulting root is
 * handed to traverse_tree_block(). Iteration advances with
 * btrfs_next_item() and the path is released at the end.
 *
 * NOTE(review): the error message below spells "treet", and key.objectid
 * (compared against u64 constants elsewhere) is printed with %lld.
 */
10913 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10915 struct btrfs_path path;
10916 struct btrfs_key key;
10917 struct btrfs_root *root1;
10918 struct btrfs_root *cur_root;
/* The chunk tree and the tree root are traversed first */
10922 root1 = root->fs_info->chunk_root;
10923 ret = traverse_tree_block(root1, root1->node);
10926 root1 = root->fs_info->tree_root;
10927 ret = traverse_tree_block(root1, root1->node);
/* Start the tree root scan at the extent tree's ROOT_ITEM key */
10930 btrfs_init_path(&path);
10931 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10933 key.type = BTRFS_ROOT_ITEM_KEY;
10935 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10937 error("cannot find extent treet in tree_root");
10942 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10943 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* The found key with offset (u64)-1 is what btrfs_read_fs_root() gets */
10945 key.offset = (u64)-1;
10947 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10948 if (IS_ERR(cur_root) || !cur_root) {
10949 error("failed to read tree: %lld", key.objectid);
10953 ret = traverse_tree_block(cur_root, cur_root->node);
10957 ret = btrfs_next_item(root1, &path);
10963 btrfs_release_path(&path);
/*
 * Give @root a freshly initialised root block inside transaction @trans.
 *
 * The block @c gets its header zeroed and then refilled: level, bytenr
 * (c->start), generation (trans->transid), mixed backref revision and owner
 * (root->root_key.objectid). The filesystem fsid and the chunk tree uuid are
 * written into it before it is marked dirty. When @c sits at the same bytenr
 * as the previous root node, the root item is updated in the tree root with
 * btrfs_update_root(). The previous node buffer is released and @root is
 * put on the dirty list.
 *
 * NOTE(review): @overwrite presumably selects between reusing the current
 * root node and allocating a new block with btrfs_alloc_free_block(); the
 * selecting condition should be confirmed.
 */
10967 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10968 struct btrfs_root *root, int overwrite)
10970 struct extent_buffer *c;
10971 struct extent_buffer *old = root->node;
10974 struct btrfs_disk_key disk_key = {0,0,0};
10980 extent_buffer_get(c);
10983 c = btrfs_alloc_free_block(trans, root,
10985 root->root_key.objectid,
10986 &disk_key, level, 0, 0);
10989 extent_buffer_get(c);
/* Wipe the header, then fill in the fields of the new root block */
10993 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10994 btrfs_set_header_level(c, level);
10995 btrfs_set_header_bytenr(c, c->start);
10996 btrfs_set_header_generation(c, trans->transid);
10997 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10998 btrfs_set_header_owner(c, root->root_key.objectid);
11000 write_extent_buffer(c, root->fs_info->fsid,
11001 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11003 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11004 btrfs_header_chunk_tree_uuid(c),
11007 btrfs_mark_buffer_dirty(c);
11009 * this case can happen in the following case:
11011 * 1.overwrite previous root.
11013 * 2.reinit reloc data root, this is because we skip pin
11014 * down reloc data tree before which means we can allocate
11015 * same block bytenr here.
11017 if (old->start == c->start) {
11018 btrfs_set_root_generation(&root->root_item,
11020 root->root_item.level = btrfs_header_level(root->node);
11021 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11022 &root->root_key, &root->root_item);
11024 free_extent_buffer(c);
/* Release the buffer of the previous root node */
11028 free_extent_buffer(old);
11030 add_root_to_dirty_list(root);
/*
 * Pin the extent of @eb and, recursively, of the tree blocks reachable
 * from it.
 *
 * A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents is detected up front with test_range_bit();
 * otherwise the block is pinned with btrfs_pin_extent(). Then each of the
 * block's items is examined:
 *  - ROOT_ITEMs (other than those of the extent, tree reloc and data reloc
 *    roots) have their root block read via btrfs_disk_root_bytenr() and are
 *    recursed into with tree_root == 0;
 *  - node pointers of a level 1 block outside the tree root are pinned
 *    directly, using the super block nodesize, without reading the child;
 *  - other node pointers are read and recursed into with the caller's
 *    @tree_root value.
 * A child that fails extent_buffer_uptodate() is reported on stderr.
 *
 * NOTE(review): which of the two item paths applies presumably depends on
 * @level and @tree_root; confirm against the complete body.
 */
11034 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11035 struct extent_buffer *eb, int tree_root)
11037 struct extent_buffer *tmp;
11038 struct btrfs_root_item *ri;
11039 struct btrfs_key key;
11042 int level = btrfs_header_level(eb);
11048 * If we have pinned this block before, don't pin it again.
11049 * This can not only avoid forever loop with broken filesystem
11050 * but also give us some speedups.
11052 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11053 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11056 btrfs_pin_extent(fs_info, eb->start, eb->len);
11058 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11059 nritems = btrfs_header_nritems(eb);
11060 for (i = 0; i < nritems; i++) {
11062 btrfs_item_key_to_cpu(eb, &key, i);
11063 if (key.type != BTRFS_ROOT_ITEM_KEY)
11065 /* Skip the extent root and reloc roots */
11066 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11067 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11068 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11070 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11071 bytenr = btrfs_disk_root_bytenr(eb, ri);
11074 * If at any point we start needing the real root we
11075 * will have to build a stump root for the root we are
11076 * in, but for now this doesn't actually use the root so
11077 * just pass in extent_root.
11079 tmp = read_tree_block(fs_info->extent_root, bytenr,
11081 if (!extent_buffer_uptodate(tmp)) {
11082 fprintf(stderr, "Error reading root block\n");
11085 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11086 free_extent_buffer(tmp);
11090 bytenr = btrfs_node_blockptr(eb, i);
11092 /* If we aren't the tree root don't read the block */
11093 if (level == 1 && !tree_root) {
11094 btrfs_pin_extent(fs_info, bytenr, nodesize);
11098 tmp = read_tree_block(fs_info->extent_root, bytenr,
11100 if (!extent_buffer_uptodate(tmp)) {
11101 fprintf(stderr, "Error reading tree block\n");
11104 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11105 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root and then from the tree
 * root. Only the tree root walk passes tree_root = 1 to
 * pin_down_tree_blocks(); the result of that second walk is returned.
 */
11114 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11118 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11122 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups of @fs_info from the chunk tree.
 *
 * The three avail_*_alloc_bits masks are cleared first. Every CHUNK_ITEM
 * found in the chunk tree is then passed to btrfs_add_block_group() with
 * the chunk type, the key's objectid and offset, and the chunk length; a
 * range starting at key.offset and spanning the chunk length is marked
 * dirty in fs_info->free_space_cache. Afterwards the block groups are
 * stepped through with btrfs_lookup_first_block_group(), moving start
 * past each group found.
 *
 * NOTE(review): what the final pass does with each cache entry besides
 * advancing start should be confirmed against the complete body.
 */
11125 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11127 struct btrfs_block_group_cache *cache;
11128 struct btrfs_path path;
11129 struct extent_buffer *leaf;
11130 struct btrfs_chunk *chunk;
11131 struct btrfs_key key;
11135 btrfs_init_path(&path);
/* Look for CHUNK_ITEM keys in the chunk tree */
11137 key.type = BTRFS_CHUNK_ITEM_KEY;
11139 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11141 btrfs_release_path(&path);
11146 * We do this in case the block groups were screwed up and had alloc
11147 * bits that aren't actually set on the chunks. This happens with
11148 * restored images every time and could happen in real life I guess.
11150 fs_info->avail_data_alloc_bits = 0;
11151 fs_info->avail_metadata_alloc_bits = 0;
11152 fs_info->avail_system_alloc_bits = 0;
11154 /* First we need to create the in-memory block groups */
11156 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11157 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11159 btrfs_release_path(&path);
11167 leaf = path.nodes[0];
11168 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11169 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11174 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11175 btrfs_add_block_group(fs_info, 0,
11176 btrfs_chunk_type(leaf, chunk),
11177 key.objectid, key.offset,
11178 btrfs_chunk_length(leaf, chunk));
11179 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11180 key.offset + btrfs_chunk_length(leaf, chunk),
11186 cache = btrfs_lookup_first_block_group(fs_info, start);
11190 start = cache->key.objectid + cache->key.offset;
11193 btrfs_release_path(&path);
/*
 * Remove balance and relocation state from the filesystem within @trans.
 *
 * Three steps, all starting from the tree root:
 *  1. look up the balance item (BTRFS_BALANCE_OBJECTID /
 *     BTRFS_BALANCE_ITEM_KEY) and delete it with btrfs_del_item(); one
 *     branch of this step jumps straight to reinit_data_reloc;
 *  2. look up items with objectid BTRFS_TREE_RELOC_OBJECTID and delete them
 *     in batches of del_nr items starting at del_slot via btrfs_del_items();
 *  3. read the data reloc tree, record it in the transaction, reinitialise
 *     it with btrfs_fsck_reinit_root(trans, root, 0) and recreate its root
 *     directory with btrfs_make_root_dir().
 */
11197 static int reset_balance(struct btrfs_trans_handle *trans,
11198 struct btrfs_fs_info *fs_info)
11200 struct btrfs_root *root = fs_info->tree_root;
11201 struct btrfs_path path;
11202 struct extent_buffer *leaf;
11203 struct btrfs_key key;
11204 int del_slot, del_nr = 0;
/* Step 1: find and delete the balance item */
11208 btrfs_init_path(&path);
11209 key.objectid = BTRFS_BALANCE_OBJECTID;
11210 key.type = BTRFS_BALANCE_ITEM_KEY;
11212 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11217 goto reinit_data_reloc;
11222 ret = btrfs_del_item(trans, root, &path);
11225 btrfs_release_path(&path);
/* Step 2: delete the items belonging to BTRFS_TREE_RELOC_OBJECTID */
11227 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11228 key.type = BTRFS_ROOT_ITEM_KEY;
11230 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11234 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11239 ret = btrfs_del_items(trans, root, &path,
11246 btrfs_release_path(&path);
11249 ret = btrfs_search_slot(trans, root, &key, &path,
11256 leaf = path.nodes[0];
11257 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11258 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11260 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11265 del_slot = path.slots[0];
11274 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11278 btrfs_release_path(&path);
/* Step 3: reinitialise the data reloc tree and recreate its root dir */
11281 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11282 key.type = BTRFS_ROOT_ITEM_KEY;
11283 key.offset = (u64)-1;
11284 root = btrfs_read_fs_root(fs_info, &key);
11285 if (IS_ERR(root)) {
11286 fprintf(stderr, "Error reading data reloc tree\n");
11287 ret = PTR_ERR(root);
11290 record_root_in_trans(trans, root);
11291 ret = btrfs_fsck_reinit_root(trans, root, 0);
11294 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11296 btrfs_release_path(&path);
/*
 * Recreate the extent tree of @fs_info inside transaction @trans.
 *
 * Sequence, each step reporting its own failure on stderr:
 *  1. filesystems with the MIXED_GROUPS incompat feature are rejected with
 *     an explanatory message;
 *  2. pin_metadata_blocks() pins the metadata currently in use;
 *  3. the in-memory block groups are freed and rebuilt with
 *     reset_block_groups();
 *  4. the extent root is reinitialised with
 *     btrfs_fsck_reinit_root(trans, extent_root, 0);
 *  5. every block group found by btrfs_lookup_first_block_group() has its
 *     item inserted into the extent root, followed by
 *     btrfs_extent_post_op();
 *  6. reset_balance() clears any pending balance.
 */
11300 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11301 struct btrfs_fs_info *fs_info)
11307 * The only reason we don't do this is because right now we're just
11308 * walking the trees we find and pinning down their bytes, we don't look
11309 * at any of the leaves. In order to do mixed groups we'd have to check
11310 * the leaves of any fs roots and pin down the bytes for any file
11311 * extents we find. Not hard but why do it if we don't have to?
11313 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11314 fprintf(stderr, "We don't support re-initing the extent tree "
11315 "for mixed block groups yet, please notify a btrfs "
11316 "developer you want to do this so they can add this "
11317 "functionality.\n");
11322 * first we need to walk all of the trees except the extent tree and pin
11323 * down the bytes that are in use so we don't overwrite any existing
11326 ret = pin_metadata_blocks(fs_info);
11328 fprintf(stderr, "error pinning down used bytes\n");
11333 * Need to drop all the block groups since we're going to recreate all
11336 btrfs_free_block_groups(fs_info);
11337 ret = reset_block_groups(fs_info);
11339 fprintf(stderr, "error resetting the block groups\n");
11343 /* Ok we can allocate now, reinit the extent root */
11344 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11346 fprintf(stderr, "extent root initialization failed\n");
11348 * When the transaction code is updated we should end the
11349 * transaction, but for now progs only knows about commit so
11350 * just return an error.
11356 * Now we have all the in-memory block groups setup so we can make
11357 * allocations properly, and the metadata we care about is safe since we
11358 * pinned all of it above.
11361 struct btrfs_block_group_cache *cache;
11363 cache = btrfs_lookup_first_block_group(fs_info, start);
11366 start = cache->key.objectid + cache->key.offset;
11367 ret = btrfs_insert_item(trans, fs_info->extent_root,
11368 &cache->key, &cache->item,
11369 sizeof(cache->item));
11371 fprintf(stderr, "Error adding block group\n");
11374 btrfs_extent_post_op(trans, fs_info->extent_root);
11377 ret = reset_balance(trans, fs_info);
11379 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW the metadata block @eb.
 *
 * The owning root is looked up from the block's header owner through
 * btrfs_read_fs_root(); a transaction is started on it and the tree is
 * searched for the first key of @eb with path.lowest_level set to the
 * block's level. The transaction is then committed and the path released.
 *
 * Returns PTR_ERR() of the root lookup or of the transaction start when
 * either fails.
 *
 * NOTE(review): the last argument (1) of btrfs_search_slot() is presumably
 * the cow flag that causes the rewrite -- confirm. The result of
 * btrfs_commit_transaction() is not captured here.
 */
11384 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11386 struct btrfs_path path;
11387 struct btrfs_trans_handle *trans;
11388 struct btrfs_key key;
11391 printf("Recowing metadata block %llu\n", eb->start);
/* Build the root lookup key from the owner stored in the block header */
11392 key.objectid = btrfs_header_owner(eb);
11393 key.type = BTRFS_ROOT_ITEM_KEY;
11394 key.offset = (u64)-1;
11396 root = btrfs_read_fs_root(root->fs_info, &key);
11397 if (IS_ERR(root)) {
11398 fprintf(stderr, "Couldn't find owner root %llu\n",
11400 return PTR_ERR(root);
11403 trans = btrfs_start_transaction(root, 1);
11405 return PTR_ERR(trans);
/* Stop the search at the level of @eb, keyed by its first entry */
11407 btrfs_init_path(&path);
11408 path.lowest_level = btrfs_header_level(eb);
11409 if (path.lowest_level)
11410 btrfs_node_key_to_cpu(eb, &key, 0);
11412 btrfs_item_key_to_cpu(eb, &key, 0);
11414 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11415 btrfs_commit_transaction(trans, root);
11416 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree it was found in.
 *
 * The tree is resolved from bad->root_id through btrfs_read_fs_root(); a
 * transaction is started on it, the item is located with
 * btrfs_search_slot() using bad->key and removed with btrfs_del_item().
 * The transaction is committed and the path released afterwards.
 *
 * Returns PTR_ERR() of the root lookup or of the transaction start when
 * either fails.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not captured
 * here.
 */
11420 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11422 struct btrfs_path path;
11423 struct btrfs_trans_handle *trans;
11424 struct btrfs_key key;
11427 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11428 bad->key.type, bad->key.offset);
/* Resolve the tree that holds the bad item */
11429 key.objectid = bad->root_id;
11430 key.type = BTRFS_ROOT_ITEM_KEY;
11431 key.offset = (u64)-1;
11433 root = btrfs_read_fs_root(root->fs_info, &key);
11434 if (IS_ERR(root)) {
11435 fprintf(stderr, "Couldn't find owner root %llu\n",
11437 return PTR_ERR(root);
11440 trans = btrfs_start_transaction(root, 1);
11442 return PTR_ERR(trans);
11444 btrfs_init_path(&path);
11445 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11451 ret = btrfs_del_item(trans, root, &path);
11453 btrfs_commit_transaction(trans, root);
11454 btrfs_release_path(&path);
/*
 * Clear the log tree reference in the super block.
 *
 * Starts a transaction on @root, sets the super copy's log_root and
 * log_root_level to 0 and commits; ret takes the result of the commit, or
 * PTR_ERR() of the transaction handle when the start fails.
 */
11458 static int zero_log_tree(struct btrfs_root *root)
11460 struct btrfs_trans_handle *trans;
11463 trans = btrfs_start_transaction(root, 1);
11464 if (IS_ERR(trans)) {
11465 ret = PTR_ERR(trans);
11468 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11469 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11470 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range beginning at @start.
 *
 * The range is processed one csum_root->sectorsize step at a time while
 * offset < len: the data at start + offset is read into @buf with
 * read_extent_data() and then handed to btrfs_csum_file_block() together
 * with start + len and start + offset.
 *
 * NOTE(review): @buf presumably has to hold at least one sector; callers
 * in this file allocate it with the checksum root's sectorsize.
 */
11474 static int populate_csum(struct btrfs_trans_handle *trans,
11475 struct btrfs_root *csum_root, char *buf, u64 start,
11482 while (offset < len) {
11483 sectorsize = csum_root->sectorsize;
11484 ret = read_extent_data(csum_root, buf, start + offset,
11488 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11489 start + offset, buf, sectorsize);
11492 offset += sectorsize;
/*
 * Fill @csum_root with checksums for the regular file extents of one
 * fs/subvolume tree, @cur_root.
 *
 * A one-sector buffer is allocated, @cur_root is searched and its items are
 * visited with btrfs_next_item(). For EXTENT_DATA items whose file extent
 * type is BTRFS_FILE_EXTENT_REG, the extent's disk_bytenr and
 * disk_num_bytes are passed to populate_csum(). A populate_csum() result
 * of -EEXIST is tested for separately (presumably tolerated -- confirm).
 */
11497 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11498 struct btrfs_root *csum_root,
11499 struct btrfs_root *cur_root)
11501 struct btrfs_path path;
11502 struct btrfs_key key;
11503 struct extent_buffer *node;
11504 struct btrfs_file_extent_item *fi;
/* Scratch buffer sized to one sector of the checksum root */
11511 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11515 btrfs_init_path(&path);
11519 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11522 /* Iterate all regular file extents and fill its csum */
11524 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11526 if (key.type != BTRFS_EXTENT_DATA_KEY)
11528 node = path.nodes[0];
11529 slot = path.slots[0];
11530 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11531 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksum the on-disk range of the extent */
11533 start = btrfs_file_extent_disk_bytenr(node, fi);
11534 len = btrfs_file_extent_disk_num_bytes(node, fi);
11536 ret = populate_csum(trans, csum_root, buf, start, len);
11537 if (ret == -EEXIST)
11543 * TODO: if next leaf is corrupted, jump to nearest next valid
11546 ret = btrfs_next_item(cur_root, &path);
11556 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking the fs/subvolume trees.
 *
 * The tree root is searched starting at the ROOT_ITEM key of
 * BTRFS_FS_TREE_OBJECTID and iterated with btrfs_next_item(). Each key is
 * tested against BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and
 * is_fstree(); qualifying roots are opened with btrfs_read_fs_root()
 * (offset (u64)-1) and passed to fill_csum_tree_from_one_fs_root(). A root
 * that cannot be read is reported on stderr.
 */
11561 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11562 struct btrfs_root *csum_root)
11564 struct btrfs_fs_info *fs_info = csum_root->fs_info;
11565 struct btrfs_path path;
11566 struct btrfs_root *tree_root = fs_info->tree_root;
11567 struct btrfs_root *cur_root;
11568 struct extent_buffer *node;
11569 struct btrfs_key key;
/* Begin at the ROOT_ITEM of the default fs tree */
11573 btrfs_init_path(&path);
11574 key.objectid = BTRFS_FS_TREE_OBJECTID;
11576 key.type = BTRFS_ROOT_ITEM_KEY;
11577 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11586 node = path.nodes[0];
11587 slot = path.slots[0];
11588 btrfs_item_key_to_cpu(node, &key, slot);
11589 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11591 if (key.type != BTRFS_ROOT_ITEM_KEY)
11593 if (!is_fstree(key.objectid))
11595 key.offset = (u64)-1;
11597 cur_root = btrfs_read_fs_root(fs_info, &key);
11598 if (IS_ERR(cur_root) || !cur_root) {
11599 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11603 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11608 ret = btrfs_next_item(tree_root, &path);
11618 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking the extent tree.
 *
 * The extent root is searched for EXTENT_ITEM keys and traversed leaf by
 * leaf with btrfs_next_leaf(). For each EXTENT_ITEM whose extent flags
 * include BTRFS_EXTENT_FLAG_DATA, populate_csum() is called with the
 * extent's start (key.objectid). A one-sector scratch buffer is allocated
 * for the reads.
 */
11622 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11623 struct btrfs_root *csum_root)
11625 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11626 struct btrfs_path path;
11627 struct btrfs_extent_item *ei;
11628 struct extent_buffer *leaf;
11630 struct btrfs_key key;
11633 btrfs_init_path(&path);
11635 key.type = BTRFS_EXTENT_ITEM_KEY;
11637 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11639 btrfs_release_path(&path);
/* Scratch buffer sized to one sector of the checksum root */
11643 buf = malloc(csum_root->sectorsize);
11645 btrfs_release_path(&path);
11650 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11651 ret = btrfs_next_leaf(extent_root, &path);
11659 leaf = path.nodes[0];
11661 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11662 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11667 ei = btrfs_item_ptr(leaf, path.slots[0],
11668 struct btrfs_extent_item);
/* Test the DATA flag of the extent item */
11669 if (!(btrfs_extent_flags(leaf, ei) &
11670 BTRFS_EXTENT_FLAG_DATA)) {
11675 ret = populate_csum(trans, csum_root, buf, key.objectid,
11682 btrfs_release_path(&path);
11688 * Recalculate the csum and put it into the csum tree.
11690 * Extent tree init will wipe out all the extent info, so in that case, we
11691 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
11692 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher: a non-zero @search_fs_tree selects
 * fill_csum_tree_from_fs(); the other return goes through
 * fill_csum_tree_from_extent(). The chosen helper's result is returned
 * unchanged.
 */
11694 static int fill_csum_tree(struct btrfs_trans_handle *trans,
11695 struct btrfs_root *csum_root,
11696 int search_fs_tree)
11698 if (search_fs_tree)
11699 return fill_csum_tree_from_fs(trans, csum_root);
11701 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Entries are taken one at a time with first_cache_extent(), unlinked with
 * remove_cache_extent() and converted back to their enclosing
 * struct root_item_info. Finally the cache tree itself is freed and the
 * global pointer reset to NULL. The function tests for a NULL cache
 * before doing any of this.
 */
11704 static void free_roots_info_cache(void)
11706 if (!roots_info_cache)
11709 while (!cache_tree_empty(roots_info_cache)) {
11710 struct cache_extent *entry;
11711 struct root_item_info *rii;
11713 entry = first_cache_extent(roots_info_cache);
11716 remove_cache_extent(roots_info_cache, entry);
11717 rii = container_of(entry, struct root_item_info, cache_extent);
11721 free(roots_info_cache);
11722 roots_info_cache = NULL;
/*
 * Populate the global roots_info_cache from the extent tree of @info.
 *
 * The cache tree is allocated on first use. The extent tree is scanned for
 * tree block extents: METADATA_ITEMs, and EXTENT_ITEMs carrying
 * BTRFS_EXTENT_FLAG_TREE_BLOCK. The block level comes from the key offset
 * (METADATA_ITEM) or from the btrfs_tree_block_info that follows the extent
 * item. Only items whose first inline ref has type TREE_BLOCK_REF are
 * considered; the ref offset is used as the root id.
 *
 * One struct root_item_info per root id is kept in the cache (start =
 * root id, size = 1, level initialised to (u8)-1). When a block with a
 * higher level than the recorded one is seen, or none was recorded yet,
 * level, bytenr and generation are replaced and node_count is reset to 1.
 *
 * NOTE(review): blocks at the same level as the recorded one presumably
 * increment node_count -- confirm against the complete body.
 */
11725 static int build_roots_info_cache(struct btrfs_fs_info *info)
11728 struct btrfs_key key;
11729 struct extent_buffer *leaf;
11730 struct btrfs_path path;
/* Lazily create the cache tree */
11732 if (!roots_info_cache) {
11733 roots_info_cache = malloc(sizeof(*roots_info_cache));
11734 if (!roots_info_cache)
11736 cache_tree_init(roots_info_cache);
11739 btrfs_init_path(&path);
11741 key.type = BTRFS_EXTENT_ITEM_KEY;
11743 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11746 leaf = path.nodes[0];
11749 struct btrfs_key found_key;
11750 struct btrfs_extent_item *ei;
11751 struct btrfs_extent_inline_ref *iref;
11752 int slot = path.slots[0];
11757 struct cache_extent *entry;
11758 struct root_item_info *rii;
11760 if (slot >= btrfs_header_nritems(leaf)) {
11761 ret = btrfs_next_leaf(info->extent_root, &path);
11768 leaf = path.nodes[0];
11769 slot = path.slots[0];
11772 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11774 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11775 found_key.type != BTRFS_METADATA_ITEM_KEY)
11778 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11779 flags = btrfs_extent_flags(leaf, ei);
11781 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11782 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* Locate the first inline ref and the level of the tree block */
11785 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11786 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11787 level = found_key.offset;
11789 struct btrfs_tree_block_info *binfo;
11791 binfo = (struct btrfs_tree_block_info *)(ei + 1);
11792 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11793 level = btrfs_tree_block_level(leaf, binfo);
11797 * For a root extent, it must be of the following type and the
11798 * first (and only one) iref in the item.
11800 type = btrfs_extent_inline_ref_type(leaf, iref);
11801 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11804 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11805 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11807 rii = malloc(sizeof(struct root_item_info));
11812 rii->cache_extent.start = root_id;
11813 rii->cache_extent.size = 1;
11814 rii->level = (u8)-1;
11815 entry = &rii->cache_extent;
11816 ret = insert_cache_extent(roots_info_cache, entry);
11819 rii = container_of(entry, struct root_item_info,
11823 ASSERT(rii->cache_extent.start == root_id);
11824 ASSERT(rii->cache_extent.size == 1);
/* Keep the highest-level block seen for this root */
11826 if (level > rii->level || rii->level == (u8)-1) {
11827 rii->level = level;
11828 rii->bytenr = found_key.objectid;
11829 rii->gen = btrfs_extent_generation(leaf, ei);
11830 rii->node_count = 1;
11831 } else if (level == rii->level) {
11839 btrfs_release_path(&path);
/*
 * Compare the root item at @path against the data collected in
 * roots_info_cache for @root_key->objectid and, when @read_only_mode is 0,
 * rewrite it.
 *
 * Errors are printed when no cache entry exists for the root or when the
 * entry's node_count is not exactly 1. Otherwise the on-disk root item is
 * copied out of the leaf and its bytenr, level and generation are compared
 * with the cached values. On a mismatch a message is printed (suppressed
 * when both @read_only_mode and repair are set), a root item generation
 * newer than the cached one is reported, and with @read_only_mode == 0 the
 * three fields are overwritten and the item is written back into the leaf
 * with write_extent_buffer().
 *
 * NOTE(review): the exact return values for "needs repair" versus error
 * should be confirmed against the complete body.
 */
11844 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11845 struct btrfs_path *path,
11846 const struct btrfs_key *root_key,
11847 const int read_only_mode)
11849 const u64 root_id = root_key->objectid;
11850 struct cache_extent *entry;
11851 struct root_item_info *rii;
11852 struct btrfs_root_item ri;
11853 unsigned long offset;
11855 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11858 "Error: could not find extent items for root %llu\n",
11859 root_key->objectid);
11863 rii = container_of(entry, struct root_item_info, cache_extent);
11864 ASSERT(rii->cache_extent.start == root_id);
11865 ASSERT(rii->cache_extent.size == 1);
11867 if (rii->node_count != 1) {
11869 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item */
11874 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11875 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11877 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11878 btrfs_root_level(&ri) != rii->level ||
11879 btrfs_root_generation(&ri) != rii->gen) {
11882 * If we're in repair mode but our caller told us to not update
11883 * the root item, i.e. just check if it needs to be updated, don't
11884 * print this message, since the caller will call us again shortly
11885 * for the same root item without read only mode (the caller will
11886 * open a transaction first).
11888 if (!(read_only_mode && repair))
11890 "%sroot item for root %llu,"
11891 " current bytenr %llu, current gen %llu, current level %u,"
11892 " new bytenr %llu, new gen %llu, new level %u\n",
11893 (read_only_mode ? "" : "fixing "),
11895 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11896 btrfs_root_level(&ri),
11897 rii->bytenr, rii->gen, rii->level);
11899 if (btrfs_root_generation(&ri) > rii->gen) {
11901 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11902 root_id, btrfs_root_generation(&ri), rii->gen);
11906 if (!read_only_mode) {
11907 btrfs_set_root_bytenr(&ri, rii->bytenr);
11908 btrfs_set_root_level(&ri, rii->level);
11909 btrfs_set_root_generation(&ri, rii->gen);
11910 write_extent_buffer(path->nodes[0], &ri,
11911 offset, sizeof(ri));
11921 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11922 * caused read-only snapshots to be corrupted if they were created at a moment
11923 * when the source subvolume/snapshot had orphan items. The issue was that the
11924 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11925 * node instead of the post orphan cleanup root node.
11926 * So this function, and its callees, just detects and fixes those cases. Even
11927 * though the regression was for read-only snapshots, this function applies to
11928 * any snapshot/subvolume root.
11929 * This must be run before any other repair code - not doing so makes other
11930 * repair code delete or modify backrefs in the extent tree for example, which
11931 * will result in an inconsistent fs after repairing the root items.
/*
 * Driver: builds the roots info cache with build_roots_info_cache() and
 * then scans the tree root from BTRFS_FIRST_FREE_OBJECTID for ROOT_ITEM
 * keys, calling maybe_repair_root_item() on each one. Items with objectid
 * BTRFS_TREE_RELOC_OBJECTID are tested for separately.
 *
 * need_trans and the (!trans && repair) test control when a transaction is
 * opened; the roots info cache is freed and the path released before the
 * function finishes.
 *
 * NOTE(review): the result of the final btrfs_commit_transaction() call is
 * not captured.
 */
11933 static int repair_root_items(struct btrfs_fs_info *info)
11935 struct btrfs_path path;
11936 struct btrfs_key key;
11937 struct extent_buffer *leaf;
11938 struct btrfs_trans_handle *trans = NULL;
11941 int need_trans = 0;
11943 btrfs_init_path(&path);
11945 ret = build_roots_info_cache(info);
/* Scan the tree root from the first subvolume-range objectid onwards */
11949 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11950 key.type = BTRFS_ROOT_ITEM_KEY;
11955 * Avoid opening and committing transactions if a leaf doesn't have
11956 * any root items that need to be fixed, so that we avoid rotating
11957 * backup roots unnecessarily.
11960 trans = btrfs_start_transaction(info->tree_root, 1);
11961 if (IS_ERR(trans)) {
11962 ret = PTR_ERR(trans);
11967 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11971 leaf = path.nodes[0];
11974 struct btrfs_key found_key;
11976 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11977 int no_more_keys = find_next_key(&path, &key);
11979 btrfs_release_path(&path);
11981 ret = btrfs_commit_transaction(trans,
11993 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11995 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11997 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12000 ret = maybe_repair_root_item(info, &path, &found_key,
12005 if (!trans && repair) {
12008 btrfs_release_path(&path);
12018 free_roots_info_cache();
12019 btrfs_release_path(&path);
12021 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache of every block group in @fs_info.
 *
 * Block groups are visited with btrfs_lookup_first_block_group(), each one
 * is passed to btrfs_clear_free_space_cache(), and the cursor is moved to
 * the end of the group. Afterwards a transaction is started on the tree
 * root, the super block cache_generation is set to (u64)-1 and the
 * transaction is committed. PTR_ERR() of the handle is returned when the
 * transaction cannot be started.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not captured
 * here.
 */
12028 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12030 struct btrfs_trans_handle *trans;
12031 struct btrfs_block_group_cache *bg_cache;
12035 /* Clear all free space cache inodes and its extent data */
12037 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12040 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12043 current = bg_cache->key.objectid + bg_cache->key.offset;
12046 /* Don't forget to set cache_generation to -1 */
12047 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12048 if (IS_ERR(trans)) {
12049 error("failed to update super block cache generation");
12050 return PTR_ERR(trans);
12052 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12053 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 * Layout: synopsis line, one-line summary, long description, then one
 * entry per command line option.
 */
12058 const char * const cmd_check_usage[] = {
12059 "btrfs check [options] <device>",
12060 "Check structural integrity of a filesystem (unmounted).",
12061 "Check structural integrity of an unmounted filesystem. Verify internal",
12062 "trees' consistency and item connectivity. In the repair mode try to",
12063 "fix the problems found. ",
12064 "WARNING: the repair mode is considered dangerous",
12066 "-s|--super <superblock> use this superblock copy",
12067 "-b|--backup use the first valid backup root copy",
12068 "--repair try to repair the filesystem",
12069 "--readonly run in read-only mode (default)",
12070 "--init-csum-tree create a new CRC tree",
12071 "--init-extent-tree create a new extent tree",
12072 "--mode <MODE> allows choice of memory/IO trade-offs",
12073 " where MODE is one of:",
12074 " original - read inodes and extents to memory (requires",
12075 " more memory, does less IO)",
12076 " lowmem - try to use less memory but read blocks again",
12078 "--check-data-csum verify checksums of data blocks",
12079 "-Q|--qgroup-report print a report on qgroup consistency",
12080 "-E|--subvol-extents <subvolid>",
12081 " print subvolume extents and sharing state",
12082 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12083 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12084 "-p|--progress indicate progress",
12085 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
12089 int cmd_check(int argc, char **argv)
12091 struct cache_tree root_cache;
12092 struct btrfs_root *root;
12093 struct btrfs_fs_info *info;
12096 u64 tree_root_bytenr = 0;
12097 u64 chunk_root_bytenr = 0;
12098 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12101 int init_csum_tree = 0;
12103 int clear_space_cache = 0;
12104 int qgroup_report = 0;
12105 int qgroups_repaired = 0;
12106 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12110 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12111 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12112 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12113 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12114 static const struct option long_options[] = {
12115 { "super", required_argument, NULL, 's' },
12116 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12117 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12118 { "init-csum-tree", no_argument, NULL,
12119 GETOPT_VAL_INIT_CSUM },
12120 { "init-extent-tree", no_argument, NULL,
12121 GETOPT_VAL_INIT_EXTENT },
12122 { "check-data-csum", no_argument, NULL,
12123 GETOPT_VAL_CHECK_CSUM },
12124 { "backup", no_argument, NULL, 'b' },
12125 { "subvol-extents", required_argument, NULL, 'E' },
12126 { "qgroup-report", no_argument, NULL, 'Q' },
12127 { "tree-root", required_argument, NULL, 'r' },
12128 { "chunk-root", required_argument, NULL,
12129 GETOPT_VAL_CHUNK_TREE },
12130 { "progress", no_argument, NULL, 'p' },
12131 { "mode", required_argument, NULL,
12133 { "clear-space-cache", required_argument, NULL,
12134 GETOPT_VAL_CLEAR_SPACE_CACHE},
12135 { NULL, 0, NULL, 0}
12138 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12142 case 'a': /* ignored */ break;
12144 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12147 num = arg_strtou64(optarg);
12148 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12150 "super mirror should be less than %d",
12151 BTRFS_SUPER_MIRROR_MAX);
12154 bytenr = btrfs_sb_offset(((int)num));
12155 printf("using SB copy %llu, bytenr %llu\n", num,
12156 (unsigned long long)bytenr);
12162 subvolid = arg_strtou64(optarg);
12165 tree_root_bytenr = arg_strtou64(optarg);
12167 case GETOPT_VAL_CHUNK_TREE:
12168 chunk_root_bytenr = arg_strtou64(optarg);
12171 ctx.progress_enabled = true;
12175 usage(cmd_check_usage);
12176 case GETOPT_VAL_REPAIR:
12177 printf("enabling repair mode\n");
12179 ctree_flags |= OPEN_CTREE_WRITES;
12181 case GETOPT_VAL_READONLY:
12184 case GETOPT_VAL_INIT_CSUM:
12185 printf("Creating a new CRC tree\n");
12186 init_csum_tree = 1;
12188 ctree_flags |= OPEN_CTREE_WRITES;
12190 case GETOPT_VAL_INIT_EXTENT:
12191 init_extent_tree = 1;
12192 ctree_flags |= (OPEN_CTREE_WRITES |
12193 OPEN_CTREE_NO_BLOCK_GROUPS);
12196 case GETOPT_VAL_CHECK_CSUM:
12197 check_data_csum = 1;
12199 case GETOPT_VAL_MODE:
12200 check_mode = parse_check_mode(optarg);
12201 if (check_mode == CHECK_MODE_UNKNOWN) {
12202 error("unknown mode: %s", optarg);
12206 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12207 if (strcmp(optarg, "v1") == 0) {
12208 clear_space_cache = 1;
12209 } else if (strcmp(optarg, "v2") == 0) {
12210 clear_space_cache = 2;
12211 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12214 "invalid argument to --clear-space-cache, must be v1 or v2");
12217 ctree_flags |= OPEN_CTREE_WRITES;
12222 if (check_argc_exact(argc - optind, 1))
12223 usage(cmd_check_usage);
12225 if (ctx.progress_enabled) {
12226 ctx.tp = TASK_NOTHING;
12227 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12230 /* This check is the only reason for --readonly to exist */
12231 if (readonly && repair) {
12232 error("repair options are not compatible with --readonly");
12237 * Not supported yet
12239 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12240 error("low memory mode doesn't support repair yet");
12245 cache_tree_init(&root_cache);
12247 if((ret = check_mounted(argv[optind])) < 0) {
12248 error("could not check mount status: %s", strerror(-ret));
12251 error("%s is currently mounted, aborting", argv[optind]);
12256 /* only allow partial opening under repair mode */
12258 ctree_flags |= OPEN_CTREE_PARTIAL;
12260 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12261 chunk_root_bytenr, ctree_flags);
12263 error("cannot open file system");
12268 global_info = info;
12269 root = info->fs_root;
12270 if (clear_space_cache == 1) {
12271 if (btrfs_fs_compat_ro(info,
12272 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12274 "free space cache v2 detected, use --clear-space-cache v2");
12278 printf("Clearing free space cache\n");
12279 ret = clear_free_space_cache(info);
12281 error("failed to clear free space cache");
12284 printf("Free space cache cleared\n");
12287 } else if (clear_space_cache == 2) {
12288 if (!btrfs_fs_compat_ro(info,
12289 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12290 printf("no free space cache v2 to clear\n");
12294 printf("Clear free space cache v2\n");
12295 ret = btrfs_clear_free_space_tree(info);
12297 error("failed to clear free space cache v2: %d", ret);
12300 printf("free space cache v2 cleared\n");
12306 * repair mode will force us to commit transaction which
12307 * will make us fail to load log tree when mounting.
12309 if (repair && btrfs_super_log_root(info->super_copy)) {
12310 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12315 ret = zero_log_tree(root);
12317 error("failed to zero log tree: %d", ret);
12322 uuid_unparse(info->super_copy->fsid, uuidbuf);
12323 if (qgroup_report) {
12324 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12326 ret = qgroup_verify_all(info);
12332 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12333 subvolid, argv[optind], uuidbuf);
12334 ret = print_extent_state(info, subvolid);
12337 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12339 if (!extent_buffer_uptodate(info->tree_root->node) ||
12340 !extent_buffer_uptodate(info->dev_root->node) ||
12341 !extent_buffer_uptodate(info->chunk_root->node)) {
12342 error("critical roots corrupted, unable to check the filesystem");
12347 if (init_extent_tree || init_csum_tree) {
12348 struct btrfs_trans_handle *trans;
12350 trans = btrfs_start_transaction(info->extent_root, 0);
12351 if (IS_ERR(trans)) {
12352 error("error starting transaction");
12353 ret = PTR_ERR(trans);
12357 if (init_extent_tree) {
12358 printf("Creating a new extent tree\n");
12359 ret = reinit_extent_tree(trans, info);
12364 if (init_csum_tree) {
12365 printf("Reinitialize checksum tree\n");
12366 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12368 error("checksum tree initialization failed: %d",
12374 ret = fill_csum_tree(trans, info->csum_root,
12377 error("checksum tree refilling failed: %d", ret);
12382 * Ok now we commit and run the normal fsck, which will add
12383 * extent entries for all of the items it finds.
12385 ret = btrfs_commit_transaction(trans, info->extent_root);
12389 if (!extent_buffer_uptodate(info->extent_root->node)) {
12390 error("critical: extent_root, unable to check the filesystem");
12394 if (!extent_buffer_uptodate(info->csum_root->node)) {
12395 error("critical: csum_root, unable to check the filesystem");
12400 if (!ctx.progress_enabled)
12401 fprintf(stderr, "checking extents\n");
12402 if (check_mode == CHECK_MODE_LOWMEM)
12403 ret = check_chunks_and_extents_v2(root);
12405 ret = check_chunks_and_extents(root);
12408 "errors found in extent allocation tree or chunk allocation");
12410 ret = repair_root_items(info);
12414 fprintf(stderr, "Fixed %d roots.\n", ret);
12416 } else if (ret > 0) {
12418 "Found %d roots with an outdated root item.\n",
12421 "Please run a filesystem check with the option --repair to fix them.\n");
12426 if (!ctx.progress_enabled) {
12427 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12428 fprintf(stderr, "checking free space tree\n");
12430 fprintf(stderr, "checking free space cache\n");
12432 ret = check_space_cache(root);
12437 * We used to have to have these hole extents in between our real
12438 * extents so if we don't have this flag set we need to make sure there
12439 * are no gaps in the file extents for inodes, otherwise we can just
12440 * ignore it when this happens.
12442 no_holes = btrfs_fs_incompat(root->fs_info,
12443 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12444 if (!ctx.progress_enabled)
12445 fprintf(stderr, "checking fs roots\n");
12446 ret = check_fs_roots(root, &root_cache);
12450 fprintf(stderr, "checking csums\n");
12451 ret = check_csums(root);
12455 fprintf(stderr, "checking root refs\n");
12456 ret = check_root_refs(root, &root_cache);
12460 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12461 struct extent_buffer *eb;
12463 eb = list_first_entry(&root->fs_info->recow_ebs,
12464 struct extent_buffer, recow);
12465 list_del_init(&eb->recow);
12466 ret = recow_extent_buffer(root, eb);
12471 while (!list_empty(&delete_items)) {
12472 struct bad_item *bad;
12474 bad = list_first_entry(&delete_items, struct bad_item, list);
12475 list_del_init(&bad->list);
12477 ret = delete_bad_item(root, bad);
12481 if (info->quota_enabled) {
12483 fprintf(stderr, "checking quota groups\n");
12484 err = qgroup_verify_all(info);
12488 err = repair_qgroups(info, &qgroups_repaired);
12493 if (!list_empty(&root->fs_info->recow_ebs)) {
12494 error("transid errors in file system");
12498 /* Don't override original ret */
12499 if (!ret && qgroups_repaired)
12500 ret = qgroups_repaired;
12502 if (found_old_backref) { /*
12503 * there was a disk format change when mixed
12504 * backref was in testing tree. The old format
12505 * existed about one week.
12507 printf("\n * Found old mixed backref format. "
12508 "The old format is not supported! *"
12509 "\n * Please mount the FS in readonly mode, "
12510 "backup data and re-format the FS. *\n\n");
12513 printf("found %llu bytes used err is %d\n",
12514 (unsigned long long)bytes_used, ret);
12515 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12516 printf("total tree bytes: %llu\n",
12517 (unsigned long long)total_btree_bytes);
12518 printf("total fs tree bytes: %llu\n",
12519 (unsigned long long)total_fs_tree_bytes);
12520 printf("total extent tree bytes: %llu\n",
12521 (unsigned long long)total_extent_tree_bytes);
12522 printf("btree space waste bytes: %llu\n",
12523 (unsigned long long)btree_space_waste);
12524 printf("file data blocks allocated: %llu\n referenced %llu\n",
12525 (unsigned long long)data_bytes_allocated,
12526 (unsigned long long)data_bytes_referenced);
12528 free_qgroup_counts();
12529 free_root_recs_tree(&root_cache);
12533 if (ctx.progress_enabled)
12534 task_deinit(ctx.info);