2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
50 TASK_NOTHING, /* have to be the last element */
55 enum task_position tp;
57 struct task_info *info;
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
88 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 struct data_backref {
102 struct extent_backref node;
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
118 return container_of(back, struct data_backref, node);
122 * Much like data_backref, but with the undetermined members removed
123 * and changed to use a list_head.
124 * During extent scan, it is stored in root->orphan_data_extent.
125 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
127 struct orphan_data_extent {
128 struct list_head list;
136 struct tree_backref {
137 struct extent_backref node;
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
146 return container_of(back, struct tree_backref, node);
149 /* Explicit initialization for extent_record::flag_block_full_backref */
150 enum { FLAG_UNSET = 2 };
152 struct extent_record {
153 struct list_head backrefs;
154 struct list_head dups;
155 struct list_head list;
156 struct cache_extent cache;
157 struct btrfs_disk_key parent_key;
162 u64 extent_item_refs;
164 u64 parent_generation;
168 unsigned int flag_block_full_backref:2;
169 unsigned int found_rec:1;
170 unsigned int content_checked:1;
171 unsigned int owner_ref_checked:1;
172 unsigned int is_root:1;
173 unsigned int metadata:1;
174 unsigned int bad_full_backref:1;
175 unsigned int crossing_stripes:1;
176 unsigned int wrong_chunk_type:1;
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
181 return container_of(entry, struct extent_record, list);
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
200 return list_entry(entry, struct inode_backref, list);
203 struct root_item_record {
204 struct list_head list;
211 struct btrfs_key drop_key;
/*
 * Error bits OR-ed into a backref's 'errors' field while directory items,
 * directory indexes and inode/root references are matched against each
 * other.  print_ref_error() turns a mask of these bits into text.
 * Each value is a distinct single bit, written here as a hex mask.
 */
#define REF_ERR_NO_DIR_ITEM		0x0001
#define REF_ERR_NO_DIR_INDEX		0x0002
#define REF_ERR_NO_INODE_REF		0x0004
#define REF_ERR_DUP_DIR_ITEM		0x0008
#define REF_ERR_DUP_DIR_INDEX		0x0010
#define REF_ERR_DUP_INODE_REF		0x0020
#define REF_ERR_INDEX_UNMATCH		0x0040
#define REF_ERR_FILETYPE_UNMATCH	0x0080
#define REF_ERR_NAME_TOO_LONG		0x0100
#define REF_ERR_NO_ROOT_REF		0x0200
#define REF_ERR_NO_ROOT_BACKREF		0x0400
#define REF_ERR_DUP_ROOT_REF		0x0800
#define REF_ERR_DUP_ROOT_BACKREF	0x1000
228 struct file_extent_hole {
234 struct inode_record {
235 struct list_head backrefs;
236 unsigned int checked:1;
237 unsigned int merging:1;
238 unsigned int found_inode_item:1;
239 unsigned int found_dir_item:1;
240 unsigned int found_file_extent:1;
241 unsigned int found_csum_item:1;
242 unsigned int some_csum_missing:1;
243 unsigned int nodatasum:1;
256 struct rb_root holes;
257 struct list_head orphan_extents;
/*
 * Error bits OR-ed into inode_record 'errors' while an inode and the items
 * that belong to it are checked.  print_inode_error() turns a mask of these
 * bits into text.  Each value is a distinct single bit, written here as a
 * hex mask.
 */
#define I_ERR_NO_INODE_ITEM		0x0001
#define I_ERR_NO_ORPHAN_ITEM		0x0002
#define I_ERR_DUP_INODE_ITEM		0x0004
#define I_ERR_DUP_DIR_INDEX		0x0008
#define I_ERR_ODD_DIR_ITEM		0x0010
#define I_ERR_ODD_FILE_EXTENT		0x0020
#define I_ERR_BAD_FILE_EXTENT		0x0040
#define I_ERR_FILE_EXTENT_OVERLAP	0x0080
#define I_ERR_FILE_EXTENT_DISCOUNT	0x0100
#define I_ERR_DIR_ISIZE_WRONG		0x0200
#define I_ERR_FILE_NBYTES_WRONG		0x0400
#define I_ERR_ODD_CSUM_ITEM		0x0800
#define I_ERR_SOME_CSUM_MISSING		0x1000
#define I_ERR_LINK_COUNT_WRONG		0x2000
#define I_ERR_FILE_EXTENT_ORPHAN	0x4000
278 struct root_backref {
279 struct list_head list;
280 unsigned int found_dir_item:1;
281 unsigned int found_dir_index:1;
282 unsigned int found_back_ref:1;
283 unsigned int found_forward_ref:1;
284 unsigned int reachable:1;
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
295 return list_entry(entry, struct root_backref, list);
299 struct list_head backrefs;
300 struct cache_extent cache;
301 unsigned int found_root_item:1;
307 struct cache_extent cache;
312 struct cache_extent cache;
313 struct cache_tree root_cache;
314 struct cache_tree inode_cache;
315 struct inode_record *current;
324 struct walk_control {
325 struct cache_tree shared;
326 struct shared_node *nodes[BTRFS_MAX_LEVEL];
332 struct btrfs_key key;
334 struct list_head list;
337 struct extent_entry {
342 struct list_head list;
345 struct root_item_info {
346 /* level of the root */
348 /* number of nodes at this level, must be 1 for a root */
352 struct cache_extent cache_extent;
356 * Error bit for low memory mode check.
358 * Currently no caller cares about it yet. Just internal use for error
/*
 * Each flag below is a distinct single bit so that several conditions can
 * be OR-ed into one mask and still be told apart afterwards.
 *
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH,
 * which made the two conditions indistinguishable; it now uses the first
 * free bit.  All other values are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
372 static void *print_status_check(void *p)
374 struct task_ctx *priv = p;
375 const char work_indicator[] = { '.', 'o', 'O', 'o' };
377 static char *task_position_string[] = {
379 "checking free space cache",
383 task_period_start(priv->info, 1000 /* 1s */);
385 if (priv->tp == TASK_NOTHING)
389 printf("%s [%c]\r", task_position_string[priv->tp],
390 work_indicator[count % 4]);
393 task_period_wait(priv->info);
398 static int print_status_return(void *p)
406 static enum btrfs_check_mode parse_check_mode(const char *str)
408 if (strcmp(str, "lowmem") == 0)
409 return CHECK_MODE_LOWMEM;
410 if (strcmp(str, "orig") == 0)
411 return CHECK_MODE_ORIGINAL;
412 if (strcmp(str, "original") == 0)
413 return CHECK_MODE_ORIGINAL;
415 return CHECK_MODE_UNKNOWN;
418 /* Compatibility helper that allows older code to be reused */
419 static u64 first_extent_gap(struct rb_root *holes)
421 struct file_extent_hole *hole;
423 if (RB_EMPTY_ROOT(holes))
426 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
432 struct file_extent_hole *hole1;
433 struct file_extent_hole *hole2;
435 hole1 = rb_entry(node1, struct file_extent_hole, node);
436 hole2 = rb_entry(node2, struct file_extent_hole, node);
438 if (hole1->start > hole2->start)
440 if (hole1->start < hole2->start)
442 /* Now hole1->start == hole2->start */
443 if (hole1->len >= hole2->len)
445 * Hole 1 will be merge center
446 * Same hole will be merged later
449 /* Hole 2 will be merge center */
454 * Add a hole to the record
456 * This will do hole merge for copy_file_extent_holes(),
457 * which will ensure there won't be continuous holes.
459 static int add_file_extent_hole(struct rb_root *holes,
462 struct file_extent_hole *hole;
463 struct file_extent_hole *prev = NULL;
464 struct file_extent_hole *next = NULL;
466 hole = malloc(sizeof(*hole));
471 /* Since compare will not return 0, no -EEXIST will happen */
472 rb_insert(holes, &hole->node, compare_hole);
474 /* simple merge with previous hole */
475 if (rb_prev(&hole->node))
476 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
478 if (prev && prev->start + prev->len >= hole->start) {
479 hole->len = hole->start + hole->len - prev->start;
480 hole->start = prev->start;
481 rb_erase(&prev->node, holes);
486 /* iterate merge with next holes */
488 if (!rb_next(&hole->node))
490 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
492 if (hole->start + hole->len >= next->start) {
493 if (hole->start + hole->len <= next->start + next->len)
494 hole->len = next->start + next->len -
496 rb_erase(&next->node, holes);
505 static int compare_hole_range(struct rb_node *node, void *data)
507 struct file_extent_hole *hole;
510 hole = (struct file_extent_hole *)data;
513 hole = rb_entry(node, struct file_extent_hole, node);
514 if (start < hole->start)
516 if (start >= hole->start && start < hole->start + hole->len)
522 * Delete a hole in the record
524 * This will do the hole split and is much stricter than add.
526 static int del_file_extent_hole(struct rb_root *holes,
529 struct file_extent_hole *hole;
530 struct file_extent_hole tmp;
535 struct rb_node *node;
542 node = rb_search(holes, &tmp, compare_hole_range, NULL);
545 hole = rb_entry(node, struct file_extent_hole, node);
546 if (start + len > hole->start + hole->len)
550 * Now there will be no overlap, delete the hole and re-add the
551 * split(s) if they exist.
553 if (start > hole->start) {
554 prev_start = hole->start;
555 prev_len = start - hole->start;
558 if (hole->start + hole->len > start + len) {
559 next_start = start + len;
560 next_len = hole->start + hole->len - start - len;
563 rb_erase(node, holes);
566 ret = add_file_extent_hole(holes, prev_start, prev_len);
571 ret = add_file_extent_hole(holes, next_start, next_len);
578 static int copy_file_extent_holes(struct rb_root *dst,
581 struct file_extent_hole *hole;
582 struct rb_node *node;
585 node = rb_first(src);
587 hole = rb_entry(node, struct file_extent_hole, node);
588 ret = add_file_extent_hole(dst, hole->start, hole->len);
591 node = rb_next(node);
596 static void free_file_extent_holes(struct rb_root *holes)
598 struct rb_node *node;
599 struct file_extent_hole *hole;
601 node = rb_first(holes);
603 hole = rb_entry(node, struct file_extent_hole, node);
604 rb_erase(node, holes);
606 node = rb_first(holes);
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613 struct btrfs_root *root)
615 if (root->last_trans != trans->transid) {
616 root->track_dirty = 1;
617 root->last_trans = trans->transid;
618 root->commit_root = root->node;
619 extent_buffer_get(root->node);
623 static u8 imode_to_type(u32 imode)
626 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
628 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
629 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
630 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
631 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
632 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
633 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
636 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
642 struct device_record *rec1;
643 struct device_record *rec2;
645 rec1 = rb_entry(node1, struct device_record, node);
646 rec2 = rb_entry(node2, struct device_record, node);
647 if (rec1->devid > rec2->devid)
649 else if (rec1->devid < rec2->devid)
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
657 struct inode_record *rec;
658 struct inode_backref *backref;
659 struct inode_backref *orig;
660 struct inode_backref *tmp;
661 struct orphan_data_extent *src_orphan;
662 struct orphan_data_extent *dst_orphan;
667 rec = malloc(sizeof(*rec));
669 return ERR_PTR(-ENOMEM);
670 memcpy(rec, orig_rec, sizeof(*rec));
672 INIT_LIST_HEAD(&rec->backrefs);
673 INIT_LIST_HEAD(&rec->orphan_extents);
674 rec->holes = RB_ROOT;
676 list_for_each_entry(orig, &orig_rec->backrefs, list) {
677 size = sizeof(*orig) + orig->namelen + 1;
678 backref = malloc(size);
683 memcpy(backref, orig, size);
684 list_add_tail(&backref->list, &rec->backrefs);
686 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687 dst_orphan = malloc(sizeof(*dst_orphan));
692 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
695 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
702 rb = rb_first(&rec->holes);
704 struct file_extent_hole *hole;
706 hole = rb_entry(rb, struct file_extent_hole, node);
712 if (!list_empty(&rec->backrefs))
713 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714 list_del(&orig->list);
718 if (!list_empty(&rec->orphan_extents))
719 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720 list_del(&orig->list);
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
732 struct orphan_data_extent *orphan;
734 if (list_empty(orphan_extents))
736 printf("The following data extent is lost in tree %llu:\n",
738 list_for_each_entry(orphan, orphan_extents, list) {
739 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740 orphan->objectid, orphan->offset, orphan->disk_bytenr,
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
747 u64 root_objectid = root->root_key.objectid;
748 int errors = rec->errors;
752 /* reloc root errors, we print its corresponding fs root objectid*/
753 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754 root_objectid = root->root_key.offset;
755 fprintf(stderr, "reloc");
757 fprintf(stderr, "root %llu inode %llu errors %x",
758 (unsigned long long) root_objectid,
759 (unsigned long long) rec->ino, rec->errors);
761 if (errors & I_ERR_NO_INODE_ITEM)
762 fprintf(stderr, ", no inode item");
763 if (errors & I_ERR_NO_ORPHAN_ITEM)
764 fprintf(stderr, ", no orphan item");
765 if (errors & I_ERR_DUP_INODE_ITEM)
766 fprintf(stderr, ", dup inode item");
767 if (errors & I_ERR_DUP_DIR_INDEX)
768 fprintf(stderr, ", dup dir index");
769 if (errors & I_ERR_ODD_DIR_ITEM)
770 fprintf(stderr, ", odd dir item");
771 if (errors & I_ERR_ODD_FILE_EXTENT)
772 fprintf(stderr, ", odd file extent");
773 if (errors & I_ERR_BAD_FILE_EXTENT)
774 fprintf(stderr, ", bad file extent");
775 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776 fprintf(stderr, ", file extent overlap");
777 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778 fprintf(stderr, ", file extent discount");
779 if (errors & I_ERR_DIR_ISIZE_WRONG)
780 fprintf(stderr, ", dir isize wrong");
781 if (errors & I_ERR_FILE_NBYTES_WRONG)
782 fprintf(stderr, ", nbytes wrong");
783 if (errors & I_ERR_ODD_CSUM_ITEM)
784 fprintf(stderr, ", odd csum item");
785 if (errors & I_ERR_SOME_CSUM_MISSING)
786 fprintf(stderr, ", some csum missing");
787 if (errors & I_ERR_LINK_COUNT_WRONG)
788 fprintf(stderr, ", link count wrong");
789 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790 fprintf(stderr, ", orphan file extent");
791 fprintf(stderr, "\n");
792 /* Print the orphan extents if needed */
793 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
796 /* Print the holes if needed */
797 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798 struct file_extent_hole *hole;
799 struct rb_node *node;
802 node = rb_first(&rec->holes);
803 fprintf(stderr, "Found file extent holes:\n");
806 hole = rb_entry(node, struct file_extent_hole, node);
807 fprintf(stderr, "\tstart: %llu, len: %llu\n",
808 hole->start, hole->len);
809 node = rb_next(node);
812 fprintf(stderr, "\tstart: 0, len: %llu\n",
813 round_up(rec->isize, root->sectorsize));
817 static void print_ref_error(int errors)
819 if (errors & REF_ERR_NO_DIR_ITEM)
820 fprintf(stderr, ", no dir item");
821 if (errors & REF_ERR_NO_DIR_INDEX)
822 fprintf(stderr, ", no dir index");
823 if (errors & REF_ERR_NO_INODE_REF)
824 fprintf(stderr, ", no inode ref");
825 if (errors & REF_ERR_DUP_DIR_ITEM)
826 fprintf(stderr, ", dup dir item");
827 if (errors & REF_ERR_DUP_DIR_INDEX)
828 fprintf(stderr, ", dup dir index");
829 if (errors & REF_ERR_DUP_INODE_REF)
830 fprintf(stderr, ", dup inode ref");
831 if (errors & REF_ERR_INDEX_UNMATCH)
832 fprintf(stderr, ", index mismatch");
833 if (errors & REF_ERR_FILETYPE_UNMATCH)
834 fprintf(stderr, ", filetype mismatch");
835 if (errors & REF_ERR_NAME_TOO_LONG)
836 fprintf(stderr, ", name too long");
837 if (errors & REF_ERR_NO_ROOT_REF)
838 fprintf(stderr, ", no root ref");
839 if (errors & REF_ERR_NO_ROOT_BACKREF)
840 fprintf(stderr, ", no root backref");
841 if (errors & REF_ERR_DUP_ROOT_REF)
842 fprintf(stderr, ", dup root ref");
843 if (errors & REF_ERR_DUP_ROOT_BACKREF)
844 fprintf(stderr, ", dup root backref");
845 fprintf(stderr, "\n");
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
851 struct ptr_node *node;
852 struct cache_extent *cache;
853 struct inode_record *rec = NULL;
856 cache = lookup_cache_extent(inode_cache, ino, 1);
858 node = container_of(cache, struct ptr_node, cache);
860 if (mod && rec->refs > 1) {
861 node->data = clone_inode_rec(rec);
862 if (IS_ERR(node->data))
868 rec = calloc(1, sizeof(*rec));
870 return ERR_PTR(-ENOMEM);
872 rec->extent_start = (u64)-1;
874 INIT_LIST_HEAD(&rec->backrefs);
875 INIT_LIST_HEAD(&rec->orphan_extents);
876 rec->holes = RB_ROOT;
878 node = malloc(sizeof(*node));
881 return ERR_PTR(-ENOMEM);
883 node->cache.start = ino;
884 node->cache.size = 1;
887 if (ino == BTRFS_FREE_INO_OBJECTID)
890 ret = insert_cache_extent(inode_cache, &node->cache);
892 return ERR_PTR(-EEXIST);
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
899 struct orphan_data_extent *orphan;
901 while (!list_empty(orphan_extents)) {
902 orphan = list_entry(orphan_extents->next,
903 struct orphan_data_extent, list);
904 list_del(&orphan->list);
909 static void free_inode_rec(struct inode_record *rec)
911 struct inode_backref *backref;
916 while (!list_empty(&rec->backrefs)) {
917 backref = to_inode_backref(rec->backrefs.next);
918 list_del(&backref->list);
921 free_orphan_data_extents(&rec->orphan_extents);
922 free_file_extent_holes(&rec->holes);
926 static int can_free_inode_rec(struct inode_record *rec)
928 if (!rec->errors && rec->checked && rec->found_inode_item &&
929 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935 struct inode_record *rec)
937 struct cache_extent *cache;
938 struct inode_backref *tmp, *backref;
939 struct ptr_node *node;
942 if (!rec->found_inode_item)
945 filetype = imode_to_type(rec->imode);
946 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947 if (backref->found_dir_item && backref->found_dir_index) {
948 if (backref->filetype != filetype)
949 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950 if (!backref->errors && backref->found_inode_ref &&
951 rec->nlink == rec->found_link) {
952 list_del(&backref->list);
958 if (!rec->checked || rec->merging)
961 if (S_ISDIR(rec->imode)) {
962 if (rec->found_size != rec->isize)
963 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964 if (rec->found_file_extent)
965 rec->errors |= I_ERR_ODD_FILE_EXTENT;
966 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967 if (rec->found_dir_item)
968 rec->errors |= I_ERR_ODD_DIR_ITEM;
969 if (rec->found_size != rec->nbytes)
970 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971 if (rec->nlink > 0 && !no_holes &&
972 (rec->extent_end < rec->isize ||
973 first_extent_gap(&rec->holes) < rec->isize))
974 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
977 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978 if (rec->found_csum_item && rec->nodatasum)
979 rec->errors |= I_ERR_ODD_CSUM_ITEM;
980 if (rec->some_csum_missing && !rec->nodatasum)
981 rec->errors |= I_ERR_SOME_CSUM_MISSING;
984 BUG_ON(rec->refs != 1);
985 if (can_free_inode_rec(rec)) {
986 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987 node = container_of(cache, struct ptr_node, cache);
988 BUG_ON(node->data != rec);
989 remove_cache_extent(inode_cache, &node->cache);
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
997 struct btrfs_path path;
998 struct btrfs_key key;
1001 key.objectid = BTRFS_ORPHAN_OBJECTID;
1002 key.type = BTRFS_ORPHAN_ITEM_KEY;
1005 btrfs_init_path(&path);
1006 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007 btrfs_release_path(&path);
1013 static int process_inode_item(struct extent_buffer *eb,
1014 int slot, struct btrfs_key *key,
1015 struct shared_node *active_node)
1017 struct inode_record *rec;
1018 struct btrfs_inode_item *item;
1020 rec = active_node->current;
1021 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022 if (rec->found_inode_item) {
1023 rec->errors |= I_ERR_DUP_INODE_ITEM;
1026 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027 rec->nlink = btrfs_inode_nlink(eb, item);
1028 rec->isize = btrfs_inode_size(eb, item);
1029 rec->nbytes = btrfs_inode_nbytes(eb, item);
1030 rec->imode = btrfs_inode_mode(eb, item);
1031 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1033 rec->found_inode_item = 1;
1034 if (rec->nlink == 0)
1035 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036 maybe_free_inode_rec(&active_node->inode_cache, rec);
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1042 int namelen, u64 dir)
1044 struct inode_backref *backref;
1046 list_for_each_entry(backref, &rec->backrefs, list) {
1047 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1049 if (backref->dir != dir || backref->namelen != namelen)
1051 if (memcmp(name, backref->name, namelen))
1056 backref = malloc(sizeof(*backref) + namelen + 1);
1059 memset(backref, 0, sizeof(*backref));
1061 backref->namelen = namelen;
1062 memcpy(backref->name, name, namelen);
1063 backref->name[namelen] = '\0';
1064 list_add_tail(&backref->list, &rec->backrefs);
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069 u64 ino, u64 dir, u64 index,
1070 const char *name, int namelen,
1071 u8 filetype, u8 itemtype, int errors)
1073 struct inode_record *rec;
1074 struct inode_backref *backref;
1076 rec = get_inode_rec(inode_cache, ino, 1);
1077 BUG_ON(IS_ERR(rec));
1078 backref = get_inode_backref(rec, name, namelen, dir);
1081 backref->errors |= errors;
1082 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083 if (backref->found_dir_index)
1084 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085 if (backref->found_inode_ref && backref->index != index)
1086 backref->errors |= REF_ERR_INDEX_UNMATCH;
1087 if (backref->found_dir_item && backref->filetype != filetype)
1088 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1090 backref->index = index;
1091 backref->filetype = filetype;
1092 backref->found_dir_index = 1;
1093 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1095 if (backref->found_dir_item)
1096 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097 if (backref->found_dir_index && backref->filetype != filetype)
1098 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1100 backref->filetype = filetype;
1101 backref->found_dir_item = 1;
1102 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104 if (backref->found_inode_ref)
1105 backref->errors |= REF_ERR_DUP_INODE_REF;
1106 if (backref->found_dir_index && backref->index != index)
1107 backref->errors |= REF_ERR_INDEX_UNMATCH;
1109 backref->index = index;
1111 backref->ref_type = itemtype;
1112 backref->found_inode_ref = 1;
1117 maybe_free_inode_rec(inode_cache, rec);
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122 struct cache_tree *dst_cache)
1124 struct inode_backref *backref;
1129 list_for_each_entry(backref, &src->backrefs, list) {
1130 if (backref->found_dir_index) {
1131 add_inode_backref(dst_cache, dst->ino, backref->dir,
1132 backref->index, backref->name,
1133 backref->namelen, backref->filetype,
1134 BTRFS_DIR_INDEX_KEY, backref->errors);
1136 if (backref->found_dir_item) {
1138 add_inode_backref(dst_cache, dst->ino,
1139 backref->dir, 0, backref->name,
1140 backref->namelen, backref->filetype,
1141 BTRFS_DIR_ITEM_KEY, backref->errors);
1143 if (backref->found_inode_ref) {
1144 add_inode_backref(dst_cache, dst->ino,
1145 backref->dir, backref->index,
1146 backref->name, backref->namelen, 0,
1147 backref->ref_type, backref->errors);
1151 if (src->found_dir_item)
1152 dst->found_dir_item = 1;
1153 if (src->found_file_extent)
1154 dst->found_file_extent = 1;
1155 if (src->found_csum_item)
1156 dst->found_csum_item = 1;
1157 if (src->some_csum_missing)
1158 dst->some_csum_missing = 1;
1159 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1165 BUG_ON(src->found_link < dir_count);
1166 dst->found_link += src->found_link - dir_count;
1167 dst->found_size += src->found_size;
1168 if (src->extent_start != (u64)-1) {
1169 if (dst->extent_start == (u64)-1) {
1170 dst->extent_start = src->extent_start;
1171 dst->extent_end = src->extent_end;
1173 if (dst->extent_end > src->extent_start)
1174 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175 else if (dst->extent_end < src->extent_start) {
1176 ret = add_file_extent_hole(&dst->holes,
1178 src->extent_start - dst->extent_end);
1180 if (dst->extent_end < src->extent_end)
1181 dst->extent_end = src->extent_end;
1185 dst->errors |= src->errors;
1186 if (src->found_inode_item) {
1187 if (!dst->found_inode_item) {
1188 dst->nlink = src->nlink;
1189 dst->isize = src->isize;
1190 dst->nbytes = src->nbytes;
1191 dst->imode = src->imode;
1192 dst->nodatasum = src->nodatasum;
1193 dst->found_inode_item = 1;
1195 dst->errors |= I_ERR_DUP_INODE_ITEM;
1203 static int splice_shared_node(struct shared_node *src_node,
1204 struct shared_node *dst_node)
1206 struct cache_extent *cache;
1207 struct ptr_node *node, *ins;
1208 struct cache_tree *src, *dst;
1209 struct inode_record *rec, *conflict;
1210 u64 current_ino = 0;
1214 if (--src_node->refs == 0)
1216 if (src_node->current)
1217 current_ino = src_node->current->ino;
1219 src = &src_node->root_cache;
1220 dst = &dst_node->root_cache;
1222 cache = search_cache_extent(src, 0);
1224 node = container_of(cache, struct ptr_node, cache);
1226 cache = next_cache_extent(cache);
1229 remove_cache_extent(src, &node->cache);
1232 ins = malloc(sizeof(*ins));
1234 ins->cache.start = node->cache.start;
1235 ins->cache.size = node->cache.size;
1239 ret = insert_cache_extent(dst, &ins->cache);
1240 if (ret == -EEXIST) {
1241 conflict = get_inode_rec(dst, rec->ino, 1);
1242 BUG_ON(IS_ERR(conflict));
1243 merge_inode_recs(rec, conflict, dst);
1245 conflict->checked = 1;
1246 if (dst_node->current == conflict)
1247 dst_node->current = NULL;
1249 maybe_free_inode_rec(dst, conflict);
1250 free_inode_rec(rec);
1257 if (src == &src_node->root_cache) {
1258 src = &src_node->inode_cache;
1259 dst = &dst_node->inode_cache;
1263 if (current_ino > 0 && (!dst_node->current ||
1264 current_ino > dst_node->current->ino)) {
1265 if (dst_node->current) {
1266 dst_node->current->checked = 1;
1267 maybe_free_inode_rec(dst, dst_node->current);
1269 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270 BUG_ON(IS_ERR(dst_node->current));
1275 static void free_inode_ptr(struct cache_extent *cache)
1277 struct ptr_node *node;
1278 struct inode_record *rec;
1280 node = container_of(cache, struct ptr_node, cache);
1282 free_inode_rec(rec);
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1291 struct cache_extent *cache;
1292 struct shared_node *node;
1294 cache = lookup_cache_extent(shared, bytenr, 1);
1296 node = container_of(cache, struct shared_node, cache);
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1305 struct shared_node *node;
1307 node = calloc(1, sizeof(*node));
1310 node->cache.start = bytenr;
1311 node->cache.size = 1;
1312 cache_tree_init(&node->root_cache);
1313 cache_tree_init(&node->inode_cache);
1316 ret = insert_cache_extent(shared, &node->cache);
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322 struct walk_control *wc, int level)
1324 struct shared_node *node;
1325 struct shared_node *dest;
1328 if (level == wc->active_node)
1331 BUG_ON(wc->active_node <= level);
1332 node = find_shared_node(&wc->shared, bytenr);
1334 ret = add_shared_node(&wc->shared, bytenr, refs);
1336 node = find_shared_node(&wc->shared, bytenr);
1337 wc->nodes[level] = node;
1338 wc->active_node = level;
1342 if (wc->root_level == wc->active_node &&
1343 btrfs_root_refs(&root->root_item) == 0) {
1344 if (--node->refs == 0) {
1345 free_inode_recs_tree(&node->root_cache);
1346 free_inode_recs_tree(&node->inode_cache);
1347 remove_cache_extent(&wc->shared, &node->cache);
1353 dest = wc->nodes[wc->active_node];
1354 splice_shared_node(node, dest);
1355 if (node->refs == 0) {
1356 remove_cache_extent(&wc->shared, &node->cache);
1362 static int leave_shared_node(struct btrfs_root *root,
1363 struct walk_control *wc, int level)
1365 struct shared_node *node;
1366 struct shared_node *dest;
1369 if (level == wc->root_level)
1372 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1376 BUG_ON(i >= BTRFS_MAX_LEVEL);
1378 node = wc->nodes[wc->active_node];
1379 wc->nodes[wc->active_node] = NULL;
1380 wc->active_node = i;
1382 dest = wc->nodes[wc->active_node];
1383 if (wc->active_node < wc->root_level ||
1384 btrfs_root_refs(&root->root_item) > 0) {
1385 BUG_ON(node->refs <= 1);
1386 splice_shared_node(node, dest);
1388 BUG_ON(node->refs < 2);
1397 * 1 - if the root with id child_root_id is a child of root parent_root_id
1398 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1399 * has other root(s) as parent(s)
1400 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Look in the tree root for a link between @parent_root_id and
 * @child_root_id, first through a ROOT_REF item and then through the
 * ROOT_BACKREF items of the child.
 */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1405 struct btrfs_path path;
1406 struct btrfs_key key;
1407 struct extent_buffer *leaf;
1411 btrfs_init_path(&path);
/* First lookup: the exact key (parent, ROOT_REF, child) in the tree root. */
1413 key.objectid = parent_root_id;
1414 key.type = BTRFS_ROOT_REF_KEY;
1415 key.offset = child_root_id;
1416 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1420 btrfs_release_path(&path);
/* Second lookup: walk the ROOT_BACKREF items that belong to the child. */
1424 key.objectid = child_root_id;
1425 key.type = BTRFS_ROOT_BACKREF_KEY;
1427 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1433 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: move on to the next leaf. */
1434 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1438 leaf = path.nodes[0];
1441 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Only ROOT_BACKREF items of the child are of interest. */
1442 if (key.objectid != child_root_id ||
1443 key.type != BTRFS_ROOT_BACKREF_KEY)
/* The backref offset holds the parent id; a match settles the question. */
1448 if (key.offset == parent_root_id) {
1449 btrfs_release_path(&path);
1456 btrfs_release_path(&path);
/* No match found: 0 if some other parent was seen, 2 if none at all. */
1459 return has_parent ? 0 : 2;
/*
 * Walk every btrfs_dir_item packed into the item at @slot of @eb and record
 * a backref for each entry in the caches of @active_node.
 */
1462 static int process_dir_item(struct btrfs_root *root,
1463 struct extent_buffer *eb,
1464 int slot, struct btrfs_key *key,
1465 struct shared_node *active_node)
1475 struct btrfs_dir_item *di;
1476 struct inode_record *rec;
1477 struct cache_tree *root_cache;
1478 struct cache_tree *inode_cache;
1479 struct btrfs_key location;
1480 char namebuf[BTRFS_NAME_LEN];
1482 root_cache = &active_node->root_cache;
1483 inode_cache = &active_node->inode_cache;
/* The record of the directory that owns this item. */
1484 rec = active_node->current;
1485 rec->found_dir_item = 1;
1487 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488 total = btrfs_item_size_nr(eb, slot);
/* One item may hold several entries back to back. */
1489 while (cur < total) {
1491 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492 name_len = btrfs_dir_name_len(eb, di);
1493 data_len = btrfs_dir_data_len(eb, di);
1494 filetype = btrfs_dir_type(eb, di);
/* Name lengths are accumulated into the directory's found_size. */
1496 rec->found_size += name_len;
1497 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: copy only what fits in namebuf and flag the backref. */
1501 len = BTRFS_NAME_LEN;
1502 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size dir item header. */
1504 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* Entries pointing at an inode go to inode_cache, subvolume roots to root_cache. */
1506 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507 add_inode_backref(inode_cache, location.objectid,
1508 key->objectid, key->offset, namebuf,
1509 len, filetype, key->type, error);
1510 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511 add_inode_backref(root_cache, location.objectid,
1512 key->objectid, key->offset,
1513 namebuf, len, filetype,
/* Any other location type is reported and filed under BTRFS_MULTIPLE_OBJECTIDS. */
1516 fprintf(stderr, "invalid location in dir item %u\n",
1518 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519 key->objectid, key->offset, namebuf,
1520 len, filetype, key->type, error);
/* Step over header, name and data to reach the next packed entry. */
1523 len = sizeof(*di) + name_len + data_len;
1524 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with nritems above 1 is flagged as a duplicate index. */
1527 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed into the item at @slot of @eb and record
 * each one as a backref in the inode cache of @active_node.
 */
1533 static int process_inode_ref(struct extent_buffer *eb,
1534 int slot, struct btrfs_key *key,
1535 struct shared_node *active_node)
1543 struct cache_tree *inode_cache;
1544 struct btrfs_inode_ref *ref;
1545 char namebuf[BTRFS_NAME_LEN];
1547 inode_cache = &active_node->inode_cache;
1549 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550 total = btrfs_item_size_nr(eb, slot);
1551 while (cur < total) {
1552 name_len = btrfs_inode_ref_name_len(eb, ref);
1553 index = btrfs_inode_ref_index(eb, ref);
1554 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: copy only what fits in namebuf and flag the backref. */
1558 len = BTRFS_NAME_LEN;
1559 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size ref header. */
1561 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
/* key->objectid and key->offset are passed as the first two ids of the backref. */
1562 add_inode_backref(inode_cache, key->objectid, key->offset,
1563 index, namebuf, len, 0, key->type, error);
/* Advance by the full on-disk name length, not the clamped copy length. */
1565 len = sizeof(*ref) + name_len;
1566 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Walk every btrfs_inode_extref packed into the item at @slot of @eb and
 * record each one as a backref in the inode cache of @active_node.
 */
1572 static int process_inode_extref(struct extent_buffer *eb,
1573 int slot, struct btrfs_key *key,
1574 struct shared_node *active_node)
1583 struct cache_tree *inode_cache;
1584 struct btrfs_inode_extref *extref;
1585 char namebuf[BTRFS_NAME_LEN];
1587 inode_cache = &active_node->inode_cache;
1589 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590 total = btrfs_item_size_nr(eb, slot);
1591 while (cur < total) {
1592 name_len = btrfs_inode_extref_name_len(eb, extref);
1593 index = btrfs_inode_extref_index(eb, extref);
/* Unlike a plain inode ref, the parent id is read from the extref itself. */
1594 parent = btrfs_inode_extref_parent(eb, extref);
1595 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: copy only what fits in namebuf and flag the backref. */
1599 len = BTRFS_NAME_LEN;
1600 error = REF_ERR_NAME_TOO_LONG;
1602 read_extent_buffer(eb, namebuf,
1603 (unsigned long)(extref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, parent,
1605 index, namebuf, len, 0, key->type, error);
/* Advance by the full on-disk name length, not the clamped copy length. */
1607 len = sizeof(*extref) + name_len;
1608 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum items of the csum root that intersect the byte range
 * starting at @start with length @len.  @found is the caller's output
 * counter.
 */
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616 u64 len, u64 *found)
1618 struct btrfs_key key;
1619 struct btrfs_path path;
1620 struct extent_buffer *leaf;
/* Size in bytes of a single checksum, taken from the superblock. */
1625 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1627 btrfs_init_path(&path);
1629 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1631 key.type = BTRFS_EXTENT_CSUM_KEY;
1633 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the item in the previous slot is inspected as well, since
 * a csum item that begins before the search key may still cover it.
 */
1637 if (ret > 0 && path.slots[0] > 0) {
1638 leaf = path.nodes[0];
1639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641 key.type == BTRFS_EXTENT_CSUM_KEY)
1646 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: move on to the next leaf. */
1647 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1653 leaf = path.nodes[0];
1656 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Only EXTENT_CSUM items are relevant. */
1657 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658 key.type != BTRFS_EXTENT_CSUM_KEY)
/* NOTE(review): same key read again as three lines above; looks redundant. */
1661 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item begins at or beyond the end of the requested range. */
1662 if (key.offset >= start + len)
1665 if (key.offset > start)
/*
 * Each checksum of csum_size bytes covers one sector, so the item covers
 * (size / csum_size) sectors starting at key.offset.
 */
1668 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670 if (csum_end > start) {
/* Overlap with the requested range, capped at the remaining length. */
1671 size = min(csum_end - start, len);
1680 btrfs_release_path(&path);
/*
 * Account one file extent item (slot @slot of @eb) against the inode record
 * that is current in @active_node: track extent continuity, validate the
 * extent fields and compare the extent with its checksum coverage.
 */
1686 static int process_file_extent(struct btrfs_root *root,
1687 struct extent_buffer *eb,
1688 int slot, struct btrfs_key *key,
1689 struct shared_node *active_node)
1691 struct inode_record *rec;
1692 struct btrfs_file_extent_item *fi;
1694 u64 disk_bytenr = 0;
1695 u64 extent_offset = 0;
/* Mask of the bits below the sector size, used for alignment checks. */
1696 u64 mask = root->sectorsize - 1;
1700 rec = active_node->current;
/* The current record must be for this inode and hold at most one ref. */
1701 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any extent yet. */
1704 if (rec->extent_start == (u64)-1) {
1705 rec->extent_start = key->offset;
1706 rec->extent_end = key->offset;
/*
 * An extent that starts before the previous one ended overlaps it; one that
 * starts after it leaves a gap, which is recorded as a hole.
 */
1709 if (rec->extent_end > key->offset)
1710 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711 else if (rec->extent_end < key->offset) {
1712 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713 key->offset - rec->extent_end);
1718 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719 extent_type = btrfs_file_extent_type(eb, fi);
1721 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1724 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725 rec->found_size += num_bytes;
/* Round the inline length up to a sector boundary. */
1726 num_bytes = (num_bytes + mask) & ~mask;
1727 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1732 if (num_bytes == 0 || (num_bytes & mask))
1733 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced window must fit inside ram_bytes. */
1734 if (num_bytes + extent_offset >
1735 btrfs_file_extent_ram_bytes(eb, fi))
1736 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry compression, encryption or other encoding. */
1737 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738 (btrfs_file_extent_compression(eb, fi) ||
1739 btrfs_file_extent_encryption(eb, fi) ||
1740 btrfs_file_extent_other_encoding(eb, fi)))
1741 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk_bytenr add to found_size. */
1742 if (disk_bytenr > 0)
1743 rec->found_size += num_bytes;
1745 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1747 rec->extent_end = key->offset + num_bytes;
1750 * The data reloc tree will copy full extents into its inode and then
1751 * copy the corresponding csums. Because the extent it copied could be
1752 * a preallocated extent that hasn't been written to yet there may be no
1753 * csums to copy, ergo we won't have csums for our file extent. This is
1754 * ok so just don't bother checking csums if the inode belongs to the
1757 if (disk_bytenr > 0 &&
1758 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For a compressed extent the on-disk length is the one to check. */
1760 if (btrfs_file_extent_compression(eb, fi))
1761 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1763 disk_bytenr += extent_offset;
1765 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extent: note partial coverage.  Prealloc extent: csums are unexpected. */
1768 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1770 rec->found_csum_item = 1;
1771 if (found < num_bytes)
1772 rec->some_csum_missing = 1;
1773 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1775 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Iterate over all items of leaf @eb and dispatch each one, by key type, to
 * the matching process_*() helper, keeping the active node's current inode
 * record in step with the key's objectid.
 */
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782 struct walk_control *wc)
1784 struct btrfs_key key;
1788 struct cache_tree *inode_cache;
1789 struct shared_node *active_node;
/* Special case: the active node is at the root level of a root with zero refs. */
1791 if (wc->root_level == wc->active_node &&
1792 btrfs_root_refs(&root->root_item) == 0)
1795 active_node = wc->nodes[wc->active_node];
1796 inode_cache = &active_node->inode_cache;
1797 nritems = btrfs_header_nritems(eb);
1798 for (i = 0; i < nritems; i++) {
1799 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items are singled out before dispatch. */
1801 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1803 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moving on to a higher inode number: mark the previous record as checked,
 * let it be freed if possible, and fetch the record for the new inode.
 */
1806 if (active_node->current == NULL ||
1807 active_node->current->ino < key.objectid) {
1808 if (active_node->current) {
1809 active_node->current->checked = 1;
1810 maybe_free_inode_rec(inode_cache,
1811 active_node->current);
1813 active_node->current = get_inode_rec(inode_cache,
1815 BUG_ON(IS_ERR(active_node->current));
/* DIR_ITEM and DIR_INDEX share one handler. */
1818 case BTRFS_DIR_ITEM_KEY:
1819 case BTRFS_DIR_INDEX_KEY:
1820 ret = process_dir_item(root, eb, i, &key, active_node);
1822 case BTRFS_INODE_REF_KEY:
1823 ret = process_inode_ref(eb, i, &key, active_node);
1825 case BTRFS_INODE_EXTREF_KEY:
1826 ret = process_inode_extref(eb, i, &key, active_node);
1828 case BTRFS_INODE_ITEM_KEY:
1829 ret = process_inode_item(eb, i, &key, active_node);
1831 case BTRFS_EXTENT_DATA_KEY:
1832 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the child blocks referenced by @node, starting at
 * pointer index @slot and running to the last pointer of the node.
 */
1842 static void reada_walk_down(struct btrfs_root *root,
1843 struct extent_buffer *node, int slot)
1852 level = btrfs_header_level(node);
1856 nritems = btrfs_header_nritems(node);
/* Every tree block has the root's node size. */
1857 blocksize = root->nodesize;
1858 for (i = slot; i < nritems; i++) {
1859 bytenr = btrfs_node_blockptr(node, i);
1860 ptr_gen = btrfs_node_ptr_generation(node, i);
1861 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1866 * Check the child node/leaf by the following condition:
1867 * 1. the first item key of the node/leaf should be the same with the one
1869 * 2. block in parent node should match the child node/leaf.
1870 * 3. generation of parent node and child's header should be consistent.
1872 * Or the child node/leaf pointed by the key in parent is not valid.
1874 * We hope to check leaf owner too, but since subvol may share leaves,
1875 * which makes leaf owner check not so strong, key check should be
1876 * sufficient enough for that case.
1878 static int check_child_node(struct btrfs_root *root,
1879 struct extent_buffer *parent, int slot,
1880 struct extent_buffer *child)
1882 struct btrfs_key parent_key;
1883 struct btrfs_key child_key;
1886 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887 if (btrfs_header_level(child) == 0)
1888 btrfs_item_key_to_cpu(child, &child_key, 0);
1890 btrfs_node_key_to_cpu(child, &child_key, 0);
1892 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1895 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896 parent_key.objectid, parent_key.type, parent_key.offset,
1897 child_key.objectid, child_key.type, child_key.offset);
1899 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1901 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902 btrfs_node_blockptr(parent, slot),
1903 btrfs_header_bytenr(child));
1905 if (btrfs_node_ptr_generation(parent, slot) !=
1906 btrfs_header_generation(child)) {
1908 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909 btrfs_header_generation(child),
1910 btrfs_node_ptr_generation(parent, slot));
/* Block address last looked up at each tree level (indexed by level). */
1916 u64 bytenr[BTRFS_MAX_LEVEL];
/* Reference count that was found for the block address stored in bytenr[] at the same level. */
1917 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend from path->nodes[*level] towards the leaves.  Leaves are handed to
 * process_one_leaf(); every child block is read, validated against its
 * parent pointer and sanity-checked before the walk steps into it.  @nrefs
 * caches the reference count of the last block looked up at each level.
 */
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921 struct walk_control *wc, int *level,
1922 struct node_refs *nrefs)
1924 enum btrfs_tree_block_status status;
1927 struct extent_buffer *next;
1928 struct extent_buffer *cur;
1933 WARN_ON(*level < 0);
1934 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when it belongs to this very block. */
1936 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937 refs = nrefs->refs[*level];
/* Otherwise look the count up and remember it for this level. */
1940 ret = btrfs_lookup_extent_info(NULL, root,
1941 path->nodes[*level]->start,
1942 *level, 1, &refs, NULL);
1947 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948 nrefs->refs[*level] = refs;
1952 ret = enter_shared_node(root, path->nodes[*level]->start,
1960 while (*level >= 0) {
1961 WARN_ON(*level < 0);
1962 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963 cur = path->nodes[*level];
/* The header level must agree with the level being walked. */
1965 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
1968 if (path->slots[*level] >= btrfs_header_nritems(cur))
1971 ret = process_one_leaf(root, cur, wc);
/* Read the pointer to the child block at the current slot. */
1976 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978 blocksize = root->nodesize;
/* Same ref count caching as above, one level down. */
1980 if (bytenr == nrefs->bytenr[*level - 1]) {
1981 refs = nrefs->refs[*level - 1];
1983 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984 *level - 1, 1, &refs, NULL);
1988 nrefs->bytenr[*level - 1] = bytenr;
1989 nrefs->refs[*level - 1] = refs;
1994 ret = enter_shared_node(root, bytenr, refs,
1997 path->slots[*level]++;
/*
 * Try the cached copy of the child first; if it is absent or stale, start
 * readahead for the remaining siblings and read the block.
 */
2002 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004 free_extent_buffer(next);
2005 reada_walk_down(root, cur, path->slots[*level]);
2006 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: a corrupt extent record is added for it. */
2008 if (!extent_buffer_uptodate(next)) {
2009 struct btrfs_key node_key;
2011 btrfs_node_key_to_cpu(path->nodes[*level],
2013 path->slots[*level]);
2014 btrfs_add_corrupt_extent_record(root->fs_info,
2016 path->nodes[*level]->start,
2017 root->nodesize, *level);
/* The child must match what the parent pointer says about it. */
2023 ret = check_child_node(root, cur, path->slots[*level], next);
/* Structural sanity check of the child itself. */
2029 if (btrfs_is_leaf(next))
2030 status = btrfs_check_leaf(root, NULL, next);
2032 status = btrfs_check_node(root, NULL, next);
2033 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level below. */
2039 *level = *level - 1;
2040 free_extent_buffer(path->nodes[*level]);
2041 path->nodes[*level] = next;
2042 path->slots[*level] = 0;
/* Put the slot past the last item so this block reads as fully consumed. */
2045 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb from *level towards the root, looking for a node that still has a
 * slot after the current one; nodes that are finished are released.
 */
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050 struct walk_control *wc, int *level)
2053 struct extent_buffer *leaf;
2055 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, leaf is the block at level i, whatever that level is. */
2056 leaf = path->nodes[i];
/* Is there another slot to visit in this block? */
2057 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Block at *level is done: drop it from the path. */
2062 free_extent_buffer(path->nodes[*level]);
2063 path->nodes[*level] = NULL;
/* The active shared node is never below the level being left. */
2064 BUG_ON(*level > wc->active_node);
2065 if (*level == wc->active_node)
2066 leave_shared_node(root, wc, *level);
/*
 * Inspect the inode record of a root directory.  The conditions below are
 * tested in order on the record and on its first backref.
 */
2073 static int check_root_dir(struct inode_record *rec)
2075 struct inode_backref *backref;
/* Inode item missing, or errors already recorded. */
2078 if (!rec->found_inode_item || rec->errors)
/* Link count other than 1, or directory entries found pointing at it. */
2080 if (rec->nlink != 1 || rec->found_link != 0)
/* No backref at all. */
2082 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2084 backref = to_inode_backref(rec->backrefs.next);
2085 if (!backref->found_inode_ref)
/* Index other than 0, or a name other than the two bytes "..". */
2087 if (backref->index != 0 || backref->namelen != 2 ||
2088 memcmp(backref->name, "..", 2))
/* A dir index or dir item was found for this backref. */
2090 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG on the record.
 */
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098 struct btrfs_root *root, struct btrfs_path *path,
2099 struct inode_record *rec)
2101 struct btrfs_inode_item *ei;
2102 struct btrfs_key key;
/* Search with the largest possible offset for this inode's INODE_ITEM key. */
2105 key.objectid = rec->ino;
2106 key.type = BTRFS_INODE_ITEM_KEY;
2107 key.offset = (u64)-1;
2109 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 is handled as a special case. */
2113 if (!path->slots[0]) {
2120 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired. */
2121 if (key.objectid != rec->ino) {
2126 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127 struct btrfs_inode_item);
2128 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129 btrfs_mark_buffer_dirty(path->nodes[0]);
2130 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132 root->root_key.objectid);
2134 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino to @root and clear I_ERR_NO_ORPHAN_ITEM on
 * the record.
 */
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139 struct btrfs_root *root,
2140 struct btrfs_path *path,
2141 struct inode_record *rec)
2145 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
/* The path is released right after the insertion attempt. */
2146 btrfs_release_path(path);
2148 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 */
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153 struct btrfs_root *root,
2154 struct btrfs_path *path,
2155 struct inode_record *rec)
2157 struct btrfs_inode_item *ei;
2158 struct btrfs_key key;
2161 key.objectid = rec->ino;
2162 key.type = BTRFS_INODE_ITEM_KEY;
/* Search for the inode item with the transaction handle attached. */
2165 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2172 /* Since ret == 0, no need to check anything */
2173 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174 struct btrfs_inode_item);
2175 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176 btrfs_mark_buffer_dirty(path->nodes[0]);
2177 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178 printf("reset nbytes for ino %llu root %llu\n",
2179 rec->ino, root->root_key.objectid);
2181 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item described by @backref for inode @rec in its own
 * transaction, then update the cached record of the parent directory.
 */
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186 struct cache_tree *inode_cache,
2187 struct inode_record *rec,
2188 struct inode_backref *backref)
2190 struct btrfs_path path;
2191 struct btrfs_trans_handle *trans;
2192 struct btrfs_dir_item *dir_item;
2193 struct extent_buffer *leaf;
2194 struct btrfs_key key;
2195 struct btrfs_disk_key disk_key;
2196 struct inode_record *dir_rec;
2197 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name bytes. */
2198 u32 data_size = sizeof(*dir_item) + backref->namelen;
2201 trans = btrfs_start_transaction(root, 1);
2203 return PTR_ERR(trans);
2205 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206 (unsigned long long)rec->ino);
2208 btrfs_init_path(&path);
/* Key of the new item: (parent dir, DIR_INDEX, index). */
2209 key.objectid = backref->dir;
2210 key.type = BTRFS_DIR_INDEX_KEY;
2211 key.offset = backref->index;
2212 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2215 leaf = path.nodes[0];
2216 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The entry's location key points at the inode item of rec->ino. */
2218 disk_key.objectid = cpu_to_le64(rec->ino);
2219 disk_key.type = BTRFS_INODE_ITEM_KEY;
2220 disk_key.offset = 0;
2222 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224 btrfs_set_dir_data_len(leaf, dir_item, 0);
2225 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written right after the dir item header. */
2226 name_ptr = (unsigned long)(dir_item + 1);
2227 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228 btrfs_mark_buffer_dirty(leaf);
2229 btrfs_release_path(&path);
/* NOTE(review): the return value of the commit is not examined here. */
2230 btrfs_commit_transaction(trans, root);
2232 backref->found_dir_index = 1;
2233 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234 BUG_ON(IS_ERR(dir_rec));
/* Account the new name in the directory and re-evaluate its isize flag. */
2237 dir_rec->found_size += backref->namelen;
2238 if (dir_rec->found_size == dir_rec->isize &&
2239 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241 if (dir_rec->found_size != dir_rec->isize)
2242 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root in its own
 * transaction.
 */
2247 static int delete_dir_index(struct btrfs_root *root,
2248 struct cache_tree *inode_cache,
2249 struct inode_record *rec,
2250 struct inode_backref *backref)
2252 struct btrfs_trans_handle *trans;
2253 struct btrfs_dir_item *di;
2254 struct btrfs_path path;
2257 trans = btrfs_start_transaction(root, 1);
2259 return PTR_ERR(trans);
2261 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262 (unsigned long long)backref->dir,
2263 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264 (unsigned long long)root->objectid);
2266 btrfs_init_path(&path);
/* Locate the entry by directory, name and index. */
2267 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268 backref->name, backref->namelen,
2269 backref->index, -1);
/* One exit path releases the path and commits without deleting anything. */
2272 btrfs_release_path(&path);
2273 btrfs_commit_transaction(trans, root);
/* Either the whole item is removed or only this one name inside it. */
2280 ret = btrfs_del_item(trans, root, &path);
2282 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2284 btrfs_release_path(&path);
2285 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino from what the record knows
 * about it, in its own transaction.  The result is a best-effort inode:
 * mode, size, link count and timestamps are synthesized.
 */
2289 static int create_inode_item(struct btrfs_root *root,
2290 struct inode_record *rec,
2291 struct inode_backref *backref, int root_dir)
2293 struct btrfs_trans_handle *trans;
2294 struct btrfs_inode_item inode_item;
/* atime, ctime and mtime of the new inode are all set to the current time. */
2295 time_t now = time(NULL);
2298 trans = btrfs_start_transaction(root, 1);
2299 if (IS_ERR(trans)) {
2300 ret = PTR_ERR(trans);
2304 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305 "be incomplete, please check permissions and content after "
2306 "the fsck completes.\n", (unsigned long long)root->objectid,
2307 (unsigned long long)rec->ino);
2309 memset(&inode_item, 0, sizeof(inode_item));
2310 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either 1 or the number of links found for the inode. */
2312 btrfs_set_stack_inode_nlink(&inode_item, 1);
2314 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* Dir items win over file extents when deciding the inode type. */
2316 if (rec->found_dir_item) {
2317 if (rec->found_file_extent)
2318 fprintf(stderr, "root %llu inode %llu has both a dir "
2319 "item and extents, unsure if it is a dir or a "
2320 "regular file so setting it as a directory\n",
2321 (unsigned long long)root->objectid,
2322 (unsigned long long)rec->ino);
2323 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true when reached; a plain else would do. */
2325 } else if (!rec->found_dir_item) {
/* A regular file's size is taken from the end of its last extent. */
2326 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2329 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
/* otime is left at zero. */
2335 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2338 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2340 btrfs_commit_transaction(trans, root);
/*
 * Go over all backrefs of @rec and repair the inconsistent ones: delete bad
 * dir indexes, add missing dir indexes, add missing dir index/item pairs and
 * recreate missing inode items.  Which of the repairs run depends on the
 * delete flag tested below.
 */
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345 struct inode_record *rec,
2346 struct cache_tree *inode_cache,
2349 struct inode_backref *tmp, *backref;
2350 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The _safe iterator is needed because entries are unlinked inside the loop. */
2354 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory without an inode item: recreate the item first. */
2355 if (!delete && rec->ino == root_dirid) {
2356 if (!rec->found_inode_item) {
2357 ret = create_inode_item(root, rec, backref, 1);
2364 /* Index 0 for root dir's are special, don't mess with it */
2365 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index without an inode ref, or one whose index does not match the
 * inode ref, is deleted and the backref dropped from the list.
 */
2369 ((backref->found_dir_index && !backref->found_inode_ref) ||
2370 (backref->found_dir_index && backref->found_inode_ref &&
2371 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372 ret = delete_dir_index(root, inode_cache, rec, backref);
2376 list_del(&backref->list);
/* Dir item and inode ref present, dir index missing: add the index. */
2380 if (!delete && !backref->found_dir_index &&
2381 backref->found_dir_item && backref->found_inode_ref) {
2382 ret = add_missing_dir_index(root, inode_cache, rec,
/* NOTE(review): found_dir_index is tested twice below; the second test is redundant. */
2387 if (backref->found_dir_item &&
2388 backref->found_dir_index &&
2389 backref->found_dir_index) {
/* Fully consistent backref: nothing left to repair, drop it. */
2390 if (!backref->errors &&
2391 backref->found_inode_ref) {
2392 list_del(&backref->list);
/* Only the inode ref exists: try to add the dir index/item pair. */
2398 if (!delete && (!backref->found_dir_index &&
2399 !backref->found_dir_item &&
2400 backref->found_inode_ref)) {
2401 struct btrfs_trans_handle *trans;
2402 struct btrfs_key location;
2404 ret = check_dir_conflict(root, backref->name,
2410 * let nlink fixing routine to handle it,
2411 * which can do it better.
2416 location.objectid = rec->ino;
2417 location.type = BTRFS_INODE_ITEM_KEY;
2418 location.offset = 0;
2420 trans = btrfs_start_transaction(root, 1);
2421 if (IS_ERR(trans)) {
2422 ret = PTR_ERR(trans);
2425 fprintf(stderr, "adding missing dir index/item pair "
2427 (unsigned long long)rec->ino);
2428 ret = btrfs_insert_dir_item(trans, root, backref->name,
2430 backref->dir, &location,
2431 imode_to_type(rec->imode),
2434 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item is missing: recreate it. */
2438 if (!delete && (backref->found_inode_ref &&
2439 backref->found_dir_index &&
2440 backref->found_dir_item &&
2441 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442 !rec->found_inode_item)) {
2443 ret = create_inode_item(root, rec, backref, 0);
/* An error code takes precedence over the repaired value. */
2450 return ret ? ret : repaired;
2454 * To determine the file type for nlink/inode_item repair
2456 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457 * Return -ENOENT if file type is not found.
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2461 struct inode_backref *backref;
2463 /* For inode item recovered case */
2464 if (rec->found_inode_item) {
2465 *type = imode_to_type(rec->imode);
2469 list_for_each_entry(backref, &rec->backrefs, list) {
2470 if (backref->found_dir_index || backref->found_dir_item) {
2471 *type = backref->filetype;
2479 * To determine the file name for nlink repair
2481 * Return 0 if file name is found, set name and namelen.
2482 * Return -ENOENT if file name is not found.
2484 static int find_file_name(struct inode_record *rec,
2485 char *name, int *namelen)
2487 struct inode_backref *backref;
2489 list_for_each_entry(backref, &rec->backrefs, list) {
2490 if (backref->found_dir_index || backref->found_dir_item ||
2491 backref->found_inode_ref) {
2492 memcpy(name, backref->name, backref->namelen);
2493 *namelen = backref->namelen;
2500 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino: unlink every backref, set the inode's
 * nlink to 0, then add the remaining backrefs back so that each added link
 * bumps nlink again.
 */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502 struct btrfs_root *root,
2503 struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct inode_backref *backref;
2507 struct inode_backref *tmp;
2508 struct btrfs_key key;
2509 struct btrfs_inode_item *inode_item;
2512 /* We don't believe this either, reset it and iterate backref */
2513 rec->found_link = 0;
2515 /* Remove all backref including the valid ones */
2516 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518 backref->index, backref->name,
2519 backref->namelen, 0);
2523 /* remove invalid backref, so it won't be added back */
/* A backref is valid only when dir index, dir item and inode ref were all found. */
2524 if (!(backref->found_dir_index &&
2525 backref->found_dir_item &&
2526 backref->found_inode_ref)) {
2527 list_del(&backref->list);
2534 /* Set nlink to 0 */
2535 key.objectid = rec->ino;
2536 key.type = BTRFS_INODE_ITEM_KEY;
2538 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2545 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546 struct btrfs_inode_item);
2547 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548 btrfs_mark_buffer_dirty(path->nodes[0]);
2549 btrfs_release_path(path);
2552 * Add back valid inode_ref/dir_item/dir_index,
2553 * add_link() will handle the nlink inc, so new nlink must be correct
/* Only the backrefs that survived the pruning above are still on the list. */
2555 list_for_each_entry(backref, &rec->backrefs, list) {
2556 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557 backref->name, backref->namelen,
2558 backref->filetype, &backref->index, 1);
2563 btrfs_release_path(path);
/*
 * Repair the link count of rec->ino.  The nlink is rebuilt with
 * reset_nlink(); an inode left without any link is moved into a
 * "lost+found" directory under a recovered or synthesized name.
 */
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568 struct btrfs_root *root,
2569 struct btrfs_path *path,
2570 struct inode_record *rec)
2572 char *dir_name = "lost+found";
2573 char namebuf[BTRFS_NAME_LEN] = {0};
2578 int name_recovered = 0;
2579 int type_recovered = 0;
2583 * Get file name and type first before these invalid inode ref
2584 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation. */
2586 name_recovered = !find_file_name(rec, namebuf, &namelen);
2587 type_recovered = !find_file_type(rec, &type);
/* Fallback name: the decimal inode number. */
2589 if (!name_recovered) {
2590 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591 rec->ino, rec->ino);
2592 namelen = count_digits(rec->ino);
2593 sprintf(namebuf, "%llu", rec->ino);
/* Fallback type: regular file. */
2596 if (!type_recovered) {
2597 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2599 type = BTRFS_FT_REG_FILE;
2603 ret = reset_nlink(trans, root, path, rec);
2606 "Failed to reset nlink for inode %llu: %s\n",
2607 rec->ino, strerror(-ret));
/* No link survived the reset: the inode goes to lost+found. */
2611 if (rec->found_link == 0) {
2612 lost_found_ino = root->highest_inode;
2613 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
/* The directory is created under BTRFS_FIRST_FREE_OBJECTID. */
2618 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2622 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623 dir_name, strerror(-ret));
2626 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627 namebuf, namelen, type, NULL, 1);
2629 * Add ".INO" suffix several times to handle case where
2630 * "FILENAME.INO" is already taken by another file.
2632 while (ret == -EEXIST) {
2634 * Conflicting file name, add ".INO" as suffix * +1 for '.'
/* The suffixed name must still fit within the name length limit. */
2636 if (namelen + count_digits(rec->ino) + 1 >
2641 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2643 namelen += count_digits(rec->ino) + 1;
2644 ret = btrfs_add_link(trans, root, rec->ino,
2645 lost_found_ino, namebuf,
2646 namelen, type, NULL, 1);
2650 "Failed to link the inode %llu to %s dir: %s\n",
2651 rec->ino, dir_name, strerror(-ret));
2655 * Just increase the found_link, don't actually add the
2656 * backref. This will make things easier and this inode
2657 * record will be freed after the repair is done.
2658 * So fsck will not report problem about this inode.
2661 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662 namelen, namebuf, dir_name);
2664 printf("Fixed the nlink of inode %llu\n", rec->ino);
2667 * Clear the flag anyway, or we will loop forever for the same inode
2668 * as it will not be removed from the bad inode list and the dead loop
2671 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672 btrfs_release_path(path);
2677 * Check if there is any normal(reg or prealloc) file extent for given
2679 * This is used to determine the file type when neither its dir_index/item or
2680 * inode_item exists.
2682 * This will *NOT* report error, if any error happens, just consider it does
2683 * not have any normal file extent.
/*
 * Scan the EXTENT_DATA items of inode @ino in @root, looking for one whose
 * extent type is not inline.
 */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2687 struct btrfs_path path;
2688 struct btrfs_key key;
2689 struct btrfs_key found_key;
2690 struct btrfs_file_extent_item *fi;
2694 btrfs_init_path(&path);
2696 key.type = BTRFS_EXTENT_DATA_KEY;
/* Read-only search: no transaction handle, no insert length, no COW. */
2699 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the end of the leaf: try the next leaf. */
2704 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705 ret = btrfs_next_leaf(root, &path);
2712 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Only EXTENT_DATA items of this inode are of interest. */
2714 if (found_key.objectid != ino ||
2715 found_key.type != BTRFS_EXTENT_DATA_KEY)
2717 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718 struct btrfs_file_extent_item);
2719 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Anything that is not an inline extent counts as a normal file extent. */
2720 if (type != BTRFS_FILE_EXTENT_INLINE) {
2726 btrfs_release_path(&path);
2730 static u32 btrfs_type_to_imode(u8 type)
2732 static u32 imode_by_btrfs_type[] = {
2733 [BTRFS_FT_REG_FILE] = S_IFREG,
2734 [BTRFS_FT_DIR] = S_IFDIR,
2735 [BTRFS_FT_CHRDEV] = S_IFCHR,
2736 [BTRFS_FT_BLKDEV] = S_IFBLK,
2737 [BTRFS_FT_FIFO] = S_IFIFO,
2738 [BTRFS_FT_SOCK] = S_IFSOCK,
2739 [BTRFS_FT_SYMLINK] = S_IFLNK,
2742 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of rec->ino.  The file type is taken from
 * find_file_type() when possible and otherwise guessed from what the record
 * contains; only the inode item is recreated, link count repair is left to
 * the nlink repair step that is triggered at the end.
 */
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746 struct btrfs_root *root,
2747 struct btrfs_path *path,
2748 struct inode_record *rec)
2752 int type_recovered = 0;
2755 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation. */
2757 type_recovered = !find_file_type(rec, &filetype);
2760 * Try to determine inode type if type not found.
2762 * For found regular file extent, it must be FILE.
2763 * For found dir_item/index, it must be DIR.
2765 * For undetermined one, use FILE as fallback.
2768 * 1. If found backref(inode_index/item is already handled) to it,
2770 * Need new inode-inode ref structure to allow search for that.
2772 if (!type_recovered) {
/* A non-inline file extent on disk means a regular file. */
2773 if (rec->found_file_extent &&
2774 find_normal_file_extent(root, rec->ino)) {
2776 filetype = BTRFS_FT_REG_FILE;
/* Dir items keyed by this inode mean a directory. */
2777 } else if (rec->found_dir_item) {
2779 filetype = BTRFS_FT_DIR;
/* Orphan data extents attached to the record also mean a regular file. */
2780 } else if (!list_empty(&rec->orphan_extents)) {
2782 filetype = BTRFS_FT_REG_FILE;
2784 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2787 filetype = BTRFS_FT_REG_FILE;
/* The type bits derived from filetype are OR-ed into mode. */
2791 ret = btrfs_new_inode(trans, root, rec->ino,
2792 mode | btrfs_type_to_imode(filetype));
2797 * Here inode rebuild is done, we only rebuild the inode item,
2798 * don't repair the nlink(like move to lost+found).
2799 * That is the job of nlink repair.
2801 * We just fill the record and return
/* NOTE(review): found_dir_item is set for every file type; confirm found_inode_item was not intended. */
2803 rec->found_dir_item = 1;
2804 rec->imode = mode | btrfs_type_to_imode(filetype);
2806 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807 /* Ensure the inode_nlinks repair function will be called */
2808 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach or discard the orphan data extents queued on @rec->orphan_extents.
 *
 * For each entry the range [offset, offset + disk_len) of the owning inode is
 * probed with btrfs_get_extent().  The visible code has two outcomes: a
 * conflicting orphan is reported and released with btrfs_free_extent() on the
 * extent root, while a non-conflicting one is inserted as a file extent using
 * disk_len for both lengths.  After an insert, found_size grows by disk_len,
 * I_ERR_FILE_NBYTES_WRONG is cleared once found_size equals nbytes, and the
 * range is removed from @rec->holes, clearing I_ERR_FILE_EXTENT_DISCOUNT when
 * the hole tree becomes empty.  Every processed entry is unlinked from the
 * list, and I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 *
 * NOTE(review): the branch conditions and error exits between these steps are
 * outside the visible lines.
 */
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814 struct btrfs_root *root,
2815 struct btrfs_path *path,
2816 struct inode_record *rec)
2818 struct orphan_data_extent *orphan;
2819 struct orphan_data_extent *tmp;
2822 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2824 * Check for conflicting file extents
2826 * Here we don't know whether the extents is compressed or not,
2827 * so we can only assume it not compressed nor data offset,
2828 * and use its disk_len as extent length.
2830 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831 orphan->offset, orphan->disk_len, 0);
2832 btrfs_release_path(path);
2837 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838 orphan->disk_bytenr, orphan->disk_len);
2839 ret = btrfs_free_extent(trans,
2840 root->fs_info->extent_root,
2841 orphan->disk_bytenr, orphan->disk_len,
2842 0, root->objectid, orphan->objectid,
2847 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848 orphan->offset, orphan->disk_bytenr,
2849 orphan->disk_len, orphan->disk_len);
2853 /* Update file size info */
2854 rec->found_size += orphan->disk_len;
2855 if (rec->found_size == rec->nbytes)
2856 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2858 /* Update the file extent hole info too */
2859 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2863 if (RB_EMPTY_ROOT(&rec->holes))
2864 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2866 list_del(&orphan->list);
2869 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in @rec->holes of inode @rec->ino with hole extents.
 *
 * Starting from rb_first(), each file_extent_hole is punched with
 * btrfs_punch_hole(), then removed from the tree with del_file_extent_hole();
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared as soon as the tree is empty.  A
 * separate punch covering [0, round_up(isize, sectorsize)) handles a file
 * that lost all of its file extents.  A summary line is printed at the end.
 *
 * NOTE(review): the loop header, the condition guarding the special case and
 * the error exits are outside the visible lines.
 */
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875 struct btrfs_root *root,
2876 struct btrfs_path *path,
2877 struct inode_record *rec)
2879 struct rb_node *node;
2880 struct file_extent_hole *hole;
2884 node = rb_first(&rec->holes);
2888 hole = rb_entry(node, struct file_extent_hole, node);
2889 ret = btrfs_punch_hole(trans, root, rec->ino,
2890 hole->start, hole->len);
2893 ret = del_file_extent_hole(&rec->holes, hole->start,
2897 if (RB_EMPTY_ROOT(&rec->holes))
2898 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the first remaining hole; the previous node was deleted. */
2899 node = rb_first(&rec->holes);
2901 /* special case for a file losing all its file extent */
2903 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904 round_up(rec->isize, root->sectorsize));
2908 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside a single transaction.
 *
 * Nothing is attempted unless @rec->errors contains at least one of the
 * repairable bits tested below.  A transaction with 7 reserved items is
 * started (PTR_ERR() of the handle is returned if that fails), then the
 * helpers run in a fixed order: missing inode item, orphan extents, discount
 * extents, dir isize, orphan item, nlinks, nbytes.  Apart from the first
 * one, each helper runs only while ret is still 0 and its error bit is still
 * set, so a helper may clear or raise bits for the ones after it.  The
 * transaction is committed and the path released before returning.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is discarded,
 * so a failed commit is not reported to the caller.
 */
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2916 struct btrfs_trans_handle *trans;
2917 struct btrfs_path path;
2920 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921 I_ERR_NO_ORPHAN_ITEM |
2922 I_ERR_LINK_COUNT_WRONG |
2923 I_ERR_NO_INODE_ITEM |
2924 I_ERR_FILE_EXTENT_ORPHAN |
2925 I_ERR_FILE_EXTENT_DISCOUNT|
2926 I_ERR_FILE_NBYTES_WRONG)))
2930 * For nlink repair, it may create a dir and add link, so
2931 * 2 for parent(256)'s dir_index and dir_item
2932 * 2 for lost+found dir's inode_item and inode_ref
2933 * 1 for the new inode_ref of the file
2934 * 2 for lost+found dir's dir_index and dir_item for the file
2936 trans = btrfs_start_transaction(root, 7);
2938 return PTR_ERR(trans);
2940 btrfs_init_path(&path);
2941 if (rec->errors & I_ERR_NO_INODE_ITEM)
2942 ret = repair_inode_no_item(trans, root, &path, rec);
2943 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948 ret = repair_inode_isize(trans, root, &path, rec);
2949 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952 ret = repair_inode_nlinks(trans, root, &path, rec);
2953 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954 ret = repair_inode_nbytes(trans, root, &path, rec);
2955 btrfs_commit_transaction(trans, root);
2956 btrfs_release_path(&path);
/*
 * Validate, and when `repair` is set try to fix, every inode record that was
 * collected in @inode_cache for @root.
 *
 * Steps visible in this function:
 *  - a root whose root item has zero refs only produces a warning when the
 *    cache is not empty;
 *  - the inode number of the last cache entry is folded into
 *    root->highest_inode for the later lost+found creation;
 *  - while `repair` is set the cache is walked and repair_inode_backrefs()
 *    is called for records that have backrefs;
 *  - the record of the root directory is checked with check_root_dir(); a
 *    missing root directory can be recreated with btrfs_make_root_dir()
 *    inside its own transaction;
 *  - the cache is then drained: the root dir and the orphan objectid are
 *    freed without further checks, I_ERR_NO_ORPHAN_ITEM is dropped when
 *    check_orphan_item() finds the item, I_ERR_NO_INODE_ITEM and
 *    I_ERR_LINK_COUNT_WRONG are raised where they apply, try_repair_inode()
 *    is called, and whatever remains is printed together with its
 *    unresolved backrefs before the record is freed.
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 *
 * NOTE(review): the stage/err bookkeeping that drives the backref repair
 * passes and several branch conditions are outside the visible lines.
 */
2960 static int check_inode_recs(struct btrfs_root *root,
2961 struct cache_tree *inode_cache)
2963 struct cache_extent *cache;
2964 struct ptr_node *node;
2965 struct inode_record *rec;
2966 struct inode_backref *backref;
2971 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2973 if (btrfs_root_refs(&root->root_item) == 0) {
2974 if (!cache_tree_empty(inode_cache))
2975 fprintf(stderr, "warning line %d\n", __LINE__);
2980 * We need to record the highest inode number for later 'lost+found'
2982 * We must select an ino not used/referred by any existing inode, or
2983 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984 * this may cause 'lost+found' dir has wrong nlinks.
2986 cache = last_cache_extent(inode_cache);
2988 node = container_of(cache, struct ptr_node, cache);
2990 if (rec->ino > root->highest_inode)
2991 root->highest_inode = rec->ino;
2995 * We need to repair backrefs first because we could change some of the
2996 * errors in the inode recs.
2998 * We also need to go through and delete invalid backrefs first and then
2999 * add the correct ones second. We do this because we may get EEXIST
3000 * when adding back the correct index because we hadn't yet deleted the
3003 * For example, if we were missing a dir index then the directories
3004 * isize would be wrong, so if we fixed the isize to what we thought it
3005 * would be and then fixed the backref we'd still have a invalid fs, so
3006 * we need to add back the dir index and then check to see if the isize
3011 if (stage == 3 && !err)
3014 cache = search_cache_extent(inode_cache, 0);
3015 while (repair && cache) {
3016 node = container_of(cache, struct ptr_node, cache);
3018 cache = next_cache_extent(cache);
3020 /* Need to free everything up and rescan */
3022 remove_cache_extent(inode_cache, &node->cache);
3024 free_inode_rec(rec);
3028 if (list_empty(&rec->backrefs))
3031 ret = repair_inode_backrefs(root, rec, inode_cache,
3045 rec = get_inode_rec(inode_cache, root_dirid, 0);
3046 BUG_ON(IS_ERR(rec));
3048 ret = check_root_dir(rec);
3050 fprintf(stderr, "root %llu root dir %llu error\n",
3051 (unsigned long long)root->root_key.objectid,
3052 (unsigned long long)root_dirid);
3053 print_inode_error(root, rec);
3058 struct btrfs_trans_handle *trans;
3060 trans = btrfs_start_transaction(root, 1);
3061 if (IS_ERR(trans)) {
3062 err = PTR_ERR(trans);
3067 "root %llu missing its root dir, recreating\n",
3068 (unsigned long long)root->objectid);
3070 ret = btrfs_make_root_dir(trans, root, root_dirid);
3073 btrfs_commit_transaction(trans, root);
3077 fprintf(stderr, "root %llu root dir %llu not found\n",
3078 (unsigned long long)root->root_key.objectid,
3079 (unsigned long long)root_dirid);
3083 cache = search_cache_extent(inode_cache, 0);
3086 node = container_of(cache, struct ptr_node, cache);
3088 remove_cache_extent(inode_cache, &node->cache);
3090 if (rec->ino == root_dirid ||
3091 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092 free_inode_rec(rec);
3096 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097 ret = check_orphan_item(root, rec->ino);
3099 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100 if (can_free_inode_rec(rec)) {
3101 free_inode_rec(rec);
3106 if (!rec->found_inode_item)
3107 rec->errors |= I_ERR_NO_INODE_ITEM;
3108 if (rec->found_link != rec->nlink)
3109 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3111 ret = try_repair_inode(root, rec);
3112 if (ret == 0 && can_free_inode_rec(rec)) {
3113 free_inode_rec(rec);
3119 if (!(repair && ret == 0))
3121 print_inode_error(root, rec);
3122 list_for_each_entry(backref, &rec->backrefs, list) {
3123 if (!backref->found_dir_item)
3124 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125 if (!backref->found_dir_index)
3126 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127 if (!backref->found_inode_ref)
3128 backref->errors |= REF_ERR_NO_INODE_REF;
3129 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130 " namelen %u name %s filetype %d errors %x",
3131 (unsigned long long)backref->dir,
3132 (unsigned long long)backref->index,
3133 backref->namelen, backref->name,
3134 backref->filetype, backref->errors);
3135 print_ref_error(backref->errors);
3137 free_inode_rec(rec);
3139 return (error > 0) ? -1 : 0;
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3145 struct cache_extent *cache;
3146 struct root_record *rec = NULL;
3149 cache = lookup_cache_extent(root_cache, objectid, 1);
3151 rec = container_of(cache, struct root_record, cache);
3153 rec = calloc(1, sizeof(*rec));
3155 return ERR_PTR(-ENOMEM);
3156 rec->objectid = objectid;
3157 INIT_LIST_HEAD(&rec->backrefs);
3158 rec->cache.start = objectid;
3159 rec->cache.size = 1;
3161 ret = insert_cache_extent(root_cache, &rec->cache);
3163 return ERR_PTR(-EEXIST);
/*
 * Find or create the root_backref of @rec identified by (@ref_root, @dir,
 * @name).
 *
 * Existing backrefs are compared on ref_root, dir, namelen and the name
 * bytes; @index is not part of the comparison.  A new backref is allocated
 * with room for the name plus a terminating NUL, filled in from the
 * arguments and appended to @rec->backrefs.
 *
 * NOTE(review): the return statements and the allocation-failure handling
 * are outside the visible lines.
 */
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169 u64 ref_root, u64 dir, u64 index,
3170 const char *name, int namelen)
3172 struct root_backref *backref;
3174 list_for_each_entry(backref, &rec->backrefs, list) {
3175 if (backref->ref_root != ref_root || backref->dir != dir ||
3176 backref->namelen != namelen)
3178 if (memcmp(name, backref->name, namelen))
/* One extra byte so the stored name can be NUL-terminated below. */
3183 backref = calloc(1, sizeof(*backref) + namelen + 1);
3186 backref->ref_root = ref_root;
3188 backref->index = index;
3189 backref->namelen = namelen;
3190 memcpy(backref->name, name, namelen);
3191 backref->name[namelen] = '\0';
3192 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record stored in a cache tree: every backref is
 * unlinked from @rec->backrefs in turn until the list is empty.
 *
 * NOTE(review): the statements that release the backrefs and the record
 * itself are outside the visible lines.
 */
3196 static void free_root_record(struct cache_extent *cache)
3198 struct root_record *rec;
3199 struct root_backref *backref;
3201 rec = container_of(cache, struct root_record, cache);
3202 while (!list_empty(&rec->backrefs)) {
3203 backref = to_root_backref(rec->backrefs.next);
3204 list_del(&backref->list);
/*
 * Expands to the tree-wide destructor built on free_root_record(); it is
 * called as free_root_recs_tree() in check_fs_roots().
 */
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type references root @root_id from
 * (@ref_root, @dir, @name).
 *
 * The root record and the matching backref are looked up or created, and
 * @errors is OR-ed into the backref.  For every item type except DIR_ITEM the
 * index is tracked: if a dir index, back ref or forward ref was already seen
 * and the stored index differs, REF_ERR_INDEX_UNMATCH is raised; the index is
 * also stored from @index.  The per-type found_* flag is then set, with
 * REF_ERR_DUP_ROOT_REF / REF_ERR_DUP_ROOT_BACKREF raised for repeated
 * ROOT_REF / ROOT_BACKREF items.  A backref becomes reachable once both a
 * forward ref and a dir item have been found.
 *
 * A failed get_root_rec() hits BUG_ON().
 *
 * NOTE(review): the statements that follow the found_forward_ref and
 * found_dir_item tests in the DIR_ITEM and ROOT_REF branches, and the return
 * value, are outside the visible lines.
 */
3213 static int add_root_backref(struct cache_tree *root_cache,
3214 u64 root_id, u64 ref_root, u64 dir, u64 index,
3215 const char *name, int namelen,
3216 int item_type, int errors)
3218 struct root_record *rec;
3219 struct root_backref *backref;
3221 rec = get_root_rec(root_cache, root_id);
3222 BUG_ON(IS_ERR(rec));
3223 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3226 backref->errors |= errors;
3228 if (item_type != BTRFS_DIR_ITEM_KEY) {
3229 if (backref->found_dir_index || backref->found_back_ref ||
3230 backref->found_forward_ref) {
3231 if (backref->index != index)
3232 backref->errors |= REF_ERR_INDEX_UNMATCH;
3234 backref->index = index;
3238 if (item_type == BTRFS_DIR_ITEM_KEY) {
3239 if (backref->found_forward_ref)
3241 backref->found_dir_item = 1;
3242 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243 backref->found_dir_index = 1;
3244 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245 if (backref->found_forward_ref)
3246 backref->errors |= REF_ERR_DUP_ROOT_REF;
3247 else if (backref->found_dir_item)
3249 backref->found_forward_ref = 1;
3250 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251 if (backref->found_back_ref)
3252 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253 backref->found_back_ref = 1;
3258 if (backref->found_forward_ref && backref->found_dir_item)
3259 backref->reachable = 1;
/*
 * Move the per-tree records in @src_cache into the global root cache
 * @dst_cache.
 *
 * For the tree-reloc root the source cache is simply freed.  Otherwise each
 * entry is removed from @src_cache, is_child_root() is consulted for
 * (root->objectid, rec->ino), and the record's backrefs are replayed into
 * @dst_cache through add_root_backref(): once as DIR_ITEM when a dir item was
 * found and once as DIR_INDEX when a dir index was found.  A backref with
 * found_inode_ref set hits BUG_ON().  Each record is freed after processing.
 *
 * NOTE(review): how the is_child_root() result steers the loop, and the
 * return value, are outside the visible lines.
 */
3263 static int merge_root_recs(struct btrfs_root *root,
3264 struct cache_tree *src_cache,
3265 struct cache_tree *dst_cache)
3267 struct cache_extent *cache;
3268 struct ptr_node *node;
3269 struct inode_record *rec;
3270 struct inode_backref *backref;
3273 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274 free_inode_recs_tree(src_cache);
3279 cache = search_cache_extent(src_cache, 0);
3282 node = container_of(cache, struct ptr_node, cache);
3284 remove_cache_extent(src_cache, &node->cache);
3287 ret = is_child_root(root, root->objectid, rec->ino);
3293 list_for_each_entry(backref, &rec->backrefs, list) {
3294 BUG_ON(backref->found_inode_ref);
3295 if (backref->found_dir_item)
3296 add_root_backref(dst_cache, rec->ino,
3297 root->root_key.objectid, backref->dir,
3298 backref->index, backref->name,
3299 backref->namelen, BTRFS_DIR_ITEM_KEY,
3301 if (backref->found_dir_index)
3302 add_root_backref(dst_cache, rec->ino,
3303 root->root_key.objectid, backref->dir,
3304 backref->index, backref->name,
3305 backref->namelen, BTRFS_DIR_INDEX_KEY,
3309 free_inode_rec(rec);
/*
 * Cross-check the root records in @root_cache for reachability and for
 * incomplete or inconsistent references.
 *
 * The record of BTRFS_FS_TREE_OBJECTID is fetched first (BUG_ON on failure).
 * A first sweep over the cache inspects the backrefs still marked reachable,
 * looks up the record of the referencing root and can clear the reachable
 * flag; as the existing fixme says, circular references are not detected.
 * A second sweep reports problems: an unreferenced record inside the free
 * objectid range is looked up with check_orphan_item() on the tree root, is
 * ignored when it has no root item, and is otherwise printed as
 * "not referenced".  The missing dir item / dir index / back ref / forward
 * ref bits are then filled into each backref, and the affected records are
 * printed with their unresolved references.
 *
 * Returns 1 when the error counter is positive, 0 otherwise.
 *
 * NOTE(review): the loop headers, the found_ref accounting and most
 * continue/goto statements are outside the visible lines.
 */
3316 static int check_root_refs(struct btrfs_root *root,
3317 struct cache_tree *root_cache)
3319 struct root_record *rec;
3320 struct root_record *ref_root;
3321 struct root_backref *backref;
3322 struct cache_extent *cache;
3328 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329 BUG_ON(IS_ERR(rec));
3332 /* fixme: this can not detect circular references */
3335 cache = search_cache_extent(root_cache, 0);
3339 rec = container_of(cache, struct root_record, cache);
3340 cache = next_cache_extent(cache);
3342 if (rec->found_ref == 0)
3345 list_for_each_entry(backref, &rec->backrefs, list) {
3346 if (!backref->reachable)
3349 ref_root = get_root_rec(root_cache,
3351 BUG_ON(IS_ERR(ref_root));
3352 if (ref_root->found_ref > 0)
3355 backref->reachable = 0;
3357 if (rec->found_ref == 0)
3363 cache = search_cache_extent(root_cache, 0);
3367 rec = container_of(cache, struct root_record, cache);
3368 cache = next_cache_extent(cache);
3370 if (rec->found_ref == 0 &&
3371 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373 ret = check_orphan_item(root->fs_info->tree_root,
3379 * If we don't have a root item then we likely just have
3380 * a dir item in a snapshot for this root but no actual
3381 * ref key or anything so it's meaningless.
3383 if (!rec->found_root_item)
3386 fprintf(stderr, "fs tree %llu not referenced\n",
3387 (unsigned long long)rec->objectid);
3391 if (rec->found_ref > 0 && !rec->found_root_item)
3393 list_for_each_entry(backref, &rec->backrefs, list) {
3394 if (!backref->found_dir_item)
3395 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396 if (!backref->found_dir_index)
3397 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398 if (!backref->found_back_ref)
3399 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400 if (!backref->found_forward_ref)
3401 backref->errors |= REF_ERR_NO_ROOT_REF;
3402 if (backref->reachable && backref->errors)
3409 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410 (unsigned long long)rec->objectid, rec->found_ref,
3411 rec->found_root_item ? "" : "not found");
3413 list_for_each_entry(backref, &rec->backrefs, list) {
3414 if (!backref->reachable)
3416 if (!backref->errors && rec->found_root_item)
3418 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419 " index %llu namelen %u name %s errors %x\n",
3420 (unsigned long long)backref->ref_root,
3421 (unsigned long long)backref->dir,
3422 (unsigned long long)backref->index,
3423 backref->namelen, backref->name,
3425 print_ref_error(backref->errors);
3428 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref stored at @slot of leaf @eb and feed it into
 * @root_cache through add_root_backref().
 *
 * The dirid, sequence (used as the index) and name length are read from the
 * item.  A name longer than BTRFS_NAME_LEN is cut to BTRFS_NAME_LEN bytes and
 * flagged with REF_ERR_NAME_TOO_LONG.  For a ROOT_REF key the referenced root
 * is key->offset and the referencing root is key->objectid; in the other call
 * the two are swapped.
 *
 * namebuf holds exactly the name bytes and is not NUL-terminated here; the
 * length is passed along explicitly.
 */
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432 struct btrfs_key *key,
3433 struct cache_tree *root_cache)
3439 struct btrfs_root_ref *ref;
3440 char namebuf[BTRFS_NAME_LEN];
3443 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3445 dirid = btrfs_root_ref_dirid(eb, ref);
3446 index = btrfs_root_ref_sequence(eb, ref);
3447 name_len = btrfs_root_ref_name_len(eb, ref);
3449 if (name_len <= BTRFS_NAME_LEN) {
3453 len = BTRFS_NAME_LEN;
3454 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored directly behind the fixed-size root_ref. */
3456 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3458 if (key->type == BTRFS_ROOT_REF_KEY) {
3459 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460 index, namebuf, len, key->type, error);
3462 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463 index, namebuf, len, key->type, error);
/*
 * Destructor callback for one btrfs_corrupt_block stored in a cache tree;
 * @cache is the cache_extent embedded in that structure.
 *
 * NOTE(review): the statement that releases the block is outside the visible
 * lines.
 */
3468 static void free_corrupt_block(struct cache_extent *cache)
3470 struct btrfs_corrupt_block *corrupt;
3472 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Expands to the tree-wide destructor built on free_corrupt_block(); it is
 * called as free_corrupt_blocks_tree() in check_fs_root().
 */
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3479 * Repair the btree of the given root.
3481 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3482 * and then rebalance the tree.
3483 * After the fix, the btree should be writeable.
/*
 * Drop the tree blocks listed in @corrupt_blocks from the btree of @root.
 *
 * Nothing is done for an empty cache.  Inside one transaction the function
 * makes two passes over the cache:
 *  1. for each corrupt block, search down to the recorded level with
 *     ins_len 0 and cow 1, read the block pointer from the parent node,
 *     remove that pointer with btrfs_del_ptr() and free the extent it
 *     referenced;
 *  2. search for each recorded key again with ins_len -1 so that the search
 *     itself rebalances the tree.
 * The transaction is committed and the path released at the end.
 *
 * NOTE(review): the error exits between these steps and the final return
 * value are outside the visible lines.
 */
3485 static int repair_btree(struct btrfs_root *root,
3486 struct cache_tree *corrupt_blocks)
3488 struct btrfs_trans_handle *trans;
3489 struct btrfs_path path;
3490 struct btrfs_corrupt_block *corrupt;
3491 struct cache_extent *cache;
3492 struct btrfs_key key;
3497 if (cache_tree_empty(corrupt_blocks))
3500 trans = btrfs_start_transaction(root, 1);
3501 if (IS_ERR(trans)) {
3502 ret = PTR_ERR(trans);
3503 fprintf(stderr, "Error starting transaction: %s\n",
3507 btrfs_init_path(&path);
3508 cache = first_cache_extent(corrupt_blocks);
3510 corrupt = container_of(cache, struct btrfs_corrupt_block,
3512 level = corrupt->level;
3513 path.lowest_level = level;
3514 key.objectid = corrupt->key.objectid;
3515 key.type = corrupt->key.type;
3516 key.offset = corrupt->key.offset;
3519 * Here we don't want to do any tree balance, since it may
3520 * cause a balance with corrupted brother leaf/node,
3521 * so ins_len set to 0 here.
3522 * Balance will be done after all corrupt node/leaf is deleted.
3524 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3527 offset = btrfs_node_blockptr(path.nodes[level],
3530 /* Remove the ptr */
3531 ret = btrfs_del_ptr(trans, root, &path, level,
3536 * Remove the corresponding extent
3537 * return value is not concerned.
3539 btrfs_release_path(&path);
3540 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541 0, root->root_key.objectid,
3543 cache = next_cache_extent(cache);
3546 /* Balance the btree using btrfs_search_slot() */
3547 cache = first_cache_extent(corrupt_blocks);
3549 corrupt = container_of(cache, struct btrfs_corrupt_block,
3551 memcpy(&key, &corrupt->key, sizeof(key));
3552 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3555 /* return will always >0 since it won't find the item */
3557 btrfs_release_path(&path);
3558 cache = next_cache_extent(cache);
3561 btrfs_commit_transaction(trans, root);
3562 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree @root and merge what was learned into
 * @root_cache.
 *
 * Visible sequence:
 *  - a local corrupt_blocks cache tree is installed in
 *    root->fs_info->corrupt_blocks for the duration of the call and removed
 *    (and freed) before returning;
 *  - for every root except the tree-reloc root the root record is fetched
 *    and found_root_item is set when the root item has refs;
 *  - pending orphan data extents are moved onto the inode records they
 *    belong to, which are flagged I_ERR_FILE_EXTENT_ORPHAN;
 *  - @wc is reset and pointed at the local shared_node at the root level;
 *  - the root block is checked with btrfs_check_leaf()/btrfs_check_node();
 *  - the walk starts at the root node, or, when the root has no refs and a
 *    non-zero drop_progress objectid, from drop_progress at drop_level (an
 *    out-of-range drop level is reported through error());
 *  - the tree is traversed with walk_down_tree()/walk_up_tree();
 *  - corrupted tree blocks found on the way are listed and repair_btree()
 *    is invoked for them;
 *  - merge_root_recs() and check_inode_recs() process the collected
 *    records, then the orphan data extents of @root are freed.
 *
 * NOTE(review): the handling of the individual return codes, the condition
 * guarding the repair and the final return value are outside the visible
 * lines.
 */
3566 static int check_fs_root(struct btrfs_root *root,
3567 struct cache_tree *root_cache,
3568 struct walk_control *wc)
3574 struct btrfs_path path;
3575 struct shared_node root_node;
3576 struct root_record *rec;
3577 struct btrfs_root_item *root_item = &root->root_item;
3578 struct cache_tree corrupt_blocks;
3579 struct orphan_data_extent *orphan;
3580 struct orphan_data_extent *tmp;
3581 enum btrfs_tree_block_status status;
3582 struct node_refs nrefs;
3585 * Reuse the corrupt_block cache tree to record corrupted tree block
3587 * Unlike the usage in extent tree check, here we do it in a per
3588 * fs/subvol tree base.
3590 cache_tree_init(&corrupt_blocks);
3591 root->fs_info->corrupt_blocks = &corrupt_blocks;
3593 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594 rec = get_root_rec(root_cache, root->root_key.objectid);
3595 BUG_ON(IS_ERR(rec));
3596 if (btrfs_root_refs(root_item) > 0)
3597 rec->found_root_item = 1;
3600 btrfs_init_path(&path);
3601 memset(&root_node, 0, sizeof(root_node));
3602 cache_tree_init(&root_node.root_cache);
3603 cache_tree_init(&root_node.inode_cache);
3604 memset(&nrefs, 0, sizeof(nrefs));
3606 /* Move the orphan extent record to corresponding inode_record */
3607 list_for_each_entry_safe(orphan, tmp,
3608 &root->orphan_data_extents, list) {
3609 struct inode_record *inode;
3611 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3613 BUG_ON(IS_ERR(inode));
3614 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615 list_move(&orphan->list, &inode->orphan_extents);
3618 level = btrfs_header_level(root->node);
3619 memset(wc->nodes, 0, sizeof(wc->nodes));
3620 wc->nodes[level] = &root_node;
3621 wc->active_node = level;
3622 wc->root_level = level;
3624 /* We may not have checked the root block, lets do that now */
3625 if (btrfs_is_leaf(root->node))
3626 status = btrfs_check_leaf(root, NULL, root->node);
3628 status = btrfs_check_node(root, NULL, root->node);
3629 if (status != BTRFS_TREE_BLOCK_CLEAN)
3632 if (btrfs_root_refs(root_item) > 0 ||
3633 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634 path.nodes[level] = root->node;
3635 extent_buffer_get(root->node);
3636 path.slots[level] = 0;
3638 struct btrfs_key key;
3639 struct btrfs_disk_key found_key;
3641 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642 level = root_item->drop_level;
3643 path.lowest_level = level;
3644 if (level > btrfs_header_level(root->node) ||
3645 level >= BTRFS_MAX_LEVEL) {
3646 error("ignoring invalid drop level: %u", level);
3649 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3652 btrfs_node_key(path.nodes[level], &found_key,
3654 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655 sizeof(found_key)));
3659 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3665 wret = walk_up_tree(root, &path, wc, &level);
3672 btrfs_release_path(&path);
3674 if (!cache_tree_empty(&corrupt_blocks)) {
3675 struct cache_extent *cache;
3676 struct btrfs_corrupt_block *corrupt;
3678 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679 root->root_key.objectid);
3680 cache = first_cache_extent(&corrupt_blocks);
3682 corrupt = container_of(cache,
3683 struct btrfs_corrupt_block,
3685 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686 cache->start, corrupt->level,
3687 corrupt->key.objectid, corrupt->key.type,
3688 corrupt->key.offset);
3689 cache = next_cache_extent(cache);
3692 printf("Try to repair the btree for root %llu\n",
3693 root->root_key.objectid);
3694 ret = repair_btree(root, &corrupt_blocks);
3696 fprintf(stderr, "Failed to repair btree: %s\n",
3699 printf("Btree for root %llu is fixed\n",
3700 root->root_key.objectid);
3704 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3708 if (root_node.current) {
3709 root_node.current->checked = 1;
3710 maybe_free_inode_rec(&root_node.inode_cache,
3714 err = check_inode_recs(root, &root_node.inode_cache);
3718 free_corrupt_blocks_tree(&corrupt_blocks);
3719 root->fs_info->corrupt_blocks = NULL;
3720 free_orphan_data_extents(&root->orphan_data_extents);
3724 static int fs_root_objectid(u64 objectid)
3726 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3729 return is_fstree(objectid);
/*
 * Walk the root tree and run check_fs_root() on every fs/subvolume tree,
 * collecting root references into @root_cache on the way.
 *
 * Visible sequence:
 *  - when progress reporting is enabled the task is switched to
 *    TASK_FS_ROOTS and started; it is stopped again at the end;
 *  - cached block groups are reset and the walk control is initialised;
 *  - the root tree is searched for ROOT_ITEM keys and its items are visited
 *    in order, moving to the next leaf when the slot runs past the leaf;
 *  - a ROOT_ITEM key accepted by fs_root_objectid() is opened (without the
 *    root cache for the tree-reloc objectid, with offset (u64)-1 through the
 *    cached reader otherwise) and handed to check_fs_root(); tree-reloc
 *    roots are freed again afterwards;
 *  - when check_fs_root() returns -EAGAIN, or when tree_root->node no
 *    longer equals the node remembered in tree_node, the root records are
 *    freed and the path released;
 *  - ROOT_REF and ROOT_BACKREF keys are passed to process_root_ref();
 *  - finally the path is released, wc.shared is freed and a warning is
 *    printed if it is still not empty.
 *
 * NOTE(review): the restart logic, the error accounting and the return value
 * are outside the visible lines.
 */
3732 static int check_fs_roots(struct btrfs_root *root,
3733 struct cache_tree *root_cache)
3735 struct btrfs_path path;
3736 struct btrfs_key key;
3737 struct walk_control wc;
3738 struct extent_buffer *leaf, *tree_node;
3739 struct btrfs_root *tmp_root;
3740 struct btrfs_root *tree_root = root->fs_info->tree_root;
3744 if (ctx.progress_enabled) {
3745 ctx.tp = TASK_FS_ROOTS;
3746 task_start(ctx.info);
3750 * Just in case we made any changes to the extent tree that weren't
3751 * reflected into the free space cache yet.
3754 reset_cached_block_groups(root->fs_info);
3755 memset(&wc, 0, sizeof(wc));
3756 cache_tree_init(&wc.shared);
3757 btrfs_init_path(&path);
3762 key.type = BTRFS_ROOT_ITEM_KEY;
3763 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3768 tree_node = tree_root->node;
3770 if (tree_node != tree_root->node) {
3771 free_root_recs_tree(root_cache);
3772 btrfs_release_path(&path);
3775 leaf = path.nodes[0];
3776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777 ret = btrfs_next_leaf(tree_root, &path);
3783 leaf = path.nodes[0];
3785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787 fs_root_objectid(key.objectid)) {
3788 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789 tmp_root = btrfs_read_fs_root_no_cache(
3790 root->fs_info, &key);
3792 key.offset = (u64)-1;
3793 tmp_root = btrfs_read_fs_root(
3794 root->fs_info, &key);
3796 if (IS_ERR(tmp_root)) {
3800 ret = check_fs_root(tmp_root, root_cache, &wc);
3801 if (ret == -EAGAIN) {
3802 free_root_recs_tree(root_cache);
3803 btrfs_release_path(&path);
3808 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809 btrfs_free_fs_root(tmp_root);
3810 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811 key.type == BTRFS_ROOT_BACKREF_KEY) {
3812 process_root_ref(leaf, path.slots[0], &key,
3819 btrfs_release_path(&path);
3821 free_extent_cache_tree(&wc.shared);
3822 if (!cache_tree_empty(&wc.shared))
3823 fprintf(stderr, "warning line %d\n", __LINE__);
3825 task_stop(ctx.info);
/*
 * Error bits produced by find_dir_item(); check_inode_ref() ORs
 * ROOT_DIR_ERROR into its own error mask as well.  Each value is a distinct
 * bit so several of them can be combined in one int.
 */
3830 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
3831 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
3832 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
3835 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3836 * INODE_REF/INODE_EXTREF match.
3838 * @root: the root of the fs/file tree
3839 * @ref_key: the key of the INODE_REF/INODE_EXTREF
3840 * @key: the key of the DIR_ITEM/DIR_INDEX
3841 * @index: the index in the INODE_REF/INODE_EXTREF, used to
3842 * distinguish the root dir from a normal dir/file
3843 * @name: the name in the INODE_REF/INODE_EXTREF
3844 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
3845 * @mode: the st_mode of INODE_ITEM
3847 * Return 0 if no error occurred.
3848 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3849 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3851 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3852 * do not match for normal dir/file.
/*
 * Implementation notes for find_dir_item():
 *
 * @key is looked up read-only in @root.  Two outcomes are handled before any
 * comparison: a hit for the root directory is reported as ROOT_DIR_ERROR, and
 * a miss for a normal file/dir as DIR_ITEM_MISSING.  Otherwise the item is
 * treated as a sequence of btrfs_dir_item entries, each followed by name_len
 * + data_len bytes, and every entry is compared on location (objectid of
 * @ref_key, INODE_ITEM type, offset 0), on file type (derived from @mode via
 * imode_to_type()) and on name.  Names longer than BTRFS_NAME_LEN are
 * compared on their first BTRFS_NAME_LEN bytes only, with a warning.  The
 * result stays DIR_ITEM_MISMATCH when no entry matches, which is reported
 * before the path is released.
 *
 * NOTE(review): @name is printed with %s in the messages below, but the
 * caller check_inode_ref() passes a BTRFS_NAME_LEN-sized buffer that has no
 * terminating NUL when the name fills it completely - confirm.
 *
 * NOTE(review): the conditions selecting the root-dir and missing-item
 * branches, the goto/continue statements and the return are outside the
 * visible lines.
 */
3854 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3855 struct btrfs_key *key, u64 index, char *name,
3856 u32 namelen, u32 mode)
3858 struct btrfs_path path;
3859 struct extent_buffer *node;
3860 struct btrfs_dir_item *di;
3861 struct btrfs_key location;
3862 char namebuf[BTRFS_NAME_LEN] = {0};
3872 btrfs_init_path(&path);
3873 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3875 ret = DIR_ITEM_MISSING;
3879 /* Process root dir and goto out*/
3882 ret = ROOT_DIR_ERROR;
3884 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3886 ref_key->type == BTRFS_INODE_REF_KEY ?
3888 ref_key->objectid, ref_key->offset,
3889 key->type == BTRFS_DIR_ITEM_KEY ?
3890 "DIR_ITEM" : "DIR_INDEX");
3898 /* Process normal file/dir */
3900 ret = DIR_ITEM_MISSING;
3902 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3904 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3905 ref_key->objectid, ref_key->offset,
3906 key->type == BTRFS_DIR_ITEM_KEY ?
3907 "DIR_ITEM" : "DIR_INDEX",
3908 key->objectid, key->offset, namelen, name,
3909 imode_to_type(mode));
3913 /* Check whether inode_id/filetype/name match */
3914 node = path.nodes[0];
3915 slot = path.slots[0];
3916 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3917 total = btrfs_item_size_nr(node, slot);
3918 while (cur < total) {
3919 ret = DIR_ITEM_MISMATCH;
3920 name_len = btrfs_dir_name_len(node, di);
3921 data_len = btrfs_dir_data_len(node, di);
3923 btrfs_dir_item_key_to_cpu(node, di, &location);
3924 if (location.objectid != ref_key->objectid ||
3925 location.type != BTRFS_INODE_ITEM_KEY ||
3926 location.offset != 0)
3929 filetype = btrfs_dir_type(node, di);
3930 if (imode_to_type(mode) != filetype)
3933 if (name_len <= BTRFS_NAME_LEN) {
3936 len = BTRFS_NAME_LEN;
3937 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3939 key->type == BTRFS_DIR_ITEM_KEY ?
3940 "DIR_ITEM" : "DIR_INDEX",
3941 key->objectid, key->offset, name_len);
3943 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3944 if (len != namelen || strncmp(namebuf, name, len))
3950 len = sizeof(*di) + name_len + data_len;
3951 di = (struct btrfs_dir_item *)((char *)di + len);
3954 if (ret == DIR_ITEM_MISMATCH)
3956 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3958 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3959 ref_key->objectid, ref_key->offset,
3960 key->type == BTRFS_DIR_ITEM_KEY ?
3961 "DIR_ITEM" : "DIR_INDEX",
3962 key->objectid, key->offset, namelen, name,
3963 imode_to_type(mode));
3965 btrfs_release_path(&path);
3970 * Traverse the given INODE_REF and call find_dir_item() to find related
3971 * DIR_ITEM/DIR_INDEX.
3973 * @root: the root of the fs/file tree
3974 * @ref_key: the key of the INODE_REF
3975 * @refs: the count of INODE_REF
3976 * @mode: the st_mode of INODE_ITEM
3978 * Return 0 if no error occurred.
/*
 * Implementation notes for check_inode_ref():
 *
 * The INODE_REF item at @slot of @node is decoded entry by entry; each entry
 * is a btrfs_inode_ref followed by name_len bytes, which is how the pointer
 * is advanced at the bottom.  For every entry the index and name are read
 * (names longer than BTRFS_NAME_LEN are cut to BTRFS_NAME_LEN bytes with a
 * warning), an entry with index 0 must be named "..", otherwise
 * ROOT_DIR_ERROR is raised, and find_dir_item() is called twice: once with a
 * DIR_INDEX key and once with a DIR_ITEM key whose offset is the name hash.
 *
 * NOTE(review): namebuf has no room for a terminating NUL when the name is
 * BTRFS_NAME_LEN bytes long, yet it is printed with %s below - confirm.  It
 * also looks like the buffer is reused for each entry without being cleared,
 * so bytes of a previous longer name may remain behind a shorter one.
 *
 * NOTE(review): the loop construct, the update of *@refs and the return are
 * outside the visible lines.
 */
3980 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3981 struct extent_buffer *node, int slot, u64 *refs,
3984 struct btrfs_key key;
3985 struct btrfs_inode_ref *ref;
3986 char namebuf[BTRFS_NAME_LEN] = {0};
3994 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
3995 total = btrfs_item_size_nr(node, slot);
3998 /* Update inode ref count */
4001 index = btrfs_inode_ref_index(node, ref);
4002 name_len = btrfs_inode_ref_name_len(node, ref);
4003 if (name_len <= BTRFS_NAME_LEN) {
4006 len = BTRFS_NAME_LEN;
4007 warning("root %llu INODE_REF[%llu %llu] name too long",
4008 root->objectid, ref_key->objectid, ref_key->offset);
4011 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4013 /* Check root dir ref name */
4014 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4015 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4016 root->objectid, ref_key->objectid, ref_key->offset,
4018 err |= ROOT_DIR_ERROR;
4021 /* Find related DIR_INDEX */
4022 key.objectid = ref_key->offset;
4023 key.type = BTRFS_DIR_INDEX_KEY;
4025 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4028 /* Find related dir_item */
4029 key.objectid = ref_key->offset;
4030 key.type = BTRFS_DIR_ITEM_KEY;
4031 key.offset = btrfs_name_hash(namebuf, len);
4032 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4035 len = sizeof(*ref) + name_len;
4036 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Verify that the backrefs recorded for extent @rec are consistent.
 *
 * Every entry of @rec->backrefs is examined for the following problems, each
 * with its own message on stderr:
 *  - the backref was not found in the extent tree;
 *  - a tree backref was never referenced back;
 *  - a data backref's found_ref differs from its num_refs;
 *  - a data backref's disk_bytenr differs from rec->start;
 *  - a data backref's byte count differs from rec->nr.
 * The references found are summed up (found_ref for data backrefs) and the
 * total is compared with rec->refs, reporting an incorrect global backref
 * count on mismatch.
 *
 * NOTE(review): how @print_errs gates the messages, the error accounting and
 * the return value are outside the visible lines.
 */
4044 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4046 struct list_head *cur = rec->backrefs.next;
4047 struct extent_backref *back;
4048 struct tree_backref *tback;
4049 struct data_backref *dback;
4053 while(cur != &rec->backrefs) {
4054 back = to_extent_backref(cur);
4056 if (!back->found_extent_tree) {
4060 if (back->is_data) {
4061 dback = to_data_backref(back);
4062 fprintf(stderr, "Backref %llu %s %llu"
4063 " owner %llu offset %llu num_refs %lu"
4064 " not found in extent tree\n",
4065 (unsigned long long)rec->start,
4066 back->full_backref ?
4068 back->full_backref ?
4069 (unsigned long long)dback->parent:
4070 (unsigned long long)dback->root,
4071 (unsigned long long)dback->owner,
4072 (unsigned long long)dback->offset,
4073 (unsigned long)dback->num_refs);
4075 tback = to_tree_backref(back);
4076 fprintf(stderr, "Backref %llu parent %llu"
4077 " root %llu not found in extent tree\n",
4078 (unsigned long long)rec->start,
4079 (unsigned long long)tback->parent,
4080 (unsigned long long)tback->root);
4083 if (!back->is_data && !back->found_ref) {
4087 tback = to_tree_backref(back);
4088 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4089 (unsigned long long)rec->start,
4090 back->full_backref ? "parent" : "root",
4091 back->full_backref ?
4092 (unsigned long long)tback->parent :
4093 (unsigned long long)tback->root, back);
4095 if (back->is_data) {
4096 dback = to_data_backref(back);
4097 if (dback->found_ref != dback->num_refs) {
4101 fprintf(stderr, "Incorrect local backref count"
4102 " on %llu %s %llu owner %llu"
4103 " offset %llu found %u wanted %u back %p\n",
4104 (unsigned long long)rec->start,
4105 back->full_backref ?
4107 back->full_backref ?
4108 (unsigned long long)dback->parent:
4109 (unsigned long long)dback->root,
4110 (unsigned long long)dback->owner,
4111 (unsigned long long)dback->offset,
4112 dback->found_ref, dback->num_refs, back);
4114 if (dback->disk_bytenr != rec->start) {
4118 fprintf(stderr, "Backref disk bytenr does not"
4119 " match extent record, bytenr=%llu, "
4120 "ref bytenr=%llu\n",
4121 (unsigned long long)rec->start,
4122 (unsigned long long)dback->disk_bytenr);
4125 if (dback->bytes != rec->nr) {
4129 fprintf(stderr, "Backref bytes do not match "
4130 "extent backref, bytenr=%llu, ref "
4131 "bytes=%llu, backref bytes=%llu\n",
4132 (unsigned long long)rec->start,
4133 (unsigned long long)rec->nr,
4134 (unsigned long long)dback->bytes);
4137 if (!back->is_data) {
4140 dback = to_data_backref(back);
4141 found += dback->found_ref;
4144 if (found != rec->refs) {
4148 fprintf(stderr, "Incorrect global backref count "
4149 "on %llu found %llu wanted %llu\n",
4150 (unsigned long long)rec->start,
4151 (unsigned long long)found,
4152 (unsigned long long)rec->refs);
/*
 * Drain @rec->backrefs, taking the first entry on each pass until the list
 * is empty.
 * NOTE(review): presumably each entry is unlinked and freed inside the loop
 * and the function returns 0 - confirm.
 */
4158 static int free_all_extent_backrefs(struct extent_record *rec)
4160 struct extent_backref *back;
4161 struct list_head *cur;
4162 while (!list_empty(&rec->backrefs)) {
4163 cur = rec->backrefs.next;
4164 back = to_extent_backref(cur);
/*
 * Tear down @extent_cache: fetch the first cached extent, map it back to
 * its enclosing extent_record, remove it from the cache and release the
 * backrefs of the record.
 * NOTE(review): presumably this repeats until the cache is empty and the
 * record itself is freed afterwards - confirm.
 */
4171 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4172 struct cache_tree *extent_cache)
4174 struct cache_extent *cache;
4175 struct extent_record *rec;
4178 cache = first_cache_extent(extent_cache);
4181 rec = container_of(cache, struct extent_record, cache);
4182 remove_cache_extent(extent_cache, cache);
4183 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing about it is left to verify.
 *
 * The record is released only when its content and owner ref were checked,
 * the extent item ref count equals the non-zero found ref count, it has no
 * duplicates, all_backpointers_checked() returns 0, and none of the
 * bad_full_backref, crossing_stripes or wrong_chunk_type flags is set.
 */
4188 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4189 struct extent_record *rec)
4191 if (rec->content_checked && rec->owner_ref_checked &&
4192 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4193 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4194 !rec->bad_full_backref && !rec->crossing_stripes &&
4195 !rec->wrong_chunk_type) {
/* Fully verified: unhook it from the cache, its backrefs and its list. */
4196 remove_cache_extent(extent_cache, &rec->cache);
4197 free_all_extent_backrefs(rec);
4198 list_del_init(&rec->list);
/*
 * Verify that the owner recorded in the header of @buf really references
 * the tree block described by @rec.
 *
 * The backrefs of @rec are scanned first, comparing the root of tree
 * backrefs with the header owner (presumably skipping unreferenced and full
 * backrefs and returning early on a match - confirm).  Otherwise the owner
 * root is read and searched, using the first key of @buf, down to the level
 * above @buf; the block is accepted when the parent slot points at
 * buf->start.
 *
 * The final return yields 0 when the reference was found and 1 otherwise.
 */
4204 static int check_owner_ref(struct btrfs_root *root,
4205 struct extent_record *rec,
4206 struct extent_buffer *buf)
4208 struct extent_backref *node;
4209 struct tree_backref *back;
4210 struct btrfs_root *ref_root;
4211 struct btrfs_key key;
4212 struct btrfs_path path;
4213 struct extent_buffer *parent;
4218 list_for_each_entry(node, &rec->backrefs, list) {
4221 if (!node->found_ref)
4223 if (node->full_backref)
4225 back = to_tree_backref(node);
4226 if (btrfs_header_owner(buf) == back->root)
4229 BUG_ON(rec->is_root);
4231 /* try to find the block by search corresponding fs tree */
4232 key.objectid = btrfs_header_owner(buf);
4233 key.type = BTRFS_ROOT_ITEM_KEY;
4234 key.offset = (u64)-1;
4236 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4237 if (IS_ERR(ref_root))
4240 level = btrfs_header_level(buf);
/* Use the first key stored in buf as the search key. */
4242 btrfs_item_key_to_cpu(buf, &key, 0);
4244 btrfs_node_key_to_cpu(buf, &key, 0);
4246 btrfs_init_path(&path);
/* Stop one level above buf so the parent pointer can be inspected. */
4247 path.lowest_level = level + 1;
4248 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4252 parent = path.nodes[level + 1];
4253 if (parent && buf->start == btrfs_node_blockptr(parent,
4254 path.slots[level + 1]))
4257 btrfs_release_path(&path);
4258 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec and test tree backrefs against
 * BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the result of the full_backref branch and of a root match
 * is not spelled out by the statements kept here; the caller in
 * record_bad_block_io() treats a zero result as not being an extent tree
 * record - confirm the exact return values.
 */
4261 static int is_extent_tree_record(struct extent_record *rec)
4263 struct list_head *cur = rec->backrefs.next;
4264 struct extent_backref *node;
4265 struct tree_backref *back;
4268 while(cur != &rec->backrefs) {
4269 node = to_extent_backref(cur);
4273 back = to_tree_backref(node);
4274 if (node->full_backref)
4276 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record a corrupt extent for the range given by start and len when that
 * range belongs to the extent tree.
 *
 * The range is looked up in @extent_cache; if the matching record passes
 * is_extent_tree_record(), its parent key (converted to CPU byte order) and
 * the range are handed to btrfs_add_corrupt_extent_record(), whose result
 * is returned.
 * NOTE(review): presumably a missing cache entry or a non extent tree
 * record makes the function return without recording anything - confirm.
 */
4283 static int record_bad_block_io(struct btrfs_fs_info *info,
4284 struct cache_tree *extent_cache,
4287 struct extent_record *rec;
4288 struct cache_extent *cache;
4289 struct btrfs_key key;
4291 cache = lookup_cache_extent(extent_cache, start, len);
4295 rec = container_of(cache, struct extent_record, cache);
4296 if (!is_extent_tree_record(rec))
4299 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4300 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * swapped through temporaries and the parent keys are fixed up from the key
 * in slot 0 (NOTE(review): presumably only when @slot is 0 - confirm).
 * For a leaf the item payloads, item offsets, item sizes and keys of the
 * two slots are exchanged.
 */
4303 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4304 struct extent_buffer *buf, int slot)
4306 if (btrfs_header_level(buf)) {
4307 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers, then write each one back into the other slot. */
4309 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4310 sizeof(struct btrfs_key_ptr));
4311 read_extent_buffer(buf, &ptr2,
4312 btrfs_node_key_ptr_offset(slot + 1),
4313 sizeof(struct btrfs_key_ptr));
4314 write_extent_buffer(buf, &ptr1,
4315 btrfs_node_key_ptr_offset(slot + 1),
4316 sizeof(struct btrfs_key_ptr));
4317 write_extent_buffer(buf, &ptr2,
4318 btrfs_node_key_ptr_offset(slot),
4319 sizeof(struct btrfs_key_ptr));
4321 struct btrfs_disk_key key;
4322 btrfs_node_key(buf, &key, 0);
4323 btrfs_fixup_low_keys(root, path, &key,
4324 btrfs_header_level(buf) + 1);
4327 struct btrfs_item *item1, *item2;
4328 struct btrfs_key k1, k2;
4329 char *item1_data, *item2_data;
4330 u32 item1_offset, item2_offset, item1_size, item2_size;
4332 item1 = btrfs_item_nr(slot);
4333 item2 = btrfs_item_nr(slot + 1);
4334 btrfs_item_key_to_cpu(buf, &k1, slot);
4335 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4336 item1_offset = btrfs_item_offset(buf, item1);
4337 item2_offset = btrfs_item_offset(buf, item2);
4338 item1_size = btrfs_item_size(buf, item1);
4339 item2_size = btrfs_item_size(buf, item2);
/* Temporary copies of both item payloads. */
4341 item1_data = malloc(item1_size);
4344 item2_data = malloc(item2_size);
4350 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4351 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but item2_size bytes are
 * written from it, and the reverse for item2_data.  If the two sizes can
 * differ this presumably reads beyond the shorter buffer - confirm.
 */
4353 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4354 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Swap the offset and size fields of the two item headers. */
4358 btrfs_set_item_offset(buf, item1, item2_offset);
4359 btrfs_set_item_offset(buf, item2, item1_offset);
4360 btrfs_set_item_size(buf, item1, item2_size);
4361 btrfs_set_item_size(buf, item2, item1_size);
/* Swap the keys, pointing path->slots[0] at each slot in turn. */
4363 path->slots[0] = slot;
4364 btrfs_set_item_key_unsafe(root, path, &k2);
4365 path->slots[0] = slot + 1;
4366 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the adjacent key pairs of the block at path->lowest_level, compare
 * them with btrfs_comp_cpu_keys() and call swap_values() on pairs that fail
 * the ordering test, marking the buffer dirty.
 * NOTE(review): the loop bound is nritems - 1; if the header count is an
 * unsigned type, confirm that a block with zero items cannot reach here.
 */
4371 static int fix_key_order(struct btrfs_trans_handle *trans,
4372 struct btrfs_root *root,
4373 struct btrfs_path *path)
4375 struct extent_buffer *buf;
4376 struct btrfs_key k1, k2;
4378 int level = path->lowest_level;
4381 buf = path->nodes[level];
4382 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Keys are read with the node or the item accessor. */
4384 btrfs_node_key_to_cpu(buf, &k1, i);
4385 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4387 btrfs_item_key_to_cpu(buf, &k1, i);
4388 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4390 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4392 ret = swap_values(root, path, buf, i);
4395 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys pass the type test (NOTE(review): other types are
 * presumably rejected with an error return - confirm).  The item headers
 * after @slot are moved down by one, nritems is decremented, the low keys
 * are fixed up from the key in slot 0 (presumably only when @slot is 0) and
 * the buffer is marked dirty.
 */
4401 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4402 struct btrfs_root *root,
4403 struct btrfs_path *path,
4404 struct extent_buffer *buf, int slot)
4406 struct btrfs_key key;
4407 int nritems = btrfs_header_nritems(buf);
4409 btrfs_item_key_to_cpu(buf, &key, slot);
4411 /* These are all the keys we can deal with missing. */
4412 if (key.type != BTRFS_DIR_INDEX_KEY &&
4413 key.type != BTRFS_EXTENT_ITEM_KEY &&
4414 key.type != BTRFS_METADATA_ITEM_KEY &&
4415 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4416 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4419 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4420 (unsigned long long)key.objectid, key.type,
4421 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the removed slot. */
4422 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4423 btrfs_item_nr_offset(slot + 1),
4424 sizeof(struct btrfs_item) *
4425 (nritems - slot - 1));
4426 btrfs_set_header_nritems(buf, nritems - 1);
4428 struct btrfs_disk_key disk_key;
4430 btrfs_item_key(buf, &disk_key, 0);
4431 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4433 btrfs_mark_buffer_dirty(buf);
/*
 * Repair the item offsets of the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root) and every later
 * item is expected to end where the previous item starts.  An item that
 * ends beyond that point is passed to delete_bogus_item(); an item that
 * ends before it leaves a gap, and the item data is moved up by the size of
 * that gap, after which the buffer is marked dirty.
 */
4437 static int fix_item_offset(struct btrfs_trans_handle *trans,
4438 struct btrfs_root *root,
4439 struct btrfs_path *path)
4441 struct extent_buffer *buf;
4445 /* We should only get this for leaves */
4446 BUG_ON(path->lowest_level);
4447 buf = path->nodes[0];
4449 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4450 unsigned int shift = 0, offset;
/* First item: its end is compared with the end of the leaf data area. */
4452 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4453 BTRFS_LEAF_DATA_SIZE(root)) {
4454 if (btrfs_item_end_nr(buf, i) >
4455 BTRFS_LEAF_DATA_SIZE(root)) {
4456 ret = delete_bogus_item(trans, root, path,
4460 fprintf(stderr, "item is off the end of the "
4461 "leaf, can't fix\n");
4465 shift = BTRFS_LEAF_DATA_SIZE(root) -
4466 btrfs_item_end_nr(buf, i);
/* Later items: the end is compared with the start of the previous item. */
4467 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4468 btrfs_item_offset_nr(buf, i - 1)) {
4469 if (btrfs_item_end_nr(buf, i) >
4470 btrfs_item_offset_nr(buf, i - 1)) {
4471 ret = delete_bogus_item(trans, root, path,
4475 fprintf(stderr, "items overlap, can't fix\n");
4479 shift = btrfs_item_offset_nr(buf, i - 1) -
4480 btrfs_item_end_nr(buf, i);
4485 printf("Shifting item nr %d by %u bytes in block %llu\n",
4486 i, shift, (unsigned long long)buf->start);
4487 offset = btrfs_item_offset_nr(buf, i);
/* Move the item data up by shift bytes, then update its offset field. */
4488 memmove_extent_buffer(buf,
4489 btrfs_leaf_data(buf) + offset + shift,
4490 btrfs_leaf_data(buf) + offset,
4491 btrfs_item_size_nr(buf, i));
4492 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4494 btrfs_mark_buffer_dirty(buf);
4498 * We may have moved things, in which case we want to exit so we don't
4499 * write those changes out. Once we have proper abort functionality in
4500 * progs this can be changed to something nicer.
4507 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4508 * then just return -EIO.
/*
 * Only the BAD_KEY_ORDER and INVALID_OFFSETS statuses are handled.  All
 * roots referencing buf->start are collected with btrfs_find_all_roots();
 * for each of them the root is read, a transaction is started, the block is
 * located with btrfs_search_slot() at its own level (block checking is
 * disabled on the path and the last argument is 1, presumably the cow
 * flag), and fix_key_order() or fix_item_offset() is applied depending on
 * @status.
 */
4510 static int try_to_fix_bad_block(struct btrfs_root *root,
4511 struct extent_buffer *buf,
4512 enum btrfs_tree_block_status status)
4514 struct btrfs_trans_handle *trans;
4515 struct ulist *roots;
4516 struct ulist_node *node;
4517 struct btrfs_root *search_root;
4518 struct btrfs_path path;
4519 struct ulist_iterator iter;
4520 struct btrfs_key root_key, key;
4523 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4524 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4527 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4531 btrfs_init_path(&path);
4532 ULIST_ITER_INIT(&iter);
/* node->val of each ulist entry is used as the objectid of a root. */
4533 while ((node = ulist_next(roots, &iter))) {
4534 root_key.objectid = node->val;
4535 root_key.type = BTRFS_ROOT_ITEM_KEY;
4536 root_key.offset = (u64)-1;
4538 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4545 trans = btrfs_start_transaction(search_root, 0);
4546 if (IS_ERR(trans)) {
4547 ret = PTR_ERR(trans);
/* Search down to the level of buf, keyed by the first key stored in it. */
4551 path.lowest_level = btrfs_header_level(buf);
4552 path.skip_check_block = 1;
4553 if (path.lowest_level)
4554 btrfs_node_key_to_cpu(buf, &key, 0);
4556 btrfs_item_key_to_cpu(buf, &key, 0);
4557 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4560 btrfs_commit_transaction(trans, search_root);
/* Dispatch to the repair routine that matches the reported status. */
4563 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4564 ret = fix_key_order(trans, search_root, &path);
4565 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4566 ret = fix_item_offset(trans, search_root, &path);
4568 btrfs_commit_transaction(trans, search_root);
4571 btrfs_release_path(&path);
4572 btrfs_commit_transaction(trans, search_root);
4575 btrfs_release_path(&path);
/*
 * Validate the tree block @buf and update its extent record.
 *
 * The record found for buf->start in @extent_cache receives the header
 * generation, the objectid of the first key (when the block has items) and
 * the level.  The block is checked with btrfs_check_leaf() or
 * btrfs_check_node(); a block that is not clean may be handed to
 * try_to_fix_bad_block().  A clean block has content_checked set;
 * owner_ref_checked is set directly when @flags carries
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and otherwise depends on check_owner_ref().
 * Finally the record is offered to maybe_free_extent_rec().
 */
4579 static int check_block(struct btrfs_root *root,
4580 struct cache_tree *extent_cache,
4581 struct extent_buffer *buf, u64 flags)
4583 struct extent_record *rec;
4584 struct cache_extent *cache;
4585 struct btrfs_key key;
4586 enum btrfs_tree_block_status status;
4590 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4593 rec = container_of(cache, struct extent_record, cache);
4594 rec->generation = btrfs_header_generation(buf);
4596 level = btrfs_header_level(buf);
4597 if (btrfs_header_nritems(buf) > 0) {
4600 btrfs_item_key_to_cpu(buf, &key, 0);
4602 btrfs_node_key_to_cpu(buf, &key, 0);
4604 rec->info_objectid = key.objectid;
4606 rec->info_level = level;
/* Structural check, validated against the parent key kept in the record. */
4608 if (btrfs_is_leaf(buf))
4609 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4611 status = btrfs_check_node(root, &rec->parent_key, buf);
4613 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4615 status = try_to_fix_bad_block(root, buf, status);
4616 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4618 fprintf(stderr, "bad block %llu\n",
4619 (unsigned long long)buf->start);
4622 * Signal to callers we need to start the scan over
4623 * again since we'll have cowed blocks.
4628 rec->content_checked = 1;
4629 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4630 rec->owner_ref_checked = 1;
4632 ret = check_owner_ref(root, rec, buf);
4634 rec->owner_ref_checked = 1;
4638 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref that matches either
 * @parent (compared with back->parent) or @root (compared with back->root).
 * NOTE(review): presumably a non-zero @parent selects the parent
 * comparison on full backrefs, data backrefs are skipped, and NULL is
 * returned when nothing matches - confirm.
 */
4642 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4643 u64 parent, u64 root)
4645 struct list_head *cur = rec->backrefs.next;
4646 struct extent_backref *node;
4647 struct tree_backref *back;
4649 while(cur != &rec->backrefs) {
4650 node = to_extent_backref(cur);
4654 back = to_tree_backref(node);
4656 if (!node->full_backref)
4658 if (parent == back->parent)
4661 if (node->full_backref)
4663 if (back->root == root)
/*
 * Allocate a tree backref, zero its embedded node and append it to the tail
 * of @rec->backrefs.  The new entry is either a full backref carrying
 * @parent or a non-full backref.
 * NOTE(review): presumably a non-zero @parent selects the full backref
 * form, the other form stores @root, and NULL is returned when malloc()
 * fails - confirm.
 */
4670 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4671 u64 parent, u64 root)
4673 struct tree_backref *ref = malloc(sizeof(*ref));
4677 memset(&ref->node, 0, sizeof(ref->node));
4679 ref->parent = parent;
4680 ref->node.full_backref = 1;
4683 ref->node.full_backref = 0;
4685 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a data backref that matches either
 * @parent, or the triple @root, @owner and @offset.
 *
 * For the triple match there is an extra test: when the caller passes a
 * non-zero found_ref and the candidate already has found_ref set, the
 * candidate is also compared on @bytes and @disk_bytenr.
 * NOTE(review): presumably a candidate that differs there is skipped so
 * the caller allocates a separate backref, and NULL means no match -
 * confirm.
 */
4690 static struct data_backref *find_data_backref(struct extent_record *rec,
4691 u64 parent, u64 root,
4692 u64 owner, u64 offset,
4694 u64 disk_bytenr, u64 bytes)
4696 struct list_head *cur = rec->backrefs.next;
4697 struct extent_backref *node;
4698 struct data_backref *back;
4700 while(cur != &rec->backrefs) {
4701 node = to_extent_backref(cur);
4705 back = to_data_backref(node);
4707 if (!node->full_backref)
4709 if (parent == back->parent)
4712 if (node->full_backref)
4714 if (back->root == root && back->owner == owner &&
4715 back->offset == offset) {
4716 if (found_ref && node->found_ref &&
4717 (back->bytes != bytes ||
4718 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref, zero its embedded node, flag it as data and
 * append it to the tail of @rec->backrefs.  The entry is either a full
 * backref carrying @parent or a non-full backref carrying @offset (and
 * presumably @root and @owner - confirm).  Its byte count is initialised
 * from max_size, and rec->max_size is raised to max_size when that is
 * larger.
 */
4727 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4728 u64 parent, u64 root,
4729 u64 owner, u64 offset,
4732 struct data_backref *ref = malloc(sizeof(*ref));
4736 memset(&ref->node, 0, sizeof(ref->node));
4737 ref->node.is_data = 1;
4740 ref->parent = parent;
4743 ref->node.full_backref = 1;
4747 ref->offset = offset;
4748 ref->node.full_backref = 0;
4750 ref->bytes = max_size;
4753 list_add_tail(&ref->node.list, &rec->backrefs);
4754 if (max_size > rec->max_size)
4755 rec->max_size = max_size;
/*
 * Sets rec->wrong_chunk_type when the block group found for rec->start has
 * flags that do not fit the extent:
 *  - a data extent needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent needs SYSTEM or METADATA, and once a backref is
 *    known the first backref decides which one: root
 *    BTRFS_CHUNK_TREE_OBJECTID requires SYSTEM, any other root requires
 *    METADATA;
 *  - a metadata extent whose first backref is a data backref is flagged.
 */
4759 /* Check if the type of extent matches with its chunk */
4760 static void check_extent_type(struct extent_record *rec)
4762 struct btrfs_block_group_cache *bg_cache;
4764 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4768 /* data extent, check chunk directly*/
4769 if (!rec->metadata) {
4770 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4771 rec->wrong_chunk_type = 1;
4775 /* metadata extent, check the obvious case first */
4776 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4777 BTRFS_BLOCK_GROUP_METADATA))) {
4778 rec->wrong_chunk_type = 1;
4783 * Check SYSTEM extent, as it's also marked as metadata, we can only
4784 * make sure it's a SYSTEM extent by its backref
4786 if (!list_empty(&rec->backrefs)) {
4787 struct extent_backref *node;
4788 struct tree_backref *tback;
4791 node = to_extent_backref(rec->backrefs.next);
4792 if (node->is_data) {
4793 /* tree block shouldn't have data backref */
4794 rec->wrong_chunk_type = 1;
4797 tback = container_of(node, struct tree_backref, node);
4799 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4800 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4802 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4803 if (!(bg_cache->flags & bg_type))
4804 rec->wrong_chunk_type = 1;
4809 * Allocate a new extent record, fill default values from @tmpl and insert into
4810 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4811 * the cache, otherwise it fails.
/*
 * Build a fresh extent_record from @tmpl and insert it into @extent_cache.
 *
 * Copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key.  Reset to defaults: num_duplicates,
 * flag_block_full_backref (FLAG_UNSET), bad_full_backref, crossing_stripes
 * and wrong_chunk_type.  rec->nr is the larger of tmpl->nr and
 * tmpl->max_size, while the cache entry size is tmpl->nr.
 *
 * After insertion rec->nr is added to the global bytes_used counter and the
 * record is checked for stripe crossing and for its chunk type.
 */
4813 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4814 struct extent_record *tmpl)
4816 struct extent_record *rec;
4819 rec = malloc(sizeof(*rec));
4822 rec->start = tmpl->start;
4823 rec->max_size = tmpl->max_size;
4824 rec->nr = max(tmpl->nr, tmpl->max_size);
4825 rec->found_rec = tmpl->found_rec;
4826 rec->content_checked = tmpl->content_checked;
4827 rec->owner_ref_checked = tmpl->owner_ref_checked;
4828 rec->num_duplicates = 0;
4829 rec->metadata = tmpl->metadata;
4830 rec->flag_block_full_backref = FLAG_UNSET;
4831 rec->bad_full_backref = 0;
4832 rec->crossing_stripes = 0;
4833 rec->wrong_chunk_type = 0;
4834 rec->is_root = tmpl->is_root;
4835 rec->refs = tmpl->refs;
4836 rec->extent_item_refs = tmpl->extent_item_refs;
4837 rec->parent_generation = tmpl->parent_generation;
4838 INIT_LIST_HEAD(&rec->backrefs);
4839 INIT_LIST_HEAD(&rec->dups);
4840 INIT_LIST_HEAD(&rec->list);
4841 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache entry is keyed by the template start and nr. */
4842 rec->cache.start = tmpl->start;
4843 rec->cache.size = tmpl->nr;
4844 ret = insert_cache_extent(extent_cache, &rec->cache);
4849 bytes_used += rec->nr;
4852 rec->crossing_stripes = check_crossing_stripes(global_info,
4853 rec->start, global_info->tree_root->nodesize);
4854 check_extent_type(rec);
4859 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4861 * - refs - if found, increase refs
4862 * - is_root - if found, set
4863 * - content_checked - if found, set
4864 * - owner_ref_checked - if found, set
4866 * If not found, create a new one, initialize and insert.
/*
 * When a record already covers the template range it is updated in place.
 * A template that carries found_rec and either starts elsewhere or hits a
 * record that already has found_rec is treated as a duplicate: the record
 * is queued on duplicate_extents and a small copy of the template is
 * appended to rec->dups.  Otherwise extent_item_refs, the checked flags,
 * parent_key, parent_generation and max_size are merged into the record,
 * which is then re-checked and offered to maybe_free_extent_rec().
 * When no record exists, add_extent_rec_nolookup() creates one.
 */
4868 static int add_extent_rec(struct cache_tree *extent_cache,
4869 struct extent_record *tmpl)
4871 struct extent_record *rec;
4872 struct cache_extent *cache;
4876 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4878 rec = container_of(cache, struct extent_record, cache);
4882 rec->nr = max(tmpl->nr, tmpl->max_size);
4885 * We need to make sure to reset nr to whatever the extent
4886 * record says was the real size, this way we can compare it to
4889 if (tmpl->found_rec) {
4890 if (tmpl->start != rec->start || rec->found_rec) {
4891 struct extent_record *tmp;
/* Queue the record on the duplicate list once only. */
4894 if (list_empty(&rec->list))
4895 list_add_tail(&rec->list,
4896 &duplicate_extents);
4899 * We have to do this song and dance in case we
4900 * find an extent record that falls inside of
4901 * our current extent record but does not have
4902 * the same objectid.
4904 tmp = malloc(sizeof(*tmp));
4907 tmp->start = tmpl->start;
4908 tmp->max_size = tmpl->max_size;
4911 tmp->metadata = tmpl->metadata;
4912 tmp->extent_item_refs = tmpl->extent_item_refs;
4913 INIT_LIST_HEAD(&tmp->list);
4914 list_add_tail(&tmp->list, &rec->dups);
4915 rec->num_duplicates++;
/* A second non-zero extent_item_refs for the same record is reported. */
4922 if (tmpl->extent_item_refs && !dup) {
4923 if (rec->extent_item_refs) {
4924 fprintf(stderr, "block %llu rec "
4925 "extent_item_refs %llu, passed %llu\n",
4926 (unsigned long long)tmpl->start,
4927 (unsigned long long)
4928 rec->extent_item_refs,
4929 (unsigned long long)tmpl->extent_item_refs);
4931 rec->extent_item_refs = tmpl->extent_item_refs;
4935 if (tmpl->content_checked)
4936 rec->content_checked = 1;
4937 if (tmpl->owner_ref_checked)
4938 rec->owner_ref_checked = 1;
4939 memcpy(&rec->parent_key, &tmpl->parent_key,
4940 sizeof(tmpl->parent_key));
4941 if (tmpl->parent_generation)
4942 rec->parent_generation = tmpl->parent_generation;
4943 if (rec->max_size < tmpl->max_size)
4944 rec->max_size = tmpl->max_size;
4947 * A metadata extent can't cross stripe_len boundary, otherwise
4948 * kernel scrub won't be able to handle it.
4949 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4953 rec->crossing_stripes = check_crossing_stripes(
4954 global_info, rec->start,
4955 global_info->tree_root->nodesize);
4956 check_extent_type(rec);
4957 maybe_free_extent_rec(extent_cache, rec);
4961 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Register a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache and created from a
 * zeroed template when missing.  A matching backref is looked up with
 * find_tree_backref() and allocated when absent.  One of two flags is then
 * set on it, found_ref or found_extent_tree (NOTE(review): presumably
 * chosen by @found_ref - confirm); if the flag was already set, a message
 * is printed to stderr.  Finally the record is re-checked for its chunk
 * type and offered to maybe_free_extent_rec().
 */
4966 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4967 u64 parent, u64 root, int found_ref)
4969 struct extent_record *rec;
4970 struct tree_backref *back;
4971 struct cache_extent *cache;
4974 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4976 struct extent_record tmpl;
4978 memset(&tmpl, 0, sizeof(tmpl));
4979 tmpl.start = bytenr;
4983 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4987 /* really a bug in cache_extent implement now */
4988 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4993 rec = container_of(cache, struct extent_record, cache);
4994 if (rec->start != bytenr) {
4996 * Several cause, from unaligned bytenr to over lapping extents
5001 back = find_tree_backref(rec, parent, root);
5003 back = alloc_tree_backref(rec, parent, root);
5009 if (back->node.found_ref) {
5010 fprintf(stderr, "Extent back ref already exists "
5011 "for %llu parent %llu root %llu \n",
5012 (unsigned long long)bytenr,
5013 (unsigned long long)parent,
5014 (unsigned long long)root);
5016 back->node.found_ref = 1;
5018 if (back->node.found_extent_tree) {
5019 fprintf(stderr, "Extent back ref already exists "
5020 "for %llu parent %llu root %llu \n",
5021 (unsigned long long)bytenr,
5022 (unsigned long long)parent,
5023 (unsigned long long)root);
5025 back->node.found_extent_tree = 1;
5027 check_extent_type(rec);
5028 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register a data backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache and created from a
 * template (start and max_size filled in) when missing; rec->max_size is
 * raised to @max_size when that is larger.  A matching backref is looked up
 * with find_data_backref() and allocated when absent.
 *
 * Two update paths are visible (NOTE(review): presumably selected by
 * @found_ref - confirm):
 *  - reference path: @num_refs must be 1, the found_ref counter of the
 *    backref is incremented, bytes and disk_bytenr are recorded, and the
 *    record is marked content_checked and owner_ref_checked;
 *  - extent tree path: num_refs is stored and found_extent_tree is set,
 *    with a message on stderr if it was already set.
 */
5032 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5033 u64 parent, u64 root, u64 owner, u64 offset,
5034 u32 num_refs, int found_ref, u64 max_size)
5036 struct extent_record *rec;
5037 struct data_backref *back;
5038 struct cache_extent *cache;
5041 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5043 struct extent_record tmpl;
5045 memset(&tmpl, 0, sizeof(tmpl));
5046 tmpl.start = bytenr;
5048 tmpl.max_size = max_size;
5050 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5054 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5059 rec = container_of(cache, struct extent_record, cache);
5060 if (rec->max_size < max_size)
5061 rec->max_size = max_size;
5064 * If found_ref is set then max_size is the real size and must match the
5065 * existing refs. So if we have already found a ref then we need to
5066 * make sure that this ref matches the existing one, otherwise we need
5067 * to add a new backref so we can notice that the backrefs don't match
5068 * and we need to figure out who is telling the truth. This is to
5069 * account for that awful fsync bug I introduced where we'd end up with
5070 * a btrfs_file_extent_item that would have its length include multiple
5071 * prealloc extents or point inside of a prealloc extent.
5073 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5076 back = alloc_data_backref(rec, parent, root, owner, offset,
5082 BUG_ON(num_refs != 1);
5083 if (back->node.found_ref)
5084 BUG_ON(back->bytes != max_size);
5085 back->node.found_ref = 1;
5086 back->found_ref += 1;
5087 back->bytes = max_size;
5088 back->disk_bytenr = bytenr;
5090 rec->content_checked = 1;
5091 rec->owner_ref_checked = 1;
5093 if (back->node.found_extent_tree) {
5094 fprintf(stderr, "Extent back ref already exists "
5095 "for %llu parent %llu root %llu "
5096 "owner %llu offset %llu num_refs %lu\n",
5097 (unsigned long long)bytenr,
5098 (unsigned long long)parent,
5099 (unsigned long long)root,
5100 (unsigned long long)owner,
5101 (unsigned long long)offset,
5102 (unsigned long)num_refs);
5104 back->num_refs = num_refs;
5105 back->node.found_extent_tree = 1;
5107 maybe_free_extent_rec(extent_cache, rec);
/*
 * Add the range @bytenr/@size to the @seen tree and then to the @pending
 * tree.
 * NOTE(review): presumably a failure to insert into @seen (range already
 * present) returns early so the range is not queued twice - confirm.
 */
5111 static int add_pending(struct cache_tree *pending,
5112 struct cache_tree *seen, u64 bytenr, u32 size)
5115 ret = add_cache_extent(seen, bytenr, size);
5118 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the next ranges to process, drawing
 * from the @reada, @nodes and @pending trees.
 *
 * Visible selection steps:
 *  - an entry found in @reada is stored in bits[0];
 *  - @nodes is searched starting from @last, moved back by 32768 when
 *    @last is larger than that, and again from 0;
 *  - @pending is searched from 0;
 *  - consecutive cache entries are copied into @bits until the tree or the
 *    capacity is exhausted;
 *  - when more than 8 slots remain free, @pending is scanned from the end
 *    of bits[0] and entries are appended while the gap to the previous
 *    range is not larger than 32768.
 * NOTE(review): the order of fallbacks between the trees, the early
 * returns and the returned count are not fully visible here - confirm.
 */
5122 static int pick_next_pending(struct cache_tree *pending,
5123 struct cache_tree *reada,
5124 struct cache_tree *nodes,
5125 u64 last, struct block_info *bits, int bits_nr,
5128 unsigned long node_start = last;
5129 struct cache_extent *cache;
5132 cache = search_cache_extent(reada, 0);
5134 bits[0].start = cache->start;
5135 bits[0].size = cache->size;
5140 if (node_start > 32768)
5141 node_start -= 32768;
5143 cache = search_cache_extent(nodes, node_start);
5145 cache = search_cache_extent(nodes, 0);
5148 cache = search_cache_extent(pending, 0);
5153 bits[ret].start = cache->start;
5154 bits[ret].size = cache->size;
5155 cache = next_cache_extent(cache);
5157 } while (cache && ret < bits_nr);
5163 bits[ret].start = cache->start;
5164 bits[ret].size = cache->size;
5165 cache = next_cache_extent(cache);
5167 } while (cache && ret < bits_nr);
5169 if (bits_nr - ret > 8) {
5170 u64 lookup = bits[0].start + bits[0].size;
5171 struct cache_extent *next;
5172 next = search_cache_extent(pending, lookup);
5174 if (next->start - lookup > 32768)
5176 bits[ret].start = next->start;
5177 bits[ret].size = next->size;
5178 lookup = next->start + next->size;
5182 next = next_cache_extent(next);
/*
 * Release callback for one chunk cache entry: maps @cache back to its
 * chunk_record and unlinks the record from its list and dextents lists.
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
5190 static void free_chunk_record(struct cache_extent *cache)
5192 struct chunk_record *rec;
5194 rec = container_of(cache, struct chunk_record, cache);
5195 list_del_init(&rec->list);
5196 list_del_init(&rec->dextents);
/* Release every entry of @chunk_cache through free_chunk_record(). */
5200 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5202 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Release callback for one device cache node: maps @node back to its
 * device_record.
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
5205 static void free_device_record(struct rb_node *node)
5207 struct device_record *rec;
5209 rec = container_of(node, struct device_record, node);
/*
 * Macro instantiation pairing the name device_cache with the callback
 * above (presumably expands to the tree teardown function - confirm).
 */
5213 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 * NOTE(review): presumably a failed cache insert returns its error before
 * the list is touched - confirm.
 */
5215 int insert_block_group_record(struct block_group_tree *tree,
5216 struct block_group_record *bg_rec)
5220 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5224 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Release callback for one block group cache entry: maps @cache back to its
 * block_group_record and unlinks the record from its list.
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
5228 static void free_block_group_record(struct cache_extent *cache)
5230 struct block_group_record *rec;
5232 rec = container_of(cache, struct block_group_record, cache);
5233 list_del_init(&rec->list);
/* Release every entry of tree->tree through free_block_group_record(). */
5237 void free_block_group_tree(struct block_group_tree *tree)
5239 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the cache tree of @tree with insert_cache_extent2()
 * and queue it on both orphan lists, no_chunk_orphans and
 * no_device_orphans.
 * NOTE(review): presumably a failed insert returns its error before the
 * lists are touched - confirm.
 */
5242 int insert_device_extent_record(struct device_extent_tree *tree,
5243 struct device_extent_record *de_rec)
5248 * Device extent is a bit different from the other extents, because
5249 * the extents which belong to the different devices may have the
5250 * same start and size, so we need use the special extent cache
5251 * search/insert functions.
5253 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5257 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5258 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Release callback for one device extent cache entry: maps @cache back to
 * its device_extent_record and unlinks the record from chunk_list and
 * device_list when it is still linked on them.
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
5262 static void free_device_extent_record(struct cache_extent *cache)
5264 struct device_extent_record *rec;
5266 rec = container_of(cache, struct device_extent_record, cache);
5267 if (!list_empty(&rec->chunk_list))
5268 list_del_init(&rec->chunk_list);
5269 if (!list_empty(&rec->device_list))
5270 list_del_init(&rec->device_list);
/* Release every entry of tree->tree through free_device_extent_record(). */
5274 void free_device_extent_tree(struct device_extent_tree *tree)
5276 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5279 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert a v0 extent ref item at @slot of @leaf into a backref entry.
 *
 * The item key supplies the extent: key.objectid is passed as bytenr and
 * key.offset as parent.  A ref objectid below BTRFS_FIRST_FREE_OBJECTID is
 * recorded as a tree backref; anything else is recorded as a data backref
 * carrying the v0 ref count, with root, owner, offset, found_ref and
 * max_size all passed as 0.
 */
5280 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5281 struct extent_buffer *leaf, int slot)
5283 struct btrfs_extent_ref_v0 *ref0;
5284 struct btrfs_key key;
5287 btrfs_item_key_to_cpu(leaf, &key, slot);
5288 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5289 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5290 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5293 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5294 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at slot of @leaf.
 *
 * The record is allocated zeroed, sized for the number of stripes stored in
 * the item.  The cache range is key->offset plus the chunk length; the key
 * fields, the header generation and the chunk attributes (owner, stripe
 * length, type flags, io width, io align, sector size, stripe counts) are
 * copied in, followed by devid, offset and device uuid of every stripe.
 * On allocation failure a message is printed to stderr (NOTE(review): what
 * happens next is not visible here - confirm).
 */
5300 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5301 struct btrfs_key *key,
5304 struct btrfs_chunk *ptr;
5305 struct chunk_record *rec;
5308 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5309 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5311 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5313 fprintf(stderr, "memory allocation failed\n");
5317 INIT_LIST_HEAD(&rec->list);
5318 INIT_LIST_HEAD(&rec->dextents);
5321 rec->cache.start = key->offset;
5322 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5324 rec->generation = btrfs_header_generation(leaf);
5326 rec->objectid = key->objectid;
5327 rec->type = key->type;
5328 rec->offset = key->offset;
5330 rec->length = rec->cache.size;
5331 rec->owner = btrfs_chunk_owner(leaf, ptr);
5332 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5333 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5334 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5335 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5336 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5337 rec->num_stripes = num_stripes;
5338 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device uuid. */
5340 for (i = 0; i < rec->num_stripes; ++i) {
5341 rec->stripes[i].devid =
5342 btrfs_stripe_devid_nr(leaf, ptr, i);
5343 rec->stripes[i].offset =
5344 btrfs_stripe_offset_nr(leaf, ptr, i);
5345 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5346 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported through error().  A chunk_record is then built with
 * btrfs_new_chunk_record() and inserted into the cache; a failed insert is
 * reported on stderr as an already existing chunk.
 * NOTE(review): the return values of the failure paths are not visible
 * here - confirm.
 */
5353 static int process_chunk_item(struct cache_tree *chunk_cache,
5354 struct btrfs_key *key, struct extent_buffer *eb,
5357 struct chunk_record *rec;
5358 struct btrfs_chunk *chunk;
5361 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5363 * Do extra check for this chunk item,
5365 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5366 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5367 * and owner<->key_type check.
5369 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5372 error("chunk(%llu, %llu) is not valid, ignore it",
5373 key->offset, btrfs_chunk_length(eb, chunk));
5376 rec = btrfs_new_chunk_record(eb, key, slot);
5377 ret = insert_cache_extent(chunk_cache, &rec->cache);
5379 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5380 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the rb-tree @dev_cache, ordered by device_record_compare.
 *
 * The record takes the key fields, the header generation and the device id,
 * total bytes and bytes used stored in the item.  A failed insert is
 * reported on stderr as an already existing device.
 * NOTE(review): the record comes from malloc(), so fields not assigned
 * below are left uninitialised unless hidden lines set them - confirm.
 */
5387 static int process_device_item(struct rb_root *dev_cache,
5388 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5390 struct btrfs_dev_item *ptr;
5391 struct device_record *rec;
5394 ptr = btrfs_item_ptr(eb,
5395 slot, struct btrfs_dev_item);
5397 rec = malloc(sizeof(*rec));
5399 fprintf(stderr, "memory allocation failed\n");
5403 rec->devid = key->offset;
5404 rec->generation = btrfs_header_generation(eb);
5406 rec->objectid = key->objectid;
5407 rec->type = key->type;
5408 rec->offset = key->offset;
/*
 * devid was already assigned from key->offset above; in the statements
 * visible here this later value read from the item is the one that remains.
 */
5410 rec->devid = btrfs_device_id(eb, ptr);
5411 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5412 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5414 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5416 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item at
 * slot of @leaf.
 *
 * The cache range is key->objectid (start) and key->offset (size); the key
 * fields, the header generation and the flags stored in the item are
 * copied in and the list head is initialised.  On allocation failure a
 * message is printed to stderr (NOTE(review): what happens next is not
 * visible here - confirm).
 */
5423 struct block_group_record *
5424 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5427 struct btrfs_block_group_item *ptr;
5428 struct block_group_record *rec;
5430 rec = calloc(1, sizeof(*rec));
5432 fprintf(stderr, "memory allocation failed\n");
5436 rec->cache.start = key->objectid;
5437 rec->cache.size = key->offset;
5439 rec->generation = btrfs_header_generation(leaf);
5441 rec->objectid = key->objectid;
5442 rec->type = key->type;
5443 rec->offset = key->offset;
5445 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5446 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5448 INIT_LIST_HEAD(&rec->list);
/*
 * Process one BLOCK_GROUP_ITEM stored in leaf eb at the given slot: build a
 * block_group_record with btrfs_new_block_group_record() and insert it into
 * block_group_cache.  A record that is already present is reported as
 * existing.
 */
5453 static int process_block_group_item(struct block_group_tree *block_group_cache,
5454 struct btrfs_key *key,
5455 struct extent_buffer *eb, int slot)
5457 struct block_group_record *rec;
5460 rec = btrfs_new_block_group_record(eb, key, slot);
5461 ret = insert_block_group_record(block_group_cache, rec);
5463 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5464 rec->objectid, rec->offset);
/*
 * Allocate a zeroed device_extent_record for the DEV_EXTENT item stored in
 * leaf at the given slot and hand it to the caller.
 *
 * The cache key is (objectid = key objectid, start = key offset), the
 * generation comes from the leaf header and the key fields are copied
 * verbatim.  The owning chunk identifiers and the length are read from the
 * on-disk item; the cache size mirrors that length.  Both list heads of the
 * record are initialized empty.
 */
5471 struct device_extent_record *
5472 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5473 struct btrfs_key *key, int slot)
5475 struct device_extent_record *rec;
5476 struct btrfs_dev_extent *ptr;
5478 rec = calloc(1, sizeof(*rec));
5480 fprintf(stderr, "memory allocation failed\n");
5484 rec->cache.objectid = key->objectid;
5485 rec->cache.start = key->offset;
5487 rec->generation = btrfs_header_generation(leaf);
5489 rec->objectid = key->objectid;
5490 rec->type = key->type;
5491 rec->offset = key->offset;
5493 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5494 rec->chunk_objecteid =
5495 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5497 btrfs_dev_extent_chunk_offset(leaf, ptr);
5498 rec->length = btrfs_dev_extent_length(leaf, ptr);
5499 rec->cache.size = rec->length;
5501 INIT_LIST_HEAD(&rec->chunk_list);
5502 INIT_LIST_HEAD(&rec->device_list);
/*
 * Process one DEV_EXTENT item stored in leaf eb at the given slot: build a
 * device_extent_record with btrfs_new_device_extent_record() and insert it
 * into dev_extent_cache.  A record that is already present is reported as
 * existing.
 */
5508 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5509 struct btrfs_key *key, struct extent_buffer *eb,
5512 struct device_extent_record *rec;
5515 rec = btrfs_new_device_extent_record(eb, key, slot);
5516 ret = insert_device_extent_record(dev_extent_cache, rec);
5519 "Device extent[%llu, %llu, %llu] existed.\n",
5520 rec->objectid, rec->offset, rec->length);
/*
 * Process one EXTENT_ITEM or METADATA_ITEM stored in leaf eb at the given
 * slot and feed it into extent_cache.
 *
 * The extent length is root->nodesize for a METADATA_ITEM key and key.offset
 * for an EXTENT_ITEM key.  Extents whose bytenr is not sector aligned, whose
 * metadata length differs from the node size, or whose data length is not
 * sector aligned are reported and ignored.  Items smaller than a
 * btrfs_extent_item are handled as the v0 format when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * After the extent record is added, the inline references that follow the
 * extent item (and the tree block info, for tree blocks keyed as
 * EXTENT_ITEM) are walked up to the end of the item and each one is turned
 * into a tree or data backref.  An unknown inline reference type is reported
 * as a corrupt extent record.
 */
5527 static int process_extent_item(struct btrfs_root *root,
5528 struct cache_tree *extent_cache,
5529 struct extent_buffer *eb, int slot)
5531 struct btrfs_extent_item *ei;
5532 struct btrfs_extent_inline_ref *iref;
5533 struct btrfs_extent_data_ref *dref;
5534 struct btrfs_shared_data_ref *sref;
5535 struct btrfs_key key;
5536 struct extent_record tmpl;
5541 u32 item_size = btrfs_item_size_nr(eb, slot);
5547 btrfs_item_key_to_cpu(eb, &key, slot);
5549 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5551 num_bytes = root->nodesize;
5553 num_bytes = key.offset;
5556 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5557 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5558 key.objectid, root->sectorsize);
/* Item is too small for the current format: treat it as a v0 extent item. */
5561 if (item_size < sizeof(*ei)) {
5562 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5563 struct btrfs_extent_item_v0 *ei0;
5564 BUG_ON(item_size != sizeof(*ei0));
5565 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5566 refs = btrfs_extent_refs_v0(eb, ei0);
5570 memset(&tmpl, 0, sizeof(tmpl));
5571 tmpl.start = key.objectid;
5572 tmpl.nr = num_bytes;
5573 tmpl.extent_item_refs = refs;
5574 tmpl.metadata = metadata;
5576 tmpl.max_size = num_bytes;
5578 return add_extent_rec(extent_cache, &tmpl);
5581 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5582 refs = btrfs_extent_refs(eb, ei);
5583 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5587 if (metadata && num_bytes != root->nodesize) {
5588 error("ignore invalid metadata extent, length %llu does not equal to %u",
5589 num_bytes, root->nodesize);
5592 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5593 error("ignore invalid data extent, length %llu is not aligned to %u",
5594 num_bytes, root->sectorsize);
5598 memset(&tmpl, 0, sizeof(tmpl));
5599 tmpl.start = key.objectid;
5600 tmpl.nr = num_bytes;
5601 tmpl.extent_item_refs = refs;
5602 tmpl.metadata = metadata;
5604 tmpl.max_size = num_bytes;
/*
 * NOTE(review): the v0 path above returns the result of add_extent_rec(),
 * but here its return value is dropped - confirm that is intentional.
 */
5605 add_extent_rec(extent_cache, &tmpl);
/* Inline references start right after the extent item ... */
5607 ptr = (unsigned long)(ei + 1);
/* ... or after the tree block info for tree blocks keyed as EXTENT_ITEM. */
5608 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5609 key.type == BTRFS_EXTENT_ITEM_KEY)
5610 ptr += sizeof(struct btrfs_tree_block_info);
5612 end = (unsigned long)ei + item_size;
5614 iref = (struct btrfs_extent_inline_ref *)ptr;
5615 type = btrfs_extent_inline_ref_type(eb, iref);
5616 offset = btrfs_extent_inline_ref_offset(eb, iref);
5618 case BTRFS_TREE_BLOCK_REF_KEY:
5619 ret = add_tree_backref(extent_cache, key.objectid,
5622 error("add_tree_backref failed: %s",
5625 case BTRFS_SHARED_BLOCK_REF_KEY:
5626 ret = add_tree_backref(extent_cache, key.objectid,
5629 error("add_tree_backref failed: %s",
/* The extent data ref payload overlays the offset field of the inline ref. */
5632 case BTRFS_EXTENT_DATA_REF_KEY:
5633 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5634 add_data_backref(extent_cache, key.objectid, 0,
5635 btrfs_extent_data_ref_root(eb, dref),
5636 btrfs_extent_data_ref_objectid(eb,
5638 btrfs_extent_data_ref_offset(eb, dref),
5639 btrfs_extent_data_ref_count(eb, dref),
/* The shared data ref payload follows the inline ref header. */
5642 case BTRFS_SHARED_DATA_REF_KEY:
5643 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5644 add_data_backref(extent_cache, key.objectid, offset,
5646 btrfs_shared_data_ref_count(eb, sref),
5650 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5651 key.objectid, key.type, num_bytes);
/* Advance by the size of the reference type that was just decoded. */
5654 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [offset, offset + bytes) of block group cache is
 * described by a free space entry with exactly that offset and length.
 *
 * Each superblock mirror is first mapped into the block group with
 * btrfs_rmap_block(); any returned stripe that overlaps the range is carved
 * out of it (trimming the front or the back, or recursing on the left part
 * when the stripe sits in the middle of the range).  What is left is looked
 * up with btrfs_find_free_space(); a missing entry or one with a different
 * offset or length is reported.  A matching entry is unlinked from
 * free_space_ctl.
 */
5661 static int check_cache_range(struct btrfs_root *root,
5662 struct btrfs_block_group_cache *cache,
5663 u64 offset, u64 bytes)
5665 struct btrfs_free_space *entry;
5671 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5672 bytenr = btrfs_sb_offset(i);
5673 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5674 cache->key.objectid, bytenr, 0,
5675 &logical, &nr, &stripe_len);
5680 if (logical[nr] + stripe_len <= offset)
5682 if (offset + bytes <= logical[nr])
5684 if (logical[nr] == offset) {
5685 if (stripe_len >= bytes) {
5689 bytes -= stripe_len;
5690 offset += stripe_len;
5691 } else if (logical[nr] < offset) {
5692 if (logical[nr] + stripe_len >=
5697 bytes = (offset + bytes) -
5698 (logical[nr] + stripe_len);
5699 offset = logical[nr] + stripe_len;
5702 * Could be tricky, the super may land in the
5703 * middle of the area we're checking. First
5704 * check the easiest case, it's at the end.
5706 if (logical[nr] + stripe_len >=
5708 bytes = logical[nr] - offset;
5712 /* Check the left side */
5713 ret = check_cache_range(root, cache,
5715 logical[nr] - offset);
5721 /* Now we continue with the right side */
5722 bytes = (offset + bytes) -
5723 (logical[nr] + stripe_len);
5724 offset = logical[nr] + stripe_len;
/* The remaining range must match one free space entry exactly. */
5731 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5733 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5734 offset, offset+bytes);
5738 if (entry->offset != offset) {
5739 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5744 if (entry->bytes != bytes) {
5745 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5746 bytes, entry->bytes, offset);
/* Consume the entry so that it cannot be matched a second time. */
5750 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space entries of block group cache against
 * the extent tree.
 *
 * Starting at the larger of the block group start and
 * BTRFS_SUPER_INFO_OFFSET, the extent tree is walked over every EXTENT_ITEM
 * and METADATA_ITEM inside the block group.  The variable last tracks the
 * end of the previous extent; every gap between last and the next extent,
 * and the tail gap up to the end of the block group, is verified with
 * check_cache_range().  Afterwards the free space offset tree is checked
 * with RB_EMPTY_ROOT and leftover entries are reported.
 */
5755 static int verify_space_cache(struct btrfs_root *root,
5756 struct btrfs_block_group_cache *cache)
5758 struct btrfs_path path;
5759 struct extent_buffer *leaf;
5760 struct btrfs_key key;
/* From here on root refers to the extent tree, whatever root was passed in. */
5764 root = root->fs_info->extent_root;
5766 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5768 btrfs_init_path(&path);
5769 key.objectid = last;
5771 key.type = BTRFS_EXTENT_ITEM_KEY;
5772 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5777 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5778 ret = btrfs_next_leaf(root, &path);
5786 leaf = path.nodes[0];
5787 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5788 if (key.objectid >= cache->key.offset + cache->key.objectid)
5790 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5791 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly where the previous one ended: no gap to verify. */
5796 if (last == key.objectid) {
5797 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5798 last = key.objectid + key.offset;
5800 last = key.objectid + root->nodesize;
/* Otherwise the gap [last, key.objectid) must be covered by free space. */
5805 ret = check_cache_range(root, cache, last,
5806 key.objectid - last);
5809 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5810 last = key.objectid + key.offset;
5812 last = key.objectid + root->nodesize;
/* Tail of the block group after the last extent. */
5816 if (last < cache->key.objectid + cache->key.offset)
5817 ret = check_cache_range(root, cache, last,
5818 cache->key.objectid +
5819 cache->key.offset - last);
5822 btrfs_release_path(&path);
5825 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5826 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * When the super block carries a cache generation other than -1 that differs
 * from the super generation, a message says the space cache will be
 * invalidated.  Otherwise the block groups are visited in address order,
 * starting after the primary super block.  For each one the free space
 * control structure is initialized when it is missing, and
 * btrfs_remove_free_space_cache() is also called in this step (presumably
 * for a block group that already has one - confirm).  The free space is then
 * loaded either from the free space tree (when the FREE_SPACE_TREE compat_ro
 * feature is set, with super stripes excluded around the load) or from the
 * free space cache, and checked with verify_space_cache().
 *
 * Returns -EINVAL when the error counter is non-zero, 0 otherwise.
 */
5834 static int check_space_cache(struct btrfs_root *root)
5836 struct btrfs_block_group_cache *cache;
5837 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5841 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5842 btrfs_super_generation(root->fs_info->super_copy) !=
5843 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5844 printf("cache and super generation don't match, space cache "
5845 "will be invalidated\n");
5849 if (ctx.progress_enabled) {
5850 ctx.tp = TASK_FREE_SPACE;
5851 task_start(ctx.info);
5855 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup continues right after this block group. */
5859 start = cache->key.objectid + cache->key.offset;
5860 if (!cache->free_space_ctl) {
5861 if (btrfs_init_free_space_ctl(cache,
5862 root->sectorsize)) {
5867 btrfs_remove_free_space_cache(cache);
5870 if (btrfs_fs_compat_ro(root->fs_info,
5871 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5872 ret = exclude_super_stripes(root, cache);
5874 fprintf(stderr, "could not exclude super stripes: %s\n",
5879 ret = load_free_space_tree(root->fs_info, cache);
5880 free_excluded_extents(root, cache);
5882 fprintf(stderr, "could not load free space tree: %s\n",
5889 ret = load_free_space_cache(root->fs_info, cache);
5894 ret = verify_space_cache(root, cache);
5896 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5897 cache->key.objectid);
5902 task_stop(ctx.info);
5904 return error ? -EINVAL : 0;
/*
 * Read the data of the range [bytenr, bytenr + num_bytes) and verify it one
 * sector at a time against the checksums stored in eb starting at
 * leaf_offset, csum_size bytes per sector.
 *
 * A length that is not a multiple of the sector size is checked for up
 * front.  The whole range is read into one buffer of num_bytes bytes.  On a
 * mismatch the mirror number, the byte address, the computed checksum and
 * the expected checksum are printed, and the number of copies is looked up
 * to decide whether another mirror can be tried.
 */
5907 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5908 u64 num_bytes, unsigned long leaf_offset,
5909 struct extent_buffer *eb) {
5912 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5914 unsigned long csum_offset;
5918 u64 data_checked = 0;
5924 if (num_bytes % root->sectorsize)
5927 data = malloc(num_bytes);
5931 while (offset < num_bytes) {
5934 read_len = num_bytes - offset;
5935 /* ask for everything that is left; read_len is updated by the call */
5936 ret = read_extent_data(root, data + offset,
5937 bytenr + offset, &read_len, mirror);
/*
 * NOTE(review): data_checked is initialized once at its declaration and is
 * added to offset below; confirm it is reset before each pass of the outer
 * loop, otherwise a short read would skip or misplace sectors.
 */
5941 /* verify every 4k data's checksum */
5942 while (data_checked < read_len) {
5944 tmp = offset + data_checked;
5946 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5947 csum, root->sectorsize);
5948 btrfs_csum_final(csum, (u8 *)&csum);
/* Locate the stored checksum that belongs to this sector. */
5950 csum_offset = leaf_offset +
5951 tmp / root->sectorsize * csum_size;
5952 read_extent_buffer(eb, (char *)&csum_expected,
5953 csum_offset, csum_size);
5954 /* try another mirror */
5955 if (csum != csum_expected) {
5956 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5957 mirror, bytenr + tmp,
5958 csum, csum_expected);
5959 num_copies = btrfs_num_copies(
5960 &root->fs_info->mapping_tree,
5962 if (mirror < num_copies - 1) {
5967 data_checked += root->sectorsize;
/*
 * Check that the byte range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps back to the
 * closest preceding extent item, then walks forward.  Every extent that
 * overlaps the range trims it: from the front, from the back, or, when the
 * extent lies in the middle, by recursing on the part to the right of the
 * extent and keeping the part to the left.  If bytes remain uncovered once
 * the walk is over and no error occurred, the remaining range is reported
 * as a csum range without extents.
 */
5976 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5979 struct btrfs_path path;
5980 struct extent_buffer *leaf;
5981 struct btrfs_key key;
5984 btrfs_init_path(&path);
5985 key.objectid = bytenr;
5986 key.type = BTRFS_EXTENT_ITEM_KEY;
5987 key.offset = (u64)-1;
5990 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
5993 fprintf(stderr, "Error looking up extent record %d\n", ret);
5994 btrfs_release_path(&path);
5997 if (path.slots[0] > 0) {
6000 ret = btrfs_prev_leaf(root, &path);
6003 } else if (ret > 0) {
6010 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6013 * Block group items come before extent items if they have the same
6014 * bytenr, so walk back one more just in case. Dear future traveller,
6015 * first congrats on mastering time travel. Now if it's not too much
6016 * trouble could you go back to 2006 and tell Chris to make the
6017 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6018 * EXTENT_ITEM_KEY please?
6020 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6021 if (path.slots[0] > 0) {
6024 ret = btrfs_prev_leaf(root, &path);
6027 } else if (ret > 0) {
6032 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6036 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6037 ret = btrfs_next_leaf(root, &path);
6039 fprintf(stderr, "Error going to next leaf "
6041 btrfs_release_path(&path);
6047 leaf = path.nodes[0];
6048 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6049 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6053 if (key.objectid + key.offset < bytenr) {
6057 if (key.objectid > bytenr + num_bytes)
6060 if (key.objectid == bytenr) {
6061 if (key.offset >= num_bytes) {
6065 num_bytes -= key.offset;
6066 bytenr += key.offset;
6067 } else if (key.objectid < bytenr) {
6068 if (key.objectid + key.offset >= bytenr + num_bytes) {
6072 num_bytes = (bytenr + num_bytes) -
6073 (key.objectid + key.offset);
6074 bytenr = key.objectid + key.offset;
6076 if (key.objectid + key.offset < bytenr + num_bytes) {
6077 u64 new_start = key.objectid + key.offset;
6078 u64 new_bytes = bytenr + num_bytes - new_start;
6081 * Weird case, the extent is in the middle of
6082 * our range, we'll have to search one side
6083 * and then the other. Not sure if this happens
6084 * in real life, but no harm in coding it up
6085 * anyway just in case.
6087 btrfs_release_path(&path);
6088 ret = check_extent_exists(root, new_start,
6091 fprintf(stderr, "Right section didn't "
6095 num_bytes = key.objectid - bytenr;
6098 num_bytes = key.objectid - bytenr;
6105 if (num_bytes && !ret) {
6106 fprintf(stderr, "There are no extents for csum range "
6107 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6111 btrfs_release_path(&path);
/*
 * Walk the checksum tree and check that checksummed ranges are backed by
 * extent records.
 *
 * For every EXTENT_CSUM item the number of data bytes it covers is computed
 * as (item size / csum_size) * sectorsize.  When check_data_csum is set the
 * data itself is verified with check_extent_csums().  Contiguous items are
 * accumulated into one range (offset, num_bytes); when an item does not
 * start at offset + num_bytes, the accumulated range is passed to
 * check_extent_exists() and a failure is reported as a csum without an
 * extent record.
 */
6115 static int check_csums(struct btrfs_root *root)
6117 struct btrfs_path path;
6118 struct extent_buffer *leaf;
6119 struct btrfs_key key;
6120 u64 offset = 0, num_bytes = 0;
6121 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6125 unsigned long leaf_offset;
/* From here on root refers to the checksum tree. */
6127 root = root->fs_info->csum_root;
6128 if (!extent_buffer_uptodate(root->node)) {
6129 fprintf(stderr, "No valid csum tree found\n");
6133 btrfs_init_path(&path);
6134 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6135 key.type = BTRFS_EXTENT_CSUM_KEY;
6137 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6139 fprintf(stderr, "Error searching csum tree %d\n", ret);
6140 btrfs_release_path(&path);
6144 if (ret > 0 && path.slots[0])
6149 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6150 ret = btrfs_next_leaf(root, &path);
6152 fprintf(stderr, "Error going to next leaf "
6159 leaf = path.nodes[0];
6161 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6162 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by the checksums packed into this item. */
6167 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6168 csum_size) * root->sectorsize;
6169 if (!check_data_csum)
6170 goto skip_csum_check;
6171 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6172 ret = check_extent_csums(root, key.offset, data_len,
6178 offset = key.offset;
/* Gap in the checksummed range: verify what has been accumulated so far. */
6179 } else if (key.offset != offset + num_bytes) {
6180 ret = check_extent_exists(root, offset, num_bytes);
6182 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6183 "there is no extent record\n",
6184 offset, offset+num_bytes);
6187 offset = key.offset;
6190 num_bytes += data_len;
6194 btrfs_release_path(&path);
/*
 * Compare key against drop_key field by field, in the order objectid, type,
 * offset; a later field is only consulted when all earlier ones are equal.
 * NOTE(review): presumably the result is non-zero when key sorts strictly
 * before drop_key - confirm against the return statements.
 */
6198 static int is_dropped_key(struct btrfs_key *key,
6199 struct btrfs_key *drop_key) {
6200 if (key->objectid < drop_key->objectid)
6202 else if (key->objectid == drop_key->objectid) {
6203 if (key->type < drop_key->type)
6205 else if (key->type == drop_key->type) {
6206 if (key->offset < drop_key->offset)
6214 * Here are the rules for FULL_BACKREF.
6216 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6217 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6219 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6220 * if it happened after the relocation occurred since we'll have dropped the
6221 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6222 * have no real way to know for sure.
6224 * We process the blocks one root at a time, and we start from the lowest root
6225 * objectid and go to the highest. So we can just lookup the owner backref for
6226 * the record and if we don't find it then we know it doesn't exist and we have
6229 * FIXME: if we ever start reclaiming root objectids then we need to fix this
6230 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6231 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block buf should carry BTRFS_BLOCK_FLAG_FULL_BACKREF
 * and report the answer through *flags.
 *
 * The extent record of buf is looked up in extent_cache.  The decision is
 * based on, in this order: the root objectid being below
 * BTRFS_FIRST_FREE_OBJECTID, buf being the root node recorded in ri, the
 * RELOC header flag, the header owner matching ri->objectid, and finally
 * whether a tree backref for the header owner exists on the record.
 *
 * Whichever way the decision goes, it is compared with the value already
 * stored in rec->flag_block_full_backref; if that value is set and
 * disagrees, rec->bad_full_backref is raised.
 */
6233 static int calc_extent_flag(struct btrfs_root *root,
6234 struct cache_tree *extent_cache,
6235 struct extent_buffer *buf,
6236 struct root_item_record *ri,
6239 struct extent_record *rec;
6240 struct cache_extent *cache;
6241 struct tree_backref *tback;
6244 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6245 /* we have added this extent before */
6249 rec = container_of(cache, struct extent_record, cache);
6252 * Except file/reloc tree, we can not have
6255 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6260 if (buf->start == ri->bytenr)
6263 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6266 owner = btrfs_header_owner(buf);
6267 if (owner == ri->objectid)
6270 tback = find_tree_backref(rec, 0, owner);
/* Outcome without FULL_BACKREF: flag a record that was seen with it set. */
6275 if (rec->flag_block_full_backref != FLAG_UNSET &&
6276 rec->flag_block_full_backref != 0)
6277 rec->bad_full_backref = 1;
/* Outcome with FULL_BACKREF: flag a record that was seen without it. */
6280 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6281 if (rec->flag_block_full_backref != FLAG_UNSET &&
6282 rec->flag_block_full_backref != 1)
6283 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr saying that key_type was found in
 * the tree identified by rootid.  The key type and the root id are rendered
 * with print_key_type() and print_objectid().
 */
6287 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6289 fprintf(stderr, "Invalid key type(");
6290 print_key_type(stderr, 0, key_type);
6291 fprintf(stderr, ") found in root(");
6292 print_objectid(stderr, rootid, 0);
6293 fprintf(stderr, ")\n");
6297 * Check if the key is valid with its extent buffer.
6299 * This is an early check in case an invalid key exists in an extent buffer
6300 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check whether an item of key_type may legitimately live in the tree
 * identified by rootid.  The key types handled here are tied to specific
 * trees: DEV_ITEM and CHUNK_ITEM to the chunk tree, EXTENT_ITEM,
 * METADATA_ITEM and BLOCK_GROUP_ITEM to the extent tree, ROOT_ITEM to the
 * root tree and DEV_EXTENT to the device tree.  A mismatch is reported
 * through report_mismatch_key_root().
 */
6303 static int check_type_with_root(u64 rootid, u8 key_type)
6306 /* Only valid in chunk tree */
6307 case BTRFS_DEV_ITEM_KEY:
6308 case BTRFS_CHUNK_ITEM_KEY:
6309 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6312 /* valid in csum and log tree */
/*
 * NOTE(review): every other label here is a key type, but this one is a tree
 * objectid.  It looks like the checksum item key type was intended, and the
 * condition below tests the log tree but does not obviously admit the csum
 * tree mentioned in the comment above - confirm before changing either.
 */
6313 case BTRFS_CSUM_TREE_OBJECTID:
6314 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6318 case BTRFS_EXTENT_ITEM_KEY:
6319 case BTRFS_METADATA_ITEM_KEY:
6320 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6321 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6324 case BTRFS_ROOT_ITEM_KEY:
6325 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6328 case BTRFS_DEV_EXTENT_KEY:
6329 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6335 report_mismatch_key_root(key_type, rootid);
/*
 * Read and process the next pending tree block.
 *
 * A batch of candidate blocks is chosen with pick_next_pending() and
 * readahead is issued for them; the first block of the batch is then removed
 * from the pending, reada and nodes trees, read from disk and checked.
 *
 * The FULL_BACKREF state of the block is determined (from the extent tree
 * unless init_extent_tree is set, with calc_extent_flag() as the other
 * source) and stored on the extent record.  After check_block():
 *   - for a leaf, every item is dispatched by key type to the matching
 *     process_*() helper or backref helper, and file extents are accounted
 *     and recorded as data backrefs;
 *   - for a node, every child pointer not excluded by the drop key of ri is
 *     registered as an extent record with a tree backref and queued in nodes
 *     or pending.
 * Finally the global byte counters are updated and the buffer is released.
 */
6339 static int run_next_block(struct btrfs_root *root,
6340 struct block_info *bits,
6343 struct cache_tree *pending,
6344 struct cache_tree *seen,
6345 struct cache_tree *reada,
6346 struct cache_tree *nodes,
6347 struct cache_tree *extent_cache,
6348 struct cache_tree *chunk_cache,
6349 struct rb_root *dev_cache,
6350 struct block_group_tree *block_group_cache,
6351 struct device_extent_tree *dev_extent_cache,
6352 struct root_item_record *ri)
6354 struct extent_buffer *buf;
6355 struct extent_record *rec = NULL;
6366 struct btrfs_key key;
6367 struct cache_extent *cache;
/* Choose the next batch of blocks and start readahead for them. */
6370 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6371 bits_nr, &reada_bits);
6376 for(i = 0; i < nritems; i++) {
6377 ret = add_cache_extent(reada, bits[i].start,
6382 /* fixme, get the parent transid */
6383 readahead_tree_block(root, bits[i].start,
/* The first block of the batch is the one processed by this call. */
6387 *last = bits[0].start;
6388 bytenr = bits[0].start;
6389 size = bits[0].size;
/* Take that block out of every queue it may still be in. */
6391 cache = lookup_cache_extent(pending, bytenr, size);
6393 remove_cache_extent(pending, cache);
6396 cache = lookup_cache_extent(reada, bytenr, size);
6398 remove_cache_extent(reada, cache);
6401 cache = lookup_cache_extent(nodes, bytenr, size);
6403 remove_cache_extent(nodes, cache);
6406 cache = lookup_cache_extent(extent_cache, bytenr, size);
6408 rec = container_of(cache, struct extent_record, cache);
6409 gen = rec->parent_generation;
6412 /* fixme, get the real parent transid */
6413 buf = read_tree_block(root, bytenr, size, gen);
6414 if (!extent_buffer_uptodate(buf)) {
6415 record_bad_block_io(root->fs_info,
6416 extent_cache, bytenr, size);
6420 nritems = btrfs_header_nritems(buf);
6423 if (!init_extent_tree) {
6424 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6425 btrfs_header_level(buf), 1, NULL,
6428 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6430 fprintf(stderr, "Couldn't calc extent flags\n");
6431 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6436 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6438 fprintf(stderr, "Couldn't calc extent flags\n");
6439 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/*
 * NOTE(review): rec is initialized to NULL and only assigned when the
 * extent_cache lookup above finds an entry, yet it is dereferenced several
 * times below - confirm a miss cannot reach these statements.
 */
6443 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6445 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6446 ri->objectid == btrfs_header_owner(buf)) {
6448 * Ok we got to this block from it's original owner and
6449 * we have FULL_BACKREF set. Relocation can leave
6450 * converted blocks over so this is altogether possible,
6451 * however it's not possible if the generation > the
6452 * last snapshot, so check for this case.
6454 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6455 btrfs_header_generation(buf) > ri->last_snapshot) {
6456 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6457 rec->bad_full_backref = 1;
6462 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6463 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6464 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6465 rec->bad_full_backref = 1;
/* Remember the final decision on the extent record. */
6469 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6470 rec->flag_block_full_backref = 1;
6474 rec->flag_block_full_backref = 0;
6476 owner = btrfs_header_owner(buf);
6479 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account free space and dispatch every item by its key type. */
6483 if (btrfs_is_leaf(buf)) {
6484 btree_space_waste += btrfs_leaf_free_space(root, buf);
6485 for (i = 0; i < nritems; i++) {
6486 struct btrfs_file_extent_item *fi;
6487 btrfs_item_key_to_cpu(buf, &key, i);
6489 * Check key type against the leaf owner.
6490 * Could filter quite a lot of early error if
6493 if (check_type_with_root(btrfs_header_owner(buf),
6495 fprintf(stderr, "ignoring invalid key\n");
6498 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6499 process_extent_item(root, extent_cache, buf,
6503 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6504 process_extent_item(root, extent_cache, buf,
6508 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6510 btrfs_item_size_nr(buf, i);
6513 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6514 process_chunk_item(chunk_cache, &key, buf, i);
6517 if (key.type == BTRFS_DEV_ITEM_KEY) {
6518 process_device_item(dev_cache, &key, buf, i);
6521 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6522 process_block_group_item(block_group_cache,
6526 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6527 process_device_extent_item(dev_extent_cache,
6532 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6533 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6534 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items: objectid is the extent bytenr. */
6541 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6542 ret = add_tree_backref(extent_cache,
6543 key.objectid, 0, key.offset, 0);
6545 error("add_tree_backref failed: %s",
6549 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6550 ret = add_tree_backref(extent_cache,
6551 key.objectid, key.offset, 0, 0);
6553 error("add_tree_backref failed: %s",
6557 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6558 struct btrfs_extent_data_ref *ref;
6559 ref = btrfs_item_ptr(buf, i,
6560 struct btrfs_extent_data_ref);
6561 add_data_backref(extent_cache,
6563 btrfs_extent_data_ref_root(buf, ref),
6564 btrfs_extent_data_ref_objectid(buf,
6566 btrfs_extent_data_ref_offset(buf, ref),
6567 btrfs_extent_data_ref_count(buf, ref),
6568 0, root->sectorsize);
6571 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6572 struct btrfs_shared_data_ref *ref;
6573 ref = btrfs_item_ptr(buf, i,
6574 struct btrfs_shared_data_ref);
6575 add_data_backref(extent_cache,
6576 key.objectid, key.offset, 0, 0, 0,
6577 btrfs_shared_data_ref_count(buf, ref),
6578 0, root->sectorsize);
/* Orphan items can be queued on the global delete_items list. */
6581 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6582 struct bad_item *bad;
6584 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6588 bad = malloc(sizeof(struct bad_item));
6591 INIT_LIST_HEAD(&bad->list);
6592 memcpy(&bad->key, &key,
6593 sizeof(struct btrfs_key));
6594 bad->root_id = owner;
6595 list_add_tail(&bad->list, &delete_items);
/* Everything below handles regular (non-inline, non-hole) file extents. */
6598 if (key.type != BTRFS_EXTENT_DATA_KEY)
6600 fi = btrfs_item_ptr(buf, i,
6601 struct btrfs_file_extent_item);
6602 if (btrfs_file_extent_type(buf, fi) ==
6603 BTRFS_FILE_EXTENT_INLINE)
6605 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6608 data_bytes_allocated +=
6609 btrfs_file_extent_disk_num_bytes(buf, fi);
6610 if (data_bytes_allocated < root->sectorsize) {
6613 data_bytes_referenced +=
6614 btrfs_file_extent_num_bytes(buf, fi);
6615 add_data_backref(extent_cache,
6616 btrfs_file_extent_disk_bytenr(buf, fi),
6617 parent, owner, key.objectid, key.offset -
6618 btrfs_file_extent_offset(buf, fi), 1, 1,
6619 btrfs_file_extent_disk_num_bytes(buf, fi));
6623 struct btrfs_key first_key;
6625 first_key.objectid = 0;
6628 btrfs_item_key_to_cpu(buf, &first_key, 0);
6629 level = btrfs_header_level(buf);
/* Node: register every child pointer and queue it for a later call. */
6630 for (i = 0; i < nritems; i++) {
6631 struct extent_record tmpl;
6633 ptr = btrfs_node_blockptr(buf, i);
6634 size = root->nodesize;
6635 btrfs_node_key_to_cpu(buf, &key, i);
6637 if ((level == ri->drop_level)
6638 && is_dropped_key(&key, &ri->drop_key)) {
6643 memset(&tmpl, 0, sizeof(tmpl));
6644 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6645 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6650 tmpl.max_size = size;
6651 ret = add_extent_rec(extent_cache, &tmpl);
6655 ret = add_tree_backref(extent_cache, ptr, parent,
6658 error("add_tree_backref failed: %s",
6664 add_pending(nodes, seen, ptr, size);
6666 add_pending(pending, seen, ptr, size);
6669 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6670 nritems) * sizeof(struct btrfs_key_ptr);
/* Update the global byte counters for this block. */
6672 total_btree_bytes += buf->len;
6673 if (fs_root_objectid(btrfs_header_owner(buf)))
6674 total_fs_tree_bytes += buf->len;
6675 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6676 total_extent_tree_bytes += buf->len;
6677 if (!found_old_backref &&
6678 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6679 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6680 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6681 found_old_backref = 1;
6683 free_extent_buffer(buf);
/*
 * Queue the root block buf of a tree for processing and seed its extent
 * record.
 *
 * A root above level 0 goes into the nodes tree, a level 0 root into the
 * pending tree.  An extent record starting at buf->start with max_size
 * buf->len is added to extent_cache.  The tree backref is added with
 * buf->start as parent when the tree is the relocation tree or the block
 * uses a backref revision older than BTRFS_MIXED_BACKREF_REV; otherwise it
 * is added with objectid as the owning root.
 */
6687 static int add_root_to_pending(struct extent_buffer *buf,
6688 struct cache_tree *extent_cache,
6689 struct cache_tree *pending,
6690 struct cache_tree *seen,
6691 struct cache_tree *nodes,
6694 struct extent_record tmpl;
6697 if (btrfs_header_level(buf) > 0)
6698 add_pending(nodes, seen, buf->start, buf->len);
6700 add_pending(pending, seen, buf->start, buf->len);
6702 memset(&tmpl, 0, sizeof(tmpl));
6703 tmpl.start = buf->start;
6708 tmpl.max_size = buf->len;
6709 add_extent_rec(extent_cache, &tmpl);
6711 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6712 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6713 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6716 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6721 /* as we fix the tree, we might be deleting blocks that
6722 * we're tracking for repair. This hook makes sure we
6723 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called when an extent reference is dropped during repair; keeps the
 * fsck extent cache (root->fs_info->fsck_extent_cache) in step with it.
 *
 * The extent record covering [bytenr, bytenr + num_bytes) is looked up and
 * the matching backref is located: a data backref when owner is at or above
 * BTRFS_FIRST_FREE_OBJECTID, a tree backref otherwise.  The reference
 * counters on the backref and on the record are reduced, the found_ref and
 * found_extent_tree markers are cleared when nothing is left, and the record
 * is handed to maybe_free_extent_rec().
 */
6725 static int free_extent_hook(struct btrfs_trans_handle *trans,
6726 struct btrfs_root *root,
6727 u64 bytenr, u64 num_bytes, u64 parent,
6728 u64 root_objectid, u64 owner, u64 offset,
6731 struct extent_record *rec;
6732 struct cache_extent *cache;
6734 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6736 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6737 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6741 rec = container_of(cache, struct extent_record, cache);
6743 struct data_backref *back;
6744 back = find_data_backref(rec, parent, root_objectid, owner,
6745 offset, 1, bytenr, num_bytes);
6748 if (back->node.found_ref) {
6749 back->found_ref -= refs_to_drop;
6751 rec->refs -= refs_to_drop;
6753 if (back->node.found_extent_tree) {
6754 back->num_refs -= refs_to_drop;
6755 if (rec->extent_item_refs)
6756 rec->extent_item_refs -= refs_to_drop;
6758 if (back->found_ref == 0)
6759 back->node.found_ref = 0;
6760 if (back->num_refs == 0)
6761 back->node.found_extent_tree = 0;
/*
 * NOTE(review): this unlinks the backref while found_ref is still set; one
 * would expect removal once neither marker is set - confirm the condition.
 */
6763 if (!back->node.found_extent_tree && back->node.found_ref) {
6764 list_del(&back->node.list);
6768 struct tree_backref *back;
6769 back = find_tree_backref(rec, parent, root_objectid);
6772 if (back->node.found_ref) {
6775 back->node.found_ref = 0;
6777 if (back->node.found_extent_tree) {
6778 if (rec->extent_item_refs)
6779 rec->extent_item_refs--;
6780 back->node.found_extent_tree = 0;
/*
 * NOTE(review): found_ref was cleared just above whenever it had been set,
 * so this condition looks like it can never be true here - confirm whether
 * the test was meant to be on found_ref being clear.
 */
6782 if (!back->node.found_extent_tree && back->node.found_ref) {
6783 list_del(&back->node.list);
6787 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete from the extent tree every extent item and backref item whose key
 * objectid equals bytenr.
 *
 * The tree is searched from (bytenr, ..., -1) and walked backwards.  Items
 * with a different objectid end the walk.  Items of an unrelated key type
 * are stepped over by lowering the search key.  Every matching item is
 * logged and deleted; when the deleted item is an EXTENT_ITEM or a
 * METADATA_ITEM, the block group accounting is updated with the extent
 * length (key offset, or root->nodesize for a METADATA_ITEM).
 *
 * NOTE(review): new_len does not appear in any statement of this function
 * that I can point to - confirm whether the parameter is still needed.
 */
6792 static int delete_extent_records(struct btrfs_trans_handle *trans,
6793 struct btrfs_root *root,
6794 struct btrfs_path *path,
6795 u64 bytenr, u64 new_len)
6797 struct btrfs_key key;
6798 struct btrfs_key found_key;
6799 struct extent_buffer *leaf;
6804 key.objectid = bytenr;
6806 key.offset = (u64)-1;
6809 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6816 if (path->slots[0] == 0)
6822 leaf = path->nodes[0];
6823 slot = path->slots[0];
6825 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6826 if (found_key.objectid != bytenr)
/* Not an extent or backref item: move the search key below it and retry. */
6829 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6830 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6831 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6832 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6833 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6834 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6835 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6836 btrfs_release_path(path);
6837 if (found_key.type == 0) {
6838 if (found_key.offset == 0)
6840 key.offset = found_key.offset - 1;
6841 key.type = found_key.type;
/*
 * NOTE(review): if the type == 0 branch above falls through to the two
 * assignments below, they overwrite what it just stored (and type - 1 wraps
 * for type 0) - confirm these sit in an else branch.
 */
6843 key.type = found_key.type - 1;
6844 key.offset = (u64)-1;
6848 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6849 found_key.objectid, found_key.type, found_key.offset);
6851 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6854 btrfs_release_path(path);
/* Removing the extent item itself also changes block group usage. */
6856 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6857 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6858 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6859 found_key.offset : root->nodesize;
6861 ret = btrfs_update_block_group(trans, root, bytenr,
6868 btrfs_release_path(path);
6873 * for a single backref, this will allocate a new extent
6874 * and add the backref to it.
6876 static int record_extent(struct btrfs_trans_handle *trans,
6877 struct btrfs_fs_info *info,
6878 struct btrfs_path *path,
6879 struct extent_record *rec,
6880 struct extent_backref *back,
6881 int allocated, u64 flags)
6884 struct btrfs_root *extent_root = info->extent_root;
6885 struct extent_buffer *leaf;
6886 struct btrfs_key ins_key;
6887 struct btrfs_extent_item *ei;
6888 struct data_backref *dback;
6889 struct btrfs_tree_block_info *bi;
6892 rec->max_size = max_t(u64, rec->max_size,
6893 info->extent_root->nodesize);
6896 u32 item_size = sizeof(*ei);
6899 item_size += sizeof(*bi);
6901 ins_key.objectid = rec->start;
6902 ins_key.offset = rec->max_size;
6903 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6905 ret = btrfs_insert_empty_item(trans, extent_root, path,
6906 &ins_key, item_size);
6910 leaf = path->nodes[0];
6911 ei = btrfs_item_ptr(leaf, path->slots[0],
6912 struct btrfs_extent_item);
6914 btrfs_set_extent_refs(leaf, ei, 0);
6915 btrfs_set_extent_generation(leaf, ei, rec->generation);
6917 if (back->is_data) {
6918 btrfs_set_extent_flags(leaf, ei,
6919 BTRFS_EXTENT_FLAG_DATA);
6921 struct btrfs_disk_key copy_key;;
6923 bi = (struct btrfs_tree_block_info *)(ei + 1);
6924 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6927 btrfs_set_disk_key_objectid(©_key,
6928 rec->info_objectid);
6929 btrfs_set_disk_key_type(©_key, 0);
6930 btrfs_set_disk_key_offset(©_key, 0);
6932 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6933 btrfs_set_tree_block_key(leaf, bi, ©_key);
6935 btrfs_set_extent_flags(leaf, ei,
6936 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6939 btrfs_mark_buffer_dirty(leaf);
6940 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6941 rec->max_size, 1, 0);
6944 btrfs_release_path(path);
6947 if (back->is_data) {
6951 dback = to_data_backref(back);
6952 if (back->full_backref)
6953 parent = dback->parent;
6957 for (i = 0; i < dback->found_ref; i++) {
6958 /* if parent != 0, we're doing a full backref
6959 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6960 * just makes the backref allocator create a data
6963 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6964 rec->start, rec->max_size,
6968 BTRFS_FIRST_FREE_OBJECTID :
6974 fprintf(stderr, "adding new data backref"
6975 " on %llu %s %llu owner %llu"
6976 " offset %llu found %d\n",
6977 (unsigned long long)rec->start,
6978 back->full_backref ?
6980 back->full_backref ?
6981 (unsigned long long)parent :
6982 (unsigned long long)dback->root,
6983 (unsigned long long)dback->owner,
6984 (unsigned long long)dback->offset,
6988 struct tree_backref *tback;
6990 tback = to_tree_backref(back);
6991 if (back->full_backref)
6992 parent = tback->parent;
6996 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6997 rec->start, rec->max_size,
6998 parent, tback->root, 0, 0);
6999 fprintf(stderr, "adding new tree backref on "
7000 "start %llu len %llu parent %llu root %llu\n",
7001 rec->start, rec->max_size, parent, tback->root);
7004 btrfs_release_path(path);
7008 static struct extent_entry *find_entry(struct list_head *entries,
7009 u64 bytenr, u64 bytes)
7011 struct extent_entry *entry = NULL;
7013 list_for_each_entry(entry, entries, list) {
7014 if (entry->bytenr == bytenr && entry->bytes == bytes)
7021 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7023 struct extent_entry *entry, *best = NULL, *prev = NULL;
7025 list_for_each_entry(entry, entries, list) {
7027 * If there are as many broken entries as entries then we know
7028 * not to trust this particular entry.
7030 if (entry->broken == entry->count)
7034 * Special case, when there are only two entries and 'best' is
7044 * If our current entry == best then we can't be sure our best
7045 * is really the best, so we need to keep searching.
7047 if (best && best->count == entry->count) {
7053 /* Prev == entry, not good enough, have to keep searching */
7054 if (!prev->broken && prev->count == entry->count)
7058 best = (prev->count > entry->count) ? prev : entry;
7059 else if (best->count < entry->count)
7067 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7068 struct data_backref *dback, struct extent_entry *entry)
7070 struct btrfs_trans_handle *trans;
7071 struct btrfs_root *root;
7072 struct btrfs_file_extent_item *fi;
7073 struct extent_buffer *leaf;
7074 struct btrfs_key key;
7078 key.objectid = dback->root;
7079 key.type = BTRFS_ROOT_ITEM_KEY;
7080 key.offset = (u64)-1;
7081 root = btrfs_read_fs_root(info, &key);
7083 fprintf(stderr, "Couldn't find root for our ref\n");
7088 * The backref points to the original offset of the extent if it was
7089 * split, so we need to search down to the offset we have and then walk
7090 * forward until we find the backref we're looking for.
7092 key.objectid = dback->owner;
7093 key.type = BTRFS_EXTENT_DATA_KEY;
7094 key.offset = dback->offset;
7095 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7097 fprintf(stderr, "Error looking up ref %d\n", ret);
7102 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7103 ret = btrfs_next_leaf(root, path);
7105 fprintf(stderr, "Couldn't find our ref, next\n");
7109 leaf = path->nodes[0];
7110 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7111 if (key.objectid != dback->owner ||
7112 key.type != BTRFS_EXTENT_DATA_KEY) {
7113 fprintf(stderr, "Couldn't find our ref, search\n");
7116 fi = btrfs_item_ptr(leaf, path->slots[0],
7117 struct btrfs_file_extent_item);
7118 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7119 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7121 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7126 btrfs_release_path(path);
7128 trans = btrfs_start_transaction(root, 1);
7130 return PTR_ERR(trans);
7133 * Ok we have the key of the file extent we want to fix, now we can cow
7134 * down to the thing and fix it.
7136 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7138 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7139 key.objectid, key.type, key.offset, ret);
7143 fprintf(stderr, "Well that's odd, we just found this key "
7144 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7149 leaf = path->nodes[0];
7150 fi = btrfs_item_ptr(leaf, path->slots[0],
7151 struct btrfs_file_extent_item);
7153 if (btrfs_file_extent_compression(leaf, fi) &&
7154 dback->disk_bytenr != entry->bytenr) {
7155 fprintf(stderr, "Ref doesn't match the record start and is "
7156 "compressed, please take a btrfs-image of this file "
7157 "system and send it to a btrfs developer so they can "
7158 "complete this functionality for bytenr %Lu\n",
7159 dback->disk_bytenr);
7164 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7165 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7166 } else if (dback->disk_bytenr > entry->bytenr) {
7167 u64 off_diff, offset;
7169 off_diff = dback->disk_bytenr - entry->bytenr;
7170 offset = btrfs_file_extent_offset(leaf, fi);
7171 if (dback->disk_bytenr + offset +
7172 btrfs_file_extent_num_bytes(leaf, fi) >
7173 entry->bytenr + entry->bytes) {
7174 fprintf(stderr, "Ref is past the entry end, please "
7175 "take a btrfs-image of this file system and "
7176 "send it to a btrfs developer, ref %Lu\n",
7177 dback->disk_bytenr);
7182 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7183 btrfs_set_file_extent_offset(leaf, fi, offset);
7184 } else if (dback->disk_bytenr < entry->bytenr) {
7187 offset = btrfs_file_extent_offset(leaf, fi);
7188 if (dback->disk_bytenr + offset < entry->bytenr) {
7189 fprintf(stderr, "Ref is before the entry start, please"
7190 " take a btrfs-image of this file system and "
7191 "send it to a btrfs developer, ref %Lu\n",
7192 dback->disk_bytenr);
7197 offset += dback->disk_bytenr;
7198 offset -= entry->bytenr;
7199 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7200 btrfs_set_file_extent_offset(leaf, fi, offset);
7203 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7206 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7207 * only do this if we aren't using compression, otherwise it's a
7210 if (!btrfs_file_extent_compression(leaf, fi))
7211 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7213 printf("ram bytes may be wrong?\n");
7214 btrfs_mark_buffer_dirty(leaf);
7216 err = btrfs_commit_transaction(trans, root);
7217 btrfs_release_path(path);
7218 return ret ? ret : err;
7221 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7222 struct extent_record *rec)
7224 struct extent_backref *back;
7225 struct data_backref *dback;
7226 struct extent_entry *entry, *best = NULL;
7229 int broken_entries = 0;
7234 * Metadata is easy and the backrefs should always agree on bytenr and
7235 * size, if not we've got bigger issues.
7240 list_for_each_entry(back, &rec->backrefs, list) {
7241 if (back->full_backref || !back->is_data)
7244 dback = to_data_backref(back);
7247 * We only pay attention to backrefs that we found a real
7250 if (dback->found_ref == 0)
7254 * For now we only catch when the bytes don't match, not the
7255 * bytenr. We can easily do this at the same time, but I want
7256 * to have a fs image to test on before we just add repair
7257 * functionality willy-nilly so we know we won't screw up the
7261 entry = find_entry(&entries, dback->disk_bytenr,
7264 entry = malloc(sizeof(struct extent_entry));
7269 memset(entry, 0, sizeof(*entry));
7270 entry->bytenr = dback->disk_bytenr;
7271 entry->bytes = dback->bytes;
7272 list_add_tail(&entry->list, &entries);
7277 * If we only have on entry we may think the entries agree when
7278 * in reality they don't so we have to do some extra checking.
7280 if (dback->disk_bytenr != rec->start ||
7281 dback->bytes != rec->nr || back->broken)
7292 /* Yay all the backrefs agree, carry on good sir */
7293 if (nr_entries <= 1 && !mismatch)
7296 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7297 "%Lu\n", rec->start);
7300 * First we want to see if the backrefs can agree amongst themselves who
7301 * is right, so figure out which one of the entries has the highest
7304 best = find_most_right_entry(&entries);
7307 * Ok so we may have an even split between what the backrefs think, so
7308 * this is where we use the extent ref to see what it thinks.
7311 entry = find_entry(&entries, rec->start, rec->nr);
7312 if (!entry && (!broken_entries || !rec->found_rec)) {
7313 fprintf(stderr, "Backrefs don't agree with each other "
7314 "and extent record doesn't agree with anybody,"
7315 " so we can't fix bytenr %Lu bytes %Lu\n",
7316 rec->start, rec->nr);
7319 } else if (!entry) {
7321 * Ok our backrefs were broken, we'll assume this is the
7322 * correct value and add an entry for this range.
7324 entry = malloc(sizeof(struct extent_entry));
7329 memset(entry, 0, sizeof(*entry));
7330 entry->bytenr = rec->start;
7331 entry->bytes = rec->nr;
7332 list_add_tail(&entry->list, &entries);
7336 best = find_most_right_entry(&entries);
7338 fprintf(stderr, "Backrefs and extent record evenly "
7339 "split on who is right, this is going to "
7340 "require user input to fix bytenr %Lu bytes "
7341 "%Lu\n", rec->start, rec->nr);
7348 * I don't think this can happen currently as we'll abort() if we catch
7349 * this case higher up, but in case somebody removes that we still can't
7350 * deal with it properly here yet, so just bail out of that's the case.
7352 if (best->bytenr != rec->start) {
7353 fprintf(stderr, "Extent start and backref starts don't match, "
7354 "please use btrfs-image on this file system and send "
7355 "it to a btrfs developer so they can make fsck fix "
7356 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7357 rec->start, rec->nr);
7363 * Ok great we all agreed on an extent record, let's go find the real
7364 * references and fix up the ones that don't match.
7366 list_for_each_entry(back, &rec->backrefs, list) {
7367 if (back->full_backref || !back->is_data)
7370 dback = to_data_backref(back);
7373 * Still ignoring backrefs that don't have a real ref attached
7376 if (dback->found_ref == 0)
7379 if (dback->bytes == best->bytes &&
7380 dback->disk_bytenr == best->bytenr)
7383 ret = repair_ref(info, path, dback, best);
7389 * Ok we messed with the actual refs, which means we need to drop our
7390 * entire cache and go back and rescan. I know this is a huge pain and
7391 * adds a lot of extra work, but it's the only way to be safe. Once all
7392 * the backrefs agree we may not need to do anything to the extent
7397 while (!list_empty(&entries)) {
7398 entry = list_entry(entries.next, struct extent_entry, list);
7399 list_del_init(&entry->list);
7405 static int process_duplicates(struct btrfs_root *root,
7406 struct cache_tree *extent_cache,
7407 struct extent_record *rec)
7409 struct extent_record *good, *tmp;
7410 struct cache_extent *cache;
7414 * If we found a extent record for this extent then return, or if we
7415 * have more than one duplicate we are likely going to need to delete
7418 if (rec->found_rec || rec->num_duplicates > 1)
7421 /* Shouldn't happen but just in case */
7422 BUG_ON(!rec->num_duplicates);
7425 * So this happens if we end up with a backref that doesn't match the
7426 * actual extent entry. So either the backref is bad or the extent
7427 * entry is bad. Either way we want to have the extent_record actually
7428 * reflect what we found in the extent_tree, so we need to take the
7429 * duplicate out and use that as the extent_record since the only way we
7430 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7432 remove_cache_extent(extent_cache, &rec->cache);
7434 good = to_extent_record(rec->dups.next);
7435 list_del_init(&good->list);
7436 INIT_LIST_HEAD(&good->backrefs);
7437 INIT_LIST_HEAD(&good->dups);
7438 good->cache.start = good->start;
7439 good->cache.size = good->nr;
7440 good->content_checked = 0;
7441 good->owner_ref_checked = 0;
7442 good->num_duplicates = 0;
7443 good->refs = rec->refs;
7444 list_splice_init(&rec->backrefs, &good->backrefs);
7446 cache = lookup_cache_extent(extent_cache, good->start,
7450 tmp = container_of(cache, struct extent_record, cache);
7453 * If we find another overlapping extent and it's found_rec is
7454 * set then it's a duplicate and we need to try and delete
7457 if (tmp->found_rec || tmp->num_duplicates > 0) {
7458 if (list_empty(&good->list))
7459 list_add_tail(&good->list,
7460 &duplicate_extents);
7461 good->num_duplicates += tmp->num_duplicates + 1;
7462 list_splice_init(&tmp->dups, &good->dups);
7463 list_del_init(&tmp->list);
7464 list_add_tail(&tmp->list, &good->dups);
7465 remove_cache_extent(extent_cache, &tmp->cache);
7470 * Ok we have another non extent item backed extent rec, so lets
7471 * just add it to this extent and carry on like we did above.
7473 good->refs += tmp->refs;
7474 list_splice_init(&tmp->backrefs, &good->backrefs);
7475 remove_cache_extent(extent_cache, &tmp->cache);
7478 ret = insert_cache_extent(extent_cache, &good->cache);
7481 return good->num_duplicates ? 0 : 1;
7484 static int delete_duplicate_records(struct btrfs_root *root,
7485 struct extent_record *rec)
7487 struct btrfs_trans_handle *trans;
7488 LIST_HEAD(delete_list);
7489 struct btrfs_path path;
7490 struct extent_record *tmp, *good, *n;
7493 struct btrfs_key key;
7495 btrfs_init_path(&path);
7498 /* Find the record that covers all of the duplicates. */
7499 list_for_each_entry(tmp, &rec->dups, list) {
7500 if (good->start < tmp->start)
7502 if (good->nr > tmp->nr)
7505 if (tmp->start + tmp->nr < good->start + good->nr) {
7506 fprintf(stderr, "Ok we have overlapping extents that "
7507 "aren't completely covered by each other, this "
7508 "is going to require more careful thought. "
7509 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7510 tmp->start, tmp->nr, good->start, good->nr);
7517 list_add_tail(&rec->list, &delete_list);
7519 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7522 list_move_tail(&tmp->list, &delete_list);
7525 root = root->fs_info->extent_root;
7526 trans = btrfs_start_transaction(root, 1);
7527 if (IS_ERR(trans)) {
7528 ret = PTR_ERR(trans);
7532 list_for_each_entry(tmp, &delete_list, list) {
7533 if (tmp->found_rec == 0)
7535 key.objectid = tmp->start;
7536 key.type = BTRFS_EXTENT_ITEM_KEY;
7537 key.offset = tmp->nr;
7539 /* Shouldn't happen but just in case */
7540 if (tmp->metadata) {
7541 fprintf(stderr, "Well this shouldn't happen, extent "
7542 "record overlaps but is metadata? "
7543 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7547 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7553 ret = btrfs_del_item(trans, root, &path);
7556 btrfs_release_path(&path);
7559 err = btrfs_commit_transaction(trans, root);
7563 while (!list_empty(&delete_list)) {
7564 tmp = to_extent_record(delete_list.next);
7565 list_del_init(&tmp->list);
7571 while (!list_empty(&rec->dups)) {
7572 tmp = to_extent_record(rec->dups.next);
7573 list_del_init(&tmp->list);
7577 btrfs_release_path(&path);
7579 if (!ret && !nr_del)
7580 rec->num_duplicates = 0;
7582 return ret ? ret : nr_del;
7585 static int find_possible_backrefs(struct btrfs_fs_info *info,
7586 struct btrfs_path *path,
7587 struct cache_tree *extent_cache,
7588 struct extent_record *rec)
7590 struct btrfs_root *root;
7591 struct extent_backref *back;
7592 struct data_backref *dback;
7593 struct cache_extent *cache;
7594 struct btrfs_file_extent_item *fi;
7595 struct btrfs_key key;
7599 list_for_each_entry(back, &rec->backrefs, list) {
7600 /* Don't care about full backrefs (poor unloved backrefs) */
7601 if (back->full_backref || !back->is_data)
7604 dback = to_data_backref(back);
7606 /* We found this one, we don't need to do a lookup */
7607 if (dback->found_ref)
7610 key.objectid = dback->root;
7611 key.type = BTRFS_ROOT_ITEM_KEY;
7612 key.offset = (u64)-1;
7614 root = btrfs_read_fs_root(info, &key);
7616 /* No root, definitely a bad ref, skip */
7617 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7619 /* Other err, exit */
7621 return PTR_ERR(root);
7623 key.objectid = dback->owner;
7624 key.type = BTRFS_EXTENT_DATA_KEY;
7625 key.offset = dback->offset;
7626 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7628 btrfs_release_path(path);
7631 /* Didn't find it, we can carry on */
7636 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7637 struct btrfs_file_extent_item);
7638 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7639 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7640 btrfs_release_path(path);
7641 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7643 struct extent_record *tmp;
7644 tmp = container_of(cache, struct extent_record, cache);
7647 * If we found an extent record for the bytenr for this
7648 * particular backref then we can't add it to our
7649 * current extent record. We only want to add backrefs
7650 * that don't have a corresponding extent item in the
7651 * extent tree since they likely belong to this record
7652 * and we need to fix it if it doesn't match bytenrs.
7658 dback->found_ref += 1;
7659 dback->disk_bytenr = bytenr;
7660 dback->bytes = bytes;
7663 * Set this so the verify backref code knows not to trust the
7664 * values in this backref.
7673 * Record orphan data ref into corresponding root.
7675 * Return 0 if the extent item contains data ref and recorded.
7676 * Return 1 if the extent item contains no useful data ref
7677 * On that case, it may contains only shared_dataref or metadata backref
7678 * or the file extent exists(this should be handled by the extent bytenr
7680 * Return <0 if something goes wrong.
7682 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7683 struct extent_record *rec)
7685 struct btrfs_key key;
7686 struct btrfs_root *dest_root;
7687 struct extent_backref *back;
7688 struct data_backref *dback;
7689 struct orphan_data_extent *orphan;
7690 struct btrfs_path path;
7691 int recorded_data_ref = 0;
7696 btrfs_init_path(&path);
7697 list_for_each_entry(back, &rec->backrefs, list) {
7698 if (back->full_backref || !back->is_data ||
7699 !back->found_extent_tree)
7701 dback = to_data_backref(back);
7702 if (dback->found_ref)
7704 key.objectid = dback->root;
7705 key.type = BTRFS_ROOT_ITEM_KEY;
7706 key.offset = (u64)-1;
7708 dest_root = btrfs_read_fs_root(fs_info, &key);
7710 /* For non-exist root we just skip it */
7711 if (IS_ERR(dest_root) || !dest_root)
7714 key.objectid = dback->owner;
7715 key.type = BTRFS_EXTENT_DATA_KEY;
7716 key.offset = dback->offset;
7718 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7719 btrfs_release_path(&path);
7721 * For ret < 0, it's OK since the fs-tree may be corrupted,
7722 * we need to record it for inode/file extent rebuild.
7723 * For ret > 0, we record it only for file extent rebuild.
7724 * For ret == 0, the file extent exists but only bytenr
7725 * mismatch, let the original bytenr fix routine to handle,
7731 orphan = malloc(sizeof(*orphan));
7736 INIT_LIST_HEAD(&orphan->list);
7737 orphan->root = dback->root;
7738 orphan->objectid = dback->owner;
7739 orphan->offset = dback->offset;
7740 orphan->disk_bytenr = rec->cache.start;
7741 orphan->disk_len = rec->cache.size;
7742 list_add(&dest_root->orphan_data_extents, &orphan->list);
7743 recorded_data_ref = 1;
7746 btrfs_release_path(&path);
7748 return !recorded_data_ref;
7754 * when an incorrect extent item is found, this will delete
7755 * all of the existing entries for it and recreate them
7756 * based on what the tree scan found.
7758 static int fixup_extent_refs(struct btrfs_fs_info *info,
7759 struct cache_tree *extent_cache,
7760 struct extent_record *rec)
7762 struct btrfs_trans_handle *trans = NULL;
7764 struct btrfs_path path;
7765 struct list_head *cur = rec->backrefs.next;
7766 struct cache_extent *cache;
7767 struct extent_backref *back;
7771 if (rec->flag_block_full_backref)
7772 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7774 btrfs_init_path(&path);
7775 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7777 * Sometimes the backrefs themselves are so broken they don't
7778 * get attached to any meaningful rec, so first go back and
7779 * check any of our backrefs that we couldn't find and throw
7780 * them into the list if we find the backref so that
7781 * verify_backrefs can figure out what to do.
7783 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7788 /* step one, make sure all of the backrefs agree */
7789 ret = verify_backrefs(info, &path, rec);
7793 trans = btrfs_start_transaction(info->extent_root, 1);
7794 if (IS_ERR(trans)) {
7795 ret = PTR_ERR(trans);
7799 /* step two, delete all the existing records */
7800 ret = delete_extent_records(trans, info->extent_root, &path,
7801 rec->start, rec->max_size);
7806 /* was this block corrupt? If so, don't add references to it */
7807 cache = lookup_cache_extent(info->corrupt_blocks,
7808 rec->start, rec->max_size);
7814 /* step three, recreate all the refs we did find */
7815 while(cur != &rec->backrefs) {
7816 back = to_extent_backref(cur);
7820 * if we didn't find any references, don't create a
7823 if (!back->found_ref)
7826 rec->bad_full_backref = 0;
7827 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
7835 int err = btrfs_commit_transaction(trans, info->extent_root);
7840 btrfs_release_path(&path);
7844 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7845 struct extent_record *rec)
7847 struct btrfs_trans_handle *trans;
7848 struct btrfs_root *root = fs_info->extent_root;
7849 struct btrfs_path path;
7850 struct btrfs_extent_item *ei;
7851 struct btrfs_key key;
7855 key.objectid = rec->start;
7856 if (rec->metadata) {
7857 key.type = BTRFS_METADATA_ITEM_KEY;
7858 key.offset = rec->info_level;
7860 key.type = BTRFS_EXTENT_ITEM_KEY;
7861 key.offset = rec->max_size;
7864 trans = btrfs_start_transaction(root, 0);
7866 return PTR_ERR(trans);
7868 btrfs_init_path(&path);
7869 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7871 btrfs_release_path(&path);
7872 btrfs_commit_transaction(trans, root);
7875 fprintf(stderr, "Didn't find extent for %llu\n",
7876 (unsigned long long)rec->start);
7877 btrfs_release_path(&path);
7878 btrfs_commit_transaction(trans, root);
7882 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7883 struct btrfs_extent_item);
7884 flags = btrfs_extent_flags(path.nodes[0], ei);
7885 if (rec->flag_block_full_backref) {
7886 fprintf(stderr, "setting full backref on %llu\n",
7887 (unsigned long long)key.objectid);
7888 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7890 fprintf(stderr, "clearing full backref on %llu\n",
7891 (unsigned long long)key.objectid);
7892 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7894 btrfs_set_extent_flags(path.nodes[0], ei, flags);
7895 btrfs_mark_buffer_dirty(path.nodes[0]);
7896 btrfs_release_path(&path);
7897 return btrfs_commit_transaction(trans, root);
7900 /* right now we only prune from the extent allocation tree */
7901 static int prune_one_block(struct btrfs_trans_handle *trans,
7902 struct btrfs_fs_info *info,
7903 struct btrfs_corrupt_block *corrupt)
7906 struct btrfs_path path;
7907 struct extent_buffer *eb;
7911 int level = corrupt->level + 1;
7913 btrfs_init_path(&path);
7915 /* we want to stop at the parent to our busted block */
7916 path.lowest_level = level;
7918 ret = btrfs_search_slot(trans, info->extent_root,
7919 &corrupt->key, &path, -1, 1);
7924 eb = path.nodes[level];
7931 * hopefully the search gave us the block we want to prune,
7932 * lets try that first
7934 slot = path.slots[level];
7935 found = btrfs_node_blockptr(eb, slot);
7936 if (found == corrupt->cache.start)
7939 nritems = btrfs_header_nritems(eb);
7941 /* the search failed, lets scan this node and hope we find it */
7942 for (slot = 0; slot < nritems; slot++) {
7943 found = btrfs_node_blockptr(eb, slot);
7944 if (found == corrupt->cache.start)
7948 * we couldn't find the bad block. TODO, search all the nodes for pointers
7951 if (eb == info->extent_root->node) {
7956 btrfs_release_path(&path);
7961 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7962 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7965 btrfs_release_path(&path);
7969 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7971 struct btrfs_trans_handle *trans = NULL;
7972 struct cache_extent *cache;
7973 struct btrfs_corrupt_block *corrupt;
7976 cache = search_cache_extent(info->corrupt_blocks, 0);
7980 trans = btrfs_start_transaction(info->extent_root, 1);
7982 return PTR_ERR(trans);
7984 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7985 prune_one_block(trans, info, corrupt);
7986 remove_cache_extent(info->corrupt_blocks, cache);
7989 return btrfs_commit_transaction(trans, info->extent_root);
7993 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7995 struct btrfs_block_group_cache *cache;
8000 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8001 &start, &end, EXTENT_DIRTY);
8004 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8010 cache = btrfs_lookup_first_block_group(fs_info, start);
8015 start = cache->key.objectid + cache->key.offset;
/*
 * Check every extent record held in extent_cache and print a diagnostic to
 * stderr for each problem visible below: multiple extent items, ref count
 * mismatch, backpointer mismatch, failed owner ref check, bad full backref,
 * metadata crossing a stripe boundary and extent/chunk type mismatch.
 *
 * The repair calls shown with a `repair` condition (fixup_extent_refs, and
 * delete_duplicate_records in a loop that also tests `repair`) only run
 * when that flag is set.  Each processed record is removed from
 * extent_cache and its backrefs are freed.
 *
 * NOTE(review): this listing is missing lines (braces, short statements,
 * comment delimiters); confirm control flow and return values against the
 * complete file before relying on this description.
 */
8019 static int check_extent_refs(struct btrfs_root *root,
8020 struct cache_tree *extent_cache)
8022 struct extent_record *rec;
8023 struct cache_extent *cache;
8032 * if we're doing a repair, we have to make sure
8033 * we don't allocate from the problem extents.
8034 * In the worst case, this will be all the
/* Mark the byte range of every extent record in excluded_extents. */
8037 cache = search_cache_extent(extent_cache, 0);
8039 rec = container_of(cache, struct extent_record, cache);
8040 set_extent_dirty(root->fs_info->excluded_extents,
8042 rec->start + rec->max_size - 1,
8044 cache = next_cache_extent(cache);
8047 /* pin down all the corrupted blocks too */
8048 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8050 set_extent_dirty(root->fs_info->excluded_extents,
8052 cache->start + cache->size - 1,
8054 cache = next_cache_extent(cache);
8056 prune_corrupt_blocks(root->fs_info);
8057 reset_cached_block_groups(root->fs_info);
8060 reset_cached_block_groups(root->fs_info);
8063 * We need to delete any duplicate entries we find first otherwise we
8064 * could mess up the extent tree when we have backrefs that actually
8065 * belong to a different extent item and not the weird duplicate one.
8067 while (repair && !list_empty(&duplicate_extents)) {
8068 rec = to_extent_record(duplicate_extents.next);
8069 list_del_init(&rec->list);
8071 /* Sometimes we can find a backref before we find an actual
8072 * extent, so we need to process it a little bit to see if there
8073 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8074 * if this is a backref screwup. If we need to delete stuff
8075 * process_duplicates() will return 0, otherwise it will return
8078 if (process_duplicates(root, extent_cache, rec))
8080 ret = delete_duplicate_records(root, rec);
8084 * delete_duplicate_records will return the number of entries
8085 * deleted, so if it's greater than 0 then we know we actually
8086 * did something and we need to remove.
/* Main pass: take the first record left in extent_cache and check it. */
8100 cache = search_cache_extent(extent_cache, 0);
8103 rec = container_of(cache, struct extent_record, cache);
8104 if (rec->num_duplicates) {
8105 fprintf(stderr, "extent item %llu has multiple extent "
8106 "items\n", (unsigned long long)rec->start);
8111 if (rec->refs != rec->extent_item_refs) {
8112 fprintf(stderr, "ref mismatch on [%llu %llu] ",
8113 (unsigned long long)rec->start,
8114 (unsigned long long)rec->nr);
8115 fprintf(stderr, "extent item %llu, found %llu\n",
8116 (unsigned long long)rec->extent_item_refs,
8117 (unsigned long long)rec->refs);
8118 ret = record_orphan_data_extents(root->fs_info, rec);
8125 * we can't use the extent to repair file
8126 * extent, let the fallback method handle it.
8128 if (!fixed && repair) {
8129 ret = fixup_extent_refs(
8140 if (all_backpointers_checked(rec, 1)) {
8141 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8142 (unsigned long long)rec->start,
8143 (unsigned long long)rec->nr);
8145 if (!fixed && !recorded && repair) {
8146 ret = fixup_extent_refs(root->fs_info,
8155 if (!rec->owner_ref_checked) {
8156 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8157 (unsigned long long)rec->start,
8158 (unsigned long long)rec->nr);
8159 if (!fixed && !recorded && repair) {
8160 ret = fixup_extent_refs(root->fs_info,
8169 if (rec->bad_full_backref) {
8170 fprintf(stderr, "bad full backref, on [%llu]\n",
8171 (unsigned long long)rec->start);
8173 ret = fixup_extent_flags(root->fs_info, rec);
8182 * Although it's not a extent ref's problem, we reuse this
8183 * routine for error reporting.
8184 * No repair function yet.
8186 if (rec->crossing_stripes) {
8188 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8189 rec->start, rec->start + rec->max_size);
8194 if (rec->wrong_chunk_type) {
8196 "bad extent [%llu, %llu), type mismatch with chunk\n",
8197 rec->start, rec->start + rec->max_size);
/* The record is done: drop it from the cache and free its backrefs. */
8202 remove_cache_extent(extent_cache, cache);
8203 free_all_extent_backrefs(rec);
8204 if (!init_extent_tree && repair && (!cur_err || fixed))
8205 clear_extent_dirty(root->fs_info->excluded_extents,
8207 rec->start + rec->max_size - 1,
/* Any repair error other than -EAGAIN is reported as fatal below. */
8213 if (ret && ret != -EAGAIN) {
8214 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8217 struct btrfs_trans_handle *trans;
8219 root = root->fs_info->extent_root;
8220 trans = btrfs_start_transaction(root, 1);
8221 if (IS_ERR(trans)) {
8222 ret = PTR_ERR(trans);
/* Fix up block accounting in a transaction of its own. */
8226 btrfs_fix_block_accounting(trans, root);
8227 ret = btrfs_commit_transaction(trans, root);
8232 fprintf(stderr, "repaired damaged extent references\n");
8238 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8242 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8243 stripe_size = length;
8244 stripe_size /= num_stripes;
8245 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8246 stripe_size = length * 2;
8247 stripe_size /= num_stripes;
8248 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8249 stripe_size = length;
8250 stripe_size /= (num_stripes - 1);
8251 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8252 stripe_size = length;
8253 stripe_size /= (num_stripes - 2);
8255 stripe_size = length;
8261 * Check the chunk with its block group/dev list ref:
8262 * Return 0 if all refs seems valid.
8263 * Return 1 if part of refs seems valid, need later check for rebuild ref
8264 * like missing block group and needs to search extent tree to rebuild them.
8265 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache.
 *
 * The block group is looked up in block_group_cache->tree and its
 * offset/objectid/flags are compared with the chunk's length/offset/type_flags.
 * Each stripe of the chunk is then looked up in dev_extent_cache->tree and the
 * dev extent's objectid, offset, chunk_offset and length are compared with the
 * stripe's devid/offset, the chunk offset and the calculated stripe length.
 *
 * NOTE(review): the last parameter, several local declarations, the branch
 * structure around the messages and the return statements are not visible in
 * this listing -- confirm them against the full source before relying on the
 * return contract.
 */
8267 static int check_chunk_refs(struct chunk_record *chunk_rec,
8268 struct block_group_tree *block_group_cache,
8269 struct device_extent_tree *dev_extent_cache,
8272 struct cache_extent *block_group_item;
8273 struct block_group_record *block_group_rec;
8274 struct cache_extent *dev_extent_item;
8275 struct device_extent_record *dev_extent_rec;
8279 int metadump_v2 = 0;
/* Step 1: find the block group that should describe this chunk */
8283 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8286 if (block_group_item) {
8287 block_group_rec = container_of(block_group_item,
8288 struct block_group_record,
/*
 * The extra closing parenthesis on the flags comparison shows that it is
 * grouped with a condition on a line not shown here (presumably involving
 * metadump_v2 -- confirm).
 */
8290 if (chunk_rec->length != block_group_rec->offset ||
8291 chunk_rec->offset != block_group_rec->objectid ||
8293 chunk_rec->type_flags != block_group_rec->flags)) {
8296 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8297 chunk_rec->objectid,
8302 chunk_rec->type_flags,
8303 block_group_rec->objectid,
8304 block_group_rec->type,
/* offset is passed twice: once for the key triple, once for "offset(...)" */
8305 block_group_rec->offset,
8306 block_group_rec->offset,
8307 block_group_rec->objectid,
8308 block_group_rec->flags);
/* Unlink the block group from its list and attach it to the chunk */
8311 list_del_init(&block_group_rec->list);
8312 chunk_rec->bg_rec = block_group_rec;
8317 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8318 chunk_rec->objectid,
8323 chunk_rec->type_flags);
/* Step 2: every stripe must be backed by a dev extent of the stripe length */
8330 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8331 chunk_rec->num_stripes);
8332 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8333 devid = chunk_rec->stripes[i].devid;
8334 offset = chunk_rec->stripes[i].offset;
8335 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8336 devid, offset, length);
8337 if (dev_extent_item) {
8338 dev_extent_rec = container_of(dev_extent_item,
8339 struct device_extent_record,
8341 if (dev_extent_rec->objectid != devid ||
8342 dev_extent_rec->offset != offset ||
8343 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8344 dev_extent_rec->length != length) {
8347 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8348 chunk_rec->objectid,
8351 chunk_rec->stripes[i].devid,
8352 chunk_rec->stripes[i].offset,
8353 dev_extent_rec->objectid,
8354 dev_extent_rec->offset,
8355 dev_extent_rec->length);
/* Move the dev extent onto the chunk's own dextents list */
8358 list_move(&dev_extent_rec->chunk_list,
8359 &chunk_rec->dextents);
8364 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8365 chunk_rec->objectid,
8368 chunk_rec->stripes[i].devid,
8369 chunk_rec->stripes[i].offset);
8376 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk record in @chunk_cache and sort the
 * records onto the caller's lists according to the result.
 *
 * @good, @bad, @rebuild: optional result lists; the visible code tests @good
 *                        and @rebuild for NULL before appending to them
 * @silent:               forwarded to check_chunk_refs()
 *
 * Afterwards the entries still linked on block_group_cache->block_groups and
 * dev_extent_cache->no_chunk_orphans are walked and reported as not having
 * found their chunk.
 *
 * NOTE(review): the return statement and the condition guarding the @bad
 * list are on lines not visible in this listing.
 */
8377 int check_chunks(struct cache_tree *chunk_cache,
8378 struct block_group_tree *block_group_cache,
8379 struct device_extent_tree *dev_extent_cache,
8380 struct list_head *good, struct list_head *bad,
8381 struct list_head *rebuild, int silent)
8383 struct cache_extent *chunk_item;
8384 struct chunk_record *chunk_rec;
8385 struct block_group_record *bg_rec;
8386 struct device_extent_record *dext_rec;
8390 chunk_item = first_cache_extent(chunk_cache);
8391 while (chunk_item) {
8392 chunk_rec = container_of(chunk_item, struct chunk_record,
8394 err = check_chunk_refs(chunk_rec, block_group_cache,
8395 dev_extent_cache, silent);
/* err == 0: fully consistent; err > 0: refs need rebuilding */
8398 if (err == 0 && good)
8399 list_add_tail(&chunk_rec->list, good);
8400 if (err > 0 && rebuild)
8401 list_add_tail(&chunk_rec->list, rebuild);
8403 list_add_tail(&chunk_rec->list, bad);
8404 chunk_item = next_cache_extent(chunk_item);
/* Block groups that no chunk claimed above */
8407 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8410 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Device extents that no chunk claimed above */
8418 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8422 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Compare the byte_used value of a device record with the sum of the lengths
 * of the device extents cached for that device.
 *
 * The walk starts at the first cached extent found for (devid, 0).  Each
 * extent visited is unlinked from its device_list and its length is added to
 * the running total.  A message is printed when the total differs from
 * dev_rec->byte_used.
 *
 * NOTE(review): the loop header, the reaction to a foreign objectid and the
 * return values are on lines not visible in this listing.
 */
8433 static int check_device_used(struct device_record *dev_rec,
8434 struct device_extent_tree *dext_cache)
8436 struct cache_extent *cache;
8437 struct device_extent_record *dev_extent_rec;
8440 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8442 dev_extent_rec = container_of(cache,
8443 struct device_extent_record,
/* Extents of another device are tested for here */
8445 if (dev_extent_rec->objectid != dev_rec->devid)
8448 list_del_init(&dev_extent_rec->device_list);
8449 total_byte += dev_extent_rec->length;
8450 cache = next_cache_extent(cache);
8453 if (total_byte != dev_rec->byte_used) {
8455 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8456 total_byte, dev_rec->byte_used, dev_rec->objectid,
8457 dev_rec->type, dev_rec->offset);
8464 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record in the @dev_cache rb-tree,
 * then report the device extents left on dev_extent_cache->no_device_orphans
 * as not having found their device.
 *
 * NOTE(review): the loop header, error accumulation and return statement are
 * on lines not visible in this listing.
 */
8465 static int check_devices(struct rb_root *dev_cache,
8466 struct device_extent_tree *dev_extent_cache)
8468 struct rb_node *dev_node;
8469 struct device_record *dev_rec;
8470 struct device_extent_record *dext_rec;
/* In-order walk over all device records */
8474 dev_node = rb_first(dev_cache);
8476 dev_rec = container_of(dev_node, struct device_record, node);
8477 err = check_device_used(dev_rec, dev_extent_cache);
8481 dev_node = rb_next(dev_node);
8483 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8486 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8487 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @drop_key is copied into the record with memcpy(); callers in this file
 * also pass NULL for it.
 *
 * NOTE(review): the allocation-failure check, the guard around the memcpy()
 * and the return statements are on lines not visible in this listing --
 * confirm that a NULL @drop_key never reaches the memcpy().
 */
8494 static int add_root_item_to_list(struct list_head *head,
8495 u64 objectid, u64 bytenr, u64 last_snapshot,
8496 u8 level, u8 drop_level,
8497 int level_size, struct btrfs_key *drop_key)
8500 struct root_item_record *ri_rec;
8501 ri_rec = malloc(sizeof(*ri_rec));
8504 ri_rec->bytenr = bytenr;
8505 ri_rec->objectid = objectid;
8506 ri_rec->level = level;
8507 ri_rec->level_size = level_size;
8508 ri_rec->drop_level = drop_level;
8509 ri_rec->last_snapshot = last_snapshot;
8511 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8512 list_add_tail(&ri_rec->list, head);
/*
 * Empty a list of root_item_record entries by repeatedly unlinking the first
 * entry until the list is empty.
 *
 * NOTE(review): the statement releasing ri_rec is not visible in this
 * listing -- confirm each record is freed after being unlinked.
 */
8517 static void free_root_item_list(struct list_head *list)
8519 struct root_item_record *ri_rec;
8521 while (!list_empty(list)) {
8522 ri_rec = list_first_entry(list, struct root_item_record,
8524 list_del_init(&ri_rec->list);
/*
 * Consume the root_item_record entries queued on @list.
 *
 * For each record the root tree block is read (through fs_info->tree_root,
 * at rec->bytenr with rec->level_size), handed to add_root_to_pending() and
 * then run_next_block() is called with the record.  The buffer is released
 * and the record unlinked afterwards.  A further run_next_block() call with a
 * NULL record follows the loop.
 *
 * NOTE(review): the bits_nr parameter, the conditions around the two
 * run_next_block() calls, the error handling and the return statement are on
 * lines not visible in this listing.
 */
8529 static int deal_root_from_list(struct list_head *list,
8530 struct btrfs_root *root,
8531 struct block_info *bits,
8533 struct cache_tree *pending,
8534 struct cache_tree *seen,
8535 struct cache_tree *reada,
8536 struct cache_tree *nodes,
8537 struct cache_tree *extent_cache,
8538 struct cache_tree *chunk_cache,
8539 struct rb_root *dev_cache,
8540 struct block_group_tree *block_group_cache,
8541 struct device_extent_tree *dev_extent_cache)
8546 while (!list_empty(list)) {
8547 struct root_item_record *rec;
8548 struct extent_buffer *buf;
/* Always work on the current head of the list */
8549 rec = list_entry(list->next,
8550 struct root_item_record, list);
8552 buf = read_tree_block(root->fs_info->tree_root,
8553 rec->bytenr, rec->level_size, 0);
/* Unreadable root block: drop the buffer reference */
8554 if (!extent_buffer_uptodate(buf)) {
8555 free_extent_buffer(buf);
8559 ret = add_root_to_pending(buf, extent_cache, pending,
8560 seen, nodes, rec->objectid);
8564 * To rebuild extent tree, we need deal with snapshot
8565 * one by one, otherwise we deal with node firstly which
8566 * can maximize readahead.
8569 ret = run_next_block(root, bits, bits_nr, &last,
8570 pending, seen, reada, nodes,
8571 extent_cache, chunk_cache,
8572 dev_cache, block_group_cache,
8573 dev_extent_cache, rec);
8577 free_extent_buffer(buf);
8578 list_del(&rec->list);
/* Same call as above but without a root record */
8584 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8585 reada, nodes, extent_cache, chunk_cache,
8586 dev_cache, block_group_cache,
8587 dev_extent_cache, NULL);
/*
 * Top-level driver of the chunk / extent / device cross checks.
 *
 * Visible phases:
 *  1. initialise the local caches and publish some of them through
 *     root->fs_info (excluded_extents, fsck_extent_cache, free_extent_hook,
 *     corrupt_blocks);
 *  2. queue the tree root and the chunk root on normal_trees;
 *  3. scan the tree root for ROOT_ITEM keys and queue each root on
 *     normal_trees (drop_progress objectid == 0) or on dropping_trees;
 *  4. process both lists with deal_root_from_list();
 *  5. run check_chunks(), check_extent_refs() and check_devices();
 *  6. stop the progress task, clear the fs_info pointers and free the caches.
 *
 * NOTE(review): many lines (labels, loops, error checks, returns, some call
 * arguments) are not visible in this listing.  Two separate cleanup sequences
 * are visible; presumably one belongs to the normal exit and the other to a
 * restart path related to the -EAGAIN remark below -- confirm against the
 * full source.
 */
8597 static int check_chunks_and_extents(struct btrfs_root *root)
8599 struct rb_root dev_cache;
8600 struct cache_tree chunk_cache;
8601 struct block_group_tree block_group_cache;
8602 struct device_extent_tree dev_extent_cache;
8603 struct cache_tree extent_cache;
8604 struct cache_tree seen;
8605 struct cache_tree pending;
8606 struct cache_tree reada;
8607 struct cache_tree nodes;
8608 struct extent_io_tree excluded_extents;
8609 struct cache_tree corrupt_blocks;
8610 struct btrfs_path path;
8611 struct btrfs_key key;
8612 struct btrfs_key found_key;
8614 struct block_info *bits;
8616 struct extent_buffer *leaf;
8618 struct btrfs_root_item ri;
8619 struct list_head dropping_trees;
8620 struct list_head normal_trees;
8621 struct btrfs_root *root1;
/* Phase 1: set up every cache used by the checks */
8626 dev_cache = RB_ROOT;
8627 cache_tree_init(&chunk_cache);
8628 block_group_tree_init(&block_group_cache);
8629 device_extent_tree_init(&dev_extent_cache);
8631 cache_tree_init(&extent_cache);
8632 cache_tree_init(&seen);
8633 cache_tree_init(&pending);
8634 cache_tree_init(&nodes);
8635 cache_tree_init(&reada);
8636 cache_tree_init(&corrupt_blocks);
8637 extent_io_tree_init(&excluded_extents);
8638 INIT_LIST_HEAD(&dropping_trees);
8639 INIT_LIST_HEAD(&normal_trees);
/* These fs_info pointers refer to locals; they are reset to NULL below */
8642 root->fs_info->excluded_extents = &excluded_extents;
8643 root->fs_info->fsck_extent_cache = &extent_cache;
8644 root->fs_info->free_extent_hook = free_extent_hook;
8645 root->fs_info->corrupt_blocks = &corrupt_blocks;
8649 bits = malloc(bits_nr * sizeof(struct block_info));
8655 if (ctx.progress_enabled) {
8656 ctx.tp = TASK_EXTENTS;
8657 task_start(ctx.info);
/* Phase 2: the tree root and the chunk root are always checked */
8661 root1 = root->fs_info->tree_root;
8662 level = btrfs_header_level(root1->node);
8663 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8664 root1->node->start, 0, level, 0,
8665 root1->nodesize, NULL);
8668 root1 = root->fs_info->chunk_root;
8669 level = btrfs_header_level(root1->node);
8670 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8671 root1->node->start, 0, level, 0,
8672 root1->nodesize, NULL);
/* Phase 3: collect every ROOT_ITEM stored in the tree root */
8675 btrfs_init_path(&path);
8678 key.type = BTRFS_ROOT_ITEM_KEY;
8679 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8684 leaf = path.nodes[0];
8685 slot = path.slots[0];
/*
 * NOTE(review): the search above runs on fs_info->tree_root while the leaf
 * advance below is given @root -- confirm this is intentional.
 */
8686 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8687 ret = btrfs_next_leaf(root, &path);
8690 leaf = path.nodes[0];
8691 slot = path.slots[0];
8693 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8694 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8695 unsigned long offset;
8698 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8699 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8700 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid of zero selects the normal_trees list */
8701 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8702 level = btrfs_root_level(&ri);
8703 level_size = root->nodesize;
8704 ret = add_root_item_to_list(&normal_trees,
8706 btrfs_root_bytenr(&ri),
8707 last_snapshot, level,
8708 0, level_size, NULL);
8712 level = btrfs_root_level(&ri);
8713 level_size = root->nodesize;
8714 objectid = found_key.objectid;
/*
 * found_key is overwritten with a converted disk key (source operand is on a
 * line not shown) and then passed on as the record's drop key.
 */
8715 btrfs_disk_key_to_cpu(&found_key,
8717 ret = add_root_item_to_list(&dropping_trees,
8719 btrfs_root_bytenr(&ri),
8720 last_snapshot, level,
8722 level_size, &found_key);
8729 btrfs_release_path(&path);
8732 * check_block can return -EAGAIN if it fixes something, please keep
8733 * this in mind when dealing with return values from these functions, if
8734 * we get -EAGAIN we want to fall through and restart the loop.
/* Phase 4: normal roots first, then the roots queued on dropping_trees */
8736 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8737 &seen, &reada, &nodes, &extent_cache,
8738 &chunk_cache, &dev_cache, &block_group_cache,
8745 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8746 &pending, &seen, &reada, &nodes,
8747 &extent_cache, &chunk_cache, &dev_cache,
8748 &block_group_cache, &dev_extent_cache);
/* Phase 5: cross checks on the collected records */
8755 ret = check_chunks(&chunk_cache, &block_group_cache,
8756 &dev_extent_cache, NULL, NULL, NULL, 0);
8763 ret = check_extent_refs(root, &extent_cache);
8770 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Phase 6: first visible cleanup sequence */
8775 task_stop(ctx.info);
8777 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8778 extent_io_tree_cleanup(&excluded_extents);
8779 root->fs_info->fsck_extent_cache = NULL;
8780 root->fs_info->free_extent_hook = NULL;
8781 root->fs_info->corrupt_blocks = NULL;
8782 root->fs_info->excluded_extents = NULL;
8785 free_chunk_cache_tree(&chunk_cache);
8786 free_device_cache_tree(&dev_cache);
8787 free_block_group_tree(&block_group_cache);
8788 free_device_extent_tree(&dev_extent_cache);
8789 free_extent_cache_tree(&seen);
8790 free_extent_cache_tree(&pending);
8791 free_extent_cache_tree(&reada);
8792 free_extent_cache_tree(&nodes);
/*
 * Second visible cleanup sequence: additionally frees the extent record
 * cache and both root item lists, and leaves the fs_info pointers untouched.
 */
8795 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8796 free_extent_cache_tree(&seen);
8797 free_extent_cache_tree(&pending);
8798 free_extent_cache_tree(&reada);
8799 free_extent_cache_tree(&nodes);
8800 free_chunk_cache_tree(&chunk_cache);
8801 free_block_group_tree(&block_group_cache);
8802 free_device_cache_tree(&dev_cache);
8803 free_device_extent_tree(&dev_extent_cache);
8804 free_extent_record_cache(root->fs_info, &extent_cache);
8805 free_root_item_list(&normal_trees);
8806 free_root_item_list(&dropping_trees);
8807 extent_io_tree_cleanup(&excluded_extents);
8812 * Check backrefs of a tree block given by @bytenr or @eb.
8814 * @root: the root containing the @bytenr or @eb
8815 * @eb: tree block extent buffer, can be NULL
8816 * @bytenr: bytenr of the tree block to search
8817 * @level: tree level of the tree block
8818 * @owner: owner of the tree block
8820 * Return >0 for any error found and output error message
8821 * Return 0 for no error found
/*
 * Look up the extent tree backref of one tree block and compare it with what
 * the referencing root expects.
 *
 * The extent/metadata item for @bytenr is located in the extent tree, its
 * TREE_BLOCK flag, generation, level and reference count are compared, and
 * its inline refs are scanned for a TREE_BLOCK_REF owned by this root (or
 * @owner) or a SHARED_BLOCK_REF whose parent passes a recursive check.  When
 * the inline refs are not sufficient a keyed TREE_BLOCK_REF item is searched
 * for.  Problems are accumulated in err as BACKREF_MISSING /
 * BACKREF_MISMATCH bits.
 *
 * NOTE(review): several conditions, labels and the return statement are on
 * lines not visible in this listing.  @eb is documented as possibly NULL, so
 * the btrfs_header_generation(eb) call below must be guarded -- the guard is
 * not visible here, confirm it exists.
 */
8823 static int check_tree_block_ref(struct btrfs_root *root,
8824 struct extent_buffer *eb, u64 bytenr,
8825 int level, u64 owner)
8827 struct btrfs_key key;
8828 struct btrfs_root *extent_root = root->fs_info->extent_root;
8829 struct btrfs_path path;
8830 struct btrfs_extent_item *ei;
8831 struct btrfs_extent_inline_ref *iref;
8832 struct extent_buffer *leaf;
8838 u32 nodesize = root->nodesize;
8845 btrfs_init_path(&path);
8846 key.objectid = bytenr;
/* Skinny metadata filesystems key tree blocks with METADATA_ITEM */
8847 if (btrfs_fs_incompat(root->fs_info,
8848 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8849 key.type = BTRFS_METADATA_ITEM_KEY;
8851 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Search past the last possible offset, then step back to the item */
8852 key.offset = (u64)-1;
8854 /* Search for the backref in extent tree */
8855 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8857 err |= BACKREF_MISSING;
8860 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8862 err |= BACKREF_MISSING;
8866 leaf = path.nodes[0];
8867 slot = path.slots[0];
8868 btrfs_item_key_to_cpu(leaf, &key, slot);
8870 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: level is the key offset and inline refs follow the extent
 * item directly.  Otherwise a btrfs_tree_block_info sits in between and
 * carries the level.
 */
8872 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8873 skinny_level = (int)key.offset;
8874 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8876 struct btrfs_tree_block_info *info;
8878 info = (struct btrfs_tree_block_info *)(ei + 1);
8879 skinny_level = btrfs_tree_block_level(leaf, info);
8880 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8887 if (!(btrfs_extent_flags(leaf, ei) &
8888 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8890 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8891 key.objectid, nodesize,
8892 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8893 err = BACKREF_MISMATCH;
8895 header_gen = btrfs_header_generation(eb);
8896 extent_gen = btrfs_extent_generation(leaf, ei);
8897 if (header_gen != extent_gen) {
8899 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8900 key.objectid, nodesize, header_gen,
8902 err = BACKREF_MISMATCH;
8904 if (level != skinny_level) {
8906 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8907 key.objectid, nodesize, level, skinny_level);
8908 err = BACKREF_MISMATCH;
/* Blocks of non-fs trees are expected to have exactly one reference */
8910 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8912 "extent[%llu %u] is referred by other roots than %llu",
8913 key.objectid, nodesize, root->objectid);
8914 err = BACKREF_MISMATCH;
8919 * Iterate the extent/metadata item to find the exact backref
8921 item_size = btrfs_item_size_nr(leaf, slot);
8922 ptr = (unsigned long)iref;
8923 end = (unsigned long)ei + item_size;
8925 iref = (struct btrfs_extent_inline_ref *)ptr;
8926 type = btrfs_extent_inline_ref_type(leaf, iref);
8927 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8929 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8930 (offset == root->objectid || offset == owner)) {
8932 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8933 /* Check if the backref points to valid referencer */
8934 found_ref = !check_tree_block_ref(root, NULL, offset,
/* Advance by the size of this inline ref type */
8940 ptr += btrfs_extent_inline_ref_size(type);
8944 * Inlined extent item doesn't have what we need, check
8945 * TREE_BLOCK_REF_KEY
8948 btrfs_release_path(&path);
8949 key.objectid = bytenr;
8950 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8951 key.offset = root->objectid;
8953 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8958 err |= BACKREF_MISSING;
8960 btrfs_release_path(&path);
/* The message is only printed when a block buffer was supplied */
8961 if (eb && (err & BACKREF_MISSING))
8962 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8963 bytenr, nodesize, owner, level);
8968 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8970 * Return >0 for any error found and output error message
8971 * Return 0 for no error found
/*
 * Check one EXTENT_DATA item of a file tree leaf against the extent tree.
 *
 * Inline extents and extents with disk_bytenr == 0 are tested for first.
 * For the others, disk_num_bytes and num_bytes are checked for sector
 * alignment and added to the global data_bytes_allocated /
 * data_bytes_referenced counters.  The EXTENT_ITEM keyed by
 * (disk_bytenr, disk_num_bytes) is then looked up; its DATA flag and
 * generation are compared and its inline refs are scanned for an
 * EXTENT_DATA_REF of this root/owner or a SHARED_DATA_REF with a valid
 * parent.  If no inline backref is found, a keyed EXTENT_DATA_REF item is
 * searched for.  Problems are accumulated in err.
 *
 * NOTE(review): several conditions, early exits and the return statement are
 * on lines not visible in this listing.
 */
8973 static int check_extent_data_item(struct btrfs_root *root,
8974 struct extent_buffer *eb, int slot)
8976 struct btrfs_file_extent_item *fi;
8977 struct btrfs_path path;
8978 struct btrfs_root *extent_root = root->fs_info->extent_root;
8979 struct btrfs_key fi_key;
8980 struct btrfs_key dbref_key;
8981 struct extent_buffer *leaf;
8982 struct btrfs_extent_item *ei;
8983 struct btrfs_extent_inline_ref *iref;
8984 struct btrfs_extent_data_ref *dref;
8986 u64 file_extent_gen;
8989 u64 extent_num_bytes;
8997 int found_dbackref = 0;
9001 btrfs_item_key_to_cpu(eb, &fi_key, slot);
9002 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9003 file_extent_gen = btrfs_file_extent_generation(eb, fi);
9005 /* Nothing to check for hole and inline data extents */
9006 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9007 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9010 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9011 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9012 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9014 /* Check unaligned disk_num_bytes and num_bytes */
9015 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9017 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9018 fi_key.objectid, fi_key.offset, disk_num_bytes,
9020 err |= BYTES_UNALIGNED;
9022 data_bytes_allocated += disk_num_bytes;
9024 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9026 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9027 fi_key.objectid, fi_key.offset, extent_num_bytes,
9029 err |= BYTES_UNALIGNED;
9031 data_bytes_referenced += extent_num_bytes;
/* Owner recorded in the leaf header, compared with ref roots below */
9033 owner = btrfs_header_owner(eb);
9035 /* Check the extent item of the file extent in extent tree */
9036 btrfs_init_path(&path);
9037 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9038 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9039 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9041 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9043 err |= BACKREF_MISSING;
/* From here on slot refers to the extent tree leaf, not to @eb */
9047 leaf = path.nodes[0];
9048 slot = path.slots[0];
9049 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9051 extent_flags = btrfs_extent_flags(leaf, ei);
9052 extent_gen = btrfs_extent_generation(leaf, ei);
9054 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9056 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9057 disk_bytenr, disk_num_bytes,
9058 BTRFS_EXTENT_FLAG_DATA);
9059 err |= BACKREF_MISMATCH;
/* The file extent must not be older than the extent item it points to */
9062 if (file_extent_gen < extent_gen) {
9064 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9065 disk_bytenr, disk_num_bytes, file_extent_gen,
9067 err |= BACKREF_MISMATCH;
9070 /* Check data backref inside that extent item */
9071 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9072 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9073 ptr = (unsigned long)iref;
9074 end = (unsigned long)ei + item_size;
9076 iref = (struct btrfs_extent_inline_ref *)ptr;
9077 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For EXTENT_DATA_REF the data ref structure starts at the offset field */
9078 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9080 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9081 ref_root = btrfs_extent_data_ref_root(leaf, dref);
9082 if (ref_root == owner || ref_root == root->objectid)
9084 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* A shared ref counts when its parent tree block has a valid backref */
9085 found_dbackref = !check_tree_block_ref(root, NULL,
9086 btrfs_extent_inline_ref_offset(leaf, iref),
9092 ptr += btrfs_extent_inline_ref_size(type);
9095 /* Didn't find an inlined data backref, try EXTENT_DATA_REF_KEY */
9096 if (!found_dbackref) {
9097 btrfs_release_path(&path);
9099 btrfs_init_path(&path);
9100 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9101 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs use a hash of (root, inode objectid, offset) as offset */
9102 dbref_key.offset = hash_extent_data_ref(root->objectid,
9103 fi_key.objectid, fi_key.offset);
9105 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9106 &dbref_key, &path, 0, 0);
9111 if (!found_dbackref)
9112 err |= BACKREF_MISSING;
9114 btrfs_release_path(&path);
9115 if (err & BACKREF_MISSING) {
9116 error("data extent[%llu %llu] backref lost",
9117 disk_bytenr, disk_num_bytes);
9123 * Get real tree block level for the case like shared block
9124 * Return >= 0 as tree level
9125 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources and
 * compare them: the extent tree backref (key offset for METADATA_ITEM,
 * btrfs_tree_block_info otherwise) and the header of the block itself, read
 * with the generation recorded in the extent item.
 *
 * The visible success path returns header_level.
 *
 * NOTE(review): the error checks after the searches, the handling of a level
 * mismatch and the error return are on lines not visible in this listing.
 */
9127 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9129 struct extent_buffer *eb;
9130 struct btrfs_path path;
9131 struct btrfs_key key;
9132 struct btrfs_extent_item *ei;
9135 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9140 /* Search extent tree for extent generation and level */
9141 key.objectid = bytenr;
9142 key.type = BTRFS_METADATA_ITEM_KEY;
9143 key.offset = (u64)-1;
9145 btrfs_init_path(&path);
9146 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9149 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9157 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9158 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9159 struct btrfs_extent_item);
9160 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Tests that the extent item describes a tree block */
9161 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9166 /* Get transid for later read_tree_block() check */
9167 transid = btrfs_extent_generation(path.nodes[0], ei);
9169 /* Get backref level as one source */
9170 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9171 backref_level = key.offset;
9173 struct btrfs_tree_block_info *info;
9175 info = (struct btrfs_tree_block_info *)(ei + 1);
9176 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9178 btrfs_release_path(&path);
9180 /* Get level from tree block as an alternative source */
9181 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9182 if (!extent_buffer_uptodate(eb)) {
9183 free_extent_buffer(eb);
9186 header_level = btrfs_header_level(eb);
9187 free_extent_buffer(eb);
/* The two sources are compared before the header level is returned */
9189 if (header_level != backref_level)
9191 return header_level;
9194 btrfs_release_path(&path);
9199 * Check if a tree block backref is valid (points to a valid tree block)
9200 * if level == -1, level will be resolved
9201 * Return >0 for any error found and print error message
/*
 * Verify that the tree @root_id really contains the tree block at @bytenr on
 * the given @level.
 *
 * The root is read, the block is read to obtain its first key, and a search
 * for that key is run with path.lowest_level = level.  The node found on
 * that level must have the same bytenr and level as requested.  Problems are
 * accumulated in err as REFERENCER_MISSING / REFERENCER_MISMATCH bits.
 *
 * NOTE(review): several conditions, labels and the return statement are on
 * lines not visible in this listing.
 */
9203 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9204 u64 bytenr, int level)
9206 struct btrfs_root *root;
9207 struct btrfs_key key;
9208 struct btrfs_path path;
9209 struct extent_buffer *eb;
9210 struct extent_buffer *node;
9211 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9215 /* Query level for level == -1 special case */
9217 level = query_tree_block_level(fs_info, bytenr);
9219 err |= REFERENCER_MISSING;
9223 key.objectid = root_id;
9224 key.type = BTRFS_ROOT_ITEM_KEY;
9225 key.offset = (u64)-1;
9227 root = btrfs_read_fs_root(fs_info, &key);
9229 err |= REFERENCER_MISSING;
9233 /* Read out the tree block to get item/node key */
9234 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9235 if (!extent_buffer_uptodate(eb)) {
9236 err |= REFERENCER_MISSING;
9237 free_extent_buffer(eb);
9241 /* Empty tree, no need to check key */
9242 if (!btrfs_header_nritems(eb) && !level) {
9243 free_extent_buffer(eb);
/* First key of the block: node key or item key (selector line not shown) */
9248 btrfs_node_key_to_cpu(eb, &key, 0);
9250 btrfs_item_key_to_cpu(eb, &key, 0);
9252 free_extent_buffer(eb);
9254 btrfs_init_path(&path);
/* Stop the descent at the requested level */
9255 path.lowest_level = level;
9256 /* Search with the first key, to ensure we can reach it */
9257 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9259 err |= REFERENCER_MISSING;
9263 node = path.nodes[level];
9264 if (btrfs_header_bytenr(node) != bytenr) {
9266 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9267 bytenr, nodesize, bytenr,
9268 btrfs_header_bytenr(node));
9269 err |= REFERENCER_MISMATCH;
9271 if (btrfs_header_level(node) != level) {
9273 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9274 bytenr, nodesize, level,
9275 btrfs_header_level(node));
9276 err |= REFERENCER_MISMATCH;
9280 btrfs_release_path(&path);
/* Two message variants exist: without and with the level */
9282 if (err & REFERENCER_MISSING) {
9284 error("extent [%llu %d] lost referencer (owner: %llu)",
9285 bytenr, nodesize, root_id);
9288 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9289 bytenr, nodesize, root_id, level);
9296 * Check referencer for shared block backref
9297 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: the tree block at @parent must be one level
 * above @level and must contain a block pointer equal to @bytenr.
 *
 * Returns REFERENCER_MISSING (after printing a message) when no such pointer
 * was found.
 *
 * NOTE(review): the conditions around the level query, the statements inside
 * the match branch and the success return are on lines not visible in this
 * listing.
 */
9299 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9300 u64 parent, u64 bytenr, int level)
9302 struct extent_buffer *eb;
9303 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9305 int found_parent = 0;
9308 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9309 if (!extent_buffer_uptodate(eb))
9313 level = query_tree_block_level(fs_info, bytenr);
/* The parent has to sit exactly one level above the child */
9317 if (level + 1 != btrfs_header_level(eb))
9320 nr = btrfs_header_nritems(eb);
9321 for (i = 0; i < nr; i++) {
9322 if (bytenr == btrfs_node_blockptr(eb, i)) {
9328 free_extent_buffer(eb);
9329 if (!found_parent) {
9331 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9332 bytenr, nodesize, parent, level);
9333 return REFERENCER_MISSING;
9339 * Check referencer for normal (inlined) data ref
9340 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed/inline EXTENT_DATA_REF: count the file extents of inode
 * @objectid in root @root_id that point at (@bytenr, @len) with a matching
 * backref offset, and compare the result with @count.
 *
 * Returns REFERENCER_MISSING (after printing a message) when the number of
 * matching file extents differs from @count.
 *
 * NOTE(review): the condition selecting the extent tree lookup, the way @len
 * is resolved, the loop structure, the increment of found_count and the
 * success return are on lines not visible in this listing.
 */
9342 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9343 u64 root_id, u64 objectid, u64 offset,
9344 u64 bytenr, u64 len, u32 count)
9346 struct btrfs_root *root;
9347 struct btrfs_root *extent_root = fs_info->extent_root;
9348 struct btrfs_key key;
9349 struct btrfs_path path;
9350 struct extent_buffer *leaf;
9351 struct btrfs_file_extent_item *fi;
9352 u32 found_count = 0;
/* Locate the EXTENT_ITEM for @bytenr in the extent tree */
9357 key.objectid = bytenr;
9358 key.type = BTRFS_EXTENT_ITEM_KEY;
9359 key.offset = (u64)-1;
9361 btrfs_init_path(&path);
9362 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9365 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9368 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9369 if (key.objectid != bytenr ||
9370 key.type != BTRFS_EXTENT_ITEM_KEY)
9373 btrfs_release_path(&path);
/* Switch to the referencing root */
9375 key.objectid = root_id;
9376 key.type = BTRFS_ROOT_ITEM_KEY;
9377 key.offset = (u64)-1;
9378 btrfs_init_path(&path);
9380 root = btrfs_read_fs_root(fs_info, &key);
9384 key.objectid = objectid;
9385 key.type = BTRFS_EXTENT_DATA_KEY;
9387 * It can be nasty as data backref offset is
9388 * file offset - file extent offset, which is smaller or
9389 * equal to original backref offset. The only special case is
9390 * overflow. So we need to special check and do further search.
/* Parsed as (offset & bit 63) ? 0 : offset -- start at 0 if the top bit is set */
9392 key.offset = offset & (1ULL << 63) ? 0 : offset;
9394 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9399 * Search afterwards to get correct one
9400 * NOTE: As we must do a comprehensive check on the data backref to
9401 * make sure the dref count also matches, we must iterate all file
9402 * extents for that inode.
9405 leaf = path.nodes[0];
9406 slot = path.slots[0];
9408 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Tests whether the item still belongs to this inode's EXTENT_DATA items */
9409 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9411 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9413 * Except normal disk bytenr and disk num bytes, we still
9414 * need to do extra check on dbackref offset as
9415 * dbackref offset = file_offset - file_extent_offset
9417 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9418 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9419 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9423 ret = btrfs_next_item(root, &path);
9428 btrfs_release_path(&path);
9429 if (found_count != count) {
9431 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9432 bytenr, len, root_id, objectid, offset, count, found_count);
9433 return REFERENCER_MISSING;
9439 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: the leaf at @parent must contain a
 * non-inline EXTENT_DATA item whose disk bytenr equals @bytenr.
 *
 * Returns REFERENCER_MISSING (after printing a message) when no such item
 * was found.
 *
 * NOTE(review): the statements inside the match branch, the loop
 * continuations and the success return are on lines not visible in this
 * listing.
 */
9441 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9442 u64 parent, u64 bytenr)
9444 struct extent_buffer *eb;
9445 struct btrfs_key key;
9446 struct btrfs_file_extent_item *fi;
9447 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9449 int found_parent = 0;
9452 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9453 if (!extent_buffer_uptodate(eb))
9456 nr = btrfs_header_nritems(eb);
9457 for (i = 0; i < nr; i++) {
9458 btrfs_item_key_to_cpu(eb, &key, i);
/* Only EXTENT_DATA items can reference a data extent */
9459 if (key.type != BTRFS_EXTENT_DATA_KEY)
9462 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents are tested for before disk_bytenr is read */
9463 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9466 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9473 free_extent_buffer(eb);
9474 if (!found_parent) {
9475 error("shared extent %llu referencer lost (parent: %llu)",
9477 return REFERENCER_MISSING;
9483 * This function will check a given extent item, including its backref and
9484 * itself (like crossing stripe boundary and type)
9486 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one EXTENT_ITEM / METADATA_ITEM of an extent tree leaf.
 *
 * The extent's size is added to the global bytes_used counter (key.offset
 * for EXTENT_ITEM, nodesize otherwise).  Items smaller than a
 * btrfs_extent_item are reported as unsupported COMPAT_EXTENT_TREE_V0.
 * Metadata extents are checked for crossing a stripe boundary.  Every inline
 * ref is then dispatched by type to the matching referencer check, and the
 * results are accumulated in err.
 *
 * NOTE(review): several conditions, the loop structure, the level arguments
 * of the tree block checks and the return statement are on lines not visible
 * in this listing.
 */
9488 static int check_extent_item(struct btrfs_fs_info *fs_info,
9489 struct extent_buffer *eb, int slot)
9491 struct btrfs_extent_item *ei;
9492 struct btrfs_extent_inline_ref *iref;
9493 struct btrfs_extent_data_ref *dref;
9497 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9498 u32 item_size = btrfs_item_size_nr(eb, slot);
9503 struct btrfs_key key;
9507 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM keys carry the extent length in key.offset */
9508 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9509 bytes_used += key.offset;
9511 bytes_used += nodesize;
9513 if (item_size < sizeof(*ei)) {
9515 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9516 * old thing when on disk format is still un-determined.
9517 * No need to care about it anymore
9519 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9523 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9524 flags = btrfs_extent_flags(eb, ei);
9526 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9528 if (metadata && check_crossing_stripes(global_info, key.objectid,
9530 error("bad metadata [%llu, %llu) crossing stripe boundary",
9531 key.objectid, key.objectid + nodesize);
9532 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline refs start right after the extent item ... */
9535 ptr = (unsigned long)(ei + 1);
9537 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9538 /* Old EXTENT_ITEM metadata */
9539 struct btrfs_tree_block_info *info;
/* ... except that old-style metadata has a tree_block_info in between */
9541 info = (struct btrfs_tree_block_info *)ptr;
9542 level = btrfs_tree_block_level(eb, info);
9543 ptr += sizeof(struct btrfs_tree_block_info);
9545 /* New METADATA_ITEM */
9548 end = (unsigned long)ei + item_size;
9551 err |= ITEM_SIZE_MISMATCH;
9555 /* Now check every backref in this extent item */
9557 iref = (struct btrfs_extent_inline_ref *)ptr;
9558 type = btrfs_extent_inline_ref_type(eb, iref);
9559 offset = btrfs_extent_inline_ref_offset(eb, iref);
9561 case BTRFS_TREE_BLOCK_REF_KEY:
9562 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9566 case BTRFS_SHARED_BLOCK_REF_KEY:
9567 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9571 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure starts at the inline ref's offset field */
9572 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9573 ret = check_extent_data_backref(fs_info,
9574 btrfs_extent_data_ref_root(eb, dref),
9575 btrfs_extent_data_ref_objectid(eb, dref),
9576 btrfs_extent_data_ref_offset(eb, dref),
9577 key.objectid, key.offset,
9578 btrfs_extent_data_ref_count(eb, dref));
9581 case BTRFS_SHARED_DATA_REF_KEY:
9582 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9586 error("extent[%llu %d %llu] has unknown ref type: %d",
9587 key.objectid, key.type, key.offset, type);
9588 err |= UNKNOWN_TYPE;
/* Step to the next inline ref */
9592 ptr += btrfs_extent_inline_ref_size(type);
9601 * Check if a dev extent item is referred correctly by its chunk
/*
 * Verify that the dev extent item at @eb/@slot is referenced by a chunk.
 *
 * A CHUNK_ITEM key is built from the chunk objectid/offset stored in the
 * dev extent and looked up in fs_info->chunk_root.  The visible checks
 * compare the chunk length with the dev extent length and scan the chunk's
 * stripes for one whose devid/offset equals the dev extent key.
 *
 * The excerpt shows REFERENCER_MISSING being returned after the "did not
 * find the related chunk" report; other return paths are not visible here.
 */
9603 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9604 struct extent_buffer *eb, int slot)
9606 struct btrfs_root *chunk_root = fs_info->chunk_root;
9607 struct btrfs_dev_extent *ptr;
9608 struct btrfs_path path;
9609 struct btrfs_key chunk_key;
9610 struct btrfs_key devext_key;
9611 struct btrfs_chunk *chunk;
9612 struct extent_buffer *l;
9616 int found_chunk = 0;
9619 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9620 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9621 length = btrfs_dev_extent_length(eb, ptr);
/* Key of the chunk item this dev extent names as its owner */
9623 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9624 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9625 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9627 btrfs_init_path(&path);
9628 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9633 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9634 if (btrfs_chunk_length(l, chunk) != length)
9637 num_stripes = btrfs_chunk_num_stripes(l, chunk);
/* Compare every stripe's (devid, offset) with the dev extent key */
9638 for (i = 0; i < num_stripes; i++) {
9639 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9640 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9642 if (devid == devext_key.objectid &&
9643 offset == devext_key.offset) {
9649 btrfs_release_path(&path);
9652 "device extent[%llu, %llu, %llu] did not find the related chunk",
9653 devext_key.objectid, devext_key.offset, length);
9654 return REFERENCER_MISSING;
9660 * Check if the used space is correct with the dev item
/*
 * Compare the bytes_used field of the dev item at @eb/@slot with the sum of
 * the lengths of the DEV_EXTENT items found for the same device id in
 * fs_info->dev_root.
 *
 * Visible results: REFERENCER_MISSING after the "cannot find any related
 * dev extent" report, ACCOUNTING_MISMATCH when the summed total differs
 * from bytes_used.  The return on the matching path is not visible here.
 *
 * NOTE(review): in the used != total branch the item key is re-read into
 * `key`, yet the message arguments are BTRFS_ROOT_TREE_OBJECTID,
 * BTRFS_DEV_EXTENT_KEY and dev_id instead of the key fields (the earlier
 * message prints key.objectid/key.type/key.offset) -- confirm whether the
 * key fields were intended.
 */
9662 static int check_dev_item(struct btrfs_fs_info *fs_info,
9663 struct extent_buffer *eb, int slot)
9665 struct btrfs_root *dev_root = fs_info->dev_root;
9666 struct btrfs_dev_item *dev_item;
9667 struct btrfs_path path;
9668 struct btrfs_key key;
9669 struct btrfs_dev_extent *ptr;
9675 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9676 dev_id = btrfs_device_id(eb, dev_item);
9677 used = btrfs_device_bytes_used(eb, dev_item);
/* Search key for this device's dev extents in the dev tree */
9679 key.objectid = dev_id;
9680 key.type = BTRFS_DEV_EXTENT_KEY;
9683 btrfs_init_path(&path);
9684 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9686 btrfs_item_key_to_cpu(eb, &key, slot);
9687 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9688 key.objectid, key.type, key.offset);
9689 btrfs_release_path(&path);
9690 return REFERENCER_MISSING;
9693 /* Iterate dev_extents to calculate the used space of a device */
9695 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9697 if (key.objectid > dev_id)
9699 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9702 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9703 struct btrfs_dev_extent);
9704 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9706 ret = btrfs_next_item(dev_root, &path);
9710 btrfs_release_path(&path);
9712 if (used != total) {
9713 btrfs_item_key_to_cpu(eb, &key, slot);
9715 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9716 total, used, BTRFS_ROOT_TREE_OBJECTID,
9717 BTRFS_DEV_EXTENT_KEY, dev_id);
9718 return ACCOUNTING_MISMATCH;
9724 * Check a block group item with its referencer (chunk) and its used space
9725 * with extent/metadata item
/*
 * Check the block group item at @eb/@slot in two steps.
 *
 * 1) Look up the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 *    CHUNK_ITEM, block group start) in the chunk tree.  The visible error
 *    bits are REFERENCER_MISSING (no chunk item found) and
 *    REFERENCER_MISMATCH (chunk length comparison failed).
 * 2) Walk the extent tree starting at the block group start, accumulating
 *    `total` from EXTENT_ITEM/METADATA_ITEM keys and checking that each
 *    extent's DATA/TREE_BLOCK flag fits the block group flags
 *    (CHUNK_TYPE_MISMATCH otherwise).  Finally `total` is compared with the
 *    block group's `used` field (ACCOUNTING_MISMATCH).
 *
 * Error bits are OR-ed into `err`.  The statements controlled by several of
 * the loop conditions, and the final return, are not visible in this
 * excerpt.
 */
9727 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9728 struct extent_buffer *eb, int slot)
9730 struct btrfs_root *extent_root = fs_info->extent_root;
9731 struct btrfs_root *chunk_root = fs_info->chunk_root;
9732 struct btrfs_block_group_item *bi;
9733 struct btrfs_block_group_item bg_item;
9734 struct btrfs_path path;
9735 struct btrfs_key bg_key;
9736 struct btrfs_key chunk_key;
9737 struct btrfs_key extent_key;
9738 struct btrfs_chunk *chunk;
9739 struct extent_buffer *leaf;
9740 struct btrfs_extent_item *ei;
9741 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9749 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9750 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer before reading its fields */
9751 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9752 used = btrfs_block_group_used(&bg_item);
9753 bg_flags = btrfs_block_group_flags(&bg_item);
9755 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9756 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9757 chunk_key.offset = bg_key.objectid;
9759 btrfs_init_path(&path);
9760 /* Search for the referencer chunk */
9761 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9764 "block group[%llu %llu] did not find the related chunk item",
9765 bg_key.objectid, bg_key.offset);
9766 err |= REFERENCER_MISSING;
9768 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9769 struct btrfs_chunk);
9770 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9773 "block group[%llu %llu] related chunk item length does not match",
9774 bg_key.objectid, bg_key.offset);
9775 err |= REFERENCER_MISMATCH;
9778 btrfs_release_path(&path);
9780 /* Search from the block group bytenr */
9781 extent_key.objectid = bg_key.objectid;
9782 extent_key.type = 0;
9783 extent_key.offset = 0;
9785 btrfs_init_path(&path);
9786 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9790 /* Iterate extent tree to account used space */
9792 leaf = path.nodes[0];
9793 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the end of the block group range */
9794 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9797 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9798 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9800 if (extent_key.objectid < bg_key.objectid)
9803 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9806 total += extent_key.offset;
9808 ei = btrfs_item_ptr(leaf, path.slots[0],
9809 struct btrfs_extent_item);
9810 flags = btrfs_extent_flags(leaf, ei);
/* The extent's flag must be compatible with the block group type */
9811 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9812 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9814 "bad extent[%llu, %llu) type mismatch with chunk",
9815 extent_key.objectid,
9816 extent_key.objectid + extent_key.offset);
9817 err |= CHUNK_TYPE_MISMATCH;
9819 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9820 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9821 BTRFS_BLOCK_GROUP_METADATA))) {
9823 "bad extent[%llu, %llu) type mismatch with chunk",
9824 extent_key.objectid,
9825 extent_key.objectid + nodesize);
9826 err |= CHUNK_TYPE_MISMATCH;
9830 ret = btrfs_next_item(extent_root, &path);
9836 btrfs_release_path(&path);
9838 if (total != used) {
9840 "block group[%llu %llu] used %llu but extent items used %llu",
9841 bg_key.objectid, bg_key.offset, used, total);
9842 err |= ACCOUNTING_MISMATCH;
9848 * Check a chunk item.
9849 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @eb/@slot.
 *
 * Visible checks, each OR-ing a bit into `err`:
 *  - chunk length not aligned to the super block sector size:
 *    BYTES_UNALIGNED
 *  - no bit of BTRFS_BLOCK_GROUP_TYPE_MASK set in the chunk type:
 *    UNKNOWN_TYPE
 *  - more than one profile bit set (profile & (profile - 1)): UNKNOWN_TYPE
 *  - BLOCK_GROUP_ITEM keyed (chunk offset, length) not found in the extent
 *    tree, or its flags differ from the chunk type: REFERENCER_MISSING
 *  - for every stripe, the DEV_EXTENT keyed (stripe devid, stripe offset)
 *    in the dev tree must carry this chunk's objectid, offset and length:
 *    BACKREF_MISSING otherwise
 *
 * NOTE(review): the flags-mismatch branch sets REFERENCER_MISSING although
 * the item was found; check_block_group_item uses REFERENCER_MISMATCH for a
 * comparable length mismatch -- confirm which bit is intended.
 * NOTE(review): the stripe message prints chunk_key.objectid as the range
 * start while every other message in this function prints chunk_key.offset
 * -- confirm.
 */
9851 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9852 struct extent_buffer *eb, int slot)
9854 struct btrfs_root *extent_root = fs_info->extent_root;
9855 struct btrfs_root *dev_root = fs_info->dev_root;
9856 struct btrfs_path path;
9857 struct btrfs_key chunk_key;
9858 struct btrfs_key bg_key;
9859 struct btrfs_key devext_key;
9860 struct btrfs_chunk *chunk;
9861 struct extent_buffer *leaf;
9862 struct btrfs_block_group_item *bi;
9863 struct btrfs_block_group_item bg_item;
9864 struct btrfs_dev_extent *ptr;
9865 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9877 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9878 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9879 length = btrfs_chunk_length(eb, chunk);
9880 chunk_end = chunk_key.offset + length;
9881 if (!IS_ALIGNED(length, sectorsize)) {
9882 error("chunk[%llu %llu) not aligned to %u",
9883 chunk_key.offset, chunk_end, sectorsize);
9884 err |= BYTES_UNALIGNED;
9888 type = btrfs_chunk_type(eb, chunk);
9889 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9890 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9891 error("chunk[%llu %llu) has no chunk type",
9892 chunk_key.offset, chunk_end);
9893 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one bit is set */
9895 if (profile && (profile & (profile - 1))) {
9896 error("chunk[%llu %llu) multiple profiles detected: %llx",
9897 chunk_key.offset, chunk_end, profile);
9898 err |= UNKNOWN_TYPE;
/* Key of the block group item expected for this chunk */
9901 bg_key.objectid = chunk_key.offset;
9902 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9903 bg_key.offset = length;
9905 btrfs_init_path(&path);
9906 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9909 "chunk[%llu %llu) did not find the related block group item",
9910 chunk_key.offset, chunk_end);
9911 err |= REFERENCER_MISSING;
9913 leaf = path.nodes[0];
9914 bi = btrfs_item_ptr(leaf, path.slots[0],
9915 struct btrfs_block_group_item);
9916 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9918 if (btrfs_block_group_flags(&bg_item) != type) {
9920 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9921 chunk_key.offset, chunk_end, type,
9922 btrfs_block_group_flags(&bg_item));
9923 err |= REFERENCER_MISSING;
9927 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
/* One dev tree lookup per stripe; the path is released and re-initialised each time */
9928 for (i = 0; i < num_stripes; i++) {
9929 btrfs_release_path(&path);
9930 btrfs_init_path(&path);
9931 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9932 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9933 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9935 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9940 leaf = path.nodes[0];
9941 ptr = btrfs_item_ptr(leaf, path.slots[0],
9942 struct btrfs_dev_extent);
9943 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9944 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9945 if (objectid != chunk_key.objectid ||
9946 offset != chunk_key.offset ||
9947 btrfs_dev_extent_length(leaf, ptr) != length)
9951 err |= BACKREF_MISSING;
9953 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9954 chunk_key.objectid, chunk_end, i);
9957 btrfs_release_path(&path);
9963 * Main entry function to check known items and update related accounting info
/*
 * Dispatch each item of leaf @eb to the checker that matches its key type.
 *
 * Visible dispatch table:
 *   EXTENT_DATA                 -> check_extent_data_item
 *   BLOCK_GROUP_ITEM            -> check_block_group_item
 *   DEV_ITEM                    -> check_dev_item
 *   CHUNK_ITEM                  -> check_chunk_item
 *   DEV_EXTENT                  -> check_dev_extent_item
 *   EXTENT_ITEM / METADATA_ITEM -> check_extent_item
 *   EXTENT_CSUM                 -> item size added to total_csum_bytes
 *   TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF, SHARED_DATA_REF
 *                               -> the matching backref checker
 *
 * How the per-item results are combined and returned is not visible in
 * this excerpt.
 */
9965 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9967 struct btrfs_fs_info *fs_info = root->fs_info;
9968 struct btrfs_key key;
9971 struct btrfs_extent_data_ref *dref;
9976 btrfs_item_key_to_cpu(eb, &key, slot);
9980 case BTRFS_EXTENT_DATA_KEY:
9981 ret = check_extent_data_item(root, eb, slot);
9984 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9985 ret = check_block_group_item(fs_info, eb, slot);
9988 case BTRFS_DEV_ITEM_KEY:
9989 ret = check_dev_item(fs_info, eb, slot);
9992 case BTRFS_CHUNK_ITEM_KEY:
9993 ret = check_chunk_item(fs_info, eb, slot);
9996 case BTRFS_DEV_EXTENT_KEY:
9997 ret = check_dev_extent_item(fs_info, eb, slot);
10000 case BTRFS_EXTENT_ITEM_KEY:
10001 case BTRFS_METADATA_ITEM_KEY:
10002 ret = check_extent_item(fs_info, eb, slot);
10005 case BTRFS_EXTENT_CSUM_KEY:
10006 total_csum_bytes += btrfs_item_size_nr(eb, slot);
10008 case BTRFS_TREE_BLOCK_REF_KEY:
10009 ret = check_tree_block_backref(fs_info, key.offset,
10013 case BTRFS_EXTENT_DATA_REF_KEY:
10014 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10015 ret = check_extent_data_backref(fs_info,
10016 btrfs_extent_data_ref_root(eb, dref),
10017 btrfs_extent_data_ref_objectid(eb, dref),
10018 btrfs_extent_data_ref_offset(eb, dref),
10020 btrfs_extent_data_ref_count(eb, dref));
10023 case BTRFS_SHARED_BLOCK_REF_KEY:
10024 ret = check_shared_block_backref(fs_info, key.offset,
10028 case BTRFS_SHARED_DATA_REF_KEY:
10029 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while it is still inside the leaf */
10037 if (++slot < btrfs_header_nritems(eb))
10044 * Helper function for later fs/subvol tree check. To determine if a tree
10045 * block should be checked.
10046 * This function ensures that only the direct referencer with the lowest
10047 * rootid checks a fs/subvolume tree block.
10049 * Backref check at extent tree would detect errors like missing subvolume
10050 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root should check tree block @eb.
 *
 * The extent tree is searched with (bytenr of @eb, METADATA_ITEM, -1) and
 * btrfs_previous_extent_item() is then called for that bytenr.  The inline
 * refs after the extent item (and, for a non-METADATA_ITEM key, after the
 * btrfs_tree_block_info) are walked; on a TREE_BLOCK_REF whose offset is
 * lower than root->objectid the path is released.
 *
 * The values returned on each path are not visible in this excerpt.
 */
10052 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10054 struct btrfs_root *extent_root = root->fs_info->extent_root;
10055 struct btrfs_key key;
10056 struct btrfs_path path;
10057 struct extent_buffer *leaf;
10059 struct btrfs_extent_item *ei;
10065 struct btrfs_extent_inline_ref *iref;
10068 btrfs_init_path(&path);
10069 key.objectid = btrfs_header_bytenr(eb);
10070 key.type = BTRFS_METADATA_ITEM_KEY;
10071 key.offset = (u64)-1;
10074 * Any failure in backref resolving means we can't determine
10075 * whom the tree block belongs to.
10076 * So in that case, we need to check that tree block
10078 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10082 ret = btrfs_previous_extent_item(extent_root, &path,
10083 btrfs_header_bytenr(eb));
10087 leaf = path.nodes[0];
10088 slot = path.slots[0];
10089 btrfs_item_key_to_cpu(leaf, &key, slot);
10090 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Locate the first inline ref; the layout depends on the key type */
10092 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10093 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10095 struct btrfs_tree_block_info *info;
10097 info = (struct btrfs_tree_block_info *)(ei + 1);
10098 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10101 item_size = btrfs_item_size_nr(leaf, slot);
10102 ptr = (unsigned long)iref;
10103 end = (unsigned long)ei + item_size;
/* Walk the inline refs up to the end of the item */
10104 while (ptr < end) {
10105 iref = (struct btrfs_extent_inline_ref *)ptr;
10106 type = btrfs_extent_inline_ref_type(leaf, iref);
10107 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10110 * We only check the tree block if current root is
10111 * the lowest referencer of it.
10113 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10114 offset < root->objectid) {
10115 btrfs_release_path(&path);
10119 ptr += btrfs_extent_inline_ref_size(type);
10122 * Normally we should also check keyed tree block ref, but that may be
10123 * very time consuming. Inlined ref should already make us skip a lot
10124 * of refs now. So skip search keyed tree block ref.
10128 btrfs_release_path(&path);
10133 * Traversal function for tree block. We will do:
10134 * 1) Skip shared fs/subvolume tree blocks
10135 * 2) Update related bytes accounting
10136 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root and the blocks below it.
 *
 * Visible steps:
 *  - for fs trees, consult should_check() before doing any work
 *  - add node->len to total_btree_bytes, and to total_fs_tree_bytes /
 *    total_extent_tree_bytes depending on the header owner
 *  - set found_old_backref for a TREE_RELOC-owned block with mixed backref
 *    revision and no RELOC header flag
 *  - call check_tree_block_ref() on the block itself, then
 *    check_leaf_items() and btrfs_leaf_free_space() accounting, or
 *    node-slot waste accounting followed by a loop over the child pointers
 *  - children are read with read_tree_block() and, when uptodate, traversed
 *    by a recursive call; each buffer is released with free_extent_buffer()
 *
 * How results are accumulated and returned is not visible in this excerpt.
 */
10138 static int traverse_tree_block(struct btrfs_root *root,
10139 struct extent_buffer *node)
10141 struct extent_buffer *eb;
10142 struct btrfs_key key;
10143 struct btrfs_key drop_key;
10151 * Skip shared fs/subvolume tree block, in that case they will
10152 * be checked by referencer with lowest rootid
10154 if (is_fstree(root->objectid) && !should_check(root, node))
10157 /* Update bytes accounting */
10158 total_btree_bytes += node->len;
10159 if (fs_root_objectid(btrfs_header_owner(node)))
10160 total_fs_tree_bytes += node->len;
10161 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10162 total_extent_tree_bytes += node->len;
10163 if (!found_old_backref &&
10164 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10165 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10166 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10167 found_old_backref = 1;
10169 /* pre-order traversal, check itself first */
10170 level = btrfs_header_level(node);
10171 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10172 btrfs_header_level(node),
10173 btrfs_header_owner(node));
10177 "check %s failed root %llu bytenr %llu level %d, force continue check",
10178 level ? "node":"leaf", root->objectid,
10179 btrfs_header_bytenr(node), btrfs_header_level(node));
10182 btree_space_waste += btrfs_leaf_free_space(root, node);
10183 ret = check_leaf_items(root, node);
10188 nr = btrfs_header_nritems(node);
10189 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
/* Unused key pointer slots of this node count as wasted space */
10190 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10191 sizeof(struct btrfs_key_ptr);
10193 /* Then check all its children */
10194 for (i = 0; i < nr; i++) {
10195 u64 blocknr = btrfs_node_blockptr(node, i);
10197 btrfs_node_key_to_cpu(node, &key, i);
10198 if (level == root->root_item.drop_level &&
10199 is_dropped_key(&key, &drop_key))
10203 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10204 * to call the function itself.
10206 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10207 if (extent_buffer_uptodate(eb)) {
10208 ret = traverse_tree_block(root, eb);
10211 free_extent_buffer(eb);
10218 * Low memory usage version check_chunks_and_extents.
/*
 * Traverse the chunk tree and the tree root with traverse_tree_block(),
 * then iterate the ROOT_ITEMs of the tree root starting from the key
 * objectid BTRFS_EXTENT_TREE_OBJECTID.  Each root is read with
 * btrfs_read_fs_root() (key.offset set to (u64)-1) and traversed.
 *
 * How the individual results are combined and returned is not visible in
 * this excerpt.
 *
 * NOTE(review): the "cannot find extent treet" message contains a typo,
 * and "failed to read tree: %lld" prints key.objectid with a signed
 * conversion -- presumably objectid is an unsigned 64-bit value; confirm.
 */
10220 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10222 struct btrfs_path path;
10223 struct btrfs_key key;
10224 struct btrfs_root *root1;
10225 struct btrfs_root *cur_root;
10229 root1 = root->fs_info->chunk_root;
10230 ret = traverse_tree_block(root1, root1->node);
10233 root1 = root->fs_info->tree_root;
10234 ret = traverse_tree_block(root1, root1->node);
10237 btrfs_init_path(&path);
10238 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10240 key.type = BTRFS_ROOT_ITEM_KEY;
10242 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10244 error("cannot find extent treet in tree_root");
10249 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10250 if (key.type != BTRFS_ROOT_ITEM_KEY)
10252 key.offset = (u64)-1;
10254 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10255 if (IS_ERR(cur_root) || !cur_root) {
10256 error("failed to read tree: %lld", key.objectid);
10260 ret = traverse_tree_block(cur_root, cur_root->node);
10264 ret = btrfs_next_item(root1, &path);
10270 btrfs_release_path(&path);
/*
 * Re-initialise the root node of @root inside transaction @trans.
 *
 * Visible steps: a buffer `c` is obtained (one visible path takes an extra
 * reference with extent_buffer_get(), another allocates a block with
 * btrfs_alloc_free_block()), its header is zeroed and rewritten (level,
 * bytenr, generation, backref revision, owner, fsid, chunk tree uuid) and
 * the buffer is marked dirty.  When the new buffer starts at the same
 * bytenr as the old root node, the root item generation and level are
 * updated and btrfs_update_root() is called.  The old node is released and
 * the root is added to the dirty list.
 *
 * The role of @overwrite and the return values are not visible in this
 * excerpt -- presumably @overwrite selects reuse of the old node; confirm
 * against the full source.
 */
10274 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10275 struct btrfs_root *root, int overwrite)
10277 struct extent_buffer *c;
10278 struct extent_buffer *old = root->node;
10281 struct btrfs_disk_key disk_key = {0,0,0};
10287 extent_buffer_get(c);
10290 c = btrfs_alloc_free_block(trans, root,
10292 root->root_key.objectid,
10293 &disk_key, level, 0, 0);
10296 extent_buffer_get(c);
/* Wipe the header, then fill in the fields of a fresh root block */
10300 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10301 btrfs_set_header_level(c, level);
10302 btrfs_set_header_bytenr(c, c->start);
10303 btrfs_set_header_generation(c, trans->transid);
10304 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10305 btrfs_set_header_owner(c, root->root_key.objectid);
10307 write_extent_buffer(c, root->fs_info->fsid,
10308 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10310 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10311 btrfs_header_chunk_tree_uuid(c),
10314 btrfs_mark_buffer_dirty(c);
10316 * this case can happen in the following case:
10318 * 1.overwrite previous root.
10320 * 2.reinit reloc data root, this is because we skip pin
10321 * down reloc data tree before which means we can allocate
10322 * same block bytenr here.
10324 if (old->start == c->start) {
10325 btrfs_set_root_generation(&root->root_item,
10327 root->root_item.level = btrfs_header_level(root->node);
10328 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10329 &root->root_key, &root->root_item);
10331 free_extent_buffer(c);
10335 free_extent_buffer(old);
10337 add_root_to_dirty_list(root);
/*
 * Pin the extent of tree block @eb and recurse into the blocks reachable
 * from it.
 *
 * A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents is detected first (see the comment below);
 * otherwise the block is pinned with btrfs_pin_extent().  The item loop
 * shows two kinds of handling:
 *  - ROOT_ITEM keys (extent root and both reloc roots excluded): the root
 *    block named by the item is read and pinned recursively with
 *    tree_root = 0
 *  - node pointers: when level == 1 and @tree_root is 0 the child is pinned
 *    without being read; otherwise it is read and pinned recursively with
 *    the same @tree_root
 *
 * The condition selecting between the two kinds of handling and the return
 * values are not visible in this excerpt.
 */
10341 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10342 struct extent_buffer *eb, int tree_root)
10344 struct extent_buffer *tmp;
10345 struct btrfs_root_item *ri;
10346 struct btrfs_key key;
10349 int level = btrfs_header_level(eb);
10355 * If we have pinned this block before, don't pin it again.
10356 * This can not only avoid forever loop with broken filesystem
10357 * but also give us some speedups.
10359 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10360 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10363 btrfs_pin_extent(fs_info, eb->start, eb->len);
10365 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10366 nritems = btrfs_header_nritems(eb);
10367 for (i = 0; i < nritems; i++) {
10369 btrfs_item_key_to_cpu(eb, &key, i);
10370 if (key.type != BTRFS_ROOT_ITEM_KEY)
10372 /* Skip the extent root and reloc roots */
10373 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10374 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10375 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10377 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10378 bytenr = btrfs_disk_root_bytenr(eb, ri);
10381 * If at any point we start needing the real root we
10382 * will have to build a stump root for the root we are
10383 * in, but for now this doesn't actually use the root so
10384 * just pass in extent_root.
10386 tmp = read_tree_block(fs_info->extent_root, bytenr,
10388 if (!extent_buffer_uptodate(tmp)) {
10389 fprintf(stderr, "Error reading root block\n");
10392 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10393 free_extent_buffer(tmp);
10397 bytenr = btrfs_node_blockptr(eb, i);
10399 /* If we aren't the tree root don't read the block */
10400 if (level == 1 && !tree_root) {
10401 btrfs_pin_extent(fs_info, bytenr, nodesize);
10405 tmp = read_tree_block(fs_info->extent_root, bytenr,
10407 if (!extent_buffer_uptodate(tmp)) {
10408 fprintf(stderr, "Error reading tree block\n");
10411 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10412 free_extent_buffer(tmp);
/*
 * Pin the blocks reachable from the chunk root (tree_root = 0) and then
 * from the tree root (tree_root = 1).  The result of the second
 * pin_down_tree_blocks() call is returned directly; the handling of the
 * first call's result is not visible in this excerpt.
 */
10421 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10425 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10429 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * Visible steps: search fs_info->chunk_root for CHUNK_ITEM keys, clear the
 * three avail_*_alloc_bits fields, then for every chunk item call
 * btrfs_add_block_group() with the chunk's type and length and mark the
 * chunk range dirty in fs_info->free_space_cache.  Afterwards block groups
 * are looked up in order with btrfs_lookup_first_block_group(), `start`
 * advancing past each one.
 *
 * What is done with each looked-up block group and the return values are
 * not visible in this excerpt.
 */
10432 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10434 struct btrfs_block_group_cache *cache;
10435 struct btrfs_path path;
10436 struct extent_buffer *leaf;
10437 struct btrfs_chunk *chunk;
10438 struct btrfs_key key;
10442 btrfs_init_path(&path);
10444 key.type = BTRFS_CHUNK_ITEM_KEY;
10446 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
10448 btrfs_release_path(&path);
10453 * We do this in case the block groups were screwed up and had alloc
10454 * bits that aren't actually set on the chunks. This happens with
10455 * restored images every time and could happen in real life I guess.
10457 fs_info->avail_data_alloc_bits = 0;
10458 fs_info->avail_metadata_alloc_bits = 0;
10459 fs_info->avail_system_alloc_bits = 0;
10461 /* First we need to create the in-memory block groups */
/* Move to the next leaf once the current one is exhausted */
10463 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10464 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
10466 btrfs_release_path(&path);
10474 leaf = path.nodes[0];
10475 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10476 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10481 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
10482 btrfs_add_block_group(fs_info, 0,
10483 btrfs_chunk_type(leaf, chunk),
10484 key.objectid, key.offset,
10485 btrfs_chunk_length(leaf, chunk));
10486 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10487 key.offset + btrfs_chunk_length(leaf, chunk),
10493 cache = btrfs_lookup_first_block_group(fs_info, start);
10497 start = cache->key.objectid + cache->key.offset;
10500 btrfs_release_path(&path);
/*
 * Remove pending-balance state from the tree root inside @trans.
 *
 * Visible steps:
 *  1) search for the (BTRFS_BALANCE_OBJECTID, BALANCE_ITEM) key with
 *     ins_len -1 / cow 1 and delete it with btrfs_del_item(); one visible
 *     path jumps straight to the reinit_data_reloc label
 *  2) search from (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM) and delete the
 *     matching items in batches with btrfs_del_items(del_slot, del_nr)
 *  3) read the data reloc tree root, record it in the transaction,
 *     re-initialise it with btrfs_fsck_reinit_root(trans, root, 0) and
 *     create its root directory with btrfs_make_root_dir()
 *
 * The conditions guarding each step and the return values are not visible
 * in this excerpt.
 */
10504 static int reset_balance(struct btrfs_trans_handle *trans,
10505 struct btrfs_fs_info *fs_info)
10507 struct btrfs_root *root = fs_info->tree_root;
10508 struct btrfs_path path;
10509 struct extent_buffer *leaf;
10510 struct btrfs_key key;
10511 int del_slot, del_nr = 0;
10515 btrfs_init_path(&path);
10516 key.objectid = BTRFS_BALANCE_OBJECTID;
10517 key.type = BTRFS_BALANCE_ITEM_KEY;
10519 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10524 goto reinit_data_reloc;
10529 ret = btrfs_del_item(trans, root, &path);
10532 btrfs_release_path(&path);
10534 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10535 key.type = BTRFS_ROOT_ITEM_KEY;
10537 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10541 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10546 ret = btrfs_del_items(trans, root, &path,
10553 btrfs_release_path(&path);
10556 ret = btrfs_search_slot(trans, root, &key, &path,
10563 leaf = path.nodes[0];
10564 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10565 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10567 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10572 del_slot = path.slots[0];
10581 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
10585 btrfs_release_path(&path);
/* From here on `root` refers to the data reloc tree, not the tree root */
10588 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10589 key.type = BTRFS_ROOT_ITEM_KEY;
10590 key.offset = (u64)-1;
10591 root = btrfs_read_fs_root(fs_info, &key);
10592 if (IS_ERR(root)) {
10593 fprintf(stderr, "Error reading data reloc tree\n");
10594 ret = PTR_ERR(root);
10597 record_root_in_trans(trans, root);
10598 ret = btrfs_fsck_reinit_root(trans, root, 0);
10601 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10603 btrfs_release_path(&path);
/*
 * Re-create the extent tree inside @trans.
 *
 * Visible sequence:
 *  - filesystems with the MIXED_GROUPS incompat feature get a "not
 *    supported" message
 *  - pin_metadata_blocks() pins the blocks currently in use
 *  - btrfs_free_block_groups() + reset_block_groups() rebuild the in-memory
 *    block groups
 *  - btrfs_fsck_reinit_root() re-initialises the extent root
 *  - every block group found by btrfs_lookup_first_block_group() has its
 *    item inserted into the extent root, followed by btrfs_extent_post_op()
 *  - reset_balance() clears pending balance state
 *
 * Each step prints a message on stderr when it fails; the returned values
 * are not visible in this excerpt.
 */
10607 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10608 struct btrfs_fs_info *fs_info)
10614 * The only reason we don't do this is because right now we're just
10615 * walking the trees we find and pinning down their bytes, we don't look
10616 * at any of the leaves. In order to do mixed groups we'd have to check
10617 * the leaves of any fs roots and pin down the bytes for any file
10618 * extents we find. Not hard but why do it if we don't have to?
10620 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10621 fprintf(stderr, "We don't support re-initing the extent tree "
10622 "for mixed block groups yet, please notify a btrfs "
10623 "developer you want to do this so they can add this "
10624 "functionality.\n");
10629 * first we need to walk all of the trees except the extent tree and pin
10630 * down the bytes that are in use so we don't overwrite any existing
10633 ret = pin_metadata_blocks(fs_info);
10635 fprintf(stderr, "error pinning down used bytes\n");
10640 * Need to drop all the block groups since we're going to recreate all
10643 btrfs_free_block_groups(fs_info);
10644 ret = reset_block_groups(fs_info);
10646 fprintf(stderr, "error resetting the block groups\n");
10650 /* Ok we can allocate now, reinit the extent root */
10651 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10653 fprintf(stderr, "extent root initialization failed\n");
10655 * When the transaction code is updated we should end the
10656 * transaction, but for now progs only knows about commit so
10657 * just return an error.
10663 * Now we have all the in-memory block groups setup so we can make
10664 * allocations properly, and the metadata we care about is safe since we
10665 * pinned all of it above.
10668 struct btrfs_block_group_cache *cache;
10670 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the next lookup right after this block group */
10673 start = cache->key.objectid + cache->key.offset;
10674 ret = btrfs_insert_item(trans, fs_info->extent_root,
10675 &cache->key, &cache->item,
10676 sizeof(cache->item));
10678 fprintf(stderr, "Error adding block group\n");
10681 btrfs_extent_post_op(trans, fs_info->extent_root);
10684 ret = reset_balance(trans, fs_info);
10686 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a COW of metadata block @eb.
 *
 * The owning root is read via btrfs_read_fs_root() using the header owner
 * of @eb (this replaces the @root argument), a transaction is started, and
 * btrfs_search_slot() is called with cow = 1 and path.lowest_level set to
 * the level of @eb, using the first key of @eb as the search key.  The
 * transaction is then committed and the path released.
 *
 * Returns PTR_ERR() of the root or of the transaction handle on the visible
 * error paths; the final return is not visible in this excerpt.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured here, while zero_log_tree() stores it in ret -- confirm whether
 * a commit failure should be reported.
 */
10691 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10693 struct btrfs_path path;
10694 struct btrfs_trans_handle *trans;
10695 struct btrfs_key key;
10698 printf("Recowing metadata block %llu\n", eb->start);
10699 key.objectid = btrfs_header_owner(eb);
10700 key.type = BTRFS_ROOT_ITEM_KEY;
10701 key.offset = (u64)-1;
10703 root = btrfs_read_fs_root(root->fs_info, &key);
10704 if (IS_ERR(root)) {
10705 fprintf(stderr, "Couldn't find owner root %llu\n",
10707 return PTR_ERR(root);
10710 trans = btrfs_start_transaction(root, 1);
10712 return PTR_ERR(trans);
10714 btrfs_init_path(&path);
/* Stop the search at the level of @eb; pick the first key by block kind */
10715 path.lowest_level = btrfs_header_level(eb);
10716 if (path.lowest_level)
10717 btrfs_node_key_to_cpu(eb, &key, 0);
10719 btrfs_item_key_to_cpu(eb, &key, 0);
10721 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10722 btrfs_commit_transaction(trans, root);
10723 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root identified by
 * bad->root_id.
 *
 * The root is read via btrfs_read_fs_root() (replacing the @root argument),
 * a transaction is started, bad->key is searched with ins_len -1 / cow 1,
 * the item is removed with btrfs_del_item(), and the transaction is
 * committed before the path is released.
 *
 * Returns PTR_ERR() of the root or of the transaction handle on the visible
 * error paths; the handling of the search result and the final return are
 * not visible in this excerpt.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured here -- confirm whether a commit failure should be reported.
 */
10727 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10729 struct btrfs_path path;
10730 struct btrfs_trans_handle *trans;
10731 struct btrfs_key key;
10734 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10735 bad->key.type, bad->key.offset);
10736 key.objectid = bad->root_id;
10737 key.type = BTRFS_ROOT_ITEM_KEY;
10738 key.offset = (u64)-1;
10740 root = btrfs_read_fs_root(root->fs_info, &key);
10741 if (IS_ERR(root)) {
10742 fprintf(stderr, "Couldn't find owner root %llu\n",
10744 return PTR_ERR(root);
10747 trans = btrfs_start_transaction(root, 1);
10749 return PTR_ERR(trans);
10751 btrfs_init_path(&path);
10752 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
10758 ret = btrfs_del_item(trans, root, &path);
10760 btrfs_commit_transaction(trans, root);
10761 btrfs_release_path(&path);
/*
 * Clear the log root recorded in the super block copy: within a new
 * transaction, set both log_root and log_root_level to 0 and commit.
 * `ret` receives PTR_ERR(trans) when the transaction cannot be started,
 * otherwise the result of btrfs_commit_transaction(); the return statement
 * itself is not visible in this excerpt.
 */
10765 static int zero_log_tree(struct btrfs_root *root)
10767 struct btrfs_trans_handle *trans;
10770 trans = btrfs_start_transaction(root, 1);
10771 if (IS_ERR(trans)) {
10772 ret = PTR_ERR(trans);
10775 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10776 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10777 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute checksums for the data range starting at @start, one sector at a
 * time: each iteration reads csum_root->sectorsize, reads the data at
 * start + offset into @buf with read_extent_data(), passes it to
 * btrfs_csum_file_block() (with start + len as the third argument), and
 * advances offset by one sector until offset reaches len.
 *
 * The remaining parameters, the error handling between the calls and the
 * return value are not visible in this excerpt.
 */
10781 static int populate_csum(struct btrfs_trans_handle *trans,
10782 struct btrfs_root *csum_root, char *buf, u64 start,
10789 while (offset < len) {
10790 sectorsize = csum_root->sectorsize;
10791 ret = read_extent_data(csum_root, buf, start + offset,
10795 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10796 start + offset, buf, sectorsize);
10799 offset += sectorsize;
/*
 * Fill the csum tree from the file extents of one fs/subvolume root.
 *
 * A sector-sized buffer is allocated, @cur_root is searched, and the items
 * are iterated with btrfs_next_item().  For EXTENT_DATA items of type
 * BTRFS_FILE_EXTENT_REG the disk bytenr and disk byte count are passed to
 * populate_csum(); a result of -EEXIST is tested for explicitly.
 *
 * The search key, the statements controlled by the visible conditions, the
 * release of `buf` and the return value are not visible in this excerpt.
 */
10804 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10805 struct btrfs_root *csum_root,
10806 struct btrfs_root *cur_root)
10808 struct btrfs_path path;
10809 struct btrfs_key key;
10810 struct extent_buffer *node;
10811 struct btrfs_file_extent_item *fi;
10818 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10822 btrfs_init_path(&path);
10826 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
10829 /* Iterate all regular file extents and fill its csum */
10831 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10833 if (key.type != BTRFS_EXTENT_DATA_KEY)
10835 node = path.nodes[0];
10836 slot = path.slots[0];
10837 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10838 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The on-disk extent range is what gets checksummed */
10840 start = btrfs_file_extent_disk_bytenr(node, fi);
10841 len = btrfs_file_extent_disk_num_bytes(node, fi);
10843 ret = populate_csum(trans, csum_root, buf, start, len);
10844 if (ret == -EEXIST)
10850 * TODO: if next leaf is corrupted, jump to nearest next valid
10853 ret = btrfs_next_item(cur_root, &path);
10863 btrfs_release_path(&path);
/*
 * Fill the csum tree by visiting fs/subvolume roots.
 *
 * The tree root is searched starting at (BTRFS_FS_TREE_OBJECTID,
 * ROOT_ITEM) and iterated with btrfs_next_item().  The visible filters test
 * for an objectid above BTRFS_LAST_FREE_OBJECTID, a non-ROOT_ITEM key and a
 * non-fs-tree objectid.  Each remaining root is read with
 * btrfs_read_fs_root() (key.offset set to (u64)-1) and handed to
 * fill_csum_tree_from_one_fs_root().
 *
 * The statements controlled by the filters and the return value are not
 * visible in this excerpt.
 */
10868 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10869 struct btrfs_root *csum_root)
10871 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10872 struct btrfs_path path;
10873 struct btrfs_root *tree_root = fs_info->tree_root;
10874 struct btrfs_root *cur_root;
10875 struct extent_buffer *node;
10876 struct btrfs_key key;
10880 btrfs_init_path(&path);
10881 key.objectid = BTRFS_FS_TREE_OBJECTID;
10883 key.type = BTRFS_ROOT_ITEM_KEY;
10884 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
10893 node = path.nodes[0];
10894 slot = path.slots[0];
10895 btrfs_item_key_to_cpu(node, &key, slot);
10896 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10898 if (key.type != BTRFS_ROOT_ITEM_KEY)
10900 if (!is_fstree(key.objectid))
10902 key.offset = (u64)-1;
10904 cur_root = btrfs_read_fs_root(fs_info, &key);
10905 if (IS_ERR(cur_root) || !cur_root) {
10906 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10910 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10915 ret = btrfs_next_item(tree_root, &path);
10925 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking the extent tree.
 *
 * The extent root is searched for EXTENT_ITEM keys and a sector-sized
 * buffer is allocated.  The loop moves to the next leaf when the slot runs
 * past the current one, tests for non-EXTENT_ITEM keys and for extent items
 * without BTRFS_EXTENT_FLAG_DATA, and calls populate_csum() starting at
 * key.objectid.
 *
 * The statements controlled by those tests, the release of `buf` and the
 * return value are not visible in this excerpt.
 */
10929 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10930 struct btrfs_root *csum_root)
10932 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10933 struct btrfs_path path;
10934 struct btrfs_extent_item *ei;
10935 struct extent_buffer *leaf;
10937 struct btrfs_key key;
10940 btrfs_init_path(&path);
10942 key.type = BTRFS_EXTENT_ITEM_KEY;
10944 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10946 btrfs_release_path(&path);
10950 buf = malloc(csum_root->sectorsize);
10952 btrfs_release_path(&path);
10957 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10958 ret = btrfs_next_leaf(extent_root, &path);
10966 leaf = path.nodes[0];
10968 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10969 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10974 ei = btrfs_item_ptr(leaf, path.slots[0],
10975 struct btrfs_extent_item);
10976 if (!(btrfs_extent_flags(leaf, ei) &
10977 BTRFS_EXTENT_FLAG_DATA)) {
10982 ret = populate_csum(trans, csum_root, buf, key.objectid,
10989 btrfs_release_path(&path);
10995 * Recalculate the csum and put it into the csum tree.
10997 * Extent tree init will wipe out all the extent info, so in that case, we
10998 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10999 * will use fs/subvol trees to init the csum tree.
11001 static int fill_csum_tree(struct btrfs_trans_handle *trans,
11002 struct btrfs_root *csum_root,
11003 int search_fs_tree)
/*
 * Pure dispatcher: a non-zero search_fs_tree selects the fs/subvolume tree
 * walk, otherwise the extent tree walk is used. The helper's return value is
 * returned unchanged.
 */
11005 if (search_fs_tree)
11006 return fill_csum_tree_from_fs(trans, csum_root);
11008 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: every cache_extent is removed from
 * the tree, then the tree itself is freed and the pointer reset to NULL so
 * that build_roots_info_cache() allocates a fresh one next time.
 */
11011 static void free_roots_info_cache(void)
11013 if (!roots_info_cache)
11016 while (!cache_tree_empty(roots_info_cache)) {
11017 struct cache_extent *entry;
11018 struct root_item_info *rii;
11020 entry = first_cache_extent(roots_info_cache);
11023 remove_cache_extent(roots_info_cache, entry);
/* The cache_extent is embedded in root_item_info; recover the container. */
11024 rii = container_of(entry, struct root_item_info, cache_extent);
/*
 * NOTE(review): the release of rii is expected at this point but is not
 * visible in this listing - confirm.
 */
11028 free(roots_info_cache);
11029 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, information about the tree
 * block with the highest level seen for that root.
 *
 * Results are stored in the global roots_info_cache (allocated here on first
 * use) as root_item_info entries keyed by root id with a range size of 1.
 * For each entry the level, bytenr, generation and a node count are kept.
 *
 * @info: filesystem whose extent_root is scanned
 *
 * NOTE(review): several lines (declarations of ret/flags/type/level/root_id,
 * the loop statement, branch bodies and the return) are not visible in this
 * listing - confirm details against the complete file.
 */
11032 static int build_roots_info_cache(struct btrfs_fs_info *info)
11035 struct btrfs_key key;
11036 struct extent_buffer *leaf;
11037 struct btrfs_path path;
/* Lazily allocate and initialize the global cache tree. */
11039 if (!roots_info_cache) {
11040 roots_info_cache = malloc(sizeof(*roots_info_cache));
11041 if (!roots_info_cache)
11043 cache_tree_init(roots_info_cache);
11046 btrfs_init_path(&path);
11048 key.type = BTRFS_EXTENT_ITEM_KEY;
11050 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11053 leaf = path.nodes[0];
11056 struct btrfs_key found_key;
11057 struct btrfs_extent_item *ei;
11058 struct btrfs_extent_inline_ref *iref;
11059 int slot = path.slots[0];
11064 struct cache_extent *entry;
11065 struct root_item_info *rii;
/* Past the last item of this leaf: step to the next leaf and reload. */
11067 if (slot >= btrfs_header_nritems(leaf)) {
11068 ret = btrfs_next_leaf(info->extent_root, &path);
11075 leaf = path.nodes[0];
11076 slot = path.slots[0];
11079 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only EXTENT_ITEM and METADATA_ITEM keys pass this test. */
11081 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11082 found_key.type != BTRFS_METADATA_ITEM_KEY)
11085 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11086 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM must carry the TREE_BLOCK flag to be considered. */
11088 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11089 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Locate the first inline ref and the level. For METADATA_ITEM the ref
 * directly follows the extent item and the level is the key offset;
 * otherwise a btrfs_tree_block_info sits in between and carries the level.
 */
11092 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11093 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11094 level = found_key.offset;
11096 struct btrfs_tree_block_info *binfo;
11098 binfo = (struct btrfs_tree_block_info *)(ei + 1);
11099 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11100 level = btrfs_tree_block_level(leaf, binfo);
11104 * For a root extent, it must be of the following type and the
11105 * first (and only one) iref in the item.
11107 type = btrfs_extent_inline_ref_type(leaf, iref);
11108 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The inline ref offset is used as the root id, which is the cache key. */
11111 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11112 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entries start with level (u8)-1, meaning no level recorded yet. */
11114 rii = malloc(sizeof(struct root_item_info));
11119 rii->cache_extent.start = root_id;
11120 rii->cache_extent.size = 1;
11121 rii->level = (u8)-1;
11122 entry = &rii->cache_extent;
11123 ret = insert_cache_extent(roots_info_cache, entry);
11126 rii = container_of(entry, struct root_item_info,
11130 ASSERT(rii->cache_extent.start == root_id);
11131 ASSERT(rii->cache_extent.size == 1);
/*
 * A higher level than recorded so far (or the first block seen for this
 * root) replaces the stored info and resets the node count to 1.
 */
11133 if (level > rii->level || rii->level == (u8)-1) {
11134 rii->level = level;
11135 rii->bytenr = found_key.objectid;
11136 rii->gen = btrfs_extent_generation(leaf, ei);
11137 rii->node_count = 1;
11138 } else if (level == rii->level) {
/*
 * NOTE(review): the body of the equal-level branch is not visible here.
 * maybe_repair_root_item() rejects node_count != 1, so this branch
 * presumably counts additional blocks at the same level - confirm.
 */
11146 btrfs_release_path(&path);
/*
 * Compare one on-disk root item against the information collected in
 * roots_info_cache and, unless read_only_mode is set, rewrite it.
 *
 * @info:           filesystem info (not referenced in the visible lines)
 * @path:           path whose nodes[0]/slots[0] point at the root item
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero to only detect a mismatch without writing
 *
 * The fields compared, and rewritten on mismatch, are bytenr, level and
 * generation.
 *
 * NOTE(review): the return statements and the calls that emit the messages
 * below are not visible in this listing - confirm the return convention
 * against the complete file.
 */
11151 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11152 struct btrfs_path *path,
11153 const struct btrfs_key *root_key,
11154 const int read_only_mode)
11156 const u64 root_id = root_key->objectid;
11157 struct cache_extent *entry;
11158 struct root_item_info *rii;
11159 struct btrfs_root_item ri;
11160 unsigned long offset;
11162 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11165 "Error: could not find extent items for root %llu\n",
11166 root_key->objectid);
11170 rii = container_of(entry, struct root_item_info, cache_extent);
11171 ASSERT(rii->cache_extent.start == root_id);
11172 ASSERT(rii->cache_extent.size == 1);
/* Exactly one block at the recorded (highest) level is required. */
11174 if (rii->node_count != 1) {
11176 "Error: could not find btree root extent for root %llu\n",
/* Copy the root item at the current slot out of the leaf into ri. */
11181 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11182 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11184 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11185 btrfs_root_level(&ri) != rii->level ||
11186 btrfs_root_generation(&ri) != rii->gen) {
11189 * If we're in repair mode but our caller told us to not update
11190 * the root item, i.e. just check if it needs to be updated, don't
11191 * print this message, since the caller will call us again shortly
11192 * for the same root item without read only mode (the caller will
11193 * open a transaction first).
11195 if (!(read_only_mode && repair))
11197 "%sroot item for root %llu,"
11198 " current bytenr %llu, current gen %llu, current level %u,"
11199 " new bytenr %llu, new gen %llu, new level %u\n",
11200 (read_only_mode ? "" : "fixing "),
11202 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11203 btrfs_root_level(&ri),
11204 rii->bytenr, rii->gen, rii->level);
11206 if (btrfs_root_generation(&ri) > rii->gen) {
11208 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11209 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the three fields in the local copy and write it back to the leaf. */
11213 if (!read_only_mode) {
11214 btrfs_set_root_bytenr(&ri, rii->bytenr);
11215 btrfs_set_root_level(&ri, rii->level);
11216 btrfs_set_root_generation(&ri, rii->gen);
11217 write_extent_buffer(path->nodes[0], &ri,
11218 offset, sizeof(ri));
11228 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11229 * caused read-only snapshots to be corrupted if they were created at a moment
11230 * when the source subvolume/snapshot had orphan items. The issue was that the
11231 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11232 * node instead of the post orphan cleanup root node.
11233 * So this function, and its callees, just detects and fixes those cases. Even
11234 * though the regression was for read-only snapshots, this function applies to
11235 * any snapshot/subvolume root.
11236 * This must be run before any other repair code - not doing so makes other
11237 * repair code delete or modify backrefs in the extent tree for example, which
11238 * will result in an inconsistent fs after repairing the root items.
11240 static int repair_root_items(struct btrfs_fs_info *info)
11242 struct btrfs_path path;
11243 struct btrfs_key key;
11244 struct extent_buffer *leaf;
11245 struct btrfs_trans_handle *trans = NULL;
11248 int need_trans = 0;
11250 btrfs_init_path(&path);
/* Collect the per-root information from the extent tree first. */
11252 ret = build_roots_info_cache(info);
/* Root items are scanned starting at BTRFS_FIRST_FREE_OBJECTID. */
11256 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11257 key.type = BTRFS_ROOT_ITEM_KEY;
11262 * Avoid opening and committing transactions if a leaf doesn't have
11263 * any root items that need to be fixed, so that we avoid rotating
11264 * backup roots unnecessarily.
11267 trans = btrfs_start_transaction(info->tree_root, 1);
11268 if (IS_ERR(trans)) {
11269 ret = PTR_ERR(trans);
11274 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11278 leaf = path.nodes[0];
11281 struct btrfs_key found_key;
/*
 * End of the current leaf: find_next_key() is asked for the key to resume
 * from and the path is dropped; a transaction commit follows in the lines
 * below (its surrounding condition is not visible in this listing).
 */
11283 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11284 int no_more_keys = find_next_key(&path, &key);
11286 btrfs_release_path(&path);
11288 ret = btrfs_commit_transaction(trans,
11300 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/*
 * Keys are tested for BTRFS_ROOT_ITEM_KEY and for the
 * BTRFS_TREE_RELOC_OBJECTID objectid before maybe_repair_root_item() is
 * called; the bodies of both tests are not visible in this listing.
 */
11302 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11304 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11307 ret = maybe_repair_root_item(info, &path, &found_key,
/* No transaction is open yet although repair mode is on. */
11312 if (!trans && repair) {
11315 btrfs_release_path(&path);
/*
 * Cleanup: drop the roots info cache and the path.
 * NOTE(review): the final btrfs_commit_transaction() call below is a bare
 * statement, so its return value is discarded - confirm this is intended.
 */
11325 free_roots_info_cache();
11326 btrfs_release_path(&path);
11328 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of block groups and then set the super block
 * cache generation to (u64)-1 inside a transaction.
 *
 * @fs_info: filesystem to operate on
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started; other
 * return paths are not visible in this listing.
 *
 * NOTE(review): the btrfs_commit_transaction() call at the end is a bare
 * statement, so a commit failure is not reported by the visible code -
 * confirm this is intended.
 */
11335 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11337 struct btrfs_trans_handle *trans;
11338 struct btrfs_block_group_cache *bg_cache;
11342 /* Clear all free space cache inodes and its extent data */
11344 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11347 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup right after the end of this block group. */
11350 current = bg_cache->key.objectid + bg_cache->key.offset;
11353 /* Don't forget to set cache_generation to -1 */
11354 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11355 if (IS_ERR(trans)) {
11356 error("failed to update super block cache generation");
11357 return PTR_ERR(trans);
11359 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11360 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for "btrfs check". cmd_check() passes this array to usage() on
 * invalid invocations. The first entry is the synopsis line; the remaining
 * entries are the description and one line (or more) per option.
 */
11365 const char * const cmd_check_usage[] = {
11366 "btrfs check [options] <device>",
11367 "Check structural integrity of a filesystem (unmounted).",
11368 "Check structural integrity of an unmounted filesystem. Verify internal",
11369 "trees' consistency and item connectivity. In the repair mode try to",
11370 "fix the problems found. ",
11371 "WARNING: the repair mode is considered dangerous",
11373 "-s|--super <superblock> use this superblock copy",
11374 "-b|--backup use the first valid backup root copy",
11375 "--repair try to repair the filesystem",
11376 "--readonly run in read-only mode (default)",
11377 "--init-csum-tree create a new CRC tree",
11378 "--init-extent-tree create a new extent tree",
11379 "--mode <MODE> allows choice of memory/IO trade-offs",
11380 " where MODE is one of:",
11381 " original - read inodes and extents to memory (requires",
11382 " more memory, does less IO)",
11383 " lowmem - try to use less memory but read blocks again",
11385 "--check-data-csum verify checksums of data blocks",
11386 "-Q|--qgroup-report print a report on qgroup consistency",
11387 "-E|--subvol-extents <subvolid>",
11388 " print subvolume extents and sharing state",
11389 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11390 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11391 "-p|--progress indicate progress",
11392 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * @argc, @argv: command line, parsed with getopt_long(); argv[optind] is the
 *               device path that gets checked for being mounted and opened.
 *
 * After option parsing the filesystem is opened with the accumulated
 * ctree_flags. Depending on the options the function then clears the space
 * cache (v1 or v2), prints a qgroup or subvolume extent report, or
 * reinitializes the extent/csum trees, and runs the checks: extents, root
 * items, free space cache or tree, fs roots, csums, root refs and, when
 * quota is enabled, quota groups. The collected byte counters are printed
 * at the end.
 *
 * NOTE(review): many lines of this function (case labels, error branches,
 * labels, the return) are not visible in this listing - confirm exit paths
 * and the return value against the complete file.
 */
11396 int cmd_check(int argc, char **argv)
11398 struct cache_tree root_cache;
11399 struct btrfs_root *root;
11400 struct btrfs_fs_info *info;
11403 u64 tree_root_bytenr = 0;
11404 u64 chunk_root_bytenr = 0;
11405 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11408 int init_csum_tree = 0;
11410 int clear_space_cache = 0;
11411 int qgroup_report = 0;
11412 int qgroups_repaired = 0;
11413 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Identifiers for options without a short form; numbering starts at 257. */
11417 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11418 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11419 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11420 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11421 static const struct option long_options[] = {
11422 { "super", required_argument, NULL, 's' },
11423 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11424 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11425 { "init-csum-tree", no_argument, NULL,
11426 GETOPT_VAL_INIT_CSUM },
11427 { "init-extent-tree", no_argument, NULL,
11428 GETOPT_VAL_INIT_EXTENT },
11429 { "check-data-csum", no_argument, NULL,
11430 GETOPT_VAL_CHECK_CSUM },
11431 { "backup", no_argument, NULL, 'b' },
11432 { "subvol-extents", required_argument, NULL, 'E' },
11433 { "qgroup-report", no_argument, NULL, 'Q' },
11434 { "tree-root", required_argument, NULL, 'r' },
11435 { "chunk-root", required_argument, NULL,
11436 GETOPT_VAL_CHUNK_TREE },
11437 { "progress", no_argument, NULL, 'p' },
11438 { "mode", required_argument, NULL,
11440 { "clear-space-cache", required_argument, NULL,
11441 GETOPT_VAL_CLEAR_SPACE_CACHE},
11442 { NULL, 0, NULL, 0}
/* Short options: -a is accepted and ignored; -s and -r take an argument. */
11445 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11449 case 'a': /* ignored */ break;
11451 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Superblock copy index; it must be below BTRFS_SUPER_MIRROR_MAX. */
11454 num = arg_strtou64(optarg);
11455 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11457 "super mirror should be less than %d",
11458 BTRFS_SUPER_MIRROR_MAX);
11461 bytenr = btrfs_sb_offset(((int)num));
11462 printf("using SB copy %llu, bytenr %llu\n", num,
11463 (unsigned long long)bytenr);
11469 subvolid = arg_strtou64(optarg);
11472 tree_root_bytenr = arg_strtou64(optarg);
11474 case GETOPT_VAL_CHUNK_TREE:
11475 chunk_root_bytenr = arg_strtou64(optarg);
11478 ctx.progress_enabled = true;
11482 usage(cmd_check_usage);
/* The options below that modify the filesystem add OPEN_CTREE_WRITES. */
11483 case GETOPT_VAL_REPAIR:
11484 printf("enabling repair mode\n");
11486 ctree_flags |= OPEN_CTREE_WRITES;
11488 case GETOPT_VAL_READONLY:
11491 case GETOPT_VAL_INIT_CSUM:
11492 printf("Creating a new CRC tree\n");
11493 init_csum_tree = 1;
11495 ctree_flags |= OPEN_CTREE_WRITES;
11497 case GETOPT_VAL_INIT_EXTENT:
11498 init_extent_tree = 1;
11499 ctree_flags |= (OPEN_CTREE_WRITES |
11500 OPEN_CTREE_NO_BLOCK_GROUPS);
11503 case GETOPT_VAL_CHECK_CSUM:
11504 check_data_csum = 1;
11506 case GETOPT_VAL_MODE:
11507 check_mode = parse_check_mode(optarg);
11508 if (check_mode == CHECK_MODE_UNKNOWN) {
11509 error("unknown mode: %s", optarg);
/* Only the literal arguments "v1" and "v2" are accepted. */
11513 case GETOPT_VAL_CLEAR_SPACE_CACHE:
11514 if (strcmp(optarg, "v1") == 0) {
11515 clear_space_cache = 1;
11516 } else if (strcmp(optarg, "v2") == 0) {
11517 clear_space_cache = 2;
11518 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
11521 "invalid argument to --clear-space-cache, must be v1 or v2");
11524 ctree_flags |= OPEN_CTREE_WRITES;
/* One positional argument (the device) is expected after the options. */
11529 if (check_argc_exact(argc - optind, 1))
11530 usage(cmd_check_usage);
/* With --progress, start the status task; it begins in the TASK_NOTHING state. */
11532 if (ctx.progress_enabled) {
11533 ctx.tp = TASK_NOTHING;
11534 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11537 /* This check is the only reason for --readonly to exist */
11538 if (readonly && repair) {
11539 error("repair options are not compatible with --readonly");
11544 * Not supported yet
11546 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11547 error("low memory mode doesn't support repair yet");
11552 cache_tree_init(&root_cache);
/* Refuse to operate on a mounted filesystem. */
11554 if((ret = check_mounted(argv[optind])) < 0) {
11555 error("could not check mount status: %s", strerror(-ret));
11558 error("%s is currently mounted, aborting", argv[optind]);
11563 /* only allow partial opening under repair mode */
11565 ctree_flags |= OPEN_CTREE_PARTIAL;
11567 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11568 chunk_root_bytenr, ctree_flags);
11570 error("cannot open file system");
11575 global_info = info;
11576 root = info->fs_root;
/*
 * --clear-space-cache handling. Clearing v1 reports an error when the free
 * space tree feature is set; clearing v2 reports that there is nothing to
 * clear when that feature is not set.
 */
11577 if (clear_space_cache == 1) {
11578 if (btrfs_fs_compat_ro(info,
11579 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11581 "free space cache v2 detected, use --clear-space-cache v2");
11585 printf("Clearing free space cache\n");
11586 ret = clear_free_space_cache(info);
11588 error("failed to clear free space cache");
11591 printf("Free space cache cleared\n");
11594 } else if (clear_space_cache == 2) {
11595 if (!btrfs_fs_compat_ro(info,
11596 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11597 printf("no free space cache v2 to clear\n");
11601 printf("Clear free space cache v2\n");
11602 ret = btrfs_clear_free_space_tree(info);
11604 error("failed to clear free space cache v2: %d", ret);
11607 printf("free space cache v2 cleared\n");
11613 * repair mode will force us to commit transaction which
11614 * will make us fail to load log tree when mounting.
11616 if (repair && btrfs_super_log_root(info->super_copy)) {
11617 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11622 ret = zero_log_tree(root);
11624 error("failed to zero log tree: %d", ret);
/* Reporting modes: quota group report or extent state of one subvolume. */
11629 uuid_unparse(info->super_copy->fsid, uuidbuf);
11630 if (qgroup_report) {
11631 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11633 ret = qgroup_verify_all(info);
11639 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11640 subvolid, argv[optind], uuidbuf);
11641 ret = print_extent_state(info, subvolid);
11644 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree, device and chunk roots must be readable for anything below. */
11646 if (!extent_buffer_uptodate(info->tree_root->node) ||
11647 !extent_buffer_uptodate(info->dev_root->node) ||
11648 !extent_buffer_uptodate(info->chunk_root->node)) {
11649 error("critical roots corrupted, unable to check the filesystem");
/* Requested tree reinitialization runs in its own transaction. */
11654 if (init_extent_tree || init_csum_tree) {
11655 struct btrfs_trans_handle *trans;
11657 trans = btrfs_start_transaction(info->extent_root, 0);
11658 if (IS_ERR(trans)) {
11659 error("error starting transaction");
11660 ret = PTR_ERR(trans);
11664 if (init_extent_tree) {
11665 printf("Creating a new extent tree\n");
11666 ret = reinit_extent_tree(trans, info);
11671 if (init_csum_tree) {
11672 printf("Reinitialize checksum tree\n");
11673 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11675 error("checksum tree initialization failed: %d",
11681 ret = fill_csum_tree(trans, info->csum_root,
11684 error("checksum tree refilling failed: %d", ret);
11689 * Ok now we commit and run the normal fsck, which will add
11690 * extent entries for all of the items it finds.
11692 ret = btrfs_commit_transaction(trans, info->extent_root);
11696 if (!extent_buffer_uptodate(info->extent_root->node)) {
11697 error("critical: extent_root, unable to check the filesystem");
11701 if (!extent_buffer_uptodate(info->csum_root->node)) {
11702 error("critical: csum_root, unable to check the filesystem");
/* Extent check; the implementation depends on the selected check mode. */
11707 if (!ctx.progress_enabled)
11708 fprintf(stderr, "checking extents\n");
11709 if (check_mode == CHECK_MODE_LOWMEM)
11710 ret = check_chunks_and_extents_v2(root);
11712 ret = check_chunks_and_extents(root);
11715 "errors found in extent allocation tree or chunk allocation");
/* The value returned by repair_root_items() is printed as a root count below. */
11717 ret = repair_root_items(info);
11721 fprintf(stderr, "Fixed %d roots.\n", ret);
11723 } else if (ret > 0) {
11725 "Found %d roots with an outdated root item.\n",
11728 "Please run a filesystem check with the option --repair to fix them.\n");
11733 if (!ctx.progress_enabled) {
11734 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11735 fprintf(stderr, "checking free space tree\n");
11737 fprintf(stderr, "checking free space cache\n");
11739 ret = check_space_cache(root);
11744 * We used to have to have these hole extents in between our real
11745 * extents so if we don't have this flag set we need to make sure there
11746 * are no gaps in the file extents for inodes, otherwise we can just
11747 * ignore it when this happens.
11749 no_holes = btrfs_fs_incompat(root->fs_info,
11750 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11751 if (!ctx.progress_enabled)
11752 fprintf(stderr, "checking fs roots\n");
11753 ret = check_fs_roots(root, &root_cache);
11757 fprintf(stderr, "checking csums\n");
11758 ret = check_csums(root);
11762 fprintf(stderr, "checking root refs\n");
11763 ret = check_root_refs(root, &root_cache);
/* In repair mode, hand every buffer queued on recow_ebs to recow_extent_buffer(). */
11767 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11768 struct extent_buffer *eb;
11770 eb = list_first_entry(&root->fs_info->recow_ebs,
11771 struct extent_buffer, recow);
11772 list_del_init(&eb->recow);
11773 ret = recow_extent_buffer(root, eb);
/* Drain delete_items, passing each entry to delete_bad_item(). */
11778 while (!list_empty(&delete_items)) {
11779 struct bad_item *bad;
11781 bad = list_first_entry(&delete_items, struct bad_item, list);
11782 list_del_init(&bad->list);
11784 ret = delete_bad_item(root, bad);
/* Quota groups are verified only when quota is enabled on the filesystem. */
11788 if (info->quota_enabled) {
11790 fprintf(stderr, "checking quota groups\n");
11791 err = qgroup_verify_all(info);
11795 err = repair_qgroups(info, &qgroups_repaired);
11800 if (!list_empty(&root->fs_info->recow_ebs)) {
11801 error("transid errors in file system");
11805 /* Don't override original ret */
11806 if (!ret && qgroups_repaired)
11807 ret = qgroups_repaired;
11809 if (found_old_backref) { /*
11810 * there was a disk format change when mixed
11811 * backref was in testing tree. The old format
11812 * existed about one week.
11814 printf("\n * Found old mixed backref format. "
11815 "The old format is not supported! *"
11816 "\n * Please mount the FS in readonly mode, "
11817 "backup data and re-format the FS. *\n\n");
11820 printf("found %llu bytes used err is %d\n",
11821 (unsigned long long)bytes_used, ret);
11822 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11823 printf("total tree bytes: %llu\n",
11824 (unsigned long long)total_btree_bytes);
11825 printf("total fs tree bytes: %llu\n",
11826 (unsigned long long)total_fs_tree_bytes);
11827 printf("total extent tree bytes: %llu\n",
11828 (unsigned long long)total_extent_tree_bytes);
11829 printf("btree space waste bytes: %llu\n",
11830 (unsigned long long)btree_space_waste);
11831 printf("file data blocks allocated: %llu\n referenced %llu\n",
11832 (unsigned long long)data_bytes_allocated,
11833 (unsigned long long)data_bytes_referenced);
11835 free_qgroup_counts();
11836 free_root_recs_tree(&root_cache);
11840 if (ctx.progress_enabled)
11841 task_deinit(ctx.info);