2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
/* Accounting totals accumulated over the whole check run. */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
/* Set once an old-format (pre mixed-backref) reference is seen */
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* Mode flags, presumably driven by command-line options -- confirm in main() */
static int repair = 0;
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Shared state for the background status-printing task */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
/*
 * Common header embedded in both data and tree backrefs; linked into
 * the owning extent_record's backref list via @list.
 */
struct extent_backref {
	struct list_head list;
	unsigned int is_data:1;		/* data backref (1) vs tree backref (0) */
	unsigned int found_extent_tree:1;
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;

/* Convert a list_head linked in extent_record::backrefs back to the backref. */
static inline struct extent_backref* to_extent_backref(struct list_head *entry)
	return list_entry(entry, struct extent_backref, list);

	struct extent_backref node;	/* embedded common backref header */

/* Downcast the common header to the enclosing data_backref. */
static inline struct data_backref* to_data_backref(struct extent_backref *back)
	return container_of(back, struct data_backref, node);
/*
 * Much like data_backref, just with the undetermined members removed
 * and changed to use list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
	struct list_head list;

/* Backref from a tree block; shares the common extent_backref header. */
struct tree_backref {
	struct extent_backref node;

/* Downcast the common header to the enclosing tree_backref. */
static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
	return container_of(back, struct tree_backref, node);

/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };
/*
 * In-memory record of one extent discovered during the scan, indexed by
 * the embedded cache_extent and carrying all backrefs found for it.
 */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;		/* duplicate extent items for same range */
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 extent_item_refs;
	u64 parent_generation;
	/* 2 bits wide so it can hold FLAG_UNSET (2) in addition to 0/1 */
	unsigned int flag_block_full_backref:2;
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;

/* Convert a list_head linked via extent_record::list back to the record. */
static inline struct extent_record* to_extent_record(struct list_head *entry)
	return container_of(entry, struct extent_record, list);
/*
 * One name pointing at an inode, merged from DIR_ITEM, DIR_INDEX and
 * INODE_REF/EXTREF items; see add_inode_backref().
 */
struct inode_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_inode_ref:1;
	unsigned int filetype:8;	/* BTRFS_FT_* from the dir item */
	unsigned int ref_type;

/* Convert a list_head linked in inode_record::backrefs back to the backref. */
static inline struct inode_backref* to_inode_backref(struct list_head *entry)
	return list_entry(entry, struct inode_backref, list);
/* One root item queued for walking; drop_key marks a partially dropped root. */
struct root_item_record {
	struct list_head list;
	struct btrfs_key drop_key;

/* Error bits accumulated in backref->errors and printed by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM (1 << 0)
#define REF_ERR_NO_DIR_INDEX (1 << 1)
#define REF_ERR_NO_INODE_REF (1 << 2)
#define REF_ERR_DUP_DIR_ITEM (1 << 3)
#define REF_ERR_DUP_DIR_INDEX (1 << 4)
#define REF_ERR_DUP_INODE_REF (1 << 5)
#define REF_ERR_INDEX_UNMATCH (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH (1 << 7)
#define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
#define REF_ERR_NO_ROOT_REF (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF (1 << 10)
#define REF_ERR_DUP_ROOT_REF (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/* A gap in a file's extent coverage; nodes live in inode_record::holes. */
struct file_extent_hole {

/*
 * Aggregate per-inode state built while walking an fs tree; merged and
 * validated by maybe_free_inode_rec().
 */
struct inode_record {
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;	/* BTRFS_INODE_NODATASUM flag was set */
	struct rb_root holes;		/* file_extent_hole tree, sorted by start */
	struct list_head orphan_extents;
/* Error bits accumulated in inode_record::errors; see print_inode_error(). */
#define I_ERR_NO_INODE_ITEM (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM (1 << 1)
#define I_ERR_DUP_INODE_ITEM (1 << 2)
#define I_ERR_DUP_DIR_INDEX (1 << 3)
#define I_ERR_ODD_DIR_ITEM (1 << 4)
#define I_ERR_ODD_FILE_EXTENT (1 << 5)
#define I_ERR_BAD_FILE_EXTENT (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
#define I_ERR_DIR_ISIZE_WRONG (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
#define I_ERR_ODD_CSUM_ITEM (1 << 11)
#define I_ERR_SOME_CSUM_MISSING (1 << 12)
#define I_ERR_LINK_COUNT_WRONG (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * One reference to a subvolume root, merged from ROOT_REF/ROOT_BACKREF
 * and the directory entries naming the subvolume.
 */
struct root_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_back_ref:1;
	unsigned int found_forward_ref:1;
	unsigned int reachable:1;

/* Convert a list_head linked in root_record::backrefs back to the backref. */
static inline struct root_backref* to_root_backref(struct list_head *entry)
	return list_entry(entry, struct root_backref, list);
	/*
	 * NOTE(review): the fields below belong to the root-record, ptr-node
	 * and shared-node structures whose headers fall outside this view --
	 * confirm against the full file before relying on the grouping.
	 */
	struct list_head backrefs;
	struct cache_extent cache;
	unsigned int found_root_item:1;
	struct cache_extent cache;
	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	struct inode_record *current;	/* inode currently being assembled */

/* State threaded through the tree walk; one shared_node slot per level. */
struct walk_control {
	struct cache_tree shared;
	struct shared_node *nodes[BTRFS_MAX_LEVEL];
	struct btrfs_key key;
	struct list_head list;

struct extent_entry {
	struct list_head list;

struct root_item_info {
	/* level of the root */
	/* number of nodes at this level, must be 1 for a root */
	struct cache_extent cache_extent;
/*
 * Background thread body: once per second print a spinner plus a text
 * description of the current task (priv->tp) until TASK_NOTHING is set.
 */
static void *print_status_check(void *p)
	struct task_ctx *priv = p;
	const char work_indicator[] = { '.', 'o', 'O', 'o' };
	static char *task_position_string[] = {
		"checking free space cache",
	task_period_start(priv->info, 1000 /* 1s */);
		if (priv->tp == TASK_NOTHING)
		printf("%s [%c]\r", task_position_string[priv->tp],
				work_indicator[count % 4]);
		task_period_wait(priv->info);

/* Task-framework callback paired with print_status_check(). */
static int print_status_return(void *p)
/* Compatible function to allow reuse of old codes */
/* Return the start offset of the lowest hole in @holes. */
static u64 first_extent_gap(struct rb_root *holes)
	struct file_extent_hole *hole;

	if (RB_EMPTY_ROOT(holes))
	/* rb_first() gives the hole with the smallest start */
	hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for file_extent_hole ordered by start offset.
 * Deliberately never returns 0: equal-start holes are both inserted
 * and merged afterwards by add_file_extent_hole().
 */
static int compare_hole(struct rb_node *node1, struct rb_node *node2)
	struct file_extent_hole *hole1;
	struct file_extent_hole *hole2;

	hole1 = rb_entry(node1, struct file_extent_hole, node);
	hole2 = rb_entry(node2, struct file_extent_hole, node);

	if (hole1->start > hole2->start)
	if (hole1->start < hole2->start)
	/* Now hole1->start == hole2->start */
	if (hole1->len >= hole2->len)
		/*
		 * Hole 1 will be merge center
		 * Same hole will be merged later
		 */
	/* Hole 2 will be merge center */
/*
 * Add a hole to the record
 *
 * This will do hole merge for copy_file_extent_holes(),
 * which will ensure there won't be continuous holes.
 */
static int add_file_extent_hole(struct rb_root *holes,
	struct file_extent_hole *hole;
	struct file_extent_hole *prev = NULL;
	struct file_extent_hole *next = NULL;

	hole = malloc(sizeof(*hole));
	/* Since compare will not return 0, no -EEXIST will happen */
	rb_insert(holes, &hole->node, compare_hole);

	/* simple merge with previous hole */
	if (rb_prev(&hole->node))
		prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
	if (prev && prev->start + prev->len >= hole->start) {
		/* absorb prev: extend backwards and drop prev's node */
		hole->len = hole->start + hole->len - prev->start;
		hole->start = prev->start;
		rb_erase(&prev->node, holes);

	/* iterate merge with next holes */
		if (!rb_next(&hole->node))
		next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
		if (hole->start + hole->len >= next->start) {
			if (hole->start + hole->len <= next->start + next->len)
				hole->len = next->start + next->len -
			rb_erase(&next->node, holes);
/*
 * rb_search() comparator: find the hole whose [start, start+len) range
 * contains the start offset passed in via @data.
 */
static int compare_hole_range(struct rb_node *node, void *data)
	struct file_extent_hole *hole;

	hole = (struct file_extent_hole *)data;

	hole = rb_entry(node, struct file_extent_hole, node);
	if (start < hole->start)
	if (start >= hole->start && start < hole->start + hole->len)
/*
 * Delete a hole in the record
 *
 * This will do the hole split and is much stricter than add:
 * the [start, start+len) range must lie entirely inside one hole.
 */
static int del_file_extent_hole(struct rb_root *holes,
	struct file_extent_hole *hole;
	struct file_extent_hole tmp;
	struct rb_node *node;

	node = rb_search(holes, &tmp, compare_hole_range, NULL);
	hole = rb_entry(node, struct file_extent_hole, node);
	if (start + len > hole->start + hole->len)

	/*
	 * Now there will be no overlap, delete the hole and re-add the
	 * split(s) if they exist.
	 */
	if (start > hole->start) {
		/* leading remainder before the deleted range */
		prev_start = hole->start;
		prev_len = start - hole->start;
	if (hole->start + hole->len > start + len) {
		/* trailing remainder after the deleted range */
		next_start = start + len;
		next_len = hole->start + hole->len - start - len;
	rb_erase(node, holes);
		ret = add_file_extent_hole(holes, prev_start, prev_len);
		ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Copy every hole from @src into @dst; add_file_extent_hole() merges
 * adjacent/overlapping holes as they are inserted.
 */
static int copy_file_extent_holes(struct rb_root *dst,
	struct file_extent_hole *hole;
	struct rb_node *node;

	node = rb_first(src);
		hole = rb_entry(node, struct file_extent_hole, node);
		ret = add_file_extent_hole(dst, hole->start, hole->len);
		node = rb_next(node);
/* Erase and free every hole in @holes, leaving the tree empty. */
static void free_file_extent_holes(struct rb_root *holes)
	struct rb_node *node;
	struct file_extent_hole *hole;

	node = rb_first(holes);
		hole = rb_entry(node, struct file_extent_hole, node);
		rb_erase(node, holes);
		/* re-fetch the new first node after the erase */
		node = rb_first(holes);
572 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * First time @root is touched in @trans: mark it dirty-tracked and pin
 * the current node as commit_root, taking an extra buffer reference.
 */
static void record_root_in_trans(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
	if (root->last_trans != trans->transid) {
		root->track_dirty = 1;
		root->last_trans = trans->transid;
		root->commit_root = root->node;
		extent_buffer_get(root->node);	/* ref held by commit_root */
/* Map a POSIX st_mode file-type field to the btrfs BTRFS_FT_* dir type. */
static u8 imode_to_type(u32 imode)
	/* lookup table indexed by the S_IFMT bits shifted down by S_SHIFT */
	static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
		[S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
		[S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
		[S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
		[S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
		[S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
		[S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
		[S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,

	return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/* rb-tree comparator for device_record nodes, ordered by devid. */
static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
	struct device_record *rec1;
	struct device_record *rec2;

	rec1 = rb_entry(node1, struct device_record, node);
	rec2 = rb_entry(node2, struct device_record, node);
	if (rec1->devid > rec2->devid)
	else if (rec1->devid < rec2->devid)
/*
 * Deep-copy @orig_rec: the struct itself plus its backref list, orphan
 * extent list and hole tree. Returns ERR_PTR(-ENOMEM) on allocation
 * failure; the error path below unwinds any partially copied lists.
 */
static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
	struct inode_record *rec;
	struct inode_backref *backref;
	struct inode_backref *orig;
	struct inode_backref *tmp;
	struct orphan_data_extent *src_orphan;
	struct orphan_data_extent *dst_orphan;

	rec = malloc(sizeof(*rec));
		return ERR_PTR(-ENOMEM);
	memcpy(rec, orig_rec, sizeof(*rec));
	/* re-init the embedded containers so they don't alias orig_rec's */
	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->orphan_extents);
	rec->holes = RB_ROOT;

	list_for_each_entry(orig, &orig_rec->backrefs, list) {
		/* backref allocation includes the trailing name + NUL */
		size = sizeof(*orig) + orig->namelen + 1;
		backref = malloc(size);
		memcpy(backref, orig, size);
		list_add_tail(&backref->list, &rec->backrefs);
	list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
		dst_orphan = malloc(sizeof(*dst_orphan));
		memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
		list_add_tail(&dst_orphan->list, &rec->orphan_extents);
	ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);

	/* error unwind: free whatever was copied so far */
	if (!list_empty(&rec->backrefs))
		list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
			list_del(&orig->list);
	if (!list_empty(&rec->orphan_extents))
		list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
			list_del(&orig->list);
/* Print every orphaned data extent on @orphan_extents, if any. */
static void print_orphan_data_extents(struct list_head *orphan_extents,
	struct orphan_data_extent *orphan;

	if (list_empty(orphan_extents))
	printf("The following data extent is lost in tree %llu:\n",
	list_for_each_entry(orphan, orphan_extents, list) {
		printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
		       orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Print a human-readable breakdown of rec->errors for one inode,
 * followed by the orphan extents and hole list when relevant.
 */
static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
	u64 root_objectid = root->root_key.objectid;
	int errors = rec->errors;

	/* reloc root errors, we print its corresponding fs root objectid */
	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		root_objectid = root->root_key.offset;
		fprintf(stderr, "reloc");
	fprintf(stderr, "root %llu inode %llu errors %x",
		(unsigned long long) root_objectid,
		(unsigned long long) rec->ino, rec->errors);

	/* one clause per I_ERR_* bit, in bit order */
	if (errors & I_ERR_NO_INODE_ITEM)
		fprintf(stderr, ", no inode item");
	if (errors & I_ERR_NO_ORPHAN_ITEM)
		fprintf(stderr, ", no orphan item");
	if (errors & I_ERR_DUP_INODE_ITEM)
		fprintf(stderr, ", dup inode item");
	if (errors & I_ERR_DUP_DIR_INDEX)
		fprintf(stderr, ", dup dir index");
	if (errors & I_ERR_ODD_DIR_ITEM)
		fprintf(stderr, ", odd dir item");
	if (errors & I_ERR_ODD_FILE_EXTENT)
		fprintf(stderr, ", odd file extent");
	if (errors & I_ERR_BAD_FILE_EXTENT)
		fprintf(stderr, ", bad file extent");
	if (errors & I_ERR_FILE_EXTENT_OVERLAP)
		fprintf(stderr, ", file extent overlap");
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
		fprintf(stderr, ", file extent discount");
	if (errors & I_ERR_DIR_ISIZE_WRONG)
		fprintf(stderr, ", dir isize wrong");
	if (errors & I_ERR_FILE_NBYTES_WRONG)
		fprintf(stderr, ", nbytes wrong");
	if (errors & I_ERR_ODD_CSUM_ITEM)
		fprintf(stderr, ", odd csum item");
	if (errors & I_ERR_SOME_CSUM_MISSING)
		fprintf(stderr, ", some csum missing");
	if (errors & I_ERR_LINK_COUNT_WRONG)
		fprintf(stderr, ", link count wrong");
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		fprintf(stderr, ", orphan file extent");
	fprintf(stderr, "\n");
	/* Print the orphan extents if needed */
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		print_orphan_data_extents(&rec->orphan_extents, root->objectid);

	/* Print the holes if needed */
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
		struct file_extent_hole *hole;
		struct rb_node *node;

		node = rb_first(&rec->holes);
		fprintf(stderr, "Found file extent holes:\n");
			hole = rb_entry(node, struct file_extent_hole, node);
			fprintf(stderr, "\tstart: %llu, len: %llu\n",
				hole->start, hole->len);
			node = rb_next(node);
		/* empty hole tree: whole file up to isize is missing */
		fprintf(stderr, "\tstart: 0, len: %llu\n",
			round_up(rec->isize, root->sectorsize));
/* Print a human-readable breakdown of the REF_ERR_* bits in @errors. */
static void print_ref_error(int errors)
	if (errors & REF_ERR_NO_DIR_ITEM)
		fprintf(stderr, ", no dir item");
	if (errors & REF_ERR_NO_DIR_INDEX)
		fprintf(stderr, ", no dir index");
	if (errors & REF_ERR_NO_INODE_REF)
		fprintf(stderr, ", no inode ref");
	if (errors & REF_ERR_DUP_DIR_ITEM)
		fprintf(stderr, ", dup dir item");
	if (errors & REF_ERR_DUP_DIR_INDEX)
		fprintf(stderr, ", dup dir index");
	if (errors & REF_ERR_DUP_INODE_REF)
		fprintf(stderr, ", dup inode ref");
	if (errors & REF_ERR_INDEX_UNMATCH)
		fprintf(stderr, ", index mismatch");
	if (errors & REF_ERR_FILETYPE_UNMATCH)
		fprintf(stderr, ", filetype mismatch");
	if (errors & REF_ERR_NAME_TOO_LONG)
		fprintf(stderr, ", name too long");
	if (errors & REF_ERR_NO_ROOT_REF)
		fprintf(stderr, ", no root ref");
	if (errors & REF_ERR_NO_ROOT_BACKREF)
		fprintf(stderr, ", no root backref");
	if (errors & REF_ERR_DUP_ROOT_REF)
		fprintf(stderr, ", dup root ref");
	if (errors & REF_ERR_DUP_ROOT_BACKREF)
		fprintf(stderr, ", dup root backref");
	fprintf(stderr, "\n");
/*
 * Look up (or lazily create) the inode_record for @ino in @inode_cache.
 * With @mod set, a record shared between nodes (refs > 1) is cloned
 * first so the caller may modify it. Returns ERR_PTR on failure.
 */
static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
	struct ptr_node *node;
	struct cache_extent *cache;
	struct inode_record *rec = NULL;

	cache = lookup_cache_extent(inode_cache, ino, 1);
		node = container_of(cache, struct ptr_node, cache);
		if (mod && rec->refs > 1) {
			/* copy-on-write: detach from the shared record */
			node->data = clone_inode_rec(rec);
			if (IS_ERR(node->data))
		rec = calloc(1, sizeof(*rec));
			return ERR_PTR(-ENOMEM);
		rec->extent_start = (u64)-1;	/* no extent seen yet */
		INIT_LIST_HEAD(&rec->backrefs);
		INIT_LIST_HEAD(&rec->orphan_extents);
		rec->holes = RB_ROOT;

		node = malloc(sizeof(*node));
			return ERR_PTR(-ENOMEM);
		node->cache.start = ino;
		node->cache.size = 1;

		if (ino == BTRFS_FREE_INO_OBJECTID)
		ret = insert_cache_extent(inode_cache, &node->cache);
			return ERR_PTR(-EEXIST);
/* Unlink and free every orphan_data_extent on @orphan_extents. */
static void free_orphan_data_extents(struct list_head *orphan_extents)
	struct orphan_data_extent *orphan;

	while (!list_empty(orphan_extents)) {
		orphan = list_entry(orphan_extents->next,
				    struct orphan_data_extent, list);
		list_del(&orphan->list);
/* Free an inode_record and everything it owns (backrefs, orphans, holes). */
static void free_inode_rec(struct inode_record *rec)
	struct inode_backref *backref;

	while (!list_empty(&rec->backrefs)) {
		backref = to_inode_backref(rec->backrefs.next);
		list_del(&backref->list);
	free_orphan_data_extents(&rec->orphan_extents);
	free_file_extent_holes(&rec->holes);
/* True when @rec is fully verified and clean, so it can be dropped early. */
static int can_free_inode_rec(struct inode_record *rec)
	if (!rec->errors && rec->checked && rec->found_inode_item &&
	    rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Final per-inode validation: resolve completed backrefs, derive the
 * I_ERR_* bits from the accumulated counters, then drop the record
 * from @inode_cache if can_free_inode_rec() says it is clean.
 */
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
				 struct inode_record *rec)
	struct cache_extent *cache;
	struct inode_backref *tmp, *backref;
	struct ptr_node *node;
	unsigned char filetype;

	if (!rec->found_inode_item)

	filetype = imode_to_type(rec->imode);
	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		if (backref->found_dir_item && backref->found_dir_index) {
			if (backref->filetype != filetype)
				backref->errors |= REF_ERR_FILETYPE_UNMATCH;
			/* fully matched, error-free backref: retire it */
			if (!backref->errors && backref->found_inode_ref &&
			    rec->nlink == rec->found_link) {
				list_del(&backref->list);

	if (!rec->checked || rec->merging)

	if (S_ISDIR(rec->imode)) {
		/* for dirs, isize must equal the sum of entry name lengths */
		if (rec->found_size != rec->isize)
			rec->errors |= I_ERR_DIR_ISIZE_WRONG;
		if (rec->found_file_extent)
			rec->errors |= I_ERR_ODD_FILE_EXTENT;
	} else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_dir_item)
			rec->errors |= I_ERR_ODD_DIR_ITEM;
		if (rec->found_size != rec->nbytes)
			rec->errors |= I_ERR_FILE_NBYTES_WRONG;
		/* missing extent coverage below isize (unless no_holes) */
		if (rec->nlink > 0 && !no_holes &&
		    (rec->extent_end < rec->isize ||
		     first_extent_gap(&rec->holes) < rec->isize))
			rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;

	if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_csum_item && rec->nodatasum)
			rec->errors |= I_ERR_ODD_CSUM_ITEM;
		if (rec->some_csum_missing && !rec->nodatasum)
			rec->errors |= I_ERR_SOME_CSUM_MISSING;

	BUG_ON(rec->refs != 1);
	if (can_free_inode_rec(rec)) {
		cache = lookup_cache_extent(inode_cache, rec->ino, 1);
		node = container_of(cache, struct ptr_node, cache);
		BUG_ON(node->data != rec);
		remove_cache_extent(inode_cache, &node->cache);
/* Search the root for an ORPHAN_ITEM covering @ino; 0 if found. */
static int check_orphan_item(struct btrfs_root *root, u64 ino)
	struct btrfs_path path;
	struct btrfs_key key;

	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;

	btrfs_init_path(&path);
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	btrfs_release_path(&path);
/*
 * Record an INODE_ITEM into the currently active inode_record: copy
 * nlink/size/nbytes/mode, flag duplicates, and note missing orphan items.
 */
static int process_inode_item(struct extent_buffer *eb,
			      int slot, struct btrfs_key *key,
			      struct shared_node *active_node)
	struct inode_record *rec;
	struct btrfs_inode_item *item;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	if (rec->found_inode_item) {
		/* two inode items for the same objectid */
		rec->errors |= I_ERR_DUP_INODE_ITEM;
	item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
	rec->nlink = btrfs_inode_nlink(eb, item);
	rec->isize = btrfs_inode_size(eb, item);
	rec->nbytes = btrfs_inode_nbytes(eb, item);
	rec->imode = btrfs_inode_mode(eb, item);
	if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
	rec->found_inode_item = 1;
		rec->errors |= I_ERR_NO_ORPHAN_ITEM;
	maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref with matching (@dir, @name) on @rec, or allocate a
 * fresh zeroed one (with space for the name) and append it.
 */
static struct inode_backref *get_inode_backref(struct inode_record *rec,
					       int namelen, u64 dir)
	struct inode_backref *backref;

	list_for_each_entry(backref, &rec->backrefs, list) {
		/* the MULTIPLE_OBJECTIDS record aggregates all names */
		if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
		if (backref->dir != dir || backref->namelen != namelen)
		if (memcmp(name, backref->name, namelen))

	backref = malloc(sizeof(*backref) + namelen + 1);
	memset(backref, 0, sizeof(*backref));
	backref->namelen = namelen;
	memcpy(backref->name, name, namelen);
	backref->name[namelen] = '\0';
	list_add_tail(&backref->list, &rec->backrefs);
/*
 * Merge one directory-related item (DIR_INDEX, DIR_ITEM, INODE_REF or
 * INODE_EXTREF, selected by @itemtype) into the backref for (@ino, @dir,
 * @name). Cross-checks the index and filetype already recorded from the
 * other item kinds and accumulates REF_ERR_* bits on mismatch.
 */
static int add_inode_backref(struct cache_tree *inode_cache,
			     u64 ino, u64 dir, u64 index,
			     const char *name, int namelen,
			     int filetype, int itemtype, int errors)
	struct inode_record *rec;
	struct inode_backref *backref;

	rec = get_inode_rec(inode_cache, ino, 1);
	BUG_ON(IS_ERR(rec));
	backref = get_inode_backref(rec, name, namelen, dir);
		backref->errors |= errors;
	if (itemtype == BTRFS_DIR_INDEX_KEY) {
		if (backref->found_dir_index)
			backref->errors |= REF_ERR_DUP_DIR_INDEX;
		if (backref->found_inode_ref && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		if (backref->found_dir_item && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->index = index;
		backref->filetype = filetype;
		backref->found_dir_index = 1;
	} else if (itemtype == BTRFS_DIR_ITEM_KEY) {
		if (backref->found_dir_item)
			backref->errors |= REF_ERR_DUP_DIR_ITEM;
		if (backref->found_dir_index && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->filetype = filetype;
		backref->found_dir_item = 1;
	} else if ((itemtype == BTRFS_INODE_REF_KEY) ||
		   (itemtype == BTRFS_INODE_EXTREF_KEY)) {
		if (backref->found_inode_ref)
			backref->errors |= REF_ERR_DUP_INODE_REF;
		if (backref->found_dir_index && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;

			backref->index = index;
		backref->ref_type = itemtype;
		backref->found_inode_ref = 1;

	maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold @src into @dst when the same inode was seen via two tree paths:
 * replay src's backrefs through add_inode_backref(), OR the flag bits,
 * merge hole trees and extent ranges (detecting overlaps), and adopt
 * src's inode item if dst has none (duplicate if both have one).
 */
static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
			    struct cache_tree *dst_cache)
	struct inode_backref *backref;

	list_for_each_entry(backref, &src->backrefs, list) {
		if (backref->found_dir_index) {
			add_inode_backref(dst_cache, dst->ino, backref->dir,
					  backref->index, backref->name,
					  backref->namelen, backref->filetype,
					  BTRFS_DIR_INDEX_KEY, backref->errors);
		if (backref->found_dir_item) {
			add_inode_backref(dst_cache, dst->ino,
					  backref->dir, 0, backref->name,
					  backref->namelen, backref->filetype,
					  BTRFS_DIR_ITEM_KEY, backref->errors);
		if (backref->found_inode_ref) {
			add_inode_backref(dst_cache, dst->ino,
					  backref->dir, backref->index,
					  backref->name, backref->namelen, 0,
					  backref->ref_type, backref->errors);

	if (src->found_dir_item)
		dst->found_dir_item = 1;
	if (src->found_file_extent)
		dst->found_file_extent = 1;
	if (src->found_csum_item)
		dst->found_csum_item = 1;
	if (src->some_csum_missing)
		dst->some_csum_missing = 1;
	/* adopt src's holes when they start earlier than dst's */
	if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
		ret = copy_file_extent_holes(&dst->holes, &src->holes);

	BUG_ON(src->found_link < dir_count);
	dst->found_link += src->found_link - dir_count;
	dst->found_size += src->found_size;
	if (src->extent_start != (u64)-1) {
		if (dst->extent_start == (u64)-1) {
			dst->extent_start = src->extent_start;
			dst->extent_end = src->extent_end;
			/* ranges overlap -> corruption; gap -> record a hole */
			if (dst->extent_end > src->extent_start)
				dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
			else if (dst->extent_end < src->extent_start) {
				ret = add_file_extent_hole(&dst->holes,
					src->extent_start - dst->extent_end);
			if (dst->extent_end < src->extent_end)
				dst->extent_end = src->extent_end;

	dst->errors |= src->errors;
	if (src->found_inode_item) {
		if (!dst->found_inode_item) {
			dst->nlink = src->nlink;
			dst->isize = src->isize;
			dst->nbytes = src->nbytes;
			dst->imode = src->imode;
			dst->nodatasum = src->nodatasum;
			dst->found_inode_item = 1;
			dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move all inode records (root cache first, then inode cache) from
 * @src_node into @dst_node, merging with merge_inode_recs() on
 * collision, and carry over the "current" inode being assembled.
 */
static int splice_shared_node(struct shared_node *src_node,
			      struct shared_node *dst_node)
	struct cache_extent *cache;
	struct ptr_node *node, *ins;
	struct cache_tree *src, *dst;
	struct inode_record *rec, *conflict;
	u64 current_ino = 0;

	if (--src_node->refs == 0)
	if (src_node->current)
		current_ino = src_node->current->ino;

	/* pass 1: root cache; pass 2 (below): inode cache */
	src = &src_node->root_cache;
	dst = &dst_node->root_cache;
	cache = search_cache_extent(src, 0);
		node = container_of(cache, struct ptr_node, cache);
		/* advance before the entry is removed from src */
		cache = next_cache_extent(cache);
		remove_cache_extent(src, &node->cache);
			ins = malloc(sizeof(*ins));
			ins->cache.start = node->cache.start;
			ins->cache.size = node->cache.size;

		ret = insert_cache_extent(dst, &ins->cache);
		if (ret == -EEXIST) {
			/* same ino already in dst: merge and drop src's copy */
			conflict = get_inode_rec(dst, rec->ino, 1);
			BUG_ON(IS_ERR(conflict));
			merge_inode_recs(rec, conflict, dst);
				conflict->checked = 1;
				if (dst_node->current == conflict)
					dst_node->current = NULL;
			maybe_free_inode_rec(dst, conflict);
			free_inode_rec(rec);

	/* switch to the inode caches and loop again */
	if (src == &src_node->root_cache) {
		src = &src_node->inode_cache;
		dst = &dst_node->inode_cache;

	if (current_ino > 0 && (!dst_node->current ||
	    current_ino > dst_node->current->ino)) {
		if (dst_node->current) {
			dst_node->current->checked = 1;
			maybe_free_inode_rec(dst, dst_node->current);
		dst_node->current = get_inode_rec(dst, current_ino, 1);
		BUG_ON(IS_ERR(dst_node->current));
/* cache_tree destructor callback: free the ptr_node's inode record. */
static void free_inode_ptr(struct cache_extent *cache)
	struct ptr_node *node;
	struct inode_record *rec;

	node = container_of(cache, struct ptr_node, cache);
	free_inode_rec(rec);

/* Generates free_inode_recs_tree() over a cache_tree of ptr_nodes. */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/* Look up the shared_node covering @bytenr in @shared, or NULL. */
static struct shared_node *find_shared_node(struct cache_tree *shared,
	struct cache_extent *cache;
	struct shared_node *node;

	cache = lookup_cache_extent(shared, bytenr, 1);
		node = container_of(cache, struct shared_node, cache);

/* Allocate and insert a shared_node for @bytenr with @refs references. */
static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
	struct shared_node *node;

	node = calloc(1, sizeof(*node));
	node->cache.start = bytenr;
	node->cache.size = 1;
	cache_tree_init(&node->root_cache);
	cache_tree_init(&node->inode_cache);

	ret = insert_cache_extent(shared, &node->cache);
/*
 * Descend into a shared tree block at @bytenr/@level: register it as a
 * shared_node (first visit) or splice its previously collected records
 * into the currently active node (revisit).
 */
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
			     struct walk_control *wc, int level)
	struct shared_node *node;
	struct shared_node *dest;

	if (level == wc->active_node)
	BUG_ON(wc->active_node <= level);
	node = find_shared_node(&wc->shared, bytenr);
		/* first visit: record it and make it the active node */
		ret = add_shared_node(&wc->shared, bytenr, refs);
		node = find_shared_node(&wc->shared, bytenr);
		wc->nodes[level] = node;
		wc->active_node = level;

	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0) {
		/* dead root: just drop the node once its refs run out */
		if (--node->refs == 0) {
			free_inode_recs_tree(&node->root_cache);
			free_inode_recs_tree(&node->inode_cache);
			remove_cache_extent(&wc->shared, &node->cache);

	dest = wc->nodes[wc->active_node];
	splice_shared_node(node, dest);
	if (node->refs == 0) {
		remove_cache_extent(&wc->shared, &node->cache);
/*
 * Ascend out of a shared block: pop the node for the current level,
 * re-activate the next populated level above, and splice the popped
 * node's records into it when the root is still referenced.
 */
static int leave_shared_node(struct btrfs_root *root,
			     struct walk_control *wc, int level)
	struct shared_node *node;
	struct shared_node *dest;

	if (level == wc->root_level)

	/* find the next populated slot above this level */
	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
	BUG_ON(i >= BTRFS_MAX_LEVEL);

	node = wc->nodes[wc->active_node];
	wc->nodes[wc->active_node] = NULL;
	wc->active_node = i;

	dest = wc->nodes[wc->active_node];
	if (wc->active_node < wc->root_level ||
	    btrfs_root_refs(&root->root_item) > 0) {
		BUG_ON(node->refs <= 1);
		splice_shared_node(node, dest);
		BUG_ON(node->refs < 2);
/*
 * 1 - if the root with id child_root_id is a child of root parent_root_id
 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
 *     has other root(s) as parent(s)
 * 2 - if the root child_root_id doesn't have any parent roots
 */
static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;

	btrfs_init_path(&path);

	/* fast path: direct ROOT_REF from parent to child */
	key.objectid = parent_root_id;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = child_root_id;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
	btrfs_release_path(&path);

	/* slow path: scan the child's ROOT_BACKREF items */
	key.objectid = child_root_id;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,

		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
			leaf = path.nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid != child_root_id ||
		    key.type != BTRFS_ROOT_BACKREF_KEY)

		if (key.offset == parent_root_id) {
			btrfs_release_path(&path);

	btrfs_release_path(&path);
	return has_parent ? 0 : 2;
/*
 * Walk every btrfs_dir_item packed into this leaf item and record a
 * backref for each name, routed to the inode cache (INODE_ITEM target),
 * root cache (ROOT_ITEM target == subvolume), or the special
 * BTRFS_MULTIPLE_OBJECTIDS record for invalid locations.
 */
static int process_dir_item(struct btrfs_root *root,
			    struct extent_buffer *eb,
			    int slot, struct btrfs_key *key,
			    struct shared_node *active_node)
	struct btrfs_dir_item *di;
	struct inode_record *rec;
	struct cache_tree *root_cache;
	struct cache_tree *inode_cache;
	struct btrfs_key location;
	char namebuf[BTRFS_NAME_LEN];

	root_cache = &active_node->root_cache;
	inode_cache = &active_node->inode_cache;
	rec = active_node->current;
	rec->found_dir_item = 1;

	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total = btrfs_item_size_nr(eb, slot);
	/* a single item may hold several dir entries back to back */
	while (cur < total) {
		btrfs_dir_item_key_to_cpu(eb, di, &location);
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		filetype = btrfs_dir_type(eb, di);

		rec->found_size += name_len;
		if (name_len <= BTRFS_NAME_LEN) {
			/* clamp oversized names and flag the backref */
			len = BTRFS_NAME_LEN;
			error = REF_ERR_NAME_TOO_LONG;
		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);

		if (location.type == BTRFS_INODE_ITEM_KEY) {
			add_inode_backref(inode_cache, location.objectid,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
			add_inode_backref(root_cache, location.objectid,
					  key->objectid, key->offset,
					  namebuf, len, filetype,
			fprintf(stderr, "invalid location in dir item %u\n",
			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
	/* DIR_INDEX items must contain exactly one entry */
	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
		rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk the packed (name, index) entries of one INODE_REF item and add
 * each as an inode backref.  key->objectid is the inode, key->offset the
 * parent directory.
 * NOTE(review): elided listing — the len = name_len branch is not visible.
 */
1483 static int process_inode_ref(struct extent_buffer *eb,
1484 int slot, struct btrfs_key *key,
1485 struct shared_node *active_node)
1493 struct cache_tree *inode_cache;
1494 struct btrfs_inode_ref *ref;
1495 char namebuf[BTRFS_NAME_LEN];
1497 inode_cache = &active_node->inode_cache;
1499 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1500 total = btrfs_item_size_nr(eb, slot);
1501 while (cur < total) {
1502 name_len = btrfs_inode_ref_name_len(eb, ref);
1503 index = btrfs_inode_ref_index(eb, ref);
/* Clamp over-long names and flag them. */
1504 if (name_len <= BTRFS_NAME_LEN) {
1508 len = BTRFS_NAME_LEN;
1509 error = REF_ERR_NAME_TOO_LONG;
1511 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1512 add_inode_backref(inode_cache, key->objectid, key->offset,
1513 index, namebuf, len, 0, key->type, error);
/* Step to the next packed ref entry. */
1515 len = sizeof(*ref) + name_len;
1516 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for INODE_EXTREF items, which carry the
 * parent dir inline (extref->parent) instead of in the item key.
 * NOTE(review): elided listing — the len = name_len branch is not visible.
 */
1522 static int process_inode_extref(struct extent_buffer *eb,
1523 int slot, struct btrfs_key *key,
1524 struct shared_node *active_node)
1533 struct cache_tree *inode_cache;
1534 struct btrfs_inode_extref *extref;
1535 char namebuf[BTRFS_NAME_LEN];
1537 inode_cache = &active_node->inode_cache;
1539 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1540 total = btrfs_item_size_nr(eb, slot);
1541 while (cur < total) {
1542 name_len = btrfs_inode_extref_name_len(eb, extref);
1543 index = btrfs_inode_extref_index(eb, extref);
1544 parent = btrfs_inode_extref_parent(eb, extref);
/* Clamp over-long names and flag them. */
1545 if (name_len <= BTRFS_NAME_LEN) {
1549 len = BTRFS_NAME_LEN;
1550 error = REF_ERR_NAME_TOO_LONG;
1552 read_extent_buffer(eb, namebuf,
1553 (unsigned long)(extref + 1), len);
1554 add_inode_backref(inode_cache, key->objectid, parent,
1555 index, namebuf, len, 0, key->type, error);
/* Step to the next packed extref entry. */
1557 len = sizeof(*extref) + name_len;
1558 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of [start, start + len) are covered by checksum
 * items in the csum tree; the covered byte count is returned via *found.
 * Each csum item covers (item_size / csum_size) * sectorsize bytes
 * starting at its key.offset.
 * NOTE(review): elided listing — the accumulation into *found and loop
 * advance are among the lines not shown.
 */
1565 static int count_csum_range(struct btrfs_root *root, u64 start,
1566 u64 len, u64 *found)
1568 struct btrfs_key key;
1569 struct btrfs_path path;
1570 struct extent_buffer *leaf;
1575 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1577 btrfs_init_path(&path);
1579 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1581 key.type = BTRFS_EXTENT_CSUM_KEY;
1583 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* On a miss, step back one slot: the previous csum item may still
 * overlap the start of our range. */
1587 if (ret > 0 && path.slots[0] > 0) {
1588 leaf = path.nodes[0];
1589 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1590 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1591 key.type == BTRFS_EXTENT_CSUM_KEY)
1596 leaf = path.nodes[0];
1597 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1598 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1603 leaf = path.nodes[0];
1606 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop when we run out of csum items or pass the range end. */
1607 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1608 key.type != BTRFS_EXTENT_CSUM_KEY)
1611 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1612 if (key.offset >= start + len)
1615 if (key.offset > start)
1618 size = btrfs_item_size_nr(leaf, path.slots[0]);
1619 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1620 if (csum_end > start) {
1621 size = min(csum_end - start, len);
1630 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item for the current inode record: track the
 * contiguous extent range (detecting overlaps and holes), sanity-check the
 * extent fields, and verify csum coverage for regular extents.
 * NOTE(review): elided listing — several branches (e.g. the inline-extent
 * size check at 1673/1674) are only partially visible.
 */
1636 static int process_file_extent(struct btrfs_root *root,
1637 struct extent_buffer *eb,
1638 int slot, struct btrfs_key *key,
1639 struct shared_node *active_node)
1641 struct inode_record *rec;
1642 struct btrfs_file_extent_item *fi;
1644 u64 disk_bytenr = 0;
1645 u64 extent_offset = 0;
1646 u64 mask = root->sectorsize - 1;
1650 rec = active_node->current;
1651 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1652 rec->found_file_extent = 1;
/* First extent seen for this inode: start tracking the range. */
1654 if (rec->extent_start == (u64)-1) {
1655 rec->extent_start = key->offset;
1656 rec->extent_end = key->offset;
/* Overlap with the previous extent is an error; a gap is a hole. */
1659 if (rec->extent_end > key->offset)
1660 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1661 else if (rec->extent_end < key->offset) {
1662 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1663 key->offset - rec->extent_end);
1668 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1669 extent_type = btrfs_file_extent_type(eb, fi);
1671 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1672 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1674 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1675 rec->found_size += num_bytes;
/* Round the inline length up to a sector boundary. */
1676 num_bytes = (num_bytes + mask) & ~mask;
1677 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1678 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1679 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1680 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1681 extent_offset = btrfs_file_extent_offset(eb, fi);
/* num_bytes must be non-zero and sector aligned, and must fit
 * inside ram_bytes together with the extent offset. */
1682 if (num_bytes == 0 || (num_bytes & mask))
1683 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1684 if (num_bytes + extent_offset >
1685 btrfs_file_extent_ram_bytes(eb, fi))
1686 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not be compressed/encoded. */
1687 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1688 (btrfs_file_extent_compression(eb, fi) ||
1689 btrfs_file_extent_encryption(eb, fi) ||
1690 btrfs_file_extent_other_encoding(eb, fi)))
1691 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 marks a hole extent; don't count it. */
1692 if (disk_bytenr > 0)
1693 rec->found_size += num_bytes;
1695 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1697 rec->extent_end = key->offset + num_bytes;
1700 * The data reloc tree will copy full extents into its inode and then
1701 * copy the corresponding csums. Because the extent it copied could be
1702 * a preallocated extent that hasn't been written to yet there may be no
1703 * csums to copy, ergo we won't have csums for our file extent. This is
1704 * ok so just don't bother checking csums if the inode belongs to the
1707 if (disk_bytenr > 0 &&
1708 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checksummed over the on-disk bytes. */
1710 if (btrfs_file_extent_compression(eb, fi))
1711 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1713 disk_bytenr += extent_offset;
1715 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* REG extents must be fully csummed; PREALLOC must have none. */
1718 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1720 rec->found_csum_item = 1;
1721 if (found < num_bytes)
1722 rec->some_csum_missing = 1;
1723 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1725 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Dispatch every item of a leaf to the per-item-type processors above,
 * switching the active node's "current" inode record as the objectid
 * advances through the leaf.
 * NOTE(review): elided listing — break statements of the switch and the
 * final return are not visible.
 */
1731 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1732 struct walk_control *wc)
1734 struct btrfs_key key;
1738 struct cache_tree *inode_cache;
1739 struct shared_node *active_node;
/* Skip roots with zero refs (deleted subvolumes). */
1741 if (wc->root_level == wc->active_node &&
1742 btrfs_root_refs(&root->root_item) == 0)
1745 active_node = wc->nodes[wc->active_node];
1746 inode_cache = &active_node->inode_cache;
1747 nritems = btrfs_header_nritems(eb);
1748 for (i = 0; i < nritems; i++) {
1749 btrfs_item_key_to_cpu(eb, &key, i);
1751 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1753 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved past the current inode: close its record out and
 * fetch/create the one for this objectid. */
1756 if (active_node->current == NULL ||
1757 active_node->current->ino < key.objectid) {
1758 if (active_node->current) {
1759 active_node->current->checked = 1;
1760 maybe_free_inode_rec(inode_cache,
1761 active_node->current);
1763 active_node->current = get_inode_rec(inode_cache,
1765 BUG_ON(IS_ERR(active_node->current));
1768 case BTRFS_DIR_ITEM_KEY:
1769 case BTRFS_DIR_INDEX_KEY:
1770 ret = process_dir_item(root, eb, i, &key, active_node);
1772 case BTRFS_INODE_REF_KEY:
1773 ret = process_inode_ref(eb, i, &key, active_node);
1775 case BTRFS_INODE_EXTREF_KEY:
1776 ret = process_inode_extref(eb, i, &key, active_node);
1778 case BTRFS_INODE_ITEM_KEY:
1779 ret = process_inode_item(eb, i, &key, active_node);
1781 case BTRFS_EXTENT_DATA_KEY:
1782 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of 'node' from 'slot' onward so
 * the subsequent walk_down_tree() reads hit cache.  Best effort only; the
 * readahead result is not checked.
 */
1792 static void reada_walk_down(struct btrfs_root *root,
1793 struct extent_buffer *node, int slot)
1802 level = btrfs_header_level(node);
1806 nritems = btrfs_header_nritems(node);
1807 blocksize = root->nodesize;
1808 for (i = slot; i < nritems; i++) {
1809 bytenr = btrfs_node_blockptr(node, i);
1810 ptr_gen = btrfs_node_ptr_generation(node, i);
1811 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1816 * Check the child node/leaf by the following condition:
1817 * 1. the first item key of the node/leaf should be the same with the one
1819 * 2. block in parent node should match the child node/leaf.
1820 * 3. generation of parent node and child's header should be consistent.
1822 * Or the child node/leaf pointed by the key in parent is not valid.
1824 * We hope to check leaf owner too, but since subvol may share leaves,
1825 * which makes leaf owner check not so strong, key check should be
1826 * sufficient enough for that case.
/*
 * Verify a child block against its parent pointer (see the comment block
 * above): first key, block number and generation must all match.
 * NOTE(review): elided listing — the ret accumulation and final return
 * are not visible here.
 */
1828 static int check_child_node(struct btrfs_root *root,
1829 struct extent_buffer *parent, int slot,
1830 struct extent_buffer *child)
1832 struct btrfs_key parent_key;
1833 struct btrfs_key child_key;
1836 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A leaf stores item keys, an internal node stores node keys. */
1837 if (btrfs_header_level(child) == 0)
1838 btrfs_item_key_to_cpu(child, &child_key, 0);
1840 btrfs_node_key_to_cpu(child, &child_key, 0);
1842 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1845 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1846 parent_key.objectid, parent_key.type, parent_key.offset,
1847 child_key.objectid, child_key.type, child_key.offset);
1849 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1851 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1852 btrfs_node_blockptr(parent, slot),
1853 btrfs_header_bytenr(child));
1855 if (btrfs_node_ptr_generation(parent, slot) !=
1856 btrfs_header_generation(child)) {
1858 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1859 btrfs_header_generation(child),
1860 btrfs_node_ptr_generation(parent, slot));
/*
 * Descend one step of the iterative tree walk: process the current block
 * (calling process_one_leaf() on leaves), validate and read the next
 * child, and push it onto the path.  Shared subtrees are tracked through
 * enter_shared_node() so they are walked only once.
 * NOTE(review): elided listing — loop exits, error handling and the
 * shared-node skip paths are only partially visible.
 */
1865 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1866 struct walk_control *wc, int *level)
1868 enum btrfs_tree_block_status status;
1871 struct extent_buffer *next;
1872 struct extent_buffer *cur;
1877 WARN_ON(*level < 0);
1878 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Refcount of the root block decides whether it is shared. */
1879 ret = btrfs_lookup_extent_info(NULL, root,
1880 path->nodes[*level]->start,
1881 *level, 1, &refs, NULL);
1888 ret = enter_shared_node(root, path->nodes[*level]->start,
1896 while (*level >= 0) {
1897 WARN_ON(*level < 0);
1898 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1899 cur = path->nodes[*level];
1901 if (btrfs_header_level(cur) != *level)
1904 if (path->slots[*level] >= btrfs_header_nritems(cur))
1907 ret = process_one_leaf(root, cur, wc);
1912 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1913 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1914 blocksize = root->nodesize;
1915 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
/* Shared child already walked elsewhere: skip its subtree. */
1921 ret = enter_shared_node(root, bytenr, refs,
1924 path->slots[*level]++;
/* Read the child, using readahead when it is not cached. */
1929 next = btrfs_find_tree_block(root, bytenr, blocksize);
1930 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1931 free_extent_buffer(next);
1932 reada_walk_down(root, cur, path->slots[*level]);
1933 next = read_tree_block(root, bytenr, blocksize,
1935 if (!extent_buffer_uptodate(next)) {
1936 struct btrfs_key node_key;
1938 btrfs_node_key_to_cpu(path->nodes[*level],
1940 path->slots[*level]);
1941 btrfs_add_corrupt_extent_record(root->fs_info,
1943 path->nodes[*level]->start,
1944 root->nodesize, *level);
1950 ret = check_child_node(root, cur, path->slots[*level], next);
/* Structural sanity check before descending into the child. */
1956 if (btrfs_is_leaf(next))
1957 status = btrfs_check_leaf(root, NULL, next);
1959 status = btrfs_check_node(root, NULL, next);
1960 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1961 free_extent_buffer(next);
/* Push the child onto the path and continue one level down. */
1966 *level = *level - 1;
1967 free_extent_buffer(path->nodes[*level]);
1968 path->nodes[*level] = next;
1969 path->slots[*level] = 0;
1972 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Ascend the path after a subtree is finished: pop fully-consumed levels,
 * leaving any shared node we were tracking, until a level with remaining
 * slots is found (or the tree is exhausted).
 */
1976 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1977 struct walk_control *wc, int *level)
1980 struct extent_buffer *leaf;
1982 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1983 leaf = path->nodes[i];
/* More siblings left at this level: resume here. */
1984 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1989 free_extent_buffer(path->nodes[*level]);
1990 path->nodes[*level] = NULL;
1991 BUG_ON(*level > wc->active_node);
1992 if (*level == wc->active_node)
1993 leave_shared_node(root, wc, *level);
/*
 * Verify the root directory inode record looks exactly as expected: one
 * link, a single ".." inode-ref backref and nothing else.
 * NOTE(review): elided listing — the per-condition goto/return targets
 * and the success/failure return values are not visible here.
 */
2000 static int check_root_dir(struct inode_record *rec)
2002 struct inode_backref *backref;
2005 if (!rec->found_inode_item || rec->errors)
2007 if (rec->nlink != 1 || rec->found_link != 0)
2009 if (list_empty(&rec->backrefs))
2011 backref = to_inode_backref(rec->backrefs.next);
2012 if (!backref->found_inode_ref)
2014 if (backref->index != 0 || backref->namelen != 2 ||
2015 memcmp(backref->name, "..", 2))
2017 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair: rewrite the on-disk inode item's size with the byte count the
 * fsck scan accumulated (rec->found_size), clearing
 * I_ERR_DIR_ISIZE_WRONG on success.
 * NOTE(review): elided listing — error paths after the search are only
 * partially visible.
 */
2024 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2025 struct btrfs_root *root, struct btrfs_path *path,
2026 struct inode_record *rec)
2028 struct btrfs_inode_item *ei;
2029 struct btrfs_key key;
2032 key.objectid = rec->ino;
2033 key.type = BTRFS_INODE_ITEM_KEY;
/* offset -1: land just past the inode item, then step back a slot. */
2034 key.offset = (u64)-1;
2036 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2040 if (!path->slots[0]) {
2047 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2048 if (key.objectid != rec->ino) {
2053 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2054 struct btrfs_inode_item);
2055 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2056 btrfs_mark_buffer_dirty(path->nodes[0]);
2057 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2058 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2059 root->root_key.objectid);
2061 btrfs_release_path(path);
/*
 * Repair: insert the missing orphan item for this inode so the kernel
 * can finish the interrupted unlink; clears I_ERR_NO_ORPHAN_ITEM.
 */
2065 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2066 struct btrfs_root *root,
2067 struct btrfs_path *path,
2068 struct inode_record *rec)
2072 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2073 btrfs_release_path(path);
2075 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair: rewrite the inode item's nbytes with the scanned total
 * (rec->found_size), clearing I_ERR_FILE_NBYTES_WRONG.
 * NOTE(review): elided listing — key.offset assignment and the search
 * error path are not visible here.
 */
2079 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2080 struct btrfs_root *root,
2081 struct btrfs_path *path,
2082 struct inode_record *rec)
2084 struct btrfs_inode_item *ei;
2085 struct btrfs_key key;
2088 key.objectid = rec->ino;
2089 key.type = BTRFS_INODE_ITEM_KEY;
2092 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2099 /* Since ret == 0, no need to check anything */
2100 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2101 struct btrfs_inode_item);
2102 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2103 btrfs_mark_buffer_dirty(path->nodes[0]);
2104 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2105 printf("reset nbytes for ino %llu root %llu\n",
2106 rec->ino, root->root_key.objectid);
2108 btrfs_release_path(path);
/*
 * Repair: rebuild a missing DIR_INDEX item for 'backref' from the data
 * preserved in the backref (dir, index, name, filetype), then update the
 * parent directory record's size/error state in the cache to match.
 * Runs in its own transaction.
 */
2112 static int add_missing_dir_index(struct btrfs_root *root,
2113 struct cache_tree *inode_cache,
2114 struct inode_record *rec,
2115 struct inode_backref *backref)
2117 struct btrfs_path *path;
2118 struct btrfs_trans_handle *trans;
2119 struct btrfs_dir_item *dir_item;
2120 struct extent_buffer *leaf;
2121 struct btrfs_key key;
2122 struct btrfs_disk_key disk_key;
2123 struct inode_record *dir_rec;
2124 unsigned long name_ptr;
/* Item payload = dir_item header plus the name bytes. */
2125 u32 data_size = sizeof(*dir_item) + backref->namelen;
2128 path = btrfs_alloc_path();
2132 trans = btrfs_start_transaction(root, 1);
2133 if (IS_ERR(trans)) {
2134 btrfs_free_path(path);
2135 return PTR_ERR(trans);
2138 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2139 (unsigned long long)rec->ino);
2140 key.objectid = backref->dir;
2141 key.type = BTRFS_DIR_INDEX_KEY;
2142 key.offset = backref->index;
2144 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2147 leaf = path->nodes[0];
2148 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The embedded location key points at the child inode item. */
2150 disk_key.objectid = cpu_to_le64(rec->ino);
2151 disk_key.type = BTRFS_INODE_ITEM_KEY;
2152 disk_key.offset = 0;
2154 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2155 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2156 btrfs_set_dir_data_len(leaf, dir_item, 0);
2157 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2158 name_ptr = (unsigned long)(dir_item + 1);
2159 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2160 btrfs_mark_buffer_dirty(leaf);
2161 btrfs_free_path(path);
2162 btrfs_commit_transaction(trans, root);
/* Keep the in-memory caches consistent with the new item. */
2164 backref->found_dir_index = 1;
2165 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2166 BUG_ON(IS_ERR(dir_rec));
2169 dir_rec->found_size += backref->namelen;
2170 if (dir_rec->found_size == dir_rec->isize &&
2171 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2172 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2173 if (dir_rec->found_size != dir_rec->isize)
2174 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair: remove a bad DIR_INDEX entry described by 'backref'.  If the
 * index item holds only this one name the whole item is deleted,
 * otherwise just this name is removed from it.  Runs in its own
 * transaction.
 * NOTE(review): elided listing — the di error/NULL checks between lookup
 * and deletion are only partially visible.
 */
2179 static int delete_dir_index(struct btrfs_root *root,
2180 struct cache_tree *inode_cache,
2181 struct inode_record *rec,
2182 struct inode_backref *backref)
2184 struct btrfs_trans_handle *trans;
2185 struct btrfs_dir_item *di;
2186 struct btrfs_path *path;
2189 path = btrfs_alloc_path();
2193 trans = btrfs_start_transaction(root, 1);
2194 if (IS_ERR(trans)) {
2195 btrfs_free_path(path);
2196 return PTR_ERR(trans);
2200 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2201 (unsigned long long)backref->dir,
2202 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2203 (unsigned long long)root->objectid);
2205 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2206 backref->name, backref->namelen,
2207 backref->index, -1);
2210 btrfs_free_path(path);
2211 btrfs_commit_transaction(trans, root);
/* Item empty after lookup: drop it entirely; else drop one name. */
2218 ret = btrfs_del_item(trans, root, path);
2220 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2222 btrfs_free_path(path);
2223 btrfs_commit_transaction(trans, root);
/*
 * Repair: recreate a lost inode item from what the scan discovered.
 * Directory vs regular file is guessed from found_dir_item; sizes come
 * from found_size / extent_end; all timestamps are set to "now".
 * 'root_dir' selects the special root-directory handling (its use is in
 * elided lines).
 */
2227 static int create_inode_item(struct btrfs_root *root,
2228 struct inode_record *rec,
2229 struct inode_backref *backref, int root_dir)
2231 struct btrfs_trans_handle *trans;
2232 struct btrfs_inode_item inode_item;
2233 time_t now = time(NULL);
2236 trans = btrfs_start_transaction(root, 1);
2237 if (IS_ERR(trans)) {
2238 ret = PTR_ERR(trans);
2242 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2243 "be incomplete, please check permissions and content after "
2244 "the fsck completes.\n", (unsigned long long)root->objectid,
2245 (unsigned long long)rec->ino);
2247 memset(&inode_item, 0, sizeof(inode_item));
2248 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink 1 in one branch, found_link in the other (branch condition
 * is in elided lines — presumably the root_dir case; verify). */
2250 btrfs_set_stack_inode_nlink(&inode_item, 1);
2252 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2253 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2254 if (rec->found_dir_item) {
2255 if (rec->found_file_extent)
2256 fprintf(stderr, "root %llu inode %llu has both a dir "
2257 "item and extents, unsure if it is a dir or a "
2258 "regular file so setting it as a directory\n",
2259 (unsigned long long)root->objectid,
2260 (unsigned long long)rec->ino);
2261 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2262 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2263 } else if (!rec->found_dir_item) {
2264 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2265 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* Fresh timestamps; otime intentionally left at zero. */
2267 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2268 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2269 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2270 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2271 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2272 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2273 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2274 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2276 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2278 btrfs_commit_transaction(trans, root);
/*
 * Repair driver for one inode's backref list: recreate the root-dir
 * inode item, delete bad dir indexes, add missing dir index/item pairs,
 * and recreate a missing inode item when all three refs agree.  Returns
 * a negative error, or (per the final return) the repaired count.
 */
2282 static int repair_inode_backrefs(struct btrfs_root *root,
2283 struct inode_record *rec,
2284 struct cache_tree *inode_cache,
2287 struct inode_backref *tmp, *backref;
2288 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2292 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2293 if (!delete && rec->ino == root_dirid) {
2294 if (!rec->found_inode_item) {
2295 ret = create_inode_item(root, rec, backref, 1);
2302 /* Index 0 for root dir's are special, don't mess with it */
2303 if (rec->ino == root_dirid && backref->index == 0)
/* Drop dir indexes with no matching inode ref, or whose index
 * disagrees with the inode ref. */
2307 ((backref->found_dir_index && !backref->found_inode_ref) ||
2308 (backref->found_dir_index && backref->found_inode_ref &&
2309 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2310 ret = delete_dir_index(root, inode_cache, rec, backref);
2314 list_del(&backref->list);
2318 if (!delete && !backref->found_dir_index &&
2319 backref->found_dir_item && backref->found_inode_ref) {
2320 ret = add_missing_dir_index(root, inode_cache, rec,
/* NOTE(review): found_dir_index is tested twice below — the third
 * condition is a duplicate and looks like it was meant to be a
 * different flag (found_inode_ref?); verify against upstream. */
2325 if (backref->found_dir_item &&
2326 backref->found_dir_index &&
2327 backref->found_dir_index) {
2328 if (!backref->errors &&
2329 backref->found_inode_ref) {
2330 list_del(&backref->list);
/* Only an inode ref exists: re-insert the dir index/item pair,
 * unless a name conflict makes nlink repair the better tool. */
2336 if (!delete && (!backref->found_dir_index &&
2337 !backref->found_dir_item &&
2338 backref->found_inode_ref)) {
2339 struct btrfs_trans_handle *trans;
2340 struct btrfs_key location;
2342 ret = check_dir_conflict(root, backref->name,
2348 * let nlink fixing routine to handle it,
2349 * which can do it better.
2354 location.objectid = rec->ino;
2355 location.type = BTRFS_INODE_ITEM_KEY;
2356 location.offset = 0;
2358 trans = btrfs_start_transaction(root, 1);
2359 if (IS_ERR(trans)) {
2360 ret = PTR_ERR(trans);
2363 fprintf(stderr, "adding missing dir index/item pair "
2365 (unsigned long long)rec->ino);
2366 ret = btrfs_insert_dir_item(trans, root, backref->name,
2368 backref->dir, &location,
2369 imode_to_type(rec->imode),
2372 btrfs_commit_transaction(trans, root);
/* All three refs present and clean but no inode item: rebuild it. */
2376 if (!delete && (backref->found_inode_ref &&
2377 backref->found_dir_index &&
2378 backref->found_dir_item &&
2379 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2380 !rec->found_inode_item)) {
2381 ret = create_inode_item(root, rec, backref, 0);
2388 return ret ? ret : repaired;
2392 * To determine the file type for nlink/inode_item repair
2394 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2395 * Return -ENOENT if file type is not found.
/*
 * Recover the BTRFS_FT_* file type for nlink/inode-item repair: prefer
 * the inode item's mode, else take the filetype recorded on any dir
 * backref.  Returns 0 on success, -ENOENT if nothing usable (returns are
 * in elided lines).
 */
2397 static int find_file_type(struct inode_record *rec, u8 *type)
2399 struct inode_backref *backref;
2401 /* For inode item recovered case */
2402 if (rec->found_inode_item) {
2403 *type = imode_to_type(rec->imode);
2407 list_for_each_entry(backref, &rec->backrefs, list) {
2408 if (backref->found_dir_index || backref->found_dir_item) {
2409 *type = backref->filetype;
2417 * To determine the file name for nlink repair
2419 * Return 0 if file name is found, set name and namelen.
2420 * Return -ENOENT if file name is not found.
/*
 * Recover a usable file name for nlink repair from the first backref
 * that carries one (dir index, dir item or inode ref).  Copies the name
 * into 'name' and sets *namelen.  Returns 0 on success, -ENOENT if no
 * backref has a name (returns are in elided lines).
 */
2422 static int find_file_name(struct inode_record *rec,
2423 char *name, int *namelen)
2425 struct inode_backref *backref;
2427 list_for_each_entry(backref, &rec->backrefs, list) {
2428 if (backref->found_dir_index || backref->found_dir_item ||
2429 backref->found_inode_ref) {
2430 memcpy(name, backref->name, backref->namelen);
2431 *namelen = backref->namelen;
2438 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the inode's nlink from scratch: unlink every backref (valid
 * ones included), prune invalid backrefs from the list, zero the on-disk
 * nlink, then btrfs_add_link() each remaining valid backref so the link
 * count is recomputed correctly.
 */
2439 static int reset_nlink(struct btrfs_trans_handle *trans,
2440 struct btrfs_root *root,
2441 struct btrfs_path *path,
2442 struct inode_record *rec)
2444 struct inode_backref *backref;
2445 struct inode_backref *tmp;
2446 struct btrfs_key key;
2447 struct btrfs_inode_item *inode_item;
2450 /* We don't believe this either, reset it and iterate backref */
2451 rec->found_link = 0;
2453 /* Remove all backref including the valid ones */
2454 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2455 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2456 backref->index, backref->name,
2457 backref->namelen, 0);
2461 /* remove invalid backref, so it won't be added back */
/* A backref is valid only with all three of dir_index,
 * dir_item and inode_ref present. */
2462 if (!(backref->found_dir_index &&
2463 backref->found_dir_item &&
2464 backref->found_inode_ref)) {
2465 list_del(&backref->list);
2472 /* Set nlink to 0 */
2473 key.objectid = rec->ino;
2474 key.type = BTRFS_INODE_ITEM_KEY;
2476 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2483 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2484 struct btrfs_inode_item);
2485 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2486 btrfs_mark_buffer_dirty(path->nodes[0]);
2487 btrfs_release_path(path);
2490 * Add back valid inode_ref/dir_item/dir_index,
2491 * add_link() will handle the nlink inc, so new nlink must be correct
2493 list_for_each_entry(backref, &rec->backrefs, list) {
2494 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2495 backref->name, backref->namelen,
2496 backref->filetype, &backref->index, 1);
2501 btrfs_release_path(path);
/*
 * Repair a wrong link count: recover name/type, reset nlink via
 * reset_nlink(), and if no valid backref survived, link the inode into a
 * (possibly newly created) lost+found directory, appending ".INO"
 * suffixes until the name is free.  Always clears
 * I_ERR_LINK_COUNT_WRONG so the caller cannot loop on this inode.
 */
2505 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2506 struct btrfs_root *root,
2507 struct btrfs_path *path,
2508 struct inode_record *rec)
2510 char *dir_name = "lost+found";
2511 char namebuf[BTRFS_NAME_LEN] = {0};
2516 int name_recovered = 0;
2517 int type_recovered = 0;
2521 * Get file name and type first before these invalid inode ref
2522 * are deleted by remove_all_invalid_backref()
2524 name_recovered = !find_file_name(rec, namebuf, &namelen);
2525 type_recovered = !find_file_type(rec, &type);
/* Fallbacks: the inode number as name, regular file as type. */
2527 if (!name_recovered) {
2528 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2529 rec->ino, rec->ino);
2530 namelen = count_digits(rec->ino);
2531 sprintf(namebuf, "%llu", rec->ino);
2534 if (!type_recovered) {
2535 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2537 type = BTRFS_FT_REG_FILE;
2541 ret = reset_nlink(trans, root, path, rec);
2544 "Failed to reset nlink for inode %llu: %s\n",
2545 rec->ino, strerror(-ret));
/* Orphaned completely: give it a home under lost+found. */
2549 if (rec->found_link == 0) {
2550 lost_found_ino = root->highest_inode;
2551 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2556 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2557 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2560 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2561 dir_name, strerror(-ret));
2564 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2565 namebuf, namelen, type, NULL, 1);
2567 * Add ".INO" suffix several times to handle case where
2568 * "FILENAME.INO" is already taken by another file.
2570 while (ret == -EEXIST) {
2572 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2574 if (namelen + count_digits(rec->ino) + 1 >
2579 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2581 namelen += count_digits(rec->ino) + 1;
2582 ret = btrfs_add_link(trans, root, rec->ino,
2583 lost_found_ino, namebuf,
2584 namelen, type, NULL, 1);
2588 "Failed to link the inode %llu to %s dir: %s\n",
2589 rec->ino, dir_name, strerror(-ret));
2593 * Just increase the found_link, don't actually add the
2594 * backref. This will make things easier and this inode
2595 * record will be freed after the repair is done.
2596 * So fsck will not report problem about this inode.
2599 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2600 namelen, namebuf, dir_name);
2602 printf("Fixed the nlink of inode %llu\n", rec->ino);
2605 * Clear the flag anyway, or we will loop forever for the same inode
2606 * as it will not be removed from the bad inode list and the dead loop
2609 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2610 btrfs_release_path(path);
2615 * Check if there is any normal(reg or prealloc) file extent for given
2617 * This is used to determine the file type when neither its dir_index/item or
2618 * inode_item exists.
2620 * This will *NOT* report error, if any error happens, just consider it does
2621 * not have any normal file extent.
/*
 * Return non-zero if inode 'ino' has at least one non-inline (REG or
 * PREALLOC) file extent; used as a heuristic that the inode is a regular
 * file.  Best effort: errors are treated as "no extent found" (see the
 * comment block above this function).
 */
2623 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2625 struct btrfs_path *path;
2626 struct btrfs_key key;
2627 struct btrfs_key found_key;
2628 struct btrfs_file_extent_item *fi;
2632 path = btrfs_alloc_path();
/* key.objectid = ino is set in an elided line before this. */
2636 key.type = BTRFS_EXTENT_DATA_KEY;
2639 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2644 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2645 ret = btrfs_next_leaf(root, path);
2652 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* Stop once we leave this inode's EXTENT_DATA items. */
2654 if (found_key.objectid != ino ||
2655 found_key.type != BTRFS_EXTENT_DATA_KEY)
2657 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2658 struct btrfs_file_extent_item);
2659 type = btrfs_file_extent_type(path->nodes[0], fi);
2660 if (type != BTRFS_FILE_EXTENT_INLINE) {
2666 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry type to the corresponding S_IF* mode
 * bits via a static lookup table.  Callers must pass a valid BTRFS_FT_*
 * value; out-of-range types would index past the table.
 */
2670 static u32 btrfs_type_to_imode(u8 type)
2672 static u32 imode_by_btrfs_type[] = {
2673 [BTRFS_FT_REG_FILE] = S_IFREG,
2674 [BTRFS_FT_DIR] = S_IFDIR,
2675 [BTRFS_FT_CHRDEV] = S_IFCHR,
2676 [BTRFS_FT_BLKDEV] = S_IFBLK,
2677 [BTRFS_FT_FIFO] = S_IFIFO,
2678 [BTRFS_FT_SOCK] = S_IFSOCK,
2679 [BTRFS_FT_SYMLINK] = S_IFLNK,
2682 return imode_by_btrfs_type[(type)];
/*
 * Repair: rebuild a completely missing inode item.  The file type is
 * recovered from backrefs when possible, otherwise guessed from file
 * extents / dir items / orphan extents, falling back to a regular file.
 * Deliberately leaves nlink repair to repair_inode_nlinks() by setting
 * I_ERR_LINK_COUNT_WRONG.
 */
2685 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2686 struct btrfs_root *root,
2687 struct btrfs_path *path,
2688 struct inode_record *rec)
2692 int type_recovered = 0;
2695 printf("Trying to rebuild inode:%llu\n", rec->ino);
2697 type_recovered = !find_file_type(rec, &filetype);
2700 * Try to determine inode type if type not found.
2702 * For found regular file extent, it must be FILE.
2703 * For found dir_item/index, it must be DIR.
2705 * For undetermined one, use FILE as fallback.
2708 * 1. If found backref(inode_index/item is already handled) to it,
2710 * Need new inode-inode ref structure to allow search for that.
2712 if (!type_recovered) {
2713 if (rec->found_file_extent &&
2714 find_normal_file_extent(root, rec->ino)) {
2716 filetype = BTRFS_FT_REG_FILE;
2717 } else if (rec->found_dir_item) {
2719 filetype = BTRFS_FT_DIR;
2720 } else if (!list_empty(&rec->orphan_extents)) {
2722 filetype = BTRFS_FT_REG_FILE;
2724 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2727 filetype = BTRFS_FT_REG_FILE;
2731 ret = btrfs_new_inode(trans, root, rec->ino,
2732 mode | btrfs_type_to_imode(filetype));
2737 * Here inode rebuild is done, we only rebuild the inode item,
2738 * don't repair the nlink(like move to lost+found).
2739 * That is the job of nlink repair.
2741 * We just fill the record and return
2743 rec->found_dir_item = 1;
2744 rec->imode = mode | btrfs_type_to_imode(filetype);
2746 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2747 /* Ensure the inode_nlinks repair function will be called */
2748 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Repair: re-attach orphan data extents to the inode.  For each orphan,
 * check for a conflicting file extent at the target range (freeing the
 * orphan's disk space on conflict), otherwise insert a file extent item
 * and update the record's size and hole accounting.  Clears
 * I_ERR_FILE_EXTENT_ORPHAN when the list is drained.
 */
2753 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2754 struct btrfs_root *root,
2755 struct btrfs_path *path,
2756 struct inode_record *rec)
2758 struct orphan_data_extent *orphan;
2759 struct orphan_data_extent *tmp;
2762 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2764 * Check for conflicting file extents
2766 * Here we don't know whether the extents is compressed or not,
2767 * so we can only assume it not compressed nor data offset,
2768 * and use its disk_len as extent length.
2770 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2771 orphan->offset, orphan->disk_len, 0);
2772 btrfs_release_path(path);
2777 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2778 orphan->disk_bytenr, orphan->disk_len);
2779 ret = btrfs_free_extent(trans,
2780 root->fs_info->extent_root,
2781 orphan->disk_bytenr, orphan->disk_len,
2782 0, root->objectid, orphan->objectid,
/* No conflict: insert the file extent back into the inode. */
2787 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2788 orphan->offset, orphan->disk_bytenr,
2789 orphan->disk_len, orphan->disk_len);
2793 /* Update file size info */
2794 rec->found_size += orphan->disk_len;
2795 if (rec->found_size == rec->nbytes)
2796 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2798 /* Update the file extent hole info too */
2799 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2803 if (RB_EMPTY_ROOT(&rec->holes))
2804 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2806 list_del(&orphan->list);
2809 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill file extent holes ("discount" extents) recorded for @rec by punching
 * explicit holes over each gap in the inode's extent coverage.
 *
 * Walks the rb-tree of recorded holes, punches a hole for each and removes
 * it from the tree; once the tree is empty I_ERR_FILE_EXTENT_DISCOUNT is
 * cleared.  A file that lost all of its file extents is handled specially
 * by punching a single hole over [0, round_up(isize, sectorsize)).
 */
2814 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2815 struct btrfs_root *root,
2816 struct btrfs_path *path,
2817 struct inode_record *rec)
2819 struct rb_node *node;
2820 struct file_extent_hole *hole;
2824 node = rb_first(&rec->holes);
2828 hole = rb_entry(node, struct file_extent_hole, node);
2829 ret = btrfs_punch_hole(trans, root, rec->ino,
2830 hole->start, hole->len);
2833 ret = del_file_extent_hole(&rec->holes, hole->start,
2837 if (RB_EMPTY_ROOT(&rec->holes))
2838 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2839 node = rb_first(&rec->holes);
2841 /* special case for a file losing all its file extent */
2843 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2844 round_up(rec->isize, root->sectorsize));
2848 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2849 rec->ino, root->objectid);
/*
 * Dispatch repair of all fixable per-inode errors recorded in @rec.
 *
 * Bails out early when none of the repairable I_ERR_* bits are set.  The
 * individual repair helpers are called in a fixed order (missing inode item
 * first, then orphan extents, holes, isize, orphan item, nlinks, nbytes),
 * each only if the previous ones succeeded (ret == 0).  All repairs run in
 * one transaction reserving 7 items (see the nlink accounting comment
 * below).  Returns the first repair failure, 0 on success.
 */
2854 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2856 struct btrfs_trans_handle *trans;
2857 struct btrfs_path *path;
/* Nothing we know how to repair: leave the record untouched */
2860 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2861 I_ERR_NO_ORPHAN_ITEM |
2862 I_ERR_LINK_COUNT_WRONG |
2863 I_ERR_NO_INODE_ITEM |
2864 I_ERR_FILE_EXTENT_ORPHAN |
2865 I_ERR_FILE_EXTENT_DISCOUNT|
2866 I_ERR_FILE_NBYTES_WRONG)))
2869 path = btrfs_alloc_path();
2874 * For nlink repair, it may create a dir and add link, so
2875 * 2 for parent(256)'s dir_index and dir_item
2876 * 2 for lost+found dir's inode_item and inode_ref
2877 * 1 for the new inode_ref of the file
2878 * 2 for lost+found dir's dir_index and dir_item for the file
2880 trans = btrfs_start_transaction(root, 7);
2881 if (IS_ERR(trans)) {
2882 btrfs_free_path(path);
2883 return PTR_ERR(trans);
/* Repair order matters: recreate the inode item before anything else */
2886 if (rec->errors & I_ERR_NO_INODE_ITEM)
2887 ret = repair_inode_no_item(trans, root, path, rec);
2888 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2889 ret = repair_inode_orphan_extent(trans, root, path, rec);
2890 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2891 ret = repair_inode_discount_extent(trans, root, path, rec);
2892 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2893 ret = repair_inode_isize(trans, root, path, rec);
2894 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2895 ret = repair_inode_orphan_item(trans, root, path, rec);
2896 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2897 ret = repair_inode_nlinks(trans, root, path, rec);
2898 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2899 ret = repair_inode_nbytes(trans, root, path, rec);
2900 btrfs_commit_transaction(trans, root);
2901 btrfs_free_path(path);
/*
 * Validate (and optionally repair) every inode record collected for @root.
 *
 * Phases visible here:
 *  1. Record the highest inode number so a later 'lost+found' creation can
 *     pick an unused ino.
 *  2. In repair mode, iterate the cache repairing inode backrefs first
 *     (deleting bad ones before adding correct ones to avoid EEXIST).
 *  3. Verify the root directory inode (recreating it in repair mode when
 *     it is missing).
 *  4. Drain the cache: clear stale NO_ORPHAN_ITEM errors, flag missing
 *     inode items and wrong link counts, attempt try_repair_inode(), and
 *     print any remaining errors with their unresolved backrefs.
 * Returns -1 if any unrepaired error remains, 0 otherwise.
 */
2905 static int check_inode_recs(struct btrfs_root *root,
2906 struct cache_tree *inode_cache)
2908 struct cache_extent *cache;
2909 struct ptr_node *node;
2910 struct inode_record *rec;
2911 struct inode_backref *backref;
2916 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (refs == 0) should carry no inode records at all */
2918 if (btrfs_root_refs(&root->root_item) == 0) {
2919 if (!cache_tree_empty(inode_cache))
2920 fprintf(stderr, "warning line %d\n", __LINE__);
2925 * We need to record the highest inode number for later 'lost+found'
2927 * We must select an ino not used/referred by any existing inode, or
2928 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2929 * this may cause 'lost+found' dir has wrong nlinks.
2931 cache = last_cache_extent(inode_cache);
2933 node = container_of(cache, struct ptr_node, cache);
2935 if (rec->ino > root->highest_inode)
2936 root->highest_inode = rec->ino;
2940 * We need to repair backrefs first because we could change some of the
2941 * errors in the inode recs.
2943 * We also need to go through and delete invalid backrefs first and then
2944 * add the correct ones second. We do this because we may get EEXIST
2945 * when adding back the correct index because we hadn't yet deleted the
2948 * For example, if we were missing a dir index then the directories
2949 * isize would be wrong, so if we fixed the isize to what we thought it
2950 * would be and then fixed the backref we'd still have a invalid fs, so
2951 * we need to add back the dir index and then check to see if the isize
2956 if (stage == 3 && !err)
2959 cache = search_cache_extent(inode_cache, 0);
2960 while (repair && cache) {
2961 node = container_of(cache, struct ptr_node, cache);
2963 cache = next_cache_extent(cache);
2965 /* Need to free everything up and rescan */
2967 remove_cache_extent(inode_cache, &node->cache);
2969 free_inode_rec(rec);
2973 if (list_empty(&rec->backrefs))
2976 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Phase 3: the root directory inode must exist and be sane */
2990 rec = get_inode_rec(inode_cache, root_dirid, 0);
2991 BUG_ON(IS_ERR(rec));
2993 ret = check_root_dir(rec);
2995 fprintf(stderr, "root %llu root dir %llu error\n",
2996 (unsigned long long)root->root_key.objectid,
2997 (unsigned long long)root_dirid);
2998 print_inode_error(root, rec);
3003 struct btrfs_trans_handle *trans;
3005 trans = btrfs_start_transaction(root, 1);
3006 if (IS_ERR(trans)) {
3007 err = PTR_ERR(trans);
3012 "root %llu missing its root dir, recreating\n",
3013 (unsigned long long)root->objectid);
3015 ret = btrfs_make_root_dir(trans, root, root_dirid);
3018 btrfs_commit_transaction(trans, root);
3022 fprintf(stderr, "root %llu root dir %llu not found\n",
3023 (unsigned long long)root->root_key.objectid,
3024 (unsigned long long)root_dirid);
/* Phase 4: drain and check every remaining inode record */
3028 cache = search_cache_extent(inode_cache, 0);
3031 node = container_of(cache, struct ptr_node, cache);
3033 remove_cache_extent(inode_cache, &node->cache);
/* Root dir and orphan objectid were handled above / are special */
3035 if (rec->ino == root_dirid ||
3036 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3037 free_inode_rec(rec);
3041 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3042 ret = check_orphan_item(root, rec->ino);
3044 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3045 if (can_free_inode_rec(rec)) {
3046 free_inode_rec(rec);
3051 if (!rec->found_inode_item)
3052 rec->errors |= I_ERR_NO_INODE_ITEM;
3053 if (rec->found_link != rec->nlink)
3054 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3056 ret = try_repair_inode(root, rec);
3057 if (ret == 0 && can_free_inode_rec(rec)) {
3058 free_inode_rec(rec);
3064 if (!(repair && ret == 0))
3066 print_inode_error(root, rec);
3067 list_for_each_entry(backref, &rec->backrefs, list) {
3068 if (!backref->found_dir_item)
3069 backref->errors |= REF_ERR_NO_DIR_ITEM;
3070 if (!backref->found_dir_index)
3071 backref->errors |= REF_ERR_NO_DIR_INDEX;
3072 if (!backref->found_inode_ref)
3073 backref->errors |= REF_ERR_NO_INODE_REF;
3074 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3075 " namelen %u name %s filetype %d errors %x",
3076 (unsigned long long)backref->dir,
3077 (unsigned long long)backref->index,
3078 backref->namelen, backref->name,
3079 backref->filetype, backref->errors);
3080 print_ref_error(backref->errors);
3082 free_inode_rec(rec);
3084 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record (size-1 cache extent keyed by objectid)
 * when none exists yet.
 *
 * Returns the record, ERR_PTR(-ENOMEM) on allocation failure, or
 * ERR_PTR(-EEXIST) if insertion races with an existing entry.
 */
3087 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3090 struct cache_extent *cache;
3091 struct root_record *rec = NULL;
3094 cache = lookup_cache_extent(root_cache, objectid, 1);
3096 rec = container_of(cache, struct root_record, cache);
3098 rec = calloc(1, sizeof(*rec));
3100 return ERR_PTR(-ENOMEM);
3101 rec->objectid = objectid;
3102 INIT_LIST_HEAD(&rec->backrefs);
3103 rec->cache.start = objectid;
3104 rec->cache.size = 1;
3106 ret = insert_cache_extent(root_cache, &rec->cache);
3108 return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec matching (ref_root, dir, name) or allocate a
 * new one.  The name is stored inline after the struct (calloc of
 * sizeof(*backref) + namelen + 1) and NUL-terminated; new backrefs are
 * appended to rec->backrefs.
 *
 * Note: @index is only stored on newly created backrefs here — an existing
 * backref's index is not compared or updated by the lookup.
 */
3113 static struct root_backref *get_root_backref(struct root_record *rec,
3114 u64 ref_root, u64 dir, u64 index,
3115 const char *name, int namelen)
3117 struct root_backref *backref;
3119 list_for_each_entry(backref, &rec->backrefs, list) {
3120 if (backref->ref_root != ref_root || backref->dir != dir ||
3121 backref->namelen != namelen)
3123 if (memcmp(name, backref->name, namelen))
/* Not found: allocate with room for the inline name + NUL */
3128 backref = calloc(1, sizeof(*backref) + namelen + 1);
3131 backref->ref_root = ref_root;
3133 backref->index = index;
3134 backref->namelen = namelen;
3135 memcpy(backref->name, name, namelen);
3136 backref->name[namelen] = '\0';
3137 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for a root_record cache entry: detach and free every backref
 * on its list, then (presumably) free the record itself — the tail of this
 * function is outside the visible excerpt.  Used below to instantiate the
 * cache-tree-wide free helper for root_recs.
 */
3141 static void free_root_record(struct cache_extent *cache)
3143 struct root_record *rec;
3144 struct root_backref *backref;
3146 rec = container_of(cache, struct root_record, cache);
3147 while (!list_empty(&rec->backrefs)) {
3148 backref = to_root_backref(rec->backrefs.next);
3149 list_del(&backref->list);
/* Generates free_root_recs_tree() over a cache_tree of root_records */
3156 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root @root_id coming from @ref_root's directory
 * @dir (entry @name, position @index), as seen from an item of @item_type
 * (DIR_ITEM, DIR_INDEX, ROOT_REF or ROOT_BACKREF).
 *
 * Accumulates @errors plus consistency errors detected here: index
 * mismatches between the different item types, duplicate ROOT_REF /
 * ROOT_BACKREF items.  A root becomes "reachable" once both a forward
 * root ref and a matching dir item have been seen.
 */
3158 static int add_root_backref(struct cache_tree *root_cache,
3159 u64 root_id, u64 ref_root, u64 dir, u64 index,
3160 const char *name, int namelen,
3161 int item_type, int errors)
3163 struct root_record *rec;
3164 struct root_backref *backref;
3166 rec = get_root_rec(root_cache, root_id);
3167 BUG_ON(IS_ERR(rec));
3168 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3171 backref->errors |= errors;
/* DIR_ITEM carries no index; all other item types must agree on it */
3173 if (item_type != BTRFS_DIR_ITEM_KEY) {
3174 if (backref->found_dir_index || backref->found_back_ref ||
3175 backref->found_forward_ref) {
3176 if (backref->index != index)
3177 backref->errors |= REF_ERR_INDEX_UNMATCH;
3179 backref->index = index;
3183 if (item_type == BTRFS_DIR_ITEM_KEY) {
3184 if (backref->found_forward_ref)
3186 backref->found_dir_item = 1;
3187 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3188 backref->found_dir_index = 1;
3189 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3190 if (backref->found_forward_ref)
3191 backref->errors |= REF_ERR_DUP_ROOT_REF;
3192 else if (backref->found_dir_item)
3194 backref->found_forward_ref = 1;
3195 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3196 if (backref->found_back_ref)
3197 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3198 backref->found_back_ref = 1;
3203 if (backref->found_forward_ref && backref->found_dir_item)
3204 backref->reachable = 1;
/*
 * Fold the per-subvolume root cache @src_cache into the global root record
 * cache @dst_cache for @root.
 *
 * Reloc tree records are discarded outright.  For each remaining inode
 * record that is actually a child root (is_child_root()), its dir_item /
 * dir_index backrefs are converted into root backrefs on the destination
 * cache; the source record is then freed.
 */
3208 static int merge_root_recs(struct btrfs_root *root,
3209 struct cache_tree *src_cache,
3210 struct cache_tree *dst_cache)
3212 struct cache_extent *cache;
3213 struct ptr_node *node;
3214 struct inode_record *rec;
3215 struct inode_backref *backref;
/* Reloc trees don't contribute root refs; drop their records wholesale */
3218 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3219 free_inode_recs_tree(src_cache);
3224 cache = search_cache_extent(src_cache, 0);
3227 node = container_of(cache, struct ptr_node, cache);
3229 remove_cache_extent(src_cache, &node->cache);
3232 ret = is_child_root(root, root->objectid, rec->ino);
/* A subvolume entry never has an inode ref in the parent tree */
3238 list_for_each_entry(backref, &rec->backrefs, list) {
3239 BUG_ON(backref->found_inode_ref);
3240 if (backref->found_dir_item)
3241 add_root_backref(dst_cache, rec->ino,
3242 root->root_key.objectid, backref->dir,
3243 backref->index, backref->name,
3244 backref->namelen, BTRFS_DIR_ITEM_KEY,
3246 if (backref->found_dir_index)
3247 add_root_backref(dst_cache, rec->ino,
3248 root->root_key.objectid, backref->dir,
3249 backref->index, backref->name,
3250 backref->namelen, BTRFS_DIR_INDEX_KEY,
3254 free_inode_rec(rec);
/*
 * Verify that every fs tree in @root_cache is referenced from a live root.
 *
 * Starting from the FS tree (always considered referenced), repeatedly
 * clears "reachable" on backrefs whose referencing root itself turned out
 * to be unreferenced, iterating until the reachability set is stable
 * (note: cannot detect circular references).  Then reports: unreferenced
 * fs trees without an orphan item, roots with refs but no root item, and
 * any backref-level errors (missing dir item/index, missing root ref/
 * backref).  Returns 1 if any error was found, 0 otherwise.
 */
3261 static int check_root_refs(struct btrfs_root *root,
3262 struct cache_tree *root_cache)
3264 struct root_record *rec;
3265 struct root_record *ref_root;
3266 struct root_backref *backref;
3267 struct cache_extent *cache;
/* The top-level FS tree is referenced by definition */
3273 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3274 BUG_ON(IS_ERR(rec));
3277 /* fixme: this can not detect circular references */
3280 cache = search_cache_extent(root_cache, 0);
3284 rec = container_of(cache, struct root_record, cache);
3285 cache = next_cache_extent(cache);
3287 if (rec->found_ref == 0)
3290 list_for_each_entry(backref, &rec->backrefs, list) {
3291 if (!backref->reachable)
3294 ref_root = get_root_rec(root_cache,
3296 BUG_ON(IS_ERR(ref_root));
3297 if (ref_root->found_ref > 0)
/* Referencing root is itself dead: this ref no longer counts */
3300 backref->reachable = 0;
3302 if (rec->found_ref == 0)
/* Second pass: report whatever is still inconsistent */
3308 cache = search_cache_extent(root_cache, 0);
3312 rec = container_of(cache, struct root_record, cache);
3313 cache = next_cache_extent(cache);
3315 if (rec->found_ref == 0 &&
3316 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3317 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3318 ret = check_orphan_item(root->fs_info->tree_root,
3324 * If we don't have a root item then we likely just have
3325 * a dir item in a snapshot for this root but no actual
3326 * ref key or anything so it's meaningless.
3328 if (!rec->found_root_item)
3331 fprintf(stderr, "fs tree %llu not referenced\n",
3332 (unsigned long long)rec->objectid);
3336 if (rec->found_ref > 0 && !rec->found_root_item)
3338 list_for_each_entry(backref, &rec->backrefs, list) {
3339 if (!backref->found_dir_item)
3340 backref->errors |= REF_ERR_NO_DIR_ITEM;
3341 if (!backref->found_dir_index)
3342 backref->errors |= REF_ERR_NO_DIR_INDEX;
3343 if (!backref->found_back_ref)
3344 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3345 if (!backref->found_forward_ref)
3346 backref->errors |= REF_ERR_NO_ROOT_REF;
3347 if (backref->reachable && backref->errors)
3354 fprintf(stderr, "fs tree %llu refs %u %s\n",
3355 (unsigned long long)rec->objectid, rec->found_ref,
3356 rec->found_root_item ? "" : "not found");
3358 list_for_each_entry(backref, &rec->backrefs, list) {
3359 if (!backref->reachable)
3361 if (!backref->errors && rec->found_root_item)
3363 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3364 " index %llu namelen %u name %s errors %x\n",
3365 (unsigned long long)backref->ref_root,
3366 (unsigned long long)backref->dir,
3367 (unsigned long long)backref->index,
3368 backref->namelen, backref->name,
3370 print_ref_error(backref->errors);
3373 return errors > 0 ? 1 : 0;
/*
 * Parse one ROOT_REF / ROOT_BACKREF item at @slot of @eb and feed it into
 * the root cache.
 *
 * Reads dirid, sequence and the inline name (truncated to BTRFS_NAME_LEN
 * with REF_ERR_NAME_TOO_LONG recorded if longer).  The key objectid/offset
 * roles swap between the two key types: for ROOT_REF the offset is the
 * child root and the objectid is the referencing root; for ROOT_BACKREF
 * it is the reverse.
 */
3376 static int process_root_ref(struct extent_buffer *eb, int slot,
3377 struct btrfs_key *key,
3378 struct cache_tree *root_cache)
3384 struct btrfs_root_ref *ref;
3385 char namebuf[BTRFS_NAME_LEN];
3388 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3390 dirid = btrfs_root_ref_dirid(eb, ref);
3391 index = btrfs_root_ref_sequence(eb, ref);
3392 name_len = btrfs_root_ref_name_len(eb, ref);
3394 if (name_len <= BTRFS_NAME_LEN) {
/* Name longer than the on-disk limit: clamp and flag the backref */
3398 len = BTRFS_NAME_LEN;
3399 error = REF_ERR_NAME_TOO_LONG;
3401 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3403 if (key->type == BTRFS_ROOT_REF_KEY) {
3404 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3405 index, namebuf, len, key->type, error);
3407 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3408 index, namebuf, len, key->type, error);
/*
 * Destructor for a btrfs_corrupt_block cache entry; used to instantiate
 * free_corrupt_blocks_tree() via the macro below.
 */
3413 static void free_corrupt_block(struct cache_extent *cache)
3415 struct btrfs_corrupt_block *corrupt;
3417 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3421 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3424 * Repair the btree of the given root.
3426 * The fix is to remove the node key in corrupt_blocks cache_tree.
3427 * and rebalance the tree.
3428 * After the fix, the btree should be writeable.
3430 static int repair_btree(struct btrfs_root *root,
3431 struct cache_tree *corrupt_blocks)
3433 struct btrfs_trans_handle *trans;
3434 struct btrfs_path *path;
3435 struct btrfs_corrupt_block *corrupt;
3436 struct cache_extent *cache;
3437 struct btrfs_key key;
/* Nothing recorded as corrupted: nothing to repair */
3442 if (cache_tree_empty(corrupt_blocks))
3445 path = btrfs_alloc_path();
3449 trans = btrfs_start_transaction(root, 1);
3450 if (IS_ERR(trans)) {
3451 ret = PTR_ERR(trans);
3452 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: delete the pointer to each corrupt block and free its extent */
3456 cache = first_cache_extent(corrupt_blocks);
3458 corrupt = container_of(cache, struct btrfs_corrupt_block,
3460 level = corrupt->level;
3461 path->lowest_level = level;
3462 key.objectid = corrupt->key.objectid;
3463 key.type = corrupt->key.type;
3464 key.offset = corrupt->key.offset;
3467 * Here we don't want to do any tree balance, since it may
3468 * cause a balance with corrupted brother leaf/node,
3469 * so ins_len set to 0 here.
3470 * Balance will be done after all corrupt node/leaf is deleted.
3472 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3475 offset = btrfs_node_blockptr(path->nodes[level],
3476 path->slots[level]);
3478 /* Remove the ptr */
3479 ret = btrfs_del_ptr(trans, root, path, level,
3480 path->slots[level]);
3484 * Remove the corresponding extent
3485 * return value is not concerned.
3487 btrfs_release_path(path);
3488 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3489 0, root->root_key.objectid,
3491 cache = next_cache_extent(cache);
3494 /* Balance the btree using btrfs_search_slot() */
3495 cache = first_cache_extent(corrupt_blocks);
3497 corrupt = container_of(cache, struct btrfs_corrupt_block,
3499 memcpy(&key, &corrupt->key, sizeof(key));
3500 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3503 /* return will always >0 since it won't find the item */
3505 btrfs_release_path(path);
3506 cache = next_cache_extent(cache);
3509 btrfs_commit_transaction(trans, root);
3511 btrfs_free_path(path);
/*
 * Fully check one fs/subvolume tree.
 *
 * Sets up a per-tree corrupt-block cache, registers the root in
 * @root_cache (unless it is a reloc tree), moves any orphan data extents
 * onto their owning inode records, checks the root block itself, then
 * walks the whole tree with walk_down_tree()/walk_up_tree() (honouring a
 * snapshot-deletion drop_progress cursor if one is set).  Corrupted
 * blocks found during the walk are reported and, in repair mode,
 * handed to repair_btree().  Finally merges root records and checks all
 * collected inode records.
 */
3515 static int check_fs_root(struct btrfs_root *root,
3516 struct cache_tree *root_cache,
3517 struct walk_control *wc)
3523 struct btrfs_path path;
3524 struct shared_node root_node;
3525 struct root_record *rec;
3526 struct btrfs_root_item *root_item = &root->root_item;
3527 struct cache_tree corrupt_blocks;
3528 struct orphan_data_extent *orphan;
3529 struct orphan_data_extent *tmp;
3530 enum btrfs_tree_block_status status;
3533 * Reuse the corrupt_block cache tree to record corrupted tree block
3535 * Unlike the usage in extent tree check, here we do it in a per
3536 * fs/subvol tree base.
3538 cache_tree_init(&corrupt_blocks);
3539 root->fs_info->corrupt_blocks = &corrupt_blocks;
3541 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3542 rec = get_root_rec(root_cache, root->root_key.objectid);
3543 BUG_ON(IS_ERR(rec));
3544 if (btrfs_root_refs(root_item) > 0)
3545 rec->found_root_item = 1;
3548 btrfs_init_path(&path);
3549 memset(&root_node, 0, sizeof(root_node));
3550 cache_tree_init(&root_node.root_cache);
3551 cache_tree_init(&root_node.inode_cache);
3553 /* Move the orphan extent record to corresponding inode_record */
3554 list_for_each_entry_safe(orphan, tmp,
3555 &root->orphan_data_extents, list) {
3556 struct inode_record *inode;
3558 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3560 BUG_ON(IS_ERR(inode));
3561 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3562 list_move(&orphan->list, &inode->orphan_extents);
3565 level = btrfs_header_level(root->node);
3566 memset(wc->nodes, 0, sizeof(wc->nodes));
3567 wc->nodes[level] = &root_node;
3568 wc->active_node = level;
3569 wc->root_level = level;
3571 /* We may not have checked the root block, lets do that now */
3572 if (btrfs_is_leaf(root->node))
3573 status = btrfs_check_leaf(root, NULL, root->node);
3575 status = btrfs_check_node(root, NULL, root->node);
3576 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root: walk from the top.  Otherwise resume from drop_progress */
3579 if (btrfs_root_refs(root_item) > 0 ||
3580 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3581 path.nodes[level] = root->node;
3582 extent_buffer_get(root->node);
3583 path.slots[level] = 0;
3585 struct btrfs_key key;
3586 struct btrfs_disk_key found_key;
3588 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3589 level = root_item->drop_level;
3590 path.lowest_level = level;
3591 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3594 btrfs_node_key(path.nodes[level], &found_key,
3596 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3597 sizeof(found_key)));
3601 wret = walk_down_tree(root, &path, wc, &level);
3607 wret = walk_up_tree(root, &path, wc, &level);
3614 btrfs_release_path(&path);
3616 if (!cache_tree_empty(&corrupt_blocks)) {
3617 struct cache_extent *cache;
3618 struct btrfs_corrupt_block *corrupt;
3620 printf("The following tree block(s) is corrupted in tree %llu:\n",
3621 root->root_key.objectid);
3622 cache = first_cache_extent(&corrupt_blocks);
3624 corrupt = container_of(cache,
3625 struct btrfs_corrupt_block,
3627 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3628 cache->start, corrupt->level,
3629 corrupt->key.objectid, corrupt->key.type,
3630 corrupt->key.offset);
3631 cache = next_cache_extent(cache);
3634 printf("Try to repair the btree for root %llu\n",
3635 root->root_key.objectid);
3636 ret = repair_btree(root, &corrupt_blocks);
3638 fprintf(stderr, "Failed to repair btree: %s\n",
3641 printf("Btree for root %llu is fixed\n",
3642 root->root_key.objectid);
3646 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3650 if (root_node.current) {
3651 root_node.current->checked = 1;
3652 maybe_free_inode_rec(&root_node.inode_cache,
3656 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down the per-tree state before returning */
3660 free_corrupt_blocks_tree(&corrupt_blocks);
3661 root->fs_info->corrupt_blocks = NULL;
3662 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return non-zero if @objectid names a tree that check_fs_roots() should
 * walk: the reloc trees, the data reloc tree, or any regular fs tree
 * (per is_fstree()).
 */
3666 static int fs_root_objectid(u64 objectid)
3668 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3669 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3671 return is_fstree(objectid);
/*
 * Iterate the tree of tree roots and check every fs/subvolume tree.
 *
 * Scans ROOT_ITEM keys (restarting the whole scan if the tree root node
 * changes underneath us, or if a per-root check returns -EAGAIN), reading
 * reloc roots uncached and freeing them afterwards, and feeds ROOT_REF /
 * ROOT_BACKREF items to process_root_ref().  Progress-task hooks are
 * driven via the global ctx.
 */
3674 static int check_fs_roots(struct btrfs_root *root,
3675 struct cache_tree *root_cache)
3677 struct btrfs_path path;
3678 struct btrfs_key key;
3679 struct walk_control wc;
3680 struct extent_buffer *leaf, *tree_node;
3681 struct btrfs_root *tmp_root;
3682 struct btrfs_root *tree_root = root->fs_info->tree_root;
3686 if (ctx.progress_enabled) {
3687 ctx.tp = TASK_FS_ROOTS;
3688 task_start(ctx.info);
3692 * Just in case we made any changes to the extent tree that weren't
3693 * reflected into the free space cache yet.
3696 reset_cached_block_groups(root->fs_info);
3697 memset(&wc, 0, sizeof(wc));
3698 cache_tree_init(&wc.shared);
3699 btrfs_init_path(&path);
3704 key.type = BTRFS_ROOT_ITEM_KEY;
3705 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3710 tree_node = tree_root->node;
/* Root node changed mid-scan (e.g. by a repair): restart from scratch */
3712 if (tree_node != tree_root->node) {
3713 free_root_recs_tree(root_cache);
3714 btrfs_release_path(&path);
3717 leaf = path.nodes[0];
3718 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3719 ret = btrfs_next_leaf(tree_root, &path);
3725 leaf = path.nodes[0];
3727 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3728 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3729 fs_root_objectid(key.objectid)) {
3730 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3731 tmp_root = btrfs_read_fs_root_no_cache(
3732 root->fs_info, &key);
3734 key.offset = (u64)-1;
3735 tmp_root = btrfs_read_fs_root(
3736 root->fs_info, &key);
3738 if (IS_ERR(tmp_root)) {
3742 ret = check_fs_root(tmp_root, root_cache, &wc);
3743 if (ret == -EAGAIN) {
3744 free_root_recs_tree(root_cache);
3745 btrfs_release_path(&path);
3750 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3751 btrfs_free_fs_root(tmp_root);
3752 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3753 key.type == BTRFS_ROOT_BACKREF_KEY) {
3754 process_root_ref(leaf, path.slots[0], &key,
3761 btrfs_release_path(&path);
3763 free_extent_cache_tree(&wc.shared);
3764 if (!cache_tree_empty(&wc.shared))
3765 fprintf(stderr, "warning line %d\n", __LINE__);
3767 task_stop(ctx.info);
/*
 * Cross-check every backref attached to extent record @rec.
 *
 * Flags (and, when @print_errs is set, prints) backrefs that were never
 * seen in the extent tree, tree backrefs never referenced back, data
 * backrefs whose local ref count / disk bytenr / byte length disagree
 * with the extent record, and finally compares the summed data ref count
 * against the record's global refcount.  Non-zero return means at least
 * one mismatch was found.
 */
3772 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3774 struct list_head *cur = rec->backrefs.next;
3775 struct extent_backref *back;
3776 struct tree_backref *tback;
3777 struct data_backref *dback;
3781 while(cur != &rec->backrefs) {
3782 back = to_extent_backref(cur);
3784 if (!back->found_extent_tree) {
3788 if (back->is_data) {
3789 dback = to_data_backref(back);
3790 fprintf(stderr, "Backref %llu %s %llu"
3791 " owner %llu offset %llu num_refs %lu"
3792 " not found in extent tree\n",
3793 (unsigned long long)rec->start,
3794 back->full_backref ?
3796 back->full_backref ?
3797 (unsigned long long)dback->parent:
3798 (unsigned long long)dback->root,
3799 (unsigned long long)dback->owner,
3800 (unsigned long long)dback->offset,
3801 (unsigned long)dback->num_refs);
3803 tback = to_tree_backref(back);
3804 fprintf(stderr, "Backref %llu parent %llu"
3805 " root %llu not found in extent tree\n",
3806 (unsigned long long)rec->start,
3807 (unsigned long long)tback->parent,
3808 (unsigned long long)tback->root);
3811 if (!back->is_data && !back->found_ref) {
3815 tback = to_tree_backref(back);
3816 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3817 (unsigned long long)rec->start,
3818 back->full_backref ? "parent" : "root",
3819 back->full_backref ?
3820 (unsigned long long)tback->parent :
3821 (unsigned long long)tback->root, back);
3823 if (back->is_data) {
3824 dback = to_data_backref(back);
3825 if (dback->found_ref != dback->num_refs) {
3829 fprintf(stderr, "Incorrect local backref count"
3830 " on %llu %s %llu owner %llu"
3831 " offset %llu found %u wanted %u back %p\n",
3832 (unsigned long long)rec->start,
3833 back->full_backref ?
3835 back->full_backref ?
3836 (unsigned long long)dback->parent:
3837 (unsigned long long)dback->root,
3838 (unsigned long long)dback->owner,
3839 (unsigned long long)dback->offset,
3840 dback->found_ref, dback->num_refs, back);
3842 if (dback->disk_bytenr != rec->start) {
3846 fprintf(stderr, "Backref disk bytenr does not"
3847 " match extent record, bytenr=%llu, "
3848 "ref bytenr=%llu\n",
3849 (unsigned long long)rec->start,
3850 (unsigned long long)dback->disk_bytenr);
3853 if (dback->bytes != rec->nr) {
3857 fprintf(stderr, "Backref bytes do not match "
3858 "extent backref, bytenr=%llu, ref "
3859 "bytes=%llu, backref bytes=%llu\n",
3860 (unsigned long long)rec->start,
3861 (unsigned long long)rec->nr,
3862 (unsigned long long)dback->bytes);
/* Tally data refs so the global refcount can be verified below */
3865 if (!back->is_data) {
3868 dback = to_data_backref(back);
3869 found += dback->found_ref;
3872 if (found != rec->refs) {
3876 fprintf(stderr, "Incorrect global backref count "
3877 "on %llu found %llu wanted %llu\n",
3878 (unsigned long long)rec->start,
3879 (unsigned long long)found,
3880 (unsigned long long)rec->refs);
/*
 * Detach and free every backref hanging off extent record @rec.
 */
3886 static int free_all_extent_backrefs(struct extent_record *rec)
3888 struct extent_backref *back;
3889 struct list_head *cur;
3890 while (!list_empty(&rec->backrefs)) {
3891 cur = rec->backrefs.next;
3892 back = to_extent_backref(cur);
/*
 * Drain @extent_cache completely, freeing each extent record together
 * with all of its backrefs.
 */
3899 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3900 struct cache_tree *extent_cache)
3902 struct cache_extent *cache;
3903 struct extent_record *rec;
3906 cache = first_cache_extent(extent_cache);
3909 rec = container_of(cache, struct extent_record, cache);
3910 remove_cache_extent(extent_cache, cache);
3911 free_all_extent_backrefs(rec);
/*
 * Release @rec from @extent_cache if it is fully verified: content and
 * owner-ref checked, refcounts consistent, no duplicates, all backrefs
 * accounted for, and none of the bad-full-backref / crossing-stripes /
 * wrong-chunk-type conditions set.  Records that still have outstanding
 * problems are kept for later reporting/repair.
 */
3916 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3917 struct extent_record *rec)
3919 if (rec->content_checked && rec->owner_ref_checked &&
3920 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3921 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3922 !rec->bad_full_backref && !rec->crossing_stripes &&
3923 !rec->wrong_chunk_type) {
3924 remove_cache_extent(extent_cache, &rec->cache);
3925 free_all_extent_backrefs(rec);
3926 list_del_init(&rec->list);
/*
 * Verify that the owner recorded in tree block @buf really references it.
 *
 * First scans @rec's tree backrefs for a non-full backref whose root
 * matches the block's header owner.  Failing that, reads the owner's fs
 * tree and searches (at lowest_level = level + 1) for the block's first
 * key; the block is considered owned if the parent node at that level
 * points at buf->start.  Returns 0 when ownership is confirmed, 1 when
 * it is not.
 */
3932 static int check_owner_ref(struct btrfs_root *root,
3933 struct extent_record *rec,
3934 struct extent_buffer *buf)
3936 struct extent_backref *node;
3937 struct tree_backref *back;
3938 struct btrfs_root *ref_root;
3939 struct btrfs_key key;
3940 struct btrfs_path path;
3941 struct extent_buffer *parent;
3946 list_for_each_entry(node, &rec->backrefs, list) {
3949 if (!node->found_ref)
3951 if (node->full_backref)
3953 back = to_tree_backref(node);
3954 if (btrfs_header_owner(buf) == back->root)
3957 BUG_ON(rec->is_root);
3959 /* try to find the block by search corresponding fs tree */
3960 key.objectid = btrfs_header_owner(buf);
3961 key.type = BTRFS_ROOT_ITEM_KEY;
3962 key.offset = (u64)-1;
3964 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3965 if (IS_ERR(ref_root))
3968 level = btrfs_header_level(buf);
/* Use the block's first key as the search anchor */
3970 btrfs_item_key_to_cpu(buf, &key, 0);
3972 btrfs_node_key_to_cpu(buf, &key, 0);
3974 btrfs_init_path(&path);
3975 path.lowest_level = level + 1;
3976 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3980 parent = path.nodes[level + 1];
3981 if (parent && buf->start == btrfs_node_blockptr(parent,
3982 path.slots[level + 1]))
3985 btrfs_release_path(&path);
3986 return found ? 0 : 1;
/*
 * Return whether @rec has a non-full tree backref rooted in the extent
 * tree, i.e. the recorded block belongs to the extent tree itself.
 */
3989 static int is_extent_tree_record(struct extent_record *rec)
3991 struct list_head *cur = rec->backrefs.next;
3992 struct extent_backref *node;
3993 struct tree_backref *back;
3996 while(cur != &rec->backrefs) {
3997 node = to_extent_backref(cur);
4001 back = to_tree_backref(node);
4002 if (node->full_backref)
4004 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Note an I/O failure on the block at [start, start+len).
 *
 * If the block has an extent record and belongs to the extent tree, its
 * parent key is converted and registered as a corrupt extent record so a
 * later repair pass can deal with it.
 */
4011 static int record_bad_block_io(struct btrfs_fs_info *info,
4012 struct cache_tree *extent_cache,
4015 struct extent_record *rec;
4016 struct cache_extent *cache;
4017 struct btrfs_key key;
4019 cache = lookup_cache_extent(extent_cache, start, len);
4023 rec = container_of(cache, struct extent_record, cache);
4024 if (!is_extent_tree_record(rec))
4027 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4028 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap adjacent entries @slot and @slot+1 of @buf to restore key order.
 *
 * For an internal node the two key/blockptr pairs are exchanged wholesale
 * (and, when slot 0 is involved, the parent's low key is fixed up).  For
 * a leaf the item data payloads are swapped via temporary heap buffers,
 * the item offset/size headers are exchanged, and the item keys are
 * rewritten with btrfs_set_item_key_unsafe() so the parent pointers stay
 * consistent.
 */
4031 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4032 struct extent_buffer *buf, int slot)
4034 if (btrfs_header_level(buf)) {
4035 struct btrfs_key_ptr ptr1, ptr2;
4037 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4038 sizeof(struct btrfs_key_ptr));
4039 read_extent_buffer(buf, &ptr2,
4040 btrfs_node_key_ptr_offset(slot + 1),
4041 sizeof(struct btrfs_key_ptr));
4042 write_extent_buffer(buf, &ptr1,
4043 btrfs_node_key_ptr_offset(slot + 1),
4044 sizeof(struct btrfs_key_ptr));
4045 write_extent_buffer(buf, &ptr2,
4046 btrfs_node_key_ptr_offset(slot),
4047 sizeof(struct btrfs_key_ptr));
4049 struct btrfs_disk_key key;
/* Slot 0 changed: propagate the new low key up to the parent */
4050 btrfs_node_key(buf, &key, 0);
4051 btrfs_fixup_low_keys(root, path, &key,
4052 btrfs_header_level(buf) + 1);
4055 struct btrfs_item *item1, *item2;
4056 struct btrfs_key k1, k2;
4057 char *item1_data, *item2_data;
4058 u32 item1_offset, item2_offset, item1_size, item2_size;
4060 item1 = btrfs_item_nr(slot);
4061 item2 = btrfs_item_nr(slot + 1);
4062 btrfs_item_key_to_cpu(buf, &k1, slot);
4063 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4064 item1_offset = btrfs_item_offset(buf, item1);
4065 item2_offset = btrfs_item_offset(buf, item2);
4066 item1_size = btrfs_item_size(buf, item1);
4067 item2_size = btrfs_item_size(buf, item2);
4069 item1_data = malloc(item1_size);
4072 item2_data = malloc(item2_size);
/* Swap the two payloads in place (sizes may differ) */
4078 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4079 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4081 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4082 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4086 btrfs_set_item_offset(buf, item1, item2_offset);
4087 btrfs_set_item_offset(buf, item2, item1_offset);
4088 btrfs_set_item_size(buf, item1, item2_size);
4089 btrfs_set_item_size(buf, item2, item1_size);
4091 path->slots[0] = slot;
4092 btrfs_set_item_key_unsafe(root, path, &k2);
4093 path->slots[0] = slot + 1;
4094 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Restore ascending key order in the block at path->lowest_level by
 * swapping any adjacent pair found out of order (bubble-style, one pass
 * visible here) and marking the buffer dirty afterwards.
 */
4099 static int fix_key_order(struct btrfs_trans_handle *trans,
4100 struct btrfs_root *root,
4101 struct btrfs_path *path)
4103 struct extent_buffer *buf;
4104 struct btrfs_key k1, k2;
4106 int level = path->lowest_level;
4109 buf = path->nodes[level];
4110 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Node vs leaf use different key accessors */
4112 btrfs_node_key_to_cpu(buf, &k1, i);
4113 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4115 btrfs_item_key_to_cpu(buf, &k1, i);
4116 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4118 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4120 ret = swap_values(root, path, buf, i);
4123 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf if its key type is one we know the
 * checker can tolerate losing; otherwise refuse.  The item header array is
 * compacted with memmove_extent_buffer and nritems decremented.  If slot 0
 * was removed the parent's low key is fixed up to the new first key.
 */
4129 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4130 struct btrfs_root *root,
4131 struct btrfs_path *path,
4132 struct extent_buffer *buf, int slot)
4134 struct btrfs_key key;
4135 int nritems = btrfs_header_nritems(buf);
4137 btrfs_item_key_to_cpu(buf, &key, slot);
4139 /* These are all the keys we can deal with missing. */
4140 if (key.type != BTRFS_DIR_INDEX_KEY &&
4141 key.type != BTRFS_EXTENT_ITEM_KEY &&
4142 key.type != BTRFS_METADATA_ITEM_KEY &&
4143 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4144 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4147 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4148 (unsigned long long)key.objectid, key.type,
4149 (unsigned long long)key.offset, slot, buf->start);
/* shift the item headers after @slot down by one to close the gap */
4150 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4151 btrfs_item_nr_offset(slot + 1),
4152 sizeof(struct btrfs_item) *
4153 (nritems - slot - 1));
4154 btrfs_set_header_nritems(buf, nritems - 1);
4156 struct btrfs_disk_key disk_key;
/* slot 0 changed: propagate the new first key to the parent node(s) */
4158 btrfs_item_key(buf, &disk_key, 0);
4159 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4161 btrfs_mark_buffer_dirty(buf);
/*
 * Repair bad item data offsets in a leaf.  For each item, if its data end
 * does not line up with the previous item's data offset (or with
 * BTRFS_LEAF_DATA_SIZE for item 0), either delete the item as bogus (when
 * it overlaps/overruns) or shift its data up by the computed gap.
 */
4165 static int fix_item_offset(struct btrfs_trans_handle *trans,
4166 struct btrfs_root *root,
4167 struct btrfs_path *path)
4169 struct extent_buffer *buf;
4173 /* We should only get this for leaves */
4174 BUG_ON(path->lowest_level);
4175 buf = path->nodes[0];
4177 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4178 unsigned int shift = 0, offset;
4180 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4181 BTRFS_LEAF_DATA_SIZE(root)) {
4182 if (btrfs_item_end_nr(buf, i) >
4183 BTRFS_LEAF_DATA_SIZE(root)) {
/* item 0 extends past the leaf data area: try to delete it */
4184 ret = delete_bogus_item(trans, root, path,
4188 fprintf(stderr, "item is off the end of the "
4189 "leaf, can't fix\n");
4193 shift = BTRFS_LEAF_DATA_SIZE(root) -
4194 btrfs_item_end_nr(buf, i);
4195 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4196 btrfs_item_offset_nr(buf, i - 1)) {
4197 if (btrfs_item_end_nr(buf, i) >
4198 btrfs_item_offset_nr(buf, i - 1)) {
/* this item's data overlaps the previous item's data */
4199 ret = delete_bogus_item(trans, root, path,
4203 fprintf(stderr, "items overlap, can't fix\n");
4207 shift = btrfs_item_offset_nr(buf, i - 1) -
4208 btrfs_item_end_nr(buf, i);
4213 printf("Shifting item nr %d by %u bytes in block %llu\n",
4214 i, shift, (unsigned long long)buf->start);
4215 offset = btrfs_item_offset_nr(buf, i);
/* move the item data up by @shift to close the gap */
4216 memmove_extent_buffer(buf,
4217 btrfs_leaf_data(buf) + offset + shift,
4218 btrfs_leaf_data(buf) + offset,
4219 btrfs_item_size_nr(buf, i));
4220 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4222 btrfs_mark_buffer_dirty(buf);
4226 * We may have moved things, in which case we want to exit so we don't
4227 * write those changes out. Once we have proper abort functionality in
4228 * progs this can be changed to something nicer.
4235 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4236 * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are repairable here.  For every
 * root that references @buf (found via btrfs_find_all_roots), start a
 * transaction, re-search to the block with skip_check_block set, and run
 * the matching fixer (fix_key_order / fix_item_offset).
 */
4238 static int try_to_fix_bad_block(struct btrfs_root *root,
4239 struct extent_buffer *buf,
4240 enum btrfs_tree_block_status status)
4242 struct btrfs_trans_handle *trans;
4243 struct ulist *roots;
4244 struct ulist_node *node;
4245 struct btrfs_root *search_root;
4246 struct btrfs_path *path;
4247 struct ulist_iterator iter;
4248 struct btrfs_key root_key, key;
4251 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4252 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4255 path = btrfs_alloc_path();
4259 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4262 btrfs_free_path(path);
4266 ULIST_ITER_INIT(&iter);
4267 while ((node = ulist_next(roots, &iter))) {
/* resolve each referencing root id to an in-memory root */
4268 root_key.objectid = node->val;
4269 root_key.type = BTRFS_ROOT_ITEM_KEY;
4270 root_key.offset = (u64)-1;
4272 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4279 trans = btrfs_start_transaction(search_root, 0);
4280 if (IS_ERR(trans)) {
4281 ret = PTR_ERR(trans);
/* search down to the bad block itself, bypassing sanity checks */
4285 path->lowest_level = btrfs_header_level(buf);
4286 path->skip_check_block = 1;
4287 if (path->lowest_level)
4288 btrfs_node_key_to_cpu(buf, &key, 0);
4290 btrfs_item_key_to_cpu(buf, &key, 0);
4291 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4294 btrfs_commit_transaction(trans, search_root);
4297 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4298 ret = fix_key_order(trans, search_root, path);
4299 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4300 ret = fix_item_offset(trans, search_root, path);
4302 btrfs_commit_transaction(trans, search_root);
4305 btrfs_release_path(path);
4306 btrfs_commit_transaction(trans, search_root);
4309 btrfs_free_path(path);
/*
 * Validate tree block @buf against its extent record in @extent_cache:
 * record generation/first-key/level info on the extent record, run
 * btrfs_check_leaf/btrfs_check_node, and, when repair is requested, attempt
 * try_to_fix_bad_block.  Marks content/owner-ref checked on success.
 */
4313 static int check_block(struct btrfs_root *root,
4314 struct cache_tree *extent_cache,
4315 struct extent_buffer *buf, u64 flags)
4317 struct extent_record *rec;
4318 struct cache_extent *cache;
4319 struct btrfs_key key;
4320 enum btrfs_tree_block_status status;
4324 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4327 rec = container_of(cache, struct extent_record, cache);
4328 rec->generation = btrfs_header_generation(buf);
4330 level = btrfs_header_level(buf);
4331 if (btrfs_header_nritems(buf) > 0) {
/* stash the block's first key for later cross-checks */
4334 btrfs_item_key_to_cpu(buf, &key, 0);
4336 btrfs_node_key_to_cpu(buf, &key, 0);
4338 rec->info_objectid = key.objectid;
4340 rec->info_level = level;
4342 if (btrfs_is_leaf(buf))
4343 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4345 status = btrfs_check_node(root, &rec->parent_key, buf);
4347 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4349 status = try_to_fix_bad_block(root, buf, status);
4350 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4352 fprintf(stderr, "bad block %llu\n",
4353 (unsigned long long)buf->start);
4356 * Signal to callers we need to start the scan over
4357 * again since we'll have cowed blocks.
4362 rec->content_checked = 1;
4363 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4364 rec->owner_ref_checked = 1;
/* non-full-backref blocks need an explicit owner reference check */
4366 ret = check_owner_ref(root, rec, buf);
4368 rec->owner_ref_checked = 1;
4372 maybe_free_extent_rec(extent_cache, rec);
/*
 * Linear scan of rec->backrefs for a tree (metadata) backref matching either
 * @parent (full backrefs) or @root (normal backrefs).  Returns NULL-style
 * "not found" per the elided tail — TODO confirm against upstream.
 */
4376 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4377 u64 parent, u64 root)
4379 struct list_head *cur = rec->backrefs.next;
4380 struct extent_backref *node;
4381 struct tree_backref *back;
4383 while(cur != &rec->backrefs) {
4384 node = to_extent_backref(cur);
4388 back = to_tree_backref(node);
/* full backrefs match on parent bytenr, normal ones on root id */
4390 if (!node->full_backref)
4392 if (parent == back->parent)
4395 if (node->full_backref)
4397 if (back->root == root)
/*
 * Allocate and zero-init a tree backref for @rec.  A non-zero @parent makes
 * it a full backref keyed by parent bytenr; otherwise it is keyed by @root.
 * The new node is appended to rec->backrefs.
 */
4404 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4405 u64 parent, u64 root)
4407 struct tree_backref *ref = malloc(sizeof(*ref));
4411 memset(&ref->node, 0, sizeof(ref->node));
4413 ref->parent = parent;
4414 ref->node.full_backref = 1;
4417 ref->node.full_backref = 0;
4419 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Find a data backref on @rec matching either @parent (full backref) or the
 * (root, owner, offset) triple.  When @found_ref is set and the candidate
 * was already found via a real ref, its bytes/disk_bytenr must also match
 * @bytes/@disk_bytenr — mismatches are treated as distinct backrefs.
 */
4424 static struct data_backref *find_data_backref(struct extent_record *rec,
4425 u64 parent, u64 root,
4426 u64 owner, u64 offset,
4428 u64 disk_bytenr, u64 bytes)
4430 struct list_head *cur = rec->backrefs.next;
4431 struct extent_backref *node;
4432 struct data_backref *back;
4434 while(cur != &rec->backrefs) {
4435 node = to_extent_backref(cur);
4439 back = to_data_backref(node);
4441 if (!node->full_backref)
4443 if (parent == back->parent)
4446 if (node->full_backref)
4448 if (back->root == root && back->owner == owner &&
4449 back->offset == offset) {
/* skip entries whose recorded extent geometry disagrees */
4450 if (found_ref && node->found_ref &&
4451 (back->bytes != bytes ||
4452 back->disk_bytenr != disk_bytenr))
/*
 * Allocate and zero-init a data backref for @rec, keyed by @parent (full
 * backref) or by (root, owner, offset).  Seeds bytes with @max_size, links
 * it into rec->backrefs and raises rec->max_size if needed.
 */
4461 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4462 u64 parent, u64 root,
4463 u64 owner, u64 offset,
4466 struct data_backref *ref = malloc(sizeof(*ref));
4470 memset(&ref->node, 0, sizeof(ref->node));
4471 ref->node.is_data = 1;
4474 ref->parent = parent;
4477 ref->node.full_backref = 1;
4481 ref->offset = offset;
4482 ref->node.full_backref = 0;
4484 ref->bytes = max_size;
4487 list_add_tail(&ref->node.list, &rec->backrefs);
4488 if (max_size > rec->max_size)
4489 rec->max_size = max_size;
4493 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's data/metadata/system nature
 * disagrees with the flags of the block group that contains it.  SYSTEM vs
 * METADATA is disambiguated via the first backref's root id.
 */
4494 static void check_extent_type(struct extent_record *rec)
4496 struct btrfs_block_group_cache *bg_cache;
4498 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4502 /* data extent, check chunk directly*/
4503 if (!rec->metadata) {
4504 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4505 rec->wrong_chunk_type = 1;
4509 /* metadata extent, check the obvious case first */
4510 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4511 BTRFS_BLOCK_GROUP_METADATA))) {
4512 rec->wrong_chunk_type = 1;
4517 * Check SYSTEM extent, as it's also marked as metadata, we can only
4518 * make sure it's a SYSTEM extent by its backref
4520 if (!list_empty(&rec->backrefs)) {
4521 struct extent_backref *node;
4522 struct tree_backref *tback;
4525 node = to_extent_backref(rec->backrefs.next);
4526 if (node->is_data) {
4527 /* tree block shouldn't have data backref */
4528 rec->wrong_chunk_type = 1;
4531 tback = container_of(node, struct tree_backref, node);
/* chunk-tree owned blocks must live in a SYSTEM block group */
4533 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4534 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4536 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4537 if (!(bg_cache->flags & bg_type))
4538 rec->wrong_chunk_type = 1;
4543 * Allocate a new extent record, fill default values from @tmpl and insert int
4544 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4545 * the cache, otherwise it fails.
4547 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4548 struct extent_record *tmpl)
4550 struct extent_record *rec;
4553 rec = malloc(sizeof(*rec));
/* copy template fields; nr is clamped up to max_size */
4556 rec->start = tmpl->start;
4557 rec->max_size = tmpl->max_size;
4558 rec->nr = max(tmpl->nr, tmpl->max_size);
4559 rec->found_rec = tmpl->found_rec;
4560 rec->content_checked = tmpl->content_checked;
4561 rec->owner_ref_checked = tmpl->owner_ref_checked;
4562 rec->num_duplicates = 0;
4563 rec->metadata = tmpl->metadata;
4564 rec->flag_block_full_backref = FLAG_UNSET;
4565 rec->bad_full_backref = 0;
4566 rec->crossing_stripes = 0;
4567 rec->wrong_chunk_type = 0;
4568 rec->is_root = tmpl->is_root;
4569 rec->refs = tmpl->refs;
4570 rec->extent_item_refs = tmpl->extent_item_refs;
4571 rec->parent_generation = tmpl->parent_generation;
4572 INIT_LIST_HEAD(&rec->backrefs);
4573 INIT_LIST_HEAD(&rec->dups);
4574 INIT_LIST_HEAD(&rec->list);
4575 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4576 rec->cache.start = tmpl->start;
4577 rec->cache.size = tmpl->nr;
4578 ret = insert_cache_extent(extent_cache, &rec->cache);
4580 bytes_used += rec->nr;
/* metadata extents may not straddle a stripe boundary (scrub limit) */
4583 rec->crossing_stripes = check_crossing_stripes(rec->start,
4584 global_info->tree_root->nodesize);
4585 check_extent_type(rec);
4590 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4592 * - refs - if found, increase refs
4593 * - is_root - if found, set
4594 * - content_checked - if found, set
4595 * - owner_ref_checked - if found, set
4597 * If not found, create a new one, initialize and insert.
4599 static int add_extent_rec(struct cache_tree *extent_cache,
4600 struct extent_record *tmpl)
4602 struct extent_record *rec;
4603 struct cache_extent *cache;
4607 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4609 rec = container_of(cache, struct extent_record, cache);
4613 rec->nr = max(tmpl->nr, tmpl->max_size);
4616 * We need to make sure to reset nr to whatever the extent
4617 * record says was the real size, this way we can compare it to
4620 if (tmpl->found_rec) {
4621 if (tmpl->start != rec->start || rec->found_rec) {
4622 struct extent_record *tmp;
/* a second EXTENT_ITEM for the same range: track as duplicate */
4625 if (list_empty(&rec->list))
4626 list_add_tail(&rec->list,
4627 &duplicate_extents);
4630 * We have to do this song and dance in case we
4631 * find an extent record that falls inside of
4632 * our current extent record but does not have
4633 * the same objectid.
4635 tmp = malloc(sizeof(*tmp));
4638 tmp->start = tmpl->start;
4639 tmp->max_size = tmpl->max_size;
4642 tmp->metadata = tmpl->metadata;
4643 tmp->extent_item_refs = tmpl->extent_item_refs;
4644 INIT_LIST_HEAD(&tmp->list);
4645 list_add_tail(&tmp->list, &rec->dups);
4646 rec->num_duplicates++;
4653 if (tmpl->extent_item_refs && !dup) {
4654 if (rec->extent_item_refs) {
/* conflicting refcounts from two extent items: report, then overwrite */
4655 fprintf(stderr, "block %llu rec "
4656 "extent_item_refs %llu, passed %llu\n",
4657 (unsigned long long)tmpl->start,
4658 (unsigned long long)
4659 rec->extent_item_refs,
4660 (unsigned long long)tmpl->extent_item_refs);
4662 rec->extent_item_refs = tmpl->extent_item_refs;
4666 if (tmpl->content_checked)
4667 rec->content_checked = 1;
4668 if (tmpl->owner_ref_checked)
4669 rec->owner_ref_checked = 1;
4670 memcpy(&rec->parent_key, &tmpl->parent_key,
4671 sizeof(tmpl->parent_key));
4672 if (tmpl->parent_generation)
4673 rec->parent_generation = tmpl->parent_generation;
4674 if (rec->max_size < tmpl->max_size)
4675 rec->max_size = tmpl->max_size;
4678 * A metadata extent can't cross stripe_len boundary, otherwise
4679 * kernel scrub won't be able to handle it.
4680 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4684 rec->crossing_stripes = check_crossing_stripes(
4685 rec->start, global_info->tree_root->nodesize);
4686 check_extent_type(rec);
4687 maybe_free_extent_rec(extent_cache, rec);
/* not found: fall through to fresh insertion */
4691 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for @bytenr.  Creates a stub extent record if
 * none exists yet, then finds or allocates the matching tree backref and
 * marks found_ref (@found_ref set, i.e. seen while walking the tree) or
 * found_extent_tree (seen in the extent tree).  Duplicate sightings of the
 * same kind are reported but not fatal.
 */
4696 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4697 u64 parent, u64 root, int found_ref)
4699 struct extent_record *rec;
4700 struct tree_backref *back;
4701 struct cache_extent *cache;
4703 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4705 struct extent_record tmpl;
/* no extent record yet: insert a placeholder so the backref can attach */
4707 memset(&tmpl, 0, sizeof(tmpl));
4708 tmpl.start = bytenr;
4712 add_extent_rec_nolookup(extent_cache, &tmpl);
4714 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4719 rec = container_of(cache, struct extent_record, cache);
4720 if (rec->start != bytenr) {
4724 back = find_tree_backref(rec, parent, root);
4726 back = alloc_tree_backref(rec, parent, root);
4731 if (back->node.found_ref) {
4732 fprintf(stderr, "Extent back ref already exists "
4733 "for %llu parent %llu root %llu \n",
4734 (unsigned long long)bytenr,
4735 (unsigned long long)parent,
4736 (unsigned long long)root);
4738 back->node.found_ref = 1;
4740 if (back->node.found_extent_tree) {
4741 fprintf(stderr, "Extent back ref already exists "
4742 "for %llu parent %llu root %llu \n",
4743 (unsigned long long)bytenr,
4744 (unsigned long long)parent,
4745 (unsigned long long)root);
4747 back->node.found_extent_tree = 1;
4749 check_extent_type(rec);
4750 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for @bytenr, analogous to add_tree_backref but with
 * owner/offset/num_refs bookkeeping.  @max_size is the real extent size when
 * @found_ref is set and must agree with any previously found ref.
 */
4754 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4755 u64 parent, u64 root, u64 owner, u64 offset,
4756 u32 num_refs, int found_ref, u64 max_size)
4758 struct extent_record *rec;
4759 struct data_backref *back;
4760 struct cache_extent *cache;
4762 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4764 struct extent_record tmpl;
/* placeholder extent record so the backref has something to hang off */
4766 memset(&tmpl, 0, sizeof(tmpl));
4767 tmpl.start = bytenr;
4769 tmpl.max_size = max_size;
4771 add_extent_rec_nolookup(extent_cache, &tmpl);
4773 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4778 rec = container_of(cache, struct extent_record, cache);
4779 if (rec->max_size < max_size)
4780 rec->max_size = max_size;
4783 * If found_ref is set then max_size is the real size and must match the
4784 * existing refs. So if we have already found a ref then we need to
4785 * make sure that this ref matches the existing one, otherwise we need
4786 * to add a new backref so we can notice that the backrefs don't match
4787 * and we need to figure out who is telling the truth. This is to
4788 * account for that awful fsync bug I introduced where we'd end up with
4789 * a btrfs_file_extent_item that would have its length include multiple
4790 * prealloc extents or point inside of a prealloc extent.
4792 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4795 back = alloc_data_backref(rec, parent, root, owner, offset,
4801 BUG_ON(num_refs != 1);
4802 if (back->node.found_ref)
4803 BUG_ON(back->bytes != max_size);
4804 back->node.found_ref = 1;
4805 back->found_ref += 1;
4806 back->bytes = max_size;
4807 back->disk_bytenr = bytenr;
/* real refs imply the data content/owner have effectively been seen */
4809 rec->content_checked = 1;
4810 rec->owner_ref_checked = 1;
4812 if (back->node.found_extent_tree) {
4813 fprintf(stderr, "Extent back ref already exists "
4814 "for %llu parent %llu root %llu "
4815 "owner %llu offset %llu num_refs %lu\n",
4816 (unsigned long long)bytenr,
4817 (unsigned long long)parent,
4818 (unsigned long long)root,
4819 (unsigned long long)owner,
4820 (unsigned long long)offset,
4821 (unsigned long)num_refs);
4823 back->num_refs = num_refs;
4824 back->node.found_extent_tree = 1;
4826 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for later scanning: add to @seen first (so a
 * block is only ever queued once) and, if new, also to @pending.
 */
4830 static int add_pending(struct cache_tree *pending,
4831 struct cache_tree *seen, u64 bytenr, u32 size)
4834 ret = add_cache_extent(seen, bytenr, size);
4837 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (up to @bits_nr entries) with the next block ranges to read,
 * preferring readahead entries, then nodes near @last (biased 32K back for
 * locality), then plain pending extents.  When plenty of slots remain, it
 * speculatively batches nearby pending extents within a 32K gap.  Returns
 * the number of entries filled.
 */
4841 static int pick_next_pending(struct cache_tree *pending,
4842 struct cache_tree *reada,
4843 struct cache_tree *nodes,
4844 u64 last, struct block_info *bits, int bits_nr,
4847 unsigned long node_start = last;
4848 struct cache_extent *cache;
4851 cache = search_cache_extent(reada, 0);
4853 bits[0].start = cache->start;
4854 bits[0].size = cache->size;
/* look a little behind @last so nearby nodes batch together */
4859 if (node_start > 32768)
4860 node_start -= 32768;
4862 cache = search_cache_extent(nodes, node_start);
4864 cache = search_cache_extent(nodes, 0);
4867 cache = search_cache_extent(pending, 0);
4872 bits[ret].start = cache->start;
4873 bits[ret].size = cache->size;
4874 cache = next_cache_extent(cache);
4876 } while (cache && ret < bits_nr);
4882 bits[ret].start = cache->start;
4883 bits[ret].size = cache->size;
4884 cache = next_cache_extent(cache);
4886 } while (cache && ret < bits_nr);
4888 if (bits_nr - ret > 8) {
/* batch additional pending extents that sit within a 32K gap */
4889 u64 lookup = bits[0].start + bits[0].size;
4890 struct cache_extent *next;
4891 next = search_cache_extent(pending, lookup);
4893 if (next->start - lookup > 32768)
4895 bits[ret].start = next->start;
4896 bits[ret].size = next->size;
4897 lookup = next->start + next->size;
4901 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: unlink a chunk_record from its lists
 * before it is released.
 */
4909 static void free_chunk_record(struct cache_extent *cache)
4911 struct chunk_record *rec;
4913 rec = container_of(cache, struct chunk_record, cache);
4914 list_del_init(&rec->list);
4915 list_del_init(&rec->dextents);
/* Release every chunk_record held in @chunk_cache. */
4919 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4921 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * rb-tree teardown callback: recover the device_record from its rb_node.
 * NOTE(review): the FREE_RB_BASED_TREE line below belongs to a following
 * free-device-cache helper whose signature was lost in extraction.
 */
4924 static void free_device_record(struct rb_node *node)
4926 struct device_record *rec;
4928 rec = container_of(node, struct device_record, node);
4932 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree's extent cache and, on success, append it to
 * the tree's block_groups list.
 */
4934 int insert_block_group_record(struct block_group_tree *tree,
4935 struct block_group_record *bg_rec)
4939 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4943 list_add_tail(&bg_rec->list, &tree->block_groups);
/* cache_tree_free_extents() callback: unlink a block_group_record. */
4947 static void free_block_group_record(struct cache_extent *cache)
4949 struct block_group_record *rec;
4951 rec = container_of(cache, struct block_group_record, cache);
4952 list_del_init(&rec->list);
/* Release every block_group_record held in @tree. */
4956 void free_block_group_tree(struct block_group_tree *tree)
4958 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device-extent tree and register it on both orphan
 * lists until its chunk and device are matched up.
 */
4961 int insert_device_extent_record(struct device_extent_tree *tree,
4962 struct device_extent_record *de_rec)
4967 * Device extent is a bit different from the other extents, because
4968 * the extents which belong to the different devices may have the
4969 * same start and size, so we need use the special extent cache
4970 * search/insert functions.
4972 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4976 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4977 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: detach a device_extent_record from
 * whichever orphan lists it still sits on.
 */
4981 static void free_device_extent_record(struct cache_extent *cache)
4983 struct device_extent_record *rec;
4985 rec = container_of(cache, struct device_extent_record, cache);
4986 if (!list_empty(&rec->chunk_list))
4987 list_del_init(&rec->chunk_list);
4988 if (!list_empty(&rec->device_list))
4989 list_del_init(&rec->device_list);
/* Release every device_extent_record held in @tree. */
4993 void free_device_extent_tree(struct device_extent_tree *tree)
4995 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4998 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Legacy (v0) extent ref: object ids below FIRST_FREE_OBJECTID denote tree
 * blocks, everything else is data.  key.offset carries the parent bytenr.
 */
4999 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5000 struct extent_buffer *leaf, int slot)
5002 struct btrfs_extent_ref_v0 *ref0;
5003 struct btrfs_key key;
5005 btrfs_item_key_to_cpu(leaf, &key, slot);
5006 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5007 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5008 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5010 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5011 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot.  The record
 * is sized for its stripe count and copies key fields, geometry and
 * per-stripe (devid, offset, dev_uuid).  Exits on allocation failure.
 */
5017 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5018 struct btrfs_key *key,
5021 struct btrfs_chunk *ptr;
5022 struct chunk_record *rec;
5025 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5026 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* record size includes the flexible stripe array */
5028 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5030 fprintf(stderr, "memory allocation failed\n");
5034 INIT_LIST_HEAD(&rec->list);
5035 INIT_LIST_HEAD(&rec->dextents);
5038 rec->cache.start = key->offset;
5039 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5041 rec->generation = btrfs_header_generation(leaf);
5043 rec->objectid = key->objectid;
5044 rec->type = key->type;
5045 rec->offset = key->offset;
5047 rec->length = rec->cache.size;
5048 rec->owner = btrfs_chunk_owner(leaf, ptr);
5049 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5050 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5051 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5052 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5053 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5054 rec->num_stripes = num_stripes;
5055 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5057 for (i = 0; i < rec->num_stripes; ++i) {
5058 rec->stripes[i].devid =
5059 btrfs_stripe_devid_nr(leaf, ptr, i);
5060 rec->stripes[i].offset =
5061 btrfs_stripe_offset_nr(leaf, ptr, i);
5062 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5063 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse the CHUNK_ITEM at (@eb, @slot) into a chunk_record and insert it
 * into @chunk_cache; overlapping/duplicate chunks are reported.
 */
5070 static int process_chunk_item(struct cache_tree *chunk_cache,
5071 struct btrfs_key *key, struct extent_buffer *eb,
5074 struct chunk_record *rec;
5077 rec = btrfs_new_chunk_record(eb, key, slot);
5078 ret = insert_cache_extent(chunk_cache, &rec->cache);
5080 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5081 rec->offset, rec->length);
/*
 * Parse the DEV_ITEM at (@eb, @slot) into a device_record and insert it
 * into the @dev_cache rb-tree keyed by devid; duplicates are reported.
 */
5088 static int process_device_item(struct rb_root *dev_cache,
5089 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5091 struct btrfs_dev_item *ptr;
5092 struct device_record *rec;
5095 ptr = btrfs_item_ptr(eb,
5096 slot, struct btrfs_dev_item);
5098 rec = malloc(sizeof(*rec));
5100 fprintf(stderr, "memory allocation failed\n");
5104 rec->devid = key->offset;
5105 rec->generation = btrfs_header_generation(eb);
5107 rec->objectid = key->objectid;
5108 rec->type = key->type;
5109 rec->offset = key->offset;
/* devid from the item body overrides the key-derived value above */
5111 rec->devid = btrfs_device_id(eb, ptr);
5112 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5113 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5115 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5117 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at @slot.
 * key.objectid is the group start, key.offset its length.  Exits on
 * allocation failure.
 */
5124 struct block_group_record *
5125 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5128 struct btrfs_block_group_item *ptr;
5129 struct block_group_record *rec;
5131 rec = calloc(1, sizeof(*rec));
5133 fprintf(stderr, "memory allocation failed\n");
5137 rec->cache.start = key->objectid;
5138 rec->cache.size = key->offset;
5140 rec->generation = btrfs_header_generation(leaf);
5142 rec->objectid = key->objectid;
5143 rec->type = key->type;
5144 rec->offset = key->offset;
5146 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5147 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5149 INIT_LIST_HEAD(&rec->list);
/*
 * Parse the BLOCK_GROUP_ITEM at (@eb, @slot) and insert the resulting
 * record into @block_group_cache; duplicates are reported.
 */
5154 static int process_block_group_item(struct block_group_tree *block_group_cache,
5155 struct btrfs_key *key,
5156 struct extent_buffer *eb, int slot)
5158 struct block_group_record *rec;
5161 rec = btrfs_new_block_group_record(eb, key, slot);
5162 ret = insert_block_group_record(block_group_cache, rec);
5164 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5165 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot.  key.objectid is the devid, key.offset the on-device start; the
 * owning chunk's objectid/offset and the extent length come from the item
 * body.  Exits on allocation failure.
 */
5172 struct device_extent_record *
5173 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5174 struct btrfs_key *key, int slot)
5176 struct device_extent_record *rec;
5177 struct btrfs_dev_extent *ptr;
5179 rec = calloc(1, sizeof(*rec));
5181 fprintf(stderr, "memory allocation failed\n");
5185 rec->cache.objectid = key->objectid;
5186 rec->cache.start = key->offset;
5188 rec->generation = btrfs_header_generation(leaf);
5190 rec->objectid = key->objectid;
5191 rec->type = key->type;
5192 rec->offset = key->offset;
5194 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5195 rec->chunk_objecteid =
5196 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5198 btrfs_dev_extent_chunk_offset(leaf, ptr);
5199 rec->length = btrfs_dev_extent_length(leaf, ptr);
5200 rec->cache.size = rec->length;
5202 INIT_LIST_HEAD(&rec->chunk_list);
5203 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse the DEV_EXTENT item at (@eb, @slot) and insert the record into
 * @dev_extent_cache; duplicates are reported.
 */
5209 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5210 struct btrfs_key *key, struct extent_buffer *eb,
5213 struct device_extent_record *rec;
5216 rec = btrfs_new_device_extent_record(eb, key, slot);
5217 ret = insert_device_extent_record(dev_extent_cache, rec);
5220 "Device extent[%llu, %llu, %llu] existed.\n",
5221 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM at (@eb, @slot): register the extent
 * record itself, then walk the inline refs and register each as a tree or
 * data backref.  For METADATA_ITEM the byte count is the nodesize (the key
 * offset is the level); for EXTENT_ITEM it is key.offset.
 */
5228 static int process_extent_item(struct btrfs_root *root,
5229 struct cache_tree *extent_cache,
5230 struct extent_buffer *eb, int slot)
5232 struct btrfs_extent_item *ei;
5233 struct btrfs_extent_inline_ref *iref;
5234 struct btrfs_extent_data_ref *dref;
5235 struct btrfs_shared_data_ref *sref;
5236 struct btrfs_key key;
5237 struct extent_record tmpl;
5241 u32 item_size = btrfs_item_size_nr(eb, slot);
5247 btrfs_item_key_to_cpu(eb, &key, slot);
5249 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5251 num_bytes = root->nodesize;
5253 num_bytes = key.offset;
5256 if (item_size < sizeof(*ei)) {
5257 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/* pre-v1 extent item: only a refcount, no flags or inline refs */
5258 struct btrfs_extent_item_v0 *ei0;
5259 BUG_ON(item_size != sizeof(*ei0));
5260 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5261 refs = btrfs_extent_refs_v0(eb, ei0);
5265 memset(&tmpl, 0, sizeof(tmpl));
5266 tmpl.start = key.objectid;
5267 tmpl.nr = num_bytes;
5268 tmpl.extent_item_refs = refs;
5269 tmpl.metadata = metadata;
5271 tmpl.max_size = num_bytes;
5273 return add_extent_rec(extent_cache, &tmpl);
5276 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5277 refs = btrfs_extent_refs(eb, ei);
5278 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5283 memset(&tmpl, 0, sizeof(tmpl));
5284 tmpl.start = key.objectid;
5285 tmpl.nr = num_bytes;
5286 tmpl.extent_item_refs = refs;
5287 tmpl.metadata = metadata;
5289 tmpl.max_size = num_bytes;
5290 add_extent_rec(extent_cache, &tmpl);
5292 ptr = (unsigned long)(ei + 1);
/* EXTENT_ITEM tree blocks carry a tree_block_info before the inline refs */
5293 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5294 key.type == BTRFS_EXTENT_ITEM_KEY)
5295 ptr += sizeof(struct btrfs_tree_block_info);
5297 end = (unsigned long)ei + item_size;
5299 iref = (struct btrfs_extent_inline_ref *)ptr;
5300 type = btrfs_extent_inline_ref_type(eb, iref);
5301 offset = btrfs_extent_inline_ref_offset(eb, iref);
5303 case BTRFS_TREE_BLOCK_REF_KEY:
5304 add_tree_backref(extent_cache, key.objectid,
5307 case BTRFS_SHARED_BLOCK_REF_KEY:
5308 add_tree_backref(extent_cache, key.objectid,
5311 case BTRFS_EXTENT_DATA_REF_KEY:
5312 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5313 add_data_backref(extent_cache, key.objectid, 0,
5314 btrfs_extent_data_ref_root(eb, dref),
5315 btrfs_extent_data_ref_objectid(eb,
5317 btrfs_extent_data_ref_offset(eb, dref),
5318 btrfs_extent_data_ref_count(eb, dref),
5321 case BTRFS_SHARED_DATA_REF_KEY:
5322 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5323 add_data_backref(extent_cache, key.objectid, offset,
5325 btrfs_shared_data_ref_count(eb, sref),
5329 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5330 key.objectid, key.type, num_bytes);
5333 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [offset, offset+bytes) is covered by exactly one free-space
 * entry of the cache.  Superblock mirror stripes inside the range are
 * carved out first (recursing for a left remainder when a super lands in
 * the middle).  Matching entries are unlinked so leftovers can be detected
 * by the caller.
 */
5340 static int check_cache_range(struct btrfs_root *root,
5341 struct btrfs_block_group_cache *cache,
5342 u64 offset, u64 bytes)
5344 struct btrfs_free_space *entry;
5350 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5351 bytenr = btrfs_sb_offset(i);
/* map each super mirror back to logical addresses in this group */
5352 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5353 cache->key.objectid, bytenr, 0,
5354 &logical, &nr, &stripe_len);
5359 if (logical[nr] + stripe_len <= offset)
5361 if (offset + bytes <= logical[nr])
5363 if (logical[nr] == offset) {
5364 if (stripe_len >= bytes) {
5368 bytes -= stripe_len;
5369 offset += stripe_len;
5370 } else if (logical[nr] < offset) {
5371 if (logical[nr] + stripe_len >=
5376 bytes = (offset + bytes) -
5377 (logical[nr] + stripe_len);
5378 offset = logical[nr] + stripe_len;
5381 * Could be tricky, the super may land in the
5382 * middle of the area we're checking. First
5383 * check the easiest case, it's at the end.
5385 if (logical[nr] + stripe_len >=
5387 bytes = logical[nr] - offset;
5391 /* Check the left side */
5392 ret = check_cache_range(root, cache,
5394 logical[nr] - offset);
5400 /* Now we continue with the right side */
5401 bytes = (offset + bytes) -
5402 (logical[nr] + stripe_len);
5403 offset = logical[nr] + stripe_len;
5410 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5412 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5413 offset, offset+bytes);
5417 if (entry->offset != offset) {
5418 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5423 if (entry->bytes != bytes) {
5424 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5425 bytes, entry->bytes, offset);
/* consume the entry so stragglers are visible afterwards */
5429 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check one block group's free-space cache against the extent tree:
 * walk all EXTENT/METADATA items inside the group and require every gap
 * between allocated extents to be present in the cache (check_cache_range).
 * Any entries left over in the cache afterwards are an error.
 */
5434 static int verify_space_cache(struct btrfs_root *root,
5435 struct btrfs_block_group_cache *cache)
5437 struct btrfs_path *path;
5438 struct extent_buffer *leaf;
5439 struct btrfs_key key;
5443 path = btrfs_alloc_path();
5447 root = root->fs_info->extent_root;
/* never scan below the primary superblock */
5449 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5451 key.objectid = last;
5453 key.type = BTRFS_EXTENT_ITEM_KEY;
5455 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5460 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5461 ret = btrfs_next_leaf(root, path);
5469 leaf = path->nodes[0];
5470 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5471 if (key.objectid >= cache->key.offset + cache->key.objectid)
5473 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5474 key.type != BTRFS_METADATA_ITEM_KEY) {
5479 if (last == key.objectid) {
/* extent begins exactly at @last: no gap, just advance */
5480 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5481 last = key.objectid + key.offset;
5483 last = key.objectid + root->nodesize;
5488 ret = check_cache_range(root, cache, last,
5489 key.objectid - last);
5492 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5493 last = key.objectid + key.offset;
5495 last = key.objectid + root->nodesize;
/* trailing free space after the last extent in the group */
5499 if (last < cache->key.objectid + cache->key.offset)
5500 ret = check_cache_range(root, cache, last,
5501 cache->key.objectid +
5502 cache->key.offset - last);
5505 btrfs_free_path(path);
5508 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5509 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free-space cache (v1 file cache or free-space tree) of every
 * block group.  A super generation mismatch invalidates the cache wholesale.
 * Returns -EINVAL if any group failed verification.
 */
5517 static int check_space_cache(struct btrfs_root *root)
5519 struct btrfs_block_group_cache *cache;
5520 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5524 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5525 btrfs_super_generation(root->fs_info->super_copy) !=
5526 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5527 printf("cache and super generation don't match, space cache "
5528 "will be invalidated\n");
5532 if (ctx.progress_enabled) {
5533 ctx.tp = TASK_FREE_SPACE;
5534 task_start(ctx.info);
5538 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5542 start = cache->key.objectid + cache->key.offset;
5543 if (!cache->free_space_ctl) {
5544 if (btrfs_init_free_space_ctl(cache,
5545 root->sectorsize)) {
5550 btrfs_remove_free_space_cache(cache);
/* free-space-tree filesystems load from the tree instead of the v1 file */
5553 if (btrfs_fs_compat_ro(root->fs_info,
5554 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5555 ret = exclude_super_stripes(root, cache);
5557 fprintf(stderr, "could not exclude super stripes: %s\n",
5562 ret = load_free_space_tree(root->fs_info, cache);
5563 free_excluded_extents(root, cache);
5565 fprintf(stderr, "could not load free space tree: %s\n",
5572 ret = load_free_space_cache(root->fs_info, cache);
5577 ret = verify_space_cache(root, cache);
5579 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5580 cache->key.objectid);
5585 task_stop(ctx.info);
5587 return error ? -EINVAL : 0;
/*
 * Read the data of one extent and verify every sectorsize-sized chunk
 * against the checksums stored in the csum tree leaf (@eb at @leaf_offset).
 * On a checksum mismatch, retries the read from the next mirror if more
 * copies exist.
 *
 * @bytenr/@num_bytes: logical range of the data extent to check.
 * @leaf_offset:       byte offset of the first csum inside @eb.
 *
 * NOTE(review): elided lines hide loop resets (data_checked, mirror
 * handling) and the function's cleanup/return path.
 */
5590 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5591 u64 num_bytes, unsigned long leaf_offset,
5592 struct extent_buffer *eb) {
5595 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5597 unsigned long csum_offset;
5601 u64 data_checked = 0;
/* Data extents must be sector-aligned in length. */
5607 if (num_bytes % root->sectorsize)
/* Buffer for the whole extent; read loop below fills it incrementally. */
5610 data = malloc(num_bytes);
5614 while (offset < num_bytes) {
5617 read_len = num_bytes - offset;
5618 /* read as much space once a time */
5619 ret = read_extent_data(root, data + offset,
5620 bytenr + offset, &read_len, mirror);
5624 /* verify every 4k data's checksum */
5625 while (data_checked < read_len) {
5627 tmp = offset + data_checked;
5629 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5630 csum, root->sectorsize);
5631 btrfs_csum_final(csum, (char *)&csum);
/* Locate the on-leaf csum slot for this sector. */
5633 csum_offset = leaf_offset +
5634 tmp / root->sectorsize * csum_size;
5635 read_extent_buffer(eb, (char *)&csum_expected,
5636 csum_offset, csum_size);
5637 /* try another mirror */
5638 if (csum != csum_expected) {
5639 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5640 mirror, bytenr + tmp,
5641 csum, csum_expected);
5642 num_copies = btrfs_num_copies(
5643 &root->fs_info->mapping_tree,
5645 if (mirror < num_copies - 1) {
5650 data_checked += root->sectorsize;
/*
 * Verify that the byte range [bytenr, bytenr + num_bytes) is fully covered
 * by EXTENT_ITEMs in the extent tree.  Walks extent items forward, trimming
 * the queried range as coverage is found; if an extent sits in the middle
 * of the range the function recurses on the right-hand remainder.  Any
 * residue left uncovered is reported as a csum range with no extent record.
 *
 * NOTE(review): elided lines hide several continue/goto-out transitions
 * between the visible statements.
 */
5659 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5662 struct btrfs_path *path;
5663 struct extent_buffer *leaf;
5664 struct btrfs_key key;
5667 path = btrfs_alloc_path();
5669 fprintf(stderr, "Error allocating path\n");
/* Search with offset (u64)-1 lands us just past all items at @bytenr. */
5673 key.objectid = bytenr;
5674 key.type = BTRFS_EXTENT_ITEM_KEY;
5675 key.offset = (u64)-1;
5678 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5681 fprintf(stderr, "Error looking up extent record %d\n", ret);
5682 btrfs_free_path(path);
/* Step back one slot (or one leaf) to the item at/below @bytenr. */
5685 if (path->slots[0] > 0) {
5688 ret = btrfs_prev_leaf(root, path);
5691 } else if (ret > 0) {
5698 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5701 * Block group items come before extent items if they have the same
5702 * bytenr, so walk back one more just in case. Dear future traveller,
5703 * first congrats on mastering time travel. Now if it's not too much
5704 * trouble could you go back to 2006 and tell Chris to make the
5705 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5706 * EXTENT_ITEM_KEY please?
5708 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5709 if (path->slots[0] > 0) {
5712 ret = btrfs_prev_leaf(root, path);
5715 } else if (ret > 0) {
5720 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Main forward walk over extent items overlapping the range. */
5724 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5725 ret = btrfs_next_leaf(root, path);
5727 fprintf(stderr, "Error going to next leaf "
5729 btrfs_free_path(path);
5735 leaf = path->nodes[0];
5736 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5737 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent entirely before the range: keep walking forward. */
5741 if (key.objectid + key.offset < bytenr) {
/* Extent entirely after the range: nothing more can cover it. */
5745 if (key.objectid > bytenr + num_bytes)
5748 if (key.objectid == bytenr) {
5749 if (key.offset >= num_bytes) {
/* Partial cover from the front; shrink the range and continue. */
5753 num_bytes -= key.offset;
5754 bytenr += key.offset;
5755 } else if (key.objectid < bytenr) {
5756 if (key.objectid + key.offset >= bytenr + num_bytes) {
5760 num_bytes = (bytenr + num_bytes) -
5761 (key.objectid + key.offset);
5762 bytenr = key.objectid + key.offset;
5764 if (key.objectid + key.offset < bytenr + num_bytes) {
5765 u64 new_start = key.objectid + key.offset;
5766 u64 new_bytes = bytenr + num_bytes - new_start;
5769 * Weird case, the extent is in the middle of
5770 * our range, we'll have to search one side
5771 * and then the other. Not sure if this happens
5772 * in real life, but no harm in coding it up
5773 * anyway just in case.
5775 btrfs_release_path(path);
/* Recurse on the right half; the left half is handled below. */
5776 ret = check_extent_exists(root, new_start,
5779 fprintf(stderr, "Right section didn't "
5783 num_bytes = key.objectid - bytenr;
5786 num_bytes = key.objectid - bytenr;
/* Anything still uncovered means csums exist without a backing extent. */
5793 if (num_bytes && !ret) {
5794 fprintf(stderr, "There are no extents for csum range "
5795 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5799 btrfs_free_path(path);
/*
 * Walk the entire csum tree.  For each run of contiguous csum items,
 * verify a data extent actually exists for that byte range
 * (check_extent_exists()), and — when -check-data-csum was requested —
 * verify the data against the stored checksums (check_extent_csums()).
 *
 * NOTE(review): elided lines hide the main loop construct and the
 * accumulation/reset of the (offset, num_bytes) run being tracked.
 */
5803 static int check_csums(struct btrfs_root *root)
5805 struct btrfs_path *path;
5806 struct extent_buffer *leaf;
5807 struct btrfs_key key;
5808 u64 offset = 0, num_bytes = 0;
5809 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5813 unsigned long leaf_offset;
/* All csum items live in the dedicated csum tree. */
5815 root = root->fs_info->csum_root;
5816 if (!extent_buffer_uptodate(root->node)) {
5817 fprintf(stderr, "No valid csum tree found\n");
5821 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5822 key.type = BTRFS_EXTENT_CSUM_KEY;
5825 path = btrfs_alloc_path();
5829 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5831 fprintf(stderr, "Error searching csum tree %d\n", ret);
5832 btrfs_free_path(path);
/* ret > 0: landed past the key; step back to the item before it. */
5836 if (ret > 0 && path->slots[0])
5841 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5842 ret = btrfs_next_leaf(root, path);
5844 fprintf(stderr, "Error going to next leaf "
5851 leaf = path->nodes[0];
5853 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5854 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum covers one sector: item bytes / csum_size sectors of data. */
5859 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5860 csum_size) * root->sectorsize;
5861 if (!check_data_csum)
5862 goto skip_csum_check;
5863 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5864 ret = check_extent_csums(root, key.offset, data_len,
/* Non-contiguous csum item: flush the accumulated run first. */
5870 offset = key.offset;
5871 } else if (key.offset != offset + num_bytes) {
5872 ret = check_extent_exists(root, offset, num_bytes);
5874 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5875 "there is no extent record\n",
5876 offset, offset+num_bytes);
5879 offset = key.offset;
5882 num_bytes += data_len;
5886 btrfs_free_path(path);
/*
 * Lexicographic key comparison used while walking a partially-dropped
 * snapshot: reports whether @key sorts before @drop_key
 * (objectid, then type, then offset), i.e. the key was already dropped.
 * NOTE(review): the return statements are elided in this listing;
 * presumably each matched branch returns 1 and the fall-through returns 0
 * — confirm against the full source.
 */
5890 static int is_dropped_key(struct btrfs_key *key,
5891 struct btrfs_key *drop_key) {
5892 if (key->objectid < drop_key->objectid)
5894 else if (key->objectid == drop_key->objectid) {
5895 if (key->type < drop_key->type)
5897 else if (key->type == drop_key->type) {
5898 if (key->offset < drop_key->offset)
5906 * Here are the rules for FULL_BACKREF.
5908 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5909 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5911 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
5912 * if it happened after the relocation occurred since we'll have dropped the
5913 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5914 * have no real way to know for sure.
5916 * We process the blocks one root at a time, and we start from the lowest root
5917 * objectid and go to the highest. So we can just lookup the owner backref for
5918 * the record and if we don't find it then we know it doesn't exist and we have
5921 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5922 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5923 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether @buf should carry BTRFS_BLOCK_FLAG_FULL_BACKREF (set into
 * *@flags) using the rules above, and flag the extent record's
 * bad_full_backref when the recorded state disagrees with the computed one.
 * NOTE(review): elided lines hide early returns and the shared-backref
 * branch between the visible statements.
 */
5925 static int calc_extent_flag(struct btrfs_root *root,
5926 struct cache_tree *extent_cache,
5927 struct extent_buffer *buf,
5928 struct root_item_record *ri,
5931 struct extent_record *rec;
5932 struct cache_extent *cache;
5933 struct tree_backref *tback;
5936 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5937 /* we have added this extent before */
5939 rec = container_of(cache, struct extent_record, cache);
5942 * Except file/reloc tree, we can not have
5945 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Root node of the tree: trivially owned by this root. */
5950 if (buf->start == ri->bytenr)
5953 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5956 owner = btrfs_header_owner(buf);
5957 if (owner == ri->objectid)
/* No owner backref found ⇒ the block was re-parented ⇒ FULL_BACKREF. */
5960 tback = find_tree_backref(rec, 0, owner);
/* Recorded flag says FULL_BACKREF but we computed normal ref: mismatch. */
5965 if (rec->flag_block_full_backref != FLAG_UNSET &&
5966 rec->flag_block_full_backref != 0)
5967 rec->bad_full_backref = 1;
5970 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Recorded flag says normal ref but we computed FULL_BACKREF: mismatch. */
5971 if (rec->flag_block_full_backref != FLAG_UNSET &&
5972 rec->flag_block_full_backref != 1)
5973 rec->bad_full_backref = 1;
/*
 * Core of the extent-tree scan: pick the next pending tree block, read it,
 * work out its backref flags, then process its items.  Leaves feed the
 * various item caches (chunks, devices, block groups, backrefs, csum byte
 * counts, orphans); interior nodes enqueue their children onto the
 * pending/nodes trees and create provisional extent records for them.
 * Also accumulates the global accounting totals (btree/fs-tree/extent-tree
 * bytes, btree_space_waste).
 *
 * NOTE(review): this listing elides many lines (declarations for nritems,
 * bytenr, size, gen, flags, owner, parent, i, ptr, level and numerous
 * continue/else edges); comments below describe only the visible flow.
 */
5977 static int run_next_block(struct btrfs_root *root,
5978 struct block_info *bits,
5981 struct cache_tree *pending,
5982 struct cache_tree *seen,
5983 struct cache_tree *reada,
5984 struct cache_tree *nodes,
5985 struct cache_tree *extent_cache,
5986 struct cache_tree *chunk_cache,
5987 struct rb_root *dev_cache,
5988 struct block_group_tree *block_group_cache,
5989 struct device_extent_tree *dev_extent_cache,
5990 struct root_item_record *ri)
5992 struct extent_buffer *buf;
5993 struct extent_record *rec = NULL;
6004 struct btrfs_key key;
6005 struct cache_extent *cache;
/* Choose the next batch of blocks to visit and optionally read ahead. */
6008 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6009 bits_nr, &reada_bits);
6014 for(i = 0; i < nritems; i++) {
6015 ret = add_cache_extent(reada, bits[i].start,
6020 /* fixme, get the parent transid */
6021 readahead_tree_block(root, bits[i].start,
/* First entry in the batch is the block we actually process now. */
6025 *last = bits[0].start;
6026 bytenr = bits[0].start;
6027 size = bits[0].size;
/* Drop this block from all bookkeeping trees before visiting it. */
6029 cache = lookup_cache_extent(pending, bytenr, size);
6031 remove_cache_extent(pending, cache);
6034 cache = lookup_cache_extent(reada, bytenr, size);
6036 remove_cache_extent(reada, cache);
6039 cache = lookup_cache_extent(nodes, bytenr, size);
6041 remove_cache_extent(nodes, cache);
6044 cache = lookup_cache_extent(extent_cache, bytenr, size);
6046 rec = container_of(cache, struct extent_record, cache);
6047 gen = rec->parent_generation;
6050 /* fixme, get the real parent transid */
6051 buf = read_tree_block(root, bytenr, size, gen);
6052 if (!extent_buffer_uptodate(buf)) {
6053 record_bad_block_io(root->fs_info,
6054 extent_cache, bytenr, size);
6058 nritems = btrfs_header_nritems(buf);
/*
 * Determine FULL_BACKREF state: from the on-disk extent item when we
 * trust the extent tree, otherwise computed via calc_extent_flag().
 */
6061 if (!init_extent_tree) {
6062 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6063 btrfs_header_level(buf), 1, NULL,
6066 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6068 fprintf(stderr, "Couldn't calc extent flags\n");
6069 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6074 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6076 fprintf(stderr, "Couldn't calc extent flags\n");
6077 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6081 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6083 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6084 ri->objectid == btrfs_header_owner(buf)) {
6086 * Ok we got to this block from it's original owner and
6087 * we have FULL_BACKREF set. Relocation can leave
6088 * converted blocks over so this is altogether possible,
6089 * however it's not possible if the generation > the
6090 * last snapshot, so check for this case.
6092 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6093 btrfs_header_generation(buf) > ri->last_snapshot) {
6094 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6095 rec->bad_full_backref = 1;
6100 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6101 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6102 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6103 rec->bad_full_backref = 1;
/* Remember the resolved state on the extent record for later checks. */
6107 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6108 rec->flag_block_full_backref = 1;
6112 rec->flag_block_full_backref = 0;
6114 owner = btrfs_header_owner(buf);
6117 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch on each item's key type. */
6121 if (btrfs_is_leaf(buf)) {
6122 btree_space_waste += btrfs_leaf_free_space(root, buf);
6123 for (i = 0; i < nritems; i++) {
6124 struct btrfs_file_extent_item *fi;
6125 btrfs_item_key_to_cpu(buf, &key, i);
6126 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6127 process_extent_item(root, extent_cache, buf,
6131 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6132 process_extent_item(root, extent_cache, buf,
6136 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6138 btrfs_item_size_nr(buf, i);
6141 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6142 process_chunk_item(chunk_cache, &key, buf, i);
6145 if (key.type == BTRFS_DEV_ITEM_KEY) {
6146 process_device_item(dev_cache, &key, buf, i);
6149 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6150 process_block_group_item(block_group_cache,
6154 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6155 process_device_extent_item(dev_extent_cache,
/* Legacy v0 refs only handled when compiled with compat support. */
6160 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6161 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6162 process_extent_ref_v0(extent_cache, buf, i);
6169 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6170 add_tree_backref(extent_cache, key.objectid, 0,
6174 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6175 add_tree_backref(extent_cache, key.objectid,
6179 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6180 struct btrfs_extent_data_ref *ref;
6181 ref = btrfs_item_ptr(buf, i,
6182 struct btrfs_extent_data_ref);
6183 add_data_backref(extent_cache,
6185 btrfs_extent_data_ref_root(buf, ref),
6186 btrfs_extent_data_ref_objectid(buf,
6188 btrfs_extent_data_ref_offset(buf, ref),
6189 btrfs_extent_data_ref_count(buf, ref),
6190 0, root->sectorsize);
6193 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6194 struct btrfs_shared_data_ref *ref;
6195 ref = btrfs_item_ptr(buf, i,
6196 struct btrfs_shared_data_ref);
6197 add_data_backref(extent_cache,
6198 key.objectid, key.offset, 0, 0, 0,
6199 btrfs_shared_data_ref_count(buf, ref),
6200 0, root->sectorsize);
/* Orphan items are queued for deletion during repair. */
6203 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6204 struct bad_item *bad;
6206 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6210 bad = malloc(sizeof(struct bad_item));
6213 INIT_LIST_HEAD(&bad->list);
6214 memcpy(&bad->key, &key,
6215 sizeof(struct btrfs_key));
6216 bad->root_id = owner;
6217 list_add_tail(&bad->list, &delete_items);
/* Remaining item types: only regular file extents matter below. */
6220 if (key.type != BTRFS_EXTENT_DATA_KEY)
6222 fi = btrfs_item_ptr(buf, i,
6223 struct btrfs_file_extent_item);
6224 if (btrfs_file_extent_type(buf, fi) ==
6225 BTRFS_FILE_EXTENT_INLINE)
/* disk_bytenr == 0 is a hole; nothing to account. */
6227 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6230 data_bytes_allocated +=
6231 btrfs_file_extent_disk_num_bytes(buf, fi);
6232 if (data_bytes_allocated < root->sectorsize) {
6235 data_bytes_referenced +=
6236 btrfs_file_extent_num_bytes(buf, fi);
6237 add_data_backref(extent_cache,
6238 btrfs_file_extent_disk_bytenr(buf, fi),
6239 parent, owner, key.objectid, key.offset -
6240 btrfs_file_extent_offset(buf, fi), 1, 1,
6241 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Interior node: record and enqueue every child pointer. */
6245 struct btrfs_key first_key;
6247 first_key.objectid = 0;
6250 btrfs_item_key_to_cpu(buf, &first_key, 0);
6251 level = btrfs_header_level(buf);
6252 for (i = 0; i < nritems; i++) {
6253 struct extent_record tmpl;
6255 ptr = btrfs_node_blockptr(buf, i);
6256 size = root->nodesize;
6257 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children already dropped by an interrupted snapshot delete. */
6259 if ((level == ri->drop_level)
6260 && is_dropped_key(&key, &ri->drop_key)) {
6265 memset(&tmpl, 0, sizeof(tmpl));
6266 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6267 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6272 tmpl.max_size = size;
6273 ret = add_extent_rec(extent_cache, &tmpl);
6276 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6279 add_pending(nodes, seen, ptr, size);
6281 add_pending(pending, seen, ptr, size);
/* Unused key-pointer slots in this node count as wasted btree space. */
6284 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6285 nritems) * sizeof(struct btrfs_key_ptr);
6287 total_btree_bytes += buf->len;
6288 if (fs_root_objectid(btrfs_header_owner(buf)))
6289 total_fs_tree_bytes += buf->len;
6290 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6291 total_extent_tree_bytes += buf->len;
/* Detect pre-mixed-backref-era reloc blocks (old filesystem format). */
6292 if (!found_old_backref &&
6293 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6294 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6295 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6296 found_old_backref = 1;
6298 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root: enqueue @buf on the nodes tree (interior)
 * or pending tree (leaf), create its extent record, and add the
 * appropriate tree backref — a shared (parent-based) backref for reloc
 * trees / pre-mixed-backref blocks, an owner-based backref otherwise.
 * NOTE(review): elided lines hide some tmpl field initialization and the
 * trailing objectid/parameter handling.
 */
6302 static int add_root_to_pending(struct extent_buffer *buf,
6303 struct cache_tree *extent_cache,
6304 struct cache_tree *pending,
6305 struct cache_tree *seen,
6306 struct cache_tree *nodes,
6309 struct extent_record tmpl;
6311 if (btrfs_header_level(buf) > 0)
6312 add_pending(nodes, seen, buf->start, buf->len);
6314 add_pending(pending, seen, buf->start, buf->len);
6316 memset(&tmpl, 0, sizeof(tmpl));
6317 tmpl.start = buf->start;
6322 tmpl.max_size = buf->len;
6323 add_extent_rec(extent_cache, &tmpl);
/* Reloc/old-format roots use FULL_BACKREF-style (parent) backrefs. */
6325 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6326 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6327 add_tree_backref(extent_cache, buf->start, buf->start,
6330 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6334 /* as we fix the tree, we might be deleting blocks that
6335 * we're tracking for repair. This hook makes sure we
6336 * remove any backrefs for blocks as we are fixing them.
/*
 * Called on extent free during repair: decrement/clear the matching
 * backref (data or tree, selected by whether @owner is a file objectid)
 * in our in-memory extent cache so the cache stays consistent with the
 * on-disk changes we are making.
 * NOTE(review): elided lines hide early returns for missing cache/backref
 * entries and parts of the ref-count bookkeeping.
 */
6338 static int free_extent_hook(struct btrfs_trans_handle *trans,
6339 struct btrfs_root *root,
6340 u64 bytenr, u64 num_bytes, u64 parent,
6341 u64 root_objectid, u64 owner, u64 offset,
6344 struct extent_record *rec;
6345 struct cache_extent *cache;
6347 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners >= FIRST_FREE_OBJECTID are fs-tree inodes, i.e. data extents. */
6349 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6350 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6354 rec = container_of(cache, struct extent_record, cache);
6356 struct data_backref *back;
6357 back = find_data_backref(rec, parent, root_objectid, owner,
6358 offset, 1, bytenr, num_bytes);
6361 if (back->node.found_ref) {
6362 back->found_ref -= refs_to_drop;
6364 rec->refs -= refs_to_drop;
6366 if (back->node.found_extent_tree) {
6367 back->num_refs -= refs_to_drop;
6368 if (rec->extent_item_refs)
6369 rec->extent_item_refs -= refs_to_drop;
6371 if (back->found_ref == 0)
6372 back->node.found_ref = 0;
6373 if (back->num_refs == 0)
6374 back->node.found_extent_tree = 0;
/*
 * NOTE(review): visible condition keeps the backref when found_ref is
 * still set and removes the fully-dropped one — the elided body
 * presumably frees it; confirm against the full source.
 */
6376 if (!back->node.found_extent_tree && back->node.found_ref) {
6377 list_del(&back->node.list);
6381 struct tree_backref *back;
6382 back = find_tree_backref(rec, parent, root_objectid);
6385 if (back->node.found_ref) {
6388 back->node.found_ref = 0;
6390 if (back->node.found_extent_tree) {
6391 if (rec->extent_item_refs)
6392 rec->extent_item_refs--;
6393 back->node.found_extent_tree = 0;
6395 if (!back->node.found_extent_tree && back->node.found_ref) {
6396 list_del(&back->node.list);
/* Drop the extent record entirely if nothing references it anymore. */
6400 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item (extent items, metadata
 * items, and all backref item flavors) keyed at @bytenr, updating block
 * group accounting for the extent/metadata items removed.  Searches
 * backwards from (bytenr, max-type, -1) and loops until no matching item
 * remains.
 * NOTE(review): elided lines hide the loop construct and error exits
 * between the visible statements.
 */
6405 static int delete_extent_records(struct btrfs_trans_handle *trans,
6406 struct btrfs_root *root,
6407 struct btrfs_path *path,
6408 u64 bytenr, u64 new_len)
6410 struct btrfs_key key;
6411 struct btrfs_key found_key;
6412 struct extent_buffer *leaf;
6417 key.objectid = bytenr;
/* offset = -1 positions the search after all items with this objectid. */
6419 key.offset = (u64)-1;
6422 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6429 if (path->slots[0] == 0)
6435 leaf = path->nodes[0];
6436 slot = path->slots[0];
6438 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6439 if (found_key.objectid != bytenr)
/* Skip item types we don't delete; walk the key space downwards. */
6442 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6443 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6444 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6445 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6446 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6447 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6448 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6449 btrfs_release_path(path);
6450 if (found_key.type == 0) {
6451 if (found_key.offset == 0)
6453 key.offset = found_key.offset - 1;
6454 key.type = found_key.type;
6456 key.type = found_key.type - 1;
6457 key.offset = (u64)-1;
6461 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6462 found_key.objectid, found_key.type, found_key.offset);
6464 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6467 btrfs_release_path(path);
/*
 * Removing the extent/metadata item frees space: EXTENT_ITEM length is in
 * the offset, METADATA_ITEM is always one node.
 */
6469 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6470 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6471 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6472 found_key.offset : root->nodesize;
6474 ret = btrfs_update_block_group(trans, root, bytenr,
6481 btrfs_release_path(path);
6486 * for a single backref, this will allocate a new extent
6487 * and add the backref to it.
/*
 * Repair helper: materialize a missing extent item in the extent tree for
 * @rec (inserting the item and updating block group accounting when
 * !@allocated), then add the on-disk backref(s) described by @back via
 * btrfs_inc_extent_ref() — one per found_ref for data, one for tree blocks.
 * NOTE(review): elided lines hide error checks and some control edges;
 * also note the stray double semicolon on the copy_key declaration below
 * (harmless, left untouched here).
 */
6489 static int record_extent(struct btrfs_trans_handle *trans,
6490 struct btrfs_fs_info *info,
6491 struct btrfs_path *path,
6492 struct extent_record *rec,
6493 struct extent_backref *back,
6494 int allocated, u64 flags)
6497 struct btrfs_root *extent_root = info->extent_root;
6498 struct extent_buffer *leaf;
6499 struct btrfs_key ins_key;
6500 struct btrfs_extent_item *ei;
6501 struct tree_backref *tback;
6502 struct data_backref *dback;
6503 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one node in size. */
6506 rec->max_size = max_t(u64, rec->max_size,
6507 info->extent_root->nodesize);
/* Tree block items carry an extra btrfs_tree_block_info after the item. */
6510 u32 item_size = sizeof(*ei);
6513 item_size += sizeof(*bi);
6515 ins_key.objectid = rec->start;
6516 ins_key.offset = rec->max_size;
6517 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6519 ret = btrfs_insert_empty_item(trans, extent_root, path,
6520 &ins_key, item_size);
6524 leaf = path->nodes[0];
6525 ei = btrfs_item_ptr(leaf, path->slots[0],
6526 struct btrfs_extent_item);
/* Refs start at 0; each btrfs_inc_extent_ref() below bumps it. */
6528 btrfs_set_extent_refs(leaf, ei, 0);
6529 btrfs_set_extent_generation(leaf, ei, rec->generation);
6531 if (back->is_data) {
6532 btrfs_set_extent_flags(leaf, ei,
6533 BTRFS_EXTENT_FLAG_DATA);
6535 struct btrfs_disk_key copy_key;;
6537 tback = to_tree_backref(back);
6538 bi = (struct btrfs_tree_block_info *)(ei + 1);
6539 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* We don't know the real first key of the block; write a zero key. */
6542 btrfs_set_disk_key_objectid(&copy_key,
6543 rec->info_objectid);
6544 btrfs_set_disk_key_type(&copy_key, 0);
6545 btrfs_set_disk_key_offset(&copy_key, 0);
6547 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6548 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6550 btrfs_set_extent_flags(leaf, ei,
6551 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6554 btrfs_mark_buffer_dirty(leaf);
/* Newly materialized extent: account it as used in its block group. */
6555 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6556 rec->max_size, 1, 0);
6559 btrfs_release_path(path);
6562 if (back->is_data) {
6566 dback = to_data_backref(back);
6567 if (back->full_backref)
6568 parent = dback->parent;
/* One inc per reference we actually found pointing at this extent. */
6572 for (i = 0; i < dback->found_ref; i++) {
6573 /* if parent != 0, we're doing a full backref
6574 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6575 * just makes the backref allocator create a data
6578 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6579 rec->start, rec->max_size,
6583 BTRFS_FIRST_FREE_OBJECTID :
6589 fprintf(stderr, "adding new data backref"
6590 " on %llu %s %llu owner %llu"
6591 " offset %llu found %d\n",
6592 (unsigned long long)rec->start,
6593 back->full_backref ?
6595 back->full_backref ?
6596 (unsigned long long)parent :
6597 (unsigned long long)dback->root,
6598 (unsigned long long)dback->owner,
6599 (unsigned long long)dback->offset,
6604 tback = to_tree_backref(back);
6605 if (back->full_backref)
6606 parent = tback->parent;
6610 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6611 rec->start, rec->max_size,
6612 parent, tback->root, 0, 0);
6613 fprintf(stderr, "adding new tree backref on "
6614 "start %llu len %llu parent %llu root %llu\n",
6615 rec->start, rec->max_size, parent, tback->root);
6618 btrfs_release_path(path);
/*
 * Linear scan of an extent_entry list for the entry matching both @bytenr
 * and @bytes.  NOTE(review): the return statements are elided in this
 * listing; presumably the match is returned and NULL on no match —
 * confirm against the full source.
 */
6622 static struct extent_entry *find_entry(struct list_head *entries,
6623 u64 bytenr, u64 bytes)
6625 struct extent_entry *entry = NULL;
6627 list_for_each_entry(entry, entries, list) {
6628 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Vote among conflicting extent_entry candidates: pick the entry with the
 * strictly highest count, ignoring entries whose refs are all broken.
 * A tie for the top count means no winner can be trusted.
 * NOTE(review): elided lines hide the tie-resolution/return paths; the
 * visible logic tracks a 'prev' candidate to detect and break ties.
 */
6635 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6637 struct extent_entry *entry, *best = NULL, *prev = NULL;
6639 list_for_each_entry(entry, entries, list) {
6646 * If there are as many broken entries as entries then we know
6647 * not to trust this particular entry.
6649 if (entry->broken == entry->count)
6653 * If our current entry == best then we can't be sure our best
6654 * is really the best, so we need to keep searching.
6656 if (best && best->count == entry->count) {
6662 /* Prev == entry, not good enough, have to keep searching */
6663 if (!prev->broken && prev->count == entry->count)
6667 best = (prev->count > entry->count) ? prev : entry;
6668 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so its (disk_bytenr, disk_num_bytes) match
 * the agreed-upon extent @entry.  Finds the owning fs root and the file
 * extent the backref @dback describes, then cows down to it in a
 * transaction and adjusts bytenr/offset/disk_num_bytes (and ram_bytes for
 * uncompressed extents).  Several shapes of mismatch (compressed extents,
 * refs outside the entry's range) are reported as unfixable instead.
 * NOTE(review): elided lines hide loop constructs and error-path exits.
 */
6676 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6677 struct data_backref *dback, struct extent_entry *entry)
6679 struct btrfs_trans_handle *trans;
6680 struct btrfs_root *root;
6681 struct btrfs_file_extent_item *fi;
6682 struct extent_buffer *leaf;
6683 struct btrfs_key key;
/* Locate the subvolume root this backref belongs to. */
6687 key.objectid = dback->root;
6688 key.type = BTRFS_ROOT_ITEM_KEY;
6689 key.offset = (u64)-1;
6690 root = btrfs_read_fs_root(info, &key);
6692 fprintf(stderr, "Couldn't find root for our ref\n");
6697 * The backref points to the original offset of the extent if it was
6698 * split, so we need to search down to the offset we have and then walk
6699 * forward until we find the backref we're looking for.
6701 key.objectid = dback->owner;
6702 key.type = BTRFS_EXTENT_DATA_KEY;
6703 key.offset = dback->offset;
6704 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6706 fprintf(stderr, "Error looking up ref %d\n", ret);
6711 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6712 ret = btrfs_next_leaf(root, path);
6714 fprintf(stderr, "Couldn't find our ref, next\n");
6718 leaf = path->nodes[0];
6719 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6720 if (key.objectid != dback->owner ||
6721 key.type != BTRFS_EXTENT_DATA_KEY) {
6722 fprintf(stderr, "Couldn't find our ref, search\n");
6725 fi = btrfs_item_ptr(leaf, path->slots[0],
6726 struct btrfs_file_extent_item);
6727 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6728 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Matched the exact file extent this backref describes. */
6730 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6735 btrfs_release_path(path);
6737 trans = btrfs_start_transaction(root, 1);
6739 return PTR_ERR(trans);
6742 * Ok we have the key of the file extent we want to fix, now we can cow
6743 * down to the thing and fix it.
6745 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6747 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6748 key.objectid, key.type, key.offset, ret);
6752 fprintf(stderr, "Well that's odd, we just found this key "
6753 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6758 leaf = path->nodes[0];
6759 fi = btrfs_item_ptr(leaf, path->slots[0],
6760 struct btrfs_file_extent_item);
/* Compressed extents can't have their bytenr shifted safely. */
6762 if (btrfs_file_extent_compression(leaf, fi) &&
6763 dback->disk_bytenr != entry->bytenr) {
6764 fprintf(stderr, "Ref doesn't match the record start and is "
6765 "compressed, please take a btrfs-image of this file "
6766 "system and send it to a btrfs developer so they can "
6767 "complete this functionality for bytenr %Lu\n",
6768 dback->disk_bytenr);
/* Broken ref: just point it at the agreed extent start. */
6773 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6774 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6775 } else if (dback->disk_bytenr > entry->bytenr) {
6776 u64 off_diff, offset;
/*
 * Ref starts inside the real extent: move bytenr back to the extent
 * start and push the in-extent offset forward to compensate.
 */
6778 off_diff = dback->disk_bytenr - entry->bytenr;
6779 offset = btrfs_file_extent_offset(leaf, fi);
6780 if (dback->disk_bytenr + offset +
6781 btrfs_file_extent_num_bytes(leaf, fi) >
6782 entry->bytenr + entry->bytes) {
6783 fprintf(stderr, "Ref is past the entry end, please "
6784 "take a btrfs-image of this file system and "
6785 "send it to a btrfs developer, ref %Lu\n",
6786 dback->disk_bytenr);
6791 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6792 btrfs_set_file_extent_offset(leaf, fi, offset);
6793 } else if (dback->disk_bytenr < entry->bytenr) {
/* Ref starts before the real extent: shrink the offset instead. */
6796 offset = btrfs_file_extent_offset(leaf, fi);
6797 if (dback->disk_bytenr + offset < entry->bytenr) {
6798 fprintf(stderr, "Ref is before the entry start, please"
6799 " take a btrfs-image of this file system and "
6800 "send it to a btrfs developer, ref %Lu\n",
6801 dback->disk_bytenr);
6806 offset += dback->disk_bytenr;
6807 offset -= entry->bytenr;
6808 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6809 btrfs_set_file_extent_offset(leaf, fi, offset);
6812 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6815 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6816 * only do this if we aren't using compression, otherwise it's a
6819 if (!btrfs_file_extent_compression(leaf, fi))
6820 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6822 printf("ram bytes may be wrong?\n");
6823 btrfs_mark_buffer_dirty(leaf);
6825 err = btrfs_commit_transaction(trans, root);
6826 btrfs_release_path(path);
6827 return ret ? ret : err;
/*
 * Resolve disagreements among an extent record's data backrefs about the
 * extent's (bytenr, bytes).  Builds a list of distinct (bytenr, bytes)
 * candidates voted on by the backrefs, elects a winner via
 * find_most_right_entry() (falling back to the extent record itself on a
 * tie), then calls repair_ref() on every backref that disagrees with the
 * winner.  Only applies to data backrefs that are not full backrefs;
 * metadata is expected to always agree.
 * NOTE(review): elided lines hide entry->count/broken bookkeeping,
 * several error exits and the final return.
 */
6830 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6831 struct extent_record *rec)
6833 struct extent_backref *back;
6834 struct data_backref *dback;
6835 struct extent_entry *entry, *best = NULL;
6838 int broken_entries = 0;
6843 * Metadata is easy and the backrefs should always agree on bytenr and
6844 * size, if not we've got bigger issues.
/* Pass 1: tally each distinct (bytenr, bytes) the backrefs claim. */
6849 list_for_each_entry(back, &rec->backrefs, list) {
6850 if (back->full_backref || !back->is_data)
6853 dback = to_data_backref(back);
6856 * We only pay attention to backrefs that we found a real
6859 if (dback->found_ref == 0)
6863 * For now we only catch when the bytes don't match, not the
6864 * bytenr. We can easily do this at the same time, but I want
6865 * to have a fs image to test on before we just add repair
6866 * functionality willy-nilly so we know we won't screw up the
6870 entry = find_entry(&entries, dback->disk_bytenr,
6873 entry = malloc(sizeof(struct extent_entry));
6878 memset(entry, 0, sizeof(*entry));
6879 entry->bytenr = dback->disk_bytenr;
6880 entry->bytes = dback->bytes;
6881 list_add_tail(&entry->list, &entries);
6886 * If we only have on entry we may think the entries agree when
6887 * in reality they don't so we have to do some extra checking.
6889 if (dback->disk_bytenr != rec->start ||
6890 dback->bytes != rec->nr || back->broken)
6901 /* Yay all the backrefs agree, carry on good sir */
6902 if (nr_entries <= 1 && !mismatch)
6905 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6906 "%Lu\n", rec->start);
6909 * First we want to see if the backrefs can agree amongst themselves who
6910 * is right, so figure out which one of the entries has the highest
6913 best = find_most_right_entry(&entries);
6916 * Ok so we may have an even split between what the backrefs think, so
6917 * this is where we use the extent ref to see what it thinks.
6920 entry = find_entry(&entries, rec->start, rec->nr);
6921 if (!entry && (!broken_entries || !rec->found_rec)) {
6922 fprintf(stderr, "Backrefs don't agree with each other "
6923 "and extent record doesn't agree with anybody,"
6924 " so we can't fix bytenr %Lu bytes %Lu\n",
6925 rec->start, rec->nr);
6928 } else if (!entry) {
6930 * Ok our backrefs were broken, we'll assume this is the
6931 * correct value and add an entry for this range.
6933 entry = malloc(sizeof(struct extent_entry));
6938 memset(entry, 0, sizeof(*entry));
6939 entry->bytenr = rec->start;
6940 entry->bytes = rec->nr;
6941 list_add_tail(&entry->list, &entries);
/* Re-vote now that the extent record's own claim is in the list. */
6945 best = find_most_right_entry(&entries);
6947 fprintf(stderr, "Backrefs and extent record evenly "
6948 "split on who is right, this is going to "
6949 "require user input to fix bytenr %Lu bytes "
6950 "%Lu\n", rec->start, rec->nr);
6957 * I don't think this can happen currently as we'll abort() if we catch
6958 * this case higher up, but in case somebody removes that we still can't
6959 * deal with it properly here yet, so just bail out of that's the case.
6961 if (best->bytenr != rec->start) {
6962 fprintf(stderr, "Extent start and backref starts don't match, "
6963 "please use btrfs-image on this file system and send "
6964 "it to a btrfs developer so they can make fsck fix "
6965 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6966 rec->start, rec->nr);
/* Pass 2: fix every real ref that disagrees with the elected winner. */
6972 * Ok great we all agreed on an extent record, let's go find the real
6973 * references and fix up the ones that don't match.
6975 list_for_each_entry(back, &rec->backrefs, list) {
6976 if (back->full_backref || !back->is_data)
6979 dback = to_data_backref(back);
6982 * Still ignoring backrefs that don't have a real ref attached
6985 if (dback->found_ref == 0)
6988 if (dback->bytes == best->bytes &&
6989 dback->disk_bytenr == best->bytenr)
6992 ret = repair_ref(info, path, dback, best);
6998 * Ok we messed with the actual refs, which means we need to drop our
6999 * entire cache and go back and rescan. I know this is a huge pain and
7000 * adds a lot of extra work, but it's the only way to be safe. Once all
7001 * the backrefs agree we may not need to do anything to the extent
/* Free the candidate list before returning. */
7006 while (!list_empty(&entries)) {
7007 entry = list_entry(entries.next, struct extent_entry, list);
7008 list_del_init(&entry->list);
/*
 * Resolve a duplicated extent record before deletion is attempted.
 *
 * If @rec never matched a real extent item (found_rec == 0) but has exactly
 * one duplicate, the duplicate came from an actual BTRFS_EXTENT_ITEM_KEY, so
 * it is promoted to be the canonical record in @extent_cache, absorbing
 * @rec's backrefs/refs and any overlapping records found afterwards.
 *
 * Returns 0 when duplicates remain and the caller must delete records,
 * non-zero (1) when nothing is left to delete.
 * NOTE(review): this listing elides some original lines, so intermediate
 * statements (e.g. early returns) are not all visible here.
 */
7014 static int process_duplicates(struct btrfs_root *root,
7015 struct cache_tree *extent_cache,
7016 struct extent_record *rec)
7018 struct extent_record *good, *tmp;
7019 struct cache_extent *cache;
7023 * If we found a extent record for this extent then return, or if we
7024 * have more than one duplicate we are likely going to need to delete
7027 if (rec->found_rec || rec->num_duplicates > 1)
7030 /* Shouldn't happen but just in case */
7031 BUG_ON(!rec->num_duplicates);
7034 * So this happens if we end up with a backref that doesn't match the
7035 * actual extent entry. So either the backref is bad or the extent
7036 * entry is bad. Either way we want to have the extent_record actually
7037 * reflect what we found in the extent_tree, so we need to take the
7038 * duplicate out and use that as the extent_record since the only way we
7039 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7041 remove_cache_extent(extent_cache, &rec->cache);
7043 good = to_extent_record(rec->dups.next);
7044 list_del_init(&good->list);
/* Re-initialize the promoted record and inherit @rec's state. */
7045 INIT_LIST_HEAD(&good->backrefs);
7046 INIT_LIST_HEAD(&good->dups);
7047 good->cache.start = good->start;
7048 good->cache.size = good->nr;
7049 good->content_checked = 0;
7050 good->owner_ref_checked = 0;
7051 good->num_duplicates = 0;
7052 good->refs = rec->refs;
7053 list_splice_init(&rec->backrefs, &good->backrefs);
/* Sweep up any records that still overlap the promoted range. */
7055 cache = lookup_cache_extent(extent_cache, good->start,
7059 tmp = container_of(cache, struct extent_record, cache);
7062 * If we find another overlapping extent and it's found_rec is
7063 * set then it's a duplicate and we need to try and delete
7066 if (tmp->found_rec || tmp->num_duplicates > 0) {
7067 if (list_empty(&good->list))
7068 list_add_tail(&good->list,
7069 &duplicate_extents);
7070 good->num_duplicates += tmp->num_duplicates + 1;
7071 list_splice_init(&tmp->dups, &good->dups);
7072 list_del_init(&tmp->list);
7073 list_add_tail(&tmp->list, &good->dups);
7074 remove_cache_extent(extent_cache, &tmp->cache);
7079 * Ok we have another non extent item backed extent rec, so lets
7080 * just add it to this extent and carry on like we did above.
7082 good->refs += tmp->refs;
7083 list_splice_init(&tmp->backrefs, &good->backrefs);
7084 remove_cache_extent(extent_cache, &tmp->cache);
7087 ret = insert_cache_extent(extent_cache, &good->cache);
7090 return good->num_duplicates ? 0 : 1;
/*
 * Delete duplicate EXTENT_ITEMs for the range covered by @rec.
 *
 * Picks the record that covers all duplicates ("good"), bails out if the
 * duplicates only partially overlap, then deletes every other record's
 * extent item from the extent tree in one transaction.
 *
 * Returns the number of items deleted (nr_del) on success, or a negative
 * errno on failure.  A return of 0 also clears rec->num_duplicates.
 * NOTE(review): elided lines hide parts of the flow (e.g. goto labels).
 */
7093 static int delete_duplicate_records(struct btrfs_root *root,
7094 struct extent_record *rec)
7096 struct btrfs_trans_handle *trans;
7097 LIST_HEAD(delete_list);
7098 struct btrfs_path *path;
7099 struct extent_record *tmp, *good, *n;
7102 struct btrfs_key key;
7104 path = btrfs_alloc_path();
7111 /* Find the record that covers all of the duplicates. */
7112 list_for_each_entry(tmp, &rec->dups, list) {
7113 if (good->start < tmp->start)
7115 if (good->nr > tmp->nr)
/* Partial overlaps can't be fixed automatically; report and abort. */
7118 if (tmp->start + tmp->nr < good->start + good->nr) {
7119 fprintf(stderr, "Ok we have overlapping extents that "
7120 "aren't completely covered by each other, this "
7121 "is going to require more careful thought. "
7122 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7123 tmp->start, tmp->nr, good->start, good->nr);
7130 list_add_tail(&rec->list, &delete_list);
7132 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7135 list_move_tail(&tmp->list, &delete_list);
/* All deletions go through the extent tree. */
7138 root = root->fs_info->extent_root;
7139 trans = btrfs_start_transaction(root, 1);
7140 if (IS_ERR(trans)) {
7141 ret = PTR_ERR(trans);
7145 list_for_each_entry(tmp, &delete_list, list) {
7146 if (tmp->found_rec == 0)
7148 key.objectid = tmp->start;
7149 key.type = BTRFS_EXTENT_ITEM_KEY;
7150 key.offset = tmp->nr;
7152 /* Shouldn't happen but just in case */
7153 if (tmp->metadata) {
7154 fprintf(stderr, "Well this shouldn't happen, extent "
7155 "record overlaps but is metadata? "
7156 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7160 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7166 ret = btrfs_del_item(trans, root, path);
7169 btrfs_release_path(path);
7172 err = btrfs_commit_transaction(trans, root);
/* Free everything that was queued for deletion. */
7176 while (!list_empty(&delete_list)) {
7177 tmp = to_extent_record(delete_list.next);
7178 list_del_init(&tmp->list);
7184 while (!list_empty(&rec->dups)) {
7185 tmp = to_extent_record(rec->dups.next);
7186 list_del_init(&tmp->list);
7190 btrfs_free_path(path);
7192 if (!ret && !nr_del)
7193 rec->num_duplicates = 0;
7195 return ret ? ret : nr_del;
/*
 * For data backrefs of @rec that were never matched to a real file extent
 * (found_ref == 0), look the file extent up in its owning fs tree.  If the
 * referenced disk bytenr has no extent record of its own, adopt the file
 * extent's (bytenr, bytes) into the backref so verify_backrefs() can use it.
 *
 * Returns 0 on success or a negative errno from root/tree lookups.
 */
7198 static int find_possible_backrefs(struct btrfs_fs_info *info,
7199 struct btrfs_path *path,
7200 struct cache_tree *extent_cache,
7201 struct extent_record *rec)
7203 struct btrfs_root *root;
7204 struct extent_backref *back;
7205 struct data_backref *dback;
7206 struct cache_extent *cache;
7207 struct btrfs_file_extent_item *fi;
7208 struct btrfs_key key;
7212 list_for_each_entry(back, &rec->backrefs, list) {
7213 /* Don't care about full backrefs (poor unloved backrefs) */
7214 if (back->full_backref || !back->is_data)
7217 dback = to_data_backref(back);
7219 /* We found this one, we don't need to do a lookup */
7220 if (dback->found_ref)
/* Resolve the subvolume root that owns this backref. */
7223 key.objectid = dback->root;
7224 key.type = BTRFS_ROOT_ITEM_KEY;
7225 key.offset = (u64)-1;
7227 root = btrfs_read_fs_root(info, &key);
7229 /* No root, definitely a bad ref, skip */
7230 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7232 /* Other err, exit */
7234 return PTR_ERR(root);
/* Look for the file extent item the backref claims to be. */
7236 key.objectid = dback->owner;
7237 key.type = BTRFS_EXTENT_DATA_KEY;
7238 key.offset = dback->offset;
7239 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7241 btrfs_release_path(path);
7244 /* Didn't find it, we can carry on */
7249 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7250 struct btrfs_file_extent_item);
7251 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7252 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7253 btrfs_release_path(path);
7254 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7256 struct extent_record *tmp;
7257 tmp = container_of(cache, struct extent_record, cache);
7260 * If we found an extent record for the bytenr for this
7261 * particular backref then we can't add it to our
7262 * current extent record. We only want to add backrefs
7263 * that don't have a corresponding extent item in the
7264 * extent tree since they likely belong to this record
7265 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk file extent values into the backref. */
7271 dback->found_ref += 1;
7272 dback->disk_bytenr = bytenr;
7273 dback->bytes = bytes;
7276 * Set this so the verify backref code knows not to trust the
7277 * values in this backref.
7286 * Record orphan data ref into corresponding root.
7288 * Return 0 if the extent item contains data ref and recorded.
7289 * Return 1 if the extent item contains no useful data ref
7290 * On that case, it may contain only shared_dataref or metadata backref,
7291 * or the file extent exists (this should be handled by the extent bytenr
7293 * Return <0 if something goes wrong.
7295 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7296 struct extent_record *rec)
7298 struct btrfs_key key;
7299 struct btrfs_root *dest_root;
7300 struct extent_backref *back;
7301 struct data_backref *dback;
7302 struct orphan_data_extent *orphan;
7303 struct btrfs_path *path;
7304 int recorded_data_ref = 0;
7309 path = btrfs_alloc_path();
/* Walk data backrefs that exist in the extent tree but found no file
 * extent in the fs tree — candidates for inode/file-extent rebuild. */
7312 list_for_each_entry(back, &rec->backrefs, list) {
7313 if (back->full_backref || !back->is_data ||
7314 !back->found_extent_tree)
7316 dback = to_data_backref(back);
7317 if (dback->found_ref)
7319 key.objectid = dback->root;
7320 key.type = BTRFS_ROOT_ITEM_KEY;
7321 key.offset = (u64)-1;
7323 dest_root = btrfs_read_fs_root(fs_info, &key);
7325 /* For non-exist root we just skip it */
7326 if (IS_ERR(dest_root) || !dest_root)
7329 key.objectid = dback->owner;
7330 key.type = BTRFS_EXTENT_DATA_KEY;
7331 key.offset = dback->offset;
7333 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7335 * For ret < 0, it's OK since the fs-tree may be corrupted,
7336 * we need to record it for inode/file extent rebuild.
7337 * For ret > 0, we record it only for file extent rebuild.
7338 * For ret == 0, the file extent exists but only bytenr
7339 * mismatch, let the original bytenr fix routine to handle,
7345 orphan = malloc(sizeof(*orphan));
7350 INIT_LIST_HEAD(&orphan->list);
7351 orphan->root = dback->root;
7352 orphan->objectid = dback->owner;
7353 orphan->offset = dback->offset;
7354 orphan->disk_bytenr = rec->cache.start;
7355 orphan->disk_len = rec->cache.size;
/* Queue the orphan on its destination root for later rebuild. */
7356 list_add(&dest_root->orphan_data_extents, &orphan->list);
7357 recorded_data_ref = 1;
7360 btrfs_free_path(path);
7362 return !recorded_data_ref;
7368 * when an incorrect extent item is found, this will delete
7369 * all of the existing entries for it and recreate them
7370 * based on what the tree scan found.
/*
 * Repair path: reconcile backrefs (find_possible_backrefs +
 * verify_backrefs), delete the stale extent records in one transaction,
 * then re-record every backref that was actually found — unless the block
 * is in the corrupt-blocks cache.  Returns 0 or a negative errno.
 */
7372 static int fixup_extent_refs(struct btrfs_fs_info *info,
7373 struct cache_tree *extent_cache,
7374 struct extent_record *rec)
7376 struct btrfs_trans_handle *trans = NULL;
7378 struct btrfs_path *path;
7379 struct list_head *cur = rec->backrefs.next;
7380 struct cache_extent *cache;
7381 struct extent_backref *back;
7385 if (rec->flag_block_full_backref)
7386 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7388 path = btrfs_alloc_path();
7392 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7394 * Sometimes the backrefs themselves are so broken they don't
7395 * get attached to any meaningful rec, so first go back and
7396 * check any of our backrefs that we couldn't find and throw
7397 * them into the list if we find the backref so that
7398 * verify_backrefs can figure out what to do.
7400 ret = find_possible_backrefs(info, path, extent_cache, rec);
7405 /* step one, make sure all of the backrefs agree */
7406 ret = verify_backrefs(info, path, rec);
7410 trans = btrfs_start_transaction(info->extent_root, 1);
7411 if (IS_ERR(trans)) {
7412 ret = PTR_ERR(trans);
7416 /* step two, delete all the existing records */
7417 ret = delete_extent_records(trans, info->extent_root, path,
7418 rec->start, rec->max_size);
7423 /* was this block corrupt? If so, don't add references to it */
7424 cache = lookup_cache_extent(info->corrupt_blocks,
7425 rec->start, rec->max_size);
7431 /* step three, recreate all the refs we did find */
7432 while(cur != &rec->backrefs) {
7433 back = to_extent_backref(cur);
7437 * if we didn't find any references, don't create a
7440 if (!back->found_ref)
7443 rec->bad_full_backref = 0;
7444 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7452 int err = btrfs_commit_transaction(trans, info->extent_root);
7457 btrfs_free_path(path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on @rec's extent item,
 * according to rec->flag_block_full_backref.  Metadata extents are keyed
 * by METADATA_ITEM_KEY/level, data extents by EXTENT_ITEM_KEY/size.
 * Returns 0 on success or a negative errno.
 */
7461 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7462 struct extent_record *rec)
7464 struct btrfs_trans_handle *trans;
7465 struct btrfs_root *root = fs_info->extent_root;
7466 struct btrfs_path *path;
7467 struct btrfs_extent_item *ei;
7468 struct btrfs_key key;
7472 key.objectid = rec->start;
7473 if (rec->metadata) {
7474 key.type = BTRFS_METADATA_ITEM_KEY;
7475 key.offset = rec->info_level;
7477 key.type = BTRFS_EXTENT_ITEM_KEY;
7478 key.offset = rec->max_size;
7481 path = btrfs_alloc_path();
7485 trans = btrfs_start_transaction(root, 0);
7486 if (IS_ERR(trans)) {
7487 btrfs_free_path(path);
7488 return PTR_ERR(trans);
7491 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Error or item-not-found: clean up and commit the empty transaction. */
7493 btrfs_free_path(path);
7494 btrfs_commit_transaction(trans, root);
7497 fprintf(stderr, "Didn't find extent for %llu\n",
7498 (unsigned long long)rec->start);
7499 btrfs_free_path(path);
7500 btrfs_commit_transaction(trans, root);
7504 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7505 struct btrfs_extent_item);
7506 flags = btrfs_extent_flags(path->nodes[0], ei);
7507 if (rec->flag_block_full_backref) {
7508 fprintf(stderr, "setting full backref on %llu\n",
7509 (unsigned long long)key.objectid);
7510 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7512 fprintf(stderr, "clearing full backref on %llu\n",
7513 (unsigned long long)key.objectid);
7514 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Write the updated flags back and persist the change. */
7516 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7517 btrfs_mark_buffer_dirty(path->nodes[0]);
7518 btrfs_free_path(path);
7519 return btrfs_commit_transaction(trans, root);
7522 /* right now we only prune from the extent allocation tree */
/*
 * Remove the node pointer referencing a corrupt block: search down to the
 * corrupt block's parent, locate the slot pointing at corrupt->cache.start
 * (first at the search slot, then by scanning the node), and delete it.
 */
7523 static int prune_one_block(struct btrfs_trans_handle *trans,
7524 struct btrfs_fs_info *info,
7525 struct btrfs_corrupt_block *corrupt)
7528 struct btrfs_path path;
7529 struct extent_buffer *eb;
7533 int level = corrupt->level + 1;
7535 btrfs_init_path(&path);
7537 /* we want to stop at the parent to our busted block */
7538 path.lowest_level = level;
7540 ret = btrfs_search_slot(trans, info->extent_root,
7541 &corrupt->key, &path, -1, 1);
7546 eb = path.nodes[level];
7553 * hopefully the search gave us the block we want to prune,
7554 * lets try that first
7556 slot = path.slots[level];
7557 found = btrfs_node_blockptr(eb, slot);
7558 if (found == corrupt->cache.start)
7561 nritems = btrfs_header_nritems(eb);
7563 /* the search failed, lets scan this node and hope we find it */
7564 for (slot = 0; slot < nritems; slot++) {
7565 found = btrfs_node_blockptr(eb, slot);
7566 if (found == corrupt->cache.start)
7570 * we couldn't find the bad block. TODO, search all the nodes for pointers
7573 if (eb == info->extent_root->node) {
7578 btrfs_release_path(&path);
7583 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7584 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7587 btrfs_release_path(&path);
/*
 * Drain fs_info->corrupt_blocks: prune each corrupt block's parent pointer
 * (prune_one_block) inside a single transaction, then commit.
 * Returns the commit result or a transaction-start error.
 */
7591 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7593 struct btrfs_trans_handle *trans = NULL;
7594 struct cache_extent *cache;
7595 struct btrfs_corrupt_block *corrupt;
7598 cache = search_cache_extent(info->corrupt_blocks, 0);
/* Lazily start the transaction on the first corrupt block found. */
7602 trans = btrfs_start_transaction(info->extent_root, 1);
7604 return PTR_ERR(trans);
7606 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7607 prune_one_block(trans, info, corrupt);
7608 remove_cache_extent(info->corrupt_blocks, cache);
7611 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear all EXTENT_DIRTY ranges in the free-space cache and step through
 * the block groups they covered, so cached free-space state is rebuilt.
 * NOTE(review): elided lines hide the per-block-group reset itself.
 */
7615 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7617 struct btrfs_block_group_cache *cache;
7622 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7623 &start, &end, EXTENT_DIRTY);
7626 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7632 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group and continue the scan. */
7637 start = cache->key.objectid + cache->key.offset;
/*
 * Main extent verification pass: walk every extent_record in
 * @extent_cache, report mismatches (ref counts, backpointers, owner refs,
 * full-backref flags, stripe crossing, chunk type) and, in repair mode,
 * invoke the fix-up routines.  In repair mode, all problem extents and
 * corrupt blocks are first pinned as excluded so repairs never allocate
 * from them, and duplicate records are deleted before anything else.
 * Returns 0, a negative errno, or -EAGAIN to request a rescan.
 */
7641 static int check_extent_refs(struct btrfs_root *root,
7642 struct cache_tree *extent_cache)
7644 struct extent_record *rec;
7645 struct cache_extent *cache;
7654 * if we're doing a repair, we have to make sure
7655 * we don't allocate from the problem extents.
7656 * In the worst case, this will be all the
7659 cache = search_cache_extent(extent_cache, 0);
7661 rec = container_of(cache, struct extent_record, cache);
7662 set_extent_dirty(root->fs_info->excluded_extents,
7664 rec->start + rec->max_size - 1,
7666 cache = next_cache_extent(cache);
7669 /* pin down all the corrupted blocks too */
7670 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7672 set_extent_dirty(root->fs_info->excluded_extents,
7674 cache->start + cache->size - 1,
7676 cache = next_cache_extent(cache);
7678 prune_corrupt_blocks(root->fs_info);
7679 reset_cached_block_groups(root->fs_info);
7682 reset_cached_block_groups(root->fs_info);
7685 * We need to delete any duplicate entries we find first otherwise we
7686 * could mess up the extent tree when we have backrefs that actually
7687 * belong to a different extent item and not the weird duplicate one.
7689 while (repair && !list_empty(&duplicate_extents)) {
7690 rec = to_extent_record(duplicate_extents.next);
7691 list_del_init(&rec->list);
7693 /* Sometimes we can find a backref before we find an actual
7694 * extent, so we need to process it a little bit to see if there
7695 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7696 * if this is a backref screwup. If we need to delete stuff
7697 * process_duplicates() will return 0, otherwise it will return
7700 if (process_duplicates(root, extent_cache, rec))
7702 ret = delete_duplicate_records(root, rec);
7706 * delete_duplicate_records will return the number of entries
7707 * deleted, so if it's greater than 0 then we know we actually
7708 * did something and we need to remove.
/* Main check loop: one extent_record per iteration. */
7722 cache = search_cache_extent(extent_cache, 0);
7725 rec = container_of(cache, struct extent_record, cache);
7726 if (rec->num_duplicates) {
7727 fprintf(stderr, "extent item %llu has multiple extent "
7728 "items\n", (unsigned long long)rec->start);
7733 if (rec->refs != rec->extent_item_refs) {
7734 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7735 (unsigned long long)rec->start,
7736 (unsigned long long)rec->nr);
7737 fprintf(stderr, "extent item %llu, found %llu\n",
7738 (unsigned long long)rec->extent_item_refs,
7739 (unsigned long long)rec->refs);
7740 ret = record_orphan_data_extents(root->fs_info, rec);
7747 * we can't use the extent to repair file
7748 * extent, let the fallback method handle it.
7750 if (!fixed && repair) {
7751 ret = fixup_extent_refs(
7762 if (all_backpointers_checked(rec, 1)) {
7763 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7764 (unsigned long long)rec->start,
7765 (unsigned long long)rec->nr);
7767 if (!fixed && !recorded && repair) {
7768 ret = fixup_extent_refs(root->fs_info,
7777 if (!rec->owner_ref_checked) {
7778 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7779 (unsigned long long)rec->start,
7780 (unsigned long long)rec->nr);
7781 if (!fixed && !recorded && repair) {
7782 ret = fixup_extent_refs(root->fs_info,
7791 if (rec->bad_full_backref) {
7792 fprintf(stderr, "bad full backref, on [%llu]\n",
7793 (unsigned long long)rec->start);
7795 ret = fixup_extent_flags(root->fs_info, rec);
7804 * Although it's not a extent ref's problem, we reuse this
7805 * routine for error reporting.
7806 * No repair function yet.
7808 if (rec->crossing_stripes) {
7810 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7811 rec->start, rec->start + rec->max_size);
7816 if (rec->wrong_chunk_type) {
7818 "bad extent [%llu, %llu), type mismatch with chunk\n",
7819 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it and un-exclude it if clean/fixed. */
7824 remove_cache_extent(extent_cache, cache);
7825 free_all_extent_backrefs(rec);
7826 if (!init_extent_tree && repair && (!cur_err || fixed))
7827 clear_extent_dirty(root->fs_info->excluded_extents,
7829 rec->start + rec->max_size - 1,
7835 if (ret && ret != -EAGAIN) {
7836 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7839 struct btrfs_trans_handle *trans;
7841 root = root->fs_info->extent_root;
7842 trans = btrfs_start_transaction(root, 1);
7843 if (IS_ERR(trans)) {
7844 ret = PTR_ERR(trans);
/* Fix up block-group accounting after successful repairs. */
7848 btrfs_fix_block_accounting(trans, root);
7849 ret = btrfs_commit_transaction(trans, root);
7854 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of @length bytes with
 * @num_stripes stripes, based on the RAID profile bits in @type:
 * RAID0 divides by all stripes, RAID10 by half of them (length * 2 /
 * num_stripes), RAID5/6 by the data stripes (num_stripes minus 1 or 2
 * parity stripes); other profiles map a full-length stripe per device.
 */
7860 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7864 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7865 stripe_size = length;
7866 stripe_size /= num_stripes;
7867 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7868 stripe_size = length * 2;
7869 stripe_size /= num_stripes;
7870 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7871 stripe_size = length;
7872 stripe_size /= (num_stripes - 1);
7873 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7874 stripe_size = length;
7875 stripe_size /= (num_stripes - 2);
7877 stripe_size = length;
7883 * Check the chunk with its block group/dev list ref:
7884 * Return 0 if all refs seems valid.
7885 * Return 1 if part of refs seems valid, need later check for rebuild ref
7886 * like missing block group and needs to search extent tree to rebuild them.
7887 * Return -1 if essential refs are missing and unable to rebuild.
7889 static int check_chunk_refs(struct chunk_record *chunk_rec,
7890 struct block_group_tree *block_group_cache,
7891 struct device_extent_tree *dev_extent_cache,
7894 struct cache_extent *block_group_item;
7895 struct block_group_record *block_group_rec;
7896 struct cache_extent *dev_extent_item;
7897 struct device_extent_record *dev_extent_rec;
7901 int metadump_v2 = 0;
/* Step 1: the chunk must be backed by a matching block group item. */
7905 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7908 if (block_group_item) {
7909 block_group_rec = container_of(block_group_item,
7910 struct block_group_record,
7912 if (chunk_rec->length != block_group_rec->offset ||
7913 chunk_rec->offset != block_group_rec->objectid ||
7915 chunk_rec->type_flags != block_group_rec->flags)) {
7918 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7919 chunk_rec->objectid,
7924 chunk_rec->type_flags,
7925 block_group_rec->objectid,
7926 block_group_rec->type,
7927 block_group_rec->offset,
7928 block_group_rec->offset,
7929 block_group_rec->objectid,
7930 block_group_rec->flags);
7933 list_del_init(&block_group_rec->list);
7934 chunk_rec->bg_rec = block_group_rec;
7939 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7940 chunk_rec->objectid,
7945 chunk_rec->type_flags);
/* Step 2: each stripe must have a matching dev extent of the
 * expected per-device stripe length. */
7952 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7953 chunk_rec->num_stripes);
7954 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7955 devid = chunk_rec->stripes[i].devid;
7956 offset = chunk_rec->stripes[i].offset;
7957 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7958 devid, offset, length);
7959 if (dev_extent_item) {
7960 dev_extent_rec = container_of(dev_extent_item,
7961 struct device_extent_record,
7963 if (dev_extent_rec->objectid != devid ||
7964 dev_extent_rec->offset != offset ||
7965 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7966 dev_extent_rec->length != length) {
7969 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7970 chunk_rec->objectid,
7973 chunk_rec->stripes[i].devid,
7974 chunk_rec->stripes[i].offset,
7975 dev_extent_rec->objectid,
7976 dev_extent_rec->offset,
7977 dev_extent_rec->length);
7980 list_move(&dev_extent_rec->chunk_list,
7981 &chunk_rec->dextents);
7986 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7987 chunk_rec->objectid,
7990 chunk_rec->stripes[i].devid,
7991 chunk_rec->stripes[i].offset);
7998 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Cross-check every cached chunk against block groups and dev extents via
 * check_chunk_refs(), classifying each chunk onto the @good, @rebuild or
 * @bad list (when provided).  Afterwards, report block groups and dev
 * extents left without a matching chunk.  @silent suppresses messages.
 */
7999 int check_chunks(struct cache_tree *chunk_cache,
8000 struct block_group_tree *block_group_cache,
8001 struct device_extent_tree *dev_extent_cache,
8002 struct list_head *good, struct list_head *bad,
8003 struct list_head *rebuild, int silent)
8005 struct cache_extent *chunk_item;
8006 struct chunk_record *chunk_rec;
8007 struct block_group_record *bg_rec;
8008 struct device_extent_record *dext_rec;
8012 chunk_item = first_cache_extent(chunk_cache);
8013 while (chunk_item) {
8014 chunk_rec = container_of(chunk_item, struct chunk_record,
8016 err = check_chunk_refs(chunk_rec, block_group_cache,
8017 dev_extent_cache, silent);
8020 if (err == 0 && good)
8021 list_add_tail(&chunk_rec->list, good);
8022 if (err > 0 && rebuild)
8023 list_add_tail(&chunk_rec->list, rebuild);
8025 list_add_tail(&chunk_rec->list, bad);
8026 chunk_item = next_cache_extent(chunk_item);
/* Anything still on the orphan lists has no owning chunk. */
8029 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8032 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8040 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8044 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all dev extents belonging to @dev_rec's devid and
 * compare the total against the device item's byte_used, reporting a
 * mismatch.  Matched extents are removed from the orphan device list.
 */
8055 static int check_device_used(struct device_record *dev_rec,
8056 struct device_extent_tree *dext_cache)
8058 struct cache_extent *cache;
8059 struct device_extent_record *dev_extent_rec;
8062 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8064 dev_extent_rec = container_of(cache,
8065 struct device_extent_record,
/* The tree is shared across devices; stop at the next devid. */
8067 if (dev_extent_rec->objectid != dev_rec->devid)
8070 list_del_init(&dev_extent_rec->device_list);
8071 total_byte += dev_extent_rec->length;
8072 cache = next_cache_extent(cache);
8075 if (total_byte != dev_rec->byte_used) {
8077 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8078 total_byte, dev_rec->byte_used, dev_rec->objectid,
8079 dev_rec->type, dev_rec->offset);
8086 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Verify every device record's used-byte accounting against its dev
 * extents (check_device_used), then report dev extents whose device item
 * was never found.  NOTE(review): elided lines hide the return value
 * accumulation.
 */
8087 static int check_devices(struct rb_root *dev_cache,
8088 struct device_extent_tree *dev_extent_cache)
8090 struct rb_node *dev_node;
8091 struct device_record *dev_rec;
8092 struct device_extent_record *dext_rec;
8096 dev_node = rb_first(dev_cache);
8098 dev_rec = container_of(dev_node, struct device_record, node);
8099 err = check_device_used(dev_rec, dev_extent_cache);
8103 dev_node = rb_next(dev_node);
8105 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8108 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8109 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing a tree root to scan (bytenr,
 * level, node size, optional drop key for partially-dropped snapshots)
 * and append it to @head for deal_root_from_list() to consume.
 */
8116 static int add_root_item_to_list(struct list_head *head,
8117 u64 objectid, u64 bytenr, u64 last_snapshot,
8118 u8 level, u8 drop_level,
8119 int level_size, struct btrfs_key *drop_key)
8122 struct root_item_record *ri_rec;
8123 ri_rec = malloc(sizeof(*ri_rec));
8126 ri_rec->bytenr = bytenr;
8127 ri_rec->objectid = objectid;
8128 ri_rec->level = level;
8129 ri_rec->level_size = level_size;
8130 ri_rec->drop_level = drop_level;
8131 ri_rec->last_snapshot = last_snapshot;
/* drop_key is optional; copy only when supplied. */
8133 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8134 list_add_tail(&ri_rec->list, head);
/* Free every root_item_record queued on @list (see add_root_item_to_list). */
8139 static void free_root_item_list(struct list_head *list)
8141 struct root_item_record *ri_rec;
8143 while (!list_empty(list)) {
8144 ri_rec = list_first_entry(list, struct root_item_record,
8146 list_del_init(&ri_rec->list);
/*
 * Process each queued tree root: read its root node, seed the pending
 * block caches via add_root_to_pending(), then drive run_next_block()
 * to walk blocks and populate the extent/chunk/device caches.  A final
 * run_next_block() loop drains whatever remains pending.
 * NOTE(review): elided lines hide loop conditions and error handling.
 */
8151 static int deal_root_from_list(struct list_head *list,
8152 struct btrfs_root *root,
8153 struct block_info *bits,
8155 struct cache_tree *pending,
8156 struct cache_tree *seen,
8157 struct cache_tree *reada,
8158 struct cache_tree *nodes,
8159 struct cache_tree *extent_cache,
8160 struct cache_tree *chunk_cache,
8161 struct rb_root *dev_cache,
8162 struct block_group_tree *block_group_cache,
8163 struct device_extent_tree *dev_extent_cache)
8168 while (!list_empty(list)) {
8169 struct root_item_record *rec;
8170 struct extent_buffer *buf;
8171 rec = list_entry(list->next,
8172 struct root_item_record, list);
8174 buf = read_tree_block(root->fs_info->tree_root,
8175 rec->bytenr, rec->level_size, 0);
8176 if (!extent_buffer_uptodate(buf)) {
8177 free_extent_buffer(buf);
8181 add_root_to_pending(buf, extent_cache, pending,
8182 seen, nodes, rec->objectid);
8184 * To rebuild extent tree, we need deal with snapshot
8185 * one by one, otherwise we deal with node firstly which
8186 * can maximize readahead.
8189 ret = run_next_block(root, bits, bits_nr, &last,
8190 pending, seen, reada, nodes,
8191 extent_cache, chunk_cache,
8192 dev_cache, block_group_cache,
8193 dev_extent_cache, rec);
8197 free_extent_buffer(buf);
8198 list_del(&rec->list);
/* Drain any remaining pending blocks not tied to a specific root. */
8204 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8205 reada, nodes, extent_cache, chunk_cache,
8206 dev_cache, block_group_cache,
8207 dev_extent_cache, NULL);
8217 static int check_chunks_and_extents(struct btrfs_root *root)
8219 struct rb_root dev_cache;
8220 struct cache_tree chunk_cache;
8221 struct block_group_tree block_group_cache;
8222 struct device_extent_tree dev_extent_cache;
8223 struct cache_tree extent_cache;
8224 struct cache_tree seen;
8225 struct cache_tree pending;
8226 struct cache_tree reada;
8227 struct cache_tree nodes;
8228 struct extent_io_tree excluded_extents;
8229 struct cache_tree corrupt_blocks;
8230 struct btrfs_path path;
8231 struct btrfs_key key;
8232 struct btrfs_key found_key;
8234 struct block_info *bits;
8236 struct extent_buffer *leaf;
8238 struct btrfs_root_item ri;
8239 struct list_head dropping_trees;
8240 struct list_head normal_trees;
8241 struct btrfs_root *root1;
8246 dev_cache = RB_ROOT;
8247 cache_tree_init(&chunk_cache);
8248 block_group_tree_init(&block_group_cache);
8249 device_extent_tree_init(&dev_extent_cache);
8251 cache_tree_init(&extent_cache);
8252 cache_tree_init(&seen);
8253 cache_tree_init(&pending);
8254 cache_tree_init(&nodes);
8255 cache_tree_init(&reada);
8256 cache_tree_init(&corrupt_blocks);
8257 extent_io_tree_init(&excluded_extents);
8258 INIT_LIST_HEAD(&dropping_trees);
8259 INIT_LIST_HEAD(&normal_trees);
8262 root->fs_info->excluded_extents = &excluded_extents;
8263 root->fs_info->fsck_extent_cache = &extent_cache;
8264 root->fs_info->free_extent_hook = free_extent_hook;
8265 root->fs_info->corrupt_blocks = &corrupt_blocks;
8269 bits = malloc(bits_nr * sizeof(struct block_info));
8275 if (ctx.progress_enabled) {
8276 ctx.tp = TASK_EXTENTS;
8277 task_start(ctx.info);
8281 root1 = root->fs_info->tree_root;
8282 level = btrfs_header_level(root1->node);
8283 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8284 root1->node->start, 0, level, 0,
8285 root1->nodesize, NULL);
8288 root1 = root->fs_info->chunk_root;
8289 level = btrfs_header_level(root1->node);
8290 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8291 root1->node->start, 0, level, 0,
8292 root1->nodesize, NULL);
8295 btrfs_init_path(&path);
8298 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8299 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8304 leaf = path.nodes[0];
8305 slot = path.slots[0];
8306 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8307 ret = btrfs_next_leaf(root, &path);
8310 leaf = path.nodes[0];
8311 slot = path.slots[0];
8313 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8314 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8315 unsigned long offset;
8318 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8319 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8320 last_snapshot = btrfs_root_last_snapshot(&ri);
8321 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8322 level = btrfs_root_level(&ri);
8323 level_size = root->nodesize;
8324 ret = add_root_item_to_list(&normal_trees,
8326 btrfs_root_bytenr(&ri),
8327 last_snapshot, level,
8328 0, level_size, NULL);
8332 level = btrfs_root_level(&ri);
8333 level_size = root->nodesize;
8334 objectid = found_key.objectid;
8335 btrfs_disk_key_to_cpu(&found_key,
8337 ret = add_root_item_to_list(&dropping_trees,
8339 btrfs_root_bytenr(&ri),
8340 last_snapshot, level,
8342 level_size, &found_key);
8349 btrfs_release_path(&path);
8352 * check_block can return -EAGAIN if it fixes something, please keep
8353 * this in mind when dealing with return values from these functions, if
8354 * we get -EAGAIN we want to fall through and restart the loop.
8356 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8357 &seen, &reada, &nodes, &extent_cache,
8358 &chunk_cache, &dev_cache, &block_group_cache,
8365 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8366 &pending, &seen, &reada, &nodes,
8367 &extent_cache, &chunk_cache, &dev_cache,
8368 &block_group_cache, &dev_extent_cache);
8375 ret = check_chunks(&chunk_cache, &block_group_cache,
8376 &dev_extent_cache, NULL, NULL, NULL, 0);
8383 ret = check_extent_refs(root, &extent_cache);
8390 ret = check_devices(&dev_cache, &dev_extent_cache);
8395 task_stop(ctx.info);
8397 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8398 extent_io_tree_cleanup(&excluded_extents);
8399 root->fs_info->fsck_extent_cache = NULL;
8400 root->fs_info->free_extent_hook = NULL;
8401 root->fs_info->corrupt_blocks = NULL;
8402 root->fs_info->excluded_extents = NULL;
8405 free_chunk_cache_tree(&chunk_cache);
8406 free_device_cache_tree(&dev_cache);
8407 free_block_group_tree(&block_group_cache);
8408 free_device_extent_tree(&dev_extent_cache);
8409 free_extent_cache_tree(&seen);
8410 free_extent_cache_tree(&pending);
8411 free_extent_cache_tree(&reada);
8412 free_extent_cache_tree(&nodes);
8415 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8416 free_extent_cache_tree(&seen);
8417 free_extent_cache_tree(&pending);
8418 free_extent_cache_tree(&reada);
8419 free_extent_cache_tree(&nodes);
8420 free_chunk_cache_tree(&chunk_cache);
8421 free_block_group_tree(&block_group_cache);
8422 free_device_cache_tree(&dev_cache);
8423 free_device_extent_tree(&dev_extent_cache);
8424 free_extent_record_cache(root->fs_info, &extent_cache);
8425 free_root_item_list(&normal_trees);
8426 free_root_item_list(&dropping_trees);
8427 extent_io_tree_cleanup(&excluded_extents);
/*
 * Replace @root's current node with a freshly allocated, empty node and
 * update the root item to point at it.  Used by the fsck re-init paths
 * (e.g. rebuilding the csum or extent tree) to throw away a possibly
 * corrupt tree and start from scratch.
 *
 * NOTE(review): the handling of @overwrite and several error checks sit on
 * lines elided from this view — confirm against the full source.
 */
8431 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8432 struct btrfs_root *root, int overwrite)
8434 struct extent_buffer *c;
8435 struct extent_buffer *old = root->node;
8438 struct btrfs_disk_key disk_key = {0,0,0};
8444 extent_buffer_get(c);
8447 c = btrfs_alloc_free_block(trans, root,
8449 root->root_key.objectid,
8450 &disk_key, level, 0, 0);
8453 extent_buffer_get(c);
/* Zero the header, then stamp in the new root's identity. */
8457 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8458 btrfs_set_header_level(c, level);
8459 btrfs_set_header_bytenr(c, c->start);
8460 btrfs_set_header_generation(c, trans->transid);
8461 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8462 btrfs_set_header_owner(c, root->root_key.objectid);
8464 write_extent_buffer(c, root->fs_info->fsid,
8465 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8467 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8468 btrfs_header_chunk_tree_uuid(c),
8471 btrfs_mark_buffer_dirty(c);
8473 * this case can happen in the following case:
8475 * 1.overwrite previous root.
8477 * 2.reinit reloc data root, this is because we skip pin
8478 * down reloc data tree before which means we can allocate
8479 * same block bytenr here.
8481 if (old->start == c->start) {
8482 btrfs_set_root_generation(&root->root_item,
8484 root->root_item.level = btrfs_header_level(root->node);
8485 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8486 &root->root_key, &root->root_item);
8488 free_extent_buffer(c);
8492 free_extent_buffer(old);
/* Make sure the updated root item gets written at commit time. */
8494 add_root_to_dirty_list(root);
/*
 * Recursively pin every metadata block reachable from @eb so that later
 * allocations (during extent-tree re-init) cannot overwrite live metadata.
 * @tree_root is non-zero when @eb belongs to the tree root, in which case
 * root items found in leaves are followed into their subtrees as well.
 */
8498 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8499 struct extent_buffer *eb, int tree_root)
8501 struct extent_buffer *tmp;
8502 struct btrfs_root_item *ri;
8503 struct btrfs_key key;
8506 int level = btrfs_header_level(eb);
8512 * If we have pinned this block before, don't pin it again.
8513 * This can not only avoid forever loop with broken filesystem
8514 * but also give us some speedups.
8516 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8517 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8520 btrfs_pin_extent(fs_info, eb->start, eb->len);
8522 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8523 nritems = btrfs_header_nritems(eb);
8524 for (i = 0; i < nritems; i++) {
/* Leaf of the tree root: descend into each referenced subvolume root. */
8526 btrfs_item_key_to_cpu(eb, &key, i);
8527 if (key.type != BTRFS_ROOT_ITEM_KEY)
8529 /* Skip the extent root and reloc roots */
8530 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8531 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8532 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8534 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8535 bytenr = btrfs_disk_root_bytenr(eb, ri);
8538 * If at any point we start needing the real root we
8539 * will have to build a stump root for the root we are
8540 * in, but for now this doesn't actually use the root so
8541 * just pass in extent_root.
8543 tmp = read_tree_block(fs_info->extent_root, bytenr,
8545 if (!extent_buffer_uptodate(tmp)) {
8546 fprintf(stderr, "Error reading root block\n");
8549 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8550 free_extent_buffer(tmp);
/* Interior node: recurse through each child pointer. */
8554 bytenr = btrfs_node_blockptr(eb, i);
8556 /* If we aren't the tree root don't read the block */
8557 if (level == 1 && !tree_root) {
8558 btrfs_pin_extent(fs_info, bytenr, nodesize);
8562 tmp = read_tree_block(fs_info->extent_root, bytenr,
8564 if (!extent_buffer_uptodate(tmp)) {
8565 fprintf(stderr, "Error reading tree block\n");
8568 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8569 free_extent_buffer(tmp);
/*
 * Pin all metadata reachable from the chunk root and the tree root, so an
 * extent-tree rebuild cannot allocate over in-use metadata blocks.
 */
8578 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8582 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
/* tree_root=1: follow root items found in the tree root's leaves too. */
8586 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree (called after
 * btrfs_free_block_groups() dropped them), marking each chunk's range
 * dirty in the free space cache so allocation can proceed.
 */
8589 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8591 struct btrfs_block_group_cache *cache;
8592 struct btrfs_path *path;
8593 struct extent_buffer *leaf;
8594 struct btrfs_chunk *chunk;
8595 struct btrfs_key key;
8599 path = btrfs_alloc_path();
8604 key.type = BTRFS_CHUNK_ITEM_KEY;
8607 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8609 btrfs_free_path(path);
8614 * We do this in case the block groups were screwed up and had alloc
8615 * bits that aren't actually set on the chunks. This happens with
8616 * restored images every time and could happen in real life I guess.
8618 fs_info->avail_data_alloc_bits = 0;
8619 fs_info->avail_metadata_alloc_bits = 0;
8620 fs_info->avail_system_alloc_bits = 0;
8622 /* First we need to create the in-memory block groups */
8624 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8625 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8627 btrfs_free_path(path);
8635 leaf = path->nodes[0];
8636 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8637 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8642 chunk = btrfs_item_ptr(leaf, path->slots[0],
8643 struct btrfs_chunk);
/* One in-memory block group per chunk item. */
8644 btrfs_add_block_group(fs_info, 0,
8645 btrfs_chunk_type(leaf, chunk),
8646 key.objectid, key.offset,
8647 btrfs_chunk_length(leaf, chunk));
8648 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8649 key.offset + btrfs_chunk_length(leaf, chunk),
/* Walk the freshly created block groups (post-processing pass). */
8655 cache = btrfs_lookup_first_block_group(fs_info, start);
8659 start = cache->key.objectid + cache->key.offset;
8662 btrfs_free_path(path);
/*
 * Clear out any pending balance state: delete the balance item, remove
 * all tree-reloc root items from the tree root, then re-create the data
 * reloc tree from scratch.  Needed when re-initing the extent tree, since
 * a resumed balance would operate on stale references.
 */
8666 static int reset_balance(struct btrfs_trans_handle *trans,
8667 struct btrfs_fs_info *fs_info)
8669 struct btrfs_root *root = fs_info->tree_root;
8670 struct btrfs_path *path;
8671 struct extent_buffer *leaf;
8672 struct btrfs_key key;
8673 int del_slot, del_nr = 0;
8677 path = btrfs_alloc_path();
8681 key.objectid = BTRFS_BALANCE_OBJECTID;
8682 key.type = BTRFS_BALANCE_ITEM_KEY;
8685 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* No balance item found: nothing pending, just redo the data reloc root. */
8690 goto reinit_data_reloc;
8695 ret = btrfs_del_item(trans, root, path);
8698 btrfs_release_path(path);
/* Delete all TREE_RELOC root items, batching contiguous slots. */
8700 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8701 key.type = BTRFS_ROOT_ITEM_KEY;
8704 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8708 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8713 ret = btrfs_del_items(trans, root, path,
8720 btrfs_release_path(path);
8723 ret = btrfs_search_slot(trans, root, &key, path,
8730 leaf = path->nodes[0];
8731 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8732 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8734 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8739 del_slot = path->slots[0];
8748 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8752 btrfs_release_path(path);
8755 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8756 key.type = BTRFS_ROOT_ITEM_KEY;
8757 key.offset = (u64)-1;
8758 root = btrfs_read_fs_root(fs_info, &key);
8760 fprintf(stderr, "Error reading data reloc tree\n");
8761 ret = PTR_ERR(root);
8764 record_root_in_trans(trans, root);
/* Rebuild the data reloc tree with an empty root and a fresh root dir. */
8765 ret = btrfs_fsck_reinit_root(trans, root, 0);
8768 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8770 btrfs_free_path(path);
/*
 * Rebuild the extent tree from scratch: pin all live metadata, drop and
 * re-create the in-memory block groups, reinit the extent root, re-insert
 * the block group items, and finally clear any pending balance state.
 * Not supported on mixed block group filesystems.
 */
8774 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8775 struct btrfs_fs_info *fs_info)
8781 * The only reason we don't do this is because right now we're just
8782 * walking the trees we find and pinning down their bytes, we don't look
8783 * at any of the leaves. In order to do mixed groups we'd have to check
8784 * the leaves of any fs roots and pin down the bytes for any file
8785 * extents we find. Not hard but why do it if we don't have to?
8787 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8788 fprintf(stderr, "We don't support re-initing the extent tree "
8789 "for mixed block groups yet, please notify a btrfs "
8790 "developer you want to do this so they can add this "
8791 "functionality.\n");
8796 * first we need to walk all of the trees except the extent tree and pin
8797 * down the bytes that are in use so we don't overwrite any existing
8800 ret = pin_metadata_blocks(fs_info);
8802 fprintf(stderr, "error pinning down used bytes\n");
8807 * Need to drop all the block groups since we're going to recreate all
8810 btrfs_free_block_groups(fs_info);
8811 ret = reset_block_groups(fs_info);
8813 fprintf(stderr, "error resetting the block groups\n");
8817 /* Ok we can allocate now, reinit the extent root */
8818 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8820 fprintf(stderr, "extent root initialization failed\n");
8822 * When the transaction code is updated we should end the
8823 * transaction, but for now progs only knows about commit so
8824 * just return an error.
8830 * Now we have all the in-memory block groups setup so we can make
8831 * allocations properly, and the metadata we care about is safe since we
8832 * pinned all of it above.
8835 struct btrfs_block_group_cache *cache;
8837 cache = btrfs_lookup_first_block_group(fs_info, start);
8840 start = cache->key.objectid + cache->key.offset;
/* Re-insert each block group item into the freshly reinited extent root. */
8841 ret = btrfs_insert_item(trans, fs_info->extent_root,
8842 &cache->key, &cache->item,
8843 sizeof(cache->item));
8845 fprintf(stderr, "Error adding block group\n");
8848 btrfs_extent_post_op(trans, fs_info->extent_root);
8851 ret = reset_balance(trans, fs_info);
8853 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of metadata block @eb by looking up its owner root
 * and searching down to it with a COW-enabled (trans, ins_len=0, cow=1)
 * search at the block's own level.  Used by repair to rewrite blocks with
 * e.g. transid problems.
 */
8858 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8860 struct btrfs_path *path;
8861 struct btrfs_trans_handle *trans;
8862 struct btrfs_key key;
8865 printf("Recowing metadata block %llu\n", eb->start);
8866 key.objectid = btrfs_header_owner(eb);
8867 key.type = BTRFS_ROOT_ITEM_KEY;
8868 key.offset = (u64)-1;
8870 root = btrfs_read_fs_root(root->fs_info, &key);
8872 fprintf(stderr, "Couldn't find owner root %llu\n",
8874 return PTR_ERR(root);
8877 path = btrfs_alloc_path();
8881 trans = btrfs_start_transaction(root, 1);
8882 if (IS_ERR(trans)) {
8883 btrfs_free_path(path);
8884 return PTR_ERR(trans);
/* Stop the search at @eb's level so @eb itself is the block that gets COWed. */
8887 path->lowest_level = btrfs_header_level(eb);
8888 if (path->lowest_level)
8889 btrfs_node_key_to_cpu(eb, &key, 0);
8891 btrfs_item_key_to_cpu(eb, &key, 0);
8893 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8894 btrfs_commit_transaction(trans, root);
8895 btrfs_free_path(path);
/*
 * Delete a known-bad item (@bad->key) from its owning root (@bad->root_id)
 * inside a fresh transaction.  Called by repair for items queued on the
 * global delete_items list.
 */
8899 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8901 struct btrfs_path *path;
8902 struct btrfs_trans_handle *trans;
8903 struct btrfs_key key;
8906 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8907 bad->key.type, bad->key.offset);
8908 key.objectid = bad->root_id;
8909 key.type = BTRFS_ROOT_ITEM_KEY;
8910 key.offset = (u64)-1;
8912 root = btrfs_read_fs_root(root->fs_info, &key);
8914 fprintf(stderr, "Couldn't find owner root %llu\n",
8916 return PTR_ERR(root);
8919 path = btrfs_alloc_path();
8923 trans = btrfs_start_transaction(root, 1);
8924 if (IS_ERR(trans)) {
8925 btrfs_free_path(path);
8926 return PTR_ERR(trans);
/* ins_len=-1, cow=1: locate the exact item for deletion. */
8929 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8935 ret = btrfs_del_item(trans, root, path);
8937 btrfs_commit_transaction(trans, root);
8938 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the log root pointer and level in the
 * superblock and committing.  Used before repair, since a later forced
 * commit would otherwise leave an unreplayable log behind.
 */
8942 static int zero_log_tree(struct btrfs_root *root)
8944 struct btrfs_trans_handle *trans;
8947 trans = btrfs_start_transaction(root, 1);
8948 if (IS_ERR(trans)) {
8949 ret = PTR_ERR(trans);
8952 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8953 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8954 ret = btrfs_commit_transaction(trans, root);
/*
 * Read the data extent at [@start, @start + len) one sector at a time into
 * @buf and insert a checksum item for each sector into the csum tree.
 * NOTE(review): the third argument to btrfs_csum_file_block() here is
 * start + len — presumably the extent end/limit parameter; confirm against
 * the btrfs_csum_file_block() prototype.
 */
8958 static int populate_csum(struct btrfs_trans_handle *trans,
8959 struct btrfs_root *csum_root, char *buf, u64 start,
8966 while (offset < len) {
8967 sectorsize = csum_root->sectorsize;
8968 ret = read_extent_data(csum_root, buf, start + offset,
8972 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8973 start + offset, buf, sectorsize);
8976 offset += sectorsize;
/*
 * Walk every EXTENT_DATA item in @cur_root and (for regular, non-inline
 * extents) recompute and insert checksums for the referenced disk bytes
 * into the csum tree.
 */
8981 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8982 struct btrfs_root *csum_root,
8983 struct btrfs_root *cur_root)
8985 struct btrfs_path *path;
8986 struct btrfs_key key;
8987 struct extent_buffer *node;
8988 struct btrfs_file_extent_item *fi;
8995 path = btrfs_alloc_path();
/* One sector-sized scratch buffer, reused for every extent. */
8998 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9008 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9011 /* Iterate all regular file extents and fill its csum */
9013 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9015 if (key.type != BTRFS_EXTENT_DATA_KEY)
9017 node = path->nodes[0];
9018 slot = path->slots[0];
9019 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9020 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9022 start = btrfs_file_extent_disk_bytenr(node, fi);
9023 len = btrfs_file_extent_disk_num_bytes(node, fi);
9025 ret = populate_csum(trans, csum_root, buf, start, len);
9032 * TODO: if next leaf is corrupted, jump to nearest next valid
9035 ret = btrfs_next_item(cur_root, path);
9045 btrfs_free_path(path);
/*
 * Rebuild the csum tree by iterating every fs/subvolume root item in the
 * tree root and delegating each subvolume to
 * fill_csum_tree_from_one_fs_root().  Used when the extent tree cannot be
 * trusted (e.g. after --init-extent-tree).
 */
9050 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9051 struct btrfs_root *csum_root)
9053 struct btrfs_fs_info *fs_info = csum_root->fs_info;
9054 struct btrfs_path *path;
9055 struct btrfs_root *tree_root = fs_info->tree_root;
9056 struct btrfs_root *cur_root;
9057 struct extent_buffer *node;
9058 struct btrfs_key key;
9062 path = btrfs_alloc_path();
9066 key.objectid = BTRFS_FS_TREE_OBJECTID;
9068 key.type = BTRFS_ROOT_ITEM_KEY;
9070 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9079 node = path->nodes[0];
9080 slot = path->slots[0];
9081 btrfs_item_key_to_cpu(node, &key, slot);
9082 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9084 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* Only fs/subvolume trees carry file extents worth checksumming. */
9086 if (!is_fstree(key.objectid))
9088 key.offset = (u64)-1;
9090 cur_root = btrfs_read_fs_root(fs_info, &key);
9091 if (IS_ERR(cur_root) || !cur_root) {
9092 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9096 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9101 ret = btrfs_next_item(tree_root, path);
9111 btrfs_free_path(path);
/*
 * Rebuild the csum tree by iterating EXTENT_ITEMs in the extent tree and
 * checksumming every extent flagged BTRFS_EXTENT_FLAG_DATA.  Faster than
 * walking all fs trees, but requires a trustworthy extent tree.
 */
9115 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9116 struct btrfs_root *csum_root)
9118 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9119 struct btrfs_path *path;
9120 struct btrfs_extent_item *ei;
9121 struct extent_buffer *leaf;
9123 struct btrfs_key key;
9126 path = btrfs_alloc_path();
9131 key.type = BTRFS_EXTENT_ITEM_KEY;
9134 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9136 btrfs_free_path(path);
9140 buf = malloc(csum_root->sectorsize);
9142 btrfs_free_path(path);
9147 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9148 ret = btrfs_next_leaf(extent_root, path);
9156 leaf = path->nodes[0];
9158 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9159 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9164 ei = btrfs_item_ptr(leaf, path->slots[0],
9165 struct btrfs_extent_item);
/* Skip tree blocks; only data extents carry checksums. */
9166 if (!(btrfs_extent_flags(leaf, ei) &
9167 BTRFS_EXTENT_FLAG_DATA)) {
9172 ret = populate_csum(trans, csum_root, buf, key.objectid,
9179 btrfs_free_path(path);
9185 * Recalculate the csum and put it into the csum tree.
9187 * Extent tree init will wipe out all the extent info, so in that case, we
9188 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9189 * will use fs/subvol trees to init the csum tree.
9191 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9192 struct btrfs_root *csum_root,
/* Dispatch to the fs-tree walk or the (faster) extent-tree walk. */
9196 return fill_csum_tree_from_fs(trans, csum_root);
9198 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache built by build_roots_info_cache():
 * free every cached root_item_info entry, then the tree itself, and reset
 * the pointer so a later run can rebuild it.
 */
9201 static void free_roots_info_cache(void)
9203 if (!roots_info_cache)
9206 while (!cache_tree_empty(roots_info_cache)) {
9207 struct cache_extent *entry;
9208 struct root_item_info *rii;
9210 entry = first_cache_extent(roots_info_cache);
9213 remove_cache_extent(roots_info_cache, entry);
9214 rii = container_of(entry, struct root_item_info, cache_extent);
9218 free(roots_info_cache);
9219 roots_info_cache = NULL;
/*
 * Scan the extent tree for tree-block extents with TREE_BLOCK_REF backrefs
 * and record, per root id, the highest-level node seen (bytenr, generation,
 * level) in the global roots_info_cache.  That highest node is the real
 * root of each tree, which repair_root_items() later compares against the
 * on-disk root items.
 */
9222 static int build_roots_info_cache(struct btrfs_fs_info *info)
9225 struct btrfs_key key;
9226 struct extent_buffer *leaf;
9227 struct btrfs_path *path;
9229 if (!roots_info_cache) {
9230 roots_info_cache = malloc(sizeof(*roots_info_cache));
9231 if (!roots_info_cache)
9233 cache_tree_init(roots_info_cache);
9236 path = btrfs_alloc_path();
9241 key.type = BTRFS_EXTENT_ITEM_KEY;
9244 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9247 leaf = path->nodes[0];
9250 struct btrfs_key found_key;
9251 struct btrfs_extent_item *ei;
9252 struct btrfs_extent_inline_ref *iref;
9253 int slot = path->slots[0];
9258 struct cache_extent *entry;
9259 struct root_item_info *rii;
9261 if (slot >= btrfs_header_nritems(leaf)) {
9262 ret = btrfs_next_leaf(info->extent_root, path);
9269 leaf = path->nodes[0];
9270 slot = path->slots[0];
9273 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9275 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9276 found_key.type != BTRFS_METADATA_ITEM_KEY)
9279 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9280 flags = btrfs_extent_flags(leaf, ei);
/* Only tree blocks are interesting; skip data extents. */
9282 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9283 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* METADATA_ITEM encodes the level in key.offset; EXTENT_ITEM has a
 * btrfs_tree_block_info preceding the inline refs. */
9286 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9287 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9288 level = found_key.offset;
9290 struct btrfs_tree_block_info *binfo;
9292 binfo = (struct btrfs_tree_block_info *)(ei + 1);
9293 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9294 level = btrfs_tree_block_level(leaf, binfo);
9298 * For a root extent, it must be of the following type and the
9299 * first (and only one) iref in the item.
9301 type = btrfs_extent_inline_ref_type(leaf, iref);
9302 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9305 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9306 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9308 rii = malloc(sizeof(struct root_item_info));
9313 rii->cache_extent.start = root_id;
9314 rii->cache_extent.size = 1;
9315 rii->level = (u8)-1;
9316 entry = &rii->cache_extent;
9317 ret = insert_cache_extent(roots_info_cache, entry);
9320 rii = container_of(entry, struct root_item_info,
9324 ASSERT(rii->cache_extent.start == root_id);
9325 ASSERT(rii->cache_extent.size == 1);
/* Track only the highest-level node per root: that is the tree's root. */
9327 if (level > rii->level || rii->level == (u8)-1) {
9329 rii->bytenr = found_key.objectid;
9330 rii->gen = btrfs_extent_generation(leaf, ei);
9331 rii->node_count = 1;
9332 } else if (level == rii->level) {
9340 btrfs_free_path(path);
/*
 * Compare the on-disk root item at @path against the root node recorded in
 * roots_info_cache for @root_key->objectid.  If bytenr/level/generation
 * disagree, either report it (@read_only_mode) or rewrite the root item in
 * place.  Returns non-zero error on lookup failure; the "needs fix" result
 * is communicated via lines elided from this view — confirm in full source.
 */
9345 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9346 struct btrfs_path *path,
9347 const struct btrfs_key *root_key,
9348 const int read_only_mode)
9350 const u64 root_id = root_key->objectid;
9351 struct cache_extent *entry;
9352 struct root_item_info *rii;
9353 struct btrfs_root_item ri;
9354 unsigned long offset;
9356 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9359 "Error: could not find extent items for root %llu\n",
9360 root_key->objectid);
9364 rii = container_of(entry, struct root_item_info, cache_extent);
9365 ASSERT(rii->cache_extent.start == root_id);
9366 ASSERT(rii->cache_extent.size == 1);
9368 if (rii->node_count != 1) {
9370 "Error: could not find btree root extent for root %llu\n",
9375 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9376 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9378 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9379 btrfs_root_level(&ri) != rii->level ||
9380 btrfs_root_generation(&ri) != rii->gen) {
9383 * If we're in repair mode but our caller told us to not update
9384 * the root item, i.e. just check if it needs to be updated, don't
9385 * print this message, since the caller will call us again shortly
9386 * for the same root item without read only mode (the caller will
9387 * open a transaction first).
9389 if (!(read_only_mode && repair))
9391 "%sroot item for root %llu,"
9392 " current bytenr %llu, current gen %llu, current level %u,"
9393 " new bytenr %llu, new gen %llu, new level %u\n",
9394 (read_only_mode ? "" : "fixing "),
9396 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9397 btrfs_root_level(&ri),
9398 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the found root node means our cache is wrong,
 * not the item — refuse to "fix" in that case. */
9400 if (btrfs_root_generation(&ri) > rii->gen) {
9402 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9403 root_id, btrfs_root_generation(&ri), rii->gen);
9407 if (!read_only_mode) {
9408 btrfs_set_root_bytenr(&ri, rii->bytenr);
9409 btrfs_set_root_level(&ri, rii->level);
9410 btrfs_set_root_generation(&ri, rii->gen);
9411 write_extent_buffer(path->nodes[0], &ri,
9412 offset, sizeof(ri));
9422 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9423 * caused read-only snapshots to be corrupted if they were created at a moment
9424 * when the source subvolume/snapshot had orphan items. The issue was that the
9425 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9426 * node instead of the post orphan cleanup root node.
9427 * So this function, and its callees, just detects and fixes those cases. Even
9428 * though the regression was for read-only snapshots, this function applies to
9429 * any snapshot/subvolume root.
9430 * This must be run before any other repair code - not doing so makes other
9431 * repair code delete or modify backrefs in the extent tree for example, which
9432 * will result in an inconsistent fs after repairing the root items.
/*
 * Driver for the root-item fixups described in the comment above: build
 * the roots_info_cache, then walk all ROOT_ITEMs in the tree root and let
 * maybe_repair_root_item() detect/fix stale ones.  A transaction is only
 * opened once a leaf is known to contain something needing repair, to
 * avoid rotating backup roots for nothing.  Returns the number of roots
 * fixed (>= 0) or a negative error.
 */
9434 static int repair_root_items(struct btrfs_fs_info *info)
9436 struct btrfs_path *path = NULL;
9437 struct btrfs_key key;
9438 struct extent_buffer *leaf;
9439 struct btrfs_trans_handle *trans = NULL;
9444 ret = build_roots_info_cache(info);
9448 path = btrfs_alloc_path();
9454 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9455 key.type = BTRFS_ROOT_ITEM_KEY;
9460 * Avoid opening and committing transactions if a leaf doesn't have
9461 * any root items that need to be fixed, so that we avoid rotating
9462 * backup roots unnecessarily.
9465 trans = btrfs_start_transaction(info->tree_root, 1);
9466 if (IS_ERR(trans)) {
9467 ret = PTR_ERR(trans);
9472 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9476 leaf = path->nodes[0];
9479 struct btrfs_key found_key;
9481 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9482 int no_more_keys = find_next_key(path, &key);
9484 btrfs_release_path(path);
9486 ret = btrfs_commit_transaction(trans,
9498 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9500 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
/* Reloc roots are expected to be stale; skip them. */
9502 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9505 ret = maybe_repair_root_item(info, path, &found_key,
/* Something needs fixing and we're in repair mode but have no
 * transaction yet: restart this leaf with a transaction open. */
9510 if (!trans && repair) {
9513 btrfs_release_path(path);
9523 free_roots_info_cache();
9524 btrfs_free_path(path);
9526 btrfs_commit_transaction(trans, info->tree_root);
/* Usage/help text for `btrfs check`, consumed by usage(). */
9533 const char * const cmd_check_usage[] = {
9534 "btrfs check [options] <device>",
9535 "Check structural integrity of a filesystem (unmounted).",
9536 "Check structural integrity of an unmounted filesystem. Verify internal",
9537 "trees' consistency and item connectivity. In the repair mode try to",
9538 "fix the problems found.",
9539 "WARNING: the repair mode is considered dangerous",
9541 "-s|--super <superblock> use this superblock copy",
9542 "-b|--backup use the first valid backup root copy",
9543 "--repair try to repair the filesystem",
9544 "--readonly run in read-only mode (default)",
9545 "--init-csum-tree create a new CRC tree",
9546 "--init-extent-tree create a new extent tree",
9547 "--check-data-csum verify checksums of data blocks",
9548 "-Q|--qgroup-report print a report on qgroup consistency",
9549 "-E|--subvol-extents <subvolid>",
9550 " print subvolume extents and sharing state",
9551 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9552 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
9553 "-p|--progress indicate progress",
9557 int cmd_check(int argc, char **argv)
9559 struct cache_tree root_cache;
9560 struct btrfs_root *root;
9561 struct btrfs_fs_info *info;
9564 u64 tree_root_bytenr = 0;
9565 u64 chunk_root_bytenr = 0;
9566 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9569 int init_csum_tree = 0;
9571 int qgroup_report = 0;
9572 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9576 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9577 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9578 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9579 static const struct option long_options[] = {
9580 { "super", required_argument, NULL, 's' },
9581 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9582 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9583 { "init-csum-tree", no_argument, NULL,
9584 GETOPT_VAL_INIT_CSUM },
9585 { "init-extent-tree", no_argument, NULL,
9586 GETOPT_VAL_INIT_EXTENT },
9587 { "check-data-csum", no_argument, NULL,
9588 GETOPT_VAL_CHECK_CSUM },
9589 { "backup", no_argument, NULL, 'b' },
9590 { "subvol-extents", required_argument, NULL, 'E' },
9591 { "qgroup-report", no_argument, NULL, 'Q' },
9592 { "tree-root", required_argument, NULL, 'r' },
9593 { "chunk-root", required_argument, NULL,
9594 GETOPT_VAL_CHUNK_TREE },
9595 { "progress", no_argument, NULL, 'p' },
9599 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9603 case 'a': /* ignored */ break;
9605 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9608 num = arg_strtou64(optarg);
9609 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9611 "ERROR: super mirror should be less than: %d\n",
9612 BTRFS_SUPER_MIRROR_MAX);
9615 bytenr = btrfs_sb_offset(((int)num));
9616 printf("using SB copy %llu, bytenr %llu\n", num,
9617 (unsigned long long)bytenr);
9623 subvolid = arg_strtou64(optarg);
9626 tree_root_bytenr = arg_strtou64(optarg);
9628 case GETOPT_VAL_CHUNK_TREE:
9629 chunk_root_bytenr = arg_strtou64(optarg);
9632 ctx.progress_enabled = true;
9636 usage(cmd_check_usage);
9637 case GETOPT_VAL_REPAIR:
9638 printf("enabling repair mode\n");
9640 ctree_flags |= OPEN_CTREE_WRITES;
9642 case GETOPT_VAL_READONLY:
9645 case GETOPT_VAL_INIT_CSUM:
9646 printf("Creating a new CRC tree\n");
9649 ctree_flags |= OPEN_CTREE_WRITES;
9651 case GETOPT_VAL_INIT_EXTENT:
9652 init_extent_tree = 1;
9653 ctree_flags |= (OPEN_CTREE_WRITES |
9654 OPEN_CTREE_NO_BLOCK_GROUPS);
9657 case GETOPT_VAL_CHECK_CSUM:
9658 check_data_csum = 1;
9663 if (check_argc_exact(argc - optind, 1))
9664 usage(cmd_check_usage);
9666 if (ctx.progress_enabled) {
9667 ctx.tp = TASK_NOTHING;
9668 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9671 /* This check is the only reason for --readonly to exist */
9672 if (readonly && repair) {
9673 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9678 cache_tree_init(&root_cache);
9680 if((ret = check_mounted(argv[optind])) < 0) {
9681 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9684 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9689 /* only allow partial opening under repair mode */
9691 ctree_flags |= OPEN_CTREE_PARTIAL;
9693 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9694 chunk_root_bytenr, ctree_flags);
9696 fprintf(stderr, "Couldn't open file system\n");
9702 root = info->fs_root;
9705 * repair mode will force us to commit transaction which
9706 * will make us fail to load log tree when mounting.
9708 if (repair && btrfs_super_log_root(info->super_copy)) {
9709 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9714 ret = zero_log_tree(root);
9716 fprintf(stderr, "fail to zero log tree\n");
9721 uuid_unparse(info->super_copy->fsid, uuidbuf);
9722 if (qgroup_report) {
9723 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9725 ret = qgroup_verify_all(info);
9727 ret = report_qgroups(1);
9731 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9732 subvolid, argv[optind], uuidbuf);
9733 ret = print_extent_state(info, subvolid);
9736 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9738 if (!extent_buffer_uptodate(info->tree_root->node) ||
9739 !extent_buffer_uptodate(info->dev_root->node) ||
9740 !extent_buffer_uptodate(info->chunk_root->node)) {
9741 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9746 if (init_extent_tree || init_csum_tree) {
9747 struct btrfs_trans_handle *trans;
9749 trans = btrfs_start_transaction(info->extent_root, 0);
9750 if (IS_ERR(trans)) {
9751 fprintf(stderr, "Error starting transaction\n");
9752 ret = PTR_ERR(trans);
9756 if (init_extent_tree) {
9757 printf("Creating a new extent tree\n");
9758 ret = reinit_extent_tree(trans, info);
9763 if (init_csum_tree) {
9764 fprintf(stderr, "Reinit crc root\n");
9765 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9767 fprintf(stderr, "crc root initialization failed\n");
9772 ret = fill_csum_tree(trans, info->csum_root,
9775 fprintf(stderr, "crc refilling failed\n");
9780 * Ok now we commit and run the normal fsck, which will add
9781 * extent entries for all of the items it finds.
9783 ret = btrfs_commit_transaction(trans, info->extent_root);
9787 if (!extent_buffer_uptodate(info->extent_root->node)) {
9788 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9792 if (!extent_buffer_uptodate(info->csum_root->node)) {
9793 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9798 if (!ctx.progress_enabled)
9799 fprintf(stderr, "checking extents\n");
9800 ret = check_chunks_and_extents(root);
9802 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9804 ret = repair_root_items(info);
9808 fprintf(stderr, "Fixed %d roots.\n", ret);
9810 } else if (ret > 0) {
9812 "Found %d roots with an outdated root item.\n",
9815 "Please run a filesystem check with the option --repair to fix them.\n");
9820 if (!ctx.progress_enabled) {
9821 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9822 fprintf(stderr, "checking free space tree\n");
9824 fprintf(stderr, "checking free space cache\n");
9826 ret = check_space_cache(root);
9831 * We used to have to have these hole extents in between our real
9832 * extents so if we don't have this flag set we need to make sure there
9833 * are no gaps in the file extents for inodes, otherwise we can just
9834 * ignore it when this happens.
9836 no_holes = btrfs_fs_incompat(root->fs_info,
9837 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9838 if (!ctx.progress_enabled)
9839 fprintf(stderr, "checking fs roots\n");
9840 ret = check_fs_roots(root, &root_cache);
9844 fprintf(stderr, "checking csums\n");
9845 ret = check_csums(root);
9849 fprintf(stderr, "checking root refs\n");
9850 ret = check_root_refs(root, &root_cache);
9854 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9855 struct extent_buffer *eb;
9857 eb = list_first_entry(&root->fs_info->recow_ebs,
9858 struct extent_buffer, recow);
9859 list_del_init(&eb->recow);
9860 ret = recow_extent_buffer(root, eb);
9865 while (!list_empty(&delete_items)) {
9866 struct bad_item *bad;
9868 bad = list_first_entry(&delete_items, struct bad_item, list);
9869 list_del_init(&bad->list);
9871 ret = delete_bad_item(root, bad);
9875 if (info->quota_enabled) {
9877 fprintf(stderr, "checking quota groups\n");
9878 err = qgroup_verify_all(info);
9883 if (!list_empty(&root->fs_info->recow_ebs)) {
9884 fprintf(stderr, "Transid errors in file system\n");
9888 /* Don't override original ret */
9892 ret = report_qgroups(0);
9893 if (found_old_backref) { /*
9894 * there was a disk format change when mixed
9895 * backref was in testing tree. The old format
9896 * existed about one week.
9898 printf("\n * Found old mixed backref format. "
9899 "The old format is not supported! *"
9900 "\n * Please mount the FS in readonly mode, "
9901 "backup data and re-format the FS. *\n\n");
9904 printf("found %llu bytes used err is %d\n",
9905 (unsigned long long)bytes_used, ret);
9906 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9907 printf("total tree bytes: %llu\n",
9908 (unsigned long long)total_btree_bytes);
9909 printf("total fs tree bytes: %llu\n",
9910 (unsigned long long)total_fs_tree_bytes);
9911 printf("total extent tree bytes: %llu\n",
9912 (unsigned long long)total_extent_tree_bytes);
9913 printf("btree space waste bytes: %llu\n",
9914 (unsigned long long)btree_space_waste);
9915 printf("file data blocks allocated: %llu\n referenced %llu\n",
9916 (unsigned long long)data_bytes_allocated,
9917 (unsigned long long)data_bytes_referenced);
9919 free_qgroup_counts();
9920 free_root_recs_tree(&root_cache);
9924 if (ctx.progress_enabled)
9925 task_deinit(ctx.info);