2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
/*
 * Fragment of the progress-reporting state (the enclosing enum/struct
 * headers are elided in this excerpt): TASK_NOTHING terminates the list
 * of checker phases; task_ctx holds the current phase and the
 * task-utils handle used by the status-printer thread.
 */
48 TASK_NOTHING, /* have to be the last element */
53 enum task_position tp;
55 struct task_info *info;
/*
 * File-scope accumulators and mode flags for the whole check run.
 * The u64 counters are byte totals gathered while walking the trees;
 * repair/no_holes/init_extent_tree/check_data_csum look like
 * option-controlled modes (NOTE(review): option parsing not visible in
 * this excerpt — confirm).
 */
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
/*
 * Thread body for the progress indicator: on each period tick, print
 * the name of the current phase plus a four-state spinner character,
 * stopping (visible via the early test) once tp == TASK_NOTHING.
 * NOTE(review): loop construct and 'count' update are elided in this
 * excerpt.
 */
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
81 static char *task_position_string[] = {
83 "checking free space cache",
/* 1000ms period between status refreshes */
87 task_period_start(priv->info, 1000 /* 1s */);
89 if (priv->tp == TASK_NOTHING)
/* '\r' keeps the status on a single console line */
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
97 task_period_wait(priv->info);
/* Task-utils callback paired with print_status_check (body elided in
 * this excerpt). */
102 static int print_status_return(void *p)
/*
 * Common header embedded in both data and tree backrefs; the bitfields
 * track what evidence for the backref has been seen during the scan
 * (found in extent tree, found the actual ref, broken, ...).
 */
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
/* Backref for a data extent; embeds extent_backref as its header. */
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
/* Backref for a metadata (tree-block) extent. */
149 struct tree_backref {
150 struct extent_backref node;
/*
 * Per-extent bookkeeping record, indexed by a cache_extent and carrying
 * lists of backrefs and duplicates.  The bitfields record which checks
 * have been performed / which inconsistencies were detected for this
 * extent (several numeric members are elided in this excerpt).
 */
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
/*
 * One name linking an inode into a directory; the found_* bits record
 * which of DIR_ITEM / DIR_INDEX / INODE_REF were seen for this name,
 * and filetype:8 holds the BTRFS_FT_* value from the dir item.
 */
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
189 unsigned int filetype:8;
/* itemtype of the ref that set this backref (INODE_REF vs EXTREF) */
191 unsigned int ref_type;
/* Queued root item to process, with the drop key for snapshot drops. */
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
/*
 * Bit flags describing inconsistencies found on a single inode/root
 * backref; accumulated in backref->errors and decoded by
 * print_ref_error().
 */
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/* One hole in a file's extent coverage, kept in an rb-tree keyed by
 * start offset (fields elided in this excerpt). */
223 struct file_extent_hole {
229 /* Compatible function to allow reuse of old codes */
/* Return the start of the lowest hole in the tree (rb_first gives the
 * minimum start); the empty-tree return value is elided here. */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for file_extent_hole nodes: order primarily by
 * start offset; on equal starts, the len comparison picks which node
 * becomes the merge center (return values elided in this excerpt —
 * note the "compare will not return 0" assumption in
 * add_file_extent_hole).
 */
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
260 /* Hole 2 will be merge center */
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
/* Insert [start, start+len) into the hole tree, then coalesce: first a
 * single merge with the predecessor, then an iterated merge that
 * absorbs every overlapping successor.  (Several lines elided.) */
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
277 hole = malloc(sizeof(*hole));
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* prev reaches (or touches) our start: extend back over prev */
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iterate merge with next holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
303 if (hole->start + hole->len >= next->start) {
/* next extends beyond us: grow len to cover it, else it is swallowed */
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
/*
 * rb_search comparator: 'data' carries a probe hole whose ->start is the
 * offset to locate; matches when start falls inside [hole->start,
 * hole->start + hole->len).  Return values elided in this excerpt.
 */
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
321 hole = (struct file_extent_hole *)data;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole in the record
335 * This will do the hole split and is much restrict than add.
/* Remove [start, start+len) from an existing hole: the covering hole is
 * located via compare_hole_range, erased, and up to two remainder
 * pieces (before and after the deleted range) are re-added. */
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
346 struct rb_node *node;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
/* the range to delete must not extend past the found hole */
357 if (start + len > hole->start + hole->len)
361 * Now there will be no overflap, delete the hole and re-add the
362 * split(s) if they exists.
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
/* Copy every hole from 'src' into 'dst' via add_file_extent_hole (which
 * also merges adjacent holes in dst).  Loop header elided. */
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
/* Drain the hole tree: repeatedly erase the first node (and presumably
 * free it — the free() line is elided in this excerpt). */
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
417 node = rb_first(holes);
/*
 * Aggregated state for one inode while its subvolume is scanned:
 * backref list, what-was-seen bitfields, the hole tree used for
 * extent-coverage checking and the orphan-extent list (numeric fields
 * such as ino/nlink/isize are elided in this excerpt).
 */
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
443 struct rb_root holes;
444 struct list_head orphan_extents;
/*
 * Bit flags for per-inode inconsistencies; accumulated in
 * inode_record->errors and decoded by print_inode_error().
 */
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * One subvolume/root reference (the root-tree analogue of
 * inode_backref): bits record which of DIR_ITEM / DIR_INDEX /
 * ROOT_REF / ROOT_BACKREF were found, plus reachability from the top.
 */
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
/* Per-root record: backref list keyed by a cache_extent
 * (NOTE(review): struct headers are elided in this excerpt; the
 * following field groups belong to several consecutive structs). */
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
/* ptr_node: generic cache_extent + data-pointer wrapper */
489 struct cache_extent cache;
/* shared_node: per-tree-block caches of root/inode records being built,
 * with 'current' pointing at the inode record in progress */
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
/* walk_control: state for walking one tree, tracking the shared nodes
 * seen at each level */
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
/* trailing fields of a queued record (struct header elided) */
514 struct btrfs_key key;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark 'root' dirty for this transaction exactly once per transid:
 * snapshot the current node as commit_root and take an extra reference
 * on it so it survives subsequent COW.
 */
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
528 extent_buffer_get(root->node);
/*
 * Map a POSIX st_mode file-type (S_IFMT bits) to the corresponding
 * BTRFS_FT_* directory-entry type via a static lookup table indexed by
 * (imode & S_IFMT) >> S_SHIFT.
 */
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/* rb-tree comparator ordering device_records by devid (return
 * statements elided in this excerpt). */
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode_record: shallow memcpy of the scalar fields, then
 * fresh copies of each backref (struct plus inline name), each orphan
 * data extent, and the hole tree.  Returns ERR_PTR(-ENOMEM) on
 * allocation failure; the trailing labels free the partial copy
 * (cleanup paths partially elided in this excerpt).
 */
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct inode_backref *tmp;
570 struct orphan_data_extent *src_orphan;
571 struct orphan_data_extent *dst_orphan;
575 rec = malloc(sizeof(*rec));
577 return ERR_PTR(-ENOMEM);
578 memcpy(rec, orig_rec, sizeof(*rec));
/* re-init list/tree heads so the copy does not alias the original */
580 INIT_LIST_HEAD(&rec->backrefs);
581 INIT_LIST_HEAD(&rec->orphan_extents);
582 rec->holes = RB_ROOT;
584 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* +1 for the NUL after the inline name */
585 size = sizeof(*orig) + orig->namelen + 1;
586 backref = malloc(size);
591 memcpy(backref, orig, size);
592 list_add_tail(&backref->list, &rec->backrefs);
594 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595 dst_orphan = malloc(sizeof(*dst_orphan));
600 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
603 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* error cleanup: free whatever was copied before the failure */
609 if (!list_empty(&rec->backrefs))
610 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611 list_del(&orig->list);
615 if (!list_empty(&rec->orphan_extents))
616 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617 list_del(&orig->list);
/* Print every orphan data extent of one tree (inode, file offset, disk
 * bytenr, disk length); silent when the list is empty. */
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
629 struct orphan_data_extent *orphan;
631 if (list_empty(orphan_extents))
633 printf("The following data extent is lost in tree %llu:\n",
635 list_for_each_entry(orphan, orphan_extents, list) {
636 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Decode rec->errors into a human-readable line on stderr ("root N
 * inode M errors X, <flag names>...").  For a reloc root, report the
 * fs-root objectid it relocates (root_key.offset) instead.  Follow-up
 * detail is printed for orphan extents and for file-extent holes.
 */
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
644 u64 root_objectid = root->root_key.objectid;
645 int errors = rec->errors;
649 /* reloc root errors, we print its corresponding fs root objectid*/
650 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651 root_objectid = root->root_key.offset;
652 fprintf(stderr, "reloc");
654 fprintf(stderr, "root %llu inode %llu errors %x",
655 (unsigned long long) root_objectid,
656 (unsigned long long) rec->ino, rec->errors);
/* one short suffix per I_ERR_* bit set */
658 if (errors & I_ERR_NO_INODE_ITEM)
659 fprintf(stderr, ", no inode item");
660 if (errors & I_ERR_NO_ORPHAN_ITEM)
661 fprintf(stderr, ", no orphan item");
662 if (errors & I_ERR_DUP_INODE_ITEM)
663 fprintf(stderr, ", dup inode item");
664 if (errors & I_ERR_DUP_DIR_INDEX)
665 fprintf(stderr, ", dup dir index");
666 if (errors & I_ERR_ODD_DIR_ITEM)
667 fprintf(stderr, ", odd dir item");
668 if (errors & I_ERR_ODD_FILE_EXTENT)
669 fprintf(stderr, ", odd file extent");
670 if (errors & I_ERR_BAD_FILE_EXTENT)
671 fprintf(stderr, ", bad file extent");
672 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673 fprintf(stderr, ", file extent overlap");
674 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675 fprintf(stderr, ", file extent discount");
676 if (errors & I_ERR_DIR_ISIZE_WRONG)
677 fprintf(stderr, ", dir isize wrong");
678 if (errors & I_ERR_FILE_NBYTES_WRONG)
679 fprintf(stderr, ", nbytes wrong");
680 if (errors & I_ERR_ODD_CSUM_ITEM)
681 fprintf(stderr, ", odd csum item");
682 if (errors & I_ERR_SOME_CSUM_MISSING)
683 fprintf(stderr, ", some csum missing");
684 if (errors & I_ERR_LINK_COUNT_WRONG)
685 fprintf(stderr, ", link count wrong");
686 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687 fprintf(stderr, ", orphan file extent");
688 fprintf(stderr, "\n");
689 /* Print the orphan extents if needed */
690 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
693 /* Print the holes if needed */
694 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695 struct file_extent_hole *hole;
696 struct rb_node *node;
699 node = rb_first(&rec->holes);
700 fprintf(stderr, "Found file extent holes:\n");
703 hole = rb_entry(node, struct file_extent_hole, node);
704 fprintf(stderr, "\tstart: %llu, len: %llu\n",
705 hole->start, hole->len);
706 node = rb_next(node);
/* empty hole tree: the whole (rounded-up) isize is missing */
709 fprintf(stderr, "\tstart: 0, len: %llu\n",
710 round_up(rec->isize, root->sectorsize));
/* Decode a REF_ERR_* bitmask into comma-separated suffixes on stderr,
 * terminated by a newline (continues a line started by the caller). */
714 static void print_ref_error(int errors)
716 if (errors & REF_ERR_NO_DIR_ITEM)
717 fprintf(stderr, ", no dir item");
718 if (errors & REF_ERR_NO_DIR_INDEX)
719 fprintf(stderr, ", no dir index");
720 if (errors & REF_ERR_NO_INODE_REF)
721 fprintf(stderr, ", no inode ref");
722 if (errors & REF_ERR_DUP_DIR_ITEM)
723 fprintf(stderr, ", dup dir item");
724 if (errors & REF_ERR_DUP_DIR_INDEX)
725 fprintf(stderr, ", dup dir index");
726 if (errors & REF_ERR_DUP_INODE_REF)
727 fprintf(stderr, ", dup inode ref");
728 if (errors & REF_ERR_INDEX_UNMATCH)
729 fprintf(stderr, ", index unmatch");
730 if (errors & REF_ERR_FILETYPE_UNMATCH)
731 fprintf(stderr, ", filetype unmatch");
732 if (errors & REF_ERR_NAME_TOO_LONG)
733 fprintf(stderr, ", name too long");
734 if (errors & REF_ERR_NO_ROOT_REF)
735 fprintf(stderr, ", no root ref");
736 if (errors & REF_ERR_NO_ROOT_BACKREF)
737 fprintf(stderr, ", no root backref");
738 if (errors & REF_ERR_DUP_ROOT_REF)
739 fprintf(stderr, ", dup root ref");
740 if (errors & REF_ERR_DUP_ROOT_BACKREF)
741 fprintf(stderr, ", dup root backref");
742 fprintf(stderr, "\n");
/*
 * Look up (or lazily create) the inode_record for 'ino' in the cache.
 * On a hit with mod set and refs > 1, the record is cloned first
 * (copy-on-write) so the caller can modify it safely.  On a miss, a
 * zeroed record plus its ptr_node wrapper are allocated and inserted;
 * allocation failures return ERR_PTR(-ENOMEM), a duplicate insert
 * returns ERR_PTR(-EEXIST).  (Several lines elided in this excerpt.)
 */
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
748 struct ptr_node *node;
749 struct cache_extent *cache;
750 struct inode_record *rec = NULL;
753 cache = lookup_cache_extent(inode_cache, ino, 1);
755 node = container_of(cache, struct ptr_node, cache);
757 if (mod && rec->refs > 1) {
758 node->data = clone_inode_rec(rec);
759 if (IS_ERR(node->data))
765 rec = calloc(1, sizeof(*rec));
767 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no file extent seen yet" */
769 rec->extent_start = (u64)-1;
771 INIT_LIST_HEAD(&rec->backrefs);
772 INIT_LIST_HEAD(&rec->orphan_extents);
773 rec->holes = RB_ROOT;
775 node = malloc(sizeof(*node));
778 return ERR_PTR(-ENOMEM);
780 node->cache.start = ino;
781 node->cache.size = 1;
/* the free-ino cache inode gets special-cased here (line elided) */
784 if (ino == BTRFS_FREE_INO_OBJECTID)
787 ret = insert_cache_extent(inode_cache, &node->cache);
789 return ERR_PTR(-EEXIST);
/* Unlink (and presumably free — the free() line is elided) every entry
 * on an orphan_data_extent list. */
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
796 struct orphan_data_extent *orphan;
798 while (!list_empty(orphan_extents)) {
799 orphan = list_entry(orphan_extents->next,
800 struct orphan_data_extent, list);
801 list_del(&orphan->list);
/* Release an inode_record and everything it owns: backref list, orphan
 * extents and the hole tree (refcount handling elided in this
 * excerpt). */
806 static void free_inode_rec(struct inode_record *rec)
808 struct inode_backref *backref;
813 while (!list_empty(&rec->backrefs)) {
814 backref = list_entry(rec->backrefs.next,
815 struct inode_backref, list);
816 list_del(&backref->list);
819 free_orphan_data_extents(&rec->orphan_extents);
820 free_file_extent_holes(&rec->holes);
/* A record may be freed when it is fully checked, error-free, has its
 * inode item, a matching link count, and no remaining backrefs. */
824 static int can_free_inode_rec(struct inode_record *rec)
826 if (!rec->errors && rec->checked && rec->found_inode_item &&
827 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalize a fully-seen inode record: drop backrefs that are complete
 * and consistent, derive the per-mode error bits (dir size vs isize,
 * file nbytes, extent coverage, csum presence), and if the record ends
 * up clean (can_free_inode_rec), remove it from the cache.  Skipped
 * while the record is still being merged or not yet checked.
 */
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833 struct inode_record *rec)
835 struct cache_extent *cache;
836 struct inode_backref *tmp, *backref;
837 struct ptr_node *node;
838 unsigned char filetype;
840 if (!rec->found_inode_item)
843 filetype = imode_to_type(rec->imode);
844 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845 if (backref->found_dir_item && backref->found_dir_index) {
846 if (backref->filetype != filetype)
847 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
/* complete, consistent backref: retire it */
848 if (!backref->errors && backref->found_inode_ref &&
849 rec->nlink == rec->found_link) {
850 list_del(&backref->list);
856 if (!rec->checked || rec->merging)
859 if (S_ISDIR(rec->imode)) {
860 if (rec->found_size != rec->isize)
861 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862 if (rec->found_file_extent)
863 rec->errors |= I_ERR_ODD_FILE_EXTENT;
864 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865 if (rec->found_dir_item)
866 rec->errors |= I_ERR_ODD_DIR_ITEM;
867 if (rec->found_size != rec->nbytes)
868 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* with NO_HOLES unset, extents plus recorded holes must cover isize */
869 if (rec->nlink > 0 && !no_holes &&
870 (rec->extent_end < rec->isize ||
871 first_extent_gap(&rec->holes) < rec->isize))
872 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
875 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876 if (rec->found_csum_item && rec->nodatasum)
877 rec->errors |= I_ERR_ODD_CSUM_ITEM;
878 if (rec->some_csum_missing && !rec->nodatasum)
879 rec->errors |= I_ERR_SOME_CSUM_MISSING;
882 BUG_ON(rec->refs != 1);
883 if (can_free_inode_rec(rec)) {
884 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885 node = container_of(cache, struct ptr_node, cache);
886 BUG_ON(node->data != rec);
887 remove_cache_extent(inode_cache, &node->cache);
/* Search the root for the BTRFS_ORPHAN_ITEM_KEY of 'ino' (key.offset
 * assignment and return-value translation elided in this excerpt). */
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
895 struct btrfs_path path;
896 struct btrfs_key key;
899 key.objectid = BTRFS_ORPHAN_OBJECTID;
900 key.type = BTRFS_ORPHAN_ITEM_KEY;
903 btrfs_init_path(&path);
904 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905 btrfs_release_path(&path);
/*
 * Fill the active inode_record from an INODE_ITEM leaf entry: nlink,
 * isize, nbytes, mode and the nodatasum flag.  A second inode item for
 * the same inode sets I_ERR_DUP_INODE_ITEM; afterwards the record may
 * be retired via maybe_free_inode_rec.
 */
911 static int process_inode_item(struct extent_buffer *eb,
912 int slot, struct btrfs_key *key,
913 struct shared_node *active_node)
915 struct inode_record *rec;
916 struct btrfs_inode_item *item;
918 rec = active_node->current;
919 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920 if (rec->found_inode_item) {
921 rec->errors |= I_ERR_DUP_INODE_ITEM;
924 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925 rec->nlink = btrfs_inode_nlink(eb, item);
926 rec->isize = btrfs_inode_size(eb, item);
927 rec->nbytes = btrfs_inode_nbytes(eb, item);
928 rec->imode = btrfs_inode_mode(eb, item);
929 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
931 rec->found_inode_item = 1;
/* condition guarding this error is elided in this excerpt */
933 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref matching (dir, name) on an inode record, or allocate
 * a zeroed one with the name copied inline (NUL-terminated) and append
 * it.  The BTRFS_MULTIPLE_OBJECTIDS sentinel short-circuits matching.
 */
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
940 int namelen, u64 dir)
942 struct inode_backref *backref;
944 list_for_each_entry(backref, &rec->backrefs, list) {
945 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
947 if (backref->dir != dir || backref->namelen != namelen)
949 if (memcmp(name, backref->name, namelen))
/* no match: allocate struct + inline name + NUL */
954 backref = malloc(sizeof(*backref) + namelen + 1);
957 memset(backref, 0, sizeof(*backref));
959 backref->namelen = namelen;
960 memcpy(backref->name, name, namelen);
961 backref->name[namelen] = '\0';
962 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one directory-entry sighting on an inode's backref, keyed by
 * the item type that produced it (DIR_INDEX / DIR_ITEM / INODE_REF /
 * INODE_EXTREF).  Duplicate sightings and index/filetype mismatches
 * against previously-seen items raise the corresponding REF_ERR_* bits.
 */
966 static int add_inode_backref(struct cache_tree *inode_cache,
967 u64 ino, u64 dir, u64 index,
968 const char *name, int namelen,
969 int filetype, int itemtype, int errors)
971 struct inode_record *rec;
972 struct inode_backref *backref;
974 rec = get_inode_rec(inode_cache, ino, 1);
976 backref = get_inode_backref(rec, name, namelen, dir);
979 backref->errors |= errors;
980 if (itemtype == BTRFS_DIR_INDEX_KEY) {
981 if (backref->found_dir_index)
982 backref->errors |= REF_ERR_DUP_DIR_INDEX;
983 if (backref->found_inode_ref && backref->index != index)
984 backref->errors |= REF_ERR_INDEX_UNMATCH;
985 if (backref->found_dir_item && backref->filetype != filetype)
986 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
988 backref->index = index;
989 backref->filetype = filetype;
990 backref->found_dir_index = 1;
991 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
993 if (backref->found_dir_item)
994 backref->errors |= REF_ERR_DUP_DIR_ITEM;
995 if (backref->found_dir_index && backref->filetype != filetype)
996 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
998 backref->filetype = filetype;
999 backref->found_dir_item = 1;
1000 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1001 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1002 if (backref->found_inode_ref)
1003 backref->errors |= REF_ERR_DUP_INODE_REF;
1004 if (backref->found_dir_index && backref->index != index)
1005 backref->errors |= REF_ERR_INDEX_UNMATCH;
1007 backref->index = index;
1009 backref->ref_type = itemtype;
1010 backref->found_inode_ref = 1;
1015 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold 'src' into 'dst' when the same inode was seen via two paths:
 * replay each src backref into dst_cache through add_inode_backref
 * (so duplicate/mismatch errors are re-derived), OR the seen-flags
 * together, merge hole trees, link/size counters and the contiguous
 * extent range (flagging overlap or recording the gap as a hole), and
 * copy the inode-item fields if dst lacks them — a second inode item
 * sets I_ERR_DUP_INODE_ITEM.  (Several lines elided in this excerpt.)
 */
1019 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1020 struct cache_tree *dst_cache)
1022 struct inode_backref *backref;
1027 list_for_each_entry(backref, &src->backrefs, list) {
1028 if (backref->found_dir_index) {
1029 add_inode_backref(dst_cache, dst->ino, backref->dir,
1030 backref->index, backref->name,
1031 backref->namelen, backref->filetype,
1032 BTRFS_DIR_INDEX_KEY, backref->errors);
1034 if (backref->found_dir_item) {
1036 add_inode_backref(dst_cache, dst->ino,
1037 backref->dir, 0, backref->name,
1038 backref->namelen, backref->filetype,
1039 BTRFS_DIR_ITEM_KEY, backref->errors);
1041 if (backref->found_inode_ref) {
1042 add_inode_backref(dst_cache, dst->ino,
1043 backref->dir, backref->index,
1044 backref->name, backref->namelen, 0,
1045 backref->ref_type, backref->errors);
1049 if (src->found_dir_item)
1050 dst->found_dir_item = 1;
1051 if (src->found_file_extent)
1052 dst->found_file_extent = 1;
1053 if (src->found_csum_item)
1054 dst->found_csum_item = 1;
1055 if (src->some_csum_missing)
1056 dst->some_csum_missing = 1;
/* only copy holes when src has earlier (smaller-offset) gaps */
1057 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1058 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1063 BUG_ON(src->found_link < dir_count);
1064 dst->found_link += src->found_link - dir_count;
1065 dst->found_size += src->found_size;
1066 if (src->extent_start != (u64)-1) {
1067 if (dst->extent_start == (u64)-1) {
1068 dst->extent_start = src->extent_start;
1069 dst->extent_end = src->extent_end;
1071 if (dst->extent_end > src->extent_start)
1072 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1073 else if (dst->extent_end < src->extent_start) {
/* gap between ranges becomes a recorded hole */
1074 ret = add_file_extent_hole(&dst->holes,
1076 src->extent_start - dst->extent_end);
1078 if (dst->extent_end < src->extent_end)
1079 dst->extent_end = src->extent_end;
1083 dst->errors |= src->errors;
1084 if (src->found_inode_item) {
1085 if (!dst->found_inode_item) {
1086 dst->nlink = src->nlink;
1087 dst->isize = src->isize;
1088 dst->nbytes = src->nbytes;
1089 dst->imode = src->imode;
1090 dst->nodatasum = src->nodatasum;
1091 dst->found_inode_item = 1;
1093 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move every inode record from src_node's caches into dst_node's,
 * processing the root_cache first and then the inode_cache (the loop
 * restarts with src/dst swapped to the inode caches).  On an -EEXIST
 * insert, the record is merged into the existing one and freed.
 * Finally dst_node->current is advanced to src's in-progress inode if
 * it is newer.  (Several lines elided in this excerpt.)
 */
1101 static int splice_shared_node(struct shared_node *src_node,
1102 struct shared_node *dst_node)
1104 struct cache_extent *cache;
1105 struct ptr_node *node, *ins;
1106 struct cache_tree *src, *dst;
1107 struct inode_record *rec, *conflict;
1108 u64 current_ino = 0;
1112 if (--src_node->refs == 0)
1114 if (src_node->current)
1115 current_ino = src_node->current->ino;
1117 src = &src_node->root_cache;
1118 dst = &dst_node->root_cache;
1120 cache = search_cache_extent(src, 0);
1122 node = container_of(cache, struct ptr_node, cache);
/* advance before removing the node from src */
1124 cache = next_cache_extent(cache);
1127 remove_cache_extent(src, &node->cache);
1130 ins = malloc(sizeof(*ins));
1132 ins->cache.start = node->cache.start;
1133 ins->cache.size = node->cache.size;
1137 ret = insert_cache_extent(dst, &ins->cache);
1138 if (ret == -EEXIST) {
1139 conflict = get_inode_rec(dst, rec->ino, 1);
1140 BUG_ON(IS_ERR(conflict));
1141 merge_inode_recs(rec, conflict, dst);
1143 conflict->checked = 1;
1144 if (dst_node->current == conflict)
1145 dst_node->current = NULL;
1147 maybe_free_inode_rec(dst, conflict);
1148 free_inode_rec(rec);
/* after draining root caches, repeat for the inode caches */
1155 if (src == &src_node->root_cache) {
1156 src = &src_node->inode_cache;
1157 dst = &dst_node->inode_cache;
1161 if (current_ino > 0 && (!dst_node->current ||
1162 current_ino > dst_node->current->ino)) {
1163 if (dst_node->current) {
1164 dst_node->current->checked = 1;
1165 maybe_free_inode_rec(dst, dst_node->current);
1167 dst_node->current = get_inode_rec(dst, current_ino, 1);
1168 BUG_ON(IS_ERR(dst_node->current));
/* cache_tree destructor callback: free the inode record behind a
 * ptr_node (the node free itself is elided in this excerpt). */
1173 static void free_inode_ptr(struct cache_extent *cache)
1175 struct ptr_node *node;
1176 struct inode_record *rec;
1178 node = container_of(cache, struct ptr_node, cache);
1180 free_inode_rec(rec);
/* generates free_inode_recs_tree() used by enter_shared_node() */
1184 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/* Look up the shared_node covering 'bytenr' in the walk's shared cache
 * (NULL-miss return elided in this excerpt). */
1186 static struct shared_node *find_shared_node(struct cache_tree *shared,
1189 struct cache_extent *cache;
1190 struct shared_node *node;
1192 cache = lookup_cache_extent(shared, bytenr, 1);
1194 node = container_of(cache, struct shared_node, cache);
/* Allocate and register a shared_node for 'bytenr' with empty root and
 * inode caches (refs assignment elided in this excerpt). */
1200 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1203 struct shared_node *node;
1205 node = calloc(1, sizeof(*node));
1208 node->cache.start = bytenr;
1209 node->cache.size = 1;
1210 cache_tree_init(&node->root_cache);
1211 cache_tree_init(&node->inode_cache);
1214 ret = insert_cache_extent(shared, &node->cache);
/*
 * Descend into a tree block shared between roots.  First visit:
 * register a shared_node for it and make it the active node.  Revisit:
 * either discard its caches (dead root: refcount drops to zero while
 * the root itself has no refs) or splice its accumulated records into
 * the current active node, removing it once drained.
 */
1219 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1220 struct walk_control *wc, int level)
1222 struct shared_node *node;
1223 struct shared_node *dest;
1226 if (level == wc->active_node)
1229 BUG_ON(wc->active_node <= level);
1230 node = find_shared_node(&wc->shared, bytenr);
1232 ret = add_shared_node(&wc->shared, bytenr, refs);
1234 node = find_shared_node(&wc->shared, bytenr);
1235 wc->nodes[level] = node;
1236 wc->active_node = level;
1240 if (wc->root_level == wc->active_node &&
1241 btrfs_root_refs(&root->root_item) == 0) {
1242 if (--node->refs == 0) {
1243 free_inode_recs_tree(&node->root_cache);
1244 free_inode_recs_tree(&node->inode_cache);
1245 remove_cache_extent(&wc->shared, &node->cache);
1251 dest = wc->nodes[wc->active_node];
1252 splice_shared_node(node, dest);
1253 if (node->refs == 0) {
1254 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Ascend out of a shared tree block: find the next higher level with a
 * registered shared node, make it active, and splice the departing
 * node's records into it (skipped for roots with no remaining refs —
 * that branch is partially elided in this excerpt).
 */
1260 static int leave_shared_node(struct btrfs_root *root,
1261 struct walk_control *wc, int level)
1263 struct shared_node *node;
1264 struct shared_node *dest;
1267 if (level == wc->root_level)
/* find the nearest ancestor level that has a shared node */
1270 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1274 BUG_ON(i >= BTRFS_MAX_LEVEL);
1276 node = wc->nodes[wc->active_node];
1277 wc->nodes[wc->active_node] = NULL;
1278 wc->active_node = i;
1280 dest = wc->nodes[wc->active_node];
1281 if (wc->active_node < wc->root_level ||
1282 btrfs_root_refs(&root->root_item) > 0) {
1283 BUG_ON(node->refs <= 1);
1284 splice_shared_node(node, dest);
1286 BUG_ON(node->refs < 2);
/* Returns:
1295 * 1 - if the root with id child_root_id is a child of root parent_root_id
1296 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1297 * has other root(s) as parent(s)
1298 * 2 - if the root child_root_id doesn't have any parent roots
 *
 * Implemented as a forward ROOT_REF lookup (parent -> child), then a
 * scan of the child's ROOT_BACKREF items to see whether any parent at
 * all exists, and whether one of them is parent_root_id.
 */
1300 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1303 struct btrfs_path path;
1304 struct btrfs_key key;
1305 struct extent_buffer *leaf;
1309 btrfs_init_path(&path);
/* direct forward ref: (parent, ROOT_REF, child) */
1311 key.objectid = parent_root_id;
1312 key.type = BTRFS_ROOT_REF_KEY;
1313 key.offset = child_root_id;
1314 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1318 btrfs_release_path(&path);
/* otherwise enumerate the child's backrefs */
1322 key.objectid = child_root_id;
1323 key.type = BTRFS_ROOT_BACKREF_KEY;
1325 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1331 leaf = path.nodes[0];
1332 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1333 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1336 leaf = path.nodes[0];
1339 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1340 if (key.objectid != child_root_id ||
1341 key.type != BTRFS_ROOT_BACKREF_KEY)
1346 if (key.offset == parent_root_id) {
1347 btrfs_release_path(&path);
1354 btrfs_release_path(&path);
1357 return has_parent ? 0 : 2;
/*
 * Walk every btrfs_dir_item packed into one DIR_ITEM/DIR_INDEX leaf
 * slot: extract name/filetype/location, clamp over-long names (flagging
 * REF_ERR_NAME_TOO_LONG), and register a backref in the inode cache
 * (INODE_ITEM target), the root cache (ROOT_ITEM target / subvolume),
 * or the MULTIPLE_OBJECTIDS bucket for an invalid location.  A
 * DIR_INDEX slot holding more than one entry sets I_ERR_DUP_DIR_INDEX.
 */
1360 static int process_dir_item(struct btrfs_root *root,
1361 struct extent_buffer *eb,
1362 int slot, struct btrfs_key *key,
1363 struct shared_node *active_node)
1373 struct btrfs_dir_item *di;
1374 struct inode_record *rec;
1375 struct cache_tree *root_cache;
1376 struct cache_tree *inode_cache;
1377 struct btrfs_key location;
1378 char namebuf[BTRFS_NAME_LEN];
1380 root_cache = &active_node->root_cache;
1381 inode_cache = &active_node->inode_cache;
1382 rec = active_node->current;
1383 rec->found_dir_item = 1;
1385 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1386 total = btrfs_item_size_nr(eb, slot);
1387 while (cur < total) {
1389 btrfs_dir_item_key_to_cpu(eb, di, &location);
1390 name_len = btrfs_dir_name_len(eb, di);
1391 data_len = btrfs_dir_data_len(eb, di);
1392 filetype = btrfs_dir_type(eb, di);
/* dir isize accounting counts name bytes */
1394 rec->found_size += name_len;
1395 if (name_len <= BTRFS_NAME_LEN) {
1399 len = BTRFS_NAME_LEN;
1400 error = REF_ERR_NAME_TOO_LONG;
1402 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1404 if (location.type == BTRFS_INODE_ITEM_KEY) {
1405 add_inode_backref(inode_cache, location.objectid,
1406 key->objectid, key->offset, namebuf,
1407 len, filetype, key->type, error);
1408 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1409 add_inode_backref(root_cache, location.objectid,
1410 key->objectid, key->offset,
1411 namebuf, len, filetype,
1414 fprintf(stderr, "invalid location in dir item %u\n",
1416 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1417 key->objectid, key->offset, namebuf,
1418 len, filetype, key->type, error);
/* step to the next packed dir item in this slot */
1421 len = sizeof(*di) + name_len + data_len;
1422 di = (struct btrfs_dir_item *)((char *)di + len);
1425 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1426 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk the btrfs_inode_ref entries packed into one INODE_REF slot
 * (key.objectid = inode, key.offset = parent dir) and register each as
 * a backref; over-long names are clamped and flagged
 * REF_ERR_NAME_TOO_LONG.
 */
1431 static int process_inode_ref(struct extent_buffer *eb,
1432 int slot, struct btrfs_key *key,
1433 struct shared_node *active_node)
1441 struct cache_tree *inode_cache;
1442 struct btrfs_inode_ref *ref;
1443 char namebuf[BTRFS_NAME_LEN];
1445 inode_cache = &active_node->inode_cache;
1447 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1448 total = btrfs_item_size_nr(eb, slot);
1449 while (cur < total) {
1450 name_len = btrfs_inode_ref_name_len(eb, ref);
1451 index = btrfs_inode_ref_index(eb, ref);
1452 if (name_len <= BTRFS_NAME_LEN) {
1456 len = BTRFS_NAME_LEN;
1457 error = REF_ERR_NAME_TOO_LONG;
/* name bytes follow the fixed-size ref header */
1459 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1460 add_inode_backref(inode_cache, key->objectid, key->offset,
1461 index, namebuf, len, 0, key->type, error);
1463 len = sizeof(*ref) + name_len;
1464 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for INODE_EXTREF items (used when an
 * INODE_REF item would overflow).  The parent directory inode number is
 * read from the extref itself rather than from the item key offset.
 */
1470 static int process_inode_extref(struct extent_buffer *eb,
1471 int slot, struct btrfs_key *key,
1472 struct shared_node *active_node)
1481 struct cache_tree *inode_cache;
1482 struct btrfs_inode_extref *extref;
1483 char namebuf[BTRFS_NAME_LEN];
1485 inode_cache = &active_node->inode_cache;
1487 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1488 total = btrfs_item_size_nr(eb, slot);
1489 while (cur < total) {
1490 name_len = btrfs_inode_extref_name_len(eb, extref);
1491 index = btrfs_inode_extref_index(eb, extref);
1492 parent = btrfs_inode_extref_parent(eb, extref);
1493 if (name_len <= BTRFS_NAME_LEN) {
/* truncate over-long names and note the error on the backref */
1497 len = BTRFS_NAME_LEN;
1498 error = REF_ERR_NAME_TOO_LONG;
1500 read_extent_buffer(eb, namebuf,
1501 (unsigned long)(extref + 1), len);
1502 add_inode_backref(inode_cache, key->objectid, parent,
1503 index, namebuf, len, 0, key->type, error);
/* advance to the next packed extref in this item */
1505 len = sizeof(*extref) + name_len;
1506 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the data range [start, start + len) are
 * covered by checksum items in the csum tree; the covered byte count is
 * returned through *found.
 */
1513 static int count_csum_range(struct btrfs_root *root, u64 start,
1514 u64 len, u64 *found)
1516 struct btrfs_key key;
1517 struct btrfs_path path;
1518 struct extent_buffer *leaf;
1523 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1525 btrfs_init_path(&path);
1527 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1529 key.type = BTRFS_EXTENT_CSUM_KEY;
1531 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* the previous csum item may still cover 'start'; step back to it */
1535 if (ret > 0 && path.slots[0] > 0) {
1536 leaf = path.nodes[0];
1537 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1538 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1539 key.type == BTRFS_EXTENT_CSUM_KEY)
1544 leaf = path.nodes[0];
1545 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1546 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1551 leaf = path.nodes[0];
1554 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1555 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1556 key.type != BTRFS_EXTENT_CSUM_KEY)
1559 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1560 if (key.offset >= start + len)
1563 if (key.offset > start)
/* csum_size bytes of checksum cover one sector of data */
1566 size = btrfs_item_size_nr(leaf, path.slots[0]);
1567 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1568 if (csum_end > start) {
1569 size = min(csum_end - start, len);
1578 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item for the current inode record: track
 * overlaps and holes between consecutive extents, sanity-check the
 * inline/regular/prealloc fields, and verify checksum coverage for
 * regular extents.
 */
1584 static int process_file_extent(struct btrfs_root *root,
1585 struct extent_buffer *eb,
1586 int slot, struct btrfs_key *key,
1587 struct shared_node *active_node)
1589 struct inode_record *rec;
1590 struct btrfs_file_extent_item *fi;
1592 u64 disk_bytenr = 0;
1593 u64 extent_offset = 0;
1594 u64 mask = root->sectorsize - 1;
1598 rec = active_node->current;
1599 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1600 rec->found_file_extent = 1;
/* first extent seen for this inode: start tracking the covered range */
1602 if (rec->extent_start == (u64)-1) {
1603 rec->extent_start = key->offset;
1604 rec->extent_end = key->offset;
/* compare against the end of the previous extent: overlap vs hole */
1607 if (rec->extent_end > key->offset)
1608 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1609 else if (rec->extent_end < key->offset) {
1610 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1611 key->offset - rec->extent_end);
1616 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1617 extent_type = btrfs_file_extent_type(eb, fi);
1619 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1620 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1622 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1623 rec->found_size += num_bytes;
/* round the inline length up to a full sector for extent_end tracking */
1624 num_bytes = (num_bytes + mask) & ~mask;
1625 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1626 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1627 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1628 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1629 extent_offset = btrfs_file_extent_offset(eb, fi);
/* regular/prealloc extents must be non-empty and sector aligned */
1630 if (num_bytes == 0 || (num_bytes & mask))
1631 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1632 if (num_bytes + extent_offset >
1633 btrfs_file_extent_ram_bytes(eb, fi))
1634 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* prealloc extents cannot be compressed, encrypted or encoded */
1635 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1636 (btrfs_file_extent_compression(eb, fi) ||
1637 btrfs_file_extent_encryption(eb, fi) ||
1638 btrfs_file_extent_other_encoding(eb, fi)))
1639 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 marks a hole; holes add no bytes to found_size */
1640 if (disk_bytenr > 0)
1641 rec->found_size += num_bytes;
1643 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1645 rec->extent_end = key->offset + num_bytes;
1648 * The data reloc tree will copy full extents into its inode and then
1649 * copy the corresponding csums. Because the extent it copied could be
1650 * a preallocated extent that hasn't been written to yet there may be no
1651 * csums to copy, ergo we won't have csums for our file extent. This is
1652 * ok so just don't bother checking csums if the inode belongs to the
1655 if (disk_bytenr > 0 &&
1656 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* compressed extents store csums for the on-disk (compressed) bytes */
1658 if (btrfs_file_extent_compression(eb, fi))
1659 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1661 disk_bytenr += extent_offset;
1663 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* regular extents should be fully checksummed, prealloc not at all */
1666 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1668 rec->found_csum_item = 1;
1669 if (found < num_bytes)
1670 rec->some_csum_missing = 1;
1671 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1673 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item in a leaf: keep active_node->current pointing at
 * the inode record for the item's objectid and dispatch to the
 * per-type handlers (dir items, inode refs/extrefs, inode items, file
 * extents).
 */
1679 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1680 struct walk_control *wc)
1682 struct btrfs_key key;
1686 struct cache_tree *inode_cache;
1687 struct shared_node *active_node;
/* a dead root (zero refs) at the top level needs no item processing */
1689 if (wc->root_level == wc->active_node &&
1690 btrfs_root_refs(&root->root_item) == 0)
1693 active_node = wc->nodes[wc->active_node];
1694 inode_cache = &active_node->inode_cache;
1695 nritems = btrfs_header_nritems(eb);
1696 for (i = 0; i < nritems; i++) {
1697 btrfs_item_key_to_cpu(eb, &key, i);
/* free-space-cache and orphan items carry no per-inode metadata */
1699 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1701 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* moving to a new objectid: finish and maybe free the previous record */
1704 if (active_node->current == NULL ||
1705 active_node->current->ino < key.objectid) {
1706 if (active_node->current) {
1707 active_node->current->checked = 1;
1708 maybe_free_inode_rec(inode_cache,
1709 active_node->current);
1711 active_node->current = get_inode_rec(inode_cache,
1713 BUG_ON(IS_ERR(active_node->current));
1716 case BTRFS_DIR_ITEM_KEY:
1717 case BTRFS_DIR_INDEX_KEY:
1718 ret = process_dir_item(root, eb, i, &key, active_node);
1720 case BTRFS_INODE_REF_KEY:
1721 ret = process_inode_ref(eb, i, &key, active_node);
1723 case BTRFS_INODE_EXTREF_KEY:
1724 ret = process_inode_extref(eb, i, &key, active_node);
1726 case BTRFS_INODE_ITEM_KEY:
1727 ret = process_inode_item(eb, i, &key, active_node);
1729 case BTRFS_EXTENT_DATA_KEY:
1730 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of 'node' from 'slot' onwards so
 * the subsequent tree walk does not stall on each synchronous read.
 */
1740 static void reada_walk_down(struct btrfs_root *root,
1741 struct extent_buffer *node, int slot)
1750 level = btrfs_header_level(node);
1754 nritems = btrfs_header_nritems(node);
/* children live one level below this node */
1755 blocksize = btrfs_level_size(root, level - 1);
1756 for (i = slot; i < nritems; i++) {
1757 bytenr = btrfs_node_blockptr(node, i);
1758 ptr_gen = btrfs_node_ptr_generation(node, i);
1759 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
/*
 * Check a child node/leaf against its parent by three conditions:
 * 1. the first key of the child must equal the key stored in the parent
 *    slot pointing to it,
 * 2. the block pointer in the parent must match the child's bytenr,
 * 3. the generation in the parent pointer must match the child header.
 *
 * Otherwise the child node/leaf pointed to by the key in the parent is
 * not valid.
 *
 * We would like to check the leaf owner too, but subvolumes may share
 * leaves, which makes an owner check weak; the key check is sufficient
 * for that case.
 */
1776 static int check_child_node(struct btrfs_root *root,
1777 struct extent_buffer *parent, int slot,
1778 struct extent_buffer *child)
1780 struct btrfs_key parent_key;
1781 struct btrfs_key child_key;
1784 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* a leaf's first key is an item key, a node's is a node key */
1785 if (btrfs_header_level(child) == 0)
1786 btrfs_item_key_to_cpu(child, &child_key, 0);
1788 btrfs_node_key_to_cpu(child, &child_key, 0);
1790 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1793 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1794 parent_key.objectid, parent_key.type, parent_key.offset,
1795 child_key.objectid, child_key.type, child_key.offset);
1797 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1799 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1800 btrfs_node_blockptr(parent, slot),
1801 btrfs_header_bytenr(child));
1803 if (btrfs_node_ptr_generation(parent, slot) !=
1804 btrfs_header_generation(child)) {
1806 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1807 btrfs_header_generation(child),
1808 btrfs_node_ptr_generation(parent, slot));
/*
 * Walk down one subtree: maintain shared-node reference accounting,
 * read and validate each child block (key/bytenr/generation via
 * check_child_node() plus btrfs_check_leaf()/btrfs_check_node()), and
 * hand leaves to process_one_leaf().
 */
1813 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1814 struct walk_control *wc, int *level)
1816 enum btrfs_tree_block_status status;
1819 struct extent_buffer *next;
1820 struct extent_buffer *cur;
1825 WARN_ON(*level < 0);
1826 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1827 ret = btrfs_lookup_extent_info(NULL, root,
1828 path->nodes[*level]->start,
1829 *level, 1, &refs, NULL);
1836 ret = enter_shared_node(root, path->nodes[*level]->start,
1844 while (*level >= 0) {
1845 WARN_ON(*level < 0);
1846 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1847 cur = path->nodes[*level];
1849 if (btrfs_header_level(cur) != *level)
1852 if (path->slots[*level] >= btrfs_header_nritems(cur))
1855 ret = process_one_leaf(root, cur, wc);
1860 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1861 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1862 blocksize = btrfs_level_size(root, *level - 1);
1863 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
/* a shared child is walked only once; skip it on later visits */
1869 ret = enter_shared_node(root, bytenr, refs,
1872 path->slots[*level]++;
1877 next = btrfs_find_tree_block(root, bytenr, blocksize);
1878 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1879 free_extent_buffer(next);
/* readahead the remaining siblings before the blocking read below */
1880 reada_walk_down(root, cur, path->slots[*level]);
1881 next = read_tree_block(root, bytenr, blocksize,
/* unreadable child: record the parent block as corrupt */
1883 if (!extent_buffer_uptodate(next)) {
1884 struct btrfs_key node_key;
1886 btrfs_node_key_to_cpu(path->nodes[*level],
1888 path->slots[*level]);
1889 btrfs_add_corrupt_extent_record(root->fs_info,
1891 path->nodes[*level]->start,
1892 root->leafsize, *level);
1898 ret = check_child_node(root, cur, path->slots[*level], next);
1904 if (btrfs_is_leaf(next))
1905 status = btrfs_check_leaf(root, NULL, next);
1907 status = btrfs_check_node(root, NULL, next);
1908 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1909 free_extent_buffer(next);
/* descend into the child we just validated */
1914 *level = *level - 1;
1915 free_extent_buffer(path->nodes[*level]);
1916 path->nodes[*level] = next;
1917 path->slots[*level] = 0;
1920 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Unwind the path after walk_down_tree(): advance to the next sibling
 * slot when one remains; otherwise drop the finished level, leaving the
 * shared-node scope when unwinding past the active node.
 */
1924 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1925 struct walk_control *wc, int *level)
1928 struct extent_buffer *leaf;
1930 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1931 leaf = path->nodes[i];
/* a sibling remains at this level: resume the walk from there */
1932 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* level fully consumed: release it and keep unwinding */
1937 free_extent_buffer(path->nodes[*level]);
1938 path->nodes[*level] = NULL;
1939 BUG_ON(*level > wc->active_node);
1940 if (*level == wc->active_node)
1941 leave_shared_node(root, wc, *level);
/*
 * Sanity-check the inode record of a tree root's directory: it must
 * have an inode item with no errors, nlink 1 with no counted links,
 * and exactly one inode-ref backref named ".." at index 0 that has no
 * dir item/index counterpart.
 */
1948 static int check_root_dir(struct inode_record *rec)
1950 struct inode_backref *backref;
1953 if (!rec->found_inode_item || rec->errors)
1955 if (rec->nlink != 1 || rec->found_link != 0)
1957 if (list_empty(&rec->backrefs))
/* only the first backref is examined; a root dir has exactly one */
1959 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1960 if (!backref->found_inode_ref)
1962 if (backref->index != 0 || backref->namelen != 2 ||
1963 memcmp(backref->name, "..", 2))
1965 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the on-disk inode item size of a directory to the byte total
 * accumulated from its dir items (rec->found_size) and clear the
 * I_ERR_DIR_ISIZE_WRONG flag.
 */
1972 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1973 struct btrfs_root *root, struct btrfs_path *path,
1974 struct inode_record *rec)
1976 struct btrfs_inode_item *ei;
1977 struct btrfs_key key;
1980 key.objectid = rec->ino;
1981 key.type = BTRFS_INODE_ITEM_KEY;
/* offset (u64)-1 positions the search just past the inode item */
1982 key.offset = (u64)-1;
1984 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* cannot step back a slot if we landed at the start of the leaf */
1988 if (!path->slots[0]) {
1995 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1996 if (key.objectid != rec->ino) {
2001 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2002 struct btrfs_inode_item);
2003 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2004 btrfs_mark_buffer_dirty(path->nodes[0]);
2005 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2006 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2007 root->root_key.objectid);
2009 btrfs_release_path(path);
/*
 * Insert the missing orphan item for rec->ino and clear the
 * I_ERR_NO_ORPHAN_ITEM flag on success.
 */
2013 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2014 struct btrfs_root *root,
2015 struct btrfs_path *path,
2016 struct inode_record *rec)
2020 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2021 btrfs_release_path(path);
2023 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the inode item's nbytes field to the byte total accumulated
 * from its file extents (rec->found_size) and clear the
 * I_ERR_FILE_NBYTES_WRONG flag.
 */
2027 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2028 struct btrfs_root *root,
2029 struct btrfs_path *path,
2030 struct inode_record *rec)
2032 struct btrfs_inode_item *ei;
2033 struct btrfs_key key;
2036 key.objectid = rec->ino;
2037 key.type = BTRFS_INODE_ITEM_KEY;
2040 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2047 /* Since ret == 0, no need to check anything */
2048 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2049 struct btrfs_inode_item);
2050 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2051 btrfs_mark_buffer_dirty(path->nodes[0]);
2052 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2053 printf("reset nbytes for ino %llu root %llu\n",
2054 rec->ino, root->root_key.objectid);
2056 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for 'backref' in the parent
 * directory, then update the parent dir's size accounting so a now
 * matching isize no longer reports I_ERR_DIR_ISIZE_WRONG.
 */
2060 static int add_missing_dir_index(struct btrfs_root *root,
2061 struct cache_tree *inode_cache,
2062 struct inode_record *rec,
2063 struct inode_backref *backref)
2065 struct btrfs_path *path;
2066 struct btrfs_trans_handle *trans;
2067 struct btrfs_dir_item *dir_item;
2068 struct extent_buffer *leaf;
2069 struct btrfs_key key;
2070 struct btrfs_disk_key disk_key;
2071 struct inode_record *dir_rec;
2072 unsigned long name_ptr;
/* item payload: the dir_item header followed by the name bytes */
2073 u32 data_size = sizeof(*dir_item) + backref->namelen;
2076 path = btrfs_alloc_path();
2080 trans = btrfs_start_transaction(root, 1);
2081 if (IS_ERR(trans)) {
2082 btrfs_free_path(path);
2083 return PTR_ERR(trans);
2086 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2087 (unsigned long long)rec->ino);
2088 key.objectid = backref->dir;
2089 key.type = BTRFS_DIR_INDEX_KEY;
2090 key.offset = backref->index;
2092 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2095 leaf = path->nodes[0];
2096 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* the dir item points back at the child's inode item */
2098 disk_key.objectid = cpu_to_le64(rec->ino);
2099 disk_key.type = BTRFS_INODE_ITEM_KEY;
2100 disk_key.offset = 0;
2102 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2103 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2104 btrfs_set_dir_data_len(leaf, dir_item, 0);
2105 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2106 name_ptr = (unsigned long)(dir_item + 1);
2107 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2108 btrfs_mark_buffer_dirty(leaf);
2109 btrfs_free_path(path);
2110 btrfs_commit_transaction(trans, root);
2112 backref->found_dir_index = 1;
2113 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2114 BUG_ON(IS_ERR(dir_rec));
/* the new index adds namelen bytes to the parent dir's counted isize */
2117 dir_rec->found_size += backref->namelen;
2118 if (dir_rec->found_size == dir_rec->isize &&
2119 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2120 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2121 if (dir_rec->found_size != dir_rec->isize)
2122 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the bad DIR_INDEX entry described by 'backref': depending on
 * what btrfs_lookup_dir_index() returns, either the whole index item
 * is removed or just the one matching dir name inside it.
 */
2127 static int delete_dir_index(struct btrfs_root *root,
2128 struct cache_tree *inode_cache,
2129 struct inode_record *rec,
2130 struct inode_backref *backref)
2132 struct btrfs_trans_handle *trans;
2133 struct btrfs_dir_item *di;
2134 struct btrfs_path *path;
2137 path = btrfs_alloc_path();
2141 trans = btrfs_start_transaction(root, 1);
2142 if (IS_ERR(trans)) {
2143 btrfs_free_path(path);
2144 return PTR_ERR(trans);
2148 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2149 (unsigned long long)backref->dir,
2150 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2151 (unsigned long long)root->objectid);
/* mod -1: look the entry up for deletion */
2153 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2154 backref->name, backref->namelen,
2155 backref->index, -1);
/* lookup failure path: clean up the path and commit what we have */
2158 btrfs_free_path(path);
2159 btrfs_commit_transaction(trans, root);
2166 ret = btrfs_del_item(trans, root, path);
2168 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2170 btrfs_free_path(path);
2171 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item from whatever the scan recorded: link
 * count, byte counts, and a dir-vs-file guess based on the records
 * found.  Permissions are set to a safe default (0755) and timestamps
 * to the current time.
 */
2175 static int create_inode_item(struct btrfs_root *root,
2176 struct inode_record *rec,
2177 struct inode_backref *backref, int root_dir)
2179 struct btrfs_trans_handle *trans;
2180 struct btrfs_inode_item inode_item;
2181 time_t now = time(NULL);
2184 trans = btrfs_start_transaction(root, 1);
2185 if (IS_ERR(trans)) {
2186 ret = PTR_ERR(trans);
2190 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2191 "be incomplete, please check permissions and content after "
2192 "the fsck completes.\n", (unsigned long long)root->objectid,
2193 (unsigned long long)rec->ino);
2195 memset(&inode_item, 0, sizeof(inode_item));
2196 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2198 btrfs_set_stack_inode_nlink(&inode_item, 1);
2200 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2201 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* a dir item wins the dir-vs-file guess, even over file extents */
2202 if (rec->found_dir_item) {
2203 if (rec->found_file_extent)
2204 fprintf(stderr, "root %llu inode %llu has both a dir "
2205 "item and extents, unsure if it is a dir or a "
2206 "regular file so setting it as a directory\n",
2207 (unsigned long long)root->objectid,
2208 (unsigned long long)rec->ino);
2209 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2210 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true after the 'if' above */
2211 } else if (!rec->found_dir_item) {
2212 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2213 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* fresh timestamps; otime stays at the epoch */
2215 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2216 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2217 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2218 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2219 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2220 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2221 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2222 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2224 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2226 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of 'rec' and repair inconsistencies between the
 * three name records (dir item, dir index, inode ref): delete unmatched
 * dir indexes, add missing ones, insert missing dir item/index pairs,
 * and recreate the inode item when only name records survive.  Returns
 * either an error or the number of repairs made.
 */
2230 static int repair_inode_backrefs(struct btrfs_root *root,
2231 struct inode_record *rec,
2232 struct cache_tree *inode_cache,
2235 struct inode_backref *tmp, *backref;
2236 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2240 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2241 if (!delete && rec->ino == root_dirid) {
2242 if (!rec->found_inode_item) {
2243 ret = create_inode_item(root, rec, backref, 1);
2250 /* Index 0 for root dir's are special, don't mess with it */
2251 if (rec->ino == root_dirid && backref->index == 0)
/* a dir index without (or disagreeing with) an inode ref is bogus */
2255 ((backref->found_dir_index && !backref->found_inode_ref) ||
2256 (backref->found_dir_index && backref->found_inode_ref &&
2257 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2258 ret = delete_dir_index(root, inode_cache, rec, backref);
2262 list_del(&backref->list);
2266 if (!delete && !backref->found_dir_index &&
2267 backref->found_dir_item && backref->found_inode_ref) {
2268 ret = add_missing_dir_index(root, inode_cache, rec,
/* NOTE(review): found_dir_index is tested twice below (upstream quirk) */
2273 if (backref->found_dir_item &&
2274 backref->found_dir_index &&
2275 backref->found_dir_index) {
/* fully consistent backref: nothing left to repair, drop it */
2276 if (!backref->errors &&
2277 backref->found_inode_ref) {
2278 list_del(&backref->list);
2284 if (!delete && (!backref->found_dir_index &&
2285 !backref->found_dir_item &&
2286 backref->found_inode_ref)) {
2287 struct btrfs_trans_handle *trans;
2288 struct btrfs_key location;
/* make sure the name is free in the parent before re-adding it */
2290 ret = check_dir_conflict(root, backref->name,
2296 * let nlink fixing routine to handle it,
2297 * which can do it better.
2302 location.objectid = rec->ino;
2303 location.type = BTRFS_INODE_ITEM_KEY;
2304 location.offset = 0;
2306 trans = btrfs_start_transaction(root, 1);
2307 if (IS_ERR(trans)) {
2308 ret = PTR_ERR(trans);
2311 fprintf(stderr, "adding missing dir index/item pair "
2313 (unsigned long long)rec->ino);
2314 ret = btrfs_insert_dir_item(trans, root, backref->name,
2316 backref->dir, &location,
2317 imode_to_type(rec->imode),
2320 btrfs_commit_transaction(trans, root);
/* all three name records agree but the inode item is gone: rebuild it */
2324 if (!delete && (backref->found_inode_ref &&
2325 backref->found_dir_index &&
2326 backref->found_dir_item &&
2327 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2328 !rec->found_inode_item)) {
2329 ret = create_inode_item(root, rec, backref, 0);
2336 return ret ? ret : repaired;
/*
 * Determine the file type for nlink/inode_item repair.
 *
 * Return 0 if the file type is found and a BTRFS_FT_* value is stored
 * into *type; return -ENOENT if it cannot be determined.
 */
2345 static int find_file_type(struct inode_record *rec, u8 *type)
2347 struct inode_backref *backref;
2349 /* For inode item recovered case */
2350 if (rec->found_inode_item) {
2351 *type = imode_to_type(rec->imode);
/* otherwise fall back to the type recorded in any dir item/index */
2355 list_for_each_entry(backref, &rec->backrefs, list) {
2356 if (backref->found_dir_index || backref->found_dir_item) {
2357 *type = backref->filetype;
/*
 * Determine a file name for nlink repair.
 *
 * Return 0 if a name is found: it is copied into 'name' and its length
 * stored in *namelen.  Return -ENOENT if no backref carries a usable
 * name.
 */
2370 static int find_file_name(struct inode_record *rec,
2371 char *name, int *namelen)
2373 struct inode_backref *backref;
/* any dir item/index or inode ref record carries the name */
2375 list_for_each_entry(backref, &rec->backrefs, list) {
2376 if (backref->found_dir_index || backref->found_dir_item ||
2377 backref->found_inode_ref) {
2378 memcpy(name, backref->name, backref->namelen);
2379 *namelen = backref->namelen;
/*
 * Reset the inode's nlink to the correct value: unlink every recorded
 * name, drop backrefs that are not fully consistent, zero the on-disk
 * nlink, then re-add only the valid names via btrfs_add_link(), which
 * bumps nlink once per restored link.
 */
2387 static int reset_nlink(struct btrfs_trans_handle *trans,
2388 struct btrfs_root *root,
2389 struct btrfs_path *path,
2390 struct inode_record *rec)
2392 struct inode_backref *backref;
2393 struct inode_backref *tmp;
2394 struct btrfs_key key;
2395 struct btrfs_inode_item *inode_item;
2398 /* We don't believe this either, reset it and iterate backref */
2399 rec->found_link = 0;
2401 /* Remove all backref including the valid ones */
2402 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2403 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2404 backref->index, backref->name,
2405 backref->namelen, 0);
2409 /* remove invalid backref, so it won't be added back */
2410 if (!(backref->found_dir_index &&
2411 backref->found_dir_item &&
2412 backref->found_inode_ref)) {
2413 list_del(&backref->list);
2420 /* Set nlink to 0 */
2421 key.objectid = rec->ino;
2422 key.type = BTRFS_INODE_ITEM_KEY;
2424 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2431 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2432 struct btrfs_inode_item);
2433 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2434 btrfs_mark_buffer_dirty(path->nodes[0]);
2435 btrfs_release_path(path);
2438 * Add back valid inode_ref/dir_item/dir_index,
2439 * add_link() will handle the nlink inc, so new nlink must be correct
2441 list_for_each_entry(backref, &rec->backrefs, list) {
2442 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2443 backref->name, backref->namelen,
2444 backref->filetype, &backref->index, 1);
2449 btrfs_release_path(path);
/*
 * Fix a wrong link count: recover a name and type for the inode, reset
 * nlink from the valid backrefs, and if no link survives move the file
 * into a "lost+found" directory, appending ".<ino>" suffixes on name
 * collisions.  Clears I_ERR_LINK_COUNT_WRONG so the record is not
 * revisited forever.
 */
2453 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2454 struct btrfs_root *root,
2455 struct btrfs_path *path,
2456 struct inode_record *rec)
2458 char *dir_name = "lost+found";
2459 char namebuf[BTRFS_NAME_LEN] = {0};
2464 int name_recovered = 0;
2465 int type_recovered = 0;
2469 * Get file name and type first before these invalid inode ref
2470 * are deleted by remove_all_invalid_backref()
2472 name_recovered = !find_file_name(rec, namebuf, &namelen);
2473 type_recovered = !find_file_type(rec, &type);
/* no usable name recorded: fall back to the inode number itself */
2475 if (!name_recovered) {
2476 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2477 rec->ino, rec->ino);
2478 namelen = count_digits(rec->ino);
2479 sprintf(namebuf, "%llu", rec->ino);
2482 if (!type_recovered) {
2483 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2485 type = BTRFS_FT_REG_FILE;
2489 ret = reset_nlink(trans, root, path, rec);
2492 "Failed to reset nlink for inode %llu: %s\n",
2493 rec->ino, strerror(-ret));
/* orphaned: relink the inode under a lost+found directory */
2497 if (rec->found_link == 0) {
2498 lost_found_ino = root->highest_inode;
2499 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2504 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2505 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2508 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2509 dir_name, strerror(-ret));
2512 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2513 namebuf, namelen, type, NULL, 1);
2515 * Add ".INO" suffix several times to handle case where
2516 * "FILENAME.INO" is already taken by another file.
2518 while (ret == -EEXIST) {
2520 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2522 if (namelen + count_digits(rec->ino) + 1 >
2527 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2529 namelen += count_digits(rec->ino) + 1;
2530 ret = btrfs_add_link(trans, root, rec->ino,
2531 lost_found_ino, namebuf,
2532 namelen, type, NULL, 1);
2536 "Failed to link the inode %llu to %s dir: %s\n",
2537 rec->ino, dir_name, strerror(-ret));
2541 * Just increase the found_link, don't actually add the
2542 * backref. This will make things easier and this inode
2543 * record will be freed after the repair is done.
2544 * So fsck will not report problem about this inode.
2547 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2548 namelen, namebuf, dir_name);
2550 printf("Fixed the nlink of inode %llu\n", rec->ino);
2553 * Clear the flag anyway, or we will loop forever for the same inode
2554 * as it will not be removed from the bad inode list and the dead loop
2557 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2558 btrfs_release_path(path);
/*
 * Check whether the given inode has any normal (regular or prealloc,
 * i.e. non-inline) file extent.  Used to guess the file type when
 * neither its dir_index/item nor its inode_item exists.
 *
 * This will *NOT* report errors; on any failure the inode is simply
 * treated as having no normal file extent.
 */
2571 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2573 struct btrfs_path *path;
2574 struct btrfs_key key;
2575 struct btrfs_key found_key;
2576 struct btrfs_file_extent_item *fi;
2580 path = btrfs_alloc_path();
2584 key.type = BTRFS_EXTENT_DATA_KEY;
2587 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* search landed past the leaf's items: move to the next leaf */
2592 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2593 ret = btrfs_next_leaf(root, path);
2600 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2602 if (found_key.objectid != ino ||
2603 found_key.type != BTRFS_EXTENT_DATA_KEY)
2605 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2606 struct btrfs_file_extent_item);
2607 type = btrfs_file_extent_type(path->nodes[0], fi);
/* any non-inline extent counts as a "normal" file extent */
2608 if (type != BTRFS_FILE_EXTENT_INLINE) {
2614 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* dir-entry type to the corresponding S_IF* mode bits.
 *
 * NOTE(review): the lookup table only has entries up to
 * BTRFS_FT_SYMLINK; an out-of-range 'type' would index past the array,
 * so callers must pass a valid BTRFS_FT_* value.
 */
2618 static u32 btrfs_type_to_imode(u8 type)
2620 static u32 imode_by_btrfs_type[] = {
2621 [BTRFS_FT_REG_FILE] = S_IFREG,
2622 [BTRFS_FT_DIR] = S_IFDIR,
2623 [BTRFS_FT_CHRDEV] = S_IFCHR,
2624 [BTRFS_FT_BLKDEV] = S_IFBLK,
2625 [BTRFS_FT_FIFO] = S_IFIFO,
2626 [BTRFS_FT_SOCK] = S_IFSOCK,
2627 [BTRFS_FT_SYMLINK] = S_IFLNK,
2630 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item.  The file type is recovered from
 * backrefs when possible, otherwise guessed: FILE when a normal file
 * extent or an orphan extent exists, DIR when dir items were found,
 * and FILE as the last-resort fallback.  Only the inode item itself is
 * rebuilt; nlink repair is deliberately left to the nlink fixer.
 */
2633 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2634 struct btrfs_root *root,
2635 struct btrfs_path *path,
2636 struct inode_record *rec)
2640 int type_recovered = 0;
2643 printf("Trying to rebuild inode:%llu\n", rec->ino);
2645 type_recovered = !find_file_type(rec, &filetype);
2648 * Try to determine inode type if type not found.
2650 * For found regular file extent, it must be FILE.
2651 * For found dir_item/index, it must be DIR.
2653 * For undetermined one, use FILE as fallback.
2656 * 1. If found backref(inode_index/item is already handled) to it,
2658 * Need new inode-inode ref structure to allow search for that.
2660 if (!type_recovered) {
2661 if (rec->found_file_extent &&
2662 find_normal_file_extent(root, rec->ino)) {
2664 filetype = BTRFS_FT_REG_FILE;
2665 } else if (rec->found_dir_item) {
2667 filetype = BTRFS_FT_DIR;
2668 } else if (!list_empty(&rec->orphan_extents)) {
2670 filetype = BTRFS_FT_REG_FILE;
/* NOTE(review): typo in the message below: "determint" -> "determine" */
2672 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2675 filetype = BTRFS_FT_REG_FILE;
2679 ret = btrfs_new_inode(trans, root, rec->ino,
2680 mode | btrfs_type_to_imode(filetype));
2685 * Here inode rebuild is done, we only rebuild the inode item,
2686 * don't repair the nlink(like move to lost+found).
2687 * That is the job of nlink repair.
2689 * We just fill the record and return
2691 rec->found_dir_item = 1;
2692 rec->imode = mode | btrfs_type_to_imode(filetype);
2694 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2695 /* Ensure the inode_nlinks repair function will be called */
2696 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-insert orphan data extents recorded for this inode as real file
 * extents; a conflicting extent is freed instead of inserted.  Size and
 * file-hole accounting are updated as each extent is restored, and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared once the list is drained.
 */
2701 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2702 struct btrfs_root *root,
2703 struct btrfs_path *path,
2704 struct inode_record *rec)
2706 struct orphan_data_extent *orphan;
2707 struct orphan_data_extent *tmp;
2710 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2712 * Check for conflicting file extents
2714 * Here we don't know whether the extents is compressed or not,
2715 * so we can only assume it not compressed nor data offset,
2716 * and use its disk_len as extent length.
2718 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2719 orphan->offset, orphan->disk_len, 0);
2720 btrfs_release_path(path);
2725 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2726 orphan->disk_bytenr, orphan->disk_len);
2727 ret = btrfs_free_extent(trans,
2728 root->fs_info->extent_root,
2729 orphan->disk_bytenr, orphan->disk_len,
2730 0, root->objectid, orphan->objectid,
/* no conflict: restore the extent into the file */
2735 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2736 orphan->offset, orphan->disk_bytenr,
2737 orphan->disk_len, orphan->disk_len);
2741 /* Update file size info */
2742 rec->found_size += orphan->disk_len;
2743 if (rec->found_size == rec->nbytes)
2744 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2746 /* Update the file extent hole info too */
2747 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2751 if (RB_EMPTY_ROOT(&rec->holes))
2752 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2754 list_del(&orphan->list);
2757 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill every recorded gap in the inode's extent coverage by punching
 * hole extents, clearing I_ERR_FILE_EXTENT_DISCOUNT once no hole
 * remains.  A file that lost all its extents gets one hole covering its
 * whole (sector-rounded) isize.
 */
2762 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2763 struct btrfs_root *root,
2764 struct btrfs_path *path,
2765 struct inode_record *rec)
2767 struct rb_node *node;
2768 struct file_extent_hole *hole;
/* drain the hole tree one entry at a time */
2772 node = rb_first(&rec->holes);
2776 hole = rb_entry(node, struct file_extent_hole, node);
2777 ret = btrfs_punch_hole(trans, root, rec->ino,
2778 hole->start, hole->len);
2781 ret = del_file_extent_hole(&rec->holes, hole->start,
2785 if (RB_EMPTY_ROOT(&rec->holes))
2786 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2787 node = rb_first(&rec->holes);
2789 /* special case for a file losing all its file extent */
2791 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2792 round_up(rec->isize, root->sectorsize));
2796 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2797 rec->ino, root->objectid);
/*
 * Dispatch repair of a single inode record.
 *
 * Only acts when rec->errors contains one of the repairable bits listed in
 * the mask below; otherwise returns early.  Repairs run in a fixed order
 * (missing inode item first, nbytes last) inside one transaction, each step
 * gated on the previous one succeeding (!ret).
 */
2802 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2804 struct btrfs_trans_handle *trans;
2805 struct btrfs_path *path;
2808 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2809 I_ERR_NO_ORPHAN_ITEM |
2810 I_ERR_LINK_COUNT_WRONG |
2811 I_ERR_NO_INODE_ITEM |
2812 I_ERR_FILE_EXTENT_ORPHAN |
2813 I_ERR_FILE_EXTENT_DISCOUNT|
2814 I_ERR_FILE_NBYTES_WRONG)))
2817 path = btrfs_alloc_path();
2822 * For nlink repair, it may create a dir and add link, so
2823 * 2 for parent(256)'s dir_index and dir_item
2824 * 2 for lost+found dir's inode_item and inode_ref
2825 * 1 for the new inode_ref of the file
2826 * 2 for lost+found dir's dir_index and dir_item for the file
2828 trans = btrfs_start_transaction(root, 7);
2829 if (IS_ERR(trans)) {
2830 btrfs_free_path(path);
2831 return PTR_ERR(trans);
/* Repair order matters: a missing inode item must be recreated before
 * any extent/isize/nlink fixes can be applied to it. */
2834 if (rec->errors & I_ERR_NO_INODE_ITEM)
2835 ret = repair_inode_no_item(trans, root, path, rec);
2836 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2837 ret = repair_inode_orphan_extent(trans, root, path, rec);
2838 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2839 ret = repair_inode_discount_extent(trans, root, path, rec);
2840 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2841 ret = repair_inode_isize(trans, root, path, rec);
2842 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2843 ret = repair_inode_orphan_item(trans, root, path, rec);
2844 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2845 ret = repair_inode_nlinks(trans, root, path, rec);
2846 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2847 ret = repair_inode_nbytes(trans, root, path, rec);
2848 btrfs_commit_transaction(trans, root);
2849 btrfs_free_path(path);
/*
 * Validate (and, when 'repair' is set, try to fix) every inode record
 * collected for @root in @inode_cache.
 *
 * Phases visible here:
 *  1. Record the highest inode number (used later for 'lost+found').
 *  2. When repairing, fix/delete backrefs first (wrong backrefs would
 *     invalidate later isize/nlink fixes — see comment below).
 *  3. Check the root directory inode; recreate it if missing.
 *  4. Walk all remaining records, set derived error bits, attempt
 *     try_repair_inode(), and print unresolved errors/backrefs.
 *
 * Returns -1 when unrepairable errors remain, 0 otherwise.
 * NOTE(review): extract — several control-flow lines are elided.
 */
2853 static int check_inode_recs(struct btrfs_root *root,
2854 struct cache_tree *inode_cache)
2856 struct cache_extent *cache;
2857 struct ptr_node *node;
2858 struct inode_record *rec;
2859 struct inode_backref *backref;
2864 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (refs == 0) should have an empty inode cache. */
2866 if (btrfs_root_refs(&root->root_item) == 0) {
2867 if (!cache_tree_empty(inode_cache))
2868 fprintf(stderr, "warning line %d\n", __LINE__);
2873 * We need to record the highest inode number for later 'lost+found'
2875 * We must select a ino not used/refered by any existing inode, or
2876 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2877 * this may cause 'lost+found' dir has wrong nlinks.
2879 cache = last_cache_extent(inode_cache);
2881 node = container_of(cache, struct ptr_node, cache);
2883 if (rec->ino > root->highest_inode)
2884 root->highest_inode = rec->ino;
2888 * We need to repair backrefs first because we could change some of the
2889 * errors in the inode recs.
2891 * We also need to go through and delete invalid backrefs first and then
2892 * add the correct ones second. We do this because we may get EEXIST
2893 * when adding back the correct index because we hadn't yet deleted the
2896 * For example, if we were missing a dir index then the directories
2897 * isize would be wrong, so if we fixed the isize to what we thought it
2898 * would be and then fixed the backref we'd still have a invalid fs, so
2899 * we need to add back the dir index and then check to see if the isize
2904 if (stage == 3 && !err)
2907 cache = search_cache_extent(inode_cache, 0);
2908 while (repair && cache) {
2909 node = container_of(cache, struct ptr_node, cache);
2911 cache = next_cache_extent(cache);
2913 /* Need to free everything up and rescan */
2915 remove_cache_extent(inode_cache, &node->cache);
2917 free_inode_rec(rec);
2921 if (list_empty(&rec->backrefs))
2924 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Check the root directory inode of this subvolume. */
2938 rec = get_inode_rec(inode_cache, root_dirid, 0);
2939 BUG_ON(IS_ERR(rec));
2941 ret = check_root_dir(rec);
2943 fprintf(stderr, "root %llu root dir %llu error\n",
2944 (unsigned long long)root->root_key.objectid,
2945 (unsigned long long)root_dirid);
2946 print_inode_error(root, rec);
/* Root dir missing entirely — recreate it when repairing. */
2951 struct btrfs_trans_handle *trans;
2953 trans = btrfs_start_transaction(root, 1);
2954 if (IS_ERR(trans)) {
2955 err = PTR_ERR(trans);
2960 "root %llu missing its root dir, recreating\n",
2961 (unsigned long long)root->objectid);
2963 ret = btrfs_make_root_dir(trans, root, root_dirid);
2966 btrfs_commit_transaction(trans, root);
2970 fprintf(stderr, "root %llu root dir %llu not found\n",
2971 (unsigned long long)root->root_key.objectid,
2972 (unsigned long long)root_dirid);
/* Main pass over every remaining inode record. */
2976 cache = search_cache_extent(inode_cache, 0);
2979 node = container_of(cache, struct ptr_node, cache);
2981 remove_cache_extent(inode_cache, &node->cache);
2983 if (rec->ino == root_dirid ||
2984 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2985 free_inode_rec(rec);
/* A zero-link inode is fine if an orphan item actually exists. */
2989 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2990 ret = check_orphan_item(root, rec->ino);
2992 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2993 if (can_free_inode_rec(rec)) {
2994 free_inode_rec(rec);
2999 if (!rec->found_inode_item)
3000 rec->errors |= I_ERR_NO_INODE_ITEM;
3001 if (rec->found_link != rec->nlink)
3002 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3004 ret = try_repair_inode(root, rec);
3005 if (ret == 0 && can_free_inode_rec(rec)) {
3006 free_inode_rec(rec);
3012 if (!(repair && ret == 0))
3014 print_inode_error(root, rec);
3015 list_for_each_entry(backref, &rec->backrefs, list) {
3016 if (!backref->found_dir_item)
3017 backref->errors |= REF_ERR_NO_DIR_ITEM;
3018 if (!backref->found_dir_index)
3019 backref->errors |= REF_ERR_NO_DIR_INDEX;
3020 if (!backref->found_inode_ref)
3021 backref->errors |= REF_ERR_NO_INODE_REF;
3022 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3023 " namelen %u name %s filetype %d errors %x",
3024 (unsigned long long)backref->dir,
3025 (unsigned long long)backref->index,
3026 backref->namelen, backref->name,
3027 backref->filetype, backref->errors);
3028 print_ref_error(backref->errors);
3030 free_inode_rec(rec);
3032 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record on a cache miss.
 *
 * Returns the record, ERR_PTR(-ENOMEM) on allocation failure, or
 * ERR_PTR(-EEXIST) if the insert races/collides.
 */
3035 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3038 struct cache_extent *cache;
3039 struct root_record *rec = NULL;
3042 cache = lookup_cache_extent(root_cache, objectid, 1);
3044 rec = container_of(cache, struct root_record, cache);
/* Miss: build a new record keyed by the root objectid. */
3046 rec = calloc(1, sizeof(*rec));
3048 return ERR_PTR(-ENOMEM);
3049 rec->objectid = objectid;
3050 INIT_LIST_HEAD(&rec->backrefs);
3051 rec->cache.start = objectid;
3052 rec->cache.size = 1;
3054 ret = insert_cache_extent(root_cache, &rec->cache);
3056 return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec matching (ref_root, dir, name); allocate and
 * append a new one when none matches.  The name is stored inline after the
 * struct (calloc of sizeof(*backref) + namelen + 1) and NUL-terminated.
 *
 * NOTE(review): extract — the NULL check after calloc and the returns are
 * elided from this listing.
 */
3061 static struct root_backref *get_root_backref(struct root_record *rec,
3062 u64 ref_root, u64 dir, u64 index,
3063 const char *name, int namelen)
3065 struct root_backref *backref;
3067 list_for_each_entry(backref, &rec->backrefs, list) {
3068 if (backref->ref_root != ref_root || backref->dir != dir ||
3069 backref->namelen != namelen)
3071 if (memcmp(name, backref->name, namelen))
/* Not found: create a new backref with the name appended inline. */
3076 backref = calloc(1, sizeof(*backref) + namelen + 1);
3079 backref->ref_root = ref_root;
3081 backref->index = index;
3082 backref->namelen = namelen;
3083 memcpy(backref->name, name, namelen);
3084 backref->name[namelen] = '\0';
3085 list_add_tail(&backref->list, &rec->backrefs);
/*
 * cache_tree destructor callback: free a root_record and every backref
 * hanging off it.  (The free() of backref/rec themselves falls in lines
 * elided from this extract.)
 */
3089 static void free_root_record(struct cache_extent *cache)
3091 struct root_record *rec;
3092 struct root_backref *backref;
3094 rec = container_of(cache, struct root_record, cache);
3095 while (!list_empty(&rec->backrefs)) {
3096 backref = list_entry(rec->backrefs.next,
3097 struct root_backref, list);
3098 list_del(&backref->list);
/* Generates free_root_recs_tree(): drains a cache_tree of root_records
 * using free_root_record() on each entry. */
3105 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root @root_id coming from @ref_root's directory
 * @dir, observed as @item_type (DIR_ITEM / DIR_INDEX / ROOT_REF /
 * ROOT_BACKREF).  Marks the corresponding found_* flag on the backref,
 * accumulates @errors, flags duplicates and index mismatches, and marks the
 * backref reachable once both a forward root ref and a dir item were seen.
 */
3107 static int add_root_backref(struct cache_tree *root_cache,
3108 u64 root_id, u64 ref_root, u64 dir, u64 index,
3109 const char *name, int namelen,
3110 int item_type, int errors)
3112 struct root_record *rec;
3113 struct root_backref *backref;
3115 rec = get_root_rec(root_cache, root_id);
3116 BUG_ON(IS_ERR(rec));
3117 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3120 backref->errors |= errors;
/* DIR_ITEM keys carry no sequence, so only non-DIR_ITEM sources may
 * set or cross-check the index. */
3122 if (item_type != BTRFS_DIR_ITEM_KEY) {
3123 if (backref->found_dir_index || backref->found_back_ref ||
3124 backref->found_forward_ref) {
3125 if (backref->index != index)
3126 backref->errors |= REF_ERR_INDEX_UNMATCH;
3128 backref->index = index;
3132 if (item_type == BTRFS_DIR_ITEM_KEY) {
3133 if (backref->found_forward_ref)
3135 backref->found_dir_item = 1;
3136 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3137 backref->found_dir_index = 1;
3138 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3139 if (backref->found_forward_ref)
3140 backref->errors |= REF_ERR_DUP_ROOT_REF;
3141 else if (backref->found_dir_item)
3143 backref->found_forward_ref = 1;
3144 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3145 if (backref->found_back_ref)
3146 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3147 backref->found_back_ref = 1;
/* Reachable = the root is both referenced (ROOT_REF) and linked from a
 * directory entry. */
3152 if (backref->found_forward_ref && backref->found_dir_item)
3153 backref->reachable = 1;
/*
 * Fold the per-subvolume root cache @src_cache into the global
 * @dst_cache.  Inode records here describe child roots seen from @root:
 * each dir_item/dir_index backref becomes a root backref in @dst_cache.
 * Reloc-tree records are simply discarded.
 *
 * NOTE(review): extract — the is_child_root() result handling between
 * lines 3181 and 3187 is elided.
 */
3157 static int merge_root_recs(struct btrfs_root *root,
3158 struct cache_tree *src_cache,
3159 struct cache_tree *dst_cache)
3161 struct cache_extent *cache;
3162 struct ptr_node *node;
3163 struct inode_record *rec;
3164 struct inode_backref *backref;
3167 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3168 free_inode_recs_tree(src_cache);
3173 cache = search_cache_extent(src_cache, 0);
3176 node = container_of(cache, struct ptr_node, cache);
3178 remove_cache_extent(src_cache, &node->cache);
3181 ret = is_child_root(root, root->objectid, rec->ino);
3187 list_for_each_entry(backref, &rec->backrefs, list) {
3188 BUG_ON(backref->found_inode_ref);
3189 if (backref->found_dir_item)
3190 add_root_backref(dst_cache, rec->ino,
3191 root->root_key.objectid, backref->dir,
3192 backref->index, backref->name,
3193 backref->namelen, BTRFS_DIR_ITEM_KEY,
3195 if (backref->found_dir_index)
3196 add_root_backref(dst_cache, rec->ino,
3197 root->root_key.objectid, backref->dir,
3198 backref->index, backref->name,
3199 backref->namelen, BTRFS_DIR_INDEX_KEY,
3203 free_inode_rec(rec);
/*
 * Verify reachability of every fs/subvolume root collected in @root_cache.
 *
 * First iteratively propagates reachability: a backref whose referencing
 * root itself became unreferenced is cleared, repeating until stable (the
 * comment below notes circular references are not detected).  Then reports
 * unreferenced roots (ignoring orphaned ones and roots without a root
 * item) and prints unresolved backref errors.
 *
 * Returns 1 when errors were found, 0 otherwise.
 */
3210 static int check_root_refs(struct btrfs_root *root,
3211 struct cache_tree *root_cache)
3213 struct root_record *rec;
3214 struct root_record *ref_root;
3215 struct root_backref *backref;
3216 struct cache_extent *cache;
/* The top-level fs tree is always considered referenced. */
3222 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3223 BUG_ON(IS_ERR(rec));
3226 /* fixme: this can not detect circular references */
3229 cache = search_cache_extent(root_cache, 0);
3233 rec = container_of(cache, struct root_record, cache);
3234 cache = next_cache_extent(cache);
3236 if (rec->found_ref == 0)
3239 list_for_each_entry(backref, &rec->backrefs, list) {
3240 if (!backref->reachable)
3243 ref_root = get_root_rec(root_cache,
3245 BUG_ON(IS_ERR(ref_root));
3246 if (ref_root->found_ref > 0)
/* Referencing root is itself dead: drop this edge and rescan. */
3249 backref->reachable = 0;
3251 if (rec->found_ref == 0)
/* Reporting pass. */
3257 cache = search_cache_extent(root_cache, 0);
3261 rec = container_of(cache, struct root_record, cache);
3262 cache = next_cache_extent(cache);
3264 if (rec->found_ref == 0 &&
3265 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3266 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3267 ret = check_orphan_item(root->fs_info->tree_root,
3273 * If we don't have a root item then we likely just have
3274 * a dir item in a snapshot for this root but no actual
3275 * ref key or anything so it's meaningless.
3277 if (!rec->found_root_item)
3280 fprintf(stderr, "fs tree %llu not referenced\n",
3281 (unsigned long long)rec->objectid);
3285 if (rec->found_ref > 0 && !rec->found_root_item)
3287 list_for_each_entry(backref, &rec->backrefs, list) {
3288 if (!backref->found_dir_item)
3289 backref->errors |= REF_ERR_NO_DIR_ITEM;
3290 if (!backref->found_dir_index)
3291 backref->errors |= REF_ERR_NO_DIR_INDEX;
3292 if (!backref->found_back_ref)
3293 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3294 if (!backref->found_forward_ref)
3295 backref->errors |= REF_ERR_NO_ROOT_REF;
3296 if (backref->reachable && backref->errors)
3303 fprintf(stderr, "fs tree %llu refs %u %s\n",
3304 (unsigned long long)rec->objectid, rec->found_ref,
3305 rec->found_root_item ? "" : "not found");
3307 list_for_each_entry(backref, &rec->backrefs, list) {
3308 if (!backref->reachable)
3310 if (!backref->errors && rec->found_root_item)
3312 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3313 " index %llu namelen %u name %s errors %x\n",
3314 (unsigned long long)backref->ref_root,
3315 (unsigned long long)backref->dir,
3316 (unsigned long long)backref->index,
3317 backref->namelen, backref->name,
3319 print_ref_error(backref->errors);
3322 return errors > 0 ? 1 : 0;
/*
 * Parse one ROOT_REF / ROOT_BACKREF item from @eb slot @slot and feed it
 * to add_root_backref().  Over-long names are truncated to BTRFS_NAME_LEN
 * and flagged REF_ERR_NAME_TOO_LONG.  For ROOT_REF the key is
 * (parent, type, child); for ROOT_BACKREF it is (child, type, parent) —
 * hence the swapped objectid/offset arguments below.
 */
3325 static int process_root_ref(struct extent_buffer *eb, int slot,
3326 struct btrfs_key *key,
3327 struct cache_tree *root_cache)
3333 struct btrfs_root_ref *ref;
3334 char namebuf[BTRFS_NAME_LEN];
3337 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3339 dirid = btrfs_root_ref_dirid(eb, ref);
3340 index = btrfs_root_ref_sequence(eb, ref);
3341 name_len = btrfs_root_ref_name_len(eb, ref);
3343 if (name_len <= BTRFS_NAME_LEN) {
3347 len = BTRFS_NAME_LEN;
3348 error = REF_ERR_NAME_TOO_LONG;
/* Name bytes follow the btrfs_root_ref struct inside the item. */
3350 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3352 if (key->type == BTRFS_ROOT_REF_KEY) {
3353 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3354 index, namebuf, len, key->type, error);
3356 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3357 index, namebuf, len, key->type, error);
/* cache_tree destructor callback: free one btrfs_corrupt_block entry. */
3362 static void free_corrupt_block(struct cache_extent *cache)
3364 struct btrfs_corrupt_block *corrupt;
3366 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree(): drains a cache_tree of
 * btrfs_corrupt_block entries via free_corrupt_block(). */
3370 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3373 * Repair the btree of the given root.
3375 * The fix is to remove the node key in corrupt_blocks cache_tree.
3376 * and rebalance the tree.
3377 * After the fix, the btree should be writeable.
/*
 * Two passes over @corrupt_blocks inside one transaction:
 *  1. For each corrupt block, search to its parent level with ins_len 0
 *     (no balancing — see comment below), delete the pointer to it and
 *     free the block's extent.
 *  2. Re-search each key with ins_len -1 so btrfs_search_slot() performs
 *     the deferred balancing.
 */
3379 static int repair_btree(struct btrfs_root *root,
3380 struct cache_tree *corrupt_blocks)
3382 struct btrfs_trans_handle *trans;
3383 struct btrfs_path *path;
3384 struct btrfs_corrupt_block *corrupt;
3385 struct cache_extent *cache;
3386 struct btrfs_key key;
3391 if (cache_tree_empty(corrupt_blocks))
3394 path = btrfs_alloc_path();
3398 trans = btrfs_start_transaction(root, 1);
3399 if (IS_ERR(trans)) {
3400 ret = PTR_ERR(trans);
3401 fprintf(stderr, "Error starting transaction: %s\n",
3405 cache = first_cache_extent(corrupt_blocks);
3407 corrupt = container_of(cache, struct btrfs_corrupt_block,
3409 level = corrupt->level;
3410 path->lowest_level = level;
3411 key.objectid = corrupt->key.objectid;
3412 key.type = corrupt->key.type;
3413 key.offset = corrupt->key.offset;
3416 * Here we don't want to do any tree balance, since it may
3417 * cause a balance with corrupted brother leaf/node,
3418 * so ins_len set to 0 here.
3419 * Balance will be done after all corrupt node/leaf is deleted.
3421 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3424 offset = btrfs_node_blockptr(path->nodes[level],
3425 path->slots[level]);
3427 /* Remove the ptr */
3428 ret = btrfs_del_ptr(trans, root, path, level,
3429 path->slots[level]);
3433 * Remove the corresponding extent
3434 * return value is not concerned.
3436 btrfs_release_path(path);
3437 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3438 0, root->root_key.objectid,
3440 cache = next_cache_extent(cache);
3443 /* Balance the btree using btrfs_search_slot() */
3444 cache = first_cache_extent(corrupt_blocks);
3446 corrupt = container_of(cache, struct btrfs_corrupt_block,
3448 memcpy(&key, &corrupt->key, sizeof(key));
3449 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3452 /* return will always >0 since it won't find the item */
3454 btrfs_release_path(path);
3455 cache = next_cache_extent(cache);
3458 btrfs_commit_transaction(trans, root);
3460 btrfs_free_path(path);
/*
 * Fully check one fs/subvolume tree: walk every block, collect inode and
 * root records into @root_cache / a local shared_node, then validate them.
 *
 * Also: moves previously-collected orphan data extents onto their inode
 * records, verifies the root block itself, honors drop_progress for
 * partially-dropped snapshots, reports (and with 'repair' tries to fix,
 * via repair_btree()) any corrupted tree blocks, and finally runs
 * merge_root_recs() + check_inode_recs().
 *
 * NOTE(review): extract — loop conditions and several error paths between
 * the visible lines are elided.
 */
3464 static int check_fs_root(struct btrfs_root *root,
3465 struct cache_tree *root_cache,
3466 struct walk_control *wc)
3472 struct btrfs_path path;
3473 struct shared_node root_node;
3474 struct root_record *rec;
3475 struct btrfs_root_item *root_item = &root->root_item;
3476 struct cache_tree corrupt_blocks;
3477 struct orphan_data_extent *orphan;
3478 struct orphan_data_extent *tmp;
3479 enum btrfs_tree_block_status status;
3482 * Reuse the corrupt_block cache tree to record corrupted tree block
3484 * Unlike the usage in extent tree check, here we do it in a per
3485 * fs/subvol tree base.
3487 cache_tree_init(&corrupt_blocks);
3488 root->fs_info->corrupt_blocks = &corrupt_blocks;
3490 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3491 rec = get_root_rec(root_cache, root->root_key.objectid);
3492 BUG_ON(IS_ERR(rec));
3493 if (btrfs_root_refs(root_item) > 0)
3494 rec->found_root_item = 1;
3497 btrfs_init_path(&path);
3498 memset(&root_node, 0, sizeof(root_node));
3499 cache_tree_init(&root_node.root_cache);
3500 cache_tree_init(&root_node.inode_cache);
3502 /* Move the orphan extent record to corresponding inode_record */
3503 list_for_each_entry_safe(orphan, tmp,
3504 &root->orphan_data_extents, list) {
3505 struct inode_record *inode;
3507 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3509 BUG_ON(IS_ERR(inode));
3510 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3511 list_move(&orphan->list, &inode->orphan_extents);
3514 level = btrfs_header_level(root->node);
3515 memset(wc->nodes, 0, sizeof(wc->nodes));
3516 wc->nodes[level] = &root_node;
3517 wc->active_node = level;
3518 wc->root_level = level;
3520 /* We may not have checked the root block, lets do that now */
3521 if (btrfs_is_leaf(root->node))
3522 status = btrfs_check_leaf(root, NULL, root->node);
3524 status = btrfs_check_node(root, NULL, root->node);
3525 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root: start the walk at the root block.  A root being dropped
 * (drop_progress set) resumes the walk from drop_progress instead. */
3528 if (btrfs_root_refs(root_item) > 0 ||
3529 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3530 path.nodes[level] = root->node;
3531 extent_buffer_get(root->node);
3532 path.slots[level] = 0;
3534 struct btrfs_key key;
3535 struct btrfs_disk_key found_key;
3537 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3538 level = root_item->drop_level;
3539 path.lowest_level = level;
3540 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3543 btrfs_node_key(path.nodes[level], &found_key,
3545 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3546 sizeof(found_key)));
/* Iterate down/up until the whole tree is visited. */
3550 wret = walk_down_tree(root, &path, wc, &level);
3556 wret = walk_up_tree(root, &path, wc, &level);
3563 btrfs_release_path(&path);
3565 if (!cache_tree_empty(&corrupt_blocks)) {
3566 struct cache_extent *cache;
3567 struct btrfs_corrupt_block *corrupt;
3569 printf("The following tree block(s) is corrupted in tree %llu:\n",
3570 root->root_key.objectid);
3571 cache = first_cache_extent(&corrupt_blocks);
3573 corrupt = container_of(cache,
3574 struct btrfs_corrupt_block,
3576 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3577 cache->start, corrupt->level,
3578 corrupt->key.objectid, corrupt->key.type,
3579 corrupt->key.offset);
3580 cache = next_cache_extent(cache);
3583 printf("Try to repair the btree for root %llu\n",
3584 root->root_key.objectid);
3585 ret = repair_btree(root, &corrupt_blocks);
3587 fprintf(stderr, "Failed to repair btree: %s\n",
3590 printf("Btree for root %llu is fixed\n",
3591 root->root_key.objectid);
3595 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3599 if (root_node.current) {
3600 root_node.current->checked = 1;
3601 maybe_free_inode_rec(&root_node.inode_cache,
3605 err = check_inode_recs(root, &root_node.inode_cache);
/* Cleanup: drop per-root corrupt-block tracking and leftover orphans. */
3609 free_corrupt_blocks_tree(&corrupt_blocks);
3610 root->fs_info->corrupt_blocks = NULL;
3611 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return non-zero when @objectid names a tree that should be checked as an
 * fs tree: the two reloc trees plus anything is_fstree() accepts.
 */
3615 static int fs_root_objectid(u64 objectid)
3617 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3618 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3620 return is_fstree(objectid);
/*
 * Iterate all ROOT_ITEMs in the tree of tree roots and run check_fs_root()
 * on every fs/subvolume root; ROOT_REF/ROOT_BACKREF items are fed to
 * process_root_ref() instead.
 *
 * If a repair step modified the tree root mid-scan (tree_node changed) or
 * check_fs_root() returned -EAGAIN, all collected records are discarded
 * and the scan restarts.  Reloc trees are read uncached and freed after
 * checking.
 */
3623 static int check_fs_roots(struct btrfs_root *root,
3624 struct cache_tree *root_cache)
3626 struct btrfs_path path;
3627 struct btrfs_key key;
3628 struct walk_control wc;
3629 struct extent_buffer *leaf, *tree_node;
3630 struct btrfs_root *tmp_root;
3631 struct btrfs_root *tree_root = root->fs_info->tree_root;
3635 if (ctx.progress_enabled) {
3636 ctx.tp = TASK_FS_ROOTS;
3637 task_start(ctx.info);
3641 * Just in case we made any changes to the extent tree that weren't
3642 * reflected into the free space cache yet.
3645 reset_cached_block_groups(root->fs_info);
3646 memset(&wc, 0, sizeof(wc));
3647 cache_tree_init(&wc.shared);
3648 btrfs_init_path(&path);
3653 key.type = BTRFS_ROOT_ITEM_KEY;
3654 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree-root block so mid-scan COW can be detected. */
3659 tree_node = tree_root->node;
3661 if (tree_node != tree_root->node) {
3662 free_root_recs_tree(root_cache);
3663 btrfs_release_path(&path);
3666 leaf = path.nodes[0];
3667 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3668 ret = btrfs_next_leaf(tree_root, &path);
3674 leaf = path.nodes[0];
3676 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3677 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3678 fs_root_objectid(key.objectid)) {
3679 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3680 tmp_root = btrfs_read_fs_root_no_cache(
3681 root->fs_info, &key);
3683 key.offset = (u64)-1;
3684 tmp_root = btrfs_read_fs_root(
3685 root->fs_info, &key);
3687 if (IS_ERR(tmp_root)) {
3691 ret = check_fs_root(tmp_root, root_cache, &wc);
3692 if (ret == -EAGAIN) {
/* Repair restructured something: throw away and rescan. */
3693 free_root_recs_tree(root_cache);
3694 btrfs_release_path(&path);
3699 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3700 btrfs_free_fs_root(tmp_root);
3701 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3702 key.type == BTRFS_ROOT_BACKREF_KEY) {
3703 process_root_ref(leaf, path.slots[0], &key,
3710 btrfs_release_path(&path);
3712 free_extent_cache_tree(&wc.shared);
3713 if (!cache_tree_empty(&wc.shared))
3714 fprintf(stderr, "warning line %d\n", __LINE__);
3716 task_stop(ctx.info);
/*
 * Cross-check every backref on @rec against the extent tree bookkeeping:
 * backrefs missing from the extent tree, tree backrefs never referenced,
 * data backrefs whose local found_ref/num_refs, disk_bytenr or byte count
 * disagree with the record, and finally the summed ref count vs rec->refs.
 *
 * When @print_errs is 0 the first mismatch can return early (the early
 * returns fall in elided lines); with @print_errs each problem is logged.
 * Returns non-zero when any backpointer check failed.
 */
3721 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3723 struct list_head *cur = rec->backrefs.next;
3724 struct extent_backref *back;
3725 struct tree_backref *tback;
3726 struct data_backref *dback;
3730 while(cur != &rec->backrefs) {
3731 back = list_entry(cur, struct extent_backref, list);
3733 if (!back->found_extent_tree) {
3737 if (back->is_data) {
3738 dback = (struct data_backref *)back;
3739 fprintf(stderr, "Backref %llu %s %llu"
3740 " owner %llu offset %llu num_refs %lu"
3741 " not found in extent tree\n",
3742 (unsigned long long)rec->start,
3743 back->full_backref ?
3745 back->full_backref ?
3746 (unsigned long long)dback->parent:
3747 (unsigned long long)dback->root,
3748 (unsigned long long)dback->owner,
3749 (unsigned long long)dback->offset,
3750 (unsigned long)dback->num_refs);
3752 tback = (struct tree_backref *)back;
3753 fprintf(stderr, "Backref %llu parent %llu"
3754 " root %llu not found in extent tree\n",
3755 (unsigned long long)rec->start,
3756 (unsigned long long)tback->parent,
3757 (unsigned long long)tback->root);
3760 if (!back->is_data && !back->found_ref) {
3764 tback = (struct tree_backref *)back;
3765 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3766 (unsigned long long)rec->start,
3767 back->full_backref ? "parent" : "root",
3768 back->full_backref ?
3769 (unsigned long long)tback->parent :
3770 (unsigned long long)tback->root, back);
3772 if (back->is_data) {
3773 dback = (struct data_backref *)back;
3774 if (dback->found_ref != dback->num_refs) {
3778 fprintf(stderr, "Incorrect local backref count"
3779 " on %llu %s %llu owner %llu"
3780 " offset %llu found %u wanted %u back %p\n",
3781 (unsigned long long)rec->start,
3782 back->full_backref ?
3784 back->full_backref ?
3785 (unsigned long long)dback->parent:
3786 (unsigned long long)dback->root,
3787 (unsigned long long)dback->owner,
3788 (unsigned long long)dback->offset,
3789 dback->found_ref, dback->num_refs, back);
3791 if (dback->disk_bytenr != rec->start) {
3795 fprintf(stderr, "Backref disk bytenr does not"
3796 " match extent record, bytenr=%llu, "
3797 "ref bytenr=%llu\n",
3798 (unsigned long long)rec->start,
3799 (unsigned long long)dback->disk_bytenr);
3802 if (dback->bytes != rec->nr) {
3806 fprintf(stderr, "Backref bytes do not match "
3807 "extent backref, bytenr=%llu, ref "
3808 "bytes=%llu, backref bytes=%llu\n",
3809 (unsigned long long)rec->start,
3810 (unsigned long long)rec->nr,
3811 (unsigned long long)dback->bytes);
/* Tally references: tree backrefs count 1 each (elided line),
 * data backrefs contribute found_ref. */
3814 if (!back->is_data) {
3817 dback = (struct data_backref *)back;
3818 found += dback->found_ref;
3821 if (found != rec->refs) {
3825 fprintf(stderr, "Incorrect global backref count "
3826 "on %llu found %llu wanted %llu\n",
3827 (unsigned long long)rec->start,
3828 (unsigned long long)found,
3829 (unsigned long long)rec->refs);
/*
 * Detach and free every backref queued on @rec->backrefs.  (The list_del
 * and free of each entry fall in lines elided from this extract.)
 */
3835 static int free_all_extent_backrefs(struct extent_record *rec)
3837 struct extent_backref *back;
3838 struct list_head *cur;
3839 while (!list_empty(&rec->backrefs)) {
3840 cur = rec->backrefs.next;
3841 back = list_entry(cur, struct extent_backref, list);
/*
 * Drain @extent_cache: remove every extent_record and release its
 * backrefs (and the record itself, in an elided line).
 */
3848 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3849 struct cache_tree *extent_cache)
3851 struct cache_extent *cache;
3852 struct extent_record *rec;
3855 cache = first_cache_extent(extent_cache);
3858 rec = container_of(cache, struct extent_record, cache);
3859 remove_cache_extent(extent_cache, cache);
3860 free_all_extent_backrefs(rec);
/*
 * Free @rec early when it is already fully verified: content and owner
 * checked, ref counts consistent, no duplicates, all backpointers clean,
 * and none of the bad-backref / crossing-stripes / wrong-chunk-type flags
 * set.  Keeping verified records out of the cache bounds memory use.
 */
3865 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3866 struct extent_record *rec)
3868 if (rec->content_checked && rec->owner_ref_checked &&
3869 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3870 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3871 !rec->bad_full_backref && !rec->crossing_stripes &&
3872 !rec->wrong_chunk_type) {
3873 remove_cache_extent(extent_cache, &rec->cache);
3874 free_all_extent_backrefs(rec);
3875 list_del_init(&rec->list);
/*
 * Verify that tree block @buf really belongs to the root recorded as its
 * header owner.
 *
 * Fast path: a non-full tree backref on @rec matching the header owner.
 * Slow path: read the owner's fs tree and search for @buf's first key one
 * level up; the block is confirmed when the parent node points at
 * buf->start.  Returns 0 when the owner checks out, 1 otherwise.
 */
3881 static int check_owner_ref(struct btrfs_root *root,
3882 struct extent_record *rec,
3883 struct extent_buffer *buf)
3885 struct extent_backref *node;
3886 struct tree_backref *back;
3887 struct btrfs_root *ref_root;
3888 struct btrfs_key key;
3889 struct btrfs_path path;
3890 struct extent_buffer *parent;
3895 list_for_each_entry(node, &rec->backrefs, list) {
3898 if (!node->found_ref)
3900 if (node->full_backref)
3902 back = (struct tree_backref *)node;
3903 if (btrfs_header_owner(buf) == back->root)
3906 BUG_ON(rec->is_root);
3908 /* try to find the block by search corresponding fs tree */
3909 key.objectid = btrfs_header_owner(buf);
3910 key.type = BTRFS_ROOT_ITEM_KEY;
3911 key.offset = (u64)-1;
3913 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3914 if (IS_ERR(ref_root))
3917 level = btrfs_header_level(buf);
/* Use the block's first key as the search anchor. */
3919 btrfs_item_key_to_cpu(buf, &key, 0);
3921 btrfs_node_key_to_cpu(buf, &key, 0);
3923 btrfs_init_path(&path);
3924 path.lowest_level = level + 1;
3925 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3929 parent = path.nodes[level + 1];
3930 if (parent && buf->start == btrfs_node_blockptr(parent,
3931 path.slots[level + 1]))
3934 btrfs_release_path(&path);
3935 return found ? 0 : 1;
/*
 * Return whether @rec has a (non-full) tree backref rooted in the extent
 * tree, i.e. the extent belongs to the extent tree itself.
 */
3938 static int is_extent_tree_record(struct extent_record *rec)
3940 struct list_head *cur = rec->backrefs.next;
3941 struct extent_backref *node;
3942 struct tree_backref *back;
3945 while(cur != &rec->backrefs) {
3946 node = list_entry(cur, struct extent_backref, list);
3950 back = (struct tree_backref *)node;
3951 if (node->full_backref)
3953 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable tree block [start, start+len) as a corrupt extent
 * so later repair passes can act on it.  Only applies to blocks whose
 * record belongs to the extent tree; the parent key stored on the record
 * is used as the corrupt-extent key.
 */
3960 static int record_bad_block_io(struct btrfs_fs_info *info,
3961 struct cache_tree *extent_cache,
3964 struct extent_record *rec;
3965 struct cache_extent *cache;
3966 struct btrfs_key key;
3968 cache = lookup_cache_extent(extent_cache, start, len);
3972 rec = container_of(cache, struct extent_record, cache);
3973 if (!is_extent_tree_record(rec))
3976 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3977 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 of @buf to restore key order.
 *
 * Node case: swap the two btrfs_key_ptrs wholesale; if slot 0 changed,
 * propagate the new first key upward via btrfs_fixup_low_keys().
 * Leaf case: swap key, offset and size of both items and physically swap
 * the item data (via two malloc'd bounce buffers), then fix the keys with
 * btrfs_set_item_key_unsafe().
 */
3980 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3981 struct extent_buffer *buf, int slot)
3983 if (btrfs_header_level(buf)) {
3984 struct btrfs_key_ptr ptr1, ptr2;
3986 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3987 sizeof(struct btrfs_key_ptr));
3988 read_extent_buffer(buf, &ptr2,
3989 btrfs_node_key_ptr_offset(slot + 1),
3990 sizeof(struct btrfs_key_ptr));
3991 write_extent_buffer(buf, &ptr1,
3992 btrfs_node_key_ptr_offset(slot + 1),
3993 sizeof(struct btrfs_key_ptr));
3994 write_extent_buffer(buf, &ptr2,
3995 btrfs_node_key_ptr_offset(slot),
3996 sizeof(struct btrfs_key_ptr));
/* Slot 0 changed: the parent's key for this node must follow. */
3998 struct btrfs_disk_key key;
3999 btrfs_node_key(buf, &key, 0);
4000 btrfs_fixup_low_keys(root, path, &key,
4001 btrfs_header_level(buf) + 1);
4004 struct btrfs_item *item1, *item2;
4005 struct btrfs_key k1, k2;
4006 char *item1_data, *item2_data;
4007 u32 item1_offset, item2_offset, item1_size, item2_size;
4009 item1 = btrfs_item_nr(slot);
4010 item2 = btrfs_item_nr(slot + 1);
4011 btrfs_item_key_to_cpu(buf, &k1, slot);
4012 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4013 item1_offset = btrfs_item_offset(buf, item1);
4014 item2_offset = btrfs_item_offset(buf, item2);
4015 item1_size = btrfs_item_size(buf, item1);
4016 item2_size = btrfs_item_size(buf, item2);
4018 item1_data = malloc(item1_size);
4021 item2_data = malloc(item2_size);
4027 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4028 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* Cross-write the payloads, then swap the offset/size metadata so
 * each item header points at its (possibly differently sized) data. */
4030 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4031 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4035 btrfs_set_item_offset(buf, item1, item2_offset);
4036 btrfs_set_item_offset(buf, item2, item1_offset);
4037 btrfs_set_item_size(buf, item1, item2_size);
4038 btrfs_set_item_size(buf, item2, item1_size);
4040 path->slots[0] = slot;
4041 btrfs_set_item_key_unsafe(root, path, &k2);
4042 path->slots[0] = slot + 1;
4043 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Restore ascending key order within the block at path->lowest_level by
 * bubble-swapping adjacent out-of-order entries via swap_values(), then
 * marking the buffer dirty.  Works for both nodes and leaves.
 */
4048 static int fix_key_order(struct btrfs_trans_handle *trans,
4049 struct btrfs_root *root,
4050 struct btrfs_path *path)
4052 struct extent_buffer *buf;
4053 struct btrfs_key k1, k2;
4055 int level = path->lowest_level;
4058 buf = path->nodes[level];
4059 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4061 btrfs_node_key_to_cpu(buf, &k1, i);
4062 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4064 btrfs_item_key_to_cpu(buf, &k1, i);
4065 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Already ordered — nothing to do for this pair. */
4067 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4069 ret = swap_values(root, path, buf, i);
4072 btrfs_mark_buffer_dirty(buf);
/*
 * Drop a structurally-bogus leaf item by shifting the item headers down
 * over it and decrementing nritems.  Only item types whose loss is
 * recoverable elsewhere (dir index, extent items/refs) may be deleted;
 * anything else is refused.  Deleting slot 0 requires fixing the low keys
 * in the parents.
 *
 * NOTE(review): the item *data* is not moved here — only headers; the
 * space is reclaimed by later shifting in fix_item_offset() (see below).
 */
4078 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4079 struct btrfs_root *root,
4080 struct btrfs_path *path,
4081 struct extent_buffer *buf, int slot)
4083 struct btrfs_key key;
4084 int nritems = btrfs_header_nritems(buf);
4086 btrfs_item_key_to_cpu(buf, &key, slot);
4088 /* These are all the keys we can deal with missing. */
4089 if (key.type != BTRFS_DIR_INDEX_KEY &&
4090 key.type != BTRFS_EXTENT_ITEM_KEY &&
4091 key.type != BTRFS_METADATA_ITEM_KEY &&
4092 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4093 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4096 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4097 (unsigned long long)key.objectid, key.type,
4098 (unsigned long long)key.offset, slot, buf->start);
4099 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4100 btrfs_item_nr_offset(slot + 1),
4101 sizeof(struct btrfs_item) *
4102 (nritems - slot - 1));
4103 btrfs_set_header_nritems(buf, nritems - 1);
/* Deleted slot 0: the parent's low key must track the new first item. */
4105 struct btrfs_disk_key disk_key;
4107 btrfs_item_key(buf, &disk_key, 0);
4108 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4110 btrfs_mark_buffer_dirty(buf);
/*
 * Repair items in a leaf whose data offsets do not pack tightly.
 *
 * Leaf item data must be contiguous from the end of the leaf downward:
 * item 0 ends at BTRFS_LEAF_DATA_SIZE and each item i ends where item
 * i - 1 begins.  For each violation this either deletes the item (when
 * it extends past the legal region, via delete_bogus_item) or shifts its
 * data up by the gap and fixes the recorded offset.
 *
 * NOTE(review): several original lines are elided here (return/error
 * handling after delete_bogus_item, the "shift == 0" continue, and the
 * final return), so error paths are only partially visible.
 */
4114 static int fix_item_offset(struct btrfs_trans_handle *trans,
4115 struct btrfs_root *root,
4116 struct btrfs_path *path)
4118 struct extent_buffer *buf;
4122 /* We should only get this for leaves */
4123 BUG_ON(path->lowest_level);
4124 buf = path->nodes[0];
4126 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4127 unsigned int shift = 0, offset;
/* first item must end exactly at the end of the leaf data area */
4129 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4130 BTRFS_LEAF_DATA_SIZE(root)) {
4131 if (btrfs_item_end_nr(buf, i) >
4132 BTRFS_LEAF_DATA_SIZE(root)) {
4133 ret = delete_bogus_item(trans, root, path,
4137 fprintf(stderr, "item is off the end of the "
4138 "leaf, can't fix\n");
4142 shift = BTRFS_LEAF_DATA_SIZE(root) -
4143 btrfs_item_end_nr(buf, i);
/* later items must end where the previous item's data begins */
4144 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4145 btrfs_item_offset_nr(buf, i - 1)) {
4146 if (btrfs_item_end_nr(buf, i) >
4147 btrfs_item_offset_nr(buf, i - 1)) {
4148 ret = delete_bogus_item(trans, root, path,
4152 fprintf(stderr, "items overlap, can't fix\n");
4156 shift = btrfs_item_offset_nr(buf, i - 1) -
4157 btrfs_item_end_nr(buf, i);
4162 printf("Shifting item nr %d by %u bytes in block %llu\n",
4163 i, shift, (unsigned long long)buf->start);
4164 offset = btrfs_item_offset_nr(buf, i);
/* move the item's payload up by 'shift' to close the gap */
4165 memmove_extent_buffer(buf,
4166 btrfs_leaf_data(buf) + offset + shift,
4167 btrfs_leaf_data(buf) + offset,
4168 btrfs_item_size_nr(buf, i));
4169 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4171 btrfs_mark_buffer_dirty(buf);
4175 * We may have moved things, in which case we want to exit so we don't
4176 * write those changes out. Once we have proper abort functionality in
4177 * progs this can be changed to something nicer.
4184 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4185 * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are repairable here; any other
 * status is rejected up front.  The block may be shared, so every root
 * that references it (found via btrfs_find_all_roots) is visited: for
 * each, a transaction is started, the block is located with
 * skip_check_block set (it is known-bad), and the matching fixer
 * (fix_key_order / fix_item_offset) is run before committing.
 *
 * NOTE(review): error-handling lines between the visible statements are
 * elided in this excerpt; the cleanup ordering shown (release path,
 * commit, free path) is only partially visible.
 */
4187 static int try_to_fix_bad_block(struct btrfs_root *root,
4188 struct extent_buffer *buf,
4189 enum btrfs_tree_block_status status)
4191 struct btrfs_trans_handle *trans;
4192 struct ulist *roots;
4193 struct ulist_node *node;
4194 struct btrfs_root *search_root;
4195 struct btrfs_path *path;
4196 struct ulist_iterator iter;
4197 struct btrfs_key root_key, key;
4200 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4201 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4204 path = btrfs_alloc_path();
4208 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4211 btrfs_free_path(path);
4215 ULIST_ITER_INIT(&iter);
4216 while ((node = ulist_next(roots, &iter))) {
/* node->val is a root objectid; read that root to fix under it */
4217 root_key.objectid = node->val;
4218 root_key.type = BTRFS_ROOT_ITEM_KEY;
4219 root_key.offset = (u64)-1;
4221 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4228 trans = btrfs_start_transaction(search_root, 0);
4229 if (IS_ERR(trans)) {
4230 ret = PTR_ERR(trans);
/* search directly at the bad block's level and skip validation */
4234 path->lowest_level = btrfs_header_level(buf);
4235 path->skip_check_block = 1;
4236 if (path->lowest_level)
4237 btrfs_node_key_to_cpu(buf, &key, 0);
4239 btrfs_item_key_to_cpu(buf, &key, 0);
4240 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4243 btrfs_commit_transaction(trans, search_root);
4246 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4247 ret = fix_key_order(trans, search_root, path);
4248 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4249 ret = fix_item_offset(trans, search_root, path);
4251 btrfs_commit_transaction(trans, search_root);
4254 btrfs_release_path(path);
4255 btrfs_commit_transaction(trans, search_root);
4258 btrfs_free_path(path);
/*
 * Validate one tree block against its extent record.
 *
 * Looks up the block's extent record, records generation/first-key/level
 * metadata on it, then runs btrfs_check_leaf / btrfs_check_node.  On a
 * non-clean status (with repair enabled -- the 'repair' gate is among the
 * elided lines) it tries try_to_fix_bad_block; if that also fails the
 * block is reported as bad.  Finally marks content (and, for full-backref
 * or owner-checked blocks) as verified and lets the record be freed if
 * fully resolved.
 */
4262 static int check_block(struct btrfs_root *root,
4263 struct cache_tree *extent_cache,
4264 struct extent_buffer *buf, u64 flags)
4266 struct extent_record *rec;
4267 struct cache_extent *cache;
4268 struct btrfs_key key;
4269 enum btrfs_tree_block_status status;
4273 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4276 rec = container_of(cache, struct extent_record, cache);
4277 rec->generation = btrfs_header_generation(buf);
4279 level = btrfs_header_level(buf);
4280 if (btrfs_header_nritems(buf) > 0) {
/* remember the block's first key for later cross-checks */
4283 btrfs_item_key_to_cpu(buf, &key, 0);
4285 btrfs_node_key_to_cpu(buf, &key, 0);
4287 rec->info_objectid = key.objectid;
4289 rec->info_level = level;
4291 if (btrfs_is_leaf(buf))
4292 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4294 status = btrfs_check_node(root, &rec->parent_key, buf);
4296 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4298 status = try_to_fix_bad_block(root, buf, status);
4299 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4301 fprintf(stderr, "bad block %llu\n",
4302 (unsigned long long)buf->start);
4305 * Signal to callers we need to start the scan over
4306 * again since we'll have cow'ed blocks.
4311 rec->content_checked = 1;
4312 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4313 rec->owner_ref_checked = 1;
4315 ret = check_owner_ref(root, rec, buf);
4317 rec->owner_ref_checked = 1;
/* drop the record now if everything about it has been verified */
4321 maybe_free_extent_rec(extent_cache, rec);
/*
 * Find an existing tree (metadata) backref on an extent record.
 *
 * Matches either by parent bytenr (full backref, parent != 0) or by root
 * objectid (normal backref).  Returns the backref, or NULL when no match
 * exists (the NULL return line is elided in this excerpt).
 */
4325 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4326 u64 parent, u64 root)
4328 struct list_head *cur = rec->backrefs.next;
4329 struct extent_backref *node;
4330 struct tree_backref *back;
4332 while(cur != &rec->backrefs) {
4333 node = list_entry(cur, struct extent_backref, list);
/* only tree backrefs are candidates (is_data check elided here) */
4337 back = (struct tree_backref *)node;
4339 if (!node->full_backref)
4341 if (parent == back->parent)
4344 if (node->full_backref)
4346 if (back->root == root)
/*
 * Allocate and attach a new tree backref to an extent record.
 *
 * parent != 0 creates a full (shared) backref keyed by parent bytenr,
 * otherwise a normal backref keyed by root objectid.  The new backref is
 * appended to rec->backrefs.  NOTE(review): the malloc NULL check and
 * the root-assignment branch are among the elided lines.
 */
4353 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4354 u64 parent, u64 root)
4356 struct tree_backref *ref = malloc(sizeof(*ref));
4360 memset(&ref->node, 0, sizeof(ref->node));
4362 ref->parent = parent;
4363 ref->node.full_backref = 1;
4366 ref->node.full_backref = 0;
4368 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Find an existing data backref on an extent record.
 *
 * Matches by parent (full backref) or by (root, owner, offset).  When
 * found_ref is set the caller supplies the real disk_bytenr/bytes, and a
 * candidate that already has found_ref but disagrees on those values is
 * rejected so mismatching refs surface as separate backrefs (see the
 * comment in add_data_backref).  Returns NULL when nothing matches (the
 * return lines are elided in this excerpt).
 */
4373 static struct data_backref *find_data_backref(struct extent_record *rec,
4374 u64 parent, u64 root,
4375 u64 owner, u64 offset,
4377 u64 disk_bytenr, u64 bytes)
4379 struct list_head *cur = rec->backrefs.next;
4380 struct extent_backref *node;
4381 struct data_backref *back;
4383 while(cur != &rec->backrefs) {
4384 node = list_entry(cur, struct extent_backref, list);
/* only data backrefs are candidates (is_data check elided here) */
4388 back = (struct data_backref *)node;
4390 if (!node->full_backref)
4392 if (parent == back->parent)
4395 if (node->full_backref)
4397 if (back->root == root && back->owner == owner &&
4398 back->offset == offset) {
4399 if (found_ref && node->found_ref &&
4400 (back->bytes != bytes ||
4401 back->disk_bytenr != disk_bytenr))
/*
 * Allocate and attach a new data backref to an extent record.
 *
 * parent != 0 creates a full (shared) backref; otherwise the backref is
 * keyed by (root, owner, offset).  The record's max_size is raised to
 * cover this ref's max_size.  NOTE(review): the malloc NULL check and
 * the root/owner assignments are among the elided lines.
 */
4410 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4411 u64 parent, u64 root,
4412 u64 owner, u64 offset,
4415 struct data_backref *ref = malloc(sizeof(*ref));
4416 memset(&ref->node, 0, sizeof(ref->node));
4417 ref->node.is_data = 1;
4420 ref->parent = parent;
4423 ref->node.full_backref = 1;
4427 ref->offset = offset;
4428 ref->node.full_backref = 0;
4430 ref->bytes = max_size;
4433 list_add_tail(&ref->node.list, &rec->backrefs);
4434 if (max_size > rec->max_size)
4435 rec->max_size = max_size;
4439 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's kind (data vs metadata,
 * and for metadata SYSTEM vs ordinary METADATA) disagrees with the flags
 * of the block group it lives in.  SYSTEM extents can only be identified
 * through their backref root (chunk tree), so the first backref is
 * inspected for that case.
 */
4440 static void check_extent_type(struct extent_record *rec)
4442 struct btrfs_block_group_cache *bg_cache;
4444 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4448 /* data extent, check chunk directly*/
4449 if (!rec->metadata) {
4450 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4451 rec->wrong_chunk_type = 1;
4455 /* metadata extent, check the obvious case first */
4456 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4457 BTRFS_BLOCK_GROUP_METADATA))) {
4458 rec->wrong_chunk_type = 1;
4463 * Check SYSTEM extent, as it's also marked as metadata, we can only
4464 * make sure it's a SYSTEM extent by its backref
4466 if (!list_empty(&rec->backrefs)) {
4467 struct extent_backref *node;
4468 struct tree_backref *tback;
4471 node = list_entry(rec->backrefs.next, struct extent_backref,
4473 if (node->is_data) {
4474 /* tree block shouldn't have data backref */
4475 rec->wrong_chunk_type = 1;
4478 tback = container_of(node, struct tree_backref, node);
/* chunk-tree-owned blocks must live in a SYSTEM block group */
4480 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4481 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4483 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4484 if (!(bg_cache->flags & bg_type))
4485 rec->wrong_chunk_type = 1;
/*
 * Create or update the extent record for [start, start + nr).
 *
 * If a record already covers the range it is updated in place: refcounts,
 * parent key/generation, max_size, and the checked flags.  A record with
 * a different start (or a second EXTENT_ITEM for the same range) is
 * tracked as a duplicate on rec->dups and queued on duplicate_extents.
 * Otherwise a fresh record is allocated, initialized, and inserted into
 * extent_cache.  In both paths the record is validated against stripe
 * boundaries and its block group type (check_extent_type).
 *
 * NOTE(review): many bookkeeping lines (refs/inc_ref handling, dup field
 * copies, malloc checks, returns) are elided in this excerpt.
 */
4489 static int add_extent_rec(struct cache_tree *extent_cache,
4490 struct btrfs_key *parent_key, u64 parent_gen,
4491 u64 start, u64 nr, u64 extent_item_refs,
4492 int is_root, int inc_ref, int set_checked,
4493 int metadata, int extent_rec, u64 max_size)
4495 struct extent_record *rec;
4496 struct cache_extent *cache;
4500 cache = lookup_cache_extent(extent_cache, start, nr);
/* ---- existing record: merge this sighting into it ---- */
4502 rec = container_of(cache, struct extent_record, cache);
4506 rec->nr = max(nr, max_size);
4509 * We need to make sure to reset nr to whatever the extent
4510 * record says was the real size, this way we can compare it to
4514 if (start != rec->start || rec->found_rec) {
4515 struct extent_record *tmp;
4518 if (list_empty(&rec->list))
4519 list_add_tail(&rec->list,
4520 &duplicate_extents);
4523 * We have to do this song and dance in case we
4524 * find an extent record that falls inside of
4525 * our current extent record but does not have
4526 * the same objectid.
4528 tmp = malloc(sizeof(*tmp));
4532 tmp->max_size = max_size;
4535 tmp->metadata = metadata;
4536 tmp->extent_item_refs = extent_item_refs;
4537 INIT_LIST_HEAD(&tmp->list);
4538 list_add_tail(&tmp->list, &rec->dups);
4539 rec->num_duplicates++;
4546 if (extent_item_refs && !dup) {
/* two EXTENT_ITEMs claiming refs for the same record: report it */
4547 if (rec->extent_item_refs) {
4548 fprintf(stderr, "block %llu rec "
4549 "extent_item_refs %llu, passed %llu\n",
4550 (unsigned long long)start,
4551 (unsigned long long)
4552 rec->extent_item_refs,
4553 (unsigned long long)extent_item_refs);
4555 rec->extent_item_refs = extent_item_refs;
4560 rec->content_checked = 1;
4561 rec->owner_ref_checked = 1;
4565 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4567 rec->parent_generation = parent_gen;
4569 if (rec->max_size < max_size)
4570 rec->max_size = max_size;
4573 * A metadata extent can't cross stripe_len boundary, otherwise
4574 * kernel scrub won't be able to handle it.
4575 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4578 if (metadata && check_crossing_stripes(rec->start,
4580 rec->crossing_stripes = 1;
4581 check_extent_type(rec);
4582 maybe_free_extent_rec(extent_cache, rec);
/* ---- no existing record: allocate and initialize a new one ---- */
4585 rec = malloc(sizeof(*rec));
4587 rec->max_size = max_size;
4588 rec->nr = max(nr, max_size);
4589 rec->found_rec = !!extent_rec;
4590 rec->content_checked = 0;
4591 rec->owner_ref_checked = 0;
4592 rec->num_duplicates = 0;
4593 rec->metadata = metadata;
4594 rec->flag_block_full_backref = -1;
4595 rec->bad_full_backref = 0;
4596 rec->crossing_stripes = 0;
4597 rec->wrong_chunk_type = 0;
4598 INIT_LIST_HEAD(&rec->backrefs);
4599 INIT_LIST_HEAD(&rec->dups);
4600 INIT_LIST_HEAD(&rec->list);
4612 if (extent_item_refs)
4613 rec->extent_item_refs = extent_item_refs;
4615 rec->extent_item_refs = 0;
4618 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4620 memset(&rec->parent_key, 0, sizeof(*parent_key));
4623 rec->parent_generation = parent_gen;
4625 rec->parent_generation = 0;
4627 rec->cache.start = start;
4628 rec->cache.size = nr;
4629 ret = insert_cache_extent(extent_cache, &rec->cache);
4633 rec->content_checked = 1;
4634 rec->owner_ref_checked = 1;
4638 if (check_crossing_stripes(rec->start, rec->max_size))
4639 rec->crossing_stripes = 1;
4640 check_extent_type(rec);
/*
 * Record a tree-block backref for bytenr in the extent cache.
 *
 * Creates a placeholder extent record (size 1) when none exists, then
 * finds or allocates the matching tree backref.  found_ref distinguishes
 * a reference seen while walking the trees (found_ref) from one read out
 * of the extent tree (found_extent_tree); seeing either twice is reported
 * as a duplicate.
 */
4644 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4645 u64 parent, u64 root, int found_ref)
4647 struct extent_record *rec;
4648 struct tree_backref *back;
4649 struct cache_extent *cache;
4651 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* no record yet: create a 1-byte placeholder so the backref attaches */
4653 add_extent_rec(extent_cache, NULL, 0, bytenr,
4654 1, 0, 0, 0, 0, 1, 0, 0);
4655 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4660 rec = container_of(cache, struct extent_record, cache);
4661 if (rec->start != bytenr) {
4665 back = find_tree_backref(rec, parent, root);
4667 back = alloc_tree_backref(rec, parent, root);
4672 if (back->node.found_ref) {
4673 fprintf(stderr, "Extent back ref already exists "
4674 "for %llu parent %llu root %llu \n",
4675 (unsigned long long)bytenr,
4676 (unsigned long long)parent,
4677 (unsigned long long)root);
4679 back->node.found_ref = 1;
4681 if (back->node.found_extent_tree) {
4682 fprintf(stderr, "Extent back ref already exists "
4683 "for %llu parent %llu root %llu \n",
4684 (unsigned long long)bytenr,
4685 (unsigned long long)parent,
4686 (unsigned long long)root);
4688 back->node.found_extent_tree = 1;
4690 check_extent_type(rec);
4691 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data-extent backref for bytenr in the extent cache.
 *
 * Like add_tree_backref but for file data: creates a placeholder record
 * if needed, then finds or allocates the data backref identified by
 * (parent) or (root, owner, offset).  With found_ref set, max_size is
 * the real extent size and must agree with any previously-found ref;
 * otherwise num_refs comes from the extent tree and duplicates there are
 * reported.
 */
4695 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4696 u64 parent, u64 root, u64 owner, u64 offset,
4697 u32 num_refs, int found_ref, u64 max_size)
4699 struct extent_record *rec;
4700 struct data_backref *back;
4701 struct cache_extent *cache;
4703 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* no record yet: create a 1-byte placeholder so the backref attaches */
4705 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4707 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4712 rec = container_of(cache, struct extent_record, cache);
4713 if (rec->max_size < max_size)
4714 rec->max_size = max_size;
4717 * If found_ref is set then max_size is the real size and must match the
4718 * existing refs. So if we have already found a ref then we need to
4719 * make sure that this ref matches the existing one, otherwise we need
4720 * to add a new backref so we can notice that the backrefs don't match
4721 * and we need to figure out who is telling the truth. This is to
4722 * account for that awful fsync bug I introduced where we'd end up with
4723 * a btrfs_file_extent_item that would have its length include multiple
4724 * prealloc extents or point inside of a prealloc extent.
4726 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4729 back = alloc_data_backref(rec, parent, root, owner, offset,
4733 BUG_ON(num_refs != 1);
4734 if (back->node.found_ref)
4735 BUG_ON(back->bytes != max_size);
4736 back->node.found_ref = 1;
4737 back->found_ref += 1;
4738 back->bytes = max_size;
4739 back->disk_bytenr = bytenr;
4741 rec->content_checked = 1;
4742 rec->owner_ref_checked = 1;
4744 if (back->node.found_extent_tree) {
4745 fprintf(stderr, "Extent back ref already exists "
4746 "for %llu parent %llu root %llu "
4747 "owner %llu offset %llu num_refs %lu\n",
4748 (unsigned long long)bytenr,
4749 (unsigned long long)parent,
4750 (unsigned long long)root,
4751 (unsigned long long)owner,
4752 (unsigned long long)offset,
4753 (unsigned long)num_refs);
4755 back->num_refs = num_refs;
4756 back->node.found_extent_tree = 1;
4758 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue a block for scanning if it has not been seen yet.
 *
 * Inserting into 'seen' deduplicates; only a first-time insert also goes
 * onto 'pending' (the early-return on failure is elided here).
 */
4762 static int add_pending(struct cache_tree *pending,
4763 struct cache_tree *seen, u64 bytenr, u32 size)
4766 ret = add_cache_extent(seen, bytenr, size);
4769 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to read, favoring locality.
 *
 * Priority: a readahead entry first; otherwise interior nodes near the
 * last-read position (starting up to 32K before it), falling back to the
 * start of the nodes tree, then to pending leaves.  Fills bits[] with up
 * to bits_nr contiguous cache extents, and when room remains (> 8 slots)
 * pulls additional nearby pending entries within a 32K window.
 *
 * NOTE(review): the branch structure between the two do/while loops is
 * partially elided in this excerpt.
 */
4773 static int pick_next_pending(struct cache_tree *pending,
4774 struct cache_tree *reada,
4775 struct cache_tree *nodes,
4776 u64 last, struct block_info *bits, int bits_nr,
4779 unsigned long node_start = last;
4780 struct cache_extent *cache;
4783 cache = search_cache_extent(reada, 0);
/* readahead entries take absolute priority */
4785 bits[0].start = cache->start;
4786 bits[0].size = cache->size;
/* back off a little so we pick up nodes just before 'last' too */
4791 if (node_start > 32768)
4792 node_start -= 32768;
4794 cache = search_cache_extent(nodes, node_start);
4796 cache = search_cache_extent(nodes, 0);
4799 cache = search_cache_extent(pending, 0);
4804 bits[ret].start = cache->start;
4805 bits[ret].size = cache->size;
4806 cache = next_cache_extent(cache);
4808 } while (cache && ret < bits_nr);
4814 bits[ret].start = cache->start;
4815 bits[ret].size = cache->size;
4816 cache = next_cache_extent(cache);
4818 } while (cache && ret < bits_nr);
/* batch in nearby pending blocks while there is room left */
4820 if (bits_nr - ret > 8) {
4821 u64 lookup = bits[0].start + bits[0].size;
4822 struct cache_extent *next;
4823 next = search_cache_extent(pending, lookup);
4825 if (next->start - lookup > 32768)
4827 bits[ret].start = next->start;
4828 bits[ret].size = next->size;
4829 lookup = next->start + next->size;
4833 next = next_cache_extent(next);
/*
 * Destructor for a chunk_record stored in a cache tree: unlink it from
 * its lists before freeing (the free() call is elided in this excerpt).
 */
4841 static void free_chunk_record(struct cache_extent *cache)
4843 struct chunk_record *rec;
4845 rec = container_of(cache, struct chunk_record, cache);
4846 list_del_init(&rec->list);
4847 list_del_init(&rec->dextents);
/* Free every chunk record held in the chunk cache tree. */
4851 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4853 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* Destructor for a device_record stored in the device rb-tree. */
4856 static void free_device_record(struct rb_node *node)
4858 struct device_record *rec;
4860 rec = container_of(node, struct device_record, node);
4864 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert a block group record into the tree and its bookkeeping list
 * (the error-return after insert_cache_extent is elided here).
 */
4866 int insert_block_group_record(struct block_group_tree *tree,
4867 struct block_group_record *bg_rec)
4871 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4875 list_add_tail(&bg_rec->list, &tree->block_groups);
/* Destructor for a block_group_record stored in a cache tree. */
4879 static void free_block_group_record(struct cache_extent *cache)
4881 struct block_group_record *rec;
4883 rec = container_of(cache, struct block_group_record, cache);
4884 list_del_init(&rec->list);
/* Free every block group record held in the tree. */
4888 void free_block_group_tree(struct block_group_tree *tree)
4890 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert a device extent record; starts out on both orphan lists until
 * its chunk and device are matched up.
 */
4893 int insert_device_extent_record(struct device_extent_tree *tree,
4894 struct device_extent_record *de_rec)
4899 * Device extent is a bit different from the other extents, because
4900 * the extents which belong to the different devices may have the
4901 * same start and size, so we need use the special extent cache
4902 * search/insert functions.
4904 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4908 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4909 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/* Destructor for a device_extent_record stored in a cache tree. */
4913 static void free_device_extent_record(struct cache_extent *cache)
4915 struct device_extent_record *rec;
4917 rec = container_of(cache, struct device_extent_record, cache);
4918 if (!list_empty(&rec->chunk_list))
4919 list_del_init(&rec->chunk_list);
4920 if (!list_empty(&rec->device_list))
4921 list_del_init(&rec->device_list);
/* Free every device extent record held in the tree. */
4925 void free_device_extent_tree(struct device_extent_tree *tree)
4927 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4930 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record a legacy (v0) extent ref item as a backref.
 *
 * A v0 ref whose owner is below BTRFS_FIRST_FREE_OBJECTID belongs to a
 * tree block; otherwise it is data.  key.offset is the parent bytenr.
 */
4931 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4932 struct extent_buffer *leaf, int slot)
4934 struct btrfs_extent_ref_v0 *ref0;
4935 struct btrfs_key key;
4937 btrfs_item_key_to_cpu(leaf, &key, slot);
4938 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4939 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4940 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4942 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4943 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from a CHUNK_ITEM leaf entry.
 *
 * Allocates a record sized for the item's stripe count, copies the
 * chunk geometry fields and per-stripe devid/offset/uuid, and keys the
 * cache entry by logical start (key->offset) and chunk length.  Exits
 * the program on allocation failure.
 */
4949 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4950 struct btrfs_key *key,
4953 struct btrfs_chunk *ptr;
4954 struct chunk_record *rec;
4957 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4958 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* record size depends on the trailing stripes[] flexible array */
4960 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4962 fprintf(stderr, "memory allocation failed\n");
4966 INIT_LIST_HEAD(&rec->list);
4967 INIT_LIST_HEAD(&rec->dextents);
4970 rec->cache.start = key->offset;
4971 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4973 rec->generation = btrfs_header_generation(leaf);
4975 rec->objectid = key->objectid;
4976 rec->type = key->type;
4977 rec->offset = key->offset;
4979 rec->length = rec->cache.size;
4980 rec->owner = btrfs_chunk_owner(leaf, ptr);
4981 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4982 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4983 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4984 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4985 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4986 rec->num_stripes = num_stripes;
4987 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4989 for (i = 0; i < rec->num_stripes; ++i) {
4990 rec->stripes[i].devid =
4991 btrfs_stripe_devid_nr(leaf, ptr, i);
4992 rec->stripes[i].offset =
4993 btrfs_stripe_offset_nr(leaf, ptr, i);
4994 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4995 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse a CHUNK_ITEM into a chunk_record and cache it; a duplicate
 * insertion (overlapping chunk) is reported on stderr.
 */
5002 static int process_chunk_item(struct cache_tree *chunk_cache,
5003 struct btrfs_key *key, struct extent_buffer *eb,
5006 struct chunk_record *rec;
5009 rec = btrfs_new_chunk_record(eb, key, slot);
5010 ret = insert_cache_extent(chunk_cache, &rec->cache);
5012 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5013 rec->offset, rec->length);
/*
 * Parse a DEV_ITEM into a device_record and insert it into the device
 * rb-tree, keyed by devid; a duplicate devid is reported on stderr.
 * Exits the program on allocation failure.
 */
5020 static int process_device_item(struct rb_root *dev_cache,
5021 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5023 struct btrfs_dev_item *ptr;
5024 struct device_record *rec;
5027 ptr = btrfs_item_ptr(eb,
5028 slot, struct btrfs_dev_item);
5030 rec = malloc(sizeof(*rec));
5032 fprintf(stderr, "memory allocation failed\n");
/* devid appears both in the key offset and inside the item */
5036 rec->devid = key->offset;
5037 rec->generation = btrfs_header_generation(eb);
5039 rec->objectid = key->objectid;
5040 rec->type = key->type;
5041 rec->offset = key->offset;
5043 rec->devid = btrfs_device_id(eb, ptr);
5044 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5045 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5047 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5049 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from a BLOCK_GROUP_ITEM.
 *
 * The cache entry is keyed by the group's logical start (key->objectid)
 * and length (key->offset).  Exits the program on allocation failure.
 */
5056 struct block_group_record *
5057 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5060 struct btrfs_block_group_item *ptr;
5061 struct block_group_record *rec;
5063 rec = calloc(1, sizeof(*rec));
5065 fprintf(stderr, "memory allocation failed\n");
5069 rec->cache.start = key->objectid;
5070 rec->cache.size = key->offset;
5072 rec->generation = btrfs_header_generation(leaf);
5074 rec->objectid = key->objectid;
5075 rec->type = key->type;
5076 rec->offset = key->offset;
5078 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5079 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5081 INIT_LIST_HEAD(&rec->list);
/*
 * Parse a BLOCK_GROUP_ITEM into a record and insert it into the block
 * group tree; a duplicate range is reported on stderr.
 */
5086 static int process_block_group_item(struct block_group_tree *block_group_cache,
5087 struct btrfs_key *key,
5088 struct extent_buffer *eb, int slot)
5090 struct block_group_record *rec;
5093 rec = btrfs_new_block_group_record(eb, key, slot);
5094 ret = insert_block_group_record(block_group_cache, rec);
5096 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5097 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from a DEV_EXTENT item.
 *
 * The cache entry is keyed by (devid, physical offset, length) -- note
 * cache.objectid is the devid, unlike the other record types.  Exits the
 * program on allocation failure.
 */
5104 struct device_extent_record *
5105 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5106 struct btrfs_key *key, int slot)
5108 struct device_extent_record *rec;
5109 struct btrfs_dev_extent *ptr;
5111 rec = calloc(1, sizeof(*rec));
5113 fprintf(stderr, "memory allocation failed\n");
5117 rec->cache.objectid = key->objectid;
5118 rec->cache.start = key->offset;
5120 rec->generation = btrfs_header_generation(leaf);
5122 rec->objectid = key->objectid;
5123 rec->type = key->type;
5124 rec->offset = key->offset;
5126 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5127 rec->chunk_objecteid =
5128 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5130 btrfs_dev_extent_chunk_offset(leaf, ptr);
5131 rec->length = btrfs_dev_extent_length(leaf, ptr);
5132 rec->cache.size = rec->length;
5134 INIT_LIST_HEAD(&rec->chunk_list);
5135 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse a DEV_EXTENT item into a record and insert it into the device
 * extent tree; a duplicate is reported on stderr.
 */
5141 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5142 struct btrfs_key *key, struct extent_buffer *eb,
5145 struct device_extent_record *rec;
5148 rec = btrfs_new_device_extent_record(eb, key, slot);
5149 ret = insert_device_extent_record(dev_extent_cache, rec);
5152 "Device extent[%llu, %llu, %llu] existed.\n",
5153 rec->objectid, rec->offset, rec->length);
/*
 * Ingest one EXTENT_ITEM / METADATA_ITEM from the extent tree.
 *
 * Handles the v0 compat format (no flags/inline refs), records the
 * extent itself via add_extent_rec, then walks the inline references and
 * records each as a tree or data backref.  METADATA_ITEMs carry the
 * level in key.offset, so the byte count comes from the leafsize.
 *
 * NOTE(review): the switch scaffolding and loop bounds are partially
 * elided in this excerpt.
 */
5160 static int process_extent_item(struct btrfs_root *root,
5161 struct cache_tree *extent_cache,
5162 struct extent_buffer *eb, int slot)
5164 struct btrfs_extent_item *ei;
5165 struct btrfs_extent_inline_ref *iref;
5166 struct btrfs_extent_data_ref *dref;
5167 struct btrfs_shared_data_ref *sref;
5168 struct btrfs_key key;
5172 u32 item_size = btrfs_item_size_nr(eb, slot);
5178 btrfs_item_key_to_cpu(eb, &key, slot);
5180 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* skinny metadata: offset is the level, size is one tree block */
5182 num_bytes = root->leafsize;
5184 num_bytes = key.offset;
5187 if (item_size < sizeof(*ei)) {
5188 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5189 struct btrfs_extent_item_v0 *ei0;
5190 BUG_ON(item_size != sizeof(*ei0));
5191 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5192 refs = btrfs_extent_refs_v0(eb, ei0);
/* v0 items have no inline refs; record the extent and return */
5196 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5197 num_bytes, refs, 0, 0, 0, metadata, 1,
5201 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5202 refs = btrfs_extent_refs(eb, ei);
5203 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5208 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5209 refs, 0, 0, 0, metadata, 1, num_bytes);
5211 ptr = (unsigned long)(ei + 1);
/* non-skinny tree blocks embed a tree_block_info before the refs */
5212 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5213 key.type == BTRFS_EXTENT_ITEM_KEY)
5214 ptr += sizeof(struct btrfs_tree_block_info);
5216 end = (unsigned long)ei + item_size;
5218 iref = (struct btrfs_extent_inline_ref *)ptr;
5219 type = btrfs_extent_inline_ref_type(eb, iref);
5220 offset = btrfs_extent_inline_ref_offset(eb, iref);
5222 case BTRFS_TREE_BLOCK_REF_KEY:
5223 add_tree_backref(extent_cache, key.objectid,
5226 case BTRFS_SHARED_BLOCK_REF_KEY:
5227 add_tree_backref(extent_cache, key.objectid,
5230 case BTRFS_EXTENT_DATA_REF_KEY:
5231 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5232 add_data_backref(extent_cache, key.objectid, 0,
5233 btrfs_extent_data_ref_root(eb, dref),
5234 btrfs_extent_data_ref_objectid(eb,
5236 btrfs_extent_data_ref_offset(eb, dref),
5237 btrfs_extent_data_ref_count(eb, dref),
5240 case BTRFS_SHARED_DATA_REF_KEY:
5241 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5242 add_data_backref(extent_cache, key.objectid, offset,
5244 btrfs_shared_data_ref_count(eb, sref),
5248 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5249 key.objectid, key.type, num_bytes);
/* advance past this inline ref to the next one */
5252 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [offset, offset + bytes) is covered by exactly one free
 * space entry in the block group's free space ctl.
 *
 * Superblock mirror locations are mapped back into the block group via
 * btrfs_rmap_block and carved out of the range first (recursing for a
 * superblock landing in the middle).  Then the remaining range must
 * match a free space entry exactly in offset and size; the matched entry
 * is unlinked so verify_space_cache can detect leftovers.
 *
 * NOTE(review): the inner loop over the rmap results and several
 * free/return lines are elided in this excerpt.
 */
5259 static int check_cache_range(struct btrfs_root *root,
5260 struct btrfs_block_group_cache *cache,
5261 u64 offset, u64 bytes)
5263 struct btrfs_free_space *entry;
5269 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5270 bytenr = btrfs_sb_offset(i);
5271 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5272 cache->key.objectid, bytenr, 0,
5273 &logical, &nr, &stripe_len);
/* superblock copy entirely outside our range: ignore it */
5278 if (logical[nr] + stripe_len <= offset)
5280 if (offset + bytes <= logical[nr])
5282 if (logical[nr] == offset) {
5283 if (stripe_len >= bytes) {
/* superblock at the start of the range: trim the front */
5287 bytes -= stripe_len;
5288 offset += stripe_len;
5289 } else if (logical[nr] < offset) {
5290 if (logical[nr] + stripe_len >=
5295 bytes = (offset + bytes) -
5296 (logical[nr] + stripe_len);
5297 offset = logical[nr] + stripe_len;
5300 * Could be tricky, the super may land in the
5301 * middle of the area we're checking. First
5302 * check the easiest case, it's at the end.
5304 if (logical[nr] + stripe_len >=
5306 bytes = logical[nr] - offset;
5310 /* Check the left side */
5311 ret = check_cache_range(root, cache,
5313 logical[nr] - offset);
5319 /* Now we continue with the right side */
5320 bytes = (offset + bytes) -
5321 (logical[nr] + stripe_len);
5322 offset = logical[nr] + stripe_len;
5329 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5331 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5332 offset, offset+bytes);
5336 if (entry->offset != offset) {
5337 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5342 if (entry->bytes != bytes) {
5343 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5344 bytes, entry->bytes, offset);
/* consume the entry so leftover entries can be detected later */
5348 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check a block group's free space cache against the extent tree.
 *
 * Walks all EXTENT_ITEM / METADATA_ITEM entries inside the group; each
 * gap between consecutive allocated extents must be an exact free space
 * entry (check_cache_range), including the trailing gap up to the end of
 * the group.  Any free space entries left over afterwards mean the cache
 * claims space that the extent tree says is allocated.
 */
5353 static int verify_space_cache(struct btrfs_root *root,
5354 struct btrfs_block_group_cache *cache)
5356 struct btrfs_path *path;
5357 struct extent_buffer *leaf;
5358 struct btrfs_key key;
5362 path = btrfs_alloc_path();
5366 root = root->fs_info->extent_root;
/* never scan below the first superblock location */
5368 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5370 key.objectid = last;
5372 key.type = BTRFS_EXTENT_ITEM_KEY;
5374 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5379 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5380 ret = btrfs_next_leaf(root, path);
5388 leaf = path->nodes[0];
5389 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5390 if (key.objectid >= cache->key.offset + cache->key.objectid)
5392 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5393 key.type != BTRFS_METADATA_ITEM_KEY) {
5398 if (last == key.objectid) {
/* adjacent extents: just advance past this one */
5399 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5400 last = key.objectid + key.offset;
5402 last = key.objectid + root->leafsize;
/* gap between last extent end and this one must be free space */
5407 ret = check_cache_range(root, cache, last,
5408 key.objectid - last);
5411 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5412 last = key.objectid + key.offset;
5414 last = key.objectid + root->leafsize;
5418 if (last < cache->key.objectid + cache->key.offset)
5419 ret = check_cache_range(root, cache, last,
5420 cache->key.objectid +
5421 cache->key.offset - last);
5424 btrfs_free_path(path);
5427 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5428 fprintf(stderr, "There are still entries left in the space "
/*
 * fsck pass over all free space caches.
 *
 * Skips (and warns about) the whole check when the cache generation in
 * the superblock is stale -- the kernel will invalidate the cache anyway.
 * Otherwise iterates every block group, loads its on-disk free space
 * cache, and verifies it against the extent tree via verify_space_cache.
 * Progress reporting is driven through the global task ctx.  Returns
 * -EINVAL if any group failed verification.
 */
5436 static int check_space_cache(struct btrfs_root *root)
5438 struct btrfs_block_group_cache *cache;
5439 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5443 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5444 btrfs_super_generation(root->fs_info->super_copy) !=
5445 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5446 printf("cache and super generation don't match, space cache "
5447 "will be invalidated\n");
5451 if (ctx.progress_enabled) {
5452 ctx.tp = TASK_FREE_SPACE;
5453 task_start(ctx.info);
5457 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5461 start = cache->key.objectid + cache->key.offset;
5462 if (!cache->free_space_ctl) {
5463 if (btrfs_init_free_space_ctl(cache,
5464 root->sectorsize)) {
/* start from a clean ctl before loading the on-disk cache */
5469 btrfs_remove_free_space_cache(cache);
5472 ret = load_free_space_cache(root->fs_info, cache);
5476 ret = verify_space_cache(root, cache);
5478 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5479 cache->key.objectid);
5484 task_stop(ctx.info);
5486 return error ? -EINVAL : 0;
/*
 * Verify the data checksums for one extent against the csum tree.
 *
 * Reads the extent [bytenr, bytenr + num_bytes) into a buffer (in as few
 * reads as possible), computes a crc per sector, and compares each
 * against the expected csum stored at leaf_offset in the csum leaf 'eb'.
 * On mismatch, retries the sector from the next mirror until all copies
 * are exhausted.
 *
 * NOTE(review): the mirror-retry jump and the buffer free/return are
 * among the elided lines in this excerpt.
 */
5489 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5490 u64 num_bytes, unsigned long leaf_offset,
5491 struct extent_buffer *eb) {
5494 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5496 unsigned long csum_offset;
5500 u64 data_checked = 0;
/* extents are always sector-aligned; anything else is corrupt */
5506 if (num_bytes % root->sectorsize)
5509 data = malloc(num_bytes);
5513 while (offset < num_bytes) {
5516 read_len = num_bytes - offset;
5517 /* read as much space once a time */
5518 ret = read_extent_data(root, data + offset,
5519 bytenr + offset, &read_len, mirror);
5523 /* verify every 4k data's checksum */
5524 while (data_checked < read_len) {
5526 tmp = offset + data_checked;
5528 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5529 csum, root->sectorsize);
5530 btrfs_csum_final(csum, (char *)&csum);
/* expected csum lives at sector-index * csum_size in the leaf */
5532 csum_offset = leaf_offset +
5533 tmp / root->sectorsize * csum_size;
5534 read_extent_buffer(eb, (char *)&csum_expected,
5535 csum_offset, csum_size);
5536 /* try another mirror */
5537 if (csum != csum_expected) {
5538 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5539 mirror, bytenr + tmp,
5540 csum, csum_expected);
5541 num_copies = btrfs_num_copies(
5542 &root->fs_info->mapping_tree,
5544 if (mirror < num_copies - 1) {
5549 data_checked += root->sectorsize;
/*
 * Check that the byte range [@bytenr, @bytenr + num_bytes) is fully
 * covered by extent items in the extent tree.  The range is shrunk as
 * covering extent items are found; anything left uncovered at the end
 * is reported as a csum range without an extent record.
 * Returns 0 when the range is fully covered, negative on error.
 */
5558 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5561 struct btrfs_path *path;
5562 struct extent_buffer *leaf;
5563 struct btrfs_key key;
5566 path = btrfs_alloc_path();
5568 fprintf(stderr, "Error allocing path\n");
/* search with offset (u64)-1 so we land just after the last item at @bytenr */
5572 key.objectid = bytenr;
5573 key.type = BTRFS_EXTENT_ITEM_KEY;
5574 key.offset = (u64)-1;
5577 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5580 fprintf(stderr, "Error looking up extent record %d\n", ret);
5581 btrfs_free_path(path);
5584 if (path->slots[0] > 0) {
5587 ret = btrfs_prev_leaf(root, path);
5590 } else if (ret > 0) {
5597 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5600 * Block group items come before extent items if they have the same
5601 * bytenr, so walk back one more just in case. Dear future traveler,
5602 * first congrats on mastering time travel. Now if it's not too much
5603 * trouble could you go back to 2006 and tell Chris to make the
5604 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5605 * EXTENT_ITEM_KEY please?
5607 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5608 if (path->slots[0] > 0) {
5611 ret = btrfs_prev_leaf(root, path);
5614 } else if (ret > 0) {
5619 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* main scan loop: advance through leaves looking for covering extents */
5623 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5624 ret = btrfs_next_leaf(root, path);
5626 fprintf(stderr, "Error going to next leaf "
5628 btrfs_free_path(path);
5634 leaf = path->nodes[0];
5635 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5636 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* extent ends before our range starts: not relevant, keep scanning */
5640 if (key.objectid + key.offset < bytenr) {
/* extent starts past our range: nothing further can cover us */
5644 if (key.objectid > bytenr + num_bytes)
5647 if (key.objectid == bytenr) {
5648 if (key.offset >= num_bytes) {
/* partial cover from the front: shrink the remaining range */
5652 num_bytes -= key.offset;
5653 bytenr += key.offset;
5654 } else if (key.objectid < bytenr) {
5655 if (key.objectid + key.offset >= bytenr + num_bytes) {
5659 num_bytes = (bytenr + num_bytes) -
5660 (key.objectid + key.offset);
5661 bytenr = key.objectid + key.offset;
5663 if (key.objectid + key.offset < bytenr + num_bytes) {
5664 u64 new_start = key.objectid + key.offset;
5665 u64 new_bytes = bytenr + num_bytes - new_start;
5668 * Weird case, the extent is in the middle of
5669 * our range, we'll have to search one side
5670 * and then the other. Not sure if this happens
5671 * in real life, but no harm in coding it up
5672 * anyway just in case.
5674 btrfs_release_path(path);
/* recurse to check the right-hand remainder of the split range */
5675 ret = check_extent_exists(root, new_start,
5678 fprintf(stderr, "Right section didn't "
5682 num_bytes = key.objectid - bytenr;
5685 num_bytes = key.objectid - bytenr;
/* anything left over was never covered by an extent item */
5692 if (num_bytes && !ret) {
5693 fprintf(stderr, "There are no extents for csum range "
5694 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5698 btrfs_free_path(path);
/*
 * Walk the whole csum tree.  For each csum item, optionally verify the
 * data checksums (when check_data_csum is set), and accumulate
 * contiguous csum ranges so check_extent_exists() can confirm each
 * range is backed by a real extent record.
 * Returns 0 on success, negative on error.
 */
5702 static int check_csums(struct btrfs_root *root)
5704 struct btrfs_path *path;
5705 struct extent_buffer *leaf;
5706 struct btrfs_key key;
5707 u64 offset = 0, num_bytes = 0;
5708 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5712 unsigned long leaf_offset;
/* operate on the csum tree, not the root we were handed */
5714 root = root->fs_info->csum_root;
5715 if (!extent_buffer_uptodate(root->node)) {
5716 fprintf(stderr, "No valid csum tree found\n");
5720 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5721 key.type = BTRFS_EXTENT_CSUM_KEY;
5724 path = btrfs_alloc_path();
5728 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5730 fprintf(stderr, "Error searching csum tree %d\n", ret);
5731 btrfs_free_path(path);
5735 if (ret > 0 && path->slots[0])
/* iterate over every csum item in key order */
5740 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5741 ret = btrfs_next_leaf(root, path);
5743 fprintf(stderr, "Error going to next leaf "
5750 leaf = path->nodes[0];
5752 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5753 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* each csum_size bytes in the item covers one sector of data */
5758 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5759 csum_size) * root->sectorsize;
5760 if (!check_data_csum)
5761 goto skip_csum_check;
5762 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5763 ret = check_extent_csums(root, key.offset, data_len,
5769 offset = key.offset;
/* gap in csum coverage: verify the accumulated range has an extent */
5770 } else if (key.offset != offset + num_bytes) {
5771 ret = check_extent_exists(root, offset, num_bytes);
5773 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5774 "there is no extent record\n",
5775 offset, offset+num_bytes);
5778 offset = key.offset;
5781 num_bytes += data_len;
5785 btrfs_free_path(path);
/*
 * Return whether @key sorts strictly before @drop_key in btrfs key
 * order (objectid, then type, then offset).  Used to skip items below
 * the drop progress point of a partially-deleted snapshot.
 */
5789 static int is_dropped_key(struct btrfs_key *key,
5790 struct btrfs_key *drop_key) {
5791 if (key->objectid < drop_key->objectid)
5793 else if (key->objectid == drop_key->objectid) {
5794 if (key->type < drop_key->type)
5796 else if (key->type == drop_key->type) {
5797 if (key->offset < drop_key->offset)
5805 * Here are the rules for FULL_BACKREF.
5807 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5808 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5810 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5811 * if it happened after the relocation occurred since we'll have dropped the
5812 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5813 * have no real way to know for sure.
5815 * We process the blocks one root at a time, and we start from the lowest root
5816 * objectid and go to the highest. So we can just lookup the owner backref for
5817 * the record and if we don't find it then we know it doesn't exist and we have
5820 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5821 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5822 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether tree block @buf should carry FULL_BACKREF (see the
 * rules documented above) and record the verdict into @flags and the
 * extent_record.  @ri is the root the block was reached from.
 * If the recorded flag_block_full_backref disagrees with the verdict,
 * the record is marked bad_full_backref for later repair.
 */
5824 static int calc_extent_flag(struct btrfs_root *root,
5825 struct cache_tree *extent_cache,
5826 struct extent_buffer *buf,
5827 struct root_item_record *ri,
5830 struct extent_record *rec;
5831 struct cache_extent *cache;
5832 struct tree_backref *tback;
5835 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5836 /* we have added this extent before */
5838 rec = container_of(cache, struct extent_record, cache);
5841 * Except file/reloc tree, we can not have
5844 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* a root's own node is handled separately from interior blocks */
5849 if (buf->start == ri->bytenr)
/* a RELOC header flag forces FULL_BACKREF (rule 1 above) */
5852 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5855 owner = btrfs_header_owner(buf);
5856 if (owner == ri->objectid)
/* owner differs: look for the owner's backref (rule 2 above) */
5859 tback = find_tree_backref(rec, 0, owner);
/* verdict: normal backref — flag disagreement means the record is bad */
5864 if (rec->flag_block_full_backref != -1 &&
5865 rec->flag_block_full_backref != 0)
5866 rec->bad_full_backref = 1;
/* verdict: FULL_BACKREF required */
5869 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5870 if (rec->flag_block_full_backref != -1 &&
5871 rec->flag_block_full_backref != 1)
5872 rec->bad_full_backref = 1;
/*
 * Process the next pending tree block in the breadth-first scan of all
 * metadata.  Picks a block via pick_next_pending(), issues readahead,
 * reads the block, works out its FULL_BACKREF status, sanity-checks it,
 * and then records everything it references: extent items, csums,
 * chunks, devices, block groups, dev extents, backrefs, orphans and
 * file extents for leaves; child pointers for nodes.  Also accumulates
 * the global byte/waste statistics.
 */
5876 static int run_next_block(struct btrfs_root *root,
5877 struct block_info *bits,
5880 struct cache_tree *pending,
5881 struct cache_tree *seen,
5882 struct cache_tree *reada,
5883 struct cache_tree *nodes,
5884 struct cache_tree *extent_cache,
5885 struct cache_tree *chunk_cache,
5886 struct rb_root *dev_cache,
5887 struct block_group_tree *block_group_cache,
5888 struct device_extent_tree *dev_extent_cache,
5889 struct root_item_record *ri)
5891 struct extent_buffer *buf;
5892 struct extent_record *rec = NULL;
5903 struct btrfs_key key;
5904 struct cache_extent *cache;
/* choose the next batch of blocks and kick off readahead for them */
5907 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5908 bits_nr, &reada_bits);
5913 for(i = 0; i < nritems; i++) {
5914 ret = add_cache_extent(reada, bits[i].start,
5919 /* fixme, get the parent transid */
5920 readahead_tree_block(root, bits[i].start,
5924 *last = bits[0].start;
5925 bytenr = bits[0].start;
5926 size = bits[0].size;
/* this block is being processed now: drop it from the work queues */
5928 cache = lookup_cache_extent(pending, bytenr, size);
5930 remove_cache_extent(pending, cache);
5933 cache = lookup_cache_extent(reada, bytenr, size);
5935 remove_cache_extent(reada, cache);
5938 cache = lookup_cache_extent(nodes, bytenr, size);
5940 remove_cache_extent(nodes, cache);
5943 cache = lookup_cache_extent(extent_cache, bytenr, size);
5945 rec = container_of(cache, struct extent_record, cache);
5946 gen = rec->parent_generation;
5949 /* fixme, get the real parent transid */
5950 buf = read_tree_block(root, bytenr, size, gen);
5951 if (!extent_buffer_uptodate(buf)) {
5952 record_bad_block_io(root->fs_info,
5953 extent_cache, bytenr, size);
5957 nritems = btrfs_header_nritems(buf);
/*
 * Determine the block's backref flags: from the extent tree when it is
 * trusted, otherwise computed via calc_extent_flag().
 */
5960 if (!init_extent_tree) {
5961 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5962 btrfs_header_level(buf), 1, NULL,
5965 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5967 fprintf(stderr, "Couldn't calc extent flags\n");
5968 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5973 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5975 fprintf(stderr, "Couldn't calc extent flags\n");
5976 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5980 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5982 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5983 ri->objectid == btrfs_header_owner(buf)) {
5985 * Ok we got to this block from it's original owner and
5986 * we have FULL_BACKREF set. Relocation can leave
5987 * converted blocks over so this is altogether possible,
5988 * however it's not possible if the generation > the
5989 * last snapshot, so check for this case.
5991 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5992 btrfs_header_generation(buf) > ri->last_snapshot) {
5993 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5994 rec->bad_full_backref = 1;
5999 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6000 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6001 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6002 rec->bad_full_backref = 1;
6006 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6007 rec->flag_block_full_backref = 1;
6011 rec->flag_block_full_backref = 0;
6013 owner = btrfs_header_owner(buf);
6016 ret = check_block(root, extent_cache, buf, flags);
/* leaf: dispatch each item by key type and record what it describes */
6020 if (btrfs_is_leaf(buf)) {
6021 btree_space_waste += btrfs_leaf_free_space(root, buf);
6022 for (i = 0; i < nritems; i++) {
6023 struct btrfs_file_extent_item *fi;
6024 btrfs_item_key_to_cpu(buf, &key, i);
6025 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6026 process_extent_item(root, extent_cache, buf,
6030 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6031 process_extent_item(root, extent_cache, buf,
6035 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6037 btrfs_item_size_nr(buf, i);
6040 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6041 process_chunk_item(chunk_cache, &key, buf, i);
6044 if (key.type == BTRFS_DEV_ITEM_KEY) {
6045 process_device_item(dev_cache, &key, buf, i);
6048 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6049 process_block_group_item(block_group_cache,
6053 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6054 process_device_extent_item(dev_extent_cache,
6059 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6060 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6061 process_extent_ref_v0(extent_cache, buf, i);
6068 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6069 add_tree_backref(extent_cache, key.objectid, 0,
6073 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6074 add_tree_backref(extent_cache, key.objectid,
6078 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6079 struct btrfs_extent_data_ref *ref;
6080 ref = btrfs_item_ptr(buf, i,
6081 struct btrfs_extent_data_ref);
6082 add_data_backref(extent_cache,
6084 btrfs_extent_data_ref_root(buf, ref),
6085 btrfs_extent_data_ref_objectid(buf,
6087 btrfs_extent_data_ref_offset(buf, ref),
6088 btrfs_extent_data_ref_count(buf, ref),
6089 0, root->sectorsize);
6092 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6093 struct btrfs_shared_data_ref *ref;
6094 ref = btrfs_item_ptr(buf, i,
6095 struct btrfs_shared_data_ref);
6096 add_data_backref(extent_cache,
6097 key.objectid, key.offset, 0, 0, 0,
6098 btrfs_shared_data_ref_count(buf, ref),
6099 0, root->sectorsize);
/* orphan items are queued on delete_items for later cleanup */
6102 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6103 struct bad_item *bad;
6105 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6109 bad = malloc(sizeof(struct bad_item));
6112 INIT_LIST_HEAD(&bad->list);
6113 memcpy(&bad->key, &key,
6114 sizeof(struct btrfs_key));
6115 bad->root_id = owner;
6116 list_add_tail(&bad->list, &delete_items);
/* only regular (non-inline, non-hole) file extents are counted below */
6119 if (key.type != BTRFS_EXTENT_DATA_KEY)
6121 fi = btrfs_item_ptr(buf, i,
6122 struct btrfs_file_extent_item);
6123 if (btrfs_file_extent_type(buf, fi) ==
6124 BTRFS_FILE_EXTENT_INLINE)
6126 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6129 data_bytes_allocated +=
6130 btrfs_file_extent_disk_num_bytes(buf, fi);
6131 if (data_bytes_allocated < root->sectorsize) {
6134 data_bytes_referenced +=
6135 btrfs_file_extent_num_bytes(buf, fi);
6136 add_data_backref(extent_cache,
6137 btrfs_file_extent_disk_bytenr(buf, fi),
6138 parent, owner, key.objectid, key.offset -
6139 btrfs_file_extent_offset(buf, fi), 1, 1,
6140 btrfs_file_extent_disk_num_bytes(buf, fi));
/* node: queue every child block and record its backref */
6144 struct btrfs_key first_key;
6146 first_key.objectid = 0;
6149 btrfs_item_key_to_cpu(buf, &first_key, 0);
6150 level = btrfs_header_level(buf);
6151 for (i = 0; i < nritems; i++) {
6152 ptr = btrfs_node_blockptr(buf, i);
6153 size = btrfs_level_size(root, level - 1);
6154 btrfs_node_key_to_cpu(buf, &key, i);
/* skip children already dropped by snapshot deletion progress */
6156 if ((level == ri->drop_level)
6157 && is_dropped_key(&key, &ri->drop_key)) {
6161 ret = add_extent_rec(extent_cache, &key,
6162 btrfs_node_ptr_generation(buf, i),
6163 ptr, size, 0, 0, 1, 0, 1, 0,
6167 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6170 add_pending(nodes, seen, ptr, size);
6172 add_pending(pending, seen, ptr, size);
6175 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6176 nritems) * sizeof(struct btrfs_key_ptr);
/* global accounting for the final report */
6178 total_btree_bytes += buf->len;
6179 if (fs_root_objectid(btrfs_header_owner(buf)))
6180 total_fs_tree_bytes += buf->len;
6181 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6182 total_extent_tree_bytes += buf->len;
6183 if (!found_old_backref &&
6184 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6185 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6186 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6187 found_old_backref = 1;
6189 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root: queue the root block on the node or
 * leaf pending tree depending on its level, create its extent record,
 * and add the appropriate tree backref (shared by-parent for reloc /
 * pre-mixed-backref trees, by-root otherwise).
 */
6193 static int add_root_to_pending(struct extent_buffer *buf,
6194 struct cache_tree *extent_cache,
6195 struct cache_tree *pending,
6196 struct cache_tree *seen,
6197 struct cache_tree *nodes,
6200 if (btrfs_header_level(buf) > 0)
6201 add_pending(nodes, seen, buf->start, buf->len);
6203 add_pending(pending, seen, buf->start, buf->len);
6204 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6205 0, 1, 1, 0, 1, 0, buf->len);
6207 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6208 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
/* old-style/reloc roots reference themselves as parent */
6209 add_tree_backref(extent_cache, buf->start, buf->start,
6212 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6216 /* as we fix the tree, we might be deleting blocks that
6217 * we're tracking for repair. This hook makes sure we
6218 * remove any backrefs for blocks as we are fixing them.
/*
 * Transaction hook called when repair code frees an extent: mirror the
 * deletion into fsck's in-memory extent cache so the bookkeeping does
 * not later flag the freed extent as missing references.  Decrements
 * the matching data or tree backref counts and frees the record when
 * nothing references it any more.
 */
6220 static int free_extent_hook(struct btrfs_trans_handle *trans,
6221 struct btrfs_root *root,
6222 u64 bytenr, u64 num_bytes, u64 parent,
6223 u64 root_objectid, u64 owner, u64 offset,
6226 struct extent_record *rec;
6227 struct cache_extent *cache;
6229 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* data extents have owner >= FIRST_FREE_OBJECTID, metadata below it */
6231 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6232 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6236 rec = container_of(cache, struct extent_record, cache);
6238 struct data_backref *back;
6239 back = find_data_backref(rec, parent, root_objectid, owner,
6240 offset, 1, bytenr, num_bytes);
6243 if (back->node.found_ref) {
6244 back->found_ref -= refs_to_drop;
6246 rec->refs -= refs_to_drop;
6248 if (back->node.found_extent_tree) {
6249 back->num_refs -= refs_to_drop;
6250 if (rec->extent_item_refs)
6251 rec->extent_item_refs -= refs_to_drop;
6253 if (back->found_ref == 0)
6254 back->node.found_ref = 0;
6255 if (back->num_refs == 0)
6256 back->node.found_extent_tree = 0;
/* NOTE(review): condition keeps the backref while a real ref remains */
6258 if (!back->node.found_extent_tree && back->node.found_ref) {
6259 list_del(&back->node.list);
6263 struct tree_backref *back;
6264 back = find_tree_backref(rec, parent, root_objectid);
6267 if (back->node.found_ref) {
6270 back->node.found_ref = 0;
6272 if (back->node.found_extent_tree) {
6273 if (rec->extent_item_refs)
6274 rec->extent_item_refs--;
6275 back->node.found_extent_tree = 0;
6277 if (!back->node.found_extent_tree && back->node.found_ref) {
6278 list_del(&back->node.list);
6282 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item keyed at @bytenr
 * (extent/metadata items and all backref item types), searching from
 * the highest key downwards.  For extent/metadata items the block
 * group accounting is updated to return the freed bytes.
 */
6287 static int delete_extent_records(struct btrfs_trans_handle *trans,
6288 struct btrfs_root *root,
6289 struct btrfs_path *path,
6290 u64 bytenr, u64 new_len)
6292 struct btrfs_key key;
6293 struct btrfs_key found_key;
6294 struct extent_buffer *leaf;
/* start just past the last possible item at this bytenr */
6299 key.objectid = bytenr;
6301 key.offset = (u64)-1;
6304 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6311 if (path->slots[0] == 0)
6317 leaf = path->nodes[0];
6318 slot = path->slots[0];
6320 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6321 if (found_key.objectid != bytenr)
/* only extent items and backref item types are fair game */
6324 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6325 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6326 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6327 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6328 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6329 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6330 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6331 btrfs_release_path(path);
/* step the search key backwards to the previous possible item */
6332 if (found_key.type == 0) {
6333 if (found_key.offset == 0)
6335 key.offset = found_key.offset - 1;
6336 key.type = found_key.type;
6338 key.type = found_key.type - 1;
6339 key.offset = (u64)-1;
6343 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6344 found_key.objectid, found_key.type, found_key.offset);
6346 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6349 btrfs_release_path(path);
6351 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6352 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* metadata items encode the level in offset; size is one leaf */
6353 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6354 found_key.offset : root->leafsize;
6356 ret = btrfs_update_block_group(trans, root, bytenr,
6363 btrfs_release_path(path);
6368 * for a single backref, this will allocate a new extent
6369 * and add the backref to it.
/*
 * Re-create the on-disk extent item for @rec (when not already
 * @allocated) and then add the backref described by @back to it via
 * btrfs_inc_extent_ref().  Data backrefs are added once per found_ref;
 * tree backrefs once.  @flags is OR'ed into tree-block extent flags.
 *
 * Fixes vs. previous revision: the four `&copy_key` expressions had
 * been corrupted into the HTML entity `©_key` (an encoding
 * round-trip bug) and the copy_key declaration carried a stray second
 * semicolon; both are repaired below with no behavior change beyond
 * making the code compile as intended.
 */
6371 static int record_extent(struct btrfs_trans_handle *trans,
6372 struct btrfs_fs_info *info,
6373 struct btrfs_path *path,
6374 struct extent_record *rec,
6375 struct extent_backref *back,
6376 int allocated, u64 flags)
6379 struct btrfs_root *extent_root = info->extent_root;
6380 struct extent_buffer *leaf;
6381 struct btrfs_key ins_key;
6382 struct btrfs_extent_item *ei;
6383 struct tree_backref *tback;
6384 struct data_backref *dback;
6385 struct btrfs_tree_block_info *bi;
/* a metadata extent is never smaller than one leaf */
6388 rec->max_size = max_t(u64, rec->max_size,
6389 info->extent_root->leafsize);
6392 u32 item_size = sizeof(*ei);
/* tree blocks carry an extra btrfs_tree_block_info after the item */
6395 item_size += sizeof(*bi);
6397 ins_key.objectid = rec->start;
6398 ins_key.offset = rec->max_size;
6399 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6401 ret = btrfs_insert_empty_item(trans, extent_root, path,
6402 &ins_key, item_size);
6406 leaf = path->nodes[0];
6407 ei = btrfs_item_ptr(leaf, path->slots[0],
6408 struct btrfs_extent_item);
6410 btrfs_set_extent_refs(leaf, ei, 0);
6411 btrfs_set_extent_generation(leaf, ei, rec->generation);
6413 if (back->is_data) {
6414 btrfs_set_extent_flags(leaf, ei,
6415 BTRFS_EXTENT_FLAG_DATA);
6417 struct btrfs_disk_key copy_key;
6419 tback = (struct tree_backref *)back;
6420 bi = (struct btrfs_tree_block_info *)(ei + 1);
6421 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* we only know the level; first key is recorded as all-zero */
6424 btrfs_set_disk_key_objectid(&copy_key,
6425 rec->info_objectid);
6426 btrfs_set_disk_key_type(&copy_key, 0);
6427 btrfs_set_disk_key_offset(&copy_key, 0);
6429 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6430 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6432 btrfs_set_extent_flags(leaf, ei,
6433 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6436 btrfs_mark_buffer_dirty(leaf);
6437 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6438 rec->max_size, 1, 0);
6441 btrfs_release_path(path);
6444 if (back->is_data) {
6448 dback = (struct data_backref *)back;
6449 if (back->full_backref)
6450 parent = dback->parent;
6454 for (i = 0; i < dback->found_ref; i++) {
6455 /* if parent != 0, we're doing a full backref
6456 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6457 * just makes the backref allocator create a data
6460 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6461 rec->start, rec->max_size,
6465 BTRFS_FIRST_FREE_OBJECTID :
6471 fprintf(stderr, "adding new data backref"
6472 " on %llu %s %llu owner %llu"
6473 " offset %llu found %d\n",
6474 (unsigned long long)rec->start,
6475 back->full_backref ?
6477 back->full_backref ?
6478 (unsigned long long)parent :
6479 (unsigned long long)dback->root,
6480 (unsigned long long)dback->owner,
6481 (unsigned long long)dback->offset,
6486 tback = (struct tree_backref *)back;
6487 if (back->full_backref)
6488 parent = tback->parent;
6492 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6493 rec->start, rec->max_size,
6494 parent, tback->root, 0, 0);
6495 fprintf(stderr, "adding new tree backref on "
6496 "start %llu len %llu parent %llu root %llu\n",
6497 rec->start, rec->max_size, parent, tback->root);
6500 btrfs_release_path(path);
/*
 * One candidate (bytenr, bytes) value voted for by data backrefs while
 * reconciling a disputed extent record; linked on a local list in
 * verify_backrefs().  Remaining fields elided in this excerpt.
 */
6504 struct extent_entry {
6509 struct list_head list;
/*
 * Linear search of @entries for the extent_entry matching exactly
 * (@bytenr, @bytes); returns it, or NULL when absent.
 */
6512 static struct extent_entry *find_entry(struct list_head *entries,
6513 u64 bytenr, u64 bytes)
6515 struct extent_entry *entry = NULL;
6517 list_for_each_entry(entry, entries, list) {
6518 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the candidate extent_entry with the highest vote count,
 * ignoring entries whose refs are all broken.  Returns NULL when two
 * trustworthy candidates tie — the caller then needs a tiebreaker.
 */
6525 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6527 struct extent_entry *entry, *best = NULL, *prev = NULL;
6529 list_for_each_entry(entry, entries, list) {
6536 * If there are as many broken entries as entries then we know
6537 * not to trust this particular entry.
6539 if (entry->broken == entry->count)
6543 * If our current entry == best then we can't be sure our best
6544 * is really the best, so we need to keep searching.
6546 if (best && best->count == entry->count) {
6552 /* Prev == entry, not good enough, have to keep searching */
6553 if (!prev->broken && prev->count == entry->count)
6557 best = (prev->count > entry->count) ? prev : entry;
6559 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with the winning
 * extent @entry: locate the BTRFS_EXTENT_DATA item that @dback refers
 * to, then in a transaction adjust its disk_bytenr / offset /
 * disk_num_bytes (and ram_bytes when uncompressed).  Cases that cannot
 * be fixed safely (compressed mismatches, refs outside the entry
 * range) are reported and left alone.
 */
6566 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6567 struct data_backref *dback, struct extent_entry *entry)
6569 struct btrfs_trans_handle *trans;
6570 struct btrfs_root *root;
6571 struct btrfs_file_extent_item *fi;
6572 struct extent_buffer *leaf;
6573 struct btrfs_key key;
/* resolve the subvolume root the backref belongs to */
6577 key.objectid = dback->root;
6578 key.type = BTRFS_ROOT_ITEM_KEY;
6579 key.offset = (u64)-1;
6580 root = btrfs_read_fs_root(info, &key);
6582 fprintf(stderr, "Couldn't find root for our ref\n");
6587 * The backref points to the original offset of the extent if it was
6588 * split, so we need to search down to the offset we have and then walk
6589 * forward until we find the backref we're looking for.
6591 key.objectid = dback->owner;
6592 key.type = BTRFS_EXTENT_DATA_KEY;
6593 key.offset = dback->offset;
6594 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6596 fprintf(stderr, "Error looking up ref %d\n", ret);
6601 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6602 ret = btrfs_next_leaf(root, path);
6604 fprintf(stderr, "Couldn't find our ref, next\n");
6608 leaf = path->nodes[0];
6609 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6610 if (key.objectid != dback->owner ||
6611 key.type != BTRFS_EXTENT_DATA_KEY) {
6612 fprintf(stderr, "Couldn't find our ref, search\n");
6615 fi = btrfs_item_ptr(leaf, path->slots[0],
6616 struct btrfs_file_extent_item);
6617 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6618 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* stop once the item matching the backref's disk extent is found */
6620 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6625 btrfs_release_path(path);
6627 trans = btrfs_start_transaction(root, 1);
6629 return PTR_ERR(trans);
6632 * Ok we have the key of the file extent we want to fix, now we can cow
6633 * down to the thing and fix it.
6635 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6637 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6638 key.objectid, key.type, key.offset, ret);
6642 fprintf(stderr, "Well that's odd, we just found this key "
6643 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6648 leaf = path->nodes[0];
6649 fi = btrfs_item_ptr(leaf, path->slots[0],
6650 struct btrfs_file_extent_item);
/* compressed extents can't have their offset shifted — punt */
6652 if (btrfs_file_extent_compression(leaf, fi) &&
6653 dback->disk_bytenr != entry->bytenr) {
6654 fprintf(stderr, "Ref doesn't match the record start and is "
6655 "compressed, please take a btrfs-image of this file "
6656 "system and send it to a btrfs developer so they can "
6657 "complete this functionality for bytenr %Lu\n",
6658 dback->disk_bytenr);
/* broken backref: simply point it at the agreed-upon bytenr */
6663 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6664 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6665 } else if (dback->disk_bytenr > entry->bytenr) {
6666 u64 off_diff, offset;
6668 off_diff = dback->disk_bytenr - entry->bytenr;
6669 offset = btrfs_file_extent_offset(leaf, fi);
6670 if (dback->disk_bytenr + offset +
6671 btrfs_file_extent_num_bytes(leaf, fi) >
6672 entry->bytenr + entry->bytes) {
6673 fprintf(stderr, "Ref is past the entry end, please "
6674 "take a btrfs-image of this file system and "
6675 "send it to a btrfs developer, ref %Lu\n",
6676 dback->disk_bytenr);
/* slide the start back to entry->bytenr, growing the offset */
6681 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6682 btrfs_set_file_extent_offset(leaf, fi, offset);
6683 } else if (dback->disk_bytenr < entry->bytenr) {
6686 offset = btrfs_file_extent_offset(leaf, fi);
6687 if (dback->disk_bytenr + offset < entry->bytenr) {
6688 fprintf(stderr, "Ref is before the entry start, please"
6689 " take a btrfs-image of this file system and "
6690 "send it to a btrfs developer, ref %Lu\n",
6691 dback->disk_bytenr);
/* move the start forward, shrinking the offset accordingly */
6696 offset += dback->disk_bytenr;
6697 offset -= entry->bytenr;
6698 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6699 btrfs_set_file_extent_offset(leaf, fi, offset);
6702 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6705 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6706 * only do this if we aren't using compression, otherwise it's a
6709 if (!btrfs_file_extent_compression(leaf, fi))
6710 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6712 printf("ram bytes may be wrong?\n");
6713 btrfs_mark_buffer_dirty(leaf);
6715 err = btrfs_commit_transaction(trans, root);
6716 btrfs_release_path(path);
6717 return ret ? ret : err;
/*
 * Reconcile a data extent record whose backrefs disagree about
 * (bytenr, bytes).  Each backref votes for a candidate extent_entry;
 * the majority wins (the on-disk extent record acts as a tiebreaker),
 * and repair_ref() rewrites the dissenting file extent items.  Entries
 * are freed before returning.
 */
6720 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6721 struct extent_record *rec)
6723 struct extent_backref *back;
6724 struct data_backref *dback;
6725 struct extent_entry *entry, *best = NULL;
6728 int broken_entries = 0;
6733 * Metadata is easy and the backrefs should always agree on bytenr and
6734 * size, if not we've got bigger issues.
/* pass 1: collect one vote per data backref with a real ref */
6739 list_for_each_entry(back, &rec->backrefs, list) {
6740 if (back->full_backref || !back->is_data)
6743 dback = (struct data_backref *)back;
6746 * We only pay attention to backrefs that we found a real
6749 if (dback->found_ref == 0)
6753 * For now we only catch when the bytes don't match, not the
6754 * bytenr. We can easily do this at the same time, but I want
6755 * to have a fs image to test on before we just add repair
6756 * functionality willy-nilly so we know we won't screw up the
6760 entry = find_entry(&entries, dback->disk_bytenr,
6763 entry = malloc(sizeof(struct extent_entry));
6768 memset(entry, 0, sizeof(*entry));
6769 entry->bytenr = dback->disk_bytenr;
6770 entry->bytes = dback->bytes;
6771 list_add_tail(&entry->list, &entries);
6776 * If we only have on entry we may think the entries agree when
6777 * in reality they don't so we have to do some extra checking.
6779 if (dback->disk_bytenr != rec->start ||
6780 dback->bytes != rec->nr || back->broken)
6791 /* Yay all the backrefs agree, carry on good sir */
6792 if (nr_entries <= 1 && !mismatch)
6795 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6796 "%Lu\n", rec->start);
6799 * First we want to see if the backrefs can agree amongst themselves who
6800 * is right, so figure out which one of the entries has the highest
6803 best = find_most_right_entry(&entries);
6806 * Ok so we may have an even split between what the backrefs think, so
6807 * this is where we use the extent ref to see what it thinks.
6810 entry = find_entry(&entries, rec->start, rec->nr);
6811 if (!entry && (!broken_entries || !rec->found_rec)) {
6812 fprintf(stderr, "Backrefs don't agree with each other "
6813 "and extent record doesn't agree with anybody,"
6814 " so we can't fix bytenr %Lu bytes %Lu\n",
6815 rec->start, rec->nr);
6818 } else if (!entry) {
6820 * Ok our backrefs were broken, we'll assume this is the
6821 * correct value and add an entry for this range.
6823 entry = malloc(sizeof(struct extent_entry));
6828 memset(entry, 0, sizeof(*entry));
6829 entry->bytenr = rec->start;
6830 entry->bytes = rec->nr;
6831 list_add_tail(&entry->list, &entries);
/* retry the vote with the extent record's own candidate added */
6835 best = find_most_right_entry(&entries);
6837 fprintf(stderr, "Backrefs and extent record evenly "
6838 "split on who is right, this is going to "
6839 "require user input to fix bytenr %Lu bytes "
6840 "%Lu\n", rec->start, rec->nr);
6847 * I don't think this can happen currently as we'll abort() if we catch
6848 * this case higher up, but in case somebody removes that we still can't
6849 * deal with it properly here yet, so just bail out of that's the case.
6851 if (best->bytenr != rec->start) {
6852 fprintf(stderr, "Extent start and backref starts don't match, "
6853 "please use btrfs-image on this file system and send "
6854 "it to a btrfs developer so they can make fsck fix "
6855 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6856 rec->start, rec->nr);
6862 * Ok great we all agreed on an extent record, let's go find the real
6863 * references and fix up the ones that don't match.
/* pass 2: rewrite every backref that disagrees with the winner */
6865 list_for_each_entry(back, &rec->backrefs, list) {
6866 if (back->full_backref || !back->is_data)
6869 dback = (struct data_backref *)back;
6872 * Still ignoring backrefs that don't have a real ref attached
6875 if (dback->found_ref == 0)
6878 if (dback->bytes == best->bytes &&
6879 dback->disk_bytenr == best->bytenr)
6882 ret = repair_ref(info, path, dback, best);
6888 * Ok we messed with the actual refs, which means we need to drop our
6889 * entire cache and go back and rescan. I know this is a huge pain and
6890 * adds a lot of extra work, but it's the only way to be safe. Once all
6891 * the backrefs agree we may not need to do anything to the extent
/* free all candidate entries before returning */
6896 while (!list_empty(&entries)) {
6897 entry = list_entry(entries.next, struct extent_entry, list);
6898 list_del_init(&entry->list);
/*
 * Handle an extent record that has duplicates: promote the duplicate
 * that came from a real BTRFS_EXTENT_ITEM_KEY to be the canonical
 * record, absorb any overlapping records into it, and re-insert it
 * into the extent cache.  Returns 1 when no duplicates remain to be
 * deleted, 0 when delete_duplicate_records() still has work to do.
 */
6904 static int process_duplicates(struct btrfs_root *root,
6905 struct cache_tree *extent_cache,
6906 struct extent_record *rec)
6908 struct extent_record *good, *tmp;
6909 struct cache_extent *cache;
6913 * If we found a extent record for this extent then return, or if we
6914 * have more than one duplicate we are likely going to need to delete
6917 if (rec->found_rec || rec->num_duplicates > 1)
6920 /* Shouldn't happen but just in case */
6921 BUG_ON(!rec->num_duplicates);
6924 * So this happens if we end up with a backref that doesn't match the
6925 * actual extent entry. So either the backref is bad or the extent
6926 * entry is bad. Either way we want to have the extent_record actually
6927 * reflect what we found in the extent_tree, so we need to take the
6928 * duplicate out and use that as the extent_record since the only way we
6929 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6931 remove_cache_extent(extent_cache, &rec->cache);
/* promote the first duplicate and reset its bookkeeping */
6933 good = list_entry(rec->dups.next, struct extent_record, list);
6934 list_del_init(&good->list);
6935 INIT_LIST_HEAD(&good->backrefs);
6936 INIT_LIST_HEAD(&good->dups);
6937 good->cache.start = good->start;
6938 good->cache.size = good->nr;
6939 good->content_checked = 0;
6940 good->owner_ref_checked = 0;
6941 good->num_duplicates = 0;
6942 good->refs = rec->refs;
6943 list_splice_init(&rec->backrefs, &good->backrefs);
/* merge in any remaining overlapping records in the cache */
6945 cache = lookup_cache_extent(extent_cache, good->start,
6949 tmp = container_of(cache, struct extent_record, cache);
6952 * If we find another overlapping extent and it's found_rec is
6953 * set then it's a duplicate and we need to try and delete
6956 if (tmp->found_rec || tmp->num_duplicates > 0) {
6957 if (list_empty(&good->list))
6958 list_add_tail(&good->list,
6959 &duplicate_extents);
6960 good->num_duplicates += tmp->num_duplicates + 1;
6961 list_splice_init(&tmp->dups, &good->dups);
6962 list_del_init(&tmp->list);
6963 list_add_tail(&tmp->list, &good->dups);
6964 remove_cache_extent(extent_cache, &tmp->cache);
6969 * Ok we have another non extent item backed extent rec, so lets
6970 * just add it to this extent and carry on like we did above.
6972 good->refs += tmp->refs;
6973 list_splice_init(&tmp->backrefs, &good->backrefs);
6974 remove_cache_extent(extent_cache, &tmp->cache);
6977 ret = insert_cache_extent(extent_cache, &good->cache);
6980 return good->num_duplicates ? 0 : 1;
/*
 * NOTE(review): elided listing -- some lines are missing (embedded
 * numbering is non-contiguous); comments hedged to the visible code.
 *
 * Delete the duplicate EXTENT_ITEMs for @rec from the extent tree,
 * keeping only the record that fully covers all duplicates ("good").
 * Walks rec->dups, moves the losers onto a local delete_list, then
 * deletes each one inside a transaction on the extent root.
 *
 * Returns a negative errno on failure, otherwise the number of entries
 * deleted (per the final "return ret ? ret : nr_del").
 */
6983 static int delete_duplicate_records(struct btrfs_root *root,
6984 struct extent_record *rec)
6986 struct btrfs_trans_handle *trans;
6987 LIST_HEAD(delete_list);
6988 struct btrfs_path *path;
6989 struct extent_record *tmp, *good, *n;
6992 struct btrfs_key key;
6994 path = btrfs_alloc_path();
7001 /* Find the record that covers all of the duplicates. */
7002 list_for_each_entry(tmp, &rec->dups, list) {
7003 if (good->start < tmp->start)
7005 if (good->nr > tmp->nr)
/* Sanity: "good" must completely cover every duplicate; partially
 * overlapping extents are not handled and are only reported. */
7008 if (tmp->start + tmp->nr < good->start + good->nr) {
7009 fprintf(stderr, "Ok we have overlapping extents that "
7010 "aren't completely covered by eachother, this "
7011 "is going to require more careful thought. "
7012 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7013 tmp->start, tmp->nr, good->start, good->nr);
7020 list_add_tail(&rec->list, &delete_list);
7022 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7025 list_move_tail(&tmp->list, &delete_list);
/* All deletions go through the extent tree root. */
7028 root = root->fs_info->extent_root;
7029 trans = btrfs_start_transaction(root, 1);
7030 if (IS_ERR(trans)) {
7031 ret = PTR_ERR(trans);
7035 list_for_each_entry(tmp, &delete_list, list) {
7036 if (tmp->found_rec == 0)
7038 key.objectid = tmp->start;
7039 key.type = BTRFS_EXTENT_ITEM_KEY;
7040 key.offset = tmp->nr;
7042 /* Shouldn't happen but just in case */
7043 if (tmp->metadata) {
7044 fprintf(stderr, "Well this shouldn't happen, extent "
7045 "record overlaps but is metadata? "
7046 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
/* -1/1: delete search with COW enabled. */
7050 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7056 ret = btrfs_del_item(trans, root, path);
7059 btrfs_release_path(path);
7062 err = btrfs_commit_transaction(trans, root);
/* Free the in-memory records we queued for deletion, then whatever is
 * left on rec->dups.  NOTE(review): the actual free call per entry is
 * among the elided lines. */
7066 while (!list_empty(&delete_list)) {
7067 tmp = list_entry(delete_list.next, struct extent_record, list);
7068 list_del_init(&tmp->list);
7074 while (!list_empty(&rec->dups)) {
7075 tmp = list_entry(rec->dups.next, struct extent_record, list);
7076 list_del_init(&tmp->list);
7080 btrfs_free_path(path);
7082 if (!ret && !nr_del)
7083 rec->num_duplicates = 0;
7085 return ret ? ret : nr_del;
/*
 * NOTE(review): elided listing -- some lines are missing; comments are
 * hedged against what is visible.
 *
 * For each data backref of @rec that has no found_ref, look up the
 * referencing file extent in its fs tree and, if that file extent's
 * disk bytenr has no extent record of its own in @extent_cache, adopt
 * the on-disk bytenr/bytes into the backref so verify_backrefs can
 * reconcile it against this record.
 */
7088 static int find_possible_backrefs(struct btrfs_fs_info *info,
7089 struct btrfs_path *path,
7090 struct cache_tree *extent_cache,
7091 struct extent_record *rec)
7093 struct btrfs_root *root;
7094 struct extent_backref *back;
7095 struct data_backref *dback;
7096 struct cache_extent *cache;
7097 struct btrfs_file_extent_item *fi;
7098 struct btrfs_key key;
7102 list_for_each_entry(back, &rec->backrefs, list) {
7103 /* Don't care about full backrefs (poor unloved backrefs) */
7104 if (back->full_backref || !back->is_data)
7107 dback = (struct data_backref *)back;
7109 /* We found this one, we don't need to do a lookup */
7110 if (dback->found_ref)
/* Resolve the subvolume root that owns this backref. */
7113 key.objectid = dback->root;
7114 key.type = BTRFS_ROOT_ITEM_KEY;
7115 key.offset = (u64)-1;
7117 root = btrfs_read_fs_root(info, &key);
7119 /* No root, definitely a bad ref, skip */
7120 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7122 /* Other err, exit */
7124 return PTR_ERR(root);
/* Look up the EXTENT_DATA item (owner inode, file offset) the
 * backref claims to come from. */
7126 key.objectid = dback->owner;
7127 key.type = BTRFS_EXTENT_DATA_KEY;
7128 key.offset = dback->offset;
7129 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7131 btrfs_release_path(path);
7134 /* Didn't find it, we can carry on */
7139 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7140 struct btrfs_file_extent_item);
7141 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7142 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7143 btrfs_release_path(path);
7144 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7146 struct extent_record *tmp;
7147 tmp = container_of(cache, struct extent_record, cache);
7150 * If we found an extent record for the bytenr for this
7151 * particular backref then we can't add it to our
7152 * current extent record. We only want to add backrefs
7153 * that don't have a corresponding extent item in the
7154 * extent tree since they likely belong to this record
7155 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk location into the backref. */
7161 dback->found_ref += 1;
7162 dback->disk_bytenr = bytenr;
7163 dback->bytes = bytes;
7166 * Set this so the verify backref code knows not to trust the
7167 * values in this backref.
7176 * Record orphan data refs into the corresponding root.
7178 * Return 0 if the extent item contains a data ref and it was recorded.
7179 * Return 1 if the extent item contains no useful data ref.
7180 * In that case, it may contain only a shared_dataref or metadata backref,
7181 * or the file extent exists (this should be handled by the extent bytenr
7183 * Return <0 if something goes wrong.
/*
 * NOTE(review): elided listing -- some lines are missing; comments are
 * hedged against the visible code.
 *
 * For every data backref of @rec that was found in the extent tree but
 * whose file extent was NOT found (found_ref == 0), allocate an
 * orphan_data_extent describing root/inode/offset/disk location and
 * queue it on the owning root's orphan_data_extents list so the
 * inode/file-extent rebuild code can recreate it later.
 *
 * Returns !recorded_data_ref: 0 if at least one orphan was recorded,
 * 1 otherwise (error paths are among the elided lines).
 */
7185 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7186 struct extent_record *rec)
7188 struct btrfs_key key;
7189 struct btrfs_root *dest_root;
7190 struct extent_backref *back;
7191 struct data_backref *dback;
7192 struct orphan_data_extent *orphan;
7193 struct btrfs_path *path;
7194 int recorded_data_ref = 0;
7199 path = btrfs_alloc_path();
7202 list_for_each_entry(back, &rec->backrefs, list) {
/* Only plain data backrefs that the extent tree actually holds. */
7203 if (back->full_backref || !back->is_data ||
7204 !back->found_extent_tree)
7206 dback = (struct data_backref *)back;
7207 if (dback->found_ref)
7209 key.objectid = dback->root;
7210 key.type = BTRFS_ROOT_ITEM_KEY;
7211 key.offset = (u64)-1;
7213 dest_root = btrfs_read_fs_root(fs_info, &key);
7215 /* For non-exist root we just skip it */
7216 if (IS_ERR(dest_root) || !dest_root)
7219 key.objectid = dback->owner;
7220 key.type = BTRFS_EXTENT_DATA_KEY;
7221 key.offset = dback->offset;
7223 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7225 * For ret < 0, it's OK since the fs-tree may be corrupted,
7226 * we need to record it for inode/file extent rebuild.
7227 * For ret > 0, we record it only for file extent rebuild.
7228 * For ret == 0, the file extent exists but only bytenr
7229 * mismatch, let the original bytenr fix routine to handle,
7235 orphan = malloc(sizeof(*orphan));
7240 INIT_LIST_HEAD(&orphan->list);
7241 orphan->root = dback->root;
7242 orphan->objectid = dback->owner;
7243 orphan->offset = dback->offset;
7244 orphan->disk_bytenr = rec->cache.start;
7245 orphan->disk_len = rec->cache.size;
7246 list_add(&dest_root->orphan_data_extents, &orphan->list);
7247 recorded_data_ref = 1;
7250 btrfs_free_path(path);
7252 return !recorded_data_ref;
7258 * when an incorrect extent item is found, this will delete
7259 * all of the existing entries for it and recreate them
7260 * based on what the tree scan found.
/*
 * NOTE(review): elided listing -- some lines are missing; comments are
 * hedged against the visible code.
 *
 * Rebuild the extent-tree entries for @rec from the backrefs the tree
 * scan discovered: reconcile the backrefs (find_possible_backrefs +
 * verify_backrefs), delete all existing records for the range inside a
 * transaction, then re-record every backref with found_ref set --
 * unless the block is on the corrupt_blocks list.
 */
7262 static int fixup_extent_refs(struct btrfs_fs_info *info,
7263 struct cache_tree *extent_cache,
7264 struct extent_record *rec)
7266 struct btrfs_trans_handle *trans = NULL;
7268 struct btrfs_path *path;
7269 struct list_head *cur = rec->backrefs.next;
7270 struct cache_extent *cache;
7271 struct extent_backref *back;
7275 if (rec->flag_block_full_backref)
7276 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7278 path = btrfs_alloc_path();
7282 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7284 * Sometimes the backrefs themselves are so broken they don't
7285 * get attached to any meaningful rec, so first go back and
7286 * check any of our backrefs that we couldn't find and throw
7287 * them into the list if we find the backref so that
7288 * verify_backrefs can figure out what to do.
7290 ret = find_possible_backrefs(info, path, extent_cache, rec);
7295 /* step one, make sure all of the backrefs agree */
7296 ret = verify_backrefs(info, path, rec);
7300 trans = btrfs_start_transaction(info->extent_root, 1);
7301 if (IS_ERR(trans)) {
7302 ret = PTR_ERR(trans);
7306 /* step two, delete all the existing records */
7307 ret = delete_extent_records(trans, info->extent_root, path,
7308 rec->start, rec->max_size);
7313 /* was this block corrupt? If so, don't add references to it */
7314 cache = lookup_cache_extent(info->corrupt_blocks,
7315 rec->start, rec->max_size);
7321 /* step three, recreate all the refs we did find */
7322 while(cur != &rec->backrefs) {
7323 back = list_entry(cur, struct extent_backref, list);
7327 * if we didn't find any references, don't create a
7330 if (!back->found_ref)
7333 rec->bad_full_backref = 0;
7334 ret = record_extent(trans, info, path, rec, back, allocated, flags);
/* Commit whatever we managed to rewrite before returning. */
7342 int err = btrfs_commit_transaction(trans, info->extent_root);
7347 btrfs_free_path(path);
/*
 * NOTE(review): elided listing -- some lines are missing; comments are
 * hedged against the visible code.
 *
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item for
 * @rec, according to rec->flag_block_full_backref, inside a transaction
 * on the extent root.  Metadata records are addressed via
 * METADATA_ITEM_KEY (offset = level), data via EXTENT_ITEM_KEY
 * (offset = size).
 */
7351 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7352 struct extent_record *rec)
7354 struct btrfs_trans_handle *trans;
7355 struct btrfs_root *root = fs_info->extent_root;
7356 struct btrfs_path *path;
7357 struct btrfs_extent_item *ei;
7358 struct btrfs_key key;
7362 key.objectid = rec->start;
7363 if (rec->metadata) {
7364 key.type = BTRFS_METADATA_ITEM_KEY;
7365 key.offset = rec->info_level;
7367 key.type = BTRFS_EXTENT_ITEM_KEY;
7368 key.offset = rec->max_size;
7371 path = btrfs_alloc_path();
7375 trans = btrfs_start_transaction(root, 0);
7376 if (IS_ERR(trans)) {
7377 btrfs_free_path(path);
7378 return PTR_ERR(trans);
/* 0/1: read search with COW enabled so the leaf is writable. */
7381 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7383 btrfs_free_path(path);
7384 btrfs_commit_transaction(trans, root);
7387 fprintf(stderr, "Didn't find extent for %llu\n",
7388 (unsigned long long)rec->start);
7389 btrfs_free_path(path);
7390 btrfs_commit_transaction(trans, root);
7394 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7395 struct btrfs_extent_item);
7396 flags = btrfs_extent_flags(path->nodes[0], ei);
7397 if (rec->flag_block_full_backref) {
7398 fprintf(stderr, "setting full backref on %llu\n",
7399 (unsigned long long)key.objectid);
7400 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7402 fprintf(stderr, "clearing full backref on %llu\n",
7403 (unsigned long long)key.objectid);
7404 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7406 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7407 btrfs_mark_buffer_dirty(path->nodes[0]);
7408 btrfs_free_path(path);
7409 return btrfs_commit_transaction(trans, root);
7412 /* right now we only prune from the extent allocation tree */
/*
 * NOTE(review): elided listing -- some lines are missing; comments are
 * hedged against the visible code.
 *
 * Remove the node pointer to one corrupt block from the extent
 * allocation tree: search down to the corrupt block's parent
 * (path.lowest_level = parent level), locate the slot whose blockptr is
 * the corrupt bytenr -- first trying the slot the search returned, then
 * scanning the whole node -- and delete that pointer.
 */
7413 static int prune_one_block(struct btrfs_trans_handle *trans,
7414 struct btrfs_fs_info *info,
7415 struct btrfs_corrupt_block *corrupt)
7418 struct btrfs_path path;
7419 struct extent_buffer *eb;
7423 int level = corrupt->level + 1;
7425 btrfs_init_path(&path);
7427 /* we want to stop at the parent to our busted block */
7428 path.lowest_level = level;
7430 ret = btrfs_search_slot(trans, info->extent_root,
7431 &corrupt->key, &path, -1, 1);
7436 eb = path.nodes[level];
7443 * hopefully the search gave us the block we want to prune,
7444 * lets try that first
7446 slot = path.slots[level];
7447 found = btrfs_node_blockptr(eb, slot);
7448 if (found == corrupt->cache.start)
7451 nritems = btrfs_header_nritems(eb);
7453 /* the search failed, lets scan this node and hope we find it */
7454 for (slot = 0; slot < nritems; slot++) {
7455 found = btrfs_node_blockptr(eb, slot);
7456 if (found == corrupt->cache.start)
7460 * we couldn't find the bad block. TODO, search all the nodes for pointers
7463 if (eb == info->extent_root->node) {
7468 btrfs_release_path(&path);
7473 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7474 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7477 btrfs_release_path(&path);
/*
 * NOTE(review): elided listing -- the loop construct around the cache
 * walk is among the missing lines; comments hedged accordingly.
 *
 * Walk info->corrupt_blocks, prune each corrupt block's pointer from
 * the extent root via prune_one_block(), drop the cache entry, and
 * commit the transaction.  The transaction is started lazily on the
 * first corrupt block found.
 */
7483 struct btrfs_trans_handle *trans = NULL;
7484 struct cache_extent *cache;
7485 struct btrfs_corrupt_block *corrupt;
7488 cache = search_cache_extent(info->corrupt_blocks, 0);
7492 trans = btrfs_start_transaction(info->extent_root, 1);
7494 return PTR_ERR(trans);
7496 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7497 prune_one_block(trans, info, corrupt);
7498 remove_cache_extent(info->corrupt_blocks, cache);
7501 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * NOTE(review): elided listing -- the surrounding loop lines are
 * missing; comments hedged accordingly.
 *
 * Invalidate the cached free-space state: clear every EXTENT_DIRTY
 * range in fs_info->free_space_cache and step through the block groups
 * so they will be re-cached from scratch.
 */
7505 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7507 struct btrfs_block_group_cache *cache;
7512 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7513 &start, &end, EXTENT_DIRTY);
7516 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7522 cache = btrfs_lookup_first_block_group(fs_info, start);
7527 start = cache->key.objectid + cache->key.offset;
/*
 * NOTE(review): elided listing -- many lines (braces, continue/goto,
 * declarations) are missing here; comments are hedged to the visible
 * lines only.
 *
 * Top-level extent reference verifier.  For every extent_record in
 * @extent_cache: report/repair duplicate extent items, ref-count
 * mismatches, backpointer mismatches, owner-ref failures, bad
 * full-backref flags, stripe-boundary-crossing metadata and
 * chunk-type mismatches.  In repair mode the problem ranges are first
 * pinned as excluded so repairs never allocate from them, and block
 * accounting is fixed up in a final transaction.
 */
7531 static int check_extent_refs(struct btrfs_root *root,
7532 struct cache_tree *extent_cache)
7534 struct extent_record *rec;
7535 struct cache_extent *cache;
7544 * if we're doing a repair, we have to make sure
7545 * we don't allocate from the problem extents.
7546 * In the worst case, this will be all the
7549 cache = search_cache_extent(extent_cache, 0);
7551 rec = container_of(cache, struct extent_record, cache);
7552 set_extent_dirty(root->fs_info->excluded_extents,
7554 rec->start + rec->max_size - 1,
7556 cache = next_cache_extent(cache);
7559 /* pin down all the corrupted blocks too */
7560 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7562 set_extent_dirty(root->fs_info->excluded_extents,
7564 cache->start + cache->size - 1,
7566 cache = next_cache_extent(cache);
7568 prune_corrupt_blocks(root->fs_info);
7569 reset_cached_block_groups(root->fs_info);
7572 reset_cached_block_groups(root->fs_info);
7575 * We need to delete any duplicate entries we find first otherwise we
7576 * could mess up the extent tree when we have backrefs that actually
7577 * belong to a different extent item and not the weird duplicate one.
7579 while (repair && !list_empty(&duplicate_extents)) {
7580 rec = list_entry(duplicate_extents.next, struct extent_record,
7582 list_del_init(&rec->list);
7584 /* Sometimes we can find a backref before we find an actual
7585 * extent, so we need to process it a little bit to see if there
7586 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7587 * if this is a backref screwup. If we need to delete stuff
7588 * process_duplicates() will return 0, otherwise it will return
7591 if (process_duplicates(root, extent_cache, rec))
7593 ret = delete_duplicate_records(root, rec);
7597 * delete_duplicate_records will return the number of entries
7598 * deleted, so if it's greater than 0 then we know we actually
7599 * did something and we need to remove.
/* Main per-record check loop.  NOTE(review): the loop construct and
 * the cur_err/fixed/recorded bookkeeping lines are elided. */
7613 cache = search_cache_extent(extent_cache, 0);
7616 rec = container_of(cache, struct extent_record, cache);
7617 if (rec->num_duplicates) {
7618 fprintf(stderr, "extent item %llu has multiple extent "
7619 "items\n", (unsigned long long)rec->start);
7624 if (rec->refs != rec->extent_item_refs) {
7625 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7626 (unsigned long long)rec->start,
7627 (unsigned long long)rec->nr);
7628 fprintf(stderr, "extent item %llu, found %llu\n",
7629 (unsigned long long)rec->extent_item_refs,
7630 (unsigned long long)rec->refs);
7631 ret = record_orphan_data_extents(root->fs_info, rec);
7638 * we can't use the extent to repair file
7639 * extent, let the fallback method handle it.
7641 if (!fixed && repair) {
7642 ret = fixup_extent_refs(
7653 if (all_backpointers_checked(rec, 1)) {
7654 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7655 (unsigned long long)rec->start,
7656 (unsigned long long)rec->nr);
7658 if (!fixed && !recorded && repair) {
7659 ret = fixup_extent_refs(root->fs_info,
7668 if (!rec->owner_ref_checked) {
7669 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7670 (unsigned long long)rec->start,
7671 (unsigned long long)rec->nr);
7672 if (!fixed && !recorded && repair) {
7673 ret = fixup_extent_refs(root->fs_info,
7682 if (rec->bad_full_backref) {
7683 fprintf(stderr, "bad full backref, on [%llu]\n",
7684 (unsigned long long)rec->start);
7686 ret = fixup_extent_flags(root->fs_info, rec);
7695 * Although it's not a extent ref's problem, we reuse this
7696 * routine for error reporting.
7697 * No repair function yet.
7699 if (rec->crossing_stripes) {
7701 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7702 rec->start, rec->start + rec->max_size);
7707 if (rec->wrong_chunk_type) {
7709 "bad extent [%llu, %llu), type mismatch with chunk\n",
7710 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it from the cache and, if it came out
 * clean (or was fixed), un-exclude its range again. */
7715 remove_cache_extent(extent_cache, cache);
7716 free_all_extent_backrefs(rec);
7717 if (!init_extent_tree && repair && (!cur_err || fixed))
7718 clear_extent_dirty(root->fs_info->excluded_extents,
7720 rec->start + rec->max_size - 1,
7726 if (ret && ret != -EAGAIN) {
7727 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7730 struct btrfs_trans_handle *trans;
7732 root = root->fs_info->extent_root;
7733 trans = btrfs_start_transaction(root, 1);
7734 if (IS_ERR(trans)) {
7735 ret = PTR_ERR(trans);
7739 btrfs_fix_block_accounting(trans, root);
7740 ret = btrfs_commit_transaction(trans, root);
7745 fprintf(stderr, "repaired damaged extent references\n");
/*
 * NOTE(review): elided listing -- the trailing else/return lines are
 * missing from this view.
 *
 * Compute the per-device stripe length for a chunk of @length bytes
 * with @num_stripes stripes, by RAID profile:
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes (two copies)
 *   RAID5:  length / (num_stripes - 1) (one parity stripe)
 *   RAID6:  length / (num_stripes - 2) (two parity stripes)
 *   otherwise (single/DUP/RAID1): the full length.
 * NOTE(review): no guard visible for num_stripes <= 1 (RAID5) or <= 2
 * (RAID6) -- presumably callers guarantee valid stripe counts; a zero
 * divisor here would be UB.  Verify against callers.
 */
7751 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7755 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7756 stripe_size = length;
7757 stripe_size /= num_stripes;
7758 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7759 stripe_size = length * 2;
7760 stripe_size /= num_stripes;
7761 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7762 stripe_size = length;
7763 stripe_size /= (num_stripes - 1);
7764 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7765 stripe_size = length;
7766 stripe_size /= (num_stripes - 2);
7768 stripe_size = length;
7774 * Check the chunk against its block group/dev extent list refs:
7775 * Return 0 if all refs seem valid.
7776 * Return 1 if some refs seem valid but a later check is needed to rebuild
7777 * refs (e.g. a missing block group requires searching the extent tree to
7777 * rebuild it).
7778 * Return -1 if essential refs are missing and we are unable to rebuild.
/*
 * NOTE(review): elided listing -- braces, the silent/err handling and
 * some format arguments are missing; comments hedged to visible lines.
 *
 * Cross-check one chunk record against (a) its block group item --
 * matching objectid/offset/flags -- and (b) one dev extent per stripe
 * -- matching devid/offset/chunk_offset/length, where the expected
 * length comes from calc_stripe_length().  Matching records are claimed
 * (moved off the orphan lists onto the chunk's own lists); mismatches
 * are reported to stderr.
 */
7780 static int check_chunk_refs(struct chunk_record *chunk_rec,
7781 struct block_group_tree *block_group_cache,
7782 struct device_extent_tree *dev_extent_cache,
7785 struct cache_extent *block_group_item;
7786 struct block_group_record *block_group_rec;
7787 struct cache_extent *dev_extent_item;
7788 struct device_extent_record *dev_extent_rec;
7792 int metadump_v2 = 0;
7796 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7799 if (block_group_item) {
7800 block_group_rec = container_of(block_group_item,
7801 struct block_group_record,
7803 if (chunk_rec->length != block_group_rec->offset ||
7804 chunk_rec->offset != block_group_rec->objectid ||
7806 chunk_rec->type_flags != block_group_rec->flags)) {
7809 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7810 chunk_rec->objectid,
7815 chunk_rec->type_flags,
7816 block_group_rec->objectid,
7817 block_group_rec->type,
7818 block_group_rec->offset,
7819 block_group_rec->offset,
7820 block_group_rec->objectid,
7821 block_group_rec->flags);
/* Claim the block group for this chunk. */
7824 list_del_init(&block_group_rec->list);
7825 chunk_rec->bg_rec = block_group_rec;
7830 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7831 chunk_rec->objectid,
7836 chunk_rec->type_flags);
/* Each stripe must have a dev extent of exactly this length. */
7843 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7844 chunk_rec->num_stripes);
7845 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7846 devid = chunk_rec->stripes[i].devid;
7847 offset = chunk_rec->stripes[i].offset;
7848 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7849 devid, offset, length);
7850 if (dev_extent_item) {
7851 dev_extent_rec = container_of(dev_extent_item,
7852 struct device_extent_record,
7854 if (dev_extent_rec->objectid != devid ||
7855 dev_extent_rec->offset != offset ||
7856 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7857 dev_extent_rec->length != length) {
7860 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7861 chunk_rec->objectid,
7864 chunk_rec->stripes[i].devid,
7865 chunk_rec->stripes[i].offset,
7866 dev_extent_rec->objectid,
7867 dev_extent_rec->offset,
7868 dev_extent_rec->length);
7871 list_move(&dev_extent_rec->chunk_list,
7872 &chunk_rec->dextents);
7877 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7878 chunk_rec->objectid,
7881 chunk_rec->stripes[i].devid,
7882 chunk_rec->stripes[i].offset);
7889 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * NOTE(review): elided listing -- some lines are missing; comments
 * hedged to the visible code.
 *
 * Verify every chunk in @chunk_cache via check_chunk_refs() and sort
 * the chunks into the caller-provided @good / @rebuild / @bad lists
 * (each list pointer may be NULL).  Afterwards, any block group or dev
 * extent left unclaimed on the orphan lists is reported as having no
 * matching chunk.  @silent suppresses the per-item messages
 * (presumably -- the silent checks themselves are among the elided
 * lines; verify against the full source).
 */
7890 int check_chunks(struct cache_tree *chunk_cache,
7891 struct block_group_tree *block_group_cache,
7892 struct device_extent_tree *dev_extent_cache,
7893 struct list_head *good, struct list_head *bad,
7894 struct list_head *rebuild, int silent)
7896 struct cache_extent *chunk_item;
7897 struct chunk_record *chunk_rec;
7898 struct block_group_record *bg_rec;
7899 struct device_extent_record *dext_rec;
7903 chunk_item = first_cache_extent(chunk_cache);
7904 while (chunk_item) {
7905 chunk_rec = container_of(chunk_item, struct chunk_record,
7907 err = check_chunk_refs(chunk_rec, block_group_cache,
7908 dev_extent_cache, silent);
7911 if (err == 0 && good)
7912 list_add_tail(&chunk_rec->list, good);
7913 if (err > 0 && rebuild)
7914 list_add_tail(&chunk_rec->list, rebuild);
7916 list_add_tail(&chunk_rec->list, bad);
7917 chunk_item = next_cache_extent(chunk_item);
/* Anything still on the orphan lists has no owning chunk. */
7920 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7923 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7931 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7935 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * NOTE(review): elided listing -- loop braces and the return statements
 * are missing; comments hedged to the visible code.
 *
 * Sum the lengths of all dev extents belonging to @dev_rec->devid and
 * compare the total against the device item's byte_used; a mismatch is
 * reported to stderr.  Matched dev extents are taken off the
 * no-device orphan list as they are counted.
 */
7946 static int check_device_used(struct device_record *dev_rec,
7947 struct device_extent_tree *dext_cache)
7949 struct cache_extent *cache;
7950 struct device_extent_record *dev_extent_rec;
7953 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7955 dev_extent_rec = container_of(cache,
7956 struct device_extent_record,
/* Stop once the walk leaves this device's extents. */
7958 if (dev_extent_rec->objectid != dev_rec->devid)
7961 list_del_init(&dev_extent_rec->device_list);
7962 total_byte += dev_extent_rec->length;
7963 cache = next_cache_extent(cache);
7966 if (total_byte != dev_rec->byte_used) {
7968 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7969 total_byte, dev_rec->byte_used, dev_rec->objectid,
7970 dev_rec->type, dev_rec->offset);
7977 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * NOTE(review): elided listing -- some lines are missing; comments
 * hedged to the visible code.
 *
 * Verify every device item in @dev_cache against the dev extents via
 * check_device_used(), then report any dev extent left on the
 * no-device orphan list (an extent whose device item is missing).
 */
7978 static int check_devices(struct rb_root *dev_cache,
7979 struct device_extent_tree *dev_extent_cache)
7981 struct rb_node *dev_node;
7982 struct device_record *dev_rec;
7983 struct device_extent_record *dext_rec;
7987 dev_node = rb_first(dev_cache);
7989 dev_rec = container_of(dev_node, struct device_record, node);
7990 err = check_device_used(dev_rec, dev_extent_cache);
7994 dev_node = rb_next(dev_node);
7996 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7999 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8000 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * NOTE(review): elided listing -- the allocation-failure branch and the
 * drop_key NULL check are among the missing lines.
 *
 * Allocate a root_item_record describing one tree root to scan
 * (bytenr, objectid, level, drop progress, etc.) and append it to
 * @head.  @drop_key is copied when supplied (used for partially
 * dropped snapshot roots).
 */
8007 static int add_root_item_to_list(struct list_head *head,
8008 u64 objectid, u64 bytenr, u64 last_snapshot,
8009 u8 level, u8 drop_level,
8010 int level_size, struct btrfs_key *drop_key)
8013 struct root_item_record *ri_rec;
8014 ri_rec = malloc(sizeof(*ri_rec));
8017 ri_rec->bytenr = bytenr;
8018 ri_rec->objectid = objectid;
8019 ri_rec->level = level;
8020 ri_rec->level_size = level_size;
8021 ri_rec->drop_level = drop_level;
8022 ri_rec->last_snapshot = last_snapshot;
8024 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8025 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list, unlinking every root_item_record.  NOTE(review): the
 * free() of each record is among the elided lines of this listing.
 */
8030 static void free_root_item_list(struct list_head *list)
8032 struct root_item_record *ri_rec;
8034 while (!list_empty(list)) {
8035 ri_rec = list_first_entry(list, struct root_item_record,
8037 list_del_init(&ri_rec->list);
/*
 * NOTE(review): elided listing -- error handling and loop braces are
 * missing; comments hedged to the visible code.
 *
 * Scan each queued tree root in @list: read its root block, seed the
 * pending/seen/nodes caches via add_root_to_pending(), then drive
 * run_next_block() until that root is exhausted, before moving on to
 * the next record.  A final run_next_block() pass (rec == NULL)
 * drains whatever work remains.  The many cache-tree parameters are
 * threaded straight through to run_next_block().
 */
8042 static int deal_root_from_list(struct list_head *list,
8043 struct btrfs_root *root,
8044 struct block_info *bits,
8046 struct cache_tree *pending,
8047 struct cache_tree *seen,
8048 struct cache_tree *reada,
8049 struct cache_tree *nodes,
8050 struct cache_tree *extent_cache,
8051 struct cache_tree *chunk_cache,
8052 struct rb_root *dev_cache,
8053 struct block_group_tree *block_group_cache,
8054 struct device_extent_tree *dev_extent_cache)
8059 while (!list_empty(list)) {
8060 struct root_item_record *rec;
8061 struct extent_buffer *buf;
8062 rec = list_entry(list->next,
8063 struct root_item_record, list);
8065 buf = read_tree_block(root->fs_info->tree_root,
8066 rec->bytenr, rec->level_size, 0);
8067 if (!extent_buffer_uptodate(buf)) {
8068 free_extent_buffer(buf);
8072 add_root_to_pending(buf, extent_cache, pending,
8073 seen, nodes, rec->objectid);
8075 * To rebuild extent tree, we need deal with snapshot
8076 * one by one, otherwise we deal with node firstly which
8077 * can maximize readahead.
8080 ret = run_next_block(root, bits, bits_nr, &last,
8081 pending, seen, reada, nodes,
8082 extent_cache, chunk_cache,
8083 dev_cache, block_group_cache,
8084 dev_extent_cache, rec);
8088 free_extent_buffer(buf);
8089 list_del(&rec->list);
/* Final drain with no root record attached. */
8095 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8096 reada, nodes, extent_cache, chunk_cache,
8097 dev_cache, block_group_cache,
8098 dev_extent_cache, NULL);
/*
 * NOTE(review): elided listing -- labels, gotos, braces and several
 * declarations are missing from this view; comments are hedged to the
 * visible lines only.
 *
 * Orchestrate the full chunk/extent/device consistency pass:
 *  1. initialise all the per-pass caches and hook them into fs_info
 *     (excluded_extents, fsck_extent_cache, corrupt_blocks, ...);
 *  2. queue the tree root and chunk root, then every ROOT_ITEM found
 *     in the tree root -- normal roots on normal_trees, roots with a
 *     drop_progress in flight on dropping_trees;
 *  3. scan both lists via deal_root_from_list();
 *  4. run check_chunks(), check_extent_refs(), check_devices();
 *  5. tear everything down.  The duplicated cleanup sequence near the
 *     end is presumably the out/error path of a goto ladder whose
 *     labels are among the elided lines -- verify against full source.
 */
8108 static int check_chunks_and_extents(struct btrfs_root *root)
8110 struct rb_root dev_cache;
8111 struct cache_tree chunk_cache;
8112 struct block_group_tree block_group_cache;
8113 struct device_extent_tree dev_extent_cache;
8114 struct cache_tree extent_cache;
8115 struct cache_tree seen;
8116 struct cache_tree pending;
8117 struct cache_tree reada;
8118 struct cache_tree nodes;
8119 struct extent_io_tree excluded_extents;
8120 struct cache_tree corrupt_blocks;
8121 struct btrfs_path path;
8122 struct btrfs_key key;
8123 struct btrfs_key found_key;
8125 struct block_info *bits;
8127 struct extent_buffer *leaf;
8129 struct btrfs_root_item ri;
8130 struct list_head dropping_trees;
8131 struct list_head normal_trees;
8132 struct btrfs_root *root1;
8137 dev_cache = RB_ROOT;
8138 cache_tree_init(&chunk_cache);
8139 block_group_tree_init(&block_group_cache);
8140 device_extent_tree_init(&dev_extent_cache);
8142 cache_tree_init(&extent_cache);
8143 cache_tree_init(&seen);
8144 cache_tree_init(&pending);
8145 cache_tree_init(&nodes);
8146 cache_tree_init(&reada);
8147 cache_tree_init(&corrupt_blocks);
8148 extent_io_tree_init(&excluded_extents);
8149 INIT_LIST_HEAD(&dropping_trees);
8150 INIT_LIST_HEAD(&normal_trees);
/* Expose the per-pass state through fs_info for the helpers. */
8153 root->fs_info->excluded_extents = &excluded_extents;
8154 root->fs_info->fsck_extent_cache = &extent_cache;
8155 root->fs_info->free_extent_hook = free_extent_hook;
8156 root->fs_info->corrupt_blocks = &corrupt_blocks;
8160 bits = malloc(bits_nr * sizeof(struct block_info));
8166 if (ctx.progress_enabled) {
8167 ctx.tp = TASK_EXTENTS;
8168 task_start(ctx.info);
/* Seed the scan with the tree root and chunk root. */
8172 root1 = root->fs_info->tree_root;
8173 level = btrfs_header_level(root1->node);
8174 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8175 root1->node->start, 0, level, 0,
8176 btrfs_level_size(root1, level), NULL);
8179 root1 = root->fs_info->chunk_root;
8180 level = btrfs_header_level(root1->node);
8181 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8182 root1->node->start, 0, level, 0,
8183 btrfs_level_size(root1, level), NULL);
/* Enumerate every ROOT_ITEM in the tree of tree roots. */
8186 btrfs_init_path(&path);
8189 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8190 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8195 leaf = path.nodes[0];
8196 slot = path.slots[0];
8197 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8198 ret = btrfs_next_leaf(root, &path);
8201 leaf = path.nodes[0];
8202 slot = path.slots[0];
8204 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8205 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8206 unsigned long offset;
8209 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8210 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8211 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid == 0 means no snapshot drop in flight. */
8212 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8213 level = btrfs_root_level(&ri);
8214 level_size = btrfs_level_size(root, level);
8215 ret = add_root_item_to_list(&normal_trees,
8217 btrfs_root_bytenr(&ri),
8218 last_snapshot, level,
8219 0, level_size, NULL);
8223 level = btrfs_root_level(&ri);
8224 level_size = btrfs_level_size(root, level);
8225 objectid = found_key.objectid;
8226 btrfs_disk_key_to_cpu(&found_key,
8228 ret = add_root_item_to_list(&dropping_trees,
8230 btrfs_root_bytenr(&ri),
8231 last_snapshot, level,
8233 level_size, &found_key);
8240 btrfs_release_path(&path);
8243 * check_block can return -EAGAIN if it fixes something, please keep
8244 * this in mind when dealing with return values from these functions, if
8245 * we get -EAGAIN we want to fall through and restart the loop.
8247 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8248 &seen, &reada, &nodes, &extent_cache,
8249 &chunk_cache, &dev_cache, &block_group_cache,
8256 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8257 &pending, &seen, &reada, &nodes,
8258 &extent_cache, &chunk_cache, &dev_cache,
8259 &block_group_cache, &dev_extent_cache);
8266 ret = check_chunks(&chunk_cache, &block_group_cache,
8267 &dev_extent_cache, NULL, NULL, NULL, 0);
8274 ret = check_extent_refs(root, &extent_cache);
8281 ret = check_devices(&dev_cache, &dev_extent_cache);
8286 task_stop(ctx.info);
/* Success-path teardown: unhook fs_info and free every cache. */
8288 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8289 extent_io_tree_cleanup(&excluded_extents);
8290 root->fs_info->fsck_extent_cache = NULL;
8291 root->fs_info->free_extent_hook = NULL;
8292 root->fs_info->corrupt_blocks = NULL;
8293 root->fs_info->excluded_extents = NULL;
8296 free_chunk_cache_tree(&chunk_cache);
8297 free_device_cache_tree(&dev_cache);
8298 free_block_group_tree(&block_group_cache);
8299 free_device_extent_tree(&dev_extent_cache);
8300 free_extent_cache_tree(&seen);
8301 free_extent_cache_tree(&pending);
8302 free_extent_cache_tree(&reada);
8303 free_extent_cache_tree(&nodes);
/* Error-path teardown (presumably behind an elided label). */
8306 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8307 free_extent_cache_tree(&seen);
8308 free_extent_cache_tree(&pending);
8309 free_extent_cache_tree(&reada);
8310 free_extent_cache_tree(&nodes);
8311 free_chunk_cache_tree(&chunk_cache);
8312 free_block_group_tree(&block_group_cache);
8313 free_device_cache_tree(&dev_cache);
8314 free_device_extent_tree(&dev_extent_cache);
8315 free_extent_record_cache(root->fs_info, &extent_cache);
8316 free_root_item_list(&normal_trees);
8317 free_root_item_list(&dropping_trees);
8318 extent_io_tree_cleanup(&excluded_extents);
/*
 * NOTE(review): elided listing -- the overwrite branch and some braces
 * are missing; comments hedged to the visible code.
 *
 * Replace @root's node with a freshly initialised empty leaf: allocate
 * (or, with @overwrite, presumably reuse -- the branch is elided) a
 * level-0 block, stamp a clean header (bytenr, generation, owner,
 * fsid, chunk-tree uuid), and install it as the new root.  If the new
 * block landed on the same bytenr as the old root, the root item is
 * updated in the tree of tree roots instead of going through the
 * normal dirty-root path.
 */
8322 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8323 struct btrfs_root *root, int overwrite)
8325 struct extent_buffer *c;
8326 struct extent_buffer *old = root->node;
8329 struct btrfs_disk_key disk_key = {0,0,0};
8335 extent_buffer_get(c);
8338 c = btrfs_alloc_free_block(trans, root,
8339 btrfs_level_size(root, 0),
8340 root->root_key.objectid,
8341 &disk_key, level, 0, 0);
8344 extent_buffer_get(c);
/* Build a pristine header on the new root block. */
8348 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8349 btrfs_set_header_level(c, level);
8350 btrfs_set_header_bytenr(c, c->start);
8351 btrfs_set_header_generation(c, trans->transid);
8352 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8353 btrfs_set_header_owner(c, root->root_key.objectid);
8355 write_extent_buffer(c, root->fs_info->fsid,
8356 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8358 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8359 btrfs_header_chunk_tree_uuid(c),
8362 btrfs_mark_buffer_dirty(c);
8364 * this case can happen in the following case:
8366 * 1.overwrite previous root.
8368 * 2.reinit reloc data root, this is because we skip pin
8369 * down reloc data tree before which means we can allocate
8370 * same block bytenr here.
8372 if (old->start == c->start) {
8373 btrfs_set_root_generation(&root->root_item,
8375 root->root_item.level = btrfs_header_level(root->node);
8376 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8377 &root->root_key, &root->root_item);
8379 free_extent_buffer(c);
8383 free_extent_buffer(old);
8385 add_root_to_dirty_list(root);
/*
 * Recursively mark every metadata block reachable from @eb as pinned so the
 * extent-tree re-init cannot allocate over live metadata.
 *
 * @tree_root: non-zero when @eb belongs to the tree root; in that case leaf
 * items are scanned for ROOT_ITEMs and each referenced subtree root is pinned
 * recursively as well.
 *
 * NOTE(review): lines are elided in this excerpt (loop/branch closers, some
 * error paths not visible).
 */
8389 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8390 struct extent_buffer *eb, int tree_root)
8392 struct extent_buffer *tmp;
8393 struct btrfs_root_item *ri;
8394 struct btrfs_key key;
8397 int level = btrfs_header_level(eb);
8403 * If we have pinned this block before, don't pin it again.
8404 * This can not only avoid forever loop with broken filesystem
8405 * but also give us some speedups.
8407 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8408 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8411 btrfs_pin_extent(fs_info, eb->start, eb->len);
8413 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8414 nritems = btrfs_header_nritems(eb);
8415 for (i = 0; i < nritems; i++) {
/* Leaf of the tree root: chase ROOT_ITEMs down into each subtree. */
8417 btrfs_item_key_to_cpu(eb, &key, i);
8418 if (key.type != BTRFS_ROOT_ITEM_KEY)
8420 /* Skip the extent root and reloc roots */
8421 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8422 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8423 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8425 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8426 bytenr = btrfs_disk_root_bytenr(eb, ri);
8429 * If at any point we start needing the real root we
8430 * will have to build a stump root for the root we are
8431 * in, but for now this doesn't actually use the root so
8432 * just pass in extent_root.
8434 tmp = read_tree_block(fs_info->extent_root, bytenr,
8436 if (!extent_buffer_uptodate(tmp)) {
8437 fprintf(stderr, "Error reading root block\n");
8440 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8441 free_extent_buffer(tmp);
/* Interior node: pin (or recurse into) each child block. */
8445 bytenr = btrfs_node_blockptr(eb, i);
8447 /* If we aren't the tree root don't read the block */
8448 if (level == 1 && !tree_root) {
8449 btrfs_pin_extent(fs_info, bytenr, leafsize);
8453 tmp = read_tree_block(fs_info->extent_root, bytenr,
8455 if (!extent_buffer_uptodate(tmp)) {
8456 fprintf(stderr, "Error reading tree block\n");
8459 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8460 free_extent_buffer(tmp);
/*
 * Pin all metadata reachable from the chunk root and the tree root (the
 * tree root walk also descends into every subtree via its ROOT_ITEMs).
 * Run before re-initializing the extent tree so live metadata cannot be
 * overwritten by new allocations.
 * NOTE(review): the error check between the two calls is elided in this
 * excerpt.
 */
8469 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8473 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8477 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group records from the chunk tree alone,
 * ignoring whatever (possibly bogus) block group items are on disk, and mark
 * each chunk's range dirty in the free space cache so allocation can proceed.
 *
 * NOTE(review): lines are elided in this excerpt (loop headers, error paths,
 * and the trailing block group iteration body are not fully visible).
 */
8480 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8482 struct btrfs_block_group_cache *cache;
8483 struct btrfs_path *path;
8484 struct extent_buffer *leaf;
8485 struct btrfs_chunk *chunk;
8486 struct btrfs_key key;
8490 path = btrfs_alloc_path();
8495 key.type = BTRFS_CHUNK_ITEM_KEY;
8498 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8500 btrfs_free_path(path);
8505 * We do this in case the block groups were screwed up and had alloc
8506 * bits that aren't actually set on the chunks. This happens with
8507 * restored images every time and could happen in real life I guess.
8509 fs_info->avail_data_alloc_bits = 0;
8510 fs_info->avail_metadata_alloc_bits = 0;
8511 fs_info->avail_system_alloc_bits = 0;
8513 /* First we need to create the in-memory block groups */
8515 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8516 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8518 btrfs_free_path(path);
8526 leaf = path->nodes[0];
8527 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8528 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One in-memory block group per CHUNK_ITEM; flags taken from the chunk. */
8533 chunk = btrfs_item_ptr(leaf, path->slots[0],
8534 struct btrfs_chunk);
8535 btrfs_add_block_group(fs_info, 0,
8536 btrfs_chunk_type(leaf, chunk),
8537 key.objectid, key.offset,
8538 btrfs_chunk_length(leaf, chunk));
8539 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8540 key.offset + btrfs_chunk_length(leaf, chunk),
8546 cache = btrfs_lookup_first_block_group(fs_info, start);
8550 start = cache->key.objectid + cache->key.offset;
8553 btrfs_free_path(path);
/*
 * Wipe any pending balance state: delete the balance item, delete all
 * TREE_RELOC root items from the tree root, then re-initialize the data
 * reloc tree (re-read it, reinit its root and recreate its root dir).
 *
 * NOTE(review): lines are elided in this excerpt (several error checks,
 * loop headers and the del_nr accumulation logic are not fully visible).
 */
8557 static int reset_balance(struct btrfs_trans_handle *trans,
8558 struct btrfs_fs_info *fs_info)
8560 struct btrfs_root *root = fs_info->tree_root;
8561 struct btrfs_path *path;
8562 struct extent_buffer *leaf;
8563 struct btrfs_key key;
8564 int del_slot, del_nr = 0;
8568 path = btrfs_alloc_path();
/* Step 1: remove the balance item itself, if present. */
8572 key.objectid = BTRFS_BALANCE_OBJECTID;
8573 key.type = BTRFS_BALANCE_ITEM_KEY;
8576 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8581 goto reinit_data_reloc;
8586 ret = btrfs_del_item(trans, root, path);
8589 btrfs_release_path(path);
/* Step 2: delete every TREE_RELOC root item (batched via del_slot/del_nr). */
8591 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8592 key.type = BTRFS_ROOT_ITEM_KEY;
8595 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8599 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8604 ret = btrfs_del_items(trans, root, path,
8611 btrfs_release_path(path);
8614 ret = btrfs_search_slot(trans, root, &key, path,
8621 leaf = path->nodes[0];
8622 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8623 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8625 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8630 del_slot = path->slots[0];
8639 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8643 btrfs_release_path(path);
/* Step 3: rebuild the data reloc tree from scratch. */
8646 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8647 key.type = BTRFS_ROOT_ITEM_KEY;
8648 key.offset = (u64)-1;
8649 root = btrfs_read_fs_root(fs_info, &key);
8651 fprintf(stderr, "Error reading data reloc tree\n");
8652 ret = PTR_ERR(root);
8655 record_root_in_trans(trans, root);
8656 ret = btrfs_fsck_reinit_root(trans, root, 0);
8659 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8661 btrfs_free_path(path);
/*
 * Re-create the extent tree from scratch (--init-extent-tree):
 *   1. refuse mixed block groups (data extents would also need pinning);
 *   2. pin all reachable metadata so new allocations cannot clobber it;
 *   3. drop and rebuild in-memory block groups from the chunk tree;
 *   4. reinit the extent root and re-insert the block group items;
 *   5. clear any pending balance state.
 * The actual extent items are re-added later by the normal fsck pass.
 *
 * Fix: corrected the misspelled error message "error reseting the pending
 * balance" -> "error resetting the pending balance". No other token changed.
 * NOTE(review): lines are elided in this excerpt (error checks, the block
 * group re-insert loop header and function close are not fully visible).
 */
8665 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8666 struct btrfs_fs_info *fs_info)
8672 * The only reason we don't do this is because right now we're just
8673 * walking the trees we find and pinning down their bytes, we don't look
8674 * at any of the leaves. In order to do mixed groups we'd have to check
8675 * the leaves of any fs roots and pin down the bytes for any file
8676 * extents we find. Not hard but why do it if we don't have to?
8678 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8679 fprintf(stderr, "We don't support re-initing the extent tree "
8680 "for mixed block groups yet, please notify a btrfs "
8681 "developer you want to do this so they can add this "
8682 "functionality.\n");
8687 * first we need to walk all of the trees except the extent tree and pin
8688 * down the bytes that are in use so we don't overwrite any existing
8691 ret = pin_metadata_blocks(fs_info);
8693 fprintf(stderr, "error pinning down used bytes\n");
8698 * Need to drop all the block groups since we're going to recreate all
8701 btrfs_free_block_groups(fs_info);
8702 ret = reset_block_groups(fs_info);
8704 fprintf(stderr, "error resetting the block groups\n");
8708 /* Ok we can allocate now, reinit the extent root */
8709 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8711 fprintf(stderr, "extent root initialization failed\n");
8713 * When the transaction code is updated we should end the
8714 * transaction, but for now progs only knows about commit so
8715 * just return an error.
8721 * Now we have all the in-memory block groups setup so we can make
8722 * allocations properly, and the metadata we care about is safe since we
8723 * pinned all of it above.
8726 struct btrfs_block_group_cache *cache;
/* Walk every rebuilt block group and persist its item in the new root. */
8728 cache = btrfs_lookup_first_block_group(fs_info, start);
8731 start = cache->key.objectid + cache->key.offset;
8732 ret = btrfs_insert_item(trans, fs_info->extent_root,
8733 &cache->key, &cache->item,
8734 sizeof(cache->item));
8736 fprintf(stderr, "Error adding block group\n");
8739 btrfs_extent_post_op(trans, fs_info->extent_root);
8742 ret = reset_balance(trans, fs_info);
8744 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of @eb: look up its owner root from the header, then
 * do a write search (cow=1) down to @eb's own level with @eb's first key,
 * which makes the btree code re-COW the path including this block.
 * The transaction is committed and the path freed unconditionally.
 * NOTE(review): some error-path lines are elided in this excerpt.
 */
8749 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8751 struct btrfs_path *path;
8752 struct btrfs_trans_handle *trans;
8753 struct btrfs_key key;
8756 printf("Recowing metadata block %llu\n", eb->start);
8757 key.objectid = btrfs_header_owner(eb);
8758 key.type = BTRFS_ROOT_ITEM_KEY;
8759 key.offset = (u64)-1;
8761 root = btrfs_read_fs_root(root->fs_info, &key);
8763 fprintf(stderr, "Couldn't find owner root %llu\n",
8765 return PTR_ERR(root);
8768 path = btrfs_alloc_path();
8772 trans = btrfs_start_transaction(root, 1);
8773 if (IS_ERR(trans)) {
8774 btrfs_free_path(path);
8775 return PTR_ERR(trans);
/* Search stops at eb's level; use a node key for non-leaf blocks. */
8778 path->lowest_level = btrfs_header_level(eb);
8779 if (path->lowest_level)
8780 btrfs_node_key_to_cpu(eb, &key, 0);
8782 btrfs_item_key_to_cpu(eb, &key, 0);
8784 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8785 btrfs_commit_transaction(trans, root);
8786 btrfs_free_path(path);
/*
 * Delete one recorded bad item (@bad->key) from its owner root (@bad->root_id)
 * inside a fresh transaction, committing on the way out.
 * NOTE(review): a few lines (search-result check, function close) are elided
 * in this excerpt.
 */
8790 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8792 struct btrfs_path *path;
8793 struct btrfs_trans_handle *trans;
8794 struct btrfs_key key;
8797 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8798 bad->key.type, bad->key.offset);
8799 key.objectid = bad->root_id;
8800 key.type = BTRFS_ROOT_ITEM_KEY;
8801 key.offset = (u64)-1;
8803 root = btrfs_read_fs_root(root->fs_info, &key);
8805 fprintf(stderr, "Couldn't find owner root %llu\n",
8807 return PTR_ERR(root);
8810 path = btrfs_alloc_path();
8814 trans = btrfs_start_transaction(root, 1);
8815 if (IS_ERR(trans)) {
8816 btrfs_free_path(path);
8817 return PTR_ERR(trans);
/* Write search (cow=1, ins_len=-1) so the item can be deleted in place. */
8820 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8826 ret = btrfs_del_item(trans, root, path);
8828 btrfs_commit_transaction(trans, root);
8829 btrfs_free_path(path);
/*
 * Discard the log tree: zero the log root pointer and level in the in-memory
 * super block copy, then commit a transaction so the change hits disk.
 * Used in repair mode, since committing would otherwise leave a stale log
 * that fails replay on the next mount.
 */
8833 static int zero_log_tree(struct btrfs_root *root)
8835 struct btrfs_trans_handle *trans;
8838 trans = btrfs_start_transaction(root, 1);
8839 if (IS_ERR(trans)) {
8840 ret = PTR_ERR(trans);
8843 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8844 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8845 ret = btrfs_commit_transaction(trans, root);
8849 static int populate_csum(struct btrfs_trans_handle *trans,
8850 struct btrfs_root *csum_root, char *buf, u64 start,
8857 while (offset < len) {
8858 sectorsize = csum_root->sectorsize;
8859 ret = read_extent_data(csum_root, buf, start + offset,
8863 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8864 start + offset, buf, sectorsize);
8867 offset += sectorsize;
/*
 * Walk one fs/subvolume root and regenerate checksums for every regular
 * (BTRFS_FILE_EXTENT_REG) file extent it references, via populate_csum().
 * Inline and prealloc extents are skipped by the type check below.
 * NOTE(review): lines are elided in this excerpt (buf free, initial key
 * setup, loop close); 'buf' holds one sector for populate_csum's reads.
 */
8872 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8873 struct btrfs_root *csum_root,
8874 struct btrfs_root *cur_root)
8876 struct btrfs_path *path;
8877 struct btrfs_key key;
8878 struct extent_buffer *node;
8879 struct btrfs_file_extent_item *fi;
8886 path = btrfs_alloc_path();
8889 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8899 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8902 /* Iterate all regular file extents and fill its csum */
8904 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8906 if (key.type != BTRFS_EXTENT_DATA_KEY)
8908 node = path->nodes[0];
8909 slot = path->slots[0];
8910 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8911 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Csum the full on-disk extent, not just the referenced sub-range. */
8913 start = btrfs_file_extent_disk_bytenr(node, fi);
8914 len = btrfs_file_extent_disk_num_bytes(node, fi);
8916 ret = populate_csum(trans, csum_root, buf, start, len);
8923 * TODO: if next leaf is corrupted, jump to nearest next valid
8926 ret = btrfs_next_item(cur_root, path);
8936 btrfs_free_path(path);
/*
 * Rebuild the csum tree by iterating ROOT_ITEMs in the tree root, reading
 * each fs/subvolume root (objectids accepted by is_fstree()) and delegating
 * to fill_csum_tree_from_one_fs_root() for each.
 * Used when the extent tree cannot be trusted (e.g. after --init-extent-tree).
 * NOTE(review): loop headers, error checks and the function close are elided
 * in this excerpt.
 */
8941 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8942 struct btrfs_root *csum_root)
8944 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8945 struct btrfs_path *path;
8946 struct btrfs_root *tree_root = fs_info->tree_root;
8947 struct btrfs_root *cur_root;
8948 struct extent_buffer *node;
8949 struct btrfs_key key;
8953 path = btrfs_alloc_path();
/* Start at the first fs tree and walk forward through the tree root. */
8957 key.objectid = BTRFS_FS_TREE_OBJECTID;
8959 key.type = BTRFS_ROOT_ITEM_KEY;
8961 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8970 node = path->nodes[0];
8971 slot = path->slots[0];
8972 btrfs_item_key_to_cpu(node, &key, slot);
8973 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8975 if (key.type != BTRFS_ROOT_ITEM_KEY)
8977 if (!is_fstree(key.objectid))
8979 key.offset = (u64)-1;
8981 cur_root = btrfs_read_fs_root(fs_info, &key);
8982 if (IS_ERR(cur_root) || !cur_root) {
8983 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8987 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8992 ret = btrfs_next_item(tree_root, path);
9002 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning the extent tree: every EXTENT_ITEM with
 * the DATA flag gets its checksums regenerated via populate_csum().
 * Preferred when the extent tree is trustworthy (csum-tree-only rebuild).
 * NOTE(review): loop headers, error checks, buf free and the function close
 * are elided in this excerpt.
 */
9006 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9007 struct btrfs_root *csum_root)
9009 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9010 struct btrfs_path *path;
9011 struct btrfs_extent_item *ei;
9012 struct extent_buffer *leaf;
9014 struct btrfs_key key;
9017 path = btrfs_alloc_path();
9022 key.type = BTRFS_EXTENT_ITEM_KEY;
9025 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9027 btrfs_free_path(path);
/* One sector of scratch space for populate_csum's read-back. */
9031 buf = malloc(csum_root->sectorsize);
9033 btrfs_free_path(path);
9038 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9039 ret = btrfs_next_leaf(extent_root, path);
9047 leaf = path->nodes[0];
9049 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9050 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Only data extents carry csums; skip tree blocks. */
9055 ei = btrfs_item_ptr(leaf, path->slots[0],
9056 struct btrfs_extent_item);
9057 if (!(btrfs_extent_flags(leaf, ei) &
9058 BTRFS_EXTENT_FLAG_DATA)) {
9063 ret = populate_csum(trans, csum_root, buf, key.objectid,
9070 btrfs_free_path(path);
/*
9076 * Recalculate the csum and put it into the csum tree.
9078 * Extent tree init will wipe out all the extent info, so in that case, we
9079 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9080 * will use fs/subvol trees to init the csum tree.
 *
 * NOTE(review): the search_fs_tree parameter line and the branch condition
 * are elided in this excerpt; the two visible returns are the two strategies.
 */
9082 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9083 struct btrfs_root *csum_root,
9087 return fill_csum_tree_from_fs(trans, csum_root);
9089 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Per-root summary built from the extent tree, keyed by root objectid via
 * the embedded cache_extent (start = root id, size = 1). Records the highest
 * tree block found for that root — a valid root has exactly one node at its
 * top level. NOTE(review): the level/bytenr/gen/node_count field lines are
 * elided in this excerpt.
 */
9092 struct root_item_info {
9093 /* level of the root */
9095 /* number of nodes at this level, must be 1 for a root */
9099 struct cache_extent cache_extent;
/* Lazily-allocated cache of root_item_info entries; see build_roots_info_cache(). */
9102 static struct cache_tree *roots_info_cache = NULL;
/*
 * Drain roots_info_cache, freeing every root_item_info entry and then the
 * tree itself; safe to call when the cache was never built (NULL check).
 */
9104 static void free_roots_info_cache(void)
9106 if (!roots_info_cache)
9109 while (!cache_tree_empty(roots_info_cache)) {
9110 struct cache_extent *entry;
9111 struct root_item_info *rii;
9113 entry = first_cache_extent(roots_info_cache);
9116 remove_cache_extent(roots_info_cache, entry);
9117 rii = container_of(entry, struct root_item_info, cache_extent);
9121 free(roots_info_cache);
9122 roots_info_cache = NULL;
/*
 * Scan the extent tree for tree-block extents whose first inline ref is a
 * TREE_BLOCK_REF, and record per-root the highest-level node seen (bytenr,
 * generation, and a count of nodes at that level) into roots_info_cache.
 * maybe_repair_root_item() later compares root items against this cache.
 *
 * NOTE(review): lines are elided in this excerpt (loop header, several error
 * paths, path advance, function close).
 */
9125 static int build_roots_info_cache(struct btrfs_fs_info *info)
9128 struct btrfs_key key;
9129 struct extent_buffer *leaf;
9130 struct btrfs_path *path;
9132 if (!roots_info_cache) {
9133 roots_info_cache = malloc(sizeof(*roots_info_cache));
9134 if (!roots_info_cache)
9136 cache_tree_init(roots_info_cache);
9139 path = btrfs_alloc_path();
9144 key.type = BTRFS_EXTENT_ITEM_KEY;
9147 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9150 leaf = path->nodes[0];
9153 struct btrfs_key found_key;
9154 struct btrfs_extent_item *ei;
9155 struct btrfs_extent_inline_ref *iref;
9156 int slot = path->slots[0];
9161 struct cache_extent *entry;
9162 struct root_item_info *rii;
9164 if (slot >= btrfs_header_nritems(leaf)) {
9165 ret = btrfs_next_leaf(info->extent_root, path);
9172 leaf = path->nodes[0];
9173 slot = path->slots[0];
9176 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9178 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9179 found_key.type != BTRFS_METADATA_ITEM_KEY)
9182 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9183 flags = btrfs_extent_flags(leaf, ei);
9185 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9186 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* METADATA_ITEM encodes the level in key.offset; EXTENT_ITEM carries
 * an explicit btrfs_tree_block_info before the inline refs. */
9189 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9190 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9191 level = found_key.offset;
/* NOTE(review): this inner 'info' shadows the fs_info parameter 'info';
 * harmless here but worth renaming in a full-context cleanup. */
9193 struct btrfs_tree_block_info *info;
9195 info = (struct btrfs_tree_block_info *)(ei + 1);
9196 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9197 level = btrfs_tree_block_level(leaf, info);
9201 * For a root extent, it must be of the following type and the
9202 * first (and only one) iref in the item.
9204 type = btrfs_extent_inline_ref_type(leaf, iref);
9205 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9208 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9209 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First sighting of this root id: create an entry with level sentinel -1. */
9211 rii = malloc(sizeof(struct root_item_info));
9216 rii->cache_extent.start = root_id;
9217 rii->cache_extent.size = 1;
9218 rii->level = (u8)-1;
9219 entry = &rii->cache_extent;
9220 ret = insert_cache_extent(roots_info_cache, entry);
9223 rii = container_of(entry, struct root_item_info,
9227 ASSERT(rii->cache_extent.start == root_id);
9228 ASSERT(rii->cache_extent.size == 1);
/* Track only the highest level; count how many nodes sit at that level. */
9230 if (level > rii->level || rii->level == (u8)-1) {
9232 rii->bytenr = found_key.objectid;
9233 rii->gen = btrfs_extent_generation(leaf, ei);
9234 rii->node_count = 1;
9235 } else if (level == rii->level) {
9243 btrfs_free_path(path);
/*
 * Compare the root item at @path against the info cached from the extent
 * tree for @root_key->objectid; if bytenr/level/generation disagree, either
 * report it (@read_only_mode) or rewrite the root item in place.
 * Relies on build_roots_info_cache() having populated roots_info_cache.
 * NOTE(review): return statements and some braces are elided in this excerpt;
 * presumably it returns >0 when a fix is needed/made — TODO confirm.
 */
9248 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9249 struct btrfs_path *path,
9250 const struct btrfs_key *root_key,
9251 const int read_only_mode)
9253 const u64 root_id = root_key->objectid;
9254 struct cache_extent *entry;
9255 struct root_item_info *rii;
9256 struct btrfs_root_item ri;
9257 unsigned long offset;
9259 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9262 "Error: could not find extent items for root %llu\n",
9263 root_key->objectid);
9267 rii = container_of(entry, struct root_item_info, cache_extent);
9268 ASSERT(rii->cache_extent.start == root_id);
9269 ASSERT(rii->cache_extent.size == 1);
/* A healthy root has exactly one node at its top level. */
9271 if (rii->node_count != 1) {
9273 "Error: could not find btree root extent for root %llu\n",
9278 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9279 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9281 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9282 btrfs_root_level(&ri) != rii->level ||
9283 btrfs_root_generation(&ri) != rii->gen) {
9286 * If we're in repair mode but our caller told us to not update
9287 * the root item, i.e. just check if it needs to be updated, don't
9288 * print this message, since the caller will call us again shortly
9289 * for the same root item without read only mode (the caller will
9290 * open a transaction first).
9292 if (!(read_only_mode && repair))
9294 "%sroot item for root %llu,"
9295 " current bytenr %llu, current gen %llu, current level %u,"
9296 " new bytenr %llu, new gen %llu, new level %u\n",
9297 (read_only_mode ? "" : "fixing "),
9299 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9300 btrfs_root_level(&ri),
9301 rii->bytenr, rii->gen, rii->level);
/* A newer generation in the item than on the found node is unexpected
 * and not something this fixup can resolve. */
9303 if (btrfs_root_generation(&ri) > rii->gen) {
9305 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9306 root_id, btrfs_root_generation(&ri), rii->gen);
9310 if (!read_only_mode) {
9311 btrfs_set_root_bytenr(&ri, rii->bytenr);
9312 btrfs_set_root_level(&ri, rii->level);
9313 btrfs_set_root_generation(&ri, rii->gen);
9314 write_extent_buffer(path->nodes[0], &ri,
9315 offset, sizeof(ri));
/*
9325 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9326 * caused read-only snapshots to be corrupted if they were created at a moment
9327 * when the source subvolume/snapshot had orphan items. The issue was that the
9328 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9329 * node instead of the post orphan cleanup root node.
9330 * So this function, and its callees, just detects and fixes those cases. Even
9331 * though the regression was for read-only snapshots, this function applies to
9332 * any snapshot/subvolume root.
9333 * This must be run before any other repair code - not doing it so, makes other
9334 * repair code delete or modify backrefs in the extent tree for example, which
9335 * will result in an inconsistent fs after repairing the root items.
 *
 * Returns the number of repaired/stale roots on success (callers print that
 * count) or a negative error. NOTE(review): lines are elided in this excerpt
 * (loop header, counter updates, several gotos).
 */
9337 static int repair_root_items(struct btrfs_fs_info *info)
9339 struct btrfs_path *path = NULL;
9340 struct btrfs_key key;
9341 struct extent_buffer *leaf;
9342 struct btrfs_trans_handle *trans = NULL;
/* Build the extent-tree-derived view of every root first. */
9347 ret = build_roots_info_cache(info);
9351 path = btrfs_alloc_path();
9357 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9358 key.type = BTRFS_ROOT_ITEM_KEY;
9363 * Avoid opening and committing transactions if a leaf doesn't have
9364 * any root items that need to be fixed, so that we avoid rotating
9365 * backup roots unnecessarily.
9368 trans = btrfs_start_transaction(info->tree_root, 1);
9369 if (IS_ERR(trans)) {
9370 ret = PTR_ERR(trans);
9375 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9379 leaf = path->nodes[0];
9382 struct btrfs_key found_key;
9384 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9385 int no_more_keys = find_next_key(path, &key);
9387 btrfs_release_path(path);
9389 ret = btrfs_commit_transaction(trans,
9401 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9403 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9405 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* First pass per item may be read-only; re-run with a transaction
 * when repair is requested and a fix is needed. */
9408 ret = maybe_repair_root_item(info, path, &found_key,
9413 if (!trans && repair) {
9416 btrfs_release_path(path);
9426 free_roots_info_cache();
9427 btrfs_free_path(path);
9429 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Help/usage text for 'btrfs check'.
 * Fix: corrected user-visible typos — "inegrity" -> "integrity" (twice) and
 * "checkums" -> "checksums". All other strings are byte-identical.
 */
9436 const char * const cmd_check_usage[] = {
9437 "btrfs check [options] <device>",
9438 "Check structural integrity of a filesystem (unmounted).",
9439 "Check structural integrity of an unmounted filesystem. Verify internal",
9440 "trees' consistency and item connectivity. In the repair mode try to",
9441 "fix the problems found.",
9442 "WARNING: the repair mode is considered dangerous",
9444 "-s|--super <superblock> use this superblock copy",
9445 "-b|--backup use the backup root copy",
9446 "--repair try to repair the filesystem",
9447 "--readonly run in read-only mode (default)",
9448 "--init-csum-tree create a new CRC tree",
9449 "--init-extent-tree create a new extent tree",
9450 "--check-data-csum verify checksums of data blocks",
9451 "-Q|--qgroup-report print a report on qgroup consistency",
9452 "-E|--subvol-extents <subvolid>",
9453 " print subvolume extents and sharing state",
9454 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9455 "-p|--progress indicate progress",
9459 int cmd_check(int argc, char **argv)
9461 struct cache_tree root_cache;
9462 struct btrfs_root *root;
9463 struct btrfs_fs_info *info;
9466 u64 tree_root_bytenr = 0;
9467 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9470 int init_csum_tree = 0;
9472 int qgroup_report = 0;
9473 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9477 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9478 OPT_CHECK_CSUM, OPT_READONLY };
9479 static const struct option long_options[] = {
9480 { "super", required_argument, NULL, 's' },
9481 { "repair", no_argument, NULL, OPT_REPAIR },
9482 { "readonly", no_argument, NULL, OPT_READONLY },
9483 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9484 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9485 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9486 { "backup", no_argument, NULL, 'b' },
9487 { "subvol-extents", required_argument, NULL, 'E' },
9488 { "qgroup-report", no_argument, NULL, 'Q' },
9489 { "tree-root", required_argument, NULL, 'r' },
9490 { "progress", no_argument, NULL, 'p' },
9494 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9498 case 'a': /* ignored */ break;
9500 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9503 num = arg_strtou64(optarg);
9504 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9506 "ERROR: super mirror should be less than: %d\n",
9507 BTRFS_SUPER_MIRROR_MAX);
9510 bytenr = btrfs_sb_offset(((int)num));
9511 printf("using SB copy %llu, bytenr %llu\n", num,
9512 (unsigned long long)bytenr);
9518 subvolid = arg_strtou64(optarg);
9521 tree_root_bytenr = arg_strtou64(optarg);
9524 ctx.progress_enabled = true;
9528 usage(cmd_check_usage);
9530 printf("enabling repair mode\n");
9532 ctree_flags |= OPEN_CTREE_WRITES;
9538 printf("Creating a new CRC tree\n");
9541 ctree_flags |= OPEN_CTREE_WRITES;
9543 case OPT_INIT_EXTENT:
9544 init_extent_tree = 1;
9545 ctree_flags |= (OPEN_CTREE_WRITES |
9546 OPEN_CTREE_NO_BLOCK_GROUPS);
9549 case OPT_CHECK_CSUM:
9550 check_data_csum = 1;
9554 argc = argc - optind;
9556 if (check_argc_exact(argc, 1))
9557 usage(cmd_check_usage);
9559 if (ctx.progress_enabled) {
9560 ctx.tp = TASK_NOTHING;
9561 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9564 /* This check is the only reason for --readonly to exist */
9565 if (readonly && repair) {
9566 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9571 cache_tree_init(&root_cache);
9573 if((ret = check_mounted(argv[optind])) < 0) {
9574 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9577 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9582 /* only allow partial opening under repair mode */
9584 ctree_flags |= OPEN_CTREE_PARTIAL;
9586 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9589 fprintf(stderr, "Couldn't open file system\n");
9595 root = info->fs_root;
9598 * repair mode will force us to commit transaction which
9599 * will make us fail to load log tree when mounting.
9601 if (repair && btrfs_super_log_root(info->super_copy)) {
9602 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9607 ret = zero_log_tree(root);
9609 fprintf(stderr, "fail to zero log tree\n");
9614 uuid_unparse(info->super_copy->fsid, uuidbuf);
9615 if (qgroup_report) {
9616 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9618 ret = qgroup_verify_all(info);
9620 print_qgroup_report(1);
9624 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9625 subvolid, argv[optind], uuidbuf);
9626 ret = print_extent_state(info, subvolid);
9629 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9631 if (!extent_buffer_uptodate(info->tree_root->node) ||
9632 !extent_buffer_uptodate(info->dev_root->node) ||
9633 !extent_buffer_uptodate(info->chunk_root->node)) {
9634 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9639 if (init_extent_tree || init_csum_tree) {
9640 struct btrfs_trans_handle *trans;
9642 trans = btrfs_start_transaction(info->extent_root, 0);
9643 if (IS_ERR(trans)) {
9644 fprintf(stderr, "Error starting transaction\n");
9645 ret = PTR_ERR(trans);
9649 if (init_extent_tree) {
9650 printf("Creating a new extent tree\n");
9651 ret = reinit_extent_tree(trans, info);
9656 if (init_csum_tree) {
9657 fprintf(stderr, "Reinit crc root\n");
9658 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9660 fprintf(stderr, "crc root initialization failed\n");
9665 ret = fill_csum_tree(trans, info->csum_root,
9668 fprintf(stderr, "crc refilling failed\n");
9673 * Ok now we commit and run the normal fsck, which will add
9674 * extent entries for all of the items it finds.
9676 ret = btrfs_commit_transaction(trans, info->extent_root);
9680 if (!extent_buffer_uptodate(info->extent_root->node)) {
9681 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9685 if (!extent_buffer_uptodate(info->csum_root->node)) {
9686 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9691 if (!ctx.progress_enabled)
9692 fprintf(stderr, "checking extents\n");
9693 ret = check_chunks_and_extents(root);
9695 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9697 ret = repair_root_items(info);
9701 fprintf(stderr, "Fixed %d roots.\n", ret);
9703 } else if (ret > 0) {
9705 "Found %d roots with an outdated root item.\n",
9708 "Please run a filesystem check with the option --repair to fix them.\n");
9713 if (!ctx.progress_enabled)
9714 fprintf(stderr, "checking free space cache\n");
9715 ret = check_space_cache(root);
9720 * We used to have to have these hole extents in between our real
9721 * extents so if we don't have this flag set we need to make sure there
9722 * are no gaps in the file extents for inodes, otherwise we can just
9723 * ignore it when this happens.
9725 no_holes = btrfs_fs_incompat(root->fs_info,
9726 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9727 if (!ctx.progress_enabled)
9728 fprintf(stderr, "checking fs roots\n");
9729 ret = check_fs_roots(root, &root_cache);
9733 fprintf(stderr, "checking csums\n");
9734 ret = check_csums(root);
9738 fprintf(stderr, "checking root refs\n");
9739 ret = check_root_refs(root, &root_cache);
9743 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9744 struct extent_buffer *eb;
9746 eb = list_first_entry(&root->fs_info->recow_ebs,
9747 struct extent_buffer, recow);
9748 list_del_init(&eb->recow);
9749 ret = recow_extent_buffer(root, eb);
9754 while (!list_empty(&delete_items)) {
9755 struct bad_item *bad;
9757 bad = list_first_entry(&delete_items, struct bad_item, list);
9758 list_del_init(&bad->list);
9760 ret = delete_bad_item(root, bad);
9764 if (info->quota_enabled) {
9766 fprintf(stderr, "checking quota groups\n");
9767 err = qgroup_verify_all(info);
9772 if (!list_empty(&root->fs_info->recow_ebs)) {
9773 fprintf(stderr, "Transid errors in file system\n");
9777 print_qgroup_report(0);
9778 if (found_old_backref) { /*
9779 * there was a disk format change when mixed
9780 * backref was in testing tree. The old format
9781 * existed about one week.
9783 printf("\n * Found old mixed backref format. "
9784 "The old format is not supported! *"
9785 "\n * Please mount the FS in readonly mode, "
9786 "backup data and re-format the FS. *\n\n");
9789 printf("found %llu bytes used err is %d\n",
9790 (unsigned long long)bytes_used, ret);
9791 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9792 printf("total tree bytes: %llu\n",
9793 (unsigned long long)total_btree_bytes);
9794 printf("total fs tree bytes: %llu\n",
9795 (unsigned long long)total_fs_tree_bytes);
9796 printf("total extent tree bytes: %llu\n",
9797 (unsigned long long)total_extent_tree_bytes);
9798 printf("btree space waste bytes: %llu\n",
9799 (unsigned long long)btree_space_waste);
9800 printf("file data blocks allocated: %llu\n referenced %llu\n",
9801 (unsigned long long)data_bytes_allocated,
9802 (unsigned long long)data_bytes_referenced);
9804 free_root_recs_tree(&root_cache);
9808 if (ctx.progress_enabled)
9809 task_deinit(ctx.info);