2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
48 TASK_NOTHING, /* have to be the last element */
53 enum task_position tp;
55 struct task_info *info;
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
/*
 * Thread body for periodic progress output: prints the current task
 * position string with a rotating spinner character, refreshing once per
 * task period.  NOTE(review): interior lines of this function are not
 * visible in this view.
 */
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
/* Spinner glyphs, cycled via count % 4 below. */
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
81 static char *task_position_string[] = {
83 "checking free space cache",
/* Refresh interval: 1000 ms. */
87 task_period_start(priv->info, 1000 /* 1s */);
/* TASK_NOTHING means no task is active, so there is nothing to report. */
89 if (priv->tp == TASK_NOTHING)
/* '\r' without '\n' rewrites the same terminal line in place. */
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
97 task_period_wait(priv->info);
102 static int print_status_return(void *p)
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
149 struct tree_backref {
150 struct extent_backref node;
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
189 unsigned int filetype:8;
191 unsigned int ref_type;
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
223 struct file_extent_hole {
229 /* Compatible function to allow reuse of old codes */
/*
 * Return the start offset of the lowest hole in the tree.  rb_first()
 * yields the smallest entry because compare_hole orders by start offset.
 */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
/* No holes recorded at all — the "no gap" case (return value elided here). */
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for file_extent_hole nodes.  Primary order is by
 * start offset; on equal starts the longer hole sorts first so that it
 * becomes the merge center (see inline comments).  Never reports
 * equality, so inserts cannot collide with -EEXIST.
 */
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
260 /* Hole 2 will be merge center */
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
/* NOTE(review): malloc failure handling is in elided lines — confirm. */
277 hole = malloc(sizeof(*hole));
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* Previous hole touches or overlaps the new one: absorb it. */
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iterate merge with next holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
/* Merge every following hole that touches or overlaps this one. */
303 if (hole->start + hole->len >= next->start) {
/* Only extend len when next actually reaches past our end. */
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
/*
 * rb_search() callback: compare a search key (a file_extent_hole passed
 * through the void *data, only its ->start is used) against a tree node.
 * Reports a match when start falls inside [hole->start, hole->start+len).
 */
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
/* First interpret data as the key hole to extract the start offset. */
321 hole = (struct file_extent_hole *)data;
/* Then re-point hole at the tree node being compared. */
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole in the record
335 * This will do the hole split and is much stricter than add.
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
/* tmp serves as the search key for rb_search() below. */
341 struct file_extent_hole tmp;
346 struct rb_node *node;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
/* The range to delete must lie entirely inside the found hole. */
357 if (start + len > hole->start + hole->len)
361 * Now there will be no overlap; delete the hole and re-add the
362 * split(s) if they exist.
/* Left-hand remainder: [hole->start, start). */
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
/* Right-hand remainder: [start+len, hole end). */
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Copy every hole from src into dst.  add_file_extent_hole() merges
 * adjacent/overlapping holes, so dst stays free of contiguous holes.
 */
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
/* Walk src in ascending start order. */
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
/*
 * Drain the hole tree: repeatedly detach the first node (and, per the
 * elided lines, presumably free it — confirm) until the tree is empty.
 */
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
/* Re-fetch the new first node after erasing. */
417 node = rb_first(holes);
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
443 struct rb_root holes;
444 struct list_head orphan_extents;
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
489 struct cache_extent cache;
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
514 struct btrfs_key key;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark a root as participating in the given transaction: enable dirty
 * tracking, stamp the transid, and pin the current node as commit_root
 * (taking an extra reference on the extent buffer).  Idempotent within
 * one transaction thanks to the last_trans check.
 */
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
528 extent_buffer_get(root->node);
/*
 * Map a POSIX st_mode file-type field to the corresponding BTRFS_FT_*
 * directory-entry type via a lookup table indexed by (mode & S_IFMT)
 * shifted down by S_SHIFT.
 */
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparator for device_record nodes, ordered by devid
 * (return statements for the >/< cases are in elided lines).
 */
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode_record: duplicate the flat struct, then rebuild the
 * backref list, orphan-data-extent list, and hole tree so the clone owns
 * its own copies.  Returns ERR_PTR(-ENOMEM) on allocation failure; the
 * trailing cleanup lists free whatever was partially copied.
 */
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct inode_backref *tmp;
570 struct orphan_data_extent *src_orphan;
571 struct orphan_data_extent *dst_orphan;
575 rec = malloc(sizeof(*rec));
577 return ERR_PTR(-ENOMEM);
578 memcpy(rec, orig_rec, sizeof(*rec));
/* Reset the containers so the clone does not alias orig_rec's lists. */
580 INIT_LIST_HEAD(&rec->backrefs);
581 INIT_LIST_HEAD(&rec->orphan_extents);
582 rec->holes = RB_ROOT;
584 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* Backrefs carry a trailing NUL-terminated name after the struct. */
585 size = sizeof(*orig) + orig->namelen + 1;
586 backref = malloc(size);
591 memcpy(backref, orig, size);
592 list_add_tail(&backref->list, &rec->backrefs);
594 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595 dst_orphan = malloc(sizeof(*dst_orphan));
600 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
603 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Error path: unwind partially-built lists before returning. */
609 if (!list_empty(&rec->backrefs))
610 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611 list_del(&orig->list);
615 if (!list_empty(&rec->orphan_extents))
616 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617 list_del(&orig->list);
/*
 * Report every orphan data extent on the list for the given tree;
 * silently returns when the list is empty.
 */
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
629 struct orphan_data_extent *orphan;
631 if (list_empty(orphan_extents))
633 printf("The following data extent is lost in tree %llu:\n",
635 list_for_each_entry(orphan, orphan_extents, list) {
636 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Print a human-readable summary of every I_ERR_* bit set on an
 * inode_record, followed by the orphan extents and/or file-extent holes
 * when the corresponding error bits are present.
 */
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
644 u64 root_objectid = root->root_key.objectid;
645 int errors = rec->errors;
649 /* reloc root errors, we print its corresponding fs root objectid*/
650 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651 root_objectid = root->root_key.offset;
652 fprintf(stderr, "reloc");
654 fprintf(stderr, "root %llu inode %llu errors %x",
655 (unsigned long long) root_objectid,
656 (unsigned long long) rec->ino, rec->errors);
/* One clause per I_ERR_* flag, in bit order. */
658 if (errors & I_ERR_NO_INODE_ITEM)
659 fprintf(stderr, ", no inode item");
660 if (errors & I_ERR_NO_ORPHAN_ITEM)
661 fprintf(stderr, ", no orphan item");
662 if (errors & I_ERR_DUP_INODE_ITEM)
663 fprintf(stderr, ", dup inode item");
664 if (errors & I_ERR_DUP_DIR_INDEX)
665 fprintf(stderr, ", dup dir index");
666 if (errors & I_ERR_ODD_DIR_ITEM)
667 fprintf(stderr, ", odd dir item");
668 if (errors & I_ERR_ODD_FILE_EXTENT)
669 fprintf(stderr, ", odd file extent");
670 if (errors & I_ERR_BAD_FILE_EXTENT)
671 fprintf(stderr, ", bad file extent");
672 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673 fprintf(stderr, ", file extent overlap");
674 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675 fprintf(stderr, ", file extent discount");
676 if (errors & I_ERR_DIR_ISIZE_WRONG)
677 fprintf(stderr, ", dir isize wrong");
678 if (errors & I_ERR_FILE_NBYTES_WRONG)
679 fprintf(stderr, ", nbytes wrong");
680 if (errors & I_ERR_ODD_CSUM_ITEM)
681 fprintf(stderr, ", odd csum item");
682 if (errors & I_ERR_SOME_CSUM_MISSING)
683 fprintf(stderr, ", some csum missing");
684 if (errors & I_ERR_LINK_COUNT_WRONG)
685 fprintf(stderr, ", link count wrong");
686 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687 fprintf(stderr, ", orphan file extent");
688 fprintf(stderr, "\n");
689 /* Print the orphan extents if needed */
690 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
693 /* Print the holes if needed */
694 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695 struct file_extent_hole *hole;
696 struct rb_node *node;
699 node = rb_first(&rec->holes);
700 fprintf(stderr, "Found file extent holes:\n");
703 hole = rb_entry(node, struct file_extent_hole, node);
704 fprintf(stderr, "\tstart: %llu, len: %llu\n",
705 hole->start, hole->len);
706 node = rb_next(node);
/* Fallback when the hole tree is empty: report a whole-file hole. */
709 fprintf(stderr, "\tstart: 0, len: %llu\n",
710 round_up(rec->isize, root->sectorsize));
/*
 * Print a human-readable clause for every REF_ERR_* bit set in errors,
 * then terminate the line.
 */
714 static void print_ref_error(int errors)
716 if (errors & REF_ERR_NO_DIR_ITEM)
717 fprintf(stderr, ", no dir item");
718 if (errors & REF_ERR_NO_DIR_INDEX)
719 fprintf(stderr, ", no dir index");
720 if (errors & REF_ERR_NO_INODE_REF)
721 fprintf(stderr, ", no inode ref");
722 if (errors & REF_ERR_DUP_DIR_ITEM)
723 fprintf(stderr, ", dup dir item");
724 if (errors & REF_ERR_DUP_DIR_INDEX)
725 fprintf(stderr, ", dup dir index");
726 if (errors & REF_ERR_DUP_INODE_REF)
727 fprintf(stderr, ", dup inode ref");
728 if (errors & REF_ERR_INDEX_UNMATCH)
729 fprintf(stderr, ", index unmatch");
730 if (errors & REF_ERR_FILETYPE_UNMATCH)
731 fprintf(stderr, ", filetype unmatch");
732 if (errors & REF_ERR_NAME_TOO_LONG)
733 fprintf(stderr, ", name too long");
734 if (errors & REF_ERR_NO_ROOT_REF)
735 fprintf(stderr, ", no root ref");
736 if (errors & REF_ERR_NO_ROOT_BACKREF)
737 fprintf(stderr, ", no root backref");
738 if (errors & REF_ERR_DUP_ROOT_REF)
739 fprintf(stderr, ", dup root ref");
740 if (errors & REF_ERR_DUP_ROOT_BACKREF)
741 fprintf(stderr, ", dup root backref");
742 fprintf(stderr, "\n");
/*
 * Look up (or lazily create) the inode_record for ino in inode_cache.
 * When mod is set and the record is shared (refs > 1), clone it first so
 * the caller gets a private copy.  Returns ERR_PTR on allocation or
 * insertion failure.
 */
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
748 struct ptr_node *node;
749 struct cache_extent *cache;
750 struct inode_record *rec = NULL;
753 cache = lookup_cache_extent(inode_cache, ino, 1);
755 node = container_of(cache, struct ptr_node, cache);
/* Copy-on-write: don't mutate a record shared with other holders. */
757 if (mod && rec->refs > 1) {
758 node->data = clone_inode_rec(rec);
759 if (IS_ERR(node->data))
/* Cache miss: build a fresh, zeroed record. */
765 rec = calloc(1, sizeof(*rec));
767 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no extent seen yet". */
769 rec->extent_start = (u64)-1;
771 INIT_LIST_HEAD(&rec->backrefs);
772 INIT_LIST_HEAD(&rec->orphan_extents);
773 rec->holes = RB_ROOT;
775 node = malloc(sizeof(*node));
778 return ERR_PTR(-ENOMEM);
780 node->cache.start = ino;
781 node->cache.size = 1;
/* The free-inode cache inode gets special-cased (elided lines). */
784 if (ino == BTRFS_FREE_INO_OBJECTID)
787 ret = insert_cache_extent(inode_cache, &node->cache);
789 return ERR_PTR(-EEXIST);
/*
 * Detach every orphan_data_extent from the list (freeing happens in the
 * elided line after list_del — confirm).
 */
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
796 struct orphan_data_extent *orphan;
798 while (!list_empty(orphan_extents)) {
799 orphan = list_entry(orphan_extents->next,
800 struct orphan_data_extent, list);
801 list_del(&orphan->list);
/*
 * Release an inode_record: drain its backref list, then free its orphan
 * extents and hole tree.  (Refcount handling is in elided lines.)
 */
806 static void free_inode_rec(struct inode_record *rec)
808 struct inode_backref *backref;
813 while (!list_empty(&rec->backrefs)) {
814 backref = list_entry(rec->backrefs.next,
815 struct inode_backref, list);
816 list_del(&backref->list);
819 free_orphan_data_extents(&rec->orphan_extents);
820 free_file_extent_holes(&rec->holes);
/*
 * A record is disposable once it is fully checked, error-free, has its
 * inode item, a matching link count, and no outstanding backrefs.
 */
824 static int can_free_inode_rec(struct inode_record *rec)
826 if (!rec->errors && rec->checked && rec->found_inode_item &&
827 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalize an inode_record: drop fully-matched backrefs, derive the
 * remaining I_ERR_* flags from accumulated counters (size/nbytes/extent
 * coverage/csum state), and remove + free the record from the cache if
 * can_free_inode_rec() says it is clean.
 */
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833 struct inode_record *rec)
835 struct cache_extent *cache;
836 struct inode_backref *tmp, *backref;
837 struct ptr_node *node;
838 unsigned char filetype;
/* Without the inode item we can't validate anything yet. */
840 if (!rec->found_inode_item)
843 filetype = imode_to_type(rec->imode);
844 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* A backref is complete once both dir item and dir index were seen. */
845 if (backref->found_dir_item && backref->found_dir_index) {
846 if (backref->filetype != filetype)
847 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848 if (!backref->errors && backref->found_inode_ref &&
849 rec->nlink == rec->found_link) {
850 list_del(&backref->list);
/* Defer final checks while unchecked or mid-merge. */
856 if (!rec->checked || rec->merging)
859 if (S_ISDIR(rec->imode)) {
/* Directory isize must equal the sum of entry name lengths. */
860 if (rec->found_size != rec->isize)
861 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862 if (rec->found_file_extent)
863 rec->errors |= I_ERR_ODD_FILE_EXTENT;
864 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865 if (rec->found_dir_item)
866 rec->errors |= I_ERR_ODD_DIR_ITEM;
867 if (rec->found_size != rec->nbytes)
868 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* With no_holes unset, extents must cover the file up to isize. */
869 if (rec->nlink > 0 && !no_holes &&
870 (rec->extent_end < rec->isize ||
871 first_extent_gap(&rec->holes) < rec->isize))
872 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
875 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
/* NODATASUM inodes must not own csum items — and vice versa. */
876 if (rec->found_csum_item && rec->nodatasum)
877 rec->errors |= I_ERR_ODD_CSUM_ITEM;
878 if (rec->some_csum_missing && !rec->nodatasum)
879 rec->errors |= I_ERR_SOME_CSUM_MISSING;
882 BUG_ON(rec->refs != 1);
883 if (can_free_inode_rec(rec)) {
884 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885 node = container_of(cache, struct ptr_node, cache);
886 BUG_ON(node->data != rec);
887 remove_cache_extent(inode_cache, &node->cache);
/*
 * Check whether an orphan item exists for ino: search the tree for
 * (BTRFS_ORPHAN_OBJECTID, BTRFS_ORPHAN_ITEM_KEY, ino) and release the
 * path immediately — only the search result matters.
 */
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
895 struct btrfs_path path;
896 struct btrfs_key key;
899 key.objectid = BTRFS_ORPHAN_OBJECTID;
900 key.type = BTRFS_ORPHAN_ITEM_KEY;
903 btrfs_init_path(&path);
904 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905 btrfs_release_path(&path);
/*
 * Record an INODE_ITEM into the active node's current inode_record:
 * flag duplicates, copy nlink/size/nbytes/mode, note NODATASUM, and let
 * maybe_free_inode_rec() retire the record if it is already complete.
 */
911 static int process_inode_item(struct extent_buffer *eb,
912 int slot, struct btrfs_key *key,
913 struct shared_node *active_node)
915 struct inode_record *rec;
916 struct btrfs_inode_item *item;
918 rec = active_node->current;
919 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
/* A second inode item for the same ino is corruption. */
920 if (rec->found_inode_item) {
921 rec->errors |= I_ERR_DUP_INODE_ITEM;
924 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925 rec->nlink = btrfs_inode_nlink(eb, item);
926 rec->isize = btrfs_inode_size(eb, item);
927 rec->nbytes = btrfs_inode_nbytes(eb, item);
928 rec->imode = btrfs_inode_mode(eb, item);
929 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
931 rec->found_inode_item = 1;
/* NOTE(review): the orphan-item check guarding this line is elided. */
933 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find an existing backref on rec matching (dir, name) or allocate a new
 * zeroed one (with the name stored inline after the struct) and append
 * it.  BTRFS_MULTIPLE_OBJECTIDS records skip matching entirely.
 */
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
940 int namelen, u64 dir)
942 struct inode_backref *backref;
944 list_for_each_entry(backref, &rec->backrefs, list) {
945 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
947 if (backref->dir != dir || backref->namelen != namelen)
949 if (memcmp(name, backref->name, namelen))
/* No match: allocate struct + inline NUL-terminated name. */
954 backref = malloc(sizeof(*backref) + namelen + 1);
957 memset(backref, 0, sizeof(*backref));
959 backref->namelen = namelen;
960 memcpy(backref->name, name, namelen);
961 backref->name[namelen] = '\0';
962 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one directory-reference sighting (DIR_INDEX, DIR_ITEM, or
 * INODE_REF/EXTREF) on the backref for (ino, dir, name).  Each item
 * type cross-checks index and filetype against what the other item
 * types already reported, accumulating REF_ERR_* flags on mismatch.
 */
966 static int add_inode_backref(struct cache_tree *inode_cache,
967 u64 ino, u64 dir, u64 index,
968 const char *name, int namelen,
969 int filetype, int itemtype, int errors)
971 struct inode_record *rec;
972 struct inode_backref *backref;
974 rec = get_inode_rec(inode_cache, ino, 1);
976 backref = get_inode_backref(rec, name, namelen, dir);
979 backref->errors |= errors;
980 if (itemtype == BTRFS_DIR_INDEX_KEY) {
981 if (backref->found_dir_index)
982 backref->errors |= REF_ERR_DUP_DIR_INDEX;
/* Index seen via INODE_REF must agree with the DIR_INDEX key offset. */
983 if (backref->found_inode_ref && backref->index != index)
984 backref->errors |= REF_ERR_INDEX_UNMATCH;
985 if (backref->found_dir_item && backref->filetype != filetype)
986 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
988 backref->index = index;
989 backref->filetype = filetype;
990 backref->found_dir_index = 1;
991 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
993 if (backref->found_dir_item)
994 backref->errors |= REF_ERR_DUP_DIR_ITEM;
995 if (backref->found_dir_index && backref->filetype != filetype)
996 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
998 backref->filetype = filetype;
999 backref->found_dir_item = 1;
1000 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1001 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1002 if (backref->found_inode_ref)
1003 backref->errors |= REF_ERR_DUP_INODE_REF;
1004 if (backref->found_dir_index && backref->index != index)
1005 backref->errors |= REF_ERR_INDEX_UNMATCH;
1007 backref->index = index;
1009 backref->ref_type = itemtype;
1010 backref->found_inode_ref = 1;
/* Re-evaluate whether the record can now be retired. */
1015 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold the state of src into dst: replay src's backrefs into dst_cache,
 * OR the boolean flags and errors together, merge hole trees, combine
 * link/size counters, and stitch extent coverage (flagging overlaps and
 * recording gaps as holes).  Inode-item fields copy over only when dst
 * has none yet; a second inode item marks I_ERR_DUP_INODE_ITEM.
 */
1019 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1020 struct cache_tree *dst_cache)
1022 struct inode_backref *backref;
1027 list_for_each_entry(backref, &src->backrefs, list) {
1028 if (backref->found_dir_index) {
1029 add_inode_backref(dst_cache, dst->ino, backref->dir,
1030 backref->index, backref->name,
1031 backref->namelen, backref->filetype,
1032 BTRFS_DIR_INDEX_KEY, backref->errors);
1034 if (backref->found_dir_item) {
1036 add_inode_backref(dst_cache, dst->ino,
1037 backref->dir, 0, backref->name,
1038 backref->namelen, backref->filetype,
1039 BTRFS_DIR_ITEM_KEY, backref->errors);
1041 if (backref->found_inode_ref) {
1042 add_inode_backref(dst_cache, dst->ino,
1043 backref->dir, backref->index,
1044 backref->name, backref->namelen, 0,
1045 backref->ref_type, backref->errors);
1049 if (src->found_dir_item)
1050 dst->found_dir_item = 1;
1051 if (src->found_file_extent)
1052 dst->found_file_extent = 1;
1053 if (src->found_csum_item)
1054 dst->found_csum_item = 1;
1055 if (src->some_csum_missing)
1056 dst->some_csum_missing = 1;
/* Only merge hole trees when src has an earlier first gap than dst. */
1057 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1058 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1063 BUG_ON(src->found_link < dir_count);
1064 dst->found_link += src->found_link - dir_count;
1065 dst->found_size += src->found_size;
1066 if (src->extent_start != (u64)-1) {
/* dst has no extent coverage yet: adopt src's range wholesale. */
1067 if (dst->extent_start == (u64)-1) {
1068 dst->extent_start = src->extent_start;
1069 dst->extent_end = src->extent_end;
1071 if (dst->extent_end > src->extent_start)
1072 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
/* A gap between the ranges becomes a recorded hole. */
1073 else if (dst->extent_end < src->extent_start) {
1074 ret = add_file_extent_hole(&dst->holes,
1076 src->extent_start - dst->extent_end);
1078 if (dst->extent_end < src->extent_end)
1079 dst->extent_end = src->extent_end;
1083 dst->errors |= src->errors;
1084 if (src->found_inode_item) {
1085 if (!dst->found_inode_item) {
1086 dst->nlink = src->nlink;
1087 dst->isize = src->isize;
1088 dst->nbytes = src->nbytes;
1089 dst->imode = src->imode;
1090 dst->nodatasum = src->nodatasum;
1091 dst->found_inode_item = 1;
1093 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move all inode records from src_node into dst_node, first from the
 * root_cache and then (second pass) the inode_cache.  Records whose ino
 * already exists in dst are merged via merge_inode_recs(); afterwards
 * dst_node->current is re-pointed at the highest in-progress ino.
 */
1101 static int splice_shared_node(struct shared_node *src_node,
1102 struct shared_node *dst_node)
1104 struct cache_extent *cache;
1105 struct ptr_node *node, *ins;
1106 struct cache_tree *src, *dst;
1107 struct inode_record *rec, *conflict;
1108 u64 current_ino = 0;
1112 if (--src_node->refs == 0)
1114 if (src_node->current)
1115 current_ino = src_node->current->ino;
1117 src = &src_node->root_cache;
1118 dst = &dst_node->root_cache;
/* Drain src in ascending ino order. */
1120 cache = search_cache_extent(src, 0);
1122 node = container_of(cache, struct ptr_node, cache);
1124 cache = next_cache_extent(cache);
1127 remove_cache_extent(src, &node->cache);
1130 ins = malloc(sizeof(*ins));
1132 ins->cache.start = node->cache.start;
1133 ins->cache.size = node->cache.size;
1137 ret = insert_cache_extent(dst, &ins->cache);
/* Same ino already in dst: merge instead of insert. */
1138 if (ret == -EEXIST) {
1139 conflict = get_inode_rec(dst, rec->ino, 1);
1140 BUG_ON(IS_ERR(conflict));
1141 merge_inode_recs(rec, conflict, dst);
1143 conflict->checked = 1;
1144 if (dst_node->current == conflict)
1145 dst_node->current = NULL;
1147 maybe_free_inode_rec(dst, conflict);
1148 free_inode_rec(rec);
/* After the root caches, repeat the loop for the inode caches. */
1155 if (src == &src_node->root_cache) {
1156 src = &src_node->inode_cache;
1157 dst = &dst_node->inode_cache;
/* Advance dst's "current" record if src was further along. */
1161 if (current_ino > 0 && (!dst_node->current ||
1162 current_ino > dst_node->current->ino)) {
1163 if (dst_node->current) {
1164 dst_node->current->checked = 1;
1165 maybe_free_inode_rec(dst, dst_node->current);
1167 dst_node->current = get_inode_rec(dst, current_ino, 1);
1168 BUG_ON(IS_ERR(dst_node->current));
/*
 * cache_tree destructor callback: free the inode_record held by a
 * ptr_node (used via FREE_EXTENT_CACHE_BASED_TREE below).
 */
1173 static void free_inode_ptr(struct cache_extent *cache)
1175 struct ptr_node *node;
1176 struct inode_record *rec;
1178 node = container_of(cache, struct ptr_node, cache);
1180 free_inode_rec(rec);
1184 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node keyed by bytenr in the shared cache tree;
 * the miss path (return NULL, presumably) is in elided lines.
 */
1186 static struct shared_node *find_shared_node(struct cache_tree *shared,
1189 struct cache_extent *cache;
1190 struct shared_node *node;
1192 cache = lookup_cache_extent(shared, bytenr, 1);
1194 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate and insert a new shared_node for bytenr with empty root and
 * inode caches (refs assignment is in an elided line).
 */
1200 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1203 struct shared_node *node;
1205 node = calloc(1, sizeof(*node));
1208 node->cache.start = bytenr;
1209 node->cache.size = 1;
1210 cache_tree_init(&node->root_cache);
1211 cache_tree_init(&node->inode_cache);
1214 ret = insert_cache_extent(shared, &node->cache);
/*
 * Descend into a shared tree block at the given level: register it in
 * wc->shared on first visit, or — when revisiting via another path —
 * splice its accumulated records into the currently active node.  Dead
 * roots (refs == 0) have their per-node caches torn down instead.
 */
1219 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1220 struct walk_control *wc, int level)
1222 struct shared_node *node;
1223 struct shared_node *dest;
1226 if (level == wc->active_node)
1229 BUG_ON(wc->active_node <= level);
1230 node = find_shared_node(&wc->shared, bytenr);
/* First visit: create the shared node and make this level active. */
1232 ret = add_shared_node(&wc->shared, bytenr, refs);
1234 node = find_shared_node(&wc->shared, bytenr);
1235 wc->nodes[level] = node;
1236 wc->active_node = level;
/* Orphaned root at the top of the walk: discard its caches. */
1240 if (wc->root_level == wc->active_node &&
1241 btrfs_root_refs(&root->root_item) == 0) {
1242 if (--node->refs == 0) {
1243 free_inode_recs_tree(&node->root_cache);
1244 free_inode_recs_tree(&node->inode_cache);
1245 remove_cache_extent(&wc->shared, &node->cache);
/* Revisit: merge this node's records into the active node. */
1251 dest = wc->nodes[wc->active_node];
1252 splice_shared_node(node, dest);
1253 if (node->refs == 0) {
1254 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Ascend out of a shared node when the walk finishes a level: find the
 * next higher populated level, make it active, and splice the departing
 * node's records upward (unless the root itself is going away).
 */
1260 static int leave_shared_node(struct btrfs_root *root,
1261 struct walk_control *wc, int level)
1263 struct shared_node *node;
1264 struct shared_node *dest;
1267 if (level == wc->root_level)
/* Find the nearest populated level above the one we are leaving. */
1270 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1274 BUG_ON(i >= BTRFS_MAX_LEVEL);
1276 node = wc->nodes[wc->active_node];
1277 wc->nodes[wc->active_node] = NULL;
1278 wc->active_node = i;
1280 dest = wc->nodes[wc->active_node];
1281 if (wc->active_node < wc->root_level ||
1282 btrfs_root_refs(&root->root_item) > 0) {
1283 BUG_ON(node->refs <= 1);
1284 splice_shared_node(node, dest);
1286 BUG_ON(node->refs < 2);
1295 * 1 - if the root with id child_root_id is a child of root parent_root_id
1296 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1297 *     has other root(s) as parent(s)
1298 * 2 - if the root child_root_id doesn't have any parent roots
1300 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1303 struct btrfs_path path;
1304 struct btrfs_key key;
1305 struct extent_buffer *leaf;
1309 btrfs_init_path(&path);
/* Fast path: a direct ROOT_REF from parent to child settles it. */
1311 key.objectid = parent_root_id;
1312 key.type = BTRFS_ROOT_REF_KEY;
1313 key.offset = child_root_id;
1314 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1318 btrfs_release_path(&path);
/* Slow path: scan the child's ROOT_BACKREF items for any parent. */
1322 key.objectid = child_root_id;
1323 key.type = BTRFS_ROOT_BACKREF_KEY;
1325 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1331 leaf = path.nodes[0];
1332 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1333 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1336 leaf = path.nodes[0];
1339 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop once we run past this child's backref items. */
1340 if (key.objectid != child_root_id ||
1341 key.type != BTRFS_ROOT_BACKREF_KEY)
1346 if (key.offset == parent_root_id) {
1347 btrfs_release_path(&path);
1354 btrfs_release_path(&path);
1357 return has_parent ? 0 : 2;
/*
 * Walk every btrfs_dir_item packed into one leaf item and register a
 * backref for each entry: inode targets go to the inode cache, subvolume
 * (ROOT_ITEM) targets to the root cache, anything else is flagged as an
 * invalid location.  Over-long names are truncated to BTRFS_NAME_LEN and
 * tagged REF_ERR_NAME_TOO_LONG.
 */
1360 static int process_dir_item(struct btrfs_root *root,
1361 struct extent_buffer *eb,
1362 int slot, struct btrfs_key *key,
1363 struct shared_node *active_node)
1373 struct btrfs_dir_item *di;
1374 struct inode_record *rec;
1375 struct cache_tree *root_cache;
1376 struct cache_tree *inode_cache;
1377 struct btrfs_key location;
1378 char namebuf[BTRFS_NAME_LEN];
1380 root_cache = &active_node->root_cache;
1381 inode_cache = &active_node->inode_cache;
1382 rec = active_node->current;
1383 rec->found_dir_item = 1;
1385 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1386 total = btrfs_item_size_nr(eb, slot);
/* Multiple dir items can be packed in one leaf item; iterate them all. */
1387 while (cur < total) {
1389 btrfs_dir_item_key_to_cpu(eb, di, &location);
1390 name_len = btrfs_dir_name_len(eb, di);
1391 data_len = btrfs_dir_data_len(eb, di);
1392 filetype = btrfs_dir_type(eb, di);
/* Directory isize is defined as the sum of entry name lengths. */
1394 rec->found_size += name_len;
1395 if (name_len <= BTRFS_NAME_LEN) {
1399 len = BTRFS_NAME_LEN;
1400 error = REF_ERR_NAME_TOO_LONG;
1402 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1404 if (location.type == BTRFS_INODE_ITEM_KEY) {
1405 add_inode_backref(inode_cache, location.objectid,
1406 key->objectid, key->offset, namebuf,
1407 len, filetype, key->type, error);
1408 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1409 add_inode_backref(root_cache, location.objectid,
1410 key->objectid, key->offset,
1411 namebuf, len, filetype,
1414 fprintf(stderr, "invalid location in dir item %u\n",
1416 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1417 key->objectid, key->offset, namebuf,
1418 len, filetype, key->type, error);
/* Advance past this entry's header, name, and payload. */
1421 len = sizeof(*di) + name_len + data_len;
1422 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must hold exactly one entry. */
1425 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1426 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed into one leaf item and register a
 * backref for each (dir = key->offset, index from the ref).  Over-long
 * names are truncated and tagged REF_ERR_NAME_TOO_LONG.
 */
1431 static int process_inode_ref(struct extent_buffer *eb,
1432 int slot, struct btrfs_key *key,
1433 struct shared_node *active_node)
1441 struct cache_tree *inode_cache;
1442 struct btrfs_inode_ref *ref;
1443 char namebuf[BTRFS_NAME_LEN];
1445 inode_cache = &active_node->inode_cache;
1447 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1448 total = btrfs_item_size_nr(eb, slot);
1449 while (cur < total) {
1450 name_len = btrfs_inode_ref_name_len(eb, ref);
1451 index = btrfs_inode_ref_index(eb, ref);
1452 if (name_len <= BTRFS_NAME_LEN) {
1456 len = BTRFS_NAME_LEN;
1457 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the ref struct. */
1459 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1460 add_inode_backref(inode_cache, key->objectid, key->offset,
1461 index, namebuf, len, 0, key->type, error);
1463 len = sizeof(*ref) + name_len;
1464 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Like process_inode_ref() but for extended refs (INODE_EXTREF), where the
 * parent directory id is stored inside the extref itself rather than being
 * derivable from the item key offset.
 */
1470 static int process_inode_extref(struct extent_buffer *eb,
1471 				int slot, struct btrfs_key *key,
1472 				struct shared_node *active_node)
1481 	struct cache_tree *inode_cache;
1482 	struct btrfs_inode_extref *extref;
1483 	char namebuf[BTRFS_NAME_LEN];
1485 	inode_cache = &active_node->inode_cache;
1487 	extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1488 	total = btrfs_item_size_nr(eb, slot);
	/* Several extrefs can be packed into one item; walk them all. */
1489 	while (cur < total) {
1490 		name_len = btrfs_inode_extref_name_len(eb, extref);
1491 		index = btrfs_inode_extref_index(eb, extref);
		/* Parent dir comes from the extref payload, not the key. */
1492 		parent = btrfs_inode_extref_parent(eb, extref);
1493 		if (name_len <= BTRFS_NAME_LEN) {
		/* Over-long name: clamp the copy and flag the backref. */
1497 			len = BTRFS_NAME_LEN;
1498 			error = REF_ERR_NAME_TOO_LONG;
1500 		read_extent_buffer(eb, namebuf,
1501 				   (unsigned long)(extref + 1), len);
1502 		add_inode_backref(inode_cache, key->objectid, parent,
1503 				  index, namebuf, len, 0, key->type, error);
		/* Advance past this extref: header plus the name bytes. */
1505 		len = sizeof(*extref) + name_len;
1506 		extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the logical range [start, start + len) are covered
 * by checksum items in the csum tree; the covered byte total is reported via
 * *found.  Used to decide whether a file extent has complete/partial csums.
 */
1513 static int count_csum_range(struct btrfs_root *root, u64 start,
1514 			    u64 len, u64 *found)
1516 	struct btrfs_key key;
1517 	struct btrfs_path path;
1518 	struct extent_buffer *leaf;
1523 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1525 	btrfs_init_path(&path);
1527 	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1529 	key.type = BTRFS_EXTENT_CSUM_KEY;
1531 	ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
	/*
	 * If the search landed just past an item, the previous csum item
	 * may still cover 'start'; step back one slot when it is a csum key.
	 */
1535 	if (ret > 0 && path.slots[0] > 0) {
1536 		leaf = path.nodes[0];
1537 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1538 		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1539 		    key.type == BTRFS_EXTENT_CSUM_KEY)
1544 		leaf = path.nodes[0];
1545 		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1546 			ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1551 			leaf = path.nodes[0];
1554 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		/* Stop once we leave the csum objectid/key space. */
1555 		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1556 		    key.type != BTRFS_EXTENT_CSUM_KEY)
1559 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1560 		if (key.offset >= start + len)
1563 		if (key.offset > start)
		/* A csum item covers (item size / csum_size) sectors. */
1566 		size = btrfs_item_size_nr(leaf, path.slots[0]);
1567 		csum_end = key.offset + (size / csum_size) * root->sectorsize;
1568 		if (csum_end > start) {
1569 			size = min(csum_end - start, len);
1578 	btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item for the inode record currently being built:
 * track extent coverage (overlaps and holes), accumulate found_size, sanity
 * check inline/regular/prealloc fields, and verify checksum coverage.
 */
1584 static int process_file_extent(struct btrfs_root *root,
1585 			       struct extent_buffer *eb,
1586 			       int slot, struct btrfs_key *key,
1587 			       struct shared_node *active_node)
1589 	struct inode_record *rec;
1590 	struct btrfs_file_extent_item *fi;
1592 	u64 disk_bytenr = 0;
1593 	u64 extent_offset = 0;
	/* sectorsize is a power of two, so this mask tests alignment. */
1594 	u64 mask = root->sectorsize - 1;
1598 	rec = active_node->current;
1599 	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1600 	rec->found_file_extent = 1;
	/* First extent seen for this inode: start coverage tracking here. */
1602 	if (rec->extent_start == (u64)-1) {
1603 		rec->extent_start = key->offset;
1604 		rec->extent_end = key->offset;
	/* Overlap with the previous extent is an error; a gap is a hole. */
1607 	if (rec->extent_end > key->offset)
1608 		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1609 	else if (rec->extent_end < key->offset) {
1610 		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1611 					   key->offset - rec->extent_end);
1616 	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1617 	extent_type = btrfs_file_extent_type(eb, fi);
1619 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1620 		num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1622 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1623 		rec->found_size += num_bytes;
		/* Round the inline length up to a full sector for coverage. */
1624 		num_bytes = (num_bytes + mask) & ~mask;
1625 	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1626 		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1627 		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1628 		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1629 		extent_offset = btrfs_file_extent_offset(eb, fi);
		/* Regular extents must be non-empty and sector aligned. */
1630 		if (num_bytes == 0 || (num_bytes & mask))
1631 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1632 		if (num_bytes + extent_offset >
1633 		    btrfs_file_extent_ram_bytes(eb, fi))
1634 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* Prealloc extents must not be compressed/encrypted/encoded. */
1635 		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1636 		    (btrfs_file_extent_compression(eb, fi) ||
1637 		     btrfs_file_extent_encryption(eb, fi) ||
1638 		     btrfs_file_extent_other_encoding(eb, fi)))
1639 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* disk_bytenr == 0 means a hole; only real data adds size. */
1640 		if (disk_bytenr > 0)
1641 			rec->found_size += num_bytes;
1643 		rec->errors |= I_ERR_BAD_FILE_EXTENT;
1645 	rec->extent_end = key->offset + num_bytes;
1648 	 * The data reloc tree will copy full extents into its inode and then
1649 	 * copy the corresponding csums.  Because the extent it copied could be
1650 	 * a preallocated extent that hasn't been written to yet there may be no
1651 	 * csums to copy, ergo we won't have csums for our file extent.  This is
1652 	 * ok so just don't bother checking csums if the inode belongs to the
1655 	if (disk_bytenr > 0 &&
1656 	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
		/* Compressed extents store csums for the on-disk bytes. */
1658 		if (btrfs_file_extent_compression(eb, fi))
1659 			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1661 			disk_bytenr += extent_offset;
1663 		ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1666 		if (extent_type == BTRFS_FILE_EXTENT_REG) {
1668 				rec->found_csum_item = 1;
1669 			if (found < num_bytes)
1670 				rec->some_csum_missing = 1;
		/* Prealloc extents must not have csums at all. */
1671 		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1673 				rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process all items of one fs-tree leaf: keep the shared node's "current"
 * inode record in sync with the item objectid and dispatch each item type
 * to its specific checker (dir item, inode ref/extref/item, file extent).
 */
1679 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1680 			    struct walk_control *wc)
1682 	struct btrfs_key key;
1686 	struct cache_tree *inode_cache;
1687 	struct shared_node *active_node;
	/* Dead root (zero refs) at the top level: nothing to check here. */
1689 	if (wc->root_level == wc->active_node &&
1690 	    btrfs_root_refs(&root->root_item) == 0)
1693 	active_node = wc->nodes[wc->active_node];
1694 	inode_cache = &active_node->inode_cache;
1695 	nritems = btrfs_header_nritems(eb);
1696 	for (i = 0; i < nritems; i++) {
1697 		btrfs_item_key_to_cpu(eb, &key, i);
		/* Free space cache objects are not inode metadata; skip. */
1699 		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1701 		if (key.type == BTRFS_ORPHAN_ITEM_KEY)
		/*
		 * Items are sorted by objectid, so a larger objectid means we
		 * are done with the previous inode: mark it checked and move
		 * "current" to the record for the new objectid.
		 */
1704 		if (active_node->current == NULL ||
1705 		    active_node->current->ino < key.objectid) {
1706 			if (active_node->current) {
1707 				active_node->current->checked = 1;
1708 				maybe_free_inode_rec(inode_cache,
1709 						     active_node->current);
1711 			active_node->current = get_inode_rec(inode_cache,
1713 			BUG_ON(IS_ERR(active_node->current));
1716 		case BTRFS_DIR_ITEM_KEY:
1717 		case BTRFS_DIR_INDEX_KEY:
1718 			ret = process_dir_item(root, eb, i, &key, active_node);
1720 		case BTRFS_INODE_REF_KEY:
1721 			ret = process_inode_ref(eb, i, &key, active_node);
1723 		case BTRFS_INODE_EXTREF_KEY:
1724 			ret = process_inode_extref(eb, i, &key, active_node);
1726 		case BTRFS_INODE_ITEM_KEY:
1727 			ret = process_inode_item(eb, i, &key, active_node);
1729 		case BTRFS_EXTENT_DATA_KEY:
1730 			ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of 'node' starting at 'slot', so the
 * subsequent synchronous read_tree_block() calls hit warm caches.
 * Best effort only: readahead results are intentionally not checked.
 */
1740 static void reada_walk_down(struct btrfs_root *root,
1741 			    struct extent_buffer *node, int slot)
1750 	level = btrfs_header_level(node);
1754 	nritems = btrfs_header_nritems(node);
	/* Children live one level below this node. */
1755 	blocksize = btrfs_level_size(root, level - 1);
1756 	for (i = slot; i < nritems; i++) {
1757 		bytenr = btrfs_node_blockptr(node, i);
1758 		ptr_gen = btrfs_node_ptr_generation(node, i);
1759 		readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1764  * Check the child node/leaf by the following condition:
1765  * 1. the first item key of the node/leaf should be the same with the one
1767  * 2. block in parent node should match the child node/leaf.
1768  * 3. generation of parent node and child's header should be consistent.
1770  * Or the child node/leaf pointed by the key in parent is not valid.
1772  * We hope to check leaf owner too, but since subvol may share leaves,
1773  * which makes leaf owner check not so strong, key check should be
1774  * sufficient enough for that case.
1776 static int check_child_node(struct btrfs_root *root,
1777 			    struct extent_buffer *parent, int slot,
1778 			    struct extent_buffer *child)
1780 	struct btrfs_key parent_key;
1781 	struct btrfs_key child_key;
1784 	btrfs_node_key_to_cpu(parent, &parent_key, slot);
	/* Leaves store item keys, internal nodes store node keys. */
1785 	if (btrfs_header_level(child) == 0)
1786 		btrfs_item_key_to_cpu(child, &child_key, 0);
1788 		btrfs_node_key_to_cpu(child, &child_key, 0);
	/* Condition 1: parent's pointer key must equal child's first key. */
1790 	if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1793 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1794 			parent_key.objectid, parent_key.type, parent_key.offset,
1795 			child_key.objectid, child_key.type, child_key.offset);
	/* Condition 2: block pointer must match the child's own bytenr. */
1797 	if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1799 		fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1800 			btrfs_node_blockptr(parent, slot),
1801 			btrfs_header_bytenr(child));
	/* Condition 3: pointer generation must match the child's header. */
1803 	if (btrfs_node_ptr_generation(parent, slot) !=
1804 	    btrfs_header_generation(child)) {
1806 		fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1807 			btrfs_header_generation(child),
1808 			btrfs_node_ptr_generation(parent, slot));
/*
 * Descend one subtree of a fs tree, checking every block on the way down:
 * shared subtrees are tracked via enter_shared_node() (and skipped when
 * already seen), leaves are handed to process_one_leaf(), and each child
 * block is validated against its parent pointer before descending into it.
 * *level is updated in place to the level the walk stopped at.
 */
1813 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1814 			  struct walk_control *wc, int *level)
1816 	enum btrfs_tree_block_status status;
1819 	struct extent_buffer *next;
1820 	struct extent_buffer *cur;
1825 	WARN_ON(*level < 0);
1826 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	/* Get the refcount of the root block to detect sharing. */
1827 	ret = btrfs_lookup_extent_info(NULL, root,
1828 				       path->nodes[*level]->start,
1829 				       *level, 1, &refs, NULL);
1836 		ret = enter_shared_node(root, path->nodes[*level]->start,
1844 	while (*level >= 0) {
1845 		WARN_ON(*level < 0);
1846 		WARN_ON(*level >= BTRFS_MAX_LEVEL);
1847 		cur = path->nodes[*level];
		/* The block's own header must agree with where we are. */
1849 		if (btrfs_header_level(cur) != *level)
		/* Exhausted all slots at this level: let the caller pop up. */
1852 		if (path->slots[*level] >= btrfs_header_nritems(cur))
1855 			ret = process_one_leaf(root, cur, wc);
1860 		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1861 		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1862 		blocksize = btrfs_level_size(root, *level - 1);
1863 		ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
		/* Shared child already visited elsewhere: skip this slot. */
1869 			ret = enter_shared_node(root, bytenr, refs,
1872 				path->slots[*level]++;
		/* Use the cache when possible, read (with readahead) if not. */
1877 		next = btrfs_find_tree_block(root, bytenr, blocksize);
1878 		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1879 			free_extent_buffer(next);
1880 			reada_walk_down(root, cur, path->slots[*level]);
1881 			next = read_tree_block(root, bytenr, blocksize,
			/* Unreadable child: record the parent as corrupt. */
1883 			if (!extent_buffer_uptodate(next)) {
1884 				struct btrfs_key node_key;
1886 				btrfs_node_key_to_cpu(path->nodes[*level],
1888 						      path->slots[*level]);
1889 				btrfs_add_corrupt_extent_record(root->fs_info,
1891 						path->nodes[*level]->start,
1892 						root->leafsize, *level);
1898 		ret = check_child_node(root, cur, path->slots[*level], next);
1904 		if (btrfs_is_leaf(next))
1905 			status = btrfs_check_leaf(root, NULL, next);
1907 			status = btrfs_check_node(root, NULL, next);
1908 		if (status != BTRFS_TREE_BLOCK_CLEAN) {
1909 			free_extent_buffer(next);
		/* Descend: the child becomes the path node one level down. */
1914 		*level = *level - 1;
1915 		free_extent_buffer(path->nodes[*level]);
1916 		path->nodes[*level] = next;
1917 		path->slots[*level] = 0;
1920 	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Walk back up the tree after walk_down_tree() exhausted a subtree: pop
 * fully-consumed levels (releasing their buffers and leaving any shared
 * node scope) until a level with remaining slots is found, which becomes
 * the new *level for the next descent.
 */
1924 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1925 			struct walk_control *wc, int *level)
1928 	struct extent_buffer *leaf;
1930 	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1931 		leaf = path->nodes[i];
		/* More siblings to visit at this level: resume here. */
1932 		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1937 			free_extent_buffer(path->nodes[*level]);
1938 			path->nodes[*level] = NULL;
1939 			BUG_ON(*level > wc->active_node);
1940 			if (*level == wc->active_node)
1941 				leave_shared_node(root, wc, *level);
/*
 * Validate the inode record of a tree's root directory: it must have a
 * clean inode item, nlink 1 with no counted links, and exactly one ".."
 * inode-ref backref with index 0 and no dir item/index entries.
 */
1948 static int check_root_dir(struct inode_record *rec)
1950 	struct inode_backref *backref;
1953 	if (!rec->found_inode_item || rec->errors)
1955 	if (rec->nlink != 1 || rec->found_link != 0)
1957 	if (list_empty(&rec->backrefs))
1959 	backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1960 	if (!backref->found_inode_ref)
	/* The sole backref of a root dir is its ".." self reference. */
1962 	if (backref->index != 0 || backref->namelen != 2 ||
1963 	    memcmp(backref->name, "..", 2))
1965 	if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair helper: rewrite a directory inode's size field to the size the
 * checker computed from its entries (rec->found_size), then clear the
 * I_ERR_DIR_ISIZE_WRONG flag.  Releases 'path' before returning.
 */
1972 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1973 			      struct btrfs_root *root, struct btrfs_path *path,
1974 			      struct inode_record *rec)
1976 	struct btrfs_inode_item *ei;
1977 	struct btrfs_key key;
1980 	key.objectid = rec->ino;
1981 	key.type = BTRFS_INODE_ITEM_KEY;
	/* offset -1 lands just after the inode item; step back one slot. */
1982 	key.offset = (u64)-1;
1984 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1988 	if (!path->slots[0]) {
1995 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1996 	if (key.objectid != rec->ino) {
2001 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2002 			    struct btrfs_inode_item);
2003 	btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2004 	btrfs_mark_buffer_dirty(path->nodes[0]);
2005 	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2006 	printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2007 	       root->root_key.objectid);
2009 	btrfs_release_path(path);
/*
 * Repair helper: insert the missing orphan item for rec->ino so the inode
 * can be reclaimed later, then clear I_ERR_NO_ORPHAN_ITEM on success.
 */
2013 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2014 				    struct btrfs_root *root,
2015 				    struct btrfs_path *path,
2016 				    struct inode_record *rec)
2020 	ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2021 	btrfs_release_path(path);
2023 		rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair helper: rewrite an inode's nbytes field to the byte total the
 * checker accumulated from its file extents (rec->found_size), then clear
 * the I_ERR_FILE_NBYTES_WRONG flag.  Releases 'path' before returning.
 */
2027 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2028 			       struct btrfs_root *root,
2029 			       struct btrfs_path *path,
2030 			       struct inode_record *rec)
2032 	struct btrfs_inode_item *ei;
2033 	struct btrfs_key key;
2036 	key.objectid = rec->ino;
2037 	key.type = BTRFS_INODE_ITEM_KEY;
2040 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2047 	/* Since ret == 0, no need to check anything */
2048 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2049 			    struct btrfs_inode_item);
2050 	btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2051 	btrfs_mark_buffer_dirty(path->nodes[0]);
2052 	rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2053 	printf("reset nbytes for ino %llu root %llu\n",
2054 	       rec->ino, root->root_key.objectid);
2056 	btrfs_release_path(path);
/*
 * Repair helper: recreate a missing DIR_INDEX item for 'backref' in its
 * parent directory, pointing back at rec->ino.  Runs in its own short
 * transaction, then updates the parent directory's inode record (size
 * accounting) in the in-memory cache to match.
 */
2060 static int add_missing_dir_index(struct btrfs_root *root,
2061 				 struct cache_tree *inode_cache,
2062 				 struct inode_record *rec,
2063 				 struct inode_backref *backref)
2065 	struct btrfs_path *path;
2066 	struct btrfs_trans_handle *trans;
2067 	struct btrfs_dir_item *dir_item;
2068 	struct extent_buffer *leaf;
2069 	struct btrfs_key key;
2070 	struct btrfs_disk_key disk_key;
2071 	struct inode_record *dir_rec;
2072 	unsigned long name_ptr;
	/* A dir item is the fixed header followed by the name bytes. */
2073 	u32 data_size = sizeof(*dir_item) + backref->namelen;
2076 	path = btrfs_alloc_path();
2080 	trans = btrfs_start_transaction(root, 1);
2081 	if (IS_ERR(trans)) {
2082 		btrfs_free_path(path);
2083 		return PTR_ERR(trans);
2086 	fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2087 		(unsigned long long)rec->ino);
2088 	key.objectid = backref->dir;
2089 	key.type = BTRFS_DIR_INDEX_KEY;
2090 	key.offset = backref->index;
2092 	ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2095 	leaf = path->nodes[0];
2096 	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
	/* The dir item's embedded key points at the child inode item. */
2098 	disk_key.objectid = cpu_to_le64(rec->ino);
2099 	disk_key.type = BTRFS_INODE_ITEM_KEY;
2100 	disk_key.offset = 0;
2102 	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2103 	btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2104 	btrfs_set_dir_data_len(leaf, dir_item, 0);
2105 	btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2106 	name_ptr = (unsigned long)(dir_item + 1);
2107 	write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2108 	btrfs_mark_buffer_dirty(leaf);
2109 	btrfs_free_path(path);
2110 	btrfs_commit_transaction(trans, root);
2112 	backref->found_dir_index = 1;
2113 	dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2114 	BUG_ON(IS_ERR(dir_rec));
	/* Keep the parent dir's isize bookkeeping consistent. */
2117 	dir_rec->found_size += backref->namelen;
2118 	if (dir_rec->found_size == dir_rec->isize &&
2119 	    (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2120 		dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2121 	if (dir_rec->found_size != dir_rec->isize)
2122 		dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair helper: remove a bogus DIR_INDEX entry described by 'backref'.
 * If the index item holds only this name the whole item is deleted,
 * otherwise just the one name is removed.  Runs in its own transaction.
 */
2127 static int delete_dir_index(struct btrfs_root *root,
2128 			    struct cache_tree *inode_cache,
2129 			    struct inode_record *rec,
2130 			    struct inode_backref *backref)
2132 	struct btrfs_trans_handle *trans;
2133 	struct btrfs_dir_item *di;
2134 	struct btrfs_path *path;
2137 	path = btrfs_alloc_path();
2141 	trans = btrfs_start_transaction(root, 1);
2142 	if (IS_ERR(trans)) {
2143 		btrfs_free_path(path);
2144 		return PTR_ERR(trans);
2148 	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2149 		(unsigned long long)backref->dir,
2150 		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2151 		(unsigned long long)root->objectid);
	/* mod -1 asks the lookup to prepare the path for deletion. */
2153 	di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2154 				    backref->name, backref->namelen,
2155 				    backref->index, -1);
2158 		btrfs_free_path(path);
2159 		btrfs_commit_transaction(trans, root);
	/* Whole item vs. single name: pick the matching delete primitive. */
2166 		ret = btrfs_del_item(trans, root, path);
2168 		ret = btrfs_delete_one_dir_name(trans, root, path, di);
2170 	btrfs_free_path(path);
2171 	btrfs_commit_transaction(trans, root);
/*
 * Repair helper: synthesize a brand-new inode item for rec->ino from what
 * the checker discovered (links, sizes, dir vs. file evidence).  The result
 * is a best-effort reconstruction; mode/permissions default to 0755 and the
 * user is warned to verify content afterwards.
 */
2175 static int create_inode_item(struct btrfs_root *root,
2176 			     struct inode_record *rec,
2177 			     struct inode_backref *backref, int root_dir)
2179 	struct btrfs_trans_handle *trans;
2180 	struct btrfs_inode_item inode_item;
2181 	time_t now = time(NULL);
2184 	trans = btrfs_start_transaction(root, 1);
2185 	if (IS_ERR(trans)) {
2186 		ret = PTR_ERR(trans);
2190 	fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2191 		"be incomplete, please check permissions and content after "
2192 		"the fsck completes.\n", (unsigned long long)root->objectid,
2193 		(unsigned long long)rec->ino);
2195 	memset(&inode_item, 0, sizeof(inode_item));
2196 	btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2198 		btrfs_set_stack_inode_nlink(&inode_item, 1);
2200 		btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2201 	btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
	/* Any dir item evidence wins: treat the inode as a directory. */
2202 	if (rec->found_dir_item) {
2203 		if (rec->found_file_extent)
2204 			fprintf(stderr, "root %llu inode %llu has both a dir "
2205 				"item and extents, unsure if it is a dir or a "
2206 				"regular file so setting it as a directory\n",
2207 				(unsigned long long)root->objectid,
2208 				(unsigned long long)rec->ino);
2209 		btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2210 		btrfs_set_stack_inode_size(&inode_item, rec->found_size);
	/*
	 * NOTE(review): this condition is the exact negation of the branch
	 * above, so it is always true when reached; a plain "else" would
	 * express the same thing.
	 */
2211 	} else if (!rec->found_dir_item) {
2212 		btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2213 		btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
	/* Fresh timestamps; otime 0 since the real creation time is lost. */
2215 	btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2216 	btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2217 	btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2218 	btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2219 	btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2220 	btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2221 	btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2222 	btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2224 	ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2226 	btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of an inode record and repair each inconsistency found:
 * delete bad dir indexes, recreate missing dir index/item pairs, recreate
 * a missing inode item, and drop backrefs that become fully consistent.
 * Returns a negative error, or the number of repairs performed.
 */
2230 static int repair_inode_backrefs(struct btrfs_root *root,
2231 				 struct inode_record *rec,
2232 				 struct cache_tree *inode_cache,
2235 	struct inode_backref *tmp, *backref;
2236 	u64 root_dirid = btrfs_root_dirid(&root->root_item);
2240 	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		/* The root dir itself: only its inode item can be missing. */
2241 		if (!delete && rec->ino == root_dirid) {
2242 			if (!rec->found_inode_item) {
2243 				ret = create_inode_item(root, rec, backref, 1);
2250 		/* Index 0 for root dir's are special, don't mess with it */
2251 		if (rec->ino == root_dirid && backref->index == 0)
		/* Dir index with no (or mismatched) inode ref: delete it. */
2255 		    ((backref->found_dir_index && !backref->found_inode_ref) ||
2256 		     (backref->found_dir_index && backref->found_inode_ref &&
2257 		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2258 			ret = delete_dir_index(root, inode_cache, rec, backref);
2262 			list_del(&backref->list);
		/* Dir item + inode ref but no dir index: recreate the index. */
2266 		if (!delete && !backref->found_dir_index &&
2267 		    backref->found_dir_item && backref->found_inode_ref) {
2268 			ret = add_missing_dir_index(root, inode_cache, rec,
		/*
		 * NOTE(review): found_dir_index is tested twice below; the
		 * third condition duplicates the second and looks like it was
		 * meant to be a different flag — verify against upstream.
		 */
2273 		if (backref->found_dir_item &&
2274 		    backref->found_dir_index &&
2275 		    backref->found_dir_index) {
			/* Fully consistent backref: nothing left to track. */
2276 			if (!backref->errors &&
2277 			    backref->found_inode_ref) {
2278 				list_del(&backref->list);
		/* Inode ref only: rebuild the dir index/item pair. */
2284 		if (!delete && (!backref->found_dir_index &&
2285 				!backref->found_dir_item &&
2286 				backref->found_inode_ref)) {
2287 			struct btrfs_trans_handle *trans;
2288 			struct btrfs_key location;
2290 			ret = check_dir_conflict(root, backref->name,
			/*
			 * let nlink fixing routine to handle it,
			 * which can do it better.
			 */
2302 			location.objectid = rec->ino;
2303 			location.type = BTRFS_INODE_ITEM_KEY;
2304 			location.offset = 0;
2306 			trans = btrfs_start_transaction(root, 1);
2307 			if (IS_ERR(trans)) {
2308 				ret = PTR_ERR(trans);
2311 			fprintf(stderr, "adding missing dir index/item pair "
2313 				(unsigned long long)rec->ino);
2314 			ret = btrfs_insert_dir_item(trans, root, backref->name,
2316 						    backref->dir, &location,
2317 						    imode_to_type(rec->imode),
2320 			btrfs_commit_transaction(trans, root);
		/* Complete, clean backref but no inode item: recreate it. */
2324 		if (!delete && (backref->found_inode_ref &&
2325 				backref->found_dir_index &&
2326 				backref->found_dir_item &&
2327 				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2328 				!rec->found_inode_item)) {
2329 			ret = create_inode_item(root, rec, backref, 0);
2336 	return ret ? ret : repaired;
2340  * To determine the file type for nlink/inode_item repair
2342  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2343  * Return -ENOENT if file type is not found.
2345 static int find_file_type(struct inode_record *rec, u8 *type)
2347 	struct inode_backref *backref;
2349 	/* For inode item recovered case */
2350 	if (rec->found_inode_item) {
		/* Inode item is authoritative: derive the type from imode. */
2351 		*type = imode_to_type(rec->imode);
	/* Otherwise fall back to the filetype stored in any dir entry. */
2355 	list_for_each_entry(backref, &rec->backrefs, list) {
2356 		if (backref->found_dir_index || backref->found_dir_item) {
2357 			*type = backref->filetype;
2365  * To determine the file name for nlink repair
2367  * Return 0 if file name is found, set name and namelen.
2368  * Return -ENOENT if file name is not found.
2370 static int find_file_name(struct inode_record *rec,
2371 			  char *name, int *namelen)
2373 	struct inode_backref *backref;
	/* Any backref that carried a name will do; take the first one. */
2375 	list_for_each_entry(backref, &rec->backrefs, list) {
2376 		if (backref->found_dir_index || backref->found_dir_item ||
2377 		    backref->found_inode_ref) {
			/* Caller's buffer must hold BTRFS_NAME_LEN bytes. */
2378 			memcpy(name, backref->name, backref->namelen);
2379 			*namelen = backref->namelen;
2386 /* Reset the nlink of the inode to the correct one */
/*
 * Strategy: unlink every recorded backref (dropping the invalid ones from
 * the in-memory list), force on-disk nlink to 0, then re-add only the fully
 * valid backrefs via btrfs_add_link(), which increments nlink as it goes —
 * leaving both the disk and rec->found_link consistent.
 */
2387 static int reset_nlink(struct btrfs_trans_handle *trans,
2388 		       struct btrfs_root *root,
2389 		       struct btrfs_path *path,
2390 		       struct inode_record *rec)
2392 	struct inode_backref *backref;
2393 	struct inode_backref *tmp;
2394 	struct btrfs_key key;
2395 	struct btrfs_inode_item *inode_item;
2398 	/* We don't believe this either, reset it and iterate backref */
2399 	rec->found_link = 0;
2401 	/* Remove all backref including the valid ones */
2402 	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2403 		ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2404 				   backref->index, backref->name,
2405 				   backref->namelen, 0);
2409 		/* remove invalid backref, so it won't be added back */
2410 		if (!(backref->found_dir_index &&
2411 		      backref->found_dir_item &&
2412 		      backref->found_inode_ref)) {
2413 			list_del(&backref->list);
2420 	/* Set nlink to 0 */
2421 	key.objectid = rec->ino;
2422 	key.type = BTRFS_INODE_ITEM_KEY;
2424 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2431 	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2432 				    struct btrfs_inode_item);
2433 	btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2434 	btrfs_mark_buffer_dirty(path->nodes[0]);
2435 	btrfs_release_path(path);
	/*
	 * Add back valid inode_ref/dir_item/dir_index,
	 * add_link() will handle the nlink inc, so new nlink must be correct
	 */
2441 	list_for_each_entry(backref, &rec->backrefs, list) {
2442 		ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2443 				     backref->name, backref->namelen,
2444 				     backref->filetype, &backref->index, 1);
2449 	btrfs_release_path(path);
/*
 * Repair a wrong link count: rebuild nlink from the valid backrefs via
 * reset_nlink(), and if no valid link remains, move the inode into a
 * "lost+found" directory under the root dir, deriving a name and type from
 * whatever evidence survives.  Clears I_ERR_LINK_COUNT_WRONG when done.
 */
2453 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2454 			       struct btrfs_root *root,
2455 			       struct btrfs_path *path,
2456 			       struct inode_record *rec)
	/* NOTE(review): could be "const char *"; the string is never written. */
2458 	char *dir_name = "lost+found";
2459 	char namebuf[BTRFS_NAME_LEN] = {0};
2464 	int name_recovered = 0;
2465 	int type_recovered = 0;
	/*
	 * Get file name and type first before these invalid inode ref
	 * are deleted by remove_all_invalid_backref()
	 */
2472 	name_recovered = !find_file_name(rec, namebuf, &namelen);
2473 	type_recovered = !find_file_type(rec, &type);
2475 	if (!name_recovered) {
2476 		printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2477 		       rec->ino, rec->ino);
		/* Fall back to the decimal inode number as the file name. */
2478 		namelen = count_digits(rec->ino);
2479 		sprintf(namebuf, "%llu", rec->ino);
2482 	if (!type_recovered) {
2483 		printf("Can't get file type for inode %llu, using FILE as fallback\n",
2485 		type = BTRFS_FT_REG_FILE;
2489 	ret = reset_nlink(trans, root, path, rec);
2492 			"Failed to reset nlink for inode %llu: %s\n",
2493 			rec->ino, strerror(-ret));
	/* No valid link survived: relink the inode under lost+found. */
2497 	if (rec->found_link == 0) {
2498 		lost_found_ino = root->highest_inode;
2499 		if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2504 		ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2505 				  BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2508 			fprintf(stderr, "Failed to create '%s' dir: %s\n",
2509 				dir_name, strerror(-ret));
2512 		ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2513 				     namebuf, namelen, type, NULL, 1);
		/*
		 * Add ".INO" suffix several times to handle case where
		 * "FILENAME.INO" is already taken by another file.
		 */
2518 		while (ret == -EEXIST) {
			/*
			 * Conflicting file name, add ".INO" as suffix * +1 for '.'
			 */
2522 			if (namelen + count_digits(rec->ino) + 1 >
2527 			snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2529 			namelen += count_digits(rec->ino) + 1;
2530 			ret = btrfs_add_link(trans, root, rec->ino,
2531 					     lost_found_ino, namebuf,
2532 					     namelen, type, NULL, 1);
2536 				"Failed to link the inode %llu to %s dir: %s\n",
2537 				rec->ino, dir_name, strerror(-ret));
		/*
		 * Just increase the found_link, don't actually add the
		 * backref. This will make things easier and this inode
		 * record will be freed after the repair is done.
		 * So fsck will not report problem about this inode.
		 */
2547 		printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2548 		       namelen, namebuf, dir_name);
2550 	printf("Fixed the nlink of inode %llu\n", rec->ino);
	/*
	 * Clear the flag anyway, or we will loop forever for the same inode
	 * as it will not be removed from the bad inode list and the dead loop
	 */
2557 	rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2558 	btrfs_release_path(path);
2563  * Check if there is any normal(reg or prealloc) file extent for given
2565  * This is used to determine the file type when neither its dir_index/item or
2566  * inode_item exists.
2568  * This will *NOT* report error, if any error happens, just consider it does
2569  * not have any normal file extent.
2571 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2573 	struct btrfs_path *path;
2574 	struct btrfs_key key;
2575 	struct btrfs_key found_key;
2576 	struct btrfs_file_extent_item *fi;
2580 	path = btrfs_alloc_path();
2584 	key.type = BTRFS_EXTENT_DATA_KEY;
2587 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	/* Search may land past the leaf's last slot; move to the next leaf. */
2592 	if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2593 		ret = btrfs_next_leaf(root, path);
2600 	btrfs_item_key_to_cpu(path->nodes[0], &found_key,
	/* Give up silently if we are no longer on this inode's extents. */
2602 	if (found_key.objectid != ino ||
2603 	    found_key.type != BTRFS_EXTENT_DATA_KEY)
2605 	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2606 			    struct btrfs_file_extent_item);
2607 	type = btrfs_file_extent_type(path->nodes[0], fi);
	/* Anything but inline (i.e. reg/prealloc) counts as a normal extent. */
2608 	if (type != BTRFS_FILE_EXTENT_INLINE) {
2614 	btrfs_free_path(path);
2618 static u32 btrfs_type_to_imode(u8 type)
2620 static u32 imode_by_btrfs_type[] = {
2621 [BTRFS_FT_REG_FILE] = S_IFREG,
2622 [BTRFS_FT_DIR] = S_IFDIR,
2623 [BTRFS_FT_CHRDEV] = S_IFCHR,
2624 [BTRFS_FT_BLKDEV] = S_IFBLK,
2625 [BTRFS_FT_FIFO] = S_IFIFO,
2626 [BTRFS_FT_SOCK] = S_IFSOCK,
2627 [BTRFS_FT_SYMLINK] = S_IFLNK,
2630 return imode_by_btrfs_type[(type)];
/*
 * Repair a missing inode item: infer the file type from surviving evidence
 * (real file extents => FILE, dir items => DIR, orphan extents => FILE,
 * else FILE as fallback), create a fresh inode item, and force the nlink
 * repair path to run afterwards by setting I_ERR_LINK_COUNT_WRONG.
 */
2633 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2634 				struct btrfs_root *root,
2635 				struct btrfs_path *path,
2636 				struct inode_record *rec)
2640 	int type_recovered = 0;
2643 	printf("Trying to rebuild inode:%llu\n", rec->ino);
2645 	type_recovered = !find_file_type(rec, &filetype);
	/*
	 * Try to determine inode type if type not found.
	 *
	 * For found regular file extent, it must be FILE.
	 * For found dir_item/index, it must be DIR.
	 *
	 * For undetermined one, use FILE as fallback.
	 *
	 * TODO:
	 * 1. If found backref(inode_index/item is already handled) to it,
	 *    Need new inode-inode ref structure to allow search for that.
	 */
2660 	if (!type_recovered) {
2661 		if (rec->found_file_extent &&
2662 		    find_normal_file_extent(root, rec->ino)) {
2664 			filetype = BTRFS_FT_REG_FILE;
2665 		} else if (rec->found_dir_item) {
2667 			filetype = BTRFS_FT_DIR;
2668 		} else if (!list_empty(&rec->orphan_extents)) {
2670 			filetype = BTRFS_FT_REG_FILE;
		/* NOTE(review): "determint" is a typo ("determine") in this
		 * user-facing message; left as-is in this documentation pass. */
2672 			printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2675 			filetype = BTRFS_FT_REG_FILE;
2679 	ret = btrfs_new_inode(trans, root, rec->ino,
2680 			      mode | btrfs_type_to_imode(filetype));
	/*
	 * Here inode rebuild is done, we only rebuild the inode item,
	 * don't repair the nlink(like move to lost+found).
	 * That is the job of nlink repair.
	 *
	 * We just fill the record and return
	 */
2691 	rec->found_dir_item = 1;
2692 	rec->imode = mode | btrfs_type_to_imode(filetype);
2694 	rec->errors &= ~I_ERR_NO_INODE_ITEM;
2695 	/* Ensure the inode_nlinks repair function will be called */
2696 	rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach orphan data extents to their inode: for each recorded orphan,
 * either free it (if it would conflict with an existing file extent) or
 * insert a matching file extent item, updating the inode record's size and
 * hole bookkeeping as extents are restored.
 */
2701 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2702 				      struct btrfs_root *root,
2703 				      struct btrfs_path *path,
2704 				      struct inode_record *rec)
2706 	struct orphan_data_extent *orphan;
2707 	struct orphan_data_extent *tmp;
2710 	list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
		/*
		 * Check for conflicting file extents
		 *
		 * Here we don't know whether the extents is compressed or not,
		 * so we can only assume it not compressed nor data offset,
		 * and use its disk_len as extent length.
		 */
2718 		ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2719 				       orphan->offset, orphan->disk_len, 0);
2720 		btrfs_release_path(path);
2725 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2726 				orphan->disk_bytenr, orphan->disk_len);
		/* Conflicting range: give the orphan's extent back. */
2727 			ret = btrfs_free_extent(trans,
2728 					root->fs_info->extent_root,
2729 					orphan->disk_bytenr, orphan->disk_len,
2730 					0, root->objectid, orphan->objectid,
		/* No conflict: recreate the file extent item as-is. */
2735 		ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2736 				orphan->offset, orphan->disk_bytenr,
2737 				orphan->disk_len, orphan->disk_len);
2741 		/* Update file size info */
2742 		rec->found_size += orphan->disk_len;
2743 		if (rec->found_size == rec->nbytes)
2744 			rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2746 		/* Update the file extent hole info too */
2747 		ret = del_file_extent_hole(&rec->holes, orphan->offset,
2751 		if (RB_EMPTY_ROOT(&rec->holes))
2752 			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2754 		list_del(&orphan->list);
2757 	rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill every recorded gap ("discount extent") in an inode's file extent
 * coverage by punching explicit holes, so the extent map is contiguous
 * again.  A file that lost all its extents gets one hole spanning its
 * whole (sector-rounded) isize.
 */
2762 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2763 					struct btrfs_root *root,
2764 					struct btrfs_path *path,
2765 					struct inode_record *rec)
2767 	struct rb_node *node;
2768 	struct file_extent_hole *hole;
2772 	node = rb_first(&rec->holes);
	/* Punch each tracked hole and drop it from the rb-tree. */
2776 		hole = rb_entry(node, struct file_extent_hole, node);
2777 		ret = btrfs_punch_hole(trans, root, rec->ino,
2778 				       hole->start, hole->len);
2781 		ret = del_file_extent_hole(&rec->holes, hole->start,
2785 		if (RB_EMPTY_ROOT(&rec->holes))
2786 			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2787 		node = rb_first(&rec->holes);
2789 	/* special case for a file losing all its file extent */
2791 		ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2792 				       round_up(rec->isize, root->sectorsize));
2796 	printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2797 	       rec->ino, root->objectid);
/*
 * Attempt to repair a single inode record.
 *
 * Only acts when @rec carries one of the repairable error bits listed below;
 * the individual repair_inode_* helpers are chained so that a failure of one
 * step (non-zero ret) short-circuits the remaining repairs.  Ordering
 * matters: the inode item must exist before extents/isize/nlink fixes.
 */
static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	/* Nothing we know how to fix -- bail out early */
	if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
			     I_ERR_NO_ORPHAN_ITEM |
			     I_ERR_LINK_COUNT_WRONG |
			     I_ERR_NO_INODE_ITEM |
			     I_ERR_FILE_EXTENT_ORPHAN |
			     I_ERR_FILE_EXTENT_DISCOUNT|
			     I_ERR_FILE_NBYTES_WRONG)))
	path = btrfs_alloc_path();
	/*
	 * For nlink repair, it may create a dir and add link, so
	 * 2 for parent(256)'s dir_index and dir_item
	 * 2 for lost+found dir's inode_item and inode_ref
	 * 1 for the new inode_ref of the file
	 * 2 for lost+found dir's dir_index and dir_item for the file
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	/* Each following repair only runs if all previous ones succeeded */
	if (rec->errors & I_ERR_NO_INODE_ITEM)
		ret = repair_inode_no_item(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
		ret = repair_inode_orphan_extent(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
		ret = repair_inode_discount_extent(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
		ret = repair_inode_isize(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
		ret = repair_inode_orphan_item(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
		ret = repair_inode_nlinks(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
		ret = repair_inode_nbytes(trans, root, path, rec);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Validate (and, with -repair, fix) every inode record collected for @root.
 *
 * Pass structure:
 *  1. record the highest seen ino so a later 'lost+found' can pick a free one
 *  2. repair backrefs in multiple stages (delete invalid first, then re-add)
 *  3. verify/recreate the root directory inode
 *  4. walk all remaining records, set error bits, try try_repair_inode(),
 *     and print any unresolved inode/backref errors.
 *
 * Returns 0 when clean, -1 when unrepaired errors remain.
 * NOTE(review): loop/branch headers for several spans are elided in this
 * chunk; cross-check stage handling against the full file.
 */
static int check_inode_recs(struct btrfs_root *root,
			    struct cache_tree *inode_cache)
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);
	/* A dead root (refs == 0) should have no inode records at all */
	if (btrfs_root_refs(&root->root_item) == 0) {
		if (!cache_tree_empty(inode_cache))
			fprintf(stderr, "warning line %d\n", __LINE__);
	/*
	 * We need to record the highest inode number for later 'lost+found'
	 * dir creation.
	 * We must select a ino not used/refered by any existing inode, or
	 * 'lost+found' ino may be a missing ino in a corrupted leaf,
	 * this may cause 'lost+found' dir has wrong nlinks.
	 */
	cache = last_cache_extent(inode_cache);
		node = container_of(cache, struct ptr_node, cache);
		if (rec->ino > root->highest_inode)
			root->highest_inode = rec->ino;
	/*
	 * We need to repair backrefs first because we could change some of the
	 * errors in the inode recs.
	 *
	 * We also need to go through and delete invalid backrefs first and then
	 * add the correct ones second. We do this because we may get EEXIST
	 * when adding back the correct index because we hadn't yet deleted the
	 * invalid index.
	 *
	 * For example, if we were missing a dir index then the directories
	 * isize would be wrong, so if we fixed the isize to what we thought it
	 * would be and then fixed the backref we'd still have a invalid fs, so
	 * we need to add back the dir index and then check to see if the isize
	 * is still wrong.
	 */
		if (stage == 3 && !err)
		cache = search_cache_extent(inode_cache, 0);
		while (repair && cache) {
			node = container_of(cache, struct ptr_node, cache);
			cache = next_cache_extent(cache);
			/* Need to free everything up and rescan */
				remove_cache_extent(inode_cache, &node->cache);
				free_inode_rec(rec);
			if (list_empty(&rec->backrefs))
			ret = repair_inode_backrefs(root, rec, inode_cache,
	/* Verify the root directory inode itself */
	rec = get_inode_rec(inode_cache, root_dirid, 0);
	BUG_ON(IS_ERR(rec));
		ret = check_root_dir(rec);
			fprintf(stderr, "root %llu root dir %llu error\n",
				(unsigned long long)root->root_key.objectid,
				(unsigned long long)root_dirid);
			print_inode_error(root, rec);
			/* Repair mode: recreate the missing root dir */
			struct btrfs_trans_handle *trans;
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				err = PTR_ERR(trans);
				"root %llu missing its root dir, recreating\n",
				(unsigned long long)root->objectid);
			ret = btrfs_make_root_dir(trans, root, root_dirid);
			btrfs_commit_transaction(trans, root);
		fprintf(stderr, "root %llu root dir %llu not found\n",
			(unsigned long long)root->root_key.objectid,
			(unsigned long long)root_dirid);
	/* Walk every remaining inode record and flag/repair/report errors */
	cache = search_cache_extent(inode_cache, 0);
		node = container_of(cache, struct ptr_node, cache);
		remove_cache_extent(inode_cache, &node->cache);
		/* Root dir and the orphan objectid are handled elsewhere */
		if (rec->ino == root_dirid ||
		    rec->ino == BTRFS_ORPHAN_OBJECTID) {
			free_inode_rec(rec);
		/* An unlinked inode must have an orphan item to be valid */
		if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
			ret = check_orphan_item(root, rec->ino);
				rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
			if (can_free_inode_rec(rec)) {
				free_inode_rec(rec);
		if (!rec->found_inode_item)
			rec->errors |= I_ERR_NO_INODE_ITEM;
		if (rec->found_link != rec->nlink)
			rec->errors |= I_ERR_LINK_COUNT_WRONG;
			ret = try_repair_inode(root, rec);
			if (ret == 0 && can_free_inode_rec(rec)) {
				free_inode_rec(rec);
		if (!(repair && ret == 0))
		print_inode_error(root, rec);
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_inode_ref)
				backref->errors |= REF_ERR_NO_INODE_REF;
			fprintf(stderr, "\tunresolved ref dir %llu index %llu"
				" namelen %u name %s filetype %d errors %x",
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
				backref->filetype, backref->errors);
			print_ref_error(backref->errors);
		free_inode_rec(rec);
	return (error > 0) ? -1 : 0;
3035 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3038 struct cache_extent *cache;
3039 struct root_record *rec = NULL;
3042 cache = lookup_cache_extent(root_cache, objectid, 1);
3044 rec = container_of(cache, struct root_record, cache);
3046 rec = calloc(1, sizeof(*rec));
3048 return ERR_PTR(-ENOMEM);
3049 rec->objectid = objectid;
3050 INIT_LIST_HEAD(&rec->backrefs);
3051 rec->cache.start = objectid;
3052 rec->cache.size = 1;
3054 ret = insert_cache_extent(root_cache, &rec->cache);
3056 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (@ref_root, @dir, @name[,namelen]), or
 * allocate a new one (with inline NUL-terminated name storage) and append it
 * to rec->backrefs.  @index is only stored for newly created backrefs.
 */
static struct root_backref *get_root_backref(struct root_record *rec,
					     u64 ref_root, u64 dir, u64 index,
					     const char *name, int namelen)
	struct root_backref *backref;
	/* Reuse an existing backref when root/dir/name all match */
	list_for_each_entry(backref, &rec->backrefs, list) {
		if (backref->ref_root != ref_root || backref->dir != dir ||
		    backref->namelen != namelen)
		if (memcmp(name, backref->name, namelen))
	/* Not found: allocate record plus trailing name buffer in one go */
	backref = calloc(1, sizeof(*backref) + namelen + 1);
	backref->ref_root = ref_root;
	backref->index = index;
	backref->namelen = namelen;
	memcpy(backref->name, name, namelen);
	backref->name[namelen] = '\0';
	list_add_tail(&backref->list, &rec->backrefs);
3089 static void free_root_record(struct cache_extent *cache)
3091 struct root_record *rec;
3092 struct root_backref *backref;
3094 rec = container_of(cache, struct root_record, cache);
3095 while (!list_empty(&rec->backrefs)) {
3096 backref = list_entry(rec->backrefs.next,
3097 struct root_backref, list);
3098 list_del(&backref->list);
3105 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root @root_id coming from @ref_root (via a
 * DIR_ITEM/DIR_INDEX in directory @dir, or a ROOT_REF/ROOT_BACKREF item).
 *
 * Each item type sets its "found" flag on the shared backref; mismatching
 * indexes or duplicate ref items are flagged in backref->errors.  A root is
 * considered reachable once both the forward ref and the dir item were seen.
 */
static int add_root_backref(struct cache_tree *root_cache,
			    u64 root_id, u64 ref_root, u64 dir, u64 index,
			    const char *name, int namelen,
			    int item_type, int errors)
	struct root_record *rec;
	struct root_backref *backref;
	rec = get_root_rec(root_cache, root_id);
	BUG_ON(IS_ERR(rec));
	backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
	backref->errors |= errors;
	/* DIR_ITEM carries no index; everything else must agree on it */
	if (item_type != BTRFS_DIR_ITEM_KEY) {
		if (backref->found_dir_index || backref->found_back_ref ||
		    backref->found_forward_ref) {
			if (backref->index != index)
				backref->errors |= REF_ERR_INDEX_UNMATCH;
			backref->index = index;
	if (item_type == BTRFS_DIR_ITEM_KEY) {
		if (backref->found_forward_ref)
		backref->found_dir_item = 1;
	} else if (item_type == BTRFS_DIR_INDEX_KEY) {
		backref->found_dir_index = 1;
	} else if (item_type == BTRFS_ROOT_REF_KEY) {
		if (backref->found_forward_ref)
			backref->errors |= REF_ERR_DUP_ROOT_REF;
		else if (backref->found_dir_item)
		backref->found_forward_ref = 1;
	} else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
		if (backref->found_back_ref)
			backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
		backref->found_back_ref = 1;
	/* Reachable = both the forward root ref and the dir item exist */
	if (backref->found_forward_ref && backref->found_dir_item)
		backref->reachable = 1;
/*
 * Convert per-subvolume "root directory entries" collected in @src_cache into
 * root backrefs in the global @dst_cache.
 *
 * Reloc tree records are discarded outright.  For every remaining inode
 * record that is actually a child root (is_child_root()), its dir item/index
 * backrefs are re-registered via add_root_backref(), then the record freed.
 */
static int merge_root_recs(struct btrfs_root *root,
			   struct cache_tree *src_cache,
			   struct cache_tree *dst_cache)
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	/* Reloc trees carry no root refs worth merging */
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		free_inode_recs_tree(src_cache);
	cache = search_cache_extent(src_cache, 0);
		node = container_of(cache, struct ptr_node, cache);
		remove_cache_extent(src_cache, &node->cache);
		ret = is_child_root(root, root->objectid, rec->ino);
		list_for_each_entry(backref, &rec->backrefs, list) {
			BUG_ON(backref->found_inode_ref);
			if (backref->found_dir_item)
				add_root_backref(dst_cache, rec->ino,
					root->root_key.objectid, backref->dir,
					backref->index, backref->name,
					backref->namelen, BTRFS_DIR_ITEM_KEY,
			if (backref->found_dir_index)
				add_root_backref(dst_cache, rec->ino,
					root->root_key.objectid, backref->dir,
					backref->index, backref->name,
					backref->namelen, BTRFS_DIR_INDEX_KEY,
		free_inode_rec(rec);
/*
 * Verify that every fs tree in @root_cache is referenced from somewhere
 * reachable, propagating reachability iteratively from the top-level fs
 * tree, then report unreferenced trees and unresolved root backrefs.
 *
 * Returns 1 if any error was found, 0 otherwise.
 * NOTE(review): the fixed-point loop header is elided here — the reachability
 * pass repeats until no backref changes state.
 */
static int check_root_refs(struct btrfs_root *root,
			   struct cache_tree *root_cache)
	struct root_record *rec;
	struct root_record *ref_root;
	struct root_backref *backref;
	struct cache_extent *cache;
	/* The top-level fs tree is always reachable by definition */
	rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
	BUG_ON(IS_ERR(rec));
	/* fixme: this can not detect circular references */
	cache = search_cache_extent(root_cache, 0);
		rec = container_of(cache, struct root_record, cache);
		cache = next_cache_extent(cache);
		if (rec->found_ref == 0)
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->reachable)
			/* A ref only counts if its referrer is itself reachable */
			ref_root = get_root_rec(root_cache,
			BUG_ON(IS_ERR(ref_root));
			if (ref_root->found_ref > 0)
			backref->reachable = 0;
		if (rec->found_ref == 0)
	/* Reporting pass: print every unreferenced tree / broken backref */
	cache = search_cache_extent(root_cache, 0);
		rec = container_of(cache, struct root_record, cache);
		cache = next_cache_extent(cache);
		if (rec->found_ref == 0 &&
		    rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
		    rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
			/* Orphan roots are legitimately unreferenced */
			ret = check_orphan_item(root->fs_info->tree_root,
			/*
			 * If we don't have a root item then we likely just have
			 * a dir item in a snapshot for this root but no actual
			 * ref key or anything so it's meaningless.
			 */
			if (!rec->found_root_item)
			fprintf(stderr, "fs tree %llu not referenced\n",
				(unsigned long long)rec->objectid);
		if (rec->found_ref > 0 && !rec->found_root_item)
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_back_ref)
				backref->errors |= REF_ERR_NO_ROOT_BACKREF;
			if (!backref->found_forward_ref)
				backref->errors |= REF_ERR_NO_ROOT_REF;
			if (backref->reachable && backref->errors)
		fprintf(stderr, "fs tree %llu refs %u %s\n",
			(unsigned long long)rec->objectid, rec->found_ref,
			rec->found_root_item ? "" : "not found");
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->reachable)
			if (!backref->errors && rec->found_root_item)
			fprintf(stderr, "\tunresolved ref root %llu dir %llu"
				" index %llu namelen %u name %s errors %x\n",
				(unsigned long long)backref->ref_root,
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
			print_ref_error(backref->errors);
	return errors > 0 ? 1 : 0;
/*
 * Parse one ROOT_REF / ROOT_BACKREF item at @slot of @eb and register it in
 * @root_cache via add_root_backref().
 *
 * For ROOT_REF the key is (parent, REF, child); for ROOT_BACKREF it is
 * (child, BACKREF, parent) — hence the swapped objectid/offset below.
 * Names longer than BTRFS_NAME_LEN are truncated and flagged.
 */
static int process_root_ref(struct extent_buffer *eb, int slot,
			    struct btrfs_key *key,
			    struct cache_tree *root_cache)
	struct btrfs_root_ref *ref;
	char namebuf[BTRFS_NAME_LEN];
	ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
	dirid = btrfs_root_ref_dirid(eb, ref);
	index = btrfs_root_ref_sequence(eb, ref);
	name_len = btrfs_root_ref_name_len(eb, ref);
	if (name_len <= BTRFS_NAME_LEN) {
		/* name too long: clamp and remember the error */
		len = BTRFS_NAME_LEN;
		error = REF_ERR_NAME_TOO_LONG;
	read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
	if (key->type == BTRFS_ROOT_REF_KEY) {
		add_root_backref(root_cache, key->offset, key->objectid, dirid,
				 index, namebuf, len, key->type, error);
		add_root_backref(root_cache, key->objectid, key->offset, dirid,
				 index, namebuf, len, key->type, error);
3362 static void free_corrupt_block(struct cache_extent *cache)
3364 struct btrfs_corrupt_block *corrupt;
3366 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3370 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Repair the btree of the given root.
 *
 * The fix is to remove the node key in corrupt_blocks cache_tree,
 * and rebalance the tree.
 * After the fix, the btree should be writeable.
 */
static int repair_btree(struct btrfs_root *root,
			struct cache_tree *corrupt_blocks)
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_corrupt_block *corrupt;
	struct cache_extent *cache;
	struct btrfs_key key;
	if (cache_tree_empty(corrupt_blocks))
	path = btrfs_alloc_path();
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		fprintf(stderr, "Error starting transaction: %s\n",
	/* Pass 1: drop the pointer and backing extent of each corrupt block */
	cache = first_cache_extent(corrupt_blocks);
		corrupt = container_of(cache, struct btrfs_corrupt_block,
		level = corrupt->level;
		path->lowest_level = level;
		key.objectid = corrupt->key.objectid;
		key.type = corrupt->key.type;
		key.offset = corrupt->key.offset;
		/*
		 * Here we don't want to do any tree balance, since it may
		 * cause a balance with corrupted brother leaf/node,
		 * so ins_len set to 0 here.
		 * Balance will be done after all corrupt node/leaf is deleted.
		 */
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		offset = btrfs_node_blockptr(path->nodes[level],
					     path->slots[level]);
		/* Remove the ptr */
		ret = btrfs_del_ptr(trans, root, path, level,
				    path->slots[level]);
		/*
		 * Remove the corresponding extent
		 * return value is not concerned.
		 */
		btrfs_release_path(path);
		ret = btrfs_free_extent(trans, root, offset, root->nodesize,
					0, root->root_key.objectid,
		cache = next_cache_extent(cache);
	/* Pass 2: balance the btree using btrfs_search_slot() */
	cache = first_cache_extent(corrupt_blocks);
		corrupt = container_of(cache, struct btrfs_corrupt_block,
		memcpy(&key, &corrupt->key, sizeof(key));
		/* ins_len -1 triggers rebalancing around the removed key */
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		/* return will always >0 since it won't find the item */
		btrfs_release_path(path);
		cache = next_cache_extent(cache);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Fully check one fs/subvolume tree: walk every block, collect inode and
 * root records, detect corrupt tree blocks, and (with -repair) attempt to
 * fix the btree via repair_btree().
 *
 * Results are merged into @root_cache; inode records are validated with
 * check_inode_recs().  Returns the first error encountered (err/ret handling
 * partially elided in this chunk).
 */
static int check_fs_root(struct btrfs_root *root,
			 struct cache_tree *root_cache,
			 struct walk_control *wc)
	struct btrfs_path path;
	struct shared_node root_node;
	struct root_record *rec;
	struct btrfs_root_item *root_item = &root->root_item;
	struct cache_tree corrupt_blocks;
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;
	enum btrfs_tree_block_status status;
	/*
	 * Reuse the corrupt_block cache tree to record corrupted tree blocks.
	 *
	 * Unlike the usage in extent tree check, here we do it in a per
	 * fs/subvol tree base.
	 */
	cache_tree_init(&corrupt_blocks);
	root->fs_info->corrupt_blocks = &corrupt_blocks;
	/* Reloc trees get no root_record of their own */
	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		rec = get_root_rec(root_cache, root->root_key.objectid);
		BUG_ON(IS_ERR(rec));
		if (btrfs_root_refs(root_item) > 0)
			rec->found_root_item = 1;
	btrfs_init_path(&path);
	memset(&root_node, 0, sizeof(root_node));
	cache_tree_init(&root_node.root_cache);
	cache_tree_init(&root_node.inode_cache);
	/* Move the orphan extent record to corresponding inode_record */
	list_for_each_entry_safe(orphan, tmp,
				 &root->orphan_data_extents, list) {
		struct inode_record *inode;
		inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
		BUG_ON(IS_ERR(inode));
		inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
		list_move(&orphan->list, &inode->orphan_extents);
	/* Prime the walk_control with this tree's root node */
	level = btrfs_header_level(root->node);
	memset(wc->nodes, 0, sizeof(wc->nodes));
	wc->nodes[level] = &root_node;
	wc->active_node = level;
	wc->root_level = level;
	/* We may not have checked the root block, lets do that now */
	if (btrfs_is_leaf(root->node))
		status = btrfs_check_leaf(root, NULL, root->node);
		status = btrfs_check_node(root, NULL, root->node);
	if (status != BTRFS_TREE_BLOCK_CLEAN)
	/* Start from the tree root, or resume from drop_progress */
	if (btrfs_root_refs(root_item) > 0 ||
	    btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path.nodes[level] = root->node;
		extent_buffer_get(root->node);
		path.slots[level] = 0;
		struct btrfs_key key;
		struct btrfs_disk_key found_key;
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path.lowest_level = level;
		wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
		btrfs_node_key(path.nodes[level], &found_key,
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
	/* Walk the whole tree down then back up, collecting records */
	wret = walk_down_tree(root, &path, wc, &level);
	wret = walk_up_tree(root, &path, wc, &level);
	btrfs_release_path(&path);
	/* Report (and optionally repair) any corrupt blocks found */
	if (!cache_tree_empty(&corrupt_blocks)) {
		struct cache_extent *cache;
		struct btrfs_corrupt_block *corrupt;
		printf("The following tree block(s) is corrupted in tree %llu:\n",
		       root->root_key.objectid);
		cache = first_cache_extent(&corrupt_blocks);
			corrupt = container_of(cache,
					       struct btrfs_corrupt_block,
			printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
			       cache->start, corrupt->level,
			       corrupt->key.objectid, corrupt->key.type,
			       corrupt->key.offset);
			cache = next_cache_extent(cache);
		printf("Try to repair the btree for root %llu\n",
		       root->root_key.objectid);
		ret = repair_btree(root, &corrupt_blocks);
			fprintf(stderr, "Failed to repair btree: %s\n",
		printf("Btree for root %llu is fixed\n",
		       root->root_key.objectid);
	err = merge_root_recs(root, &root_node.root_cache, root_cache);
	if (root_node.current) {
		root_node.current->checked = 1;
		maybe_free_inode_rec(&root_node.inode_cache,
	err = check_inode_recs(root, &root_node.inode_cache);
	free_corrupt_blocks_tree(&corrupt_blocks);
	root->fs_info->corrupt_blocks = NULL;
	free_orphan_data_extents(&root->orphan_data_extents);
3615 static int fs_root_objectid(u64 objectid)
3617 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3618 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3620 return is_fstree(objectid);
/*
 * Iterate the tree of tree roots and run check_fs_root() on every fs tree,
 * plus register all ROOT_REF/ROOT_BACKREF items into @root_cache.
 *
 * If the tree root node changes under us (a repair committed a transaction)
 * or a sub-check returns -EAGAIN, all collected records are dropped and the
 * scan restarts from the beginning.
 */
static int check_fs_roots(struct btrfs_root *root,
			  struct cache_tree *root_cache)
	struct btrfs_path path;
	struct btrfs_key key;
	struct walk_control wc;
	struct extent_buffer *leaf, *tree_node;
	struct btrfs_root *tmp_root;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	if (ctx.progress_enabled) {
		ctx.tp = TASK_FS_ROOTS;
		task_start(ctx.info);
	/*
	 * Just in case we made any changes to the extent tree that weren't
	 * reflected into the free space cache yet.
	 */
		reset_cached_block_groups(root->fs_info);
	memset(&wc, 0, sizeof(wc));
	cache_tree_init(&wc.shared);
	btrfs_init_path(&path);
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	/* Remember the root node so we can detect concurrent commits */
	tree_node = tree_root->node;
		if (tree_node != tree_root->node) {
			/* tree root changed under us: rescan from scratch */
			free_root_recs_tree(root_cache);
			btrfs_release_path(&path);
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(tree_root, &path);
			leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type == BTRFS_ROOT_ITEM_KEY &&
		    fs_root_objectid(key.objectid)) {
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
				/* reloc roots are read uncached on purpose */
				tmp_root = btrfs_read_fs_root_no_cache(
						root->fs_info, &key);
				key.offset = (u64)-1;
				tmp_root = btrfs_read_fs_root(
						root->fs_info, &key);
			if (IS_ERR(tmp_root)) {
			ret = check_fs_root(tmp_root, root_cache, &wc);
			if (ret == -EAGAIN) {
				free_root_recs_tree(root_cache);
				btrfs_release_path(&path);
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
				btrfs_free_fs_root(tmp_root);
		} else if (key.type == BTRFS_ROOT_REF_KEY ||
			   key.type == BTRFS_ROOT_BACKREF_KEY) {
			process_root_ref(leaf, path.slots[0], &key,
	btrfs_release_path(&path);
	free_extent_cache_tree(&wc.shared);
	if (!cache_tree_empty(&wc.shared))
		fprintf(stderr, "warning line %d\n", __LINE__);
	task_stop(ctx.info);
/*
 * Cross-check every backref of @rec against the extent tree state.
 *
 * Detects: backrefs missing from the extent tree, tree backrefs never
 * referenced, data backrefs with wrong local/global ref counts, and data
 * backrefs whose disk bytenr/length disagree with the extent record.
 * Diagnostics are printed only when @print_errs is set.
 * Returns non-zero when any mismatch was found (error accumulation lines
 * are elided in this chunk).
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *back;
	struct tree_backref *tback;
	struct data_backref *dback;
	while(cur != &rec->backrefs) {
		back = list_entry(cur, struct extent_backref, list);
		/* Backref known from refs but absent in the extent tree */
		if (!back->found_extent_tree) {
			if (back->is_data) {
				dback = (struct data_backref *)back;
				fprintf(stderr, "Backref %llu %s %llu"
					" owner %llu offset %llu num_refs %lu"
					" not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
				tback = (struct tree_backref *)back;
				fprintf(stderr, "Backref %llu parent %llu"
					" root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
		/* Tree backref present in extent tree but never walked */
		if (!back->is_data && !back->found_ref) {
			tback = (struct tree_backref *)back;
			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		if (back->is_data) {
			dback = (struct data_backref *)back;
			/* local count: refs seen while scanning fs trees */
			if (dback->found_ref != dback->num_refs) {
				fprintf(stderr, "Incorrect local backref count"
					" on %llu %s %llu owner %llu"
					" offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs, back);
			if (dback->disk_bytenr != rec->start) {
				fprintf(stderr, "Backref disk bytenr does not"
					" match extent record, bytenr=%llu, "
					"ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);
			if (dback->bytes != rec->nr) {
				fprintf(stderr, "Backref bytes do not match "
					"extent backref, bytenr=%llu, ref "
					"bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
		/* Accumulate the global found count across all backrefs */
		if (!back->is_data) {
			dback = (struct data_backref *)back;
			found += dback->found_ref;
	if (found != rec->refs) {
		fprintf(stderr, "Incorrect global backref count "
			"on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
3835 static int free_all_extent_backrefs(struct extent_record *rec)
3837 struct extent_backref *back;
3838 struct list_head *cur;
3839 while (!list_empty(&rec->backrefs)) {
3840 cur = rec->backrefs.next;
3841 back = list_entry(cur, struct extent_backref, list);
/*
 * Drain @extent_cache: detach every extent_record, free its backrefs and
 * then the record itself.  @fs_info is presumably unused here beyond the
 * signature -- TODO confirm against the full file.
 */
static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
				     struct cache_tree *extent_cache)
	struct cache_extent *cache;
	struct extent_record *rec;
	cache = first_cache_extent(extent_cache);
	rec = container_of(cache, struct extent_record, cache);
	remove_cache_extent(extent_cache, cache);
	free_all_extent_backrefs(rec);
/*
 * Free @rec and drop it from @extent_cache if, and only if, it is fully
 * verified: contents and owner checked, ref counts consistent, no
 * duplicates, all backpointers matched, and no structural errors
 * (bad full backref / crossing stripe boundary / wrong chunk type).
 */
static int maybe_free_extent_rec(struct cache_tree *extent_cache,
				 struct extent_record *rec)
	if (rec->content_checked && rec->owner_ref_checked &&
	    rec->extent_item_refs == rec->refs && rec->refs > 0 &&
	    rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
	    !rec->bad_full_backref && !rec->crossing_stripes &&
	    !rec->wrong_chunk_type) {
		remove_cache_extent(extent_cache, &rec->cache);
		free_all_extent_backrefs(rec);
		list_del_init(&rec->list);
/*
 * Verify that the header owner of tree block @buf really references it.
 *
 * First scans @rec's non-full tree backrefs for a root matching the header
 * owner; failing that, reads the owner's fs tree and searches for the
 * block's first key one level up, accepting the owner if the parent node
 * points at buf->start.  Returns 0 when the owner ref checks out, 1 when it
 * does not.
 */
static int check_owner_ref(struct btrfs_root *root,
			    struct extent_record *rec,
			    struct extent_buffer *buf)
	struct extent_backref *node;
	struct tree_backref *back;
	struct btrfs_root *ref_root;
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *parent;
	/* Fast path: an existing backref already names the header owner */
	list_for_each_entry(node, &rec->backrefs, list) {
		if (!node->found_ref)
		if (node->full_backref)
		back = (struct tree_backref *)node;
		if (btrfs_header_owner(buf) == back->root)
	BUG_ON(rec->is_root);
	/* try to find the block by search corresponding fs tree */
	key.objectid = btrfs_header_owner(buf);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	ref_root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(ref_root))
	level = btrfs_header_level(buf);
		btrfs_item_key_to_cpu(buf, &key, 0);
		btrfs_node_key_to_cpu(buf, &key, 0);
	btrfs_init_path(&path);
	/* Search one level above the block so we land on its parent */
	path.lowest_level = level + 1;
	ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
	parent = path.nodes[level + 1];
	if (parent && buf->start == btrfs_node_blockptr(parent,
					path.slots[level + 1]))
	btrfs_release_path(&path);
	return found ? 0 : 1;
/*
 * Return whether @rec carries a non-full tree backref owned by the extent
 * tree itself (BTRFS_EXTENT_TREE_OBJECTID).
 */
static int is_extent_tree_record(struct extent_record *rec)
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct tree_backref *back;
	while(cur != &rec->backrefs) {
		node = list_entry(cur, struct extent_backref, list);
		back = (struct tree_backref *)node;
		/* full backrefs identify by parent, not root -- skip them */
		if (node->full_backref)
		if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * An extent-tree block at [@start, @start+len) failed to read; record it as
 * a corrupt extent so later repair passes can deal with it.  Only acts when
 * the covering extent record belongs to the extent tree.
 */
static int record_bad_block_io(struct btrfs_fs_info *info,
			       struct cache_tree *extent_cache,
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;
	cache = lookup_cache_extent(extent_cache, start, len);
	rec = container_of(cache, struct extent_record, cache);
	if (!is_extent_tree_record(rec))
	/* Use the parent key remembered on the record to locate the damage */
	btrfs_disk_key_to_cpu(&key, &rec->parent_key);
	return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 of @buf to restore key ordering.
 *
 * For internal nodes the two key pointers are exchanged wholesale; for
 * leaves the item headers, keys and item data are all swapped.  When slot 0
 * is involved, the parent's key for this block is fixed up too.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;
		/* node: exchange the two key pointers in place */
		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
			/* slot 0 changed: propagate the new low key upward */
			struct btrfs_disk_key key;
			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		/* leaf: swap item data, offsets/sizes, then the keys */
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;
		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);
		item1_data = malloc(item1_size);
		item2_data = malloc(item2_size);
		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);
		/* each item's data moves into the other's data area */
		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);
		/* finally exchange the keys (unsafe variant skips ordering checks) */
		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Restore ascending key order inside the block at path->lowest_level by
 * swapping any adjacent pair found out of order (via swap_values()), then
 * marking the buffer dirty.  @trans is carried for the btrfs repair helper
 * signature convention.
 */
static int fix_key_order(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path)
	struct extent_buffer *buf;
	struct btrfs_key k1, k2;
	int level = path->lowest_level;
	buf = path->nodes[level];
	for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
		/* node vs leaf use different key accessors */
		btrfs_node_key_to_cpu(buf, &k1, i);
		btrfs_node_key_to_cpu(buf, &k2, i + 1);
		btrfs_item_key_to_cpu(buf, &k1, i);
		btrfs_item_key_to_cpu(buf, &k2, i + 1);
		/* already ordered: nothing to do for this pair */
		if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
		ret = swap_values(root, path, buf, i);
		btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot of leaf @buf when its key type is one we can
 * safely lose (it will be rebuilt or tolerated by later passes); other key
 * types are left alone.  Shrinks nritems and fixes up the parent low key if
 * slot 0 was removed, then dirties the buffer.
 */
static int delete_bogus_item(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct extent_buffer *buf, int slot)
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(buf);
	btrfs_item_key_to_cpu(buf, &key, slot);
	/* These are all the keys we can deal with missing. */
	if (key.type != BTRFS_DIR_INDEX_KEY &&
	    key.type != BTRFS_EXTENT_ITEM_KEY &&
	    key.type != BTRFS_METADATA_ITEM_KEY &&
	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
	       (unsigned long long)key.objectid, key.type,
	       (unsigned long long)key.offset, slot, buf->start);
	/* Shift the following item headers down over the removed slot */
	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
	btrfs_set_header_nritems(buf, nritems - 1);
		/* removed slot 0: the parent must learn the new first key */
		struct btrfs_disk_key disk_key;
		btrfs_item_key(buf, &disk_key, 0);
		btrfs_fixup_low_keys(root, path, &disk_key, 1);
	btrfs_mark_buffer_dirty(buf);
/*
 * Repair pass for a leaf whose item data offsets are inconsistent.
 * For each item: item 0's data must end at BTRFS_LEAF_DATA_SIZE, and
 * every later item's data must end exactly where the previous item's
 * data starts.  Items that extend past those bounds are deleted via
 * delete_bogus_item(); items that merely leave a gap are shifted up
 * by the gap size.  Only leaves are expected (BUG_ON on lowest_level).
 * NOTE(review): abridged listing — error paths between visible lines
 * are not shown.
 */
4114 static int fix_item_offset(struct btrfs_trans_handle *trans,
4115 struct btrfs_root *root,
4116 struct btrfs_path *path)
4118 struct extent_buffer *buf;
4122 /* We should only get this for leaves */
4123 BUG_ON(path->lowest_level);
4124 buf = path->nodes[0];
4126 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4127 unsigned int shift = 0, offset;
4129 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4130 BTRFS_LEAF_DATA_SIZE(root)) {
4131 if (btrfs_item_end_nr(buf, i) >
4132 BTRFS_LEAF_DATA_SIZE(root)) {
/* item data runs past the leaf: unrecoverable unless deletable */
4133 ret = delete_bogus_item(trans, root, path,
4137 fprintf(stderr, "item is off the end of the "
4138 "leaf, can't fix\n");
4142 shift = BTRFS_LEAF_DATA_SIZE(root) -
4143 btrfs_item_end_nr(buf, i);
4144 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4145 btrfs_item_offset_nr(buf, i - 1)) {
4146 if (btrfs_item_end_nr(buf, i) >
4147 btrfs_item_offset_nr(buf, i - 1)) {
/* this item's data overlaps the previous item's data */
4148 ret = delete_bogus_item(trans, root, path,
4152 fprintf(stderr, "items overlap, can't fix\n");
4156 shift = btrfs_item_offset_nr(buf, i - 1) -
4157 btrfs_item_end_nr(buf, i);
4162 printf("Shifting item nr %d by %u bytes in block %llu\n",
4163 i, shift, (unsigned long long)buf->start);
4164 offset = btrfs_item_offset_nr(buf, i);
/* move the item data up by 'shift' and record the new offset */
4165 memmove_extent_buffer(buf,
4166 btrfs_leaf_data(buf) + offset + shift,
4167 btrfs_leaf_data(buf) + offset,
4168 btrfs_item_size_nr(buf, i));
4169 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4171 btrfs_mark_buffer_dirty(buf);
4175 * We may have moved things, in which case we want to exit so we don't
4176 * write those changes out. Once we have proper abort functionality in
4177 * progs this can be changed to something nicer.
4184 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4185 * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are fixable here.  For every
 * root that references the bad block (found via btrfs_find_all_roots),
 * a transaction is started, the block is re-read via btrfs_search_slot
 * with skip_check_block set, and the appropriate fixer (fix_key_order
 * or fix_item_offset) is run before committing the transaction.
 * NOTE(review): abridged listing — error handling between the visible
 * lines (e.g. search_root/search_slot failures) is not shown.
 */
4187 static int try_to_fix_bad_block(struct btrfs_root *root,
4188 struct extent_buffer *buf,
4189 enum btrfs_tree_block_status status)
4191 struct btrfs_trans_handle *trans;
4192 struct ulist *roots;
4193 struct ulist_node *node;
4194 struct btrfs_root *search_root;
4195 struct btrfs_path *path;
4196 struct ulist_iterator iter;
4197 struct btrfs_key root_key, key;
4200 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4201 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4204 path = btrfs_alloc_path();
4208 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4211 btrfs_free_path(path);
4215 ULIST_ITER_INIT(&iter);
4216 while ((node = ulist_next(roots, &iter))) {
4217 root_key.objectid = node->val;
4218 root_key.type = BTRFS_ROOT_ITEM_KEY;
4219 root_key.offset = (u64)-1;
4221 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4228 trans = btrfs_start_transaction(search_root, 0);
4229 if (IS_ERR(trans)) {
4230 ret = PTR_ERR(trans);
/* search down to the bad block itself so it gets cow'ed */
4234 path->lowest_level = btrfs_header_level(buf);
4235 path->skip_check_block = 1;
4236 if (path->lowest_level)
4237 btrfs_node_key_to_cpu(buf, &key, 0);
4239 btrfs_item_key_to_cpu(buf, &key, 0);
4240 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4243 btrfs_commit_transaction(trans, search_root);
4246 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4247 ret = fix_key_order(trans, search_root, path);
4248 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4249 ret = fix_item_offset(trans, search_root, path);
4251 btrfs_commit_transaction(trans, search_root);
4254 btrfs_release_path(path);
4255 btrfs_commit_transaction(trans, search_root);
4258 btrfs_free_path(path);
/*
 * Validate a single tree block against its extent record: record the
 * generation and first key, run btrfs_check_leaf()/btrfs_check_node(),
 * and if the block is not clean attempt try_to_fix_bad_block() (only
 * when 'repair' is enabled — abridged lines imply the gating).  On
 * success the record is marked content_checked, and owner_ref_checked
 * either directly (FULL_BACKREF flag) or via check_owner_ref().
 */
4262 static int check_block(struct btrfs_root *root,
4263 struct cache_tree *extent_cache,
4264 struct extent_buffer *buf, u64 flags)
4266 struct extent_record *rec;
4267 struct cache_extent *cache;
4268 struct btrfs_key key;
4269 enum btrfs_tree_block_status status;
4273 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4276 rec = container_of(cache, struct extent_record, cache);
4277 rec->generation = btrfs_header_generation(buf);
4279 level = btrfs_header_level(buf);
4280 if (btrfs_header_nritems(buf) > 0) {
4283 btrfs_item_key_to_cpu(buf, &key, 0);
4285 btrfs_node_key_to_cpu(buf, &key, 0);
4287 rec->info_objectid = key.objectid;
4289 rec->info_level = level;
4291 if (btrfs_is_leaf(buf))
4292 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4294 status = btrfs_check_node(root, &rec->parent_key, buf);
4296 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4298 status = try_to_fix_bad_block(root, buf, status);
4299 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4301 fprintf(stderr, "bad block %llu\n",
4302 (unsigned long long)buf->start);
4305 * Signal to callers we need to start the scan over
4306 * again since we'll have cow'ed blocks.
4311 rec->content_checked = 1;
4312 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4313 rec->owner_ref_checked = 1;
4315 ret = check_owner_ref(root, rec, buf);
4317 rec->owner_ref_checked = 1;
/* record may now be fully verified and eligible for freeing */
4321 maybe_free_extent_rec(extent_cache, rec);
/*
 * Linear scan of rec->backrefs for a tree backref matching either the
 * given parent (full backref) or the given root (non-full backref).
 * Returns the match or (implied by abridged lines) NULL if none.
 */
4325 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4326 u64 parent, u64 root)
4328 struct list_head *cur = rec->backrefs.next;
4329 struct extent_backref *node;
4330 struct tree_backref *back;
4332 while(cur != &rec->backrefs) {
4333 node = list_entry(cur, struct extent_backref, list);
4337 back = (struct tree_backref *)node;
/* parent != 0 selects full-backref matching, else match by root */
4339 if (!node->full_backref)
4341 if (parent == back->parent)
4344 if (node->full_backref)
4346 if (back->root == root)
/*
 * Allocate a new tree backref for 'rec': full backref keyed by parent
 * when parent != 0 (implied), otherwise keyed by root, then link it
 * onto rec->backrefs.  NOTE(review): malloc result is not visibly
 * checked in this abridged listing.
 */
4353 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4354 u64 parent, u64 root)
4356 struct tree_backref *ref = malloc(sizeof(*ref));
4357 memset(&ref->node, 0, sizeof(ref->node));
4359 ref->parent = parent;
4360 ref->node.full_backref = 1;
4363 ref->node.full_backref = 0;
4365 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Find a data backref on 'rec' matching either parent (full backref)
 * or (root, owner, offset).  When found_ref is set, an existing found
 * backref whose bytes/disk_bytenr disagree with the caller's is NOT
 * treated as a match — the caller then allocates a second backref so
 * the mismatch can be detected later.
 */
4370 static struct data_backref *find_data_backref(struct extent_record *rec,
4371 u64 parent, u64 root,
4372 u64 owner, u64 offset,
4374 u64 disk_bytenr, u64 bytes)
4376 struct list_head *cur = rec->backrefs.next;
4377 struct extent_backref *node;
4378 struct data_backref *back;
4380 while(cur != &rec->backrefs) {
4381 node = list_entry(cur, struct extent_backref, list);
4385 back = (struct data_backref *)node;
4387 if (!node->full_backref)
4389 if (parent == back->parent)
4392 if (node->full_backref)
4394 if (back->root == root && back->owner == owner &&
4395 back->offset == offset) {
4396 if (found_ref && node->found_ref &&
4397 (back->bytes != bytes ||
4398 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref on 'rec' keyed either by parent (full
 * backref, implied by the branch structure) or by (root, owner,
 * offset), initialise bytes to max_size, link it onto rec->backrefs,
 * and widen rec->max_size if needed.  NOTE(review): malloc result is
 * not visibly checked in this abridged listing.
 */
4407 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4408 u64 parent, u64 root,
4409 u64 owner, u64 offset,
4412 struct data_backref *ref = malloc(sizeof(*ref));
4413 memset(&ref->node, 0, sizeof(ref->node));
4414 ref->node.is_data = 1;
4417 ref->parent = parent;
4420 ref->node.full_backref = 1;
4424 ref->offset = offset;
4425 ref->node.full_backref = 0;
4427 ref->bytes = max_size;
4430 list_add_tail(&ref->node.list, &rec->backrefs);
4431 if (max_size > rec->max_size)
4432 rec->max_size = max_size;
4436 /* Check if the type of extent matches with its chunk */
/*
 * Flags rec->wrong_chunk_type when an extent sits in a block group of
 * the wrong type: data extents must be in a DATA block group; metadata
 * extents must be in SYSTEM or METADATA, with the SYSTEM/METADATA
 * distinction decided by the first backref (chunk-tree root => SYSTEM).
 */
4437 static void check_extent_type(struct extent_record *rec)
4439 struct btrfs_block_group_cache *bg_cache;
4441 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4445 /* data extent, check chunk directly*/
4446 if (!rec->metadata) {
4447 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4448 rec->wrong_chunk_type = 1;
4452 /* metadata extent, check the obvious case first */
4453 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4454 BTRFS_BLOCK_GROUP_METADATA))) {
4455 rec->wrong_chunk_type = 1;
4460 * Check SYSTEM extent, as it's also marked as metadata, we can only
4461 * make sure it's a SYSTEM extent by its backref
4463 if (!list_empty(&rec->backrefs)) {
4464 struct extent_backref *node;
4465 struct tree_backref *tback;
4468 node = list_entry(rec->backrefs.next, struct extent_backref,
4470 if (node->is_data) {
4471 /* tree block shouldn't have data backref */
4472 rec->wrong_chunk_type = 1;
4475 tback = container_of(node, struct tree_backref, node);
4477 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4478 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4480 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4481 if (!(bg_cache->flags & bg_type))
4482 rec->wrong_chunk_type = 1;
/*
 * Insert or update the extent record covering [start, start+nr) in
 * extent_cache.  If a record already exists: mismatched start/duplicate
 * found_rec spawns a duplicate entry on rec->dups and links rec onto
 * the global duplicate_extents list; otherwise the existing record is
 * updated in place (refs, parent key/generation, max_size, checked
 * flags, crossing-stripes and chunk-type checks).  If no record exists
 * a fresh one is allocated, initialised, and inserted.
 * NOTE(review): abridged listing — malloc/insert failure paths and the
 * 'dup' variable's setup are not visible here.
 */
4486 static int add_extent_rec(struct cache_tree *extent_cache,
4487 struct btrfs_key *parent_key, u64 parent_gen,
4488 u64 start, u64 nr, u64 extent_item_refs,
4489 int is_root, int inc_ref, int set_checked,
4490 int metadata, int extent_rec, u64 max_size)
4492 struct extent_record *rec;
4493 struct cache_extent *cache;
4497 cache = lookup_cache_extent(extent_cache, start, nr);
4499 rec = container_of(cache, struct extent_record, cache);
4503 rec->nr = max(nr, max_size);
4506 * We need to make sure to reset nr to whatever the extent
4507 * record says was the real size, this way we can compare it to
4511 if (start != rec->start || rec->found_rec) {
4512 struct extent_record *tmp;
4515 if (list_empty(&rec->list))
4516 list_add_tail(&rec->list,
4517 &duplicate_extents);
4520 * We have to do this song and dance in case we
4521 * find an extent record that falls inside of
4522 * our current extent record but does not have
4523 * the same objectid.
4525 tmp = malloc(sizeof(*tmp));
4529 tmp->max_size = max_size;
4532 tmp->metadata = metadata;
4533 tmp->extent_item_refs = extent_item_refs;
4534 INIT_LIST_HEAD(&tmp->list);
4535 list_add_tail(&tmp->list, &rec->dups);
4536 rec->num_duplicates++;
4543 if (extent_item_refs && !dup) {
4544 if (rec->extent_item_refs) {
/* seeing a second EXTENT_ITEM for the same bytenr is suspicious */
4545 fprintf(stderr, "block %llu rec "
4546 "extent_item_refs %llu, passed %llu\n",
4547 (unsigned long long)start,
4548 (unsigned long long)
4549 rec->extent_item_refs,
4550 (unsigned long long)extent_item_refs);
4552 rec->extent_item_refs = extent_item_refs;
4557 rec->content_checked = 1;
4558 rec->owner_ref_checked = 1;
4562 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4564 rec->parent_generation = parent_gen;
4566 if (rec->max_size < max_size)
4567 rec->max_size = max_size;
4570 * A metadata extent can't cross stripe_len boundary, otherwise
4571 * kernel scrub won't be able to handle it.
4572 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4575 if (metadata && check_crossing_stripes(rec->start,
4577 rec->crossing_stripes = 1;
4578 check_extent_type(rec);
4579 maybe_free_extent_rec(extent_cache, rec);
/* no existing record: build a new one from scratch */
4582 rec = malloc(sizeof(*rec));
4584 rec->max_size = max_size;
4585 rec->nr = max(nr, max_size);
4586 rec->found_rec = !!extent_rec;
4587 rec->content_checked = 0;
4588 rec->owner_ref_checked = 0;
4589 rec->num_duplicates = 0;
4590 rec->metadata = metadata;
4591 rec->flag_block_full_backref = -1;
4592 rec->bad_full_backref = 0;
4593 rec->crossing_stripes = 0;
4594 rec->wrong_chunk_type = 0;
4595 INIT_LIST_HEAD(&rec->backrefs);
4596 INIT_LIST_HEAD(&rec->dups);
4597 INIT_LIST_HEAD(&rec->list);
4609 if (extent_item_refs)
4610 rec->extent_item_refs = extent_item_refs;
4612 rec->extent_item_refs = 0;
4615 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4617 memset(&rec->parent_key, 0, sizeof(*parent_key));
4620 rec->parent_generation = parent_gen;
4622 rec->parent_generation = 0;
4624 rec->cache.start = start;
4625 rec->cache.size = nr;
4626 ret = insert_cache_extent(extent_cache, &rec->cache);
4630 rec->content_checked = 1;
4631 rec->owner_ref_checked = 1;
4635 if (check_crossing_stripes(rec->start, rec->max_size))
4636 rec->crossing_stripes = 1;
4637 check_extent_type(rec);
/*
 * Record a tree backref for 'bytenr': create a placeholder extent
 * record if none exists, then find or allocate the matching tree
 * backref.  'found_ref' distinguishes a backref discovered by walking
 * the tree (found_ref) from one found in the extent tree
 * (found_extent_tree); seeing the same kind twice is reported as a
 * duplicate.
 */
4641 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4642 u64 parent, u64 root, int found_ref)
4644 struct extent_record *rec;
4645 struct tree_backref *back;
4646 struct cache_extent *cache;
4648 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* no record yet: insert a 1-byte placeholder and re-lookup */
4650 add_extent_rec(extent_cache, NULL, 0, bytenr,
4651 1, 0, 0, 0, 0, 1, 0, 0);
4652 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4657 rec = container_of(cache, struct extent_record, cache);
4658 if (rec->start != bytenr) {
4662 back = find_tree_backref(rec, parent, root);
4664 back = alloc_tree_backref(rec, parent, root);
4667 if (back->node.found_ref) {
4668 fprintf(stderr, "Extent back ref already exists "
4669 "for %llu parent %llu root %llu \n",
4670 (unsigned long long)bytenr,
4671 (unsigned long long)parent,
4672 (unsigned long long)root);
4674 back->node.found_ref = 1;
4676 if (back->node.found_extent_tree) {
4677 fprintf(stderr, "Extent back ref already exists "
4678 "for %llu parent %llu root %llu \n",
4679 (unsigned long long)bytenr,
4680 (unsigned long long)parent,
4681 (unsigned long long)root);
4683 back->node.found_extent_tree = 1;
4685 check_extent_type(rec);
4686 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for 'bytenr', creating a placeholder extent
 * record if necessary.  When found_ref is set, max_size is the real
 * extent size and must match any previously-found ref (the comment
 * below explains the fsync bug this guards against); otherwise the
 * extent-tree side is recorded with its num_refs count.
 */
4690 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4691 u64 parent, u64 root, u64 owner, u64 offset,
4692 u32 num_refs, int found_ref, u64 max_size)
4694 struct extent_record *rec;
4695 struct data_backref *back;
4696 struct cache_extent *cache;
4698 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4700 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4702 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4707 rec = container_of(cache, struct extent_record, cache);
4708 if (rec->max_size < max_size)
4709 rec->max_size = max_size;
4712 * If found_ref is set then max_size is the real size and must match the
4713 * existing refs. So if we have already found a ref then we need to
4714 * make sure that this ref matches the existing one, otherwise we need
4715 * to add a new backref so we can notice that the backrefs don't match
4716 * and we need to figure out who is telling the truth. This is to
4717 * account for that awful fsync bug I introduced where we'd end up with
4718 * a btrfs_file_extent_item that would have its length include multiple
4719 * prealloc extents or point inside of a prealloc extent.
4721 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4724 back = alloc_data_backref(rec, parent, root, owner, offset,
4728 BUG_ON(num_refs != 1);
4729 if (back->node.found_ref)
4730 BUG_ON(back->bytes != max_size);
4731 back->node.found_ref = 1;
4732 back->found_ref += 1;
4733 back->bytes = max_size;
4734 back->disk_bytenr = bytenr;
4736 rec->content_checked = 1;
4737 rec->owner_ref_checked = 1;
4739 if (back->node.found_extent_tree) {
4740 fprintf(stderr, "Extent back ref already exists "
4741 "for %llu parent %llu root %llu "
4742 "owner %llu offset %llu num_refs %lu\n",
4743 (unsigned long long)bytenr,
4744 (unsigned long long)parent,
4745 (unsigned long long)root,
4746 (unsigned long long)owner,
4747 (unsigned long long)offset,
4748 (unsigned long)num_refs);
4750 back->num_refs = num_refs;
4751 back->node.found_extent_tree = 1;
4753 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for later processing, using 'seen' to
 * de-duplicate: only ranges newly added to 'seen' go onto 'pending'.
 */
4757 static int add_pending(struct cache_tree *pending,
4758 struct cache_tree *seen, u64 bytenr, u32 size)
4761 ret = add_cache_extent(seen, bytenr, size);
4764 add_cache_extent(pending, bytenr, size);
/*
 * Choose up to bits_nr extents to process next, filling 'bits'.
 * Priority: a single readahead extent if one exists; otherwise batch
 * from 'nodes' starting near 'last' (backed off 32K to catch nearby
 * blocks), falling back to 'pending'.  If the batch came from 'nodes'
 * and there is spare room (> 8 slots), nearby pending extents within
 * 32K of the running end are appended to improve read locality.
 * NOTE(review): abridged listing — returns and loop bookkeeping
 * between the visible lines are not shown.
 */
4768 static int pick_next_pending(struct cache_tree *pending,
4769 struct cache_tree *reada,
4770 struct cache_tree *nodes,
4771 u64 last, struct block_info *bits, int bits_nr,
4774 unsigned long node_start = last;
4775 struct cache_extent *cache;
4778 cache = search_cache_extent(reada, 0);
4780 bits[0].start = cache->start;
4781 bits[0].size = cache->size;
4786 if (node_start > 32768)
4787 node_start -= 32768;
4789 cache = search_cache_extent(nodes, node_start);
4791 cache = search_cache_extent(nodes, 0);
4794 cache = search_cache_extent(pending, 0);
4799 bits[ret].start = cache->start;
4800 bits[ret].size = cache->size;
4801 cache = next_cache_extent(cache);
4803 } while (cache && ret < bits_nr);
4809 bits[ret].start = cache->start;
4810 bits[ret].size = cache->size;
4811 cache = next_cache_extent(cache);
4813 } while (cache && ret < bits_nr);
4815 if (bits_nr - ret > 8) {
4816 u64 lookup = bits[0].start + bits[0].size;
4817 struct cache_extent *next;
4818 next = search_cache_extent(pending, lookup);
4820 if (next->start - lookup > 32768)
4822 bits[ret].start = next->start;
4823 bits[ret].size = next->size;
4824 lookup = next->start + next->size;
4828 next = next_cache_extent(next);
/* Unlink a chunk_record from its lists before it is released. */
4836 static void free_chunk_record(struct cache_extent *cache)
4838 struct chunk_record *rec;
4840 rec = container_of(cache, struct chunk_record, cache);
4841 list_del_init(&rec->list);
4842 list_del_init(&rec->dextents);
/* Free every chunk_record in the cache tree via free_chunk_record(). */
4846 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4848 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* rb_tree destructor callback for device_record nodes. */
4851 static void free_device_record(struct rb_node *node)
4853 struct device_record *rec;
4855 rec = container_of(node, struct device_record, node);
/* NOTE(review): the line below belongs to free_device_cache(); its
 * signature is missing from this abridged listing. */
4859 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert a block group record into the tree's cache and, on success,
 * append it to the tree's block_groups list.
 */
4861 int insert_block_group_record(struct block_group_tree *tree,
4862 struct block_group_record *bg_rec)
4866 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4870 list_add_tail(&bg_rec->list, &tree->block_groups);
/* Unlink a block_group_record from its list before release. */
4874 static void free_block_group_record(struct cache_extent *cache)
4876 struct block_group_record *rec;
4878 rec = container_of(cache, struct block_group_record, cache);
4879 list_del_init(&rec->list);
/* Free every block group record held by the tree. */
4883 void free_block_group_tree(struct block_group_tree *tree)
4885 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert a device extent record; on success it starts out on both
 * orphan lists (no chunk, no device) until matched up later.
 */
4888 int insert_device_extent_record(struct device_extent_tree *tree,
4889 struct device_extent_record *de_rec)
4894 * Device extent is a bit different from the other extents, because
4895 * the extents which belong to the different devices may have the
4896 * same start and size, so we need use the special extent cache
4897 * search/insert functions.
4899 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4903 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4904 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/* Unlink a device_extent_record from any lists it is still on. */
4908 static void free_device_extent_record(struct cache_extent *cache)
4910 struct device_extent_record *rec;
4912 rec = container_of(cache, struct device_extent_record, cache);
4913 if (!list_empty(&rec->chunk_list))
4914 list_del_init(&rec->chunk_list);
4915 if (!list_empty(&rec->device_list))
4916 list_del_init(&rec->device_list);
/* Free every device extent record held by the tree. */
4920 void free_device_extent_tree(struct device_extent_tree *tree)
4922 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4925 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Translate a v0 extent ref item into a backref: objectids below
 * FIRST_FREE_OBJECTID are tree roots (tree backref), everything else
 * is file data (data backref with the v0 ref count).
 */
4926 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4927 struct extent_buffer *leaf, int slot)
4929 struct btrfs_extent_ref_v0 *ref0;
4930 struct btrfs_key key;
4932 btrfs_item_key_to_cpu(leaf, &key, slot);
4933 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4934 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4935 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4937 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4938 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from an on-disk btrfs_chunk item,
 * sized for its stripe count, copying geometry fields and per-stripe
 * (devid, offset, dev_uuid).  calloc failure is fatal (exit path
 * implied by the abridged lines after the error message).
 */
4944 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4945 struct btrfs_key *key,
4948 struct btrfs_chunk *ptr;
4949 struct chunk_record *rec;
4952 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4953 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* record size includes the flexible per-stripe array */
4955 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4957 fprintf(stderr, "memory allocation failed\n");
4961 INIT_LIST_HEAD(&rec->list);
4962 INIT_LIST_HEAD(&rec->dextents);
4965 rec->cache.start = key->offset;
4966 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4968 rec->generation = btrfs_header_generation(leaf);
4970 rec->objectid = key->objectid;
4971 rec->type = key->type;
4972 rec->offset = key->offset;
4974 rec->length = rec->cache.size;
4975 rec->owner = btrfs_chunk_owner(leaf, ptr);
4976 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4977 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4978 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4979 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4980 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4981 rec->num_stripes = num_stripes;
4982 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4984 for (i = 0; i < rec->num_stripes; ++i) {
4985 rec->stripes[i].devid =
4986 btrfs_stripe_devid_nr(leaf, ptr, i);
4987 rec->stripes[i].offset =
4988 btrfs_stripe_offset_nr(leaf, ptr, i);
4989 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4990 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse a CHUNK_ITEM into a chunk_record and add it to chunk_cache;
 * an insertion clash means a duplicate chunk and is reported.
 */
4997 static int process_chunk_item(struct cache_tree *chunk_cache,
4998 struct btrfs_key *key, struct extent_buffer *eb,
5001 struct chunk_record *rec;
5004 rec = btrfs_new_chunk_record(eb, key, slot);
5005 ret = insert_cache_extent(chunk_cache, &rec->cache);
5007 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5008 rec->offset, rec->length);
/*
 * Parse a DEV_ITEM into a device_record (devid, total/used bytes)
 * keyed for the rb-tree device cache; a clash means a duplicate
 * device item and is reported.  NOTE(review): rec->devid is first set
 * from key->offset, then overwritten from the item's device_id field.
 */
5015 static int process_device_item(struct rb_root *dev_cache,
5016 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5018 struct btrfs_dev_item *ptr;
5019 struct device_record *rec;
5022 ptr = btrfs_item_ptr(eb,
5023 slot, struct btrfs_dev_item);
5025 rec = malloc(sizeof(*rec));
5027 fprintf(stderr, "memory allocation failed\n");
5031 rec->devid = key->offset;
5032 rec->generation = btrfs_header_generation(eb);
5034 rec->objectid = key->objectid;
5035 rec->type = key->type;
5036 rec->offset = key->offset;
5038 rec->devid = btrfs_device_id(eb, ptr);
5039 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5040 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5042 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5044 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from a BLOCK_GROUP_ITEM:
 * cache range comes from the key (objectid = start, offset = length),
 * flags from the on-disk item.  calloc failure is fatal (exit path
 * implied by the abridged lines after the error message).
 */
5051 struct block_group_record *
5052 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5055 struct btrfs_block_group_item *ptr;
5056 struct block_group_record *rec;
5058 rec = calloc(1, sizeof(*rec));
5060 fprintf(stderr, "memory allocation failed\n");
5064 rec->cache.start = key->objectid;
5065 rec->cache.size = key->offset;
5067 rec->generation = btrfs_header_generation(leaf);
5069 rec->objectid = key->objectid;
5070 rec->type = key->type;
5071 rec->offset = key->offset;
5073 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5074 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5076 INIT_LIST_HEAD(&rec->list);
/*
 * Parse a BLOCK_GROUP_ITEM and insert it into the block group cache;
 * an insertion clash means a duplicate and is reported.
 */
5081 static int process_block_group_item(struct block_group_tree *block_group_cache,
5082 struct btrfs_key *key,
5083 struct extent_buffer *eb, int slot)
5085 struct block_group_record *rec;
5088 rec = btrfs_new_block_group_record(eb, key, slot);
5089 ret = insert_block_group_record(block_group_cache, rec);
5091 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5092 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from a DEV_EXTENT item.
 * The cache key is (devid = key->objectid, start = key->offset) with
 * size taken from the item's length; the owning chunk's objectid and
 * offset are copied for later cross-referencing.  calloc failure is
 * fatal (exit path implied by the abridged lines).
 */
5099 struct device_extent_record *
5100 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5101 struct btrfs_key *key, int slot)
5103 struct device_extent_record *rec;
5104 struct btrfs_dev_extent *ptr;
5106 rec = calloc(1, sizeof(*rec));
5108 fprintf(stderr, "memory allocation failed\n");
5112 rec->cache.objectid = key->objectid;
5113 rec->cache.start = key->offset;
5115 rec->generation = btrfs_header_generation(leaf);
5117 rec->objectid = key->objectid;
5118 rec->type = key->type;
5119 rec->offset = key->offset;
5121 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5122 rec->chunk_objecteid =
5123 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5125 btrfs_dev_extent_chunk_offset(leaf, ptr);
5126 rec->length = btrfs_dev_extent_length(leaf, ptr);
5127 rec->cache.size = rec->length;
5129 INIT_LIST_HEAD(&rec->chunk_list);
5130 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse a DEV_EXTENT item and insert it into the device extent cache;
 * an insertion clash means a duplicate and is reported.
 * NOTE(review): the return-type line of this function is missing from
 * the abridged listing.
 */
5136 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5137 struct btrfs_key *key, struct extent_buffer *eb,
5140 struct device_extent_record *rec;
5143 rec = btrfs_new_device_extent_record(eb, key, slot);
5144 ret = insert_device_extent_record(dev_extent_cache, rec);
5147 "Device extent[%llu, %llu, %llu] existed.\n",
5148 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM: register the extent record
 * (size = key.offset for EXTENT_ITEM, leafsize for METADATA_ITEM,
 * metadata flag from the TREE_BLOCK extent flag), handle the v0
 * compat layout, then walk the inline refs and register each as a
 * tree or data backref.  Unknown inline ref types are reported as
 * corruption.
 */
5155 static int process_extent_item(struct btrfs_root *root,
5156 struct cache_tree *extent_cache,
5157 struct extent_buffer *eb, int slot)
5159 struct btrfs_extent_item *ei;
5160 struct btrfs_extent_inline_ref *iref;
5161 struct btrfs_extent_data_ref *dref;
5162 struct btrfs_shared_data_ref *sref;
5163 struct btrfs_key key;
5167 u32 item_size = btrfs_item_size_nr(eb, slot);
5173 btrfs_item_key_to_cpu(eb, &key, slot);
5175 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM keys carry the level in offset, not the byte count */
5177 num_bytes = root->leafsize;
5179 num_bytes = key.offset;
5182 if (item_size < sizeof(*ei)) {
5183 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5184 struct btrfs_extent_item_v0 *ei0;
5185 BUG_ON(item_size != sizeof(*ei0));
5186 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5187 refs = btrfs_extent_refs_v0(eb, ei0);
5191 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5192 num_bytes, refs, 0, 0, 0, metadata, 1,
5196 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5197 refs = btrfs_extent_refs(eb, ei);
5198 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5203 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5204 refs, 0, 0, 0, metadata, 1, num_bytes);
5206 ptr = (unsigned long)(ei + 1);
/* EXTENT_ITEM tree blocks have a tree_block_info before the refs */
5207 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5208 key.type == BTRFS_EXTENT_ITEM_KEY)
5209 ptr += sizeof(struct btrfs_tree_block_info);
5211 end = (unsigned long)ei + item_size;
5213 iref = (struct btrfs_extent_inline_ref *)ptr;
5214 type = btrfs_extent_inline_ref_type(eb, iref);
5215 offset = btrfs_extent_inline_ref_offset(eb, iref);
5217 case BTRFS_TREE_BLOCK_REF_KEY:
5218 add_tree_backref(extent_cache, key.objectid,
5221 case BTRFS_SHARED_BLOCK_REF_KEY:
5222 add_tree_backref(extent_cache, key.objectid,
5225 case BTRFS_EXTENT_DATA_REF_KEY:
5226 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5227 add_data_backref(extent_cache, key.objectid, 0,
5228 btrfs_extent_data_ref_root(eb, dref),
5229 btrfs_extent_data_ref_objectid(eb,
5231 btrfs_extent_data_ref_offset(eb, dref),
5232 btrfs_extent_data_ref_count(eb, dref),
5235 case BTRFS_SHARED_DATA_REF_KEY:
5236 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5237 add_data_backref(extent_cache, key.objectid, offset,
5239 btrfs_shared_data_ref_count(eb, sref),
5243 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5244 key.objectid, key.type, num_bytes);
5247 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [offset, offset+bytes) is fully covered by a single free
 * space entry in the block group's free space ctl, after carving out
 * any superblock mirror locations that overlap the range (supers are
 * never free space).  Overlap handling recurses for a super landing in
 * the middle of the range, and adjusts offset/bytes for supers at the
 * edges.  A matched entry is unlinked so leftovers can be detected by
 * the caller.
 */
5254 static int check_cache_range(struct btrfs_root *root,
5255 struct btrfs_block_group_cache *cache,
5256 u64 offset, u64 bytes)
5258 struct btrfs_free_space *entry;
5264 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5265 bytenr = btrfs_sb_offset(i);
5266 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5267 cache->key.objectid, bytenr, 0,
5268 &logical, &nr, &stripe_len);
5273 if (logical[nr] + stripe_len <= offset)
5275 if (offset + bytes <= logical[nr])
5277 if (logical[nr] == offset) {
5278 if (stripe_len >= bytes) {
5282 bytes -= stripe_len;
5283 offset += stripe_len;
5284 } else if (logical[nr] < offset) {
5285 if (logical[nr] + stripe_len >=
5290 bytes = (offset + bytes) -
5291 (logical[nr] + stripe_len);
5292 offset = logical[nr] + stripe_len;
5295 * Could be tricky, the super may land in the
5296 * middle of the area we're checking. First
5297 * check the easiest case, it's at the end.
5299 if (logical[nr] + stripe_len >=
5301 bytes = logical[nr] - offset;
5305 /* Check the left side */
5306 ret = check_cache_range(root, cache,
5308 logical[nr] - offset);
5314 /* Now we continue with the right side */
5315 bytes = (offset + bytes) -
5316 (logical[nr] + stripe_len);
5317 offset = logical[nr] + stripe_len;
5324 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5326 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5327 offset, offset+bytes);
5331 if (entry->offset != offset) {
5332 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5337 if (entry->bytes != bytes) {
5338 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5339 bytes, entry->bytes, offset);
/* consume the entry so leftover entries indicate a cache mismatch */
5343 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check one block group's free space cache against the extent
 * tree: walk all extent/metadata items in the group's range and verify
 * every gap between allocated extents appears in the free space cache
 * (via check_cache_range, which consumes matching entries).  Any
 * entries left in the cache afterwards mean the cache disagrees with
 * the extent tree.
 */
5348 static int verify_space_cache(struct btrfs_root *root,
5349 struct btrfs_block_group_cache *cache)
5351 struct btrfs_path *path;
5352 struct extent_buffer *leaf;
5353 struct btrfs_key key;
5357 path = btrfs_alloc_path();
5361 root = root->fs_info->extent_root;
/* never treat the first 1M (superblock area) as checkable space */
5363 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5365 key.objectid = last;
5367 key.type = BTRFS_EXTENT_ITEM_KEY;
5369 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5374 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5375 ret = btrfs_next_leaf(root, path);
5383 leaf = path->nodes[0];
5384 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5385 if (key.objectid >= cache->key.offset + cache->key.objectid)
5387 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5388 key.type != BTRFS_METADATA_ITEM_KEY) {
5393 if (last == key.objectid) {
5394 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5395 last = key.objectid + key.offset;
/* metadata items: length is implicit (one leaf-sized block) */
5397 last = key.objectid + root->leafsize;
5402 ret = check_cache_range(root, cache, last,
5403 key.objectid - last);
5406 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5407 last = key.objectid + key.offset;
5409 last = key.objectid + root->leafsize;
5413 if (last < cache->key.objectid + cache->key.offset)
5414 ret = check_cache_range(root, cache, last,
5415 cache->key.objectid +
5416 cache->key.offset - last);
5419 btrfs_free_path(path);
5422 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5423 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space cache of every block group.  If the cache
 * generation in the superblock does not match the fs generation the
 * cache is stale and will be invalidated, so only a notice is printed.
 * Otherwise each block group's cache is loaded and verified; any
 * failure sets 'error' and the function returns -EINVAL at the end.
 */
5431 static int check_space_cache(struct btrfs_root *root)
5433 struct btrfs_block_group_cache *cache;
5434 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5438 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5439 btrfs_super_generation(root->fs_info->super_copy) !=
5440 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5441 printf("cache and super generation don't match, space cache "
5442 "will be invalidated\n");
5446 if (ctx.progress_enabled) {
5447 ctx.tp = TASK_FREE_SPACE;
5448 task_start(ctx.info);
5452 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5456 start = cache->key.objectid + cache->key.offset;
5457 if (!cache->free_space_ctl) {
5458 if (btrfs_init_free_space_ctl(cache,
5459 root->sectorsize)) {
/* drop any stale in-memory entries before (re)loading the cache */
5464 btrfs_remove_free_space_cache(cache);
5467 ret = load_free_space_cache(root->fs_info, cache);
5471 ret = verify_space_cache(root, cache);
5473 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5474 cache->key.objectid);
5479 task_stop(ctx.info);
5481 return error ? -EINVAL : 0;
/*
 * Verify data checksums for [bytenr, bytenr+num_bytes): read the data
 * (large reads, then checked one sectorsize block at a time), compute
 * each block's csum and compare it with the value stored in the csum
 * leaf at leaf_offset.  On mismatch the next mirror is tried until all
 * copies are exhausted.  num_bytes must be sectorsize-aligned.
 * NOTE(review): abridged listing — mirror-retry control flow between
 * the visible lines is not fully shown.
 */
5484 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5485 u64 num_bytes, unsigned long leaf_offset,
5486 struct extent_buffer *eb) {
5489 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5491 unsigned long csum_offset;
5495 u64 data_checked = 0;
5501 if (num_bytes % root->sectorsize)
5504 data = malloc(num_bytes);
5508 while (offset < num_bytes) {
5511 read_len = num_bytes - offset;
5512 /* read as much space once a time */
5513 ret = read_extent_data(root, data + offset,
5514 bytenr + offset, &read_len, mirror);
5518 /* verify every 4k data's checksum */
5519 while (data_checked < read_len) {
5521 tmp = offset + data_checked;
5523 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5524 csum, root->sectorsize);
5525 btrfs_csum_final(csum, (char *)&csum);
5527 csum_offset = leaf_offset +
5528 tmp / root->sectorsize * csum_size;
5529 read_extent_buffer(eb, (char *)&csum_expected,
5530 csum_offset, csum_size);
5531 /* try another mirror */
5532 if (csum != csum_expected) {
5533 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5534 mirror, bytenr + tmp,
5535 csum, csum_expected);
5536 num_copies = btrfs_num_copies(
5537 &root->fs_info->mapping_tree,
5539 if (mirror < num_copies - 1) {
5544 data_checked += root->sectorsize;
/*
 * Verify that every byte of [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.  The range is shrunk as covering
 * extents are found; whatever remains at the end is uncovered and an
 * error is reported.  Recurses for the split case where an extent sits
 * in the middle of the range.
 * NOTE(review): interior lines are elided in this listing.
 */
5553 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5556 struct btrfs_path *path;
5557 struct extent_buffer *leaf;
5558 struct btrfs_key key;
5561 path = btrfs_alloc_path();
5563 fprintf(stderr, "Error allocing path\n");
/* search for the last possible key at this bytenr, then walk back */
5567 key.objectid = bytenr;
5568 key.type = BTRFS_EXTENT_ITEM_KEY;
5569 key.offset = (u64)-1;
5572 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5575 fprintf(stderr, "Error looking up extent record %d\n", ret);
5576 btrfs_free_path(path);
5579 if (path->slots[0] > 0) {
5582 ret = btrfs_prev_leaf(root, path);
5585 } else if (ret > 0) {
5592 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5595 * Block group items come before extent items if they have the same
5596 * bytenr, so walk back one more just in case.  Dear future traveler,
5597 * first congrats on mastering time travel.  Now if it's not too much
5598 * trouble could you go back to 2006 and tell Chris to make the
5599 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5600 * EXTENT_ITEM_KEY please?
5602 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5603 if (path->slots[0] > 0) {
5606 ret = btrfs_prev_leaf(root, path);
5609 } else if (ret > 0) {
5614 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* walk forward over extent items, trimming the range as we go */
5618 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5619 ret = btrfs_next_leaf(root, path);
5621 fprintf(stderr, "Error going to next leaf "
5623 btrfs_free_path(path);
5629 leaf = path->nodes[0];
5630 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5631 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5635 if (key.objectid + key.offset < bytenr) {
5639 if (key.objectid > bytenr + num_bytes)
/* extent starts exactly at our range: consume its length */
5642 if (key.objectid == bytenr) {
5643 if (key.offset >= num_bytes) {
5647 num_bytes -= key.offset;
5648 bytenr += key.offset;
5649 } else if (key.objectid < bytenr) {
5650 if (key.objectid + key.offset >= bytenr + num_bytes) {
5654 num_bytes = (bytenr + num_bytes) -
5655 (key.objectid + key.offset);
5656 bytenr = key.objectid + key.offset;
5658 if (key.objectid + key.offset < bytenr + num_bytes) {
5659 u64 new_start = key.objectid + key.offset;
5660 u64 new_bytes = bytenr + num_bytes - new_start;
5663 * Weird case, the extent is in the middle of
5664 * our range, we'll have to search one side
5665 * and then the other.  Not sure if this happens
5666 * in real life, but no harm in coding it up
5667 * anyway just in case.
5669 btrfs_release_path(path);
5670 ret = check_extent_exists(root, new_start,
5673 fprintf(stderr, "Right section didn't "
5677 num_bytes = key.objectid - bytenr;
5680 num_bytes = key.objectid - bytenr;
/* anything left over was never covered by an extent item */
5687 if (num_bytes && !ret) {
5688 fprintf(stderr, "There are no extents for csum range "
5689 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5693 btrfs_free_path(path);
/*
 * Walk the whole csum tree.  For every csum item make sure a matching
 * data extent exists (check_extent_exists), and, when -Q/--check-data-csum
 * was given (check_data_csum), also verify the actual data checksums via
 * check_extent_csums().  Contiguous csum items are coalesced into one
 * (offset, num_bytes) range before the extent lookup.
 * NOTE(review): interior lines are elided in this listing.
 */
5697 static int check_csums(struct btrfs_root *root)
5699 struct btrfs_path *path;
5700 struct extent_buffer *leaf;
5701 struct btrfs_key key;
5702 u64 offset = 0, num_bytes = 0;
5703 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5707 unsigned long leaf_offset;
/* all csum items live in the csum tree, not the tree we were handed */
5709 root = root->fs_info->csum_root;
5710 if (!extent_buffer_uptodate(root->node)) {
5711 fprintf(stderr, "No valid csum tree found\n");
5715 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5716 key.type = BTRFS_EXTENT_CSUM_KEY;
5719 path = btrfs_alloc_path();
5723 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5725 fprintf(stderr, "Error searching csum tree %d\n", ret);
5726 btrfs_free_path(path);
5730 if (ret > 0 && path->slots[0])
5735 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5736 ret = btrfs_next_leaf(root, path);
5738 fprintf(stderr, "Error going to next leaf "
5745 leaf = path->nodes[0];
5747 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5748 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* bytes of data covered by this csum item */
5753 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5754 csum_size) * root->sectorsize;
5755 if (!check_data_csum)
5756 goto skip_csum_check;
5757 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5758 ret = check_extent_csums(root, key.offset, data_len,
5764 offset = key.offset;
/* gap in the csum items: flush the accumulated range first */
5765 } else if (key.offset != offset + num_bytes) {
5766 ret = check_extent_exists(root, offset, num_bytes);
5768 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5769 "there is no extent record\n",
5770 offset, offset+num_bytes);
5773 offset = key.offset;
5776 num_bytes += data_len;
5780 btrfs_free_path(path);
/*
 * Return non-zero when @key sorts strictly before @drop_key, i.e. the
 * item was already dropped by an in-progress snapshot deletion and
 * should be skipped while scanning.  Compares (objectid, type, offset)
 * in btrfs key order.
 * NOTE(review): the return statements are elided in this listing.
 */
5784 static int is_dropped_key(struct btrfs_key *key,
5785 struct btrfs_key *drop_key) {
5786 if (key->objectid < drop_key->objectid)
5788 else if (key->objectid == drop_key->objectid) {
5789 if (key->type < drop_key->type)
5791 else if (key->type == drop_key->type) {
5792 if (key->offset < drop_key->offset)
5800 * Here are the rules for FULL_BACKREF.
5802 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5803 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5805 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5806 * if it happened after the relocation occurred since we'll have dropped the
5807 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5808 * have no real way to know for sure.
5810 * We process the blocks one root at a time, and we start from the lowest root
5811 * objectid and go to the highest. So we can just lookup the owner backref for
5812 * the record and if we don't find it then we know it doesn't exist and we have
5815 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5816 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5817 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether the tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and merge the verdict into *flags,
 * recording a mismatch with the extent record's previous verdict in
 * rec->bad_full_backref.  See the comment block above for the rules.
 * @ri is the root currently being walked.
 * NOTE(review): interior lines (labels/returns) are elided here.
 */
5819 static int calc_extent_flag(struct btrfs_root *root,
5820 struct cache_tree *extent_cache,
5821 struct extent_buffer *buf,
5822 struct root_item_record *ri,
5825 struct extent_record *rec;
5826 struct cache_extent *cache;
5827 struct tree_backref *tback;
5830 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5831 /* we have added this extent before */
5833 rec = container_of(cache, struct extent_record, cache);
5836 * Except file/reloc tree, we can not have
5839 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* root node of the tree itself can never be FULL_BACKREF */
5844 if (buf->start == ri->bytenr)
5847 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5850 owner = btrfs_header_owner(buf)
5851 if (owner == ri->objectid)
5854 tback = find_tree_backref(rec, 0, owner);
/* conflicting verdicts between walks: remember the inconsistency */
5859 if (rec->flag_block_full_backref != -1 &&
5860 rec->flag_block_full_backref != 0)
5861 rec->bad_full_backref = 1;
5864 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5865 if (rec->flag_block_full_backref != -1 &&
5866 rec->flag_block_full_backref != 1)
5867 rec->bad_full_backref = 1;
/*
 * Process one pending tree block during the extent-tree scan: pick the
 * next block (with readahead), read it, work out its FULL_BACKREF flag,
 * sanity-check it, then record everything it references — extent items,
 * csums, chunks, devices, block groups, dev extents, tree/data backrefs
 * for leaves, and child pointers (queued as new pending work) for nodes.
 * Also accumulates the global space/usage counters.
 * NOTE(review): this listing is heavily elided; statement order and the
 * missing error paths matter, so the code is left untouched.
 */
5871 static int run_next_block(struct btrfs_root *root,
5872 struct block_info *bits,
5875 struct cache_tree *pending,
5876 struct cache_tree *seen,
5877 struct cache_tree *reada,
5878 struct cache_tree *nodes,
5879 struct cache_tree *extent_cache,
5880 struct cache_tree *chunk_cache,
5881 struct rb_root *dev_cache,
5882 struct block_group_tree *block_group_cache,
5883 struct device_extent_tree *dev_extent_cache,
5884 struct root_item_record *ri)
5886 struct extent_buffer *buf;
5887 struct extent_record *rec = NULL;
5898 struct btrfs_key key;
5899 struct cache_extent *cache;
/* choose the next batch of blocks and kick off readahead for them */
5902 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5903 bits_nr, &reada_bits);
5908 for(i = 0; i < nritems; i++) {
5909 ret = add_cache_extent(reada, bits[i].start,
5914 /* fixme, get the parent transid */
5915 readahead_tree_block(root, bits[i].start,
5919 *last = bits[0].start;
5920 bytenr = bits[0].start;
5921 size = bits[0].size;
/* this block is being processed now, drop it from the work queues */
5923 cache = lookup_cache_extent(pending, bytenr, size);
5925 remove_cache_extent(pending, cache);
5928 cache = lookup_cache_extent(reada, bytenr, size);
5930 remove_cache_extent(reada, cache);
5933 cache = lookup_cache_extent(nodes, bytenr, size);
5935 remove_cache_extent(nodes, cache);
5938 cache = lookup_cache_extent(extent_cache, bytenr, size);
5940 rec = container_of(cache, struct extent_record, cache);
5941 gen = rec->parent_generation;
5944 /* fixme, get the real parent transid */
5945 buf = read_tree_block(root, bytenr, size, gen);
5946 if (!extent_buffer_uptodate(buf)) {
5947 record_bad_block_io(root->fs_info,
5948 extent_cache, bytenr, size);
5952 nritems = btrfs_header_nritems(buf);
/*
 * Determine the block's extent flags: ask the extent tree when it is
 * trusted, otherwise compute them from the backrefs we have collected.
 */
5955 if (!init_extent_tree) {
5956 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5957 btrfs_header_level(buf), 1, NULL,
5960 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5962 fprintf(stderr, "Couldn't calc extent flags\n");
5963 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5968 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5970 fprintf(stderr, "Couldn't calc extent flags\n");
5971 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5975 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5977 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5978 ri->objectid == btrfs_header_owner(buf)) {
5980 * Ok we got to this block from it's original owner and
5981 * we have FULL_BACKREF set.  Relocation can leave
5982 * converted blocks over so this is altogether possible,
5983 * however it's not possible if the generation > the
5984 * last snapshot, so check for this case.
5986 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5987 btrfs_header_generation(buf) > ri->last_snapshot) {
5988 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5989 rec->bad_full_backref = 1;
5994 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5995 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5996 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5997 rec->bad_full_backref = 1;
/* remember the verdict on the extent record for later cross-checks */
6001 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6002 rec->flag_block_full_backref = 1;
6006 rec->flag_block_full_backref = 0;
6008 owner = btrfs_header_owner(buf);
6011 ret = check_block(root, extent_cache, buf, flags);
/* leaves: record every item type we care about */
6015 if (btrfs_is_leaf(buf)) {
6016 btree_space_waste += btrfs_leaf_free_space(root, buf);
6017 for (i = 0; i < nritems; i++) {
6018 struct btrfs_file_extent_item *fi;
6019 btrfs_item_key_to_cpu(buf, &key, i);
6020 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6021 process_extent_item(root, extent_cache, buf,
6025 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6026 process_extent_item(root, extent_cache, buf,
6030 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6032 btrfs_item_size_nr(buf, i);
6035 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6036 process_chunk_item(chunk_cache, &key, buf, i);
6039 if (key.type == BTRFS_DEV_ITEM_KEY) {
6040 process_device_item(dev_cache, &key, buf, i);
6043 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6044 process_block_group_item(block_group_cache,
6048 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6049 process_device_extent_item(dev_extent_cache,
6054 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6055 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6056 process_extent_ref_v0(extent_cache, buf, i);
6063 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6064 add_tree_backref(extent_cache, key.objectid, 0,
6068 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6069 add_tree_backref(extent_cache, key.objectid,
6073 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6074 struct btrfs_extent_data_ref *ref;
6075 ref = btrfs_item_ptr(buf, i,
6076 struct btrfs_extent_data_ref);
6077 add_data_backref(extent_cache,
6079 btrfs_extent_data_ref_root(buf, ref),
6080 btrfs_extent_data_ref_objectid(buf,
6082 btrfs_extent_data_ref_offset(buf, ref),
6083 btrfs_extent_data_ref_count(buf, ref),
6084 0, root->sectorsize);
6087 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6088 struct btrfs_shared_data_ref *ref;
6089 ref = btrfs_item_ptr(buf, i,
6090 struct btrfs_shared_data_ref);
6091 add_data_backref(extent_cache,
6092 key.objectid, key.offset, 0, 0, 0,
6093 btrfs_shared_data_ref_count(buf, ref),
6094 0, root->sectorsize);
/* orphan items get queued for deletion once all roots are scanned */
6097 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6098 struct bad_item *bad;
6100 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6104 bad = malloc(sizeof(struct bad_item));
6107 INIT_LIST_HEAD(&bad->list);
6108 memcpy(&bad->key, &key,
6109 sizeof(struct btrfs_key));
6110 bad->root_id = owner;
6111 list_add_tail(&bad->list, &delete_items);
6114 if (key.type != BTRFS_EXTENT_DATA_KEY)
6116 fi = btrfs_item_ptr(buf, i,
6117 struct btrfs_file_extent_item);
6118 if (btrfs_file_extent_type(buf, fi) ==
6119 BTRFS_FILE_EXTENT_INLINE)
6121 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6124 data_bytes_allocated +=
6125 btrfs_file_extent_disk_num_bytes(buf, fi);
6126 if (data_bytes_allocated < root->sectorsize) {
6129 data_bytes_referenced +=
6130 btrfs_file_extent_num_bytes(buf, fi);
6131 add_data_backref(extent_cache,
6132 btrfs_file_extent_disk_bytenr(buf, fi),
6133 parent, owner, key.objectid, key.offset -
6134 btrfs_file_extent_offset(buf, fi), 1, 1,
6135 btrfs_file_extent_disk_num_bytes(buf, fi));
/* nodes: queue every child pointer for a later pass */
6139 struct btrfs_key first_key;
6141 first_key.objectid = 0;
6144 btrfs_item_key_to_cpu(buf, &first_key, 0);
6145 level = btrfs_header_level(buf);
6146 for (i = 0; i < nritems; i++) {
6147 ptr = btrfs_node_blockptr(buf, i);
6148 size = btrfs_level_size(root, level - 1);
6149 btrfs_node_key_to_cpu(buf, &key, i);
6151 if ((level == ri->drop_level)
6152 && is_dropped_key(&key, &ri->drop_key)) {
6156 ret = add_extent_rec(extent_cache, &key,
6157 btrfs_node_ptr_generation(buf, i),
6158 ptr, size, 0, 0, 1, 0, 1, 0,
6162 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6165 add_pending(nodes, seen, ptr, size);
6167 add_pending(pending, seen, ptr, size);
6170 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6171 nritems) * sizeof(struct btrfs_key_ptr);
6173 total_btree_bytes += buf->len;
6174 if (fs_root_objectid(btrfs_header_owner(buf)))
6175 total_fs_tree_bytes += buf->len;
6176 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6177 total_extent_tree_bytes += buf->len;
6178 if (!found_old_backref &&
6179 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6180 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6181 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6182 found_old_backref = 1;
6184 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root: queue @buf on the right work list
 * (nodes for interior blocks, pending for leaves), create its extent
 * record, and add the initial tree backref — a shared (parent) backref
 * for reloc trees and old-style backref filesystems, an owner backref
 * otherwise.
 * NOTE(review): interior lines are elided in this listing.
 */
6188 static int add_root_to_pending(struct extent_buffer *buf,
6189 struct cache_tree *extent_cache,
6190 struct cache_tree *pending,
6191 struct cache_tree *seen,
6192 struct cache_tree *nodes,
6195 if (btrfs_header_level(buf) > 0)
6196 add_pending(nodes, seen, buf->start, buf->len);
6198 add_pending(pending, seen, buf->start, buf->len);
6199 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6200 0, 1, 1, 0, 1, 0, buf->len);
6202 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6203 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6204 add_tree_backref(extent_cache, buf->start, buf->start,
6207 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6211 /* as we fix the tree, we might be deleting blocks that
6212 * we're tracking for repair. This hook makes sure we
6213 * remove any backrefs for blocks as we are fixing them.
/*
 * Transaction hook called when repair code frees an extent: mirror the
 * on-disk change in our in-memory extent cache by dropping the matching
 * data or tree backref counts, so later verification doesn't flag the
 * blocks we just fixed.  See the comment above this function.
 * NOTE(review): interior lines are elided in this listing.
 */
6215 static int free_extent_hook(struct btrfs_trans_handle *trans,
6216 struct btrfs_root *root,
6217 u64 bytenr, u64 num_bytes, u64 parent,
6218 u64 root_objectid, u64 owner, u64 offset,
6221 struct extent_record *rec;
6222 struct cache_extent *cache;
6224 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* data extents have owner >= FIRST_FREE (an inode number) */
6226 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6227 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6231 rec = container_of(cache, struct extent_record, cache);
6233 struct data_backref *back;
6234 back = find_data_backref(rec, parent, root_objectid, owner,
6235 offset, 1, bytenr, num_bytes);
6238 if (back->node.found_ref) {
6239 back->found_ref -= refs_to_drop;
6241 rec->refs -= refs_to_drop;
6243 if (back->node.found_extent_tree) {
6244 back->num_refs -= refs_to_drop;
6245 if (rec->extent_item_refs)
6246 rec->extent_item_refs -= refs_to_drop;
6248 if (back->found_ref == 0)
6249 back->node.found_ref = 0;
6250 if (back->num_refs == 0)
6251 back->node.found_extent_tree = 0;
6253 if (!back->node.found_extent_tree && back->node.found_ref) {
6254 list_del(&back->node.list);
/* metadata: a tree backref is identified by (parent, root) only */
6258 struct tree_backref *back;
6259 back = find_tree_backref(rec, parent, root_objectid);
6262 if (back->node.found_ref) {
6265 back->node.found_ref = 0;
6267 if (back->node.found_extent_tree) {
6268 if (rec->extent_item_refs)
6269 rec->extent_item_refs--;
6270 back->node.found_extent_tree = 0;
6272 if (!back->node.found_extent_tree && back->node.found_ref) {
6273 list_del(&back->node.list);
6277 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete every extent-tree item (extent items and all backref item
 * types) whose objectid equals @bytenr, walking keys from (bytenr, -1)
 * downwards.  Block group accounting is updated when an actual
 * EXTENT/METADATA item is removed.
 * NOTE(review): interior lines are elided in this listing.
 */
6282 static int delete_extent_records(struct btrfs_trans_handle *trans,
6283 struct btrfs_root *root,
6284 struct btrfs_path *path,
6285 u64 bytenr, u64 new_len)
6287 struct btrfs_key key;
6288 struct btrfs_key found_key;
6289 struct extent_buffer *leaf;
6294 key.objectid = bytenr;
6296 key.offset = (u64)-1;
6299 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6306 if (path->slots[0] == 0)
6312 leaf = path->nodes[0];
6313 slot = path->slots[0];
6315 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6316 if (found_key.objectid != bytenr)
/* only extent items and backref items are fair game for deletion */
6319 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6320 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6321 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6322 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6323 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6324 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6325 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6326 btrfs_release_path(path);
/* step the search key just below the item we skipped */
6327 if (found_key.type == 0) {
6328 if (found_key.offset == 0)
6330 key.offset = found_key.offset - 1;
6331 key.type = found_key.type;
6333 key.type = found_key.type - 1;
6334 key.offset = (u64)-1;
6338 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6339 found_key.objectid, found_key.type, found_key.offset);
6341 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6344 btrfs_release_path(path);
/* give the space back to the block group accounting */
6346 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6347 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6348 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6349 found_key.offset : root->leafsize;
6351 ret = btrfs_update_block_group(trans, root, bytenr,
6358 btrfs_release_path(path);
6363 * for a single backref, this will allocate a new extent
6364 * and add the backref to it.
/*
 * Re-create an extent item for @rec (unless @allocated says it already
 * exists) and attach the single backref @back to it via
 * btrfs_inc_extent_ref().  Used by extent-tree repair when a backref
 * was found but the extent record itself is missing or wrong.
 *
 * Fixes applied: the four `©_key` tokens were mojibake of `&copy_key`
 * (the declared local below is the only valid argument here), and the
 * stray double semicolon on the copy_key declaration was removed.
 * NOTE(review): interior lines are elided in this listing.
 */
6366 static int record_extent(struct btrfs_trans_handle *trans,
6367 struct btrfs_fs_info *info,
6368 struct btrfs_path *path,
6369 struct extent_record *rec,
6370 struct extent_backref *back,
6371 int allocated, u64 flags)
6374 struct btrfs_root *extent_root = info->extent_root;
6375 struct extent_buffer *leaf;
6376 struct btrfs_key ins_key;
6377 struct btrfs_extent_item *ei;
6378 struct tree_backref *tback;
6379 struct data_backref *dback;
6380 struct btrfs_tree_block_info *bi;
/* a metadata extent must cover at least one leaf */
6383 rec->max_size = max_t(u64, rec->max_size,
6384 info->extent_root->leafsize);
6387 u32 item_size = sizeof(*ei);
6390 item_size += sizeof(*bi);
6392 ins_key.objectid = rec->start;
6393 ins_key.offset = rec->max_size;
6394 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6396 ret = btrfs_insert_empty_item(trans, extent_root, path,
6397 &ins_key, item_size);
6401 leaf = path->nodes[0];
6402 ei = btrfs_item_ptr(leaf, path->slots[0],
6403 struct btrfs_extent_item);
6405 btrfs_set_extent_refs(leaf, ei, 0);
6406 btrfs_set_extent_generation(leaf, ei, rec->generation);
6408 if (back->is_data) {
6409 btrfs_set_extent_flags(leaf, ei,
6410 BTRFS_EXTENT_FLAG_DATA);
6412 struct btrfs_disk_key copy_key;
6414 tback = (struct tree_backref *)back;
6415 bi = (struct btrfs_tree_block_info *)(ei + 1);
6416 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* best-effort block key: objectid from the record, rest zeroed */
6419 btrfs_set_disk_key_objectid(&copy_key,
6420 rec->info_objectid);
6421 btrfs_set_disk_key_type(&copy_key, 0);
6422 btrfs_set_disk_key_offset(&copy_key, 0);
6424 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6425 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6427 btrfs_set_extent_flags(leaf, ei,
6428 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6431 btrfs_mark_buffer_dirty(leaf);
6432 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6433 rec->max_size, 1, 0);
6436 btrfs_release_path(path);
6439 if (back->is_data) {
6443 dback = (struct data_backref *)back;
6444 if (back->full_backref)
6445 parent = dback->parent;
/* add one backref (or bump its count) per reference we found */
6449 for (i = 0; i < dback->found_ref; i++) {
6450 /* if parent != 0, we're doing a full backref
6451 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6452 * just makes the backref allocator create a data
6455 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6456 rec->start, rec->max_size,
6460 BTRFS_FIRST_FREE_OBJECTID :
6466 fprintf(stderr, "adding new data backref"
6467 " on %llu %s %llu owner %llu"
6468 " offset %llu found %d\n",
6469 (unsigned long long)rec->start,
6470 back->full_backref ?
6472 back->full_backref ?
6473 (unsigned long long)parent :
6474 (unsigned long long)dback->root,
6475 (unsigned long long)dback->owner,
6476 (unsigned long long)dback->offset,
6481 tback = (struct tree_backref *)back;
6482 if (back->full_backref)
6483 parent = tback->parent;
6487 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6488 rec->start, rec->max_size,
6489 parent, tback->root, 0, 0);
6490 fprintf(stderr, "adding new tree backref on "
6491 "start %llu len %llu parent %llu root %llu\n",
6492 rec->start, rec->max_size, parent, tback->root);
6495 btrfs_release_path(path);
/*
 * One candidate (bytenr, bytes) range collected from data backrefs
 * while arbitrating disagreeing references in verify_backrefs().
 * NOTE(review): the numeric fields (bytenr/bytes/count/broken, per the
 * uses below) are elided in this listing.
 */
6499 struct extent_entry {
6504 struct list_head list;
/*
 * Linear search of @entries for the entry matching (bytenr, bytes)
 * exactly.  Returns the entry, or NULL when no match exists (the
 * elided tail presumably resets entry to NULL — confirm in full source).
 */
6507 static struct extent_entry *find_entry(struct list_head *entries,
6508 u64 bytenr, u64 bytes)
6510 struct extent_entry *entry = NULL;
6512 list_for_each_entry(entry, entries, list) {
6513 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry most backrefs agree on: highest count wins, fully
 * broken entries (broken == count) are distrusted, and an exact tie
 * between two trustworthy candidates returns no winner so the caller
 * can fall back to the extent record itself.
 * NOTE(review): interior lines are elided in this listing.
 */
6520 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6522 struct extent_entry *entry, *best = NULL, *prev = NULL;
6524 list_for_each_entry(entry, entries, list) {
6531 * If there are as many broken entries as entries then we know
6532 * not to trust this particular entry.
6534 if (entry->broken == entry->count)
6538 * If our current entry == best then we can't be sure our best
6539 * is really the best, so we need to keep searching.
6541 if (best && best->count == entry->count) {
6547 /* prev ties with entry: not good enough, keep searching */
6548 if (!prev->broken && prev->count == entry->count)
6552 best = (prev->count > entry->count) ? prev : entry;
6553 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so its disk_bytenr/disk_num_bytes match
 * the agreed-upon @entry: find the fs root for @dback, locate the file
 * extent the backref points at, then cow down and adjust bytenr,
 * offset, disk_num_bytes (and ram_bytes when uncompressed) inside a
 * transaction.  Cases it cannot handle safely just print a request for
 * a btrfs-image and bail.
 * NOTE(review): interior lines are elided in this listing.
 */
6561 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6562 struct data_backref *dback, struct extent_entry *entry)
6564 struct btrfs_trans_handle *trans;
6565 struct btrfs_root *root;
6566 struct btrfs_file_extent_item *fi;
6567 struct extent_buffer *leaf;
6568 struct btrfs_key key;
6572 key.objectid = dback->root;
6573 key.type = BTRFS_ROOT_ITEM_KEY;
6574 key.offset = (u64)-1;
6575 root = btrfs_read_fs_root(info, &key);
6577 fprintf(stderr, "Couldn't find root for our ref\n");
6582 * The backref points to the original offset of the extent if it was
6583 * split, so we need to search down to the offset we have and then walk
6584 * forward until we find the backref we're looking for.
6586 key.objectid = dback->owner;
6587 key.type = BTRFS_EXTENT_DATA_KEY;
6588 key.offset = dback->offset;
6589 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6591 fprintf(stderr, "Error looking up ref %d\n", ret);
6596 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6597 ret = btrfs_next_leaf(root, path);
6599 fprintf(stderr, "Couldn't find our ref, next\n");
6603 leaf = path->nodes[0];
6604 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6605 if (key.objectid != dback->owner ||
6606 key.type != BTRFS_EXTENT_DATA_KEY) {
6607 fprintf(stderr, "Couldn't find our ref, search\n");
6610 fi = btrfs_item_ptr(leaf, path->slots[0],
6611 struct btrfs_file_extent_item);
6612 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6613 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* found the exact file extent this backref describes */
6615 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6620 btrfs_release_path(path);
6622 trans = btrfs_start_transaction(root, 1);
6624 return PTR_ERR(trans);
6627 * Ok we have the key of the file extent we want to fix, now we can cow
6628 * down to the thing and fix it.
6630 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6632 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6633 key.objectid, key.type, key.offset, ret);
6637 fprintf(stderr, "Well that's odd, we just found this key "
6638 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6643 leaf = path->nodes[0];
6644 fi = btrfs_item_ptr(leaf, path->slots[0],
6645 struct btrfs_file_extent_item);
/* compressed extents can't be re-offset safely — punt to a developer */
6647 if (btrfs_file_extent_compression(leaf, fi) &&
6648 dback->disk_bytenr != entry->bytenr) {
6649 fprintf(stderr, "Ref doesn't match the record start and is "
6650 "compressed, please take a btrfs-image of this file "
6651 "system and send it to a btrfs developer so they can "
6652 "complete this functionality for bytenr %Lu\n",
6653 dback->disk_bytenr);
/* broken backref: just point it at the agreed bytenr */
6658 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6659 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6660 } else if (dback->disk_bytenr > entry->bytenr) {
6661 u64 off_diff, offset;
6663 off_diff = dback->disk_bytenr - entry->bytenr;
6664 offset = btrfs_file_extent_offset(leaf, fi);
6665 if (dback->disk_bytenr + offset +
6666 btrfs_file_extent_num_bytes(leaf, fi) >
6667 entry->bytenr + entry->bytes) {
6668 fprintf(stderr, "Ref is past the entry end, please "
6669 "take a btrfs-image of this file system and "
6670 "send it to a btrfs developer, ref %Lu\n",
6671 dback->disk_bytenr);
6676 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6677 btrfs_set_file_extent_offset(leaf, fi, offset);
6678 } else if (dback->disk_bytenr < entry->bytenr) {
6681 offset = btrfs_file_extent_offset(leaf, fi);
6682 if (dback->disk_bytenr + offset < entry->bytenr) {
6683 fprintf(stderr, "Ref is before the entry start, please"
6684 " take a btrfs-image of this file system and "
6685 "send it to a btrfs developer, ref %Lu\n",
6686 dback->disk_bytenr);
/* fold the bytenr difference into the extent's offset field */
6691 offset += dback->disk_bytenr;
6692 offset -= entry->bytenr;
6693 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6694 btrfs_set_file_extent_offset(leaf, fi, offset);
6697 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6700 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6701 * only do this if we aren't using compression, otherwise it's a
6704 if (!btrfs_file_extent_compression(leaf, fi))
6705 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6707 printf("ram bytes may be wrong?\n");
6708 btrfs_mark_buffer_dirty(leaf);
6710 err = btrfs_commit_transaction(trans, root);
6711 btrfs_release_path(path);
6712 return ret ? ret : err;
/*
 * For a data extent record whose backrefs disagree about where the
 * extent lives: tally the distinct (bytenr, bytes) pairs claimed by the
 * backrefs, elect a winner (find_most_right_entry, with the extent
 * record itself as tiebreaker), then run repair_ref() on every backref
 * that doesn't match the winner.  After any repair the caller must
 * rescan, so the entry list is torn down before returning.
 * NOTE(review): interior lines are elided in this listing.
 */
6715 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6716 struct extent_record *rec)
6718 struct extent_backref *back;
6719 struct data_backref *dback;
6720 struct extent_entry *entry, *best = NULL;
6723 int broken_entries = 0;
6728 * Metadata is easy and the backrefs should always agree on bytenr and
6729 * size, if not we've got bigger issues.
6734 list_for_each_entry(back, &rec->backrefs, list) {
6735 if (back->full_backref || !back->is_data)
6738 dback = (struct data_backref *)back;
6741 * We only pay attention to backrefs that we found a real
6744 if (dback->found_ref == 0)
6748 * For now we only catch when the bytes don't match, not the
6749 * bytenr.  We can easily do this at the same time, but I want
6750 * to have a fs image to test on before we just add repair
6751 * functionality willy-nilly so we know we won't screw up the
6755 entry = find_entry(&entries, dback->disk_bytenr,
6758 entry = malloc(sizeof(struct extent_entry));
6763 memset(entry, 0, sizeof(*entry));
6764 entry->bytenr = dback->disk_bytenr;
6765 entry->bytes = dback->bytes;
6766 list_add_tail(&entry->list, &entries);
6771 * If we only have on entry we may think the entries agree when
6772 * in reality they don't so we have to do some extra checking.
6774 if (dback->disk_bytenr != rec->start ||
6775 dback->bytes != rec->nr || back->broken)
6786 /* Yay all the backrefs agree, carry on good sir */
6787 if (nr_entries <= 1 && !mismatch)
6790 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6791 "%Lu\n", rec->start);
6794 * First we want to see if the backrefs can agree amongst themselves who
6795 * is right, so figure out which one of the entries has the highest
6798 best = find_most_right_entry(&entries);
6801 * Ok so we may have an even split between what the backrefs think, so
6802 * this is where we use the extent ref to see what it thinks.
6805 entry = find_entry(&entries, rec->start, rec->nr);
6806 if (!entry && (!broken_entries || !rec->found_rec)) {
6807 fprintf(stderr, "Backrefs don't agree with each other "
6808 "and extent record doesn't agree with anybody,"
6809 " so we can't fix bytenr %Lu bytes %Lu\n",
6810 rec->start, rec->nr);
6813 } else if (!entry) {
6815 * Ok our backrefs were broken, we'll assume this is the
6816 * correct value and add an entry for this range.
6818 entry = malloc(sizeof(struct extent_entry));
6823 memset(entry, 0, sizeof(*entry));
6824 entry->bytenr = rec->start;
6825 entry->bytes = rec->nr;
6826 list_add_tail(&entry->list, &entries);
6830 best = find_most_right_entry(&entries);
6832 fprintf(stderr, "Backrefs and extent record evenly "
6833 "split on who is right, this is going to "
6834 "require user input to fix bytenr %Lu bytes "
6835 "%Lu\n", rec->start, rec->nr);
6842 * I don't think this can happen currently as we'll abort() if we catch
6843 * this case higher up, but in case somebody removes that we still can't
6844 * deal with it properly here yet, so just bail out of that's the case.
6846 if (best->bytenr != rec->start) {
6847 fprintf(stderr, "Extent start and backref starts don't match, "
6848 "please use btrfs-image on this file system and send "
6849 "it to a btrfs developer so they can make fsck fix "
6850 "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6851 rec->start, rec->nr);
6857 * Ok great we all agreed on an extent record, let's go find the real
6858 * references and fix up the ones that don't match.
6860 list_for_each_entry(back, &rec->backrefs, list) {
6861 if (back->full_backref || !back->is_data)
6864 dback = (struct data_backref *)back;
6867 * Still ignoring backrefs that don't have a real ref attached
6870 if (dback->found_ref == 0)
6873 if (dback->bytes == best->bytes &&
6874 dback->disk_bytenr == best->bytenr)
6877 ret = repair_ref(info, path, dback, best);
6883 * Ok we messed with the actual refs, which means we need to drop our
6884 * entire cache and go back and rescan.  I know this is a huge pain and
6885 * adds a lot of extra work, but it's the only way to be safe.  Once all
6886 * the backrefs agree we may not need to do anything to the extent
6891 while (!list_empty(&entries)) {
6892 entry = list_entry(entries.next, struct extent_entry, list);
6893 list_del_init(&entry->list);
/*
 * Collapse an extent record that was shadowed by a duplicate: promote
 * the duplicate (which reflects a real BTRFS_EXTENT_ITEM_KEY found on
 * disk) to be the canonical record, then fold in or queue any other
 * records overlapping its range.  Returns 1 when the duplicates have
 * been fully resolved, 0 when more remain to be deleted.
 * NOTE(review): interior lines are elided in this listing.
 */
6899 static int process_duplicates(struct btrfs_root *root,
6900 struct cache_tree *extent_cache,
6901 struct extent_record *rec)
6903 struct extent_record *good, *tmp;
6904 struct cache_extent *cache;
6908 * If we found a extent record for this extent then return, or if we
6909 * have more than one duplicate we are likely going to need to delete
6912 if (rec->found_rec || rec->num_duplicates > 1)
6915 /* Shouldn't happen but just in case */
6916 BUG_ON(!rec->num_duplicates);
6919 * So this happens if we end up with a backref that doesn't match the
6920 * actual extent entry.  So either the backref is bad or the extent
6921 * entry is bad.  Either way we want to have the extent_record actually
6922 * reflect what we found in the extent_tree, so we need to take the
6923 * duplicate out and use that as the extent_record since the only way we
6924 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6926 remove_cache_extent(extent_cache, &rec->cache);
/* promote the first duplicate to be the canonical record */
6928 good = list_entry(rec->dups.next, struct extent_record, list);
6929 list_del_init(&good->list);
6930 INIT_LIST_HEAD(&good->backrefs);
6931 INIT_LIST_HEAD(&good->dups);
6932 good->cache.start = good->start;
6933 good->cache.size = good->nr;
6934 good->content_checked = 0;
6935 good->owner_ref_checked = 0;
6936 good->num_duplicates = 0;
6937 good->refs = rec->refs;
6938 list_splice_init(&rec->backrefs, &good->backrefs);
/* absorb every other record that overlaps the promoted range */
6940 cache = lookup_cache_extent(extent_cache, good->start,
6944 tmp = container_of(cache, struct extent_record, cache);
6947 * If we find another overlapping extent and it's found_rec is
6948 * set then it's a duplicate and we need to try and delete
6951 if (tmp->found_rec || tmp->num_duplicates > 0) {
6952 if (list_empty(&good->list))
6953 list_add_tail(&good->list,
6954 &duplicate_extents);
6955 good->num_duplicates += tmp->num_duplicates + 1;
6956 list_splice_init(&tmp->dups, &good->dups);
6957 list_del_init(&tmp->list);
6958 list_add_tail(&tmp->list, &good->dups);
6959 remove_cache_extent(extent_cache, &tmp->cache);
6964 * Ok we have another non extent item backed extent rec, so lets
6965 * just add it to this extent and carry on like we did above.
6967 good->refs += tmp->refs;
6968 list_splice_init(&tmp->backrefs, &good->backrefs);
6969 remove_cache_extent(extent_cache, &tmp->cache);
6972 ret = insert_cache_extent(extent_cache, &good->cache);
6975 return good->num_duplicates ? 0 : 1;
/*
 * Delete the duplicate EXTENT_ITEMs queued on rec->dups from the extent
 * tree.
 *
 * Picks the record that covers all the others ("good"), moves every
 * non-covering record onto a local delete_list, and removes each one's
 * extent item inside a single transaction.  Returns a negative errno on
 * failure, otherwise the number of items deleted (nr_del);
 * rec->num_duplicates is cleared only when nothing was deleted and no
 * error occurred.
 */
6978 static int delete_duplicate_records(struct btrfs_root *root,
6979 struct extent_record *rec)
6981 struct btrfs_trans_handle *trans;
6982 LIST_HEAD(delete_list);
6983 struct btrfs_path *path;
6984 struct extent_record *tmp, *good, *n;
6987 struct btrfs_key key;
6989 path = btrfs_alloc_path();
6996 /* Find the record that covers all of the duplicates. */
6997 list_for_each_entry(tmp, &rec->dups, list) {
6998 if (good->start < tmp->start)
7000 if (good->nr > tmp->nr)
7003 if (tmp->start + tmp->nr < good->start + good->nr) {
7004 fprintf(stderr, "Ok we have overlapping extents that "
7005 "aren't completely covered by eachother, this "
7006 "is going to require more careful thought. "
7007 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7008 tmp->start, tmp->nr, good->start, good->nr);
7015 list_add_tail(&rec->list, &delete_list);
7017 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7020 list_move_tail(&tmp->list, &delete_list);
/* All deletions go through the extent allocation tree. */
7023 root = root->fs_info->extent_root;
7024 trans = btrfs_start_transaction(root, 1);
7025 if (IS_ERR(trans)) {
7026 ret = PTR_ERR(trans);
7030 list_for_each_entry(tmp, &delete_list, list) {
7031 if (tmp->found_rec == 0)
7033 key.objectid = tmp->start;
7034 key.type = BTRFS_EXTENT_ITEM_KEY;
7035 key.offset = tmp->nr;
7037 /* Shouldn't happen but just in case */
7038 if (tmp->metadata) {
7039 fprintf(stderr, "Well this shouldn't happen, extent "
7040 "record overlaps but is metadata? "
7041 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7045 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7051 ret = btrfs_del_item(trans, root, path);
7054 btrfs_release_path(path);
7057 err = btrfs_commit_transaction(trans, root);
/* Free the records we queued for deletion. */
7061 while (!list_empty(&delete_list)) {
7062 tmp = list_entry(delete_list.next, struct extent_record, list);
7063 list_del_init(&tmp->list);
7069 while (!list_empty(&rec->dups)) {
7070 tmp = list_entry(rec->dups.next, struct extent_record, list);
7071 list_del_init(&tmp->list);
7075 btrfs_free_path(path);
7077 if (!ret && !nr_del)
7078 rec->num_duplicates = 0;
7080 return ret ? ret : nr_del;
/*
 * For each data backref on @rec that we never saw a real file-extent ref
 * for, look up the file extent in its owning fs root.  If the on-disk
 * extent's bytenr has no extent record of its own in @extent_cache, adopt
 * the found disk_bytenr/bytes into the backref so verify_backrefs() can
 * reconcile it against the rest of the record.
 */
7083 static int find_possible_backrefs(struct btrfs_fs_info *info,
7084 struct btrfs_path *path,
7085 struct cache_tree *extent_cache,
7086 struct extent_record *rec)
7088 struct btrfs_root *root;
7089 struct extent_backref *back;
7090 struct data_backref *dback;
7091 struct cache_extent *cache;
7092 struct btrfs_file_extent_item *fi;
7093 struct btrfs_key key;
7097 list_for_each_entry(back, &rec->backrefs, list) {
7098 /* Don't care about full backrefs (poor unloved backrefs) */
7099 if (back->full_backref || !back->is_data)
7102 dback = (struct data_backref *)back;
7104 /* We found this one, we don't need to do a lookup */
7105 if (dback->found_ref)
/* Resolve the subvolume root this backref claims to live in. */
7108 key.objectid = dback->root;
7109 key.type = BTRFS_ROOT_ITEM_KEY;
7110 key.offset = (u64)-1;
7112 root = btrfs_read_fs_root(info, &key);
7114 /* No root, definitely a bad ref, skip */
7115 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7117 /* Other err, exit */
7119 return PTR_ERR(root);
7121 key.objectid = dback->owner;
7122 key.type = BTRFS_EXTENT_DATA_KEY;
7123 key.offset = dback->offset;
7124 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7126 btrfs_release_path(path);
7129 /* Didn't find it, we can carry on */
7134 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7135 struct btrfs_file_extent_item);
7136 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7137 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7138 btrfs_release_path(path);
7139 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7141 struct extent_record *tmp;
7142 tmp = container_of(cache, struct extent_record, cache);
7145 * If we found an extent record for the bytenr for this
7146 * particular backref then we can't add it to our
7147 * current extent record. We only want to add backrefs
7148 * that don't have a corresponding extent item in the
7149 * extent tree since they likely belong to this record
7150 * and we need to fix it if it doesn't match bytenrs.
7156 dback->found_ref += 1;
7157 dback->disk_bytenr = bytenr;
7158 dback->bytes = bytes;
7161 * Set this so the verify backref code knows not to trust the
7162 * values in this backref.
7171 * Record orphan data refs into the corresponding root.
7173 * Return 0 if the extent item contains a data ref and it was recorded.
7174 * Return 1 if the extent item contains no useful data ref.
7175 * In that case it may contain only a shared_dataref or metadata backref,
7176 * or the file extent exists (this should be handled by the extent bytenr
7178 * Return <0 if something goes wrong.
7180 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7181 struct extent_record *rec)
7183 struct btrfs_key key;
7184 struct btrfs_root *dest_root;
7185 struct extent_backref *back;
7186 struct data_backref *dback;
7187 struct orphan_data_extent *orphan;
7188 struct btrfs_path *path;
7189 int recorded_data_ref = 0;
7194 path = btrfs_alloc_path();
/*
 * Only data backrefs that the extent tree knows about but for which no
 * file extent ref was found are candidates for orphan recording.
 */
7197 list_for_each_entry(back, &rec->backrefs, list) {
7198 if (back->full_backref || !back->is_data ||
7199 !back->found_extent_tree)
7201 dback = (struct data_backref *)back;
7202 if (dback->found_ref)
7204 key.objectid = dback->root;
7205 key.type = BTRFS_ROOT_ITEM_KEY;
7206 key.offset = (u64)-1;
7208 dest_root = btrfs_read_fs_root(fs_info, &key);
7210 /* For non-exist root we just skip it */
7211 if (IS_ERR(dest_root) || !dest_root)
7214 key.objectid = dback->owner;
7215 key.type = BTRFS_EXTENT_DATA_KEY;
7216 key.offset = dback->offset;
7218 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7220 * For ret < 0, it's OK since the fs-tree may be corrupted,
7221 * we need to record it for inode/file extent rebuild.
7222 * For ret > 0, we record it only for file extent rebuild.
7223 * For ret == 0, the file extent exists but only bytenr
7224 * mismatch, let the original bytenr fix routine to handle,
7230 orphan = malloc(sizeof(*orphan));
7235 INIT_LIST_HEAD(&orphan->list);
7236 orphan->root = dback->root;
7237 orphan->objectid = dback->owner;
7238 orphan->offset = dback->offset;
7239 orphan->disk_bytenr = rec->cache.start;
7240 orphan->disk_len = rec->cache.size;
/* Queue the orphan on the destination root for later rebuild. */
7241 list_add(&dest_root->orphan_data_extents, &orphan->list);
7242 recorded_data_ref = 1;
7245 btrfs_free_path(path);
/* 0 if at least one orphan data ref was recorded, 1 otherwise. */
7247 return !recorded_data_ref;
7253 * when an incorrect extent item is found, this will delete
7254 * all of the existing entries for it and recreate them
7255 * based on what the tree scan found.
/*
 * Rewrite the extent tree entries for @rec.
 *
 * Step order (see the step comments below): optionally recover missing
 * backrefs, make all backrefs agree, delete every existing extent record
 * for the range, then re-insert a ref for each backref that was actually
 * found — unless the block sits in the corrupt_blocks cache, in which
 * case no new references are added.
 */
7257 static int fixup_extent_refs(struct btrfs_fs_info *info,
7258 struct cache_tree *extent_cache,
7259 struct extent_record *rec)
7261 struct btrfs_trans_handle *trans = NULL;
7263 struct btrfs_path *path;
7264 struct list_head *cur = rec->backrefs.next;
7265 struct cache_extent *cache;
7266 struct extent_backref *back;
7270 if (rec->flag_block_full_backref)
7271 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7273 path = btrfs_alloc_path();
7277 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7279 * Sometimes the backrefs themselves are so broken they don't
7280 * get attached to any meaningful rec, so first go back and
7281 * check any of our backrefs that we couldn't find and throw
7282 * them into the list if we find the backref so that
7283 * verify_backrefs can figure out what to do.
7285 ret = find_possible_backrefs(info, path, extent_cache, rec);
7290 /* step one, make sure all of the backrefs agree */
7291 ret = verify_backrefs(info, path, rec);
7295 trans = btrfs_start_transaction(info->extent_root, 1);
7296 if (IS_ERR(trans)) {
7297 ret = PTR_ERR(trans);
7301 /* step two, delete all the existing records */
7302 ret = delete_extent_records(trans, info->extent_root, path,
7303 rec->start, rec->max_size);
7308 /* was this block corrupt? If so, don't add references to it */
7309 cache = lookup_cache_extent(info->corrupt_blocks,
7310 rec->start, rec->max_size);
7316 /* step three, recreate all the refs we did find */
7317 while(cur != &rec->backrefs) {
7318 back = list_entry(cur, struct extent_backref, list);
7322 * if we didn't find any references, don't create a
7325 if (!back->found_ref)
7328 rec->bad_full_backref = 0;
7329 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7337 int err = btrfs_commit_transaction(trans, info->extent_root);
7342 btrfs_free_path(path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item for @rec
 * so the on-disk flags match rec->flag_block_full_backref, then commit.
 * Metadata extents are keyed as METADATA_ITEM (offset = level), data as
 * EXTENT_ITEM (offset = size).
 */
7346 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7347 struct extent_record *rec)
7349 struct btrfs_trans_handle *trans;
7350 struct btrfs_root *root = fs_info->extent_root;
7351 struct btrfs_path *path;
7352 struct btrfs_extent_item *ei;
7353 struct btrfs_key key;
7357 key.objectid = rec->start;
7358 if (rec->metadata) {
7359 key.type = BTRFS_METADATA_ITEM_KEY;
7360 key.offset = rec->info_level;
7362 key.type = BTRFS_EXTENT_ITEM_KEY;
7363 key.offset = rec->max_size;
7366 path = btrfs_alloc_path();
7370 trans = btrfs_start_transaction(root, 0);
7371 if (IS_ERR(trans)) {
7372 btrfs_free_path(path);
7373 return PTR_ERR(trans);
7376 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7378 btrfs_free_path(path);
7379 btrfs_commit_transaction(trans, root);
7382 fprintf(stderr, "Didn't find extent for %llu\n",
7383 (unsigned long long)rec->start);
7384 btrfs_free_path(path);
7385 btrfs_commit_transaction(trans, root);
/* Found the item: flip the full-backref flag in place. */
7389 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7390 struct btrfs_extent_item);
7391 flags = btrfs_extent_flags(path->nodes[0], ei);
7392 if (rec->flag_block_full_backref) {
7393 fprintf(stderr, "setting full backref on %llu\n",
7394 (unsigned long long)key.objectid);
7395 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7397 fprintf(stderr, "clearing full backref on %llu\n",
7398 (unsigned long long)key.objectid);
7399 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7401 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7402 btrfs_mark_buffer_dirty(path->nodes[0]);
7403 btrfs_free_path(path);
7404 return btrfs_commit_transaction(trans, root);
7407 /* right now we only prune from the extent allocation tree */
/*
 * Remove the parent pointer to one corrupt block from the extent
 * allocation tree: search down to the corrupt block's parent level,
 * locate the slot whose blockptr matches the corrupt block, and delete
 * that pointer with btrfs_del_ptr().
 */
7408 static int prune_one_block(struct btrfs_trans_handle *trans,
7409 struct btrfs_fs_info *info,
7410 struct btrfs_corrupt_block *corrupt)
7413 struct btrfs_path path;
7414 struct extent_buffer *eb;
7418 int level = corrupt->level + 1;
7420 btrfs_init_path(&path);
7422 /* we want to stop at the parent to our busted block */
7423 path.lowest_level = level;
7425 ret = btrfs_search_slot(trans, info->extent_root,
7426 &corrupt->key, &path, -1, 1);
7431 eb = path.nodes[level];
7438 * hopefully the search gave us the block we want to prune,
7439 * lets try that first
7441 slot = path.slots[level];
7442 found = btrfs_node_blockptr(eb, slot);
7443 if (found == corrupt->cache.start)
7446 nritems = btrfs_header_nritems(eb);
7448 /* the search failed, lets scan this node and hope we find it */
7449 for (slot = 0; slot < nritems; slot++) {
7450 found = btrfs_node_blockptr(eb, slot);
7451 if (found == corrupt->cache.start)
7455 * we couldn't find the bad block. TODO, search all the nodes for pointers
7458 if (eb == info->extent_root->node) {
7463 btrfs_release_path(&path);
7468 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7469 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7472 btrfs_release_path(&path);
/*
 * Walk the fs_info->corrupt_blocks cache and drop the parent pointer to
 * each corrupt block via prune_one_block(), all inside one transaction
 * started lazily on the first entry.  Returns the commit result.
 */
7476 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7478 struct btrfs_trans_handle *trans = NULL;
7479 struct cache_extent *cache;
7480 struct btrfs_corrupt_block *corrupt;
7483 cache = search_cache_extent(info->corrupt_blocks, 0);
7487 trans = btrfs_start_transaction(info->extent_root, 1);
7489 return PTR_ERR(trans);
7491 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7492 prune_one_block(trans, info, corrupt);
/* Forget the block once its pointer has been pruned. */
7493 remove_cache_extent(info->corrupt_blocks, cache);
7496 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear every EXTENT_DIRTY range from fs_info->free_space_cache, walking
 * the block groups that cover each dirty range via
 * btrfs_lookup_first_block_group().  NOTE(review): the loop bodies that
 * presumably reset each block group's cached state are not fully visible
 * here — confirm against the full source.
 */
7500 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7502 struct btrfs_block_group_cache *cache;
7507 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7508 &start, &end, EXTENT_DIRTY);
7511 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7517 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group and look for the next dirty range. */
7522 start = cache->key.objectid + cache->key.offset;
/*
 * Final pass over the extent cache.
 *
 * In repair mode, first pin every known extent and every corrupt block
 * into excluded_extents so repairs never allocate from them, prune the
 * corrupt blocks and reset cached block groups, and resolve duplicate
 * extent items.  Then walk each record reporting (and in repair mode
 * fixing, via fixup_extent_refs()/fixup_extent_flags()) ref-count
 * mismatches, backpointer mismatches, owner-ref failures and bad
 * full-backref flags; stripe-crossing and wrong-chunk-type extents are
 * reported only.  -EAGAIN from the fixup helpers signals the caller to
 * rescan; block accounting is rewritten before returning after repairs.
 */
7526 static int check_extent_refs(struct btrfs_root *root,
7527 struct cache_tree *extent_cache)
7529 struct extent_record *rec;
7530 struct cache_extent *cache;
7539 * if we're doing a repair, we have to make sure
7540 * we don't allocate from the problem extents.
7541 * In the worst case, this will be all the
7544 cache = search_cache_extent(extent_cache, 0);
7546 rec = container_of(cache, struct extent_record, cache);
7547 set_extent_dirty(root->fs_info->excluded_extents,
7549 rec->start + rec->max_size - 1,
7551 cache = next_cache_extent(cache);
7554 /* pin down all the corrupted blocks too */
7555 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7557 set_extent_dirty(root->fs_info->excluded_extents,
7559 cache->start + cache->size - 1,
7561 cache = next_cache_extent(cache);
7563 prune_corrupt_blocks(root->fs_info);
7564 reset_cached_block_groups(root->fs_info);
7567 reset_cached_block_groups(root->fs_info);
7570 * We need to delete any duplicate entries we find first otherwise we
7571 * could mess up the extent tree when we have backrefs that actually
7572 * belong to a different extent item and not the weird duplicate one.
7574 while (repair && !list_empty(&duplicate_extents)) {
7575 rec = list_entry(duplicate_extents.next, struct extent_record,
7577 list_del_init(&rec->list);
7579 /* Sometimes we can find a backref before we find an actual
7580 * extent, so we need to process it a little bit to see if there
7581 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7582 * if this is a backref screwup. If we need to delete stuff
7583 * process_duplicates() will return 0, otherwise it will return
7586 if (process_duplicates(root, extent_cache, rec))
7588 ret = delete_duplicate_records(root, rec);
7592 * delete_duplicate_records will return the number of entries
7593 * deleted, so if it's greater than 0 then we know we actually
7594 * did something and we need to remove.
/* Main reporting/repair loop over every cached extent record. */
7608 cache = search_cache_extent(extent_cache, 0);
7611 rec = container_of(cache, struct extent_record, cache);
7612 if (rec->num_duplicates) {
7613 fprintf(stderr, "extent item %llu has multiple extent "
7614 "items\n", (unsigned long long)rec->start);
7619 if (rec->refs != rec->extent_item_refs) {
7620 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7621 (unsigned long long)rec->start,
7622 (unsigned long long)rec->nr);
7623 fprintf(stderr, "extent item %llu, found %llu\n",
7624 (unsigned long long)rec->extent_item_refs,
7625 (unsigned long long)rec->refs);
7626 ret = record_orphan_data_extents(root->fs_info, rec);
7633 * we can't use the extent to repair file
7634 * extent, let the fallback method handle it.
7636 if (!fixed && repair) {
7637 ret = fixup_extent_refs(
7648 if (all_backpointers_checked(rec, 1)) {
7649 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7650 (unsigned long long)rec->start,
7651 (unsigned long long)rec->nr);
7653 if (!fixed && !recorded && repair) {
7654 ret = fixup_extent_refs(root->fs_info,
7663 if (!rec->owner_ref_checked) {
7664 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7665 (unsigned long long)rec->start,
7666 (unsigned long long)rec->nr);
7667 if (!fixed && !recorded && repair) {
7668 ret = fixup_extent_refs(root->fs_info,
7677 if (rec->bad_full_backref) {
7678 fprintf(stderr, "bad full backref, on [%llu]\n",
7679 (unsigned long long)rec->start);
7681 ret = fixup_extent_flags(root->fs_info, rec);
7690 * Although it's not a extent ref's problem, we reuse this
7691 * routine for error reporting.
7692 * No repair function yet.
7694 if (rec->crossing_stripes) {
7696 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7697 rec->start, rec->start + rec->max_size);
7702 if (rec->wrong_chunk_type) {
7704 "bad extent [%llu, %llu), type mismatch with chunk\n",
7705 rec->start, rec->start + rec->max_size);
7710 remove_cache_extent(extent_cache, cache);
7711 free_all_extent_backrefs(rec);
7712 if (!init_extent_tree && repair && (!cur_err || fixed))
7713 clear_extent_dirty(root->fs_info->excluded_extents,
7715 rec->start + rec->max_size - 1,
7721 if (ret && ret != -EAGAIN) {
7722 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7725 struct btrfs_trans_handle *trans;
7727 root = root->fs_info->extent_root;
7728 trans = btrfs_start_transaction(root, 1);
7729 if (IS_ERR(trans)) {
7730 ret = PTR_ERR(trans);
/* After repairing refs, block accounting must be recomputed. */
7734 btrfs_fix_block_accounting(trans, root);
7735 ret = btrfs_commit_transaction(trans, root);
7740 fprintf(stderr, "repaired damaged extent references\n");
7746 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7750 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7751 stripe_size = length;
7752 stripe_size /= num_stripes;
7753 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7754 stripe_size = length * 2;
7755 stripe_size /= num_stripes;
7756 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7757 stripe_size = length;
7758 stripe_size /= (num_stripes - 1);
7759 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7760 stripe_size = length;
7761 stripe_size /= (num_stripes - 2);
7763 stripe_size = length;
7769 * Check the chunk against its block group / dev extent list refs:
7770 * Return 0 if all refs seem valid.
7771 * Return 1 if some refs seem valid but a later pass is needed to rebuild
7772 * refs, e.g. a missing block group requires searching the extent tree.
7773 * Return -1 if essential refs are missing and cannot be rebuilt.
7775 static int check_chunk_refs(struct chunk_record *chunk_rec,
7776 struct block_group_tree *block_group_cache,
7777 struct device_extent_tree *dev_extent_cache,
7780 struct cache_extent *block_group_item;
7781 struct block_group_record *block_group_rec;
7782 struct cache_extent *dev_extent_item;
7783 struct device_extent_record *dev_extent_rec;
7787 int metadump_v2 = 0;
/* Cross-check the chunk against its block group item. */
7791 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7794 if (block_group_item) {
7795 block_group_rec = container_of(block_group_item,
7796 struct block_group_record,
7798 if (chunk_rec->length != block_group_rec->offset ||
7799 chunk_rec->offset != block_group_rec->objectid ||
7801 chunk_rec->type_flags != block_group_rec->flags)) {
7804 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7805 chunk_rec->objectid,
7810 chunk_rec->type_flags,
7811 block_group_rec->objectid,
7812 block_group_rec->type,
7813 block_group_rec->offset,
7814 block_group_rec->offset,
7815 block_group_rec->objectid,
7816 block_group_rec->flags);
7819 list_del_init(&block_group_rec->list);
7820 chunk_rec->bg_rec = block_group_rec;
7825 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7826 chunk_rec->objectid,
7831 chunk_rec->type_flags);
/* Cross-check each stripe against the dev extent tree. */
7838 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7839 chunk_rec->num_stripes);
7840 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7841 devid = chunk_rec->stripes[i].devid;
7842 offset = chunk_rec->stripes[i].offset;
7843 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7844 devid, offset, length);
7845 if (dev_extent_item) {
7846 dev_extent_rec = container_of(dev_extent_item,
7847 struct device_extent_record,
7849 if (dev_extent_rec->objectid != devid ||
7850 dev_extent_rec->offset != offset ||
7851 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7852 dev_extent_rec->length != length) {
7855 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7856 chunk_rec->objectid,
7859 chunk_rec->stripes[i].devid,
7860 chunk_rec->stripes[i].offset,
7861 dev_extent_rec->objectid,
7862 dev_extent_rec->offset,
7863 dev_extent_rec->length);
7866 list_move(&dev_extent_rec->chunk_list,
7867 &chunk_rec->dextents);
7872 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7873 chunk_rec->objectid,
7876 chunk_rec->stripes[i].devid,
7877 chunk_rec->stripes[i].offset);
7884 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Verify every cached chunk with check_chunk_refs() and sort the chunk
 * records onto the caller's @good / @rebuild / @bad lists (each may be
 * NULL).  Afterwards report any block groups or dev extents that never
 * matched a chunk.  @silent suppresses per-item messages.
 */
7885 int check_chunks(struct cache_tree *chunk_cache,
7886 struct block_group_tree *block_group_cache,
7887 struct device_extent_tree *dev_extent_cache,
7888 struct list_head *good, struct list_head *bad,
7889 struct list_head *rebuild, int silent)
7891 struct cache_extent *chunk_item;
7892 struct chunk_record *chunk_rec;
7893 struct block_group_record *bg_rec;
7894 struct device_extent_record *dext_rec;
7898 chunk_item = first_cache_extent(chunk_cache);
7899 while (chunk_item) {
7900 chunk_rec = container_of(chunk_item, struct chunk_record,
7902 err = check_chunk_refs(chunk_rec, block_group_cache,
7903 dev_extent_cache, silent);
7906 if (err == 0 && good)
7907 list_add_tail(&chunk_rec->list, good);
7908 if (err > 0 && rebuild)
7909 list_add_tail(&chunk_rec->list, rebuild);
7911 list_add_tail(&chunk_rec->list, bad);
7912 chunk_item = next_cache_extent(chunk_item);
/* Anything left on these lists had no matching chunk. */
7915 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7918 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7926 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7930 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all dev extents recorded for @dev_rec's devid and
 * compare the total against the device item's byte_used; report any
 * mismatch.
 */
7941 static int check_device_used(struct device_record *dev_rec,
7942 struct device_extent_tree *dext_cache)
7944 struct cache_extent *cache;
7945 struct device_extent_record *dev_extent_rec;
7948 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7950 dev_extent_rec = container_of(cache,
7951 struct device_extent_record,
/* Stop once the iteration crosses into another device's extents. */
7953 if (dev_extent_rec->objectid != dev_rec->devid)
7956 list_del_init(&dev_extent_rec->device_list);
7957 total_byte += dev_extent_rec->length;
7958 cache = next_cache_extent(cache);
7961 if (total_byte != dev_rec->byte_used) {
7963 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7964 total_byte, dev_rec->byte_used, dev_rec->objectid,
7965 dev_rec->type, dev_rec->offset);
7972 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Verify every device record against its dev extents via
 * check_device_used(), then report dev extents whose device item was
 * never found.
 */
7973 static int check_devices(struct rb_root *dev_cache,
7974 struct device_extent_tree *dev_extent_cache)
7976 struct rb_node *dev_node;
7977 struct device_record *dev_rec;
7978 struct device_extent_record *dext_rec;
7982 dev_node = rb_first(dev_cache);
7984 dev_rec = container_of(dev_node, struct device_record, node);
7985 err = check_device_used(dev_rec, dev_extent_cache);
7989 dev_node = rb_next(dev_node);
7991 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7994 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7995 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root to scan and queue
 * it on @head.  @drop_key, when non-NULL, records the drop progress of a
 * partially deleted snapshot so the scan can resume from it.
 */
8002 static int add_root_item_to_list(struct list_head *head,
8003 u64 objectid, u64 bytenr, u64 last_snapshot,
8004 u8 level, u8 drop_level,
8005 int level_size, struct btrfs_key *drop_key)
8008 struct root_item_record *ri_rec;
8009 ri_rec = malloc(sizeof(*ri_rec));
8012 ri_rec->bytenr = bytenr;
8013 ri_rec->objectid = objectid;
8014 ri_rec->level = level;
8015 ri_rec->level_size = level_size;
8016 ri_rec->drop_level = drop_level;
8017 ri_rec->last_snapshot = last_snapshot;
8019 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8020 list_add_tail(&ri_rec->list, head);
/* Drain @list, unlinking (and presumably freeing) each root_item_record. */
8025 static void free_root_item_list(struct list_head *list)
8027 struct root_item_record *ri_rec;
8029 while (!list_empty(list)) {
8030 ri_rec = list_first_entry(list, struct root_item_record,
8032 list_del_init(&ri_rec->list);
/*
 * Process every queued root_item_record: read the root's node, add it to
 * the pending set, then drain blocks through run_next_block() so the
 * whole tree is scanned into the extent/chunk/device/block-group caches.
 * A final run_next_block() loop (bottom) drains anything still pending.
 */
8037 static int deal_root_from_list(struct list_head *list,
8038 struct btrfs_root *root,
8039 struct block_info *bits,
8041 struct cache_tree *pending,
8042 struct cache_tree *seen,
8043 struct cache_tree *reada,
8044 struct cache_tree *nodes,
8045 struct cache_tree *extent_cache,
8046 struct cache_tree *chunk_cache,
8047 struct rb_root *dev_cache,
8048 struct block_group_tree *block_group_cache,
8049 struct device_extent_tree *dev_extent_cache)
8054 while (!list_empty(list)) {
8055 struct root_item_record *rec;
8056 struct extent_buffer *buf;
8057 rec = list_entry(list->next,
8058 struct root_item_record, list);
8060 buf = read_tree_block(root->fs_info->tree_root,
8061 rec->bytenr, rec->level_size, 0);
8062 if (!extent_buffer_uptodate(buf)) {
8063 free_extent_buffer(buf);
8067 add_root_to_pending(buf, extent_cache, pending,
8068 seen, nodes, rec->objectid);
8070 * To rebuild extent tree, we need deal with snapshot
8071 * one by one, otherwise we deal with node firstly which
8072 * can maximize readahead.
8075 ret = run_next_block(root, bits, bits_nr, &last,
8076 pending, seen, reada, nodes,
8077 extent_cache, chunk_cache,
8078 dev_cache, block_group_cache,
8079 dev_extent_cache, rec);
8083 free_extent_buffer(buf);
8084 list_del(&rec->list);
8090 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8091 reada, nodes, extent_cache, chunk_cache,
8092 dev_cache, block_group_cache,
8093 dev_extent_cache, NULL);
/*
 * Top-level extent/chunk verification pass.
 *
 * Builds the chunk, block-group, dev-extent and extent caches by walking
 * every tree root (the tree root and chunk root explicitly, then every
 * ROOT_ITEM in the tree of tree roots — intact roots on normal_trees,
 * half-dropped snapshots on dropping_trees), then cross-checks chunks,
 * extent refs and devices.  Installs the fsck hooks on fs_info
 * (excluded_extents, fsck_extent_cache, free_extent_hook,
 * corrupt_blocks) for the duration and tears all caches down on both
 * the success and error paths.
 */
8103 static int check_chunks_and_extents(struct btrfs_root *root)
8105 struct rb_root dev_cache;
8106 struct cache_tree chunk_cache;
8107 struct block_group_tree block_group_cache;
8108 struct device_extent_tree dev_extent_cache;
8109 struct cache_tree extent_cache;
8110 struct cache_tree seen;
8111 struct cache_tree pending;
8112 struct cache_tree reada;
8113 struct cache_tree nodes;
8114 struct extent_io_tree excluded_extents;
8115 struct cache_tree corrupt_blocks;
8116 struct btrfs_path path;
8117 struct btrfs_key key;
8118 struct btrfs_key found_key;
8120 struct block_info *bits;
8122 struct extent_buffer *leaf;
8124 struct btrfs_root_item ri;
8125 struct list_head dropping_trees;
8126 struct list_head normal_trees;
8127 struct btrfs_root *root1;
8132 dev_cache = RB_ROOT;
8133 cache_tree_init(&chunk_cache);
8134 block_group_tree_init(&block_group_cache);
8135 device_extent_tree_init(&dev_extent_cache);
8137 cache_tree_init(&extent_cache);
8138 cache_tree_init(&seen);
8139 cache_tree_init(&pending);
8140 cache_tree_init(&nodes);
8141 cache_tree_init(&reada);
8142 cache_tree_init(&corrupt_blocks);
8143 extent_io_tree_init(&excluded_extents);
8144 INIT_LIST_HEAD(&dropping_trees);
8145 INIT_LIST_HEAD(&normal_trees);
/* Install the fsck hooks so allocator/free paths feed our caches. */
8148 root->fs_info->excluded_extents = &excluded_extents;
8149 root->fs_info->fsck_extent_cache = &extent_cache;
8150 root->fs_info->free_extent_hook = free_extent_hook;
8151 root->fs_info->corrupt_blocks = &corrupt_blocks;
8155 bits = malloc(bits_nr * sizeof(struct block_info));
8161 if (ctx.progress_enabled) {
8162 ctx.tp = TASK_EXTENTS;
8163 task_start(ctx.info);
/* Seed the scan with the tree root and the chunk root. */
8167 root1 = root->fs_info->tree_root;
8168 level = btrfs_header_level(root1->node);
8169 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8170 root1->node->start, 0, level, 0,
8171 btrfs_level_size(root1, level), NULL);
8174 root1 = root->fs_info->chunk_root;
8175 level = btrfs_header_level(root1->node);
8176 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8177 root1->node->start, 0, level, 0,
8178 btrfs_level_size(root1, level), NULL);
/* Enumerate every ROOT_ITEM in the tree of tree roots. */
8181 btrfs_init_path(&path);
8184 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8185 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8190 leaf = path.nodes[0];
8191 slot = path.slots[0];
8192 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8193 ret = btrfs_next_leaf(root, &path);
8196 leaf = path.nodes[0];
8197 slot = path.slots[0];
8199 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8200 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8201 unsigned long offset;
8204 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8205 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8206 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress == 0 means the root is intact, not half-deleted. */
8207 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8208 level = btrfs_root_level(&ri);
8209 level_size = btrfs_level_size(root, level);
8210 ret = add_root_item_to_list(&normal_trees,
8212 btrfs_root_bytenr(&ri),
8213 last_snapshot, level,
8214 0, level_size, NULL);
8218 level = btrfs_root_level(&ri);
8219 level_size = btrfs_level_size(root, level);
8220 objectid = found_key.objectid;
8221 btrfs_disk_key_to_cpu(&found_key,
8223 ret = add_root_item_to_list(&dropping_trees,
8225 btrfs_root_bytenr(&ri),
8226 last_snapshot, level,
8228 level_size, &found_key);
8235 btrfs_release_path(&path);
8238 * check_block can return -EAGAIN if it fixes something, please keep
8239 * this in mind when dealing with return values from these functions, if
8240 * we get -EAGAIN we want to fall through and restart the loop.
8242 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8243 &seen, &reada, &nodes, &extent_cache,
8244 &chunk_cache, &dev_cache, &block_group_cache,
8251 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8252 &pending, &seen, &reada, &nodes,
8253 &extent_cache, &chunk_cache, &dev_cache,
8254 &block_group_cache, &dev_extent_cache);
/* Cross-check phase: chunks, then extent refs, then devices. */
8261 ret = check_chunks(&chunk_cache, &block_group_cache,
8262 &dev_extent_cache, NULL, NULL, NULL, 0);
8269 ret = check_extent_refs(root, &extent_cache);
8276 ret = check_devices(&dev_cache, &dev_extent_cache);
8281 task_stop(ctx.info);
/* Success path teardown: detach hooks, free all caches. */
8283 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8284 extent_io_tree_cleanup(&excluded_extents);
8285 root->fs_info->fsck_extent_cache = NULL;
8286 root->fs_info->free_extent_hook = NULL;
8287 root->fs_info->corrupt_blocks = NULL;
8288 root->fs_info->excluded_extents = NULL;
8291 free_chunk_cache_tree(&chunk_cache);
8292 free_device_cache_tree(&dev_cache);
8293 free_block_group_tree(&block_group_cache);
8294 free_device_extent_tree(&dev_extent_cache);
8295 free_extent_cache_tree(&seen);
8296 free_extent_cache_tree(&pending);
8297 free_extent_cache_tree(&reada);
8298 free_extent_cache_tree(&nodes);
/* Error/rescan path teardown: same frees, different ordering. */
8301 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8302 free_extent_cache_tree(&seen);
8303 free_extent_cache_tree(&pending);
8304 free_extent_cache_tree(&reada);
8305 free_extent_cache_tree(&nodes);
8306 free_chunk_cache_tree(&chunk_cache);
8307 free_block_group_tree(&block_group_cache);
8308 free_device_cache_tree(&dev_cache);
8309 free_device_extent_tree(&dev_extent_cache);
8310 free_extent_record_cache(root->fs_info, &extent_cache);
8311 free_root_item_list(&normal_trees);
8312 free_root_item_list(&dropping_trees);
8313 extent_io_tree_cleanup(&excluded_extents);
/*
 * Give @root a freshly initialized root node so its tree can be rebuilt
 * from scratch.  With @overwrite the existing node is reused; otherwise
 * a new block is allocated.  The new node gets a zeroed, fully
 * re-populated header, and if it landed on the same bytenr as the old
 * node (possible when reinitializing the reloc data root — see the
 * comment below) the root item is updated in place.
 */
8317 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8318 struct btrfs_root *root, int overwrite)
8320 struct extent_buffer *c;
8321 struct extent_buffer *old = root->node;
8324 struct btrfs_disk_key disk_key = {0,0,0};
8330 extent_buffer_get(c);
8333 c = btrfs_alloc_free_block(trans, root,
8334 btrfs_level_size(root, 0),
8335 root->root_key.objectid,
8336 &disk_key, level, 0, 0);
8339 extent_buffer_get(c);
/* Build an empty header for the new root node. */
8343 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8344 btrfs_set_header_level(c, level);
8345 btrfs_set_header_bytenr(c, c->start);
8346 btrfs_set_header_generation(c, trans->transid);
8347 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8348 btrfs_set_header_owner(c, root->root_key.objectid);
8350 write_extent_buffer(c, root->fs_info->fsid,
8351 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8353 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8354 btrfs_header_chunk_tree_uuid(c),
8357 btrfs_mark_buffer_dirty(c);
8359 * this case can happen in the following case:
8361 * 1.overwrite previous root.
8363 * 2.reinit reloc data root, this is because we skip pin
8364 * down reloc data tree before which means we can allocate
8365 * same block bytenr here.
8367 if (old->start == c->start) {
8368 btrfs_set_root_generation(&root->root_item,
8370 root->root_item.level = btrfs_header_level(root->node);
8371 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8372 &root->root_key, &root->root_item);
8374 free_extent_buffer(c);
8378 free_extent_buffer(old);
8380 add_root_to_dirty_list(root);
/*
 * Recursively walk the tree rooted at @eb and pin every metadata block it
 * references, so a subsequent extent-tree rebuild will not allocate over
 * live metadata.  @tree_root selects tree-of-tree-roots semantics: in that
 * mode leaf items of type BTRFS_ROOT_ITEM_KEY are followed into the
 * referenced subtrees as well.
 *
 * NOTE(review): excerpted listing — some original lines (declarations,
 * braces, `continue`/`return` statements) are elided between the numbered
 * lines; only comments were added.
 */
8384 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8385 struct extent_buffer *eb, int tree_root)
8387 struct extent_buffer *tmp;
8388 struct btrfs_root_item *ri;
8389 struct btrfs_key key;
8392 int level = btrfs_header_level(eb);
8398 * If we have pinned this block before, don't pin it again.
8399 * This can not only avoid forever loop with broken filesystem
8400 * but also give us some speedups.
8402 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8403 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8406 btrfs_pin_extent(fs_info, eb->start, eb->len);
8408 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8409 nritems = btrfs_header_nritems(eb);
/* Visit every item/pointer of this node or leaf. */
8410 for (i = 0; i < nritems; i++) {
8412 btrfs_item_key_to_cpu(eb, &key, i);
8413 if (key.type != BTRFS_ROOT_ITEM_KEY)
8415 /* Skip the extent root and reloc roots */
8416 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8417 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8418 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8420 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8421 bytenr = btrfs_disk_root_bytenr(eb, ri);
8424 * If at any point we start needing the real root we
8425 * will have to build a stump root for the root we are
8426 * in, but for now this doesn't actually use the root so
8427 * just pass in extent_root.
8429 tmp = read_tree_block(fs_info->extent_root, bytenr,
8431 if (!extent_buffer_uptodate(tmp)) {
8432 fprintf(stderr, "Error reading root block\n");
/* Recurse into the referenced subtree (tree_root=0: ordinary tree). */
8435 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8436 free_extent_buffer(tmp);
8440 bytenr = btrfs_node_blockptr(eb, i);
8442 /* If we aren't the tree root don't read the block */
8443 if (level == 1 && !tree_root) {
8444 btrfs_pin_extent(fs_info, bytenr, leafsize);
8448 tmp = read_tree_block(fs_info->extent_root, bytenr,
8450 if (!extent_buffer_uptodate(tmp)) {
8451 fprintf(stderr, "Error reading tree block\n");
8454 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8455 free_extent_buffer(tmp);
8464 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8468 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8472 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group caches from the chunk tree and mark the
 * chunk ranges as free space, in preparation for re-creating the on-disk
 * extent tree.  Also clears the avail_*_alloc_bits hints, since stale block
 * groups may have advertised alloc bits not actually present on the chunks.
 *
 * NOTE(review): excerpted listing — loop braces, error paths and some
 * statements are elided between the numbered lines; only comments added.
 */
8475 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8477 struct btrfs_block_group_cache *cache;
8478 struct btrfs_path *path;
8479 struct extent_buffer *leaf;
8480 struct btrfs_chunk *chunk;
8481 struct btrfs_key key;
8485 path = btrfs_alloc_path();
8490 key.type = BTRFS_CHUNK_ITEM_KEY;
8493 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8495 btrfs_free_path(path);
8500 * We do this in case the block groups were screwed up and had alloc
8501 * bits that aren't actually set on the chunks. This happens with
8502 * restored images every time and could happen in real life I guess.
8504 fs_info->avail_data_alloc_bits = 0;
8505 fs_info->avail_metadata_alloc_bits = 0;
8506 fs_info->avail_system_alloc_bits = 0;
8508 /* First we need to create the in-memory block groups */
8510 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8511 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8513 btrfs_free_path(path);
8521 leaf = path->nodes[0];
8522 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8523 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One in-memory block group per chunk item found. */
8528 chunk = btrfs_item_ptr(leaf, path->slots[0],
8529 struct btrfs_chunk);
8530 btrfs_add_block_group(fs_info, 0,
8531 btrfs_chunk_type(leaf, chunk),
8532 key.objectid, key.offset,
8533 btrfs_chunk_length(leaf, chunk));
8534 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8535 key.offset + btrfs_chunk_length(leaf, chunk),
/* Second pass: iterate the block groups just created (by start offset). */
8541 cache = btrfs_lookup_first_block_group(fs_info, start);
8545 start = cache->key.objectid + cache->key.offset;
8548 btrfs_free_path(path);
/*
 * Remove any pending balance state from the tree root: delete the balance
 * item, delete all reloc-tree root items, then reinitialize the data reloc
 * tree (fresh empty root + its root dir).  Called as part of extent tree
 * re-initialization so a half-finished balance cannot resurface.
 *
 * NOTE(review): excerpted listing — several lines (error checks, braces,
 * loop headers, labels such as the reinit_data_reloc target) are elided
 * between the numbered lines; only comments added.
 */
8552 static int reset_balance(struct btrfs_trans_handle *trans,
8553 struct btrfs_fs_info *fs_info)
8555 struct btrfs_root *root = fs_info->tree_root;
8556 struct btrfs_path *path;
8557 struct extent_buffer *leaf;
8558 struct btrfs_key key;
8559 int del_slot, del_nr = 0;
8563 path = btrfs_alloc_path();
/* Step 1: locate and delete the balance item itself. */
8567 key.objectid = BTRFS_BALANCE_OBJECTID;
8568 key.type = BTRFS_BALANCE_ITEM_KEY;
8571 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8576 goto reinit_data_reloc;
8581 ret = btrfs_del_item(trans, root, path);
8584 btrfs_release_path(path);
/* Step 2: delete every TREE_RELOC root item, batching contiguous slots. */
8586 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8587 key.type = BTRFS_ROOT_ITEM_KEY;
8590 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8594 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8599 ret = btrfs_del_items(trans, root, path,
8606 btrfs_release_path(path);
8609 ret = btrfs_search_slot(trans, root, &key, path,
8616 leaf = path->nodes[0];
8617 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8618 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8620 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8625 del_slot = path->slots[0];
8634 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8638 btrfs_release_path(path);
/* Step 3: re-create the data reloc tree with an empty root. */
8641 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8642 key.type = BTRFS_ROOT_ITEM_KEY;
8643 key.offset = (u64)-1;
8644 root = btrfs_read_fs_root(fs_info, &key);
8646 fprintf(stderr, "Error reading data reloc tree\n");
8647 ret = PTR_ERR(root);
8650 record_root_in_trans(trans, root);
8651 ret = btrfs_fsck_reinit_root(trans, root, 0);
8654 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8656 btrfs_free_path(path);
/*
 * Rebuild the extent tree from scratch (--init-extent-tree):
 *  1. refuse on mixed block groups (would require walking fs leaves too),
 *  2. pin all live metadata so nothing gets overwritten,
 *  3. drop and re-create the in-memory block groups,
 *  4. reinit the extent root to an empty node,
 *  5. re-insert a block group item per in-memory block group,
 *  6. clear any pending balance state.
 *
 * NOTE(review): excerpted listing — error-path lines and braces are elided
 * between the numbered lines; only comments added.
 */
8660 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8661 struct btrfs_fs_info *fs_info)
8667 * The only reason we don't do this is because right now we're just
8668 * walking the trees we find and pinning down their bytes, we don't look
8669 * at any of the leaves. In order to do mixed groups we'd have to check
8670 * the leaves of any fs roots and pin down the bytes for any file
8671 * extents we find. Not hard but why do it if we don't have to?
8673 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8674 fprintf(stderr, "We don't support re-initing the extent tree "
8675 "for mixed block groups yet, please notify a btrfs "
8676 "developer you want to do this so they can add this "
8677 "functionality.\n");
8682 * first we need to walk all of the trees except the extent tree and pin
8683 * down the bytes that are in use so we don't overwrite any existing
8686 ret = pin_metadata_blocks(fs_info);
8688 fprintf(stderr, "error pinning down used bytes\n");
8693 * Need to drop all the block groups since we're going to recreate all
8696 btrfs_free_block_groups(fs_info);
8697 ret = reset_block_groups(fs_info);
8699 fprintf(stderr, "error resetting the block groups\n");
8703 /* Ok we can allocate now, reinit the extent root */
8704 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8706 fprintf(stderr, "extent root initialization failed\n");
8708 * When the transaction code is updated we should end the
8709 * transaction, but for now progs only knows about commit so
8710 * just return an error.
8716 * Now we have all the in-memory block groups setup so we can make
8717 * allocations properly, and the metadata we care about is safe since we
8718 * pinned all of it above.
8721 struct btrfs_block_group_cache *cache;
8723 cache = btrfs_lookup_first_block_group(fs_info, start);
8726 start = cache->key.objectid + cache->key.offset;
/* Write a BLOCK_GROUP_ITEM for this group into the fresh extent tree. */
8727 ret = btrfs_insert_item(trans, fs_info->extent_root,
8728 &cache->key, &cache->item,
8729 sizeof(cache->item));
8731 fprintf(stderr, "Error adding block group\n");
8734 btrfs_extent_post_op(trans, fs_info->extent_root);
8737 ret = reset_balance(trans, fs_info);
/* NOTE(review): typo "reseting" in this user-visible message — worth fixing. */
8739 fprintf(stderr, "error reseting the pending balance\n");
/*
 * Force a copy-on-write of the tree block @eb: look up its owner root,
 * then do a write search (cow=1) down to @eb's level with @eb's first key,
 * which re-COWs the path including this block.  Used in repair mode for
 * blocks queued on fs_info->recow_ebs (e.g. transid problems).
 *
 * NOTE(review): excerpted listing — some error-check lines and braces are
 * elided between the numbered lines; only comments added.
 */
8744 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8746 struct btrfs_path *path;
8747 struct btrfs_trans_handle *trans;
8748 struct btrfs_key key;
8751 printf("Recowing metadata block %llu\n", eb->start);
8752 key.objectid = btrfs_header_owner(eb);
8753 key.type = BTRFS_ROOT_ITEM_KEY;
8754 key.offset = (u64)-1;
/* Resolve the owning root of this block from the tree of tree roots. */
8756 root = btrfs_read_fs_root(root->fs_info, &key);
8758 fprintf(stderr, "Couldn't find owner root %llu\n",
8760 return PTR_ERR(root);
8763 path = btrfs_alloc_path();
8767 trans = btrfs_start_transaction(root, 1);
8768 if (IS_ERR(trans)) {
8769 btrfs_free_path(path);
8770 return PTR_ERR(trans);
/* Search down only as far as @eb's own level; cow=1 rewrites the path. */
8773 path->lowest_level = btrfs_header_level(eb);
8774 if (path->lowest_level)
8775 btrfs_node_key_to_cpu(eb, &key, 0);
8777 btrfs_item_key_to_cpu(eb, &key, 0);
8779 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8780 btrfs_commit_transaction(trans, root);
8781 btrfs_free_path(path);
/*
 * Delete one item previously recorded as bad (@bad holds the root id and
 * the item key).  Opens the owning root, searches for the exact key with
 * intent to delete (ins_len=-1, cow=1) and removes it in a one-shot
 * transaction.
 *
 * NOTE(review): excerpted listing — some error-check lines and braces are
 * elided between the numbered lines; only comments added.
 */
8785 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8787 struct btrfs_path *path;
8788 struct btrfs_trans_handle *trans;
8789 struct btrfs_key key;
8792 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8793 bad->key.type, bad->key.offset);
8794 key.objectid = bad->root_id;
8795 key.type = BTRFS_ROOT_ITEM_KEY;
8796 key.offset = (u64)-1;
8798 root = btrfs_read_fs_root(root->fs_info, &key);
8800 fprintf(stderr, "Couldn't find owner root %llu\n",
8802 return PTR_ERR(root);
8805 path = btrfs_alloc_path();
8809 trans = btrfs_start_transaction(root, 1);
8810 if (IS_ERR(trans)) {
8811 btrfs_free_path(path);
8812 return PTR_ERR(trans);
8815 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8821 ret = btrfs_del_item(trans, root, path);
8823 btrfs_commit_transaction(trans, root);
8824 btrfs_free_path(path);
/*
 * Clear the log tree pointer in the superblock (bytenr and level both set
 * to 0) and commit.  Repair mode does this up front because a forced
 * transaction commit would otherwise leave a stale log root that breaks
 * mounting.
 */
8828 static int zero_log_tree(struct btrfs_root *root)
8830 struct btrfs_trans_handle *trans;
8833 trans = btrfs_start_transaction(root, 1);
8834 if (IS_ERR(trans)) {
8835 ret = PTR_ERR(trans);
8838 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8839 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8840 ret = btrfs_commit_transaction(trans, root);
/*
 * Recompute checksums for the data extent [@start, @start+@len): read one
 * sector at a time into @buf and insert its csum into the csum tree.
 * "start + len" is passed as the alloc-end hint to btrfs_csum_file_block.
 *
 * NOTE(review): excerpted listing — declarations and error-break lines are
 * elided between the numbered lines; only comments added.
 */
8844 static int populate_csum(struct btrfs_trans_handle *trans,
8845 struct btrfs_root *csum_root, char *buf, u64 start,
8852 while (offset < len) {
8853 sectorsize = csum_root->sectorsize;
8854 ret = read_extent_data(csum_root, buf, start + offset,
8858 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8859 start + offset, buf, sectorsize);
8862 offset += sectorsize;
/*
 * Walk one fs/subvolume tree (@cur_root) and (re)populate checksums for
 * every regular (non-inline, non-prealloc) file extent it references, via
 * populate_csum().  Preallocated and inline extents carry no csums.
 *
 * NOTE(review): excerpted listing — loop framing, skip conditions (e.g.
 * for hole/prealloc extents) and cleanup lines are elided between the
 * numbered lines; only comments added.
 */
8867 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8868 struct btrfs_root *csum_root,
8869 struct btrfs_root *cur_root)
8871 struct btrfs_path *path;
8872 struct btrfs_key key;
8873 struct extent_buffer *node;
8874 struct btrfs_file_extent_item *fi;
8881 path = btrfs_alloc_path();
/* One sector worth of scratch space for reading file data. */
8884 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8894 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8897 /* Iterate all regular file extents and fill its csum */
8899 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8901 if (key.type != BTRFS_EXTENT_DATA_KEY)
8903 node = path->nodes[0];
8904 slot = path->slots[0];
8905 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8906 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8908 start = btrfs_file_extent_disk_bytenr(node, fi);
8909 len = btrfs_file_extent_disk_num_bytes(node, fi);
8911 ret = populate_csum(trans, csum_root, buf, start, len);
8918 * TODO: if next leaf is corrupted, jump to nearest next valid
8921 ret = btrfs_next_item(cur_root, path);
8931 btrfs_free_path(path);
/*
 * Rebuild the csum tree by iterating every fs/subvolume root item in the
 * tree of tree roots and delegating each to
 * fill_csum_tree_from_one_fs_root().  Used when the extent tree cannot be
 * trusted (e.g. together with --init-extent-tree).
 *
 * NOTE(review): excerpted listing — loop framing, error paths and braces
 * are elided between the numbered lines; only comments added.
 */
8936 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8937 struct btrfs_root *csum_root)
8939 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8940 struct btrfs_path *path;
8941 struct btrfs_root *tree_root = fs_info->tree_root;
8942 struct btrfs_root *cur_root;
8943 struct extent_buffer *node;
8944 struct btrfs_key key;
8948 path = btrfs_alloc_path();
/* Start at the first fs tree and scan forward through all root items. */
8952 key.objectid = BTRFS_FS_TREE_OBJECTID;
8954 key.type = BTRFS_ROOT_ITEM_KEY;
8956 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8965 node = path->nodes[0];
8966 slot = path->slots[0];
8967 btrfs_item_key_to_cpu(node, &key, slot);
8968 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8970 if (key.type != BTRFS_ROOT_ITEM_KEY)
8972 if (!is_fstree(key.objectid))
8974 key.offset = (u64)-1;
8976 cur_root = btrfs_read_fs_root(fs_info, &key);
8977 if (IS_ERR(cur_root) || !cur_root) {
8978 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8982 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8987 ret = btrfs_next_item(tree_root, path);
8997 btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking the extent tree: for every EXTENT_ITEM
 * flagged BTRFS_EXTENT_FLAG_DATA, recompute its checksums with
 * populate_csum().  Preferred path when the extent tree is trustworthy.
 *
 * NOTE(review): excerpted listing — loop framing, error paths and cleanup
 * (free(buf)) are elided between the numbered lines; only comments added.
 */
9001 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9002 struct btrfs_root *csum_root)
9004 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9005 struct btrfs_path *path;
9006 struct btrfs_extent_item *ei;
9007 struct extent_buffer *leaf;
9009 struct btrfs_key key;
9012 path = btrfs_alloc_path();
9017 key.type = BTRFS_EXTENT_ITEM_KEY;
9020 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9022 btrfs_free_path(path);
9026 buf = malloc(csum_root->sectorsize);
9028 btrfs_free_path(path);
9033 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9034 ret = btrfs_next_leaf(extent_root, path);
9042 leaf = path->nodes[0];
9044 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9045 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9050 ei = btrfs_item_ptr(leaf, path->slots[0],
9051 struct btrfs_extent_item);
/* Only data extents carry checksums; skip tree blocks. */
9052 if (!(btrfs_extent_flags(leaf, ei) &
9053 BTRFS_EXTENT_FLAG_DATA)) {
9058 ret = populate_csum(trans, csum_root, buf, key.objectid,
9065 btrfs_free_path(path);
/*
 * Dispatcher: see below — the original comment explains the choice between
 * the two csum-rebuild strategies.
 */
9071 * Recalculate the csum and put it into the csum tree.
9073 * Extent tree init will wipe out all the extent info, so in that case, we
9074 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9075 * will use fs/subvol trees to init the csum tree.
9077 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9078 struct btrfs_root *csum_root,
/* search_fs_tree != 0: walk fs/subvol trees (extent tree untrusted). */
9082 return fill_csum_tree_from_fs(trans, csum_root);
9084 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Per-root summary gathered from the extent tree, used to detect and fix
 * stale root items (see repair_root_items below in the original file).
 * Keyed by root objectid via the embedded cache_extent (size is always 1).
 */
9087 struct root_item_info {
9088 /* level of the root */
9090 /* number of nodes at this level, must be 1 for a root */
9094 struct cache_extent cache_extent;
/* Lazily allocated cache of root_item_info, built by build_roots_info_cache. */
9097 static struct cache_tree *roots_info_cache = NULL;
/*
 * Tear down roots_info_cache: pop every cache_extent, free the containing
 * root_item_info, then free the tree itself and reset the pointer so the
 * cache can be rebuilt later.  Safe to call when the cache was never built.
 */
9099 static void free_roots_info_cache(void)
9101 if (!roots_info_cache)
9104 while (!cache_tree_empty(roots_info_cache)) {
9105 struct cache_extent *entry;
9106 struct root_item_info *rii;
9108 entry = first_cache_extent(roots_info_cache);
9111 remove_cache_extent(roots_info_cache, entry);
9112 rii = container_of(entry, struct root_item_info, cache_extent);
9116 free(roots_info_cache);
9117 roots_info_cache = NULL;
/*
 * Scan the whole extent tree and record, for every tree that has a
 * TREE_BLOCK_REF, the highest-level block seen (bytenr, generation, level)
 * plus how many blocks exist at that level.  A healthy root has exactly
 * one block at its top level; repair_root_items() compares each root item
 * against this cache.
 *
 * NOTE(review): excerpted listing — loop framing, `continue` statements
 * and error paths are elided between the numbered lines; only comments
 * added.
 */
9120 static int build_roots_info_cache(struct btrfs_fs_info *info)
9123 struct btrfs_key key;
9124 struct extent_buffer *leaf;
9125 struct btrfs_path *path;
9127 if (!roots_info_cache) {
9128 roots_info_cache = malloc(sizeof(*roots_info_cache));
9129 if (!roots_info_cache)
9131 cache_tree_init(roots_info_cache);
9134 path = btrfs_alloc_path();
9139 key.type = BTRFS_EXTENT_ITEM_KEY;
9142 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9145 leaf = path->nodes[0];
9148 struct btrfs_key found_key;
9149 struct btrfs_extent_item *ei;
9150 struct btrfs_extent_inline_ref *iref;
9151 int slot = path->slots[0];
9156 struct cache_extent *entry;
9157 struct root_item_info *rii;
9159 if (slot >= btrfs_header_nritems(leaf)) {
9160 ret = btrfs_next_leaf(info->extent_root, path);
9167 leaf = path->nodes[0];
9168 slot = path->slots[0];
9171 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9173 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9174 found_key.type != BTRFS_METADATA_ITEM_KEY)
9177 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9178 flags = btrfs_extent_flags(leaf, ei);
9180 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9181 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM encodes the level in key.offset; old-style EXTENT_ITEM
 * carries a btrfs_tree_block_info before the inline refs.
 */
9184 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9185 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9186 level = found_key.offset;
9188 struct btrfs_tree_block_info *info;
9190 info = (struct btrfs_tree_block_info *)(ei + 1);
9191 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9192 level = btrfs_tree_block_level(leaf, info);
9196 * For a root extent, it must be of the following type and the
9197 * first (and only one) iref in the item.
9199 type = btrfs_extent_inline_ref_type(leaf, iref);
9200 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9203 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9204 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First time we see this root id: create a fresh (empty) info entry. */
9206 rii = malloc(sizeof(struct root_item_info));
9211 rii->cache_extent.start = root_id;
9212 rii->cache_extent.size = 1;
9213 rii->level = (u8)-1;
9214 entry = &rii->cache_extent;
9215 ret = insert_cache_extent(roots_info_cache, entry);
9218 rii = container_of(entry, struct root_item_info,
9222 ASSERT(rii->cache_extent.start == root_id);
9223 ASSERT(rii->cache_extent.size == 1);
/* A strictly higher level resets the candidate root block for this tree. */
9225 if (level > rii->level || rii->level == (u8)-1) {
9227 rii->bytenr = found_key.objectid;
9228 rii->gen = btrfs_extent_generation(leaf, ei);
9229 rii->node_count = 1;
9230 } else if (level == rii->level) {
9238 btrfs_free_path(path);
/*
 * Compare the on-disk root item at path->nodes[0]/slots[0] against the
 * root_item_info gathered from the extent tree; if bytenr/level/generation
 * disagree, report it and (unless @read_only_mode) rewrite the root item
 * in place.  Returns-with-error cases include missing cache entries and a
 * root item generation newer than the extent tree's view.
 *
 * NOTE(review): excerpted listing — some error-return lines and braces
 * are elided between the numbered lines; only comments added.
 */
9243 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9244 struct btrfs_path *path,
9245 const struct btrfs_key *root_key,
9246 const int read_only_mode)
9248 const u64 root_id = root_key->objectid;
9249 struct cache_extent *entry;
9250 struct root_item_info *rii;
9251 struct btrfs_root_item ri;
9252 unsigned long offset;
9254 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9257 "Error: could not find extent items for root %llu\n",
9258 root_key->objectid);
9262 rii = container_of(entry, struct root_item_info, cache_extent);
9263 ASSERT(rii->cache_extent.start == root_id);
9264 ASSERT(rii->cache_extent.size == 1);
9266 if (rii->node_count != 1) {
9268 "Error: could not find btree root extent for root %llu\n",
/* Copy the root item out of the leaf so it can be inspected/modified. */
9273 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9274 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9276 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9277 btrfs_root_level(&ri) != rii->level ||
9278 btrfs_root_generation(&ri) != rii->gen) {
9281 * If we're in repair mode but our caller told us to not update
9282 * the root item, i.e. just check if it needs to be updated, don't
9283 * print this message, since the caller will call us again shortly
9284 * for the same root item without read only mode (the caller will
9285 * open a transaction first).
9287 if (!(read_only_mode && repair))
9289 "%sroot item for root %llu,"
9290 " current bytenr %llu, current gen %llu, current level %u,"
9291 " new bytenr %llu, new gen %llu, new level %u\n",
9292 (read_only_mode ? "" : "fixing "),
9294 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9295 btrfs_root_level(&ri),
9296 rii->bytenr, rii->gen, rii->level);
/* A newer generation in the root item means the cache is stale — bail. */
9298 if (btrfs_root_generation(&ri) > rii->gen) {
9300 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9301 root_id, btrfs_root_generation(&ri), rii->gen);
9305 if (!read_only_mode) {
9306 btrfs_set_root_bytenr(&ri, rii->bytenr);
9307 btrfs_set_root_level(&ri, rii->level);
9308 btrfs_set_root_generation(&ri, rii->gen);
9309 write_extent_buffer(path->nodes[0], &ri,
9310 offset, sizeof(ri));
/* (Original rationale comment retained below.) */
9320 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9321 * caused read-only snapshots to be corrupted if they were created at a moment
9322 * when the source subvolume/snapshot had orphan items. The issue was that the
9323 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9324 * node instead of the post orphan cleanup root node.
9325 * So this function, and its callees, just detects and fixes those cases. Even
9326 * though the regression was for read-only snapshots, this function applies to
9327 * any snapshot/subvolume root.
9328 * This must be run before any other repair code - not doing it so, makes other
9329 * repair code delete or modify backrefs in the extent tree for example, which
9330 * will result in an inconsistent fs after repairing the root items.
/*
 * Iterate every subvolume/snapshot ROOT_ITEM (skipping reloc roots) and run
 * maybe_repair_root_item() on it; in repair mode the pass restarts with a
 * transaction when a fix is needed.  On success the return value appears to
 * be the number of roots fixed (callers print "Fixed %d roots").
 *
 * NOTE(review): excerpted listing — loop framing, error paths and labels
 * are elided between the numbered lines; only comments added.
 */
9332 static int repair_root_items(struct btrfs_fs_info *info)
9334 struct btrfs_path *path = NULL;
9335 struct btrfs_key key;
9336 struct extent_buffer *leaf;
9337 struct btrfs_trans_handle *trans = NULL;
9342 ret = build_roots_info_cache(info);
9346 path = btrfs_alloc_path();
9352 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9353 key.type = BTRFS_ROOT_ITEM_KEY;
9358 * Avoid opening and committing transactions if a leaf doesn't have
9359 * any root items that need to be fixed, so that we avoid rotating
9360 * backup roots unnecessarily.
9363 trans = btrfs_start_transaction(info->tree_root, 1);
9364 if (IS_ERR(trans)) {
9365 ret = PTR_ERR(trans);
9370 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9374 leaf = path->nodes[0];
9377 struct btrfs_key found_key;
9379 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9380 int no_more_keys = find_next_key(path, &key);
9382 btrfs_release_path(path);
9384 ret = btrfs_commit_transaction(trans,
9396 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9398 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9400 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* First pass per item is read-only; a needed fix re-enters with trans. */
9403 ret = maybe_repair_root_item(info, path, &found_key,
9408 if (!trans && repair) {
9411 btrfs_release_path(path);
9421 free_roots_info_cache();
9422 btrfs_free_path(path);
9424 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Usage/help text for "btrfs check", printed by usage(cmd_check_usage).
 * Fixes two user-visible typos present in the original strings:
 * "inegrity" -> "integrity" and "checkums" -> "checksums".
 * The array is NULL-terminated as required by the usage() helper.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock> use this superblock copy",
	"-b|--backup use the backup root copy",
	"--repair try to repair the filesystem",
	"--readonly run in read-only mode (default)",
	"--init-csum-tree create a new CRC tree",
	"--init-extent-tree create a new extent tree",
	"--check-data-csum verify checksums of data blocks",
	"-Q|--qgroup-report print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	" print subvolume extents and sharing state",
	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
	"-p|--progress indicate progress",
	NULL
};
/*
 * Entry point for "btrfs check": parse options, open the filesystem
 * (exclusively, optionally with a specific superblock copy or tree root
 * bytenr), then run the fsck passes in order — extents/chunks, root items,
 * free space cache, fs roots, csums, root refs — applying repairs and
 * special modes (--init-extent-tree, --init-csum-tree, --qgroup-report,
 * --subvol-extents) as requested, and finally print usage statistics.
 *
 * NOTE(review): excerpted listing — many lines (switch/case framing, gotos,
 * labels, error branches, cleanup) are elided between the numbered lines;
 * only comments were added.
 */
9454 int cmd_check(int argc, char **argv)
9456 struct cache_tree root_cache;
9457 struct btrfs_root *root;
9458 struct btrfs_fs_info *info;
9461 u64 tree_root_bytenr = 0;
9462 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9465 int init_csum_tree = 0;
9467 int qgroup_report = 0;
9468 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* --- Option parsing --- */
9472 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9473 OPT_CHECK_CSUM, OPT_READONLY };
9474 static const struct option long_options[] = {
9475 { "super", required_argument, NULL, 's' },
9476 { "repair", no_argument, NULL, OPT_REPAIR },
9477 { "readonly", no_argument, NULL, OPT_READONLY },
9478 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9479 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9480 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9481 { "backup", no_argument, NULL, 'b' },
9482 { "subvol-extents", required_argument, NULL, 'E' },
9483 { "qgroup-report", no_argument, NULL, 'Q' },
9484 { "tree-root", required_argument, NULL, 'r' },
9485 { "progress", no_argument, NULL, 'p' },
9489 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9493 case 'a': /* ignored */ break;
9495 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9498 num = arg_strtou64(optarg);
9499 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9501 "ERROR: super mirror should be less than: %d\n",
9502 BTRFS_SUPER_MIRROR_MAX);
9505 bytenr = btrfs_sb_offset(((int)num));
9506 printf("using SB copy %llu, bytenr %llu\n", num,
9507 (unsigned long long)bytenr);
9513 subvolid = arg_strtou64(optarg);
9516 tree_root_bytenr = arg_strtou64(optarg);
9519 ctx.progress_enabled = true;
9523 usage(cmd_check_usage);
9525 printf("enabling repair mode\n");
9527 ctree_flags |= OPEN_CTREE_WRITES;
9533 printf("Creating a new CRC tree\n");
9536 ctree_flags |= OPEN_CTREE_WRITES;
9538 case OPT_INIT_EXTENT:
9539 init_extent_tree = 1;
9540 ctree_flags |= (OPEN_CTREE_WRITES |
9541 OPEN_CTREE_NO_BLOCK_GROUPS);
9544 case OPT_CHECK_CSUM:
9545 check_data_csum = 1;
9549 argc = argc - optind;
9551 if (check_argc_exact(argc, 1))
9552 usage(cmd_check_usage);
9554 if (ctx.progress_enabled) {
9555 ctx.tp = TASK_NOTHING;
9556 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9559 /* This check is the only reason for --readonly to exist */
9560 if (readonly && repair) {
9561 fprintf(stderr, "Repair options are not compatible with --readonly\n");
/* --- Open the filesystem --- */
9566 cache_tree_init(&root_cache);
9568 if((ret = check_mounted(argv[optind])) < 0) {
9569 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9572 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9577 /* only allow partial opening under repair mode */
9579 ctree_flags |= OPEN_CTREE_PARTIAL;
9581 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9584 fprintf(stderr, "Couldn't open file system\n");
9590 root = info->fs_root;
9593 * repair mode will force us to commit transaction which
9594 * will make us fail to load log tree when mounting.
9596 if (repair && btrfs_super_log_root(info->super_copy)) {
9597 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9602 ret = zero_log_tree(root);
9604 fprintf(stderr, "fail to zero log tree\n");
/* --- Early-exit report modes --- */
9609 uuid_unparse(info->super_copy->fsid, uuidbuf);
9610 if (qgroup_report) {
9611 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9613 ret = qgroup_verify_all(info);
9615 print_qgroup_report(1);
9619 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9620 subvolid, argv[optind], uuidbuf);
9621 ret = print_extent_state(info, subvolid);
9624 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9626 if (!extent_buffer_uptodate(info->tree_root->node) ||
9627 !extent_buffer_uptodate(info->dev_root->node) ||
9628 !extent_buffer_uptodate(info->chunk_root->node)) {
9629 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
/* --- Optional extent/csum tree re-initialization --- */
9634 if (init_extent_tree || init_csum_tree) {
9635 struct btrfs_trans_handle *trans;
9637 trans = btrfs_start_transaction(info->extent_root, 0);
9638 if (IS_ERR(trans)) {
9639 fprintf(stderr, "Error starting transaction\n");
9640 ret = PTR_ERR(trans);
9644 if (init_extent_tree) {
9645 printf("Creating a new extent tree\n");
9646 ret = reinit_extent_tree(trans, info);
9651 if (init_csum_tree) {
9652 fprintf(stderr, "Reinit crc root\n");
9653 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9655 fprintf(stderr, "crc root initialization failed\n");
9660 ret = fill_csum_tree(trans, info->csum_root,
9663 fprintf(stderr, "crc refilling failed\n");
9668 * Ok now we commit and run the normal fsck, which will add
9669 * extent entries for all of the items it finds.
9671 ret = btrfs_commit_transaction(trans, info->extent_root);
9675 if (!extent_buffer_uptodate(info->extent_root->node)) {
9676 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9680 if (!extent_buffer_uptodate(info->csum_root->node)) {
9681 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
/* --- Main check passes, in dependency order --- */
9686 if (!ctx.progress_enabled)
9687 fprintf(stderr, "checking extents\n");
9688 ret = check_chunks_and_extents(root);
9690 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
/* Must run before other repairs; see repair_root_items() rationale. */
9692 ret = repair_root_items(info);
9696 fprintf(stderr, "Fixed %d roots.\n", ret);
9698 } else if (ret > 0) {
9700 "Found %d roots with an outdated root item.\n",
9703 "Please run a filesystem check with the option --repair to fix them.\n");
9708 if (!ctx.progress_enabled)
9709 fprintf(stderr, "checking free space cache\n");
9710 ret = check_space_cache(root);
9715 * We used to have to have these hole extents in between our real
9716 * extents so if we don't have this flag set we need to make sure there
9717 * are no gaps in the file extents for inodes, otherwise we can just
9718 * ignore it when this happens.
9720 no_holes = btrfs_fs_incompat(root->fs_info,
9721 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9722 if (!ctx.progress_enabled)
9723 fprintf(stderr, "checking fs roots\n");
9724 ret = check_fs_roots(root, &root_cache);
9728 fprintf(stderr, "checking csums\n");
9729 ret = check_csums(root);
9733 fprintf(stderr, "checking root refs\n");
9734 ret = check_root_refs(root, &root_cache);
/* Repair mode: re-COW blocks queued for rewrite during the passes above. */
9738 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9739 struct extent_buffer *eb;
9741 eb = list_first_entry(&root->fs_info->recow_ebs,
9742 struct extent_buffer, recow);
9743 list_del_init(&eb->recow);
9744 ret = recow_extent_buffer(root, eb);
/* Delete items that the passes flagged as bad. */
9749 while (!list_empty(&delete_items)) {
9750 struct bad_item *bad;
9752 bad = list_first_entry(&delete_items, struct bad_item, list);
9753 list_del_init(&bad->list);
9755 ret = delete_bad_item(root, bad);
9759 if (info->quota_enabled) {
9761 fprintf(stderr, "checking quota groups\n");
9762 err = qgroup_verify_all(info);
9767 if (!list_empty(&root->fs_info->recow_ebs)) {
9768 fprintf(stderr, "Transid errors in file system\n");
/* --- Final reporting and cleanup --- */
9772 print_qgroup_report(0);
9773 if (found_old_backref) { /*
9774 * there was a disk format change when mixed
9775 * backref was in testing tree. The old format
9776 * existed about one week.
9778 printf("\n * Found old mixed backref format. "
9779 "The old format is not supported! *"
9780 "\n * Please mount the FS in readonly mode, "
9781 "backup data and re-format the FS. *\n\n");
9784 printf("found %llu bytes used err is %d\n",
9785 (unsigned long long)bytes_used, ret);
9786 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9787 printf("total tree bytes: %llu\n",
9788 (unsigned long long)total_btree_bytes);
9789 printf("total fs tree bytes: %llu\n",
9790 (unsigned long long)total_fs_tree_bytes);
9791 printf("total extent tree bytes: %llu\n",
9792 (unsigned long long)total_extent_tree_bytes);
9793 printf("btree space waste bytes: %llu\n",
9794 (unsigned long long)btree_space_waste);
9795 printf("file data blocks allocated: %llu\n referenced %llu\n",
9796 (unsigned long long)data_bytes_allocated,
9797 (unsigned long long)data_bytes_referenced);
9799 free_root_recs_tree(&root_cache);
9803 if (ctx.progress_enabled)
9804 task_deinit(ctx.info);