2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
48 TASK_NOTHING, /* have to be the last element */
53 enum task_position tp;
55 struct task_info *info;
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
81 static char *task_position_string[] = {
83 "checking free space cache",
87 task_period_start(priv->info, 1000 /* 1s */);
89 if (priv->tp == TASK_NOTHING)
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
97 task_period_wait(priv->info);
102 static int print_status_return(void *p)
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
149 struct tree_backref {
150 struct extent_backref node;
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
189 unsigned int filetype:8;
191 unsigned int ref_type;
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
223 struct file_extent_hole {
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
260 /* Hole 2 will be merge center */
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
277 hole = malloc(sizeof(*hole));
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iterate merge with next holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
303 if (hole->start + hole->len >= next->start) {
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
321 hole = (struct file_extent_hole *)data;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole in the record
335 * This will do the hole split and is much restrict than add.
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
346 struct rb_node *node;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
357 if (start + len > hole->start + hole->len)
361 * Now there will be no overflap, delete the hole and re-add the
362 * split(s) if they exists.
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
417 node = rb_first(holes);
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
443 struct rb_root holes;
444 struct list_head orphan_extents;
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
489 struct cache_extent cache;
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
514 struct btrfs_key key;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
528 extent_buffer_get(root->node);
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct inode_backref *tmp;
570 struct orphan_data_extent *src_orphan;
571 struct orphan_data_extent *dst_orphan;
575 rec = malloc(sizeof(*rec));
577 return ERR_PTR(-ENOMEM);
578 memcpy(rec, orig_rec, sizeof(*rec));
580 INIT_LIST_HEAD(&rec->backrefs);
581 INIT_LIST_HEAD(&rec->orphan_extents);
582 rec->holes = RB_ROOT;
584 list_for_each_entry(orig, &orig_rec->backrefs, list) {
585 size = sizeof(*orig) + orig->namelen + 1;
586 backref = malloc(size);
591 memcpy(backref, orig, size);
592 list_add_tail(&backref->list, &rec->backrefs);
594 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595 dst_orphan = malloc(sizeof(*dst_orphan));
600 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
603 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
609 if (!list_empty(&rec->backrefs))
610 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611 list_del(&orig->list);
615 if (!list_empty(&rec->orphan_extents))
616 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617 list_del(&orig->list);
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
629 struct orphan_data_extent *orphan;
631 if (list_empty(orphan_extents))
633 printf("The following data extent is lost in tree %llu:\n",
635 list_for_each_entry(orphan, orphan_extents, list) {
636 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637 orphan->objectid, orphan->offset, orphan->disk_bytenr,
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
644 u64 root_objectid = root->root_key.objectid;
645 int errors = rec->errors;
649 /* reloc root errors, we print its corresponding fs root objectid*/
650 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651 root_objectid = root->root_key.offset;
652 fprintf(stderr, "reloc");
654 fprintf(stderr, "root %llu inode %llu errors %x",
655 (unsigned long long) root_objectid,
656 (unsigned long long) rec->ino, rec->errors);
658 if (errors & I_ERR_NO_INODE_ITEM)
659 fprintf(stderr, ", no inode item");
660 if (errors & I_ERR_NO_ORPHAN_ITEM)
661 fprintf(stderr, ", no orphan item");
662 if (errors & I_ERR_DUP_INODE_ITEM)
663 fprintf(stderr, ", dup inode item");
664 if (errors & I_ERR_DUP_DIR_INDEX)
665 fprintf(stderr, ", dup dir index");
666 if (errors & I_ERR_ODD_DIR_ITEM)
667 fprintf(stderr, ", odd dir item");
668 if (errors & I_ERR_ODD_FILE_EXTENT)
669 fprintf(stderr, ", odd file extent");
670 if (errors & I_ERR_BAD_FILE_EXTENT)
671 fprintf(stderr, ", bad file extent");
672 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673 fprintf(stderr, ", file extent overlap");
674 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675 fprintf(stderr, ", file extent discount");
676 if (errors & I_ERR_DIR_ISIZE_WRONG)
677 fprintf(stderr, ", dir isize wrong");
678 if (errors & I_ERR_FILE_NBYTES_WRONG)
679 fprintf(stderr, ", nbytes wrong");
680 if (errors & I_ERR_ODD_CSUM_ITEM)
681 fprintf(stderr, ", odd csum item");
682 if (errors & I_ERR_SOME_CSUM_MISSING)
683 fprintf(stderr, ", some csum missing");
684 if (errors & I_ERR_LINK_COUNT_WRONG)
685 fprintf(stderr, ", link count wrong");
686 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687 fprintf(stderr, ", orphan file extent");
688 fprintf(stderr, "\n");
689 /* Print the orphan extents if needed */
690 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
693 /* Print the holes if needed */
694 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695 struct file_extent_hole *hole;
696 struct rb_node *node;
699 node = rb_first(&rec->holes);
700 fprintf(stderr, "Found file extent holes:\n");
703 hole = rb_entry(node, struct file_extent_hole, node);
704 fprintf(stderr, "\tstart: %llu, len: %llu\n",
705 hole->start, hole->len);
706 node = rb_next(node);
709 fprintf(stderr, "\tstart: 0, len: %llu\n",
710 round_up(rec->isize, root->sectorsize));
714 static void print_ref_error(int errors)
716 if (errors & REF_ERR_NO_DIR_ITEM)
717 fprintf(stderr, ", no dir item");
718 if (errors & REF_ERR_NO_DIR_INDEX)
719 fprintf(stderr, ", no dir index");
720 if (errors & REF_ERR_NO_INODE_REF)
721 fprintf(stderr, ", no inode ref");
722 if (errors & REF_ERR_DUP_DIR_ITEM)
723 fprintf(stderr, ", dup dir item");
724 if (errors & REF_ERR_DUP_DIR_INDEX)
725 fprintf(stderr, ", dup dir index");
726 if (errors & REF_ERR_DUP_INODE_REF)
727 fprintf(stderr, ", dup inode ref");
728 if (errors & REF_ERR_INDEX_UNMATCH)
729 fprintf(stderr, ", index unmatch");
730 if (errors & REF_ERR_FILETYPE_UNMATCH)
731 fprintf(stderr, ", filetype unmatch");
732 if (errors & REF_ERR_NAME_TOO_LONG)
733 fprintf(stderr, ", name too long");
734 if (errors & REF_ERR_NO_ROOT_REF)
735 fprintf(stderr, ", no root ref");
736 if (errors & REF_ERR_NO_ROOT_BACKREF)
737 fprintf(stderr, ", no root backref");
738 if (errors & REF_ERR_DUP_ROOT_REF)
739 fprintf(stderr, ", dup root ref");
740 if (errors & REF_ERR_DUP_ROOT_BACKREF)
741 fprintf(stderr, ", dup root backref");
742 fprintf(stderr, "\n");
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
748 struct ptr_node *node;
749 struct cache_extent *cache;
750 struct inode_record *rec = NULL;
753 cache = lookup_cache_extent(inode_cache, ino, 1);
755 node = container_of(cache, struct ptr_node, cache);
757 if (mod && rec->refs > 1) {
758 node->data = clone_inode_rec(rec);
759 if (IS_ERR(node->data))
765 rec = calloc(1, sizeof(*rec));
767 return ERR_PTR(-ENOMEM);
769 rec->extent_start = (u64)-1;
771 INIT_LIST_HEAD(&rec->backrefs);
772 INIT_LIST_HEAD(&rec->orphan_extents);
773 rec->holes = RB_ROOT;
775 node = malloc(sizeof(*node));
778 return ERR_PTR(-ENOMEM);
780 node->cache.start = ino;
781 node->cache.size = 1;
784 if (ino == BTRFS_FREE_INO_OBJECTID)
787 ret = insert_cache_extent(inode_cache, &node->cache);
789 return ERR_PTR(-EEXIST);
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
796 struct orphan_data_extent *orphan;
798 while (!list_empty(orphan_extents)) {
799 orphan = list_entry(orphan_extents->next,
800 struct orphan_data_extent, list);
801 list_del(&orphan->list);
806 static void free_inode_rec(struct inode_record *rec)
808 struct inode_backref *backref;
813 while (!list_empty(&rec->backrefs)) {
814 backref = list_entry(rec->backrefs.next,
815 struct inode_backref, list);
816 list_del(&backref->list);
819 free_orphan_data_extents(&rec->orphan_extents);
820 free_file_extent_holes(&rec->holes);
824 static int can_free_inode_rec(struct inode_record *rec)
826 if (!rec->errors && rec->checked && rec->found_inode_item &&
827 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833 struct inode_record *rec)
835 struct cache_extent *cache;
836 struct inode_backref *tmp, *backref;
837 struct ptr_node *node;
838 unsigned char filetype;
840 if (!rec->found_inode_item)
843 filetype = imode_to_type(rec->imode);
844 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845 if (backref->found_dir_item && backref->found_dir_index) {
846 if (backref->filetype != filetype)
847 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848 if (!backref->errors && backref->found_inode_ref &&
849 rec->nlink == rec->found_link) {
850 list_del(&backref->list);
856 if (!rec->checked || rec->merging)
859 if (S_ISDIR(rec->imode)) {
860 if (rec->found_size != rec->isize)
861 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862 if (rec->found_file_extent)
863 rec->errors |= I_ERR_ODD_FILE_EXTENT;
864 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865 if (rec->found_dir_item)
866 rec->errors |= I_ERR_ODD_DIR_ITEM;
867 if (rec->found_size != rec->nbytes)
868 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
869 if (rec->nlink > 0 && !no_holes &&
870 (rec->extent_end < rec->isize ||
871 first_extent_gap(&rec->holes) < rec->isize))
872 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
875 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876 if (rec->found_csum_item && rec->nodatasum)
877 rec->errors |= I_ERR_ODD_CSUM_ITEM;
878 if (rec->some_csum_missing && !rec->nodatasum)
879 rec->errors |= I_ERR_SOME_CSUM_MISSING;
882 BUG_ON(rec->refs != 1);
883 if (can_free_inode_rec(rec)) {
884 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885 node = container_of(cache, struct ptr_node, cache);
886 BUG_ON(node->data != rec);
887 remove_cache_extent(inode_cache, &node->cache);
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
895 struct btrfs_path path;
896 struct btrfs_key key;
899 key.objectid = BTRFS_ORPHAN_OBJECTID;
900 key.type = BTRFS_ORPHAN_ITEM_KEY;
903 btrfs_init_path(&path);
904 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905 btrfs_release_path(&path);
911 static int process_inode_item(struct extent_buffer *eb,
912 int slot, struct btrfs_key *key,
913 struct shared_node *active_node)
915 struct inode_record *rec;
916 struct btrfs_inode_item *item;
918 rec = active_node->current;
919 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920 if (rec->found_inode_item) {
921 rec->errors |= I_ERR_DUP_INODE_ITEM;
924 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925 rec->nlink = btrfs_inode_nlink(eb, item);
926 rec->isize = btrfs_inode_size(eb, item);
927 rec->nbytes = btrfs_inode_nbytes(eb, item);
928 rec->imode = btrfs_inode_mode(eb, item);
929 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
931 rec->found_inode_item = 1;
933 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934 maybe_free_inode_rec(&active_node->inode_cache, rec);
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
940 int namelen, u64 dir)
942 struct inode_backref *backref;
944 list_for_each_entry(backref, &rec->backrefs, list) {
945 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
947 if (backref->dir != dir || backref->namelen != namelen)
949 if (memcmp(name, backref->name, namelen))
954 backref = malloc(sizeof(*backref) + namelen + 1);
957 memset(backref, 0, sizeof(*backref));
959 backref->namelen = namelen;
960 memcpy(backref->name, name, namelen);
961 backref->name[namelen] = '\0';
962 list_add_tail(&backref->list, &rec->backrefs);
966 static int add_inode_backref(struct cache_tree *inode_cache,
967 u64 ino, u64 dir, u64 index,
968 const char *name, int namelen,
969 int filetype, int itemtype, int errors)
971 struct inode_record *rec;
972 struct inode_backref *backref;
974 rec = get_inode_rec(inode_cache, ino, 1);
976 backref = get_inode_backref(rec, name, namelen, dir);
979 backref->errors |= errors;
980 if (itemtype == BTRFS_DIR_INDEX_KEY) {
981 if (backref->found_dir_index)
982 backref->errors |= REF_ERR_DUP_DIR_INDEX;
983 if (backref->found_inode_ref && backref->index != index)
984 backref->errors |= REF_ERR_INDEX_UNMATCH;
985 if (backref->found_dir_item && backref->filetype != filetype)
986 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
988 backref->index = index;
989 backref->filetype = filetype;
990 backref->found_dir_index = 1;
991 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
993 if (backref->found_dir_item)
994 backref->errors |= REF_ERR_DUP_DIR_ITEM;
995 if (backref->found_dir_index && backref->filetype != filetype)
996 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
998 backref->filetype = filetype;
999 backref->found_dir_item = 1;
1000 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1001 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1002 if (backref->found_inode_ref)
1003 backref->errors |= REF_ERR_DUP_INODE_REF;
1004 if (backref->found_dir_index && backref->index != index)
1005 backref->errors |= REF_ERR_INDEX_UNMATCH;
1007 backref->index = index;
1009 backref->ref_type = itemtype;
1010 backref->found_inode_ref = 1;
1015 maybe_free_inode_rec(inode_cache, rec);
1019 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1020 struct cache_tree *dst_cache)
1022 struct inode_backref *backref;
1027 list_for_each_entry(backref, &src->backrefs, list) {
1028 if (backref->found_dir_index) {
1029 add_inode_backref(dst_cache, dst->ino, backref->dir,
1030 backref->index, backref->name,
1031 backref->namelen, backref->filetype,
1032 BTRFS_DIR_INDEX_KEY, backref->errors);
1034 if (backref->found_dir_item) {
1036 add_inode_backref(dst_cache, dst->ino,
1037 backref->dir, 0, backref->name,
1038 backref->namelen, backref->filetype,
1039 BTRFS_DIR_ITEM_KEY, backref->errors);
1041 if (backref->found_inode_ref) {
1042 add_inode_backref(dst_cache, dst->ino,
1043 backref->dir, backref->index,
1044 backref->name, backref->namelen, 0,
1045 backref->ref_type, backref->errors);
1049 if (src->found_dir_item)
1050 dst->found_dir_item = 1;
1051 if (src->found_file_extent)
1052 dst->found_file_extent = 1;
1053 if (src->found_csum_item)
1054 dst->found_csum_item = 1;
1055 if (src->some_csum_missing)
1056 dst->some_csum_missing = 1;
1057 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1058 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1063 BUG_ON(src->found_link < dir_count);
1064 dst->found_link += src->found_link - dir_count;
1065 dst->found_size += src->found_size;
1066 if (src->extent_start != (u64)-1) {
1067 if (dst->extent_start == (u64)-1) {
1068 dst->extent_start = src->extent_start;
1069 dst->extent_end = src->extent_end;
1071 if (dst->extent_end > src->extent_start)
1072 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1073 else if (dst->extent_end < src->extent_start) {
1074 ret = add_file_extent_hole(&dst->holes,
1076 src->extent_start - dst->extent_end);
1078 if (dst->extent_end < src->extent_end)
1079 dst->extent_end = src->extent_end;
1083 dst->errors |= src->errors;
1084 if (src->found_inode_item) {
1085 if (!dst->found_inode_item) {
1086 dst->nlink = src->nlink;
1087 dst->isize = src->isize;
1088 dst->nbytes = src->nbytes;
1089 dst->imode = src->imode;
1090 dst->nodatasum = src->nodatasum;
1091 dst->found_inode_item = 1;
1093 dst->errors |= I_ERR_DUP_INODE_ITEM;
1101 static int splice_shared_node(struct shared_node *src_node,
1102 struct shared_node *dst_node)
1104 struct cache_extent *cache;
1105 struct ptr_node *node, *ins;
1106 struct cache_tree *src, *dst;
1107 struct inode_record *rec, *conflict;
1108 u64 current_ino = 0;
1112 if (--src_node->refs == 0)
1114 if (src_node->current)
1115 current_ino = src_node->current->ino;
1117 src = &src_node->root_cache;
1118 dst = &dst_node->root_cache;
1120 cache = search_cache_extent(src, 0);
1122 node = container_of(cache, struct ptr_node, cache);
1124 cache = next_cache_extent(cache);
1127 remove_cache_extent(src, &node->cache);
1130 ins = malloc(sizeof(*ins));
1132 ins->cache.start = node->cache.start;
1133 ins->cache.size = node->cache.size;
1137 ret = insert_cache_extent(dst, &ins->cache);
1138 if (ret == -EEXIST) {
1139 conflict = get_inode_rec(dst, rec->ino, 1);
1140 BUG_ON(IS_ERR(conflict));
1141 merge_inode_recs(rec, conflict, dst);
1143 conflict->checked = 1;
1144 if (dst_node->current == conflict)
1145 dst_node->current = NULL;
1147 maybe_free_inode_rec(dst, conflict);
1148 free_inode_rec(rec);
1155 if (src == &src_node->root_cache) {
1156 src = &src_node->inode_cache;
1157 dst = &dst_node->inode_cache;
1161 if (current_ino > 0 && (!dst_node->current ||
1162 current_ino > dst_node->current->ino)) {
1163 if (dst_node->current) {
1164 dst_node->current->checked = 1;
1165 maybe_free_inode_rec(dst, dst_node->current);
1167 dst_node->current = get_inode_rec(dst, current_ino, 1);
1168 BUG_ON(IS_ERR(dst_node->current));
1173 static void free_inode_ptr(struct cache_extent *cache)
1175 struct ptr_node *node;
1176 struct inode_record *rec;
1178 node = container_of(cache, struct ptr_node, cache);
1180 free_inode_rec(rec);
1184 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1186 static struct shared_node *find_shared_node(struct cache_tree *shared,
1189 struct cache_extent *cache;
1190 struct shared_node *node;
1192 cache = lookup_cache_extent(shared, bytenr, 1);
1194 node = container_of(cache, struct shared_node, cache);
1200 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1203 struct shared_node *node;
1205 node = calloc(1, sizeof(*node));
1208 node->cache.start = bytenr;
1209 node->cache.size = 1;
1210 cache_tree_init(&node->root_cache);
1211 cache_tree_init(&node->inode_cache);
1214 ret = insert_cache_extent(shared, &node->cache);
1219 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1220 struct walk_control *wc, int level)
1222 struct shared_node *node;
1223 struct shared_node *dest;
1226 if (level == wc->active_node)
1229 BUG_ON(wc->active_node <= level);
1230 node = find_shared_node(&wc->shared, bytenr);
1232 ret = add_shared_node(&wc->shared, bytenr, refs);
1234 node = find_shared_node(&wc->shared, bytenr);
1235 wc->nodes[level] = node;
1236 wc->active_node = level;
1240 if (wc->root_level == wc->active_node &&
1241 btrfs_root_refs(&root->root_item) == 0) {
1242 if (--node->refs == 0) {
1243 free_inode_recs_tree(&node->root_cache);
1244 free_inode_recs_tree(&node->inode_cache);
1245 remove_cache_extent(&wc->shared, &node->cache);
1251 dest = wc->nodes[wc->active_node];
1252 splice_shared_node(node, dest);
1253 if (node->refs == 0) {
1254 remove_cache_extent(&wc->shared, &node->cache);
1260 static int leave_shared_node(struct btrfs_root *root,
1261 struct walk_control *wc, int level)
1263 struct shared_node *node;
1264 struct shared_node *dest;
1267 if (level == wc->root_level)
1270 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1274 BUG_ON(i >= BTRFS_MAX_LEVEL);
1276 node = wc->nodes[wc->active_node];
1277 wc->nodes[wc->active_node] = NULL;
1278 wc->active_node = i;
1280 dest = wc->nodes[wc->active_node];
1281 if (wc->active_node < wc->root_level ||
1282 btrfs_root_refs(&root->root_item) > 0) {
1283 BUG_ON(node->refs <= 1);
1284 splice_shared_node(node, dest);
1286 BUG_ON(node->refs < 2);
1295 * 1 - if the root with id child_root_id is a child of root parent_root_id
1296 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1297 * has other root(s) as parent(s)
1298 * 2 - if the root child_root_id doesn't have any parent roots
1300 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1303 struct btrfs_path path;
1304 struct btrfs_key key;
1305 struct extent_buffer *leaf;
1309 btrfs_init_path(&path);
1311 key.objectid = parent_root_id;
1312 key.type = BTRFS_ROOT_REF_KEY;
1313 key.offset = child_root_id;
1314 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1318 btrfs_release_path(&path);
1322 key.objectid = child_root_id;
1323 key.type = BTRFS_ROOT_BACKREF_KEY;
1325 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1331 leaf = path.nodes[0];
1332 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1333 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1336 leaf = path.nodes[0];
1339 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1340 if (key.objectid != child_root_id ||
1341 key.type != BTRFS_ROOT_BACKREF_KEY)
1346 if (key.offset == parent_root_id) {
1347 btrfs_release_path(&path);
1354 btrfs_release_path(&path);
1357 return has_parent ? 0 : 2;
1360 static int process_dir_item(struct btrfs_root *root,
1361 struct extent_buffer *eb,
1362 int slot, struct btrfs_key *key,
1363 struct shared_node *active_node)
1373 struct btrfs_dir_item *di;
1374 struct inode_record *rec;
1375 struct cache_tree *root_cache;
1376 struct cache_tree *inode_cache;
1377 struct btrfs_key location;
1378 char namebuf[BTRFS_NAME_LEN];
1380 root_cache = &active_node->root_cache;
1381 inode_cache = &active_node->inode_cache;
1382 rec = active_node->current;
1383 rec->found_dir_item = 1;
1385 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1386 total = btrfs_item_size_nr(eb, slot);
1387 while (cur < total) {
1389 btrfs_dir_item_key_to_cpu(eb, di, &location);
1390 name_len = btrfs_dir_name_len(eb, di);
1391 data_len = btrfs_dir_data_len(eb, di);
1392 filetype = btrfs_dir_type(eb, di);
1394 rec->found_size += name_len;
1395 if (name_len <= BTRFS_NAME_LEN) {
1399 len = BTRFS_NAME_LEN;
1400 error = REF_ERR_NAME_TOO_LONG;
1402 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1404 if (location.type == BTRFS_INODE_ITEM_KEY) {
1405 add_inode_backref(inode_cache, location.objectid,
1406 key->objectid, key->offset, namebuf,
1407 len, filetype, key->type, error);
1408 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1409 add_inode_backref(root_cache, location.objectid,
1410 key->objectid, key->offset,
1411 namebuf, len, filetype,
1414 fprintf(stderr, "invalid location in dir item %u\n",
1416 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1417 key->objectid, key->offset, namebuf,
1418 len, filetype, key->type, error);
1421 len = sizeof(*di) + name_len + data_len;
1422 di = (struct btrfs_dir_item *)((char *)di + len);
1425 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1426 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record an inode backref for every btrfs_inode_ref entry packed into
 * the item at @slot.  For INODE_REF items, key->objectid is the child
 * inode and key->offset is the parent directory's objectid.
 *
 * NOTE(review): several source lines are elided in this excerpt (the
 * in-range branch of the name-length check, the loop-cursor advance and
 * the function epilogue); the visible text is not the complete body.
 */
1431 static int process_inode_ref(struct extent_buffer *eb,
1432 int slot, struct btrfs_key *key,
1433 struct shared_node *active_node)
1441 struct cache_tree *inode_cache;
1442 struct btrfs_inode_ref *ref;
1443 char namebuf[BTRFS_NAME_LEN];
1445 inode_cache = &active_node->inode_cache;
1447 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1448 total = btrfs_item_size_nr(eb, slot);
/* an INODE_REF item holds a variable number of packed refs; walk them all */
1449 while (cur < total) {
1450 name_len = btrfs_inode_ref_name_len(eb, ref);
1451 index = btrfs_inode_ref_index(eb, ref);
1452 if (name_len <= BTRFS_NAME_LEN) {
/* over-long name: copy a truncated prefix and flag the backref */
1456 len = BTRFS_NAME_LEN;
1457 error = REF_ERR_NAME_TOO_LONG;
1459 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1460 add_inode_backref(inode_cache, key->objectid, key->offset,
1461 index, namebuf, len, 0, key->type, error);
/* advance past this ref header plus its inline name */
1463 len = sizeof(*ref) + name_len;
1464 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Like process_inode_ref() but for extended refs (INODE_EXTREF), used
 * when the packed refs no longer fit in a single INODE_REF item.  The
 * parent directory objectid is stored in the extref itself rather than
 * in the key offset.
 *
 * NOTE(review): interior lines are elided in this excerpt (the in-range
 * name-length branch, the cursor advance and the epilogue).
 */
1470 static int process_inode_extref(struct extent_buffer *eb,
1471 int slot, struct btrfs_key *key,
1472 struct shared_node *active_node)
1481 struct cache_tree *inode_cache;
1482 struct btrfs_inode_extref *extref;
1483 char namebuf[BTRFS_NAME_LEN];
1485 inode_cache = &active_node->inode_cache;
1487 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1488 total = btrfs_item_size_nr(eb, slot);
1489 while (cur < total) {
1490 name_len = btrfs_inode_extref_name_len(eb, extref);
1491 index = btrfs_inode_extref_index(eb, extref);
1492 parent = btrfs_inode_extref_parent(eb, extref);
1493 if (name_len <= BTRFS_NAME_LEN) {
/* over-long name: truncate the copy and record the error */
1497 len = BTRFS_NAME_LEN;
1498 error = REF_ERR_NAME_TOO_LONG;
1500 read_extent_buffer(eb, namebuf,
1501 (unsigned long)(extref + 1), len);
1502 add_inode_backref(inode_cache, key->objectid, parent,
1503 index, namebuf, len, 0, key->type, error);
1505 len = sizeof(*extref) + name_len;
1506 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the logical range [start, start + len) are
 * covered by checksum items in the csum tree, accumulating the result
 * into *found.  Used to detect missing or unexpected csums for file
 * extents.
 *
 * NOTE(review): this excerpt elides several lines (key.offset setup,
 * loop structure, the *found accumulation and the return), so the
 * visible text is not the complete body.
 */
1513 static int count_csum_range(struct btrfs_root *root, u64 start,
1514 u64 len, u64 *found)
1516 struct btrfs_key key;
1517 struct btrfs_path path;
1518 struct extent_buffer *leaf;
1523 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1525 btrfs_init_path(&path);
1527 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1529 key.type = BTRFS_EXTENT_CSUM_KEY;
1531 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * The search may land just past a csum item whose run still covers
 * 'start'; if the previous slot is also a csum item, back up one.
 */
1535 if (ret > 0 && path.slots[0] > 0) {
1536 leaf = path.nodes[0];
1537 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1538 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1539 key.type == BTRFS_EXTENT_CSUM_KEY)
1544 leaf = path.nodes[0];
1545 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1546 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1551 leaf = path.nodes[0];
1554 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1555 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1556 key.type != BTRFS_EXTENT_CSUM_KEY)
1559 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1560 if (key.offset >= start + len)
1563 if (key.offset > start)
/* one csum item covers (item_size / csum_size) sectors starting at key.offset */
1566 size = btrfs_item_size_nr(leaf, path.slots[0]);
1567 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1568 if (csum_end > start) {
1569 size = min(csum_end - start, len);
1578 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item for the current inode record: track the
 * covered file range (recording holes and overlaps), sanity-check the
 * extent fields, and verify checksum coverage for regular extents.
 *
 * NOTE(review): interior lines are elided in this excerpt (several
 * declarations, error returns and closing braces are not visible).
 */
1584 static int process_file_extent(struct btrfs_root *root,
1585 struct extent_buffer *eb,
1586 int slot, struct btrfs_key *key,
1587 struct shared_node *active_node)
1589 struct inode_record *rec;
1590 struct btrfs_file_extent_item *fi;
1592 u64 disk_bytenr = 0;
1593 u64 extent_offset = 0;
1594 u64 mask = root->sectorsize - 1;
1598 rec = active_node->current;
1599 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1600 rec->found_file_extent = 1;
/* first extent seen for this inode: start tracking the covered range */
1602 if (rec->extent_start == (u64)-1) {
1603 rec->extent_start = key->offset;
1604 rec->extent_end = key->offset;
/* overlap means corruption; a gap is recorded as a file hole */
1607 if (rec->extent_end > key->offset)
1608 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1609 else if (rec->extent_end < key->offset) {
1610 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1611 key->offset - rec->extent_end);
1616 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1617 extent_type = btrfs_file_extent_type(eb, fi);
1619 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1620 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1622 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1623 rec->found_size += num_bytes;
/* round the inline length up to a full sector for range accounting */
1624 num_bytes = (num_bytes + mask) & ~mask;
1625 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1626 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1627 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1628 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1629 extent_offset = btrfs_file_extent_offset(eb, fi);
1630 if (num_bytes == 0 || (num_bytes & mask))
1631 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1632 if (num_bytes + extent_offset >
1633 btrfs_file_extent_ram_bytes(eb, fi))
1634 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* preallocated extents must not be compressed/encrypted/encoded */
1635 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1636 (btrfs_file_extent_compression(eb, fi) ||
1637 btrfs_file_extent_encryption(eb, fi) ||
1638 btrfs_file_extent_other_encoding(eb, fi)))
1639 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 is a hole extent and adds no on-disk size */
1640 if (disk_bytenr > 0)
1641 rec->found_size += num_bytes;
1643 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1645 rec->extent_end = key->offset + num_bytes;
1648 * The data reloc tree will copy full extents into its inode and then
1649 * copy the corresponding csums. Because the extent it copied could be
1650 * a preallocated extent that hasn't been written to yet there may be no
1651 * csums to copy, ergo we won't have csums for our file extent. This is
1652 * ok so just don't bother checking csums if the inode belongs to the
1655 if (disk_bytenr > 0 &&
1656 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* compressed extents are checksummed over their on-disk bytes */
1658 if (btrfs_file_extent_compression(eb, fi))
1659 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1661 disk_bytenr += extent_offset;
1663 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* REG extents expect csums; PREALLOC extents must not have any */
1666 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1668 rec->found_csum_item = 1;
1669 if (found < num_bytes)
1670 rec->some_csum_missing = 1;
1671 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1673 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Scan every item in a leaf and dispatch it to the per-item-type
 * processor, maintaining the shared node's notion of the "current"
 * inode record as the objectid advances.
 *
 * NOTE(review): parts of the switch (break statements, default case,
 * epilogue) are elided in this excerpt.
 */
1679 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1680 struct walk_control *wc)
1682 struct btrfs_key key;
1686 struct cache_tree *inode_cache;
1687 struct shared_node *active_node;
/* dead root (refs == 0): nothing in it needs inode checking */
1689 if (wc->root_level == wc->active_node &&
1690 btrfs_root_refs(&root->root_item) == 0)
1693 active_node = wc->nodes[wc->active_node];
1694 inode_cache = &active_node->inode_cache;
1695 nritems = btrfs_header_nritems(eb);
1696 for (i = 0; i < nritems; i++) {
1697 btrfs_item_key_to_cpu(eb, &key, i);
/* free-space cache and orphan items are not inode metadata; skip */
1699 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1701 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Items are sorted by objectid, so once the key moves to a new
 * inode the previous record is complete and can be finalized.
 */
1704 if (active_node->current == NULL ||
1705 active_node->current->ino < key.objectid) {
1706 if (active_node->current) {
1707 active_node->current->checked = 1;
1708 maybe_free_inode_rec(inode_cache,
1709 active_node->current);
1711 active_node->current = get_inode_rec(inode_cache,
1713 BUG_ON(IS_ERR(active_node->current));
1716 case BTRFS_DIR_ITEM_KEY:
1717 case BTRFS_DIR_INDEX_KEY:
1718 ret = process_dir_item(root, eb, i, &key, active_node);
1720 case BTRFS_INODE_REF_KEY:
1721 ret = process_inode_ref(eb, i, &key, active_node);
1723 case BTRFS_INODE_EXTREF_KEY:
1724 ret = process_inode_extref(eb, i, &key, active_node);
1726 case BTRFS_INODE_ITEM_KEY:
1727 ret = process_inode_item(eb, i, &key, active_node);
1729 case BTRFS_EXTENT_DATA_KEY:
1730 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all children of @node from @slot to the end, so
 * the subsequent synchronous reads in walk_down_tree() hit the cache.
 * Best-effort only: readahead failures are ignored.
 */
1740 static void reada_walk_down(struct btrfs_root *root,
1741 struct extent_buffer *node, int slot)
1750 level = btrfs_header_level(node);
1754 nritems = btrfs_header_nritems(node);
/* children live one level below this node */
1755 blocksize = btrfs_level_size(root, level - 1);
1756 for (i = slot; i < nritems; i++) {
1757 bytenr = btrfs_node_blockptr(node, i);
1758 ptr_gen = btrfs_node_ptr_generation(node, i);
1759 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1764 * Check the child node/leaf by the following condition:
1765 * 1. the first item key of the node/leaf should be the same with the one
1767 * 2. block in parent node should match the child node/leaf.
1768 * 3. generation of parent node and child's header should be consistent.
1770 * Or the child node/leaf pointed by the key in parent is not valid.
1772 * We hope to check leaf owner too, but since subvol may share leaves,
1773 * which makes leaf owner check not so strong, key check should be
1774 * sufficient for that case.
/*
 * Verify that @child really is the block @parent points to at @slot:
 * first key, block number and generation must all match the parent's
 * pointer.  Each mismatch is reported; the elided lines presumably set
 * the return value to an error — TODO confirm against the full source.
 */
1776 static int check_child_node(struct btrfs_root *root,
1777 struct extent_buffer *parent, int slot,
1778 struct extent_buffer *child)
1780 struct btrfs_key parent_key;
1781 struct btrfs_key child_key;
1784 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* leaves hold item keys, interior nodes hold node keys */
1785 if (btrfs_header_level(child) == 0)
1786 btrfs_item_key_to_cpu(child, &child_key, 0);
1788 btrfs_node_key_to_cpu(child, &child_key, 0);
1790 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1793 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1794 parent_key.objectid, parent_key.type, parent_key.offset,
1795 child_key.objectid, child_key.type, child_key.offset);
1797 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1799 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1800 btrfs_node_blockptr(parent, slot),
1801 btrfs_header_bytenr(child));
1803 if (btrfs_node_ptr_generation(parent, slot) !=
1804 btrfs_header_generation(child)) {
1806 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1807 btrfs_header_generation(child),
1808 btrfs_node_ptr_generation(parent, slot));
/*
 * Descend the tree along @path, processing each leaf via
 * process_one_leaf() and validating every interior child block
 * (readability, key/bytenr/generation consistency, structural checks)
 * before stepping into it.  Shared subtrees (refs > 1) are entered via
 * enter_shared_node() so they are only scanned once.
 *
 * NOTE(review): many interior lines (error handling, goto labels,
 * slot advancement) are elided in this excerpt.
 */
1813 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1814 struct walk_control *wc, int *level)
1816 enum btrfs_tree_block_status status;
1819 struct extent_buffer *next;
1820 struct extent_buffer *cur;
1825 WARN_ON(*level < 0);
1826 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1827 ret = btrfs_lookup_extent_info(NULL, root,
1828 path->nodes[*level]->start,
1829 *level, 1, &refs, NULL);
/* shared root block: register it so other trees skip the duplicate scan */
1836 ret = enter_shared_node(root, path->nodes[*level]->start,
1844 while (*level >= 0) {
1845 WARN_ON(*level < 0);
1846 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1847 cur = path->nodes[*level];
1849 if (btrfs_header_level(cur) != *level)
1852 if (path->slots[*level] >= btrfs_header_nritems(cur))
1855 ret = process_one_leaf(root, cur, wc);
1860 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1861 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1862 blocksize = btrfs_level_size(root, *level - 1);
1863 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
/* child is shared: record it and skip descending into it here */
1869 ret = enter_shared_node(root, bytenr, refs,
1872 path->slots[*level]++;
1877 next = btrfs_find_tree_block(root, bytenr, blocksize);
1878 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1879 free_extent_buffer(next);
/* prefetch remaining siblings before the blocking read */
1880 reada_walk_down(root, cur, path->slots[*level]);
1881 next = read_tree_block(root, bytenr, blocksize,
1883 if (!extent_buffer_uptodate(next)) {
1884 struct btrfs_key node_key;
1886 btrfs_node_key_to_cpu(path->nodes[*level],
1888 path->slots[*level]);
/* unreadable child: remember the corruption for later repair */
1889 btrfs_add_corrupt_extent_record(root->fs_info,
1891 path->nodes[*level]->start,
1892 root->leafsize, *level);
1898 ret = check_child_node(root, cur, path->slots[*level], next);
1904 if (btrfs_is_leaf(next))
1905 status = btrfs_check_leaf(root, NULL, next);
1907 status = btrfs_check_node(root, NULL, next);
1908 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1909 free_extent_buffer(next);
/* step down one level into the validated child */
1914 *level = *level - 1;
1915 free_extent_buffer(path->nodes[*level]);
1916 path->nodes[*level] = next;
1917 path->slots[*level] = 0;
1920 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Pop finished levels off @path: free each exhausted node, leave any
 * shared node we were tracking, and stop at the first level that still
 * has unvisited slots.
 *
 * NOTE(review): the slot-advance inside the "more slots" branch and
 * the return are elided in this excerpt.
 */
1924 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1925 struct walk_control *wc, int *level)
1928 struct extent_buffer *leaf;
1930 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1931 leaf = path->nodes[i];
/* this level still has siblings to visit; resume descent from here */
1932 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1937 free_extent_buffer(path->nodes[*level]);
1938 path->nodes[*level] = NULL;
1939 BUG_ON(*level > wc->active_node);
1940 if (*level == wc->active_node)
1941 leave_shared_node(root, wc, *level);
/*
 * Validate the inode record of a root directory: it must have a clean
 * inode item, nlink of 1 with no counted links, and exactly one ".."
 * inode-ref backref (index 0) with no dir item/index entries.
 *
 * NOTE(review): the early-exit gotos and return value lines are elided
 * in this excerpt.
 */
1948 static int check_root_dir(struct inode_record *rec)
1950 struct inode_backref *backref;
1953 if (!rec->found_inode_item || rec->errors)
1955 if (rec->nlink != 1 || rec->found_link != 0)
1957 if (list_empty(&rec->backrefs))
1959 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1960 if (!backref->found_inode_ref)
/* the single backref must be the ".." self-reference at index 0 */
1962 if (backref->index != 0 || backref->namelen != 2 ||
1963 memcmp(backref->name, "..", 2))
1965 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair helper: rewrite the on-disk i_size of a directory inode to the
 * size accumulated from its dir items (rec->found_size), then clear
 * I_ERR_DIR_ISIZE_WRONG.
 */
1972 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1973 struct btrfs_root *root, struct btrfs_path *path,
1974 struct inode_record *rec)
1976 struct btrfs_inode_item *ei;
1977 struct btrfs_key key;
1980 key.objectid = rec->ino;
1981 key.type = BTRFS_INODE_ITEM_KEY;
/* offset (u64)-1: search past the item, then step back one slot */
1982 key.offset = (u64)-1;
1984 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1988 if (!path->slots[0]) {
1995 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1996 if (key.objectid != rec->ino) {
2001 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2002 struct btrfs_inode_item);
2003 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2004 btrfs_mark_buffer_dirty(path->nodes[0]);
2005 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2006 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2007 root->root_key.objectid);
2009 btrfs_release_path(path);
/*
 * Repair helper: insert the missing orphan item for an unlinked inode
 * and clear I_ERR_NO_ORPHAN_ITEM on success.
 */
2013 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2014 struct btrfs_root *root,
2015 struct btrfs_path *path,
2016 struct inode_record *rec)
2020 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2021 btrfs_release_path(path);
2023 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair helper: rewrite the inode's nbytes field to the byte count
 * accumulated from its file extents (rec->found_size), then clear
 * I_ERR_FILE_NBYTES_WRONG.
 */
2027 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2028 struct btrfs_root *root,
2029 struct btrfs_path *path,
2030 struct inode_record *rec)
2032 struct btrfs_inode_item *ei;
2033 struct btrfs_key key;
2036 key.objectid = rec->ino;
2037 key.type = BTRFS_INODE_ITEM_KEY;
2040 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2047 /* Since ret == 0, no need to check anything */
2048 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2049 struct btrfs_inode_item);
2050 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2051 btrfs_mark_buffer_dirty(path->nodes[0]);
2052 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2053 printf("reset nbytes for ino %llu root %llu\n",
2054 rec->ino, root->root_key.objectid);
2056 btrfs_release_path(path);
2060 static int add_missing_dir_index(struct btrfs_root *root,
2061 struct cache_tree *inode_cache,
2062 struct inode_record *rec,
2063 struct inode_backref *backref)
2065 struct btrfs_path *path;
2066 struct btrfs_trans_handle *trans;
2067 struct btrfs_dir_item *dir_item;
2068 struct extent_buffer *leaf;
2069 struct btrfs_key key;
2070 struct btrfs_disk_key disk_key;
2071 struct inode_record *dir_rec;
2072 unsigned long name_ptr;
2073 u32 data_size = sizeof(*dir_item) + backref->namelen;
2076 path = btrfs_alloc_path();
2080 trans = btrfs_start_transaction(root, 1);
2081 if (IS_ERR(trans)) {
2082 btrfs_free_path(path);
2083 return PTR_ERR(trans);
2086 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2087 (unsigned long long)rec->ino);
2088 key.objectid = backref->dir;
2089 key.type = BTRFS_DIR_INDEX_KEY;
2090 key.offset = backref->index;
2092 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2095 leaf = path->nodes[0];
2096 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2098 disk_key.objectid = cpu_to_le64(rec->ino);
2099 disk_key.type = BTRFS_INODE_ITEM_KEY;
2100 disk_key.offset = 0;
2102 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2103 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2104 btrfs_set_dir_data_len(leaf, dir_item, 0);
2105 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2106 name_ptr = (unsigned long)(dir_item + 1);
2107 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2108 btrfs_mark_buffer_dirty(leaf);
2109 btrfs_free_path(path);
2110 btrfs_commit_transaction(trans, root);
2112 backref->found_dir_index = 1;
2113 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2114 BUG_ON(IS_ERR(dir_rec));
2117 dir_rec->found_size += backref->namelen;
2118 if (dir_rec->found_size == dir_rec->isize &&
2119 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2120 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2121 if (dir_rec->found_size != dir_rec->isize)
2122 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair helper: delete a bad DIR_INDEX entry (one with no matching
 * inode ref, or whose index conflicts with the inode ref).  If the
 * item holds several names, only the offending name is removed.
 */
2127 static int delete_dir_index(struct btrfs_root *root,
2128 struct cache_tree *inode_cache,
2129 struct inode_record *rec,
2130 struct inode_backref *backref)
2132 struct btrfs_trans_handle *trans;
2133 struct btrfs_dir_item *di;
2134 struct btrfs_path *path;
2137 path = btrfs_alloc_path();
2141 trans = btrfs_start_transaction(root, 1);
2142 if (IS_ERR(trans)) {
2143 btrfs_free_path(path);
2144 return PTR_ERR(trans);
2148 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2149 (unsigned long long)backref->dir,
2150 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2151 (unsigned long long)root->objectid);
2153 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2154 backref->name, backref->namelen,
2155 backref->index, -1);
/* entry already gone (or lookup failed): nothing left to delete */
2158 btrfs_free_path(path);
2159 btrfs_commit_transaction(trans, root);
/* whole item vs. one name of a multi-name item — elided branch decides */
2166 ret = btrfs_del_item(trans, root, path);
2168 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2170 btrfs_free_path(path);
2171 btrfs_commit_transaction(trans, root);
/*
 * Repair helper: recreate a missing inode item from what the backrefs
 * and extents tell us.  Mode and size are guessed: dir items present
 * means directory, otherwise a regular file; permissions default to
 * 0755.  @root_dir selects the fixed nlink=1 used for a root directory.
 */
2175 static int create_inode_item(struct btrfs_root *root,
2176 struct inode_record *rec,
2177 struct inode_backref *backref, int root_dir)
2179 struct btrfs_trans_handle *trans;
2180 struct btrfs_inode_item inode_item;
2181 time_t now = time(NULL);
2184 trans = btrfs_start_transaction(root, 1);
2185 if (IS_ERR(trans)) {
2186 ret = PTR_ERR(trans);
2190 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2191 "be incomplete, please check permissions and content after "
2192 "the fsck completes.\n", (unsigned long long)root->objectid,
2193 (unsigned long long)rec->ino);
2195 memset(&inode_item, 0, sizeof(inode_item));
2196 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* root dirs always have nlink 1; otherwise trust the counted links */
2198 btrfs_set_stack_inode_nlink(&inode_item, 1);
2200 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2201 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2202 if (rec->found_dir_item) {
2203 if (rec->found_file_extent)
2204 fprintf(stderr, "root %llu inode %llu has both a dir "
2205 "item and extents, unsure if it is a dir or a "
2206 "regular file so setting it as a directory\n",
2207 (unsigned long long)root->objectid,
2208 (unsigned long long)rec->ino);
2209 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2210 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2211 } else if (!rec->found_dir_item) {
2212 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2213 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* timestamps: stamp a/c/mtime with "now", leave otime at epoch 0 */
2215 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2216 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2217 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2218 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2219 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2220 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2221 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2222 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2224 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2226 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of an inode record and repair each inconsistency:
 * delete bad dir indexes, add missing ones, insert a missing dir
 * item/index pair for a dangling inode ref, and recreate the inode
 * item when only consistent backrefs remain.
 * Returns a negative error, or (on success) the number of repairs made.
 *
 * NOTE(review): several branch-internal lines (delete-mode handling,
 * error bail-outs, repaired++ counters) are elided in this excerpt.
 */
2230 static int repair_inode_backrefs(struct btrfs_root *root,
2231 struct inode_record *rec,
2232 struct cache_tree *inode_cache,
2235 struct inode_backref *tmp, *backref;
2236 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2240 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* root dir missing its inode item: recreate it first */
2241 if (!delete && rec->ino == root_dirid) {
2242 if (!rec->found_inode_item) {
2243 ret = create_inode_item(root, rec, backref, 1);
2250 /* Index 0 for root dir's are special, don't mess with it */
2251 if (rec->ino == root_dirid && backref->index == 0)
/* dir index with no (or mismatched) inode ref: the index is bad */
2255 ((backref->found_dir_index && !backref->found_inode_ref) ||
2256 (backref->found_dir_index && backref->found_inode_ref &&
2257 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2258 ret = delete_dir_index(root, inode_cache, rec, backref);
2262 list_del(&backref->list);
2266 if (!delete && !backref->found_dir_index &&
2267 backref->found_dir_item && backref->found_inode_ref) {
2268 ret = add_missing_dir_index(root, inode_cache, rec,
/* fully consistent backref: drop it from the to-repair list */
2273 if (backref->found_dir_item &&
2274 backref->found_dir_index &&
2275 backref->found_dir_index) {
2276 if (!backref->errors &&
2277 backref->found_inode_ref) {
2278 list_del(&backref->list);
/* inode ref exists but both dir entries are missing: re-link it */
2284 if (!delete && (!backref->found_dir_index &&
2285 !backref->found_dir_item &&
2286 backref->found_inode_ref)) {
2287 struct btrfs_trans_handle *trans;
2288 struct btrfs_key location;
2290 ret = check_dir_conflict(root, backref->name,
2296 * let nlink fixing routine to handle it,
2297 * which can do it better.
2302 location.objectid = rec->ino;
2303 location.type = BTRFS_INODE_ITEM_KEY;
2304 location.offset = 0;
2306 trans = btrfs_start_transaction(root, 1);
2307 if (IS_ERR(trans)) {
2308 ret = PTR_ERR(trans);
2311 fprintf(stderr, "adding missing dir index/item pair "
2313 (unsigned long long)rec->ino);
2314 ret = btrfs_insert_dir_item(trans, root, backref->name,
2316 backref->dir, &location,
2317 imode_to_type(rec->imode),
2320 btrfs_commit_transaction(trans, root);
/* all naming metadata consistent but inode item missing: recreate it */
2324 if (!delete && (backref->found_inode_ref &&
2325 backref->found_dir_index &&
2326 backref->found_dir_item &&
2327 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2328 !rec->found_inode_item)) {
2329 ret = create_inode_item(root, rec, backref, 0);
2336 return ret ? ret : repaired;
2340 * To determine the file type for nlink/inode_item repair
2342 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2343 * Return -ENOENT if file type is not found.
/*
 * Determine the BTRFS_FT_* file type for @rec: prefer the recovered
 * inode item's mode, otherwise fall back to the filetype recorded in
 * any dir item/index backref.  Returns 0 on success, -ENOENT if no
 * source of truth exists (per the comment above).
 */
2345 static int find_file_type(struct inode_record *rec, u8 *type)
2347 struct inode_backref *backref;
2349 /* For inode item recovered case */
2350 if (rec->found_inode_item) {
2351 *type = imode_to_type(rec->imode);
/* otherwise any dir entry that names this inode carries its type */
2355 list_for_each_entry(backref, &rec->backrefs, list) {
2356 if (backref->found_dir_index || backref->found_dir_item) {
2357 *type = backref->filetype;
2365 * To determine the file name for nlink repair
2367 * Return 0 if file name is found, set name and namelen.
2368 * Return -ENOENT if file name is not found.
/*
 * Recover a usable file name for @rec from the first backref that
 * carries one (dir item, dir index, or inode ref).  Copies the name
 * into @name and stores its length in @namelen; see the comment above
 * for the 0 / -ENOENT contract.
 */
2370 static int find_file_name(struct inode_record *rec,
2371 char *name, int *namelen)
2373 struct inode_backref *backref;
2375 list_for_each_entry(backref, &rec->backrefs, list) {
2376 if (backref->found_dir_index || backref->found_dir_item ||
2377 backref->found_inode_ref) {
2378 memcpy(name, backref->name, backref->namelen);
2379 *namelen = backref->namelen;
2386 /* Reset the nlink of the inode to the correct one */
/* Reset the nlink of the inode to the correct one */
/*
 * Strategy: unlink every backref (valid and invalid), force the inode's
 * nlink to 0, then re-add only the fully consistent backrefs via
 * btrfs_add_link(), which re-increments nlink as it goes.  Invalid
 * backrefs are removed from the record so they are not re-added.
 */
2387 static int reset_nlink(struct btrfs_trans_handle *trans,
2388 struct btrfs_root *root,
2389 struct btrfs_path *path,
2390 struct inode_record *rec)
2392 struct inode_backref *backref;
2393 struct inode_backref *tmp;
2394 struct btrfs_key key;
2395 struct btrfs_inode_item *inode_item;
2398 /* We don't believe this either, reset it and iterate backref */
2399 rec->found_link = 0;
2401 /* Remove all backref including the valid ones */
2402 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2403 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2404 backref->index, backref->name,
2405 backref->namelen, 0);
2409 /* remove invalid backref, so it won't be added back */
2410 if (!(backref->found_dir_index &&
2411 backref->found_dir_item &&
2412 backref->found_inode_ref)) {
2413 list_del(&backref->list);
2420 /* Set nlink to 0 */
2421 key.objectid = rec->ino;
2422 key.type = BTRFS_INODE_ITEM_KEY;
2424 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2431 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2432 struct btrfs_inode_item);
2433 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2434 btrfs_mark_buffer_dirty(path->nodes[0]);
2435 btrfs_release_path(path);
2438 * Add back valid inode_ref/dir_item/dir_index,
2439 * add_link() will handle the nlink inc, so new nlink must be correct
2441 list_for_each_entry(backref, &rec->backrefs, list) {
2442 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2443 backref->name, backref->namelen,
2444 backref->filetype, &backref->index, 1);
2449 btrfs_release_path(path);
/*
 * Repair a wrong link count: recover a name and type, rebuild nlink
 * from the valid backrefs via reset_nlink(), and — if no valid link
 * remains — move the inode into a "lost+found" directory so it is
 * reachable again.  Clears I_ERR_LINK_COUNT_WRONG when done.
 *
 * NOTE(review): some error-path and bookkeeping lines are elided in
 * this excerpt.
 */
2453 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2454 struct btrfs_root *root,
2455 struct btrfs_path *path,
2456 struct inode_record *rec)
2458 char *dir_name = "lost+found";
2459 char namebuf[BTRFS_NAME_LEN] = {0};
2464 int name_recovered = 0;
2465 int type_recovered = 0;
2469 * Get file name and type first before these invalid inode ref
2470 * are deleted by remove_all_invalid_backref()
2472 name_recovered = !find_file_name(rec, namebuf, &namelen);
2473 type_recovered = !find_file_type(rec, &type);
/* fall back to the inode number itself as the file name */
2475 if (!name_recovered) {
2476 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2477 rec->ino, rec->ino);
2478 namelen = count_digits(rec->ino);
2479 sprintf(namebuf, "%llu", rec->ino);
2482 if (!type_recovered) {
2483 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2485 type = BTRFS_FT_REG_FILE;
2489 ret = reset_nlink(trans, root, path, rec);
2492 "Failed to reset nlink for inode %llu: %s\n",
2493 rec->ino, strerror(-ret));
/* no valid backref survived: relink the inode under lost+found */
2497 if (rec->found_link == 0) {
2498 lost_found_ino = root->highest_inode;
2499 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2504 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2505 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2508 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2509 dir_name, strerror(-ret));
2512 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2513 namebuf, namelen, type, NULL, 1);
2515 * Add ".INO" suffix several times to handle case where
2516 * "FILENAME.INO" is already taken by another file.
2518 while (ret == -EEXIST) {
2520 * Conflicting file name: append ".INO" (the +1 is for '.')
2522 if (namelen + count_digits(rec->ino) + 1 >
2527 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2529 namelen += count_digits(rec->ino) + 1;
2530 ret = btrfs_add_link(trans, root, rec->ino,
2531 lost_found_ino, namebuf,
2532 namelen, type, NULL, 1);
2536 "Failed to link the inode %llu to %s dir: %s\n",
2537 rec->ino, dir_name, strerror(-ret));
2541 * Just increase the found_link, don't actually add the
2542 * backref. This will make things easier and this inode
2543 * record will be freed after the repair is done.
2544 * So fsck will not report problem about this inode.
2547 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2548 namelen, namebuf, dir_name);
2550 printf("Fixed the nlink of inode %llu\n", rec->ino);
2553 * Clear the flag anyway, or we will loop forever for the same inode
2554 * as it will not be removed from the bad inode list and the dead loop
2557 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2558 btrfs_release_path(path);
2563 * Check if there is any normal(reg or prealloc) file extent for given
2565 * This is used to determine the file type when neither its dir_index/item or
2566 * inode_item exists.
2568 * This will *NOT* report error, if any error happens, just consider it does
2569 * not have any normal file extent.
/*
 * Return non-zero if inode @ino has at least one non-inline (regular
 * or preallocated) file extent; best-effort, errors count as "no"
 * (see the comment above).
 *
 * NOTE(review): several lines (key setup for @ino, loop/next-item
 * control, the found-flag and return) are elided in this excerpt.
 */
2571 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2573 struct btrfs_path *path;
2574 struct btrfs_key key;
2575 struct btrfs_key found_key;
2576 struct btrfs_file_extent_item *fi;
2580 path = btrfs_alloc_path();
2584 key.type = BTRFS_EXTENT_DATA_KEY;
2587 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2592 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2593 ret = btrfs_next_leaf(root, path);
2600 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* stop once we leave this inode's EXTENT_DATA items */
2602 if (found_key.objectid != ino ||
2603 found_key.type != BTRFS_EXTENT_DATA_KEY)
2605 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2606 struct btrfs_file_extent_item);
2607 type = btrfs_file_extent_type(path->nodes[0], fi);
2608 if (type != BTRFS_FILE_EXTENT_INLINE) {
2614 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry file type to the corresponding
 * S_IF* bits of an inode mode.  Types not in the table map to 0
 * (the array is zero-filled by default).
 */
2618 static u32 btrfs_type_to_imode(u8 type)
2620 static u32 imode_by_btrfs_type[] = {
2621 [BTRFS_FT_REG_FILE] = S_IFREG,
2622 [BTRFS_FT_DIR] = S_IFDIR,
2623 [BTRFS_FT_CHRDEV] = S_IFCHR,
2624 [BTRFS_FT_BLKDEV] = S_IFBLK,
2625 [BTRFS_FT_FIFO] = S_IFIFO,
2626 [BTRFS_FT_SOCK] = S_IFSOCK,
2627 [BTRFS_FT_SYMLINK] = S_IFLNK,
2630 return imode_by_btrfs_type[(type)];
/*
 * Repair helper for a completely missing inode item: guess the file
 * type (backref filetype, then extents/dir items/orphan extents, then
 * FILE as last resort), create a fresh inode item, and mark the record
 * so the nlink repair pass will also run on it.
 */
2633 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2634 struct btrfs_root *root,
2635 struct btrfs_path *path,
2636 struct inode_record *rec)
2640 int type_recovered = 0;
2643 printf("Trying to rebuild inode:%llu\n", rec->ino);
2645 type_recovered = !find_file_type(rec, &filetype);
2648 * Try to determine inode type if type not found.
2650 * For found regular file extent, it must be FILE.
2651 * For found dir_item/index, it must be DIR.
2653 * For undetermined one, use FILE as fallback.
2656 * 1. If found backref(inode_index/item is already handled) to it,
2658 * Need new inode-inode ref structure to allow search for that.
2660 if (!type_recovered) {
2661 if (rec->found_file_extent &&
2662 find_normal_file_extent(root, rec->ino)) {
2664 filetype = BTRFS_FT_REG_FILE;
2665 } else if (rec->found_dir_item) {
2667 filetype = BTRFS_FT_DIR;
2668 } else if (!list_empty(&rec->orphan_extents)) {
2670 filetype = BTRFS_FT_REG_FILE;
2672 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2675 filetype = BTRFS_FT_REG_FILE;
2679 ret = btrfs_new_inode(trans, root, rec->ino,
2680 mode | btrfs_type_to_imode(filetype));
2685 * Here inode rebuild is done, we only rebuild the inode item,
2686 * don't repair the nlink(like move to lost+found).
2687 * That is the job of nlink repair.
2689 * We just fill the record and return
2691 rec->found_dir_item = 1;
2692 rec->imode = mode | btrfs_type_to_imode(filetype);
2694 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2695 /* Ensure the inode_nlinks repair function will be called */
2696 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach orphan data extents to their inode: for each recorded
 * orphan extent, either free it (if it conflicts with an existing file
 * extent) or insert a file extent item for it, updating the record's
 * size/hole accounting along the way.  Clears I_ERR_FILE_EXTENT_ORPHAN
 * once the list is processed.
 *
 * NOTE(review): conflict-detection branching and error bail-outs are
 * partly elided in this excerpt.
 */
2701 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2702 struct btrfs_root *root,
2703 struct btrfs_path *path,
2704 struct inode_record *rec)
2706 struct orphan_data_extent *orphan;
2707 struct orphan_data_extent *tmp;
2710 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2712 * Check for conflicting file extents
2714 * Here we don't know whether the extents is compressed or not,
2715 * so we can only assume it not compressed nor data offset,
2716 * and use its disk_len as extent length.
2718 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2719 orphan->offset, orphan->disk_len, 0);
2720 btrfs_release_path(path);
/* the range is already covered: drop the orphan extent instead */
2725 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2726 orphan->disk_bytenr, orphan->disk_len);
2727 ret = btrfs_free_extent(trans,
2728 root->fs_info->extent_root,
2729 orphan->disk_bytenr, orphan->disk_len,
2730 0, root->objectid, orphan->objectid,
2735 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2736 orphan->offset, orphan->disk_bytenr,
2737 orphan->disk_len, orphan->disk_len);
2741 /* Update file size info */
2742 rec->found_size += orphan->disk_len;
2743 if (rec->found_size == rec->nbytes)
2744 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2746 /* Update the file extent hole info too */
2747 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2751 if (RB_EMPTY_ROOT(&rec->holes))
2752 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2754 list_del(&orphan->list);
2757 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill in missing file-extent coverage ("discount" extents) by punching
 * explicit hole extents for every gap recorded in rec->holes, clearing
 * I_ERR_FILE_EXTENT_DISCOUNT when the hole tree is empty.  A file that
 * lost all extents gets one hole spanning its whole (rounded) isize.
 */
2762 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2763 struct btrfs_root *root,
2764 struct btrfs_path *path,
2765 struct inode_record *rec)
2767 struct rb_node *node;
2768 struct file_extent_hole *hole;
2772 node = rb_first(&rec->holes);
2776 hole = rb_entry(node, struct file_extent_hole, node);
2777 ret = btrfs_punch_hole(trans, root, rec->ino,
2778 hole->start, hole->len);
2781 ret = del_file_extent_hole(&rec->holes, hole->start,
2785 if (RB_EMPTY_ROOT(&rec->holes))
2786 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2787 node = rb_first(&rec->holes);
2789 /* special case for a file losing all its file extent */
2791 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2792 round_up(rec->isize, root->sectorsize));
2796 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2797 rec->ino, root->objectid);
/*
 * Dispatcher for per-inode repairs.  Bails out early unless @rec carries
 * at least one error bit we know how to fix, then runs the matching
 * repair helpers in dependency order (inode item must exist before its
 * extents/isize/nlinks can be repaired) inside a single transaction.
 * A failure from any helper short-circuits the remaining ones.
 */
2802 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2804 struct btrfs_trans_handle *trans;
2805 struct btrfs_path *path;
/* Only these error bits are repairable here; anything else is left alone. */
2808 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2809 I_ERR_NO_ORPHAN_ITEM |
2810 I_ERR_LINK_COUNT_WRONG |
2811 I_ERR_NO_INODE_ITEM |
2812 I_ERR_FILE_EXTENT_ORPHAN |
2813 I_ERR_FILE_EXTENT_DISCOUNT|
2814 I_ERR_FILE_NBYTES_WRONG)))
2817 path = btrfs_alloc_path();
2822 * For nlink repair, it may create a dir and add link, so
2823 * 2 for parent(256)'s dir_index and dir_item
2824 * 2 for lost+found dir's inode_item and inode_ref
2825 * 1 for the new inode_ref of the file
2826 * 2 for lost+found dir's dir_index and dir_item for the file
/* 7 reserved items, itemized in the comment above. */
2828 trans = btrfs_start_transaction(root, 7);
2829 if (IS_ERR(trans)) {
2830 btrfs_free_path(path);
2831 return PTR_ERR(trans);
/* Each helper only runs if the previous ones succeeded (ret == 0). */
2834 if (rec->errors & I_ERR_NO_INODE_ITEM)
2835 ret = repair_inode_no_item(trans, root, path, rec);
2836 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2837 ret = repair_inode_orphan_extent(trans, root, path, rec);
2838 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2839 ret = repair_inode_discount_extent(trans, root, path, rec);
2840 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2841 ret = repair_inode_isize(trans, root, path, rec);
2842 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2843 ret = repair_inode_orphan_item(trans, root, path, rec);
2844 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2845 ret = repair_inode_nlinks(trans, root, path, rec);
2846 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2847 ret = repair_inode_nbytes(trans, root, path, rec);
2848 btrfs_commit_transaction(trans, root);
2849 btrfs_free_path(path);
/*
 * Validate (and optionally repair) every inode record collected for one
 * fs/subvolume tree.
 *
 * Overall flow:
 *  - dropped roots (refs == 0) should have an empty cache; warn otherwise;
 *  - remember the highest inode number so a later 'lost+found' inode can
 *    pick an unused objectid;
 *  - when repairing, fix backrefs first (possibly over several stages,
 *    rescanning in between) since backref fixes can change inode errors;
 *  - verify the root directory, recreating it when missing and repair
 *    is enabled;
 *  - finally walk all remaining records, derive error bits, attempt
 *    try_repair_inode(), and print whatever could not be resolved.
 */
2853 static int check_inode_recs(struct btrfs_root *root,
2854 struct cache_tree *inode_cache)
2856 struct cache_extent *cache;
2857 struct ptr_node *node;
2858 struct inode_record *rec;
2859 struct inode_backref *backref;
2864 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dropped root must not own live inode records. */
2866 if (btrfs_root_refs(&root->root_item) == 0) {
2867 if (!cache_tree_empty(inode_cache))
2868 fprintf(stderr, "warning line %d\n", __LINE__);
2873 * We need to record the highest inode number for later 'lost+found'
2875 * We must select a ino not used/refered by any existing inode, or
2876 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2877 * this may cause 'lost+found' dir has wrong nlinks.
/* The cache is keyed by ino, so the last extent holds the highest one. */
2879 cache = last_cache_extent(inode_cache);
2881 node = container_of(cache, struct ptr_node, cache);
2883 if (rec->ino > root->highest_inode)
2884 root->highest_inode = rec->ino;
2888 * We need to repair backrefs first because we could change some of the
2889 * errors in the inode recs.
2891 * We also need to go through and delete invalid backrefs first and then
2892 * add the correct ones second. We do this because we may get EEXIST
2893 * when adding back the correct index because we hadn't yet deleted the
2896 * For example, if we were missing a dir index then the directories
2897 * isize would be wrong, so if we fixed the isize to what we thought it
2898 * would be and then fixed the backref we'd still have a invalid fs, so
2899 * we need to add back the dir index and then check to see if the isize
/* Multi-stage backref repair loop; stage 3 with no errors means done. */
2904 if (stage == 3 && !err)
2907 cache = search_cache_extent(inode_cache, 0);
2908 while (repair && cache) {
2909 node = container_of(cache, struct ptr_node, cache);
2911 cache = next_cache_extent(cache);
2913 /* Need to free everything up and rescan */
2915 remove_cache_extent(inode_cache, &node->cache);
2917 free_inode_rec(rec);
2921 if (list_empty(&rec->backrefs))
2924 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory check: inode @root_dirid must exist and be sane. */
2938 rec = get_inode_rec(inode_cache, root_dirid, 0);
2939 BUG_ON(IS_ERR(rec));
2941 ret = check_root_dir(rec);
2943 fprintf(stderr, "root %llu root dir %llu error\n",
2944 (unsigned long long)root->root_key.objectid,
2945 (unsigned long long)root_dirid);
2946 print_inode_error(root, rec);
/* Missing root dir: recreate it when repair mode is on. */
2951 struct btrfs_trans_handle *trans;
2953 trans = btrfs_start_transaction(root, 1);
2954 if (IS_ERR(trans)) {
2955 err = PTR_ERR(trans);
2960 "root %llu missing its root dir, recreating\n",
2961 (unsigned long long)root->objectid);
2963 ret = btrfs_make_root_dir(trans, root, root_dirid);
2966 btrfs_commit_transaction(trans, root);
2970 fprintf(stderr, "root %llu root dir %llu not found\n",
2971 (unsigned long long)root->root_key.objectid,
2972 (unsigned long long)root_dirid);
/* Main pass: drain every remaining record from the cache. */
2976 cache = search_cache_extent(inode_cache, 0);
2979 node = container_of(cache, struct ptr_node, cache);
2981 remove_cache_extent(inode_cache, &node->cache);
/* Root dir and orphan pseudo-inode were already handled above. */
2983 if (rec->ino == root_dirid ||
2984 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2985 free_inode_rec(rec);
/* A present orphan item resolves I_ERR_NO_ORPHAN_ITEM. */
2989 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2990 ret = check_orphan_item(root, rec->ino);
2992 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2993 if (can_free_inode_rec(rec)) {
2994 free_inode_rec(rec);
/* Derive the remaining error bits from what the scan found. */
2999 if (!rec->found_inode_item)
3000 rec->errors |= I_ERR_NO_INODE_ITEM;
3001 if (rec->found_link != rec->nlink)
3002 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3004 ret = try_repair_inode(root, rec);
3005 if (ret == 0 && can_free_inode_rec(rec)) {
3006 free_inode_rec(rec);
/* Unrepaired (or repair disabled): report the inode and its backrefs. */
3012 if (!(repair && ret == 0))
3014 print_inode_error(root, rec);
3015 list_for_each_entry(backref, &rec->backrefs, list) {
3016 if (!backref->found_dir_item)
3017 backref->errors |= REF_ERR_NO_DIR_ITEM;
3018 if (!backref->found_dir_index)
3019 backref->errors |= REF_ERR_NO_DIR_INDEX;
3020 if (!backref->found_inode_ref)
3021 backref->errors |= REF_ERR_NO_INODE_REF;
3022 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3023 " namelen %u name %s filetype %d errors %x",
3024 (unsigned long long)backref->dir,
3025 (unsigned long long)backref->index,
3026 backref->namelen, backref->name,
3027 backref->filetype, backref->errors);
3028 print_ref_error(backref->errors);
3030 free_inode_rec(rec);
3032 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record if none exists yet.  Returns the
 * record, or ERR_PTR(-ENOMEM)/(-EEXIST) on allocation/insert failure.
 */
3035 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3038 struct cache_extent *cache;
3039 struct root_record *rec = NULL;
3042 cache = lookup_cache_extent(root_cache, objectid, 1);
3044 rec = container_of(cache, struct root_record, cache);
/* Not cached yet: create a new record keyed by the root objectid. */
3046 rec = calloc(1, sizeof(*rec));
3048 return ERR_PTR(-ENOMEM);
3049 rec->objectid = objectid;
3050 INIT_LIST_HEAD(&rec->backrefs);
/* One cache slot per root: start = objectid, size = 1. */
3051 rec->cache.start = objectid;
3052 rec->cache.size = 1;
3054 ret = insert_cache_extent(root_cache, &rec->cache);
3056 return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec matching (ref_root, dir, name); if none
 * exists, allocate one with the name stored inline (calloc includes
 * namelen + 1 bytes for the NUL-terminated copy) and append it to the
 * record's backref list.
 */
3061 static struct root_backref *get_root_backref(struct root_record *rec,
3062 u64 ref_root, u64 dir, u64 index,
3063 const char *name, int namelen)
3065 struct root_backref *backref;
/* Match on referencing root, parent dir, and the exact name. */
3067 list_for_each_entry(backref, &rec->backrefs, list) {
3068 if (backref->ref_root != ref_root || backref->dir != dir ||
3069 backref->namelen != namelen)
3071 if (memcmp(name, backref->name, namelen))
/* No match: build a new backref with inline name storage. */
3076 backref = calloc(1, sizeof(*backref) + namelen + 1);
3079 backref->ref_root = ref_root;
3081 backref->index = index;
3082 backref->namelen = namelen;
3083 memcpy(backref->name, name, namelen);
3084 backref->name[namelen] = '\0';
3085 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for a root_record stored in a cache tree: detach and
 * release every backref on its list, then the record itself.  Used as
 * the per-extent callback by FREE_EXTENT_CACHE_BASED_TREE below.
 */
3089 static void free_root_record(struct cache_extent *cache)
3091 struct root_record *rec;
3092 struct root_backref *backref;
3094 rec = container_of(cache, struct root_record, cache);
3095 while (!list_empty(&rec->backrefs)) {
3096 backref = list_entry(rec->backrefs.next,
3097 struct root_backref, list);
3098 list_del(&backref->list);
/* Generates free_root_recs_tree() for whole-tree teardown. */
3105 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence about the link from @ref_root's directory
 * @dir to subvolume @root_id: a DIR_ITEM, DIR_INDEX, ROOT_REF or
 * ROOT_BACKREF item.  The matching root_backref accumulates found_* bits
 * and error flags (duplicate refs, mismatched dir index).  A backref is
 * considered reachable once both the forward root ref and the dir item
 * have been seen.
 */
3107 static int add_root_backref(struct cache_tree *root_cache,
3108 u64 root_id, u64 ref_root, u64 dir, u64 index,
3109 const char *name, int namelen,
3110 int item_type, int errors)
3112 struct root_record *rec;
3113 struct root_backref *backref;
3115 rec = get_root_rec(root_cache, root_id);
3116 BUG_ON(IS_ERR(rec));
3117 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3120 backref->errors |= errors;
/* DIR_ITEM keys carry no index; everyone else must agree on it. */
3122 if (item_type != BTRFS_DIR_ITEM_KEY) {
3123 if (backref->found_dir_index || backref->found_back_ref ||
3124 backref->found_forward_ref) {
3125 if (backref->index != index)
3126 backref->errors |= REF_ERR_INDEX_UNMATCH;
3128 backref->index = index;
/* Mark which item type was seen, flagging duplicates. */
3132 if (item_type == BTRFS_DIR_ITEM_KEY) {
3133 if (backref->found_forward_ref)
3135 backref->found_dir_item = 1;
3136 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3137 backref->found_dir_index = 1;
3138 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3139 if (backref->found_forward_ref)
3140 backref->errors |= REF_ERR_DUP_ROOT_REF;
3141 else if (backref->found_dir_item)
3143 backref->found_forward_ref = 1;
3144 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3145 if (backref->found_back_ref)
3146 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3147 backref->found_back_ref = 1;
/* Forward ref + dir item together make the subvolume reachable. */
3152 if (backref->found_forward_ref && backref->found_dir_item)
3153 backref->reachable = 1;
/*
 * Fold the per-subvolume root cache (@src_cache, built while walking one
 * tree) into the global root cache (@dst_cache).  Each inode record that
 * turns out to be a child root has its dir item/index backrefs converted
 * into root backrefs.  Records from the tree reloc tree are discarded
 * wholesale since they do not describe real subvolume links.
 */
3157 static int merge_root_recs(struct btrfs_root *root,
3158 struct cache_tree *src_cache,
3159 struct cache_tree *dst_cache)
3161 struct cache_extent *cache;
3162 struct ptr_node *node;
3163 struct inode_record *rec;
3164 struct inode_backref *backref;
/* Reloc tree entries carry no subvolume linkage info; drop them all. */
3167 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3168 free_inode_recs_tree(src_cache);
3173 cache = search_cache_extent(src_cache, 0);
3176 node = container_of(cache, struct ptr_node, cache);
3178 remove_cache_extent(src_cache, &node->cache);
/* Only records whose ino is actually a child root get merged. */
3181 ret = is_child_root(root, root->objectid, rec->ino);
/* Child roots have no inode refs, only dir item/index backrefs. */
3187 list_for_each_entry(backref, &rec->backrefs, list) {
3188 BUG_ON(backref->found_inode_ref);
3189 if (backref->found_dir_item)
3190 add_root_backref(dst_cache, rec->ino,
3191 root->root_key.objectid, backref->dir,
3192 backref->index, backref->name,
3193 backref->namelen, BTRFS_DIR_ITEM_KEY,
3195 if (backref->found_dir_index)
3196 add_root_backref(dst_cache, rec->ino,
3197 root->root_key.objectid, backref->dir,
3198 backref->index, backref->name,
3199 backref->namelen, BTRFS_DIR_INDEX_KEY,
3203 free_inode_rec(rec);
/*
 * Verify the reachability graph of all subvolume roots.
 *
 * Starting from the FS tree (always referenced), iteratively clears the
 * 'reachable' flag on backrefs whose referencing root itself has no
 * references, repeating until a pass makes no changes (fixpoint).  Then
 * reports every fs tree that ended up unreferenced and every backref
 * with unresolved errors.  Returns 1 if any errors remain, 0 otherwise.
 */
3210 static int check_root_refs(struct btrfs_root *root,
3211 struct cache_tree *root_cache)
3213 struct root_record *rec;
3214 struct root_record *ref_root;
3215 struct root_backref *backref;
3216 struct cache_extent *cache;
/* The top-level FS tree is implicitly referenced. */
3222 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3223 BUG_ON(IS_ERR(rec));
3226 /* fixme: this can not detect circular references */
/* Propagation pass: re-run until no backref flips to unreachable. */
3229 cache = search_cache_extent(root_cache, 0);
3233 rec = container_of(cache, struct root_record, cache);
3234 cache = next_cache_extent(cache);
3236 if (rec->found_ref == 0)
3239 list_for_each_entry(backref, &rec->backrefs, list) {
3240 if (!backref->reachable)
/* A backref only counts if its referencing root is itself referenced. */
3243 ref_root = get_root_rec(root_cache,
3245 BUG_ON(IS_ERR(ref_root));
3246 if (ref_root->found_ref > 0)
3249 backref->reachable = 0;
3251 if (rec->found_ref == 0)
/* Reporting pass over the final reachability state. */
3257 cache = search_cache_extent(root_cache, 0);
3261 rec = container_of(cache, struct root_record, cache);
3262 cache = next_cache_extent(cache);
3264 if (rec->found_ref == 0 &&
3265 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3266 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
/* An orphan item in the tree root legitimizes an unreferenced root. */
3267 ret = check_orphan_item(root->fs_info->tree_root,
3273 * If we don't have a root item then we likely just have
3274 * a dir item in a snapshot for this root but no actual
3275 * ref key or anything so it's meaningless.
3277 if (!rec->found_root_item)
3280 fprintf(stderr, "fs tree %llu not referenced\n",
3281 (unsigned long long)rec->objectid);
/* Referenced but missing its root item, or broken backrefs: report. */
3285 if (rec->found_ref > 0 && !rec->found_root_item)
3287 list_for_each_entry(backref, &rec->backrefs, list) {
3288 if (!backref->found_dir_item)
3289 backref->errors |= REF_ERR_NO_DIR_ITEM;
3290 if (!backref->found_dir_index)
3291 backref->errors |= REF_ERR_NO_DIR_INDEX;
3292 if (!backref->found_back_ref)
3293 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3294 if (!backref->found_forward_ref)
3295 backref->errors |= REF_ERR_NO_ROOT_REF;
3296 if (backref->reachable && backref->errors)
3303 fprintf(stderr, "fs tree %llu refs %u %s\n",
3304 (unsigned long long)rec->objectid, rec->found_ref,
3305 rec->found_root_item ? "" : "not found");
3307 list_for_each_entry(backref, &rec->backrefs, list) {
3308 if (!backref->reachable)
3310 if (!backref->errors && rec->found_root_item)
3312 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3313 " index %llu namelen %u name %s errors %x\n",
3314 (unsigned long long)backref->ref_root,
3315 (unsigned long long)backref->dir,
3316 (unsigned long long)backref->index,
3317 backref->namelen, backref->name,
3319 print_ref_error(backref->errors);
3322 return errors > 0 ? 1 : 0;
/*
 * Parse one ROOT_REF / ROOT_BACKREF item from @eb and feed it into the
 * root cache.  Names longer than BTRFS_NAME_LEN are truncated and
 * flagged with REF_ERR_NAME_TOO_LONG.  The (root, referencing-root)
 * direction depends on the key type: for ROOT_REF the referenced root
 * is key->offset, for ROOT_BACKREF it is key->objectid.
 */
3325 static int process_root_ref(struct extent_buffer *eb, int slot,
3326 struct btrfs_key *key,
3327 struct cache_tree *root_cache)
3333 struct btrfs_root_ref *ref;
3334 char namebuf[BTRFS_NAME_LEN];
3337 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3339 dirid = btrfs_root_ref_dirid(eb, ref);
3340 index = btrfs_root_ref_sequence(eb, ref);
3341 name_len = btrfs_root_ref_name_len(eb, ref);
/* Clamp oversized names to the buffer and record the error. */
3343 if (name_len <= BTRFS_NAME_LEN) {
3347 len = BTRFS_NAME_LEN;
3348 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the btrfs_root_ref struct in the item. */
3350 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3352 if (key->type == BTRFS_ROOT_REF_KEY) {
3353 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3354 index, namebuf, len, key->type, error);
3356 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3357 index, namebuf, len, key->type, error);
/*
 * Destructor for a btrfs_corrupt_block cache entry; used by the
 * FREE_EXTENT_CACHE_BASED_TREE macro below to generate
 * free_corrupt_blocks_tree().
 */
3362 static void free_corrupt_block(struct cache_extent *cache)
3364 struct btrfs_corrupt_block *corrupt;
3366 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3370 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3373 * Repair the btree of the given root.
3375 * The fix is to remove the node key in corrupt_blocks cache_tree.
3376 * and rebalance the tree.
3377 * After the fix, the btree should be writeable.
3379 static int repair_btree(struct btrfs_root *root,
3380 struct cache_tree *corrupt_blocks)
3382 struct btrfs_trans_handle *trans;
3383 struct btrfs_path *path;
3384 struct btrfs_corrupt_block *corrupt;
3385 struct cache_extent *cache;
3386 struct btrfs_key key;
/* Nothing recorded as corrupt: nothing to do. */
3391 if (cache_tree_empty(corrupt_blocks))
3394 path = btrfs_alloc_path();
3398 trans = btrfs_start_transaction(root, 1);
3399 if (IS_ERR(trans)) {
3400 ret = PTR_ERR(trans);
3401 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: drop the pointer to each corrupt block and free its extent. */
3405 cache = first_cache_extent(corrupt_blocks);
3407 corrupt = container_of(cache, struct btrfs_corrupt_block,
3409 level = corrupt->level;
/* Search only down to the parent of the corrupt block. */
3410 path->lowest_level = level;
3411 key.objectid = corrupt->key.objectid;
3412 key.type = corrupt->key.type;
3413 key.offset = corrupt->key.offset;
3416 * Here we don't want to do any tree balance, since it may
3417 * cause a balance with corrupted brother leaf/node,
3418 * so ins_len set to 0 here.
3419 * Balance will be done after all corrupt node/leaf is deleted.
3421 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3424 offset = btrfs_node_blockptr(path->nodes[level],
3425 path->slots[level]);
3427 /* Remove the ptr */
3428 ret = btrfs_del_ptr(trans, root, path, level,
3429 path->slots[level]);
3433 * Remove the corresponding extent
3434 * return value is not concerned.
3436 btrfs_release_path(path);
3437 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3438 0, root->root_key.objectid,
3440 cache = next_cache_extent(cache);
3443 /* Balance the btree using btrfs_search_slot() */
/* Pass 2: re-search each key with ins_len == -1 to trigger rebalancing. */
3444 cache = first_cache_extent(corrupt_blocks);
3446 corrupt = container_of(cache, struct btrfs_corrupt_block,
3448 memcpy(&key, &corrupt->key, sizeof(key));
3449 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3452 /* return will always >0 since it won't find the item */
3454 btrfs_release_path(path);
3455 cache = next_cache_extent(cache);
3458 btrfs_commit_transaction(trans, root);
3460 btrfs_free_path(path);
/*
 * Fully check one fs/subvolume tree.
 *
 * Sets up a per-root corrupt-block cache, registers the root in the
 * global root cache (except for reloc trees), moves previously found
 * orphan data extents onto their inode records, validates the root
 * block itself, then walks the whole tree (honoring drop_progress for
 * partially dropped roots) via walk_down_tree/walk_up_tree.  Any
 * corrupt blocks found are reported and, in repair mode, handed to
 * repair_btree().  Finally root records are merged into @root_cache and
 * the collected inode records are checked.
 */
3464 static int check_fs_root(struct btrfs_root *root,
3465 struct cache_tree *root_cache,
3466 struct walk_control *wc)
3472 struct btrfs_path path;
3473 struct shared_node root_node;
3474 struct root_record *rec;
3475 struct btrfs_root_item *root_item = &root->root_item;
3476 struct cache_tree corrupt_blocks;
3477 struct orphan_data_extent *orphan;
3478 struct orphan_data_extent *tmp;
3479 enum btrfs_tree_block_status status;
3482 * Reuse the corrupt_block cache tree to record corrupted tree block
3484 * Unlike the usage in extent tree check, here we do it in a per
3485 * fs/subvol tree base.
3487 cache_tree_init(&corrupt_blocks);
3488 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Reloc trees are not real subvolumes; don't track them as roots. */
3490 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3491 rec = get_root_rec(root_cache, root->root_key.objectid);
3492 BUG_ON(IS_ERR(rec));
3493 if (btrfs_root_refs(root_item) > 0)
3494 rec->found_root_item = 1;
3497 btrfs_init_path(&path);
3498 memset(&root_node, 0, sizeof(root_node));
3499 cache_tree_init(&root_node.root_cache);
3500 cache_tree_init(&root_node.inode_cache);
3502 /* Move the orphan extent record to corresponding inode_record */
3503 list_for_each_entry_safe(orphan, tmp,
3504 &root->orphan_data_extents, list) {
3505 struct inode_record *inode;
3507 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3509 BUG_ON(IS_ERR(inode));
3510 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3511 list_move(&orphan->list, &inode->orphan_extents);
/* Anchor the walk_control at the root level. */
3514 level = btrfs_header_level(root->node);
3515 memset(wc->nodes, 0, sizeof(wc->nodes));
3516 wc->nodes[level] = &root_node;
3517 wc->active_node = level;
3518 wc->root_level = level;
3520 /* We may not have checked the root block, lets do that now */
3521 if (btrfs_is_leaf(root->node))
3522 status = btrfs_check_leaf(root, NULL, root->node);
3524 status = btrfs_check_node(root, NULL, root->node);
3525 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live roots start at the tree root; dropped roots resume at
 * drop_progress. */
3528 if (btrfs_root_refs(root_item) > 0 ||
3529 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3530 path.nodes[level] = root->node;
3531 extent_buffer_get(root->node);
3532 path.slots[level] = 0;
3534 struct btrfs_key key;
3535 struct btrfs_disk_key found_key;
3537 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3538 level = root_item->drop_level;
3539 path.lowest_level = level;
3540 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3543 btrfs_node_key(path.nodes[level], &found_key,
3545 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3546 sizeof(found_key)));
/* Main walk: descend and ascend until the tree is exhausted. */
3550 wret = walk_down_tree(root, &path, wc, &level);
3556 wret = walk_up_tree(root, &path, wc, &level);
3563 btrfs_release_path(&path);
/* Report any corrupt blocks found during the walk; try to repair them. */
3565 if (!cache_tree_empty(&corrupt_blocks)) {
3566 struct cache_extent *cache;
3567 struct btrfs_corrupt_block *corrupt;
3569 printf("The following tree block(s) is corrupted in tree %llu:\n",
3570 root->root_key.objectid);
3571 cache = first_cache_extent(&corrupt_blocks);
3573 corrupt = container_of(cache,
3574 struct btrfs_corrupt_block,
3576 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3577 cache->start, corrupt->level,
3578 corrupt->key.objectid, corrupt->key.type,
3579 corrupt->key.offset);
3580 cache = next_cache_extent(cache);
3583 printf("Try to repair the btree for root %llu\n",
3584 root->root_key.objectid);
3585 ret = repair_btree(root, &corrupt_blocks);
3587 fprintf(stderr, "Failed to repair btree: %s\n",
3590 printf("Btree for root %llu is fixed\n",
3591 root->root_key.objectid);
3595 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Close out the last shared node before checking inode records. */
3599 if (root_node.current) {
3600 root_node.current->checked = 1;
3601 maybe_free_inode_rec(&root_node.inode_cache,
3605 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down per-root state so the next root starts clean. */
3609 free_corrupt_blocks_tree(&corrupt_blocks);
3610 root->fs_info->corrupt_blocks = NULL;
3611 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return non-zero if @objectid names a tree that should be checked as a
 * filesystem root: the two reloc trees are treated as fs roots, anything
 * else defers to is_fstree().
 */
3615 static int fs_root_objectid(u64 objectid)
3617 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3618 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3620 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree root and run check_fs_root() on
 * each fs/subvolume tree, while also feeding ROOT_REF/ROOT_BACKREF items
 * into the root cache.  If the tree root node changes underneath us (a
 * repair COWed it) or a root check returns -EAGAIN, all collected root
 * records are discarded and the scan restarts from the beginning.
 */
3623 static int check_fs_roots(struct btrfs_root *root,
3624 struct cache_tree *root_cache)
3626 struct btrfs_path path;
3627 struct btrfs_key key;
3628 struct walk_control wc;
3629 struct extent_buffer *leaf, *tree_node;
3630 struct btrfs_root *tmp_root;
3631 struct btrfs_root *tree_root = root->fs_info->tree_root;
/* Progress reporting for the interactive task UI. */
3635 if (ctx.progress_enabled) {
3636 ctx.tp = TASK_FS_ROOTS;
3637 task_start(ctx.info);
3641 * Just in case we made any changes to the extent tree that weren't
3642 * reflected into the free space cache yet.
3645 reset_cached_block_groups(root->fs_info);
3646 memset(&wc, 0, sizeof(wc));
3647 cache_tree_init(&wc.shared);
3648 btrfs_init_path(&path);
3653 key.type = BTRFS_ROOT_ITEM_KEY;
3654 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree root node so we can detect COW during the scan. */
3659 tree_node = tree_root->node;
3661 if (tree_node != tree_root->node) {
/* Tree root changed (repair COW): drop everything and rescan. */
3662 free_root_recs_tree(root_cache);
3663 btrfs_release_path(&path);
3666 leaf = path.nodes[0];
3667 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3668 ret = btrfs_next_leaf(tree_root, &path);
3674 leaf = path.nodes[0];
3676 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3677 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3678 fs_root_objectid(key.objectid)) {
/* Reloc trees bypass the fs-root cache; others are read cached. */
3679 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3680 tmp_root = btrfs_read_fs_root_no_cache(
3681 root->fs_info, &key);
3683 key.offset = (u64)-1;
3684 tmp_root = btrfs_read_fs_root(
3685 root->fs_info, &key);
3687 if (IS_ERR(tmp_root)) {
3691 ret = check_fs_root(tmp_root, root_cache, &wc);
3692 if (ret == -EAGAIN) {
3693 free_root_recs_tree(root_cache);
3694 btrfs_release_path(&path);
3699 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3700 btrfs_free_fs_root(tmp_root);
3701 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3702 key.type == BTRFS_ROOT_BACKREF_KEY) {
3703 process_root_ref(leaf, path.slots[0], &key,
3710 btrfs_release_path(&path);
/* All shared nodes should have been consumed by the walks. */
3712 free_extent_cache_tree(&wc.shared);
3713 if (!cache_tree_empty(&wc.shared))
3714 fprintf(stderr, "warning line %d\n", __LINE__);
3716 task_stop(ctx.info);
/*
 * Cross-check every backref attached to extent record @rec:
 *  - each backref must have been found in the extent tree;
 *  - tree backrefs must have been referenced from a tree block;
 *  - data backrefs must match in ref count, disk bytenr and byte length;
 *  - the summed reference count must equal the extent item's refs.
 * Returns non-zero if anything is inconsistent; with @print_errs each
 * mismatch is also reported on stderr.
 */
3721 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3723 struct list_head *cur = rec->backrefs.next;
3724 struct extent_backref *back;
3725 struct tree_backref *tback;
3726 struct data_backref *dback;
3730 while(cur != &rec->backrefs) {
3731 back = list_entry(cur, struct extent_backref, list);
/* Backref never seen in the extent tree itself. */
3733 if (!back->found_extent_tree) {
3737 if (back->is_data) {
3738 dback = (struct data_backref *)back;
3739 fprintf(stderr, "Backref %llu %s %llu"
3740 " owner %llu offset %llu num_refs %lu"
3741 " not found in extent tree\n",
3742 (unsigned long long)rec->start,
3743 back->full_backref ?
3745 back->full_backref ?
3746 (unsigned long long)dback->parent:
3747 (unsigned long long)dback->root,
3748 (unsigned long long)dback->owner,
3749 (unsigned long long)dback->offset,
3750 (unsigned long)dback->num_refs);
3752 tback = (struct tree_backref *)back;
3753 fprintf(stderr, "Backref %llu parent %llu"
3754 " root %llu not found in extent tree\n",
3755 (unsigned long long)rec->start,
3756 (unsigned long long)tback->parent,
3757 (unsigned long long)tback->root);
/* Tree backref present in extent tree but never referenced back. */
3760 if (!back->is_data && !back->found_ref) {
3764 tback = (struct tree_backref *)back;
3765 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3766 (unsigned long long)rec->start,
3767 back->full_backref ? "parent" : "root",
3768 back->full_backref ?
3769 (unsigned long long)tback->parent :
3770 (unsigned long long)tback->root, back);
3772 if (back->is_data) {
3773 dback = (struct data_backref *)back;
/* Local count of file-extent refs must match the extent item. */
3774 if (dback->found_ref != dback->num_refs) {
3778 fprintf(stderr, "Incorrect local backref count"
3779 " on %llu %s %llu owner %llu"
3780 " offset %llu found %u wanted %u back %p\n",
3781 (unsigned long long)rec->start,
3782 back->full_backref ?
3784 back->full_backref ?
3785 (unsigned long long)dback->parent:
3786 (unsigned long long)dback->root,
3787 (unsigned long long)dback->owner,
3788 (unsigned long long)dback->offset,
3789 dback->found_ref, dback->num_refs, back);
3791 if (dback->disk_bytenr != rec->start) {
3795 fprintf(stderr, "Backref disk bytenr does not"
3796 " match extent record, bytenr=%llu, "
3797 "ref bytenr=%llu\n",
3798 (unsigned long long)rec->start,
3799 (unsigned long long)dback->disk_bytenr);
3802 if (dback->bytes != rec->nr) {
3806 fprintf(stderr, "Backref bytes do not match "
3807 "extent backref, bytenr=%llu, ref "
3808 "bytes=%llu, backref bytes=%llu\n",
3809 (unsigned long long)rec->start,
3810 (unsigned long long)rec->nr,
3811 (unsigned long long)dback->bytes);
/* Accumulate the total ref count across all backrefs. */
3814 if (!back->is_data) {
3817 dback = (struct data_backref *)back;
3818 found += dback->found_ref;
3821 if (found != rec->refs) {
3825 fprintf(stderr, "Incorrect global backref count "
3826 "on %llu found %llu wanted %llu\n",
3827 (unsigned long long)rec->start,
3828 (unsigned long long)found,
3829 (unsigned long long)rec->refs);
/*
 * Drain and release every backref queued on extent record @rec,
 * leaving its backrefs list empty.
 */
3835 static int free_all_extent_backrefs(struct extent_record *rec)
3837 struct extent_backref *back;
3838 struct list_head *cur;
3839 while (!list_empty(&rec->backrefs)) {
3840 cur = rec->backrefs.next;
3841 back = list_entry(cur, struct extent_backref, list);
/*
 * Tear down a whole extent-record cache tree: pop each record from the
 * tree and release it together with its backrefs.
 */
3848 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3849 struct cache_tree *extent_cache)
3851 struct cache_extent *cache;
3852 struct extent_record *rec;
3855 cache = first_cache_extent(extent_cache);
3858 rec = container_of(cache, struct extent_record, cache);
3859 remove_cache_extent(extent_cache, cache);
3860 free_all_extent_backrefs(rec);
/*
 * Release @rec from @extent_cache early if it is fully verified:
 * content and owner checked, ref counts consistent, no duplicates, all
 * backpointers checked, and none of the bad-backref / stripe-crossing /
 * wrong-chunk-type conditions set.  Keeping only unresolved records in
 * the cache bounds memory usage during the scan.
 */
3865 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3866 struct extent_record *rec)
3868 if (rec->content_checked && rec->owner_ref_checked &&
3869 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3870 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3871 !rec->bad_full_backref && !rec->crossing_stripes &&
3872 !rec->wrong_chunk_type) {
3873 remove_cache_extent(extent_cache, &rec->cache);
3874 free_all_extent_backrefs(rec);
3875 list_del_init(&rec->list);
/*
 * Decide whether tree block @buf is legitimately owned by the root named
 * in its header.  First look for a matching non-full tree backref on
 * @rec; failing that, read the owner's fs tree and search for the block:
 * it is valid if the parent node at level+1 actually points at
 * buf->start.  Returns 0 when ownership checks out, 1 otherwise.
 */
3881 static int check_owner_ref(struct btrfs_root *root,
3882 struct extent_record *rec,
3883 struct extent_buffer *buf)
3885 struct extent_backref *node;
3886 struct tree_backref *back;
3887 struct btrfs_root *ref_root;
3888 struct btrfs_key key;
3889 struct btrfs_path path;
3890 struct extent_buffer *parent;
/* Fast path: an existing root backref matching the header owner. */
3895 list_for_each_entry(node, &rec->backrefs, list) {
3898 if (!node->found_ref)
3900 if (node->full_backref)
3902 back = (struct tree_backref *)node;
3903 if (btrfs_header_owner(buf) == back->root)
3906 BUG_ON(rec->is_root);
3908 /* try to find the block by search corresponding fs tree */
3909 key.objectid = btrfs_header_owner(buf);
3910 key.type = BTRFS_ROOT_ITEM_KEY;
3911 key.offset = (u64)-1;
3913 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3914 if (IS_ERR(ref_root))
/* Use the block's first key to locate its position in the owner tree. */
3917 level = btrfs_header_level(buf);
3919 btrfs_item_key_to_cpu(buf, &key, 0);
3921 btrfs_node_key_to_cpu(buf, &key, 0);
3923 btrfs_init_path(&path);
/* Search down only to the parent level of @buf. */
3924 path.lowest_level = level + 1;
3925 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
/* Ownership confirmed if the parent slot points at this block. */
3929 parent = path.nodes[level + 1];
3930 if (parent && buf->start == btrfs_node_blockptr(parent,
3931 path.slots[level + 1]))
3934 btrfs_release_path(&path);
3935 return found ? 0 : 1;
/*
 * Return whether @rec has a (non-full) tree backref pointing at the
 * extent tree, i.e. the extent belongs to the extent tree itself.
 */
3938 static int is_extent_tree_record(struct extent_record *rec)
3940 struct list_head *cur = rec->backrefs.next;
3941 struct extent_backref *node;
3942 struct tree_backref *back;
3945 while(cur != &rec->backrefs) {
3946 node = list_entry(cur, struct extent_backref, list);
3950 back = (struct tree_backref *)node;
/* Full backrefs carry a parent bytenr, not a root id; skip them. */
3951 if (node->full_backref)
3953 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Called when reading a tree block failed: if the block belongs to the
 * extent tree, register it as a corrupt extent record (keyed by the
 * parent key remembered on the extent record) so repair can drop it.
 */
3960 static int record_bad_block_io(struct btrfs_fs_info *info,
3961 struct cache_tree *extent_cache,
3964 struct extent_record *rec;
3965 struct cache_extent *cache;
3966 struct btrfs_key key;
3968 cache = lookup_cache_extent(extent_cache, start, len);
3972 rec = container_of(cache, struct extent_record, cache);
/* Only extent-tree blocks are recorded here. */
3973 if (!is_extent_tree_record(rec))
3976 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3977 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap slots @slot and @slot+1 inside tree block @buf, used by
 * fix_key_order() to sort mis-ordered keys.
 *
 * For an internal node the two key pointers are exchanged wholesale.
 * For a leaf the item headers, their payloads, and the offset/size
 * fields must all be exchanged, and the keys rewritten via
 * btrfs_set_item_key_unsafe().  In both cases, if slot 0 changes, the
 * parent's low key is fixed up too.
 */
3980 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3981 struct extent_buffer *buf, int slot)
3983 if (btrfs_header_level(buf)) {
3984 struct btrfs_key_ptr ptr1, ptr2;
/* Node: exchange the two btrfs_key_ptr entries. */
3986 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3987 sizeof(struct btrfs_key_ptr));
3988 read_extent_buffer(buf, &ptr2,
3989 btrfs_node_key_ptr_offset(slot + 1),
3990 sizeof(struct btrfs_key_ptr));
3991 write_extent_buffer(buf, &ptr1,
3992 btrfs_node_key_ptr_offset(slot + 1),
3993 sizeof(struct btrfs_key_ptr));
3994 write_extent_buffer(buf, &ptr2,
3995 btrfs_node_key_ptr_offset(slot),
3996 sizeof(struct btrfs_key_ptr));
/* Slot 0 changed: propagate the new first key to the parents. */
3998 struct btrfs_disk_key key;
3999 btrfs_node_key(buf, &key, 0);
4000 btrfs_fixup_low_keys(root, path, &key,
4001 btrfs_header_level(buf) + 1);
/* Leaf: swap item headers and copy each payload to the other's area. */
4004 struct btrfs_item *item1, *item2;
4005 struct btrfs_key k1, k2;
4006 char *item1_data, *item2_data;
4007 u32 item1_offset, item2_offset, item1_size, item2_size;
4009 item1 = btrfs_item_nr(slot);
4010 item2 = btrfs_item_nr(slot + 1);
4011 btrfs_item_key_to_cpu(buf, &k1, slot);
4012 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4013 item1_offset = btrfs_item_offset(buf, item1);
4014 item2_offset = btrfs_item_offset(buf, item2);
4015 item1_size = btrfs_item_size(buf, item1);
4016 item2_size = btrfs_item_size(buf, item2);
4018 item1_data = malloc(item1_size);
4021 item2_data = malloc(item2_size);
/* Write each payload into the other item's data area. */
4027 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4028 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4030 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4031 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange the offset/size bookkeeping to match the moved payloads. */
4035 btrfs_set_item_offset(buf, item1, item2_offset);
4036 btrfs_set_item_offset(buf, item2, item1_offset);
4037 btrfs_set_item_size(buf, item1, item2_size);
4038 btrfs_set_item_size(buf, item2, item1_size);
/* Finally swap the keys; "unsafe" skips ordering checks, which is
 * exactly what we need while the leaf is still mis-ordered. */
4040 path->slots[0] = slot;
4041 btrfs_set_item_key_unsafe(root, path, &k2);
4042 path->slots[0] = slot + 1;
4043 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Restore ascending key order in the tree block at path->lowest_level by
 * swapping adjacent out-of-order entries (bubble-sort style) with
 * swap_values(), then marking the buffer dirty.
 */
4048 static int fix_key_order(struct btrfs_trans_handle *trans,
4049 struct btrfs_root *root,
4050 struct btrfs_path *path)
4052 struct extent_buffer *buf;
4053 struct btrfs_key k1, k2;
4055 int level = path->lowest_level;
4058 buf = path->nodes[level];
4059 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Node vs leaf use different key accessors. */
4061 btrfs_node_key_to_cpu(buf, &k1, i);
4062 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4064 btrfs_item_key_to_cpu(buf, &k1, i);
4065 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Already ordered: nothing to do for this pair. */
4067 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4069 ret = swap_values(root, path, buf, i);
4072 btrfs_mark_buffer_dirty(buf);
/*
 * Delete a corrupt item at @slot of leaf @buf by shifting the following
 * item headers down and decrementing nritems.  Only key types that can
 * be safely regenerated later (dir index, extent items, extent refs)
 * are eligible; anything else is refused so data is not thrown away.
 * If slot 0 is removed, the parent's low key is fixed up.
 */
4078 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4079 struct btrfs_root *root,
4080 struct btrfs_path *path,
4081 struct extent_buffer *buf, int slot)
4083 struct btrfs_key key;
4084 int nritems = btrfs_header_nritems(buf);
4086 btrfs_item_key_to_cpu(buf, &key, slot);
4088 /* These are all the keys we can deal with missing. */
4089 if (key.type != BTRFS_DIR_INDEX_KEY &&
4090 key.type != BTRFS_EXTENT_ITEM_KEY &&
4091 key.type != BTRFS_METADATA_ITEM_KEY &&
4092 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4093 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4096 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4097 (unsigned long long)key.objectid, key.type,
4098 (unsigned long long)key.offset, slot, buf->start);
/* Slide the item headers after @slot one position down. */
4099 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4100 btrfs_item_nr_offset(slot + 1),
4101 sizeof(struct btrfs_item) *
4102 (nritems - slot - 1));
4103 btrfs_set_header_nritems(buf, nritems - 1);
/* Removing the first item changes the leaf's low key. */
4105 struct btrfs_disk_key disk_key;
4107 btrfs_item_key(buf, &disk_key, 0);
4108 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4110 btrfs_mark_buffer_dirty(buf);
/*
 * fix_item_offset - repair leaf items whose data offsets are inconsistent.
 *
 * For every item in the leaf, checks that its data end lines up with either
 * the leaf data size (for slot 0) or the previous item's data offset.  Items
 * that overhang are deleted via delete_bogus_item(); items that fall short
 * are shifted up by the gap, and the buffer is dirtied.
 *
 * NOTE(review): decimated chunk — several branch bodies and the final return
 * are elided from view; verify against upstream btrfs-progs.
 */
4114 static int fix_item_offset(struct btrfs_trans_handle *trans,
4115 struct btrfs_root *root,
4116 struct btrfs_path *path)
4118 struct extent_buffer *buf;
4122 /* We should only get this for leaves */
4123 BUG_ON(path->lowest_level);
4124 buf = path->nodes[0];
4126 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4127 unsigned int shift = 0, offset;
/* First item's data must end exactly at the leaf data size. */
4129 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4130 BTRFS_LEAF_DATA_SIZE(root)) {
4131 if (btrfs_item_end_nr(buf, i) >
4132 BTRFS_LEAF_DATA_SIZE(root)) {
4133 ret = delete_bogus_item(trans, root, path,
4137 fprintf(stderr, "item is off the end of the "
4138 "leaf, can't fix\n");
4142 shift = BTRFS_LEAF_DATA_SIZE(root) -
4143 btrfs_item_end_nr(buf, i);
/* Later items must abut the previous item's data offset. */
4144 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4145 btrfs_item_offset_nr(buf, i - 1)) {
4146 if (btrfs_item_end_nr(buf, i) >
4147 btrfs_item_offset_nr(buf, i - 1)) {
4148 ret = delete_bogus_item(trans, root, path,
4152 fprintf(stderr, "items overlap, can't fix\n");
4156 shift = btrfs_item_offset_nr(buf, i - 1) -
4157 btrfs_item_end_nr(buf, i);
4162 printf("Shifting item nr %d by %u bytes in block %llu\n",
4163 i, shift, (unsigned long long)buf->start);
4164 offset = btrfs_item_offset_nr(buf, i);
/* Move the item data up by 'shift' and record the new offset. */
4165 memmove_extent_buffer(buf,
4166 btrfs_leaf_data(buf) + offset + shift,
4167 btrfs_leaf_data(buf) + offset,
4168 btrfs_item_size_nr(buf, i));
4169 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4171 btrfs_mark_buffer_dirty(buf);
4175 * We may have moved things, in which case we want to exit so we don't
4176 * write those changes out. Once we have proper abort functionality in
4177 * progs this can be changed to something nicer.
4184 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4185 * then just return -EIO.
/*
 * Only key-order and item-offset corruptions are repairable here.  For every
 * root that references the block (found via btrfs_find_all_roots), a
 * transaction is started, btrfs_search_slot is used to COW down to the bad
 * block, and the matching fixer (fix_key_order / fix_item_offset) is run.
 */
4187 static int try_to_fix_bad_block(struct btrfs_root *root,
4188 struct extent_buffer *buf,
4189 enum btrfs_tree_block_status status)
4191 struct btrfs_trans_handle *trans;
4192 struct ulist *roots;
4193 struct ulist_node *node;
4194 struct btrfs_root *search_root;
4195 struct btrfs_path *path;
4196 struct ulist_iterator iter;
4197 struct btrfs_key root_key, key;
/* Bail out early for corruption types we have no fixer for. */
4200 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4201 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4204 path = btrfs_alloc_path();
4208 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4211 btrfs_free_path(path);
/* Repair the block once per referencing root. */
4215 ULIST_ITER_INIT(&iter);
4216 while ((node = ulist_next(roots, &iter))) {
4217 root_key.objectid = node->val;
4218 root_key.type = BTRFS_ROOT_ITEM_KEY;
4219 root_key.offset = (u64)-1;
4221 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4228 trans = btrfs_start_transaction(search_root, 0);
4229 if (IS_ERR(trans)) {
4230 ret = PTR_ERR(trans);
/* Search down to the bad block itself so it gets COW'ed. */
4234 path->lowest_level = btrfs_header_level(buf);
4235 path->skip_check_block = 1;
4236 if (path->lowest_level)
4237 btrfs_node_key_to_cpu(buf, &key, 0);
4239 btrfs_item_key_to_cpu(buf, &key, 0);
4240 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4243 btrfs_commit_transaction(trans, search_root);
4246 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4247 ret = fix_key_order(trans, search_root, path);
4248 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4249 ret = fix_item_offset(trans, search_root, path);
4251 btrfs_commit_transaction(trans, search_root);
4254 btrfs_release_path(path);
4255 btrfs_commit_transaction(trans, search_root);
4258 btrfs_free_path(path);
/*
 * check_block - validate a tree block and update its extent record.
 *
 * Looks up the block's extent record, stamps it with the header generation,
 * first key and level, then runs btrfs_check_leaf/btrfs_check_node.  On a
 * non-clean status an in-place repair is attempted via try_to_fix_bad_block;
 * if that fails the block is reported as bad.  Ownership checking is skipped
 * for FULL_BACKREF blocks.
 */
4262 static int check_block(struct btrfs_root *root,
4263 struct cache_tree *extent_cache,
4264 struct extent_buffer *buf, u64 flags)
4266 struct extent_record *rec;
4267 struct cache_extent *cache;
4268 struct btrfs_key key;
4269 enum btrfs_tree_block_status status;
4273 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4276 rec = container_of(cache, struct extent_record, cache);
4277 rec->generation = btrfs_header_generation(buf);
4279 level = btrfs_header_level(buf);
/* Record the block's first key (item key for leaves, node key otherwise). */
4280 if (btrfs_header_nritems(buf) > 0) {
4283 btrfs_item_key_to_cpu(buf, &key, 0);
4285 btrfs_node_key_to_cpu(buf, &key, 0);
4287 rec->info_objectid = key.objectid;
4289 rec->info_level = level;
4291 if (btrfs_is_leaf(buf))
4292 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4294 status = btrfs_check_node(root, &rec->parent_key, buf);
4296 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/* Repair was requested; try to fix the block before giving up. */
4298 status = try_to_fix_bad_block(root, buf, status);
4299 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4301 fprintf(stderr, "bad block %llu\n",
4302 (unsigned long long)buf->start);
4305 * Signal to callers we need to start the scan over
4306 * again since we'll have cow'ed blocks.
4311 rec->content_checked = 1;
4312 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4313 rec->owner_ref_checked = 1;
4315 ret = check_owner_ref(root, rec, buf);
4317 rec->owner_ref_checked = 1;
4321 maybe_free_extent_rec(extent_cache, rec);
/*
 * find_tree_backref - linear search of rec->backrefs for a tree backref.
 *
 * Matches a full backref by @parent, or a normal backref by @root.
 * Returns the matching tree_backref, or NULL if none exists (the return
 * statements fall outside this decimated view).
 */
4325 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4326 u64 parent, u64 root)
4328 struct list_head *cur = rec->backrefs.next;
4329 struct extent_backref *node;
4330 struct tree_backref *back;
4332 while(cur != &rec->backrefs) {
4333 node = list_entry(cur, struct extent_backref, list);
4337 back = (struct tree_backref *)node;
/* parent != 0 selects full-backref entries keyed by parent bytenr. */
4339 if (!node->full_backref)
4341 if (parent == back->parent)
/* parent == 0 selects by-root entries. */
4344 if (node->full_backref)
4346 if (back->root == root)
/*
 * alloc_tree_backref - allocate and link a new tree backref on @rec.
 *
 * A non-zero @parent makes it a full backref keyed by parent bytenr;
 * otherwise it is keyed by @root.  The backref is appended to
 * rec->backrefs.
 */
4353 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4354 u64 parent, u64 root)
4356 struct tree_backref *ref = malloc(sizeof(*ref));
4360 memset(&ref->node, 0, sizeof(ref->node));
4362 ref->parent = parent;
4363 ref->node.full_backref = 1;
4366 ref->node.full_backref = 0;
4368 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * find_data_backref - linear search of rec->backrefs for a data backref.
 *
 * Full backrefs match on @parent; normal ones match on @root/@owner/@offset.
 * When @found_ref is set and the candidate already has found_ref, the byte
 * length and disk bytenr must also match — mismatches are skipped so that
 * conflicting on-disk refs show up as separate backrefs.
 */
4373 static struct data_backref *find_data_backref(struct extent_record *rec,
4374 u64 parent, u64 root,
4375 u64 owner, u64 offset,
4377 u64 disk_bytenr, u64 bytes)
4379 struct list_head *cur = rec->backrefs.next;
4380 struct extent_backref *node;
4381 struct data_backref *back;
4383 while(cur != &rec->backrefs) {
4384 node = list_entry(cur, struct extent_backref, list);
4388 back = (struct data_backref *)node;
4390 if (!node->full_backref)
4392 if (parent == back->parent)
4395 if (node->full_backref)
4397 if (back->root == root && back->owner == owner &&
4398 back->offset == offset) {
/* Same logical ref but different extent geometry: treat as distinct. */
4399 if (found_ref && node->found_ref &&
4400 (back->bytes != bytes ||
4401 back->disk_bytenr != disk_bytenr))
/*
 * alloc_data_backref - allocate and link a new data backref on @rec.
 *
 * Non-zero @parent makes it a full backref; otherwise it records
 * root/owner/offset.  Initializes bytes to @max_size, appends the ref to
 * rec->backrefs and raises rec->max_size if this ref is larger.
 */
4410 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4411 u64 parent, u64 root,
4412 u64 owner, u64 offset,
4415 struct data_backref *ref = malloc(sizeof(*ref));
4419 memset(&ref->node, 0, sizeof(ref->node));
4420 ref->node.is_data = 1;
4423 ref->parent = parent;
4426 ref->node.full_backref = 1;
4430 ref->offset = offset;
4431 ref->node.full_backref = 0;
4433 ref->bytes = max_size;
4436 list_add_tail(&ref->node.list, &rec->backrefs);
4437 if (max_size > rec->max_size)
4438 rec->max_size = max_size;
4442 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the owning block group's flags disagree
 * with the extent: data extents must sit in a DATA block group; metadata
 * extents in METADATA or SYSTEM.  SYSTEM vs METADATA is disambiguated by
 * the first backref's root (chunk tree => SYSTEM).
 */
4443 static void check_extent_type(struct extent_record *rec)
4445 struct btrfs_block_group_cache *bg_cache;
4447 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4451 /* data extent, check chunk directly*/
4452 if (!rec->metadata) {
4453 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4454 rec->wrong_chunk_type = 1;
4458 /* metadata extent, check the obvious case first */
4459 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4460 BTRFS_BLOCK_GROUP_METADATA))) {
4461 rec->wrong_chunk_type = 1;
4466 * Check SYSTEM extent, as it's also marked as metadata, we can only
4467 * make sure it's a SYSTEM extent by its backref
4469 if (!list_empty(&rec->backrefs)) {
4470 struct extent_backref *node;
4471 struct tree_backref *tback;
4474 node = list_entry(rec->backrefs.next, struct extent_backref,
4476 if (node->is_data) {
4477 /* tree block shouldn't have data backref */
4478 rec->wrong_chunk_type = 1;
4481 tback = container_of(node, struct tree_backref, node);
4483 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4484 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4486 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4487 if (!(bg_cache->flags & bg_type))
4488 rec->wrong_chunk_type = 1;
/*
 * add_extent_rec - find-or-create an extent record in @extent_cache.
 *
 * If a record overlapping [start, nr) already exists, it is updated in
 * place: duplicate records (different start, or a second real extent item)
 * are queued on rec->dups / duplicate_extents for later repair; refs,
 * parent key/generation, max_size and the stripe-crossing flag are merged.
 * Otherwise a fresh record is allocated, initialized and inserted.
 *
 * NOTE(review): decimated chunk — error paths and several assignments are
 * elided from view; verify field initialization against upstream.
 */
4492 static int add_extent_rec(struct cache_tree *extent_cache,
4493 struct btrfs_key *parent_key, u64 parent_gen,
4494 u64 start, u64 nr, u64 extent_item_refs,
4495 int is_root, int inc_ref, int set_checked,
4496 int metadata, int extent_rec, u64 max_size)
4498 struct extent_record *rec;
4499 struct cache_extent *cache;
4503 cache = lookup_cache_extent(extent_cache, start, nr);
4505 rec = container_of(cache, struct extent_record, cache);
4509 rec->nr = max(nr, max_size);
4512 * We need to make sure to reset nr to whatever the extent
4513 * record says was the real size, this way we can compare it to
4517 if (start != rec->start || rec->found_rec) {
4518 struct extent_record *tmp;
/* Queue the existing record for duplicate resolution. */
4521 if (list_empty(&rec->list))
4522 list_add_tail(&rec->list,
4523 &duplicate_extents);
4526 * We have to do this song and dance in case we
4527 * find an extent record that falls inside of
4528 * our current extent record but does not have
4529 * the same objectid.
4531 tmp = malloc(sizeof(*tmp));
4535 tmp->max_size = max_size;
4538 tmp->metadata = metadata;
4539 tmp->extent_item_refs = extent_item_refs;
4540 INIT_LIST_HEAD(&tmp->list);
4541 list_add_tail(&tmp->list, &rec->dups);
4542 rec->num_duplicates++;
4549 if (extent_item_refs && !dup) {
/* Two extent items claiming the same block: report the conflict. */
4550 if (rec->extent_item_refs) {
4551 fprintf(stderr, "block %llu rec "
4552 "extent_item_refs %llu, passed %llu\n",
4553 (unsigned long long)start,
4554 (unsigned long long)
4555 rec->extent_item_refs,
4556 (unsigned long long)extent_item_refs);
4558 rec->extent_item_refs = extent_item_refs;
4563 rec->content_checked = 1;
4564 rec->owner_ref_checked = 1;
4568 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4570 rec->parent_generation = parent_gen;
4572 if (rec->max_size < max_size)
4573 rec->max_size = max_size;
4576 * A metadata extent can't cross stripe_len boundary, otherwise
4577 * kernel scrub won't be able to handle it.
4578 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4581 if (metadata && check_crossing_stripes(rec->start,
4583 rec->crossing_stripes = 1;
4584 check_extent_type(rec);
4585 maybe_free_extent_rec(extent_cache, rec);
/* No existing record: allocate and initialize a new one. */
4588 rec = malloc(sizeof(*rec));
4592 rec->max_size = max_size;
4593 rec->nr = max(nr, max_size);
4594 rec->found_rec = !!extent_rec;
4595 rec->content_checked = 0;
4596 rec->owner_ref_checked = 0;
4597 rec->num_duplicates = 0;
4598 rec->metadata = metadata;
4599 rec->flag_block_full_backref = -1;
4600 rec->bad_full_backref = 0;
4601 rec->crossing_stripes = 0;
4602 rec->wrong_chunk_type = 0;
4603 INIT_LIST_HEAD(&rec->backrefs);
4604 INIT_LIST_HEAD(&rec->dups);
4605 INIT_LIST_HEAD(&rec->list);
4617 if (extent_item_refs)
4618 rec->extent_item_refs = extent_item_refs;
4620 rec->extent_item_refs = 0;
4623 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4625 memset(&rec->parent_key, 0, sizeof(*parent_key));
4628 rec->parent_generation = parent_gen;
4630 rec->parent_generation = 0;
4632 rec->cache.start = start;
4633 rec->cache.size = nr;
4634 ret = insert_cache_extent(extent_cache, &rec->cache);
4638 rec->content_checked = 1;
4639 rec->owner_ref_checked = 1;
4643 if (check_crossing_stripes(rec->start, rec->max_size))
4644 rec->crossing_stripes = 1;
4645 check_extent_type(rec);
/*
 * add_tree_backref - record a tree-block backref on the extent at @bytenr.
 *
 * Creates a placeholder extent record if none exists, then finds or
 * allocates the matching tree backref.  @found_ref distinguishes a ref seen
 * while walking the tree (found_ref) from one read out of the extent tree
 * (found_extent_tree); a duplicate in either category is reported.
 */
4649 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4650 u64 parent, u64 root, int found_ref)
4652 struct extent_record *rec;
4653 struct tree_backref *back;
4654 struct cache_extent *cache;
4656 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: insert a 1-byte placeholder and re-look it up. */
4658 add_extent_rec(extent_cache, NULL, 0, bytenr,
4659 1, 0, 0, 0, 0, 1, 0, 0);
4660 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4665 rec = container_of(cache, struct extent_record, cache);
4666 if (rec->start != bytenr) {
4670 back = find_tree_backref(rec, parent, root);
4672 back = alloc_tree_backref(rec, parent, root);
4677 if (back->node.found_ref) {
4678 fprintf(stderr, "Extent back ref already exists "
4679 "for %llu parent %llu root %llu \n",
4680 (unsigned long long)bytenr,
4681 (unsigned long long)parent,
4682 (unsigned long long)root);
4684 back->node.found_ref = 1;
4686 if (back->node.found_extent_tree) {
4687 fprintf(stderr, "Extent back ref already exists "
4688 "for %llu parent %llu root %llu \n",
4689 (unsigned long long)bytenr,
4690 (unsigned long long)parent,
4691 (unsigned long long)root);
4693 back->node.found_extent_tree = 1;
4695 check_extent_type(rec);
4696 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_data_backref - record a data-extent backref on the extent at @bytenr.
 *
 * Creates a placeholder extent record if needed, raises its max_size, then
 * finds or allocates the matching data backref.  When @found_ref is set the
 * ref was seen from a file extent item and max_size is the authoritative
 * extent length; otherwise the ref came from the extent tree and carries
 * @num_refs.  Duplicates in the extent-tree category are reported.
 */
4700 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4701 u64 parent, u64 root, u64 owner, u64 offset,
4702 u32 num_refs, int found_ref, u64 max_size)
4704 struct extent_record *rec;
4705 struct data_backref *back;
4706 struct cache_extent *cache;
4708 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: insert a placeholder and re-look it up. */
4710 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4712 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4717 rec = container_of(cache, struct extent_record, cache);
4718 if (rec->max_size < max_size)
4719 rec->max_size = max_size;
4722 * If found_ref is set then max_size is the real size and must match the
4723 * existing refs. So if we have already found a ref then we need to
4724 * make sure that this ref matches the existing one, otherwise we need
4725 * to add a new backref so we can notice that the backrefs don't match
4726 * and we need to figure out who is telling the truth. This is to
4727 * account for that awful fsync bug I introduced where we'd end up with
4728 * a btrfs_file_extent_item that would have its length include multiple
4729 * prealloc extents or point inside of a prealloc extent.
4731 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4734 back = alloc_data_backref(rec, parent, root, owner, offset,
4740 BUG_ON(num_refs != 1);
4741 if (back->node.found_ref)
4742 BUG_ON(back->bytes != max_size);
4743 back->node.found_ref = 1;
4744 back->found_ref += 1;
4745 back->bytes = max_size;
4746 back->disk_bytenr = bytenr;
4748 rec->content_checked = 1;
4749 rec->owner_ref_checked = 1;
4751 if (back->node.found_extent_tree) {
4752 fprintf(stderr, "Extent back ref already exists "
4753 "for %llu parent %llu root %llu "
4754 "owner %llu offset %llu num_refs %lu\n",
4755 (unsigned long long)bytenr,
4756 (unsigned long long)parent,
4757 (unsigned long long)root,
4758 (unsigned long long)owner,
4759 (unsigned long long)offset,
4760 (unsigned long)num_refs);
4762 back->num_refs = num_refs;
4763 back->node.found_extent_tree = 1;
4765 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_pending - queue a block for reading unless it was already seen.
 *
 * Inserts [bytenr, size) into @seen first; only when that insert succeeds
 * (i.e. the range is new) is it also added to @pending.
 */
4769 static int add_pending(struct cache_tree *pending,
4770 struct cache_tree *seen, u64 bytenr, u32 size)
4773 ret = add_cache_extent(seen, bytenr, size);
4776 add_cache_extent(pending, bytenr, size);
/*
 * pick_next_pending - fill @bits with the next batch of block addresses.
 *
 * Preference order: a single readahead entry from @reada if available;
 * otherwise batch up entries from @nodes starting near @last (stepping back
 * 32K to pick up neighbours), falling back to @pending.  When room remains
 * (bits_nr - ret > 8), nearby pending extents within 32K of the batch's end
 * are appended to improve read locality.
 */
4780 static int pick_next_pending(struct cache_tree *pending,
4781 struct cache_tree *reada,
4782 struct cache_tree *nodes,
4783 u64 last, struct block_info *bits, int bits_nr,
4786 unsigned long node_start = last;
4787 struct cache_extent *cache;
/* Readahead entries take absolute priority, one at a time. */
4790 cache = search_cache_extent(reada, 0);
4792 bits[0].start = cache->start;
4793 bits[0].size = cache->size;
/* Back off 32K so neighbouring nodes get batched with the target. */
4798 if (node_start > 32768)
4799 node_start -= 32768;
4801 cache = search_cache_extent(nodes, node_start);
4803 cache = search_cache_extent(nodes, 0);
4806 cache = search_cache_extent(pending, 0);
4811 bits[ret].start = cache->start;
4812 bits[ret].size = cache->size;
4813 cache = next_cache_extent(cache);
4815 } while (cache && ret < bits_nr);
4821 bits[ret].start = cache->start;
4822 bits[ret].size = cache->size;
4823 cache = next_cache_extent(cache);
4825 } while (cache && ret < bits_nr);
/* Opportunistically pull in nearby pending extents for locality. */
4827 if (bits_nr - ret > 8) {
4828 u64 lookup = bits[0].start + bits[0].size;
4829 struct cache_extent *next;
4830 next = search_cache_extent(pending, lookup);
4832 if (next->start - lookup > 32768)
4834 bits[ret].start = next->start;
4835 bits[ret].size = next->size;
4836 lookup = next->start + next->size;
4840 next = next_cache_extent(next);
/*
 * free_chunk_record - cache_tree destructor callback for chunk records.
 * Unlinks the record from its lists before it is released.
 */
4848 static void free_chunk_record(struct cache_extent *cache)
4850 struct chunk_record *rec;
4852 rec = container_of(cache, struct chunk_record, cache);
4853 list_del_init(&rec->list);
4854 list_del_init(&rec->dextents);
/* Free every chunk record held in @chunk_cache. */
4858 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4860 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * free_device_record - rb-tree destructor callback for device records.
 * NOTE(review): the FREE_RB_BASED_TREE line below belongs to a separate
 * wrapper (free_device_cache) whose header is elided from this view.
 */
4863 static void free_device_record(struct rb_node *node)
4865 struct device_record *rec;
4867 rec = container_of(node, struct device_record, node);
4871 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * insert_block_group_record - add @bg_rec to @tree.
 * Inserts into the cache tree first; on success the record is also linked
 * onto the tree's block_groups list.
 */
4873 int insert_block_group_record(struct block_group_tree *tree,
4874 struct block_group_record *bg_rec)
4878 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4882 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * free_block_group_record - cache_tree destructor callback for block group
 * records; unlinks the record from its list before release.
 */
4886 static void free_block_group_record(struct cache_extent *cache)
4888 struct block_group_record *rec;
4890 rec = container_of(cache, struct block_group_record, cache);
4891 list_del_init(&rec->list);
/* Free every block group record held in @tree. */
4895 void free_block_group_tree(struct block_group_tree *tree)
4897 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * insert_device_extent_record - add @de_rec to @tree.
 * Uses the 2-key cache insert (objectid + start) because extents on
 * different devices may share start and size.  On success the record is
 * linked onto both orphan lists until its chunk/device are matched.
 */
4900 int insert_device_extent_record(struct device_extent_tree *tree,
4901 struct device_extent_record *de_rec)
4906 * Device extent is a bit different from the other extents, because
4907 * the extents which belong to the different devices may have the
4908 * same start and size, so we need use the special extent cache
4909 * search/insert functions.
4911 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4915 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4916 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * free_device_extent_record - cache_tree destructor callback for device
 * extent records; drops the record from any orphan lists it is still on.
 */
4920 static void free_device_extent_record(struct cache_extent *cache)
4922 struct device_extent_record *rec;
4924 rec = container_of(cache, struct device_extent_record, cache);
4925 if (!list_empty(&rec->chunk_list))
4926 list_del_init(&rec->chunk_list);
4927 if (!list_empty(&rec->device_list))
4928 list_del_init(&rec->device_list);
/* Free every device extent record held in @tree. */
4932 void free_device_extent_tree(struct device_extent_tree *tree)
4934 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4937 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * process_extent_ref_v0 - classify a v0-format extent ref as a tree or data
 * backref.  Owner objectids below FIRST_FREE_OBJECTID are tree roots, so
 * the ref becomes a tree backref; everything else is treated as data.
 */
4938 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4939 struct extent_buffer *leaf, int slot)
4941 struct btrfs_extent_ref_v0 *ref0;
4942 struct btrfs_key key;
4944 btrfs_item_key_to_cpu(leaf, &key, slot);
4945 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4946 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4947 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4949 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4950 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * btrfs_new_chunk_record - build a chunk_record from a chunk item.
 *
 * Allocates a record sized for the item's stripe count (calloc, exits on
 * allocation failure), copies the chunk geometry fields and per-stripe
 * devid/offset/dev_uuid out of the leaf.
 */
4956 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4957 struct btrfs_key *key,
4960 struct btrfs_chunk *ptr;
4961 struct chunk_record *rec;
4964 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4965 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Record size depends on the stripe count (flexible array at the end). */
4967 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4969 fprintf(stderr, "memory allocation failed\n");
4973 INIT_LIST_HEAD(&rec->list);
4974 INIT_LIST_HEAD(&rec->dextents);
4977 rec->cache.start = key->offset;
4978 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4980 rec->generation = btrfs_header_generation(leaf);
4982 rec->objectid = key->objectid;
4983 rec->type = key->type;
4984 rec->offset = key->offset;
4986 rec->length = rec->cache.size;
4987 rec->owner = btrfs_chunk_owner(leaf, ptr);
4988 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4989 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4990 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4991 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4992 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4993 rec->num_stripes = num_stripes;
4994 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy each stripe's devid, physical offset and device UUID. */
4996 for (i = 0; i < rec->num_stripes; ++i) {
4997 rec->stripes[i].devid =
4998 btrfs_stripe_devid_nr(leaf, ptr, i);
4999 rec->stripes[i].offset =
5000 btrfs_stripe_offset_nr(leaf, ptr, i);
5001 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5002 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * process_chunk_item - parse one chunk item and insert it into @chunk_cache.
 * A failed insert means a chunk covering the same range already exists,
 * which is reported to stderr.
 */
5009 static int process_chunk_item(struct cache_tree *chunk_cache,
5010 struct btrfs_key *key, struct extent_buffer *eb,
5013 struct chunk_record *rec;
5016 rec = btrfs_new_chunk_record(eb, key, slot);
5017 ret = insert_cache_extent(chunk_cache, &rec->cache);
5019 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5020 rec->offset, rec->length);
/*
 * process_device_item - parse one dev item into a device_record and insert
 * it into the rb-tree @dev_cache keyed by devid.  A failed insert means the
 * device was already recorded, which is reported to stderr.
 */
5027 static int process_device_item(struct rb_root *dev_cache,
5028 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5030 struct btrfs_dev_item *ptr;
5031 struct device_record *rec;
5034 ptr = btrfs_item_ptr(eb,
5035 slot, struct btrfs_dev_item);
5037 rec = malloc(sizeof(*rec));
5039 fprintf(stderr, "memory allocation failed\n");
5043 rec->devid = key->offset;
5044 rec->generation = btrfs_header_generation(eb);
5046 rec->objectid = key->objectid;
5047 rec->type = key->type;
5048 rec->offset = key->offset;
/* devid is re-read from the item itself (authoritative over key.offset). */
5050 rec->devid = btrfs_device_id(eb, ptr);
5051 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5052 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5054 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5056 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * btrfs_new_block_group_record - build a block_group_record from a block
 * group item.  Allocates with calloc (exits on failure) and copies the
 * key fields, generation and block group flags out of the leaf.
 */
5063 struct block_group_record *
5064 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5067 struct btrfs_block_group_item *ptr;
5068 struct block_group_record *rec;
5070 rec = calloc(1, sizeof(*rec));
5072 fprintf(stderr, "memory allocation failed\n");
5076 rec->cache.start = key->objectid;
5077 rec->cache.size = key->offset;
5079 rec->generation = btrfs_header_generation(leaf);
5081 rec->objectid = key->objectid;
5082 rec->type = key->type;
5083 rec->offset = key->offset;
5085 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5086 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5088 INIT_LIST_HEAD(&rec->list);
/*
 * process_block_group_item - parse one block group item and insert it into
 * @block_group_cache.  A failed insert means a block group covering the
 * same range already exists, which is reported to stderr.
 */
5093 static int process_block_group_item(struct block_group_tree *block_group_cache,
5094 struct btrfs_key *key,
5095 struct extent_buffer *eb, int slot)
5097 struct block_group_record *rec;
5100 rec = btrfs_new_block_group_record(eb, key, slot);
5101 ret = insert_block_group_record(block_group_cache, rec);
5103 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5104 rec->objectid, rec->offset);
/*
 * btrfs_new_device_extent_record - build a device_extent_record from a dev
 * extent item.  Allocates with calloc (exits on failure), keys the cache
 * entry by (devid, physical offset) and copies the owning chunk's
 * objectid/offset and the extent length out of the leaf.
 */
5111 struct device_extent_record *
5112 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5113 struct btrfs_key *key, int slot)
5115 struct device_extent_record *rec;
5116 struct btrfs_dev_extent *ptr;
5118 rec = calloc(1, sizeof(*rec));
5120 fprintf(stderr, "memory allocation failed\n");
/* 2-key cache entry: objectid = devid, start = physical offset. */
5124 rec->cache.objectid = key->objectid;
5125 rec->cache.start = key->offset;
5127 rec->generation = btrfs_header_generation(leaf);
5129 rec->objectid = key->objectid;
5130 rec->type = key->type;
5131 rec->offset = key->offset;
5133 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5134 rec->chunk_objecteid =
5135 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5137 btrfs_dev_extent_chunk_offset(leaf, ptr);
5138 rec->length = btrfs_dev_extent_length(leaf, ptr);
5139 rec->cache.size = rec->length;
5141 INIT_LIST_HEAD(&rec->chunk_list);
5142 INIT_LIST_HEAD(&rec->device_list);
/*
 * process_device_extent_item - parse one dev extent item and insert it into
 * @dev_extent_cache.  A failed insert means the same device extent already
 * exists, which is reported to stderr.
 * NOTE(review): the return type line is elided from this decimated view.
 */
5148 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5149 struct btrfs_key *key, struct extent_buffer *eb,
5152 struct device_extent_record *rec;
5155 rec = btrfs_new_device_extent_record(eb, key, slot);
5156 ret = insert_device_extent_record(dev_extent_cache, rec);
5159 "Device extent[%llu, %llu, %llu] existed.\n",
5160 rec->objectid, rec->offset, rec->length);
/*
 * process_extent_item - parse one EXTENT_ITEM / METADATA_ITEM.
 *
 * Determines the extent length (key.offset, or leafsize for skinny metadata
 * keys), handles the pre-v1 extent item format under
 * BTRFS_COMPAT_EXTENT_TREE_V0, records the extent via add_extent_rec, then
 * walks the inline refs and registers each one as a tree or data backref.
 */
5167 static int process_extent_item(struct btrfs_root *root,
5168 struct cache_tree *extent_cache,
5169 struct extent_buffer *eb, int slot)
5171 struct btrfs_extent_item *ei;
5172 struct btrfs_extent_inline_ref *iref;
5173 struct btrfs_extent_data_ref *dref;
5174 struct btrfs_shared_data_ref *sref;
5175 struct btrfs_key key;
5179 u32 item_size = btrfs_item_size_nr(eb, slot);
5185 btrfs_item_key_to_cpu(eb, &key, slot);
/* Skinny metadata keys store the level in offset; size is one leaf. */
5187 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5189 num_bytes = root->leafsize;
5191 num_bytes = key.offset;
/* Old (v0) extent items are smaller and carry only a ref count. */
5194 if (item_size < sizeof(*ei)) {
5195 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5196 struct btrfs_extent_item_v0 *ei0;
5197 BUG_ON(item_size != sizeof(*ei0));
5198 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5199 refs = btrfs_extent_refs_v0(eb, ei0);
5203 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5204 num_bytes, refs, 0, 0, 0, metadata, 1,
5208 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5209 refs = btrfs_extent_refs(eb, ei);
5210 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5215 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5216 refs, 0, 0, 0, metadata, 1, num_bytes);
/* Non-skinny tree blocks embed a tree_block_info before the refs. */
5218 ptr = (unsigned long)(ei + 1);
5219 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5220 key.type == BTRFS_EXTENT_ITEM_KEY)
5221 ptr += sizeof(struct btrfs_tree_block_info);
5223 end = (unsigned long)ei + item_size;
/* Walk the inline refs, dispatching on their type byte. */
5225 iref = (struct btrfs_extent_inline_ref *)ptr;
5226 type = btrfs_extent_inline_ref_type(eb, iref);
5227 offset = btrfs_extent_inline_ref_offset(eb, iref);
5229 case BTRFS_TREE_BLOCK_REF_KEY:
5230 add_tree_backref(extent_cache, key.objectid,
5233 case BTRFS_SHARED_BLOCK_REF_KEY:
5234 add_tree_backref(extent_cache, key.objectid,
5237 case BTRFS_EXTENT_DATA_REF_KEY:
5238 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5239 add_data_backref(extent_cache, key.objectid, 0,
5240 btrfs_extent_data_ref_root(eb, dref),
5241 btrfs_extent_data_ref_objectid(eb,
5243 btrfs_extent_data_ref_offset(eb, dref),
5244 btrfs_extent_data_ref_count(eb, dref),
5247 case BTRFS_SHARED_DATA_REF_KEY:
5248 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5249 add_data_backref(extent_cache, key.objectid, offset,
5251 btrfs_shared_data_ref_count(eb, sref),
5255 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5256 key.objectid, key.type, num_bytes);
5259 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range - verify the free space cache covers [offset, bytes).
 *
 * First carves out any superblock mirror copies that land inside the range
 * (via btrfs_rmap_block), recursing for a split and adjusting for partial
 * overlaps, then requires an exact-match free space entry for what remains.
 * Matched entries are unlinked so leftovers can be detected by the caller.
 */
5266 static int check_cache_range(struct btrfs_root *root,
5267 struct btrfs_block_group_cache *cache,
5268 u64 offset, u64 bytes)
5270 struct btrfs_free_space *entry;
5276 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5277 bytenr = btrfs_sb_offset(i);
5278 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5279 cache->key.objectid, bytenr, 0,
5280 &logical, &nr, &stripe_len);
/* Superblock copy entirely outside the range: ignore it. */
5285 if (logical[nr] + stripe_len <= offset)
5287 if (offset + bytes <= logical[nr])
5289 if (logical[nr] == offset) {
5290 if (stripe_len >= bytes) {
/* Super starts exactly at offset: skip past it. */
5294 bytes -= stripe_len;
5295 offset += stripe_len;
5296 } else if (logical[nr] < offset) {
5297 if (logical[nr] + stripe_len >=
/* Super overlaps the front: trim the range's head. */
5302 bytes = (offset + bytes) -
5303 (logical[nr] + stripe_len);
5304 offset = logical[nr] + stripe_len;
5307 * Could be tricky, the super may land in the
5308 * middle of the area we're checking. First
5309 * check the easiest case, it's at the end.
5311 if (logical[nr] + stripe_len >=
5313 bytes = logical[nr] - offset;
5317 /* Check the left side */
5318 ret = check_cache_range(root, cache,
5320 logical[nr] - offset);
5326 /* Now we continue with the right side */
5327 bytes = (offset + bytes) -
5328 (logical[nr] + stripe_len);
5329 offset = logical[nr] + stripe_len;
/* The surviving range must be a single exact free-space entry. */
5336 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5338 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5339 offset, offset+bytes);
5343 if (entry->offset != offset) {
5344 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5349 if (entry->bytes != bytes) {
5350 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5351 bytes, entry->bytes, offset);
/* Consume the entry so stragglers can be detected afterwards. */
5355 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - cross-check one block group's free space cache
 * against the extent tree.
 *
 * Walks extent/metadata items inside the block group and, for every gap
 * between allocated extents, requires a matching free-space entry via
 * check_cache_range (which also unlinks matched entries).  Any entries
 * left in the ctl afterwards mean the cache disagrees with the extent tree.
 */
5360 static int verify_space_cache(struct btrfs_root *root,
5361 struct btrfs_block_group_cache *cache)
5363 struct btrfs_path *path;
5364 struct extent_buffer *leaf;
5365 struct btrfs_key key;
5369 path = btrfs_alloc_path();
5373 root = root->fs_info->extent_root;
/* Never scan below the first superblock. */
5375 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5377 key.objectid = last;
5379 key.type = BTRFS_EXTENT_ITEM_KEY;
5381 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5386 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5387 ret = btrfs_next_leaf(root, path);
5395 leaf = path->nodes[0];
5396 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop at the end of this block group. */
5397 if (key.objectid >= cache->key.offset + cache->key.objectid)
5399 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5400 key.type != BTRFS_METADATA_ITEM_KEY) {
5405 if (last == key.objectid) {
5406 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5407 last = key.objectid + key.offset;
/* Skinny metadata items always span one leaf-sized block. */
5409 last = key.objectid + root->leafsize;
/* Gap between 'last' and this extent must be cached as free. */
5414 ret = check_cache_range(root, cache, last,
5415 key.objectid - last);
5418 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5419 last = key.objectid + key.offset;
5421 last = key.objectid + root->leafsize;
/* Trailing free space up to the end of the block group. */
5425 if (last < cache->key.objectid + cache->key.offset)
5426 ret = check_cache_range(root, cache, last,
5427 cache->key.objectid +
5428 cache->key.offset - last);
5431 btrfs_free_path(path);
5434 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5435 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - verify the free space cache of every block group.
 *
 * Skips verification entirely when the super's cache generation does not
 * match the filesystem generation (the kernel will invalidate the cache).
 * Otherwise iterates block groups, (re)initializes each free_space_ctl,
 * loads the on-disk cache and cross-checks it with verify_space_cache.
 * Returns -EINVAL if any block group failed.
 */
5443 static int check_space_cache(struct btrfs_root *root)
5445 struct btrfs_block_group_cache *cache;
5446 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5450 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5451 btrfs_super_generation(root->fs_info->super_copy) !=
5452 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5453 printf("cache and super generation don't match, space cache "
5454 "will be invalidated\n");
/* Progress reporting for the interactive task display. */
5458 if (ctx.progress_enabled) {
5459 ctx.tp = TASK_FREE_SPACE;
5460 task_start(ctx.info);
5464 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5468 start = cache->key.objectid + cache->key.offset;
5469 if (!cache->free_space_ctl) {
5470 if (btrfs_init_free_space_ctl(cache,
5471 root->sectorsize)) {
5476 btrfs_remove_free_space_cache(cache);
5479 ret = load_free_space_cache(root->fs_info, cache);
5483 ret = verify_space_cache(root, cache);
5485 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5486 cache->key.objectid);
5491 task_stop(ctx.info);
5493 return error ? -EINVAL : 0;
/*
 * check_extent_csums - verify data checksums for one extent.
 *
 * Reads the extent [bytenr, num_bytes) in large chunks, then checks the
 * crc of every sectorsize piece against the csum items stored in @eb at
 * @leaf_offset.  On a mismatch the failing sector is reported and the read
 * is retried from the next mirror while copies remain.
 */
5496 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5497 u64 num_bytes, unsigned long leaf_offset,
5498 struct extent_buffer *eb) {
5501 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5503 unsigned long csum_offset;
5507 u64 data_checked = 0;
/* Extent length must be sector-aligned for per-sector csums. */
5513 if (num_bytes % root->sectorsize)
5516 data = malloc(num_bytes);
5520 while (offset < num_bytes) {
5523 read_len = num_bytes - offset;
5524 /* read as much space once a time */
5525 ret = read_extent_data(root, data + offset,
5526 bytenr + offset, &read_len, mirror);
5530 /* verify every 4k data's checksum */
5531 while (data_checked < read_len) {
5533 tmp = offset + data_checked;
5535 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5536 csum, root->sectorsize);
5537 btrfs_csum_final(csum, (char *)&csum);
/* Expected csum lives in the csum item at leaf_offset. */
5539 csum_offset = leaf_offset +
5540 tmp / root->sectorsize * csum_size;
5541 read_extent_buffer(eb, (char *)&csum_expected,
5542 csum_offset, csum_size);
5543 /* try another mirror */
5544 if (csum != csum_expected) {
5545 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5546 mirror, bytenr + tmp,
5547 csum, csum_expected);
5548 num_copies = btrfs_num_copies(
5549 &root->fs_info->mapping_tree,
5551 if (mirror < num_copies - 1) {
5556 data_checked += root->sectorsize;
/*
 * Check that the whole range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.  Walks the tree, shrinking the range as
 * covering extents are found; reports any range left uncovered.
 * Returns 0 when fully covered, non-zero on error / missing coverage.
 * NOTE(review): several source lines are elided in this view; code below is
 * kept byte-identical.
 */
5565 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5568 struct btrfs_path *path;
5569 struct extent_buffer *leaf;
5570 struct btrfs_key key;
5573 path = btrfs_alloc_path();
5575 fprintf(stderr, "Error allocing path\n");
/* search from the end of the bytenr's key-space and walk backwards */
5579 key.objectid = bytenr;
5580 key.type = BTRFS_EXTENT_ITEM_KEY;
5581 key.offset = (u64)-1;
5584 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5587 fprintf(stderr, "Error looking up extent record %d\n", ret);
5588 btrfs_free_path(path);
5591 if (path->slots[0] > 0) {
5594 ret = btrfs_prev_leaf(root, path);
5597 } else if (ret > 0) {
5604 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5607 * Block group items come before extent items if they have the same
5608 * bytenr, so walk back one more just in case. Dear future traveler,
5609 * first congrats on mastering time travel. Now if it's not too much
5610 * trouble could you go back to 2006 and tell Chris to make the
5611 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5612 * EXTENT_ITEM_KEY please?
5614 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5615 if (path->slots[0] > 0) {
5618 ret = btrfs_prev_leaf(root, path);
5621 } else if (ret > 0) {
5626 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* walk forward through the leaves looking for covering extent items */
5630 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5631 ret = btrfs_next_leaf(root, path);
5633 fprintf(stderr, "Error going to next leaf "
5635 btrfs_free_path(path);
5641 leaf = path->nodes[0];
5642 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5643 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* extent ends before our range starts: not relevant, keep scanning */
5647 if (key.objectid + key.offset < bytenr) {
/* extent starts past our range: nothing further can cover us */
5651 if (key.objectid > bytenr + num_bytes)
5654 if (key.objectid == bytenr) {
5655 if (key.offset >= num_bytes) {
/* extent covers a prefix of our range; shrink from the front */
5659 num_bytes -= key.offset;
5660 bytenr += key.offset;
5661 } else if (key.objectid < bytenr) {
5662 if (key.objectid + key.offset >= bytenr + num_bytes) {
5666 num_bytes = (bytenr + num_bytes) -
5667 (key.objectid + key.offset);
5668 bytenr = key.objectid + key.offset;
5670 if (key.objectid + key.offset < bytenr + num_bytes) {
5671 u64 new_start = key.objectid + key.offset;
5672 u64 new_bytes = bytenr + num_bytes - new_start;
5675 * Weird case, the extent is in the middle of
5676 * our range, we'll have to search one side
5677 * and then the other. Not sure if this happens
5678 * in real life, but no harm in coding it up
5679 * anyway just in case.
5681 btrfs_release_path(path);
/* recurse on the right-hand remainder of the range */
5682 ret = check_extent_exists(root, new_start,
5685 fprintf(stderr, "Right section didn't "
5689 num_bytes = key.objectid - bytenr;
5692 num_bytes = key.objectid - bytenr;
/* anything left uncovered is a csum range without an extent record */
5699 if (num_bytes && !ret) {
5700 fprintf(stderr, "There are no extents for csum range "
5701 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5705 btrfs_free_path(path);
/*
 * Walk the csum tree and verify that every csum item corresponds to a real
 * extent (via check_extent_exists), and — if check_data_csum is set — that
 * the on-disk data actually matches the stored checksums
 * (via check_extent_csums).
 * NOTE(review): several source lines are elided in this view; code below is
 * kept byte-identical.
 */
5709 static int check_csums(struct btrfs_root *root)
5711 struct btrfs_path *path;
5712 struct extent_buffer *leaf;
5713 struct btrfs_key key;
5714 u64 offset = 0, num_bytes = 0;
5715 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5719 unsigned long leaf_offset;
/* all csum items live in the csum tree; switch roots here */
5721 root = root->fs_info->csum_root;
5722 if (!extent_buffer_uptodate(root->node)) {
5723 fprintf(stderr, "No valid csum tree found\n");
5727 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5728 key.type = BTRFS_EXTENT_CSUM_KEY;
5731 path = btrfs_alloc_path();
5735 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5737 fprintf(stderr, "Error searching csum tree %d\n", ret);
5738 btrfs_free_path(path);
5742 if (ret > 0 && path->slots[0])
5747 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5748 ret = btrfs_next_leaf(root, path);
5750 fprintf(stderr, "Error going to next leaf "
5757 leaf = path->nodes[0];
5759 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5760 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* each csum item covers (item_size / csum_size) sectors of data */
5765 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5766 csum_size) * root->sectorsize;
5767 if (!check_data_csum)
5768 goto skip_csum_check;
5769 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5770 ret = check_extent_csums(root, key.offset, data_len,
5776 offset = key.offset;
/* non-contiguous csum range: verify the previous run had extents */
5777 } else if (key.offset != offset + num_bytes) {
5778 ret = check_extent_exists(root, offset, num_bytes);
5780 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5781 "there is no extent record\n",
5782 offset, offset+num_bytes);
5785 offset = key.offset;
5788 num_bytes += data_len;
5792 btrfs_free_path(path);
/*
 * Lexicographic key comparison against a subvolume's drop-progress key:
 * returns whether @key sorts strictly before @drop_key (objectid, then
 * type, then offset).  Keys before the drop key belong to the part of a
 * partially-dropped snapshot that is already gone.
 * NOTE(review): the return statements are elided in this view.
 */
5796 static int is_dropped_key(struct btrfs_key *key,
5797 struct btrfs_key *drop_key) {
5798 if (key->objectid < drop_key->objectid)
5800 else if (key->objectid == drop_key->objectid) {
5801 if (key->type < drop_key->type)
5803 else if (key->type == drop_key->type) {
5804 if (key->offset < drop_key->offset)
5812 * Here are the rules for FULL_BACKREF.
5814 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5815 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5817 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5818 * if it happened after the relocation occurred since we'll have dropped the
5819 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5820 * have no real way to know for sure.
5822 * We process the blocks one root at a time, and we start from the lowest root
5823 * objectid and go to the highest. So we can just lookup the owner backref for
5824 * the record and if we don't find it then we know it doesn't exist and we have
5827 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5828 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5829 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Compute the expected BTRFS_BLOCK_FLAG_FULL_BACKREF state for @buf per the
 * rules above, storing the result in *flags and marking the extent record
 * bad when it disagrees with what was previously observed.
 * NOTE(review): several source lines are elided in this view; code below is
 * kept byte-identical.
 */
5831 static int calc_extent_flag(struct btrfs_root *root,
5832 struct cache_tree *extent_cache,
5833 struct extent_buffer *buf,
5834 struct root_item_record *ri,
5837 struct extent_record *rec;
5838 struct cache_extent *cache;
5839 struct tree_backref *tback;
5842 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5843 /* we have added this extent before */
5845 rec = container_of(cache, struct extent_record, cache);
5848 * Except file/reloc tree, we can not have
5851 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* a root node cannot be shared — it cannot be FULL_BACKREF */
5856 if (buf->start == ri->bytenr)
/* rule 1: RELOC header flag implies FULL_BACKREF */
5859 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5862 owner = btrfs_header_owner(buf);
5863 if (owner == ri->objectid)
/* rule 2: owner no longer references this block directly */
5866 tback = find_tree_backref(rec, 0, owner);
/* conflicting observations for the same extent → flag it bad */
5871 if (rec->flag_block_full_backref != -1 &&
5872 rec->flag_block_full_backref != 0)
5873 rec->bad_full_backref = 1;
5876 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5877 if (rec->flag_block_full_backref != -1 &&
5878 rec->flag_block_full_backref != 1)
5879 rec->bad_full_backref = 1;
/*
 * Process the next pending tree block: pick it from the pending/node caches
 * (optionally issuing readahead), read it, validate its extent flags, then
 * walk its items — recording chunks, devices, block groups, dev extents,
 * backrefs and file-extent accounting into the various caches — and queue
 * any child nodes for later processing.  This is the workhorse of the
 * extent-tree scan.
 * NOTE(review): a large number of source lines are elided in this view
 * (declarations, error paths, braces); code below is kept byte-identical.
 */
5883 static int run_next_block(struct btrfs_root *root,
5884 struct block_info *bits,
5887 struct cache_tree *pending,
5888 struct cache_tree *seen,
5889 struct cache_tree *reada,
5890 struct cache_tree *nodes,
5891 struct cache_tree *extent_cache,
5892 struct cache_tree *chunk_cache,
5893 struct rb_root *dev_cache,
5894 struct block_group_tree *block_group_cache,
5895 struct device_extent_tree *dev_extent_cache,
5896 struct root_item_record *ri)
5898 struct extent_buffer *buf;
5899 struct extent_record *rec = NULL;
5910 struct btrfs_key key;
5911 struct cache_extent *cache;
5914 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5915 bits_nr, &reada_bits);
/* issue readahead for the batch of blocks we are about to visit */
5920 for(i = 0; i < nritems; i++) {
5921 ret = add_cache_extent(reada, bits[i].start,
5926 /* fixme, get the parent transid */
5927 readahead_tree_block(root, bits[i].start,
5931 *last = bits[0].start;
5932 bytenr = bits[0].start;
5933 size = bits[0].size;
/* drop this block from the bookkeeping caches before processing it */
5935 cache = lookup_cache_extent(pending, bytenr, size);
5937 remove_cache_extent(pending, cache);
5940 cache = lookup_cache_extent(reada, bytenr, size);
5942 remove_cache_extent(reada, cache);
5945 cache = lookup_cache_extent(nodes, bytenr, size);
5947 remove_cache_extent(nodes, cache);
5950 cache = lookup_cache_extent(extent_cache, bytenr, size);
5952 rec = container_of(cache, struct extent_record, cache);
5953 gen = rec->parent_generation;
5956 /* fixme, get the real parent transid */
5957 buf = read_tree_block(root, bytenr, size, gen);
5958 if (!extent_buffer_uptodate(buf)) {
5959 record_bad_block_io(root->fs_info,
5960 extent_cache, bytenr, size);
5964 nritems = btrfs_header_nritems(buf);
/* get the extent flags, from the extent tree if we trust it */
5967 if (!init_extent_tree) {
5968 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5969 btrfs_header_level(buf), 1, NULL,
5972 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5974 fprintf(stderr, "Couldn't calc extent flags\n");
5975 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5980 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5982 fprintf(stderr, "Couldn't calc extent flags\n");
5983 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5987 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5989 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5990 ri->objectid == btrfs_header_owner(buf)) {
5992 * Ok we got to this block from it's original owner and
5993 * we have FULL_BACKREF set. Relocation can leave
5994 * converted blocks over so this is altogether possible,
5995 * however it's not possible if the generation > the
5996 * last snapshot, so check for this case.
5998 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5999 btrfs_header_generation(buf) > ri->last_snapshot) {
6000 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6001 rec->bad_full_backref = 1;
6006 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6007 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6008 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6009 rec->bad_full_backref = 1;
/* remember which flavor of backref this block's extent record uses */
6013 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6014 rec->flag_block_full_backref = 1;
6018 rec->flag_block_full_backref = 0;
6020 owner = btrfs_header_owner(buf);
6023 ret = check_block(root, extent_cache, buf, flags);
/* leaves: account every item type we care about */
6027 if (btrfs_is_leaf(buf)) {
6028 btree_space_waste += btrfs_leaf_free_space(root, buf);
6029 for (i = 0; i < nritems; i++) {
6030 struct btrfs_file_extent_item *fi;
6031 btrfs_item_key_to_cpu(buf, &key, i);
6032 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6033 process_extent_item(root, extent_cache, buf,
6037 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6038 process_extent_item(root, extent_cache, buf,
6042 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6044 btrfs_item_size_nr(buf, i);
6047 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6048 process_chunk_item(chunk_cache, &key, buf, i);
6051 if (key.type == BTRFS_DEV_ITEM_KEY) {
6052 process_device_item(dev_cache, &key, buf, i);
6055 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6056 process_block_group_item(block_group_cache,
6060 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6061 process_device_extent_item(dev_extent_cache,
6066 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6067 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6068 process_extent_ref_v0(extent_cache, buf, i);
6075 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6076 add_tree_backref(extent_cache, key.objectid, 0,
6080 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6081 add_tree_backref(extent_cache, key.objectid,
6085 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6086 struct btrfs_extent_data_ref *ref;
6087 ref = btrfs_item_ptr(buf, i,
6088 struct btrfs_extent_data_ref);
6089 add_data_backref(extent_cache,
6091 btrfs_extent_data_ref_root(buf, ref),
6092 btrfs_extent_data_ref_objectid(buf,
6094 btrfs_extent_data_ref_offset(buf, ref),
6095 btrfs_extent_data_ref_count(buf, ref),
6096 0, root->sectorsize);
6099 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6100 struct btrfs_shared_data_ref *ref;
6101 ref = btrfs_item_ptr(buf, i,
6102 struct btrfs_shared_data_ref);
6103 add_data_backref(extent_cache,
6104 key.objectid, key.offset, 0, 0, 0,
6105 btrfs_shared_data_ref_count(buf, ref),
6106 0, root->sectorsize);
/* non-standard orphans get queued for deletion during repair */
6109 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6110 struct bad_item *bad;
6112 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6116 bad = malloc(sizeof(struct bad_item));
6119 INIT_LIST_HEAD(&bad->list);
6120 memcpy(&bad->key, &key,
6121 sizeof(struct btrfs_key));
6122 bad->root_id = owner;
6123 list_add_tail(&bad->list, &delete_items);
6126 if (key.type != BTRFS_EXTENT_DATA_KEY)
6128 fi = btrfs_item_ptr(buf, i,
6129 struct btrfs_file_extent_item);
6130 if (btrfs_file_extent_type(buf, fi) ==
6131 BTRFS_FILE_EXTENT_INLINE)
6133 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6136 data_bytes_allocated +=
6137 btrfs_file_extent_disk_num_bytes(buf, fi);
6138 if (data_bytes_allocated < root->sectorsize) {
6141 data_bytes_referenced +=
6142 btrfs_file_extent_num_bytes(buf, fi);
6143 add_data_backref(extent_cache,
6144 btrfs_file_extent_disk_bytenr(buf, fi),
6145 parent, owner, key.objectid, key.offset -
6146 btrfs_file_extent_offset(buf, fi), 1, 1,
6147 btrfs_file_extent_disk_num_bytes(buf, fi));
6151 struct btrfs_key first_key;
6153 first_key.objectid = 0;
6156 btrfs_item_key_to_cpu(buf, &first_key, 0);
6157 level = btrfs_header_level(buf);
/* interior nodes: queue every child pointer for a later pass */
6158 for (i = 0; i < nritems; i++) {
6159 ptr = btrfs_node_blockptr(buf, i);
6160 size = btrfs_level_size(root, level - 1);
6161 btrfs_node_key_to_cpu(buf, &key, i);
/* skip children already dropped by a partial snapshot delete */
6163 if ((level == ri->drop_level)
6164 && is_dropped_key(&key, &ri->drop_key)) {
6168 ret = add_extent_rec(extent_cache, &key,
6169 btrfs_node_ptr_generation(buf, i),
6170 ptr, size, 0, 0, 1, 0, 1, 0,
6174 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6177 add_pending(nodes, seen, ptr, size);
6179 add_pending(pending, seen, ptr, size);
6182 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6183 nritems) * sizeof(struct btrfs_key_ptr);
6185 total_btree_bytes += buf->len;
6186 if (fs_root_objectid(btrfs_header_owner(buf)))
6187 total_fs_tree_bytes += buf->len;
6188 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6189 total_extent_tree_bytes += buf->len;
6190 if (!found_old_backref &&
6191 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6192 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6193 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6194 found_old_backref = 1;
6196 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root: queue @buf in the nodes cache (interior
 * node) or pending cache (leaf), create its extent record, and add the
 * appropriate tree backref (shared-style for reloc/old-format roots,
 * owner-keyed otherwise).
 * NOTE(review): some source lines are elided in this view; code below is
 * kept byte-identical.
 */
6200 static int add_root_to_pending(struct extent_buffer *buf,
6201 struct cache_tree *extent_cache,
6202 struct cache_tree *pending,
6203 struct cache_tree *seen,
6204 struct cache_tree *nodes,
6207 if (btrfs_header_level(buf) > 0)
6208 add_pending(nodes, seen, buf->start, buf->len);
6210 add_pending(pending, seen, buf->start, buf->len);
6211 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6212 0, 1, 1, 0, 1, 0, buf->len);
6214 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6215 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6216 add_tree_backref(extent_cache, buf->start, buf->start,
6219 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6223 /* as we fix the tree, we might be deleting blocks that
6224 * we're tracking for repair. This hook makes sure we
6225 * remove any backrefs for blocks as we are fixing them.
6227 static int free_extent_hook(struct btrfs_trans_handle *trans,
6228 struct btrfs_root *root,
6229 u64 bytenr, u64 num_bytes, u64 parent,
6230 u64 root_objectid, u64 owner, u64 offset,
6233 struct extent_record *rec;
6234 struct cache_extent *cache;
6236 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* owners >= FIRST_FREE_OBJECTID are file data; below are tree blocks */
6238 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6239 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6243 rec = container_of(cache, struct extent_record, cache);
6245 struct data_backref *back;
6246 back = find_data_backref(rec, parent, root_objectid, owner,
6247 offset, 1, bytenr, num_bytes);
6250 if (back->node.found_ref) {
6251 back->found_ref -= refs_to_drop;
6253 rec->refs -= refs_to_drop;
6255 if (back->node.found_extent_tree) {
6256 back->num_refs -= refs_to_drop;
6257 if (rec->extent_item_refs)
6258 rec->extent_item_refs -= refs_to_drop;
/* drop the backref entirely once both views agree it is gone */
6260 if (back->found_ref == 0)
6261 back->node.found_ref = 0;
6262 if (back->num_refs == 0)
6263 back->node.found_extent_tree = 0;
6265 if (!back->node.found_extent_tree && back->node.found_ref) {
6266 list_del(&back->node.list);
6270 struct tree_backref *back;
6271 back = find_tree_backref(rec, parent, root_objectid);
6274 if (back->node.found_ref) {
6277 back->node.found_ref = 0;
6279 if (back->node.found_extent_tree) {
6280 if (rec->extent_item_refs)
6281 rec->extent_item_refs--;
6282 back->node.found_extent_tree = 0;
6284 if (!back->node.found_extent_tree && back->node.found_ref) {
6285 list_del(&back->node.list);
/* free the extent record if nothing references it anymore */
6289 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete every extent-tree item (extent items, metadata items and all
 * backref item types) keyed at @bytenr, walking the key space downward from
 * (bytenr, max) and updating the block-group accounting for each extent
 * item removed.
 * NOTE(review): several source lines are elided in this view; code below is
 * kept byte-identical.
 */
6294 static int delete_extent_records(struct btrfs_trans_handle *trans,
6295 struct btrfs_root *root,
6296 struct btrfs_path *path,
6297 u64 bytenr, u64 new_len)
6299 struct btrfs_key key;
6300 struct btrfs_key found_key;
6301 struct extent_buffer *leaf;
6306 key.objectid = bytenr;
6308 key.offset = (u64)-1;
6311 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6318 if (path->slots[0] == 0)
6324 leaf = path->nodes[0];
6325 slot = path->slots[0];
6327 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6328 if (found_key.objectid != bytenr)
/* only touch extent/backref item types; step past anything else */
6331 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6332 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6333 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6334 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6335 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6336 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6337 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6338 btrfs_release_path(path);
/* decrement the search key to continue scanning below this item */
6339 if (found_key.type == 0) {
6340 if (found_key.offset == 0)
6342 key.offset = found_key.offset - 1;
6343 key.type = found_key.type;
6345 key.type = found_key.type - 1;
6346 key.offset = (u64)-1;
6350 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6351 found_key.objectid, found_key.type, found_key.offset);
6353 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6356 btrfs_release_path(path);
6358 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6359 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* metadata items encode the level in offset, so use leafsize */
6360 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6361 found_key.offset : root->leafsize;
6363 ret = btrfs_update_block_group(trans, root, bytenr,
6370 btrfs_release_path(path);
6375 * for a single backref, this will allocate a new extent
6376 * and add the backref to it.
/*
 * Re-create a missing extent item for @rec (when !allocated) and then
 * re-insert the reference described by @back — one inc-ref per found_ref
 * for data backrefs, a single tree backref otherwise.
 * NOTE(review): several source lines are elided in this view; code below is
 * kept byte-identical.  Also note two pre-existing textual defects left
 * untouched here: the stray ";;" after copy_key, and "©_key" which is
 * mojibake for "&copy_key" (an HTML "&copy;" entity corruption) — the
 * encoding should be repaired before this file can compile.
 */
6378 static int record_extent(struct btrfs_trans_handle *trans,
6379 struct btrfs_fs_info *info,
6380 struct btrfs_path *path,
6381 struct extent_record *rec,
6382 struct extent_backref *back,
6383 int allocated, u64 flags)
6386 struct btrfs_root *extent_root = info->extent_root;
6387 struct extent_buffer *leaf;
6388 struct btrfs_key ins_key;
6389 struct btrfs_extent_item *ei;
6390 struct tree_backref *tback;
6391 struct data_backref *dback;
6392 struct btrfs_tree_block_info *bi;
6395 rec->max_size = max_t(u64, rec->max_size,
6396 info->extent_root->leafsize);
6399 u32 item_size = sizeof(*ei);
/* tree blocks carry an extra btrfs_tree_block_info after the item */
6402 item_size += sizeof(*bi);
6404 ins_key.objectid = rec->start;
6405 ins_key.offset = rec->max_size;
6406 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6408 ret = btrfs_insert_empty_item(trans, extent_root, path,
6409 &ins_key, item_size);
6413 leaf = path->nodes[0];
6414 ei = btrfs_item_ptr(leaf, path->slots[0],
6415 struct btrfs_extent_item);
6417 btrfs_set_extent_refs(leaf, ei, 0);
6418 btrfs_set_extent_generation(leaf, ei, rec->generation);
6420 if (back->is_data) {
6421 btrfs_set_extent_flags(leaf, ei,
6422 BTRFS_EXTENT_FLAG_DATA);
6424 struct btrfs_disk_key copy_key;;
6426 tback = (struct tree_backref *)back;
6427 bi = (struct btrfs_tree_block_info *)(ei + 1);
6428 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* NOTE(review): "©_key" below should read "&copy_key" — mojibake */
6431 btrfs_set_disk_key_objectid(©_key,
6432 rec->info_objectid);
6433 btrfs_set_disk_key_type(©_key, 0);
6434 btrfs_set_disk_key_offset(©_key, 0);
6436 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6437 btrfs_set_tree_block_key(leaf, bi, ©_key);
6439 btrfs_set_extent_flags(leaf, ei,
6440 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6443 btrfs_mark_buffer_dirty(leaf);
6444 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6445 rec->max_size, 1, 0);
6448 btrfs_release_path(path);
6451 if (back->is_data) {
6455 dback = (struct data_backref *)back;
6456 if (back->full_backref)
6457 parent = dback->parent;
/* add one ref per reference actually found in the fs trees */
6461 for (i = 0; i < dback->found_ref; i++) {
6462 /* if parent != 0, we're doing a full backref
6463 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6464 * just makes the backref allocator create a data
6467 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6468 rec->start, rec->max_size,
6472 BTRFS_FIRST_FREE_OBJECTID :
6478 fprintf(stderr, "adding new data backref"
6479 " on %llu %s %llu owner %llu"
6480 " offset %llu found %d\n",
6481 (unsigned long long)rec->start,
6482 back->full_backref ?
6484 back->full_backref ?
6485 (unsigned long long)parent :
6486 (unsigned long long)dback->root,
6487 (unsigned long long)dback->owner,
6488 (unsigned long long)dback->offset,
6493 tback = (struct tree_backref *)back;
6494 if (back->full_backref)
6495 parent = tback->parent;
6499 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6500 rec->start, rec->max_size,
6501 parent, tback->root, 0, 0);
6502 fprintf(stderr, "adding new tree backref on "
6503 "start %llu len %llu parent %llu root %llu\n",
6504 rec->start, rec->max_size, parent, tback->root);
6507 btrfs_release_path(path);
/*
 * One candidate (bytenr, bytes) pairing voted for by data backrefs during
 * backref verification; linked into a local list in verify_backrefs().
 * NOTE(review): the struct's remaining fields (bytenr, bytes, count,
 * broken — judging by their use in find_most_right_entry) are elided in
 * this view.
 */
6511 struct extent_entry {
6516 struct list_head list;
/*
 * Linear search of @entries for the entry matching both @bytenr and @bytes;
 * the return statements are elided in this view (NULL when absent).
 */
6519 static struct extent_entry *find_entry(struct list_head *entries,
6520 u64 bytenr, u64 bytes)
6522 struct extent_entry *entry = NULL;
6524 list_for_each_entry(entry, entries, list) {
6525 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry with the highest vote count, ignoring entries whose refs
 * are all broken.  Returns NULL when there is a tie for first place (the
 * backrefs are evenly split and cannot decide).
 * NOTE(review): some source lines are elided in this view; code below is
 * kept byte-identical.
 */
6534 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6536 struct extent_entry *entry, *best = NULL, *prev = NULL;
6538 list_for_each_entry(entry, entries, list) {
6543 * If there are as many broken entries as entries then we know
6544 * not to trust this particular entry.
6546 if (entry->broken == entry->count)
6550 * If our current entry == best then we can't be sure our best
6551 * is really the best, so we need to keep searching.
6553 if (best && best->count == entry->count) {
6559 /* Prev == entry, not good enough, have to keep searching */
6560 if (!prev->broken && prev->count == entry->count)
6564 best = (prev->count > entry->count) ? prev : entry;
6565 else if (best->count < entry->count)
/*
 * Rewrite one file-extent item so its disk_bytenr/disk_num_bytes (and
 * offset/ram_bytes where applicable) agree with the winning extent @entry.
 * Locates the file extent via @dback's (root, owner, offset), cows down to
 * it in a transaction, and adjusts the fields in place.
 * NOTE(review): several source lines are elided in this view; code below is
 * kept byte-identical.
 */
6573 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6574 struct data_backref *dback, struct extent_entry *entry)
6576 struct btrfs_trans_handle *trans;
6577 struct btrfs_root *root;
6578 struct btrfs_file_extent_item *fi;
6579 struct extent_buffer *leaf;
6580 struct btrfs_key key;
6584 key.objectid = dback->root;
6585 key.type = BTRFS_ROOT_ITEM_KEY;
6586 key.offset = (u64)-1;
6587 root = btrfs_read_fs_root(info, &key);
6589 fprintf(stderr, "Couldn't find root for our ref\n");
6594 * The backref points to the original offset of the extent if it was
6595 * split, so we need to search down to the offset we have and then walk
6596 * forward until we find the backref we're looking for.
6598 key.objectid = dback->owner;
6599 key.type = BTRFS_EXTENT_DATA_KEY;
6600 key.offset = dback->offset;
6601 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6603 fprintf(stderr, "Error looking up ref %d\n", ret);
6608 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6609 ret = btrfs_next_leaf(root, path);
6611 fprintf(stderr, "Couldn't find our ref, next\n");
6615 leaf = path->nodes[0];
6616 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6617 if (key.objectid != dback->owner ||
6618 key.type != BTRFS_EXTENT_DATA_KEY) {
6619 fprintf(stderr, "Couldn't find our ref, search\n");
6622 fi = btrfs_item_ptr(leaf, path->slots[0],
6623 struct btrfs_file_extent_item);
6624 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6625 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* stop at the file extent that matches this backref exactly */
6627 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6632 btrfs_release_path(path);
6634 trans = btrfs_start_transaction(root, 1);
6636 return PTR_ERR(trans);
6639 * Ok we have the key of the file extent we want to fix, now we can cow
6640 * down to the thing and fix it.
6642 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6644 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6645 key.objectid, key.type, key.offset, ret);
6649 fprintf(stderr, "Well that's odd, we just found this key "
6650 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6655 leaf = path->nodes[0];
6656 fi = btrfs_item_ptr(leaf, path->slots[0],
6657 struct btrfs_file_extent_item);
/* compressed extents cannot have their offsets re-derived; punt */
6659 if (btrfs_file_extent_compression(leaf, fi) &&
6660 dback->disk_bytenr != entry->bytenr) {
6661 fprintf(stderr, "Ref doesn't match the record start and is "
6662 "compressed, please take a btrfs-image of this file "
6663 "system and send it to a btrfs developer so they can "
6664 "complete this functionality for bytenr %Lu\n",
6665 dback->disk_bytenr);
6670 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6671 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6672 } else if (dback->disk_bytenr > entry->bytenr) {
6673 u64 off_diff, offset;
6675 off_diff = dback->disk_bytenr - entry->bytenr;
6676 offset = btrfs_file_extent_offset(leaf, fi);
6677 if (dback->disk_bytenr + offset +
6678 btrfs_file_extent_num_bytes(leaf, fi) >
6679 entry->bytenr + entry->bytes) {
6680 fprintf(stderr, "Ref is past the entry end, please "
6681 "take a btrfs-image of this file system and "
6682 "send it to a btrfs developer, ref %Lu\n",
6683 dback->disk_bytenr);
/* ref started inside the real extent: fold the delta into offset */
6688 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6689 btrfs_set_file_extent_offset(leaf, fi, offset);
6690 } else if (dback->disk_bytenr < entry->bytenr) {
6693 offset = btrfs_file_extent_offset(leaf, fi);
6694 if (dback->disk_bytenr + offset < entry->bytenr) {
6695 fprintf(stderr, "Ref is before the entry start, please"
6696 " take a btrfs-image of this file system and "
6697 "send it to a btrfs developer, ref %Lu\n",
6698 dback->disk_bytenr);
6703 offset += dback->disk_bytenr;
6704 offset -= entry->bytenr;
6705 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6706 btrfs_set_file_extent_offset(leaf, fi, offset);
6709 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6712 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6713 * only do this if we aren't using compression, otherwise it's a
6716 if (!btrfs_file_extent_compression(leaf, fi))
6717 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6719 printf("ram bytes may be wrong?\n");
6720 btrfs_mark_buffer_dirty(leaf);
6722 err = btrfs_commit_transaction(trans, root);
6723 btrfs_release_path(path);
6724 return ret ? ret : err;
/*
 * Check that all data backrefs of @rec agree on (bytenr, bytes).  Each
 * distinct pairing gets a vote; the majority winner (optionally breaking a
 * tie with the extent record itself) becomes the truth, and every
 * disagreeing file extent is rewritten via repair_ref().
 * NOTE(review): several source lines are elided in this view; code below is
 * kept byte-identical.
 */
6727 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6728 struct extent_record *rec)
6730 struct extent_backref *back;
6731 struct data_backref *dback;
6732 struct extent_entry *entry, *best = NULL;
6735 int broken_entries = 0;
6740 * Metadata is easy and the backrefs should always agree on bytenr and
6741 * size, if not we've got bigger issues.
6746 list_for_each_entry(back, &rec->backrefs, list) {
6747 if (back->full_backref || !back->is_data)
6750 dback = (struct data_backref *)back;
6753 * We only pay attention to backrefs that we found a real
6756 if (dback->found_ref == 0)
6760 * For now we only catch when the bytes don't match, not the
6761 * bytenr. We can easily do this at the same time, but I want
6762 * to have a fs image to test on before we just add repair
6763 * functionality willy-nilly so we know we won't screw up the
6767 entry = find_entry(&entries, dback->disk_bytenr,
6770 entry = malloc(sizeof(struct extent_entry));
6775 memset(entry, 0, sizeof(*entry));
6776 entry->bytenr = dback->disk_bytenr;
6777 entry->bytes = dback->bytes;
6778 list_add_tail(&entry->list, &entries);
6783 * If we only have one entry we may think the entries agree when
6784 * in reality they don't so we have to do some extra checking.
6786 if (dback->disk_bytenr != rec->start ||
6787 dback->bytes != rec->nr || back->broken)
6798 /* Yay all the backrefs agree, carry on good sir */
6799 if (nr_entries <= 1 && !mismatch)
6802 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6803 "%Lu\n", rec->start);
6806 * First we want to see if the backrefs can agree amongst themselves who
6807 * is right, so figure out which one of the entries has the highest
6810 best = find_most_right_entry(&entries);
6813 * Ok so we may have an even split between what the backrefs think, so
6814 * this is where we use the extent ref to see what it thinks.
6817 entry = find_entry(&entries, rec->start, rec->nr);
6818 if (!entry && (!broken_entries || !rec->found_rec)) {
6819 fprintf(stderr, "Backrefs don't agree with each other "
6820 "and extent record doesn't agree with anybody,"
6821 " so we can't fix bytenr %Lu bytes %Lu\n",
6822 rec->start, rec->nr);
6825 } else if (!entry) {
6827 * Ok our backrefs were broken, we'll assume this is the
6828 * correct value and add an entry for this range.
6830 entry = malloc(sizeof(struct extent_entry));
6835 memset(entry, 0, sizeof(*entry));
6836 entry->bytenr = rec->start;
6837 entry->bytes = rec->nr;
6838 list_add_tail(&entry->list, &entries);
6842 best = find_most_right_entry(&entries);
6844 fprintf(stderr, "Backrefs and extent record evenly "
6845 "split on who is right, this is going to "
6846 "require user input to fix bytenr %Lu bytes "
6847 "%Lu\n", rec->start, rec->nr);
6854 * I don't think this can happen currently as we'll abort() if we catch
6855 * this case higher up, but in case somebody removes that we still can't
6856 * deal with it properly here yet, so just bail out if that's the case.
6858 if (best->bytenr != rec->start) {
6859 fprintf(stderr, "Extent start and backref starts don't match, "
6860 "please use btrfs-image on this file system and send "
6861 "it to a btrfs developer so they can make fsck fix "
6862 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6863 rec->start, rec->nr);
6869 * Ok great we all agreed on an extent record, let's go find the real
6870 * references and fix up the ones that don't match.
6872 list_for_each_entry(back, &rec->backrefs, list) {
6873 if (back->full_backref || !back->is_data)
6876 dback = (struct data_backref *)back;
6879 * Still ignoring backrefs that don't have a real ref attached
6882 if (dback->found_ref == 0)
6885 if (dback->bytes == best->bytes &&
6886 dback->disk_bytenr == best->bytenr)
6889 ret = repair_ref(info, path, dback, best);
6895 * Ok we messed with the actual refs, which means we need to drop our
6896 * entire cache and go back and rescan. I know this is a huge pain and
6897 * adds a lot of extra work, but it's the only way to be safe. Once all
6898 * the backrefs agree we may not need to do anything to the extent
/* free the vote list before returning */
6903 while (!list_empty(&entries)) {
6904 entry = list_entry(entries.next, struct extent_entry, list);
6905 list_del_init(&entry->list);
/*
 * Collapse duplicate extent records for the same range: promote the real
 * (found in the extent tree) duplicate to be the canonical record, absorb
 * overlapping records' backrefs/refs, and re-insert it into the cache.
 * Returns 1 when no duplicates remain, 0 when more processing is needed.
 * NOTE(review): several source lines are elided in this view; code below is
 * kept byte-identical.
 */
6911 static int process_duplicates(struct btrfs_root *root,
6912 struct cache_tree *extent_cache,
6913 struct extent_record *rec)
6915 struct extent_record *good, *tmp;
6916 struct cache_extent *cache;
6920 * If we found an extent record for this extent then return, or if we
6921 * have more than one duplicate we are likely going to need to delete
6924 if (rec->found_rec || rec->num_duplicates > 1)
6927 /* Shouldn't happen but just in case */
6928 BUG_ON(!rec->num_duplicates);
6931 * So this happens if we end up with a backref that doesn't match the
6932 * actual extent entry. So either the backref is bad or the extent
6933 * entry is bad. Either way we want to have the extent_record actually
6934 * reflect what we found in the extent_tree, so we need to take the
6935 * duplicate out and use that as the extent_record since the only way we
6936 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6938 remove_cache_extent(extent_cache, &rec->cache);
/* promote the first duplicate — it came from a real extent item */
6940 good = list_entry(rec->dups.next, struct extent_record, list);
6941 list_del_init(&good->list);
6942 INIT_LIST_HEAD(&good->backrefs);
6943 INIT_LIST_HEAD(&good->dups);
6944 good->cache.start = good->start;
6945 good->cache.size = good->nr;
6946 good->content_checked = 0;
6947 good->owner_ref_checked = 0;
6948 good->num_duplicates = 0;
6949 good->refs = rec->refs;
6950 list_splice_init(&rec->backrefs, &good->backrefs);
/* absorb any records still overlapping the promoted range */
6952 cache = lookup_cache_extent(extent_cache, good->start,
6956 tmp = container_of(cache, struct extent_record, cache);
6959 * If we find another overlapping extent and it's found_rec is
6960 * set then it's a duplicate and we need to try and delete
6963 if (tmp->found_rec || tmp->num_duplicates > 0) {
6964 if (list_empty(&good->list))
6965 list_add_tail(&good->list,
6966 &duplicate_extents);
6967 good->num_duplicates += tmp->num_duplicates + 1;
6968 list_splice_init(&tmp->dups, &good->dups);
6969 list_del_init(&tmp->list);
6970 list_add_tail(&tmp->list, &good->dups);
6971 remove_cache_extent(extent_cache, &tmp->cache);
6976 * Ok we have another non extent item backed extent rec, so lets
6977 * just add it to this extent and carry on like we did above.
6979 good->refs += tmp->refs;
6980 list_splice_init(&tmp->backrefs, &good->backrefs);
6981 remove_cache_extent(extent_cache, &tmp->cache);
6984 ret = insert_cache_extent(extent_cache, &good->cache);
6987 return good->num_duplicates ? 0 : 1;
/*
 * Delete the duplicate EXTENT_ITEMs recorded on @rec->dups from the
 * extent tree, keeping the one record ("good") that covers all of them.
 * Returns a negative error, or the number of items deleted (nr_del);
 * 0 with nr_del == 0 clears rec->num_duplicates.
 * NOTE(review): extraction gaps hide loop bodies/breaks here; the visible
 * code is documented as-is.
 */
6990 static int delete_duplicate_records(struct btrfs_root *root,
6991 struct extent_record *rec)
6993 struct btrfs_trans_handle *trans;
6994 LIST_HEAD(delete_list);
6995 struct btrfs_path *path;
6996 struct extent_record *tmp, *good, *n;
6999 struct btrfs_key key;
7001 path = btrfs_alloc_path();
7008 /* Find the record that covers all of the duplicates. */
7009 list_for_each_entry(tmp, &rec->dups, list) {
7010 if (good->start < tmp->start)
7012 if (good->nr > tmp->nr)
7015 if (tmp->start + tmp->nr < good->start + good->nr) {
7016 fprintf(stderr, "Ok we have overlapping extents that "
7017 "aren't completely covered by eachother, this "
7018 "is going to require more careful thought. "
7019 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7020 tmp->start, tmp->nr, good->start, good->nr);
7027 list_add_tail(&rec->list, &delete_list);
7029 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7032 list_move_tail(&tmp->list, &delete_list);
/* All deletions happen in the extent root under one transaction. */
7035 root = root->fs_info->extent_root;
7036 trans = btrfs_start_transaction(root, 1);
7037 if (IS_ERR(trans)) {
7038 ret = PTR_ERR(trans);
7042 list_for_each_entry(tmp, &delete_list, list) {
7043 if (tmp->found_rec == 0)
7045 key.objectid = tmp->start;
7046 key.type = BTRFS_EXTENT_ITEM_KEY;
7047 key.offset = tmp->nr;
7049 /* Shouldn't happen but just in case */
7050 if (tmp->metadata) {
7051 fprintf(stderr, "Well this shouldn't happen, extent "
7052 "record overlaps but is metadata? "
7053 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7057 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7063 ret = btrfs_del_item(trans, root, path);
7066 btrfs_release_path(path);
7069 err = btrfs_commit_transaction(trans, root);
/* Free the records we queued for deletion, then any leftover dups. */
7073 while (!list_empty(&delete_list)) {
7074 tmp = list_entry(delete_list.next, struct extent_record, list);
7075 list_del_init(&tmp->list);
7081 while (!list_empty(&rec->dups)) {
7082 tmp = list_entry(rec->dups.next, struct extent_record, list);
7083 list_del_init(&tmp->list);
7087 btrfs_free_path(path);
7089 if (!ret && !nr_del)
7090 rec->num_duplicates = 0;
7092 return ret ? ret : nr_del;
/*
 * For each data backref on @rec that was never matched to an on-disk ref
 * (found_ref == 0), look up the referencing file extent item in its fs
 * tree and, if the target bytenr has no extent record of its own, adopt
 * the file extent's actual disk_bytenr/disk_num_bytes into the backref so
 * verify_backrefs() can reconcile it later.
 * NOTE(review): extraction gaps hide several continue/return lines here.
 */
7095 static int find_possible_backrefs(struct btrfs_fs_info *info,
7096 struct btrfs_path *path,
7097 struct cache_tree *extent_cache,
7098 struct extent_record *rec)
7100 struct btrfs_root *root;
7101 struct extent_backref *back;
7102 struct data_backref *dback;
7103 struct cache_extent *cache;
7104 struct btrfs_file_extent_item *fi;
7105 struct btrfs_key key;
7109 list_for_each_entry(back, &rec->backrefs, list) {
7110 /* Don't care about full backrefs (poor unloved backrefs) */
7111 if (back->full_backref || !back->is_data)
7114 dback = (struct data_backref *)back;
7116 /* We found this one, we don't need to do a lookup */
7117 if (dback->found_ref)
/* Resolve the subvolume root the backref claims to come from. */
7120 key.objectid = dback->root;
7121 key.type = BTRFS_ROOT_ITEM_KEY;
7122 key.offset = (u64)-1;
7124 root = btrfs_read_fs_root(info, &key);
7126 /* No root, definitely a bad ref, skip */
7127 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7129 /* Other err, exit */
7131 return PTR_ERR(root);
7133 key.objectid = dback->owner;
7134 key.type = BTRFS_EXTENT_DATA_KEY;
7135 key.offset = dback->offset;
7136 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7138 btrfs_release_path(path);
7141 /* Didn't find it, we can carry on */
7146 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7147 struct btrfs_file_extent_item);
7148 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7149 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7150 btrfs_release_path(path);
7151 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7153 struct extent_record *tmp;
7154 tmp = container_of(cache, struct extent_record, cache);
7157 * If we found an extent record for the bytenr for this
7158 * particular backref then we can't add it to our
7159 * current extent record. We only want to add backrefs
7160 * that don't have a corresponding extent item in the
7161 * extent tree since they likely belong to this record
7162 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk location; mark the backref as found. */
7168 dback->found_ref += 1;
7169 dback->disk_bytenr = bytenr;
7170 dback->bytes = bytes;
7173 * Set this so the verify backref code knows not to trust the
7174 * values in this backref.
7183 * Record orphan data ref into corresponding root.
7185 * Return 0 if the extent item contains data ref and recorded.
7186 * Return 1 if the extent item contains no useful data ref
7187 * In that case, it may contain only a shared_dataref or metadata backref,
7188 * or the file extent exists (this should be handled by the extent bytenr
7190 * Return <0 if something goes wrong.
/*
 * See the comment above: for each data backref found in the extent tree
 * but missing its file extent (found_ref == 0), allocate an
 * orphan_data_extent describing it and attach it to the destination
 * root's orphan_data_extents list so inode/file-extent rebuild can use it.
 * Returns !recorded_data_ref (0 when at least one orphan was recorded).
 */
7192 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7193 struct extent_record *rec)
7195 struct btrfs_key key;
7196 struct btrfs_root *dest_root;
7197 struct extent_backref *back;
7198 struct data_backref *dback;
7199 struct orphan_data_extent *orphan;
7200 struct btrfs_path *path;
7201 int recorded_data_ref = 0;
7206 path = btrfs_alloc_path();
7209 list_for_each_entry(back, &rec->backrefs, list) {
7210 if (back->full_backref || !back->is_data ||
7211 !back->found_extent_tree)
7213 dback = (struct data_backref *)back;
7214 if (dback->found_ref)
7216 key.objectid = dback->root;
7217 key.type = BTRFS_ROOT_ITEM_KEY;
7218 key.offset = (u64)-1;
7220 dest_root = btrfs_read_fs_root(fs_info, &key);
7222 /* For non-exist root we just skip it */
7223 if (IS_ERR(dest_root) || !dest_root)
7226 key.objectid = dback->owner;
7227 key.type = BTRFS_EXTENT_DATA_KEY;
7228 key.offset = dback->offset;
7230 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7232 * For ret < 0, it's OK since the fs-tree may be corrupted,
7233 * we need to record it for inode/file extent rebuild.
7234 * For ret > 0, we record it only for file extent rebuild.
7235 * For ret == 0, the file extent exists but only bytenr
7236 * mismatch, let the original bytenr fix routine to handle,
7242 orphan = malloc(sizeof(*orphan));
7247 INIT_LIST_HEAD(&orphan->list);
7248 orphan->root = dback->root;
7249 orphan->objectid = dback->owner;
7250 orphan->offset = dback->offset;
7251 orphan->disk_bytenr = rec->cache.start;
7252 orphan->disk_len = rec->cache.size;
7253 list_add(&dest_root->orphan_data_extents, &orphan->list);
7254 recorded_data_ref = 1;
7257 btrfs_free_path(path);
7259 return !recorded_data_ref;
7265 * when an incorrect extent item is found, this will delete
7266 * all of the existing entries for it and recreate them
7267 * based on what the tree scan found.
/*
 * Repair one broken extent record: reconcile its backrefs
 * (find_possible_backrefs + verify_backrefs), delete all existing extent
 * items for the range, then re-create refs from the backrefs we trust —
 * unless the block is on the corrupt-blocks list, in which case no refs
 * are re-added.  Commits a transaction at the end.
 * NOTE(review): extraction gaps hide error-path gotos/labels here.
 */
7269 static int fixup_extent_refs(struct btrfs_fs_info *info,
7270 struct cache_tree *extent_cache,
7271 struct extent_record *rec)
7273 struct btrfs_trans_handle *trans = NULL;
7275 struct btrfs_path *path;
7276 struct list_head *cur = rec->backrefs.next;
7277 struct cache_extent *cache;
7278 struct extent_backref *back;
7282 if (rec->flag_block_full_backref)
7283 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7285 path = btrfs_alloc_path();
7289 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7291 * Sometimes the backrefs themselves are so broken they don't
7292 * get attached to any meaningful rec, so first go back and
7293 * check any of our backrefs that we couldn't find and throw
7294 * them into the list if we find the backref so that
7295 * verify_backrefs can figure out what to do.
7297 ret = find_possible_backrefs(info, path, extent_cache, rec);
7302 /* step one, make sure all of the backrefs agree */
7303 ret = verify_backrefs(info, path, rec);
7307 trans = btrfs_start_transaction(info->extent_root, 1);
7308 if (IS_ERR(trans)) {
7309 ret = PTR_ERR(trans);
7313 /* step two, delete all the existing records */
7314 ret = delete_extent_records(trans, info->extent_root, path,
7315 rec->start, rec->max_size);
7320 /* was this block corrupt? If so, don't add references to it */
7321 cache = lookup_cache_extent(info->corrupt_blocks,
7322 rec->start, rec->max_size);
7328 /* step three, recreate all the refs we did find */
7329 while(cur != &rec->backrefs) {
7330 back = list_entry(cur, struct extent_backref, list);
7334 * if we didn't find any references, don't create a
7337 if (!back->found_ref)
7340 rec->bad_full_backref = 0;
7341 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7349 int err = btrfs_commit_transaction(trans, info->extent_root);
7354 btrfs_free_path(path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the on-disk extent item
 * for @rec, according to rec->flag_block_full_backref.  Metadata extents
 * are looked up by METADATA_ITEM_KEY (offset = level), data by
 * EXTENT_ITEM_KEY (offset = max_size).  Commits the transaction on every
 * path, including lookup failure.
 */
7358 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7359 struct extent_record *rec)
7361 struct btrfs_trans_handle *trans;
7362 struct btrfs_root *root = fs_info->extent_root;
7363 struct btrfs_path *path;
7364 struct btrfs_extent_item *ei;
7365 struct btrfs_key key;
7369 key.objectid = rec->start;
7370 if (rec->metadata) {
7371 key.type = BTRFS_METADATA_ITEM_KEY;
7372 key.offset = rec->info_level;
7374 key.type = BTRFS_EXTENT_ITEM_KEY;
7375 key.offset = rec->max_size;
7378 path = btrfs_alloc_path();
7382 trans = btrfs_start_transaction(root, 0);
7383 if (IS_ERR(trans)) {
7384 btrfs_free_path(path);
7385 return PTR_ERR(trans);
7388 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7390 btrfs_free_path(path);
7391 btrfs_commit_transaction(trans, root);
7394 fprintf(stderr, "Didn't find extent for %llu\n",
7395 (unsigned long long)rec->start);
7396 btrfs_free_path(path);
7397 btrfs_commit_transaction(trans, root);
/* Found the item: rewrite its flags field in place. */
7401 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7402 struct btrfs_extent_item);
7403 flags = btrfs_extent_flags(path->nodes[0], ei);
7404 if (rec->flag_block_full_backref) {
7405 fprintf(stderr, "setting full backref on %llu\n",
7406 (unsigned long long)key.objectid);
7407 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7409 fprintf(stderr, "clearing full backref on %llu\n",
7410 (unsigned long long)key.objectid);
7411 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7413 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7414 btrfs_mark_buffer_dirty(path->nodes[0]);
7415 btrfs_free_path(path);
7416 return btrfs_commit_transaction(trans, root);
7419 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent allocation tree.  Searches with lowest_level set to the parent
 * level; if the search slot doesn't point at the bad block, scans the
 * whole parent node for it before deleting the node pointer.
 * NOTE(review): extraction gaps hide the retry/out paths in this body.
 */
7420 static int prune_one_block(struct btrfs_trans_handle *trans,
7421 struct btrfs_fs_info *info,
7422 struct btrfs_corrupt_block *corrupt)
7425 struct btrfs_path path;
7426 struct extent_buffer *eb;
7430 int level = corrupt->level + 1;
7432 btrfs_init_path(&path);
7434 /* we want to stop at the parent to our busted block */
7435 path.lowest_level = level;
7437 ret = btrfs_search_slot(trans, info->extent_root,
7438 &corrupt->key, &path, -1, 1);
7443 eb = path.nodes[level];
7450 * hopefully the search gave us the block we want to prune,
7451 * lets try that first
7453 slot = path.slots[level];
7454 found = btrfs_node_blockptr(eb, slot);
7455 if (found == corrupt->cache.start)
7458 nritems = btrfs_header_nritems(eb);
7460 /* the search failed, lets scan this node and hope we find it */
7461 for (slot = 0; slot < nritems; slot++) {
7462 found = btrfs_node_blockptr(eb, slot);
7463 if (found == corrupt->cache.start)
7467 * we couldn't find the bad block. TODO, search all the nodes for pointers
7470 if (eb == info->extent_root->node) {
7475 btrfs_release_path(&path);
7480 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7481 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7484 btrfs_release_path(&path);
7488 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7490 struct btrfs_trans_handle *trans = NULL;
7491 struct cache_extent *cache;
7492 struct btrfs_corrupt_block *corrupt;
7495 cache = search_cache_extent(info->corrupt_blocks, 0);
7499 trans = btrfs_start_transaction(info->extent_root, 1);
7501 return PTR_ERR(trans);
7503 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7504 prune_one_block(trans, info, corrupt);
7505 remove_cache_extent(info->corrupt_blocks, cache);
7508 return btrfs_commit_transaction(trans, info->extent_root);
7512 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7514 struct btrfs_block_group_cache *cache;
7519 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7520 &start, &end, EXTENT_DIRTY);
7523 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7529 cache = btrfs_lookup_first_block_group(fs_info, start);
7534 start = cache->key.objectid + cache->key.offset;
/*
 * Main pass over every cached extent_record: report (and with --repair,
 * fix) duplicate extent items, ref-count mismatches, backpointer
 * mismatches, owner-ref failures, bad full-backref flags, stripe-crossing
 * metadata and chunk-type mismatches.  When repairing, problem extents
 * and corrupt blocks are first pinned in excluded_extents so nothing is
 * allocated from them, and block accounting is fixed up at the end.
 * NOTE(review): extraction gaps hide many braces/continues/gotos; the
 * visible code is documented as-is, do not restructure.
 */
7538 static int check_extent_refs(struct btrfs_root *root,
7539 struct cache_tree *extent_cache)
7541 struct extent_record *rec;
7542 struct cache_extent *cache;
7551 * if we're doing a repair, we have to make sure
7552 * we don't allocate from the problem extents.
7553 * In the worst case, this will be all the
7556 cache = search_cache_extent(extent_cache, 0);
7558 rec = container_of(cache, struct extent_record, cache);
7559 set_extent_dirty(root->fs_info->excluded_extents,
7561 rec->start + rec->max_size - 1,
7563 cache = next_cache_extent(cache);
7566 /* pin down all the corrupted blocks too */
7567 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7569 set_extent_dirty(root->fs_info->excluded_extents,
7571 cache->start + cache->size - 1,
7573 cache = next_cache_extent(cache);
7575 prune_corrupt_blocks(root->fs_info);
7576 reset_cached_block_groups(root->fs_info);
7579 reset_cached_block_groups(root->fs_info);
7582 * We need to delete any duplicate entries we find first otherwise we
7583 * could mess up the extent tree when we have backrefs that actually
7584 * belong to a different extent item and not the weird duplicate one.
7586 while (repair && !list_empty(&duplicate_extents)) {
7587 rec = list_entry(duplicate_extents.next, struct extent_record,
7589 list_del_init(&rec->list);
7591 /* Sometimes we can find a backref before we find an actual
7592 * extent, so we need to process it a little bit to see if there
7593 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7594 * if this is a backref screwup. If we need to delete stuff
7595 * process_duplicates() will return 0, otherwise it will return
7598 if (process_duplicates(root, extent_cache, rec))
7600 ret = delete_duplicate_records(root, rec);
7604 * delete_duplicate_records will return the number of entries
7605 * deleted, so if it's greater than 0 then we know we actually
7606 * did something and we need to remove.
/* Main reporting/repair loop over every remaining extent record. */
7620 cache = search_cache_extent(extent_cache, 0);
7623 rec = container_of(cache, struct extent_record, cache);
7624 if (rec->num_duplicates) {
7625 fprintf(stderr, "extent item %llu has multiple extent "
7626 "items\n", (unsigned long long)rec->start);
7631 if (rec->refs != rec->extent_item_refs) {
7632 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7633 (unsigned long long)rec->start,
7634 (unsigned long long)rec->nr);
7635 fprintf(stderr, "extent item %llu, found %llu\n",
7636 (unsigned long long)rec->extent_item_refs,
7637 (unsigned long long)rec->refs);
7638 ret = record_orphan_data_extents(root->fs_info, rec);
7645 * we can't use the extent to repair file
7646 * extent, let the fallback method handle it.
7648 if (!fixed && repair) {
7649 ret = fixup_extent_refs(
7660 if (all_backpointers_checked(rec, 1)) {
7661 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7662 (unsigned long long)rec->start,
7663 (unsigned long long)rec->nr);
7665 if (!fixed && !recorded && repair) {
7666 ret = fixup_extent_refs(root->fs_info,
7675 if (!rec->owner_ref_checked) {
7676 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7677 (unsigned long long)rec->start,
7678 (unsigned long long)rec->nr);
7679 if (!fixed && !recorded && repair) {
7680 ret = fixup_extent_refs(root->fs_info,
7689 if (rec->bad_full_backref) {
7690 fprintf(stderr, "bad full backref, on [%llu]\n",
7691 (unsigned long long)rec->start);
7693 ret = fixup_extent_flags(root->fs_info, rec);
7702 * Although it's not a extent ref's problem, we reuse this
7703 * routine for error reporting.
7704 * No repair function yet.
7706 if (rec->crossing_stripes) {
7708 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7709 rec->start, rec->start + rec->max_size);
7714 if (rec->wrong_chunk_type) {
7716 "bad extent [%llu, %llu), type mismatch with chunk\n",
7717 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it and unpin its range if it's clean. */
7722 remove_cache_extent(extent_cache, cache);
7723 free_all_extent_backrefs(rec);
7724 if (!init_extent_tree && repair && (!cur_err || fixed))
7725 clear_extent_dirty(root->fs_info->excluded_extents,
7727 rec->start + rec->max_size - 1,
7733 if (ret && ret != -EAGAIN) {
7734 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7737 struct btrfs_trans_handle *trans;
7739 root = root->fs_info->extent_root;
7740 trans = btrfs_start_transaction(root, 1);
7741 if (IS_ERR(trans)) {
7742 ret = PTR_ERR(trans);
7746 btrfs_fix_block_accounting(trans, root);
7747 ret = btrfs_commit_transaction(trans, root);
7752 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of @length bytes with
 * @num_stripes stripes, given its block-group @type flags: RAID0 splits
 * evenly, RAID10 stores each byte twice, RAID5/6 reserve one/two parity
 * stripes, and everything else (single/DUP/RAID1) uses the full length.
 * NOTE(review): the trailing return of stripe_size is missing from this
 * extraction.
 */
7758 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7762 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7763 stripe_size = length;
7764 stripe_size /= num_stripes;
7765 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7766 stripe_size = length * 2;
7767 stripe_size /= num_stripes;
7768 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7769 stripe_size = length;
7770 stripe_size /= (num_stripes - 1);
7771 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7772 stripe_size = length;
7773 stripe_size /= (num_stripes - 2);
7775 stripe_size = length;
7781 * Check the chunk with its block group/dev list ref:
7782 * Return 0 if all refs seems valid.
7783 * Return 1 if part of refs seems valid, need later check for rebuild ref
7784 * like missing block group and needs to search extent tree to rebuild them.
7785 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against its block group and its per-stripe
 * device extents (see the return-value contract in the comment above this
 * function).  A matching block group is unlinked from the cache and
 * attached as chunk_rec->bg_rec; matching device extents are moved onto
 * chunk_rec->dextents.  @silent suppresses the mismatch messages.
 */
7787 static int check_chunk_refs(struct chunk_record *chunk_rec,
7788 struct block_group_tree *block_group_cache,
7789 struct device_extent_tree *dev_extent_cache,
7792 struct cache_extent *block_group_item;
7793 struct block_group_record *block_group_rec;
7794 struct cache_extent *dev_extent_item;
7795 struct device_extent_record *dev_extent_rec;
7799 int metadump_v2 = 0;
7803 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7806 if (block_group_item) {
7807 block_group_rec = container_of(block_group_item,
7808 struct block_group_record,
7810 if (chunk_rec->length != block_group_rec->offset ||
7811 chunk_rec->offset != block_group_rec->objectid ||
7813 chunk_rec->type_flags != block_group_rec->flags)) {
7816 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7817 chunk_rec->objectid,
7822 chunk_rec->type_flags,
7823 block_group_rec->objectid,
7824 block_group_rec->type,
7825 block_group_rec->offset,
7826 block_group_rec->offset,
7827 block_group_rec->objectid,
7828 block_group_rec->flags);
7831 list_del_init(&block_group_rec->list);
7832 chunk_rec->bg_rec = block_group_rec;
7837 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7838 chunk_rec->objectid,
7843 chunk_rec->type_flags);
/* Now verify each stripe has a matching dev extent of the right size. */
7850 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7851 chunk_rec->num_stripes);
7852 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7853 devid = chunk_rec->stripes[i].devid;
7854 offset = chunk_rec->stripes[i].offset;
7855 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7856 devid, offset, length);
7857 if (dev_extent_item) {
7858 dev_extent_rec = container_of(dev_extent_item,
7859 struct device_extent_record,
7861 if (dev_extent_rec->objectid != devid ||
7862 dev_extent_rec->offset != offset ||
7863 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7864 dev_extent_rec->length != length) {
7867 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7868 chunk_rec->objectid,
7871 chunk_rec->stripes[i].devid,
7872 chunk_rec->stripes[i].offset,
7873 dev_extent_rec->objectid,
7874 dev_extent_rec->offset,
7875 dev_extent_rec->length);
7878 list_move(&dev_extent_rec->chunk_list,
7879 &chunk_rec->dextents);
7884 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7885 chunk_rec->objectid,
7888 chunk_rec->stripes[i].devid,
7889 chunk_rec->stripes[i].offset);
7896 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Validate every cached chunk via check_chunk_refs(), sorting chunk
 * records onto the caller's @good / @rebuild / @bad lists (any may be
 * NULL).  Afterwards report block groups and device extents that were
 * left unclaimed by any chunk.  @silent suppresses messages.
 */
7897 int check_chunks(struct cache_tree *chunk_cache,
7898 struct block_group_tree *block_group_cache,
7899 struct device_extent_tree *dev_extent_cache,
7900 struct list_head *good, struct list_head *bad,
7901 struct list_head *rebuild, int silent)
7903 struct cache_extent *chunk_item;
7904 struct chunk_record *chunk_rec;
7905 struct block_group_record *bg_rec;
7906 struct device_extent_record *dext_rec;
7910 chunk_item = first_cache_extent(chunk_cache);
7911 while (chunk_item) {
7912 chunk_rec = container_of(chunk_item, struct chunk_record,
7914 err = check_chunk_refs(chunk_rec, block_group_cache,
7915 dev_extent_cache, silent);
7918 if (err == 0 && good)
7919 list_add_tail(&chunk_rec->list, good);
7920 if (err > 0 && rebuild)
7921 list_add_tail(&chunk_rec->list, rebuild);
7923 list_add_tail(&chunk_rec->list, bad);
7924 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists had no owning chunk. */
7927 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7930 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7938 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7942 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all device extents belonging to @dev_rec's devid and
 * compare the total against the device item's bytes_used, reporting a
 * mismatch.  Matching extents are unlinked from the orphan device list as
 * they are accounted.
 */
7953 static int check_device_used(struct device_record *dev_rec,
7954 struct device_extent_tree *dext_cache)
7956 struct cache_extent *cache;
7957 struct device_extent_record *dev_extent_rec;
7960 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7962 dev_extent_rec = container_of(cache,
7963 struct device_extent_record,
7965 if (dev_extent_rec->objectid != dev_rec->devid)
7968 list_del_init(&dev_extent_rec->device_list);
7969 total_byte += dev_extent_rec->length;
7970 cache = next_cache_extent(cache);
7973 if (total_byte != dev_rec->byte_used) {
7975 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7976 total_byte, dev_rec->byte_used, dev_rec->objectid,
7977 dev_rec->type, dev_rec->offset);
7984 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() for every device item in @dev_cache, then
 * report any device extents left without a matching device item.
 */
7985 static int check_devices(struct rb_root *dev_cache,
7986 struct device_extent_tree *dev_extent_cache)
7988 struct rb_node *dev_node;
7989 struct device_record *dev_rec;
7990 struct device_extent_record *dext_rec;
7994 dev_node = rb_first(dev_cache);
7996 dev_rec = container_of(dev_node, struct device_record, node);
7997 err = check_device_used(dev_rec, dev_extent_cache);
8001 dev_node = rb_next(dev_node);
8003 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8006 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8007 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (its bytenr,
 * level, drop state, snapshot generation) and append it to @head for the
 * extent/chunk scan.  @drop_key may describe the drop progress key for a
 * partially-deleted root.
 */
8014 static int add_root_item_to_list(struct list_head *head,
8015 u64 objectid, u64 bytenr, u64 last_snapshot,
8016 u8 level, u8 drop_level,
8017 int level_size, struct btrfs_key *drop_key)
8020 struct root_item_record *ri_rec;
8021 ri_rec = malloc(sizeof(*ri_rec));
8024 ri_rec->bytenr = bytenr;
8025 ri_rec->objectid = objectid;
8026 ri_rec->level = level;
8027 ri_rec->level_size = level_size;
8028 ri_rec->drop_level = drop_level;
8029 ri_rec->last_snapshot = last_snapshot;
8031 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8032 list_add_tail(&ri_rec->list, head);
/* Drain @list, unlinking (and presumably freeing) each root_item_record. */
8037 static void free_root_item_list(struct list_head *list)
8039 struct root_item_record *ri_rec;
8041 while (!list_empty(list)) {
8042 ri_rec = list_first_entry(list, struct root_item_record,
8044 list_del_init(&ri_rec->list);
/*
 * Consume a list of root_item_records: read each root's node, queue it
 * via add_root_to_pending(), then drive run_next_block() until the
 * pending work drains.  When rebuilding the extent tree, snapshots are
 * processed one root at a time (see the comment in the body); afterwards
 * a final run_next_block() loop flushes anything still pending.
 * NOTE(review): extraction gaps hide error handling and loop conditions.
 */
8049 static int deal_root_from_list(struct list_head *list,
8050 struct btrfs_root *root,
8051 struct block_info *bits,
8053 struct cache_tree *pending,
8054 struct cache_tree *seen,
8055 struct cache_tree *reada,
8056 struct cache_tree *nodes,
8057 struct cache_tree *extent_cache,
8058 struct cache_tree *chunk_cache,
8059 struct rb_root *dev_cache,
8060 struct block_group_tree *block_group_cache,
8061 struct device_extent_tree *dev_extent_cache)
8066 while (!list_empty(list)) {
8067 struct root_item_record *rec;
8068 struct extent_buffer *buf;
8069 rec = list_entry(list->next,
8070 struct root_item_record, list);
8072 buf = read_tree_block(root->fs_info->tree_root,
8073 rec->bytenr, rec->level_size, 0);
8074 if (!extent_buffer_uptodate(buf)) {
8075 free_extent_buffer(buf);
8079 add_root_to_pending(buf, extent_cache, pending,
8080 seen, nodes, rec->objectid);
8082 * To rebuild extent tree, we need deal with snapshot
8083 * one by one, otherwise we deal with node firstly which
8084 * can maximize readahead.
8087 ret = run_next_block(root, bits, bits_nr, &last,
8088 pending, seen, reada, nodes,
8089 extent_cache, chunk_cache,
8090 dev_cache, block_group_cache,
8091 dev_extent_cache, rec);
8095 free_extent_buffer(buf);
8096 list_del(&rec->list);
8102 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8103 reada, nodes, extent_cache, chunk_cache,
8104 dev_cache, block_group_cache,
8105 dev_extent_cache, NULL);
/*
 * Top-level driver for the extent/chunk/device consistency pass:
 *  1. initialize all caches and hook them into fs_info,
 *  2. queue the tree root and chunk root, then every ROOT_ITEM found in
 *     the tree root (split into normal vs. partially-dropped trees),
 *  3. scan all trees via deal_root_from_list(),
 *  4. run check_chunks(), check_extent_refs(), check_devices(),
 *  5. tear everything down (two free paths are visible: normal exit and
 *     a loop-again/error path that frees in a different order).
 * check_block() may return -EAGAIN after fixing something, in which case
 * the whole scan restarts (see the comment before deal_root_from_list).
 * NOTE(review): extraction gaps hide labels, goto targets and some error
 * branches in this body; documented as-is.
 */
8115 static int check_chunks_and_extents(struct btrfs_root *root)
8117 struct rb_root dev_cache;
8118 struct cache_tree chunk_cache;
8119 struct block_group_tree block_group_cache;
8120 struct device_extent_tree dev_extent_cache;
8121 struct cache_tree extent_cache;
8122 struct cache_tree seen;
8123 struct cache_tree pending;
8124 struct cache_tree reada;
8125 struct cache_tree nodes;
8126 struct extent_io_tree excluded_extents;
8127 struct cache_tree corrupt_blocks;
8128 struct btrfs_path path;
8129 struct btrfs_key key;
8130 struct btrfs_key found_key;
8132 struct block_info *bits;
8134 struct extent_buffer *leaf;
8136 struct btrfs_root_item ri;
8137 struct list_head dropping_trees;
8138 struct list_head normal_trees;
8139 struct btrfs_root *root1;
8144 dev_cache = RB_ROOT;
8145 cache_tree_init(&chunk_cache);
8146 block_group_tree_init(&block_group_cache);
8147 device_extent_tree_init(&dev_extent_cache);
8149 cache_tree_init(&extent_cache);
8150 cache_tree_init(&seen);
8151 cache_tree_init(&pending);
8152 cache_tree_init(&nodes);
8153 cache_tree_init(&reada);
8154 cache_tree_init(&corrupt_blocks);
8155 extent_io_tree_init(&excluded_extents);
8156 INIT_LIST_HEAD(&dropping_trees);
8157 INIT_LIST_HEAD(&normal_trees);
/* Expose the local caches to the rest of fsck through fs_info. */
8160 root->fs_info->excluded_extents = &excluded_extents;
8161 root->fs_info->fsck_extent_cache = &extent_cache;
8162 root->fs_info->free_extent_hook = free_extent_hook;
8163 root->fs_info->corrupt_blocks = &corrupt_blocks;
8167 bits = malloc(bits_nr * sizeof(struct block_info));
8173 if (ctx.progress_enabled) {
8174 ctx.tp = TASK_EXTENTS;
8175 task_start(ctx.info);
/* Always scan the tree root and the chunk root. */
8179 root1 = root->fs_info->tree_root;
8180 level = btrfs_header_level(root1->node);
8181 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8182 root1->node->start, 0, level, 0,
8183 btrfs_level_size(root1, level), NULL);
8186 root1 = root->fs_info->chunk_root;
8187 level = btrfs_header_level(root1->node);
8188 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8189 root1->node->start, 0, level, 0,
8190 btrfs_level_size(root1, level), NULL);
/* Enumerate every ROOT_ITEM in the tree root. */
8193 btrfs_init_path(&path);
8196 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8197 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8202 leaf = path.nodes[0];
8203 slot = path.slots[0];
8204 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8205 ret = btrfs_next_leaf(root, &path);
8208 leaf = path.nodes[0];
8209 slot = path.slots[0];
8211 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8212 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8213 unsigned long offset;
8216 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8217 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8218 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress == 0 means the root is not mid-deletion. */
8219 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8220 level = btrfs_root_level(&ri);
8221 level_size = btrfs_level_size(root, level);
8222 ret = add_root_item_to_list(&normal_trees,
8224 btrfs_root_bytenr(&ri),
8225 last_snapshot, level,
8226 0, level_size, NULL);
8230 level = btrfs_root_level(&ri);
8231 level_size = btrfs_level_size(root, level);
8232 objectid = found_key.objectid;
8233 btrfs_disk_key_to_cpu(&found_key,
8235 ret = add_root_item_to_list(&dropping_trees,
8237 btrfs_root_bytenr(&ri),
8238 last_snapshot, level,
8240 level_size, &found_key);
8247 btrfs_release_path(&path);
8250 * check_block can return -EAGAIN if it fixes something, please keep
8251 * this in mind when dealing with return values from these functions, if
8252 * we get -EAGAIN we want to fall through and restart the loop.
8254 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8255 &seen, &reada, &nodes, &extent_cache,
8256 &chunk_cache, &dev_cache, &block_group_cache,
8263 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8264 &pending, &seen, &reada, &nodes,
8265 &extent_cache, &chunk_cache, &dev_cache,
8266 &block_group_cache, &dev_extent_cache);
8273 ret = check_chunks(&chunk_cache, &block_group_cache,
8274 &dev_extent_cache, NULL, NULL, NULL, 0);
8281 ret = check_extent_refs(root, &extent_cache);
8288 ret = check_devices(&dev_cache, &dev_extent_cache);
8293 task_stop(ctx.info);
/* Normal-exit teardown: unhook from fs_info and free all caches. */
8295 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8296 extent_io_tree_cleanup(&excluded_extents);
8297 root->fs_info->fsck_extent_cache = NULL;
8298 root->fs_info->free_extent_hook = NULL;
8299 root->fs_info->corrupt_blocks = NULL;
8300 root->fs_info->excluded_extents = NULL;
8303 free_chunk_cache_tree(&chunk_cache);
8304 free_device_cache_tree(&dev_cache);
8305 free_block_group_tree(&block_group_cache);
8306 free_device_extent_tree(&dev_extent_cache);
8307 free_extent_cache_tree(&seen);
8308 free_extent_cache_tree(&pending);
8309 free_extent_cache_tree(&reada);
8310 free_extent_cache_tree(&nodes);
/* Loop-again/error teardown path (reached via a label not visible here). */
8313 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8314 free_extent_cache_tree(&seen);
8315 free_extent_cache_tree(&pending);
8316 free_extent_cache_tree(&reada);
8317 free_extent_cache_tree(&nodes);
8318 free_chunk_cache_tree(&chunk_cache);
8319 free_block_group_tree(&block_group_cache);
8320 free_device_cache_tree(&dev_cache);
8321 free_device_extent_tree(&dev_extent_cache);
8322 free_extent_record_cache(root->fs_info, &extent_cache);
8323 free_root_item_list(&normal_trees);
8324 free_root_item_list(&dropping_trees);
8325 extent_io_tree_cleanup(&excluded_extents);
/*
 * Replace @root's node with a fresh empty leaf (or reuse the old buffer
 * when @overwrite is set — the visible extent_buffer_get(c) branches
 * suggest both paths).  Initializes the new header (level, bytenr,
 * generation, backref rev, owner, fsid, chunk-tree uuid), updates the
 * root item, and puts the root on the dirty list.
 * NOTE(review): the overwrite branch and root->node assignment are partly
 * hidden by extraction gaps — confirm against the full source.
 */
8329 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8330 struct btrfs_root *root, int overwrite)
8332 struct extent_buffer *c;
8333 struct extent_buffer *old = root->node;
8336 struct btrfs_disk_key disk_key = {0,0,0};
8342 extent_buffer_get(c);
8345 c = btrfs_alloc_free_block(trans, root,
8346 btrfs_level_size(root, 0),
8347 root->root_key.objectid,
8348 &disk_key, level, 0, 0);
8351 extent_buffer_get(c);
8355 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8356 btrfs_set_header_level(c, level);
8357 btrfs_set_header_bytenr(c, c->start);
8358 btrfs_set_header_generation(c, trans->transid);
8359 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8360 btrfs_set_header_owner(c, root->root_key.objectid);
8362 write_extent_buffer(c, root->fs_info->fsid,
8363 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8365 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8366 btrfs_header_chunk_tree_uuid(c),
8369 btrfs_mark_buffer_dirty(c);
8371 * this case can happen in the following case:
8373 * 1.overwrite previous root.
8375 * 2.reinit reloc data root, this is because we skip pin
8376 * down reloc data tree before which means we can allocate
8377 * same block bytenr here.
8379 if (old->start == c->start) {
8380 btrfs_set_root_generation(&root->root_item,
8382 root->root_item.level = btrfs_header_level(root->node);
8383 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8384 &root->root_key, &root->root_item);
8386 free_extent_buffer(c);
8390 free_extent_buffer(old);
8392 add_root_to_dirty_list(root);
8396 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8397 struct extent_buffer *eb, int tree_root)
8399 struct extent_buffer *tmp;
8400 struct btrfs_root_item *ri;
8401 struct btrfs_key key;
8404 int level = btrfs_header_level(eb);
8410 * If we have pinned this block before, don't pin it again.
8411 * This can not only avoid forever loop with broken filesystem
8412 * but also give us some speedups.
8414 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8415 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8418 btrfs_pin_extent(fs_info, eb->start, eb->len);
8420 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8421 nritems = btrfs_header_nritems(eb);
8422 for (i = 0; i < nritems; i++) {
8424 btrfs_item_key_to_cpu(eb, &key, i);
8425 if (key.type != BTRFS_ROOT_ITEM_KEY)
8427 /* Skip the extent root and reloc roots */
8428 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8429 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8430 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8432 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8433 bytenr = btrfs_disk_root_bytenr(eb, ri);
8436 * If at any point we start needing the real root we
8437 * will have to build a stump root for the root we are
8438 * in, but for now this doesn't actually use the root so
8439 * just pass in extent_root.
8441 tmp = read_tree_block(fs_info->extent_root, bytenr,
8443 if (!extent_buffer_uptodate(tmp)) {
8444 fprintf(stderr, "Error reading root block\n");
8447 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8448 free_extent_buffer(tmp);
8452 bytenr = btrfs_node_blockptr(eb, i);
8454 /* If we aren't the tree root don't read the block */
8455 if (level == 1 && !tree_root) {
8456 btrfs_pin_extent(fs_info, bytenr, leafsize);
8460 tmp = read_tree_block(fs_info->extent_root, bytenr,
8462 if (!extent_buffer_uptodate(tmp)) {
8463 fprintf(stderr, "Error reading tree block\n");
8466 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8467 free_extent_buffer(tmp);
8476 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8480 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8484 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8487 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8489 struct btrfs_block_group_cache *cache;
8490 struct btrfs_path *path;
8491 struct extent_buffer *leaf;
8492 struct btrfs_chunk *chunk;
8493 struct btrfs_key key;
8497 path = btrfs_alloc_path();
8502 key.type = BTRFS_CHUNK_ITEM_KEY;
8505 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8507 btrfs_free_path(path);
8512 * We do this in case the block groups were screwed up and had alloc
8513 * bits that aren't actually set on the chunks. This happens with
8514 * restored images every time and could happen in real life I guess.
8516 fs_info->avail_data_alloc_bits = 0;
8517 fs_info->avail_metadata_alloc_bits = 0;
8518 fs_info->avail_system_alloc_bits = 0;
8520 /* First we need to create the in-memory block groups */
8522 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8523 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8525 btrfs_free_path(path);
8533 leaf = path->nodes[0];
8534 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8535 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8540 chunk = btrfs_item_ptr(leaf, path->slots[0],
8541 struct btrfs_chunk);
8542 btrfs_add_block_group(fs_info, 0,
8543 btrfs_chunk_type(leaf, chunk),
8544 key.objectid, key.offset,
8545 btrfs_chunk_length(leaf, chunk));
8546 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8547 key.offset + btrfs_chunk_length(leaf, chunk),
8553 cache = btrfs_lookup_first_block_group(fs_info, start);
8557 start = cache->key.objectid + cache->key.offset;
8560 btrfs_free_path(path);
8564 static int reset_balance(struct btrfs_trans_handle *trans,
8565 struct btrfs_fs_info *fs_info)
8567 struct btrfs_root *root = fs_info->tree_root;
8568 struct btrfs_path *path;
8569 struct extent_buffer *leaf;
8570 struct btrfs_key key;
8571 int del_slot, del_nr = 0;
8575 path = btrfs_alloc_path();
8579 key.objectid = BTRFS_BALANCE_OBJECTID;
8580 key.type = BTRFS_BALANCE_ITEM_KEY;
8583 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8588 goto reinit_data_reloc;
8593 ret = btrfs_del_item(trans, root, path);
8596 btrfs_release_path(path);
8598 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8599 key.type = BTRFS_ROOT_ITEM_KEY;
8602 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8606 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8611 ret = btrfs_del_items(trans, root, path,
8618 btrfs_release_path(path);
8621 ret = btrfs_search_slot(trans, root, &key, path,
8628 leaf = path->nodes[0];
8629 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8630 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8632 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8637 del_slot = path->slots[0];
8646 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8650 btrfs_release_path(path);
8653 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8654 key.type = BTRFS_ROOT_ITEM_KEY;
8655 key.offset = (u64)-1;
8656 root = btrfs_read_fs_root(fs_info, &key);
8658 fprintf(stderr, "Error reading data reloc tree\n");
8659 ret = PTR_ERR(root);
8662 record_root_in_trans(trans, root);
8663 ret = btrfs_fsck_reinit_root(trans, root, 0);
8666 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8668 btrfs_free_path(path);
8672 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8673 struct btrfs_fs_info *fs_info)
8679 * The only reason we don't do this is because right now we're just
8680 * walking the trees we find and pinning down their bytes, we don't look
8681 * at any of the leaves. In order to do mixed groups we'd have to check
8682 * the leaves of any fs roots and pin down the bytes for any file
8683 * extents we find. Not hard but why do it if we don't have to?
8685 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8686 fprintf(stderr, "We don't support re-initing the extent tree "
8687 "for mixed block groups yet, please notify a btrfs "
8688 "developer you want to do this so they can add this "
8689 "functionality.\n");
8694 * first we need to walk all of the trees except the extent tree and pin
8695 * down the bytes that are in use so we don't overwrite any existing
8698 ret = pin_metadata_blocks(fs_info);
8700 fprintf(stderr, "error pinning down used bytes\n");
8705 * Need to drop all the block groups since we're going to recreate all
8708 btrfs_free_block_groups(fs_info);
8709 ret = reset_block_groups(fs_info);
8711 fprintf(stderr, "error resetting the block groups\n");
8715 /* Ok we can allocate now, reinit the extent root */
8716 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8718 fprintf(stderr, "extent root initialization failed\n");
8720 * When the transaction code is updated we should end the
8721 * transaction, but for now progs only knows about commit so
8722 * just return an error.
8728 * Now we have all the in-memory block groups setup so we can make
8729 * allocations properly, and the metadata we care about is safe since we
8730 * pinned all of it above.
8733 struct btrfs_block_group_cache *cache;
8735 cache = btrfs_lookup_first_block_group(fs_info, start);
8738 start = cache->key.objectid + cache->key.offset;
8739 ret = btrfs_insert_item(trans, fs_info->extent_root,
8740 &cache->key, &cache->item,
8741 sizeof(cache->item));
8743 fprintf(stderr, "Error adding block group\n");
8746 btrfs_extent_post_op(trans, fs_info->extent_root);
8749 ret = reset_balance(trans, fs_info);
8751 fprintf(stderr, "error reseting the pending balance\n");
8756 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8758 struct btrfs_path *path;
8759 struct btrfs_trans_handle *trans;
8760 struct btrfs_key key;
8763 printf("Recowing metadata block %llu\n", eb->start);
8764 key.objectid = btrfs_header_owner(eb);
8765 key.type = BTRFS_ROOT_ITEM_KEY;
8766 key.offset = (u64)-1;
8768 root = btrfs_read_fs_root(root->fs_info, &key);
8770 fprintf(stderr, "Couldn't find owner root %llu\n",
8772 return PTR_ERR(root);
8775 path = btrfs_alloc_path();
8779 trans = btrfs_start_transaction(root, 1);
8780 if (IS_ERR(trans)) {
8781 btrfs_free_path(path);
8782 return PTR_ERR(trans);
8785 path->lowest_level = btrfs_header_level(eb);
8786 if (path->lowest_level)
8787 btrfs_node_key_to_cpu(eb, &key, 0);
8789 btrfs_item_key_to_cpu(eb, &key, 0);
8791 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8792 btrfs_commit_transaction(trans, root);
8793 btrfs_free_path(path);
8797 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8799 struct btrfs_path *path;
8800 struct btrfs_trans_handle *trans;
8801 struct btrfs_key key;
8804 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8805 bad->key.type, bad->key.offset);
8806 key.objectid = bad->root_id;
8807 key.type = BTRFS_ROOT_ITEM_KEY;
8808 key.offset = (u64)-1;
8810 root = btrfs_read_fs_root(root->fs_info, &key);
8812 fprintf(stderr, "Couldn't find owner root %llu\n",
8814 return PTR_ERR(root);
8817 path = btrfs_alloc_path();
8821 trans = btrfs_start_transaction(root, 1);
8822 if (IS_ERR(trans)) {
8823 btrfs_free_path(path);
8824 return PTR_ERR(trans);
8827 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8833 ret = btrfs_del_item(trans, root, path);
8835 btrfs_commit_transaction(trans, root);
8836 btrfs_free_path(path);
8840 static int zero_log_tree(struct btrfs_root *root)
8842 struct btrfs_trans_handle *trans;
8845 trans = btrfs_start_transaction(root, 1);
8846 if (IS_ERR(trans)) {
8847 ret = PTR_ERR(trans);
8850 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8851 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8852 ret = btrfs_commit_transaction(trans, root);
8856 static int populate_csum(struct btrfs_trans_handle *trans,
8857 struct btrfs_root *csum_root, char *buf, u64 start,
8864 while (offset < len) {
8865 sectorsize = csum_root->sectorsize;
8866 ret = read_extent_data(csum_root, buf, start + offset,
8870 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8871 start + offset, buf, sectorsize);
8874 offset += sectorsize;
8879 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8880 struct btrfs_root *csum_root,
8881 struct btrfs_root *cur_root)
8883 struct btrfs_path *path;
8884 struct btrfs_key key;
8885 struct extent_buffer *node;
8886 struct btrfs_file_extent_item *fi;
8893 path = btrfs_alloc_path();
8896 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8906 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8909 /* Iterate all regular file extents and fill its csum */
8911 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8913 if (key.type != BTRFS_EXTENT_DATA_KEY)
8915 node = path->nodes[0];
8916 slot = path->slots[0];
8917 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8918 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8920 start = btrfs_file_extent_disk_bytenr(node, fi);
8921 len = btrfs_file_extent_disk_num_bytes(node, fi);
8923 ret = populate_csum(trans, csum_root, buf, start, len);
8930 * TODO: if next leaf is corrupted, jump to nearest next valid
8933 ret = btrfs_next_item(cur_root, path);
8943 btrfs_free_path(path);
8948 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8949 struct btrfs_root *csum_root)
8951 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8952 struct btrfs_path *path;
8953 struct btrfs_root *tree_root = fs_info->tree_root;
8954 struct btrfs_root *cur_root;
8955 struct extent_buffer *node;
8956 struct btrfs_key key;
8960 path = btrfs_alloc_path();
8964 key.objectid = BTRFS_FS_TREE_OBJECTID;
8966 key.type = BTRFS_ROOT_ITEM_KEY;
8968 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8977 node = path->nodes[0];
8978 slot = path->slots[0];
8979 btrfs_item_key_to_cpu(node, &key, slot);
8980 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8982 if (key.type != BTRFS_ROOT_ITEM_KEY)
8984 if (!is_fstree(key.objectid))
8986 key.offset = (u64)-1;
8988 cur_root = btrfs_read_fs_root(fs_info, &key);
8989 if (IS_ERR(cur_root) || !cur_root) {
8990 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8994 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8999 ret = btrfs_next_item(tree_root, path);
9009 btrfs_free_path(path);
9013 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9014 struct btrfs_root *csum_root)
9016 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9017 struct btrfs_path *path;
9018 struct btrfs_extent_item *ei;
9019 struct extent_buffer *leaf;
9021 struct btrfs_key key;
9024 path = btrfs_alloc_path();
9029 key.type = BTRFS_EXTENT_ITEM_KEY;
9032 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9034 btrfs_free_path(path);
9038 buf = malloc(csum_root->sectorsize);
9040 btrfs_free_path(path);
9045 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9046 ret = btrfs_next_leaf(extent_root, path);
9054 leaf = path->nodes[0];
9056 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9057 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9062 ei = btrfs_item_ptr(leaf, path->slots[0],
9063 struct btrfs_extent_item);
9064 if (!(btrfs_extent_flags(leaf, ei) &
9065 BTRFS_EXTENT_FLAG_DATA)) {
9070 ret = populate_csum(trans, csum_root, buf, key.objectid,
9077 btrfs_free_path(path);
9083 * Recalculate the csum and put it into the csum tree.
9085 * Extent tree init will wipe out all the extent info, so in that case, we
9086 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9087 * will use fs/subvol trees to init the csum tree.
9089 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9090 struct btrfs_root *csum_root,
9094 return fill_csum_tree_from_fs(trans, csum_root);
9096 return fill_csum_tree_from_extent(trans, csum_root);
9099 struct root_item_info {
9100 /* level of the root */
9102 /* number of nodes at this level, must be 1 for a root */
9106 struct cache_extent cache_extent;
9109 static struct cache_tree *roots_info_cache = NULL;
9111 static void free_roots_info_cache(void)
9113 if (!roots_info_cache)
9116 while (!cache_tree_empty(roots_info_cache)) {
9117 struct cache_extent *entry;
9118 struct root_item_info *rii;
9120 entry = first_cache_extent(roots_info_cache);
9123 remove_cache_extent(roots_info_cache, entry);
9124 rii = container_of(entry, struct root_item_info, cache_extent);
9128 free(roots_info_cache);
9129 roots_info_cache = NULL;
9132 static int build_roots_info_cache(struct btrfs_fs_info *info)
9135 struct btrfs_key key;
9136 struct extent_buffer *leaf;
9137 struct btrfs_path *path;
9139 if (!roots_info_cache) {
9140 roots_info_cache = malloc(sizeof(*roots_info_cache));
9141 if (!roots_info_cache)
9143 cache_tree_init(roots_info_cache);
9146 path = btrfs_alloc_path();
9151 key.type = BTRFS_EXTENT_ITEM_KEY;
9154 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9157 leaf = path->nodes[0];
9160 struct btrfs_key found_key;
9161 struct btrfs_extent_item *ei;
9162 struct btrfs_extent_inline_ref *iref;
9163 int slot = path->slots[0];
9168 struct cache_extent *entry;
9169 struct root_item_info *rii;
9171 if (slot >= btrfs_header_nritems(leaf)) {
9172 ret = btrfs_next_leaf(info->extent_root, path);
9179 leaf = path->nodes[0];
9180 slot = path->slots[0];
9183 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9185 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9186 found_key.type != BTRFS_METADATA_ITEM_KEY)
9189 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9190 flags = btrfs_extent_flags(leaf, ei);
9192 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9193 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9196 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9197 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9198 level = found_key.offset;
9200 struct btrfs_tree_block_info *info;
9202 info = (struct btrfs_tree_block_info *)(ei + 1);
9203 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9204 level = btrfs_tree_block_level(leaf, info);
9208 * For a root extent, it must be of the following type and the
9209 * first (and only one) iref in the item.
9211 type = btrfs_extent_inline_ref_type(leaf, iref);
9212 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9215 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9216 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9218 rii = malloc(sizeof(struct root_item_info));
9223 rii->cache_extent.start = root_id;
9224 rii->cache_extent.size = 1;
9225 rii->level = (u8)-1;
9226 entry = &rii->cache_extent;
9227 ret = insert_cache_extent(roots_info_cache, entry);
9230 rii = container_of(entry, struct root_item_info,
9234 ASSERT(rii->cache_extent.start == root_id);
9235 ASSERT(rii->cache_extent.size == 1);
9237 if (level > rii->level || rii->level == (u8)-1) {
9239 rii->bytenr = found_key.objectid;
9240 rii->gen = btrfs_extent_generation(leaf, ei);
9241 rii->node_count = 1;
9242 } else if (level == rii->level) {
9250 btrfs_free_path(path);
9255 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9256 struct btrfs_path *path,
9257 const struct btrfs_key *root_key,
9258 const int read_only_mode)
9260 const u64 root_id = root_key->objectid;
9261 struct cache_extent *entry;
9262 struct root_item_info *rii;
9263 struct btrfs_root_item ri;
9264 unsigned long offset;
9266 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9269 "Error: could not find extent items for root %llu\n",
9270 root_key->objectid);
9274 rii = container_of(entry, struct root_item_info, cache_extent);
9275 ASSERT(rii->cache_extent.start == root_id);
9276 ASSERT(rii->cache_extent.size == 1);
9278 if (rii->node_count != 1) {
9280 "Error: could not find btree root extent for root %llu\n",
9285 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9286 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9288 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9289 btrfs_root_level(&ri) != rii->level ||
9290 btrfs_root_generation(&ri) != rii->gen) {
9293 * If we're in repair mode but our caller told us to not update
9294 * the root item, i.e. just check if it needs to be updated, don't
9295 * print this message, since the caller will call us again shortly
9296 * for the same root item without read only mode (the caller will
9297 * open a transaction first).
9299 if (!(read_only_mode && repair))
9301 "%sroot item for root %llu,"
9302 " current bytenr %llu, current gen %llu, current level %u,"
9303 " new bytenr %llu, new gen %llu, new level %u\n",
9304 (read_only_mode ? "" : "fixing "),
9306 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9307 btrfs_root_level(&ri),
9308 rii->bytenr, rii->gen, rii->level);
9310 if (btrfs_root_generation(&ri) > rii->gen) {
9312 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9313 root_id, btrfs_root_generation(&ri), rii->gen);
9317 if (!read_only_mode) {
9318 btrfs_set_root_bytenr(&ri, rii->bytenr);
9319 btrfs_set_root_level(&ri, rii->level);
9320 btrfs_set_root_generation(&ri, rii->gen);
9321 write_extent_buffer(path->nodes[0], &ri,
9322 offset, sizeof(ri));
9332 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9333 * caused read-only snapshots to be corrupted if they were created at a moment
9334 * when the source subvolume/snapshot had orphan items. The issue was that the
9335 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9336 * node instead of the post orphan cleanup root node.
9337 * So this function, and its callees, just detects and fixes those cases. Even
9338 * though the regression was for read-only snapshots, this function applies to
9339 * any snapshot/subvolume root.
9340 * This must be run before any other repair code - not doing it so, makes other
9341 * repair code delete or modify backrefs in the extent tree for example, which
9342 * will result in an inconsistent fs after repairing the root items.
9344 static int repair_root_items(struct btrfs_fs_info *info)
9346 struct btrfs_path *path = NULL;
9347 struct btrfs_key key;
9348 struct extent_buffer *leaf;
9349 struct btrfs_trans_handle *trans = NULL;
9354 ret = build_roots_info_cache(info);
9358 path = btrfs_alloc_path();
9364 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9365 key.type = BTRFS_ROOT_ITEM_KEY;
9370 * Avoid opening and committing transactions if a leaf doesn't have
9371 * any root items that need to be fixed, so that we avoid rotating
9372 * backup roots unnecessarily.
9375 trans = btrfs_start_transaction(info->tree_root, 1);
9376 if (IS_ERR(trans)) {
9377 ret = PTR_ERR(trans);
9382 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9386 leaf = path->nodes[0];
9389 struct btrfs_key found_key;
9391 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9392 int no_more_keys = find_next_key(path, &key);
9394 btrfs_release_path(path);
9396 ret = btrfs_commit_transaction(trans,
9408 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9410 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9412 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9415 ret = maybe_repair_root_item(info, path, &found_key,
9420 if (!trans && repair) {
9423 btrfs_release_path(path);
9433 free_roots_info_cache();
9434 btrfs_free_path(path);
9436 btrfs_commit_transaction(trans, info->tree_root);
9443 const char * const cmd_check_usage[] = {
9444 "btrfs check [options] <device>",
9445 "Check structural inegrity of a filesystem (unmounted).",
9446 "Check structural inegrity of an unmounted filesystem. Verify internal",
9447 "trees' consistency and item connectivity. In the repair mode try to",
9448 "fix the problems found.",
9449 "WARNING: the repair mode is considered dangerous",
9451 "-s|--super <superblock> use this superblock copy",
9452 "-b|--backup use the backup root copy",
9453 "--repair try to repair the filesystem",
9454 "--readonly run in read-only mode (default)",
9455 "--init-csum-tree create a new CRC tree",
9456 "--init-extent-tree create a new extent tree",
9457 "--check-data-csum verify checkums of data blocks",
9458 "-Q|--qgroup-report print a report on qgroup consistency",
9459 "-E|--subvol-extents <subvolid>",
9460 " print subvolume extents and sharing state",
9461 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9462 "-p|--progress indicate progress",
9466 int cmd_check(int argc, char **argv)
9468 struct cache_tree root_cache;
9469 struct btrfs_root *root;
9470 struct btrfs_fs_info *info;
9473 u64 tree_root_bytenr = 0;
9474 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9477 int init_csum_tree = 0;
9479 int qgroup_report = 0;
9480 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9484 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9485 OPT_CHECK_CSUM, OPT_READONLY };
9486 static const struct option long_options[] = {
9487 { "super", required_argument, NULL, 's' },
9488 { "repair", no_argument, NULL, OPT_REPAIR },
9489 { "readonly", no_argument, NULL, OPT_READONLY },
9490 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9491 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9492 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9493 { "backup", no_argument, NULL, 'b' },
9494 { "subvol-extents", required_argument, NULL, 'E' },
9495 { "qgroup-report", no_argument, NULL, 'Q' },
9496 { "tree-root", required_argument, NULL, 'r' },
9497 { "progress", no_argument, NULL, 'p' },
9501 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9505 case 'a': /* ignored */ break;
9507 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9510 num = arg_strtou64(optarg);
9511 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9513 "ERROR: super mirror should be less than: %d\n",
9514 BTRFS_SUPER_MIRROR_MAX);
9517 bytenr = btrfs_sb_offset(((int)num));
9518 printf("using SB copy %llu, bytenr %llu\n", num,
9519 (unsigned long long)bytenr);
9525 subvolid = arg_strtou64(optarg);
9528 tree_root_bytenr = arg_strtou64(optarg);
9531 ctx.progress_enabled = true;
9535 usage(cmd_check_usage);
9537 printf("enabling repair mode\n");
9539 ctree_flags |= OPEN_CTREE_WRITES;
9545 printf("Creating a new CRC tree\n");
9548 ctree_flags |= OPEN_CTREE_WRITES;
9550 case OPT_INIT_EXTENT:
9551 init_extent_tree = 1;
9552 ctree_flags |= (OPEN_CTREE_WRITES |
9553 OPEN_CTREE_NO_BLOCK_GROUPS);
9556 case OPT_CHECK_CSUM:
9557 check_data_csum = 1;
9561 argc = argc - optind;
9563 if (check_argc_exact(argc, 1))
9564 usage(cmd_check_usage);
9566 if (ctx.progress_enabled) {
9567 ctx.tp = TASK_NOTHING;
9568 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9571 /* This check is the only reason for --readonly to exist */
9572 if (readonly && repair) {
9573 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9578 cache_tree_init(&root_cache);
9580 if((ret = check_mounted(argv[optind])) < 0) {
9581 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9584 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9589 /* only allow partial opening under repair mode */
9591 ctree_flags |= OPEN_CTREE_PARTIAL;
9593 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9596 fprintf(stderr, "Couldn't open file system\n");
9602 root = info->fs_root;
9605 * repair mode will force us to commit transaction which
9606 * will make us fail to load log tree when mounting.
9608 if (repair && btrfs_super_log_root(info->super_copy)) {
9609 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9614 ret = zero_log_tree(root);
9616 fprintf(stderr, "fail to zero log tree\n");
9621 uuid_unparse(info->super_copy->fsid, uuidbuf);
9622 if (qgroup_report) {
9623 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9625 ret = qgroup_verify_all(info);
9627 print_qgroup_report(1);
9631 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9632 subvolid, argv[optind], uuidbuf);
9633 ret = print_extent_state(info, subvolid);
9636 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9638 if (!extent_buffer_uptodate(info->tree_root->node) ||
9639 !extent_buffer_uptodate(info->dev_root->node) ||
9640 !extent_buffer_uptodate(info->chunk_root->node)) {
9641 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9646 if (init_extent_tree || init_csum_tree) {
9647 struct btrfs_trans_handle *trans;
9649 trans = btrfs_start_transaction(info->extent_root, 0);
9650 if (IS_ERR(trans)) {
9651 fprintf(stderr, "Error starting transaction\n");
9652 ret = PTR_ERR(trans);
9656 if (init_extent_tree) {
9657 printf("Creating a new extent tree\n");
9658 ret = reinit_extent_tree(trans, info);
9663 if (init_csum_tree) {
9664 fprintf(stderr, "Reinit crc root\n");
9665 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9667 fprintf(stderr, "crc root initialization failed\n");
9672 ret = fill_csum_tree(trans, info->csum_root,
9675 fprintf(stderr, "crc refilling failed\n");
9680 * Ok now we commit and run the normal fsck, which will add
9681 * extent entries for all of the items it finds.
9683 ret = btrfs_commit_transaction(trans, info->extent_root);
9687 if (!extent_buffer_uptodate(info->extent_root->node)) {
9688 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9692 if (!extent_buffer_uptodate(info->csum_root->node)) {
9693 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9698 if (!ctx.progress_enabled)
9699 fprintf(stderr, "checking extents\n");
9700 ret = check_chunks_and_extents(root);
9702 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9704 ret = repair_root_items(info);
9708 fprintf(stderr, "Fixed %d roots.\n", ret);
9710 } else if (ret > 0) {
9712 "Found %d roots with an outdated root item.\n",
9715 "Please run a filesystem check with the option --repair to fix them.\n");
9720 if (!ctx.progress_enabled)
9721 fprintf(stderr, "checking free space cache\n");
9722 ret = check_space_cache(root);
9727 * We used to have to have these hole extents in between our real
9728 * extents so if we don't have this flag set we need to make sure there
9729 * are no gaps in the file extents for inodes, otherwise we can just
9730 * ignore it when this happens.
9732 no_holes = btrfs_fs_incompat(root->fs_info,
9733 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9734 if (!ctx.progress_enabled)
9735 fprintf(stderr, "checking fs roots\n");
9736 ret = check_fs_roots(root, &root_cache);
9740 fprintf(stderr, "checking csums\n");
9741 ret = check_csums(root);
9745 fprintf(stderr, "checking root refs\n");
9746 ret = check_root_refs(root, &root_cache);
9750 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9751 struct extent_buffer *eb;
9753 eb = list_first_entry(&root->fs_info->recow_ebs,
9754 struct extent_buffer, recow);
9755 list_del_init(&eb->recow);
9756 ret = recow_extent_buffer(root, eb);
9761 while (!list_empty(&delete_items)) {
9762 struct bad_item *bad;
9764 bad = list_first_entry(&delete_items, struct bad_item, list);
9765 list_del_init(&bad->list);
9767 ret = delete_bad_item(root, bad);
9771 if (info->quota_enabled) {
9773 fprintf(stderr, "checking quota groups\n");
9774 err = qgroup_verify_all(info);
9779 if (!list_empty(&root->fs_info->recow_ebs)) {
9780 fprintf(stderr, "Transid errors in file system\n");
9784 print_qgroup_report(0);
9785 if (found_old_backref) { /*
9786 * there was a disk format change when mixed
9787 * backref was in testing tree. The old format
9788 * existed about one week.
9790 printf("\n * Found old mixed backref format. "
9791 "The old format is not supported! *"
9792 "\n * Please mount the FS in readonly mode, "
9793 "backup data and re-format the FS. *\n\n");
9796 printf("found %llu bytes used err is %d\n",
9797 (unsigned long long)bytes_used, ret);
9798 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9799 printf("total tree bytes: %llu\n",
9800 (unsigned long long)total_btree_bytes);
9801 printf("total fs tree bytes: %llu\n",
9802 (unsigned long long)total_fs_tree_bytes);
9803 printf("total extent tree bytes: %llu\n",
9804 (unsigned long long)total_extent_tree_bytes);
9805 printf("btree space waste bytes: %llu\n",
9806 (unsigned long long)btree_space_waste);
9807 printf("file data blocks allocated: %llu\n referenced %llu\n",
9808 (unsigned long long)data_bytes_allocated,
9809 (unsigned long long)data_bytes_referenced);
9811 free_root_recs_tree(&root_cache);
9815 if (ctx.progress_enabled)
9816 task_deinit(ctx.info);