2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02110-1301, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
48 TASK_NOTHING, /* have to be the last element */
53 enum task_position tp;
55 struct task_info *info;
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
81 static char *task_position_string[] = {
83 "checking free space cache",
87 task_period_start(priv->info, 1000 /* 1s */);
89 if (priv->tp == TASK_NOTHING)
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
97 task_period_wait(priv->info);
102 static int print_status_return(void *p)
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
149 struct tree_backref {
150 struct extent_backref node;
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
189 unsigned int filetype:8;
191 unsigned int ref_type;
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
223 struct file_extent_hole {
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
260 /* Hole 2 will be merge center */
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
277 hole = malloc(sizeof(*hole));
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iterate merge with next holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
303 if (hole->start + hole->len >= next->start) {
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
321 hole = (struct file_extent_hole *)data;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole in the record
335 * This will do the hole split and is much restrict than add.
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
346 struct rb_node *node;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
357 if (start + len > hole->start + hole->len)
361 * Now there will be no overflap, delete the hole and re-add the
362 * split(s) if they exists.
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
417 node = rb_first(holes);
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
443 struct rb_root holes;
444 struct list_head orphan_extents;
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
489 struct cache_extent cache;
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
514 struct btrfs_key key;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
528 extent_buffer_get(root->node);
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct inode_backref *tmp;
570 struct orphan_data_extent *src_orphan;
571 struct orphan_data_extent *dst_orphan;
575 rec = malloc(sizeof(*rec));
577 return ERR_PTR(-ENOMEM);
578 memcpy(rec, orig_rec, sizeof(*rec));
580 INIT_LIST_HEAD(&rec->backrefs);
581 INIT_LIST_HEAD(&rec->orphan_extents);
582 rec->holes = RB_ROOT;
584 list_for_each_entry(orig, &orig_rec->backrefs, list) {
585 size = sizeof(*orig) + orig->namelen + 1;
586 backref = malloc(size);
591 memcpy(backref, orig, size);
592 list_add_tail(&backref->list, &rec->backrefs);
594 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595 dst_orphan = malloc(sizeof(*dst_orphan));
600 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
603 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
609 if (!list_empty(&rec->backrefs))
610 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611 list_del(&orig->list);
615 if (!list_empty(&rec->orphan_extents))
616 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617 list_del(&orig->list);
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
629 struct orphan_data_extent *orphan;
631 if (list_empty(orphan_extents))
633 printf("The following data extent is lost in tree %llu:\n",
635 list_for_each_entry(orphan, orphan_extents, list) {
636 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637 orphan->objectid, orphan->offset, orphan->disk_bytenr,
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
644 u64 root_objectid = root->root_key.objectid;
645 int errors = rec->errors;
649 /* reloc root errors, we print its corresponding fs root objectid*/
650 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651 root_objectid = root->root_key.offset;
652 fprintf(stderr, "reloc");
654 fprintf(stderr, "root %llu inode %llu errors %x",
655 (unsigned long long) root_objectid,
656 (unsigned long long) rec->ino, rec->errors);
658 if (errors & I_ERR_NO_INODE_ITEM)
659 fprintf(stderr, ", no inode item");
660 if (errors & I_ERR_NO_ORPHAN_ITEM)
661 fprintf(stderr, ", no orphan item");
662 if (errors & I_ERR_DUP_INODE_ITEM)
663 fprintf(stderr, ", dup inode item");
664 if (errors & I_ERR_DUP_DIR_INDEX)
665 fprintf(stderr, ", dup dir index");
666 if (errors & I_ERR_ODD_DIR_ITEM)
667 fprintf(stderr, ", odd dir item");
668 if (errors & I_ERR_ODD_FILE_EXTENT)
669 fprintf(stderr, ", odd file extent");
670 if (errors & I_ERR_BAD_FILE_EXTENT)
671 fprintf(stderr, ", bad file extent");
672 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673 fprintf(stderr, ", file extent overlap");
674 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675 fprintf(stderr, ", file extent discount");
676 if (errors & I_ERR_DIR_ISIZE_WRONG)
677 fprintf(stderr, ", dir isize wrong");
678 if (errors & I_ERR_FILE_NBYTES_WRONG)
679 fprintf(stderr, ", nbytes wrong");
680 if (errors & I_ERR_ODD_CSUM_ITEM)
681 fprintf(stderr, ", odd csum item");
682 if (errors & I_ERR_SOME_CSUM_MISSING)
683 fprintf(stderr, ", some csum missing");
684 if (errors & I_ERR_LINK_COUNT_WRONG)
685 fprintf(stderr, ", link count wrong");
686 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687 fprintf(stderr, ", orphan file extent");
688 fprintf(stderr, "\n");
689 /* Print the orphan extents if needed */
690 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
693 /* Print the holes if needed */
694 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695 struct file_extent_hole *hole;
696 struct rb_node *node;
699 node = rb_first(&rec->holes);
700 fprintf(stderr, "Found file extent holes:\n");
703 hole = rb_entry(node, struct file_extent_hole, node);
704 fprintf(stderr, "\tstart: %llu, len: %llu\n",
705 hole->start, hole->len);
706 node = rb_next(node);
709 fprintf(stderr, "\tstart: 0, len: %llu\n",
710 round_up(rec->isize, root->sectorsize));
714 static void print_ref_error(int errors)
716 if (errors & REF_ERR_NO_DIR_ITEM)
717 fprintf(stderr, ", no dir item");
718 if (errors & REF_ERR_NO_DIR_INDEX)
719 fprintf(stderr, ", no dir index");
720 if (errors & REF_ERR_NO_INODE_REF)
721 fprintf(stderr, ", no inode ref");
722 if (errors & REF_ERR_DUP_DIR_ITEM)
723 fprintf(stderr, ", dup dir item");
724 if (errors & REF_ERR_DUP_DIR_INDEX)
725 fprintf(stderr, ", dup dir index");
726 if (errors & REF_ERR_DUP_INODE_REF)
727 fprintf(stderr, ", dup inode ref");
728 if (errors & REF_ERR_INDEX_UNMATCH)
729 fprintf(stderr, ", index unmatch");
730 if (errors & REF_ERR_FILETYPE_UNMATCH)
731 fprintf(stderr, ", filetype unmatch");
732 if (errors & REF_ERR_NAME_TOO_LONG)
733 fprintf(stderr, ", name too long");
734 if (errors & REF_ERR_NO_ROOT_REF)
735 fprintf(stderr, ", no root ref");
736 if (errors & REF_ERR_NO_ROOT_BACKREF)
737 fprintf(stderr, ", no root backref");
738 if (errors & REF_ERR_DUP_ROOT_REF)
739 fprintf(stderr, ", dup root ref");
740 if (errors & REF_ERR_DUP_ROOT_BACKREF)
741 fprintf(stderr, ", dup root backref");
742 fprintf(stderr, "\n");
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
748 struct ptr_node *node;
749 struct cache_extent *cache;
750 struct inode_record *rec = NULL;
753 cache = lookup_cache_extent(inode_cache, ino, 1);
755 node = container_of(cache, struct ptr_node, cache);
757 if (mod && rec->refs > 1) {
758 node->data = clone_inode_rec(rec);
759 if (IS_ERR(node->data))
765 rec = calloc(1, sizeof(*rec));
767 return ERR_PTR(-ENOMEM);
769 rec->extent_start = (u64)-1;
771 INIT_LIST_HEAD(&rec->backrefs);
772 INIT_LIST_HEAD(&rec->orphan_extents);
773 rec->holes = RB_ROOT;
775 node = malloc(sizeof(*node));
778 return ERR_PTR(-ENOMEM);
780 node->cache.start = ino;
781 node->cache.size = 1;
784 if (ino == BTRFS_FREE_INO_OBJECTID)
787 ret = insert_cache_extent(inode_cache, &node->cache);
789 return ERR_PTR(-EEXIST);
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
796 struct orphan_data_extent *orphan;
798 while (!list_empty(orphan_extents)) {
799 orphan = list_entry(orphan_extents->next,
800 struct orphan_data_extent, list);
801 list_del(&orphan->list);
806 static void free_inode_rec(struct inode_record *rec)
808 struct inode_backref *backref;
813 while (!list_empty(&rec->backrefs)) {
814 backref = list_entry(rec->backrefs.next,
815 struct inode_backref, list);
816 list_del(&backref->list);
819 free_orphan_data_extents(&rec->orphan_extents);
820 free_file_extent_holes(&rec->holes);
824 static int can_free_inode_rec(struct inode_record *rec)
826 if (!rec->errors && rec->checked && rec->found_inode_item &&
827 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833 struct inode_record *rec)
835 struct cache_extent *cache;
836 struct inode_backref *tmp, *backref;
837 struct ptr_node *node;
838 unsigned char filetype;
840 if (!rec->found_inode_item)
843 filetype = imode_to_type(rec->imode);
844 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845 if (backref->found_dir_item && backref->found_dir_index) {
846 if (backref->filetype != filetype)
847 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848 if (!backref->errors && backref->found_inode_ref &&
849 rec->nlink == rec->found_link) {
850 list_del(&backref->list);
856 if (!rec->checked || rec->merging)
859 if (S_ISDIR(rec->imode)) {
860 if (rec->found_size != rec->isize)
861 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862 if (rec->found_file_extent)
863 rec->errors |= I_ERR_ODD_FILE_EXTENT;
864 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865 if (rec->found_dir_item)
866 rec->errors |= I_ERR_ODD_DIR_ITEM;
867 if (rec->found_size != rec->nbytes)
868 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
869 if (rec->nlink > 0 && !no_holes &&
870 (rec->extent_end < rec->isize ||
871 first_extent_gap(&rec->holes) < rec->isize))
872 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
875 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876 if (rec->found_csum_item && rec->nodatasum)
877 rec->errors |= I_ERR_ODD_CSUM_ITEM;
878 if (rec->some_csum_missing && !rec->nodatasum)
879 rec->errors |= I_ERR_SOME_CSUM_MISSING;
882 BUG_ON(rec->refs != 1);
883 if (can_free_inode_rec(rec)) {
884 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885 node = container_of(cache, struct ptr_node, cache);
886 BUG_ON(node->data != rec);
887 remove_cache_extent(inode_cache, &node->cache);
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
895 struct btrfs_path path;
896 struct btrfs_key key;
899 key.objectid = BTRFS_ORPHAN_OBJECTID;
900 key.type = BTRFS_ORPHAN_ITEM_KEY;
903 btrfs_init_path(&path);
904 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905 btrfs_release_path(&path);
911 static int process_inode_item(struct extent_buffer *eb,
912 int slot, struct btrfs_key *key,
913 struct shared_node *active_node)
915 struct inode_record *rec;
916 struct btrfs_inode_item *item;
918 rec = active_node->current;
919 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920 if (rec->found_inode_item) {
921 rec->errors |= I_ERR_DUP_INODE_ITEM;
924 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925 rec->nlink = btrfs_inode_nlink(eb, item);
926 rec->isize = btrfs_inode_size(eb, item);
927 rec->nbytes = btrfs_inode_nbytes(eb, item);
928 rec->imode = btrfs_inode_mode(eb, item);
929 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
931 rec->found_inode_item = 1;
933 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934 maybe_free_inode_rec(&active_node->inode_cache, rec);
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
940 int namelen, u64 dir)
942 struct inode_backref *backref;
944 list_for_each_entry(backref, &rec->backrefs, list) {
945 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
947 if (backref->dir != dir || backref->namelen != namelen)
949 if (memcmp(name, backref->name, namelen))
954 backref = malloc(sizeof(*backref) + namelen + 1);
957 memset(backref, 0, sizeof(*backref));
959 backref->namelen = namelen;
960 memcpy(backref->name, name, namelen);
961 backref->name[namelen] = '\0';
962 list_add_tail(&backref->list, &rec->backrefs);
966 static int add_inode_backref(struct cache_tree *inode_cache,
967 u64 ino, u64 dir, u64 index,
968 const char *name, int namelen,
969 int filetype, int itemtype, int errors)
971 struct inode_record *rec;
972 struct inode_backref *backref;
974 rec = get_inode_rec(inode_cache, ino, 1);
976 backref = get_inode_backref(rec, name, namelen, dir);
979 backref->errors |= errors;
980 if (itemtype == BTRFS_DIR_INDEX_KEY) {
981 if (backref->found_dir_index)
982 backref->errors |= REF_ERR_DUP_DIR_INDEX;
983 if (backref->found_inode_ref && backref->index != index)
984 backref->errors |= REF_ERR_INDEX_UNMATCH;
985 if (backref->found_dir_item && backref->filetype != filetype)
986 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
988 backref->index = index;
989 backref->filetype = filetype;
990 backref->found_dir_index = 1;
991 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
993 if (backref->found_dir_item)
994 backref->errors |= REF_ERR_DUP_DIR_ITEM;
995 if (backref->found_dir_index && backref->filetype != filetype)
996 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
998 backref->filetype = filetype;
999 backref->found_dir_item = 1;
1000 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1001 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1002 if (backref->found_inode_ref)
1003 backref->errors |= REF_ERR_DUP_INODE_REF;
1004 if (backref->found_dir_index && backref->index != index)
1005 backref->errors |= REF_ERR_INDEX_UNMATCH;
1007 backref->index = index;
1009 backref->ref_type = itemtype;
1010 backref->found_inode_ref = 1;
1015 maybe_free_inode_rec(inode_cache, rec);
1019 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1020 struct cache_tree *dst_cache)
1022 struct inode_backref *backref;
1027 list_for_each_entry(backref, &src->backrefs, list) {
1028 if (backref->found_dir_index) {
1029 add_inode_backref(dst_cache, dst->ino, backref->dir,
1030 backref->index, backref->name,
1031 backref->namelen, backref->filetype,
1032 BTRFS_DIR_INDEX_KEY, backref->errors);
1034 if (backref->found_dir_item) {
1036 add_inode_backref(dst_cache, dst->ino,
1037 backref->dir, 0, backref->name,
1038 backref->namelen, backref->filetype,
1039 BTRFS_DIR_ITEM_KEY, backref->errors);
1041 if (backref->found_inode_ref) {
1042 add_inode_backref(dst_cache, dst->ino,
1043 backref->dir, backref->index,
1044 backref->name, backref->namelen, 0,
1045 backref->ref_type, backref->errors);
1049 if (src->found_dir_item)
1050 dst->found_dir_item = 1;
1051 if (src->found_file_extent)
1052 dst->found_file_extent = 1;
1053 if (src->found_csum_item)
1054 dst->found_csum_item = 1;
1055 if (src->some_csum_missing)
1056 dst->some_csum_missing = 1;
1057 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1058 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1063 BUG_ON(src->found_link < dir_count);
1064 dst->found_link += src->found_link - dir_count;
1065 dst->found_size += src->found_size;
1066 if (src->extent_start != (u64)-1) {
1067 if (dst->extent_start == (u64)-1) {
1068 dst->extent_start = src->extent_start;
1069 dst->extent_end = src->extent_end;
1071 if (dst->extent_end > src->extent_start)
1072 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1073 else if (dst->extent_end < src->extent_start) {
1074 ret = add_file_extent_hole(&dst->holes,
1076 src->extent_start - dst->extent_end);
1078 if (dst->extent_end < src->extent_end)
1079 dst->extent_end = src->extent_end;
1083 dst->errors |= src->errors;
1084 if (src->found_inode_item) {
1085 if (!dst->found_inode_item) {
1086 dst->nlink = src->nlink;
1087 dst->isize = src->isize;
1088 dst->nbytes = src->nbytes;
1089 dst->imode = src->imode;
1090 dst->nodatasum = src->nodatasum;
1091 dst->found_inode_item = 1;
1093 dst->errors |= I_ERR_DUP_INODE_ITEM;
1101 static int splice_shared_node(struct shared_node *src_node,
1102 struct shared_node *dst_node)
1104 struct cache_extent *cache;
1105 struct ptr_node *node, *ins;
1106 struct cache_tree *src, *dst;
1107 struct inode_record *rec, *conflict;
1108 u64 current_ino = 0;
1112 if (--src_node->refs == 0)
1114 if (src_node->current)
1115 current_ino = src_node->current->ino;
1117 src = &src_node->root_cache;
1118 dst = &dst_node->root_cache;
1120 cache = search_cache_extent(src, 0);
1122 node = container_of(cache, struct ptr_node, cache);
1124 cache = next_cache_extent(cache);
1127 remove_cache_extent(src, &node->cache);
1130 ins = malloc(sizeof(*ins));
1131 ins->cache.start = node->cache.start;
1132 ins->cache.size = node->cache.size;
1136 ret = insert_cache_extent(dst, &ins->cache);
1137 if (ret == -EEXIST) {
1138 conflict = get_inode_rec(dst, rec->ino, 1);
1139 BUG_ON(IS_ERR(conflict));
1140 merge_inode_recs(rec, conflict, dst);
1142 conflict->checked = 1;
1143 if (dst_node->current == conflict)
1144 dst_node->current = NULL;
1146 maybe_free_inode_rec(dst, conflict);
1147 free_inode_rec(rec);
1154 if (src == &src_node->root_cache) {
1155 src = &src_node->inode_cache;
1156 dst = &dst_node->inode_cache;
1160 if (current_ino > 0 && (!dst_node->current ||
1161 current_ino > dst_node->current->ino)) {
1162 if (dst_node->current) {
1163 dst_node->current->checked = 1;
1164 maybe_free_inode_rec(dst, dst_node->current);
1166 dst_node->current = get_inode_rec(dst, current_ino, 1);
1167 BUG_ON(IS_ERR(dst_node->current));
1172 static void free_inode_ptr(struct cache_extent *cache)
1174 struct ptr_node *node;
1175 struct inode_record *rec;
1177 node = container_of(cache, struct ptr_node, cache);
1179 free_inode_rec(rec);
1183 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1185 static struct shared_node *find_shared_node(struct cache_tree *shared,
1188 struct cache_extent *cache;
1189 struct shared_node *node;
1191 cache = lookup_cache_extent(shared, bytenr, 1);
1193 node = container_of(cache, struct shared_node, cache);
1199 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1202 struct shared_node *node;
1204 node = calloc(1, sizeof(*node));
1207 node->cache.start = bytenr;
1208 node->cache.size = 1;
1209 cache_tree_init(&node->root_cache);
1210 cache_tree_init(&node->inode_cache);
1213 ret = insert_cache_extent(shared, &node->cache);
1218 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1219 struct walk_control *wc, int level)
1221 struct shared_node *node;
1222 struct shared_node *dest;
1225 if (level == wc->active_node)
1228 BUG_ON(wc->active_node <= level);
1229 node = find_shared_node(&wc->shared, bytenr);
1231 ret = add_shared_node(&wc->shared, bytenr, refs);
1233 node = find_shared_node(&wc->shared, bytenr);
1234 wc->nodes[level] = node;
1235 wc->active_node = level;
1239 if (wc->root_level == wc->active_node &&
1240 btrfs_root_refs(&root->root_item) == 0) {
1241 if (--node->refs == 0) {
1242 free_inode_recs_tree(&node->root_cache);
1243 free_inode_recs_tree(&node->inode_cache);
1244 remove_cache_extent(&wc->shared, &node->cache);
1250 dest = wc->nodes[wc->active_node];
1251 splice_shared_node(node, dest);
1252 if (node->refs == 0) {
1253 remove_cache_extent(&wc->shared, &node->cache);
1259 static int leave_shared_node(struct btrfs_root *root,
1260 struct walk_control *wc, int level)
1262 struct shared_node *node;
1263 struct shared_node *dest;
1266 if (level == wc->root_level)
1269 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1273 BUG_ON(i >= BTRFS_MAX_LEVEL);
1275 node = wc->nodes[wc->active_node];
1276 wc->nodes[wc->active_node] = NULL;
1277 wc->active_node = i;
1279 dest = wc->nodes[wc->active_node];
1280 if (wc->active_node < wc->root_level ||
1281 btrfs_root_refs(&root->root_item) > 0) {
1282 BUG_ON(node->refs <= 1);
1283 splice_shared_node(node, dest);
1285 BUG_ON(node->refs < 2);
1294 * 1 - if the root with id child_root_id is a child of root parent_root_id
1295 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1296 * has other root(s) as parent(s)
1297 * 2 - if the root child_root_id doesn't have any parent roots
1299 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1302 struct btrfs_path path;
1303 struct btrfs_key key;
1304 struct extent_buffer *leaf;
1308 btrfs_init_path(&path);
1310 key.objectid = parent_root_id;
1311 key.type = BTRFS_ROOT_REF_KEY;
1312 key.offset = child_root_id;
1313 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1317 btrfs_release_path(&path);
1321 key.objectid = child_root_id;
1322 key.type = BTRFS_ROOT_BACKREF_KEY;
1324 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1330 leaf = path.nodes[0];
1331 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1332 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1335 leaf = path.nodes[0];
1338 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1339 if (key.objectid != child_root_id ||
1340 key.type != BTRFS_ROOT_BACKREF_KEY)
1345 if (key.offset == parent_root_id) {
1346 btrfs_release_path(&path);
1353 btrfs_release_path(&path);
1356 return has_parent ? 0 : 2;
1359 static int process_dir_item(struct btrfs_root *root,
1360 struct extent_buffer *eb,
1361 int slot, struct btrfs_key *key,
1362 struct shared_node *active_node)
1372 struct btrfs_dir_item *di;
1373 struct inode_record *rec;
1374 struct cache_tree *root_cache;
1375 struct cache_tree *inode_cache;
1376 struct btrfs_key location;
1377 char namebuf[BTRFS_NAME_LEN];
1379 root_cache = &active_node->root_cache;
1380 inode_cache = &active_node->inode_cache;
1381 rec = active_node->current;
1382 rec->found_dir_item = 1;
1384 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1385 total = btrfs_item_size_nr(eb, slot);
1386 while (cur < total) {
1388 btrfs_dir_item_key_to_cpu(eb, di, &location);
1389 name_len = btrfs_dir_name_len(eb, di);
1390 data_len = btrfs_dir_data_len(eb, di);
1391 filetype = btrfs_dir_type(eb, di);
1393 rec->found_size += name_len;
1394 if (name_len <= BTRFS_NAME_LEN) {
1398 len = BTRFS_NAME_LEN;
1399 error = REF_ERR_NAME_TOO_LONG;
1401 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1403 if (location.type == BTRFS_INODE_ITEM_KEY) {
1404 add_inode_backref(inode_cache, location.objectid,
1405 key->objectid, key->offset, namebuf,
1406 len, filetype, key->type, error);
1407 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1408 add_inode_backref(root_cache, location.objectid,
1409 key->objectid, key->offset,
1410 namebuf, len, filetype,
1413 fprintf(stderr, "invalid location in dir item %u\n",
1415 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1416 key->objectid, key->offset, namebuf,
1417 len, filetype, key->type, error);
1420 len = sizeof(*di) + name_len + data_len;
1421 di = (struct btrfs_dir_item *)((char *)di + len);
1424 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1425 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name packed into an INODE_REF item as an inode backref.
 *
 * An INODE_REF item (key: child ino / INODE_REF / parent dir) stores one
 * or more (index, name) entries back to back; each is fed into the shared
 * node's inode cache via add_inode_backref() for later cross-checking
 * against dir items.  Over-long names are clamped to BTRFS_NAME_LEN and
 * flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): interior lines of this chunk are missing (loop-variable
 * declarations, the in-range `len = name_len` branch, loop advance).
 */
1430 static int process_inode_ref(struct extent_buffer *eb,
1431 int slot, struct btrfs_key *key,
1432 struct shared_node *active_node)
1440 struct cache_tree *inode_cache;
1441 struct btrfs_inode_ref *ref;
1442 char namebuf[BTRFS_NAME_LEN];
1444 inode_cache = &active_node->inode_cache;
1446 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1447 total = btrfs_item_size_nr(eb, slot);
/* walk the packed (index, name) entries until the item is consumed */
1448 while (cur < total) {
1449 name_len = btrfs_inode_ref_name_len(eb, ref);
1450 index = btrfs_inode_ref_index(eb, ref);
1451 if (name_len <= BTRFS_NAME_LEN) {
/* name does not fit the buffer: truncate and mark the backref */
1455 len = BTRFS_NAME_LEN;
1456 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes immediately follow the fixed-size ref header */
1458 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
/* key->objectid is the child inode, key->offset the parent dir */
1459 add_inode_backref(inode_cache, key->objectid, key->offset,
1460 index, namebuf, len, 0, key->type, error);
/* advance past this entry's header + name */
1462 len = sizeof(*ref) + name_len;
1463 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every name packed into an INODE_EXTREF item as an inode backref.
 *
 * Like process_inode_ref(), but for extended refs: the parent directory
 * objectid is stored inside each entry (btrfs_inode_extref_parent) rather
 * than in the item key's offset.  Over-long names are clamped to
 * BTRFS_NAME_LEN and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): interior lines of this chunk are missing (declarations,
 * the in-range `len = name_len` branch, loop advance).
 */
1469 static int process_inode_extref(struct extent_buffer *eb,
1470 int slot, struct btrfs_key *key,
1471 struct shared_node *active_node)
1480 struct cache_tree *inode_cache;
1481 struct btrfs_inode_extref *extref;
1482 char namebuf[BTRFS_NAME_LEN];
1484 inode_cache = &active_node->inode_cache;
1486 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1487 total = btrfs_item_size_nr(eb, slot);
/* walk the packed entries until the item is consumed */
1488 while (cur < total) {
1489 name_len = btrfs_inode_extref_name_len(eb, extref);
1490 index = btrfs_inode_extref_index(eb, extref);
/* extref entries carry the parent dir inline, unlike INODE_REF */
1491 parent = btrfs_inode_extref_parent(eb, extref);
1492 if (name_len <= BTRFS_NAME_LEN) {
/* name too long for the buffer: truncate and mark the backref */
1496 len = BTRFS_NAME_LEN;
1497 error = REF_ERR_NAME_TOO_LONG;
1499 read_extent_buffer(eb, namebuf,
1500 (unsigned long)(extref + 1), len);
1501 add_inode_backref(inode_cache, key->objectid, parent,
1502 index, namebuf, len, 0, key->type, error);
/* advance past this entry's header + name */
1504 len = sizeof(*extref) + name_len;
1505 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the range [start, start + len) are covered by
 * checksum items in the csum tree, returning the total via *found.
 *
 * Scans EXTENT_CSUM items forward from `start`; each item's coverage in
 * bytes is (item size / csum_size) * sectorsize, and only the overlap
 * with the requested range is accumulated.
 *
 * NOTE(review): interior lines of this chunk are missing (ret/size
 * declarations, error/loop-exit branches, the accumulation into *found).
 */
1512 static int count_csum_range(struct btrfs_root *root, u64 start,
1513 u64 len, u64 *found)
1515 struct btrfs_key key;
1516 struct btrfs_path path;
1517 struct extent_buffer *leaf;
1522 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1524 btrfs_init_path(&path);
1526 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1528 key.type = BTRFS_EXTENT_CSUM_KEY;
1530 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * The search may land just past a csum item that still covers `start`;
 * step back one slot if the previous item is an EXTENT_CSUM.
 */
1534 if (ret > 0 && path.slots[0] > 0) {
1535 leaf = path.nodes[0];
1536 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1537 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1538 key.type == BTRFS_EXTENT_CSUM_KEY)
1543 leaf = path.nodes[0];
/* leaf exhausted: move to the next leaf of the csum tree */
1544 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1545 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1550 leaf = path.nodes[0];
1553 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* stop once we leave the csum item namespace */
1554 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1555 key.type != BTRFS_EXTENT_CSUM_KEY)
1558 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* item starts beyond the queried range: done */
1559 if (key.offset >= start + len)
1562 if (key.offset > start)
/* each csum_size bytes of item data covers one sector of data */
1565 size = btrfs_item_size_nr(leaf, path.slots[0]);
1566 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1567 if (csum_end > start) {
/* clamp the overlap to the remaining requested length */
1568 size = min(csum_end - start, len);
1577 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the inode currently being collected
 * and update its inode_record: tracked extent range, found_size, holes,
 * and error bits (overlap, bad extent fields, csum anomalies).
 *
 * NOTE(review): interior lines of this chunk are missing (local
 * declarations such as num_bytes/extent_type/ret, several closing braces
 * and error-return paths).
 */
1583 static int process_file_extent(struct btrfs_root *root,
1584 struct extent_buffer *eb,
1585 int slot, struct btrfs_key *key,
1586 struct shared_node *active_node)
1588 struct inode_record *rec;
1589 struct btrfs_file_extent_item *fi;
1591 u64 disk_bytenr = 0;
1592 u64 extent_offset = 0;
/* sectorsize is a power of two, so this mask rounds to sector bounds */
1593 u64 mask = root->sectorsize - 1;
1597 rec = active_node->current;
1598 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1599 rec->found_file_extent = 1;
/* first extent seen for this inode: start tracking the covered range */
1601 if (rec->extent_start == (u64)-1) {
1602 rec->extent_start = key->offset;
1603 rec->extent_end = key->offset;
/* overlapping with the previous extent is an error; a gap is a hole */
1606 if (rec->extent_end > key->offset)
1607 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1608 else if (rec->extent_end < key->offset) {
1609 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1610 key->offset - rec->extent_end);
1615 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1616 extent_type = btrfs_file_extent_type(eb, fi);
1618 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1619 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1621 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1622 rec->found_size += num_bytes;
/* round inline length up to a sector for extent-range accounting */
1623 num_bytes = (num_bytes + mask) & ~mask;
1624 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1625 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1626 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1627 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1628 extent_offset = btrfs_file_extent_offset(eb, fi);
/* regular extents must be non-empty and sector aligned */
1629 if (num_bytes == 0 || (num_bytes & mask))
1630 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* referenced range must fit within the extent's ram_bytes */
1631 if (num_bytes + extent_offset >
1632 btrfs_file_extent_ram_bytes(eb, fi))
1633 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* prealloc extents may not be compressed/encrypted/encoded */
1634 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1635 (btrfs_file_extent_compression(eb, fi) ||
1636 btrfs_file_extent_encryption(eb, fi) ||
1637 btrfs_file_extent_other_encoding(eb, fi)))
1638 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 means a hole extent: contributes no bytes */
1639 if (disk_bytenr > 0)
1640 rec->found_size += num_bytes;
1642 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1644 rec->extent_end = key->offset + num_bytes;
1647 * The data reloc tree will copy full extents into its inode and then
1648 * copy the corresponding csums. Because the extent it copied could be
1649 * a preallocated extent that hasn't been written to yet there may be no
1650 * csums to copy, ergo we won't have csums for our file extent. This is
1651 * ok so just don't bother checking csums if the inode belongs to the
1654 if (disk_bytenr > 0 &&
1655 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID {
1657 if (btrfs_file_extent_compression(eb, fi))
1658 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1660 disk_bytenr += extent_offset;
/* csums exist per on-disk byte; count what the csum tree covers */
1662 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1665 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1667 rec->found_csum_item = 1;
1668 if (found < num_bytes)
1669 rec->some_csum_missing = 1;
/* prealloc extents should have no csums at all */
1670 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1672 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item in a leaf of an fs tree, dispatching to the proper
 * item handler and maintaining active_node->current as the inode record
 * items belong to (leaf items for one inode are contiguous and sorted).
 *
 * NOTE(review): interior lines of this chunk are missing (declarations,
 * the switch statement header, break statements, loop tail and return).
 */
1678 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1679 struct walk_control *wc)
1681 struct btrfs_key key;
1685 struct cache_tree *inode_cache;
1686 struct shared_node *active_node;
/* skip leaves of a dead root (refs == 0) at the top shared level */
1688 if (wc->root_level == wc->active_node &&
1689 btrfs_root_refs(&root->root_item) == 0)
1692 active_node = wc->nodes[wc->active_node];
1693 inode_cache = &active_node->inode_cache;
1694 nritems = btrfs_header_nritems(eb);
1695 for (i = 0; i < nritems; i++) {
1696 btrfs_item_key_to_cpu(eb, &key, i);
/* free-space cache and orphan items are not inode metadata */
1698 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1700 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Key objectid advanced past the current inode: the previous record is
 * fully scanned, so mark it checked and switch to the new inode.
 */
1703 if (active_node->current == NULL ||
1704 active_node->current->ino < key.objectid) {
1705 if (active_node->current) {
1706 active_node->current->checked = 1;
1707 maybe_free_inode_rec(inode_cache,
1708 active_node->current);
1710 active_node->current = get_inode_rec(inode_cache,
1712 BUG_ON(IS_ERR(active_node->current));
/* dispatch per item type */
1715 case BTRFS_DIR_ITEM_KEY:
1716 case BTRFS_DIR_INDEX_KEY:
1717 ret = process_dir_item(root, eb, i, &key, active_node);
1719 case BTRFS_INODE_REF_KEY:
1720 ret = process_inode_ref(eb, i, &key, active_node);
1722 case BTRFS_INODE_EXTREF_KEY:
1723 ret = process_inode_extref(eb, i, &key, active_node);
1725 case BTRFS_INODE_ITEM_KEY:
1726 ret = process_inode_item(eb, i, &key, active_node);
1728 case BTRFS_EXTENT_DATA_KEY:
1729 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of `node` from `slot` to the end,
 * so the subsequent sequential walk does not stall on synchronous reads.
 * Best effort only: readahead_tree_block()'s result is ignored.
 */
1739 static void reada_walk_down(struct btrfs_root *root,
1740 struct extent_buffer *node, int slot)
1749 level = btrfs_header_level(node);
1753 nritems = btrfs_header_nritems(node);
/* children live one level below this node */
1754 blocksize = btrfs_level_size(root, level - 1);
1755 for (i = slot; i < nritems; i++) {
1756 bytenr = btrfs_node_blockptr(node, i);
1757 ptr_gen = btrfs_node_ptr_generation(node, i);
1758 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1763 * Check the child node/leaf by the following condition:
1764 * 1. the first item key of the node/leaf should be the same with the one
1766 * 2. block in parent node should match the child node/leaf.
1767 * 3. generation of parent node and child's header should be consistent.
1769 * Or the child node/leaf pointed by the key in parent is not valid.
1771 * We hope to check leaf owner too, but since subvol may share leaves,
1772 * which makes leaf owner check not so strong, key check should be
1773 * sufficient for that case.
/*
 * Verify that `child` really is the block its parent points to at `slot`:
 * first key, block number, and generation must all match the parent's
 * pointer.  Mismatches are reported to stderr; the (not visible here)
 * return value signals whether the child is usable.
 *
 * NOTE(review): the ret declaration/updates and return are missing from
 * this chunk.
 */
1775 static int check_child_node(struct btrfs_root *root,
1776 struct extent_buffer *parent, int slot,
1777 struct extent_buffer *child)
1779 struct btrfs_key parent_key;
1780 struct btrfs_key child_key;
1783 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* leaves store item keys, internal nodes store node keys */
1784 if (btrfs_header_level(child) == 0)
1785 btrfs_item_key_to_cpu(child, &child_key, 0);
1787 btrfs_node_key_to_cpu(child, &child_key, 0);
/* parent's key for this slot must equal the child's first key */
1789 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1792 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1793 parent_key.objectid, parent_key.type, parent_key.offset,
1794 child_key.objectid, child_key.type, child_key.offset);
/* the child's own header must claim the bytenr the parent points at */
1796 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1798 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1799 btrfs_node_blockptr(parent, slot),
1800 btrfs_header_bytenr(child));
/* generation mismatch usually means a stale or misplaced block */
1802 if (btrfs_node_ptr_generation(parent, slot) !=
1803 btrfs_header_generation(child)) {
1805 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1806 btrfs_header_generation(child),
1807 btrfs_node_ptr_generation(parent, slot));
/*
 * Descend one branch of the tree held in `path`, processing each leaf
 * through process_one_leaf() and validating each interior child with
 * check_child_node().  Shared subtrees (refs > 1) are entered via
 * enter_shared_node() so they are only scanned once across roots.
 *
 * NOTE(review): interior lines of this chunk are missing (declarations,
 * several error-handling branches and the final return).
 */
1812 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1813 struct walk_control *wc, int *level)
1815 enum btrfs_tree_block_status status;
1818 struct extent_buffer *next;
1819 struct extent_buffer *cur;
1824 WARN_ON(*level < 0);
1825 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* how many trees reference the current root block? */
1826 ret = btrfs_lookup_extent_info(NULL, root,
1827 path->nodes[*level]->start,
1828 *level, 1, &refs, NULL);
1835 ret = enter_shared_node(root, path->nodes[*level]->start,
1843 while (*level >= 0) {
1844 WARN_ON(*level < 0);
1845 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1846 cur = path->nodes[*level];
/* stored level must agree with our position in the path */
1848 if (btrfs_header_level(cur) != *level)
/* node exhausted at this level: let walk_up_tree take over */
1851 if (path->slots[*level] >= btrfs_header_nritems(cur))
1854 ret = process_one_leaf(root, cur, wc);
1859 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1860 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1861 blocksize = btrfs_level_size(root, *level - 1);
1862 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
/* shared child already (being) scanned elsewhere: skip over it */
1868 ret = enter_shared_node(root, bytenr, refs,
1871 path->slots[*level]++;
1876 next = btrfs_find_tree_block(root, bytenr, blocksize);
1877 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1878 free_extent_buffer(next);
/* prefetch the remaining siblings before the blocking read */
1879 reada_walk_down(root, cur, path->slots[*level]);
1880 next = read_tree_block(root, bytenr, blocksize,
/* unreadable child: record the parent as corrupt */
1882 if (!extent_buffer_uptodate(next)) {
1883 struct btrfs_key node_key;
1885 btrfs_node_key_to_cpu(path->nodes[*level],
1887 path->slots[*level]);
1888 btrfs_add_corrupt_extent_record(root->fs_info,
1890 path->nodes[*level]->start,
1891 root->leafsize, *level);
1897 ret = check_child_node(root, cur, path->slots[*level], next);
/* sanity-check the block's own structure before descending */
1903 if (btrfs_is_leaf(next))
1904 status = btrfs_check_leaf(root, NULL, next);
1906 status = btrfs_check_node(root, NULL, next);
1907 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1908 free_extent_buffer(next);
/* descend: replace this level's node with the validated child */
1913 *level = *level - 1;
1914 free_extent_buffer(path->nodes[*level]);
1915 path->nodes[*level] = next;
1916 path->slots[*level] = 0;
1919 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Pop finished levels off the path: advance to the next slot where one
 * exists, otherwise release the node, leave any shared-node scope that
 * ends at this level, and continue upward.
 *
 * NOTE(review): the slot-advance statement and return paths are missing
 * from this chunk.
 */
1923 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1924 struct walk_control *wc, int *level)
1927 struct extent_buffer *leaf;
1929 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1930 leaf = path->nodes[i];
/* more siblings at this level: resume the walk here */
1931 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* level fully consumed: drop the node and keep climbing */
1936 free_extent_buffer(path->nodes[*level]);
1937 path->nodes[*level] = NULL;
1938 BUG_ON(*level > wc->active_node);
1939 if (*level == wc->active_node)
1940 leave_shared_node(root, wc, *level);
/*
 * Validate the record of a tree's root directory (".."-style entry):
 * it must have a clean inode item, nlink 1 with no counted links, and
 * exactly one inode-ref backref named ".." at index 0 with no matching
 * dir item/index.  Failure paths (goto out / return) are not visible in
 * this chunk.
 */
1947 static int check_root_dir(struct inode_record *rec)
1949 struct inode_backref *backref;
1952 if (!rec->found_inode_item || rec->errors)
1954 if (rec->nlink != 1 || rec->found_link != 0)
1956 if (list_empty(&rec->backrefs))
1958 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1959 if (!backref->found_inode_ref)
/* the sole backref must be the ".." self-reference at index 0 */
1961 if (backref->index != 0 || backref->namelen != 2 ||
1962 memcmp(backref->name, "..", 2))
/* a root dir must not have dir item/index entries for itself */
1964 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a directory whose isize disagrees with the sum of its entry
 * name lengths: rewrite the INODE_ITEM's size field to rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG.
 *
 * NOTE(review): ret declaration and the error paths after the search are
 * missing from this chunk.
 */
1971 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1972 struct btrfs_root *root, struct btrfs_path *path,
1973 struct inode_record *rec)
1975 struct btrfs_inode_item *ei;
1976 struct btrfs_key key;
1979 key.objectid = rec->ino;
1980 key.type = BTRFS_INODE_ITEM_KEY;
/* offset (u64)-1 + step back one slot lands on the inode item */
1981 key.offset = (u64)-1;
1983 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1987 if (!path->slots[0]) {
1994 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1995 if (key.objectid != rec->ino) {
2000 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2001 struct btrfs_inode_item);
2002 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2003 btrfs_mark_buffer_dirty(path->nodes[0]);
2004 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2005 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2006 root->root_key.objectid);
2008 btrfs_release_path(path);
/*
 * Insert the missing orphan item for an unlinked inode and clear the
 * I_ERR_NO_ORPHAN_ITEM flag.  Error handling between the insert and the
 * flag clear is not visible in this chunk.
 */
2012 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2013 struct btrfs_root *root,
2014 struct btrfs_path *path,
2015 struct inode_record *rec)
2019 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2020 btrfs_release_path(path);
2022 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair an inode whose nbytes field disagrees with the bytes actually
 * referenced by its file extents: rewrite nbytes to rec->found_size and
 * clear I_ERR_FILE_NBYTES_WRONG.
 *
 * NOTE(review): ret declaration, key.offset assignment and the search
 * error path are missing from this chunk.
 */
2026 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2027 struct btrfs_root *root,
2028 struct btrfs_path *path,
2029 struct inode_record *rec)
2031 struct btrfs_inode_item *ei;
2032 struct btrfs_key key;
2035 key.objectid = rec->ino;
2036 key.type = BTRFS_INODE_ITEM_KEY;
2039 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2046 /* Since ret == 0, no need to check anything */
2047 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2048 struct btrfs_inode_item);
2049 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2050 btrfs_mark_buffer_dirty(path->nodes[0]);
2051 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2052 printf("reset nbytes for ino %llu root %llu\n",
2053 rec->ino, root->root_key.objectid);
2055 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for a backref that has a dir item
 * and inode ref but no index entry, then update the parent directory's
 * record (found_size / isize error bit) to reflect the new entry.
 *
 * NOTE(review): declarations of ret, the insert error path and the final
 * return/free are missing from this chunk.
 */
2059 static int add_missing_dir_index(struct btrfs_root *root,
2060 struct cache_tree *inode_cache,
2061 struct inode_record *rec,
2062 struct inode_backref *backref)
2064 struct btrfs_path *path;
2065 struct btrfs_trans_handle *trans;
2066 struct btrfs_dir_item *dir_item;
2067 struct extent_buffer *leaf;
2068 struct btrfs_key key;
2069 struct btrfs_disk_key disk_key;
2070 struct inode_record *dir_rec;
2071 unsigned long name_ptr;
/* dir item payload = fixed header + the entry name */
2072 u32 data_size = sizeof(*dir_item) + backref->namelen;
2075 path = btrfs_alloc_path();
2079 trans = btrfs_start_transaction(root, 1);
2080 if (IS_ERR(trans)) {
2081 btrfs_free_path(path);
2082 return PTR_ERR(trans);
2085 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2086 (unsigned long long)rec->ino);
/* DIR_INDEX key: (parent dir, DIR_INDEX, index-in-dir) */
2087 key.objectid = backref->dir;
2088 key.type = BTRFS_DIR_INDEX_KEY;
2089 key.offset = backref->index;
2091 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2094 leaf = path->nodes[0];
2095 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* the embedded location key points at the child's INODE_ITEM */
2097 disk_key.objectid = cpu_to_le64(rec->ino);
2098 disk_key.type = BTRFS_INODE_ITEM_KEY;
2099 disk_key.offset = 0;
2101 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2102 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2103 btrfs_set_dir_data_len(leaf, dir_item, 0);
2104 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* name bytes follow the dir_item header inside the leaf */
2105 name_ptr = (unsigned long)(dir_item + 1);
2106 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2107 btrfs_mark_buffer_dirty(leaf);
2108 btrfs_free_path(path);
2109 btrfs_commit_transaction(trans, root);
2111 backref->found_dir_index = 1;
2112 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2113 BUG_ON(IS_ERR(dir_rec));
/* keep the parent dir's isize bookkeeping consistent */
2116 dir_rec->found_size += backref->namelen;
2117 if (dir_rec->found_size == dir_rec->isize &&
2118 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2119 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2120 if (dir_rec->found_size != dir_rec->isize)
2121 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete a bogus DIR_INDEX entry described by `backref`.  If the index
 * cannot be found the function just commits and (per the not-visible
 * branch) returns; otherwise the whole item or just this name is removed
 * depending on the lookup outcome.
 *
 * NOTE(review): ret declaration and the branch selecting del_item vs
 * delete_one_dir_name are missing from this chunk.
 */
2126 static int delete_dir_index(struct btrfs_root *root,
2127 struct cache_tree *inode_cache,
2128 struct inode_record *rec,
2129 struct inode_backref *backref)
2131 struct btrfs_trans_handle *trans;
2132 struct btrfs_dir_item *di;
2133 struct btrfs_path *path;
2136 path = btrfs_alloc_path();
2140 trans = btrfs_start_transaction(root, 1);
2141 if (IS_ERR(trans)) {
2142 btrfs_free_path(path);
2143 return PTR_ERR(trans);
2147 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2148 (unsigned long long)backref->dir,
2149 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2150 (unsigned long long)root->objectid);
/* mod -1: look up the entry for deletion */
2152 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2153 backref->name, backref->namelen,
2154 backref->index, -1);
2157 btrfs_free_path(path);
2158 btrfs_commit_transaction(trans, root);
/* remove the entire item, or only this name within a shared item */
2165 ret = btrfs_del_item(trans, root, path);
2167 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2169 btrfs_free_path(path);
2170 btrfs_commit_transaction(trans, root);
/*
 * Recreate a lost INODE_ITEM for `rec` from whatever evidence the scan
 * collected: nlink from found_link (or 1 for a root dir), nbytes from
 * found_size, and mode/size guessed from whether dir items were seen
 * (directory 0755) or not (regular file 0755).  Timestamps are set to
 * "now"; the user is warned the result may be incomplete.
 *
 * Fix vs. original: the second branch was `else if (!rec->found_dir_item)`,
 * which is redundant immediately after `if (rec->found_dir_item)`; it is
 * now a plain `else` (behavior identical).
 *
 * NOTE(review): ret declaration and the final return are missing from
 * this chunk.
 */
2174 static int create_inode_item(struct btrfs_root *root,
2175 struct inode_record *rec,
2176 struct inode_backref *backref, int root_dir)
2178 struct btrfs_trans_handle *trans;
2179 struct btrfs_inode_item inode_item;
2180 time_t now = time(NULL);
2183 trans = btrfs_start_transaction(root, 1);
2184 if (IS_ERR(trans)) {
2185 ret = PTR_ERR(trans);
2189 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2190 "be incomplete, please check permissions and content after "
2191 "the fsck completes.\n", (unsigned long long)root->objectid,
2192 (unsigned long long)rec->ino);
2194 memset(&inode_item, 0, sizeof(inode_item));
2195 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* root dirs always have nlink 1; others use the counted link total */
2197 btrfs_set_stack_inode_nlink(&inode_item, 1);
2199 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2200 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2201 if (rec->found_dir_item) {
/* conflicting evidence: dir entries AND file extents were both seen */
2202 if (rec->found_file_extent)
2203 fprintf(stderr, "root %llu inode %llu has both a dir "
2204 "item and extents, unsure if it is a dir or a "
2205 "regular file so setting it as a directory\n",
2206 (unsigned long long)root->objectid,
2207 (unsigned long long)rec->ino);
2208 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2209 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2210 } else {
2211 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2212 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* no original timestamps survive; stamp everything with "now" */
2214 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2215 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2216 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2217 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2218 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2219 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2220 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2221 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2223 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2225 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of `rec` and repair inconsistencies between its
 * dir_item / dir_index / inode_ref evidence: delete bad index entries,
 * add missing index entries, add a missing dir index/item pair for a
 * lone inode_ref, drop fully-consistent backrefs from the list, and
 * recreate the inode item when all three references exist but the inode
 * item does not.  Returns the repair count (via the not-visible
 * `repaired` accounting) or a negative error.
 *
 * Fix vs. original: the consistency check tested `backref->found_dir_index`
 * twice (`a && b && b`); the duplicate has been removed (behavior
 * identical, since x && x == x).
 *
 * NOTE(review): several interior lines are missing from this chunk
 * (the `delete` parameter line, ret/repaired declarations, free() calls,
 * error paths).
 */
2229 static int repair_inode_backrefs(struct btrfs_root *root,
2230 struct inode_record *rec,
2231 struct cache_tree *inode_cache,
2234 struct inode_backref *tmp, *backref;
2235 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2239 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* the tree's root dir gets its inode item recreated, never deleted */
2240 if (!delete && rec->ino == root_dirid) {
2241 if (!rec->found_inode_item) {
2242 ret = create_inode_item(root, rec, backref, 1);
2249 /* Index 0 for root dir's are special, don't mess with it */
2250 if (rec->ino == root_dirid && backref->index == 0)
/* index exists but ref is missing or mismatched: drop the index */
2254 ((backref->found_dir_index && !backref->found_inode_ref) ||
2255 (backref->found_dir_index && backref->found_inode_ref &&
2256 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2257 ret = delete_dir_index(root, inode_cache, rec, backref);
2261 list_del(&backref->list);
/* dir item + inode ref but no index: recreate the index entry */
2265 if (!delete && !backref->found_dir_index &&
2266 backref->found_dir_item && backref->found_inode_ref) {
2267 ret = add_missing_dir_index(root, inode_cache, rec,
/* fully consistent backref: nothing to repair, forget it */
2272 if (backref->found_dir_item &&
2273 backref->found_dir_index) {
2275 if (!backref->errors &&
2276 backref->found_inode_ref) {
2277 list_del(&backref->list);
/* only an inode ref exists: rebuild the dir index/item pair */
2283 if (!delete && (!backref->found_dir_index &&
2284 !backref->found_dir_item &&
2285 backref->found_inode_ref)) {
2286 struct btrfs_trans_handle *trans;
2287 struct btrfs_key location;
2289 ret = check_dir_conflict(root, backref->name,
2295 * let nlink fixing routine to handle it,
2296 * which can do it better.
2301 location.objectid = rec->ino;
2302 location.type = BTRFS_INODE_ITEM_KEY;
2303 location.offset = 0;
2305 trans = btrfs_start_transaction(root, 1);
2306 if (IS_ERR(trans)) {
2307 ret = PTR_ERR(trans);
2310 fprintf(stderr, "adding missing dir index/item pair "
2312 (unsigned long long)rec->ino);
2313 ret = btrfs_insert_dir_item(trans, root, backref->name,
2315 backref->dir, &location,
2316 imode_to_type(rec->imode),
2319 btrfs_commit_transaction(trans, root);
/* all three refs present but no inode item: recreate it */
2323 if (!delete && (backref->found_inode_ref &&
2324 backref->found_dir_index &&
2325 backref->found_dir_item &&
2326 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2327 !rec->found_inode_item)) {
2328 ret = create_inode_item(root, rec, backref, 0);
2335 return ret ? ret : repaired;
2339 * To determine the file type for nlink/inode_item repair
2341 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2342 * Return -ENOENT if file type is not found.
/*
 * Determine the BTRFS_FT_* file type of `rec` for nlink/inode repair.
 * Prefer the recovered inode item's mode; otherwise take the filetype of
 * any backref that has a dir index or dir item.  The success returns and
 * the final -ENOENT are not visible in this chunk.
 */
2344 static int find_file_type(struct inode_record *rec, u8 *type)
2346 struct inode_backref *backref;
2348 /* For inode item recovered case */
2349 if (rec->found_inode_item) {
2350 *type = imode_to_type(rec->imode);
2354 list_for_each_entry(backref, &rec->backrefs, list) {
2355 if (backref->found_dir_index || backref->found_dir_item) {
2356 *type = backref->filetype;
2364 * To determine the file name for nlink repair
2366 * Return 0 if file name is found, set name and namelen.
2367 * Return -ENOENT if file name is not found.
/*
 * Recover a usable file name for `rec` from the first backref that has
 * any naming evidence (dir index, dir item, or inode ref), copying it
 * into `name` and its length into *namelen.  The success return and the
 * final -ENOENT are not visible in this chunk.
 */
2369 static int find_file_name(struct inode_record *rec,
2370 char *name, int *namelen)
2372 struct inode_backref *backref;
2374 list_for_each_entry(backref, &rec->backrefs, list) {
2375 if (backref->found_dir_index || backref->found_dir_item ||
2376 backref->found_inode_ref) {
2377 memcpy(name, backref->name, backref->namelen);
2378 *namelen = backref->namelen;
2385 /* Reset the nlink of the inode to the correct one */
2386 static int reset_nlink(struct btrfs_trans_handle *trans,
2387 struct btrfs_root *root,
2388 struct btrfs_path *path,
2389 struct inode_record *rec)
2391 struct inode_backref *backref;
2392 struct inode_backref *tmp;
2393 struct btrfs_key key;
2394 struct btrfs_inode_item *inode_item;
2397 /* We don't believe this either, reset it and iterate backref */
2398 rec->found_link = 0;
2400 /* Remove all backref including the valid ones */
2401 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2402 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2403 backref->index, backref->name,
2404 backref->namelen, 0);
2408 /* remove invalid backref, so it won't be added back */
2409 if (!(backref->found_dir_index &&
2410 backref->found_dir_item &&
2411 backref->found_inode_ref)) {
2412 list_del(&backref->list);
2419 /* Set nlink to 0 */
2420 key.objectid = rec->ino;
2421 key.type = BTRFS_INODE_ITEM_KEY;
2423 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2430 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2431 struct btrfs_inode_item);
2432 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2433 btrfs_mark_buffer_dirty(path->nodes[0]);
2434 btrfs_release_path(path);
2437 * Add back valid inode_ref/dir_item/dir_index,
2438 * add_link() will handle the nlink inc, so new nlink must be correct
2440 list_for_each_entry(backref, &rec->backrefs, list) {
2441 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2442 backref->name, backref->namelen,
2443 backref->filetype, &backref->index, 1);
2448 btrfs_release_path(path);
/*
 * Repair a wrong link count: recover name/type, reset nlink via
 * reset_nlink(), and if the inode ends up with zero valid links, move it
 * into a "lost+found" directory (created on demand), appending ".INO"
 * suffixes to the name until it no longer collides.  Always clears
 * I_ERR_LINK_COUNT_WRONG so the caller does not loop on this record.
 *
 * NOTE(review): declarations (type, namelen, lost_found_ino, ret), the
 * fallback-name branches and several error gotos are missing from this
 * chunk.
 */
2452 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2453 struct btrfs_root *root,
2454 struct btrfs_path *path,
2455 struct inode_record *rec)
2457 char *dir_name = "lost+found";
2458 char namebuf[BTRFS_NAME_LEN] = {0};
2463 int name_recovered = 0;
2464 int type_recovered = 0;
2468 * Get file name and type first before these invalid inode ref
2469 * are deleted by remove_all_invalid_backref()
2471 name_recovered = !find_file_name(rec, namebuf, &namelen);
2472 type_recovered = !find_file_type(rec, &type);
/* no name evidence: fall back to the decimal inode number */
2474 if (!name_recovered) {
2475 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2476 rec->ino, rec->ino);
2477 namelen = count_digits(rec->ino);
2478 sprintf(namebuf, "%llu", rec->ino);
2481 if (!type_recovered) {
2482 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2484 type = BTRFS_FT_REG_FILE;
2488 ret = reset_nlink(trans, root, path, rec);
2491 "Failed to reset nlink for inode %llu: %s\n",
2492 rec->ino, strerror(-ret));
/* no valid link survived: park the inode under lost+found */
2496 if (rec->found_link == 0) {
2497 lost_found_ino = root->highest_inode;
2498 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2503 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2504 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2507 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2508 dir_name, strerror(-ret));
2511 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2512 namebuf, namelen, type, NULL, 1);
2514 * Add ".INO" suffix several times to handle case where
2515 * "FILENAME.INO" is already taken by another file.
2517 while (ret == -EEXIST) {
2519 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2521 if (namelen + count_digits(rec->ino) + 1 >
2526 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2528 namelen += count_digits(rec->ino) + 1;
2529 ret = btrfs_add_link(trans, root, rec->ino,
2530 lost_found_ino, namebuf,
2531 namelen, type, NULL, 1);
2535 "Failed to link the inode %llu to %s dir: %s\n",
2536 rec->ino, dir_name, strerror(-ret));
2540 * Just increase the found_link, don't actually add the
2541 * backref. This will make things easier and this inode
2542 * record will be freed after the repair is done.
2543 * So fsck will not report problem about this inode.
2546 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2547 namelen, namebuf, dir_name);
2549 printf("Fixed the nlink of inode %llu\n", rec->ino);
2552 * Clear the flag anyway, or we will loop forever for the same inode
2553 * as it will not be removed from the bad inode list and the dead loop
2556 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2557 btrfs_release_path(path);
2562 * Check if there is any normal(reg or prealloc) file extent for given
2564 * This is used to determine the file type when neither its dir_index/item nor
2565 * inode_item exists.
2567 * This will *NOT* report error, if any error happens, just consider it does
2568 * not have any normal file extent.
/*
 * Return whether inode `ino` has at least one non-inline (REG/PREALLOC)
 * file extent — used as a "this is a regular file" signal when no inode
 * item or dir entry survives.  Errors are swallowed and treated as "no".
 *
 * NOTE(review): ret/path-NULL handling, key.objectid/offset assignments
 * and the loop framing are missing from this chunk.
 */
2570 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2572 struct btrfs_path *path;
2573 struct btrfs_key key;
2574 struct btrfs_key found_key;
2575 struct btrfs_file_extent_item *fi;
2579 path = btrfs_alloc_path();
2583 key.type = BTRFS_EXTENT_DATA_KEY;
2586 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* search may stop at the end of a leaf; step to the next one */
2591 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2592 ret = btrfs_next_leaf(root, path);
2599 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* stop once items no longer belong to this inode's extent data */
2601 if (found_key.objectid != ino ||
2602 found_key.type != BTRFS_EXTENT_DATA_KEY)
2604 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2605 struct btrfs_file_extent_item);
2606 type = btrfs_file_extent_type(path->nodes[0], fi);
/* any non-inline extent means a normal (regular) file */
2607 if (type != BTRFS_FILE_EXTENT_INLINE) {
2613 btrfs_free_path(path);
2617 static u32 btrfs_type_to_imode(u8 type)
2619 static u32 imode_by_btrfs_type[] = {
2620 [BTRFS_FT_REG_FILE] = S_IFREG,
2621 [BTRFS_FT_DIR] = S_IFDIR,
2622 [BTRFS_FT_CHRDEV] = S_IFCHR,
2623 [BTRFS_FT_BLKDEV] = S_IFBLK,
2624 [BTRFS_FT_FIFO] = S_IFIFO,
2625 [BTRFS_FT_SOCK] = S_IFSOCK,
2626 [BTRFS_FT_SYMLINK] = S_IFLNK,
2629 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing INODE_ITEM for `rec`: guess the file type (backref
 * filetype, then presence of normal file extents / dir items / orphan
 * extents, then FILE as last resort), create a fresh inode item via
 * btrfs_new_inode(), and flag the record so the nlink repair pass runs
 * afterwards.
 *
 * Fix vs. original: corrected the typo "determint" -> "determine" in the
 * user-facing fallback message.
 *
 * NOTE(review): declarations (ret, filetype, mode) and the btrfs_new_inode
 * error path are missing from this chunk.
 */
2632 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2633 struct btrfs_root *root,
2634 struct btrfs_path *path,
2635 struct inode_record *rec)
2639 int type_recovered = 0;
2642 printf("Trying to rebuild inode:%llu\n", rec->ino);
2644 type_recovered = !find_file_type(rec, &filetype);
2647 * Try to determine inode type if type not found.
2649 * For found regular file extent, it must be FILE.
2650 * For found dir_item/index, it must be DIR.
2652 * For undetermined one, use FILE as fallback.
2655 * 1. If found backref(inode_index/item is already handled) to it,
2657 * Need new inode-inode ref structure to allow search for that.
2659 if (!type_recovered) {
2660 if (rec->found_file_extent &&
2661 find_normal_file_extent(root, rec->ino)) {
2663 filetype = BTRFS_FT_REG_FILE;
2664 } else if (rec->found_dir_item) {
2666 filetype = BTRFS_FT_DIR;
/* orphan data extents also imply a regular file */
2667 } else if (!list_empty(&rec->orphan_extents)) {
2669 filetype = BTRFS_FT_REG_FILE;
2671 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2674 filetype = BTRFS_FT_REG_FILE;
2678 ret = btrfs_new_inode(trans, root, rec->ino,
2679 mode | btrfs_type_to_imode(filetype));
2684 * Here inode rebuild is done, we only rebuild the inode item,
2685 * don't repair the nlink(like move to lost+found).
2686 * That is the job of nlink repair.
2688 * We just fill the record and return
2690 rec->found_dir_item = 1;
2691 rec->imode = mode | btrfs_type_to_imode(filetype);
2693 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2694 /* Ensure the inode_nlinks repair function will be called */
2695 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2700 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2701 struct btrfs_root *root,
2702 struct btrfs_path *path,
2703 struct inode_record *rec)
2705 struct orphan_data_extent *orphan;
2706 struct orphan_data_extent *tmp;
2709 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2711 * Check for conflicting file extents
2713 * Here we don't know whether the extents is compressed or not,
2714 * so we can only assume it not compressed nor data offset,
2715 * and use its disk_len as extent length.
2717 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2718 orphan->offset, orphan->disk_len, 0);
2719 btrfs_release_path(path);
2724 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2725 orphan->disk_bytenr, orphan->disk_len);
2726 ret = btrfs_free_extent(trans,
2727 root->fs_info->extent_root,
2728 orphan->disk_bytenr, orphan->disk_len,
2729 0, root->objectid, orphan->objectid,
2734 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2735 orphan->offset, orphan->disk_bytenr,
2736 orphan->disk_len, orphan->disk_len);
2740 /* Update file size info */
2741 rec->found_size += orphan->disk_len;
2742 if (rec->found_size == rec->nbytes)
2743 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2745 /* Update the file extent hole info too */
2746 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2750 if (RB_EMPTY_ROOT(&rec->holes))
2751 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2753 list_del(&orphan->list);
2756 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill in the missing ("discount") file extents of @rec by punching a
 * hole extent for every entry in rec->holes.  When all holes are gone
 * the I_ERR_FILE_EXTENT_DISCOUNT bit is cleared.  A file that lost all
 * its extents is handled as one hole covering [0, round_up(isize)].
 */
2761 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2762 struct btrfs_root *root,
2763 struct btrfs_path *path,
2764 struct inode_record *rec)
2766 struct rb_node *node;
2767 struct file_extent_hole *hole;
/* Walk the hole tree, punching and deleting one hole per iteration. */
2771 node = rb_first(&rec->holes);
2775 hole = rb_entry(node, struct file_extent_hole, node);
2776 ret = btrfs_punch_hole(trans, root, rec->ino,
2777 hole->start, hole->len);
2780 ret = del_file_extent_hole(&rec->holes, hole->start,
2784 if (RB_EMPTY_ROOT(&rec->holes))
2785 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2786 node = rb_first(&rec->holes);
2788 /* special case for a file losing all its file extent */
2790 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2791 round_up(rec->isize, root->sectorsize));
2795 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2796 rec->ino, root->objectid);
/*
 * Dispatcher for per-inode repairs: bail out unless @rec carries one of
 * the repairable error bits, then run each applicable repair helper in
 * dependency order inside a single transaction.  Each helper only runs
 * if all previous ones succeeded (ret == 0).
 */
2801 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2803 struct btrfs_trans_handle *trans;
2804 struct btrfs_path *path;
/* Only these error bits have repair routines; anything else is left alone. */
2807 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2808 I_ERR_NO_ORPHAN_ITEM |
2809 I_ERR_LINK_COUNT_WRONG |
2810 I_ERR_NO_INODE_ITEM |
2811 I_ERR_FILE_EXTENT_ORPHAN |
2812 I_ERR_FILE_EXTENT_DISCOUNT|
2813 I_ERR_FILE_NBYTES_WRONG)))
2816 path = btrfs_alloc_path();
/*
 * Reserve 7 items: the worst case is the nlink repair creating a
 * lost+found entry (breakdown below).
 */
2821 * For nlink repair, it may create a dir and add link, so
2822 * 2 for parent(256)'s dir_index and dir_item
2823 * 2 for lost+found dir's inode_item and inode_ref
2824 * 1 for the new inode_ref of the file
2825 * 2 for lost+found dir's dir_index and dir_item for the file
2827 trans = btrfs_start_transaction(root, 7);
2828 if (IS_ERR(trans)) {
2829 btrfs_free_path(path);
2830 return PTR_ERR(trans);
/* Repair order matters: missing inode item first, nbytes last. */
2833 if (rec->errors & I_ERR_NO_INODE_ITEM)
2834 ret = repair_inode_no_item(trans, root, path, rec);
2835 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2836 ret = repair_inode_orphan_extent(trans, root, path, rec);
2837 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2838 ret = repair_inode_discount_extent(trans, root, path, rec);
2839 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2840 ret = repair_inode_isize(trans, root, path, rec);
2841 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2842 ret = repair_inode_orphan_item(trans, root, path, rec);
2843 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2844 ret = repair_inode_nlinks(trans, root, path, rec);
2845 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2846 ret = repair_inode_nbytes(trans, root, path, rec);
2847 btrfs_commit_transaction(trans, root);
2848 btrfs_free_path(path);
/*
 * Validate (and optionally repair) every inode record collected for
 * @root in @inode_cache.
 *
 * Steps visible here: record the highest inode number (for a later
 * lost+found allocation), repair inode backrefs in stages when repair
 * is enabled, verify/recreate the root directory, then drain the cache
 * checking each inode and printing unresolved backref errors.
 * Returns -1 if any error remains, 0 otherwise.
 */
2852 static int check_inode_recs(struct btrfs_root *root,
2853 struct cache_tree *inode_cache)
2855 struct cache_extent *cache;
2856 struct ptr_node *node;
2857 struct inode_record *rec;
2858 struct inode_backref *backref;
2863 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (refs == 0) should have no inode records left. */
2865 if (btrfs_root_refs(&root->root_item) == 0) {
2866 if (!cache_tree_empty(inode_cache))
2867 fprintf(stderr, "warning line %d\n", __LINE__);
2872 * We need to record the highest inode number for later 'lost+found'
2874 * We must select a ino not used/refered by any existing inode, or
2875 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2876 * this may cause 'lost+found' dir has wrong nlinks.
2878 cache = last_cache_extent(inode_cache);
2880 node = container_of(cache, struct ptr_node, cache);
2882 if (rec->ino > root->highest_inode)
2883 root->highest_inode = rec->ino;
2887 * We need to repair backrefs first because we could change some of the
2888 * errors in the inode recs.
2890 * We also need to go through and delete invalid backrefs first and then
2891 * add the correct ones second. We do this because we may get EEXIST
2892 * when adding back the correct index because we hadn't yet deleted the
2895 * For example, if we were missing a dir index then the directories
2896 * isize would be wrong, so if we fixed the isize to what we thought it
2897 * would be and then fixed the backref we'd still have a invalid fs, so
2898 * we need to add back the dir index and then check to see if the isize
2903 if (stage == 3 && !err)
2906 cache = search_cache_extent(inode_cache, 0);
2907 while (repair && cache) {
2908 node = container_of(cache, struct ptr_node, cache);
2910 cache = next_cache_extent(cache);
2912 /* Need to free everything up and rescan */
2914 remove_cache_extent(inode_cache, &node->cache);
2916 free_inode_rec(rec);
2920 if (list_empty(&rec->backrefs))
2923 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Verify the root directory inode; recreate it when repairing. */
2937 rec = get_inode_rec(inode_cache, root_dirid, 0);
2938 BUG_ON(IS_ERR(rec));
2940 ret = check_root_dir(rec);
2942 fprintf(stderr, "root %llu root dir %llu error\n",
2943 (unsigned long long)root->root_key.objectid,
2944 (unsigned long long)root_dirid);
2945 print_inode_error(root, rec);
2950 struct btrfs_trans_handle *trans;
2952 trans = btrfs_start_transaction(root, 1);
2953 if (IS_ERR(trans)) {
2954 err = PTR_ERR(trans);
2959 "root %llu missing its root dir, recreating\n",
2960 (unsigned long long)root->objectid);
2962 ret = btrfs_make_root_dir(trans, root, root_dirid);
2965 btrfs_commit_transaction(trans, root);
2969 fprintf(stderr, "root %llu root dir %llu not found\n",
2970 (unsigned long long)root->root_key.objectid,
2971 (unsigned long long)root_dirid);
/* Drain the cache: check every remaining inode record. */
2975 cache = search_cache_extent(inode_cache, 0);
2978 node = container_of(cache, struct ptr_node, cache);
2980 remove_cache_extent(inode_cache, &node->cache);
2982 if (rec->ino == root_dirid ||
2983 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2984 free_inode_rec(rec);
/* A missing orphan item may exist on disk; re-check before flagging. */
2988 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2989 ret = check_orphan_item(root, rec->ino);
2991 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2992 if (can_free_inode_rec(rec)) {
2993 free_inode_rec(rec);
2998 if (!rec->found_inode_item)
2999 rec->errors |= I_ERR_NO_INODE_ITEM;
3000 if (rec->found_link != rec->nlink)
3001 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3003 ret = try_repair_inode(root, rec);
3004 if (ret == 0 && can_free_inode_rec(rec)) {
3005 free_inode_rec(rec);
3011 if (!(repair && ret == 0))
3013 print_inode_error(root, rec);
3014 list_for_each_entry(backref, &rec->backrefs, list) {
3015 if (!backref->found_dir_item)
3016 backref->errors |= REF_ERR_NO_DIR_ITEM;
3017 if (!backref->found_dir_index)
3018 backref->errors |= REF_ERR_NO_DIR_INDEX;
3019 if (!backref->found_inode_ref)
3020 backref->errors |= REF_ERR_NO_INODE_REF;
3021 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3022 " namelen %u name %s filetype %d errors %x",
3023 (unsigned long long)backref->dir,
3024 (unsigned long long)backref->index,
3025 backref->namelen, backref->name,
3026 backref->filetype, backref->errors);
3027 print_ref_error(backref->errors);
3029 free_inode_rec(rec);
3031 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record when none exists.  Returns
 * ERR_PTR(-ENOMEM) on allocation failure and ERR_PTR(-EEXIST) if the
 * cache insert fails.
 */
3034 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3037 struct cache_extent *cache;
3038 struct root_record *rec = NULL;
3041 cache = lookup_cache_extent(root_cache, objectid, 1);
3043 rec = container_of(cache, struct root_record, cache);
/* Not cached yet: create a new record keyed by the root objectid. */
3045 rec = calloc(1, sizeof(*rec));
3047 return ERR_PTR(-ENOMEM);
3048 rec->objectid = objectid;
3049 INIT_LIST_HEAD(&rec->backrefs);
3050 rec->cache.start = objectid;
3051 rec->cache.size = 1;
3053 ret = insert_cache_extent(root_cache, &rec->cache);
3055 return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec matching (@ref_root, @dir, @name); allocate
 * and append a new one when no match exists.  The name is stored
 * NUL-terminated in the trailing flexible buffer of the backref.
 */
3060 static struct root_backref *get_root_backref(struct root_record *rec,
3061 u64 ref_root, u64 dir, u64 index,
3062 const char *name, int namelen)
3064 struct root_backref *backref;
3066 list_for_each_entry(backref, &rec->backrefs, list) {
3067 if (backref->ref_root != ref_root || backref->dir != dir ||
3068 backref->namelen != namelen)
3070 if (memcmp(name, backref->name, namelen))
/* No existing match: allocate backref plus room for name + NUL. */
3075 backref = calloc(1, sizeof(*backref) + namelen + 1);
3078 backref->ref_root = ref_root;
3080 backref->index = index;
3081 backref->namelen = namelen;
3082 memcpy(backref->name, name, namelen);
3083 backref->name[namelen] = '\0';
3084 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one cached root_record: free all attached backrefs,
 * then the record itself.  Used below to instantiate the whole-tree
 * free helper via FREE_EXTENT_CACHE_BASED_TREE.
 */
3088 static void free_root_record(struct cache_extent *cache)
3090 struct root_record *rec;
3091 struct root_backref *backref;
3093 rec = container_of(cache, struct root_record, cache);
3094 while (!list_empty(&rec->backrefs)) {
3095 backref = list_entry(rec->backrefs.next,
3096 struct root_backref, list);
3097 list_del(&backref->list);
/* Generates free_root_recs_tree() from the per-record destructor above. */
3104 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root @root_id found as @item_type
 * (DIR_ITEM/DIR_INDEX/ROOT_REF/ROOT_BACKREF) and accumulate @errors.
 * Duplicate forward/back refs and mismatched dir indexes are flagged on
 * the backref.  A root becomes "reachable" once both a forward root ref
 * and the corresponding dir item have been seen.
 */
3106 static int add_root_backref(struct cache_tree *root_cache,
3107 u64 root_id, u64 ref_root, u64 dir, u64 index,
3108 const char *name, int namelen,
3109 int item_type, int errors)
3111 struct root_record *rec;
3112 struct root_backref *backref;
3114 rec = get_root_rec(root_cache, root_id);
3115 BUG_ON(IS_ERR(rec));
3116 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3119 backref->errors |= errors;
/* DIR_ITEM keys carry no index; cross-check it for the other types. */
3121 if (item_type != BTRFS_DIR_ITEM_KEY) {
3122 if (backref->found_dir_index || backref->found_back_ref ||
3123 backref->found_forward_ref) {
3124 if (backref->index != index)
3125 backref->errors |= REF_ERR_INDEX_UNMATCH;
3127 backref->index = index;
3131 if (item_type == BTRFS_DIR_ITEM_KEY) {
3132 if (backref->found_forward_ref)
3134 backref->found_dir_item = 1;
3135 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3136 backref->found_dir_index = 1;
3137 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3138 if (backref->found_forward_ref)
3139 backref->errors |= REF_ERR_DUP_ROOT_REF;
3140 else if (backref->found_dir_item)
3142 backref->found_forward_ref = 1;
3143 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3144 if (backref->found_back_ref)
3145 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3146 backref->found_back_ref = 1;
3151 if (backref->found_forward_ref && backref->found_dir_item)
3152 backref->reachable = 1;
/*
 * Move the subvolume-reference records collected while scanning @root's
 * tree (in @src_cache, stored as inode records) into the global root
 * cache @dst_cache as root backrefs.  Records from the reloc tree are
 * simply discarded.
 */
3156 static int merge_root_recs(struct btrfs_root *root,
3157 struct cache_tree *src_cache,
3158 struct cache_tree *dst_cache)
3160 struct cache_extent *cache;
3161 struct ptr_node *node;
3162 struct inode_record *rec;
3163 struct inode_backref *backref;
/* Reloc tree entries are not real subvolume refs; drop them all. */
3166 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3167 free_inode_recs_tree(src_cache);
3172 cache = search_cache_extent(src_cache, 0);
3175 node = container_of(cache, struct ptr_node, cache);
3177 remove_cache_extent(src_cache, &node->cache);
3180 ret = is_child_root(root, root->objectid, rec->ino);
/* Each dir item/index seen for the child root becomes a root backref. */
3186 list_for_each_entry(backref, &rec->backrefs, list) {
3187 BUG_ON(backref->found_inode_ref);
3188 if (backref->found_dir_item)
3189 add_root_backref(dst_cache, rec->ino,
3190 root->root_key.objectid, backref->dir,
3191 backref->index, backref->name,
3192 backref->namelen, BTRFS_DIR_ITEM_KEY,
3194 if (backref->found_dir_index)
3195 add_root_backref(dst_cache, rec->ino,
3196 root->root_key.objectid, backref->dir,
3197 backref->index, backref->name,
3198 backref->namelen, BTRFS_DIR_INDEX_KEY,
3202 free_inode_rec(rec);
/*
 * Verify reachability and backref consistency of every recorded
 * subvolume root.  First propagate unreachability iteratively (a
 * backref from an unreferenced root does not count), then report roots
 * with no references, missing root items, and unresolved backrefs.
 * Returns 1 if any error was found, 0 otherwise.
 */
3209 static int check_root_refs(struct btrfs_root *root,
3210 struct cache_tree *root_cache)
3212 struct root_record *rec;
3213 struct root_record *ref_root;
3214 struct root_backref *backref;
3215 struct cache_extent *cache;
/* The top-level FS tree is always considered referenced. */
3221 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3222 BUG_ON(IS_ERR(rec));
3225 /* fixme: this can not detect circular references */
3228 cache = search_cache_extent(root_cache, 0);
3232 rec = container_of(cache, struct root_record, cache);
3233 cache = next_cache_extent(cache);
3235 if (rec->found_ref == 0)
3238 list_for_each_entry(backref, &rec->backrefs, list) {
3239 if (!backref->reachable)
/* A ref only counts if the referring root is itself referenced. */
3242 ref_root = get_root_rec(root_cache,
3244 BUG_ON(IS_ERR(ref_root));
3245 if (ref_root->found_ref > 0)
3248 backref->reachable = 0;
3250 if (rec->found_ref == 0)
/* Reporting pass over all roots. */
3256 cache = search_cache_extent(root_cache, 0);
3260 rec = container_of(cache, struct root_record, cache);
3261 cache = next_cache_extent(cache);
3263 if (rec->found_ref == 0 &&
3264 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3265 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3266 ret = check_orphan_item(root->fs_info->tree_root,
3272 * If we don't have a root item then we likely just have
3273 * a dir item in a snapshot for this root but no actual
3274 * ref key or anything so it's meaningless.
3276 if (!rec->found_root_item)
3279 fprintf(stderr, "fs tree %llu not referenced\n",
3280 (unsigned long long)rec->objectid);
3284 if (rec->found_ref > 0 && !rec->found_root_item)
3286 list_for_each_entry(backref, &rec->backrefs, list) {
3287 if (!backref->found_dir_item)
3288 backref->errors |= REF_ERR_NO_DIR_ITEM;
3289 if (!backref->found_dir_index)
3290 backref->errors |= REF_ERR_NO_DIR_INDEX;
3291 if (!backref->found_back_ref)
3292 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3293 if (!backref->found_forward_ref)
3294 backref->errors |= REF_ERR_NO_ROOT_REF;
3295 if (backref->reachable && backref->errors)
3302 fprintf(stderr, "fs tree %llu refs %u %s\n",
3303 (unsigned long long)rec->objectid, rec->found_ref,
3304 rec->found_root_item ? "" : "not found");
3306 list_for_each_entry(backref, &rec->backrefs, list) {
3307 if (!backref->reachable)
3309 if (!backref->errors && rec->found_root_item)
3311 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3312 " index %llu namelen %u name %s errors %x\n",
3313 (unsigned long long)backref->ref_root,
3314 (unsigned long long)backref->dir,
3315 (unsigned long long)backref->index,
3316 backref->namelen, backref->name,
3318 print_ref_error(backref->errors);
3321 return errors > 0 ? 1 : 0;
/*
 * Parse one ROOT_REF / ROOT_BACKREF item at @slot of @eb and feed it
 * into @root_cache.  For ROOT_REF the key is (parent, REF, child); for
 * ROOT_BACKREF it is (child, BACKREF, parent) — hence the swapped
 * objectid/offset arguments below.  Over-long names are truncated to
 * BTRFS_NAME_LEN and flagged with REF_ERR_NAME_TOO_LONG.
 */
3324 static int process_root_ref(struct extent_buffer *eb, int slot,
3325 struct btrfs_key *key,
3326 struct cache_tree *root_cache)
3332 struct btrfs_root_ref *ref;
3333 char namebuf[BTRFS_NAME_LEN];
3336 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3338 dirid = btrfs_root_ref_dirid(eb, ref);
3339 index = btrfs_root_ref_sequence(eb, ref);
3340 name_len = btrfs_root_ref_name_len(eb, ref);
3342 if (name_len <= BTRFS_NAME_LEN) {
3346 len = BTRFS_NAME_LEN;
3347 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored inline immediately after the ref structure. */
3349 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3351 if (key->type == BTRFS_ROOT_REF_KEY) {
3352 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3353 index, namebuf, len, key->type, error);
3355 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3356 index, namebuf, len, key->type, error);
/* Destructor for one btrfs_corrupt_block cache entry. */
3361 static void free_corrupt_block(struct cache_extent *cache)
3363 struct btrfs_corrupt_block *corrupt;
3365 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree() for draining a whole cache tree. */
3369 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3372 * Repair the btree of the given root.
3374 * The fix is to remove the node key in corrupt_blocks cache_tree.
3375 * and rebalance the tree.
3376 * After the fix, the btree should be writeable.
3378 static int repair_btree(struct btrfs_root *root,
3379 struct cache_tree *corrupt_blocks)
3381 struct btrfs_trans_handle *trans;
3382 struct btrfs_path *path;
3383 struct btrfs_corrupt_block *corrupt;
3384 struct cache_extent *cache;
3385 struct btrfs_key key;
/* Nothing corrupted: nothing to do. */
3390 if (cache_tree_empty(corrupt_blocks))
3393 path = btrfs_alloc_path();
3397 trans = btrfs_start_transaction(root, 1);
3398 if (IS_ERR(trans)) {
3399 ret = PTR_ERR(trans);
3400 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: drop the pointer to each corrupt block and free its extent. */
3404 cache = first_cache_extent(corrupt_blocks);
3406 corrupt = container_of(cache, struct btrfs_corrupt_block,
3408 level = corrupt->level;
3409 path->lowest_level = level;
3410 key.objectid = corrupt->key.objectid;
3411 key.type = corrupt->key.type;
3412 key.offset = corrupt->key.offset;
3415 * Here we don't want to do any tree balance, since it may
3416 * cause a balance with corrupted brother leaf/node,
3417 * so ins_len set to 0 here.
3418 * Balance will be done after all corrupt node/leaf is deleted.
3420 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3423 offset = btrfs_node_blockptr(path->nodes[level],
3424 path->slots[level]);
3426 /* Remove the ptr */
3427 ret = btrfs_del_ptr(trans, root, path, level,
3428 path->slots[level]);
3432 * Remove the corresponding extent
3433 * return value is not concerned.
3435 btrfs_release_path(path);
3436 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3437 0, root->root_key.objectid,
3439 cache = next_cache_extent(cache);
/* Pass 2: searches with ins_len == -1 trigger rebalancing. */
3442 /* Balance the btree using btrfs_search_slot() */
3443 cache = first_cache_extent(corrupt_blocks);
3445 corrupt = container_of(cache, struct btrfs_corrupt_block,
3447 memcpy(&key, &corrupt->key, sizeof(key));
3448 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3451 /* return will always >0 since it won't find the item */
3453 btrfs_release_path(path);
3454 cache = next_cache_extent(cache);
3457 btrfs_commit_transaction(trans, root);
3459 btrfs_free_path(path);
/*
 * Fully check one fs/subvolume tree: walk every block (honoring any
 * drop_progress for partially-deleted snapshots), collect inode and
 * root-ref records, report corrupted tree blocks and — in repair mode —
 * attempt to fix the btree via repair_btree().  Results are merged into
 * @root_cache and the inode records are verified with check_inode_recs.
 */
3463 static int check_fs_root(struct btrfs_root *root,
3464 struct cache_tree *root_cache,
3465 struct walk_control *wc)
3471 struct btrfs_path path;
3472 struct shared_node root_node;
3473 struct root_record *rec;
3474 struct btrfs_root_item *root_item = &root->root_item;
3475 struct cache_tree corrupt_blocks;
3476 struct orphan_data_extent *orphan;
3477 struct orphan_data_extent *tmp;
3478 enum btrfs_tree_block_status status;
3481 * Reuse the corrupt_block cache tree to record corrupted tree block
3483 * Unlike the usage in extent tree check, here we do it in a per
3484 * fs/subvol tree base.
3486 cache_tree_init(&corrupt_blocks);
3487 root->fs_info->corrupt_blocks = &corrupt_blocks;
3489 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3490 rec = get_root_rec(root_cache, root->root_key.objectid);
3491 BUG_ON(IS_ERR(rec));
3492 if (btrfs_root_refs(root_item) > 0)
3493 rec->found_root_item = 1;
3496 btrfs_init_path(&path);
3497 memset(&root_node, 0, sizeof(root_node));
3498 cache_tree_init(&root_node.root_cache);
3499 cache_tree_init(&root_node.inode_cache);
3501 /* Move the orphan extent record to corresponding inode_record */
3502 list_for_each_entry_safe(orphan, tmp,
3503 &root->orphan_data_extents, list) {
3504 struct inode_record *inode;
3506 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3508 BUG_ON(IS_ERR(inode));
3509 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3510 list_move(&orphan->list, &inode->orphan_extents);
3513 level = btrfs_header_level(root->node);
3514 memset(wc->nodes, 0, sizeof(wc->nodes));
3515 wc->nodes[level] = &root_node;
3516 wc->active_node = level;
3517 wc->root_level = level;
3519 /* We may not have checked the root block, lets do that now */
3520 if (btrfs_is_leaf(root->node))
3521 status = btrfs_check_leaf(root, NULL, root->node);
3523 status = btrfs_check_node(root, NULL, root->node);
3524 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Start at the tree top, or resume from drop_progress for a dead root. */
3527 if (btrfs_root_refs(root_item) > 0 ||
3528 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3529 path.nodes[level] = root->node;
3530 extent_buffer_get(root->node);
3531 path.slots[level] = 0;
3533 struct btrfs_key key;
3534 struct btrfs_disk_key found_key;
3536 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3537 level = root_item->drop_level;
3538 path.lowest_level = level;
3539 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3542 btrfs_node_key(path.nodes[level], &found_key,
3544 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3545 sizeof(found_key)));
/* Main walk: down then up until the whole tree has been visited. */
3549 wret = walk_down_tree(root, &path, wc, &level);
3555 wret = walk_up_tree(root, &path, wc, &level);
3562 btrfs_release_path(&path);
3564 if (!cache_tree_empty(&corrupt_blocks)) {
3565 struct cache_extent *cache;
3566 struct btrfs_corrupt_block *corrupt;
3568 printf("The following tree block(s) is corrupted in tree %llu:\n",
3569 root->root_key.objectid);
3570 cache = first_cache_extent(&corrupt_blocks);
3572 corrupt = container_of(cache,
3573 struct btrfs_corrupt_block,
3575 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3576 cache->start, corrupt->level,
3577 corrupt->key.objectid, corrupt->key.type,
3578 corrupt->key.offset);
3579 cache = next_cache_extent(cache);
3582 printf("Try to repair the btree for root %llu\n",
3583 root->root_key.objectid);
3584 ret = repair_btree(root, &corrupt_blocks);
3586 fprintf(stderr, "Failed to repair btree: %s\n",
3589 printf("Btree for root %llu is fixed\n",
3590 root->root_key.objectid);
3594 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3598 if (root_node.current) {
3599 root_node.current->checked = 1;
3600 maybe_free_inode_rec(&root_node.inode_cache,
3604 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down per-root state before returning. */
3608 free_corrupt_blocks_tree(&corrupt_blocks);
3609 root->fs_info->corrupt_blocks = NULL;
3610 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return nonzero if @objectid names a tree that should be checked as an
 * fs tree: any fstree-range objectid plus the reloc and data-reloc
 * trees.
 */
3614 static int fs_root_objectid(u64 objectid)
3616 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3617 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3619 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree of tree roots and run
 * check_fs_root() on each fs/subvolume tree; ROOT_REF/ROOT_BACKREF
 * items are fed to process_root_ref().  If the root tree node changes
 * underneath us (a repair committed a transaction) or a check returns
 * -EAGAIN, collected records are discarded and the scan restarts.
 */
3622 static int check_fs_roots(struct btrfs_root *root,
3623 struct cache_tree *root_cache)
3625 struct btrfs_path path;
3626 struct btrfs_key key;
3627 struct walk_control wc;
3628 struct extent_buffer *leaf, *tree_node;
3629 struct btrfs_root *tmp_root;
3630 struct btrfs_root *tree_root = root->fs_info->tree_root;
3634 if (ctx.progress_enabled) {
3635 ctx.tp = TASK_FS_ROOTS;
3636 task_start(ctx.info);
3640 * Just in case we made any changes to the extent tree that weren't
3641 * reflected into the free space cache yet.
3644 reset_cached_block_groups(root->fs_info);
3645 memset(&wc, 0, sizeof(wc));
3646 cache_tree_init(&wc.shared);
3647 btrfs_init_path(&path);
3652 key.type = BTRFS_ROOT_ITEM_KEY;
3653 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the root node so we can detect concurrent modification. */
3658 tree_node = tree_root->node;
3660 if (tree_node != tree_root->node) {
3661 free_root_recs_tree(root_cache);
3662 btrfs_release_path(&path);
3665 leaf = path.nodes[0];
3666 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3667 ret = btrfs_next_leaf(tree_root, &path);
3673 leaf = path.nodes[0];
3675 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3676 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3677 fs_root_objectid(key.objectid)) {
/* The reloc tree is read uncached; regular roots via the cache. */
3678 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3679 tmp_root = btrfs_read_fs_root_no_cache(
3680 root->fs_info, &key);
3682 key.offset = (u64)-1;
3683 tmp_root = btrfs_read_fs_root(
3684 root->fs_info, &key);
3686 if (IS_ERR(tmp_root)) {
3690 ret = check_fs_root(tmp_root, root_cache, &wc);
3691 if (ret == -EAGAIN) {
3692 free_root_recs_tree(root_cache);
3693 btrfs_release_path(&path);
3698 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3699 btrfs_free_fs_root(tmp_root);
3700 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3701 key.type == BTRFS_ROOT_BACKREF_KEY) {
3702 process_root_ref(leaf, path.slots[0], &key,
3709 btrfs_release_path(&path);
3711 free_extent_cache_tree(&wc.shared);
3712 if (!cache_tree_empty(&wc.shared))
3713 fprintf(stderr, "warning line %d\n", __LINE__);
3715 task_stop(ctx.info);
/*
 * Cross-check every backref on extent record @rec: each must have been
 * seen in the extent tree, tree backrefs must be referenced back, data
 * backref counts/bytenr/bytes must match the extent, and the summed
 * reference count must equal rec->refs.  Errors are printed when
 * @print_errs is set; the return value is nonzero if anything is wrong.
 */
3720 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3722 struct list_head *cur = rec->backrefs.next;
3723 struct extent_backref *back;
3724 struct tree_backref *tback;
3725 struct data_backref *dback;
3729 while(cur != &rec->backrefs) {
3730 back = list_entry(cur, struct extent_backref, list);
/* Backref seen while walking trees but missing in the extent tree. */
3732 if (!back->found_extent_tree) {
3736 if (back->is_data) {
3737 dback = (struct data_backref *)back;
3738 fprintf(stderr, "Backref %llu %s %llu"
3739 " owner %llu offset %llu num_refs %lu"
3740 " not found in extent tree\n",
3741 (unsigned long long)rec->start,
3742 back->full_backref ?
3744 back->full_backref ?
3745 (unsigned long long)dback->parent:
3746 (unsigned long long)dback->root,
3747 (unsigned long long)dback->owner,
3748 (unsigned long long)dback->offset,
3749 (unsigned long)dback->num_refs);
3751 tback = (struct tree_backref *)back;
3752 fprintf(stderr, "Backref %llu parent %llu"
3753 " root %llu not found in extent tree\n",
3754 (unsigned long long)rec->start,
3755 (unsigned long long)tback->parent,
3756 (unsigned long long)tback->root);
/* Extent-tree entry exists but no tree actually references the block. */
3759 if (!back->is_data && !back->found_ref) {
3763 tback = (struct tree_backref *)back;
3764 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3765 (unsigned long long)rec->start,
3766 back->full_backref ? "parent" : "root",
3767 back->full_backref ?
3768 (unsigned long long)tback->parent :
3769 (unsigned long long)tback->root, back);
3771 if (back->is_data) {
3772 dback = (struct data_backref *)back;
3773 if (dback->found_ref != dback->num_refs) {
3777 fprintf(stderr, "Incorrect local backref count"
3778 " on %llu %s %llu owner %llu"
3779 " offset %llu found %u wanted %u back %p\n",
3780 (unsigned long long)rec->start,
3781 back->full_backref ?
3783 back->full_backref ?
3784 (unsigned long long)dback->parent:
3785 (unsigned long long)dback->root,
3786 (unsigned long long)dback->owner,
3787 (unsigned long long)dback->offset,
3788 dback->found_ref, dback->num_refs, back);
3790 if (dback->disk_bytenr != rec->start) {
3794 fprintf(stderr, "Backref disk bytenr does not"
3795 " match extent record, bytenr=%llu, "
3796 "ref bytenr=%llu\n",
3797 (unsigned long long)rec->start,
3798 (unsigned long long)dback->disk_bytenr);
3801 if (dback->bytes != rec->nr) {
3805 fprintf(stderr, "Backref bytes do not match "
3806 "extent backref, bytenr=%llu, ref "
3807 "bytes=%llu, backref bytes=%llu\n",
3808 (unsigned long long)rec->start,
3809 (unsigned long long)rec->nr,
3810 (unsigned long long)dback->bytes);
/* Accumulate total references: 1 per tree backref, found_ref for data. */
3813 if (!back->is_data) {
3816 dback = (struct data_backref *)back;
3817 found += dback->found_ref;
3820 if (found != rec->refs) {
3824 fprintf(stderr, "Incorrect global backref count "
3825 "on %llu found %llu wanted %llu\n",
3826 (unsigned long long)rec->start,
3827 (unsigned long long)found,
3828 (unsigned long long)rec->refs);
/* Detach and free every backref hanging off extent record @rec. */
3834 static int free_all_extent_backrefs(struct extent_record *rec)
3836 struct extent_backref *back;
3837 struct list_head *cur;
3838 while (!list_empty(&rec->backrefs)) {
3839 cur = rec->backrefs.next;
3840 back = list_entry(cur, struct extent_backref, list);
/*
 * Drain @extent_cache completely, freeing each extent record together
 * with its backrefs.
 */
3847 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3848 struct cache_tree *extent_cache)
3850 struct cache_extent *cache;
3851 struct extent_record *rec;
3854 cache = first_cache_extent(extent_cache);
3857 rec = container_of(cache, struct extent_record, cache);
3858 remove_cache_extent(extent_cache, cache);
3859 free_all_extent_backrefs(rec);
/*
 * Free @rec early if it is already fully verified: contents and owner
 * checked, refcounts consistent, no duplicates, all backpointers clean,
 * and none of the structural error flags set.  Keeping verified records
 * out of the cache bounds memory use during the extent scan.
 */
3864 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3865 struct extent_record *rec)
3867 if (rec->content_checked && rec->owner_ref_checked &&
3868 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3869 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3870 !rec->bad_full_backref && !rec->crossing_stripes &&
3871 !rec->wrong_chunk_type) {
3872 remove_cache_extent(extent_cache, &rec->cache);
3873 free_all_extent_backrefs(rec);
3874 list_del_init(&rec->list);
/*
 * Verify that tree block @buf is really owned by the root recorded in
 * its header.  Fast path: a non-full backref for that root already
 * exists on @rec.  Otherwise re-search the owning fs tree for the
 * block's first key and confirm the parent node points at buf->start.
 * Returns 0 when ownership is confirmed, 1 when it is not.
 */
3880 static int check_owner_ref(struct btrfs_root *root,
3881 struct extent_record *rec,
3882 struct extent_buffer *buf)
3884 struct extent_backref *node;
3885 struct tree_backref *back;
3886 struct btrfs_root *ref_root;
3887 struct btrfs_key key;
3888 struct btrfs_path path;
3889 struct extent_buffer *parent;
3894 list_for_each_entry(node, &rec->backrefs, list) {
3897 if (!node->found_ref)
3899 if (node->full_backref)
3901 back = (struct tree_backref *)node;
3902 if (btrfs_header_owner(buf) == back->root)
3905 BUG_ON(rec->is_root);
3907 /* try to find the block by search corresponding fs tree */
3908 key.objectid = btrfs_header_owner(buf);
3909 key.type = BTRFS_ROOT_ITEM_KEY;
3910 key.offset = (u64)-1;
3912 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3913 if (IS_ERR(ref_root))
/* Use buf's first key to locate the slot one level above it. */
3916 level = btrfs_header_level(buf);
3918 btrfs_item_key_to_cpu(buf, &key, 0);
3920 btrfs_node_key_to_cpu(buf, &key, 0);
3922 btrfs_init_path(&path);
3923 path.lowest_level = level + 1;
3924 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3928 parent = path.nodes[level + 1];
3929 if (parent && buf->start == btrfs_node_blockptr(parent,
3930 path.slots[level + 1]))
3933 btrfs_release_path(&path);
3934 return found ? 0 : 1;
/*
 * Return nonzero if @rec has a non-full tree backref rooted in the
 * extent tree, i.e. the block belongs to the extent tree itself.
 */
3937 static int is_extent_tree_record(struct extent_record *rec)
3939 struct list_head *cur = rec->backrefs.next;
3940 struct extent_backref *node;
3941 struct tree_backref *back;
3944 while(cur != &rec->backrefs) {
3945 node = list_entry(cur, struct extent_backref, list);
3949 back = (struct tree_backref *)node;
3950 if (node->full_backref)
3952 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Note an unreadable tree block: look up its extent record and, if it
 * belongs to the extent tree, register it as a corrupt extent record
 * keyed by the parent key so later passes can work around it.
 */
3959 static int record_bad_block_io(struct btrfs_fs_info *info,
3960 struct cache_tree *extent_cache,
3963 struct extent_record *rec;
3964 struct cache_extent *cache;
3965 struct btrfs_key key;
3967 cache = lookup_cache_extent(extent_cache, start, len);
3971 rec = container_of(cache, struct extent_record, cache);
3972 if (!is_extent_tree_record(rec))
3975 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3976 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 of @buf to restore key order.
 * For internal nodes the two key_ptrs are exchanged; for leaves the
 * item headers' offset/size fields and the item keys are swapped (the
 * data payloads stay in place).  When slot 0 changes, the parent keys
 * are fixed up via btrfs_fixup_low_keys().
 */
3979 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3980 struct extent_buffer *buf, int slot)
3982 if (btrfs_header_level(buf)) {
3983 struct btrfs_key_ptr ptr1, ptr2;
3985 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3986 sizeof(struct btrfs_key_ptr));
3987 read_extent_buffer(buf, &ptr2,
3988 btrfs_node_key_ptr_offset(slot + 1),
3989 sizeof(struct btrfs_key_ptr));
3990 write_extent_buffer(buf, &ptr1,
3991 btrfs_node_key_ptr_offset(slot + 1),
3992 sizeof(struct btrfs_key_ptr));
3993 write_extent_buffer(buf, &ptr2,
3994 btrfs_node_key_ptr_offset(slot),
3995 sizeof(struct btrfs_key_ptr));
3997 struct btrfs_disk_key key;
3998 btrfs_node_key(buf, &key, 0);
3999 btrfs_fixup_low_keys(root, path, &key,
4000 btrfs_header_level(buf) + 1);
4003 struct btrfs_item *item1, *item2;
4004 struct btrfs_key k1, k2;
4005 char *item1_data, *item2_data;
4006 u32 item1_offset, item2_offset, item1_size, item2_size;
4008 item1 = btrfs_item_nr(slot);
4009 item2 = btrfs_item_nr(slot + 1);
4010 btrfs_item_key_to_cpu(buf, &k1, slot);
4011 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4012 item1_offset = btrfs_item_offset(buf, item1);
4013 item2_offset = btrfs_item_offset(buf, item2);
4014 item1_size = btrfs_item_size(buf, item1);
4015 item2_size = btrfs_item_size(buf, item2);
4017 item1_data = malloc(item1_size);
4020 item2_data = malloc(item2_size);
/* Copy the payloads out and write each into the other's data area. */
4026 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4027 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4029 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4030 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4034 btrfs_set_item_offset(buf, item1, item2_offset);
4035 btrfs_set_item_offset(buf, item2, item1_offset);
4036 btrfs_set_item_size(buf, item1, item2_size);
4037 btrfs_set_item_size(buf, item2, item1_size);
/* Finally swap the keys (handles parent fixups for slot 0). */
4039 path->slots[0] = slot;
4040 btrfs_set_item_key_unsafe(root, path, &k2);
4041 path->slots[0] = slot + 1;
4042 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Restore ascending key order in the block at path->lowest_level by
 * swapping any adjacent out-of-order pair via swap_values(), then
 * marking the buffer dirty.  NOTE(review): a single forward pass fixes
 * one inversion per pair; the elided lines may re-scan — confirm
 * against the full source.
 */
4047 static int fix_key_order(struct btrfs_trans_handle *trans,
4048 struct btrfs_root *root,
4049 struct btrfs_path *path)
4051 struct extent_buffer *buf;
4052 struct btrfs_key k1, k2;
4054 int level = path->lowest_level;
4057 buf = path->nodes[level];
4058 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4060 btrfs_node_key_to_cpu(buf, &k1, i);
4061 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4063 btrfs_item_key_to_cpu(buf, &k1, i);
4064 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4066 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4068 ret = swap_values(root, path, buf, i);
4071 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the malformed item at @slot from leaf @buf by shifting the
 * following item headers down and decrementing nritems.  Only item
 * types whose absence the rest of fsck can tolerate/repair are deleted;
 * anything else is refused.  The item's data payload is left in place —
 * fix_item_offset() compacts the leaf afterwards.
 */
4077 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4078 struct btrfs_root *root,
4079 struct btrfs_path *path,
4080 struct extent_buffer *buf, int slot)
4082 struct btrfs_key key;
4083 int nritems = btrfs_header_nritems(buf);
4085 btrfs_item_key_to_cpu(buf, &key, slot);
4087 /* These are all the keys we can deal with missing. */
4088 if (key.type != BTRFS_DIR_INDEX_KEY &&
4089 key.type != BTRFS_EXTENT_ITEM_KEY &&
4090 key.type != BTRFS_METADATA_ITEM_KEY &&
4091 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4092 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4095 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4096 (unsigned long long)key.objectid, key.type,
4097 (unsigned long long)key.offset, slot, buf->start);
4098 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4099 btrfs_item_nr_offset(slot + 1),
4100 sizeof(struct btrfs_item) *
4101 (nritems - slot - 1));
4102 btrfs_set_header_nritems(buf, nritems - 1);
/* Deleting slot 0 changes the leaf's first key; fix parents. */
4104 struct btrfs_disk_key disk_key;
4106 btrfs_item_key(buf, &disk_key, 0);
4107 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4109 btrfs_mark_buffer_dirty(buf);
/*
 * Repair leaf item data offsets: for each item, verify its data end
 * lines up with either the leaf data size (item 0) or the previous
 * item's offset. Items hanging past the boundary are deleted (when the
 * key type allows), otherwise the item data is shifted to close gaps.
 */
4113 static int fix_item_offset(struct btrfs_trans_handle *trans,
4114 struct btrfs_root *root,
4115 struct btrfs_path *path)
4117 struct extent_buffer *buf;
4121 /* We should only get this for leaves */
4122 BUG_ON(path->lowest_level);
4123 buf = path->nodes[0];
4125 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4126 unsigned int shift = 0, offset;
/* First item: its data should end exactly at the leaf data size. */
4128 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4129 BTRFS_LEAF_DATA_SIZE(root)) {
4130 if (btrfs_item_end_nr(buf, i) >
4131 BTRFS_LEAF_DATA_SIZE(root)) {
4132 ret = delete_bogus_item(trans, root, path,
4136 fprintf(stderr, "item is off the end of the "
4137 "leaf, can't fix\n");
4141 shift = BTRFS_LEAF_DATA_SIZE(root) -
4142 btrfs_item_end_nr(buf, i);
/* Later items: their data should end where the previous item starts. */
4143 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4144 btrfs_item_offset_nr(buf, i - 1)) {
4145 if (btrfs_item_end_nr(buf, i) >
4146 btrfs_item_offset_nr(buf, i - 1)) {
4147 ret = delete_bogus_item(trans, root, path,
4151 fprintf(stderr, "items overlap, can't fix\n");
4155 shift = btrfs_item_offset_nr(buf, i - 1) -
4156 btrfs_item_end_nr(buf, i);
4161 printf("Shifting item nr %d by %u bytes in block %llu\n",
4162 i, shift, (unsigned long long)buf->start);
4163 offset = btrfs_item_offset_nr(buf, i);
/* Move the item data up by @shift and record its new offset. */
4164 memmove_extent_buffer(buf,
4165 btrfs_leaf_data(buf) + offset + shift,
4166 btrfs_leaf_data(buf) + offset,
4167 btrfs_item_size_nr(buf, i));
4168 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4170 btrfs_mark_buffer_dirty(buf);
4174 * We may have moved things, in which case we want to exit so we don't
4175 * write those changes out. Once we have proper abort functionality in
4176 * progs this can be changed to something nicer.
4183 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4184 * then just return -EIO.
/*
 * Only key-order and item-offset corruption are repairable here; for
 * every root that references the bad block, start a transaction, COW
 * down to the block (skipping its own checks) and run the matching fixer.
 */
4186 static int try_to_fix_bad_block(struct btrfs_root *root,
4187 struct extent_buffer *buf,
4188 enum btrfs_tree_block_status status)
4190 struct btrfs_trans_handle *trans;
4191 struct ulist *roots;
4192 struct ulist_node *node;
4193 struct btrfs_root *search_root;
4194 struct btrfs_path *path;
4195 struct ulist_iterator iter;
4196 struct btrfs_key root_key, key;
/* Anything other than these two statuses is not fixable here. */
4199 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4200 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4203 path = btrfs_alloc_path();
/* Find every root that references this block. */
4207 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4210 btrfs_free_path(path);
4214 ULIST_ITER_INIT(&iter);
4215 while ((node = ulist_next(roots, &iter))) {
4216 root_key.objectid = node->val;
4217 root_key.type = BTRFS_ROOT_ITEM_KEY;
4218 root_key.offset = (u64)-1;
4220 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4227 trans = btrfs_start_transaction(search_root, 0);
4228 if (IS_ERR(trans)) {
4229 ret = PTR_ERR(trans);
/* Search to the bad block's own level, bypassing block checks so
 * the (corrupt) block can actually be reached and COWed. */
4233 path->lowest_level = btrfs_header_level(buf);
4234 path->skip_check_block = 1;
4235 if (path->lowest_level)
4236 btrfs_node_key_to_cpu(buf, &key, 0);
4238 btrfs_item_key_to_cpu(buf, &key, 0);
4239 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4242 btrfs_commit_transaction(trans, search_root);
4245 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4246 ret = fix_key_order(trans, search_root, path);
4247 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4248 ret = fix_item_offset(trans, search_root, path);
4250 btrfs_commit_transaction(trans, search_root);
4253 btrfs_release_path(path);
4254 btrfs_commit_transaction(trans, search_root);
4257 btrfs_free_path(path);
/*
 * Validate one tree block against its extent record: record generation
 * and first-key info, run btrfs_check_leaf()/btrfs_check_node(), and if
 * the block is bad, attempt try_to_fix_bad_block() before reporting.
 * Also marks content/owner-ref as checked on the extent record.
 */
4261 static int check_block(struct btrfs_root *root,
4262 struct cache_tree *extent_cache,
4263 struct extent_buffer *buf, u64 flags)
4265 struct extent_record *rec;
4266 struct cache_extent *cache;
4267 struct btrfs_key key;
4268 enum btrfs_tree_block_status status;
4272 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4275 rec = container_of(cache, struct extent_record, cache);
4276 rec->generation = btrfs_header_generation(buf);
4278 level = btrfs_header_level(buf);
/* Remember the block's first key so parent/child linkage can be
 * cross-checked later. */
4279 if (btrfs_header_nritems(buf) > 0) {
4282 btrfs_item_key_to_cpu(buf, &key, 0);
4284 btrfs_node_key_to_cpu(buf, &key, 0);
4286 rec->info_objectid = key.objectid;
4288 rec->info_level = level;
4290 if (btrfs_is_leaf(buf))
4291 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4293 status = btrfs_check_node(root, &rec->parent_key, buf);
4295 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/* Repair is only attempted in --repair mode — presumably gated by
 * a dropped "if (repair)" line; TODO confirm. */
4297 status = try_to_fix_bad_block(root, buf, status);
4298 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4300 fprintf(stderr, "bad block %llu\n",
4301 (unsigned long long)buf->start);
4304 * Signal to callers we need to start the scan over
4305 * again since we'll have cow'ed blocks.
4310 rec->content_checked = 1;
/* Full-backref blocks have no single owning root to verify. */
4311 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4312 rec->owner_ref_checked = 1;
4314 ret = check_owner_ref(root, rec, buf);
4316 rec->owner_ref_checked = 1;
4320 maybe_free_extent_rec(extent_cache, rec);
/*
 * Linear search of rec->backrefs for a tree backref matching either
 * @parent (full backref) or @root (normal backref). Returns the match
 * or (per the dropped tail) presumably NULL when none is found.
 */
4324 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4325 u64 parent, u64 root)
4327 struct list_head *cur = rec->backrefs.next;
4328 struct extent_backref *node;
4329 struct tree_backref *back;
4331 while(cur != &rec->backrefs) {
4332 node = list_entry(cur, struct extent_backref, list);
4336 back = (struct tree_backref *)node;
/* parent != 0 selects full-backref matching; root matching otherwise. */
4338 if (!node->full_backref)
4340 if (parent == back->parent)
4343 if (node->full_backref)
4345 if (back->root == root)
/*
 * Allocate and initialize a tree backref and link it onto
 * rec->backrefs. parent > 0 means a full (shared) backref keyed by
 * parent bytenr; otherwise it is keyed by owning root id.
 *
 * NOTE(review): malloc() result is dereferenced without a NULL check —
 * an OOM here is undefined behavior; flagging rather than changing
 * since callers' error handling is not visible in this listing.
 */
4352 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4353 u64 parent, u64 root)
4355 struct tree_backref *ref = malloc(sizeof(*ref));
4356 memset(&ref->node, 0, sizeof(ref->node));
4358 ref->parent = parent;
4359 ref->node.full_backref = 1;
4362 ref->node.full_backref = 0;
4364 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Linear search for a data backref on @rec matching either @parent
 * (full backref) or the (root, owner, offset) triple. When @found_ref
 * is set, a candidate whose bytes/disk_bytenr disagree with the passed
 * values is rejected so mismatched refs surface as duplicates.
 */
4369 static struct data_backref *find_data_backref(struct extent_record *rec,
4370 u64 parent, u64 root,
4371 u64 owner, u64 offset,
4373 u64 disk_bytenr, u64 bytes)
4375 struct list_head *cur = rec->backrefs.next;
4376 struct extent_backref *node;
4377 struct data_backref *back;
4379 while(cur != &rec->backrefs) {
4380 node = list_entry(cur, struct extent_backref, list);
4384 back = (struct data_backref *)node;
4386 if (!node->full_backref)
4388 if (parent == back->parent)
4391 if (node->full_backref)
4393 if (back->root == root && back->owner == owner &&
4394 back->offset == offset) {
/* Already-found ref with different extent size/bytenr: not a match. */
4395 if (found_ref && node->found_ref &&
4396 (back->bytes != bytes ||
4397 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref, link it to rec->backrefs and bump the
 * record's max_size if this ref's extent is larger.
 *
 * NOTE(review): as with alloc_tree_backref(), the malloc() result is
 * used without a NULL check — flagged, not changed, since caller
 * error paths are not visible here.
 */
4406 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4407 u64 parent, u64 root,
4408 u64 owner, u64 offset,
4411 struct data_backref *ref = malloc(sizeof(*ref));
4412 memset(&ref->node, 0, sizeof(ref->node));
4413 ref->node.is_data = 1;
4416 ref->parent = parent;
4419 ref->node.full_backref = 1;
4423 ref->offset = offset;
4424 ref->node.full_backref = 0;
4426 ref->bytes = max_size;
4429 list_add_tail(&ref->node.list, &rec->backrefs);
4430 if (max_size > rec->max_size)
4431 rec->max_size = max_size;
4435 /* Check if the type of extent matches with its chunk */
4436 static void check_extent_type(struct extent_record *rec)
4438 struct btrfs_block_group_cache *bg_cache;
4440 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4444 /* data extent, check chunk directly*/
4445 if (!rec->metadata) {
4446 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4447 rec->wrong_chunk_type = 1;
4451 /* metadata extent, check the obvious case first */
4452 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4453 BTRFS_BLOCK_GROUP_METADATA))) {
4454 rec->wrong_chunk_type = 1;
4459 * Check SYSTEM extent, as it's also marked as metadata, we can only
4460 * make sure it's a SYSTEM extent by its backref
4462 if (!list_empty(&rec->backrefs)) {
4463 struct extent_backref *node;
4464 struct tree_backref *tback;
4467 node = list_entry(rec->backrefs.next, struct extent_backref,
4469 if (node->is_data) {
4470 /* tree block shouldn't have data backref */
4471 rec->wrong_chunk_type = 1;
4474 tback = container_of(node, struct tree_backref, node);
/* A chunk-tree owner means the block must live in a SYSTEM chunk. */
4476 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4477 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4479 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4480 if (!(bg_cache->flags & bg_type))
4481 rec->wrong_chunk_type = 1;
/*
 * Insert or update an extent record in @extent_cache.
 *
 * If a record overlapping [start, nr) already exists, merge the new
 * information into it (ref counts, parent key/generation, max_size,
 * checked flags); a same-range-but-different-start or second "real"
 * extent item becomes a duplicate entry on rec->dups and the record is
 * queued on the global duplicate_extents list. Otherwise a fresh
 * record is allocated and inserted.
 *
 * NOTE(review): several error-check lines were dropped from this
 * listing (malloc failures, insert_cache_extent result), so the
 * comments describe only the visible flow.
 */
4485 static int add_extent_rec(struct cache_tree *extent_cache,
4486 struct btrfs_key *parent_key, u64 parent_gen,
4487 u64 start, u64 nr, u64 extent_item_refs,
4488 int is_root, int inc_ref, int set_checked,
4489 int metadata, int extent_rec, u64 max_size)
4491 struct extent_record *rec;
4492 struct cache_extent *cache;
4496 cache = lookup_cache_extent(extent_cache, start, nr);
4498 rec = container_of(cache, struct extent_record, cache);
4502 rec->nr = max(nr, max_size);
4505 * We need to make sure to reset nr to whatever the extent
4506 * record says was the real size, this way we can compare it to
/* A second authoritative extent item for this range (or one with a
 * different start) is tracked as a duplicate for later resolution. */
4510 if (start != rec->start || rec->found_rec) {
4511 struct extent_record *tmp;
4514 if (list_empty(&rec->list))
4515 list_add_tail(&rec->list,
4516 &duplicate_extents);
4519 * We have to do this song and dance in case we
4520 * find an extent record that falls inside of
4521 * our current extent record but does not have
4522 * the same objectid.
4524 tmp = malloc(sizeof(*tmp));
4528 tmp->max_size = max_size;
4531 tmp->metadata = metadata;
4532 tmp->extent_item_refs = extent_item_refs;
4533 INIT_LIST_HEAD(&tmp->list);
4534 list_add_tail(&tmp->list, &rec->dups);
4535 rec->num_duplicates++;
/* Conflicting refcount claims are reported but the new value wins. */
4542 if (extent_item_refs && !dup) {
4543 if (rec->extent_item_refs) {
4544 fprintf(stderr, "block %llu rec "
4545 "extent_item_refs %llu, passed %llu\n",
4546 (unsigned long long)start,
4547 (unsigned long long)
4548 rec->extent_item_refs,
4549 (unsigned long long)extent_item_refs);
4551 rec->extent_item_refs = extent_item_refs;
4556 rec->content_checked = 1;
4557 rec->owner_ref_checked = 1;
4561 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4563 rec->parent_generation = parent_gen;
4565 if (rec->max_size < max_size)
4566 rec->max_size = max_size;
4569 * A metadata extent can't cross stripe_len boundary, otherwise
4570 * kernel scrub won't be able to handle it.
4571 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4574 if (metadata && check_crossing_stripes(rec->start,
4576 rec->crossing_stripes = 1;
4577 check_extent_type(rec);
4578 maybe_free_extent_rec(extent_cache, rec);
/* No existing record: allocate and initialize a new one. */
4581 rec = malloc(sizeof(*rec));
4583 rec->max_size = max_size;
4584 rec->nr = max(nr, max_size);
4585 rec->found_rec = !!extent_rec;
4586 rec->content_checked = 0;
4587 rec->owner_ref_checked = 0;
4588 rec->num_duplicates = 0;
4589 rec->metadata = metadata;
4590 rec->flag_block_full_backref = -1;
4591 rec->bad_full_backref = 0;
4592 rec->crossing_stripes = 0;
4593 rec->wrong_chunk_type = 0;
4594 INIT_LIST_HEAD(&rec->backrefs);
4595 INIT_LIST_HEAD(&rec->dups);
4596 INIT_LIST_HEAD(&rec->list);
4608 if (extent_item_refs)
4609 rec->extent_item_refs = extent_item_refs;
4611 rec->extent_item_refs = 0;
4614 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4616 memset(&rec->parent_key, 0, sizeof(*parent_key));
4619 rec->parent_generation = parent_gen;
4621 rec->parent_generation = 0;
4623 rec->cache.start = start;
4624 rec->cache.size = nr;
4625 ret = insert_cache_extent(extent_cache, &rec->cache);
4629 rec->content_checked = 1;
4630 rec->owner_ref_checked = 1;
4634 if (check_crossing_stripes(rec->start, rec->max_size))
4635 rec->crossing_stripes = 1;
4636 check_extent_type(rec);
/*
 * Record a tree-block backref for @bytenr, creating a placeholder
 * extent record (size 1) first if none exists. found_ref distinguishes
 * a reference seen while walking the tree (found_ref) from one read
 * out of the extent tree (found_extent_tree); seeing the same kind
 * twice is reported as a duplicate.
 */
4640 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4641 u64 parent, u64 root, int found_ref)
4643 struct extent_record *rec;
4644 struct tree_backref *back;
4645 struct cache_extent *cache;
4647 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: create a minimal one, then re-look it up. */
4649 add_extent_rec(extent_cache, NULL, 0, bytenr,
4650 1, 0, 0, 0, 0, 1, 0, 0);
4651 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4656 rec = container_of(cache, struct extent_record, cache);
4657 if (rec->start != bytenr) {
4661 back = find_tree_backref(rec, parent, root);
4663 back = alloc_tree_backref(rec, parent, root);
4666 if (back->node.found_ref) {
4667 fprintf(stderr, "Extent back ref already exists "
4668 "for %llu parent %llu root %llu \n",
4669 (unsigned long long)bytenr,
4670 (unsigned long long)parent,
4671 (unsigned long long)root);
4673 back->node.found_ref = 1;
4675 if (back->node.found_extent_tree) {
4676 fprintf(stderr, "Extent back ref already exists "
4677 "for %llu parent %llu root %llu \n",
4678 (unsigned long long)bytenr,
4679 (unsigned long long)parent,
4680 (unsigned long long)root);
4682 back->node.found_extent_tree = 1;
4684 check_extent_type(rec);
4685 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data-extent backref for @bytenr, creating a placeholder
 * extent record first if needed. For found_ref (seen via a file
 * extent item) the real size must match any previously found ref —
 * mismatches deliberately allocate a separate backref so the
 * inconsistency is detected later. For extent-tree refs just the
 * refcount is recorded.
 */
4689 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4690 u64 parent, u64 root, u64 owner, u64 offset,
4691 u32 num_refs, int found_ref, u64 max_size)
4693 struct extent_record *rec;
4694 struct data_backref *back;
4695 struct cache_extent *cache;
4697 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4699 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4701 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4706 rec = container_of(cache, struct extent_record, cache);
4707 if (rec->max_size < max_size)
4708 rec->max_size = max_size;
4711 * If found_ref is set then max_size is the real size and must match the
4712 * existing refs. So if we have already found a ref then we need to
4713 * make sure that this ref matches the existing one, otherwise we need
4714 * to add a new backref so we can notice that the backrefs don't match
4715 * and we need to figure out who is telling the truth. This is to
4716 * account for that awful fsync bug I introduced where we'd end up with
4717 * a btrfs_file_extent_item that would have its length include multiple
4718 * prealloc extents or point inside of a prealloc extent.
4720 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4723 back = alloc_data_backref(rec, parent, root, owner, offset,
4727 BUG_ON(num_refs != 1);
4728 if (back->node.found_ref)
4729 BUG_ON(back->bytes != max_size);
4730 back->node.found_ref = 1;
4731 back->found_ref += 1;
4732 back->bytes = max_size;
4733 back->disk_bytenr = bytenr;
4735 rec->content_checked = 1;
4736 rec->owner_ref_checked = 1;
4738 if (back->node.found_extent_tree) {
4739 fprintf(stderr, "Extent back ref already exists "
4740 "for %llu parent %llu root %llu "
4741 "owner %llu offset %llu num_refs %lu\n",
4742 (unsigned long long)bytenr,
4743 (unsigned long long)parent,
4744 (unsigned long long)root,
4745 (unsigned long long)owner,
4746 (unsigned long long)offset,
4747 (unsigned long)num_refs);
4749 back->num_refs = num_refs;
4750 back->node.found_extent_tree = 1;
4752 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, size) for scanning: add to @seen for dedup (its
 * return presumably short-circuits re-adds — the check line was
 * dropped from this listing) and then to the @pending work tree.
 */
4756 static int add_pending(struct cache_tree *pending,
4757 struct cache_tree *seen, u64 bytenr, u32 size)
4760 ret = add_cache_extent(seen, bytenr, size);
4763 add_cache_extent(pending, bytenr, size);
/*
 * Choose up to @bits_nr extents to read next, preferring (1) explicit
 * readahead entries, then (2) tree nodes near @last (backing up 32K to
 * catch close-by blocks), then (3) plain pending extents. When there
 * is spare room, pull in further pending extents that are within 32K
 * of the batch to improve read locality.
 */
4767 static int pick_next_pending(struct cache_tree *pending,
4768 struct cache_tree *reada,
4769 struct cache_tree *nodes,
4770 u64 last, struct block_info *bits, int bits_nr,
4773 unsigned long node_start = last;
4774 struct cache_extent *cache;
/* Readahead entries take priority: return just the first one. */
4777 cache = search_cache_extent(reada, 0);
4779 bits[0].start = cache->start;
4780 bits[0].size = cache->size;
/* Back up a little so nodes slightly behind @last are still found. */
4785 if (node_start > 32768)
4786 node_start -= 32768;
4788 cache = search_cache_extent(nodes, node_start);
4790 cache = search_cache_extent(nodes, 0);
4793 cache = search_cache_extent(pending, 0);
4798 bits[ret].start = cache->start;
4799 bits[ret].size = cache->size;
4800 cache = next_cache_extent(cache);
4802 } while (cache && ret < bits_nr);
4808 bits[ret].start = cache->start;
4809 bits[ret].size = cache->size;
4810 cache = next_cache_extent(cache);
4812 } while (cache && ret < bits_nr);
/* Batch has spare room: opportunistically add nearby pending extents. */
4814 if (bits_nr - ret > 8) {
4815 u64 lookup = bits[0].start + bits[0].size;
4816 struct cache_extent *next;
4817 next = search_cache_extent(pending, lookup);
4819 if (next->start - lookup > 32768)
4821 bits[ret].start = next->start;
4822 bits[ret].size = next->size;
4823 lookup = next->start + next->size;
4827 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: unlink a chunk record from its
 * lists before it is freed (the free() line was dropped here).
 */
4835 static void free_chunk_record(struct cache_extent *cache)
4837 struct chunk_record *rec;
4839 rec = container_of(cache, struct chunk_record, cache);
4840 list_del_init(&rec->list);
4841 list_del_init(&rec->dextents);
/* Free every chunk record held in @chunk_cache. */
4845 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4847 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * rb-tree destruction callback: recover the device record from its
 * rb_node (the free() line was dropped from this listing).
 */
4850 static void free_device_record(struct rb_node *node)
4852 struct device_record *rec;
4854 rec = container_of(node, struct device_record, node);
/* Body line of the device-cache free wrapper; its enclosing function
 * header was dropped from this listing. */
4858 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert a block-group record into the tree and, on success, append it
 * to the tree's ordered block_groups list.
 */
4860 int insert_block_group_record(struct block_group_tree *tree,
4861 struct block_group_record *bg_rec)
4865 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4869 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: unlink a block-group record from
 * its list before it is freed (the free() line was dropped here).
 */
4873 static void free_block_group_record(struct cache_extent *cache)
4875 struct block_group_record *rec;
4877 rec = container_of(cache, struct block_group_record, cache);
4878 list_del_init(&rec->list);
/* Free every block-group record held in @tree. */
4882 void free_block_group_tree(struct block_group_tree *tree)
4884 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert a device-extent record; until matched up, it is considered an
 * orphan for both its chunk and its device.
 */
4887 int insert_device_extent_record(struct device_extent_tree *tree,
4888 struct device_extent_record *de_rec)
4893 * Device extent is a bit different from the other extents, because
4894 * the extents which belong to the different devices may have the
4895 * same start and size, so we need use the special extent cache
4896 * search/insert functions.
4898 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4902 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4903 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: drop a device-extent record from
 * any orphan lists it may still be on (the free() line was dropped).
 */
4907 static void free_device_extent_record(struct cache_extent *cache)
4909 struct device_extent_record *rec;
4911 rec = container_of(cache, struct device_extent_record, cache);
4912 if (!list_empty(&rec->chunk_list))
4913 list_del_init(&rec->chunk_list);
4914 if (!list_empty(&rec->device_list))
4915 list_del_init(&rec->device_list);
/* Free every device-extent record held in @tree. */
4919 void free_device_extent_tree(struct device_extent_tree *tree)
4921 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4924 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Legacy (v0) extent ref item: objectids below FIRST_FREE are tree
 * roots, so record a tree backref; otherwise record a data backref
 * with the v0 refcount. key.offset carries the parent bytenr.
 */
4925 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4926 struct extent_buffer *leaf, int slot)
4928 struct btrfs_extent_ref_v0 *ref0;
4929 struct btrfs_key key;
4931 btrfs_item_key_to_cpu(leaf, &key, slot);
4932 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4933 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4934 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4936 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4937 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from a CHUNK_ITEM at @slot, including
 * its per-stripe devid/offset/uuid array (record is sized for
 * num_stripes via btrfs_chunk_record_size()). Exits on allocation
 * failure after printing an error.
 */
4943 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4944 struct btrfs_key *key,
4947 struct btrfs_chunk *ptr;
4948 struct chunk_record *rec;
4951 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4952 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4954 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4956 fprintf(stderr, "memory allocation failed\n");
4960 INIT_LIST_HEAD(&rec->list);
4961 INIT_LIST_HEAD(&rec->dextents);
/* The cache is keyed by logical start (key->offset for chunks). */
4964 rec->cache.start = key->offset;
4965 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4967 rec->generation = btrfs_header_generation(leaf);
4969 rec->objectid = key->objectid;
4970 rec->type = key->type;
4971 rec->offset = key->offset;
4973 rec->length = rec->cache.size;
4974 rec->owner = btrfs_chunk_owner(leaf, ptr);
4975 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4976 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4977 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4978 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4979 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4980 rec->num_stripes = num_stripes;
4981 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4983 for (i = 0; i < rec->num_stripes; ++i) {
4984 rec->stripes[i].devid =
4985 btrfs_stripe_devid_nr(leaf, ptr, i);
4986 rec->stripes[i].offset =
4987 btrfs_stripe_offset_nr(leaf, ptr, i);
4988 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4989 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse a CHUNK_ITEM into a chunk_record and cache it; a clash on
 * insert means the same chunk range was already seen.
 */
4996 static int process_chunk_item(struct cache_tree *chunk_cache,
4997 struct btrfs_key *key, struct extent_buffer *eb,
5000 struct chunk_record *rec;
5003 rec = btrfs_new_chunk_record(eb, key, slot);
5004 ret = insert_cache_extent(chunk_cache, &rec->cache);
5006 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5007 rec->offset, rec->length);
/*
 * Parse a DEV_ITEM into a device_record and insert it into the
 * devid-keyed rb-tree; a clash means a duplicate device item.
 */
5014 static int process_device_item(struct rb_root *dev_cache,
5015 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5017 struct btrfs_dev_item *ptr;
5018 struct device_record *rec;
5021 ptr = btrfs_item_ptr(eb,
5022 slot, struct btrfs_dev_item);
5024 rec = malloc(sizeof(*rec));
5026 fprintf(stderr, "memory allocation failed\n");
/* devid is first taken from the key, then overwritten below with the
 * value stored in the item itself. */
5030 rec->devid = key->offset;
5031 rec->generation = btrfs_header_generation(eb);
5033 rec->objectid = key->objectid;
5034 rec->type = key->type;
5035 rec->offset = key->offset;
5037 rec->devid = btrfs_device_id(eb, ptr);
5038 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5039 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5041 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5043 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from a BLOCK_GROUP_ITEM at
 * @slot. Cache range is [objectid, objectid + offset). Exits on
 * allocation failure after printing an error.
 */
5050 struct block_group_record *
5051 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5054 struct btrfs_block_group_item *ptr;
5055 struct block_group_record *rec;
5057 rec = calloc(1, sizeof(*rec));
5059 fprintf(stderr, "memory allocation failed\n");
5063 rec->cache.start = key->objectid;
5064 rec->cache.size = key->offset;
5066 rec->generation = btrfs_header_generation(leaf);
5068 rec->objectid = key->objectid;
5069 rec->type = key->type;
5070 rec->offset = key->offset;
5072 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5073 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5075 INIT_LIST_HEAD(&rec->list);
/*
 * Parse a BLOCK_GROUP_ITEM into a record and insert it; a clash on
 * insert means the same block group range was already seen.
 */
5080 static int process_block_group_item(struct block_group_tree *block_group_cache,
5081 struct btrfs_key *key,
5082 struct extent_buffer *eb, int slot)
5084 struct block_group_record *rec;
5087 rec = btrfs_new_block_group_record(eb, key, slot);
5088 ret = insert_block_group_record(block_group_cache, rec);
5090 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5091 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from a DEV_EXTENT item at
 * @slot. The cache entry is keyed by (devid, physical offset) so
 * extents on different devices with equal start/size stay distinct.
 * Exits on allocation failure after printing an error.
 */
5098 struct device_extent_record *
5099 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5100 struct btrfs_key *key, int slot)
5102 struct device_extent_record *rec;
5103 struct btrfs_dev_extent *ptr;
5105 rec = calloc(1, sizeof(*rec));
5107 fprintf(stderr, "memory allocation failed\n");
5111 rec->cache.objectid = key->objectid;
5112 rec->cache.start = key->offset;
5114 rec->generation = btrfs_header_generation(leaf);
5116 rec->objectid = key->objectid;
5117 rec->type = key->type;
5118 rec->offset = key->offset;
5120 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
/* NB: "objecteid" is a long-standing field-name typo in the struct. */
5121 rec->chunk_objecteid =
5122 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5124 btrfs_dev_extent_chunk_offset(leaf, ptr);
5125 rec->length = btrfs_dev_extent_length(leaf, ptr);
5126 rec->cache.size = rec->length;
5128 INIT_LIST_HEAD(&rec->chunk_list);
5129 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse a DEV_EXTENT item into a record and insert it; a clash on
 * insert means the same device extent was already seen.
 */
5135 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5136 struct btrfs_key *key, struct extent_buffer *eb,
5139 struct device_extent_record *rec;
5142 rec = btrfs_new_device_extent_record(eb, key, slot);
5143 ret = insert_device_extent_record(dev_extent_cache, rec);
5146 "Device extent[%llu, %llu, %llu] existed.\n",
5147 rec->objectid, rec->offset, rec->length);
/*
 * Parse one EXTENT_ITEM / METADATA_ITEM: record the extent itself via
 * add_extent_rec(), then walk its inline references and register each
 * as a tree or data backref. Handles the pre-v1 short item format
 * under BTRFS_COMPAT_EXTENT_TREE_V0.
 */
5154 static int process_extent_item(struct btrfs_root *root,
5155 struct cache_tree *extent_cache,
5156 struct extent_buffer *eb, int slot)
5158 struct btrfs_extent_item *ei;
5159 struct btrfs_extent_inline_ref *iref;
5160 struct btrfs_extent_data_ref *dref;
5161 struct btrfs_shared_data_ref *sref;
5162 struct btrfs_key key;
5166 u32 item_size = btrfs_item_size_nr(eb, slot);
5172 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM stores the level in key.offset, so the byte count
 * comes from the tree-block size instead. */
5174 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5176 num_bytes = root->leafsize;
5178 num_bytes = key.offset;
/* Short item: the old v0 format holding only a refcount. */
5181 if (item_size < sizeof(*ei)) {
5182 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5183 struct btrfs_extent_item_v0 *ei0;
5184 BUG_ON(item_size != sizeof(*ei0));
5185 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5186 refs = btrfs_extent_refs_v0(eb, ei0);
5190 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5191 num_bytes, refs, 0, 0, 0, metadata, 1,
5195 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5196 refs = btrfs_extent_refs(eb, ei);
5197 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5202 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5203 refs, 0, 0, 0, metadata, 1, num_bytes);
/* Inline refs start right after the extent item; EXTENT_ITEM tree
 * blocks additionally carry a tree_block_info header first. */
5205 ptr = (unsigned long)(ei + 1);
5206 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5207 key.type == BTRFS_EXTENT_ITEM_KEY)
5208 ptr += sizeof(struct btrfs_tree_block_info);
5210 end = (unsigned long)ei + item_size;
5212 iref = (struct btrfs_extent_inline_ref *)ptr;
5213 type = btrfs_extent_inline_ref_type(eb, iref);
5214 offset = btrfs_extent_inline_ref_offset(eb, iref);
5216 case BTRFS_TREE_BLOCK_REF_KEY:
5217 add_tree_backref(extent_cache, key.objectid,
5220 case BTRFS_SHARED_BLOCK_REF_KEY:
5221 add_tree_backref(extent_cache, key.objectid,
5224 case BTRFS_EXTENT_DATA_REF_KEY:
5225 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5226 add_data_backref(extent_cache, key.objectid, 0,
5227 btrfs_extent_data_ref_root(eb, dref),
5228 btrfs_extent_data_ref_objectid(eb,
5230 btrfs_extent_data_ref_offset(eb, dref),
5231 btrfs_extent_data_ref_count(eb, dref),
5234 case BTRFS_SHARED_DATA_REF_KEY:
5235 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5236 add_data_backref(extent_cache, key.objectid, offset,
5238 btrfs_shared_data_ref_count(eb, sref),
5242 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5243 key.objectid, key.type, num_bytes);
5246 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [offset, offset+bytes) is represented by exactly one
 * free-space entry with matching bounds, after carving out any
 * superblock mirror stripes that overlap the range (recursing for a
 * mirror landing in the middle). Matching entries are unlinked so
 * leftovers can be detected by the caller.
 */
5253 static int check_cache_range(struct btrfs_root *root,
5254 struct btrfs_block_group_cache *cache,
5255 u64 offset, u64 bytes)
5257 struct btrfs_free_space *entry;
5263 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5264 bytenr = btrfs_sb_offset(i);
5265 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5266 cache->key.objectid, bytenr, 0,
5267 &logical, &nr, &stripe_len);
/* Skip mirror stripes entirely outside the range being checked. */
5272 if (logical[nr] + stripe_len <= offset)
5274 if (offset + bytes <= logical[nr])
5276 if (logical[nr] == offset) {
5277 if (stripe_len >= bytes) {
5281 bytes -= stripe_len;
5282 offset += stripe_len;
5283 } else if (logical[nr] < offset) {
5284 if (logical[nr] + stripe_len >=
5289 bytes = (offset + bytes) -
5290 (logical[nr] + stripe_len);
5291 offset = logical[nr] + stripe_len;
5294 * Could be tricky, the super may land in the
5295 * middle of the area we're checking. First
5296 * check the easiest case, it's at the end.
5298 if (logical[nr] + stripe_len >=
5300 bytes = logical[nr] - offset;
5304 /* Check the left side */
5305 ret = check_cache_range(root, cache,
5307 logical[nr] - offset);
5313 /* Now we continue with the right side */
5314 bytes = (offset + bytes) -
5315 (logical[nr] + stripe_len);
5316 offset = logical[nr] + stripe_len;
/* Remaining range must be one exact free-space entry. */
5323 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5325 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5326 offset, offset+bytes);
5330 if (entry->offset != offset) {
5331 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5336 if (entry->bytes != bytes) {
5337 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5338 bytes, entry->bytes, offset);
5342 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check a block group's loaded free-space cache against the
 * extent tree: every gap between allocated extents must be covered by
 * a matching free-space entry (check_cache_range unlinks matches), and
 * any entries left over afterwards indicate a bad cache.
 */
5347 static int verify_space_cache(struct btrfs_root *root,
5348 struct btrfs_block_group_cache *cache)
5350 struct btrfs_path *path;
5351 struct extent_buffer *leaf;
5352 struct btrfs_key key;
5356 path = btrfs_alloc_path();
5360 root = root->fs_info->extent_root;
/* Never scan below the first superblock copy. */
5362 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5364 key.objectid = last;
5366 key.type = BTRFS_EXTENT_ITEM_KEY;
5368 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5373 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5374 ret = btrfs_next_leaf(root, path);
5382 leaf = path->nodes[0];
5383 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5384 if (key.objectid >= cache->key.offset + cache->key.objectid)
5386 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5387 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Adjacent extent, no gap: just advance @last past it. METADATA_ITEM
 * length is the tree-block size, not key.offset. */
5392 if (last == key.objectid) {
5393 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5394 last = key.objectid + key.offset;
5396 last = key.objectid + root->leafsize;
/* Gap between @last and this extent: must be free space. */
5401 ret = check_cache_range(root, cache, last,
5402 key.objectid - last);
5405 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5406 last = key.objectid + key.offset;
5408 last = key.objectid + root->leafsize;
/* Tail of the block group past the final extent. */
5412 if (last < cache->key.objectid + cache->key.offset)
5413 ret = check_cache_range(root, cache, last,
5414 cache->key.objectid +
5415 cache->key.offset - last);
5418 btrfs_free_path(path);
5421 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5422 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free-space cache of every block group. If the cache
 * generation doesn't match the super generation, the cache is stale
 * and will be rebuilt, so only a notice is printed. Returns -EINVAL
 * if any block group's cache failed verification.
 */
5430 static int check_space_cache(struct btrfs_root *root)
5432 struct btrfs_block_group_cache *cache;
5433 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5437 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5438 btrfs_super_generation(root->fs_info->super_copy) !=
5439 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5440 printf("cache and super generation don't match, space cache "
5441 "will be invalidated\n");
/* Progress reporting for the "checking free space cache" phase. */
5445 if (ctx.progress_enabled) {
5446 ctx.tp = TASK_FREE_SPACE;
5447 task_start(ctx.info);
5451 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5455 start = cache->key.objectid + cache->key.offset;
5456 if (!cache->free_space_ctl) {
5457 if (btrfs_init_free_space_ctl(cache,
5458 root->sectorsize)) {
5463 btrfs_remove_free_space_cache(cache);
5466 ret = load_free_space_cache(root->fs_info, cache);
5470 ret = verify_space_cache(root, cache);
5472 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5473 cache->key.objectid);
5478 task_stop(ctx.info);
5480 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data at @bytenr and verify each sectorsize chunk
 * against the checksums stored in @eb at @leaf_offset. On a mismatch
 * the next mirror is tried until copies are exhausted. @num_bytes must
 * be sector-aligned. The whole extent is malloc'd at once — NOTE:
 * could be large for big extents; TODO confirm callers bound the size.
 */
5483 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5484 u64 num_bytes, unsigned long leaf_offset,
5485 struct extent_buffer *eb) {
5488 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5490 unsigned long csum_offset;
5494 u64 data_checked = 0;
5500 if (num_bytes % root->sectorsize)
5503 data = malloc(num_bytes);
5507 while (offset < num_bytes) {
5510 read_len = num_bytes - offset;
5511 /* read as much space once a time */
5512 ret = read_extent_data(root, data + offset,
5513 bytenr + offset, &read_len, mirror);
5517 /* verify every 4k data's checksum */
5518 while (data_checked < read_len) {
5520 tmp = offset + data_checked;
5522 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5523 csum, root->sectorsize);
5524 btrfs_csum_final(csum, (char *)&csum);
/* Expected csum lives at sector-index * csum_size within the leaf. */
5526 csum_offset = leaf_offset +
5527 tmp / root->sectorsize * csum_size;
5528 read_extent_buffer(eb, (char *)&csum_expected,
5529 csum_offset, csum_size);
5530 /* try another mirror */
5531 if (csum != csum_expected) {
5532 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5533 mirror, bytenr + tmp,
5534 csum, csum_expected);
5535 num_copies = btrfs_num_copies(
5536 &root->fs_info->mapping_tree,
5538 if (mirror < num_copies - 1) {
5543 data_checked += root->sectorsize;
/*
 * Verify that the whole byte range [bytenr, bytenr + num_bytes) is
 * covered by EXTENT_ITEMs in the extent tree; the range is trimmed as
 * covering extents are found, and any range left uncovered is reported.
 * Recurses once when a found extent splits the range in the middle.
 *
 * NOTE(review): extracted excerpt — some declarations, loop framing
 * and returns are not visible here.
 */
5552 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5555 struct btrfs_path *path;
5556 struct extent_buffer *leaf;
5557 struct btrfs_key key;
5560 path = btrfs_alloc_path();
5562 fprintf(stderr, "Error allocing path\n");
/* search from (bytenr, EXTENT_ITEM, -1) so we land after any item at bytenr */
5566 key.objectid = bytenr;
5567 key.type = BTRFS_EXTENT_ITEM_KEY;
5568 key.offset = (u64)-1;
5571 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5574 fprintf(stderr, "Error looking up extent record %d\n", ret);
5575 btrfs_free_path(path);
5578 if (path->slots[0] > 0) {
5581 ret = btrfs_prev_leaf(root, path);
5584 } else if (ret > 0) {
5591 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5594 * Block group items come before extent items if they have the same
5595 * bytenr, so walk back one more just in case. Dear future traveler,
5596 * first congrats on mastering time travel. Now if it's not too much
5597 * trouble could you go back to 2006 and tell Chris to make the
5598 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5599 * EXTENT_ITEM_KEY please?
5601 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5602 if (path->slots[0] > 0) {
5605 ret = btrfs_prev_leaf(root, path);
5608 } else if (ret > 0) {
5613 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* forward scan: walk items until the range is fully accounted for */
5617 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5618 ret = btrfs_next_leaf(root, path);
5620 fprintf(stderr, "Error going to next leaf "
5622 btrfs_free_path(path);
5628 leaf = path->nodes[0];
5629 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5630 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* extent ends before our range — not relevant, keep walking */
5634 if (key.objectid + key.offset < bytenr) {
5638 if (key.objectid > bytenr + num_bytes)
5641 if (key.objectid == bytenr) {
5642 if (key.offset >= num_bytes) {
5646 num_bytes -= key.offset;
5647 bytenr += key.offset;
5648 } else if (key.objectid < bytenr) {
5649 if (key.objectid + key.offset >= bytenr + num_bytes) {
5653 num_bytes = (bytenr + num_bytes) -
5654 (key.objectid + key.offset);
5655 bytenr = key.objectid + key.offset;
5657 if (key.objectid + key.offset < bytenr + num_bytes) {
5658 u64 new_start = key.objectid + key.offset;
5659 u64 new_bytes = bytenr + num_bytes - new_start;
5662 * Weird case, the extent is in the middle of
5663 * our range, we'll have to search one side
5664 * and then the other. Not sure if this happens
5665 * in real life, but no harm in coding it up
5666 * anyway just in case.
5668 btrfs_release_path(path);
5669 ret = check_extent_exists(root, new_start,
5672 fprintf(stderr, "Right section didn't "
5676 num_bytes = key.objectid - bytenr;
5679 num_bytes = key.objectid - bytenr;
/* anything still uncovered means csums exist without a backing extent */
5686 if (num_bytes && !ret) {
5687 fprintf(stderr, "There are no extents for csum range "
5688 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5692 btrfs_free_path(path);
/*
 * Walk the whole csum tree and check that every EXTENT_CSUM item covers
 * a real data extent (via check_extent_exists()), and — when
 * check_data_csum is set — that the data actually matches the stored
 * checksums (via check_extent_csums()).  Adjacent csum items are
 * coalesced into a single [offset, offset + num_bytes) run before the
 * extent lookup.
 *
 * NOTE(review): extracted excerpt — some declarations, loop framing
 * and the final return are not visible here.
 */
5696 static int check_csums(struct btrfs_root *root)
5698 struct btrfs_path *path;
5699 struct extent_buffer *leaf;
5700 struct btrfs_key key;
5701 u64 offset = 0, num_bytes = 0;
5702 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5706 unsigned long leaf_offset;
/* all csum items live in the dedicated csum root */
5708 root = root->fs_info->csum_root;
5709 if (!extent_buffer_uptodate(root->node)) {
5710 fprintf(stderr, "No valid csum tree found\n");
5714 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5715 key.type = BTRFS_EXTENT_CSUM_KEY;
5718 path = btrfs_alloc_path();
5722 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5724 fprintf(stderr, "Error searching csum tree %d\n", ret);
5725 btrfs_free_path(path);
5729 if (ret > 0 && path->slots[0])
5734 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5735 ret = btrfs_next_leaf(root, path);
5737 fprintf(stderr, "Error going to next leaf "
5744 leaf = path->nodes[0];
5746 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5747 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* each csum entry of csum_size covers one sector of data */
5752 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5753 csum_size) * root->sectorsize;
5754 if (!check_data_csum)
5755 goto skip_csum_check;
5756 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5757 ret = check_extent_csums(root, key.offset, data_len,
5763 offset = key.offset;
/* non-contiguous csum item: close the previous run and verify it */
5764 } else if (key.offset != offset + num_bytes) {
5765 ret = check_extent_exists(root, offset, num_bytes);
5767 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5768 "there is no extent record\n",
5769 offset, offset+num_bytes);
5772 offset = key.offset;
5775 num_bytes += data_len;
5779 btrfs_free_path(path);
5783 static int is_dropped_key(struct btrfs_key *key,
5784 struct btrfs_key *drop_key) {
5785 if (key->objectid < drop_key->objectid)
5787 else if (key->objectid == drop_key->objectid) {
5788 if (key->type < drop_key->type)
5790 else if (key->type == drop_key->type) {
5791 if (key->offset < drop_key->offset)
/*
 * Here are the rules for FULL_BACKREF.
 *
 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
 *    FULL_BACKREF set.
 * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
 *    if it happened after the relocation occurred since we'll have dropped the
 *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
 *    have no real way to know for sure.
 *
 * We process the blocks one root at a time, and we start from the lowest root
 * objectid and go to the highest.  So we can just lookup the owner backref for
 * the record and if we don't find it then we know it doesn't exist and we have
 * a FULL BACKREF.
 *
 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
 * be set or not and then we can check later once we've gathered all the refs.
 */
/*
 * Decide whether the tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, based on the cached extent record,
 * the owning root @ri, and the block's header flag/owner.  The result
 * is or-ed into *flags, and any contradiction with what was recorded
 * earlier is remembered in rec->bad_full_backref.
 *
 * NOTE(review): extracted excerpt — several branches and the returns
 * are not visible here.
 */
5818 static int calc_extent_flag(struct btrfs_root *root,
5819 struct cache_tree *extent_cache,
5820 struct extent_buffer *buf,
5821 struct root_item_record *ri,
5824 struct extent_record *rec;
5825 struct cache_extent *cache;
5826 struct tree_backref *tback;
5829 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5830 /* we have added this extent before */
5832 rec = container_of(cache, struct extent_record, cache);
5835 * Except file/reloc tree, we can not have
5838 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* the root's own node can't be shared via FULL_BACKREF */
5843 if (buf->start == ri->bytenr)
5846 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5849 owner = btrfs_header_owner(buf);
5850 if (owner == ri->objectid)
/* owner differs: look for an owner backref keyed by that root */
5853 tback = find_tree_backref(rec, 0, owner);
/* record-keeping: flag contradictions with earlier decisions */
5858 if (rec->flag_block_full_backref != -1 &&
5859 rec->flag_block_full_backref != 0)
5860 rec->bad_full_backref = 1;
5863 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5864 if (rec->flag_block_full_backref != -1 &&
5865 rec->flag_block_full_backref != 1)
5866 rec->bad_full_backref = 1;
/*
 * Pull the next tree block off the pending/node queues, read it,
 * work out its FULL_BACKREF state, sanity-check it, then scan every
 * item in it: leaf items feed the extent/chunk/device/block-group
 * caches and data backrefs; node child pointers are queued for a
 * later iteration.  Global accounting counters (btree_space_waste,
 * total_*_bytes, data_bytes_*) are updated along the way.
 *
 * NOTE(review): extracted excerpt — many declarations, error checks
 * and closing braces are not visible here.
 */
5870 static int run_next_block(struct btrfs_root *root,
5871 struct block_info *bits,
5874 struct cache_tree *pending,
5875 struct cache_tree *seen,
5876 struct cache_tree *reada,
5877 struct cache_tree *nodes,
5878 struct cache_tree *extent_cache,
5879 struct cache_tree *chunk_cache,
5880 struct rb_root *dev_cache,
5881 struct block_group_tree *block_group_cache,
5882 struct device_extent_tree *dev_extent_cache,
5883 struct root_item_record *ri)
5885 struct extent_buffer *buf;
5886 struct extent_record *rec = NULL;
5897 struct btrfs_key key;
5898 struct cache_extent *cache;
/* grab a batch of blocks for readahead; bits[0] is the one we process */
5901 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5902 bits_nr, &reada_bits);
5907 for(i = 0; i < nritems; i++) {
5908 ret = add_cache_extent(reada, bits[i].start,
5913 /* fixme, get the parent transid */
5914 readahead_tree_block(root, bits[i].start,
5918 *last = bits[0].start;
5919 bytenr = bits[0].start;
5920 size = bits[0].size;
/* drop this block from whichever queues still reference it */
5922 cache = lookup_cache_extent(pending, bytenr, size);
5924 remove_cache_extent(pending, cache);
5927 cache = lookup_cache_extent(reada, bytenr, size);
5929 remove_cache_extent(reada, cache);
5932 cache = lookup_cache_extent(nodes, bytenr, size);
5934 remove_cache_extent(nodes, cache);
5937 cache = lookup_cache_extent(extent_cache, bytenr, size);
5939 rec = container_of(cache, struct extent_record, cache);
5940 gen = rec->parent_generation;
5943 /* fixme, get the real parent transid */
5944 buf = read_tree_block(root, bytenr, size, gen);
5945 if (!extent_buffer_uptodate(buf)) {
5946 record_bad_block_io(root->fs_info,
5947 extent_cache, bytenr, size);
5951 nritems = btrfs_header_nritems(buf);
/* prefer the on-disk extent flags; fall back to computing them */
5954 if (!init_extent_tree) {
5955 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5956 btrfs_header_level(buf), 1, NULL,
5959 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5961 fprintf(stderr, "Couldn't calc extent flags\n");
5962 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5967 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5969 fprintf(stderr, "Couldn't calc extent flags\n");
5970 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* cross-check the FULL_BACKREF flag against how we reached this block */
5974 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5976 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5977 ri->objectid == btrfs_header_owner(buf)) {
5979 * Ok we got to this block from it's original owner and
5980 * we have FULL_BACKREF set. Relocation can leave
5981 * converted blocks over so this is altogether possible,
5982 * however it's not possible if the generation > the
5983 * last snapshot, so check for this case.
5985 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5986 btrfs_header_generation(buf) > ri->last_snapshot) {
5987 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5988 rec->bad_full_backref = 1;
5993 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5994 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5995 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5996 rec->bad_full_backref = 1;
/* remember the decision so later passes can cross-check it */
6000 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6001 rec->flag_block_full_backref = 1;
6005 rec->flag_block_full_backref = 0;
6007 owner = btrfs_header_owner(buf);
6010 ret = check_block(root, extent_cache, buf, flags);
/* leaf: account wasted space and process every item type */
6014 if (btrfs_is_leaf(buf)) {
6015 btree_space_waste += btrfs_leaf_free_space(root, buf);
6016 for (i = 0; i < nritems; i++) {
6017 struct btrfs_file_extent_item *fi;
6018 btrfs_item_key_to_cpu(buf, &key, i);
6019 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6020 process_extent_item(root, extent_cache, buf,
6024 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6025 process_extent_item(root, extent_cache, buf,
6029 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6031 btrfs_item_size_nr(buf, i);
6034 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6035 process_chunk_item(chunk_cache, &key, buf, i);
6038 if (key.type == BTRFS_DEV_ITEM_KEY) {
6039 process_device_item(dev_cache, &key, buf, i);
6042 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6043 process_block_group_item(block_group_cache,
6047 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6048 process_device_extent_item(dev_extent_cache,
6053 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6054 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6055 process_extent_ref_v0(extent_cache, buf, i);
6062 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6063 add_tree_backref(extent_cache, key.objectid, 0,
6067 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6068 add_tree_backref(extent_cache, key.objectid,
6072 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6073 struct btrfs_extent_data_ref *ref;
6074 ref = btrfs_item_ptr(buf, i,
6075 struct btrfs_extent_data_ref);
6076 add_data_backref(extent_cache,
6078 btrfs_extent_data_ref_root(buf, ref),
6079 btrfs_extent_data_ref_objectid(buf,
6081 btrfs_extent_data_ref_offset(buf, ref),
6082 btrfs_extent_data_ref_count(buf, ref),
6083 0, root->sectorsize);
6086 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6087 struct btrfs_shared_data_ref *ref;
6088 ref = btrfs_item_ptr(buf, i,
6089 struct btrfs_shared_data_ref);
6090 add_data_backref(extent_cache,
6091 key.objectid, key.offset, 0, 0, 0,
6092 btrfs_shared_data_ref_count(buf, ref),
6093 0, root->sectorsize);
/* orphans get queued on delete_items for later cleanup */
6096 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6097 struct bad_item *bad;
6099 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6103 bad = malloc(sizeof(struct bad_item));
6106 INIT_LIST_HEAD(&bad->list);
6107 memcpy(&bad->key, &key,
6108 sizeof(struct btrfs_key));
6109 bad->root_id = owner;
6110 list_add_tail(&bad->list, &delete_items);
/* only regular (non-inline, non-hole) file extents matter below */
6113 if (key.type != BTRFS_EXTENT_DATA_KEY)
6115 fi = btrfs_item_ptr(buf, i,
6116 struct btrfs_file_extent_item);
6117 if (btrfs_file_extent_type(buf, fi) ==
6118 BTRFS_FILE_EXTENT_INLINE)
6120 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6123 data_bytes_allocated +=
6124 btrfs_file_extent_disk_num_bytes(buf, fi);
6125 if (data_bytes_allocated < root->sectorsize) {
6128 data_bytes_referenced +=
6129 btrfs_file_extent_num_bytes(buf, fi);
6130 add_data_backref(extent_cache,
6131 btrfs_file_extent_disk_bytenr(buf, fi),
6132 parent, owner, key.objectid, key.offset -
6133 btrfs_file_extent_offset(buf, fi), 1, 1,
6134 btrfs_file_extent_disk_num_bytes(buf, fi));
/* node: queue each child pointer, skipping already-dropped keys */
6138 struct btrfs_key first_key;
6140 first_key.objectid = 0;
6143 btrfs_item_key_to_cpu(buf, &first_key, 0);
6144 level = btrfs_header_level(buf);
6145 for (i = 0; i < nritems; i++) {
6146 ptr = btrfs_node_blockptr(buf, i);
6147 size = btrfs_level_size(root, level - 1);
6148 btrfs_node_key_to_cpu(buf, &key, i);
6150 if ((level == ri->drop_level)
6151 && is_dropped_key(&key, &ri->drop_key)) {
6155 ret = add_extent_rec(extent_cache, &key,
6156 btrfs_node_ptr_generation(buf, i),
6157 ptr, size, 0, 0, 1, 0, 1, 0,
6161 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6164 add_pending(nodes, seen, ptr, size);
6166 add_pending(pending, seen, ptr, size);
6169 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6170 nritems) * sizeof(struct btrfs_key_ptr);
/* global accounting for the summary printed at the end of fsck */
6172 total_btree_bytes += buf->len;
6173 if (fs_root_objectid(btrfs_header_owner(buf)))
6174 total_fs_tree_bytes += buf->len;
6175 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6176 total_extent_tree_bytes += buf->len;
6177 if (!found_old_backref &&
6178 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6179 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6180 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6181 found_old_backref = 1;
6183 free_extent_buffer(buf);
6187 static int add_root_to_pending(struct extent_buffer *buf,
6188 struct cache_tree *extent_cache,
6189 struct cache_tree *pending,
6190 struct cache_tree *seen,
6191 struct cache_tree *nodes,
6194 if (btrfs_header_level(buf) > 0)
6195 add_pending(nodes, seen, buf->start, buf->len);
6197 add_pending(pending, seen, buf->start, buf->len);
6198 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6199 0, 1, 1, 0, 1, 0, buf->len);
6201 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6202 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6203 add_tree_backref(extent_cache, buf->start, buf->start,
6206 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
/*
 * As we fix the tree, we might be deleting blocks that we're tracking
 * for repair.  This hook makes sure we remove any backrefs for blocks
 * as we are fixing them.
 */
6214 static int free_extent_hook(struct btrfs_trans_handle *trans,
6215 struct btrfs_root *root,
6216 u64 bytenr, u64 num_bytes, u64 parent,
6217 u64 root_objectid, u64 owner, u64 offset,
6220 struct extent_record *rec;
6221 struct cache_extent *cache;
6223 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6225 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6226 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6230 rec = container_of(cache, struct extent_record, cache);
6232 struct data_backref *back;
6233 back = find_data_backref(rec, parent, root_objectid, owner,
6234 offset, 1, bytenr, num_bytes);
6237 if (back->node.found_ref) {
6238 back->found_ref -= refs_to_drop;
6240 rec->refs -= refs_to_drop;
6242 if (back->node.found_extent_tree) {
6243 back->num_refs -= refs_to_drop;
6244 if (rec->extent_item_refs)
6245 rec->extent_item_refs -= refs_to_drop;
6247 if (back->found_ref == 0)
6248 back->node.found_ref = 0;
6249 if (back->num_refs == 0)
6250 back->node.found_extent_tree = 0;
6252 if (!back->node.found_extent_tree && back->node.found_ref) {
6253 list_del(&back->node.list);
6257 struct tree_backref *back;
6258 back = find_tree_backref(rec, parent, root_objectid);
6261 if (back->node.found_ref) {
6264 back->node.found_ref = 0;
6266 if (back->node.found_extent_tree) {
6267 if (rec->extent_item_refs)
6268 rec->extent_item_refs--;
6269 back->node.found_extent_tree = 0;
6271 if (!back->node.found_extent_tree && back->node.found_ref) {
6272 list_del(&back->node.list);
6276 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every item in the extent tree recording the
 * extent at @bytenr — the extent item itself (EXTENT/METADATA) and all
 * backref item types — by repeatedly searching from (bytenr, ..., -1)
 * and walking backwards.  Block group accounting is updated whenever
 * the extent item itself is removed.
 *
 * NOTE(review): extracted excerpt — the loop framing, some error
 * checks and the return are not visible here.
 */
6281 static int delete_extent_records(struct btrfs_trans_handle *trans,
6282 struct btrfs_root *root,
6283 struct btrfs_path *path,
6284 u64 bytenr, u64 new_len)
6286 struct btrfs_key key;
6287 struct btrfs_key found_key;
6288 struct extent_buffer *leaf;
6293 key.objectid = bytenr;
6295 key.offset = (u64)-1;
6298 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6305 if (path->slots[0] == 0)
6311 leaf = path->nodes[0];
6312 slot = path->slots[0];
6314 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6315 if (found_key.objectid != bytenr)
/* skip item types that don't record this extent; step the key back */
6318 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6319 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6320 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6321 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6322 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6323 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6324 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6325 btrfs_release_path(path);
6326 if (found_key.type == 0) {
6327 if (found_key.offset == 0)
6329 key.offset = found_key.offset - 1;
6330 key.type = found_key.type;
6332 key.type = found_key.type - 1;
6333 key.offset = (u64)-1;
6337 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6338 found_key.objectid, found_key.type, found_key.offset);
6340 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6343 btrfs_release_path(path);
/* deleting the extent item proper: give the space back to the bg */
6345 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6346 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6347 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6348 found_key.offset : root->leafsize;
6350 ret = btrfs_update_block_group(trans, root, bytenr,
6357 btrfs_release_path(path);
/*
 * For a single backref, this will allocate a new extent
 * and add the backref to it.
 */
/*
 * Insert the missing extent item for @rec (when !allocated), including
 * the tree_block_info for metadata extents, then re-add the backref
 * itself with btrfs_inc_extent_ref() and update block group accounting.
 *
 * NOTE(review): extracted excerpt — declarations, error checks and the
 * return are not all visible here.
 */
6365 static int record_extent(struct btrfs_trans_handle *trans,
6366 struct btrfs_fs_info *info,
6367 struct btrfs_path *path,
6368 struct extent_record *rec,
6369 struct extent_backref *back,
6370 int allocated, u64 flags)
6373 struct btrfs_root *extent_root = info->extent_root;
6374 struct extent_buffer *leaf;
6375 struct btrfs_key ins_key;
6376 struct btrfs_extent_item *ei;
6377 struct tree_backref *tback;
6378 struct data_backref *dback;
6379 struct btrfs_tree_block_info *bi;
6382 rec->max_size = max_t(u64, rec->max_size,
6383 info->extent_root->leafsize);
/* metadata extent items carry an extra btrfs_tree_block_info */
6386 u32 item_size = sizeof(*ei);
6389 item_size += sizeof(*bi);
6391 ins_key.objectid = rec->start;
6392 ins_key.offset = rec->max_size;
6393 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6395 ret = btrfs_insert_empty_item(trans, extent_root, path,
6396 &ins_key, item_size);
6400 leaf = path->nodes[0];
6401 ei = btrfs_item_ptr(leaf, path->slots[0],
6402 struct btrfs_extent_item);
6404 btrfs_set_extent_refs(leaf, ei, 0);
6405 btrfs_set_extent_generation(leaf, ei, rec->generation);
6407 if (back->is_data) {
6408 btrfs_set_extent_flags(leaf, ei,
6409 BTRFS_EXTENT_FLAG_DATA);
/*
 * NOTE(review): stray double ';' below, and the '©_key' tokens in this
 * branch look like mojibake for '&copy_key' (HTML '&copy;' entity) —
 * these calls should take the address of copy_key.  Left byte-identical
 * here; needs a code fix.
 */
6411 struct btrfs_disk_key copy_key;;
6413 tback = (struct tree_backref *)back;
6414 bi = (struct btrfs_tree_block_info *)(ei + 1);
6415 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6418 btrfs_set_disk_key_objectid(©_key,
6419 rec->info_objectid);
6420 btrfs_set_disk_key_type(©_key, 0);
6421 btrfs_set_disk_key_offset(©_key, 0);
6423 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6424 btrfs_set_tree_block_key(leaf, bi, ©_key);
6426 btrfs_set_extent_flags(leaf, ei,
6427 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6430 btrfs_mark_buffer_dirty(leaf);
6431 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6432 rec->max_size, 1, 0);
6435 btrfs_release_path(path);
/* now re-add the backref itself (one per real ref found) */
6438 if (back->is_data) {
6442 dback = (struct data_backref *)back;
6443 if (back->full_backref)
6444 parent = dback->parent;
6448 for (i = 0; i < dback->found_ref; i++) {
6449 /* if parent != 0, we're doing a full backref
6450 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6451 * just makes the backref allocator create a data
6454 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6455 rec->start, rec->max_size,
6459 BTRFS_FIRST_FREE_OBJECTID :
6465 fprintf(stderr, "adding new data backref"
6466 " on %llu %s %llu owner %llu"
6467 " offset %llu found %d\n",
6468 (unsigned long long)rec->start,
6469 back->full_backref ?
6471 back->full_backref ?
6472 (unsigned long long)parent :
6473 (unsigned long long)dback->root,
6474 (unsigned long long)dback->owner,
6475 (unsigned long long)dback->offset,
6480 tback = (struct tree_backref *)back;
6481 if (back->full_backref)
6482 parent = tback->parent;
6486 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6487 rec->start, rec->max_size,
6488 parent, tback->root, 0, 0);
6489 fprintf(stderr, "adding new tree backref on "
6490 "start %llu len %llu parent %llu root %llu\n",
6491 rec->start, rec->max_size, parent, tback->root);
6494 btrfs_release_path(path);
6498 struct extent_entry {
6503 struct list_head list;
6506 static struct extent_entry *find_entry(struct list_head *entries,
6507 u64 bytenr, u64 bytes)
6509 struct extent_entry *entry = NULL;
6511 list_for_each_entry(entry, entries, list) {
6512 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the candidate entry with the highest vote count, ignoring
 * entries whose votes are all broken.  When the top candidates tie the
 * backrefs are evenly split and cannot self-arbitrate (NULL is used to
 * signal that — see the callers in verify_backrefs()).
 *
 * NOTE(review): extracted excerpt — the prev/best bookkeeping lines
 * between the visible conditions are not shown here.
 */
6519 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6521 struct extent_entry *entry, *best = NULL, *prev = NULL;
6523 list_for_each_entry(entry, entries, list) {
6530 * If there are as many broken entries as entries then we know
6531 * not to trust this particular entry.
6533 if (entry->broken == entry->count)
6537 * If our current entry == best then we can't be sure our best
6538 * is really the best, so we need to keep searching.
6540 if (best && best->count == entry->count) {
6546 /* Prev == entry, not good enough, have to keep searching */
6547 if (!prev->broken && prev->count == entry->count)
6551 best = (prev->count > entry->count) ? prev : entry;
6552 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so its disk_bytenr/disk_num_bytes match
 * the agreed-upon extent @entry: look up the file root for @dback,
 * walk forward from (owner, EXTENT_DATA, offset) until the matching
 * ref is found, then cow down inside a transaction and adjust bytenr,
 * offset and sizes.  Compressed extents whose start doesn't match are
 * left alone and reported for manual inspection.
 *
 * NOTE(review): extracted excerpt — declarations, some gotos/error
 * paths and closing braces are not visible here.
 */
6560 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6561 struct data_backref *dback, struct extent_entry *entry)
6563 struct btrfs_trans_handle *trans;
6564 struct btrfs_root *root;
6565 struct btrfs_file_extent_item *fi;
6566 struct extent_buffer *leaf;
6567 struct btrfs_key key;
6571 key.objectid = dback->root;
6572 key.type = BTRFS_ROOT_ITEM_KEY;
6573 key.offset = (u64)-1;
6574 root = btrfs_read_fs_root(info, &key);
6576 fprintf(stderr, "Couldn't find root for our ref\n");
6581 * The backref points to the original offset of the extent if it was
6582 * split, so we need to search down to the offset we have and then walk
6583 * forward until we find the backref we're looking for.
6585 key.objectid = dback->owner;
6586 key.type = BTRFS_EXTENT_DATA_KEY;
6587 key.offset = dback->offset;
6588 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6590 fprintf(stderr, "Error looking up ref %d\n", ret);
6595 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6596 ret = btrfs_next_leaf(root, path);
6598 fprintf(stderr, "Couldn't find our ref, next\n");
6602 leaf = path->nodes[0];
6603 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6604 if (key.objectid != dback->owner ||
6605 key.type != BTRFS_EXTENT_DATA_KEY) {
6606 fprintf(stderr, "Couldn't find our ref, search\n");
6609 fi = btrfs_item_ptr(leaf, path->slots[0],
6610 struct btrfs_file_extent_item);
6611 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6612 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* found the file extent this backref was generated from */
6614 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6619 btrfs_release_path(path);
6621 trans = btrfs_start_transaction(root, 1);
6623 return PTR_ERR(trans);
6626 * Ok we have the key of the file extent we want to fix, now we can cow
6627 * down to the thing and fix it.
6629 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6631 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6632 key.objectid, key.type, key.offset, ret);
6636 fprintf(stderr, "Well that's odd, we just found this key "
6637 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6642 leaf = path->nodes[0];
6643 fi = btrfs_item_ptr(leaf, path->slots[0],
6644 struct btrfs_file_extent_item);
6646 if (btrfs_file_extent_compression(leaf, fi) &&
6647 dback->disk_bytenr != entry->bytenr) {
6648 fprintf(stderr, "Ref doesn't match the record start and is "
6649 "compressed, please take a btrfs-image of this file "
6650 "system and send it to a btrfs developer so they can "
6651 "complete this functionality for bytenr %Lu\n",
6652 dback->disk_bytenr);
/* three cases: broken ref, ref past the entry, ref before the entry */
6657 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6658 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6659 } else if (dback->disk_bytenr > entry->bytenr) {
6660 u64 off_diff, offset;
6662 off_diff = dback->disk_bytenr - entry->bytenr;
6663 offset = btrfs_file_extent_offset(leaf, fi);
6664 if (dback->disk_bytenr + offset +
6665 btrfs_file_extent_num_bytes(leaf, fi) >
6666 entry->bytenr + entry->bytes) {
6667 fprintf(stderr, "Ref is past the entry end, please "
6668 "take a btrfs-image of this file system and "
6669 "send it to a btrfs developer, ref %Lu\n",
6670 dback->disk_bytenr);
6675 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6676 btrfs_set_file_extent_offset(leaf, fi, offset);
6677 } else if (dback->disk_bytenr < entry->bytenr) {
6680 offset = btrfs_file_extent_offset(leaf, fi);
6681 if (dback->disk_bytenr + offset < entry->bytenr) {
6682 fprintf(stderr, "Ref is before the entry start, please"
6683 " take a btrfs-image of this file system and "
6684 "send it to a btrfs developer, ref %Lu\n",
6685 dback->disk_bytenr);
6690 offset += dback->disk_bytenr;
6691 offset -= entry->bytenr;
6692 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6693 btrfs_set_file_extent_offset(leaf, fi, offset);
6696 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6699 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6700 * only do this if we aren't using compression, otherwise it's a
6703 if (!btrfs_file_extent_compression(leaf, fi))
6704 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6706 printf("ram bytes may be wrong?\n");
6707 btrfs_mark_buffer_dirty(leaf);
6709 err = btrfs_commit_transaction(trans, root);
6710 btrfs_release_path(path);
6711 return ret ? ret : err;
/*
 * For a data extent whose backrefs disagree about (bytenr, bytes):
 * tally one vote per distinct pair into extent_entry candidates, pick
 * a winner (find_most_right_entry(), with the extent record itself as
 * a tie-breaker) and rewrite the non-matching file extent refs via
 * repair_ref().  The temporary entry list is freed before returning.
 *
 * NOTE(review): extracted excerpt — declarations, a few increments
 * (count/broken/nr_entries) and gotos are not visible here.
 */
6714 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6715 struct extent_record *rec)
6717 struct extent_backref *back;
6718 struct data_backref *dback;
6719 struct extent_entry *entry, *best = NULL;
6722 int broken_entries = 0;
6727 * Metadata is easy and the backrefs should always agree on bytenr and
6728 * size, if not we've got bigger issues.
/* first pass: one vote per backref into the candidate list */
6733 list_for_each_entry(back, &rec->backrefs, list) {
6734 if (back->full_backref || !back->is_data)
6737 dback = (struct data_backref *)back;
6740 * We only pay attention to backrefs that we found a real
6743 if (dback->found_ref == 0)
6747 * For now we only catch when the bytes don't match, not the
6748 * bytenr. We can easily do this at the same time, but I want
6749 * to have a fs image to test on before we just add repair
6750 * functionality willy-nilly so we know we won't screw up the
6754 entry = find_entry(&entries, dback->disk_bytenr,
6757 entry = malloc(sizeof(struct extent_entry));
6762 memset(entry, 0, sizeof(*entry));
6763 entry->bytenr = dback->disk_bytenr;
6764 entry->bytes = dback->bytes;
6765 list_add_tail(&entry->list, &entries);
6770 * If we only have on entry we may think the entries agree when
6771 * in reality they don't so we have to do some extra checking.
6773 if (dback->disk_bytenr != rec->start ||
6774 dback->bytes != rec->nr || back->broken)
6785 /* Yay all the backrefs agree, carry on good sir */
6786 if (nr_entries <= 1 && !mismatch)
6789 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6790 "%Lu\n", rec->start);
6793 * First we want to see if the backrefs can agree amongst themselves who
6794 * is right, so figure out which one of the entries has the highest
6797 best = find_most_right_entry(&entries);
6800 * Ok so we may have an even split between what the backrefs think, so
6801 * this is where we use the extent ref to see what it thinks.
6804 entry = find_entry(&entries, rec->start, rec->nr);
6805 if (!entry && (!broken_entries || !rec->found_rec)) {
6806 fprintf(stderr, "Backrefs don't agree with each other "
6807 "and extent record doesn't agree with anybody,"
6808 " so we can't fix bytenr %Lu bytes %Lu\n",
6809 rec->start, rec->nr);
6812 } else if (!entry) {
6814 * Ok our backrefs were broken, we'll assume this is the
6815 * correct value and add an entry for this range.
6817 entry = malloc(sizeof(struct extent_entry));
6822 memset(entry, 0, sizeof(*entry));
6823 entry->bytenr = rec->start;
6824 entry->bytes = rec->nr;
6825 list_add_tail(&entry->list, &entries);
/* retry the vote with the extent record counted in */
6829 best = find_most_right_entry(&entries);
6831 fprintf(stderr, "Backrefs and extent record evenly "
6832 "split on who is right, this is going to "
6833 "require user input to fix bytenr %Lu bytes "
6834 "%Lu\n", rec->start, rec->nr);
6841 * I don't think this can happen currently as we'll abort() if we catch
6842 * this case higher up, but in case somebody removes that we still can't
6843 * deal with it properly here yet, so just bail out of that's the case.
6845 if (best->bytenr != rec->start) {
6846 fprintf(stderr, "Extent start and backref starts don't match, "
6847 "please use btrfs-image on this file system and send "
6848 "it to a btrfs developer so they can make fsck fix "
6849 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6850 rec->start, rec->nr);
6856 * Ok great we all agreed on an extent record, let's go find the real
6857 * references and fix up the ones that don't match.
6859 list_for_each_entry(back, &rec->backrefs, list) {
6860 if (back->full_backref || !back->is_data)
6863 dback = (struct data_backref *)back;
6866 * Still ignoring backrefs that don't have a real ref attached
6869 if (dback->found_ref == 0)
6872 if (dback->bytes == best->bytes &&
6873 dback->disk_bytenr == best->bytenr)
6876 ret = repair_ref(info, path, dback, best);
6882 * Ok we messed with the actual refs, which means we need to drop our
6883 * entire cache and go back and rescan. I know this is a huge pain and
6884 * adds a lot of extra work, but it's the only way to be safe. Once all
6885 * the backrefs agree we may not need to do anything to the extent
/* free the temporary candidate list */
6890 while (!list_empty(&entries)) {
6891 entry = list_entry(entries.next, struct extent_entry, list);
6892 list_del_init(&entry->list);
/*
 * Collapse duplicate extent records for overlapping ranges: promote
 * the duplicate that matches a real EXTENT_ITEM ('good'), merge
 * backrefs and dup lists from overlapping records into it, and queue
 * it on duplicate_extents when real duplicates remain to be deleted.
 * Returns 1 when no duplicates are left, 0 otherwise.
 *
 * NOTE(review): extracted excerpt — loop framing and a few statements
 * are not visible here.
 */
6898 static int process_duplicates(struct btrfs_root *root,
6899 struct cache_tree *extent_cache,
6900 struct extent_record *rec)
6902 struct extent_record *good, *tmp;
6903 struct cache_extent *cache;
6907 * If we found a extent record for this extent then return, or if we
6908 * have more than one duplicate we are likely going to need to delete
6911 if (rec->found_rec || rec->num_duplicates > 1)
6914 /* Shouldn't happen but just in case */
6915 BUG_ON(!rec->num_duplicates);
6918 * So this happens if we end up with a backref that doesn't match the
6919 * actual extent entry. So either the backref is bad or the extent
6920 * entry is bad. Either way we want to have the extent_record actually
6921 * reflect what we found in the extent_tree, so we need to take the
6922 * duplicate out and use that as the extent_record since the only way we
6923 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6925 remove_cache_extent(extent_cache, &rec->cache);
6927 good = list_entry(rec->dups.next, struct extent_record, list);
6928 list_del_init(&good->list);
/* re-initialize the promoted record and take over rec's backrefs */
6929 INIT_LIST_HEAD(&good->backrefs);
6930 INIT_LIST_HEAD(&good->dups);
6931 good->cache.start = good->start;
6932 good->cache.size = good->nr;
6933 good->content_checked = 0;
6934 good->owner_ref_checked = 0;
6935 good->num_duplicates = 0;
6936 good->refs = rec->refs;
6937 list_splice_init(&rec->backrefs, &good->backrefs);
/* merge any other records overlapping the promoted range */
6939 cache = lookup_cache_extent(extent_cache, good->start,
6943 tmp = container_of(cache, struct extent_record, cache);
6946 * If we find another overlapping extent and it's found_rec is
6947 * set then it's a duplicate and we need to try and delete
6950 if (tmp->found_rec || tmp->num_duplicates > 0) {
6951 if (list_empty(&good->list))
6952 list_add_tail(&good->list,
6953 &duplicate_extents);
6954 good->num_duplicates += tmp->num_duplicates + 1;
6955 list_splice_init(&tmp->dups, &good->dups);
6956 list_del_init(&tmp->list);
6957 list_add_tail(&tmp->list, &good->dups);
6958 remove_cache_extent(extent_cache, &tmp->cache);
6963 * Ok we have another non extent item backed extent rec, so lets
6964 * just add it to this extent and carry on like we did above.
6966 good->refs += tmp->refs;
6967 list_splice_init(&tmp->backrefs, &good->backrefs);
6968 remove_cache_extent(extent_cache, &tmp->cache);
6971 ret = insert_cache_extent(extent_cache, &good->cache);
6974 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - remove overlapping EXTENT_ITEMs from the
 * extent tree, keeping only the record that covers all of its duplicates.
 *
 * Returns the number of items deleted on success, or a negative errno.
 * A return of 0 with no error means nothing needed deleting and
 * rec->num_duplicates is cleared.
 *
 * NOTE(review): listing is truncated (interior lines missing); comments
 * describe only the visible fragment.
 */
6977 static int delete_duplicate_records(struct btrfs_root *root,
6978 struct extent_record *rec)
6980 struct btrfs_trans_handle *trans;
6981 LIST_HEAD(delete_list);
6982 struct btrfs_path *path;
6983 struct extent_record *tmp, *good, *n;
6986 struct btrfs_key key;
6988 path = btrfs_alloc_path();
6995 /* Find the record that covers all of the duplicates. */
6996 list_for_each_entry(tmp, &rec->dups, list) {
6997 if (good->start < tmp->start)
6999 if (good->nr > tmp->nr)
/* Overlaps that are not fully nested cannot be repaired here. */
7002 if (tmp->start + tmp->nr < good->start + good->nr) {
7003 fprintf(stderr, "Ok we have overlapping extents that "
7004 "aren't completely covered by eachother, this "
7005 "is going to require more careful thought. "
7006 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7007 tmp->start, tmp->nr, good->start, good->nr);
/* Queue everything except the covering record for deletion. */
7014 list_add_tail(&rec->list, &delete_list);
7016 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7019 list_move_tail(&tmp->list, &delete_list);
/* Deletions happen in the extent allocation tree. */
7022 root = root->fs_info->extent_root;
7023 trans = btrfs_start_transaction(root, 1);
7024 if (IS_ERR(trans)) {
7025 ret = PTR_ERR(trans);
7029 list_for_each_entry(tmp, &delete_list, list) {
7030 if (tmp->found_rec == 0)
7032 key.objectid = tmp->start;
7033 key.type = BTRFS_EXTENT_ITEM_KEY;
7034 key.offset = tmp->nr;
7036 /* Shouldn't happen but just in case */
7037 if (tmp->metadata) {
7038 fprintf(stderr, "Well this shouldn't happen, extent "
7039 "record overlaps but is metadata? "
7040 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7044 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7050 ret = btrfs_del_item(trans, root, path);
7053 btrfs_release_path(path);
7056 err = btrfs_commit_transaction(trans, root);
/* Free every record we queued, then drain any remaining dups. */
7060 while (!list_empty(&delete_list)) {
7061 tmp = list_entry(delete_list.next, struct extent_record, list);
7062 list_del_init(&tmp->list);
7068 while (!list_empty(&rec->dups)) {
7069 tmp = list_entry(rec->dups.next, struct extent_record, list);
7070 list_del_init(&tmp->list);
7074 btrfs_free_path(path);
7076 if (!ret && !nr_del)
7077 rec->num_duplicates = 0;
7079 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - for backrefs that were never matched to a real
 * file extent, look up the referencing file extent in its fs tree and fill
 * in the on-disk bytenr/bytes so verify_backrefs can reason about them.
 *
 * Only data backrefs without full_backref are considered.  A backref whose
 * bytenr already has its own extent record is left alone.
 *
 * NOTE(review): listing is truncated (interior lines missing); comments
 * describe only the visible fragment.
 */
7082 static int find_possible_backrefs(struct btrfs_fs_info *info,
7083 struct btrfs_path *path,
7084 struct cache_tree *extent_cache,
7085 struct extent_record *rec)
7087 struct btrfs_root *root;
7088 struct extent_backref *back;
7089 struct data_backref *dback;
7090 struct cache_extent *cache;
7091 struct btrfs_file_extent_item *fi;
7092 struct btrfs_key key;
7096 list_for_each_entry(back, &rec->backrefs, list) {
7097 /* Don't care about full backrefs (poor unloved backrefs) */
7098 if (back->full_backref || !back->is_data)
7101 dback = (struct data_backref *)back;
7103 /* We found this one, we don't need to do a lookup */
7104 if (dback->found_ref)
/* Resolve the subvolume root the backref claims to live in. */
7107 key.objectid = dback->root;
7108 key.type = BTRFS_ROOT_ITEM_KEY;
7109 key.offset = (u64)-1;
7111 root = btrfs_read_fs_root(info, &key);
7113 /* No root, definitely a bad ref, skip */
7114 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7116 /* Other err, exit */
7118 return PTR_ERR(root);
/* Look for the file extent item the backref points at. */
7120 key.objectid = dback->owner;
7121 key.type = BTRFS_EXTENT_DATA_KEY;
7122 key.offset = dback->offset;
7123 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7125 btrfs_release_path(path);
7128 /* Didn't find it, we can carry on */
7133 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7134 struct btrfs_file_extent_item);
7135 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7136 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7137 btrfs_release_path(path);
7138 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7140 struct extent_record *tmp;
7141 tmp = container_of(cache, struct extent_record, cache);
7144 * If we found an extent record for the bytenr for this
7145 * particular backref then we can't add it to our
7146 * current extent record. We only want to add backrefs
7147 * that don't have a corresponding extent item in the
7148 * extent tree since they likely belong to this record
7149 * and we need to fix it if it doesn't match bytenrs.
/* Record what the fs tree actually points at. */
7155 dback->found_ref += 1;
7156 dback->disk_bytenr = bytenr;
7157 dback->bytes = bytes;
7160 * Set this so the verify backref code knows not to trust the
7161 * values in this backref.
7170 * Record orphan data ref into corresponding root.
7172 * Return 0 if the extent item contains data ref and recorded.
7173 * Return 1 if the extent item contains no useful data ref
7174 * On that case, it may contains only shared_dataref or metadata backref
7175 * or the file extent exists(this should be handled by the extent bytenr
7177 * Return <0 if something goes wrong.
7179 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7180 struct extent_record *rec)
7182 struct btrfs_key key;
7183 struct btrfs_root *dest_root;
7184 struct extent_backref *back;
7185 struct data_backref *dback;
7186 struct orphan_data_extent *orphan;
7187 struct btrfs_path *path;
7188 int recorded_data_ref = 0;
7193 path = btrfs_alloc_path();
/*
 * Walk backrefs that the extent tree knows about but which never matched
 * a real file extent (found_ref == 0): those are orphan data refs.
 * NOTE(review): listing is truncated here; interior lines missing.
 */
7196 list_for_each_entry(back, &rec->backrefs, list) {
7197 if (back->full_backref || !back->is_data ||
7198 !back->found_extent_tree)
7200 dback = (struct data_backref *)back;
7201 if (dback->found_ref)
7203 key.objectid = dback->root;
7204 key.type = BTRFS_ROOT_ITEM_KEY;
7205 key.offset = (u64)-1;
7207 dest_root = btrfs_read_fs_root(fs_info, &key);
7209 /* For non-exist root we just skip it */
7210 if (IS_ERR(dest_root) || !dest_root)
7213 key.objectid = dback->owner;
7214 key.type = BTRFS_EXTENT_DATA_KEY;
7215 key.offset = dback->offset;
7217 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7219 * For ret < 0, it's OK since the fs-tree may be corrupted,
7220 * we need to record it for inode/file extent rebuild.
7221 * For ret > 0, we record it only for file extent rebuild.
7222 * For ret == 0, the file extent exists but only bytenr
7223 * mismatch, let the original bytenr fix routine to handle,
/* Attach the orphan extent to its destination root for later rebuild. */
7229 orphan = malloc(sizeof(*orphan));
7234 INIT_LIST_HEAD(&orphan->list);
7235 orphan->root = dback->root;
7236 orphan->objectid = dback->owner;
7237 orphan->offset = dback->offset;
7238 orphan->disk_bytenr = rec->cache.start;
7239 orphan->disk_len = rec->cache.size;
7240 list_add(&dest_root->orphan_data_extents, &orphan->list);
7241 recorded_data_ref = 1;
7244 btrfs_free_path(path);
/* 0 = at least one data ref recorded, 1 = nothing useful found. */
7246 return !recorded_data_ref;
7252 * when an incorrect extent item is found, this will delete
7253 * all of the existing entries for it and recreate them
7254 * based on what the tree scan found.
7256 static int fixup_extent_refs(struct btrfs_fs_info *info,
7257 struct cache_tree *extent_cache,
7258 struct extent_record *rec)
7260 struct btrfs_trans_handle *trans = NULL;
7262 struct btrfs_path *path;
7263 struct list_head *cur = rec->backrefs.next;
7264 struct cache_extent *cache;
7265 struct extent_backref *back;
/*
 * NOTE(review): listing is truncated (interior lines missing); comments
 * describe only the visible fragment.
 */
7269 if (rec->flag_block_full_backref)
7270 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7272 path = btrfs_alloc_path();
7276 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7278 * Sometimes the backrefs themselves are so broken they don't
7279 * get attached to any meaningful rec, so first go back and
7280 * check any of our backrefs that we couldn't find and throw
7281 * them into the list if we find the backref so that
7282 * verify_backrefs can figure out what to do.
7284 ret = find_possible_backrefs(info, path, extent_cache, rec);
7289 /* step one, make sure all of the backrefs agree */
7290 ret = verify_backrefs(info, path, rec);
7294 trans = btrfs_start_transaction(info->extent_root, 1);
7295 if (IS_ERR(trans)) {
7296 ret = PTR_ERR(trans);
7300 /* step two, delete all the existing records */
7301 ret = delete_extent_records(trans, info->extent_root, path,
7302 rec->start, rec->max_size);
7307 /* was this block corrupt? If so, don't add references to it */
7308 cache = lookup_cache_extent(info->corrupt_blocks,
7309 rec->start, rec->max_size);
7315 /* step three, recreate all the refs we did find */
7316 while(cur != &rec->backrefs) {
7317 back = list_entry(cur, struct extent_backref, list);
7321 * if we didn't find any references, don't create a
7324 if (!back->found_ref)
7327 rec->bad_full_backref = 0;
7328 ret = record_extent(trans, info, path, rec, back, allocated, flags);
/* Commit whatever we rebuilt before returning. */
7336 int err = btrfs_commit_transaction(trans, info->extent_root);
7341 btrfs_free_path(path);
/*
 * fixup_extent_flags - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * extent item for @rec to match rec->flag_block_full_backref.
 *
 * Returns the result of the transaction commit, or a negative errno if the
 * extent item could not be found/updated.
 *
 * NOTE(review): listing is truncated (interior lines missing); comments
 * describe only the visible fragment.
 */
7345 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7346 struct extent_record *rec)
7348 struct btrfs_trans_handle *trans;
7349 struct btrfs_root *root = fs_info->extent_root;
7350 struct btrfs_path *path;
7351 struct btrfs_extent_item *ei;
7352 struct btrfs_key key;
/* Metadata extents may be keyed by level (skinny metadata) instead of size. */
7356 key.objectid = rec->start;
7357 if (rec->metadata) {
7358 key.type = BTRFS_METADATA_ITEM_KEY;
7359 key.offset = rec->info_level;
7361 key.type = BTRFS_EXTENT_ITEM_KEY;
7362 key.offset = rec->max_size;
7365 path = btrfs_alloc_path();
7369 trans = btrfs_start_transaction(root, 0);
7370 if (IS_ERR(trans)) {
7371 btrfs_free_path(path);
7372 return PTR_ERR(trans);
7375 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7377 btrfs_free_path(path);
7378 btrfs_commit_transaction(trans, root);
7381 fprintf(stderr, "Didn't find extent for %llu\n",
7382 (unsigned long long)rec->start);
7383 btrfs_free_path(path);
7384 btrfs_commit_transaction(trans, root);
/* Found the item: rewrite its flags in place and mark the leaf dirty. */
7388 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7389 struct btrfs_extent_item);
7390 flags = btrfs_extent_flags(path->nodes[0], ei);
7391 if (rec->flag_block_full_backref) {
7392 fprintf(stderr, "setting full backref on %llu\n",
7393 (unsigned long long)key.objectid);
7394 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7396 fprintf(stderr, "clearing full backref on %llu\n",
7397 (unsigned long long)key.objectid);
7398 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7400 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7401 btrfs_mark_buffer_dirty(path->nodes[0]);
7402 btrfs_free_path(path);
7403 return btrfs_commit_transaction(trans, root);
7406 /* right now we only prune from the extent allocation tree */
7407 static int prune_one_block(struct btrfs_trans_handle *trans,
7408 struct btrfs_fs_info *info,
7409 struct btrfs_corrupt_block *corrupt)
7412 struct btrfs_path path;
7413 struct extent_buffer *eb;
/* Search stops one level above the corrupt block, at its parent node. */
7417 int level = corrupt->level + 1;
7419 btrfs_init_path(&path);
7421 /* we want to stop at the parent to our busted block */
7422 path.lowest_level = level;
7424 ret = btrfs_search_slot(trans, info->extent_root,
7425 &corrupt->key, &path, -1, 1);
7430 eb = path.nodes[level];
7437 * hopefully the search gave us the block we want to prune,
7438 * lets try that first
7440 slot = path.slots[level];
7441 found = btrfs_node_blockptr(eb, slot);
7442 if (found == corrupt->cache.start)
7445 nritems = btrfs_header_nritems(eb);
7447 /* the search failed, lets scan this node and hope we find it */
7448 for (slot = 0; slot < nritems; slot++) {
7449 found = btrfs_node_blockptr(eb, slot);
7450 if (found == corrupt->cache.start)
7454 * we couldn't find the bad block. TODO, search all the nodes for pointers
7457 if (eb == info->extent_root->node) {
7462 btrfs_release_path(&path);
/* Found the pointer to the corrupt child: delete it from the parent. */
7467 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7468 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7471 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - delete the pointers to every recorded corrupt
 * block from the extent allocation tree, then commit.
 *
 * NOTE(review): listing is truncated; the loop structure around
 * search_cache_extent() is only partially visible here.
 */
7475 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7477 struct btrfs_trans_handle *trans = NULL;
7478 struct cache_extent *cache;
7479 struct btrfs_corrupt_block *corrupt;
7482 cache = search_cache_extent(info->corrupt_blocks, 0);
/* Transaction is started lazily, only once a corrupt block exists. */
7486 trans = btrfs_start_transaction(info->extent_root, 1);
7488 return PTR_ERR(trans);
7490 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7491 prune_one_block(trans, info, corrupt);
7492 remove_cache_extent(info->corrupt_blocks, cache);
7495 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear the EXTENT_DIRTY ranges in the free
 * space cache and walk the affected block groups so they will be re-cached.
 *
 * NOTE(review): listing is truncated; loop boundaries are not fully visible.
 */
7499 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7501 struct btrfs_block_group_cache *cache;
7506 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7507 &start, &end, EXTENT_DIRTY);
7510 clear_extent_dirty(&fs_info->free_space_cache, start, end,
/* Advance block-group-by-block-group from the cleared range onwards. */
7516 cache = btrfs_lookup_first_block_group(fs_info, start);
7521 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - final pass over every cached extent record: report
 * ref-count mismatches, backref mismatches, owner-ref failures, bad
 * full-backref flags, stripe-crossing metadata and chunk-type mismatches,
 * and (in repair mode) fix what can be fixed.
 *
 * NOTE(review): listing is truncated (interior lines missing); comments
 * describe only the visible fragment.
 */
7525 static int check_extent_refs(struct btrfs_root *root,
7526 struct cache_tree *extent_cache)
7528 struct extent_record *rec;
7529 struct cache_extent *cache;
7538 * if we're doing a repair, we have to make sure
7539 * we don't allocate from the problem extents.
7540 * In the worst case, this will be all the
7543 cache = search_cache_extent(extent_cache, 0);
7545 rec = container_of(cache, struct extent_record, cache);
7546 set_extent_dirty(root->fs_info->excluded_extents,
7548 rec->start + rec->max_size - 1,
7550 cache = next_cache_extent(cache);
7553 /* pin down all the corrupted blocks too */
7554 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7556 set_extent_dirty(root->fs_info->excluded_extents,
7558 cache->start + cache->size - 1,
7560 cache = next_cache_extent(cache);
7562 prune_corrupt_blocks(root->fs_info);
7563 reset_cached_block_groups(root->fs_info);
7566 reset_cached_block_groups(root->fs_info);
7569 * We need to delete any duplicate entries we find first otherwise we
7570 * could mess up the extent tree when we have backrefs that actually
7571 * belong to a different extent item and not the weird duplicate one.
7573 while (repair && !list_empty(&duplicate_extents)) {
7574 rec = list_entry(duplicate_extents.next, struct extent_record,
7576 list_del_init(&rec->list);
7578 /* Sometimes we can find a backref before we find an actual
7579 * extent, so we need to process it a little bit to see if there
7580 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7581 * if this is a backref screwup. If we need to delete stuff
7582 * process_duplicates() will return 0, otherwise it will return
7585 if (process_duplicates(root, extent_cache, rec))
7587 ret = delete_duplicate_records(root, rec);
7591 * delete_duplicate_records will return the number of entries
7592 * deleted, so if it's greater than 0 then we know we actually
7593 * did something and we need to remove.
/* Main verification loop over every remaining extent record. */
7607 cache = search_cache_extent(extent_cache, 0);
7610 rec = container_of(cache, struct extent_record, cache);
7611 if (rec->num_duplicates) {
7612 fprintf(stderr, "extent item %llu has multiple extent "
7613 "items\n", (unsigned long long)rec->start);
7618 if (rec->refs != rec->extent_item_refs) {
7619 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7620 (unsigned long long)rec->start,
7621 (unsigned long long)rec->nr);
7622 fprintf(stderr, "extent item %llu, found %llu\n",
7623 (unsigned long long)rec->extent_item_refs,
7624 (unsigned long long)rec->refs);
7625 ret = record_orphan_data_extents(root->fs_info, rec);
7632 * we can't use the extent to repair file
7633 * extent, let the fallback method handle it.
7635 if (!fixed && repair) {
7636 ret = fixup_extent_refs(
7647 if (all_backpointers_checked(rec, 1)) {
7648 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7649 (unsigned long long)rec->start,
7650 (unsigned long long)rec->nr);
7652 if (!fixed && !recorded && repair) {
7653 ret = fixup_extent_refs(root->fs_info,
7662 if (!rec->owner_ref_checked) {
7663 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7664 (unsigned long long)rec->start,
7665 (unsigned long long)rec->nr);
7666 if (!fixed && !recorded && repair) {
7667 ret = fixup_extent_refs(root->fs_info,
7676 if (rec->bad_full_backref) {
7677 fprintf(stderr, "bad full backref, on [%llu]\n",
7678 (unsigned long long)rec->start);
7680 ret = fixup_extent_flags(root->fs_info, rec);
7689 * Although it's not a extent ref's problem, we reuse this
7690 * routine for error reporting.
7691 * No repair function yet.
7693 if (rec->crossing_stripes) {
7695 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7696 rec->start, rec->start + rec->max_size);
7701 if (rec->wrong_chunk_type) {
7703 "bad extent [%llu, %llu), type mismatch with chunk\n",
7704 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it and release its excluded range. */
7709 remove_cache_extent(extent_cache, cache);
7710 free_all_extent_backrefs(rec);
7711 if (!init_extent_tree && repair && (!cur_err || fixed))
7712 clear_extent_dirty(root->fs_info->excluded_extents,
7714 rec->start + rec->max_size - 1,
7720 if (ret && ret != -EAGAIN) {
7721 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
/* On successful repair, rewrite block accounting and commit. */
7724 struct btrfs_trans_handle *trans;
7726 root = root->fs_info->extent_root;
7727 trans = btrfs_start_transaction(root, 1);
7728 if (IS_ERR(trans)) {
7729 ret = PTR_ERR(trans);
7733 btrfs_fix_block_accounting(trans, root);
7734 ret = btrfs_commit_transaction(trans, root);
7739 fprintf(stderr, "repaired damaged extent references\n");
/*
 * calc_stripe_length - compute per-device stripe length for a chunk of
 * @length bytes laid out across @num_stripes stripes with RAID profile
 * bits in @type.
 *
 * RAID0 stripes split evenly; RAID10 mirrors pairs (so effective data
 * stripes are num_stripes/2, hence length*2/num_stripes); RAID5/RAID6
 * lose one/two parity stripes respectively; anything else (single,
 * DUP, RAID1) uses the full length per device.
 *
 * NOTE(review): the trailing return of the computed value is not visible
 * in this truncated listing.
 */
7745 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7749 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7750 stripe_size = length;
7751 stripe_size /= num_stripes;
7752 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7753 stripe_size = length * 2;
7754 stripe_size /= num_stripes;
7755 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7756 stripe_size = length;
7757 stripe_size /= (num_stripes - 1);
7758 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7759 stripe_size = length;
7760 stripe_size /= (num_stripes - 2);
7762 stripe_size = length;
7768 * Check the chunk with its block group/dev list ref:
7769 * Return 0 if all refs seems valid.
7770 * Return 1 if part of refs seems valid, need later check for rebuild ref
7771 * like missing block group and needs to search extent tree to rebuild them.
7772 * Return -1 if essential refs are missing and unable to rebuild.
7774 static int check_chunk_refs(struct chunk_record *chunk_rec,
7775 struct block_group_tree *block_group_cache,
7776 struct device_extent_tree *dev_extent_cache,
7779 struct cache_extent *block_group_item;
7780 struct block_group_record *block_group_rec;
7781 struct cache_extent *dev_extent_item;
7782 struct device_extent_record *dev_extent_rec;
7786 int metadump_v2 = 0;
/* First cross-check the chunk against its block group item. */
7790 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7793 if (block_group_item) {
7794 block_group_rec = container_of(block_group_item,
7795 struct block_group_record,
7797 if (chunk_rec->length != block_group_rec->offset ||
7798 chunk_rec->offset != block_group_rec->objectid ||
7800 chunk_rec->type_flags != block_group_rec->flags)) {
7803 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7804 chunk_rec->objectid,
7809 chunk_rec->type_flags,
7810 block_group_rec->objectid,
7811 block_group_rec->type,
7812 block_group_rec->offset,
7813 block_group_rec->offset,
7814 block_group_rec->objectid,
7815 block_group_rec->flags);
/* Claim the matching block group record for this chunk. */
7818 list_del_init(&block_group_rec->list);
7819 chunk_rec->bg_rec = block_group_rec;
7824 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7825 chunk_rec->objectid,
7830 chunk_rec->type_flags);
/* Then verify a device extent exists for each stripe of the chunk. */
7837 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7838 chunk_rec->num_stripes);
7839 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7840 devid = chunk_rec->stripes[i].devid;
7841 offset = chunk_rec->stripes[i].offset;
7842 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7843 devid, offset, length);
7844 if (dev_extent_item) {
7845 dev_extent_rec = container_of(dev_extent_item,
7846 struct device_extent_record,
7848 if (dev_extent_rec->objectid != devid ||
7849 dev_extent_rec->offset != offset ||
7850 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7851 dev_extent_rec->length != length) {
7854 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7855 chunk_rec->objectid,
7858 chunk_rec->stripes[i].devid,
7859 chunk_rec->stripes[i].offset,
7860 dev_extent_rec->objectid,
7861 dev_extent_rec->offset,
7862 dev_extent_rec->length);
7865 list_move(&dev_extent_rec->chunk_list,
7866 &chunk_rec->dextents);
7871 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7872 chunk_rec->objectid,
7875 chunk_rec->stripes[i].devid,
7876 chunk_rec->stripes[i].offset);
7883 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Walk every cached chunk, verify its block group and device extent refs
 * via check_chunk_refs(), and sort each chunk onto the caller-supplied
 * good/rebuild/bad lists.  Afterwards report any block groups or device
 * extents left unmatched by a chunk.  @silent suppresses the messages.
 */
7884 int check_chunks(struct cache_tree *chunk_cache,
7885 struct block_group_tree *block_group_cache,
7886 struct device_extent_tree *dev_extent_cache,
7887 struct list_head *good, struct list_head *bad,
7888 struct list_head *rebuild, int silent)
7890 struct cache_extent *chunk_item;
7891 struct chunk_record *chunk_rec;
7892 struct block_group_record *bg_rec;
7893 struct device_extent_record *dext_rec;
7897 chunk_item = first_cache_extent(chunk_cache);
7898 while (chunk_item) {
7899 chunk_rec = container_of(chunk_item, struct chunk_record,
7901 err = check_chunk_refs(chunk_rec, block_group_cache,
7902 dev_extent_cache, silent);
/* 0 -> good, >0 -> needs rebuild, <0 -> bad; lists are optional. */
7905 if (err == 0 && good)
7906 list_add_tail(&chunk_rec->list, good);
7907 if (err > 0 && rebuild)
7908 list_add_tail(&chunk_rec->list, rebuild);
7910 list_add_tail(&chunk_rec->list, bad);
7911 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists has no owning chunk: report it. */
7914 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7917 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7925 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7929 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used - sum the lengths of all device extents belonging to
 * @dev_rec and compare the total against the device item's byte_used.
 * Reports (and returns non-zero, per the surrounding convention) on
 * mismatch; matched extents are removed from the orphan device list.
 */
7940 static int check_device_used(struct device_record *dev_rec,
7941 struct device_extent_tree *dext_cache)
7943 struct cache_extent *cache;
7944 struct device_extent_record *dev_extent_rec;
7947 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7949 dev_extent_rec = container_of(cache,
7950 struct device_extent_record,
/* Stop once the iteration crosses into another device's extents. */
7952 if (dev_extent_rec->objectid != dev_rec->devid)
7955 list_del_init(&dev_extent_rec->device_list);
7956 total_byte += dev_extent_rec->length;
7957 cache = next_cache_extent(cache);
7960 if (total_byte != dev_rec->byte_used) {
7962 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7963 total_byte, dev_rec->byte_used, dev_rec->objectid,
7964 dev_rec->type, dev_rec->offset);
7971 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Iterate every cached device record, verify its used-byte accounting via
 * check_device_used(), then report device extents that matched no device.
 */
7972 static int check_devices(struct rb_root *dev_cache,
7973 struct device_extent_tree *dev_extent_cache)
7975 struct rb_node *dev_node;
7976 struct device_record *dev_rec;
7977 struct device_extent_record *dext_rec;
7981 dev_node = rb_first(dev_cache);
7983 dev_rec = container_of(dev_node, struct device_record, node);
7984 err = check_device_used(dev_rec, dev_extent_cache);
7988 dev_node = rb_next(dev_node);
/* Leftover orphans point at devices that do not exist. */
7990 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7993 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7994 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - allocate a root_item_record describing a tree
 * root (bytenr, level, drop progress, ...) and append it to @head for the
 * later tree-walking pass.  @drop_key may be NULL when the root is not
 * being dropped.  Returns 0 on success (allocation-failure path is not
 * visible in this truncated listing).
 */
8001 static int add_root_item_to_list(struct list_head *head,
8002 u64 objectid, u64 bytenr, u64 last_snapshot,
8003 u8 level, u8 drop_level,
8004 int level_size, struct btrfs_key *drop_key)
8007 struct root_item_record *ri_rec;
8008 ri_rec = malloc(sizeof(*ri_rec));
8011 ri_rec->bytenr = bytenr;
8012 ri_rec->objectid = objectid;
8013 ri_rec->level = level;
8014 ri_rec->level_size = level_size;
8015 ri_rec->drop_level = drop_level;
8016 ri_rec->last_snapshot = last_snapshot;
8018 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8019 list_add_tail(&ri_rec->list, head);
/* free_root_item_list - drain @list, unlinking (and freeing) each record. */
8024 static void free_root_item_list(struct list_head *list)
8026 struct root_item_record *ri_rec;
8028 while (!list_empty(list)) {
8029 ri_rec = list_first_entry(list, struct root_item_record,
8031 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - walk each queued tree root and scan all of its
 * blocks via run_next_block(), populating the extent/chunk/device/
 * block-group caches.  Roots are processed one at a time so snapshot
 * trees are handled in order; a final run_next_block() pass drains any
 * remaining pending blocks.
 *
 * NOTE(review): listing is truncated (interior lines missing); comments
 * describe only the visible fragment.
 */
8036 static int deal_root_from_list(struct list_head *list,
8037 struct btrfs_root *root,
8038 struct block_info *bits,
8040 struct cache_tree *pending,
8041 struct cache_tree *seen,
8042 struct cache_tree *reada,
8043 struct cache_tree *nodes,
8044 struct cache_tree *extent_cache,
8045 struct cache_tree *chunk_cache,
8046 struct rb_root *dev_cache,
8047 struct block_group_tree *block_group_cache,
8048 struct device_extent_tree *dev_extent_cache)
8053 while (!list_empty(list)) {
8054 struct root_item_record *rec;
8055 struct extent_buffer *buf;
8056 rec = list_entry(list->next,
8057 struct root_item_record, list);
8059 buf = read_tree_block(root->fs_info->tree_root,
8060 rec->bytenr, rec->level_size, 0);
8061 if (!extent_buffer_uptodate(buf)) {
8062 free_extent_buffer(buf);
8066 add_root_to_pending(buf, extent_cache, pending,
8067 seen, nodes, rec->objectid);
8069 * To rebuild extent tree, we need deal with snapshot
8070 * one by one, otherwise we deal with node firstly which
8071 * can maximize readahead.
8074 ret = run_next_block(root, bits, bits_nr, &last,
8075 pending, seen, reada, nodes,
8076 extent_cache, chunk_cache,
8077 dev_cache, block_group_cache,
8078 dev_extent_cache, rec);
8082 free_extent_buffer(buf);
8083 list_del(&rec->list);
/* Drain whatever blocks are still pending after all roots are queued. */
8089 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8090 reada, nodes, extent_cache, chunk_cache,
8091 dev_cache, block_group_cache,
8092 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents - top-level driver for the extent/chunk pass of
 * fsck: builds the caches, queues the tree root, chunk root and every
 * ROOT_ITEM from the tree of tree roots (normal and half-dropped trees
 * separately), walks them all, then cross-checks chunks, extent refs and
 * devices.  All the per-run caches are torn down before returning.
 *
 * NOTE(review): listing is truncated (interior lines missing); comments
 * describe only the visible fragment.
 */
8102 static int check_chunks_and_extents(struct btrfs_root *root)
8104 struct rb_root dev_cache;
8105 struct cache_tree chunk_cache;
8106 struct block_group_tree block_group_cache;
8107 struct device_extent_tree dev_extent_cache;
8108 struct cache_tree extent_cache;
8109 struct cache_tree seen;
8110 struct cache_tree pending;
8111 struct cache_tree reada;
8112 struct cache_tree nodes;
8113 struct extent_io_tree excluded_extents;
8114 struct cache_tree corrupt_blocks;
8115 struct btrfs_path path;
8116 struct btrfs_key key;
8117 struct btrfs_key found_key;
8119 struct block_info *bits;
8121 struct extent_buffer *leaf;
8123 struct btrfs_root_item ri;
8124 struct list_head dropping_trees;
8125 struct list_head normal_trees;
8126 struct btrfs_root *root1;
/* Initialize every cache used by this pass. */
8131 dev_cache = RB_ROOT;
8132 cache_tree_init(&chunk_cache);
8133 block_group_tree_init(&block_group_cache);
8134 device_extent_tree_init(&dev_extent_cache);
8136 cache_tree_init(&extent_cache);
8137 cache_tree_init(&seen);
8138 cache_tree_init(&pending);
8139 cache_tree_init(&nodes);
8140 cache_tree_init(&reada);
8141 cache_tree_init(&corrupt_blocks);
8142 extent_io_tree_init(&excluded_extents);
8143 INIT_LIST_HEAD(&dropping_trees);
8144 INIT_LIST_HEAD(&normal_trees);
/* Hook our scratch state into fs_info so other passes can see it. */
8147 root->fs_info->excluded_extents = &excluded_extents;
8148 root->fs_info->fsck_extent_cache = &extent_cache;
8149 root->fs_info->free_extent_hook = free_extent_hook;
8150 root->fs_info->corrupt_blocks = &corrupt_blocks;
8154 bits = malloc(bits_nr * sizeof(struct block_info));
8160 if (ctx.progress_enabled) {
8161 ctx.tp = TASK_EXTENTS;
8162 task_start(ctx.info);
/* Always walk the tree root and the chunk root first. */
8166 root1 = root->fs_info->tree_root;
8167 level = btrfs_header_level(root1->node);
8168 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8169 root1->node->start, 0, level, 0,
8170 btrfs_level_size(root1, level), NULL);
8173 root1 = root->fs_info->chunk_root;
8174 level = btrfs_header_level(root1->node);
8175 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8176 root1->node->start, 0, level, 0,
8177 btrfs_level_size(root1, level), NULL);
/* Scan the tree of tree roots for every other ROOT_ITEM. */
8180 btrfs_init_path(&path);
8183 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8184 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8189 leaf = path.nodes[0];
8190 slot = path.slots[0];
8191 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8192 ret = btrfs_next_leaf(root, &path);
8195 leaf = path.nodes[0];
8196 slot = path.slots[0];
8198 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8199 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8200 unsigned long offset;
8203 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8204 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8205 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress == 0 means the tree is not mid-deletion. */
8206 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8207 level = btrfs_root_level(&ri);
8208 level_size = btrfs_level_size(root, level);
8209 ret = add_root_item_to_list(&normal_trees,
8211 btrfs_root_bytenr(&ri),
8212 last_snapshot, level,
8213 0, level_size, NULL);
8217 level = btrfs_root_level(&ri);
8218 level_size = btrfs_level_size(root, level);
8219 objectid = found_key.objectid;
8220 btrfs_disk_key_to_cpu(&found_key,
8222 ret = add_root_item_to_list(&dropping_trees,
8224 btrfs_root_bytenr(&ri),
8225 last_snapshot, level,
8227 level_size, &found_key);
8234 btrfs_release_path(&path);
8237 * check_block can return -EAGAIN if it fixes something, please keep
8238 * this in mind when dealing with return values from these functions, if
8239 * we get -EAGAIN we want to fall through and restart the loop.
8241 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8242 &seen, &reada, &nodes, &extent_cache,
8243 &chunk_cache, &dev_cache, &block_group_cache,
8250 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8251 &pending, &seen, &reada, &nodes,
8252 &extent_cache, &chunk_cache, &dev_cache,
8253 &block_group_cache, &dev_extent_cache);
/* Cross-check phase: chunks, then extent refs, then devices. */
8260 ret = check_chunks(&chunk_cache, &block_group_cache,
8261 &dev_extent_cache, NULL, NULL, NULL, 0);
8268 ret = check_extent_refs(root, &extent_cache);
8275 ret = check_devices(&dev_cache, &dev_extent_cache);
8280 task_stop(ctx.info);
/* Normal teardown: unhook fsck state from fs_info and free caches. */
8282 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8283 extent_io_tree_cleanup(&excluded_extents);
8284 root->fs_info->fsck_extent_cache = NULL;
8285 root->fs_info->free_extent_hook = NULL;
8286 root->fs_info->corrupt_blocks = NULL;
8287 root->fs_info->excluded_extents = NULL;
8290 free_chunk_cache_tree(&chunk_cache);
8291 free_device_cache_tree(&dev_cache);
8292 free_block_group_tree(&block_group_cache);
8293 free_device_extent_tree(&dev_extent_cache);
8294 free_extent_cache_tree(&seen);
8295 free_extent_cache_tree(&pending);
8296 free_extent_cache_tree(&reada);
8297 free_extent_cache_tree(&nodes);
/* Error/loop-restart teardown path: frees everything including records. */
8300 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8301 free_extent_cache_tree(&seen);
8302 free_extent_cache_tree(&pending);
8303 free_extent_cache_tree(&reada);
8304 free_extent_cache_tree(&nodes);
8305 free_chunk_cache_tree(&chunk_cache);
8306 free_block_group_tree(&block_group_cache);
8307 free_device_cache_tree(&dev_cache);
8308 free_device_extent_tree(&dev_extent_cache);
8309 free_extent_record_cache(root->fs_info, &extent_cache);
8310 free_root_item_list(&normal_trees);
8311 free_root_item_list(&dropping_trees);
8312 extent_io_tree_cleanup(&excluded_extents);
/*
 * btrfs_fsck_reinit_root - replace @root's node with a freshly allocated
 * empty leaf (or reuse the old node when @overwrite is set), initialize its
 * header, and update the root item so the tree can be rebuilt from scratch.
 *
 * NOTE(review): listing is truncated (interior lines missing); comments
 * describe only the visible fragment.
 */
8316 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8317 struct btrfs_root *root, int overwrite)
8319 struct extent_buffer *c;
8320 struct extent_buffer *old = root->node;
8323 struct btrfs_disk_key disk_key = {0,0,0};
8329 extent_buffer_get(c);
8332 c = btrfs_alloc_free_block(trans, root,
8333 btrfs_level_size(root, 0),
8334 root->root_key.objectid,
8335 &disk_key, level, 0, 0);
8338 extent_buffer_get(c);
/* Write a clean header into the new (or reused) root block. */
8342 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8343 btrfs_set_header_level(c, level);
8344 btrfs_set_header_bytenr(c, c->start);
8345 btrfs_set_header_generation(c, trans->transid);
8346 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8347 btrfs_set_header_owner(c, root->root_key.objectid);
8349 write_extent_buffer(c, root->fs_info->fsid,
8350 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8352 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8353 btrfs_header_chunk_tree_uuid(c),
8356 btrfs_mark_buffer_dirty(c);
8358 * this case can happen in the following case:
8360 * 1.overwrite previous root.
8362 * 2.reinit reloc data root, this is because we skip pin
8363 * down reloc data tree before which means we can allocate
8364 * same block bytenr here.
8366 if (old->start == c->start) {
8367 btrfs_set_root_generation(&root->root_item,
8369 root->root_item.level = btrfs_header_level(root->node);
8370 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8371 &root->root_key, &root->root_item);
8373 free_extent_buffer(c);
/* Drop the old root buffer and queue the root for commit. */
8377 free_extent_buffer(old);
8379 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks - recursively mark every metadata block reachable
 * from @eb as pinned so it cannot be handed out by the allocator while the
 * extent tree is being rebuilt.
 *
 * @fs_info:   filesystem being repaired
 * @eb:        node/leaf to walk
 * @tree_root: non-zero when walking the tree root; in that case leaves are
 *             read and recursed into so the root items they contain can be
 *             followed to the other trees
 *
 * NOTE(review): loop framing, returns and some error paths are elided in
 * this view; comments describe only the visible statements.
 */
8383 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8384 struct extent_buffer *eb, int tree_root)
8386 struct extent_buffer *tmp;
8387 struct btrfs_root_item *ri;
8388 struct btrfs_key key;
8391 int level = btrfs_header_level(eb);
8397 * If we have pinned this block before, don't pin it again.
8398 * This can not only avoid forever loop with broken filesystem
8399 * but also give us some speedups.
8401 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8402 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8405 btrfs_pin_extent(fs_info, eb->start, eb->len);
8407 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8408 nritems = btrfs_header_nritems(eb);
8409 for (i = 0; i < nritems; i++) {
/* Leaf path (presumably): follow ROOT_ITEMs to pin the referenced trees. */
8411 btrfs_item_key_to_cpu(eb, &key, i);
8412 if (key.type != BTRFS_ROOT_ITEM_KEY)
8414 /* Skip the extent root and reloc roots */
8415 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8416 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8417 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8419 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8420 bytenr = btrfs_disk_root_bytenr(eb, ri);
8423 * If at any point we start needing the real root we
8424 * will have to build a stump root for the root we are
8425 * in, but for now this doesn't actually use the root so
8426 * just pass in extent_root.
8428 tmp = read_tree_block(fs_info->extent_root, bytenr,
8430 if (!extent_buffer_uptodate(tmp)) {
8431 fprintf(stderr, "Error reading root block\n");
8434 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8435 free_extent_buffer(tmp);
/* Node path: pin or recurse into each child pointer. */
8439 bytenr = btrfs_node_blockptr(eb, i);
8441 /* If we aren't the tree root don't read the block */
8442 if (level == 1 && !tree_root) {
8443 btrfs_pin_extent(fs_info, bytenr, leafsize);
8447 tmp = read_tree_block(fs_info->extent_root, bytenr,
8449 if (!extent_buffer_uptodate(tmp)) {
8450 fprintf(stderr, "Error reading tree block\n");
8453 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8454 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin all metadata reachable from the chunk tree and
 * the tree root (and, via the root items found there, every other tree) so
 * re-initializing the extent tree cannot allocate over live metadata.
 * Returns 0 on success or the error from pin_down_tree_blocks().
 */
8463 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8467 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
/* tree_root=1: the tree root's leaves must be read to reach other trees */
8471 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - rebuild the in-memory block group state purely from
 * the chunk tree, discarding whatever (possibly bogus) alloc bits were
 * loaded from disk. Used before re-creating the extent tree.
 *
 * NOTE(review): loop framing and some error/exit paths are elided in this
 * view; comments describe only the visible statements.
 */
8474 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8476 struct btrfs_block_group_cache *cache;
8477 struct btrfs_path *path;
8478 struct extent_buffer *leaf;
8479 struct btrfs_chunk *chunk;
8480 struct btrfs_key key;
8484 path = btrfs_alloc_path();
8489 key.type = BTRFS_CHUNK_ITEM_KEY;
8492 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8494 btrfs_free_path(path);
8499 * We do this in case the block groups were screwed up and had alloc
8500 * bits that aren't actually set on the chunks. This happens with
8501 * restored images every time and could happen in real life I guess.
8503 fs_info->avail_data_alloc_bits = 0;
8504 fs_info->avail_metadata_alloc_bits = 0;
8505 fs_info->avail_system_alloc_bits = 0;
8507 /* First we need to create the in-memory block groups */
8509 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8510 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8512 btrfs_free_path(path);
8520 leaf = path->nodes[0];
8521 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8522 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One in-memory block group per CHUNK_ITEM, typed from the chunk itself. */
8527 chunk = btrfs_item_ptr(leaf, path->slots[0],
8528 struct btrfs_chunk);
8529 btrfs_add_block_group(fs_info, 0,
8530 btrfs_chunk_type(leaf, chunk),
8531 key.objectid, key.offset,
8532 btrfs_chunk_length(leaf, chunk));
8533 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8534 key.offset + btrfs_chunk_length(leaf, chunk),
/* Second pass (presumably): walk the freshly created block groups. */
8540 cache = btrfs_lookup_first_block_group(fs_info, start);
8544 start = cache->key.objectid + cache->key.offset;
8547 btrfs_free_path(path);
/*
 * reset_balance - remove any pending balance state so a rebuilt extent
 * tree is not inconsistent with an in-progress balance: delete the balance
 * item, delete all tree-reloc root items, then re-initialize the data
 * reloc tree and give it a fresh root dir.
 *
 * NOTE(review): several error checks and the batching logic around
 * del_slot/del_nr are elided in this view.
 */
8551 static int reset_balance(struct btrfs_trans_handle *trans,
8552 struct btrfs_fs_info *fs_info)
8554 struct btrfs_root *root = fs_info->tree_root;
8555 struct btrfs_path *path;
8556 struct extent_buffer *leaf;
8557 struct btrfs_key key;
8558 int del_slot, del_nr = 0;
8562 path = btrfs_alloc_path();
/* Step 1: find and delete the BALANCE_ITEM, if any. */
8566 key.objectid = BTRFS_BALANCE_OBJECTID;
8567 key.type = BTRFS_BALANCE_ITEM_KEY;
8570 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8575 goto reinit_data_reloc;
8580 ret = btrfs_del_item(trans, root, path);
8583 btrfs_release_path(path);
/* Step 2: delete every TREE_RELOC root item from the tree root. */
8585 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8586 key.type = BTRFS_ROOT_ITEM_KEY;
8589 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8593 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8598 ret = btrfs_del_items(trans, root, path,
8605 btrfs_release_path(path);
8608 ret = btrfs_search_slot(trans, root, &key, path,
8615 leaf = path->nodes[0];
8616 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8617 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8619 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8624 del_slot = path->slots[0];
8633 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8637 btrfs_release_path(path);
/* Step 3: reinit the data reloc tree and recreate its root directory. */
8640 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8641 key.type = BTRFS_ROOT_ITEM_KEY;
8642 key.offset = (u64)-1;
8643 root = btrfs_read_fs_root(fs_info, &key);
8645 fprintf(stderr, "Error reading data reloc tree\n");
8646 ret = PTR_ERR(root);
8649 record_root_in_trans(trans, root);
8650 ret = btrfs_fsck_reinit_root(trans, root, 0);
8653 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8655 btrfs_free_path(path);
/*
 * reinit_extent_tree - throw away the on-disk extent tree and rebuild it:
 * pin all reachable metadata, reset the in-memory block groups from the
 * chunk tree, reinit the extent root, re-insert the block group items and
 * clear any pending balance. Refuses to run on mixed block groups.
 *
 * Returns 0 on success, negative errno otherwise.
 *
 * NOTE(review): several return/error lines are elided in this view; the
 * code below is otherwise unchanged except for fixing the user-visible
 * typo "reseting" -> "resetting".
 */
8659 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8660 struct btrfs_fs_info *fs_info)
8666 * The only reason we don't do this is because right now we're just
8667 * walking the trees we find and pinning down their bytes, we don't look
8668 * at any of the leaves. In order to do mixed groups we'd have to check
8669 * the leaves of any fs roots and pin down the bytes for any file
8670 * extents we find. Not hard but why do it if we don't have to?
8672 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8673 fprintf(stderr, "We don't support re-initing the extent tree "
8674 "for mixed block groups yet, please notify a btrfs "
8675 "developer you want to do this so they can add this "
8676 "functionality.\n");
8681 * first we need to walk all of the trees except the extent tree and pin
8682 * down the bytes that are in use so we don't overwrite any existing
8685 ret = pin_metadata_blocks(fs_info);
8687 fprintf(stderr, "error pinning down used bytes\n");
8692 * Need to drop all the block groups since we're going to recreate all
8695 btrfs_free_block_groups(fs_info);
8696 ret = reset_block_groups(fs_info);
8698 fprintf(stderr, "error resetting the block groups\n");
8702 /* Ok we can allocate now, reinit the extent root */
8703 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8705 fprintf(stderr, "extent root initialization failed\n");
8707 * When the transaction code is updated we should end the
8708 * transaction, but for now progs only knows about commit so
8709 * just return an error.
8715 * Now we have all the in-memory block groups setup so we can make
8716 * allocations properly, and the metadata we care about is safe since we
8717 * pinned all of it above.
8720 struct btrfs_block_group_cache *cache;
8722 cache = btrfs_lookup_first_block_group(fs_info, start);
8725 start = cache->key.objectid + cache->key.offset;
/* Re-insert a BLOCK_GROUP_ITEM for every in-memory block group. */
8726 ret = btrfs_insert_item(trans, fs_info->extent_root,
8727 &cache->key, &cache->item,
8728 sizeof(cache->item));
8730 fprintf(stderr, "Error adding block group\n");
8733 btrfs_extent_post_op(trans, fs_info->extent_root);
8736 ret = reset_balance(trans, fs_info);
8738 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - force a copy-on-write of the tree block @eb by
 * searching for its first key with cow=1 inside a one-shot transaction,
 * which rewrites the block at a new location. Used to repair blocks with
 * e.g. transid problems.
 *
 * Returns the result of the search (or a negative errno from setup).
 */
8743 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8745 struct btrfs_path *path;
8746 struct btrfs_trans_handle *trans;
8747 struct btrfs_key key;
8750 printf("Recowing metadata block %llu\n", eb->start);
/* Resolve the root that owns this block from its header owner field. */
8751 key.objectid = btrfs_header_owner(eb);
8752 key.type = BTRFS_ROOT_ITEM_KEY;
8753 key.offset = (u64)-1;
8755 root = btrfs_read_fs_root(root->fs_info, &key);
8757 fprintf(stderr, "Couldn't find owner root %llu\n",
8759 return PTR_ERR(root);
8762 path = btrfs_alloc_path();
8766 trans = btrfs_start_transaction(root, 1);
8767 if (IS_ERR(trans)) {
8768 btrfs_free_path(path);
8769 return PTR_ERR(trans);
/*
 * Descend only as far as @eb's level; the COW performed by the search
 * is what actually rewrites the block.
 */
8772 path->lowest_level = btrfs_header_level(eb);
8773 if (path->lowest_level)
8774 btrfs_node_key_to_cpu(eb, &key, 0);
8776 btrfs_item_key_to_cpu(eb, &key, 0);
8778 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8779 btrfs_commit_transaction(trans, root);
8780 btrfs_free_path(path);
/*
 * delete_bad_item - remove one item recorded as bad (in @bad) from the
 * subvolume tree identified by bad->root_id, inside its own transaction.
 *
 * Returns 0 on success or a negative errno.
 */
8784 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8786 struct btrfs_path *path;
8787 struct btrfs_trans_handle *trans;
8788 struct btrfs_key key;
8791 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8792 bad->key.type, bad->key.offset);
/* Look up the subvolume root that contains the bad item. */
8793 key.objectid = bad->root_id;
8794 key.type = BTRFS_ROOT_ITEM_KEY;
8795 key.offset = (u64)-1;
8797 root = btrfs_read_fs_root(root->fs_info, &key);
8799 fprintf(stderr, "Couldn't find owner root %llu\n",
8801 return PTR_ERR(root);
8804 path = btrfs_alloc_path();
8808 trans = btrfs_start_transaction(root, 1);
8809 if (IS_ERR(trans)) {
8810 btrfs_free_path(path);
8811 return PTR_ERR(trans);
/* ins_len=-1, cow=1: search for exact item so it can be deleted. */
8814 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8820 ret = btrfs_del_item(trans, root, path);
8822 btrfs_commit_transaction(trans, root);
8823 btrfs_free_path(path);
/*
 * zero_log_tree - clear the log tree pointer and level in the superblock
 * and commit, discarding any log so a later mount will not try to replay
 * it (repair commits would otherwise make log replay fail).
 *
 * Returns 0 on success or a negative errno.
 */
8827 static int zero_log_tree(struct btrfs_root *root)
8829 struct btrfs_trans_handle *trans;
8832 trans = btrfs_start_transaction(root, 1);
8833 if (IS_ERR(trans)) {
8834 ret = PTR_ERR(trans);
8837 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8838 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8839 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - read the data extent [start, start+len) one sector at a
 * time into @buf, checksum each sector and insert the checksum into the
 * csum tree.
 *
 * NOTE(review): the 'len' parameter declaration and the error checks after
 * each call are elided in this view.
 */
8843 static int populate_csum(struct btrfs_trans_handle *trans,
8844 struct btrfs_root *csum_root, char *buf, u64 start,
8851 while (offset < len) {
8852 sectorsize = csum_root->sectorsize;
8853 ret = read_extent_data(csum_root, buf, start + offset,
/* start + len is passed as the allocation end hint for csum insertion. */
8857 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8858 start + offset, buf, sectorsize);
8861 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - walk every EXTENT_DATA item of one
 * fs/subvolume tree and (re)generate csum tree entries for each regular
 * (non-inline, non-prealloc) file extent via populate_csum().
 *
 * Returns 0 on success or a negative errno.
 */
8866 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8867 struct btrfs_root *csum_root,
8868 struct btrfs_root *cur_root)
8870 struct btrfs_path *path;
8871 struct btrfs_key key;
8872 struct extent_buffer *node;
8873 struct btrfs_file_extent_item *fi;
8880 path = btrfs_alloc_path();
/* One sector-sized scratch buffer reused for every extent. */
8883 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8893 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8896 /* Iterate all regular file extents and fill its csum */
8898 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8900 if (key.type != BTRFS_EXTENT_DATA_KEY)
8902 node = path->nodes[0];
8903 slot = path->slots[0];
8904 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8905 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8907 start = btrfs_file_extent_disk_bytenr(node, fi);
8908 len = btrfs_file_extent_disk_num_bytes(node, fi);
8910 ret = populate_csum(trans, csum_root, buf, start, len);
8917 * TODO: if next leaf is corrupted, jump to nearest next valid
8920 ret = btrfs_next_item(cur_root, path);
8930 btrfs_free_path(path);
/*
 * fill_csum_tree_from_fs - rebuild the csum tree by iterating every
 * fs/subvolume root item in the tree root and running
 * fill_csum_tree_from_one_fs_root() on each. Used when the extent tree
 * cannot be trusted (e.g. after --init-extent-tree).
 *
 * Returns 0 on success or a negative errno.
 */
8935 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8936 struct btrfs_root *csum_root)
8938 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8939 struct btrfs_path *path;
8940 struct btrfs_root *tree_root = fs_info->tree_root;
8941 struct btrfs_root *cur_root;
8942 struct extent_buffer *node;
8943 struct btrfs_key key;
8947 path = btrfs_alloc_path();
/* Start at the first fs tree and scan forward through all subvolumes. */
8951 key.objectid = BTRFS_FS_TREE_OBJECTID;
8953 key.type = BTRFS_ROOT_ITEM_KEY;
8955 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8964 node = path->nodes[0];
8965 slot = path->slots[0];
8966 btrfs_item_key_to_cpu(node, &key, slot);
8967 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8969 if (key.type != BTRFS_ROOT_ITEM_KEY)
8971 if (!is_fstree(key.objectid))
8973 key.offset = (u64)-1;
8975 cur_root = btrfs_read_fs_root(fs_info, &key);
8976 if (IS_ERR(cur_root) || !cur_root) {
8977 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8981 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8986 ret = btrfs_next_item(tree_root, path);
8996 btrfs_free_path(path);
/*
 * fill_csum_tree_from_extent - rebuild the csum tree by scanning the
 * extent tree: every EXTENT_ITEM flagged as DATA gets its checksums
 * regenerated via populate_csum(). Faster than the fs-tree walk but
 * requires a trustworthy extent tree.
 *
 * Returns 0 on success or a negative errno.
 */
9000 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9001 struct btrfs_root *csum_root)
9003 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9004 struct btrfs_path *path;
9005 struct btrfs_extent_item *ei;
9006 struct extent_buffer *leaf;
9008 struct btrfs_key key;
9011 path = btrfs_alloc_path();
9016 key.type = BTRFS_EXTENT_ITEM_KEY;
9019 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9021 btrfs_free_path(path);
/* Sector-sized scratch buffer shared across all extents. */
9025 buf = malloc(csum_root->sectorsize);
9027 btrfs_free_path(path);
9032 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9033 ret = btrfs_next_leaf(extent_root, path);
9041 leaf = path->nodes[0];
9043 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9044 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9049 ei = btrfs_item_ptr(leaf, path->slots[0],
9050 struct btrfs_extent_item);
/* Skip metadata extents; only data extents carry checksums. */
9051 if (!(btrfs_extent_flags(leaf, ei) &
9052 BTRFS_EXTENT_FLAG_DATA)) {
9057 ret = populate_csum(trans, csum_root, buf, key.objectid,
9064 btrfs_free_path(path);
9070 * Recalculate the csum and put it into the csum tree.
9072 * Extent tree init will wipe out all the extent info, so in that case, we
9073 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9074 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher: picks the fs-tree walk or the extent-tree walk depending on
 * the (elided in this view) search_fs_tree flag parameter.
 */
9076 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9077 struct btrfs_root *csum_root,
9081 return fill_csum_tree_from_fs(trans, csum_root);
9083 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Per-root summary of what the extent tree says the root node should be
 * (level, bytenr, generation - declarations partially elided in this
 * view), keyed by root objectid through the embedded cache_extent.
 */
9086 struct root_item_info {
9087 /* level of the root */
9089 /* number of nodes at this level, must be 1 for a root */
9093 struct cache_extent cache_extent;
/*
 * Maps root objectid -> struct root_item_info; built by
 * build_roots_info_cache() and torn down by free_roots_info_cache().
 */
9096 static struct cache_tree *roots_info_cache = NULL;
/*
 * free_roots_info_cache - release every root_item_info in the cache, then
 * the cache tree itself, and reset the global pointer. No-op when the
 * cache was never built.
 */
9098 static void free_roots_info_cache(void)
9100 if (!roots_info_cache)
9103 while (!cache_tree_empty(roots_info_cache)) {
9104 struct cache_extent *entry;
9105 struct root_item_info *rii;
9107 entry = first_cache_extent(roots_info_cache);
9110 remove_cache_extent(roots_info_cache, entry);
9111 rii = container_of(entry, struct root_item_info, cache_extent);
9115 free(roots_info_cache);
9116 roots_info_cache = NULL;
/*
 * build_roots_info_cache - scan the whole extent tree and, for every tree
 * block backref of type TREE_BLOCK_REF, record (or update) the
 * highest-level block seen for that root id in roots_info_cache. The
 * highest-level single block is the root node the root item should point
 * at; maybe_repair_root_item() consumes this cache.
 *
 * Returns 0 on success or a negative errno.
 *
 * NOTE(review): several declarations, error paths and loop framing lines
 * are elided in this view.
 */
9119 static int build_roots_info_cache(struct btrfs_fs_info *info)
9122 struct btrfs_key key;
9123 struct extent_buffer *leaf;
9124 struct btrfs_path *path;
9126 if (!roots_info_cache) {
9127 roots_info_cache = malloc(sizeof(*roots_info_cache));
9128 if (!roots_info_cache)
9130 cache_tree_init(roots_info_cache);
9133 path = btrfs_alloc_path();
9138 key.type = BTRFS_EXTENT_ITEM_KEY;
9141 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9144 leaf = path->nodes[0];
9147 struct btrfs_key found_key;
9148 struct btrfs_extent_item *ei;
9149 struct btrfs_extent_inline_ref *iref;
9150 int slot = path->slots[0];
9155 struct cache_extent *entry;
9156 struct root_item_info *rii;
9158 if (slot >= btrfs_header_nritems(leaf)) {
9159 ret = btrfs_next_leaf(info->extent_root, path);
9166 leaf = path->nodes[0];
9167 slot = path->slots[0];
9170 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9172 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9173 found_key.type != BTRFS_METADATA_ITEM_KEY)
9176 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9177 flags = btrfs_extent_flags(leaf, ei);
/* Data extents are irrelevant here; only tree blocks matter. */
9179 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9180 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM encodes the level in the key offset; EXTENT_ITEM
 * keeps it in a tree_block_info that precedes the inline refs.
 */
9183 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9184 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9185 level = found_key.offset;
9187 struct btrfs_tree_block_info *info;
9189 info = (struct btrfs_tree_block_info *)(ei + 1);
9190 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9191 level = btrfs_tree_block_level(leaf, info);
9195 * For a root extent, it must be of the following type and the
9196 * first (and only one) iref in the item.
9198 type = btrfs_extent_inline_ref_type(leaf, iref);
9199 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9202 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9203 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First sighting of this root id: create its cache entry. */
9205 rii = malloc(sizeof(struct root_item_info));
9210 rii->cache_extent.start = root_id;
9211 rii->cache_extent.size = 1;
9212 rii->level = (u8)-1;
9213 entry = &rii->cache_extent;
9214 ret = insert_cache_extent(roots_info_cache, entry);
9217 rii = container_of(entry, struct root_item_info,
9221 ASSERT(rii->cache_extent.start == root_id);
9222 ASSERT(rii->cache_extent.size == 1);
/* A strictly higher-level block becomes the new root candidate. */
9224 if (level > rii->level || rii->level == (u8)-1) {
9226 rii->bytenr = found_key.objectid;
9227 rii->gen = btrfs_extent_generation(leaf, ei);
9228 rii->node_count = 1;
9229 } else if (level == rii->level) {
9237 btrfs_free_path(path);
/*
 * maybe_repair_root_item - compare the root item at path->nodes[0] against
 * what roots_info_cache says the real root node is (bytenr/level/gen). In
 * read-only mode just report a mismatch; otherwise rewrite the root item
 * in place.
 *
 * @info:           filesystem (unused in the visible lines - presumably
 *                  used in elided code; TODO confirm)
 * @path:           positioned on the root item to check
 * @root_key:       key of that root item
 * @read_only_mode: non-zero = detect only, do not modify
 *
 * Return convention in the visible lines is via error messages only; the
 * actual return statements are elided in this view.
 */
9242 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9243 struct btrfs_path *path,
9244 const struct btrfs_key *root_key,
9245 const int read_only_mode)
9247 const u64 root_id = root_key->objectid;
9248 struct cache_extent *entry;
9249 struct root_item_info *rii;
9250 struct btrfs_root_item ri;
9251 unsigned long offset;
9253 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9256 "Error: could not find extent items for root %llu\n",
9257 root_key->objectid);
9261 rii = container_of(entry, struct root_item_info, cache_extent);
9262 ASSERT(rii->cache_extent.start == root_id);
9263 ASSERT(rii->cache_extent.size == 1);
/* More than one node at the top level means no unique root was found. */
9265 if (rii->node_count != 1) {
9267 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item out so its fields can be compared. */
9272 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9273 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9275 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9276 btrfs_root_level(&ri) != rii->level ||
9277 btrfs_root_generation(&ri) != rii->gen) {
9280 * If we're in repair mode but our caller told us to not update
9281 * the root item, i.e. just check if it needs to be updated, don't
9282 * print this message, since the caller will call us again shortly
9283 * for the same root item without read only mode (the caller will
9284 * open a transaction first).
9286 if (!(read_only_mode && repair))
9288 "%sroot item for root %llu,"
9289 " current bytenr %llu, current gen %llu, current level %u,"
9290 " new bytenr %llu, new gen %llu, new level %u\n",
9291 (read_only_mode ? "" : "fixing "),
9293 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9294 btrfs_root_level(&ri),
9295 rii->bytenr, rii->gen, rii->level);
/* A newer generation in the item than on disk is unexpected; warn. */
9297 if (btrfs_root_generation(&ri) > rii->gen) {
9299 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9300 root_id, btrfs_root_generation(&ri), rii->gen);
9304 if (!read_only_mode) {
9305 btrfs_set_root_bytenr(&ri, rii->bytenr);
9306 btrfs_set_root_level(&ri, rii->level);
9307 btrfs_set_root_generation(&ri, rii->gen);
9308 write_extent_buffer(path->nodes[0], &ri,
9309 offset, sizeof(ri));
9319 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9320 * caused read-only snapshots to be corrupted if they were created at a moment
9321 * when the source subvolume/snapshot had orphan items. The issue was that the
9322 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9323 * node instead of the post orphan cleanup root node.
9324 * So this function, and its callees, just detects and fixes those cases. Even
9325 * though the regression was for read-only snapshots, this function applies to
9326 * any snapshot/subvolume root.
9327 * This must be run before any other repair code - otherwise, other repair
9328 * code could, for example, delete or modify backrefs in the extent tree,
9329 * which would result in an inconsistent fs after repairing the root items.
/*
 * repair_root_items - walk all subvolume root items in the tree root and
 * detect/fix ones whose bytenr/level/generation disagree with the extent
 * tree (the 3.17-rc2 read-only snapshot corruption described above).
 * Transactions are opened lazily, only when a fix is actually needed, to
 * avoid rotating backup roots for nothing.
 *
 * Returns the number of repaired (or repair-needing) roots, or a negative
 * errno - TODO confirm, the accounting lines are elided in this view.
 */
9331 static int repair_root_items(struct btrfs_fs_info *info)
9333 struct btrfs_path *path = NULL;
9334 struct btrfs_key key;
9335 struct extent_buffer *leaf;
9336 struct btrfs_trans_handle *trans = NULL;
9341 ret = build_roots_info_cache(info);
9345 path = btrfs_alloc_path();
9351 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9352 key.type = BTRFS_ROOT_ITEM_KEY;
9357 * Avoid opening and committing transactions if a leaf doesn't have
9358 * any root items that need to be fixed, so that we avoid rotating
9359 * backup roots unnecessarily.
9362 trans = btrfs_start_transaction(info->tree_root, 1);
9363 if (IS_ERR(trans)) {
9364 ret = PTR_ERR(trans);
9369 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9373 leaf = path->nodes[0];
9376 struct btrfs_key found_key;
/* End of leaf: remember the next key and commit before moving on. */
9378 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9379 int no_more_keys = find_next_key(path, &key);
9381 btrfs_release_path(path);
9383 ret = btrfs_commit_transaction(trans,
9395 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9397 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9399 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9402 ret = maybe_repair_root_item(info, path, &found_key,
/* A fix is needed and no transaction is open yet: restart with one. */
9407 if (!trans && repair) {
9410 btrfs_release_path(path);
9420 free_roots_info_cache();
9421 btrfs_free_path(path);
9423 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Usage/help text for `btrfs check`, printed by usage(). Typos fixed:
 * "inegrity" -> "integrity" (twice) and "checkums" -> "checksums".
 */
9430 const char * const cmd_check_usage[] = {
9431 "btrfs check [options] <device>",
9432 "Check structural integrity of a filesystem (unmounted).",
9433 "Check structural integrity of an unmounted filesystem. Verify internal",
9434 "trees' consistency and item connectivity. In the repair mode try to",
9435 "fix the problems found.",
9436 "WARNING: the repair mode is considered dangerous",
9438 "-s|--super <superblock> use this superblock copy",
9439 "-b|--backup use the backup root copy",
9440 "--repair try to repair the filesystem",
9441 "--readonly run in read-only mode (default)",
9442 "--init-csum-tree create a new CRC tree",
9443 "--init-extent-tree create a new extent tree",
9444 "--check-data-csum verify checksums of data blocks",
9445 "-Q|--qgroup-report print a report on qgroup consistency",
9446 "-E|--subvol-extents <subvolid>",
9447 " print subvolume extents and sharing state",
9448 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9449 "-p|--progress indicate progress",
/*
 * cmd_check - entry point for `btrfs check`: parse options, open the
 * filesystem (optionally reinit extent/csum trees), then run the check
 * passes (extents, root items, free space cache, fs roots, csums, root
 * refs, qgroups) and print summary statistics.
 *
 * Returns the overall check/repair status (0 = clean).
 *
 * NOTE(review): this function is heavily elided in this view (switch/case
 * framing, goto labels, many error branches); comments describe only the
 * visible statements.
 */
9453 int cmd_check(int argc, char **argv)
9455 struct cache_tree root_cache;
9456 struct btrfs_root *root;
9457 struct btrfs_fs_info *info;
9460 u64 tree_root_bytenr = 0;
9461 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9464 int init_csum_tree = 0;
9466 int qgroup_report = 0;
9467 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* --- option parsing --- */
9471 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9472 OPT_CHECK_CSUM, OPT_READONLY };
9473 static const struct option long_options[] = {
9474 { "super", required_argument, NULL, 's' },
9475 { "repair", no_argument, NULL, OPT_REPAIR },
9476 { "readonly", no_argument, NULL, OPT_READONLY },
9477 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9478 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9479 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9480 { "backup", no_argument, NULL, 'b' },
9481 { "subvol-extents", required_argument, NULL, 'E' },
9482 { "qgroup-report", no_argument, NULL, 'Q' },
9483 { "tree-root", required_argument, NULL, 'r' },
9484 { "progress", no_argument, NULL, 'p' },
9488 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9492 case 'a': /* ignored */ break;
9494 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* -s: pick an alternate superblock mirror. */
9497 num = arg_strtou64(optarg);
9498 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9500 "ERROR: super mirror should be less than: %d\n",
9501 BTRFS_SUPER_MIRROR_MAX);
9504 bytenr = btrfs_sb_offset(((int)num));
9505 printf("using SB copy %llu, bytenr %llu\n", num,
9506 (unsigned long long)bytenr);
9512 subvolid = arg_strtou64(optarg);
9515 tree_root_bytenr = arg_strtou64(optarg);
9518 ctx.progress_enabled = true;
9522 usage(cmd_check_usage);
9524 printf("enabling repair mode\n");
9526 ctree_flags |= OPEN_CTREE_WRITES;
9532 printf("Creating a new CRC tree\n");
9535 ctree_flags |= OPEN_CTREE_WRITES;
9537 case OPT_INIT_EXTENT:
9538 init_extent_tree = 1;
9539 ctree_flags |= (OPEN_CTREE_WRITES |
9540 OPEN_CTREE_NO_BLOCK_GROUPS);
9543 case OPT_CHECK_CSUM:
9544 check_data_csum = 1;
9548 argc = argc - optind;
9550 if (check_argc_exact(argc, 1))
9551 usage(cmd_check_usage);
/* Spawn the progress-reporting task if requested. */
9553 if (ctx.progress_enabled) {
9554 ctx.tp = TASK_NOTHING;
9555 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9558 /* This check is the only reason for --readonly to exist */
9559 if (readonly && repair) {
9560 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9565 cache_tree_init(&root_cache);
/* Refuse to check a mounted filesystem. */
9567 if((ret = check_mounted(argv[optind])) < 0) {
9568 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9571 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9576 /* only allow partial opening under repair mode */
9578 ctree_flags |= OPEN_CTREE_PARTIAL;
9580 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9583 fprintf(stderr, "Couldn't open file system\n");
9589 root = info->fs_root;
9592 * repair mode will force us to commit transaction which
9593 * will make us fail to load log tree when mounting.
9595 if (repair && btrfs_super_log_root(info->super_copy)) {
9596 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9601 ret = zero_log_tree(root);
9603 fprintf(stderr, "fail to zero log tree\n");
9608 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* -Q: report-only mode, no checking. */
9609 if (qgroup_report) {
9610 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9612 ret = qgroup_verify_all(info);
9614 print_qgroup_report(1);
/* -E: subvolume extent/sharing report, no checking. */
9618 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9619 subvolid, argv[optind], uuidbuf);
9620 ret = print_extent_state(info, subvolid);
9623 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9625 if (!extent_buffer_uptodate(info->tree_root->node) ||
9626 !extent_buffer_uptodate(info->dev_root->node) ||
9627 !extent_buffer_uptodate(info->chunk_root->node)) {
9628 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
/* Optional destructive re-initialization of extent and/or csum trees. */
9633 if (init_extent_tree || init_csum_tree) {
9634 struct btrfs_trans_handle *trans;
9636 trans = btrfs_start_transaction(info->extent_root, 0);
9637 if (IS_ERR(trans)) {
9638 fprintf(stderr, "Error starting transaction\n");
9639 ret = PTR_ERR(trans);
9643 if (init_extent_tree) {
9644 printf("Creating a new extent tree\n");
9645 ret = reinit_extent_tree(trans, info);
9650 if (init_csum_tree) {
9651 fprintf(stderr, "Reinit crc root\n");
9652 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9654 fprintf(stderr, "crc root initialization failed\n");
9659 ret = fill_csum_tree(trans, info->csum_root,
9662 fprintf(stderr, "crc refilling failed\n");
9667 * Ok now we commit and run the normal fsck, which will add
9668 * extent entries for all of the items it finds.
9670 ret = btrfs_commit_transaction(trans, info->extent_root);
9674 if (!extent_buffer_uptodate(info->extent_root->node)) {
9675 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9679 if (!extent_buffer_uptodate(info->csum_root->node)) {
9680 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
/* --- check passes, in dependency order --- */
9685 if (!ctx.progress_enabled)
9686 fprintf(stderr, "checking extents\n");
9687 ret = check_chunks_and_extents(root);
9689 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
/* Must run before other repairs; see comment above repair_root_items(). */
9691 ret = repair_root_items(info);
9695 fprintf(stderr, "Fixed %d roots.\n", ret);
9697 } else if (ret > 0) {
9699 "Found %d roots with an outdated root item.\n",
9702 "Please run a filesystem check with the option --repair to fix them.\n");
9707 if (!ctx.progress_enabled)
9708 fprintf(stderr, "checking free space cache\n");
9709 ret = check_space_cache(root);
9714 * We used to have to have these hole extents in between our real
9715 * extents so if we don't have this flag set we need to make sure there
9716 * are no gaps in the file extents for inodes, otherwise we can just
9717 * ignore it when this happens.
9719 no_holes = btrfs_fs_incompat(root->fs_info,
9720 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9721 if (!ctx.progress_enabled)
9722 fprintf(stderr, "checking fs roots\n");
9723 ret = check_fs_roots(root, &root_cache);
9727 fprintf(stderr, "checking csums\n");
9728 ret = check_csums(root);
9732 fprintf(stderr, "checking root refs\n");
9733 ret = check_root_refs(root, &root_cache);
/* Re-COW blocks queued for rewrite during the checks (repair mode). */
9737 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9738 struct extent_buffer *eb;
9740 eb = list_first_entry(&root->fs_info->recow_ebs,
9741 struct extent_buffer, recow);
9742 list_del_init(&eb->recow);
9743 ret = recow_extent_buffer(root, eb);
/* Delete items that were flagged as bad during the checks. */
9748 while (!list_empty(&delete_items)) {
9749 struct bad_item *bad;
9751 bad = list_first_entry(&delete_items, struct bad_item, list);
9752 list_del_init(&bad->list);
9754 ret = delete_bad_item(root, bad);
9758 if (info->quota_enabled) {
9760 fprintf(stderr, "checking quota groups\n");
9761 err = qgroup_verify_all(info);
/* Anything still queued for recow means unfixed transid errors. */
9766 if (!list_empty(&root->fs_info->recow_ebs)) {
9767 fprintf(stderr, "Transid errors in file system\n");
9771 print_qgroup_report(0);
9772 if (found_old_backref) { /*
9773 * there was a disk format change when mixed
9774 * backref was in testing tree. The old format
9775 * existed about one week.
9777 printf("\n * Found old mixed backref format. "
9778 "The old format is not supported! *"
9779 "\n * Please mount the FS in readonly mode, "
9780 "backup data and re-format the FS. *\n\n");
/* --- summary statistics accumulated by the check passes --- */
9783 printf("found %llu bytes used err is %d\n",
9784 (unsigned long long)bytes_used, ret);
9785 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9786 printf("total tree bytes: %llu\n",
9787 (unsigned long long)total_btree_bytes);
9788 printf("total fs tree bytes: %llu\n",
9789 (unsigned long long)total_fs_tree_bytes);
9790 printf("total extent tree bytes: %llu\n",
9791 (unsigned long long)total_extent_tree_bytes);
9792 printf("btree space waste bytes: %llu\n",
9793 (unsigned long long)btree_space_waste);
9794 printf("file data blocks allocated: %llu\n referenced %llu\n",
9795 (unsigned long long)data_bytes_allocated,
9796 (unsigned long long)data_bytes_referenced);
9798 free_root_recs_tree(&root_cache);
9802 if (ctx.progress_enabled)
9803 task_deinit(ctx.info);