2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
48 TASK_NOTHING, /* have to be the last element */
53 enum task_position tp;
55 struct task_info *info;
/*
 * File-scope accumulators and option flags for the filesystem check.
 * NOTE(review): this extract is decimated (original line numbers are
 * embedded as prefixes and many lines are missing) — comments below are
 * grounded only in the visible lines.
 */
/* Space-usage totals accumulated while walking the trees. */
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
/* Set when an old-style (pre-mixed-backref) extent backref is seen. */
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
/* Command-line driven behavior flags. */
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
/* Shared context for the background progress-printing task. */
74 static struct task_ctx ctx = { 0 };
/*
 * Progress-indicator task body (thread-entry signature: void *(*)(void *)).
 * Prints the current task-position string with a spinning character,
 * rate-limited by task_period_start()/task_period_wait().
 * NOTE(review): loop structure and return are missing from this extract.
 */
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
/* Four-phase "spinner" cycled via count % 4 below. */
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
81 static char *task_position_string[] = {
83 "checking free space cache",
/* 1000 ms period between status updates. */
87 task_period_start(priv->info, 1000 /* 1s */);
/* TASK_NOTHING means no phase is active — nothing to print. */
89 if (priv->tp == TASK_NOTHING)
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
97 task_period_wait(priv->info);
102 static int print_status_return(void *p)
/*
 * Backref bookkeeping types. extent_backref is the common header embedded
 * in data_backref and tree_backref; the bitfields record which on-disk
 * structures referencing an extent were actually found.
 */
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
/* Data-extent backref; embeds extent_backref as its node. */
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
/* Tree-block backref; embeds extent_backref as its node. */
149 struct tree_backref {
150 struct extent_backref node;
/*
 * Per-extent record built while scanning the extent tree: backref lists,
 * duplicate list, cache_extent for tree indexing, and consistency flags.
 */
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
/*
 * A single name linking an inode into a directory; tracks which of the
 * three reference items (DIR_ITEM / DIR_INDEX / INODE_REF) were seen.
 */
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
/* 8-bit filetype as stored in the dir item. */
189 unsigned int filetype:8;
191 unsigned int ref_type;
/* Queued root item to be walked later (with optional drop key). */
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
/* Error bits for an inode_backref; printed by print_ref_error(). */
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/* A [start, start+len) gap in an inode's file extents, kept in an rbtree. */
223 struct file_extent_hole {
229 /* Compatible function to allow reuse of old codes */
/*
 * Return the start of the lowest-keyed hole in the tree (rb_first).
 * NOTE(review): the empty-tree return value is not visible in this extract.
 */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rbtree comparator for file_extent_hole nodes, ordered by ->start;
 * deliberately never returns 0 so equal-start holes both stay in the
 * tree and are merged later by add_file_extent_hole().
 */
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
260 /* Hole 2 will be merge center */
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
/*
 * Inserts the new hole, then coalesces: once with the predecessor, and
 * iteratively with successors that overlap or touch the merged range.
 * NOTE(review): start/len parameters and malloc-failure handling are
 * missing from this extract.
 */
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
277 hole = malloc(sizeof(*hole));
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* Predecessor overlaps/touches: absorb it and extend backwards. */
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iterate merge with next holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
303 if (hole->start + hole->len >= next->start) {
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
/*
 * rb_search() comparator: 'data' carries the target hole whose ->start is
 * looked up; returns based on whether start falls before or inside the
 * node's [start, start+len) range.
 */
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
321 hole = (struct file_extent_hole *)data;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole in the record
335 * This will do the hole split and is much restrict than add.
/*
 * Remove [start, start+len) from an existing hole: finds the containing
 * hole, erases it, then re-adds the left and/or right remainders if the
 * deleted range does not cover the whole hole.
 */
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
346 struct rb_node *node;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
/* Deleted range must not extend past the found hole's end. */
357 if (start + len > hole->start + hole->len)
361 * Now there will be no overflap, delete the hole and re-add the
362 * split(s) if they exists.
/* Left remainder: [hole->start, start). */
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
/* Right remainder: [start+len, hole end). */
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Copy every hole from 'src' into 'dst' via add_file_extent_hole(), which
 * also merges adjacent ranges in the destination.
 */
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
/* Free all holes: repeatedly erase rb_first() until the tree is empty. */
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
417 node = rb_first(holes);
/*
 * Aggregated state for one inode collected across tree leaves; flags
 * record which items were seen, 'holes' tracks missing file extents,
 * 'orphan_extents' holds extents whose inode item is lost.
 */
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
443 struct rb_root holes;
444 struct list_head orphan_extents;
/* Error bits for an inode_record; printed by print_inode_error(). */
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/* One reference to a subvolume root (dir item/index + root ref/backref). */
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
/*
 * NOTE(review): the struct headers for the next fields are missing from
 * this extract; visible members suggest a root record (backrefs +
 * found_root_item), a ptr_node (cache), and a shared_node (caches +
 * current inode) — confirm against the full source.
 */
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
489 struct cache_extent cache;
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
/* Walk state: shared-node cache plus a per-level node stack. */
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
514 struct btrfs_key key;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * First-touch of a root within a transaction: enable dirty tracking,
 * stamp the transid, and pin the current node as commit_root (taking an
 * extra extent-buffer reference).
 */
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
528 extent_buffer_get(root->node);
/*
 * Map a POSIX st_mode file-type (S_IFMT bits) to the corresponding
 * BTRFS_FT_* directory-entry type via a static lookup table.
 */
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/* rbtree comparator for device_record nodes, ordered by ->devid. */
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode_record: shallow memcpy of the struct, then fresh
 * copies of the backref list, orphan-extent list, and hole tree.
 * Returns ERR_PTR(-ENOMEM) on allocation failure; on error the partially
 * built lists are torn down (cleanup labels visible below).
 */
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct inode_backref *tmp;
570 struct orphan_data_extent *src_orphan;
571 struct orphan_data_extent *dst_orphan;
575 rec = malloc(sizeof(*rec));
577 return ERR_PTR(-ENOMEM);
578 memcpy(rec, orig_rec, sizeof(*rec));
/* Reset list/tree heads so the clone does not alias the original's. */
580 INIT_LIST_HEAD(&rec->backrefs);
581 INIT_LIST_HEAD(&rec->orphan_extents);
582 rec->holes = RB_ROOT;
/* Backrefs carry a trailing name; copy header + name + NUL together. */
584 list_for_each_entry(orig, &orig_rec->backrefs, list) {
585 size = sizeof(*orig) + orig->namelen + 1;
586 backref = malloc(size);
591 memcpy(backref, orig, size);
592 list_add_tail(&backref->list, &rec->backrefs);
594 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595 dst_orphan = malloc(sizeof(*dst_orphan));
600 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
603 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Error path: free whatever was cloned so far. */
609 if (!list_empty(&rec->backrefs))
610 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611 list_del(&orig->list);
615 if (!list_empty(&rec->orphan_extents))
616 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617 list_del(&orig->list);
/* Print every lost (orphan) data extent recorded for a tree; no-op when empty. */
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
629 struct orphan_data_extent *orphan;
631 if (list_empty(orphan_extents))
633 printf("The following data extent is lost in tree %llu:\n",
635 list_for_each_entry(orphan, orphan_extents, list) {
636 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Decode rec->errors into a human-readable line on stderr, then print the
 * orphan extents and/or hole list when the corresponding bits are set.
 */
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
644 u64 root_objectid = root->root_key.objectid;
645 int errors = rec->errors;
649 /* reloc root errors, we print its corresponding fs root objectid*/
650 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651 root_objectid = root->root_key.offset;
652 fprintf(stderr, "reloc");
654 fprintf(stderr, "root %llu inode %llu errors %x",
655 (unsigned long long) root_objectid,
656 (unsigned long long) rec->ino, rec->errors);
/* One clause per I_ERR_* bit, in bit order. */
658 if (errors & I_ERR_NO_INODE_ITEM)
659 fprintf(stderr, ", no inode item");
660 if (errors & I_ERR_NO_ORPHAN_ITEM)
661 fprintf(stderr, ", no orphan item");
662 if (errors & I_ERR_DUP_INODE_ITEM)
663 fprintf(stderr, ", dup inode item");
664 if (errors & I_ERR_DUP_DIR_INDEX)
665 fprintf(stderr, ", dup dir index");
666 if (errors & I_ERR_ODD_DIR_ITEM)
667 fprintf(stderr, ", odd dir item");
668 if (errors & I_ERR_ODD_FILE_EXTENT)
669 fprintf(stderr, ", odd file extent");
670 if (errors & I_ERR_BAD_FILE_EXTENT)
671 fprintf(stderr, ", bad file extent");
672 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673 fprintf(stderr, ", file extent overlap");
674 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675 fprintf(stderr, ", file extent discount");
676 if (errors & I_ERR_DIR_ISIZE_WRONG)
677 fprintf(stderr, ", dir isize wrong");
678 if (errors & I_ERR_FILE_NBYTES_WRONG)
679 fprintf(stderr, ", nbytes wrong");
680 if (errors & I_ERR_ODD_CSUM_ITEM)
681 fprintf(stderr, ", odd csum item");
682 if (errors & I_ERR_SOME_CSUM_MISSING)
683 fprintf(stderr, ", some csum missing");
684 if (errors & I_ERR_LINK_COUNT_WRONG)
685 fprintf(stderr, ", link count wrong");
686 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687 fprintf(stderr, ", orphan file extent");
688 fprintf(stderr, "\n");
689 /* Print the orphan extents if needed */
690 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
693 /* Print the holes if needed */
694 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695 struct file_extent_hole *hole;
696 struct rb_node *node;
699 node = rb_first(&rec->holes);
700 fprintf(stderr, "Found file extent holes:\n");
703 hole = rb_entry(node, struct file_extent_hole, node);
704 fprintf(stderr, "\tstart: %llu, len: %llu\n",
705 hole->start, hole->len);
706 node = rb_next(node);
/* Empty hole tree: report the whole (sector-rounded) file as one hole. */
709 fprintf(stderr, "\tstart: 0, len: %llu\n",
710 round_up(rec->isize, root->sectorsize));
/* Decode REF_ERR_* bits into a comma-separated clause list on stderr. */
714 static void print_ref_error(int errors)
716 if (errors & REF_ERR_NO_DIR_ITEM)
717 fprintf(stderr, ", no dir item");
718 if (errors & REF_ERR_NO_DIR_INDEX)
719 fprintf(stderr, ", no dir index");
720 if (errors & REF_ERR_NO_INODE_REF)
721 fprintf(stderr, ", no inode ref");
722 if (errors & REF_ERR_DUP_DIR_ITEM)
723 fprintf(stderr, ", dup dir item");
724 if (errors & REF_ERR_DUP_DIR_INDEX)
725 fprintf(stderr, ", dup dir index");
726 if (errors & REF_ERR_DUP_INODE_REF)
727 fprintf(stderr, ", dup inode ref");
728 if (errors & REF_ERR_INDEX_UNMATCH)
729 fprintf(stderr, ", index unmatch");
730 if (errors & REF_ERR_FILETYPE_UNMATCH)
731 fprintf(stderr, ", filetype unmatch");
732 if (errors & REF_ERR_NAME_TOO_LONG)
733 fprintf(stderr, ", name too long");
734 if (errors & REF_ERR_NO_ROOT_REF)
735 fprintf(stderr, ", no root ref");
736 if (errors & REF_ERR_NO_ROOT_BACKREF)
737 fprintf(stderr, ", no root backref");
738 if (errors & REF_ERR_DUP_ROOT_REF)
739 fprintf(stderr, ", dup root ref");
740 if (errors & REF_ERR_DUP_ROOT_BACKREF)
741 fprintf(stderr, ", dup root backref");
742 fprintf(stderr, "\n");
/*
 * Look up (or lazily create) the inode_record for 'ino' in a cache tree.
 * With 'mod' set and the record shared (refs > 1), the record is cloned
 * copy-on-write before being handed back for modification.
 */
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
748 struct ptr_node *node;
749 struct cache_extent *cache;
750 struct inode_record *rec = NULL;
753 cache = lookup_cache_extent(inode_cache, ino, 1);
755 node = container_of(cache, struct ptr_node, cache);
/* Shared record + modification intent => clone first (CoW). */
757 if (mod && rec->refs > 1) {
758 node->data = clone_inode_rec(rec);
759 if (IS_ERR(node->data))
/* Cache miss: build a fresh record keyed on [ino, 1). */
765 rec = calloc(1, sizeof(*rec));
767 return ERR_PTR(-ENOMEM);
769 rec->extent_start = (u64)-1;
771 INIT_LIST_HEAD(&rec->backrefs);
772 INIT_LIST_HEAD(&rec->orphan_extents);
773 rec->holes = RB_ROOT;
775 node = malloc(sizeof(*node));
778 return ERR_PTR(-ENOMEM);
780 node->cache.start = ino;
781 node->cache.size = 1;
/* The free-ino objectid gets special-cased here (body not visible). */
784 if (ino == BTRFS_FREE_INO_OBJECTID)
787 ret = insert_cache_extent(inode_cache, &node->cache);
789 return ERR_PTR(-EEXIST);
/* Unlink and free every orphan_data_extent on the list. */
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
796 struct orphan_data_extent *orphan;
798 while (!list_empty(orphan_extents)) {
799 orphan = list_entry(orphan_extents->next,
800 struct orphan_data_extent, list);
801 list_del(&orphan->list);
/*
 * Release an inode_record: its backref list, orphan extents and hole
 * tree. NOTE(review): the refcount-drop guard is not visible here.
 */
806 static void free_inode_rec(struct inode_record *rec)
808 struct inode_backref *backref;
813 while (!list_empty(&rec->backrefs)) {
814 backref = list_entry(rec->backrefs.next,
815 struct inode_backref, list);
816 list_del(&backref->list);
819 free_orphan_data_extents(&rec->orphan_extents);
820 free_file_extent_holes(&rec->holes);
/*
 * A record is releasable once it is fully checked, error-free, its inode
 * item was found, link counts agree, and no backrefs remain pending.
 */
824 static int can_free_inode_rec(struct inode_record *rec)
826 if (!rec->errors && rec->checked && rec->found_inode_item &&
827 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalize an inode_record: drop fully-consistent backrefs, derive the
 * remaining I_ERR_* bits from mode-specific invariants, and free the
 * record from the cache when can_free_inode_rec() allows it.
 */
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833 struct inode_record *rec)
835 struct cache_extent *cache;
836 struct inode_backref *tmp, *backref;
837 struct ptr_node *node;
838 unsigned char filetype;
840 if (!rec->found_inode_item)
843 filetype = imode_to_type(rec->imode);
/* Complete, consistent backrefs can be retired early. */
844 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845 if (backref->found_dir_item && backref->found_dir_index) {
846 if (backref->filetype != filetype)
847 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848 if (!backref->errors && backref->found_inode_ref &&
849 rec->nlink == rec->found_link) {
850 list_del(&backref->list);
856 if (!rec->checked || rec->merging)
/* Directory invariants: isize matches name bytes, no file extents. */
859 if (S_ISDIR(rec->imode)) {
860 if (rec->found_size != rec->isize)
861 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862 if (rec->found_file_extent)
863 rec->errors |= I_ERR_ODD_FILE_EXTENT;
/* Regular-file/symlink invariants: nbytes, and hole coverage. */
864 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865 if (rec->found_dir_item)
866 rec->errors |= I_ERR_ODD_DIR_ITEM;
867 if (rec->found_size != rec->nbytes)
868 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
869 if (rec->nlink > 0 && !no_holes &&
870 (rec->extent_end < rec->isize ||
871 first_extent_gap(&rec->holes) < rec->isize))
872 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
/* Csum items must match the nodatasum flag. */
875 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876 if (rec->found_csum_item && rec->nodatasum)
877 rec->errors |= I_ERR_ODD_CSUM_ITEM;
878 if (rec->some_csum_missing && !rec->nodatasum)
879 rec->errors |= I_ERR_SOME_CSUM_MISSING;
882 BUG_ON(rec->refs != 1);
883 if (can_free_inode_rec(rec)) {
884 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885 node = container_of(cache, struct ptr_node, cache);
886 BUG_ON(node->data != rec);
887 remove_cache_extent(inode_cache, &node->cache);
/*
 * Probe for an orphan item (BTRFS_ORPHAN_OBJECTID/ORPHAN_ITEM_KEY) for
 * 'ino' with a read-only search; path released before returning.
 */
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
895 struct btrfs_path path;
896 struct btrfs_key key;
899 key.objectid = BTRFS_ORPHAN_OBJECTID;
900 key.type = BTRFS_ORPHAN_ITEM_KEY;
903 btrfs_init_path(&path);
904 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905 btrfs_release_path(&path);
/*
 * Consume a BTRFS_INODE_ITEM for the active record: flag duplicates,
 * then populate nlink/isize/nbytes/imode/nodatasum from the leaf.
 */
911 static int process_inode_item(struct extent_buffer *eb,
912 int slot, struct btrfs_key *key,
913 struct shared_node *active_node)
915 struct inode_record *rec;
916 struct btrfs_inode_item *item;
918 rec = active_node->current;
919 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920 if (rec->found_inode_item) {
921 rec->errors |= I_ERR_DUP_INODE_ITEM;
924 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925 rec->nlink = btrfs_inode_nlink(eb, item);
926 rec->isize = btrfs_inode_size(eb, item);
927 rec->nbytes = btrfs_inode_nbytes(eb, item);
928 rec->imode = btrfs_inode_mode(eb, item);
929 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
931 rec->found_inode_item = 1;
/* NOTE(review): the orphan-item check guarding this error is not visible. */
933 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref matching (dir, name) on rec->backrefs, or allocate a
 * zeroed one with the name copied inline (NUL-terminated) and append it.
 * NOTE(review): the malloc at line 954 has no visible NULL check.
 */
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
940 int namelen, u64 dir)
942 struct inode_backref *backref;
944 list_for_each_entry(backref, &rec->backrefs, list) {
/* Multiple-objectids placeholder matches by dir/name rules elided here. */
945 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
947 if (backref->dir != dir || backref->namelen != namelen)
949 if (memcmp(name, backref->name, namelen))
954 backref = malloc(sizeof(*backref) + namelen + 1);
955 memset(backref, 0, sizeof(*backref));
957 backref->namelen = namelen;
958 memcpy(backref->name, name, namelen);
959 backref->name[namelen] = '\0';
960 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one directory/inode reference item for 'ino'. Depending on the
 * item type (DIR_INDEX / DIR_ITEM / INODE_REF|EXTREF) it cross-checks
 * the already-seen items for duplicates, index mismatches, and filetype
 * mismatches, accumulating REF_ERR_* bits on the backref.
 */
964 static int add_inode_backref(struct cache_tree *inode_cache,
965 u64 ino, u64 dir, u64 index,
966 const char *name, int namelen,
967 int filetype, int itemtype, int errors)
969 struct inode_record *rec;
970 struct inode_backref *backref;
972 rec = get_inode_rec(inode_cache, ino, 1);
974 backref = get_inode_backref(rec, name, namelen, dir);
976 backref->errors |= errors;
977 if (itemtype == BTRFS_DIR_INDEX_KEY) {
978 if (backref->found_dir_index)
979 backref->errors |= REF_ERR_DUP_DIR_INDEX;
980 if (backref->found_inode_ref && backref->index != index)
981 backref->errors |= REF_ERR_INDEX_UNMATCH;
982 if (backref->found_dir_item && backref->filetype != filetype)
983 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
985 backref->index = index;
986 backref->filetype = filetype;
987 backref->found_dir_index = 1;
988 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
990 if (backref->found_dir_item)
991 backref->errors |= REF_ERR_DUP_DIR_ITEM;
992 if (backref->found_dir_index && backref->filetype != filetype)
993 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
995 backref->filetype = filetype;
996 backref->found_dir_item = 1;
997 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
998 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
999 if (backref->found_inode_ref)
1000 backref->errors |= REF_ERR_DUP_INODE_REF;
1001 if (backref->found_dir_index && backref->index != index)
1002 backref->errors |= REF_ERR_INDEX_UNMATCH;
1004 backref->index = index;
1006 backref->ref_type = itemtype;
1007 backref->found_inode_ref = 1;
1012 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold 'src' into 'dst' when two shared-tree walks saw the same inode:
 * replay src's backrefs into dst_cache, OR in the found_* flags, merge
 * hole trees, link/size counters, extent ranges (flagging overlaps and
 * punching holes for gaps), and copy the inode item if dst lacks one.
 */
1016 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1017 struct cache_tree *dst_cache)
1019 struct inode_backref *backref;
/* Re-add each of src's reference items as if freshly scanned. */
1024 list_for_each_entry(backref, &src->backrefs, list) {
1025 if (backref->found_dir_index) {
1026 add_inode_backref(dst_cache, dst->ino, backref->dir,
1027 backref->index, backref->name,
1028 backref->namelen, backref->filetype,
1029 BTRFS_DIR_INDEX_KEY, backref->errors);
1031 if (backref->found_dir_item) {
1033 add_inode_backref(dst_cache, dst->ino,
1034 backref->dir, 0, backref->name,
1035 backref->namelen, backref->filetype,
1036 BTRFS_DIR_ITEM_KEY, backref->errors);
1038 if (backref->found_inode_ref) {
1039 add_inode_backref(dst_cache, dst->ino,
1040 backref->dir, backref->index,
1041 backref->name, backref->namelen, 0,
1042 backref->ref_type, backref->errors);
1046 if (src->found_dir_item)
1047 dst->found_dir_item = 1;
1048 if (src->found_file_extent)
1049 dst->found_file_extent = 1;
1050 if (src->found_csum_item)
1051 dst->found_csum_item = 1;
1052 if (src->some_csum_missing)
1053 dst->some_csum_missing = 1;
/* Only merge holes when src reports an earlier first gap than dst. */
1054 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1055 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1060 BUG_ON(src->found_link < dir_count);
1061 dst->found_link += src->found_link - dir_count;
1062 dst->found_size += src->found_size;
/* Merge contiguous extent coverage; (u64)-1 means "none seen yet". */
1063 if (src->extent_start != (u64)-1) {
1064 if (dst->extent_start == (u64)-1) {
1065 dst->extent_start = src->extent_start;
1066 dst->extent_end = src->extent_end;
1068 if (dst->extent_end > src->extent_start)
1069 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1070 else if (dst->extent_end < src->extent_start) {
1071 ret = add_file_extent_hole(&dst->holes,
1073 src->extent_start - dst->extent_end);
1075 if (dst->extent_end < src->extent_end)
1076 dst->extent_end = src->extent_end;
1080 dst->errors |= src->errors;
1081 if (src->found_inode_item) {
1082 if (!dst->found_inode_item) {
1083 dst->nlink = src->nlink;
1084 dst->isize = src->isize;
1085 dst->nbytes = src->nbytes;
1086 dst->imode = src->imode;
1087 dst->nodatasum = src->nodatasum;
1088 dst->found_inode_item = 1;
/* Both sides had an inode item — that is a duplicate. */
1090 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move all inode records from src_node's caches into dst_node's,
 * merging on key collision (-EEXIST), for both the root_cache and the
 * inode_cache in turn; finally re-establish dst_node->current.
 */
1098 static int splice_shared_node(struct shared_node *src_node,
1099 struct shared_node *dst_node)
1101 struct cache_extent *cache;
1102 struct ptr_node *node, *ins;
1103 struct cache_tree *src, *dst;
1104 struct inode_record *rec, *conflict;
1105 u64 current_ino = 0;
1109 if (--src_node->refs == 0)
1111 if (src_node->current)
1112 current_ino = src_node->current->ino;
/* First pass works on the root caches; second pass (below) on inodes. */
1114 src = &src_node->root_cache;
1115 dst = &dst_node->root_cache;
1117 cache = search_cache_extent(src, 0);
1119 node = container_of(cache, struct ptr_node, cache);
1121 cache = next_cache_extent(cache);
1124 remove_cache_extent(src, &node->cache);
/* NOTE(review): malloc at line 1127 has no visible NULL check. */
1127 ins = malloc(sizeof(*ins));
1128 ins->cache.start = node->cache.start;
1129 ins->cache.size = node->cache.size;
1133 ret = insert_cache_extent(dst, &ins->cache);
/* Destination already tracks this inode: merge and drop the source rec. */
1134 if (ret == -EEXIST) {
1135 conflict = get_inode_rec(dst, rec->ino, 1);
1136 BUG_ON(IS_ERR(conflict));
1137 merge_inode_recs(rec, conflict, dst);
1139 conflict->checked = 1;
1140 if (dst_node->current == conflict)
1141 dst_node->current = NULL;
1143 maybe_free_inode_rec(dst, conflict);
1144 free_inode_rec(rec);
/* Switch from root caches to inode caches for the second pass. */
1151 if (src == &src_node->root_cache) {
1152 src = &src_node->inode_cache;
1153 dst = &dst_node->inode_cache;
/* Advance dst's "current" inode if src was further along. */
1157 if (current_ino > 0 && (!dst_node->current ||
1158 current_ino > dst_node->current->ino)) {
1159 if (dst_node->current) {
1160 dst_node->current->checked = 1;
1161 maybe_free_inode_rec(dst, dst_node->current);
1163 dst_node->current = get_inode_rec(dst, current_ino, 1);
1164 BUG_ON(IS_ERR(dst_node->current));
/* cache_tree destructor callback: free the inode_record behind a ptr_node. */
1169 static void free_inode_ptr(struct cache_extent *cache)
1171 struct ptr_node *node;
1172 struct inode_record *rec;
1174 node = container_of(cache, struct ptr_node, cache);
1176 free_inode_rec(rec);
/* Generates free_inode_recs_tree() using free_inode_ptr per entry. */
1180 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/* Look up the shared_node covering 'bytenr' in the shared cache tree. */
1182 static struct shared_node *find_shared_node(struct cache_tree *shared,
1185 struct cache_extent *cache;
1186 struct shared_node *node;
1188 cache = lookup_cache_extent(shared, bytenr, 1);
1190 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate and register a shared_node keyed on [bytenr, 1) with empty
 * root and inode caches; 'refs' seeds the reference count.
 */
1196 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1199 struct shared_node *node;
1201 node = calloc(1, sizeof(*node));
1204 node->cache.start = bytenr;
1205 node->cache.size = 1;
1206 cache_tree_init(&node->root_cache);
1207 cache_tree_init(&node->inode_cache);
1210 ret = insert_cache_extent(shared, &node->cache);
/*
 * Descend into a node referenced by multiple trees: register (or find)
 * its shared_node and make it the active level; if the node was already
 * visited, splice its accumulated records into the current active node.
 */
1215 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1216 struct walk_control *wc, int level)
1218 struct shared_node *node;
1219 struct shared_node *dest;
1222 if (level == wc->active_node)
1225 BUG_ON(wc->active_node <= level);
1226 node = find_shared_node(&wc->shared, bytenr);
/* First visit: create the shared node and activate this level. */
1228 ret = add_shared_node(&wc->shared, bytenr, refs);
1230 node = find_shared_node(&wc->shared, bytenr);
1231 wc->nodes[level] = node;
1232 wc->active_node = level;
/* Dead root (refs == 0): just drop our reference and its caches. */
1236 if (wc->root_level == wc->active_node &&
1237 btrfs_root_refs(&root->root_item) == 0) {
1238 if (--node->refs == 0) {
1239 free_inode_recs_tree(&node->root_cache);
1240 free_inode_recs_tree(&node->inode_cache);
1241 remove_cache_extent(&wc->shared, &node->cache);
/* Revisit: merge this node's records into the active node's. */
1247 dest = wc->nodes[wc->active_node];
1248 splice_shared_node(node, dest);
1249 if (node->refs == 0) {
1250 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Pop the active shared node when ascending past 'level': find the next
 * higher occupied level, then splice the finished node's records upward
 * (unless the root is dead and this was the last reference).
 */
1256 static int leave_shared_node(struct btrfs_root *root,
1257 struct walk_control *wc, int level)
1259 struct shared_node *node;
1260 struct shared_node *dest;
1263 if (level == wc->root_level)
/* Find the next occupied level above us. */
1266 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1270 BUG_ON(i >= BTRFS_MAX_LEVEL);
1272 node = wc->nodes[wc->active_node];
1273 wc->nodes[wc->active_node] = NULL;
1274 wc->active_node = i;
1276 dest = wc->nodes[wc->active_node];
1277 if (wc->active_node < wc->root_level ||
1278 btrfs_root_refs(&root->root_item) > 0) {
1279 BUG_ON(node->refs <= 1);
1280 splice_shared_node(node, dest);
1282 BUG_ON(node->refs < 2);
/*
 * Return values (from the original comment):
 */
1291 * 1 - if the root with id child_root_id is a child of root parent_root_id
1292 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1293 * has other root(s) as parent(s)
1294 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Checks the tree root for a direct ROOT_REF (parent -> child), then
 * scans the child's ROOT_BACKREF items for a matching parent.
 */
1296 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1299 struct btrfs_path path;
1300 struct btrfs_key key;
1301 struct extent_buffer *leaf;
1305 btrfs_init_path(&path);
/* Forward lookup: ROOT_REF keyed (parent, REF, child). */
1307 key.objectid = parent_root_id;
1308 key.type = BTRFS_ROOT_REF_KEY;
1309 key.offset = child_root_id;
1310 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1314 btrfs_release_path(&path);
/* Reverse scan: iterate the child's ROOT_BACKREF items. */
1318 key.objectid = child_root_id;
1319 key.type = BTRFS_ROOT_BACKREF_KEY;
1321 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1327 leaf = path.nodes[0];
1328 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1329 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1332 leaf = path.nodes[0];
1335 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1336 if (key.objectid != child_root_id ||
1337 key.type != BTRFS_ROOT_BACKREF_KEY)
1342 if (key.offset == parent_root_id) {
1343 btrfs_release_path(&path);
1350 btrfs_release_path(&path);
1353 return has_parent ? 0 : 2;
/*
 * Walk every btrfs_dir_item packed into one leaf item, recording a
 * backref per name. The target cache depends on location.type:
 * INODE_ITEM -> inode cache, ROOT_ITEM -> root cache, anything else is
 * reported and filed under BTRFS_MULTIPLE_OBJECTIDS.
 */
1356 static int process_dir_item(struct btrfs_root *root,
1357 struct extent_buffer *eb,
1358 int slot, struct btrfs_key *key,
1359 struct shared_node *active_node)
1369 struct btrfs_dir_item *di;
1370 struct inode_record *rec;
1371 struct cache_tree *root_cache;
1372 struct cache_tree *inode_cache;
1373 struct btrfs_key location;
1374 char namebuf[BTRFS_NAME_LEN];
1376 root_cache = &active_node->root_cache;
1377 inode_cache = &active_node->inode_cache;
1378 rec = active_node->current;
1379 rec->found_dir_item = 1;
1381 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1382 total = btrfs_item_size_nr(eb, slot);
/* Items may hold several dir entries back-to-back; walk by byte offset. */
1383 while (cur < total) {
1385 btrfs_dir_item_key_to_cpu(eb, di, &location);
1386 name_len = btrfs_dir_name_len(eb, di);
1387 data_len = btrfs_dir_data_len(eb, di);
1388 filetype = btrfs_dir_type(eb, di);
/* Directory isize is the sum of entry name lengths. */
1390 rec->found_size += name_len;
1391 if (name_len <= BTRFS_NAME_LEN) {
/* Oversized name: clamp and flag. */
1395 len = BTRFS_NAME_LEN;
1396 error = REF_ERR_NAME_TOO_LONG;
1398 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1400 if (location.type == BTRFS_INODE_ITEM_KEY) {
1401 add_inode_backref(inode_cache, location.objectid,
1402 key->objectid, key->offset, namebuf,
1403 len, filetype, key->type, error);
1404 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1405 add_inode_backref(root_cache, location.objectid,
1406 key->objectid, key->offset,
1407 namebuf, len, filetype,
1410 fprintf(stderr, "invalid location in dir item %u\n",
1412 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1413 key->objectid, key->offset, namebuf,
1414 len, filetype, key->type, error);
/* Advance to the next packed entry. */
1417 len = sizeof(*di) + name_len + data_len;
1418 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must contain exactly one entry. */
1421 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1422 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed into one leaf item; key->objectid is
 * the inode, key->offset the parent directory. Each name becomes a
 * backref, with oversized names clamped and flagged.
 */
1427 static int process_inode_ref(struct extent_buffer *eb,
1428 int slot, struct btrfs_key *key,
1429 struct shared_node *active_node)
1437 struct cache_tree *inode_cache;
1438 struct btrfs_inode_ref *ref;
1439 char namebuf[BTRFS_NAME_LEN];
1441 inode_cache = &active_node->inode_cache;
1443 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1444 total = btrfs_item_size_nr(eb, slot);
1445 while (cur < total) {
1446 name_len = btrfs_inode_ref_name_len(eb, ref);
1447 index = btrfs_inode_ref_index(eb, ref);
1448 if (name_len <= BTRFS_NAME_LEN) {
1452 len = BTRFS_NAME_LEN;
1453 error = REF_ERR_NAME_TOO_LONG;
1455 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1456 add_inode_backref(inode_cache, key->objectid, key->offset,
1457 index, namebuf, len, 0, key->type, error);
/* Advance past this ref header plus its inline name. */
1459 len = sizeof(*ref) + name_len;
1460 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * process_inode_extref - record every INODE_EXTREF name in one leaf item.
 *
 * Same job as process_inode_ref() but for the extended-ref format, where
 * the parent directory inode is stored in the entry itself rather than in
 * the item key offset.  NOTE(review): interior lines (declarations, loop
 * bookkeeping, return) are elided in this view.
 */
1466 static int process_inode_extref(struct extent_buffer *eb,
1467 int slot, struct btrfs_key *key,
1468 struct shared_node *active_node)
1477 struct cache_tree *inode_cache;
1478 struct btrfs_inode_extref *extref;
1479 char namebuf[BTRFS_NAME_LEN];
1481 inode_cache = &active_node->inode_cache;
1483 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1484 total = btrfs_item_size_nr(eb, slot);
/* multiple extrefs may be packed into one item */
1485 while (cur < total) {
1486 name_len = btrfs_inode_extref_name_len(eb, extref);
1487 index = btrfs_inode_extref_index(eb, extref);
/* extref carries the parent dir inode explicitly */
1488 parent = btrfs_inode_extref_parent(eb, extref);
1489 if (name_len <= BTRFS_NAME_LEN) {
/* over-long name: truncate and flag */
1493 len = BTRFS_NAME_LEN;
1494 error = REF_ERR_NAME_TOO_LONG;
1496 read_extent_buffer(eb, namebuf,
1497 (unsigned long)(extref + 1), len);
1498 add_inode_backref(inode_cache, key->objectid, parent,
1499 index, namebuf, len, 0, key->type, error);
/* step to the next packed extref entry */
1501 len = sizeof(*extref) + name_len;
1502 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * count_csum_range - count how many bytes of [start, start+len) are covered
 * by checksum items in the csum tree; the covered byte count is returned
 * via *found.  NOTE(review): interior lines (local declarations, loop
 * advance, accumulation into *found) are elided in this view.
 */
1509 static int count_csum_range(struct btrfs_root *root, u64 start,
1510 u64 len, u64 *found)
1512 struct btrfs_key key;
1513 struct btrfs_path path;
1514 struct extent_buffer *leaf;
/* bytes of csum per block depend on the csum type recorded in the super */
1519 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1521 btrfs_init_path(&path);
1523 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1525 key.type = BTRFS_EXTENT_CSUM_KEY;
1527 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * Search landed past the wanted offset: the previous item may still
 * cover 'start', so step back one slot if it is also a csum item.
 */
1531 if (ret > 0 && path.slots[0] > 0) {
1532 leaf = path.nodes[0];
1533 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1534 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1535 key.type == BTRFS_EXTENT_CSUM_KEY)
1540 leaf = path.nodes[0];
/* ran off the current leaf: move to the next one */
1541 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1547 leaf = path.nodes[0];
1550 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1551 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1552 key.type != BTRFS_EXTENT_CSUM_KEY)
/* NOTE(review): redundant - the key was already read just above */
1555 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* item starts at or beyond the end of the queried range: done */
1556 if (key.offset >= start + len)
1559 if (key.offset > start)
/* each csum_size bytes of item data checksums one sector */
1562 size = btrfs_item_size_nr(leaf, path.slots[0]);
1563 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1564 if (csum_end > start) {
/* clamp the covered span to the queried range */
1565 size = min(csum_end - start, len);
1574 btrfs_release_path(&path);
/*
 * process_file_extent - validate one EXTENT_DATA item and update the
 * in-memory inode record: tracks contiguity (overlaps/holes), accumulates
 * found_size, and checks checksum coverage for regular extents.
 * NOTE(review): interior lines (declarations, hole-tracking details,
 * return paths) are elided in this view.
 */
1580 static int process_file_extent(struct btrfs_root *root,
1581 struct extent_buffer *eb,
1582 int slot, struct btrfs_key *key,
1583 struct shared_node *active_node)
1585 struct inode_record *rec;
1586 struct btrfs_file_extent_item *fi;
1588 u64 disk_bytenr = 0;
1589 u64 extent_offset = 0;
/* mask for sector alignment checks / round-up below */
1590 u64 mask = root->sectorsize - 1;
1594 rec = active_node->current;
1595 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1596 rec->found_file_extent = 1;
/* first extent seen for this inode: initialize the tracked range */
1598 if (rec->extent_start == (u64)-1) {
1599 rec->extent_start = key->offset;
1600 rec->extent_end = key->offset;
/* overlap with the previous extent is an error; a gap is a hole */
1603 if (rec->extent_end > key->offset)
1604 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1605 else if (rec->extent_end < key->offset) {
1606 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1607 key->offset - rec->extent_end);
1612 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1613 extent_type = btrfs_file_extent_type(eb, fi);
1615 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1616 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1618 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1619 rec->found_size += num_bytes;
/* round inline length up to a full sector for extent_end tracking */
1620 num_bytes = (num_bytes + mask) & ~mask;
1621 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1622 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1623 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1624 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1625 extent_offset = btrfs_file_extent_offset(eb, fi);
/* regular extents must be non-empty and sector aligned */
1626 if (num_bytes == 0 || (num_bytes & mask))
1627 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* referenced range must fit inside the extent's ram_bytes */
1628 if (num_bytes + extent_offset >
1629 btrfs_file_extent_ram_bytes(eb, fi))
1630 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* prealloc extents can never be compressed/encrypted/encoded */
1631 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1632 (btrfs_file_extent_compression(eb, fi) ||
1633 btrfs_file_extent_encryption(eb, fi) ||
1634 btrfs_file_extent_other_encoding(eb, fi)))
1635 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 means a hole extent; only count real data */
1636 if (disk_bytenr > 0)
1637 rec->found_size += num_bytes;
1639 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1641 rec->extent_end = key->offset + num_bytes;
1644 * The data reloc tree will copy full extents into its inode and then
1645 * copy the corresponding csums. Because the extent it copied could be
1646 * a preallocated extent that hasn't been written to yet there may be no
1647 * csums to copy, ergo we won't have csums for our file extent. This is
1648 * ok so just don't bother checking csums if the inode belongs to the
1651 if (disk_bytenr > 0 &&
1652 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID {
/* csums cover the compressed on-disk bytes, not the logical length */
1654 if (btrfs_file_extent_compression(eb, fi))
1655 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1657 disk_bytenr += extent_offset;
1659 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1662 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1664 rec->found_csum_item = 1;
/* REG extents should be fully covered by csums */
1665 if (found < num_bytes)
1666 rec->some_csum_missing = 1;
/* PREALLOC extents must have no csums at all */
1667 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1669 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * process_one_leaf - scan every item in a fs-tree leaf and dispatch to the
 * per-item-type processors, maintaining the "current inode record" as the
 * scan crosses inode-number boundaries.  NOTE(review): interior lines
 * (break statements, default case, end of loop/function) are elided.
 */
1675 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1676 struct walk_control *wc)
1678 struct btrfs_key key;
1682 struct cache_tree *inode_cache;
1683 struct shared_node *active_node;
/* dead root (refs == 0) at its top level: nothing to record */
1685 if (wc->root_level == wc->active_node &&
1686 btrfs_root_refs(&root->root_item) == 0)
1689 active_node = wc->nodes[wc->active_node];
1690 inode_cache = &active_node->inode_cache;
1691 nritems = btrfs_header_nritems(eb);
1692 for (i = 0; i < nritems; i++) {
1693 btrfs_item_key_to_cpu(eb, &key, i);
/* free-space-cache and orphan items are not inode metadata; skip */
1695 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1697 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* moved past the current inode: finalize it and fetch the next record */
1700 if (active_node->current == NULL ||
1701 active_node->current->ino < key.objectid) {
1702 if (active_node->current) {
1703 active_node->current->checked = 1;
1704 maybe_free_inode_rec(inode_cache,
1705 active_node->current);
1707 active_node->current = get_inode_rec(inode_cache,
1709 BUG_ON(IS_ERR(active_node->current));
/* dispatch on item type */
1712 case BTRFS_DIR_ITEM_KEY:
1713 case BTRFS_DIR_INDEX_KEY:
1714 ret = process_dir_item(root, eb, i, &key, active_node);
1716 case BTRFS_INODE_REF_KEY:
1717 ret = process_inode_ref(eb, i, &key, active_node);
1719 case BTRFS_INODE_EXTREF_KEY:
1720 ret = process_inode_extref(eb, i, &key, active_node);
1722 case BTRFS_INODE_ITEM_KEY:
1723 ret = process_inode_item(eb, i, &key, active_node);
1725 case BTRFS_EXTENT_DATA_KEY:
1726 ret = process_file_extent(root, eb, i, &key,
/*
 * reada_walk_down - issue readahead for all child blocks of 'node' from
 * 'slot' to the end, so the subsequent synchronous reads in walk_down_tree
 * hit the cache.  NOTE(review): some interior lines (early-return for
 * leaves, local declarations) are elided in this view.
 */
1736 static void reada_walk_down(struct btrfs_root *root,
1737 struct extent_buffer *node, int slot)
1746 level = btrfs_header_level(node);
1750 nritems = btrfs_header_nritems(node);
/* children are one level below the current node */
1751 blocksize = btrfs_level_size(root, level - 1);
1752 for (i = slot; i < nritems; i++) {
1753 bytenr = btrfs_node_blockptr(node, i);
1754 ptr_gen = btrfs_node_ptr_generation(node, i);
/* fire-and-forget: failures surface later on the real read */
1755 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1760 * Check the child node/leaf by the following condition:
1761 * 1. the first item key of the node/leaf should be the same as the one
1763 * 2. block in parent node should match the child node/leaf.
1764 * 3. generation of parent node and child's header should be consistent.
1766 * Or the child node/leaf pointed by the key in parent is not valid.
1768 * We hope to check leaf owner too, but since subvol may share leaves,
1769 * which makes leaf owner check not so strong, key check should be
1770 * sufficient enough for that case.
/*
 * check_child_node - verify a child block against its parent pointer:
 * first key, block number, and generation must all match.
 * NOTE(review): lines setting the return value on mismatch and the final
 * return are elided in this view.
 */
1772 static int check_child_node(struct btrfs_root *root,
1773 struct extent_buffer *parent, int slot,
1774 struct extent_buffer *child)
1776 struct btrfs_key parent_key;
1777 struct btrfs_key child_key;
1780 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* leaves store item keys; interior nodes store node keys */
1781 if (btrfs_header_level(child) == 0)
1782 btrfs_item_key_to_cpu(child, &child_key, 0);
1784 btrfs_node_key_to_cpu(child, &child_key, 0);
/* condition 1: parent's pointer key must equal child's first key */
1786 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1789 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1790 parent_key.objectid, parent_key.type, parent_key.offset,
1791 child_key.objectid, child_key.type, child_key.offset);
/* condition 2: child must live at the bytenr the parent points to */
1793 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1795 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1796 btrfs_node_blockptr(parent, slot),
1797 btrfs_header_bytenr(child));
/* condition 3: generations must agree */
1799 if (btrfs_node_ptr_generation(parent, slot) !=
1800 btrfs_header_generation(child)) {
1802 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1803 btrfs_header_generation(child),
1804 btrfs_node_ptr_generation(parent, slot));
/*
 * walk_down_tree - iteratively descend the tree along 'path', processing
 * each leaf with process_one_leaf() and validating/reading each child
 * block on the way down.  Shared (refs > 1) subtrees are tracked via
 * enter_shared_node() so they are only fully scanned once.
 * NOTE(review): many interior lines (error handling, goto targets,
 * slot-advance on skip, function end) are elided in this view.
 */
1809 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1810 struct walk_control *wc, int *level)
1812 enum btrfs_tree_block_status status;
1815 struct extent_buffer *next;
1816 struct extent_buffer *cur;
1821 WARN_ON(*level < 0);
1822 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* refcount of the current root block decides shared-node handling */
1823 ret = btrfs_lookup_extent_info(NULL, root,
1824 path->nodes[*level]->start,
1825 *level, 1, &refs, NULL);
1832 ret = enter_shared_node(root, path->nodes[*level]->start,
1840 while (*level >= 0) {
1841 WARN_ON(*level < 0);
1842 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1843 cur = path->nodes[*level];
/* header level must agree with the path position */
1845 if (btrfs_header_level(cur) != *level)
/* exhausted this node: caller walks back up */
1848 if (path->slots[*level] >= btrfs_header_nritems(cur))
/* reached a leaf: scan its items */
1851 ret = process_one_leaf(root, cur, wc);
1856 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1857 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1858 blocksize = btrfs_level_size(root, *level - 1);
/* check the child's refcount; shared children are deferred */
1859 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1865 ret = enter_shared_node(root, bytenr, refs,
/* child already covered via another path: skip this slot */
1868 path->slots[*level]++;
1873 next = btrfs_find_tree_block(root, bytenr, blocksize);
/* not cached or stale: readahead siblings, then read synchronously */
1874 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1875 free_extent_buffer(next);
1876 reada_walk_down(root, cur, path->slots[*level]);
1877 next = read_tree_block(root, bytenr, blocksize,
/* unreadable child: record the parent as corrupt */
1879 if (!extent_buffer_uptodate(next)) {
1880 struct btrfs_key node_key;
1882 btrfs_node_key_to_cpu(path->nodes[*level],
1884 path->slots[*level]);
1885 btrfs_add_corrupt_extent_record(root->fs_info,
1887 path->nodes[*level]->start,
1888 root->leafsize, *level);
1894 ret = check_child_node(root, cur, path->slots[*level], next);
/* sanity-check the block's internal structure before descending */
1900 if (btrfs_is_leaf(next))
1901 status = btrfs_check_leaf(root, NULL, next);
1903 status = btrfs_check_node(root, NULL, next);
1904 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1905 free_extent_buffer(next);
/* descend: replace the path entry one level down */
1910 *level = *level - 1;
1911 free_extent_buffer(path->nodes[*level]);
1912 path->nodes[*level] = next;
1913 path->slots[*level] = 0;
/* mark the node exhausted so walk_up_tree moves past it */
1916 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * walk_up_tree - pop finished levels off 'path' until a node with an
 * unvisited slot is found (advancing that slot), releasing shared-node
 * tracking as levels above the active node are left.
 * NOTE(review): the slot-advance/return inside the loop and the final
 * return are elided in this view.
 */
1920 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1921 struct walk_control *wc, int *level)
1924 struct extent_buffer *leaf;
1926 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1927 leaf = path->nodes[i];
/* this node still has unvisited children: resume here */
1928 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* level fully consumed: drop its buffer and go up */
1933 free_extent_buffer(path->nodes[*level]);
1934 path->nodes[*level] = NULL;
1935 BUG_ON(*level > wc->active_node);
1936 if (*level == wc->active_node)
1937 leave_shared_node(root, wc, *level);
/*
 * check_root_dir - validate the inode record of a tree's root directory:
 * it must have a clean inode item, nlink 1 with no counted links, and
 * exactly one inode_ref backref named ".." with index 0 and no dir
 * item/index entries.  NOTE(review): the early-return/goto lines between
 * the checks and the final return are elided in this view.
 */
1944 static int check_root_dir(struct inode_record *rec)
1946 struct inode_backref *backref;
1949 if (!rec->found_inode_item || rec->errors)
1951 if (rec->nlink != 1 || rec->found_link != 0)
1953 if (list_empty(&rec->backrefs))
/* only the first backref is inspected; it must be the inode_ref */
1955 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1956 if (!backref->found_inode_ref)
/* root dir is self-referential: its only name is ".." at index 0 */
1958 if (backref->index != 0 || backref->namelen != 2 ||
1959 memcmp(backref->name, "..", 2))
1961 if (backref->found_dir_index || backref->found_dir_item)
/*
 * repair_inode_isize - rewrite a directory inode's size field to the
 * accumulated found_size and clear I_ERR_DIR_ISIZE_WRONG.
 * Searches with offset (u64)-1 and steps back one slot to land on the
 * INODE_ITEM.  NOTE(review): error-path lines (ret checks, the slot
 * decrement after the !path->slots[0] guard, gotos, return) are elided.
 */
1968 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1969 struct btrfs_root *root, struct btrfs_path *path,
1970 struct inode_record *rec)
1972 struct btrfs_inode_item *ei;
1973 struct btrfs_key key;
1976 key.objectid = rec->ino;
1977 key.type = BTRFS_INODE_ITEM_KEY;
/* offset -1 places us just past the inode item; step back below */
1978 key.offset = (u64)-1;
1980 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* slot 0 means there is no previous item to step back to */
1984 if (!path->slots[0]) {
1991 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* previous item belongs to another inode: the inode item is missing */
1992 if (key.objectid != rec->ino) {
1997 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1998 struct btrfs_inode_item);
1999 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2000 btrfs_mark_buffer_dirty(path->nodes[0]);
2001 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2002 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2003 root->root_key.objectid);
2005 btrfs_release_path(path);
/*
 * repair_inode_orphan_item - insert the missing ORPHAN_ITEM for an
 * unlinked inode and clear I_ERR_NO_ORPHAN_ITEM on success.
 * NOTE(review): the ret check guarding the error-flag clear and the
 * return are elided in this view.
 */
2009 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2010 struct btrfs_root *root,
2011 struct btrfs_path *path,
2012 struct inode_record *rec)
2016 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2017 btrfs_release_path(path);
2019 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * repair_inode_nbytes - rewrite the inode's nbytes field to the
 * accumulated found_size and clear I_ERR_FILE_NBYTES_WRONG.
 * Unlike repair_inode_isize() this searches with offset 0 (elided line)
 * and expects an exact hit.  NOTE(review): ret-check/goto lines and the
 * return are elided in this view.
 */
2023 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2024 struct btrfs_root *root,
2025 struct btrfs_path *path,
2026 struct inode_record *rec)
2028 struct btrfs_inode_item *ei;
2029 struct btrfs_key key;
2032 key.objectid = rec->ino;
2033 key.type = BTRFS_INODE_ITEM_KEY;
2036 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2043 /* Since ret == 0, no need to check anything */
2044 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2045 struct btrfs_inode_item);
2046 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2047 btrfs_mark_buffer_dirty(path->nodes[0]);
2048 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2049 printf("reset nbytes for ino %llu root %llu\n",
2050 rec->ino, root->root_key.objectid);
2052 btrfs_release_path(path);
/*
 * add_missing_dir_index - insert a DIR_INDEX item for a backref that has
 * a dir_item but no dir_index, then update the parent directory's inode
 * record (found_size / isize error flag) to match.
 * Runs its own transaction.  NOTE(review): NULL/ret checks after the
 * allocations and insert, and the final return, are elided in this view.
 */
2056 static int add_missing_dir_index(struct btrfs_root *root,
2057 struct cache_tree *inode_cache,
2058 struct inode_record *rec,
2059 struct inode_backref *backref)
2061 struct btrfs_path *path;
2062 struct btrfs_trans_handle *trans;
2063 struct btrfs_dir_item *dir_item;
2064 struct extent_buffer *leaf;
2065 struct btrfs_key key;
2066 struct btrfs_disk_key disk_key;
2067 struct inode_record *dir_rec;
2068 unsigned long name_ptr;
/* item payload = dir_item header + name bytes (data_len is 0) */
2069 u32 data_size = sizeof(*dir_item) + backref->namelen;
2072 path = btrfs_alloc_path();
2076 trans = btrfs_start_transaction(root, 1);
2077 if (IS_ERR(trans)) {
2078 btrfs_free_path(path);
2079 return PTR_ERR(trans);
2082 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2083 (unsigned long long)rec->ino);
/* DIR_INDEX lives under the parent dir at offset == index */
2084 key.objectid = backref->dir;
2085 key.type = BTRFS_DIR_INDEX_KEY;
2086 key.offset = backref->index;
2088 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2091 leaf = path->nodes[0];
2092 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* embedded location key points at the child inode's INODE_ITEM */
2094 disk_key.objectid = cpu_to_le64(rec->ino);
2095 disk_key.type = BTRFS_INODE_ITEM_KEY;
2096 disk_key.offset = 0;
2098 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2099 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2100 btrfs_set_dir_data_len(leaf, dir_item, 0);
2101 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* name is written right after the fixed-size dir_item header */
2102 name_ptr = (unsigned long)(dir_item + 1);
2103 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2104 btrfs_mark_buffer_dirty(leaf);
2105 btrfs_free_path(path);
2106 btrfs_commit_transaction(trans, root);
2108 backref->found_dir_index = 1;
2109 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2110 BUG_ON(IS_ERR(dir_rec));
/* the new name enlarges the parent dir; re-evaluate its isize error */
2113 dir_rec->found_size += backref->namelen;
2114 if (dir_rec->found_size == dir_rec->isize &&
2115 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2116 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2117 if (dir_rec->found_size != dir_rec->isize)
2118 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * delete_dir_index - remove a bad DIR_INDEX entry described by 'backref'
 * from directory backref->dir, in its own transaction.
 * NOTE(review): NULL/IS_ERR checks after the lookup and the final
 * return(s) are elided in this view; 'inode_cache' and 'rec' appear
 * unused in the visible code.
 */
2123 static int delete_dir_index(struct btrfs_root *root,
2124 struct cache_tree *inode_cache,
2125 struct inode_record *rec,
2126 struct inode_backref *backref)
2128 struct btrfs_trans_handle *trans;
2129 struct btrfs_dir_item *di;
2130 struct btrfs_path *path;
2133 path = btrfs_alloc_path();
2137 trans = btrfs_start_transaction(root, 1);
2138 if (IS_ERR(trans)) {
2139 btrfs_free_path(path);
2140 return PTR_ERR(trans);
2144 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2145 (unsigned long long)backref->dir,
2146 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2147 (unsigned long long)root->objectid);
/* mod -1 = look up for deletion */
2149 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2150 backref->name, backref->namelen,
2151 backref->index, -1);
/* entry not found (elided branch): nothing to delete, just commit */
2154 btrfs_free_path(path);
2155 btrfs_commit_transaction(trans, root);
/* whole item vs. one name within a shared item (elided condition) */
2162 ret = btrfs_del_item(trans, root, path);
2164 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2166 btrfs_free_path(path);
2167 btrfs_commit_transaction(trans, root);
/*
 * create_inode_item - rebuild a missing INODE_ITEM from whatever the
 * backrefs tell us: directories get found_size as size, everything else
 * gets extent_end, and both fall back to permissive 0755 modes.
 * NOTE(review): interior lines (the root_dir special-casing implied by
 * the parameter, ret handling, return) are elided in this view.
 */
2171 static int create_inode_item(struct btrfs_root *root,
2172 struct inode_record *rec,
2173 struct inode_backref *backref, int root_dir)
2175 struct btrfs_trans_handle *trans;
2176 struct btrfs_inode_item inode_item;
/* stamp all timestamps with "now"; the originals are unrecoverable */
2177 time_t now = time(NULL);
2180 trans = btrfs_start_transaction(root, 1);
2181 if (IS_ERR(trans)) {
2182 ret = PTR_ERR(trans);
2186 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2187 "be incomplete, please check permissions and content after "
2188 "the fsck completes.\n", (unsigned long long)root->objectid,
2189 (unsigned long long)rec->ino);
2191 memset(&inode_item, 0, sizeof(inode_item));
2192 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* elided condition selects between nlink=1 and the counted links */
2194 btrfs_set_stack_inode_nlink(&inode_item, 1);
2196 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2197 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2198 if (rec->found_dir_item) {
/* both dir items and extents: ambiguous; prefer directory */
2199 if (rec->found_file_extent)
2200 fprintf(stderr, "root %llu inode %llu has both a dir "
2201 "item and extents, unsure if it is a dir or a "
2202 "regular file so setting it as a directory\n",
2203 (unsigned long long)root->objectid,
2204 (unsigned long long)rec->ino);
2205 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2206 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): redundant condition - always true in this else arm */
2207 } else if (!rec->found_dir_item) {
2208 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2209 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2211 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2212 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2213 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2214 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2215 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2216 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
/* otime (creation) intentionally zero: unknown */
2217 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2218 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2220 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2222 btrfs_commit_transaction(trans, root);
/*
 * repair_inode_backrefs - walk all backrefs of an inode record and fix
 * inconsistent ones: delete bad dir_index entries, add missing ones,
 * insert missing dir item/index pairs for orphaned inode_refs, and
 * recreate missing inode items.  Returns the repair count on success
 * (see the visible tail return).  NOTE(review): 'delete' and 'repaired'
 * declarations, continue/break statements and several error paths are
 * elided in this view.
 */
2226 static int repair_inode_backrefs(struct btrfs_root *root,
2227 struct inode_record *rec,
2228 struct cache_tree *inode_cache,
2231 struct inode_backref *tmp, *backref;
2232 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2236 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* root dir with no inode item: recreate it with root_dir=1 */
2237 if (!delete && rec->ino == root_dirid) {
2238 if (!rec->found_inode_item) {
2239 ret = create_inode_item(root, rec, backref, 1);
2246 /* Index 0 for root dir's are special, don't mess with it */
2247 if (rec->ino == root_dirid && backref->index == 0)
/* dir_index without matching inode_ref (or index mismatch): drop it */
2251 ((backref->found_dir_index && !backref->found_inode_ref) ||
2252 (backref->found_dir_index && backref->found_inode_ref &&
2253 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2254 ret = delete_dir_index(root, inode_cache, rec, backref);
2258 list_del(&backref->list);
/* dir_item + inode_ref but no dir_index: insert the missing index */
2262 if (!delete && !backref->found_dir_index &&
2263 backref->found_dir_item && backref->found_inode_ref) {
2264 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * NOTE(review): 'found_dir_index' is tested twice below; the third
 * test is redundant (possibly meant to be a different flag).
 */
2269 if (backref->found_dir_item &&
2270 backref->found_dir_index &&
2271 backref->found_dir_index) {
/* fully consistent backref: nothing left to repair, drop it */
2272 if (!backref->errors &&
2273 backref->found_inode_ref) {
2274 list_del(&backref->list);
/* inode_ref only: recreate the dir item/index pair */
2280 if (!delete && (!backref->found_dir_index &&
2281 !backref->found_dir_item &&
2282 backref->found_inode_ref)) {
2283 struct btrfs_trans_handle *trans;
2284 struct btrfs_key location;
/* bail if the name already exists in the parent dir */
2286 ret = check_dir_conflict(root, backref->name,
2292 * let nlink fixing routine to handle it,
2293 * which can do it better.
2298 location.objectid = rec->ino;
2299 location.type = BTRFS_INODE_ITEM_KEY;
2300 location.offset = 0;
2302 trans = btrfs_start_transaction(root, 1);
2303 if (IS_ERR(trans)) {
2304 ret = PTR_ERR(trans);
2307 fprintf(stderr, "adding missing dir index/item pair "
2309 (unsigned long long)rec->ino);
2310 ret = btrfs_insert_dir_item(trans, root, backref->name,
2312 backref->dir, &location,
2313 imode_to_type(rec->imode),
2316 btrfs_commit_transaction(trans, root);
/* complete backref but inode item missing: recreate the inode */
2320 if (!delete && (backref->found_inode_ref &&
2321 backref->found_dir_index &&
2322 backref->found_dir_item &&
2323 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2324 !rec->found_inode_item)) {
2325 ret = create_inode_item(root, rec, backref, 0);
/* error wins over repair count */
2332 return ret ? ret : repaired;
2336 * To determine the file type for nlink/inode_item repair
2338 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2339 * Return -ENOENT if file type is not found.
/*
 * find_file_type - recover the BTRFS_FT_* type of an inode, preferring
 * the inode item's mode, else the filetype recorded on any dir backref.
 * Returns 0 with *type set, or -ENOENT (elided tail) when neither exists.
 */
2341 static int find_file_type(struct inode_record *rec, u8 *type)
2343 struct inode_backref *backref;
2345 /* For inode item recovered case */
2346 if (rec->found_inode_item) {
2347 *type = imode_to_type(rec->imode);
/* otherwise take the type from the first dir_item/index backref */
2351 list_for_each_entry(backref, &rec->backrefs, list) {
2352 if (backref->found_dir_index || backref->found_dir_item) {
2353 *type = backref->filetype;
2361 * To determine the file name for nlink repair
2363 * Return 0 if file name is found, set name and namelen.
2364 * Return -ENOENT if file name is not found.
/*
 * find_file_name - recover a usable file name for an inode from the first
 * backref that has a dir entry or inode_ref.  Copies the raw (not NUL
 * terminated) name into 'name' and sets *namelen.  Returns 0 on success,
 * -ENOENT (elided tail) when no backref carries a name.
 */
2366 static int find_file_name(struct inode_record *rec,
2367 char *name, int *namelen)
2369 struct inode_backref *backref;
2371 list_for_each_entry(backref, &rec->backrefs, list) {
2372 if (backref->found_dir_index || backref->found_dir_item ||
2373 backref->found_inode_ref) {
/* caller must size 'name' to at least BTRFS_NAME_LEN */
2374 memcpy(name, backref->name, backref->namelen);
2375 *namelen = backref->namelen;
2382 /* Reset the nlink of the inode to the correct one */
/* Reset the nlink of the inode to the correct one */
/*
 * Strategy: unlink every recorded name (valid and invalid), zero the
 * on-disk nlink, then re-add only the fully-consistent backrefs via
 * btrfs_add_link(), which recomputes nlink as a side effect.
 * NOTE(review): ret checks/gotos between the visible steps and the final
 * return are elided in this view.
 */
2383 static int reset_nlink(struct btrfs_trans_handle *trans,
2384 struct btrfs_root *root,
2385 struct btrfs_path *path,
2386 struct inode_record *rec)
2388 struct inode_backref *backref;
2389 struct inode_backref *tmp;
2390 struct btrfs_key key;
2391 struct btrfs_inode_item *inode_item;
2394 /* We don't believe this either, reset it and iterate backref */
2395 rec->found_link = 0;
2397 /* Remove all backref including the valid ones */
2398 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2399 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2400 backref->index, backref->name,
2401 backref->namelen, 0);
2405 /* remove invalid backref, so it won't be added back */
2406 if (!(backref->found_dir_index &&
2407 backref->found_dir_item &&
2408 backref->found_inode_ref)) {
2409 list_del(&backref->list);
2416 /* Set nlink to 0 */
2417 key.objectid = rec->ino;
2418 key.type = BTRFS_INODE_ITEM_KEY;
2420 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2427 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2428 struct btrfs_inode_item);
2429 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2430 btrfs_mark_buffer_dirty(path->nodes[0]);
/* must release before add_link re-searches the tree */
2431 btrfs_release_path(path);
2434 * Add back valid inode_ref/dir_item/dir_index,
2435 * add_link() will handle the nlink inc, so new nlink must be correct
2437 list_for_each_entry(backref, &rec->backrefs, list) {
2438 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2439 backref->name, backref->namelen,
2440 backref->filetype, &backref->index, 1);
2445 btrfs_release_path(path);
/*
 * repair_inode_nlinks - fix a wrong link count: recover name/type, reset
 * nlink via reset_nlink(), and if the inode ended up with no valid links
 * at all, relink it under a "lost+found" directory, suffixing ".INO" on
 * name collisions.  Clears I_ERR_LINK_COUNT_WRONG unconditionally at the
 * end to avoid re-processing loops.  NOTE(review): declarations (type,
 * namelen, lost_found_ino), ret checks/gotos and the return are elided.
 */
2449 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2450 struct btrfs_root *root,
2451 struct btrfs_path *path,
2452 struct inode_record *rec)
2454 char *dir_name = "lost+found";
2455 char namebuf[BTRFS_NAME_LEN] = {0};
2460 int name_recovered = 0;
2461 int type_recovered = 0;
2465 * Get file name and type first before these invalid inode ref
2466 * are deleted by remove_all_invalid_backref()
2468 name_recovered = !find_file_name(rec, namebuf, &namelen);
2469 type_recovered = !find_file_type(rec, &type);
/* no name anywhere: fall back to the decimal inode number */
2471 if (!name_recovered) {
2472 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2473 rec->ino, rec->ino);
2474 namelen = count_digits(rec->ino);
2475 sprintf(namebuf, "%llu", rec->ino);
2478 if (!type_recovered) {
2479 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2481 type = BTRFS_FT_REG_FILE;
2485 ret = reset_nlink(trans, root, path, rec);
2488 "Failed to reset nlink for inode %llu: %s\n",
2489 rec->ino, strerror(-ret));
/* no surviving valid link: park the inode under lost+found */
2493 if (rec->found_link == 0) {
2494 lost_found_ino = root->highest_inode;
/* can't allocate a new inode number past the reserved range */
2495 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2500 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2501 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2504 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2505 dir_name, strerror(-ret));
2508 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2509 namebuf, namelen, type, NULL, 1);
2511 * Add ".INO" suffix several times to handle case where
2512 * "FILENAME.INO" is already taken by another file.
2514 while (ret == -EEXIST) {
2516 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2518 if (namelen + count_digits(rec->ino) + 1 >
2523 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2525 namelen += count_digits(rec->ino) + 1;
2526 ret = btrfs_add_link(trans, root, rec->ino,
2527 lost_found_ino, namebuf,
2528 namelen, type, NULL, 1);
2532 "Failed to link the inode %llu to %s dir: %s\n",
2533 rec->ino, dir_name, strerror(-ret));
2537 * Just increase the found_link, don't actually add the
2538 * backref. This will make things easier and this inode
2539 * record will be freed after the repair is done.
2540 * So fsck will not report problem about this inode.
2543 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2544 namelen, namebuf, dir_name);
2546 printf("Fixed the nlink of inode %llu\n", rec->ino);
2549 * Clear the flag anyway, or we will loop forever for the same inode
2550 * as it will not be removed from the bad inode list and the dead loop
2553 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2554 btrfs_release_path(path);
2559 * Check if there is any normal(reg or prealloc) file extent for given
2561 * This is used to determine the file type when neither its dir_index/item nor
2562 * inode_item exists.
2564 * This will *NOT* report error, if any error happens, just consider it does
2565 * not have any normal file extent.
/*
 * find_normal_file_extent - best-effort check whether inode 'ino' has at
 * least one non-inline (REG or PREALLOC) EXTENT_DATA item.  Returns
 * non-zero if found, 0 otherwise; errors are deliberately swallowed (see
 * the header comment above).  NOTE(review): key.objectid/offset setup,
 * ret declaration, loop advance and gotos are elided in this view.
 */
2567 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2569 struct btrfs_path *path;
2570 struct btrfs_key key;
2571 struct btrfs_key found_key;
2572 struct btrfs_file_extent_item *fi;
2576 path = btrfs_alloc_path();
2580 key.type = BTRFS_EXTENT_DATA_KEY;
2583 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* search landed past the leaf end: step to the next leaf */
2588 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2589 ret = btrfs_next_leaf(root, path);
2596 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* left this inode's EXTENT_DATA items: stop */
2598 if (found_key.objectid != ino ||
2599 found_key.type != BTRFS_EXTENT_DATA_KEY)
2601 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2602 struct btrfs_file_extent_item);
2603 type = btrfs_file_extent_type(path->nodes[0], fi);
/* anything except inline counts as a "normal" file extent */
2604 if (type != BTRFS_FILE_EXTENT_INLINE) {
2610 btrfs_free_path(path);
/*
 * btrfs_type_to_imode - map a BTRFS_FT_* directory-entry type to the
 * corresponding S_IF* file-mode bits.
 * NOTE(review): no bounds check on 'type' is visible here; a value >=
 * the table size would read out of bounds — confirm callers only pass
 * valid BTRFS_FT_* values.
 */
2614 static u32 btrfs_type_to_imode(u8 type)
2616 static u32 imode_by_btrfs_type[] = {
2617 [BTRFS_FT_REG_FILE] = S_IFREG,
2618 [BTRFS_FT_DIR] = S_IFDIR,
2619 [BTRFS_FT_CHRDEV] = S_IFCHR,
2620 [BTRFS_FT_BLKDEV] = S_IFBLK,
2621 [BTRFS_FT_FIFO] = S_IFIFO,
2622 [BTRFS_FT_SOCK] = S_IFSOCK,
2623 [BTRFS_FT_SYMLINK] = S_IFLNK,
2626 return imode_by_btrfs_type[(type)];
/*
 * repair_inode_no_item - rebuild a missing inode item.  File type is
 * recovered from backrefs, else guessed from found extents/dir items,
 * defaulting to a regular file.  Deliberately leaves nlink repair to the
 * nlink routine by setting I_ERR_LINK_COUNT_WRONG.  NOTE(review): 'mode'
 * and 'filetype' declarations, ret checks and return are elided; also
 * note the typo in the runtime message below ("determint" ->
 * "determine") which cannot be fixed in a comments-only pass.
 */
2629 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2630 struct btrfs_root *root,
2631 struct btrfs_path *path,
2632 struct inode_record *rec)
2636 int type_recovered = 0;
2639 printf("Trying to rebuild inode:%llu\n", rec->ino);
2641 type_recovered = !find_file_type(rec, &filetype);
2644 * Try to determine inode type if type not found.
2646 * For found regular file extent, it must be FILE.
2647 * For found dir_item/index, it must be DIR.
2649 * For undetermined one, use FILE as fallback.
2652 * 1. If found backref(inode_index/item is already handled) to it,
2654 * Need new inode-inode ref structure to allow search for that.
2656 if (!type_recovered) {
/* a real (non-inline) extent proves it's a regular file */
2657 if (rec->found_file_extent &&
2658 find_normal_file_extent(root, rec->ino)) {
2660 filetype = BTRFS_FT_REG_FILE;
2661 } else if (rec->found_dir_item) {
2663 filetype = BTRFS_FT_DIR;
/* orphan data extents also imply a regular file */
2664 } else if (!list_empty(&rec->orphan_extents)) {
2666 filetype = BTRFS_FT_REG_FILE;
2668 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2671 filetype = BTRFS_FT_REG_FILE;
2675 ret = btrfs_new_inode(trans, root, rec->ino,
2676 mode | btrfs_type_to_imode(filetype));
2681 * Here inode rebuild is done, we only rebuild the inode item,
2682 * don't repair the nlink(like move to lost+found).
2683 * That is the job of nlink repair.
2685 * We just fill the record and return
2687 rec->found_dir_item = 1;
2688 rec->imode = mode | btrfs_type_to_imode(filetype);
2690 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2691 /* Ensure the inode_nlinks repair function will be called */
2692 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * repair_inode_orphan_extent - reattach orphan data extents to the inode:
 * extents that conflict with existing file extents are freed; the rest
 * are re-inserted as file extents and the record's size/hole/nbytes
 * bookkeeping is updated.  Clears I_ERR_FILE_EXTENT_ORPHAN at the end.
 * NOTE(review): ret checks/gotos between steps, the free() of each list
 * entry and the return are elided in this view.
 */
2697 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2698 struct btrfs_root *root,
2699 struct btrfs_path *path,
2700 struct inode_record *rec)
2702 struct orphan_data_extent *orphan;
2703 struct orphan_data_extent *tmp;
2706 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2708 * Check for conflicting file extents
2710 * Here we don't know whether the extents is compressed or not,
2711 * so we can only assume it not compressed nor data offset,
2712 * and use its disk_len as extent length.
2714 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2715 orphan->offset, orphan->disk_len, 0);
2716 btrfs_release_path(path);
/* conflict found (elided condition): free the orphan extent instead */
2721 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2722 orphan->disk_bytenr, orphan->disk_len);
2723 ret = btrfs_free_extent(trans,
2724 root->fs_info->extent_root,
2725 orphan->disk_bytenr, orphan->disk_len,
2726 0, root->objectid, orphan->objectid,
/* no conflict: re-insert as an uncompressed full-length extent */
2731 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2732 orphan->offset, orphan->disk_bytenr,
2733 orphan->disk_len, orphan->disk_len);
2737 /* Update file size info */
2738 rec->found_size += orphan->disk_len;
2739 if (rec->found_size == rec->nbytes)
2740 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2742 /* Update the file extent hole info too */
2743 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2747 if (RB_EMPTY_ROOT(&rec->holes))
2748 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2750 list_del(&orphan->list);
/* all orphans handled one way or the other */
2753 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * repair_inode_discount_extent - fill every recorded file-extent hole by
 * punching an explicit hole extent, then clear the discount-extent error.
 * A file that lost all of its extents (elided condition) gets one hole
 * over [0, round_up(isize)).  NOTE(review): the loop head over 'node',
 * ret checks/gotos and the return are elided in this view.
 */
2758 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2759 struct btrfs_root *root,
2760 struct btrfs_path *path,
2761 struct inode_record *rec)
2763 struct rb_node *node;
2764 struct file_extent_hole *hole;
2768 node = rb_first(&rec->holes);
2772 hole = rb_entry(node, struct file_extent_hole, node);
/* materialize the gap as an explicit hole extent on disk */
2773 ret = btrfs_punch_hole(trans, root, rec->ino,
2774 hole->start, hole->len);
/* drop the hole from the in-memory tracking tree */
2777 ret = del_file_extent_hole(&rec->holes, hole->start,
2781 if (RB_EMPTY_ROOT(&rec->holes))
2782 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* re-read the first node: deletion invalidated the old pointer */
2783 node = rb_first(&rec->holes);
2785 /* special case for a file losing all its file extent */
2787 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2788 round_up(rec->isize, root->sectorsize));
2792 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2793 rec->ino, root->objectid);
/*
 * Dispatch the per-error inode repair helpers for @rec inside a single
 * transaction.  Only error bits with a known fixer are attempted; the
 * helpers run in dependency order (missing inode item first) and the chain
 * stops at the first failure.
 */
static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;

	/* Nothing we know how to repair -> bail out early */
	if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
			     I_ERR_NO_ORPHAN_ITEM |
			     I_ERR_LINK_COUNT_WRONG |
			     I_ERR_NO_INODE_ITEM |
			     I_ERR_FILE_EXTENT_ORPHAN |
			     I_ERR_FILE_EXTENT_DISCOUNT|
			     I_ERR_FILE_NBYTES_WRONG)))
	path = btrfs_alloc_path();
	/*
	 * For nlink repair, it may create a dir and add link, so
	 * 2 for parent(256)'s dir_index and dir_item
	 * 2 for lost+found dir's inode_item and inode_ref
	 * 1 for the new inode_ref of the file
	 * 2 for lost+found dir's dir_index and dir_item for the file
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	/* Order matters: a missing inode item must be recreated first */
	if (rec->errors & I_ERR_NO_INODE_ITEM)
		ret = repair_inode_no_item(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
		ret = repair_inode_orphan_extent(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
		ret = repair_inode_discount_extent(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
		ret = repair_inode_isize(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
		ret = repair_inode_orphan_item(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
		ret = repair_inode_nlinks(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
		ret = repair_inode_nbytes(trans, root, path, rec);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Validate (and, when `repair` is set, try to fix) every inode record
 * collected for one fs/subvolume tree.
 *
 * Pass structure: record the highest inode number (for a later lost+found),
 * repair backrefs in stages, check/recreate the root directory, then walk
 * every remaining inode record, flagging missing inode items and wrong link
 * counts and reporting any unresolved backrefs.
 * Returns 0 when clean, -1 if unresolved errors remain.
 */
static int check_inode_recs(struct btrfs_root *root,
			    struct cache_tree *inode_cache)
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);

	/* A dead root (refs == 0) should carry no inode records at all */
	if (btrfs_root_refs(&root->root_item) == 0) {
		if (!cache_tree_empty(inode_cache))
			fprintf(stderr, "warning line %d\n", __LINE__);
	/*
	 * We need to record the highest inode number for later 'lost+found'
	 *
	 * We must select a ino not used/refered by any existing inode, or
	 * 'lost+found' ino may be a missing ino in a corrupted leaf,
	 * this may cause 'lost+found' dir has wrong nlinks.
	 */
	cache = last_cache_extent(inode_cache);
		node = container_of(cache, struct ptr_node, cache);
		if (rec->ino > root->highest_inode)
			root->highest_inode = rec->ino;
	/*
	 * We need to repair backrefs first because we could change some of the
	 * errors in the inode recs.
	 *
	 * We also need to go through and delete invalid backrefs first and then
	 * add the correct ones second. We do this because we may get EEXIST
	 * when adding back the correct index because we hadn't yet deleted the
	 * invalid one.
	 *
	 * For example, if we were missing a dir index then the directories
	 * isize would be wrong, so if we fixed the isize to what we thought it
	 * would be and then fixed the backref we'd still have a invalid fs, so
	 * we need to add back the dir index and then check to see if the isize
	 * is still wrong.
	 */
	if (stage == 3 && !err)
	cache = search_cache_extent(inode_cache, 0);
	while (repair && cache) {
		node = container_of(cache, struct ptr_node, cache);
		cache = next_cache_extent(cache);
		/* Need to free everything up and rescan */
			remove_cache_extent(inode_cache, &node->cache);
			free_inode_rec(rec);
		if (list_empty(&rec->backrefs))
		ret = repair_inode_backrefs(root, rec, inode_cache,
	/* Verify the tree's root directory (the default top-level dir) */
	rec = get_inode_rec(inode_cache, root_dirid, 0);
	BUG_ON(IS_ERR(rec));
		ret = check_root_dir(rec);
			fprintf(stderr, "root %llu root dir %llu error\n",
				(unsigned long long)root->root_key.objectid,
				(unsigned long long)root_dirid);
			print_inode_error(root, rec);
			/* Root dir missing entirely: recreate it when repairing */
			struct btrfs_trans_handle *trans;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				err = PTR_ERR(trans);
		"root %llu missing its root dir, recreating\n",
				(unsigned long long)root->objectid);
			ret = btrfs_make_root_dir(trans, root, root_dirid);
			btrfs_commit_transaction(trans, root);
		fprintf(stderr, "root %llu root dir %llu not found\n",
			(unsigned long long)root->root_key.objectid,
			(unsigned long long)root_dirid);
	/* Main pass: check every remaining inode record */
	cache = search_cache_extent(inode_cache, 0);
		node = container_of(cache, struct ptr_node, cache);
		remove_cache_extent(inode_cache, &node->cache);
		/* Root dir and orphan pseudo-inode were handled above */
		if (rec->ino == root_dirid ||
		    rec->ino == BTRFS_ORPHAN_OBJECTID) {
			free_inode_rec(rec);
		/* A present orphan item excuses an otherwise-unlinked inode */
		if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
			ret = check_orphan_item(root, rec->ino);
				rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
			if (can_free_inode_rec(rec)) {
				free_inode_rec(rec);
		if (!rec->found_inode_item)
			rec->errors |= I_ERR_NO_INODE_ITEM;
		if (rec->found_link != rec->nlink)
			rec->errors |= I_ERR_LINK_COUNT_WRONG;
			ret = try_repair_inode(root, rec);
			if (ret == 0 && can_free_inode_rec(rec)) {
				free_inode_rec(rec);
		if (!(repair && ret == 0))
		print_inode_error(root, rec);
		/* Report each backref that could not be fully resolved */
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_inode_ref)
				backref->errors |= REF_ERR_NO_INODE_REF;
			fprintf(stderr, "\tunresolved ref dir %llu index %llu"
				" namelen %u name %s filetype %d errors %x",
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
				backref->filetype, backref->errors);
			print_ref_error(backref->errors);
		free_inode_rec(rec);
	return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record on a miss.  Each record occupies a
 * 1-unit cache extent keyed by the root objectid.  Returns ERR_PTR on
 * allocation or insertion failure.
 */
static struct root_record *get_root_rec(struct cache_tree *root_cache,
	struct cache_extent *cache;
	struct root_record *rec = NULL;

	cache = lookup_cache_extent(root_cache, objectid, 1);
		rec = container_of(cache, struct root_record, cache);
		/* Miss: create and index a new record */
		rec = calloc(1, sizeof(*rec));
			return ERR_PTR(-ENOMEM);
		rec->objectid = objectid;
		INIT_LIST_HEAD(&rec->backrefs);
		rec->cache.start = objectid;
		rec->cache.size = 1;
		ret = insert_cache_extent(root_cache, &rec->cache);
			return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec matching (ref_root, dir, name); allocate and
 * queue a new one if none matches.  The name is stored inline after the
 * struct (hence the "+ namelen + 1" allocation) and NUL-terminated.
 */
static struct root_backref *get_root_backref(struct root_record *rec,
					     u64 ref_root, u64 dir, u64 index,
					     const char *name, int namelen)
	struct root_backref *backref;

	list_for_each_entry(backref, &rec->backrefs, list) {
		if (backref->ref_root != ref_root || backref->dir != dir ||
		    backref->namelen != namelen)
		if (memcmp(name, backref->name, namelen))
	/* Not found: create a new backref with the name stored inline */
	backref = calloc(1, sizeof(*backref) + namelen + 1);
	backref->ref_root = ref_root;
	backref->index = index;
	backref->namelen = namelen;
	memcpy(backref->name, name, namelen);
	backref->name[namelen] = '\0';
	list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record: release every attached backref, then
 * the record itself.  Used via FREE_EXTENT_CACHE_BASED_TREE below to
 * generate free_root_recs_tree() for whole-cache teardown.
 */
static void free_root_record(struct cache_extent *cache)
	struct root_record *rec;
	struct root_backref *backref;

	rec = container_of(cache, struct root_record, cache);
	while (!list_empty(&rec->backrefs)) {
		backref = list_entry(rec->backrefs.next,
				     struct root_backref, list);
		list_del(&backref->list);

FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence for a subvolume link: a DIR_ITEM,
 * DIR_INDEX, ROOT_REF or ROOT_BACKREF item referencing root @root_id.
 * The four item types each set their "found" flag on the (ref_root, dir,
 * name) backref; duplicates and index mismatches raise error bits.  A root
 * becomes reachable once both the forward ref and the dir item were seen.
 */
static int add_root_backref(struct cache_tree *root_cache,
			    u64 root_id, u64 ref_root, u64 dir, u64 index,
			    const char *name, int namelen,
			    int item_type, int errors)
	struct root_record *rec;
	struct root_backref *backref;

	rec = get_root_rec(root_cache, root_id);
	BUG_ON(IS_ERR(rec));
	backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
	backref->errors |= errors;

	/* DIR_ITEM carries no index; every other item type must agree on it */
	if (item_type != BTRFS_DIR_ITEM_KEY) {
		if (backref->found_dir_index || backref->found_back_ref ||
		    backref->found_forward_ref) {
			if (backref->index != index)
				backref->errors |= REF_ERR_INDEX_UNMATCH;
			backref->index = index;

	if (item_type == BTRFS_DIR_ITEM_KEY) {
		if (backref->found_forward_ref)
		backref->found_dir_item = 1;
	} else if (item_type == BTRFS_DIR_INDEX_KEY) {
		backref->found_dir_index = 1;
	} else if (item_type == BTRFS_ROOT_REF_KEY) {
		if (backref->found_forward_ref)
			backref->errors |= REF_ERR_DUP_ROOT_REF;
		else if (backref->found_dir_item)
		backref->found_forward_ref = 1;
	} else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
		if (backref->found_back_ref)
			backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
		backref->found_back_ref = 1;

	/* Forward ref + dir item together prove the link is reachable */
	if (backref->found_forward_ref && backref->found_dir_item)
		backref->reachable = 1;
/*
 * Move the subvolume-reference records accumulated while scanning one tree
 * (@src_cache) into the global root cache (@dst_cache).  Each inode record
 * that turns out to be a child root contributes root backrefs built from
 * its dir item / dir index backrefs.  Records from the reloc tree are
 * discarded outright.
 */
static int merge_root_recs(struct btrfs_root *root,
			   struct cache_tree *src_cache,
			   struct cache_tree *dst_cache)
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;

	/* Reloc tree entries are transient; drop them all */
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		free_inode_recs_tree(src_cache);

	cache = search_cache_extent(src_cache, 0);
		node = container_of(cache, struct ptr_node, cache);
		remove_cache_extent(src_cache, &node->cache);
		/* Only records that reference a child root are promoted */
		ret = is_child_root(root, root->objectid, rec->ino);
		list_for_each_entry(backref, &rec->backrefs, list) {
			BUG_ON(backref->found_inode_ref);
			if (backref->found_dir_item)
				add_root_backref(dst_cache, rec->ino,
					root->root_key.objectid, backref->dir,
					backref->index, backref->name,
					backref->namelen, BTRFS_DIR_ITEM_KEY,
			if (backref->found_dir_index)
				add_root_backref(dst_cache, rec->ino,
					root->root_key.objectid, backref->dir,
					backref->index, backref->name,
					backref->namelen, BTRFS_DIR_INDEX_KEY,
		free_inode_rec(rec);
/*
 * Verify that every fs/subvolume tree is reachable from a referenced
 * parent and that all of its root backrefs are consistent.
 *
 * First propagates reachability: a backref whose referencing root is
 * itself unreferenced is cleared, iterating until a fixed point.  Then
 * reports unreferenced trees (tolerating orphan-item-covered ones and
 * records with no root item) and any unresolved backrefs.
 * Returns 1 if errors were found, 0 otherwise.
 */
static int check_root_refs(struct btrfs_root *root,
			   struct cache_tree *root_cache)
	struct root_record *rec;
	struct root_record *ref_root;
	struct root_backref *backref;
	struct cache_extent *cache;

	/* The top-level fs tree is referenced by definition */
	rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
	BUG_ON(IS_ERR(rec));

	/* fixme: this can not detect circular references */
		cache = search_cache_extent(root_cache, 0);
			rec = container_of(cache, struct root_record, cache);
			cache = next_cache_extent(cache);
			if (rec->found_ref == 0)
			list_for_each_entry(backref, &rec->backrefs, list) {
				if (!backref->reachable)
				ref_root = get_root_rec(root_cache,
				BUG_ON(IS_ERR(ref_root));
				if (ref_root->found_ref > 0)
				/* Referencing root is dead: drop this link */
				backref->reachable = 0;
				if (rec->found_ref == 0)

	/* Reporting pass over all root records */
	cache = search_cache_extent(root_cache, 0);
		rec = container_of(cache, struct root_record, cache);
		cache = next_cache_extent(cache);
		if (rec->found_ref == 0 &&
		    rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
		    rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
			/* An orphan item legitimizes an unreferenced tree */
			ret = check_orphan_item(root->fs_info->tree_root,
			/*
			 * If we don't have a root item then we likely just have
			 * a dir item in a snapshot for this root but no actual
			 * ref key or anything so it's meaningless.
			 */
			if (!rec->found_root_item)
			fprintf(stderr, "fs tree %llu not referenced\n",
				(unsigned long long)rec->objectid);

		if (rec->found_ref > 0 && !rec->found_root_item)
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_back_ref)
				backref->errors |= REF_ERR_NO_ROOT_BACKREF;
			if (!backref->found_forward_ref)
				backref->errors |= REF_ERR_NO_ROOT_REF;
			if (backref->reachable && backref->errors)
		fprintf(stderr, "fs tree %llu refs %u %s\n",
			(unsigned long long)rec->objectid, rec->found_ref,
			rec->found_root_item ? "" : "not found");
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->reachable)
			if (!backref->errors && rec->found_root_item)
			fprintf(stderr, "\tunresolved ref root %llu dir %llu"
				" index %llu namelen %u name %s errors %x\n",
				(unsigned long long)backref->ref_root,
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
			print_ref_error(backref->errors);
	return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF/ROOT_BACKREF item at @slot of @eb and feed it into
 * the root cache.  For ROOT_REF the key is (parent, REF, child); for
 * ROOT_BACKREF it is (child, BACKREF, parent) — hence the swapped
 * objectid/offset in the two add_root_backref() calls.
 */
static int process_root_ref(struct extent_buffer *eb, int slot,
			    struct btrfs_key *key,
			    struct cache_tree *root_cache)
	struct btrfs_root_ref *ref;
	char namebuf[BTRFS_NAME_LEN];

	ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
	dirid = btrfs_root_ref_dirid(eb, ref);
	index = btrfs_root_ref_sequence(eb, ref);
	name_len = btrfs_root_ref_name_len(eb, ref);
	if (name_len <= BTRFS_NAME_LEN) {
		/* Over-long name: truncate and flag — presumably the
		 * else-branch of the length check; TODO confirm */
		len = BTRFS_NAME_LEN;
		error = REF_ERR_NAME_TOO_LONG;
	/* The name bytes follow the btrfs_root_ref struct in the item */
	read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);

	if (key->type == BTRFS_ROOT_REF_KEY) {
		add_root_backref(root_cache, key->offset, key->objectid, dirid,
				 index, namebuf, len, key->type, error);
		add_root_backref(root_cache, key->objectid, key->offset, dirid,
				 index, namebuf, len, key->type, error);
/*
 * Destructor for one corrupt-block record; FREE_EXTENT_CACHE_BASED_TREE
 * generates free_corrupt_blocks_tree() from it for cache teardown.
 */
static void free_corrupt_block(struct cache_extent *cache)
	struct btrfs_corrupt_block *corrupt;

	corrupt = container_of(cache, struct btrfs_corrupt_block, cache);

FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Repair the btree of the given root.
 *
 * The fix is to remove the node key in corrupt_blocks cache_tree.
 * and rebalance the tree.
 * After the fix, the btree should be writeable.
 */
static int repair_btree(struct btrfs_root *root,
			struct cache_tree *corrupt_blocks)
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_corrupt_block *corrupt;
	struct cache_extent *cache;
	struct btrfs_key key;

	/* Nothing recorded as corrupt: nothing to do */
	if (cache_tree_empty(corrupt_blocks))
	path = btrfs_alloc_path();
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		fprintf(stderr, "Error starting transaction: %s\n",
	/* Pass 1: delete the pointer to (and extent of) each corrupt block */
	cache = first_cache_extent(corrupt_blocks);
		corrupt = container_of(cache, struct btrfs_corrupt_block,
		level = corrupt->level;
		path->lowest_level = level;
		key.objectid = corrupt->key.objectid;
		key.type = corrupt->key.type;
		key.offset = corrupt->key.offset;
		/*
		 * Here we don't want to do any tree balance, since it may
		 * cause a balance with corrupted brother leaf/node,
		 * so ins_len set to 0 here.
		 * Balance will be done after all corrupt node/leaf is deleted.
		 */
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		offset = btrfs_node_blockptr(path->nodes[level],
					     path->slots[level]);
		/* Remove the ptr */
		ret = btrfs_del_ptr(trans, root, path, level,
				    path->slots[level]);
		/*
		 * Remove the corresponding extent
		 * return value is not concerned.
		 */
		btrfs_release_path(path);
		ret = btrfs_free_extent(trans, root, offset, root->nodesize,
					0, root->root_key.objectid,
		cache = next_cache_extent(cache);

	/* Balance the btree using btrfs_search_slot() */
	cache = first_cache_extent(corrupt_blocks);
		corrupt = container_of(cache, struct btrfs_corrupt_block,
		memcpy(&key, &corrupt->key, sizeof(key));
		/* ins_len -1 triggers rebalancing around the deleted keys */
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		/* return will always >0 since it won't find the item */
		btrfs_release_path(path);
		cache = next_cache_extent(cache);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Check one fs/subvolume tree: walk every block from the root node down,
 * building inode records, then validate them.
 *
 * Corrupted blocks found during the walk are collected in a local
 * corrupt_blocks cache; with `repair` set, repair_btree() is attempted on
 * them.  Orphan data extents gathered for this root are attached to their
 * inode records before the walk so check_inode_recs() can repair them.
 */
static int check_fs_root(struct btrfs_root *root,
			 struct cache_tree *root_cache,
			 struct walk_control *wc)
	struct btrfs_path path;
	struct shared_node root_node;
	struct root_record *rec;
	struct btrfs_root_item *root_item = &root->root_item;
	struct cache_tree corrupt_blocks;
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;
	enum btrfs_tree_block_status status;

	/*
	 * Reuse the corrupt_block cache tree to record corrupted tree block
	 *
	 * Unlike the usage in extent tree check, here we do it in a per
	 * fs/subvol tree base.
	 */
	cache_tree_init(&corrupt_blocks);
	root->fs_info->corrupt_blocks = &corrupt_blocks;

	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		rec = get_root_rec(root_cache, root->root_key.objectid);
		BUG_ON(IS_ERR(rec));
		if (btrfs_root_refs(root_item) > 0)
			rec->found_root_item = 1;

	btrfs_init_path(&path);
	memset(&root_node, 0, sizeof(root_node));
	cache_tree_init(&root_node.root_cache);
	cache_tree_init(&root_node.inode_cache);

	/* Move the orphan extent record to corresponding inode_record */
	list_for_each_entry_safe(orphan, tmp,
				 &root->orphan_data_extents, list) {
		struct inode_record *inode;

		inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
		BUG_ON(IS_ERR(inode));
		inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
		list_move(&orphan->list, &inode->orphan_extents);

	level = btrfs_header_level(root->node);
	memset(wc->nodes, 0, sizeof(wc->nodes));
	wc->nodes[level] = &root_node;
	wc->active_node = level;
	wc->root_level = level;

	/* We may not have checked the root block, lets do that now */
	if (btrfs_is_leaf(root->node))
		status = btrfs_check_leaf(root, NULL, root->node);
		status = btrfs_check_node(root, NULL, root->node);
	if (status != BTRFS_TREE_BLOCK_CLEAN)

	/* For a partially-dropped root, resume from drop_progress */
	if (btrfs_root_refs(root_item) > 0 ||
	    btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path.nodes[level] = root->node;
		extent_buffer_get(root->node);
		path.slots[level] = 0;
		struct btrfs_key key;
		struct btrfs_disk_key found_key;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path.lowest_level = level;
		wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
		btrfs_node_key(path.nodes[level], &found_key,
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));

	/* Full tree walk, gathering inode records as we go */
	wret = walk_down_tree(root, &path, wc, &level);
	wret = walk_up_tree(root, &path, wc, &level);
	btrfs_release_path(&path);

	if (!cache_tree_empty(&corrupt_blocks)) {
		struct cache_extent *cache;
		struct btrfs_corrupt_block *corrupt;

		printf("The following tree block(s) is corrupted in tree %llu:\n",
		       root->root_key.objectid);
		cache = first_cache_extent(&corrupt_blocks);
			corrupt = container_of(cache,
					       struct btrfs_corrupt_block,
			printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
			       cache->start, corrupt->level,
			       corrupt->key.objectid, corrupt->key.type,
			       corrupt->key.offset);
			cache = next_cache_extent(cache);
		printf("Try to repair the btree for root %llu\n",
		       root->root_key.objectid);
		ret = repair_btree(root, &corrupt_blocks);
			fprintf(stderr, "Failed to repair btree: %s\n",
		printf("Btree for root %llu is fixed\n",
		       root->root_key.objectid);

	err = merge_root_recs(root, &root_node.root_cache, root_cache);
	if (root_node.current) {
		root_node.current->checked = 1;
		maybe_free_inode_rec(&root_node.inode_cache,
	err = check_inode_recs(root, &root_node.inode_cache);

	free_corrupt_blocks_tree(&corrupt_blocks);
	root->fs_info->corrupt_blocks = NULL;
	free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return non-zero if @objectid names a tree that must be checked as an
 * fs/subvolume tree: the two reloc trees are included explicitly; anything
 * else is decided by is_fstree().
 */
static int fs_root_objectid(u64 objectid)
	if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
	    objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
	return is_fstree(objectid);
/*
 * Iterate the tree of tree roots and run check_fs_root() on every
 * fs/subvolume tree, while feeding ROOT_REF/ROOT_BACKREF items into
 * @root_cache via process_root_ref().
 *
 * If a repair modifies the tree root while we iterate (detected by the
 * tree root node changing, or a check returning -EAGAIN), all collected
 * records are dropped and the scan restarts from the beginning.
 */
static int check_fs_roots(struct btrfs_root *root,
			  struct cache_tree *root_cache)
	struct btrfs_path path;
	struct btrfs_key key;
	struct walk_control wc;
	struct extent_buffer *leaf, *tree_node;
	struct btrfs_root *tmp_root;
	struct btrfs_root *tree_root = root->fs_info->tree_root;

	if (ctx.progress_enabled) {
		ctx.tp = TASK_FS_ROOTS;
		task_start(ctx.info);

	/*
	 * Just in case we made any changes to the extent tree that weren't
	 * reflected into the free space cache yet.
	 */
		reset_cached_block_groups(root->fs_info);
	memset(&wc, 0, sizeof(wc));
	cache_tree_init(&wc.shared);
	btrfs_init_path(&path);

	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	/* Remember the root node so we can detect concurrent COW by repair */
	tree_node = tree_root->node;
		if (tree_node != tree_root->node) {
			/* Tree root changed under us: restart the scan */
			free_root_recs_tree(root_cache);
			btrfs_release_path(&path);
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(tree_root, &path);
			leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type == BTRFS_ROOT_ITEM_KEY &&
		    fs_root_objectid(key.objectid)) {
			/* Reloc trees must not enter the fs-root cache */
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
				tmp_root = btrfs_read_fs_root_no_cache(
						root->fs_info, &key);
				key.offset = (u64)-1;
				tmp_root = btrfs_read_fs_root(
						root->fs_info, &key);
			if (IS_ERR(tmp_root)) {
			ret = check_fs_root(tmp_root, root_cache, &wc);
			if (ret == -EAGAIN) {
				free_root_recs_tree(root_cache);
				btrfs_release_path(&path);
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
				btrfs_free_fs_root(tmp_root);
		} else if (key.type == BTRFS_ROOT_REF_KEY ||
			   key.type == BTRFS_ROOT_BACKREF_KEY) {
			process_root_ref(leaf, path.slots[0], &key,
	btrfs_release_path(&path);

	free_extent_cache_tree(&wc.shared);
	/* All shared nodes should have been consumed by the walks */
	if (!cache_tree_empty(&wc.shared))
		fprintf(stderr, "warning line %d\n", __LINE__);
	task_stop(ctx.info);
/*
 * Cross-check every backref attached to extent record @rec:
 *  - each backref must have been seen in the extent tree,
 *  - tree backrefs must have been seen from the referencing block,
 *  - data backref counts/bytenr/bytes must match the extent record,
 *  - the summed per-backref counts must equal the extent's refcount.
 * With @print_errs set, each mismatch is reported on stderr.
 * Returns non-zero if any inconsistency was found.
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *back;
	struct tree_backref *tback;
	struct data_backref *dback;

	while(cur != &rec->backrefs) {
		back = list_entry(cur, struct extent_backref, list);
		/* Backref never matched against an extent tree item */
		if (!back->found_extent_tree) {
			if (back->is_data) {
				dback = (struct data_backref *)back;
				fprintf(stderr, "Backref %llu %s %llu"
					" owner %llu offset %llu num_refs %lu"
					" not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
				tback = (struct tree_backref *)back;
				fprintf(stderr, "Backref %llu parent %llu"
					" root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
		/* Tree backref present in extent tree but never seen in use */
		if (!back->is_data && !back->found_ref) {
			tback = (struct tree_backref *)back;
			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		if (back->is_data) {
			dback = (struct data_backref *)back;
			/* Per-backref ref count disagreement */
			if (dback->found_ref != dback->num_refs) {
				fprintf(stderr, "Incorrect local backref count"
					" on %llu %s %llu owner %llu"
					" offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs, back);
			if (dback->disk_bytenr != rec->start) {
				fprintf(stderr, "Backref disk bytenr does not"
					" match extent record, bytenr=%llu, "
					"ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);
			if (dback->bytes != rec->nr) {
				fprintf(stderr, "Backref bytes do not match "
					"extent backref, bytenr=%llu, ref "
					"bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
		/* Accumulate the reference total for the global check below */
		if (!back->is_data) {
			dback = (struct data_backref *)back;
			found += dback->found_ref;
	if (found != rec->refs) {
		fprintf(stderr, "Incorrect global backref count "
			"on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
/*
 * Detach and free every backref hanging off extent record @rec.
 */
static int free_all_extent_backrefs(struct extent_record *rec)
	struct extent_backref *back;
	struct list_head *cur;
	while (!list_empty(&rec->backrefs)) {
		cur = rec->backrefs.next;
		back = list_entry(cur, struct extent_backref, list);
/*
 * Tear down an entire extent record cache: remove each record from the
 * tree and release it together with all of its backrefs.
 */
static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
				     struct cache_tree *extent_cache)
	struct cache_extent *cache;
	struct extent_record *rec;

	cache = first_cache_extent(extent_cache);
		rec = container_of(cache, struct extent_record, cache);
		remove_cache_extent(extent_cache, cache);
		free_all_extent_backrefs(rec);
/*
 * Release @rec early if it is fully verified: contents and owner checked,
 * ref counts consistent, no duplicates, and none of the structural error
 * flags (bad full backref, stripe crossing, wrong chunk type) set.
 * Otherwise the record stays cached for later error reporting.
 */
static int maybe_free_extent_rec(struct cache_tree *extent_cache,
				 struct extent_record *rec)
	if (rec->content_checked && rec->owner_ref_checked &&
	    rec->extent_item_refs == rec->refs && rec->refs > 0 &&
	    rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
	    !rec->bad_full_backref && !rec->crossing_stripes &&
	    !rec->wrong_chunk_type) {
		remove_cache_extent(extent_cache, &rec->cache);
		free_all_extent_backrefs(rec);
		list_del_init(&rec->list);
/*
 * Verify that tree block @buf really belongs to the root recorded as its
 * header owner.  Fast path: a matching non-full tree backref on @rec.
 * Slow path: search the owner's fs tree for the block's first key one
 * level up and confirm the parent actually points at @buf->start.
 * Returns 0 when the ownership checks out, 1 otherwise.
 */
static int check_owner_ref(struct btrfs_root *root,
			   struct extent_record *rec,
			   struct extent_buffer *buf)
	struct extent_backref *node;
	struct tree_backref *back;
	struct btrfs_root *ref_root;
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *parent;

	/* Fast path: an explicit backref from the claimed owner root */
	list_for_each_entry(node, &rec->backrefs, list) {
		if (!node->found_ref)
		if (node->full_backref)
		back = (struct tree_backref *)node;
		if (btrfs_header_owner(buf) == back->root)
	BUG_ON(rec->is_root);

	/* try to find the block by search corresponding fs tree */
	key.objectid = btrfs_header_owner(buf);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	ref_root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(ref_root))

	level = btrfs_header_level(buf);
		btrfs_item_key_to_cpu(buf, &key, 0);
		btrfs_node_key_to_cpu(buf, &key, 0);

	btrfs_init_path(&path);
	/* Stop the search one level above @buf so we can inspect its parent */
	path.lowest_level = level + 1;
	ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);

	parent = path.nodes[level + 1];
	if (parent && buf->start == btrfs_node_blockptr(parent,
						path.slots[level + 1]))

	btrfs_release_path(&path);
	return found ? 0 : 1;
/*
 * Return whether @rec carries a non-full tree backref owned by the extent
 * tree itself, i.e. the block belongs to the extent tree.
 */
static int is_extent_tree_record(struct extent_record *rec)
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct tree_backref *back;

	while(cur != &rec->backrefs) {
		node = list_entry(cur, struct extent_backref, list);
		back = (struct tree_backref *)node;
		if (node->full_backref)
		if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Note an unreadable tree block: if the block is part of the extent tree,
 * register it as a corrupt extent record (keyed by its parent key) so the
 * extent tree repair code can deal with it later.
 */
static int record_bad_block_io(struct btrfs_fs_info *info,
			       struct cache_tree *extent_cache,
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;

	cache = lookup_cache_extent(extent_cache, start, len);
	rec = container_of(cache, struct extent_record, cache);
	/* Only extent tree blocks are interesting for corruption repair */
	if (!is_extent_tree_record(rec))

	btrfs_disk_key_to_cpu(&key, &rec->parent_key);
	return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 of @buf to restore key order.
 *
 * For an internal node the two key/blockptr pairs are exchanged wholesale;
 * for a leaf the item headers, item data and keys are swapped individually
 * (item data stays in place, only offsets/sizes are exchanged).  When slot
 * 0 changes, the parent's low key is fixed up to match.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		/* Node: exchange the two key+blockptr records */
		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
			/* First key changed: propagate it to the parents —
			 * presumably guarded by slot == 0; TODO confirm */
			struct btrfs_disk_key key;
			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		/* Leaf: swap item headers, payloads and keys */
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		item2_data = malloc(item2_size);

		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		/* Cross-write the payloads, then swap the header metadata */
		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);

		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Restore ascending key order in the block at path->lowest_level by
 * swapping each adjacent out-of-order pair (a single bubble pass) and
 * marking the buffer dirty after each swap.
 */
static int fix_key_order(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path)
	struct extent_buffer *buf;
	struct btrfs_key k1, k2;
	int level = path->lowest_level;

	buf = path->nodes[level];
	for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
		/* Node vs leaf use different key accessors */
			btrfs_node_key_to_cpu(buf, &k1, i);
			btrfs_node_key_to_cpu(buf, &k2, i + 1);
			btrfs_item_key_to_cpu(buf, &k1, i);
			btrfs_item_key_to_cpu(buf, &k2, i + 1);
		/* Already ordered: nothing to do for this pair */
		if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
		ret = swap_values(root, path, buf, i);
		btrfs_mark_buffer_dirty(buf);
/*
 * Remove the corrupt item at @slot of leaf @buf by shifting the item
 * headers down and decrementing nritems — but only for item types whose
 * absence the rest of fsck can tolerate/rebuild.  Fixes up the parent's
 * low key when slot 0 is removed.
 */
static int delete_bogus_item(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct extent_buffer *buf, int slot)
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(buf);

	btrfs_item_key_to_cpu(buf, &key, slot);

	/* These are all the keys we can deal with missing. */
	if (key.type != BTRFS_DIR_INDEX_KEY &&
	    key.type != BTRFS_EXTENT_ITEM_KEY &&
	    key.type != BTRFS_METADATA_ITEM_KEY &&
	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
	    key.type != BTRFS_EXTENT_DATA_REF_KEY)

	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
	       (unsigned long long)key.objectid, key.type,
	       (unsigned long long)key.offset, slot, buf->start);
	/* Close the gap in the item header array */
	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
	btrfs_set_header_nritems(buf, nritems - 1);
		/* Slot 0 removed: fix the low key in the parents —
		 * presumably guarded by slot == 0; TODO confirm */
		struct btrfs_disk_key disk_key;

		btrfs_item_key(buf, &disk_key, 0);
		btrfs_fixup_low_keys(root, path, &disk_key, 1);
	btrfs_mark_buffer_dirty(buf);
/*
 * Repair bogus item data offsets in a leaf.
 *
 * Leaf item data is packed from the end of the block toward the headers:
 * item 0's data must end exactly at BTRFS_LEAF_DATA_SIZE and each later
 * item must end exactly where the previous item's data begins. For any
 * item violating that, either delete it (if its data would run off the
 * end of the leaf / overlap the previous item, via delete_bogus_item())
 * or shift its data up by the gap and rewrite its offset.
 *
 * NOTE(review): extract drops several lines (loop braces, error returns,
 * "if (!shift) continue" style guards); comments reflect visible code.
 */
4107 static int fix_item_offset(struct btrfs_trans_handle *trans,
4108 struct btrfs_root *root,
4109 struct btrfs_path *path)
4111 struct extent_buffer *buf;
4115 /* We should only get this for leaves */
4116 BUG_ON(path->lowest_level);
4117 buf = path->nodes[0];
4119 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4120 unsigned int shift = 0, offset;
/* First item: its data must end flush with the end of the leaf. */
4122 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4123 BTRFS_LEAF_DATA_SIZE(root)) {
4124 if (btrfs_item_end_nr(buf, i) >
4125 BTRFS_LEAF_DATA_SIZE(root)) {
4126 ret = delete_bogus_item(trans, root, path,
4130 fprintf(stderr, "item is off the end of the "
4131 "leaf, can't fix\n");
4135 shift = BTRFS_LEAF_DATA_SIZE(root) -
4136 btrfs_item_end_nr(buf, i);
/* Later items: data must abut the previous item's data start. */
4137 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4138 btrfs_item_offset_nr(buf, i - 1)) {
4139 if (btrfs_item_end_nr(buf, i) >
4140 btrfs_item_offset_nr(buf, i - 1)) {
4141 ret = delete_bogus_item(trans, root, path,
4145 fprintf(stderr, "items overlap, can't fix\n");
4149 shift = btrfs_item_offset_nr(buf, i - 1) -
4150 btrfs_item_end_nr(buf, i);
4155 printf("Shifting item nr %d by %u bytes in block %llu\n",
4156 i, shift, (unsigned long long)buf->start);
4157 offset = btrfs_item_offset_nr(buf, i);
/* Move the item data up by 'shift' and record its new offset. */
4158 memmove_extent_buffer(buf,
4159 btrfs_leaf_data(buf) + offset + shift,
4160 btrfs_leaf_data(buf) + offset,
4161 btrfs_item_size_nr(buf, i));
4162 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4164 btrfs_mark_buffer_dirty(buf);
4168 * We may have moved things, in which case we want to exit so we don't
4169 * write those changes out. Once we have proper abort functionality in
4170 * progs this can be changed to something nicer.
4177 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4178 * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are repairable here. For every
 * root that references the block (btrfs_find_all_roots), start a
 * transaction, search down to the block with skip_check_block set so the
 * broken block can still be COWed, then run fix_key_order() or
 * fix_item_offset() and commit.
 *
 * NOTE(review): extract omits error-path lines between the visible ones;
 * described from what is shown.
 */
4180 static int try_to_fix_bad_block(struct btrfs_root *root,
4181 struct extent_buffer *buf,
4182 enum btrfs_tree_block_status status)
4184 struct btrfs_trans_handle *trans;
4185 struct ulist *roots;
4186 struct ulist_node *node;
4187 struct btrfs_root *search_root;
4188 struct btrfs_path *path;
4189 struct ulist_iterator iter;
4190 struct btrfs_key root_key, key;
/* Anything else is not fixable by this routine. */
4193 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4194 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4197 path = btrfs_alloc_path();
4201 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4204 btrfs_free_path(path);
/* Try the fix once per root that references this block. */
4208 ULIST_ITER_INIT(&iter);
4209 while ((node = ulist_next(roots, &iter))) {
4210 root_key.objectid = node->val;
4211 root_key.type = BTRFS_ROOT_ITEM_KEY;
4212 root_key.offset = (u64)-1;
4214 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4221 trans = btrfs_start_transaction(search_root, 0);
4222 if (IS_ERR(trans)) {
4223 ret = PTR_ERR(trans);
/* skip_check_block lets the search reach the corrupt block. */
4227 path->lowest_level = btrfs_header_level(buf);
4228 path->skip_check_block = 1;
4229 if (path->lowest_level)
4230 btrfs_node_key_to_cpu(buf, &key, 0);
4232 btrfs_item_key_to_cpu(buf, &key, 0);
4233 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4236 btrfs_commit_transaction(trans, search_root);
4239 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4240 ret = fix_key_order(trans, search_root, path);
4241 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4242 ret = fix_item_offset(trans, search_root, path);
4244 btrfs_commit_transaction(trans, search_root);
4247 btrfs_release_path(path);
4248 btrfs_commit_transaction(trans, search_root);
4251 btrfs_free_path(path);
/*
 * Validate one tree block against its extent record.
 *
 * Looks up the extent_record covering the buffer, records its generation,
 * first key and level, then runs btrfs_check_leaf()/btrfs_check_node().
 * Non-clean blocks are (in repair mode, per the surrounding context) sent
 * through try_to_fix_bad_block(). Full-backref blocks skip the per-root
 * owner check; otherwise check_owner_ref() decides owner_ref_checked.
 *
 * NOTE(review): lines between the visible ones are missing from this
 * extract (e.g. the "if (repair)" guard is implied, not visible).
 */
4255 static int check_block(struct btrfs_root *root,
4256 struct cache_tree *extent_cache,
4257 struct extent_buffer *buf, u64 flags)
4259 struct extent_record *rec;
4260 struct cache_extent *cache;
4261 struct btrfs_key key;
4262 enum btrfs_tree_block_status status;
4266 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4269 rec = container_of(cache, struct extent_record, cache);
4270 rec->generation = btrfs_header_generation(buf);
4272 level = btrfs_header_level(buf);
4273 if (btrfs_header_nritems(buf) > 0) {
/* Record the block's first key for later cross-checks. */
4276 btrfs_item_key_to_cpu(buf, &key, 0);
4278 btrfs_node_key_to_cpu(buf, &key, 0);
4280 rec->info_objectid = key.objectid;
4282 rec->info_level = level;
4284 if (btrfs_is_leaf(buf))
4285 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4287 status = btrfs_check_node(root, &rec->parent_key, buf);
4289 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4291 status = try_to_fix_bad_block(root, buf, status);
4292 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4294 fprintf(stderr, "bad block %llu\n",
4295 (unsigned long long)buf->start);
4298 * Signal to callers we need to start the scan over
4299 * again since we'll have cow'ed blocks.
4304 rec->content_checked = 1;
4305 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4306 rec->owner_ref_checked = 1;
4308 ret = check_owner_ref(root, rec, buf);
4310 rec->owner_ref_checked = 1;
/* Drop the record now if everything about it has been verified. */
4314 maybe_free_extent_rec(extent_cache, rec);
/*
 * Linear search of rec->backrefs for a tree backref matching either
 * 'parent' (shared/full backref) or 'root' (keyed backref). Returns the
 * match or (per the missing tail of this extract) NULL when not found.
 */
4318 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4319 u64 parent, u64 root)
4321 struct list_head *cur = rec->backrefs.next;
4322 struct extent_backref *node;
4323 struct tree_backref *back;
4325 while(cur != &rec->backrefs) {
4326 node = list_entry(cur, struct extent_backref, list);
4330 back = (struct tree_backref *)node;
/* parent != 0 selects full backrefs; otherwise match by root id. */
4332 if (!node->full_backref)
4334 if (parent == back->parent)
4337 if (node->full_backref)
4339 if (back->root == root)
/*
 * Allocate and link a new tree_backref onto rec->backrefs. A non-zero
 * 'parent' makes it a full (shared) backref, otherwise it is keyed by
 * 'root'. NOTE(review): the malloc() result is used without a visible
 * NULL check in this extract — confirm against the full source.
 */
4346 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4347 u64 parent, u64 root)
4349 struct tree_backref *ref = malloc(sizeof(*ref));
4350 memset(&ref->node, 0, sizeof(ref->node));
4352 ref->parent = parent;
4353 ref->node.full_backref = 1;
4356 ref->node.full_backref = 0;
4358 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Find an existing data backref on 'rec' matching either 'parent'
 * (full backref) or the (root, owner, offset) triple. When found_ref is
 * set, a candidate whose recorded bytes/disk_bytenr disagree with the
 * caller's is deliberately NOT treated as a match — this forces a
 * duplicate backref so mismatching refs can be detected later (see the
 * comment in add_data_backref()).
 */
4363 static struct data_backref *find_data_backref(struct extent_record *rec,
4364 u64 parent, u64 root,
4365 u64 owner, u64 offset,
4367 u64 disk_bytenr, u64 bytes)
4369 struct list_head *cur = rec->backrefs.next;
4370 struct extent_backref *node;
4371 struct data_backref *back;
4373 while(cur != &rec->backrefs) {
4374 node = list_entry(cur, struct extent_backref, list);
4378 back = (struct data_backref *)node;
4380 if (!node->full_backref)
4382 if (parent == back->parent)
4385 if (node->full_backref)
4387 if (back->root == root && back->owner == owner &&
4388 back->offset == offset) {
/* Size/bytenr mismatch on an already-found ref: skip, don't match. */
4389 if (found_ref && node->found_ref &&
4390 (back->bytes != bytes ||
4391 back->disk_bytenr != disk_bytenr))
/*
 * Allocate and link a new data backref onto rec->backrefs; also bumps
 * rec->max_size when this ref's max_size is larger. A non-zero 'parent'
 * marks it full backref; otherwise it carries (root, owner, offset).
 * NOTE(review): malloc() result used without a visible NULL check in
 * this extract — confirm against the full source.
 */
4400 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4401 u64 parent, u64 root,
4402 u64 owner, u64 offset,
4405 struct data_backref *ref = malloc(sizeof(*ref));
4406 memset(&ref->node, 0, sizeof(ref->node));
4407 ref->node.is_data = 1;
4410 ref->parent = parent;
4413 ref->node.full_backref = 1;
4417 ref->offset = offset;
4418 ref->node.full_backref = 0;
4420 ref->bytes = max_size;
4423 list_add_tail(&ref->node.list, &rec->backrefs);
4424 if (max_size > rec->max_size)
4425 rec->max_size = max_size;
4429 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's kind (data vs metadata,
 * and for metadata SYSTEM vs METADATA as inferred from its first
 * backref's root) disagrees with the flags of the block group that
 * contains it.
 */
4430 static void check_extent_type(struct extent_record *rec)
4432 struct btrfs_block_group_cache *bg_cache;
4434 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4438 /* data extent, check chunk directly*/
4439 if (!rec->metadata) {
4440 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4441 rec->wrong_chunk_type = 1;
4445 /* metadata extent, check the obvious case first */
4446 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4447 BTRFS_BLOCK_GROUP_METADATA))) {
4448 rec->wrong_chunk_type = 1;
4453 * Check SYSTEM extent, as it's also marked as metadata, we can only
4454 * make sure it's a SYSTEM extent by its backref
4456 if (!list_empty(&rec->backrefs)) {
4457 struct extent_backref *node;
4458 struct tree_backref *tback;
4461 node = list_entry(rec->backrefs.next, struct extent_backref,
4463 if (node->is_data) {
4464 /* tree block shouldn't have data backref */
4465 rec->wrong_chunk_type = 1;
/* Chunk-tree ownership implies a SYSTEM block group. */
4468 tback = container_of(node, struct tree_backref, node);
4470 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4471 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4473 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4474 if (!(bg_cache->flags & bg_type))
4475 rec->wrong_chunk_type = 1;
/*
 * Insert or update the extent_record for [start, nr) in extent_cache.
 *
 * If a record already covers the range, merge the new information into
 * it: found_rec/duplicate tracking (mismatched starts become entries on
 * rec->dups and the duplicate_extents list), extent_item_refs with a
 * complaint on disagreement, parent key/generation, max_size, and the
 * metadata stripe-crossing check. Otherwise a fresh record is allocated,
 * initialized and inserted; insert failure marks it pre-checked.
 *
 * NOTE(review): many interior lines (error handling, dup bookkeeping,
 * the set_checked/is_root branches) are absent from this extract;
 * comments cover only the visible lines.
 */
4479 static int add_extent_rec(struct cache_tree *extent_cache,
4480 struct btrfs_key *parent_key, u64 parent_gen,
4481 u64 start, u64 nr, u64 extent_item_refs,
4482 int is_root, int inc_ref, int set_checked,
4483 int metadata, int extent_rec, u64 max_size)
4485 struct extent_record *rec;
4486 struct cache_extent *cache;
4490 cache = lookup_cache_extent(extent_cache, start, nr);
4492 rec = container_of(cache, struct extent_record, cache);
4496 rec->nr = max(nr, max_size);
4499 * We need to make sure to reset nr to whatever the extent
4500 * record says was the real size, this way we can compare it to
/* Mismatched start (or second real item) => track as duplicate. */
4504 if (start != rec->start || rec->found_rec) {
4505 struct extent_record *tmp;
4508 if (list_empty(&rec->list))
4509 list_add_tail(&rec->list,
4510 &duplicate_extents);
4513 * We have to do this song and dance in case we
4514 * find an extent record that falls inside of
4515 * our current extent record but does not have
4516 * the same objectid.
4518 tmp = malloc(sizeof(*tmp));
4522 tmp->max_size = max_size;
4525 tmp->metadata = metadata;
4526 tmp->extent_item_refs = extent_item_refs;
4527 INIT_LIST_HEAD(&tmp->list);
4528 list_add_tail(&tmp->list, &rec->dups);
4529 rec->num_duplicates++;
/* Two extent items claiming different refcounts: report it. */
4536 if (extent_item_refs && !dup) {
4537 if (rec->extent_item_refs) {
4538 fprintf(stderr, "block %llu rec "
4539 "extent_item_refs %llu, passed %llu\n",
4540 (unsigned long long)start,
4541 (unsigned long long)
4542 rec->extent_item_refs,
4543 (unsigned long long)extent_item_refs);
4545 rec->extent_item_refs = extent_item_refs;
4550 rec->content_checked = 1;
4551 rec->owner_ref_checked = 1;
4555 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4557 rec->parent_generation = parent_gen;
4559 if (rec->max_size < max_size)
4560 rec->max_size = max_size;
4563 * A metadata extent can't cross stripe_len boundary, otherwise
4564 * kernel scrub won't be able to handle it.
4565 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4568 if (metadata && check_crossing_stripes(rec->start,
4570 rec->crossing_stripes = 1;
4571 check_extent_type(rec);
4572 maybe_free_extent_rec(extent_cache, rec);
/* No existing record: build a new one from scratch. */
4575 rec = malloc(sizeof(*rec));
4577 rec->max_size = max_size;
4578 rec->nr = max(nr, max_size);
4579 rec->found_rec = !!extent_rec;
4580 rec->content_checked = 0;
4581 rec->owner_ref_checked = 0;
4582 rec->num_duplicates = 0;
4583 rec->metadata = metadata;
4584 rec->flag_block_full_backref = -1;
4585 rec->bad_full_backref = 0;
4586 rec->crossing_stripes = 0;
4587 rec->wrong_chunk_type = 0;
4588 INIT_LIST_HEAD(&rec->backrefs);
4589 INIT_LIST_HEAD(&rec->dups);
4590 INIT_LIST_HEAD(&rec->list);
4602 if (extent_item_refs)
4603 rec->extent_item_refs = extent_item_refs;
4605 rec->extent_item_refs = 0;
4608 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4610 memset(&rec->parent_key, 0, sizeof(*parent_key));
4613 rec->parent_generation = parent_gen;
4615 rec->parent_generation = 0;
4617 rec->cache.start = start;
4618 rec->cache.size = nr;
4619 ret = insert_cache_extent(extent_cache, &rec->cache);
4623 rec->content_checked = 1;
4624 rec->owner_ref_checked = 1;
4628 if (check_crossing_stripes(rec->start, rec->max_size))
4629 rec->crossing_stripes = 1;
4630 check_extent_type(rec);
/*
 * Record a tree-block backref for 'bytenr'. Creates a placeholder
 * extent record if none exists yet, then finds or allocates the matching
 * tree_backref. 'found_ref' distinguishes a ref seen while walking the
 * tree itself from one read out of the extent tree; seeing either twice
 * is reported as a duplicate.
 */
4634 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4635 u64 parent, u64 root, int found_ref)
4637 struct extent_record *rec;
4638 struct tree_backref *back;
4639 struct cache_extent *cache;
4641 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: insert a minimal one, then re-lookup. */
4643 add_extent_rec(extent_cache, NULL, 0, bytenr,
4644 1, 0, 0, 0, 0, 1, 0, 0);
4645 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4650 rec = container_of(cache, struct extent_record, cache);
4651 if (rec->start != bytenr) {
4655 back = find_tree_backref(rec, parent, root);
4657 back = alloc_tree_backref(rec, parent, root);
4660 if (back->node.found_ref) {
4661 fprintf(stderr, "Extent back ref already exists "
4662 "for %llu parent %llu root %llu \n",
4663 (unsigned long long)bytenr,
4664 (unsigned long long)parent,
4665 (unsigned long long)root);
4667 back->node.found_ref = 1;
4669 if (back->node.found_extent_tree) {
4670 fprintf(stderr, "Extent back ref already exists "
4671 "for %llu parent %llu root %llu \n",
4672 (unsigned long long)bytenr,
4673 (unsigned long long)parent,
4674 (unsigned long long)root);
4676 back->node.found_extent_tree = 1;
4678 check_extent_type(rec);
4679 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data-extent backref for 'bytenr', creating the placeholder
 * extent record and the data_backref as needed. When found_ref is set
 * the caller walked a real file extent item, so max_size is authoritative
 * (see the in-code comment about the fsync bug); otherwise the info came
 * from the extent tree and carries num_refs.
 */
4683 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4684 u64 parent, u64 root, u64 owner, u64 offset,
4685 u32 num_refs, int found_ref, u64 max_size)
4687 struct extent_record *rec;
4688 struct data_backref *back;
4689 struct cache_extent *cache;
4691 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: insert a minimal one, then re-lookup. */
4693 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4695 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4700 rec = container_of(cache, struct extent_record, cache);
4701 if (rec->max_size < max_size)
4702 rec->max_size = max_size;
4705 * If found_ref is set then max_size is the real size and must match the
4706 * existing refs. So if we have already found a ref then we need to
4707 * make sure that this ref matches the existing one, otherwise we need
4708 * to add a new backref so we can notice that the backrefs don't match
4709 * and we need to figure out who is telling the truth. This is to
4710 * account for that awful fsync bug I introduced where we'd end up with
4711 * a btrfs_file_extent_item that would have its length include multiple
4712 * prealloc extents or point inside of a prealloc extent.
4714 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4717 back = alloc_data_backref(rec, parent, root, owner, offset,
4721 BUG_ON(num_refs != 1);
4722 if (back->node.found_ref)
4723 BUG_ON(back->bytes != max_size);
4724 back->node.found_ref = 1;
4725 back->found_ref += 1;
4726 back->bytes = max_size;
4727 back->disk_bytenr = bytenr;
4729 rec->content_checked = 1;
4730 rec->owner_ref_checked = 1;
/* Extent-tree side: remember num_refs, complain on duplicates. */
4732 if (back->node.found_extent_tree) {
4733 fprintf(stderr, "Extent back ref already exists "
4734 "for %llu parent %llu root %llu "
4735 "owner %llu offset %llu num_refs %lu\n",
4736 (unsigned long long)bytenr,
4737 (unsigned long long)parent,
4738 (unsigned long long)root,
4739 (unsigned long long)owner,
4740 (unsigned long long)offset,
4741 (unsigned long)num_refs);
4743 back->num_refs = num_refs;
4744 back->node.found_extent_tree = 1;
4746 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, size) for processing: add it to 'seen' first (which
 * also acts as the dedup set) and only then to 'pending'.
 */
4750 static int add_pending(struct cache_tree *pending,
4751 struct cache_tree *seen, u64 bytenr, u32 size)
4754 ret = add_cache_extent(seen, bytenr, size);
4757 add_cache_extent(pending, bytenr, size);
/*
 * Fill bits[] with up to bits_nr extents to read next, preferring
 * read-ahead entries, then tree nodes near 'last' (backing up 32K to
 * pick up close-by blocks), then plain pending extents; finally tops up
 * from 'pending' with entries within 32K of the current batch to keep
 * reads clustered. Returns (per the missing tail) the number of bits
 * filled — NOTE(review): return statements are absent from this extract.
 */
4761 static int pick_next_pending(struct cache_tree *pending,
4762 struct cache_tree *reada,
4763 struct cache_tree *nodes,
4764 u64 last, struct block_info *bits, int bits_nr,
4767 unsigned long node_start = last;
4768 struct cache_extent *cache;
/* Read-ahead entries take absolute priority. */
4771 cache = search_cache_extent(reada, 0);
4773 bits[0].start = cache->start;
4774 bits[0].size = cache->size;
/* Back up a little so nearby earlier nodes get batched too. */
4779 if (node_start > 32768)
4780 node_start -= 32768;
4782 cache = search_cache_extent(nodes, node_start);
4784 cache = search_cache_extent(nodes, 0);
4787 cache = search_cache_extent(pending, 0);
4792 bits[ret].start = cache->start;
4793 bits[ret].size = cache->size;
4794 cache = next_cache_extent(cache);
4796 } while (cache && ret < bits_nr);
4802 bits[ret].start = cache->start;
4803 bits[ret].size = cache->size;
4804 cache = next_cache_extent(cache);
4806 } while (cache && ret < bits_nr);
/* Spare slots: pull in pending extents adjacent to this batch. */
4808 if (bits_nr - ret > 8) {
4809 u64 lookup = bits[0].start + bits[0].size;
4810 struct cache_extent *next;
4811 next = search_cache_extent(pending, lookup);
4813 if (next->start - lookup > 32768)
4815 bits[ret].start = next->start;
4816 bits[ret].size = next->size;
4817 lookup = next->start + next->size;
4821 next = next_cache_extent(next);
/* cache_tree_free_extents() callback: unlink and free a chunk_record. */
4829 static void free_chunk_record(struct cache_extent *cache)
4831 struct chunk_record *rec;
4833 rec = container_of(cache, struct chunk_record, cache);
4834 list_del_init(&rec->list);
4835 list_del_init(&rec->dextents);
/* Free every chunk_record in the chunk cache tree. */
4839 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4841 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* rb-tree teardown callback: free one device_record. */
4844 static void free_device_record(struct rb_node *node)
4846 struct device_record *rec;
4848 rec = container_of(node, struct device_record, node);
4852 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert a block group record into the tree's extent cache and, on
 * success, append it to the tree's block_groups list.
 */
4854 int insert_block_group_record(struct block_group_tree *tree,
4855 struct block_group_record *bg_rec)
4859 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4863 list_add_tail(&bg_rec->list, &tree->block_groups);
/* cache_tree_free_extents() callback: unlink and free a block group record. */
4867 static void free_block_group_record(struct cache_extent *cache)
4869 struct block_group_record *rec;
4871 rec = container_of(cache, struct block_group_record, cache);
4872 list_del_init(&rec->list);
/* Free every record in a block_group_tree. */
4876 void free_block_group_tree(struct block_group_tree *tree)
4878 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert a device extent record; on success it starts life on both
 * orphan lists until its chunk and device are matched up.
 */
4881 int insert_device_extent_record(struct device_extent_tree *tree,
4882 struct device_extent_record *de_rec)
4887 * Device extent is a bit different from the other extents, because
4888 * the extents which belong to the different devices may have the
4889 * same start and size, so we need use the special extent cache
4890 * search/insert functions.
4892 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4896 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4897 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: drop a device extent record from
 * whichever orphan lists it still sits on, then free it.
 */
4901 static void free_device_extent_record(struct cache_extent *cache)
4903 struct device_extent_record *rec;
4905 rec = container_of(cache, struct device_extent_record, cache);
4906 if (!list_empty(&rec->chunk_list))
4907 list_del_init(&rec->chunk_list);
4908 if (!list_empty(&rec->device_list))
4909 list_del_init(&rec->device_list);
/* Free every record in a device_extent_tree. */
4913 void free_device_extent_tree(struct device_extent_tree *tree)
4915 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4918 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 (pre-2.6.29 format) extent ref item: low objectids are
 * tree roots (tree backref), everything else is file data (data backref).
 */
4919 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4920 struct extent_buffer *leaf, int slot)
4922 struct btrfs_extent_ref_v0 *ref0;
4923 struct btrfs_key key;
4925 btrfs_item_key_to_cpu(leaf, &key, slot);
4926 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4927 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4928 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4930 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4931 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record (variable-sized: header plus one
 * stripe entry per chunk stripe) from a CHUNK_ITEM at (leaf, slot).
 * Copies the chunk geometry and per-stripe devid/offset/uuid. Exits the
 * program on allocation failure.
 */
4937 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4938 struct btrfs_key *key,
4941 struct btrfs_chunk *ptr;
4942 struct chunk_record *rec;
4945 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4946 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Record size depends on the stripe count (flexible trailing array). */
4948 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4950 fprintf(stderr, "memory allocation failed\n");
4954 INIT_LIST_HEAD(&rec->list);
4955 INIT_LIST_HEAD(&rec->dextents);
/* Chunks are cached by logical start (key->offset). */
4958 rec->cache.start = key->offset;
4959 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4961 rec->generation = btrfs_header_generation(leaf);
4963 rec->objectid = key->objectid;
4964 rec->type = key->type;
4965 rec->offset = key->offset;
4967 rec->length = rec->cache.size;
4968 rec->owner = btrfs_chunk_owner(leaf, ptr);
4969 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4970 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4971 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4972 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4973 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4974 rec->num_stripes = num_stripes;
4975 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4977 for (i = 0; i < rec->num_stripes; ++i) {
4978 rec->stripes[i].devid =
4979 btrfs_stripe_devid_nr(leaf, ptr, i);
4980 rec->stripes[i].offset =
4981 btrfs_stripe_offset_nr(leaf, ptr, i);
4982 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4983 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse one CHUNK_ITEM into a chunk_record and insert it into
 * chunk_cache, reporting a duplicate if the range is already present.
 */
4990 static int process_chunk_item(struct cache_tree *chunk_cache,
4991 struct btrfs_key *key, struct extent_buffer *eb,
4994 struct chunk_record *rec;
4997 rec = btrfs_new_chunk_record(eb, key, slot);
4998 ret = insert_cache_extent(chunk_cache, &rec->cache);
5000 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5001 rec->offset, rec->length);
/*
 * Parse one DEV_ITEM into a device_record and insert it into the
 * devid-keyed rb-tree, reporting a duplicate devid if already present.
 */
5008 static int process_device_item(struct rb_root *dev_cache,
5009 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5011 struct btrfs_dev_item *ptr;
5012 struct device_record *rec;
5015 ptr = btrfs_item_ptr(eb,
5016 slot, struct btrfs_dev_item);
5018 rec = malloc(sizeof(*rec));
5020 fprintf(stderr, "memory allocation failed\n");
5024 rec->devid = key->offset;
5025 rec->generation = btrfs_header_generation(eb);
5027 rec->objectid = key->objectid;
5028 rec->type = key->type;
5029 rec->offset = key->offset;
/* Devid from the item body overrides the one taken from the key. */
5031 rec->devid = btrfs_device_id(eb, ptr);
5032 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5033 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5035 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5037 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from a BLOCK_GROUP_ITEM at
 * (leaf, slot); cached by logical start (objectid) and length (offset).
 * Exits the program on allocation failure.
 */
5044 struct block_group_record *
5045 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5048 struct btrfs_block_group_item *ptr;
5049 struct block_group_record *rec;
5051 rec = calloc(1, sizeof(*rec));
5053 fprintf(stderr, "memory allocation failed\n");
5057 rec->cache.start = key->objectid;
5058 rec->cache.size = key->offset;
5060 rec->generation = btrfs_header_generation(leaf);
5062 rec->objectid = key->objectid;
5063 rec->type = key->type;
5064 rec->offset = key->offset;
5066 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5067 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5069 INIT_LIST_HEAD(&rec->list);
/*
 * Parse one BLOCK_GROUP_ITEM into a record and insert it into
 * block_group_cache, reporting a duplicate if the range already exists.
 */
5074 static int process_block_group_item(struct block_group_tree *block_group_cache,
5075 struct btrfs_key *key,
5076 struct extent_buffer *eb, int slot)
5078 struct block_group_record *rec;
5081 rec = btrfs_new_block_group_record(eb, key, slot);
5082 ret = insert_block_group_record(block_group_cache, rec);
5084 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5085 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from a DEV_EXTENT item at
 * (leaf, slot); cached by (devid, physical offset, length). Exits the
 * program on allocation failure.
 */
5092 struct device_extent_record *
5093 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5094 struct btrfs_key *key, int slot)
5096 struct device_extent_record *rec;
5097 struct btrfs_dev_extent *ptr;
5099 rec = calloc(1, sizeof(*rec));
5101 fprintf(stderr, "memory allocation failed\n");
/* Keyed by device id (objectid) and physical start (offset). */
5105 rec->cache.objectid = key->objectid;
5106 rec->cache.start = key->offset;
5108 rec->generation = btrfs_header_generation(leaf);
5110 rec->objectid = key->objectid;
5111 rec->type = key->type;
5112 rec->offset = key->offset;
5114 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5115 rec->chunk_objecteid =
5116 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5118 btrfs_dev_extent_chunk_offset(leaf, ptr);
5119 rec->length = btrfs_dev_extent_length(leaf, ptr);
5120 rec->cache.size = rec->length;
5122 INIT_LIST_HEAD(&rec->chunk_list);
5123 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse one DEV_EXTENT item into a record and insert it into
 * dev_extent_cache, reporting a duplicate if already present.
 */
5129 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5130 struct btrfs_key *key, struct extent_buffer *eb,
5133 struct device_extent_record *rec;
5136 rec = btrfs_new_device_extent_record(eb, key, slot);
5137 ret = insert_device_extent_record(dev_extent_cache, rec);
5140 "Device extent[%llu, %llu, %llu] existed.\n",
5141 rec->objectid, rec->offset, rec->length);
/*
 * Parse one EXTENT_ITEM / METADATA_ITEM, registering the extent record
 * and every inline backref it carries.
 *
 * METADATA_ITEM keys store the level in key.offset, so the byte count is
 * the tree-block size (root->leafsize) rather than key.offset. Items
 * smaller than btrfs_extent_item are the v0 format and are converted via
 * refs_v0 only. After add_extent_rec(), the inline refs are walked:
 * tree block refs, shared block refs, data refs and shared data refs
 * each feed add_tree_backref()/add_data_backref().
 *
 * NOTE(review): some loop/termination lines are missing in this extract.
 */
5148 static int process_extent_item(struct btrfs_root *root,
5149 struct cache_tree *extent_cache,
5150 struct extent_buffer *eb, int slot)
5152 struct btrfs_extent_item *ei;
5153 struct btrfs_extent_inline_ref *iref;
5154 struct btrfs_extent_data_ref *dref;
5155 struct btrfs_shared_data_ref *sref;
5156 struct btrfs_key key;
5160 u32 item_size = btrfs_item_size_nr(eb, slot);
5166 btrfs_item_key_to_cpu(eb, &key, slot);
/* Skinny metadata items encode the level, not the size, in offset. */
5168 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5170 num_bytes = root->leafsize;
5172 num_bytes = key.offset;
5175 if (item_size < sizeof(*ei)) {
5176 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5177 struct btrfs_extent_item_v0 *ei0;
5178 BUG_ON(item_size != sizeof(*ei0));
5179 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5180 refs = btrfs_extent_refs_v0(eb, ei0);
5184 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5185 num_bytes, refs, 0, 0, 0, metadata, 1,
5189 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5190 refs = btrfs_extent_refs(eb, ei);
5191 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5196 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5197 refs, 0, 0, 0, metadata, 1, num_bytes);
/* Walk the inline references packed after the extent item. */
5199 ptr = (unsigned long)(ei + 1);
5200 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5201 key.type == BTRFS_EXTENT_ITEM_KEY)
5202 ptr += sizeof(struct btrfs_tree_block_info);
5204 end = (unsigned long)ei + item_size;
5206 iref = (struct btrfs_extent_inline_ref *)ptr;
5207 type = btrfs_extent_inline_ref_type(eb, iref);
5208 offset = btrfs_extent_inline_ref_offset(eb, iref);
5210 case BTRFS_TREE_BLOCK_REF_KEY:
5211 add_tree_backref(extent_cache, key.objectid,
5214 case BTRFS_SHARED_BLOCK_REF_KEY:
5215 add_tree_backref(extent_cache, key.objectid,
5218 case BTRFS_EXTENT_DATA_REF_KEY:
5219 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5220 add_data_backref(extent_cache, key.objectid, 0,
5221 btrfs_extent_data_ref_root(eb, dref),
5222 btrfs_extent_data_ref_objectid(eb,
5224 btrfs_extent_data_ref_offset(eb, dref),
5225 btrfs_extent_data_ref_count(eb, dref),
5228 case BTRFS_SHARED_DATA_REF_KEY:
5229 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5230 add_data_backref(extent_cache, key.objectid, offset,
5232 btrfs_shared_data_ref_count(eb, sref),
5236 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5237 key.objectid, key.type, num_bytes);
5240 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the free space cache has an entry exactly covering
 * [offset, offset+bytes) in this block group.
 *
 * Superblock copies are never free space, so the range is first trimmed
 * around any superblock mirror that lands inside it (mapped to logical
 * via btrfs_rmap_block); a mirror in the middle recurses on the left
 * half and continues with the right. The surviving range must match a
 * free-space entry's offset and length exactly, which is then unlinked
 * so leftovers can be detected by the caller.
 *
 * NOTE(review): loop headers over the rmap results and error returns are
 * missing from this extract.
 */
5247 static int check_cache_range(struct btrfs_root *root,
5248 struct btrfs_block_group_cache *cache,
5249 u64 offset, u64 bytes)
5251 struct btrfs_free_space *entry;
5257 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5258 bytenr = btrfs_sb_offset(i);
5259 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5260 cache->key.objectid, bytenr, 0,
5261 &logical, &nr, &stripe_len);
/* Superblock stripe entirely outside the range: ignore it. */
5266 if (logical[nr] + stripe_len <= offset)
5268 if (offset + bytes <= logical[nr])
5270 if (logical[nr] == offset) {
5271 if (stripe_len >= bytes) {
5275 bytes -= stripe_len;
5276 offset += stripe_len;
5277 } else if (logical[nr] < offset) {
5278 if (logical[nr] + stripe_len >=
5283 bytes = (offset + bytes) -
5284 (logical[nr] + stripe_len);
5285 offset = logical[nr] + stripe_len;
5288 * Could be tricky, the super may land in the
5289 * middle of the area we're checking. First
5290 * check the easiest case, it's at the end.
5292 if (logical[nr] + stripe_len >=
5294 bytes = logical[nr] - offset;
5298 /* Check the left side */
5299 ret = check_cache_range(root, cache,
5301 logical[nr] - offset);
5307 /* Now we continue with the right side */
5308 bytes = (offset + bytes) -
5309 (logical[nr] + stripe_len);
5310 offset = logical[nr] + stripe_len;
5317 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5319 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5320 offset, offset+bytes);
5324 if (entry->offset != offset) {
5325 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5330 if (entry->bytes != bytes) {
5331 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5332 bytes, entry->bytes, offset);
/* Consume the entry; anything left at the end signals an error. */
5336 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check one block group's free space cache against the extent
 * tree: every gap between allocated extents inside the group must be
 * covered by a matching free-space entry (validated and unlinked by
 * check_cache_range()); entries remaining afterwards indicate space the
 * cache thinks is free but the extent tree does not.
 *
 * NOTE(review): loop structure and some error paths are missing from
 * this extract.
 */
5341 static int verify_space_cache(struct btrfs_root *root,
5342 struct btrfs_block_group_cache *cache)
5344 struct btrfs_path *path;
5345 struct extent_buffer *leaf;
5346 struct btrfs_key key;
5350 path = btrfs_alloc_path();
5354 root = root->fs_info->extent_root;
/* Never scan below the first superblock copy. */
5356 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5358 key.objectid = last;
5360 key.type = BTRFS_EXTENT_ITEM_KEY;
5362 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5367 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5368 ret = btrfs_next_leaf(root, path);
5376 leaf = path->nodes[0];
5377 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5378 if (key.objectid >= cache->key.offset + cache->key.objectid)
5380 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5381 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Adjacent extent: just advance 'last' past it. */
5386 if (last == key.objectid) {
5387 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5388 last = key.objectid + key.offset;
5390 last = key.objectid + root->leafsize;
/* Gap between 'last' and this extent must be in the cache. */
5395 ret = check_cache_range(root, cache, last,
5396 key.objectid - last);
5399 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5400 last = key.objectid + key.offset;
5402 last = key.objectid + root->leafsize;
/* Tail of the block group past the final extent. */
5406 if (last < cache->key.objectid + cache->key.offset)
5407 ret = check_cache_range(root, cache, last,
5408 cache->key.objectid +
5409 cache->key.offset - last);
5412 btrfs_free_path(path);
5415 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5416 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space cache of every block group. Skips verification
 * entirely when the cache generation in the superblock is stale (the
 * kernel would invalidate it anyway). Drives the progress-reporting
 * task when enabled, loads each group's cache, and runs
 * verify_space_cache() on it; any failure makes the function return
 * -EINVAL.
 */
5424 static int check_space_cache(struct btrfs_root *root)
5426 struct btrfs_block_group_cache *cache;
5427 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* -1ULL means no cache was ever written; otherwise generations match. */
5431 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5432 btrfs_super_generation(root->fs_info->super_copy) !=
5433 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5434 printf("cache and super generation don't match, space cache "
5435 "will be invalidated\n");
5439 if (ctx.progress_enabled) {
5440 ctx.tp = TASK_FREE_SPACE;
5441 task_start(ctx.info);
5445 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5449 start = cache->key.objectid + cache->key.offset;
5450 if (!cache->free_space_ctl) {
5451 if (btrfs_init_free_space_ctl(cache,
5452 root->sectorsize)) {
/* Start from a clean slate before loading the on-disk cache. */
5457 btrfs_remove_free_space_cache(cache);
5460 ret = load_free_space_cache(root->fs_info, cache);
5464 ret = verify_space_cache(root, cache);
5466 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5467 cache->key.objectid);
5472 task_stop(ctx.info);
5474 return error ? -EINVAL : 0;
/*
 * Verify the data checksums for [bytenr, bytenr+num_bytes) against the
 * csum values stored in the csum item at leaf_offset in 'eb'.
 *
 * Reads the data in large chunks (retrying on other mirrors via
 * 'mirror'), then checksums each sectorsize piece and compares it with
 * the expected value read out of the leaf. On mismatch it reports and,
 * if more copies exist, retries with the next mirror.
 *
 * NOTE(review): extract omits csum re-init, chunk-size clamping and the
 * cleanup/return path; comments describe only the visible lines.
 */
5477 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5478 u64 num_bytes, unsigned long leaf_offset,
5479 struct extent_buffer *eb) {
5482 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5484 unsigned long csum_offset;
5488 u64 data_checked = 0;
/* Csums only cover whole sectors. */
5494 if (num_bytes % root->sectorsize)
5497 data = malloc(num_bytes);
5501 while (offset < num_bytes) {
5504 read_len = num_bytes - offset;
5505 /* read as much space once a time */
5506 ret = read_extent_data(root, data + offset,
5507 bytenr + offset, &read_len, mirror);
5511 /* verify every 4k data's checksum */
5512 while (data_checked < read_len) {
5514 tmp = offset + data_checked;
5516 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5517 csum, root->sectorsize);
5518 btrfs_csum_final(csum, (char *)&csum);
/* Expected csum lives at a fixed stride inside the csum item. */
5520 csum_offset = leaf_offset +
5521 tmp / root->sectorsize * csum_size;
5522 read_extent_buffer(eb, (char *)&csum_expected,
5523 csum_offset, csum_size);
5524 /* try another mirror */
5525 if (csum != csum_expected) {
5526 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5527 mirror, bytenr + tmp,
5528 csum, csum_expected);
5529 num_copies = btrfs_num_copies(
5530 &root->fs_info->mapping_tree,
5532 if (mirror < num_copies - 1) {
5537 data_checked += root->sectorsize;
/*
 * check_extent_exists - make sure the byte range [bytenr, bytenr+num_bytes)
 * is fully covered by EXTENT_ITEMs in the extent tree.
 *
 * Searches the extent tree for records overlapping the range, trims the
 * range as covering extents are found (recursing for the "extent in the
 * middle of our range" case), and reports any leftover uncovered bytes.
 * Returns non-zero on lookup errors; prints diagnostics on gaps.
 *
 * NOTE(review): some statements are elided in this view; comments only
 * cover the visible logic.
 */
5546 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5549 struct btrfs_path *path;
5550 struct extent_buffer *leaf;
5551 struct btrfs_key key;
5554 path = btrfs_alloc_path();
5556 fprintf(stderr, "Error allocing path\n");
/* search for the last possible extent item at this bytenr */
5560 key.objectid = bytenr;
5561 key.type = BTRFS_EXTENT_ITEM_KEY;
5562 key.offset = (u64)-1;
5565 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5568 fprintf(stderr, "Error looking up extent record %d\n", ret);
5569 btrfs_free_path(path);
5572 if (path->slots[0] > 0) {
5575 ret = btrfs_prev_leaf(root, path);
5578 } else if (ret > 0) {
5585 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5588 * Block group items come before extent items if they have the same
5589 * bytenr, so walk back one more just in case. Dear future traveler,
5590 * first congrats on mastering time travel. Now if it's not too much
5591 * trouble could you go back to 2006 and tell Chris to make the
5592 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5593 * EXTENT_ITEM_KEY please?
5595 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5596 if (path->slots[0] > 0) {
5599 ret = btrfs_prev_leaf(root, path);
5602 } else if (ret > 0) {
5607 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* walk forward leaf by leaf looking at each extent item */
5611 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5612 ret = btrfs_next_leaf(root, path);
5614 fprintf(stderr, "Error going to next leaf "
5616 btrfs_free_path(path);
5622 leaf = path->nodes[0];
5623 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5624 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* this extent ends before our range starts; skip it */
5628 if (key.objectid + key.offset < bytenr) {
5632 if (key.objectid > bytenr + num_bytes)
/* extent starts exactly at our range: consume the covered prefix */
5635 if (key.objectid == bytenr) {
5636 if (key.offset >= num_bytes) {
5640 num_bytes -= key.offset;
5641 bytenr += key.offset;
/* extent starts before our range: trim the front overlap */
5642 } else if (key.objectid < bytenr) {
5643 if (key.objectid + key.offset >= bytenr + num_bytes) {
5647 num_bytes = (bytenr + num_bytes) -
5648 (key.objectid + key.offset);
5649 bytenr = key.objectid + key.offset;
5651 if (key.objectid + key.offset < bytenr + num_bytes) {
5652 u64 new_start = key.objectid + key.offset;
5653 u64 new_bytes = bytenr + num_bytes - new_start;
5656 * Weird case, the extent is in the middle of
5657 * our range, we'll have to search one side
5658 * and then the other. Not sure if this happens
5659 * in real life, but no harm in coding it up
5660 * anyway just in case.
5662 btrfs_release_path(path);
5663 ret = check_extent_exists(root, new_start,
5666 fprintf(stderr, "Right section didn't "
5670 num_bytes = key.objectid - bytenr;
5673 num_bytes = key.objectid - bytenr;
/* anything left uncovered means csums exist with no backing extent */
5680 if (num_bytes && !ret) {
5681 fprintf(stderr, "There are no extents for csum range "
5682 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5686 btrfs_free_path(path);
/*
 * check_csums - walk the whole checksum tree and validate it.
 *
 * Iterates over every EXTENT_CSUM item, accumulating contiguous csum
 * ranges.  When a discontinuity is found, verifies the finished range is
 * backed by extent records (check_extent_exists); when --check-data-csum
 * is enabled, also verifies the actual data checksums (check_extent_csums).
 *
 * NOTE(review): loop structure and some error paths are elided in this
 * view; comments only cover the visible statements.
 */
5690 static int check_csums(struct btrfs_root *root)
5692 struct btrfs_path *path;
5693 struct extent_buffer *leaf;
5694 struct btrfs_key key;
5695 u64 offset = 0, num_bytes = 0;
5696 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5700 unsigned long leaf_offset;
/* all csum items live in the dedicated csum tree */
5702 root = root->fs_info->csum_root;
5703 if (!extent_buffer_uptodate(root->node)) {
5704 fprintf(stderr, "No valid csum tree found\n");
5708 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5709 key.type = BTRFS_EXTENT_CSUM_KEY;
5712 path = btrfs_alloc_path();
5716 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5718 fprintf(stderr, "Error searching csum tree %d\n", ret);
5719 btrfs_free_path(path);
5723 if (ret > 0 && path->slots[0])
5728 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5729 ret = btrfs_next_leaf(root, path);
5731 fprintf(stderr, "Error going to next leaf "
5738 leaf = path->nodes[0];
5740 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5741 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* each csum_size entry in the item covers one sector of data */
5746 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5747 csum_size) * root->sectorsize;
5748 if (!check_data_csum)
5749 goto skip_csum_check;
5750 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5751 ret = check_extent_csums(root, key.offset, data_len,
5757 offset = key.offset;
/* range discontinuity: close out the previous run and verify it */
5758 } else if (key.offset != offset + num_bytes) {
5759 ret = check_extent_exists(root, offset, num_bytes);
5761 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5762 "there is no extent record\n",
5763 offset, offset+num_bytes);
5766 offset = key.offset;
5769 num_bytes += data_len;
5773 btrfs_free_path(path);
/*
 * is_dropped_key - lexicographic comparison of @key against a snapshot
 * drop progress key: (objectid, type, offset) compared in that order.
 * Used to skip tree items that were already dropped below drop_key.
 */
5777 static int is_dropped_key(struct btrfs_key *key,
5778 struct btrfs_key *drop_key) {
5779 if (key->objectid < drop_key->objectid)
5781 else if (key->objectid == drop_key->objectid) {
5782 if (key->type < drop_key->type)
5784 else if (key->type == drop_key->type) {
5785 if (key->offset < drop_key->offset)
5793 * Here are the rules for FULL_BACKREF.
5795 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5796 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5798 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5799 * if it happened after the relocation occurred since we'll have dropped the
5800 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5801 * have no real way to know for sure.
5803 * We process the blocks one root at a time, and we start from the lowest root
5804 * objectid and go to the highest. So we can just lookup the owner backref for
5805 * the record and if we don't find it then we know it doesn't exist and we have
5808 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5809 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5810 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - decide whether FULL_BACKREF should be set for @buf
 * when the extent tree itself cannot be trusted (see the rules in the
 * comment above).  Looks up the extent record for buf->start, then uses
 * the owning root @ri, the RELOC header flag, and the presence of an
 * owner tree backref to compute *flags, flagging the record as
 * bad_full_backref when the recorded state disagrees.
 *
 * NOTE(review): several branches are elided in this view.
 */
5812 static int calc_extent_flag(struct btrfs_root *root,
5813 struct cache_tree *extent_cache,
5814 struct extent_buffer *buf,
5815 struct root_item_record *ri,
5818 struct extent_record *rec;
5819 struct cache_extent *cache;
5820 struct tree_backref *tback;
5823 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5824 /* we have added this extent before */
5826 rec = container_of(cache, struct extent_record, cache);
5829 * Except file/reloc tree, we can not have
5832 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* root node of the owning tree can never be FULL_BACKREF */
5837 if (buf->start == ri->bytenr)
5840 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5843 owner = btrfs_header_owner(buf);
5844 if (owner == ri->objectid)
5847 tback = find_tree_backref(rec, 0, owner);
/* recorded state says "not full backref" but we computed otherwise */
5852 if (rec->flag_block_full_backref != -1 &&
5853 rec->flag_block_full_backref != 0)
5854 rec->bad_full_backref = 1;
5857 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5858 if (rec->flag_block_full_backref != -1 &&
5859 rec->flag_block_full_backref != 1)
5860 rec->bad_full_backref = 1;
/*
 * run_next_block - process one tree block from the pending/reada/nodes
 * queues during the extent tree scan.
 *
 * Picks the next block to visit, issues readahead for upcoming blocks,
 * reads the block, resolves its FULL_BACKREF flag (from the extent tree
 * or, with --init-extent-tree, via calc_extent_flag), then:
 *  - for leaves: dispatches every item by key type, recording chunks,
 *    devices, block groups, dev extents, backrefs, csum bytes and
 *    orphans, and accounting file extent data usage;
 *  - for nodes: records child extents/backrefs and queues children.
 * Also accumulates the global btree usage statistics.
 *
 * NOTE(review): many statements are elided in this view; comments only
 * describe visible logic.
 */
5864 static int run_next_block(struct btrfs_root *root,
5865 struct block_info *bits,
5868 struct cache_tree *pending,
5869 struct cache_tree *seen,
5870 struct cache_tree *reada,
5871 struct cache_tree *nodes,
5872 struct cache_tree *extent_cache,
5873 struct cache_tree *chunk_cache,
5874 struct rb_root *dev_cache,
5875 struct block_group_tree *block_group_cache,
5876 struct device_extent_tree *dev_extent_cache,
5877 struct root_item_record *ri)
5879 struct extent_buffer *buf;
5880 struct extent_record *rec = NULL;
5891 struct btrfs_key key;
5892 struct cache_extent *cache;
5895 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5896 bits_nr, &reada_bits);
/* prime readahead for the blocks we are about to visit */
5901 for(i = 0; i < nritems; i++) {
5902 ret = add_cache_extent(reada, bits[i].start,
5907 /* fixme, get the parent transid */
5908 readahead_tree_block(root, bits[i].start,
5912 *last = bits[0].start;
5913 bytenr = bits[0].start;
5914 size = bits[0].size;
/* drop the chosen block from every queue it might sit in */
5916 cache = lookup_cache_extent(pending, bytenr, size);
5918 remove_cache_extent(pending, cache);
5921 cache = lookup_cache_extent(reada, bytenr, size);
5923 remove_cache_extent(reada, cache);
5926 cache = lookup_cache_extent(nodes, bytenr, size);
5928 remove_cache_extent(nodes, cache);
5931 cache = lookup_cache_extent(extent_cache, bytenr, size);
5933 rec = container_of(cache, struct extent_record, cache);
5934 gen = rec->parent_generation;
5937 /* fixme, get the real parent transid */
5938 buf = read_tree_block(root, bytenr, size, gen);
5939 if (!extent_buffer_uptodate(buf)) {
5940 record_bad_block_io(root->fs_info,
5941 extent_cache, bytenr, size);
5945 nritems = btrfs_header_nritems(buf);
/* trust the extent tree for flags unless we're rebuilding it */
5948 if (!init_extent_tree) {
5949 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5950 btrfs_header_level(buf), 1, NULL,
5953 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5955 fprintf(stderr, "Couldn't calc extent flags\n");
5956 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5961 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5963 fprintf(stderr, "Couldn't calc extent flags\n");
5964 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5968 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5970 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5971 ri->objectid == btrfs_header_owner(buf)) {
5973 * Ok we got to this block from it's original owner and
5974 * we have FULL_BACKREF set. Relocation can leave
5975 * converted blocks over so this is altogether possible,
5976 * however it's not possible if the generation > the
5977 * last snapshot, so check for this case.
5979 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5980 btrfs_header_generation(buf) > ri->last_snapshot) {
5981 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5982 rec->bad_full_backref = 1;
5987 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5988 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5989 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5990 rec->bad_full_backref = 1;
/* remember the resolved state on the extent record */
5994 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5995 rec->flag_block_full_backref = 1;
5999 rec->flag_block_full_backref = 0;
6001 owner = btrfs_header_owner(buf);
6004 ret = check_block(root, extent_cache, buf, flags);
/* leaf: walk every item and dispatch on its key type */
6008 if (btrfs_is_leaf(buf)) {
6009 btree_space_waste += btrfs_leaf_free_space(root, buf);
6010 for (i = 0; i < nritems; i++) {
6011 struct btrfs_file_extent_item *fi;
6012 btrfs_item_key_to_cpu(buf, &key, i);
6013 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6014 process_extent_item(root, extent_cache, buf,
6018 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6019 process_extent_item(root, extent_cache, buf,
6023 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6025 btrfs_item_size_nr(buf, i);
6028 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6029 process_chunk_item(chunk_cache, &key, buf, i);
6032 if (key.type == BTRFS_DEV_ITEM_KEY) {
6033 process_device_item(dev_cache, &key, buf, i);
6036 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6037 process_block_group_item(block_group_cache,
6041 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6042 process_device_extent_item(dev_extent_cache,
6047 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6048 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6049 process_extent_ref_v0(extent_cache, buf, i);
6056 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6057 add_tree_backref(extent_cache, key.objectid, 0,
6061 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6062 add_tree_backref(extent_cache, key.objectid,
6066 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6067 struct btrfs_extent_data_ref *ref;
6068 ref = btrfs_item_ptr(buf, i,
6069 struct btrfs_extent_data_ref);
6070 add_data_backref(extent_cache,
6072 btrfs_extent_data_ref_root(buf, ref),
6073 btrfs_extent_data_ref_objectid(buf,
6075 btrfs_extent_data_ref_offset(buf, ref),
6076 btrfs_extent_data_ref_count(buf, ref),
6077 0, root->sectorsize);
6080 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6081 struct btrfs_shared_data_ref *ref;
6082 ref = btrfs_item_ptr(buf, i,
6083 struct btrfs_shared_data_ref);
6084 add_data_backref(extent_cache,
6085 key.objectid, key.offset, 0, 0, 0,
6086 btrfs_shared_data_ref_count(buf, ref),
6087 0, root->sectorsize);
/* orphan items get queued for deletion during repair */
6090 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6091 struct bad_item *bad;
6093 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6097 bad = malloc(sizeof(struct bad_item));
6100 INIT_LIST_HEAD(&bad->list);
6101 memcpy(&bad->key, &key,
6102 sizeof(struct btrfs_key));
6103 bad->root_id = owner;
6104 list_add_tail(&bad->list, &delete_items);
6107 if (key.type != BTRFS_EXTENT_DATA_KEY)
6109 fi = btrfs_item_ptr(buf, i,
6110 struct btrfs_file_extent_item);
6111 if (btrfs_file_extent_type(buf, fi) ==
6112 BTRFS_FILE_EXTENT_INLINE)
/* holes have no disk extent and no backref to record */
6114 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6117 data_bytes_allocated +=
6118 btrfs_file_extent_disk_num_bytes(buf, fi);
6119 if (data_bytes_allocated < root->sectorsize) {
6122 data_bytes_referenced +=
6123 btrfs_file_extent_num_bytes(buf, fi);
6124 add_data_backref(extent_cache,
6125 btrfs_file_extent_disk_bytenr(buf, fi),
6126 parent, owner, key.objectid, key.offset -
6127 btrfs_file_extent_offset(buf, fi), 1, 1,
6128 btrfs_file_extent_disk_num_bytes(buf, fi));
/* interior node: record children and queue them for the walk */
6132 struct btrfs_key first_key;
6134 first_key.objectid = 0;
6137 btrfs_item_key_to_cpu(buf, &first_key, 0);
6138 level = btrfs_header_level(buf);
6139 for (i = 0; i < nritems; i++) {
6140 ptr = btrfs_node_blockptr(buf, i);
6141 size = btrfs_level_size(root, level - 1);
6142 btrfs_node_key_to_cpu(buf, &key, i);
6144 if ((level == ri->drop_level)
6145 && is_dropped_key(&key, &ri->drop_key)) {
6149 ret = add_extent_rec(extent_cache, &key,
6150 btrfs_node_ptr_generation(buf, i),
6151 ptr, size, 0, 0, 1, 0, 1, 0,
6155 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6158 add_pending(nodes, seen, ptr, size);
6160 add_pending(pending, seen, ptr, size);
6163 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6164 nritems) * sizeof(struct btrfs_key_ptr);
/* global accounting of btree space by owning tree */
6166 total_btree_bytes += buf->len;
6167 if (fs_root_objectid(btrfs_header_owner(buf)))
6168 total_fs_tree_bytes += buf->len;
6169 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6170 total_extent_tree_bytes += buf->len;
6171 if (!found_old_backref &&
6172 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6173 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6174 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6175 found_old_backref = 1;
6177 free_extent_buffer(buf);
/*
 * add_root_to_pending - seed the block walk with a tree root.
 *
 * Queues @buf on the nodes queue (interior node) or pending queue (leaf),
 * records its extent, and adds the appropriate tree backref: a shared
 * (parent-based) backref for reloc trees / pre-mixed-backref blocks,
 * otherwise an owner-root backref keyed by @objectid.
 */
6181 static int add_root_to_pending(struct extent_buffer *buf,
6182 struct cache_tree *extent_cache,
6183 struct cache_tree *pending,
6184 struct cache_tree *seen,
6185 struct cache_tree *nodes,
6188 if (btrfs_header_level(buf) > 0)
6189 add_pending(nodes, seen, buf->start, buf->len);
6191 add_pending(pending, seen, buf->start, buf->len);
6192 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6193 0, 1, 1, 0, 1, 0, buf->len);
6195 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6196 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6197 add_tree_backref(extent_cache, buf->start, buf->start,
6200 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6204 /* as we fix the tree, we might be deleting blocks that
6205 * we're tracking for repair. This hook makes sure we
6206 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook - keep fsck's extent cache consistent while repairing.
 *
 * Called when the repair code frees an extent: finds the cached extent
 * record and decrements / clears the matching data or tree backref so we
 * don't later "fix" references we deleted ourselves.  Data extents are
 * distinguished by owner >= BTRFS_FIRST_FREE_OBJECTID.  Finally lets
 * maybe_free_extent_rec() drop the record if nothing references it.
 *
 * NOTE(review): some statements are elided in this view.
 */
6208 static int free_extent_hook(struct btrfs_trans_handle *trans,
6209 struct btrfs_root *root,
6210 u64 bytenr, u64 num_bytes, u64 parent,
6211 u64 root_objectid, u64 owner, u64 offset,
6214 struct extent_record *rec;
6215 struct cache_extent *cache;
6217 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6219 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6220 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6224 rec = container_of(cache, struct extent_record, cache);
6226 struct data_backref *back;
6227 back = find_data_backref(rec, parent, root_objectid, owner,
6228 offset, 1, bytenr, num_bytes);
6231 if (back->node.found_ref) {
6232 back->found_ref -= refs_to_drop;
6234 rec->refs -= refs_to_drop;
6236 if (back->node.found_extent_tree) {
6237 back->num_refs -= refs_to_drop;
6238 if (rec->extent_item_refs)
6239 rec->extent_item_refs -= refs_to_drop;
6241 if (back->found_ref == 0)
6242 back->node.found_ref = 0;
6243 if (back->num_refs == 0)
6244 back->node.found_extent_tree = 0;
6246 if (!back->node.found_extent_tree && back->node.found_ref) {
6247 list_del(&back->node.list);
/* tree block path: a tree backref carries at most one reference */
6251 struct tree_backref *back;
6252 back = find_tree_backref(rec, parent, root_objectid);
6255 if (back->node.found_ref) {
6258 back->node.found_ref = 0;
6260 if (back->node.found_extent_tree) {
6261 if (rec->extent_item_refs)
6262 rec->extent_item_refs--;
6263 back->node.found_extent_tree = 0;
6265 if (!back->node.found_extent_tree && back->node.found_ref) {
6266 list_del(&back->node.list);
6270 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records - remove every extent-tree item for @bytenr.
 *
 * Repeatedly searches the extent tree backwards from (bytenr, max key)
 * and deletes any extent item, metadata item, or backref item whose
 * objectid matches @bytenr.  When an EXTENT_ITEM or METADATA_ITEM is
 * removed, the owning block group's used-bytes accounting is adjusted
 * via btrfs_update_block_group().
 *
 * NOTE(review): parts of the search loop are elided in this view.
 */
6275 static int delete_extent_records(struct btrfs_trans_handle *trans,
6276 struct btrfs_root *root,
6277 struct btrfs_path *path,
6278 u64 bytenr, u64 new_len)
6280 struct btrfs_key key;
6281 struct btrfs_key found_key;
6282 struct extent_buffer *leaf;
6287 key.objectid = bytenr;
6289 key.offset = (u64)-1;
6292 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6299 if (path->slots[0] == 0)
6305 leaf = path->nodes[0];
6306 slot = path->slots[0];
6308 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6309 if (found_key.objectid != bytenr)
/* only extent items and backref items are fair game for deletion */
6312 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6313 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6314 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6315 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6316 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6317 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6318 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6319 btrfs_release_path(path);
/* step the search key just below the item we skipped */
6320 if (found_key.type == 0) {
6321 if (found_key.offset == 0)
6323 key.offset = found_key.offset - 1;
6324 key.type = found_key.type;
6326 key.type = found_key.type - 1;
6327 key.offset = (u64)-1;
6331 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6332 found_key.objectid, found_key.type, found_key.offset);
6334 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6337 btrfs_release_path(path);
6339 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6340 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* metadata items encode length implicitly as one leaf */
6341 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6342 found_key.offset : root->leafsize;
6344 ret = btrfs_update_block_group(trans, root, bytenr,
6351 btrfs_release_path(path);
6356 * for a single backref, this will allocate a new extent
6357 * and add the backref to it.
/*
 * record_extent - (re)insert an extent item and one of its backrefs.
 *
 * If @allocated says the extent item is missing, inserts a fresh
 * EXTENT_ITEM (with tree-block info for metadata) and fixes the block
 * group accounting.  Then re-adds the reference described by @back via
 * btrfs_inc_extent_ref(): once per found file-extent ref for data
 * backrefs, once for tree backrefs.
 *
 * NOTE(review): several statements and the error paths are elided in
 * this view; comments describe only the visible logic.
 */
6359 static int record_extent(struct btrfs_trans_handle *trans,
6360 struct btrfs_fs_info *info,
6361 struct btrfs_path *path,
6362 struct extent_record *rec,
6363 struct extent_backref *back,
6364 int allocated, u64 flags)
6367 struct btrfs_root *extent_root = info->extent_root;
6368 struct extent_buffer *leaf;
6369 struct btrfs_key ins_key;
6370 struct btrfs_extent_item *ei;
6371 struct tree_backref *tback;
6372 struct data_backref *dback;
6373 struct btrfs_tree_block_info *bi;
/* metadata extents are always at least one leaf in size */
6376 rec->max_size = max_t(u64, rec->max_size,
6377 info->extent_root->leafsize);
6380 u32 item_size = sizeof(*ei);
6383 item_size += sizeof(*bi);
6385 ins_key.objectid = rec->start;
6386 ins_key.offset = rec->max_size;
6387 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6389 ret = btrfs_insert_empty_item(trans, extent_root, path,
6390 &ins_key, item_size);
6394 leaf = path->nodes[0];
6395 ei = btrfs_item_ptr(leaf, path->slots[0],
6396 struct btrfs_extent_item);
6398 btrfs_set_extent_refs(leaf, ei, 0);
6399 btrfs_set_extent_generation(leaf, ei, rec->generation);
6401 if (back->is_data) {
6402 btrfs_set_extent_flags(leaf, ei,
6403 BTRFS_EXTENT_FLAG_DATA);
6405 struct btrfs_disk_key copy_key;;
/* metadata: fill in the tree_block_info that follows the item */
6407 tback = (struct tree_backref *)back;
6408 bi = (struct btrfs_tree_block_info *)(ei + 1);
6409 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6412 btrfs_set_disk_key_objectid(&copy_key,
6413 rec->info_objectid);
6414 btrfs_set_disk_key_type(&copy_key, 0);
6415 btrfs_set_disk_key_offset(&copy_key, 0);
6417 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6418 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6420 btrfs_set_extent_flags(leaf, ei,
6421 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6424 btrfs_mark_buffer_dirty(leaf);
6425 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6426 rec->max_size, 1, 0);
6429 btrfs_release_path(path);
6432 if (back->is_data) {
6436 dback = (struct data_backref *)back;
6437 if (back->full_backref)
6438 parent = dback->parent;
/* add one ref per file-extent reference we actually found */
6442 for (i = 0; i < dback->found_ref; i++) {
6443 /* if parent != 0, we're doing a full backref
6444 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6445 * just makes the backref allocator create a data
6448 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6449 rec->start, rec->max_size,
6453 BTRFS_FIRST_FREE_OBJECTID :
6459 fprintf(stderr, "adding new data backref"
6460 " on %llu %s %llu owner %llu"
6461 " offset %llu found %d\n",
6462 (unsigned long long)rec->start,
6463 back->full_backref ?
6465 back->full_backref ?
6466 (unsigned long long)parent :
6467 (unsigned long long)dback->root,
6468 (unsigned long long)dback->owner,
6469 (unsigned long long)dback->offset,
6474 tback = (struct tree_backref *)back;
6475 if (back->full_backref)
6476 parent = tback->parent;
6480 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6481 rec->start, rec->max_size,
6482 parent, tback->root, 0, 0);
6483 fprintf(stderr, "adding new tree backref on "
6484 "start %llu len %llu parent %llu root %llu\n",
6485 rec->start, rec->max_size, parent, tback->root);
6488 btrfs_release_path(path);
/*
 * extent_entry - one candidate (bytenr, bytes) extent location voted for
 * by data backrefs during verify_backrefs(); linked into a local list.
 * (Other fields of this struct are not visible in this view.)
 */
6492 struct extent_entry {
6497 struct list_head list;
/*
 * find_entry - linear scan of @entries for the extent_entry matching both
 * @bytenr and @bytes exactly; the visible code falls through when no
 * match is found.
 */
6500 static struct extent_entry *find_entry(struct list_head *entries,
6501 u64 bytenr, u64 bytes)
6503 struct extent_entry *entry = NULL;
6505 list_for_each_entry(entry, entries, list) {
6506 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - pick the candidate extent location with the
 * most agreeing (and not fully broken) backref votes.  Entries whose
 * broken count equals their total count are distrusted entirely; ties
 * on the vote count keep the search going so an ambiguous winner is not
 * returned.
 */
6513 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6515 struct extent_entry *entry, *best = NULL, *prev = NULL;
6517 list_for_each_entry(entry, entries, list) {
6524 * If there are as many broken entries as entries then we know
6525 * not to trust this particular entry.
6527 if (entry->broken == entry->count)
6531 * If our current entry == best then we can't be sure our best
6532 * is really the best, so we need to keep searching.
6534 if (best && best->count == entry->count) {
6540 /* Prev == entry, not good enough, have to keep searching */
6541 if (!prev->broken && prev->count == entry->count)
6545 best = (prev->count > entry->count) ? prev : entry;
6546 else if (best->count < entry->count)
/*
 * repair_ref - rewrite one file extent item so it points at the agreed
 * extent location @entry.
 *
 * Resolves the owning fs root from dback->root, walks forward from
 * (owner, EXTENT_DATA, offset) to the file extent whose disk bytenr and
 * length match the backref, then in a transaction cows down to the item
 * and fixes disk_bytenr/offset/disk_num_bytes (and ram_bytes when not
 * compressed).  Cases it cannot repair (compressed mismatch, ref outside
 * the entry's range) are reported with a request for a btrfs-image.
 *
 * NOTE(review): loop boundaries and some error paths are elided in this
 * view; comments describe only visible statements.
 */
6554 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6555 struct data_backref *dback, struct extent_entry *entry)
6557 struct btrfs_trans_handle *trans;
6558 struct btrfs_root *root;
6559 struct btrfs_file_extent_item *fi;
6560 struct extent_buffer *leaf;
6561 struct btrfs_key key;
6565 key.objectid = dback->root;
6566 key.type = BTRFS_ROOT_ITEM_KEY;
6567 key.offset = (u64)-1;
6568 root = btrfs_read_fs_root(info, &key);
6570 fprintf(stderr, "Couldn't find root for our ref\n");
6575 * The backref points to the original offset of the extent if it was
6576 * split, so we need to search down to the offset we have and then walk
6577 * forward until we find the backref we're looking for.
6579 key.objectid = dback->owner;
6580 key.type = BTRFS_EXTENT_DATA_KEY;
6581 key.offset = dback->offset;
6582 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6584 fprintf(stderr, "Error looking up ref %d\n", ret);
6589 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6590 ret = btrfs_next_leaf(root, path);
6592 fprintf(stderr, "Couldn't find our ref, next\n");
6596 leaf = path->nodes[0];
6597 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6598 if (key.objectid != dback->owner ||
6599 key.type != BTRFS_EXTENT_DATA_KEY) {
6600 fprintf(stderr, "Couldn't find our ref, search\n");
6603 fi = btrfs_item_ptr(leaf, path->slots[0],
6604 struct btrfs_file_extent_item);
6605 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6606 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* found the file extent this backref describes */
6608 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6613 btrfs_release_path(path);
6615 trans = btrfs_start_transaction(root, 1);
6617 return PTR_ERR(trans);
6620 * Ok we have the key of the file extent we want to fix, now we can cow
6621 * down to the thing and fix it.
6623 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6625 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6626 key.objectid, key.type, key.offset, ret);
6630 fprintf(stderr, "Well that's odd, we just found this key "
6631 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6636 leaf = path->nodes[0];
6637 fi = btrfs_item_ptr(leaf, path->slots[0],
6638 struct btrfs_file_extent_item);
/* compressed extents can't be re-pointed without more work */
6640 if (btrfs_file_extent_compression(leaf, fi) &&
6641 dback->disk_bytenr != entry->bytenr) {
6642 fprintf(stderr, "Ref doesn't match the record start and is "
6643 "compressed, please take a btrfs-image of this file "
6644 "system and send it to a btrfs developer so they can "
6645 "complete this functionality for bytenr %Lu\n",
6646 dback->disk_bytenr);
6651 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6652 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* ref starts after the winning entry: shift offset forward */
6653 } else if (dback->disk_bytenr > entry->bytenr) {
6654 u64 off_diff, offset;
6656 off_diff = dback->disk_bytenr - entry->bytenr;
6657 offset = btrfs_file_extent_offset(leaf, fi);
6658 if (dback->disk_bytenr + offset +
6659 btrfs_file_extent_num_bytes(leaf, fi) >
6660 entry->bytenr + entry->bytes) {
6661 fprintf(stderr, "Ref is past the entry end, please "
6662 "take a btrfs-image of this file system and "
6663 "send it to a btrfs developer, ref %Lu\n",
6664 dback->disk_bytenr);
6669 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6670 btrfs_set_file_extent_offset(leaf, fi, offset);
/* ref starts before the winning entry: shift offset backward */
6671 } else if (dback->disk_bytenr < entry->bytenr) {
6674 offset = btrfs_file_extent_offset(leaf, fi);
6675 if (dback->disk_bytenr + offset < entry->bytenr) {
6676 fprintf(stderr, "Ref is before the entry start, please"
6677 " take a btrfs-image of this file system and "
6678 "send it to a btrfs developer, ref %Lu\n",
6679 dback->disk_bytenr);
6684 offset += dback->disk_bytenr;
6685 offset -= entry->bytenr;
6686 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6687 btrfs_set_file_extent_offset(leaf, fi, offset);
6690 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6693 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6694 * only do this if we aren't using compression, otherwise it's a
6697 if (!btrfs_file_extent_compression(leaf, fi))
6698 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6700 printf("ram bytes may be wrong?\n");
6701 btrfs_mark_buffer_dirty(leaf);
6703 err = btrfs_commit_transaction(trans, root);
6704 btrfs_release_path(path);
6705 return ret ? ret : err;
/*
 * verify_backrefs - reconcile disagreeing data backrefs for one extent.
 *
 * Collects one extent_entry per distinct (disk_bytenr, bytes) claimed by
 * the data backrefs of @rec, counting votes.  If they disagree, picks
 * the majority winner (falling back to the extent record itself on an
 * even split), then calls repair_ref() on every backref that doesn't
 * match the winner.  Frees the temporary entry list before returning.
 *
 * NOTE(review): some statements are elided in this view; comments only
 * cover visible logic.
 */
6708 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6709 struct extent_record *rec)
6711 struct extent_backref *back;
6712 struct data_backref *dback;
6713 struct extent_entry *entry, *best = NULL;
6716 int broken_entries = 0;
6721 * Metadata is easy and the backrefs should always agree on bytenr and
6722 * size, if not we've got bigger issues.
6727 list_for_each_entry(back, &rec->backrefs, list) {
6728 if (back->full_backref || !back->is_data)
6731 dback = (struct data_backref *)back;
6734 * We only pay attention to backrefs that we found a real
6737 if (dback->found_ref == 0)
6741 * For now we only catch when the bytes don't match, not the
6742 * bytenr. We can easily do this at the same time, but I want
6743 * to have a fs image to test on before we just add repair
6744 * functionality willy-nilly so we know we won't screw up the
6748 entry = find_entry(&entries, dback->disk_bytenr,
/* first vote for this (bytenr, bytes) pair: create its entry */
6751 entry = malloc(sizeof(struct extent_entry));
6756 memset(entry, 0, sizeof(*entry));
6757 entry->bytenr = dback->disk_bytenr;
6758 entry->bytes = dback->bytes;
6759 list_add_tail(&entry->list, &entries);
6764 * If we only have on entry we may think the entries agree when
6765 * in reality they don't so we have to do some extra checking.
6767 if (dback->disk_bytenr != rec->start ||
6768 dback->bytes != rec->nr || back->broken)
6779 /* Yay all the backrefs agree, carry on good sir */
6780 if (nr_entries <= 1 && !mismatch)
6783 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6784 "%Lu\n", rec->start);
6787 * First we want to see if the backrefs can agree amongst themselves who
6788 * is right, so figure out which one of the entries has the highest
6791 best = find_most_right_entry(&entries);
6794 * Ok so we may have an even split between what the backrefs think, so
6795 * this is where we use the extent ref to see what it thinks.
6798 entry = find_entry(&entries, rec->start, rec->nr);
6799 if (!entry && (!broken_entries || !rec->found_rec)) {
6800 fprintf(stderr, "Backrefs don't agree with each other "
6801 "and extent record doesn't agree with anybody,"
6802 " so we can't fix bytenr %Lu bytes %Lu\n",
6803 rec->start, rec->nr);
6806 } else if (!entry) {
6808 * Ok our backrefs were broken, we'll assume this is the
6809 * correct value and add an entry for this range.
6811 entry = malloc(sizeof(struct extent_entry));
6816 memset(entry, 0, sizeof(*entry));
6817 entry->bytenr = rec->start;
6818 entry->bytes = rec->nr;
6819 list_add_tail(&entry->list, &entries);
6823 best = find_most_right_entry(&entries);
6825 fprintf(stderr, "Backrefs and extent record evenly "
6826 "split on who is right, this is going to "
6827 "require user input to fix bytenr %Lu bytes "
6828 "%Lu\n", rec->start, rec->nr);
6835 * I don't think this can happen currently as we'll abort() if we catch
6836 * this case higher up, but in case somebody removes that we still can't
6837 * deal with it properly here yet, so just bail out of that's the case.
6839 if (best->bytenr != rec->start) {
6840 fprintf(stderr, "Extent start and backref starts don't match, "
6841 "please use btrfs-image on this file system and send "
6842 "it to a btrfs developer so they can make fsck fix "
6843 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6844 rec->start, rec->nr);
6850 * Ok great we all agreed on an extent record, let's go find the real
6851 * references and fix up the ones that don't match.
6853 list_for_each_entry(back, &rec->backrefs, list) {
6854 if (back->full_backref || !back->is_data)
6857 dback = (struct data_backref *)back;
6860 * Still ignoring backrefs that don't have a real ref attached
6863 if (dback->found_ref == 0)
/* already matches the winner; nothing to repair */
6866 if (dback->bytes == best->bytes &&
6867 dback->disk_bytenr == best->bytenr)
6870 ret = repair_ref(info, path, dback, best);
6876 * Ok we messed with the actual refs, which means we need to drop our
6877 * entire cache and go back and rescan. I know this is a huge pain and
6878 * adds a lot of extra work, but it's the only way to be safe. Once all
6879 * the backrefs agree we may not need to do anything to the extent
/* free the temporary vote list */
6884 while (!list_empty(&entries)) {
6885 entry = list_entry(entries.next, struct extent_entry, list);
6886 list_del_init(&entry->list);
/*
 * process_duplicates - collapse duplicate extent records for one range.
 *
 * If @rec has no real extent item behind it but exactly one duplicate
 * does, promote that duplicate ("good") to be the canonical record:
 * move rec's backrefs onto it, absorb any further overlapping records
 * found in the cache (queuing true duplicates for deletion), and
 * re-insert it.  Returns 1 when the result has no duplicates left,
 * else 0 (more work needed).
 *
 * NOTE(review): a few statements are elided in this view.
 */
6892 static int process_duplicates(struct btrfs_root *root,
6893 struct cache_tree *extent_cache,
6894 struct extent_record *rec)
6896 struct extent_record *good, *tmp;
6897 struct cache_extent *cache;
6901 * If we found a extent record for this extent then return, or if we
6902 * have more than one duplicate we are likely going to need to delete
6905 if (rec->found_rec || rec->num_duplicates > 1)
6908 /* Shouldn't happen but just in case */
6909 BUG_ON(!rec->num_duplicates);
6912 * So this happens if we end up with a backref that doesn't match the
6913 * actual extent entry. So either the backref is bad or the extent
6914 * entry is bad. Either way we want to have the extent_record actually
6915 * reflect what we found in the extent_tree, so we need to take the
6916 * duplicate out and use that as the extent_record since the only way we
6917 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6919 remove_cache_extent(extent_cache, &rec->cache);
/* promote the single duplicate to be the canonical record */
6921 good = list_entry(rec->dups.next, struct extent_record, list);
6922 list_del_init(&good->list);
6923 INIT_LIST_HEAD(&good->backrefs);
6924 INIT_LIST_HEAD(&good->dups);
6925 good->cache.start = good->start;
6926 good->cache.size = good->nr;
6927 good->content_checked = 0;
6928 good->owner_ref_checked = 0;
6929 good->num_duplicates = 0;
6930 good->refs = rec->refs;
6931 list_splice_init(&rec->backrefs, &good->backrefs);
/* absorb any other cached records overlapping the promoted range */
6933 cache = lookup_cache_extent(extent_cache, good->start,
6937 tmp = container_of(cache, struct extent_record, cache);
6940 * If we find another overlapping extent and it's found_rec is
6941 * set then it's a duplicate and we need to try and delete
6944 if (tmp->found_rec || tmp->num_duplicates > 0) {
6945 if (list_empty(&good->list))
6946 list_add_tail(&good->list,
6947 &duplicate_extents);
6948 good->num_duplicates += tmp->num_duplicates + 1;
6949 list_splice_init(&tmp->dups, &good->dups);
6950 list_del_init(&tmp->list);
6951 list_add_tail(&tmp->list, &good->dups);
6952 remove_cache_extent(extent_cache, &tmp->cache);
6957 * Ok we have another non extent item backed extent rec, so lets
6958 * just add it to this extent and carry on like we did above.
6960 good->refs += tmp->refs;
6961 list_splice_init(&tmp->backrefs, &good->backrefs);
6962 remove_cache_extent(extent_cache, &tmp->cache);
6965 ret = insert_cache_extent(extent_cache, &good->cache);
6968 return good->num_duplicates ? 0 : 1;
/*
 * Delete on-disk EXTENT_ITEMs for duplicate extent records that were not
 * chosen as the canonical ("good") record by process_duplicates().
 *
 * Builds a local delete_list of the records to remove, deletes each
 * corresponding BTRFS_EXTENT_ITEM_KEY from the extent root inside a
 * transaction, then frees the list entries.
 *
 * Returns the number of items deleted (nr_del) on success, or a negative
 * error.  NOTE(review): several lines are elided in this excerpt, including
 * the initial selection of 'good' before the covering-record scan.
 */
6971 static int delete_duplicate_records(struct btrfs_root *root,
6972 struct extent_record *rec)
6974 struct btrfs_trans_handle *trans;
6975 LIST_HEAD(delete_list);
6976 struct btrfs_path *path;
6977 struct extent_record *tmp, *good, *n;
6980 struct btrfs_key key;
6982 path = btrfs_alloc_path();
6989 /* Find the record that covers all of the duplicates. */
6990 list_for_each_entry(tmp, &rec->dups, list) {
6991 if (good->start < tmp->start)
6993 if (good->nr > tmp->nr)
6996 if (tmp->start + tmp->nr < good->start + good->nr) {
6997 fprintf(stderr, "Ok we have overlapping extents that "
6998 "aren't completely covered by eachother, this "
6999 "is going to require more careful thought. "
7000 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7001 tmp->start, tmp->nr, good->start, good->nr);
/* Everything that is not the covering record gets queued for deletion. */
7008 list_add_tail(&rec->list, &delete_list);
7010 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7013 list_move_tail(&tmp->list, &delete_list);
7016 root = root->fs_info->extent_root;
7017 trans = btrfs_start_transaction(root, 1);
7018 if (IS_ERR(trans)) {
7019 ret = PTR_ERR(trans);
7023 list_for_each_entry(tmp, &delete_list, list) {
7024 if (tmp->found_rec == 0)
7026 key.objectid = tmp->start;
7027 key.type = BTRFS_EXTENT_ITEM_KEY;
7028 key.offset = tmp->nr;
7030 /* Shouldn't happen but just in case */
7031 if (tmp->metadata) {
7032 fprintf(stderr, "Well this shouldn't happen, extent "
7033 "record overlaps but is metadata? "
7034 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7038 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7044 ret = btrfs_del_item(trans, root, path);
7047 btrfs_release_path(path);
7050 err = btrfs_commit_transaction(trans, root);
/* Free the record structures on both lists regardless of outcome. */
7054 while (!list_empty(&delete_list)) {
7055 tmp = list_entry(delete_list.next, struct extent_record, list);
7056 list_del_init(&tmp->list);
7062 while (!list_empty(&rec->dups)) {
7063 tmp = list_entry(rec->dups.next, struct extent_record, list);
7064 list_del_init(&tmp->list);
7068 btrfs_free_path(path);
7070 if (!ret && !nr_del)
7071 rec->num_duplicates = 0;
7073 return ret ? ret : nr_del;
/*
 * For each data backref on @rec that was never matched to a real file
 * extent, look up the owning fs-tree and search for the EXTENT_DATA item
 * it claims to describe.  If one is found and its disk bytenr is not
 * already covered by another extent record, adopt its bytenr/size into the
 * backref so verify_backrefs() can later reconcile the extent item.
 *
 * Returns 0 on success or a negative error from the root/tree lookups.
 */
7076 static int find_possible_backrefs(struct btrfs_fs_info *info,
7077 struct btrfs_path *path,
7078 struct cache_tree *extent_cache,
7079 struct extent_record *rec)
7081 struct btrfs_root *root;
7082 struct extent_backref *back;
7083 struct data_backref *dback;
7084 struct cache_extent *cache;
7085 struct btrfs_file_extent_item *fi;
7086 struct btrfs_key key;
7090 list_for_each_entry(back, &rec->backrefs, list) {
7091 /* Don't care about full backrefs (poor unloved backrefs) */
7092 if (back->full_backref || !back->is_data)
7095 dback = (struct data_backref *)back;
7097 /* We found this one, we don't need to do a lookup */
7098 if (dback->found_ref)
/* Resolve the subvolume root that the backref points into. */
7101 key.objectid = dback->root;
7102 key.type = BTRFS_ROOT_ITEM_KEY;
7103 key.offset = (u64)-1;
7105 root = btrfs_read_fs_root(info, &key);
7107 /* No root, definitely a bad ref, skip */
7108 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7110 /* Other err, exit */
7112 return PTR_ERR(root);
/* Look for the file extent item named by (owner, offset). */
7114 key.objectid = dback->owner;
7115 key.type = BTRFS_EXTENT_DATA_KEY;
7116 key.offset = dback->offset;
7117 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7119 btrfs_release_path(path);
7122 /* Didn't find it, we can carry on */
7127 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7128 struct btrfs_file_extent_item);
7129 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7130 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7131 btrfs_release_path(path);
7132 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7134 struct extent_record *tmp;
7135 tmp = container_of(cache, struct extent_record, cache);
7138 * If we found an extent record for the bytenr for this
7139 * particular backref then we can't add it to our
7140 * current extent record. We only want to add backrefs
7141 * that don't have a corresponding extent item in the
7142 * extent tree since they likely belong to this record
7143 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk location found in the fs tree. */
7149 dback->found_ref += 1;
7150 dback->disk_bytenr = bytenr;
7151 dback->bytes = bytes;
7154 * Set this so the verify backref code knows not to trust the
7155 * values in this backref.
7164 * Record orphan data refs into their corresponding roots.
7166 * Return 0 if the extent item contains a data ref and it was recorded.
7167 * Return 1 if the extent item contains no useful data ref.
7168 * In that case, it may contain only a shared_dataref or metadata backref,
7169 * or the file extent already exists (that case should be handled by the
7171 * Return <0 if something goes wrong.
/* See the comment block above for the return-value contract. */
7173 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7174 struct extent_record *rec)
7176 struct btrfs_key key;
7177 struct btrfs_root *dest_root;
7178 struct extent_backref *back;
7179 struct data_backref *dback;
7180 struct orphan_data_extent *orphan;
7181 struct btrfs_path *path;
7182 int recorded_data_ref = 0;
7187 path = btrfs_alloc_path();
7190 list_for_each_entry(back, &rec->backrefs, list) {
/* Only consider data backrefs that exist in the extent tree but were
 * never matched to a file extent (found_ref == 0). */
7191 if (back->full_backref || !back->is_data ||
7192 !back->found_extent_tree)
7194 dback = (struct data_backref *)back;
7195 if (dback->found_ref)
7197 key.objectid = dback->root;
7198 key.type = BTRFS_ROOT_ITEM_KEY;
7199 key.offset = (u64)-1;
7201 dest_root = btrfs_read_fs_root(fs_info, &key);
7203 /* For non-exist root we just skip it */
7204 if (IS_ERR(dest_root) || !dest_root)
7207 key.objectid = dback->owner;
7208 key.type = BTRFS_EXTENT_DATA_KEY;
7209 key.offset = dback->offset;
7211 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7213 * For ret < 0, it's OK since the fs-tree may be corrupted,
7214 * we need to record it for inode/file extent rebuild.
7215 * For ret > 0, we record it only for file extent rebuild.
7216 * For ret == 0, the file extent exists but only bytenr
7217 * mismatch, let the original bytenr fix routine to handle,
/* Queue an orphan record on the destination root so the inode/file
 * extent repair pass can recreate the missing file extent. */
7223 orphan = malloc(sizeof(*orphan));
7228 INIT_LIST_HEAD(&orphan->list);
7229 orphan->root = dback->root;
7230 orphan->objectid = dback->owner;
7231 orphan->offset = dback->offset;
7232 orphan->disk_bytenr = rec->cache.start;
7233 orphan->disk_len = rec->cache.size;
7234 list_add(&dest_root->orphan_data_extents, &orphan->list);
7235 recorded_data_ref = 1;
7238 btrfs_free_path(path);
/* 0 when at least one ref was recorded, 1 otherwise (see header). */
7240 return !recorded_data_ref;
7246 * when an incorrect extent item is found, this will delete
7247 * all of the existing entries for it and recreate them
7248 * based on what the tree scan found.
/*
 * Repair an extent item whose references disagree with the backrefs found
 * during the tree scan: (1) reconcile/verify backrefs, (2) delete the
 * existing extent records, (3) re-insert one ref per backref we actually
 * found, then commit.  Skips re-adding refs for blocks recorded in
 * info->corrupt_blocks.
 */
7250 static int fixup_extent_refs(struct btrfs_fs_info *info,
7251 struct cache_tree *extent_cache,
7252 struct extent_record *rec)
7254 struct btrfs_trans_handle *trans = NULL;
7256 struct btrfs_path *path;
7257 struct list_head *cur = rec->backrefs.next;
7258 struct cache_extent *cache;
7259 struct extent_backref *back;
7263 if (rec->flag_block_full_backref)
7264 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7266 path = btrfs_alloc_path();
7270 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7272 * Sometimes the backrefs themselves are so broken they don't
7273 * get attached to any meaningful rec, so first go back and
7274 * check any of our backrefs that we couldn't find and throw
7275 * them into the list if we find the backref so that
7276 * verify_backrefs can figure out what to do.
7278 ret = find_possible_backrefs(info, path, extent_cache, rec);
7283 /* step one, make sure all of the backrefs agree */
7284 ret = verify_backrefs(info, path, rec);
7288 trans = btrfs_start_transaction(info->extent_root, 1);
7289 if (IS_ERR(trans)) {
7290 ret = PTR_ERR(trans);
7294 /* step two, delete all the existing records */
7295 ret = delete_extent_records(trans, info->extent_root, path,
7296 rec->start, rec->max_size);
7301 /* was this block corrupt? If so, don't add references to it */
7302 cache = lookup_cache_extent(info->corrupt_blocks,
7303 rec->start, rec->max_size);
7309 /* step three, recreate all the refs we did find */
7310 while(cur != &rec->backrefs) {
7311 back = list_entry(cur, struct extent_backref, list);
7315 * if we didn't find any references, don't create a
7318 if (!back->found_ref)
7321 rec->bad_full_backref = 0;
7322 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7330 int err = btrfs_commit_transaction(trans, info->extent_root);
7335 btrfs_free_path(path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the on-disk extent item
 * for @rec, according to rec->flag_block_full_backref, inside its own
 * transaction.  Returns the commit result or a negative error.
 */
7339 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7340 struct extent_record *rec)
7342 struct btrfs_trans_handle *trans;
7343 struct btrfs_root *root = fs_info->extent_root;
7344 struct btrfs_path *path;
7345 struct btrfs_extent_item *ei;
7346 struct btrfs_key key;
7350 key.objectid = rec->start;
/* Metadata extents use METADATA_ITEM keyed by level when skinny metadata
 * is in use; data extents use EXTENT_ITEM keyed by size. */
7351 if (rec->metadata) {
7352 key.type = BTRFS_METADATA_ITEM_KEY;
7353 key.offset = rec->info_level;
7355 key.type = BTRFS_EXTENT_ITEM_KEY;
7356 key.offset = rec->max_size;
7359 path = btrfs_alloc_path();
7363 trans = btrfs_start_transaction(root, 0);
7364 if (IS_ERR(trans)) {
7365 btrfs_free_path(path);
7366 return PTR_ERR(trans);
7369 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7371 btrfs_free_path(path);
7372 btrfs_commit_transaction(trans, root);
7375 fprintf(stderr, "Didn't find extent for %llu\n",
7376 (unsigned long long)rec->start);
7377 btrfs_free_path(path);
7378 btrfs_commit_transaction(trans, root);
7382 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7383 struct btrfs_extent_item);
7384 flags = btrfs_extent_flags(path->nodes[0], ei);
7385 if (rec->flag_block_full_backref) {
7386 fprintf(stderr, "setting full backref on %llu\n",
7387 (unsigned long long)key.objectid);
7388 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7390 fprintf(stderr, "clearing full backref on %llu\n",
7391 (unsigned long long)key.objectid);
7392 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7394 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7395 btrfs_mark_buffer_dirty(path->nodes[0]);
7396 btrfs_free_path(path);
7397 return btrfs_commit_transaction(trans, root);
7400 /* right now we only prune from the extent allocation tree */
/*
 * Remove the parent pointer to one corrupt extent-tree block: search down
 * to the parent of the bad block (lowest_level = corrupt->level + 1), find
 * the slot whose blockptr matches, and delete that pointer.
 */
7401 static int prune_one_block(struct btrfs_trans_handle *trans,
7402 struct btrfs_fs_info *info,
7403 struct btrfs_corrupt_block *corrupt)
7406 struct btrfs_path path;
7407 struct extent_buffer *eb;
7411 int level = corrupt->level + 1;
7413 btrfs_init_path(&path);
7415 /* we want to stop at the parent to our busted block */
7416 path.lowest_level = level;
7418 ret = btrfs_search_slot(trans, info->extent_root,
7419 &corrupt->key, &path, -1, 1);
7424 eb = path.nodes[level];
7431 * hopefully the search gave us the block we want to prune,
7432 * lets try that first
7434 slot = path.slots[level];
7435 found = btrfs_node_blockptr(eb, slot);
7436 if (found == corrupt->cache.start)
7439 nritems = btrfs_header_nritems(eb);
7441 /* the search failed, lets scan this node and hope we find it */
7442 for (slot = 0; slot < nritems; slot++) {
7443 found = btrfs_node_blockptr(eb, slot);
7444 if (found == corrupt->cache.start)
7448 * we couldn't find the bad block. TODO, search all the nodes for pointers
7451 if (eb == info->extent_root->node) {
7456 btrfs_release_path(&path);
7461 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7462 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7465 btrfs_release_path(&path);
/*
 * Walk info->corrupt_blocks and prune the parent pointer to each corrupt
 * block, then commit.  The transaction is started lazily on the first
 * entry; NOTE(review): the loop structure around search_cache_extent() is
 * partially elided in this excerpt.
 */
7469 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7471 struct btrfs_trans_handle *trans = NULL;
7472 struct cache_extent *cache;
7473 struct btrfs_corrupt_block *corrupt;
7476 cache = search_cache_extent(info->corrupt_blocks, 0);
7480 trans = btrfs_start_transaction(info->extent_root, 1);
7482 return PTR_ERR(trans);
7484 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7485 prune_one_block(trans, info, corrupt);
7486 remove_cache_extent(info->corrupt_blocks, cache);
7489 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges in the free space cache and walk the block
 * groups starting at each cleared range so their cached state is rebuilt.
 */
7493 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7495 struct btrfs_block_group_cache *cache;
7500 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7501 &start, &end, EXTENT_DIRTY);
7504 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7510 cache = btrfs_lookup_first_block_group(fs_info, start);
7515 start = cache->key.objectid + cache->key.offset;
/*
 * Final pass over the in-memory extent cache: report (and with --repair,
 * fix) extent records whose refcounts, backpointers, owner refs, full
 * backref flags, stripe alignment or chunk type don't match what the tree
 * scan found.
 *
 * In repair mode it first excludes all problem extents from allocation,
 * prunes corrupt blocks, and resolves duplicate records before walking
 * every record.  Returns 0 on success, -EAGAIN if a fix requires a rescan,
 * or a negative error.
 */
7519 static int check_extent_refs(struct btrfs_root *root,
7520 struct cache_tree *extent_cache)
7522 struct extent_record *rec;
7523 struct cache_extent *cache;
7532 * if we're doing a repair, we have to make sure
7533 * we don't allocate from the problem extents.
7534 * In the worst case, this will be all the
7537 cache = search_cache_extent(extent_cache, 0);
7539 rec = container_of(cache, struct extent_record, cache);
7540 set_extent_dirty(root->fs_info->excluded_extents,
7542 rec->start + rec->max_size - 1,
7544 cache = next_cache_extent(cache);
7547 /* pin down all the corrupted blocks too */
7548 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7550 set_extent_dirty(root->fs_info->excluded_extents,
7552 cache->start + cache->size - 1,
7554 cache = next_cache_extent(cache);
7556 prune_corrupt_blocks(root->fs_info);
7557 reset_cached_block_groups(root->fs_info);
7560 reset_cached_block_groups(root->fs_info);
7563 * We need to delete any duplicate entries we find first otherwise we
7564 * could mess up the extent tree when we have backrefs that actually
7565 * belong to a different extent item and not the weird duplicate one.
7567 while (repair && !list_empty(&duplicate_extents)) {
7568 rec = list_entry(duplicate_extents.next, struct extent_record,
7570 list_del_init(&rec->list);
7572 /* Sometimes we can find a backref before we find an actual
7573 * extent, so we need to process it a little bit to see if there
7574 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7575 * if this is a backref screwup. If we need to delete stuff
7576 * process_duplicates() will return 0, otherwise it will return
7579 if (process_duplicates(root, extent_cache, rec))
7581 ret = delete_duplicate_records(root, rec);
7585 * delete_duplicate_records will return the number of entries
7586 * deleted, so if it's greater than 0 then we know we actually
7587 * did something and we need to remove.
/* Main per-record check loop. */
7601 cache = search_cache_extent(extent_cache, 0);
7604 rec = container_of(cache, struct extent_record, cache);
7605 if (rec->num_duplicates) {
7606 fprintf(stderr, "extent item %llu has multiple extent "
7607 "items\n", (unsigned long long)rec->start);
7612 if (rec->refs != rec->extent_item_refs) {
7613 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7614 (unsigned long long)rec->start,
7615 (unsigned long long)rec->nr);
7616 fprintf(stderr, "extent item %llu, found %llu\n",
7617 (unsigned long long)rec->extent_item_refs,
7618 (unsigned long long)rec->refs);
7619 ret = record_orphan_data_extents(root->fs_info, rec);
7626 * we can't use the extent to repair file
7627 * extent, let the fallback method handle it.
7629 if (!fixed && repair) {
7630 ret = fixup_extent_refs(
7641 if (all_backpointers_checked(rec, 1)) {
7642 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7643 (unsigned long long)rec->start,
7644 (unsigned long long)rec->nr);
7646 if (!fixed && !recorded && repair) {
7647 ret = fixup_extent_refs(root->fs_info,
7656 if (!rec->owner_ref_checked) {
7657 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7658 (unsigned long long)rec->start,
7659 (unsigned long long)rec->nr);
7660 if (!fixed && !recorded && repair) {
7661 ret = fixup_extent_refs(root->fs_info,
7670 if (rec->bad_full_backref) {
7671 fprintf(stderr, "bad full backref, on [%llu]\n",
7672 (unsigned long long)rec->start);
7674 ret = fixup_extent_flags(root->fs_info, rec);
7683 * Although it's not a extent ref's problem, we reuse this
7684 * routine for error reporting.
7685 * No repair function yet.
7687 if (rec->crossing_stripes) {
7689 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7690 rec->start, rec->start + rec->max_size);
7695 if (rec->wrong_chunk_type) {
7697 "bad extent [%llu, %llu), type mismatch with chunk\n",
7698 rec->start, rec->start + rec->max_size);
7703 remove_cache_extent(extent_cache, cache);
7704 free_all_extent_backrefs(rec);
7705 if (!init_extent_tree && repair && (!cur_err || fixed))
7706 clear_extent_dirty(root->fs_info->excluded_extents,
7708 rec->start + rec->max_size - 1,
7714 if (ret && ret != -EAGAIN) {
7715 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
/* On successful repair, fix block accounting in a final transaction. */
7718 struct btrfs_trans_handle *trans;
7720 root = root->fs_info->extent_root;
7721 trans = btrfs_start_transaction(root, 1);
7722 if (IS_ERR(trans)) {
7723 ret = PTR_ERR(trans);
7727 btrfs_fix_block_accounting(trans, root);
7728 ret = btrfs_commit_transaction(trans, root);
7733 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of @length bytes with
 * the given block-group @type flags and @num_stripes:
 *   RAID0  - length split evenly across all stripes
 *   RAID10 - data is mirrored, so each device holds (length * 2) / n
 *   RAID5  - one stripe's worth of parity: length / (n - 1)
 *   RAID6  - two stripes' worth of parity: length / (n - 2)
 *   other (SINGLE/DUP/RAID1) - each stripe holds the full length
 */
7739 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7743 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7744 stripe_size = length;
7745 stripe_size /= num_stripes;
7746 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7747 stripe_size = length * 2;
7748 stripe_size /= num_stripes;
7749 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7750 stripe_size = length;
7751 stripe_size /= (num_stripes - 1);
7752 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7753 stripe_size = length;
7754 stripe_size /= (num_stripes - 2);
7756 stripe_size = length;
7762 * Check the chunk with its block group/dev list ref:
7763 * Return 0 if all refs seems valid.
7764 * Return 1 if part of refs seems valid, need later check for rebuild ref
7765 * like missing block group and needs to search extent tree to rebuild them.
7766 * Return -1 if essential refs are missing and unable to rebuild.
/* See the comment block above for the 0 / 1 / -1 return contract. */
7768 static int check_chunk_refs(struct chunk_record *chunk_rec,
7769 struct block_group_tree *block_group_cache,
7770 struct device_extent_tree *dev_extent_cache,
7773 struct cache_extent *block_group_item;
7774 struct block_group_record *block_group_rec;
7775 struct cache_extent *dev_extent_item;
7776 struct device_extent_record *dev_extent_rec;
7780 int metadump_v2 = 0;
/* Step 1: the chunk must have a matching block group item. */
7784 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7787 if (block_group_item) {
7788 block_group_rec = container_of(block_group_item,
7789 struct block_group_record,
7791 if (chunk_rec->length != block_group_rec->offset ||
7792 chunk_rec->offset != block_group_rec->objectid ||
7794 chunk_rec->type_flags != block_group_rec->flags)) {
7797 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7798 chunk_rec->objectid,
7803 chunk_rec->type_flags,
7804 block_group_rec->objectid,
7805 block_group_rec->type,
7806 block_group_rec->offset,
7807 block_group_rec->offset,
7808 block_group_rec->objectid,
7809 block_group_rec->flags);
7812 list_del_init(&block_group_rec->list);
7813 chunk_rec->bg_rec = block_group_rec;
7818 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7819 chunk_rec->objectid,
7824 chunk_rec->type_flags);
/* Step 2: every stripe must have a matching dev extent of the right
 * length on the right device. */
7831 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7832 chunk_rec->num_stripes);
7833 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7834 devid = chunk_rec->stripes[i].devid;
7835 offset = chunk_rec->stripes[i].offset;
7836 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7837 devid, offset, length);
7838 if (dev_extent_item) {
7839 dev_extent_rec = container_of(dev_extent_item,
7840 struct device_extent_record,
7842 if (dev_extent_rec->objectid != devid ||
7843 dev_extent_rec->offset != offset ||
7844 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7845 dev_extent_rec->length != length) {
7848 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7849 chunk_rec->objectid,
7852 chunk_rec->stripes[i].devid,
7853 chunk_rec->stripes[i].offset,
7854 dev_extent_rec->objectid,
7855 dev_extent_rec->offset,
7856 dev_extent_rec->length);
7859 list_move(&dev_extent_rec->chunk_list,
7860 &chunk_rec->dextents);
7865 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7866 chunk_rec->objectid,
7869 chunk_rec->stripes[i].devid,
7870 chunk_rec->stripes[i].offset);
7877 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Cross-check every chunk against its block group and dev extents via
 * check_chunk_refs(), sorting chunks onto the optional @good / @rebuild /
 * @bad lists by result, then report any block groups and dev extents left
 * over with no owning chunk.  @silent suppresses the per-item messages.
 */
7878 int check_chunks(struct cache_tree *chunk_cache,
7879 struct block_group_tree *block_group_cache,
7880 struct device_extent_tree *dev_extent_cache,
7881 struct list_head *good, struct list_head *bad,
7882 struct list_head *rebuild, int silent)
7884 struct cache_extent *chunk_item;
7885 struct chunk_record *chunk_rec;
7886 struct block_group_record *bg_rec;
7887 struct device_extent_record *dext_rec;
7891 chunk_item = first_cache_extent(chunk_cache);
7892 while (chunk_item) {
7893 chunk_rec = container_of(chunk_item, struct chunk_record,
7895 err = check_chunk_refs(chunk_rec, block_group_cache,
7896 dev_extent_cache, silent);
7899 if (err == 0 && good)
7900 list_add_tail(&chunk_rec->list, good);
7901 if (err > 0 && rebuild)
7902 list_add_tail(&chunk_rec->list, rebuild);
7904 list_add_tail(&chunk_rec->list, bad);
7905 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these orphan lists had no matching chunk. */
7908 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7911 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7919 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7923 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all dev extents belonging to @dev_rec's device and
 * compare against the bytes_used recorded in its dev item; report a
 * mismatch if they differ.
 */
7934 static int check_device_used(struct device_record *dev_rec,
7935 struct device_extent_tree *dext_cache)
7937 struct cache_extent *cache;
7938 struct device_extent_record *dev_extent_rec;
7941 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7943 dev_extent_rec = container_of(cache,
7944 struct device_extent_record,
/* The tree is keyed by (devid, offset); stop at the next device. */
7946 if (dev_extent_rec->objectid != dev_rec->devid)
7949 list_del_init(&dev_extent_rec->device_list);
7950 total_byte += dev_extent_rec->length;
7951 cache = next_cache_extent(cache);
7954 if (total_byte != dev_rec->byte_used) {
7956 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7957 total_byte, dev_rec->byte_used, dev_rec->objectid,
7958 dev_rec->type, dev_rec->offset);
7965 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() for every device item in @dev_cache, then report
 * any dev extents whose device item was never found.
 */
7966 static int check_devices(struct rb_root *dev_cache,
7967 struct device_extent_tree *dev_extent_cache)
7969 struct rb_node *dev_node;
7970 struct device_record *dev_rec;
7971 struct device_extent_record *dext_rec;
7975 dev_node = rb_first(dev_cache);
7977 dev_rec = container_of(dev_node, struct device_record, node);
7978 err = check_device_used(dev_rec, dev_extent_cache);
7982 dev_node = rb_next(dev_node);
7984 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7987 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7988 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (bytenr, level,
 * node size, last snapshot and, for dropping roots, the drop key/level)
 * and append it to @head.  @drop_key may be NULL for normal roots.
 */
7995 static int add_root_item_to_list(struct list_head *head,
7996 u64 objectid, u64 bytenr, u64 last_snapshot,
7997 u8 level, u8 drop_level,
7998 int level_size, struct btrfs_key *drop_key)
8001 struct root_item_record *ri_rec;
8002 ri_rec = malloc(sizeof(*ri_rec));
8005 ri_rec->bytenr = bytenr;
8006 ri_rec->objectid = objectid;
8007 ri_rec->level = level;
8008 ri_rec->level_size = level_size;
8009 ri_rec->drop_level = drop_level;
8010 ri_rec->last_snapshot = last_snapshot;
8012 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8013 list_add_tail(&ri_rec->list, head);
/* Drain @list, unlinking and freeing every root_item_record on it. */
8018 static void free_root_item_list(struct list_head *list)
8020 struct root_item_record *ri_rec;
8022 while (!list_empty(list)) {
8023 ri_rec = list_first_entry(list, struct root_item_record,
8025 list_del_init(&ri_rec->list);
/*
 * Scan every tree root queued on @list: read its root node, seed the
 * pending/seen/nodes caches via add_root_to_pending(), then drive
 * run_next_block() until the queued work drains.  All the cache-tree
 * parameters are the shared scan state owned by the caller
 * (check_chunks_and_extents).
 */
8030 static int deal_root_from_list(struct list_head *list,
8031 struct btrfs_root *root,
8032 struct block_info *bits,
8034 struct cache_tree *pending,
8035 struct cache_tree *seen,
8036 struct cache_tree *reada,
8037 struct cache_tree *nodes,
8038 struct cache_tree *extent_cache,
8039 struct cache_tree *chunk_cache,
8040 struct rb_root *dev_cache,
8041 struct block_group_tree *block_group_cache,
8042 struct device_extent_tree *dev_extent_cache)
8047 while (!list_empty(list)) {
8048 struct root_item_record *rec;
8049 struct extent_buffer *buf;
8050 rec = list_entry(list->next,
8051 struct root_item_record, list);
8053 buf = read_tree_block(root->fs_info->tree_root,
8054 rec->bytenr, rec->level_size, 0);
8055 if (!extent_buffer_uptodate(buf)) {
8056 free_extent_buffer(buf);
8060 add_root_to_pending(buf, extent_cache, pending,
8061 seen, nodes, rec->objectid);
8063 * To rebuild extent tree, we need deal with snapshot
8064 * one by one, otherwise we deal with node firstly which
8065 * can maximize readahead.
8068 ret = run_next_block(root, bits, bits_nr, &last,
8069 pending, seen, reada, nodes,
8070 extent_cache, chunk_cache,
8071 dev_cache, block_group_cache,
8072 dev_extent_cache, rec);
8076 free_extent_buffer(buf);
8077 list_del(&rec->list);
/* Drain whatever block work remains after all roots were queued. */
8083 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8084 reada, nodes, extent_cache, chunk_cache,
8085 dev_cache, block_group_cache,
8086 dev_extent_cache, NULL);
/*
 * Top-level extent/chunk verification pass.
 *
 * Builds in-memory caches of every chunk, block group, device, dev extent
 * and extent by walking the tree root, chunk root, and every ROOT_ITEM in
 * the root tree (normal and half-dropped snapshots are queued separately),
 * then runs check_chunks(), check_extent_refs() and check_devices() over
 * the collected state.  check_block() may return -EAGAIN after fixing
 * something, in which case the whole scan loop restarts.
 */
8096 static int check_chunks_and_extents(struct btrfs_root *root)
8098 struct rb_root dev_cache;
8099 struct cache_tree chunk_cache;
8100 struct block_group_tree block_group_cache;
8101 struct device_extent_tree dev_extent_cache;
8102 struct cache_tree extent_cache;
8103 struct cache_tree seen;
8104 struct cache_tree pending;
8105 struct cache_tree reada;
8106 struct cache_tree nodes;
8107 struct extent_io_tree excluded_extents;
8108 struct cache_tree corrupt_blocks;
8109 struct btrfs_path path;
8110 struct btrfs_key key;
8111 struct btrfs_key found_key;
8113 struct block_info *bits;
8115 struct extent_buffer *leaf;
8117 struct btrfs_root_item ri;
8118 struct list_head dropping_trees;
8119 struct list_head normal_trees;
8120 struct btrfs_root *root1;
8125 dev_cache = RB_ROOT;
8126 cache_tree_init(&chunk_cache);
8127 block_group_tree_init(&block_group_cache);
8128 device_extent_tree_init(&dev_extent_cache);
8130 cache_tree_init(&extent_cache);
8131 cache_tree_init(&seen);
8132 cache_tree_init(&pending);
8133 cache_tree_init(&nodes);
8134 cache_tree_init(&reada);
8135 cache_tree_init(&corrupt_blocks);
8136 extent_io_tree_init(&excluded_extents);
8137 INIT_LIST_HEAD(&dropping_trees);
8138 INIT_LIST_HEAD(&normal_trees);
/* Hook our scratch state into fs_info so repair helpers can see it. */
8141 root->fs_info->excluded_extents = &excluded_extents;
8142 root->fs_info->fsck_extent_cache = &extent_cache;
8143 root->fs_info->free_extent_hook = free_extent_hook;
8144 root->fs_info->corrupt_blocks = &corrupt_blocks;
8148 bits = malloc(bits_nr * sizeof(struct block_info));
8154 if (ctx.progress_enabled) {
8155 ctx.tp = TASK_EXTENTS;
8156 task_start(ctx.info);
/* Queue the tree root and chunk root first. */
8160 root1 = root->fs_info->tree_root;
8161 level = btrfs_header_level(root1->node);
8162 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8163 root1->node->start, 0, level, 0,
8164 btrfs_level_size(root1, level), NULL);
8167 root1 = root->fs_info->chunk_root;
8168 level = btrfs_header_level(root1->node);
8169 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8170 root1->node->start, 0, level, 0,
8171 btrfs_level_size(root1, level), NULL);
/* Walk every ROOT_ITEM in the root tree and queue each subvolume root;
 * roots with a non-zero drop_progress go on the dropping list. */
8174 btrfs_init_path(&path);
8177 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8178 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8183 leaf = path.nodes[0];
8184 slot = path.slots[0];
8185 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8186 ret = btrfs_next_leaf(root, &path);
8189 leaf = path.nodes[0];
8190 slot = path.slots[0];
8192 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8193 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8194 unsigned long offset;
8197 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8198 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8199 last_snapshot = btrfs_root_last_snapshot(&ri);
8200 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8201 level = btrfs_root_level(&ri);
8202 level_size = btrfs_level_size(root, level);
8203 ret = add_root_item_to_list(&normal_trees,
8205 btrfs_root_bytenr(&ri),
8206 last_snapshot, level,
8207 0, level_size, NULL);
8211 level = btrfs_root_level(&ri);
8212 level_size = btrfs_level_size(root, level);
8213 objectid = found_key.objectid;
8214 btrfs_disk_key_to_cpu(&found_key,
8216 ret = add_root_item_to_list(&dropping_trees,
8218 btrfs_root_bytenr(&ri),
8219 last_snapshot, level,
8221 level_size, &found_key);
8228 btrfs_release_path(&path);
8231 * check_block can return -EAGAIN if it fixes something, please keep
8232 * this in mind when dealing with return values from these functions, if
8233 * we get -EAGAIN we want to fall through and restart the loop.
8235 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8236 &seen, &reada, &nodes, &extent_cache,
8237 &chunk_cache, &dev_cache, &block_group_cache,
8244 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8245 &pending, &seen, &reada, &nodes,
8246 &extent_cache, &chunk_cache, &dev_cache,
8247 &block_group_cache, &dev_extent_cache);
8254 ret = check_chunks(&chunk_cache, &block_group_cache,
8255 &dev_extent_cache, NULL, NULL, NULL, 0);
8262 ret = check_extent_refs(root, &extent_cache);
8269 ret = check_devices(&dev_cache, &dev_extent_cache);
8274 task_stop(ctx.info);
/* Normal teardown: detach scratch state from fs_info and free caches. */
8276 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8277 extent_io_tree_cleanup(&excluded_extents);
8278 root->fs_info->fsck_extent_cache = NULL;
8279 root->fs_info->free_extent_hook = NULL;
8280 root->fs_info->corrupt_blocks = NULL;
8281 root->fs_info->excluded_extents = NULL;
8284 free_chunk_cache_tree(&chunk_cache);
8285 free_device_cache_tree(&dev_cache);
8286 free_block_group_tree(&block_group_cache);
8287 free_device_extent_tree(&dev_extent_cache);
8288 free_extent_cache_tree(&seen);
8289 free_extent_cache_tree(&pending);
8290 free_extent_cache_tree(&reada);
8291 free_extent_cache_tree(&nodes);
/* Error/-EAGAIN path: free everything, including the extent records and
 * the queued root lists, before looping or returning. */
8294 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8295 free_extent_cache_tree(&seen);
8296 free_extent_cache_tree(&pending);
8297 free_extent_cache_tree(&reada);
8298 free_extent_cache_tree(&nodes);
8299 free_chunk_cache_tree(&chunk_cache);
8300 free_block_group_tree(&block_group_cache);
8301 free_device_cache_tree(&dev_cache);
8302 free_device_extent_tree(&dev_extent_cache);
8303 free_extent_record_cache(root->fs_info, &extent_cache);
8304 free_root_item_list(&normal_trees);
8305 free_root_item_list(&dropping_trees);
8306 extent_io_tree_cleanup(&excluded_extents);
/*
 * Replace @root's node with a fresh, empty level-0 block (or reuse the
 * old block when @overwrite is set), initialize its header, and update
 * the root item in the tree root if the bytenr did not change.  Used when
 * rebuilding a whole tree (e.g. the extent tree) from scratch.
 */
8310 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8311 struct btrfs_root *root, int overwrite)
8313 struct extent_buffer *c;
8314 struct extent_buffer *old = root->node;
8317 struct btrfs_disk_key disk_key = {0,0,0};
8323 extent_buffer_get(c);
8326 c = btrfs_alloc_free_block(trans, root,
8327 btrfs_level_size(root, 0),
8328 root->root_key.objectid,
8329 &disk_key, level, 0, 0);
8332 extent_buffer_get(c);
/* Write a clean header for the new (empty) root node. */
8336 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8337 btrfs_set_header_level(c, level);
8338 btrfs_set_header_bytenr(c, c->start);
8339 btrfs_set_header_generation(c, trans->transid);
8340 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8341 btrfs_set_header_owner(c, root->root_key.objectid);
8343 write_extent_buffer(c, root->fs_info->fsid,
8344 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8346 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8347 btrfs_header_chunk_tree_uuid(c),
8350 btrfs_mark_buffer_dirty(c);
8352 * this case can happen in the following case:
8354 * 1.overwrite previous root.
8356 * 2.reinit reloc data root, this is because we skip pin
8357 * down reloc data tree before which means we can allocate
8358 * same block bytenr here.
8360 if (old->start == c->start) {
8361 btrfs_set_root_generation(&root->root_item,
8363 root->root_item.level = btrfs_header_level(root->node);
8364 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8365 &root->root_key, &root->root_item);
8367 free_extent_buffer(c);
8371 free_extent_buffer(old);
8373 add_root_to_dirty_list(root);
/*
 * Recursively mark every metadata block reachable from @eb as pinned
 * (EXTENT_DIRTY in fs_info->pinned_extents) so the extent-tree re-init
 * cannot allocate over live metadata.  When @tree_root is set, root items
 * found in leaves are followed so each subsidiary tree gets pinned too.
 *
 * NOTE(review): some lines (continue/return statements, loop bookkeeping)
 * are elided in this view; comments cover only visible statements.
 */
8377 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8378 struct extent_buffer *eb, int tree_root)
8380 struct extent_buffer *tmp;
8381 struct btrfs_root_item *ri;
8382 struct btrfs_key key;
8385 int level = btrfs_header_level(eb);
8391 * If we have pinned this block before, don't pin it again.
8392 * This can not only avoid forever loop with broken filesystem
8393 * but also give us some speedups.
8395 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8396 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8399 btrfs_pin_extent(fs_info, eb->start, eb->len);
8401 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8402 nritems = btrfs_header_nritems(eb);
/* Walk every item (leaf) or key pointer (node) in this block. */
8403 for (i = 0; i < nritems; i++) {
8405 btrfs_item_key_to_cpu(eb, &key, i);
8406 if (key.type != BTRFS_ROOT_ITEM_KEY)
8408 /* Skip the extent root and reloc roots */
8409 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8410 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8411 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
/* Follow this root item and pin the whole tree below it. */
8413 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8414 bytenr = btrfs_disk_root_bytenr(eb, ri);
8417 * If at any point we start needing the real root we
8418 * will have to build a stump root for the root we are
8419 * in, but for now this doesn't actually use the root so
8420 * just pass in extent_root.
8422 tmp = read_tree_block(fs_info->extent_root, bytenr,
8424 if (!extent_buffer_uptodate(tmp)) {
8425 fprintf(stderr, "Error reading root block\n");
8428 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8429 free_extent_buffer(tmp);
/* Interior node: recurse (or just pin leaves one level down). */
8433 bytenr = btrfs_node_blockptr(eb, i);
8435 /* If we aren't the tree root don't read the block */
8436 if (level == 1 && !tree_root) {
8437 btrfs_pin_extent(fs_info, bytenr, leafsize);
8441 tmp = read_tree_block(fs_info->extent_root, bytenr,
8443 if (!extent_buffer_uptodate(tmp)) {
8444 fprintf(stderr, "Error reading tree block\n");
8447 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8448 free_extent_buffer(tmp);
/*
 * Pin all metadata currently in use: first everything under the chunk root,
 * then everything reachable from the tree root (tree_root=1 follows the
 * root items it finds, pinning every subsidiary tree as well).
 * Must run before re-initializing the extent tree so new allocations do not
 * clobber live metadata.
 */
8457 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8461 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8465 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group cache from the chunk tree after the old
 * block groups were dropped: create one in-memory block group per chunk item
 * and mark each chunk's range free, then re-derive per-group allocation bits.
 *
 * NOTE(review): loop headers, error paths and the trailing loop over
 * btrfs_lookup_first_block_group() are partially elided in this view.
 */
8468 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8470 struct btrfs_block_group_cache *cache;
8471 struct btrfs_path *path;
8472 struct extent_buffer *leaf;
8473 struct btrfs_chunk *chunk;
8474 struct btrfs_key key;
8478 path = btrfs_alloc_path();
8483 key.type = BTRFS_CHUNK_ITEM_KEY;
8486 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8488 btrfs_free_path(path);
8493 * We do this in case the block groups were screwed up and had alloc
8494 * bits that aren't actually set on the chunks. This happens with
8495 * restored images every time and could happen in real life I guess.
8497 fs_info->avail_data_alloc_bits = 0;
8498 fs_info->avail_metadata_alloc_bits = 0;
8499 fs_info->avail_system_alloc_bits = 0;
8501 /* First we need to create the in-memory block groups */
8503 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8504 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8506 btrfs_free_path(path);
8514 leaf = path->nodes[0];
8515 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8516 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One in-memory block group per chunk; flags copied from the chunk type. */
8521 chunk = btrfs_item_ptr(leaf, path->slots[0],
8522 struct btrfs_chunk);
8523 btrfs_add_block_group(fs_info, 0,
8524 btrfs_chunk_type(leaf, chunk),
8525 key.objectid, key.offset,
8526 btrfs_chunk_length(leaf, chunk));
/* Whole chunk range starts out free; fsck re-accounts usage later. */
8527 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8528 key.offset + btrfs_chunk_length(leaf, chunk),
8534 cache = btrfs_lookup_first_block_group(fs_info, start);
8538 start = cache->key.objectid + cache->key.offset;
8541 btrfs_free_path(path);
/*
 * Remove any pending balance state so a later mount doesn't try to resume a
 * balance against the rebuilt trees: delete the balance item, delete all
 * tree-reloc root items, then re-initialize the data reloc tree and give it
 * a fresh root dir.
 *
 * NOTE(review): several branch bodies, gotos and error checks are elided in
 * this view; comments cover only the visible statements.
 */
8545 static int reset_balance(struct btrfs_trans_handle *trans,
8546 struct btrfs_fs_info *fs_info)
8548 struct btrfs_root *root = fs_info->tree_root;
8549 struct btrfs_path *path;
8550 struct extent_buffer *leaf;
8551 struct btrfs_key key;
8552 int del_slot, del_nr = 0;
8556 path = btrfs_alloc_path();
/* Step 1: find and delete the persistent balance item, if present. */
8560 key.objectid = BTRFS_BALANCE_OBJECTID;
8561 key.type = BTRFS_BALANCE_ITEM_KEY;
8564 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8569 goto reinit_data_reloc;
8574 ret = btrfs_del_item(trans, root, path);
8577 btrfs_release_path(path);
/* Step 2: delete every TREE_RELOC root item from the tree root. */
8579 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8580 key.type = BTRFS_ROOT_ITEM_KEY;
8583 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8587 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8592 ret = btrfs_del_items(trans, root, path,
8599 btrfs_release_path(path);
8602 ret = btrfs_search_slot(trans, root, &key, path,
8609 leaf = path->nodes[0];
8610 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8611 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8613 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
/* Batch contiguous reloc items and delete them in one call. */
8618 del_slot = path->slots[0];
8627 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8631 btrfs_release_path(path);
/* Step 3: re-create the data reloc tree with an empty root. */
8634 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8635 key.type = BTRFS_ROOT_ITEM_KEY;
8636 key.offset = (u64)-1;
8637 root = btrfs_read_fs_root(fs_info, &key);
8639 fprintf(stderr, "Error reading data reloc tree\n");
8640 ret = PTR_ERR(root);
8643 record_root_in_trans(trans, root);
8644 ret = btrfs_fsck_reinit_root(trans, root, 0);
8647 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8649 btrfs_free_path(path);
/*
 * Throw away and rebuild the extent tree: pin all metadata in use, drop and
 * re-create the in-memory block groups, reinit the extent root to an empty
 * node, re-insert the block group items, and finally clear any pending
 * balance state.  Not supported on mixed block-group filesystems.
 *
 * NOTE(review): error-handling lines between the visible steps are elided
 * in this view.
 */
8653 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8654 struct btrfs_fs_info *fs_info)
8660 * The only reason we don't do this is because right now we're just
8661 * walking the trees we find and pinning down their bytes, we don't look
8662 * at any of the leaves. In order to do mixed groups we'd have to check
8663 * the leaves of any fs roots and pin down the bytes for any file
8664 * extents we find. Not hard but why do it if we don't have to?
8666 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8667 fprintf(stderr, "We don't support re-initing the extent tree "
8668 "for mixed block groups yet, please notify a btrfs "
8669 "developer you want to do this so they can add this "
8670 "functionality.\n");
8675 * first we need to walk all of the trees except the extent tree and pin
8676 * down the bytes that are in use so we don't overwrite any existing
8679 ret = pin_metadata_blocks(fs_info);
8681 fprintf(stderr, "error pinning down used bytes\n");
8686 * Need to drop all the block groups since we're going to recreate all
8689 btrfs_free_block_groups(fs_info);
8690 ret = reset_block_groups(fs_info);
8692 fprintf(stderr, "error resetting the block groups\n");
8696 /* Ok we can allocate now, reinit the extent root */
8697 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8699 fprintf(stderr, "extent root initialization failed\n");
8701 * When the transaction code is updated we should end the
8702 * transaction, but for now progs only knows about commit so
8703 * just return an error.
8709 * Now we have all the in-memory block groups setup so we can make
8710 * allocations properly, and the metadata we care about is safe since we
8711 * pinned all of it above.
8714 struct btrfs_block_group_cache *cache;
/* Re-insert a BLOCK_GROUP_ITEM for every in-memory block group. */
8716 cache = btrfs_lookup_first_block_group(fs_info, start);
8719 start = cache->key.objectid + cache->key.offset;
8720 ret = btrfs_insert_item(trans, fs_info->extent_root,
8721 &cache->key, &cache->item,
8722 sizeof(cache->item));
8724 fprintf(stderr, "Error adding block group\n");
/* Flush delayed extent-tree operations produced by the insert. */
8727 btrfs_extent_post_op(trans, fs_info->extent_root);
8730 ret = reset_balance(trans, fs_info);
8732 fprintf(stderr, "error reseting the pending balance\n");
/*
 * Re-COW the given metadata block: look up its owner root, start a
 * transaction and do a write-mode search down to the block's level, which
 * forces the path (including @eb's replacement) to be copied on write.
 * Used during repair to rewrite blocks with e.g. transid problems.
 */
8738 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8739 struct btrfs_path *path;
8740 struct btrfs_trans_handle *trans;
8741 struct btrfs_key key;
8744 printf("Recowing metadata block %llu\n", eb->start);
/* The header owner tells us which root this block belongs to. */
8745 key.objectid = btrfs_header_owner(eb);
8746 key.type = BTRFS_ROOT_ITEM_KEY;
8747 key.offset = (u64)-1;
8749 root = btrfs_read_fs_root(root->fs_info, &key);
8751 fprintf(stderr, "Couldn't find owner root %llu\n",
8753 return PTR_ERR(root);
8756 path = btrfs_alloc_path();
8760 trans = btrfs_start_transaction(root, 1);
8761 if (IS_ERR(trans)) {
8762 btrfs_free_path(path);
8763 return PTR_ERR(trans);
/*
 * Search only down to @eb's level, using @eb's first key, so the COW
 * touches exactly the path containing this block.
 */
8766 path->lowest_level = btrfs_header_level(eb);
8767 if (path->lowest_level)
8768 btrfs_node_key_to_cpu(eb, &key, 0);
8770 btrfs_item_key_to_cpu(eb, &key, 0);
/* cow=1: the search itself performs the re-COW; commit persists it. */
8772 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8773 btrfs_commit_transaction(trans, root);
8774 btrfs_free_path(path);
/*
 * Delete one previously recorded bad item from its owning root: read the
 * root identified by bad->root_id, search for bad->key in write mode and
 * remove the item inside a one-shot transaction.
 */
8778 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8780 struct btrfs_path *path;
8781 struct btrfs_trans_handle *trans;
8782 struct btrfs_key key;
8785 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8786 bad->key.type, bad->key.offset);
8787 key.objectid = bad->root_id;
8788 key.type = BTRFS_ROOT_ITEM_KEY;
8789 key.offset = (u64)-1;
8791 root = btrfs_read_fs_root(root->fs_info, &key);
8793 fprintf(stderr, "Couldn't find owner root %llu\n",
8795 return PTR_ERR(root);
8798 path = btrfs_alloc_path();
8802 trans = btrfs_start_transaction(root, 1);
8803 if (IS_ERR(trans)) {
8804 btrfs_free_path(path);
8805 return PTR_ERR(trans);
/* ins_len=-1, cow=1: position on the exact item so it can be deleted. */
8808 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8814 ret = btrfs_del_item(trans, root, path);
8816 btrfs_commit_transaction(trans, root);
8817 btrfs_free_path(path);
/*
 * Clear the log tree pointer in the superblock (root bytenr and level set
 * to 0) and commit, so the kernel won't try to replay a stale log after
 * repair has modified the filesystem.
 */
8821 static int zero_log_tree(struct btrfs_root *root)
8823 struct btrfs_trans_handle *trans;
8826 trans = btrfs_start_transaction(root, 1);
8827 if (IS_ERR(trans)) {
8828 ret = PTR_ERR(trans);
8831 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8832 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8833 ret = btrfs_commit_transaction(trans, root);
/*
 * Read the data extent [start, start+len) sector by sector and insert a
 * checksum item for each sector into the csum tree.  @buf must be at least
 * one sectorsize large; it is reused for every sector.
 *
 * NOTE(review): error-check lines between the calls are elided in this view.
 */
8837 static int populate_csum(struct btrfs_trans_handle *trans,
8838 struct btrfs_root *csum_root, char *buf, u64 start,
8845 while (offset < len) {
8846 sectorsize = csum_root->sectorsize;
/* Read one sector of the extent's data into buf. */
8847 ret = read_extent_data(csum_root, buf, start + offset,
/* Checksum the sector and store it at bytenr start+offset. */
8851 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8852 start + offset, buf, sectorsize);
8855 offset += sectorsize;
/*
 * Walk one fs/subvolume tree and (re)generate checksums for every regular
 * (non-inline, non-prealloc) file extent it references, by reading the data
 * and inserting csum items via populate_csum().
 *
 * NOTE(review): key initialization, skip conditions for compressed/hole
 * extents and some error paths are elided in this view.
 */
8860 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8861 struct btrfs_root *csum_root,
8862 struct btrfs_root *cur_root)
8864 struct btrfs_path *path;
8865 struct btrfs_key key;
8866 struct extent_buffer *node;
8867 struct btrfs_file_extent_item *fi;
8874 path = btrfs_alloc_path();
/* Scratch buffer of one sector for populate_csum()'s data reads. */
8877 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8887 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8890 /* Iterate all regular file extents and fill its csum */
8892 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8894 if (key.type != BTRFS_EXTENT_DATA_KEY)
8896 node = path->nodes[0];
8897 slot = path->slots[0];
8898 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8899 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksum the whole on-disk extent, not just the referenced range. */
8901 start = btrfs_file_extent_disk_bytenr(node, fi);
8902 len = btrfs_file_extent_disk_num_bytes(node, fi);
8904 ret = populate_csum(trans, csum_root, buf, start, len);
8911 * TODO: if next leaf is corrupted, jump to nearest next valid
8914 ret = btrfs_next_item(cur_root, path);
8924 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning every fs/subvolume root: iterate the
 * ROOT_ITEMs in the tree root starting at FS_TREE_OBJECTID and run
 * fill_csum_tree_from_one_fs_root() on each fstree found.
 *
 * NOTE(review): loop delimiters and some error paths are elided in this
 * view; comments cover only the visible statements.
 */
8929 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8930 struct btrfs_root *csum_root)
8932 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8933 struct btrfs_path *path;
8934 struct btrfs_root *tree_root = fs_info->tree_root;
8935 struct btrfs_root *cur_root;
8936 struct extent_buffer *node;
8937 struct btrfs_key key;
8941 path = btrfs_alloc_path();
/* Start from the first fs tree and walk forward through root items. */
8945 key.objectid = BTRFS_FS_TREE_OBJECTID;
8947 key.type = BTRFS_ROOT_ITEM_KEY;
8949 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8958 node = path->nodes[0];
8959 slot = path->slots[0];
8960 btrfs_item_key_to_cpu(node, &key, slot);
8961 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8963 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* Only fs/subvolume trees carry file extents worth checksumming. */
8965 if (!is_fstree(key.objectid))
8967 key.offset = (u64)-1;
8969 cur_root = btrfs_read_fs_root(fs_info, &key);
8970 if (IS_ERR(cur_root) || !cur_root) {
8971 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8975 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8980 ret = btrfs_next_item(tree_root, path);
8990 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning the extent tree: for every EXTENT_ITEM
 * flagged as DATA, read the extent's contents and insert checksum items via
 * populate_csum().  Requires a trustworthy extent tree (contrast with
 * fill_csum_tree_from_fs(), used after --init-extent-tree).
 *
 * NOTE(review): loop delimiters and some error paths are elided in this
 * view.
 */
8994 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8995 struct btrfs_root *csum_root)
8997 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8998 struct btrfs_path *path;
8999 struct btrfs_extent_item *ei;
9000 struct extent_buffer *leaf;
9002 struct btrfs_key key;
9005 path = btrfs_alloc_path();
9010 key.type = BTRFS_EXTENT_ITEM_KEY;
9013 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9015 btrfs_free_path(path);
/* One-sector scratch buffer for populate_csum()'s data reads. */
9019 buf = malloc(csum_root->sectorsize);
9021 btrfs_free_path(path);
9026 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9027 ret = btrfs_next_leaf(extent_root, path);
9035 leaf = path->nodes[0];
9037 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9038 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9043 ei = btrfs_item_ptr(leaf, path->slots[0],
9044 struct btrfs_extent_item);
/* Skip tree blocks; only data extents carry checksums. */
9045 if (!(btrfs_extent_flags(leaf, ei) &
9046 BTRFS_EXTENT_FLAG_DATA)) {
/* key.objectid = extent bytenr, key.offset = extent length. */
9051 ret = populate_csum(trans, csum_root, buf, key.objectid,
9058 btrfs_free_path(path);
9064 * Recalculate the csum and put it into the csum tree.
9066 * Extent tree init will wipe out all the extent info, so in that case, we
9067 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9068 * will use fs/subvol trees to init the csum tree.
/* Dispatcher: choose the csum-tree rebuild source (fs trees vs extent tree). */
9070 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9071 struct btrfs_root *csum_root,
9075 return fill_csum_tree_from_fs(trans, csum_root);
9077 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Per-root summary gathered from the extent tree, used to validate and
 * repair ROOT_ITEMs (see repair_root_items()).  Keyed by root objectid via
 * the embedded cache_extent.  Elided fields (bytenr, gen, node_count)
 * record the best candidate root node found so far.
 */
9080 struct root_item_info {
9081 /* level of the root */
9083 /* number of nodes at this level, must be 1 for a root */
9087 struct cache_extent cache_extent;
/* Lazily allocated cache of root_item_info, keyed by root objectid. */
9090 static struct cache_tree *roots_info_cache = NULL;
/*
 * Tear down roots_info_cache: pop every entry, free its root_item_info,
 * then free and NULL the cache itself.  Safe to call when the cache was
 * never built.
 */
9092 static void free_roots_info_cache(void)
9094 if (!roots_info_cache)
9097 while (!cache_tree_empty(roots_info_cache)) {
9098 struct cache_extent *entry;
9099 struct root_item_info *rii;
9101 entry = first_cache_extent(roots_info_cache);
9104 remove_cache_extent(roots_info_cache, entry);
9105 rii = container_of(entry, struct root_item_info, cache_extent);
9109 free(roots_info_cache);
9110 roots_info_cache = NULL;
/*
 * Scan the extent tree and, for every tree block that is a root candidate
 * (its first inline ref is a TREE_BLOCK_REF), record per-root the highest
 * level seen plus that block's bytenr and generation in roots_info_cache.
 * repair_root_items() later compares these against the on-disk ROOT_ITEMs.
 *
 * NOTE(review): several continue/error lines are elided in this view.
 */
9113 static int build_roots_info_cache(struct btrfs_fs_info *info)
9116 struct btrfs_key key;
9117 struct extent_buffer *leaf;
9118 struct btrfs_path *path;
9120 if (!roots_info_cache) {
9121 roots_info_cache = malloc(sizeof(*roots_info_cache));
9122 if (!roots_info_cache)
9124 cache_tree_init(roots_info_cache);
9127 path = btrfs_alloc_path();
9132 key.type = BTRFS_EXTENT_ITEM_KEY;
9135 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9138 leaf = path->nodes[0];
9141 struct btrfs_key found_key;
9142 struct btrfs_extent_item *ei;
9143 struct btrfs_extent_inline_ref *iref;
9144 int slot = path->slots[0];
9149 struct cache_extent *entry;
9150 struct root_item_info *rii;
9152 if (slot >= btrfs_header_nritems(leaf)) {
9153 ret = btrfs_next_leaf(info->extent_root, path);
9160 leaf = path->nodes[0];
9161 slot = path->slots[0];
9164 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only extent/metadata items can describe tree blocks. */
9166 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9167 found_key.type != BTRFS_METADATA_ITEM_KEY)
9170 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9171 flags = btrfs_extent_flags(leaf, ei);
9173 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9174 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Locate the first inline ref.  For METADATA_ITEMs (skinny metadata)
 * the level is the key offset; for EXTENT_ITEMs it sits in the
 * tree_block_info that precedes the refs.
 */
9177 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9178 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9179 level = found_key.offset;
9181 struct btrfs_tree_block_info *info;
9183 info = (struct btrfs_tree_block_info *)(ei + 1);
9184 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9185 level = btrfs_tree_block_level(leaf, info);
9189 * For a root extent, it must be of the following type and the
9190 * first (and only one) iref in the item.
9192 type = btrfs_extent_inline_ref_type(leaf, iref);
9193 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* Find or create the cache entry for this root id. */
9196 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9197 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9199 rii = malloc(sizeof(struct root_item_info));
9204 rii->cache_extent.start = root_id;
9205 rii->cache_extent.size = 1;
9206 rii->level = (u8)-1;
9207 entry = &rii->cache_extent;
9208 ret = insert_cache_extent(roots_info_cache, entry);
9211 rii = container_of(entry, struct root_item_info,
9215 ASSERT(rii->cache_extent.start == root_id);
9216 ASSERT(rii->cache_extent.size == 1);
/*
 * Track the highest-level block per root; the real root must be the
 * unique block at the top level (node_count stays 1).
 */
9218 if (level > rii->level || rii->level == (u8)-1) {
9220 rii->bytenr = found_key.objectid;
9221 rii->gen = btrfs_extent_generation(leaf, ei);
9222 rii->node_count = 1;
9223 } else if (level == rii->level) {
9231 btrfs_free_path(path);
/*
 * Check one ROOT_ITEM (already positioned at path->slots[0]) against the
 * data gathered in roots_info_cache and, unless @read_only_mode, rewrite
 * its bytenr/level/generation to match the real root node found in the
 * extent tree.  Return value (elided here) reports whether a repair
 * was/would be needed.
 */
9236 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9237 struct btrfs_path *path,
9238 const struct btrfs_key *root_key,
9239 const int read_only_mode)
9241 const u64 root_id = root_key->objectid;
9242 struct cache_extent *entry;
9243 struct root_item_info *rii;
9244 struct btrfs_root_item ri;
9245 unsigned long offset;
9247 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9250 "Error: could not find extent items for root %llu\n",
9251 root_key->objectid);
9255 rii = container_of(entry, struct root_item_info, cache_extent);
9256 ASSERT(rii->cache_extent.start == root_id);
9257 ASSERT(rii->cache_extent.size == 1);
/* Multiple blocks at the top level: cannot identify a unique root. */
9259 if (rii->node_count != 1) {
9261 "Error: could not find btree root extent for root %llu\n",
/* Pull the on-disk root item into a CPU copy for comparison. */
9266 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9267 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9269 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9270 btrfs_root_level(&ri) != rii->level ||
9271 btrfs_root_generation(&ri) != rii->gen) {
9274 * If we're in repair mode but our caller told us to not update
9275 * the root item, i.e. just check if it needs to be updated, don't
9276 * print this message, since the caller will call us again shortly
9277 * for the same root item without read only mode (the caller will
9278 * open a transaction first).
9280 if (!(read_only_mode && repair))
9282 "%sroot item for root %llu,"
9283 " current bytenr %llu, current gen %llu, current level %u,"
9284 " new bytenr %llu, new gen %llu, new level %u\n",
9285 (read_only_mode ? "" : "fixing "),
9287 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9288 btrfs_root_level(&ri),
9289 rii->bytenr, rii->gen, rii->level);
/*
 * A root item newer than the found root node means the cache is
 * stale or the fs is worse off than this repair can handle; bail.
 */
9291 if (btrfs_root_generation(&ri) > rii->gen) {
9293 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9294 root_id, btrfs_root_generation(&ri), rii->gen);
9298 if (!read_only_mode) {
9299 btrfs_set_root_bytenr(&ri, rii->bytenr);
9300 btrfs_set_root_level(&ri, rii->level);
9301 btrfs_set_root_generation(&ri, rii->gen);
9302 write_extent_buffer(path->nodes[0], &ri,
9303 offset, sizeof(ri));
9313 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9314 * caused read-only snapshots to be corrupted if they were created at a moment
9315 * when the source subvolume/snapshot had orphan items. The issue was that the
9316 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9317 * node instead of the post orphan cleanup root node.
9318 * So this function, and its callees, just detects and fixes those cases. Even
9319 * though the regression was for read-only snapshots, this function applies to
9320 * any snapshot/subvolume root.
9321 * This must be run before any other repair code - not doing it so, makes other
9322 * repair code delete or modify backrefs in the extent tree for example, which
9323 * will result in an inconsistent fs after repairing the root items.
/*
 * Iterate every subvolume/snapshot ROOT_ITEM in the tree root and run
 * maybe_repair_root_item() on it; in repair mode a transaction is opened
 * only for leaves that actually need fixing.  Returns (elided here) the
 * number of bad/fixed root items or a negative error.
 */
9325 static int repair_root_items(struct btrfs_fs_info *info)
9327 struct btrfs_path *path = NULL;
9328 struct btrfs_key key;
9329 struct extent_buffer *leaf;
9330 struct btrfs_trans_handle *trans = NULL;
/* Build the per-root summaries from the extent tree first. */
9335 ret = build_roots_info_cache(info);
9339 path = btrfs_alloc_path();
9345 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9346 key.type = BTRFS_ROOT_ITEM_KEY;
9351 * Avoid opening and committing transactions if a leaf doesn't have
9352 * any root items that need to be fixed, so that we avoid rotating
9353 * backup roots unnecessarily.
9356 trans = btrfs_start_transaction(info->tree_root, 1);
9357 if (IS_ERR(trans)) {
9358 ret = PTR_ERR(trans);
9363 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9367 leaf = path->nodes[0];
9370 struct btrfs_key found_key;
/* Leaf exhausted: advance to the next key / commit as needed. */
9372 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9373 int no_more_keys = find_next_key(path, &key);
9375 btrfs_release_path(path);
9377 ret = btrfs_commit_transaction(trans,
9389 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9391 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
/* Reloc trees are rebuilt elsewhere; never "repair" them here. */
9393 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9396 ret = maybe_repair_root_item(info, path, &found_key,
/* First pass (read-only) found damage: reopen with a transaction. */
9401 if (!trans && repair) {
9404 btrfs_release_path(path);
9414 free_roots_info_cache();
9415 btrfs_free_path(path);
9417 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Usage/help text for `btrfs check`, printed by usage() (NULL-terminated
 * array of lines).  Fixed typos in user-facing text: "inegrity" ->
 * "integrity" (twice) and "checkums" -> "checksums"; also terminate and
 * close the array properly.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"-p|--progress               indicate progress",
	NULL
};
9447 int cmd_check(int argc, char **argv)
9449 struct cache_tree root_cache;
9450 struct btrfs_root *root;
9451 struct btrfs_fs_info *info;
9454 u64 tree_root_bytenr = 0;
9455 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9458 int init_csum_tree = 0;
9460 int qgroup_report = 0;
9461 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9465 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9466 OPT_CHECK_CSUM, OPT_READONLY };
9467 static const struct option long_options[] = {
9468 { "super", required_argument, NULL, 's' },
9469 { "repair", no_argument, NULL, OPT_REPAIR },
9470 { "readonly", no_argument, NULL, OPT_READONLY },
9471 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9472 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9473 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9474 { "backup", no_argument, NULL, 'b' },
9475 { "subvol-extents", required_argument, NULL, 'E' },
9476 { "qgroup-report", no_argument, NULL, 'Q' },
9477 { "tree-root", required_argument, NULL, 'r' },
9478 { "progress", no_argument, NULL, 'p' },
9482 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9486 case 'a': /* ignored */ break;
9488 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9491 num = arg_strtou64(optarg);
9492 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9494 "ERROR: super mirror should be less than: %d\n",
9495 BTRFS_SUPER_MIRROR_MAX);
9498 bytenr = btrfs_sb_offset(((int)num));
9499 printf("using SB copy %llu, bytenr %llu\n", num,
9500 (unsigned long long)bytenr);
9506 subvolid = arg_strtou64(optarg);
9509 tree_root_bytenr = arg_strtou64(optarg);
9512 ctx.progress_enabled = true;
9516 usage(cmd_check_usage);
9518 printf("enabling repair mode\n");
9520 ctree_flags |= OPEN_CTREE_WRITES;
9526 printf("Creating a new CRC tree\n");
9529 ctree_flags |= OPEN_CTREE_WRITES;
9531 case OPT_INIT_EXTENT:
9532 init_extent_tree = 1;
9533 ctree_flags |= (OPEN_CTREE_WRITES |
9534 OPEN_CTREE_NO_BLOCK_GROUPS);
9537 case OPT_CHECK_CSUM:
9538 check_data_csum = 1;
9542 argc = argc - optind;
9544 if (check_argc_exact(argc, 1))
9545 usage(cmd_check_usage);
9547 if (ctx.progress_enabled) {
9548 ctx.tp = TASK_NOTHING;
9549 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9552 /* This check is the only reason for --readonly to exist */
9553 if (readonly && repair) {
9554 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9559 cache_tree_init(&root_cache);
9561 if((ret = check_mounted(argv[optind])) < 0) {
9562 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9565 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9570 /* only allow partial opening under repair mode */
9572 ctree_flags |= OPEN_CTREE_PARTIAL;
9574 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9577 fprintf(stderr, "Couldn't open file system\n");
9583 root = info->fs_root;
9586 * repair mode will force us to commit transaction which
9587 * will make us fail to load log tree when mounting.
9589 if (repair && btrfs_super_log_root(info->super_copy)) {
9590 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9595 ret = zero_log_tree(root);
9597 fprintf(stderr, "fail to zero log tree\n");
9602 uuid_unparse(info->super_copy->fsid, uuidbuf);
9603 if (qgroup_report) {
9604 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9606 ret = qgroup_verify_all(info);
9608 print_qgroup_report(1);
9612 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9613 subvolid, argv[optind], uuidbuf);
9614 ret = print_extent_state(info, subvolid);
9617 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9619 if (!extent_buffer_uptodate(info->tree_root->node) ||
9620 !extent_buffer_uptodate(info->dev_root->node) ||
9621 !extent_buffer_uptodate(info->chunk_root->node)) {
9622 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9627 if (init_extent_tree || init_csum_tree) {
9628 struct btrfs_trans_handle *trans;
9630 trans = btrfs_start_transaction(info->extent_root, 0);
9631 if (IS_ERR(trans)) {
9632 fprintf(stderr, "Error starting transaction\n");
9633 ret = PTR_ERR(trans);
9637 if (init_extent_tree) {
9638 printf("Creating a new extent tree\n");
9639 ret = reinit_extent_tree(trans, info);
9644 if (init_csum_tree) {
9645 fprintf(stderr, "Reinit crc root\n");
9646 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9648 fprintf(stderr, "crc root initialization failed\n");
9653 ret = fill_csum_tree(trans, info->csum_root,
9656 fprintf(stderr, "crc refilling failed\n");
9661 * Ok now we commit and run the normal fsck, which will add
9662 * extent entries for all of the items it finds.
9664 ret = btrfs_commit_transaction(trans, info->extent_root);
9668 if (!extent_buffer_uptodate(info->extent_root->node)) {
9669 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9673 if (!extent_buffer_uptodate(info->csum_root->node)) {
9674 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9679 if (!ctx.progress_enabled)
9680 fprintf(stderr, "checking extents\n");
9681 ret = check_chunks_and_extents(root);
9683 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9685 ret = repair_root_items(info);
9689 fprintf(stderr, "Fixed %d roots.\n", ret);
9691 } else if (ret > 0) {
9693 "Found %d roots with an outdated root item.\n",
9696 "Please run a filesystem check with the option --repair to fix them.\n");
9701 if (!ctx.progress_enabled)
9702 fprintf(stderr, "checking free space cache\n");
9703 ret = check_space_cache(root);
9708 * We used to have to have these hole extents in between our real
9709 * extents so if we don't have this flag set we need to make sure there
9710 * are no gaps in the file extents for inodes, otherwise we can just
9711 * ignore it when this happens.
9713 no_holes = btrfs_fs_incompat(root->fs_info,
9714 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9715 if (!ctx.progress_enabled)
9716 fprintf(stderr, "checking fs roots\n");
9717 ret = check_fs_roots(root, &root_cache);
9721 fprintf(stderr, "checking csums\n");
9722 ret = check_csums(root);
9726 fprintf(stderr, "checking root refs\n");
9727 ret = check_root_refs(root, &root_cache);
9731 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9732 struct extent_buffer *eb;
9734 eb = list_first_entry(&root->fs_info->recow_ebs,
9735 struct extent_buffer, recow);
9736 list_del_init(&eb->recow);
9737 ret = recow_extent_buffer(root, eb);
9742 while (!list_empty(&delete_items)) {
9743 struct bad_item *bad;
9745 bad = list_first_entry(&delete_items, struct bad_item, list);
9746 list_del_init(&bad->list);
9748 ret = delete_bad_item(root, bad);
9752 if (info->quota_enabled) {
9754 fprintf(stderr, "checking quota groups\n");
9755 err = qgroup_verify_all(info);
9760 if (!list_empty(&root->fs_info->recow_ebs)) {
9761 fprintf(stderr, "Transid errors in file system\n");
9765 print_qgroup_report(0);
9766 if (found_old_backref) { /*
9767 * there was a disk format change when mixed
9768 * backref was in testing tree. The old format
9769 * existed about one week.
9771 printf("\n * Found old mixed backref format. "
9772 "The old format is not supported! *"
9773 "\n * Please mount the FS in readonly mode, "
9774 "backup data and re-format the FS. *\n\n");
9777 printf("found %llu bytes used err is %d\n",
9778 (unsigned long long)bytes_used, ret);
9779 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9780 printf("total tree bytes: %llu\n",
9781 (unsigned long long)total_btree_bytes);
9782 printf("total fs tree bytes: %llu\n",
9783 (unsigned long long)total_fs_tree_bytes);
9784 printf("total extent tree bytes: %llu\n",
9785 (unsigned long long)total_extent_tree_bytes);
9786 printf("btree space waste bytes: %llu\n",
9787 (unsigned long long)btree_space_waste);
9788 printf("file data blocks allocated: %llu\n referenced %llu\n",
9789 (unsigned long long)data_bytes_allocated,
9790 (unsigned long long)data_bytes_referenced);
9792 free_root_recs_tree(&root_cache);
9796 if (ctx.progress_enabled)
9797 task_deinit(ctx.info);