2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
48 TASK_NOTHING, /* must be the last element */
53 enum task_position tp;
55 struct task_info *info;
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
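/*
 * Progress reporting thread: prints the name of the current check pass
 * together with a spinning work indicator, refreshed roughly once per
 * second via the task period helpers; returns immediately when there is
 * nothing to report (TASK_NOTHING).
 */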
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
81 static char *task_position_string[] = {
83 "checking free space cache",
87 task_period_start(priv->info, 1000 /* 1s */);
89 if (priv->tp == TASK_NOTHING)
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
97 task_period_wait(priv->info);
102 static int print_status_return(void *p)
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, but with the undetermined members removed
136 * and converted to use a list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
149 struct tree_backref {
150 struct extent_backref node;
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
189 unsigned int filetype:8;
191 unsigned int ref_type;
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
223 struct file_extent_hole {
229 /* Compatibility function to allow reuse of old code */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
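/*
 * rb-tree comparator for file_extent_hole: order holes by start offset;
 * when the starts are equal, the longer hole is kept as the merge center
 * and the duplicate is merged away later.
 */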
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be the merge center;
257 * identical holes will be merged later
260 /* Hole 2 will be the merge center */
265 * Add a hole to the record
267 * This will merge holes for copy_file_extent_holes(),
268 * which ensures there won't be contiguous holes.
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
277 hole = malloc(sizeof(*hole));
282 /* Since compare_hole() never returns 0, no -EEXIST can happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iteratively merge with the following holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
303 if (hole->start + hole->len >= next->start) {
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
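/*
 * rb_search() callback: @data carries a file_extent_hole whose start is
 * the offset being looked up; returns 0 when that offset falls inside
 * the hole at @node, otherwise a nonzero value steering the search.
 */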
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
321 hole = (struct file_extent_hole *)data;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole from the record
335 * This may split the hole and is much stricter than add.
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
346 struct rb_node *node;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
357 if (start + len > hole->start + hole->len)
361 * Now there is no overlap: delete the hole and re-add the
362 * split(s) if they exist.
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
417 node = rb_first(holes);
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
443 struct rb_root holes;
444 struct list_head orphan_extents;
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
489 struct cache_extent cache;
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
514 struct btrfs_key key;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
528 extent_buffer_get(root->node);
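/* Map an inode mode (S_IF*) to the corresponding BTRFS_FT_* dir entry type. */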
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct inode_backref *tmp;
570 struct orphan_data_extent *src_orphan;
571 struct orphan_data_extent *dst_orphan;
575 rec = malloc(sizeof(*rec));
577 return ERR_PTR(-ENOMEM);
578 memcpy(rec, orig_rec, sizeof(*rec));
580 INIT_LIST_HEAD(&rec->backrefs);
581 INIT_LIST_HEAD(&rec->orphan_extents);
582 rec->holes = RB_ROOT;
584 list_for_each_entry(orig, &orig_rec->backrefs, list) {
585 size = sizeof(*orig) + orig->namelen + 1;
586 backref = malloc(size);
591 memcpy(backref, orig, size);
592 list_add_tail(&backref->list, &rec->backrefs);
594 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595 dst_orphan = malloc(sizeof(*dst_orphan));
600 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
603 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
609 if (!list_empty(&rec->backrefs))
610 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611 list_del(&orig->list);
615 if (!list_empty(&rec->orphan_extents))
616 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617 list_del(&orig->list);
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
629 struct orphan_data_extent *orphan;
631 if (list_empty(orphan_extents))
633 printf("The following data extent is lost in tree %llu:\n",
635 list_for_each_entry(orphan, orphan_extents, list) {
636 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637 orphan->objectid, orphan->offset, orphan->disk_bytenr,
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
644 u64 root_objectid = root->root_key.objectid;
645 int errors = rec->errors;
649 /* for reloc root errors, print the corresponding fs root objectid */
650 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651 root_objectid = root->root_key.offset;
652 fprintf(stderr, "reloc");
654 fprintf(stderr, "root %llu inode %llu errors %x",
655 (unsigned long long) root_objectid,
656 (unsigned long long) rec->ino, rec->errors);
658 if (errors & I_ERR_NO_INODE_ITEM)
659 fprintf(stderr, ", no inode item");
660 if (errors & I_ERR_NO_ORPHAN_ITEM)
661 fprintf(stderr, ", no orphan item");
662 if (errors & I_ERR_DUP_INODE_ITEM)
663 fprintf(stderr, ", dup inode item");
664 if (errors & I_ERR_DUP_DIR_INDEX)
665 fprintf(stderr, ", dup dir index");
666 if (errors & I_ERR_ODD_DIR_ITEM)
667 fprintf(stderr, ", odd dir item");
668 if (errors & I_ERR_ODD_FILE_EXTENT)
669 fprintf(stderr, ", odd file extent");
670 if (errors & I_ERR_BAD_FILE_EXTENT)
671 fprintf(stderr, ", bad file extent");
672 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673 fprintf(stderr, ", file extent overlap");
674 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675 fprintf(stderr, ", file extent discount");
676 if (errors & I_ERR_DIR_ISIZE_WRONG)
677 fprintf(stderr, ", dir isize wrong");
678 if (errors & I_ERR_FILE_NBYTES_WRONG)
679 fprintf(stderr, ", nbytes wrong");
680 if (errors & I_ERR_ODD_CSUM_ITEM)
681 fprintf(stderr, ", odd csum item");
682 if (errors & I_ERR_SOME_CSUM_MISSING)
683 fprintf(stderr, ", some csum missing");
684 if (errors & I_ERR_LINK_COUNT_WRONG)
685 fprintf(stderr, ", link count wrong");
686 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687 fprintf(stderr, ", orphan file extent");
688 fprintf(stderr, "\n");
689 /* Print the orphan extents if needed */
690 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
693 /* Print the holes if needed */
694 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695 struct file_extent_hole *hole;
696 struct rb_node *node;
699 node = rb_first(&rec->holes);
700 fprintf(stderr, "Found file extent holes:\n");
703 hole = rb_entry(node, struct file_extent_hole, node);
704 fprintf(stderr, "\tstart: %llu, len: %llu\n",
705 hole->start, hole->len);
706 node = rb_next(node);
709 fprintf(stderr, "\tstart: 0, len: %llu\n",
710 round_up(rec->isize, root->sectorsize));
714 static void print_ref_error(int errors)
716 if (errors & REF_ERR_NO_DIR_ITEM)
717 fprintf(stderr, ", no dir item");
718 if (errors & REF_ERR_NO_DIR_INDEX)
719 fprintf(stderr, ", no dir index");
720 if (errors & REF_ERR_NO_INODE_REF)
721 fprintf(stderr, ", no inode ref");
722 if (errors & REF_ERR_DUP_DIR_ITEM)
723 fprintf(stderr, ", dup dir item");
724 if (errors & REF_ERR_DUP_DIR_INDEX)
725 fprintf(stderr, ", dup dir index");
726 if (errors & REF_ERR_DUP_INODE_REF)
727 fprintf(stderr, ", dup inode ref");
728 if (errors & REF_ERR_INDEX_UNMATCH)
729 fprintf(stderr, ", index unmatch");
730 if (errors & REF_ERR_FILETYPE_UNMATCH)
731 fprintf(stderr, ", filetype unmatch");
732 if (errors & REF_ERR_NAME_TOO_LONG)
733 fprintf(stderr, ", name too long");
734 if (errors & REF_ERR_NO_ROOT_REF)
735 fprintf(stderr, ", no root ref");
736 if (errors & REF_ERR_NO_ROOT_BACKREF)
737 fprintf(stderr, ", no root backref");
738 if (errors & REF_ERR_DUP_ROOT_REF)
739 fprintf(stderr, ", dup root ref");
740 if (errors & REF_ERR_DUP_ROOT_BACKREF)
741 fprintf(stderr, ", dup root backref");
742 fprintf(stderr, "\n");
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
748 struct ptr_node *node;
749 struct cache_extent *cache;
750 struct inode_record *rec = NULL;
753 cache = lookup_cache_extent(inode_cache, ino, 1);
755 node = container_of(cache, struct ptr_node, cache);
757 if (mod && rec->refs > 1) {
758 node->data = clone_inode_rec(rec);
759 if (IS_ERR(node->data))
765 rec = calloc(1, sizeof(*rec));
767 return ERR_PTR(-ENOMEM);
769 rec->extent_start = (u64)-1;
771 INIT_LIST_HEAD(&rec->backrefs);
772 INIT_LIST_HEAD(&rec->orphan_extents);
773 rec->holes = RB_ROOT;
775 node = malloc(sizeof(*node));
778 return ERR_PTR(-ENOMEM);
780 node->cache.start = ino;
781 node->cache.size = 1;
784 if (ino == BTRFS_FREE_INO_OBJECTID)
787 ret = insert_cache_extent(inode_cache, &node->cache);
789 return ERR_PTR(-EEXIST);
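/* Release every orphan_data_extent queued on @orphan_extents. */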
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
796 struct orphan_data_extent *orphan;
798 while (!list_empty(orphan_extents)) {
799 orphan = list_entry(orphan_extents->next,
800 struct orphan_data_extent, list);
801 list_del(&orphan->list);
806 static void free_inode_rec(struct inode_record *rec)
808 struct inode_backref *backref;
813 while (!list_empty(&rec->backrefs)) {
814 backref = list_entry(rec->backrefs.next,
815 struct inode_backref, list);
816 list_del(&backref->list);
819 free_orphan_data_extents(&rec->orphan_extents);
820 free_file_extent_holes(&rec->holes);
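/*
 * An inode record may be dropped once it has been checked, has no
 * errors, its inode item was found, every link is accounted for and no
 * backrefs remain pending.
 */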
824 static int can_free_inode_rec(struct inode_record *rec)
826 if (!rec->errors && rec->checked && rec->found_inode_item &&
827 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833 struct inode_record *rec)
835 struct cache_extent *cache;
836 struct inode_backref *tmp, *backref;
837 struct ptr_node *node;
838 unsigned char filetype;
840 if (!rec->found_inode_item)
843 filetype = imode_to_type(rec->imode);
844 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845 if (backref->found_dir_item && backref->found_dir_index) {
846 if (backref->filetype != filetype)
847 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848 if (!backref->errors && backref->found_inode_ref &&
849 rec->nlink == rec->found_link) {
850 list_del(&backref->list);
856 if (!rec->checked || rec->merging)
859 if (S_ISDIR(rec->imode)) {
860 if (rec->found_size != rec->isize)
861 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862 if (rec->found_file_extent)
863 rec->errors |= I_ERR_ODD_FILE_EXTENT;
864 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865 if (rec->found_dir_item)
866 rec->errors |= I_ERR_ODD_DIR_ITEM;
867 if (rec->found_size != rec->nbytes)
868 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
869 if (rec->nlink > 0 && !no_holes &&
870 (rec->extent_end < rec->isize ||
871 first_extent_gap(&rec->holes) < rec->isize))
872 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
875 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876 if (rec->found_csum_item && rec->nodatasum)
877 rec->errors |= I_ERR_ODD_CSUM_ITEM;
878 if (rec->some_csum_missing && !rec->nodatasum)
879 rec->errors |= I_ERR_SOME_CSUM_MISSING;
882 BUG_ON(rec->refs != 1);
883 if (can_free_inode_rec(rec)) {
884 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885 node = container_of(cache, struct ptr_node, cache);
886 BUG_ON(node->data != rec);
887 remove_cache_extent(inode_cache, &node->cache);
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
895 struct btrfs_path path;
896 struct btrfs_key key;
899 key.objectid = BTRFS_ORPHAN_OBJECTID;
900 key.type = BTRFS_ORPHAN_ITEM_KEY;
903 btrfs_init_path(&path);
904 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905 btrfs_release_path(&path);
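/*
 * Fill the current inode_record from the on-disk inode item: nlink,
 * isize, nbytes, imode and the NODATASUM flag. A second inode item for
 * the same inode is flagged as I_ERR_DUP_INODE_ITEM.
 */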
911 static int process_inode_item(struct extent_buffer *eb,
912 int slot, struct btrfs_key *key,
913 struct shared_node *active_node)
915 struct inode_record *rec;
916 struct btrfs_inode_item *item;
918 rec = active_node->current;
919 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920 if (rec->found_inode_item) {
921 rec->errors |= I_ERR_DUP_INODE_ITEM;
924 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925 rec->nlink = btrfs_inode_nlink(eb, item);
926 rec->isize = btrfs_inode_size(eb, item);
927 rec->nbytes = btrfs_inode_nbytes(eb, item);
928 rec->imode = btrfs_inode_mode(eb, item);
929 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
931 rec->found_inode_item = 1;
933 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934 maybe_free_inode_rec(&active_node->inode_cache, rec);
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
940 int namelen, u64 dir)
942 struct inode_backref *backref;
944 list_for_each_entry(backref, &rec->backrefs, list) {
945 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
947 if (backref->dir != dir || backref->namelen != namelen)
949 if (memcmp(name, backref->name, namelen))
954 backref = malloc(sizeof(*backref) + namelen + 1);
955 memset(backref, 0, sizeof(*backref));
957 backref->namelen = namelen;
958 memcpy(backref->name, name, namelen);
959 backref->name[namelen] = '\0';
960 list_add_tail(&backref->list, &rec->backrefs);
964 static int add_inode_backref(struct cache_tree *inode_cache,
965 u64 ino, u64 dir, u64 index,
966 const char *name, int namelen,
967 int filetype, int itemtype, int errors)
969 struct inode_record *rec;
970 struct inode_backref *backref;
972 rec = get_inode_rec(inode_cache, ino, 1);
974 backref = get_inode_backref(rec, name, namelen, dir);
976 backref->errors |= errors;
977 if (itemtype == BTRFS_DIR_INDEX_KEY) {
978 if (backref->found_dir_index)
979 backref->errors |= REF_ERR_DUP_DIR_INDEX;
980 if (backref->found_inode_ref && backref->index != index)
981 backref->errors |= REF_ERR_INDEX_UNMATCH;
982 if (backref->found_dir_item && backref->filetype != filetype)
983 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
985 backref->index = index;
986 backref->filetype = filetype;
987 backref->found_dir_index = 1;
988 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
990 if (backref->found_dir_item)
991 backref->errors |= REF_ERR_DUP_DIR_ITEM;
992 if (backref->found_dir_index && backref->filetype != filetype)
993 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
995 backref->filetype = filetype;
996 backref->found_dir_item = 1;
997 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
998 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
999 if (backref->found_inode_ref)
1000 backref->errors |= REF_ERR_DUP_INODE_REF;
1001 if (backref->found_dir_index && backref->index != index)
1002 backref->errors |= REF_ERR_INDEX_UNMATCH;
1004 backref->index = index;
1006 backref->ref_type = itemtype;
1007 backref->found_inode_ref = 1;
1012 maybe_free_inode_rec(inode_cache, rec);
1016 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1017 struct cache_tree *dst_cache)
1019 struct inode_backref *backref;
1024 list_for_each_entry(backref, &src->backrefs, list) {
1025 if (backref->found_dir_index) {
1026 add_inode_backref(dst_cache, dst->ino, backref->dir,
1027 backref->index, backref->name,
1028 backref->namelen, backref->filetype,
1029 BTRFS_DIR_INDEX_KEY, backref->errors);
1031 if (backref->found_dir_item) {
1033 add_inode_backref(dst_cache, dst->ino,
1034 backref->dir, 0, backref->name,
1035 backref->namelen, backref->filetype,
1036 BTRFS_DIR_ITEM_KEY, backref->errors);
1038 if (backref->found_inode_ref) {
1039 add_inode_backref(dst_cache, dst->ino,
1040 backref->dir, backref->index,
1041 backref->name, backref->namelen, 0,
1042 backref->ref_type, backref->errors);
1046 if (src->found_dir_item)
1047 dst->found_dir_item = 1;
1048 if (src->found_file_extent)
1049 dst->found_file_extent = 1;
1050 if (src->found_csum_item)
1051 dst->found_csum_item = 1;
1052 if (src->some_csum_missing)
1053 dst->some_csum_missing = 1;
1054 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1055 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1060 BUG_ON(src->found_link < dir_count);
1061 dst->found_link += src->found_link - dir_count;
1062 dst->found_size += src->found_size;
1063 if (src->extent_start != (u64)-1) {
1064 if (dst->extent_start == (u64)-1) {
1065 dst->extent_start = src->extent_start;
1066 dst->extent_end = src->extent_end;
1068 if (dst->extent_end > src->extent_start)
1069 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1070 else if (dst->extent_end < src->extent_start) {
1071 ret = add_file_extent_hole(&dst->holes,
1073 src->extent_start - dst->extent_end);
1075 if (dst->extent_end < src->extent_end)
1076 dst->extent_end = src->extent_end;
1080 dst->errors |= src->errors;
1081 if (src->found_inode_item) {
1082 if (!dst->found_inode_item) {
1083 dst->nlink = src->nlink;
1084 dst->isize = src->isize;
1085 dst->nbytes = src->nbytes;
1086 dst->imode = src->imode;
1087 dst->nodatasum = src->nodatasum;
1088 dst->found_inode_item = 1;
1090 dst->errors |= I_ERR_DUP_INODE_ITEM;
1098 static int splice_shared_node(struct shared_node *src_node,
1099 struct shared_node *dst_node)
1101 struct cache_extent *cache;
1102 struct ptr_node *node, *ins;
1103 struct cache_tree *src, *dst;
1104 struct inode_record *rec, *conflict;
1105 u64 current_ino = 0;
1109 if (--src_node->refs == 0)
1111 if (src_node->current)
1112 current_ino = src_node->current->ino;
1114 src = &src_node->root_cache;
1115 dst = &dst_node->root_cache;
1117 cache = search_cache_extent(src, 0);
1119 node = container_of(cache, struct ptr_node, cache);
1121 cache = next_cache_extent(cache);
1124 remove_cache_extent(src, &node->cache);
1127 ins = malloc(sizeof(*ins));
1128 ins->cache.start = node->cache.start;
1129 ins->cache.size = node->cache.size;
1133 ret = insert_cache_extent(dst, &ins->cache);
1134 if (ret == -EEXIST) {
1135 conflict = get_inode_rec(dst, rec->ino, 1);
1136 BUG_ON(IS_ERR(conflict));
1137 merge_inode_recs(rec, conflict, dst);
1139 conflict->checked = 1;
1140 if (dst_node->current == conflict)
1141 dst_node->current = NULL;
1143 maybe_free_inode_rec(dst, conflict);
1144 free_inode_rec(rec);
1151 if (src == &src_node->root_cache) {
1152 src = &src_node->inode_cache;
1153 dst = &dst_node->inode_cache;
1157 if (current_ino > 0 && (!dst_node->current ||
1158 current_ino > dst_node->current->ino)) {
1159 if (dst_node->current) {
1160 dst_node->current->checked = 1;
1161 maybe_free_inode_rec(dst, dst_node->current);
1163 dst_node->current = get_inode_rec(dst, current_ino, 1);
1164 BUG_ON(IS_ERR(dst_node->current));
1169 static void free_inode_ptr(struct cache_extent *cache)
1171 struct ptr_node *node;
1172 struct inode_record *rec;
1174 node = container_of(cache, struct ptr_node, cache);
1176 free_inode_rec(rec);
1180 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1182 static struct shared_node *find_shared_node(struct cache_tree *shared,
1185 struct cache_extent *cache;
1186 struct shared_node *node;
1188 cache = lookup_cache_extent(shared, bytenr, 1);
1190 node = container_of(cache, struct shared_node, cache);
1196 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1199 struct shared_node *node;
1201 node = calloc(1, sizeof(*node));
1202 node->cache.start = bytenr;
1203 node->cache.size = 1;
1204 cache_tree_init(&node->root_cache);
1205 cache_tree_init(&node->inode_cache);
1208 ret = insert_cache_extent(shared, &node->cache);
1213 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1214 struct walk_control *wc, int level)
1216 struct shared_node *node;
1217 struct shared_node *dest;
1219 if (level == wc->active_node)
1222 BUG_ON(wc->active_node <= level);
1223 node = find_shared_node(&wc->shared, bytenr);
1225 add_shared_node(&wc->shared, bytenr, refs);
1226 node = find_shared_node(&wc->shared, bytenr);
1227 wc->nodes[level] = node;
1228 wc->active_node = level;
1232 if (wc->root_level == wc->active_node &&
1233 btrfs_root_refs(&root->root_item) == 0) {
1234 if (--node->refs == 0) {
1235 free_inode_recs_tree(&node->root_cache);
1236 free_inode_recs_tree(&node->inode_cache);
1237 remove_cache_extent(&wc->shared, &node->cache);
1243 dest = wc->nodes[wc->active_node];
1244 splice_shared_node(node, dest);
1245 if (node->refs == 0) {
1246 remove_cache_extent(&wc->shared, &node->cache);
1252 static int leave_shared_node(struct btrfs_root *root,
1253 struct walk_control *wc, int level)
1255 struct shared_node *node;
1256 struct shared_node *dest;
1259 if (level == wc->root_level)
1262 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1266 BUG_ON(i >= BTRFS_MAX_LEVEL);
1268 node = wc->nodes[wc->active_node];
1269 wc->nodes[wc->active_node] = NULL;
1270 wc->active_node = i;
1272 dest = wc->nodes[wc->active_node];
1273 if (wc->active_node < wc->root_level ||
1274 btrfs_root_refs(&root->root_item) > 0) {
1275 BUG_ON(node->refs <= 1);
1276 splice_shared_node(node, dest);
1278 BUG_ON(node->refs < 2);
1287 * 1 - if the root with id child_root_id is a child of root parent_root_id
1288 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1289 * has other root(s) as parent(s)
1290 * 2 - if the root child_root_id doesn't have any parent roots
1292 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1295 struct btrfs_path path;
1296 struct btrfs_key key;
1297 struct extent_buffer *leaf;
1301 btrfs_init_path(&path);
1303 key.objectid = parent_root_id;
1304 key.type = BTRFS_ROOT_REF_KEY;
1305 key.offset = child_root_id;
1306 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1310 btrfs_release_path(&path);
1314 key.objectid = child_root_id;
1315 key.type = BTRFS_ROOT_BACKREF_KEY;
1317 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1323 leaf = path.nodes[0];
1324 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1325 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1328 leaf = path.nodes[0];
1331 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1332 if (key.objectid != child_root_id ||
1333 key.type != BTRFS_ROOT_BACKREF_KEY)
1338 if (key.offset == parent_root_id) {
1339 btrfs_release_path(&path);
1346 btrfs_release_path(&path);
1349 return has_parent ? 0 : 2;
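/*
 * Walk every btrfs_dir_item packed into this leaf item and record an
 * inode backref for each name: entries pointing to inode items go into
 * the inode cache, entries pointing to root items go into the root
 * cache, and anything else is reported as an invalid location.
 */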
1352 static int process_dir_item(struct btrfs_root *root,
1353 struct extent_buffer *eb,
1354 int slot, struct btrfs_key *key,
1355 struct shared_node *active_node)
1365 struct btrfs_dir_item *di;
1366 struct inode_record *rec;
1367 struct cache_tree *root_cache;
1368 struct cache_tree *inode_cache;
1369 struct btrfs_key location;
1370 char namebuf[BTRFS_NAME_LEN];
1372 root_cache = &active_node->root_cache;
1373 inode_cache = &active_node->inode_cache;
1374 rec = active_node->current;
1375 rec->found_dir_item = 1;
1377 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1378 total = btrfs_item_size_nr(eb, slot);
1379 while (cur < total) {
1381 btrfs_dir_item_key_to_cpu(eb, di, &location);
1382 name_len = btrfs_dir_name_len(eb, di);
1383 data_len = btrfs_dir_data_len(eb, di);
1384 filetype = btrfs_dir_type(eb, di);
1386 rec->found_size += name_len;
1387 if (name_len <= BTRFS_NAME_LEN) {
1391 len = BTRFS_NAME_LEN;
1392 error = REF_ERR_NAME_TOO_LONG;
1394 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1396 if (location.type == BTRFS_INODE_ITEM_KEY) {
1397 add_inode_backref(inode_cache, location.objectid,
1398 key->objectid, key->offset, namebuf,
1399 len, filetype, key->type, error);
1400 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1401 add_inode_backref(root_cache, location.objectid,
1402 key->objectid, key->offset,
1403 namebuf, len, filetype,
1406 fprintf(stderr, "invalid location in dir item %u\n",
1408 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1409 key->objectid, key->offset, namebuf,
1410 len, filetype, key->type, error);
1413 len = sizeof(*di) + name_len + data_len;
1414 di = (struct btrfs_dir_item *)((char *)di + len);
1417 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1418 rec->errors |= I_ERR_DUP_DIR_INDEX;
1423 static int process_inode_ref(struct extent_buffer *eb,
1424 int slot, struct btrfs_key *key,
1425 struct shared_node *active_node)
1433 struct cache_tree *inode_cache;
1434 struct btrfs_inode_ref *ref;
1435 char namebuf[BTRFS_NAME_LEN];
1437 inode_cache = &active_node->inode_cache;
1439 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1440 total = btrfs_item_size_nr(eb, slot);
1441 while (cur < total) {
1442 name_len = btrfs_inode_ref_name_len(eb, ref);
1443 index = btrfs_inode_ref_index(eb, ref);
1444 if (name_len <= BTRFS_NAME_LEN) {
1448 len = BTRFS_NAME_LEN;
1449 error = REF_ERR_NAME_TOO_LONG;
1451 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1452 add_inode_backref(inode_cache, key->objectid, key->offset,
1453 index, namebuf, len, 0, key->type, error);
1455 len = sizeof(*ref) + name_len;
1456 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1462 static int process_inode_extref(struct extent_buffer *eb,
1463 int slot, struct btrfs_key *key,
1464 struct shared_node *active_node)
1473 struct cache_tree *inode_cache;
1474 struct btrfs_inode_extref *extref;
1475 char namebuf[BTRFS_NAME_LEN];
1477 inode_cache = &active_node->inode_cache;
1479 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1480 total = btrfs_item_size_nr(eb, slot);
1481 while (cur < total) {
1482 name_len = btrfs_inode_extref_name_len(eb, extref);
1483 index = btrfs_inode_extref_index(eb, extref);
1484 parent = btrfs_inode_extref_parent(eb, extref);
1485 if (name_len <= BTRFS_NAME_LEN) {
1489 len = BTRFS_NAME_LEN;
1490 error = REF_ERR_NAME_TOO_LONG;
1492 read_extent_buffer(eb, namebuf,
1493 (unsigned long)(extref + 1), len);
1494 add_inode_backref(inode_cache, key->objectid, parent,
1495 index, namebuf, len, 0, key->type, error);
1497 len = sizeof(*extref) + name_len;
1498 extref = (struct btrfs_inode_extref *)((char *)extref + len);
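/*
 * Count how many bytes of the range [start, start + len) are covered by
 * checksum items in the csum tree; the number of covered bytes is
 * returned through @found.
 */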
1505 static int count_csum_range(struct btrfs_root *root, u64 start,
1506 u64 len, u64 *found)
1508 struct btrfs_key key;
1509 struct btrfs_path path;
1510 struct extent_buffer *leaf;
1515 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1517 btrfs_init_path(&path);
1519 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1521 key.type = BTRFS_EXTENT_CSUM_KEY;
1523 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1527 if (ret > 0 && path.slots[0] > 0) {
1528 leaf = path.nodes[0];
1529 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1530 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1531 key.type == BTRFS_EXTENT_CSUM_KEY)
1536 leaf = path.nodes[0];
1537 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1538 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1543 leaf = path.nodes[0];
1546 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1547 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1548 key.type != BTRFS_EXTENT_CSUM_KEY)
1551 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1552 if (key.offset >= start + len)
1555 if (key.offset > start)
1558 size = btrfs_item_size_nr(leaf, path.slots[0]);
1559 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1560 if (csum_end > start) {
1561 size = min(csum_end - start, len);
1570 btrfs_release_path(&path);
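/*
 * Validate one file extent item: track the covered file range, record
 * holes between extents, accumulate found_size and check that csum
 * items are present for regular extents and absent for preallocated ones.
 */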
1576 static int process_file_extent(struct btrfs_root *root,
1577 struct extent_buffer *eb,
1578 int slot, struct btrfs_key *key,
1579 struct shared_node *active_node)
1581 struct inode_record *rec;
1582 struct btrfs_file_extent_item *fi;
1584 u64 disk_bytenr = 0;
1585 u64 extent_offset = 0;
1586 u64 mask = root->sectorsize - 1;
1590 rec = active_node->current;
1591 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1592 rec->found_file_extent = 1;
1594 if (rec->extent_start == (u64)-1) {
1595 rec->extent_start = key->offset;
1596 rec->extent_end = key->offset;
1599 if (rec->extent_end > key->offset)
1600 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1601 else if (rec->extent_end < key->offset) {
1602 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1603 key->offset - rec->extent_end);
1608 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1609 extent_type = btrfs_file_extent_type(eb, fi);
1611 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1612 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1614 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1615 rec->found_size += num_bytes;
1616 num_bytes = (num_bytes + mask) & ~mask;
1617 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1618 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1619 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1620 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1621 extent_offset = btrfs_file_extent_offset(eb, fi);
1622 if (num_bytes == 0 || (num_bytes & mask))
1623 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1624 if (num_bytes + extent_offset >
1625 btrfs_file_extent_ram_bytes(eb, fi))
1626 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1627 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1628 (btrfs_file_extent_compression(eb, fi) ||
1629 btrfs_file_extent_encryption(eb, fi) ||
1630 btrfs_file_extent_other_encoding(eb, fi)))
1631 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1632 if (disk_bytenr > 0)
1633 rec->found_size += num_bytes;
1635 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1637 rec->extent_end = key->offset + num_bytes;
1640 * The data reloc tree will copy full extents into its inode and then
1641 * copy the corresponding csums. Because the extent it copied could be
1642 * a preallocated extent that hasn't been written to yet, there may be no
1643 * csums to copy, ergo we won't have csums for our file extent. This is
1644 * ok, so just don't bother checking csums if the inode belongs to the data reloc tree.
1647 if (disk_bytenr > 0 &&
1648 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1650 if (btrfs_file_extent_compression(eb, fi))
1651 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1653 disk_bytenr += extent_offset;
1655 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1658 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1660 rec->found_csum_item = 1;
1661 if (found < num_bytes)
1662 rec->some_csum_missing = 1;
1663 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1665 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1671 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1672 struct walk_control *wc)
1674 struct btrfs_key key;
1678 struct cache_tree *inode_cache;
1679 struct shared_node *active_node;
1681 if (wc->root_level == wc->active_node &&
1682 btrfs_root_refs(&root->root_item) == 0)
1685 active_node = wc->nodes[wc->active_node];
1686 inode_cache = &active_node->inode_cache;
1687 nritems = btrfs_header_nritems(eb);
1688 for (i = 0; i < nritems; i++) {
1689 btrfs_item_key_to_cpu(eb, &key, i);
1691 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1693 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1696 if (active_node->current == NULL ||
1697 active_node->current->ino < key.objectid) {
1698 if (active_node->current) {
1699 active_node->current->checked = 1;
1700 maybe_free_inode_rec(inode_cache,
1701 active_node->current);
1703 active_node->current = get_inode_rec(inode_cache,
1705 BUG_ON(IS_ERR(active_node->current));
1708 case BTRFS_DIR_ITEM_KEY:
1709 case BTRFS_DIR_INDEX_KEY:
1710 ret = process_dir_item(root, eb, i, &key, active_node);
1712 case BTRFS_INODE_REF_KEY:
1713 ret = process_inode_ref(eb, i, &key, active_node);
1715 case BTRFS_INODE_EXTREF_KEY:
1716 ret = process_inode_extref(eb, i, &key, active_node);
1718 case BTRFS_INODE_ITEM_KEY:
1719 ret = process_inode_item(eb, i, &key, active_node);
1721 case BTRFS_EXTENT_DATA_KEY:
1722 ret = process_file_extent(root, eb, i, &key,
1732 static void reada_walk_down(struct btrfs_root *root,
1733 struct extent_buffer *node, int slot)
1742 level = btrfs_header_level(node);
1746 nritems = btrfs_header_nritems(node);
1747 blocksize = btrfs_level_size(root, level - 1);
1748 for (i = slot; i < nritems; i++) {
1749 bytenr = btrfs_node_blockptr(node, i);
1750 ptr_gen = btrfs_node_ptr_generation(node, i);
1751 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1756 * Check the child node/leaf against the following conditions:
1757 * 1. the first item key of the node/leaf should match the corresponding key in the parent.
1759 * 2. the block pointer in the parent node should match the child node/leaf.
1760 * 3. the generation in the parent node and in the child's header should be consistent.
1762 * Otherwise the child node/leaf pointed to by the key in the parent is not valid.
1764 * We would like to check the leaf owner too, but since subvolumes may share leaves,
1765 * the leaf owner check is not very strong, so the key check should be
1766 * sufficient for that case.
1768 static int check_child_node(struct btrfs_root *root,
1769 struct extent_buffer *parent, int slot,
1770 struct extent_buffer *child)
1772 struct btrfs_key parent_key;
1773 struct btrfs_key child_key;
1776 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1777 if (btrfs_header_level(child) == 0)
1778 btrfs_item_key_to_cpu(child, &child_key, 0);
1780 btrfs_node_key_to_cpu(child, &child_key, 0);
1782 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1785 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1786 parent_key.objectid, parent_key.type, parent_key.offset,
1787 child_key.objectid, child_key.type, child_key.offset);
1789 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1791 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1792 btrfs_node_blockptr(parent, slot),
1793 btrfs_header_bytenr(child));
1795 if (btrfs_node_ptr_generation(parent, slot) !=
1796 btrfs_header_generation(child)) {
1798 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1799 btrfs_header_generation(child),
1800 btrfs_node_ptr_generation(parent, slot));
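/*
 * Walk down the tree below path->nodes[*level]: leaves are handed to
 * process_one_leaf(), shared subtrees are tracked through the walk
 * control so they are only scanned once, and each child block is
 * verified against its parent pointer before descending.
 */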
1805 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1806 struct walk_control *wc, int *level)
1808 enum btrfs_tree_block_status status;
1811 struct extent_buffer *next;
1812 struct extent_buffer *cur;
1817 WARN_ON(*level < 0);
1818 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1819 ret = btrfs_lookup_extent_info(NULL, root,
1820 path->nodes[*level]->start,
1821 *level, 1, &refs, NULL);
1828 ret = enter_shared_node(root, path->nodes[*level]->start,
1836 while (*level >= 0) {
1837 WARN_ON(*level < 0);
1838 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1839 cur = path->nodes[*level];
1841 if (btrfs_header_level(cur) != *level)
1844 if (path->slots[*level] >= btrfs_header_nritems(cur))
1847 ret = process_one_leaf(root, cur, wc);
1852 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1853 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1854 blocksize = btrfs_level_size(root, *level - 1);
1855 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1861 ret = enter_shared_node(root, bytenr, refs,
1864 path->slots[*level]++;
1869 next = btrfs_find_tree_block(root, bytenr, blocksize);
1870 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1871 free_extent_buffer(next);
1872 reada_walk_down(root, cur, path->slots[*level]);
1873 next = read_tree_block(root, bytenr, blocksize,
1875 if (!extent_buffer_uptodate(next)) {
1876 struct btrfs_key node_key;
1878 btrfs_node_key_to_cpu(path->nodes[*level],
1880 path->slots[*level]);
1881 btrfs_add_corrupt_extent_record(root->fs_info,
1883 path->nodes[*level]->start,
1884 root->leafsize, *level);
1890 ret = check_child_node(root, cur, path->slots[*level], next);
1896 if (btrfs_is_leaf(next))
1897 status = btrfs_check_leaf(root, NULL, next);
1899 status = btrfs_check_node(root, NULL, next);
1900 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1901 free_extent_buffer(next);
1906 *level = *level - 1;
1907 free_extent_buffer(path->nodes[*level]);
1908 path->nodes[*level] = next;
1909 path->slots[*level] = 0;
1912 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1916 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1917 struct walk_control *wc, int *level)
1920 struct extent_buffer *leaf;
1922 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1923 leaf = path->nodes[i];
1924 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1929 free_extent_buffer(path->nodes[*level]);
1930 path->nodes[*level] = NULL;
1931 BUG_ON(*level > wc->active_node);
1932 if (*level == wc->active_node)
1933 leave_shared_node(root, wc, *level);
1940 static int check_root_dir(struct inode_record *rec)
1942 struct inode_backref *backref;
1945 if (!rec->found_inode_item || rec->errors)
1947 if (rec->nlink != 1 || rec->found_link != 0)
1949 if (list_empty(&rec->backrefs))
1951 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1952 if (!backref->found_inode_ref)
1954 if (backref->index != 0 || backref->namelen != 2 ||
1955 memcmp(backref->name, "..", 2))
1957 if (backref->found_dir_index || backref->found_dir_item)
1964 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1965 struct btrfs_root *root, struct btrfs_path *path,
1966 struct inode_record *rec)
1968 struct btrfs_inode_item *ei;
1969 struct btrfs_key key;
1972 key.objectid = rec->ino;
1973 key.type = BTRFS_INODE_ITEM_KEY;
1974 key.offset = (u64)-1;
1976 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1980 if (!path->slots[0]) {
1987 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1988 if (key.objectid != rec->ino) {
1993 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1994 struct btrfs_inode_item);
1995 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1996 btrfs_mark_buffer_dirty(path->nodes[0]);
1997 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1998 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1999 root->root_key.objectid);
2001 btrfs_release_path(path);
2005 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2006 struct btrfs_root *root,
2007 struct btrfs_path *path,
2008 struct inode_record *rec)
2012 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2013 btrfs_release_path(path);
2015 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2019 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2020 struct btrfs_root *root,
2021 struct btrfs_path *path,
2022 struct inode_record *rec)
2024 struct btrfs_inode_item *ei;
2025 struct btrfs_key key;
2028 key.objectid = rec->ino;
2029 key.type = BTRFS_INODE_ITEM_KEY;
2032 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2039 /* Since ret == 0, no need to check anything */
2040 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2041 struct btrfs_inode_item);
2042 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2043 btrfs_mark_buffer_dirty(path->nodes[0]);
2044 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2045 printf("reset nbytes for ino %llu root %llu\n",
2046 rec->ino, root->root_key.objectid);
2048 btrfs_release_path(path);
2052 static int add_missing_dir_index(struct btrfs_root *root,
2053 struct cache_tree *inode_cache,
2054 struct inode_record *rec,
2055 struct inode_backref *backref)
2057 struct btrfs_path *path;
2058 struct btrfs_trans_handle *trans;
2059 struct btrfs_dir_item *dir_item;
2060 struct extent_buffer *leaf;
2061 struct btrfs_key key;
2062 struct btrfs_disk_key disk_key;
2063 struct inode_record *dir_rec;
2064 unsigned long name_ptr;
2065 u32 data_size = sizeof(*dir_item) + backref->namelen;
2068 path = btrfs_alloc_path();
2072 trans = btrfs_start_transaction(root, 1);
2073 if (IS_ERR(trans)) {
2074 btrfs_free_path(path);
2075 return PTR_ERR(trans);
2078 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2079 (unsigned long long)rec->ino);
2080 key.objectid = backref->dir;
2081 key.type = BTRFS_DIR_INDEX_KEY;
2082 key.offset = backref->index;
2084 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2087 leaf = path->nodes[0];
2088 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2090 disk_key.objectid = cpu_to_le64(rec->ino);
2091 disk_key.type = BTRFS_INODE_ITEM_KEY;
2092 disk_key.offset = 0;
2094 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2095 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2096 btrfs_set_dir_data_len(leaf, dir_item, 0);
2097 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2098 name_ptr = (unsigned long)(dir_item + 1);
2099 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2100 btrfs_mark_buffer_dirty(leaf);
2101 btrfs_free_path(path);
2102 btrfs_commit_transaction(trans, root);
2104 backref->found_dir_index = 1;
2105 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2106 BUG_ON(IS_ERR(dir_rec));
2109 dir_rec->found_size += backref->namelen;
2110 if (dir_rec->found_size == dir_rec->isize &&
2111 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2112 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2113 if (dir_rec->found_size != dir_rec->isize)
2114 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2119 static int delete_dir_index(struct btrfs_root *root,
2120 struct cache_tree *inode_cache,
2121 struct inode_record *rec,
2122 struct inode_backref *backref)
2124 struct btrfs_trans_handle *trans;
2125 struct btrfs_dir_item *di;
2126 struct btrfs_path *path;
2129 path = btrfs_alloc_path();
2133 trans = btrfs_start_transaction(root, 1);
2134 if (IS_ERR(trans)) {
2135 btrfs_free_path(path);
2136 return PTR_ERR(trans);
2140 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2141 (unsigned long long)backref->dir,
2142 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2143 (unsigned long long)root->objectid);
2145 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2146 backref->name, backref->namelen,
2147 backref->index, -1);
2150 btrfs_free_path(path);
2151 btrfs_commit_transaction(trans, root);
2158 ret = btrfs_del_item(trans, root, path);
2160 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2162 btrfs_free_path(path);
2163 btrfs_commit_transaction(trans, root);
2167 static int create_inode_item(struct btrfs_root *root,
2168 struct inode_record *rec,
2169 struct inode_backref *backref, int root_dir)
2171 struct btrfs_trans_handle *trans;
2172 struct btrfs_inode_item inode_item;
2173 time_t now = time(NULL);
2176 trans = btrfs_start_transaction(root, 1);
2177 if (IS_ERR(trans)) {
2178 ret = PTR_ERR(trans);
2182 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2183 "be incomplete, please check permissions and content after "
2184 "the fsck completes.\n", (unsigned long long)root->objectid,
2185 (unsigned long long)rec->ino);
2187 memset(&inode_item, 0, sizeof(inode_item));
2188 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2190 btrfs_set_stack_inode_nlink(&inode_item, 1);
2192 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2193 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2194 if (rec->found_dir_item) {
2195 if (rec->found_file_extent)
2196 fprintf(stderr, "root %llu inode %llu has both a dir "
2197 "item and extents, unsure if it is a dir or a "
2198 "regular file so setting it as a directory\n",
2199 (unsigned long long)root->objectid,
2200 (unsigned long long)rec->ino);
2201 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2202 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2203 } else if (!rec->found_dir_item) {
2204 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2205 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2207 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2208 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2209 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2210 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2211 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2212 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2213 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2214 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2216 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2218 btrfs_commit_transaction(trans, root);
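/*
 * Go through all recorded backrefs of @rec and repair what can be
 * repaired: drop bad dir indexes, add missing dir index/item pairs and
 * recreate the inode item when only backrefs survived. Returns the
 * number of repairs made, or a negative error.
 */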
2222 static int repair_inode_backrefs(struct btrfs_root *root,
2223 struct inode_record *rec,
2224 struct cache_tree *inode_cache,
2227 struct inode_backref *tmp, *backref;
2228 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2232 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2233 if (!delete && rec->ino == root_dirid) {
2234 if (!rec->found_inode_item) {
2235 ret = create_inode_item(root, rec, backref, 1);
2242 /* Index 0 for root dirs is special, don't mess with it */
2243 if (rec->ino == root_dirid && backref->index == 0)
2247 ((backref->found_dir_index && !backref->found_inode_ref) ||
2248 (backref->found_dir_index && backref->found_inode_ref &&
2249 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2250 ret = delete_dir_index(root, inode_cache, rec, backref);
2254 list_del(&backref->list);
2258 if (!delete && !backref->found_dir_index &&
2259 backref->found_dir_item && backref->found_inode_ref) {
2260 ret = add_missing_dir_index(root, inode_cache, rec,
2265 if (backref->found_dir_item &&
2266 backref->found_dir_index &&
2267 backref->found_dir_index) {
2268 if (!backref->errors &&
2269 backref->found_inode_ref) {
2270 list_del(&backref->list);
2276 if (!delete && (!backref->found_dir_index &&
2277 !backref->found_dir_item &&
2278 backref->found_inode_ref)) {
2279 struct btrfs_trans_handle *trans;
2280 struct btrfs_key location;
2282 ret = check_dir_conflict(root, backref->name,
2288 * let the nlink fixing routine handle it,
2289 * since it can do a better job.
2294 location.objectid = rec->ino;
2295 location.type = BTRFS_INODE_ITEM_KEY;
2296 location.offset = 0;
2298 trans = btrfs_start_transaction(root, 1);
2299 if (IS_ERR(trans)) {
2300 ret = PTR_ERR(trans);
2303 fprintf(stderr, "adding missing dir index/item pair "
2305 (unsigned long long)rec->ino);
2306 ret = btrfs_insert_dir_item(trans, root, backref->name,
2308 backref->dir, &location,
2309 imode_to_type(rec->imode),
2312 btrfs_commit_transaction(trans, root);
2316 if (!delete && (backref->found_inode_ref &&
2317 backref->found_dir_index &&
2318 backref->found_dir_item &&
2319 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2320 !rec->found_inode_item)) {
2321 ret = create_inode_item(root, rec, backref, 0);
2328 return ret ? ret : repaired;
2332 * Determine the file type for nlink/inode_item repair.
2334 * Return 0 if the file type is found and BTRFS_FT_* is stored into *type.
2335 * Return -ENOENT if the file type is not found.
2337 static int find_file_type(struct inode_record *rec, u8 *type)
2339 struct inode_backref *backref;
2341 /* For inode item recovered case */
2342 if (rec->found_inode_item) {
2343 *type = imode_to_type(rec->imode);
2347 list_for_each_entry(backref, &rec->backrefs, list) {
2348 if (backref->found_dir_index || backref->found_dir_item) {
2349 *type = backref->filetype;
2357 * Determine the file name for nlink repair.
2359 * Return 0 if a file name is found; set name and namelen.
2360 * Return -ENOENT if no file name is found.
2362 static int find_file_name(struct inode_record *rec,
2363 char *name, int *namelen)
2365 struct inode_backref *backref;
2367 list_for_each_entry(backref, &rec->backrefs, list) {
2368 if (backref->found_dir_index || backref->found_dir_item ||
2369 backref->found_inode_ref) {
2370 memcpy(name, backref->name, backref->namelen);
2371 *namelen = backref->namelen;
2378 /* Reset the inode's nlink to the correct value */
2379 static int reset_nlink(struct btrfs_trans_handle *trans,
2380 struct btrfs_root *root,
2381 struct btrfs_path *path,
2382 struct inode_record *rec)
2384 struct inode_backref *backref;
2385 struct inode_backref *tmp;
2386 struct btrfs_key key;
2387 struct btrfs_inode_item *inode_item;
2390 /* We don't trust this value either; reset it and iterate the backrefs */
2391 rec->found_link = 0;
2393 /* Remove all backrefs, including the valid ones */
2394 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2395 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2396 backref->index, backref->name,
2397 backref->namelen, 0);
2401 /* remove invalid backrefs so they won't be added back */
2402 if (!(backref->found_dir_index &&
2403 backref->found_dir_item &&
2404 backref->found_inode_ref)) {
2405 list_del(&backref->list);
2412 /* Set nlink to 0 */
2413 key.objectid = rec->ino;
2414 key.type = BTRFS_INODE_ITEM_KEY;
2416 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2423 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2424 struct btrfs_inode_item);
2425 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2426 btrfs_mark_buffer_dirty(path->nodes[0]);
2427 btrfs_release_path(path);
2430 * Add back the valid inode_refs/dir_items/dir_indexes;
2431 * add_link() handles the nlink increment, so the new nlink will be correct.
2433 list_for_each_entry(backref, &rec->backrefs, list) {
2434 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2435 backref->name, backref->namelen,
2436 backref->filetype, &backref->index, 1);
2441 btrfs_release_path(path);
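/*
 * Repair a wrong link count: recover a file name and type from the
 * surviving backrefs, reset nlink from the valid ones, and if no link
 * is left re-attach the inode under a 'lost+found' directory.
 */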
2445 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2446 struct btrfs_root *root,
2447 struct btrfs_path *path,
2448 struct inode_record *rec)
2450 char *dir_name = "lost+found";
2451 char namebuf[BTRFS_NAME_LEN] = {0};
2456 int name_recovered = 0;
2457 int type_recovered = 0;
2461 * Get the file name and type first, before the invalid inode refs
2462 * are deleted by remove_all_invalid_backref().
2464 name_recovered = !find_file_name(rec, namebuf, &namelen);
2465 type_recovered = !find_file_type(rec, &type);
2467 if (!name_recovered) {
2468 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2469 rec->ino, rec->ino);
2470 namelen = count_digits(rec->ino);
2471 sprintf(namebuf, "%llu", rec->ino);
2474 if (!type_recovered) {
2475 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2477 type = BTRFS_FT_REG_FILE;
2481 ret = reset_nlink(trans, root, path, rec);
2484 "Failed to reset nlink for inode %llu: %s\n",
2485 rec->ino, strerror(-ret));
2489 if (rec->found_link == 0) {
2490 lost_found_ino = root->highest_inode;
2491 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2496 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2497 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2500 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2501 dir_name, strerror(-ret));
2504 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2505 namebuf, namelen, type, NULL, 1);
2507 * Add a ".INO" suffix repeatedly to handle the case where
2508 * "FILENAME.INO" is already taken by another file.
2510 while (ret == -EEXIST) {
2512 * Conflicting file name; add ".INO" as a suffix (+1 for the '.')
2514 if (namelen + count_digits(rec->ino) + 1 >
2519 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2521 namelen += count_digits(rec->ino) + 1;
2522 ret = btrfs_add_link(trans, root, rec->ino,
2523 lost_found_ino, namebuf,
2524 namelen, type, NULL, 1);
2528 "Failed to link the inode %llu to %s dir: %s\n",
2529 rec->ino, dir_name, strerror(-ret));
2533 * Just increase the found_link; don't actually add the
2534 * backref. This makes things easier, and this inode
2535 * record will be freed after the repair is done,
2536 * so fsck will not report a problem about this inode.
2539 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2540 namelen, namebuf, dir_name);
2542 printf("Fixed the nlink of inode %llu\n", rec->ino);
2545 * Clear the flag anyway, or we will loop forever on the same inode,
2546 * as it will never be removed from the bad inode list.
2549 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2550 btrfs_release_path(path);
2555 * Check if there is any normal (reg or prealloc) file extent for the given inode.
2557 * This is used to determine the file type when neither its dir_index/item nor its
2558 * inode_item exists.
2560 * This will *NOT* report an error; if any error happens, just consider the inode
2561 * as having no normal file extent.
2563 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2565 struct btrfs_path *path;
2566 struct btrfs_key key;
2567 struct btrfs_key found_key;
2568 struct btrfs_file_extent_item *fi;
2572 path = btrfs_alloc_path();
2576 key.type = BTRFS_EXTENT_DATA_KEY;
2579 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2584 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2585 ret = btrfs_next_leaf(root, path);
2592 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2594 if (found_key.objectid != ino ||
2595 found_key.type != BTRFS_EXTENT_DATA_KEY)
2597 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2598 struct btrfs_file_extent_item);
2599 type = btrfs_file_extent_type(path->nodes[0], fi);
2600 if (type != BTRFS_FILE_EXTENT_INLINE) {
2606 btrfs_free_path(path);
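/*
 * btrfs_type_to_imode() below maps BTRFS_FT_* directory entry types to the
 * S_IF* bits of an inode mode. Because the table uses designated
 * initializers, any index without an entry (e.g. BTRFS_FT_UNKNOWN == 0)
 * reads back as 0; callers are expected to pass one of the listed types,
 * e.g. mode |= btrfs_type_to_imode(BTRFS_FT_DIR) yields S_IFDIR.
 */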
2610 static u32 btrfs_type_to_imode(u8 type)
2612 static u32 imode_by_btrfs_type[] = {
2613 [BTRFS_FT_REG_FILE] = S_IFREG,
2614 [BTRFS_FT_DIR] = S_IFDIR,
2615 [BTRFS_FT_CHRDEV] = S_IFCHR,
2616 [BTRFS_FT_BLKDEV] = S_IFBLK,
2617 [BTRFS_FT_FIFO] = S_IFIFO,
2618 [BTRFS_FT_SOCK] = S_IFSOCK,
2619 [BTRFS_FT_SYMLINK] = S_IFLNK,
2622 return imode_by_btrfs_type[(type)];
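/*
 * repair_inode_no_item() below rebuilds a missing inode item: it guesses the
 * file type from the backrefs, any regular file extents, dir items or orphan
 * extents (defaulting to a regular file), creates a fresh inode item with
 * btrfs_new_inode(), and then flags I_ERR_LINK_COUNT_WRONG so the nlink
 * repair runs afterwards and wires the inode into a directory.
 */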
2625 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2626 struct btrfs_root *root,
2627 struct btrfs_path *path,
2628 struct inode_record *rec)
2632 int type_recovered = 0;
2635 printf("Trying to rebuild inode:%llu\n", rec->ino);
2637 type_recovered = !find_file_type(rec, &filetype);
2640 * Try to determine the inode type if it was not found.
2642 * If a regular file extent was found, it must be FILE.
2643 * If a dir_item/index was found, it must be DIR.
2645 * For an undetermined one, use FILE as the fallback.
2648 * 1. If found backref(inode_index/item is already handled) to it,
2650 * Need new inode-inode ref structure to allow search for that.
2652 if (!type_recovered) {
2653 if (rec->found_file_extent &&
2654 find_normal_file_extent(root, rec->ino)) {
2656 filetype = BTRFS_FT_REG_FILE;
2657 } else if (rec->found_dir_item) {
2659 filetype = BTRFS_FT_DIR;
2660 } else if (!list_empty(&rec->orphan_extents)) {
2662 filetype = BTRFS_FT_REG_FILE;
2664 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2667 filetype = BTRFS_FT_REG_FILE;
2671 ret = btrfs_new_inode(trans, root, rec->ino,
2672 mode | btrfs_type_to_imode(filetype));
2677 * Here the inode rebuild is done; we only rebuild the inode item and
2678 * don't repair the nlink (like moving it to lost+found).
2679 * That is the job of the nlink repair.
2681 * We just fill the record and return.
2683 rec->found_dir_item = 1;
2684 rec->imode = mode | btrfs_type_to_imode(filetype);
2686 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2687 /* Ensure the inode_nlinks repair function will be called */
2688 rec->errors |= I_ERR_LINK_COUNT_WRONG;
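/*
 * repair_inode_orphan_extent() below re-inserts data extents that were found
 * during the extent scan but had no matching file extent item: each orphan
 * is checked for conflicts with existing file extents via btrfs_get_extent(),
 * conflicting ones are freed, the rest get a new file extent item, and the
 * inode's found size and extent-hole bookkeeping are updated accordingly.
 */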
2693 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2694 struct btrfs_root *root,
2695 struct btrfs_path *path,
2696 struct inode_record *rec)
2698 struct orphan_data_extent *orphan;
2699 struct orphan_data_extent *tmp;
2702 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2704 * Check for conflicting file extents.
2706 * Here we don't know whether the extent is compressed or not,
2707 * so we can only assume it is not compressed and has no data offset,
2708 * and use its disk_len as the extent length.
2710 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2711 orphan->offset, orphan->disk_len, 0);
2712 btrfs_release_path(path);
2717 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2718 orphan->disk_bytenr, orphan->disk_len);
2719 ret = btrfs_free_extent(trans,
2720 root->fs_info->extent_root,
2721 orphan->disk_bytenr, orphan->disk_len,
2722 0, root->objectid, orphan->objectid,
2727 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2728 orphan->offset, orphan->disk_bytenr,
2729 orphan->disk_len, orphan->disk_len);
2733 /* Update file size info */
2734 rec->found_size += orphan->disk_len;
2735 if (rec->found_size == rec->nbytes)
2736 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2738 /* Update the file extent hole info too */
2739 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2743 if (RB_EMPTY_ROOT(&rec->holes))
2744 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2746 list_del(&orphan->list);
2749 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
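/*
 * repair_inode_discount_extent() below fixes gaps in the file extent layout:
 * for every recorded hole it punches a hole file extent with
 * btrfs_punch_hole() and drops the hole record; a file that lost all of its
 * file extents gets a single hole covering the whole (sector-aligned) isize.
 */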
2754 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2755 struct btrfs_root *root,
2756 struct btrfs_path *path,
2757 struct inode_record *rec)
2759 struct rb_node *node;
2760 struct file_extent_hole *hole;
2764 node = rb_first(&rec->holes);
2768 hole = rb_entry(node, struct file_extent_hole, node);
2769 ret = btrfs_punch_hole(trans, root, rec->ino,
2770 hole->start, hole->len);
2773 ret = del_file_extent_hole(&rec->holes, hole->start,
2777 if (RB_EMPTY_ROOT(&rec->holes))
2778 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2779 node = rb_first(&rec->holes);
2781 /* special case for a file losing all its file extents */
2783 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2784 round_up(rec->isize, root->sectorsize));
2788 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2789 rec->ino, root->objectid);
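/*
 * try_repair_inode() below dispatches the individual inode repairs in a
 * deliberate order: the inode item must be rebuilt first (the other fixes
 * need it), then orphan and missing file extents, then isize, orphan item,
 * nlink and nbytes. The transaction reserves 7 items, sized for the worst
 * case of the nlink repair creating 'lost+found' and a new link.
 */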
2794 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2796 struct btrfs_trans_handle *trans;
2797 struct btrfs_path *path;
2800 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2801 I_ERR_NO_ORPHAN_ITEM |
2802 I_ERR_LINK_COUNT_WRONG |
2803 I_ERR_NO_INODE_ITEM |
2804 I_ERR_FILE_EXTENT_ORPHAN |
2805 I_ERR_FILE_EXTENT_DISCOUNT|
2806 I_ERR_FILE_NBYTES_WRONG)))
2809 path = btrfs_alloc_path();
2814 * For nlink repair, it may create a dir and add link, so
2815 * 2 for parent(256)'s dir_index and dir_item
2816 * 2 for lost+found dir's inode_item and inode_ref
2817 * 1 for the new inode_ref of the file
2818 * 2 for lost+found dir's dir_index and dir_item for the file
2820 trans = btrfs_start_transaction(root, 7);
2821 if (IS_ERR(trans)) {
2822 btrfs_free_path(path);
2823 return PTR_ERR(trans);
2826 if (rec->errors & I_ERR_NO_INODE_ITEM)
2827 ret = repair_inode_no_item(trans, root, path, rec);
2828 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2829 ret = repair_inode_orphan_extent(trans, root, path, rec);
2830 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2831 ret = repair_inode_discount_extent(trans, root, path, rec);
2832 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2833 ret = repair_inode_isize(trans, root, path, rec);
2834 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2835 ret = repair_inode_orphan_item(trans, root, path, rec);
2836 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2837 ret = repair_inode_nlinks(trans, root, path, rec);
2838 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2839 ret = repair_inode_nbytes(trans, root, path, rec);
2840 btrfs_commit_transaction(trans, root);
2841 btrfs_free_path(path);
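/*
 * check_inode_recs() below validates every inode record collected for one
 * root: it verifies (or, in repair mode, recreates) the root directory,
 * repairs backrefs first, then walks the remaining records, deriving
 * I_ERR_NO_INODE_ITEM and I_ERR_LINK_COUNT_WRONG, attempting
 * try_repair_inode() when repair is enabled, and otherwise printing the
 * errors plus any unresolved backrefs.
 */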
2845 static int check_inode_recs(struct btrfs_root *root,
2846 struct cache_tree *inode_cache)
2848 struct cache_extent *cache;
2849 struct ptr_node *node;
2850 struct inode_record *rec;
2851 struct inode_backref *backref;
2856 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2858 if (btrfs_root_refs(&root->root_item) == 0) {
2859 if (!cache_tree_empty(inode_cache))
2860 fprintf(stderr, "warning line %d\n", __LINE__);
2865 * We need to record the highest inode number for later 'lost+found'
2867 * We must select an ino not used/referred to by any existing inode, or the
2868 * 'lost+found' ino may be a missing ino in a corrupted leaf;
2869 * this may cause the 'lost+found' dir to have wrong nlinks.
2871 cache = last_cache_extent(inode_cache);
2873 node = container_of(cache, struct ptr_node, cache);
2875 if (rec->ino > root->highest_inode)
2876 root->highest_inode = rec->ino;
2880 * We need to repair backrefs first because we could change some of the
2881 * errors in the inode recs.
2883 * We also need to go through and delete invalid backrefs first and then
2884 * add the correct ones second. We do this because we may get EEXIST
2885 * when adding back the correct index because we hadn't yet deleted the
2888 * For example, if we were missing a dir index then the directory's
2889 * isize would be wrong, so if we fixed the isize to what we thought it
2890 * would be and then fixed the backref we'd still have an invalid fs, so
2891 * we need to add back the dir index and then check to see if the isize
2896 if (stage == 3 && !err)
2899 cache = search_cache_extent(inode_cache, 0);
2900 while (repair && cache) {
2901 node = container_of(cache, struct ptr_node, cache);
2903 cache = next_cache_extent(cache);
2905 /* Need to free everything up and rescan */
2907 remove_cache_extent(inode_cache, &node->cache);
2909 free_inode_rec(rec);
2913 if (list_empty(&rec->backrefs))
2916 ret = repair_inode_backrefs(root, rec, inode_cache,
2930 rec = get_inode_rec(inode_cache, root_dirid, 0);
2931 BUG_ON(IS_ERR(rec));
2933 ret = check_root_dir(rec);
2935 fprintf(stderr, "root %llu root dir %llu error\n",
2936 (unsigned long long)root->root_key.objectid,
2937 (unsigned long long)root_dirid);
2938 print_inode_error(root, rec);
2943 struct btrfs_trans_handle *trans;
2945 trans = btrfs_start_transaction(root, 1);
2946 if (IS_ERR(trans)) {
2947 err = PTR_ERR(trans);
2952 "root %llu missing its root dir, recreating\n",
2953 (unsigned long long)root->objectid);
2955 ret = btrfs_make_root_dir(trans, root, root_dirid);
2958 btrfs_commit_transaction(trans, root);
2962 fprintf(stderr, "root %llu root dir %llu not found\n",
2963 (unsigned long long)root->root_key.objectid,
2964 (unsigned long long)root_dirid);
2968 cache = search_cache_extent(inode_cache, 0);
2971 node = container_of(cache, struct ptr_node, cache);
2973 remove_cache_extent(inode_cache, &node->cache);
2975 if (rec->ino == root_dirid ||
2976 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2977 free_inode_rec(rec);
2981 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2982 ret = check_orphan_item(root, rec->ino);
2984 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2985 if (can_free_inode_rec(rec)) {
2986 free_inode_rec(rec);
2991 if (!rec->found_inode_item)
2992 rec->errors |= I_ERR_NO_INODE_ITEM;
2993 if (rec->found_link != rec->nlink)
2994 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2996 ret = try_repair_inode(root, rec);
2997 if (ret == 0 && can_free_inode_rec(rec)) {
2998 free_inode_rec(rec);
3004 if (!(repair && ret == 0))
3006 print_inode_error(root, rec);
3007 list_for_each_entry(backref, &rec->backrefs, list) {
3008 if (!backref->found_dir_item)
3009 backref->errors |= REF_ERR_NO_DIR_ITEM;
3010 if (!backref->found_dir_index)
3011 backref->errors |= REF_ERR_NO_DIR_INDEX;
3012 if (!backref->found_inode_ref)
3013 backref->errors |= REF_ERR_NO_INODE_REF;
3014 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3015 " namelen %u name %s filetype %d errors %x",
3016 (unsigned long long)backref->dir,
3017 (unsigned long long)backref->index,
3018 backref->namelen, backref->name,
3019 backref->filetype, backref->errors);
3020 print_ref_error(backref->errors);
3022 free_inode_rec(rec);
3024 return (error > 0) ? -1 : 0;
3027 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3030 struct cache_extent *cache;
3031 struct root_record *rec = NULL;
3034 cache = lookup_cache_extent(root_cache, objectid, 1);
3036 rec = container_of(cache, struct root_record, cache);
3038 rec = calloc(1, sizeof(*rec));
3039 rec->objectid = objectid;
3040 INIT_LIST_HEAD(&rec->backrefs);
3041 rec->cache.start = objectid;
3042 rec->cache.size = 1;
3044 ret = insert_cache_extent(root_cache, &rec->cache);
3050 static struct root_backref *get_root_backref(struct root_record *rec,
3051 u64 ref_root, u64 dir, u64 index,
3052 const char *name, int namelen)
3054 struct root_backref *backref;
3056 list_for_each_entry(backref, &rec->backrefs, list) {
3057 if (backref->ref_root != ref_root || backref->dir != dir ||
3058 backref->namelen != namelen)
3060 if (memcmp(name, backref->name, namelen))
3065 backref = calloc(1, sizeof(*backref) + namelen + 1);
3066 backref->ref_root = ref_root;
3068 backref->index = index;
3069 backref->namelen = namelen;
3070 memcpy(backref->name, name, namelen);
3071 backref->name[namelen] = '\0';
3072 list_add_tail(&backref->list, &rec->backrefs);
3076 static void free_root_record(struct cache_extent *cache)
3078 struct root_record *rec;
3079 struct root_backref *backref;
3081 rec = container_of(cache, struct root_record, cache);
3082 while (!list_empty(&rec->backrefs)) {
3083 backref = list_entry(rec->backrefs.next,
3084 struct root_backref, list);
3085 list_del(&backref->list);
3092 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3094 static int add_root_backref(struct cache_tree *root_cache,
3095 u64 root_id, u64 ref_root, u64 dir, u64 index,
3096 const char *name, int namelen,
3097 int item_type, int errors)
3099 struct root_record *rec;
3100 struct root_backref *backref;
3102 rec = get_root_rec(root_cache, root_id);
3103 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3105 backref->errors |= errors;
3107 if (item_type != BTRFS_DIR_ITEM_KEY) {
3108 if (backref->found_dir_index || backref->found_back_ref ||
3109 backref->found_forward_ref) {
3110 if (backref->index != index)
3111 backref->errors |= REF_ERR_INDEX_UNMATCH;
3113 backref->index = index;
3117 if (item_type == BTRFS_DIR_ITEM_KEY) {
3118 if (backref->found_forward_ref)
3120 backref->found_dir_item = 1;
3121 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3122 backref->found_dir_index = 1;
3123 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3124 if (backref->found_forward_ref)
3125 backref->errors |= REF_ERR_DUP_ROOT_REF;
3126 else if (backref->found_dir_item)
3128 backref->found_forward_ref = 1;
3129 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3130 if (backref->found_back_ref)
3131 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3132 backref->found_back_ref = 1;
3137 if (backref->found_forward_ref && backref->found_dir_item)
3138 backref->reachable = 1;
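/*
 * merge_root_recs() below converts the per-subvolume directory entries that
 * reference child roots (collected as inode records during the fs tree scan)
 * into root backrefs in the destination cache, so that check_root_refs() can
 * later decide which subvolume roots are actually reachable. Records from a
 * reloc tree are simply dropped.
 */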
3142 static int merge_root_recs(struct btrfs_root *root,
3143 struct cache_tree *src_cache,
3144 struct cache_tree *dst_cache)
3146 struct cache_extent *cache;
3147 struct ptr_node *node;
3148 struct inode_record *rec;
3149 struct inode_backref *backref;
3152 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3153 free_inode_recs_tree(src_cache);
3158 cache = search_cache_extent(src_cache, 0);
3161 node = container_of(cache, struct ptr_node, cache);
3163 remove_cache_extent(src_cache, &node->cache);
3166 ret = is_child_root(root, root->objectid, rec->ino);
3172 list_for_each_entry(backref, &rec->backrefs, list) {
3173 BUG_ON(backref->found_inode_ref);
3174 if (backref->found_dir_item)
3175 add_root_backref(dst_cache, rec->ino,
3176 root->root_key.objectid, backref->dir,
3177 backref->index, backref->name,
3178 backref->namelen, BTRFS_DIR_ITEM_KEY,
3180 if (backref->found_dir_index)
3181 add_root_backref(dst_cache, rec->ino,
3182 root->root_key.objectid, backref->dir,
3183 backref->index, backref->name,
3184 backref->namelen, BTRFS_DIR_INDEX_KEY,
3188 free_inode_rec(rec);
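/*
 * check_root_refs() below starts from the fs tree (which is implicitly
 * referenced), then repeatedly clears the 'reachable' flag of backrefs whose
 * referencing root has itself become unreferenced, and finally reports any
 * fs/subvolume root that still has a root item but no remaining reference,
 * checking for an orphan item that would explain an intentionally deleted
 * subvolume.
 */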
3195 static int check_root_refs(struct btrfs_root *root,
3196 struct cache_tree *root_cache)
3198 struct root_record *rec;
3199 struct root_record *ref_root;
3200 struct root_backref *backref;
3201 struct cache_extent *cache;
3207 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3210 /* fixme: this cannot detect circular references */
3213 cache = search_cache_extent(root_cache, 0);
3217 rec = container_of(cache, struct root_record, cache);
3218 cache = next_cache_extent(cache);
3220 if (rec->found_ref == 0)
3223 list_for_each_entry(backref, &rec->backrefs, list) {
3224 if (!backref->reachable)
3227 ref_root = get_root_rec(root_cache,
3229 if (ref_root->found_ref > 0)
3232 backref->reachable = 0;
3234 if (rec->found_ref == 0)
3240 cache = search_cache_extent(root_cache, 0);
3244 rec = container_of(cache, struct root_record, cache);
3245 cache = next_cache_extent(cache);
3247 if (rec->found_ref == 0 &&
3248 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3249 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3250 ret = check_orphan_item(root->fs_info->tree_root,
3256 * If we don't have a root item then we likely just have
3257 * a dir item in a snapshot for this root, but no actual
3258 * ref key or anything, so it's meaningless.
3260 if (!rec->found_root_item)
3263 fprintf(stderr, "fs tree %llu not referenced\n",
3264 (unsigned long long)rec->objectid);
3268 if (rec->found_ref > 0 && !rec->found_root_item)
3270 list_for_each_entry(backref, &rec->backrefs, list) {
3271 if (!backref->found_dir_item)
3272 backref->errors |= REF_ERR_NO_DIR_ITEM;
3273 if (!backref->found_dir_index)
3274 backref->errors |= REF_ERR_NO_DIR_INDEX;
3275 if (!backref->found_back_ref)
3276 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3277 if (!backref->found_forward_ref)
3278 backref->errors |= REF_ERR_NO_ROOT_REF;
3279 if (backref->reachable && backref->errors)
3286 fprintf(stderr, "fs tree %llu refs %u %s\n",
3287 (unsigned long long)rec->objectid, rec->found_ref,
3288 rec->found_root_item ? "" : "not found");
3290 list_for_each_entry(backref, &rec->backrefs, list) {
3291 if (!backref->reachable)
3293 if (!backref->errors && rec->found_root_item)
3295 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3296 " index %llu namelen %u name %s errors %x\n",
3297 (unsigned long long)backref->ref_root,
3298 (unsigned long long)backref->dir,
3299 (unsigned long long)backref->index,
3300 backref->namelen, backref->name,
3302 print_ref_error(backref->errors);
3305 return errors > 0 ? 1 : 0;
3308 static int process_root_ref(struct extent_buffer *eb, int slot,
3309 struct btrfs_key *key,
3310 struct cache_tree *root_cache)
3316 struct btrfs_root_ref *ref;
3317 char namebuf[BTRFS_NAME_LEN];
3320 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3322 dirid = btrfs_root_ref_dirid(eb, ref);
3323 index = btrfs_root_ref_sequence(eb, ref);
3324 name_len = btrfs_root_ref_name_len(eb, ref);
3326 if (name_len <= BTRFS_NAME_LEN) {
3330 len = BTRFS_NAME_LEN;
3331 error = REF_ERR_NAME_TOO_LONG;
3333 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3335 if (key->type == BTRFS_ROOT_REF_KEY) {
3336 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3337 index, namebuf, len, key->type, error);
3339 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3340 index, namebuf, len, key->type, error);
3345 static void free_corrupt_block(struct cache_extent *cache)
3347 struct btrfs_corrupt_block *corrupt;
3349 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3353 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3356 * Repair the btree of the given root.
3358 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3359 * and rebalance the tree.
3360 * After the fix, the btree should be writable.
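 *
 * The repair runs in two passes: first, each corrupt block's parent slot is
 * located with ins_len == 0 and the pointer plus its extent are removed
 * without rebalancing; second, the same keys are searched again with
 * ins_len == -1 so btrfs_search_slot() rebalances the now-consistent tree.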
3362 static int repair_btree(struct btrfs_root *root,
3363 struct cache_tree *corrupt_blocks)
3365 struct btrfs_trans_handle *trans;
3366 struct btrfs_path *path;
3367 struct btrfs_corrupt_block *corrupt;
3368 struct cache_extent *cache;
3369 struct btrfs_key key;
3374 if (cache_tree_empty(corrupt_blocks))
3377 path = btrfs_alloc_path();
3381 trans = btrfs_start_transaction(root, 1);
3382 if (IS_ERR(trans)) {
3383 ret = PTR_ERR(trans);
3384 fprintf(stderr, "Error starting transaction: %s\n",
3388 cache = first_cache_extent(corrupt_blocks);
3390 corrupt = container_of(cache, struct btrfs_corrupt_block,
3392 level = corrupt->level;
3393 path->lowest_level = level;
3394 key.objectid = corrupt->key.objectid;
3395 key.type = corrupt->key.type;
3396 key.offset = corrupt->key.offset;
3399 * Here we don't want to do any tree balancing, since it may
3400 * cause a balance with a corrupted sibling leaf/node,
3401 * so ins_len is set to 0 here.
3402 * Balancing will be done after all corrupt nodes/leaves are deleted.
3404 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3407 offset = btrfs_node_blockptr(path->nodes[level],
3408 path->slots[level]);
3410 /* Remove the ptr */
3411 ret = btrfs_del_ptr(trans, root, path, level,
3412 path->slots[level]);
3416 * Remove the corresponding extent;
3417 * the return value is intentionally ignored.
3419 btrfs_release_path(path);
3420 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3421 0, root->root_key.objectid,
3423 cache = next_cache_extent(cache);
3426 /* Balance the btree using btrfs_search_slot() */
3427 cache = first_cache_extent(corrupt_blocks);
3429 corrupt = container_of(cache, struct btrfs_corrupt_block,
3431 memcpy(&key, &corrupt->key, sizeof(key));
3432 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3435 /* the return value will always be > 0 since it won't find the item */
3437 btrfs_release_path(path);
3438 cache = next_cache_extent(cache);
3441 btrfs_commit_transaction(trans, root);
3443 btrfs_free_path(path);
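/*
 * check_fs_root() below walks one fs/subvolume tree: it records corrupted
 * tree blocks in a private corrupt_blocks cache, moves any orphan data
 * extents onto their inode records, walks the tree from the root (honouring
 * drop_progress for partially dropped snapshots), optionally repairs the
 * btree, and finally merges root records and checks the collected inode
 * records.
 */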
3447 static int check_fs_root(struct btrfs_root *root,
3448 struct cache_tree *root_cache,
3449 struct walk_control *wc)
3455 struct btrfs_path path;
3456 struct shared_node root_node;
3457 struct root_record *rec;
3458 struct btrfs_root_item *root_item = &root->root_item;
3459 struct cache_tree corrupt_blocks;
3460 struct orphan_data_extent *orphan;
3461 struct orphan_data_extent *tmp;
3462 enum btrfs_tree_block_status status;
3465 * Reuse the corrupt_block cache tree to record corrupted tree blocks.
3467 * Unlike the usage in the extent tree check, here we do it on a per
3468 * fs/subvol tree basis.
3470 cache_tree_init(&corrupt_blocks);
3471 root->fs_info->corrupt_blocks = &corrupt_blocks;
3473 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3474 rec = get_root_rec(root_cache, root->root_key.objectid);
3475 if (btrfs_root_refs(root_item) > 0)
3476 rec->found_root_item = 1;
3479 btrfs_init_path(&path);
3480 memset(&root_node, 0, sizeof(root_node));
3481 cache_tree_init(&root_node.root_cache);
3482 cache_tree_init(&root_node.inode_cache);
3484 /* Move the orphan extent records to the corresponding inode_record */
3485 list_for_each_entry_safe(orphan, tmp,
3486 &root->orphan_data_extents, list) {
3487 struct inode_record *inode;
3489 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3491 BUG_ON(IS_ERR(inode));
3492 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3493 list_move(&orphan->list, &inode->orphan_extents);
3496 level = btrfs_header_level(root->node);
3497 memset(wc->nodes, 0, sizeof(wc->nodes));
3498 wc->nodes[level] = &root_node;
3499 wc->active_node = level;
3500 wc->root_level = level;
3502 /* We may not have checked the root block, let's do that now */
3503 if (btrfs_is_leaf(root->node))
3504 status = btrfs_check_leaf(root, NULL, root->node);
3506 status = btrfs_check_node(root, NULL, root->node);
3507 if (status != BTRFS_TREE_BLOCK_CLEAN)
3510 if (btrfs_root_refs(root_item) > 0 ||
3511 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3512 path.nodes[level] = root->node;
3513 extent_buffer_get(root->node);
3514 path.slots[level] = 0;
3516 struct btrfs_key key;
3517 struct btrfs_disk_key found_key;
3519 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3520 level = root_item->drop_level;
3521 path.lowest_level = level;
3522 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3525 btrfs_node_key(path.nodes[level], &found_key,
3527 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3528 sizeof(found_key)));
3532 wret = walk_down_tree(root, &path, wc, &level);
3538 wret = walk_up_tree(root, &path, wc, &level);
3545 btrfs_release_path(&path);
3547 if (!cache_tree_empty(&corrupt_blocks)) {
3548 struct cache_extent *cache;
3549 struct btrfs_corrupt_block *corrupt;
3551 printf("The following tree block(s) are corrupted in tree %llu:\n",
3552 root->root_key.objectid);
3553 cache = first_cache_extent(&corrupt_blocks);
3555 corrupt = container_of(cache,
3556 struct btrfs_corrupt_block,
3558 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3559 cache->start, corrupt->level,
3560 corrupt->key.objectid, corrupt->key.type,
3561 corrupt->key.offset);
3562 cache = next_cache_extent(cache);
3565 printf("Try to repair the btree for root %llu\n",
3566 root->root_key.objectid);
3567 ret = repair_btree(root, &corrupt_blocks);
3569 fprintf(stderr, "Failed to repair btree: %s\n",
3572 printf("Btree for root %llu is fixed\n",
3573 root->root_key.objectid);
3577 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3581 if (root_node.current) {
3582 root_node.current->checked = 1;
3583 maybe_free_inode_rec(&root_node.inode_cache,
3587 err = check_inode_recs(root, &root_node.inode_cache);
3591 free_corrupt_blocks_tree(&corrupt_blocks);
3592 root->fs_info->corrupt_blocks = NULL;
3593 free_orphan_data_extents(&root->orphan_data_extents);
3597 static int fs_root_objectid(u64 objectid)
3599 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3600 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3602 return is_fstree(objectid);
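/*
 * check_fs_roots() below iterates over the ROOT_ITEMs in the tree root,
 * reads each fs/subvolume root and runs check_fs_root() on it; ROOT_REF and
 * ROOT_BACKREF items are fed to process_root_ref() instead. If a repair
 * COWs the tree root (or check_fs_root() returns -EAGAIN), the scan restarts
 * from scratch with a fresh root cache.
 */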
3605 static int check_fs_roots(struct btrfs_root *root,
3606 struct cache_tree *root_cache)
3608 struct btrfs_path path;
3609 struct btrfs_key key;
3610 struct walk_control wc;
3611 struct extent_buffer *leaf, *tree_node;
3612 struct btrfs_root *tmp_root;
3613 struct btrfs_root *tree_root = root->fs_info->tree_root;
3617 if (ctx.progress_enabled) {
3618 ctx.tp = TASK_FS_ROOTS;
3619 task_start(ctx.info);
3623 * Just in case we made any changes to the extent tree that weren't
3624 * reflected into the free space cache yet.
3627 reset_cached_block_groups(root->fs_info);
3628 memset(&wc, 0, sizeof(wc));
3629 cache_tree_init(&wc.shared);
3630 btrfs_init_path(&path);
3635 key.type = BTRFS_ROOT_ITEM_KEY;
3636 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3641 tree_node = tree_root->node;
3643 if (tree_node != tree_root->node) {
3644 free_root_recs_tree(root_cache);
3645 btrfs_release_path(&path);
3648 leaf = path.nodes[0];
3649 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3650 ret = btrfs_next_leaf(tree_root, &path);
3656 leaf = path.nodes[0];
3658 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3659 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3660 fs_root_objectid(key.objectid)) {
3661 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3662 tmp_root = btrfs_read_fs_root_no_cache(
3663 root->fs_info, &key);
3665 key.offset = (u64)-1;
3666 tmp_root = btrfs_read_fs_root(
3667 root->fs_info, &key);
3669 if (IS_ERR(tmp_root)) {
3673 ret = check_fs_root(tmp_root, root_cache, &wc);
3674 if (ret == -EAGAIN) {
3675 free_root_recs_tree(root_cache);
3676 btrfs_release_path(&path);
3681 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3682 btrfs_free_fs_root(tmp_root);
3683 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3684 key.type == BTRFS_ROOT_BACKREF_KEY) {
3685 process_root_ref(leaf, path.slots[0], &key,
3692 btrfs_release_path(&path);
3694 free_extent_cache_tree(&wc.shared);
3695 if (!cache_tree_empty(&wc.shared))
3696 fprintf(stderr, "warning line %d\n", __LINE__);
3698 task_stop(ctx.info);
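/*
 * all_backpointers_checked() below cross-checks every backref recorded for
 * an extent: backrefs missing from the extent tree, tree backrefs that were
 * never referenced, local and global reference count mismatches, and data
 * backrefs whose disk_bytenr or byte length disagree with the extent record
 * are all reported (when print_errs is set); it returns nonzero if any
 * mismatch is found.
 */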
3703 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3705 struct list_head *cur = rec->backrefs.next;
3706 struct extent_backref *back;
3707 struct tree_backref *tback;
3708 struct data_backref *dback;
3712 while (cur != &rec->backrefs) {
3713 back = list_entry(cur, struct extent_backref, list);
3715 if (!back->found_extent_tree) {
3719 if (back->is_data) {
3720 dback = (struct data_backref *)back;
3721 fprintf(stderr, "Backref %llu %s %llu"
3722 " owner %llu offset %llu num_refs %lu"
3723 " not found in extent tree\n",
3724 (unsigned long long)rec->start,
3725 back->full_backref ?
3727 back->full_backref ?
3728 (unsigned long long)dback->parent:
3729 (unsigned long long)dback->root,
3730 (unsigned long long)dback->owner,
3731 (unsigned long long)dback->offset,
3732 (unsigned long)dback->num_refs);
3734 tback = (struct tree_backref *)back;
3735 fprintf(stderr, "Backref %llu parent %llu"
3736 " root %llu not found in extent tree\n",
3737 (unsigned long long)rec->start,
3738 (unsigned long long)tback->parent,
3739 (unsigned long long)tback->root);
3742 if (!back->is_data && !back->found_ref) {
3746 tback = (struct tree_backref *)back;
3747 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3748 (unsigned long long)rec->start,
3749 back->full_backref ? "parent" : "root",
3750 back->full_backref ?
3751 (unsigned long long)tback->parent :
3752 (unsigned long long)tback->root, back);
3754 if (back->is_data) {
3755 dback = (struct data_backref *)back;
3756 if (dback->found_ref != dback->num_refs) {
3760 fprintf(stderr, "Incorrect local backref count"
3761 " on %llu %s %llu owner %llu"
3762 " offset %llu found %u wanted %u back %p\n",
3763 (unsigned long long)rec->start,
3764 back->full_backref ?
3766 back->full_backref ?
3767 (unsigned long long)dback->parent:
3768 (unsigned long long)dback->root,
3769 (unsigned long long)dback->owner,
3770 (unsigned long long)dback->offset,
3771 dback->found_ref, dback->num_refs, back);
3773 if (dback->disk_bytenr != rec->start) {
3777 fprintf(stderr, "Backref disk bytenr does not"
3778 " match extent record, bytenr=%llu, "
3779 "ref bytenr=%llu\n",
3780 (unsigned long long)rec->start,
3781 (unsigned long long)dback->disk_bytenr);
3784 if (dback->bytes != rec->nr) {
3788 fprintf(stderr, "Backref bytes do not match "
3789 "extent backref, bytenr=%llu, ref "
3790 "bytes=%llu, backref bytes=%llu\n",
3791 (unsigned long long)rec->start,
3792 (unsigned long long)rec->nr,
3793 (unsigned long long)dback->bytes);
3796 if (!back->is_data) {
3799 dback = (struct data_backref *)back;
3800 found += dback->found_ref;
3803 if (found != rec->refs) {
3807 fprintf(stderr, "Incorrect global backref count "
3808 "on %llu found %llu wanted %llu\n",
3809 (unsigned long long)rec->start,
3810 (unsigned long long)found,
3811 (unsigned long long)rec->refs);
3817 static int free_all_extent_backrefs(struct extent_record *rec)
3819 struct extent_backref *back;
3820 struct list_head *cur;
3821 while (!list_empty(&rec->backrefs)) {
3822 cur = rec->backrefs.next;
3823 back = list_entry(cur, struct extent_backref, list);
3830 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3831 struct cache_tree *extent_cache)
3833 struct cache_extent *cache;
3834 struct extent_record *rec;
3837 cache = first_cache_extent(extent_cache);
3840 rec = container_of(cache, struct extent_record, cache);
3841 remove_cache_extent(extent_cache, cache);
3842 free_all_extent_backrefs(rec);
3847 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3848 struct extent_record *rec)
3850 if (rec->content_checked && rec->owner_ref_checked &&
3851 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3852 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3853 !rec->bad_full_backref && !rec->crossing_stripes &&
3854 !rec->wrong_chunk_type) {
3855 remove_cache_extent(extent_cache, &rec->cache);
3856 free_all_extent_backrefs(rec);
3857 list_del_init(&rec->list);
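/*
 * check_owner_ref() below verifies that the root recorded as the owner of a
 * tree block really references it: if no non-full backref already matches
 * the header owner, it reads that root and searches for the block's first
 * key one level up, then compares the parent's block pointer with the
 * block's bytenr. It returns 0 when the owner reference checks out.
 */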
3863 static int check_owner_ref(struct btrfs_root *root,
3864 struct extent_record *rec,
3865 struct extent_buffer *buf)
3867 struct extent_backref *node;
3868 struct tree_backref *back;
3869 struct btrfs_root *ref_root;
3870 struct btrfs_key key;
3871 struct btrfs_path path;
3872 struct extent_buffer *parent;
3877 list_for_each_entry(node, &rec->backrefs, list) {
3880 if (!node->found_ref)
3882 if (node->full_backref)
3884 back = (struct tree_backref *)node;
3885 if (btrfs_header_owner(buf) == back->root)
3888 BUG_ON(rec->is_root);
3890 /* try to find the block by searching the corresponding fs tree */
3891 key.objectid = btrfs_header_owner(buf);
3892 key.type = BTRFS_ROOT_ITEM_KEY;
3893 key.offset = (u64)-1;
3895 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3896 if (IS_ERR(ref_root))
3899 level = btrfs_header_level(buf);
3901 btrfs_item_key_to_cpu(buf, &key, 0);
3903 btrfs_node_key_to_cpu(buf, &key, 0);
3905 btrfs_init_path(&path);
3906 path.lowest_level = level + 1;
3907 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3911 parent = path.nodes[level + 1];
3912 if (parent && buf->start == btrfs_node_blockptr(parent,
3913 path.slots[level + 1]))
3916 btrfs_release_path(&path);
3917 return found ? 0 : 1;
3920 static int is_extent_tree_record(struct extent_record *rec)
3922 struct list_head *cur = rec->backrefs.next;
3923 struct extent_backref *node;
3924 struct tree_backref *back;
3927 while (cur != &rec->backrefs) {
3928 node = list_entry(cur, struct extent_backref, list);
3932 back = (struct tree_backref *)node;
3933 if (node->full_backref)
3935 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3942 static int record_bad_block_io(struct btrfs_fs_info *info,
3943 struct cache_tree *extent_cache,
3946 struct extent_record *rec;
3947 struct cache_extent *cache;
3948 struct btrfs_key key;
3950 cache = lookup_cache_extent(extent_cache, start, len);
3954 rec = container_of(cache, struct extent_record, cache);
3955 if (!is_extent_tree_record(rec))
3958 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3959 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3962 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3963 struct extent_buffer *buf, int slot)
3965 if (btrfs_header_level(buf)) {
3966 struct btrfs_key_ptr ptr1, ptr2;
3968 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3969 sizeof(struct btrfs_key_ptr));
3970 read_extent_buffer(buf, &ptr2,
3971 btrfs_node_key_ptr_offset(slot + 1),
3972 sizeof(struct btrfs_key_ptr));
3973 write_extent_buffer(buf, &ptr1,
3974 btrfs_node_key_ptr_offset(slot + 1),
3975 sizeof(struct btrfs_key_ptr));
3976 write_extent_buffer(buf, &ptr2,
3977 btrfs_node_key_ptr_offset(slot),
3978 sizeof(struct btrfs_key_ptr));
3980 struct btrfs_disk_key key;
3981 btrfs_node_key(buf, &key, 0);
3982 btrfs_fixup_low_keys(root, path, &key,
3983 btrfs_header_level(buf) + 1);
3986 struct btrfs_item *item1, *item2;
3987 struct btrfs_key k1, k2;
3988 char *item1_data, *item2_data;
3989 u32 item1_offset, item2_offset, item1_size, item2_size;
3991 item1 = btrfs_item_nr(slot);
3992 item2 = btrfs_item_nr(slot + 1);
3993 btrfs_item_key_to_cpu(buf, &k1, slot);
3994 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3995 item1_offset = btrfs_item_offset(buf, item1);
3996 item2_offset = btrfs_item_offset(buf, item2);
3997 item1_size = btrfs_item_size(buf, item1);
3998 item2_size = btrfs_item_size(buf, item2);
4000 item1_data = malloc(item1_size);
4003 item2_data = malloc(item2_size);
4009 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4010 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4012 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4013 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4017 btrfs_set_item_offset(buf, item1, item2_offset);
4018 btrfs_set_item_offset(buf, item2, item1_offset);
4019 btrfs_set_item_size(buf, item1, item2_size);
4020 btrfs_set_item_size(buf, item2, item1_size);
4022 path->slots[0] = slot;
4023 btrfs_set_item_key_unsafe(root, path, &k2);
4024 path->slots[0] = slot + 1;
4025 btrfs_set_item_key_unsafe(root, path, &k1);
4030 static int fix_key_order(struct btrfs_trans_handle *trans,
4031 struct btrfs_root *root,
4032 struct btrfs_path *path)
4034 struct extent_buffer *buf;
4035 struct btrfs_key k1, k2;
4037 int level = path->lowest_level;
4040 buf = path->nodes[level];
4041 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4043 btrfs_node_key_to_cpu(buf, &k1, i);
4044 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4046 btrfs_item_key_to_cpu(buf, &k1, i);
4047 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4049 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4051 ret = swap_values(root, path, buf, i);
4054 btrfs_mark_buffer_dirty(buf);
4060 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4061 struct btrfs_root *root,
4062 struct btrfs_path *path,
4063 struct extent_buffer *buf, int slot)
4065 struct btrfs_key key;
4066 int nritems = btrfs_header_nritems(buf);
4068 btrfs_item_key_to_cpu(buf, &key, slot);
4070 /* These are all the keys we can deal with missing. */
4071 if (key.type != BTRFS_DIR_INDEX_KEY &&
4072 key.type != BTRFS_EXTENT_ITEM_KEY &&
4073 key.type != BTRFS_METADATA_ITEM_KEY &&
4074 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4075 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4078 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4079 (unsigned long long)key.objectid, key.type,
4080 (unsigned long long)key.offset, slot, buf->start);
4081 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4082 btrfs_item_nr_offset(slot + 1),
4083 sizeof(struct btrfs_item) *
4084 (nritems - slot - 1));
4085 btrfs_set_header_nritems(buf, nritems - 1);
4087 struct btrfs_disk_key disk_key;
4089 btrfs_item_key(buf, &disk_key, 0);
4090 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4092 btrfs_mark_buffer_dirty(buf);
4096 static int fix_item_offset(struct btrfs_trans_handle *trans,
4097 struct btrfs_root *root,
4098 struct btrfs_path *path)
4100 struct extent_buffer *buf;
4104 /* We should only get this for leaves */
4105 BUG_ON(path->lowest_level);
4106 buf = path->nodes[0];
4108 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4109 unsigned int shift = 0, offset;
4111 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4112 BTRFS_LEAF_DATA_SIZE(root)) {
4113 if (btrfs_item_end_nr(buf, i) >
4114 BTRFS_LEAF_DATA_SIZE(root)) {
4115 ret = delete_bogus_item(trans, root, path,
4119 fprintf(stderr, "item is off the end of the "
4120 "leaf, can't fix\n");
4124 shift = BTRFS_LEAF_DATA_SIZE(root) -
4125 btrfs_item_end_nr(buf, i);
4126 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4127 btrfs_item_offset_nr(buf, i - 1)) {
4128 if (btrfs_item_end_nr(buf, i) >
4129 btrfs_item_offset_nr(buf, i - 1)) {
4130 ret = delete_bogus_item(trans, root, path,
4134 fprintf(stderr, "items overlap, can't fix\n");
4138 shift = btrfs_item_offset_nr(buf, i - 1) -
4139 btrfs_item_end_nr(buf, i);
4144 printf("Shifting item nr %d by %u bytes in block %llu\n",
4145 i, shift, (unsigned long long)buf->start);
4146 offset = btrfs_item_offset_nr(buf, i);
4147 memmove_extent_buffer(buf,
4148 btrfs_leaf_data(buf) + offset + shift,
4149 btrfs_leaf_data(buf) + offset,
4150 btrfs_item_size_nr(buf, i));
4151 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4153 btrfs_mark_buffer_dirty(buf);
4157 * We may have moved things, in which case we want to exit so we don't
4158 * write those changes out. Once we have proper abort functionality in
4159 * progs this can be changed to something nicer.
4166 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4167 * then just return -EIO.
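 * Currently only bad key ordering and invalid item offsets are handled: the
 * block is looked up in every root that references it via
 * btrfs_find_all_roots(), a path to it is COWed with skip_check_block set,
 * and then fix_key_order() or fix_item_offset() rewrites the block within
 * that transaction.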
4169 static int try_to_fix_bad_block(struct btrfs_root *root,
4170 struct extent_buffer *buf,
4171 enum btrfs_tree_block_status status)
4173 struct btrfs_trans_handle *trans;
4174 struct ulist *roots;
4175 struct ulist_node *node;
4176 struct btrfs_root *search_root;
4177 struct btrfs_path *path;
4178 struct ulist_iterator iter;
4179 struct btrfs_key root_key, key;
4182 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4183 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4186 path = btrfs_alloc_path();
4190 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4193 btrfs_free_path(path);
4197 ULIST_ITER_INIT(&iter);
4198 while ((node = ulist_next(roots, &iter))) {
4199 root_key.objectid = node->val;
4200 root_key.type = BTRFS_ROOT_ITEM_KEY;
4201 root_key.offset = (u64)-1;
4203 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4210 trans = btrfs_start_transaction(search_root, 0);
4211 if (IS_ERR(trans)) {
4212 ret = PTR_ERR(trans);
4216 path->lowest_level = btrfs_header_level(buf);
4217 path->skip_check_block = 1;
4218 if (path->lowest_level)
4219 btrfs_node_key_to_cpu(buf, &key, 0);
4221 btrfs_item_key_to_cpu(buf, &key, 0);
4222 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4225 btrfs_commit_transaction(trans, search_root);
4228 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4229 ret = fix_key_order(trans, search_root, path);
4230 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4231 ret = fix_item_offset(trans, search_root, path);
4233 btrfs_commit_transaction(trans, search_root);
4236 btrfs_release_path(path);
4237 btrfs_commit_transaction(trans, search_root);
4240 btrfs_free_path(path);
4244 static int check_block(struct btrfs_root *root,
4245 struct cache_tree *extent_cache,
4246 struct extent_buffer *buf, u64 flags)
4248 struct extent_record *rec;
4249 struct cache_extent *cache;
4250 struct btrfs_key key;
4251 enum btrfs_tree_block_status status;
4255 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4258 rec = container_of(cache, struct extent_record, cache);
4259 rec->generation = btrfs_header_generation(buf);
4261 level = btrfs_header_level(buf);
4262 if (btrfs_header_nritems(buf) > 0) {
4265 btrfs_item_key_to_cpu(buf, &key, 0);
4267 btrfs_node_key_to_cpu(buf, &key, 0);
4269 rec->info_objectid = key.objectid;
4271 rec->info_level = level;
4273 if (btrfs_is_leaf(buf))
4274 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4276 status = btrfs_check_node(root, &rec->parent_key, buf);
4278 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4280 status = try_to_fix_bad_block(root, buf, status);
4281 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4283 fprintf(stderr, "bad block %llu\n",
4284 (unsigned long long)buf->start);
4287 * Signal to callers we need to start the scan over
4288 * again since we'll have cow'ed blocks.
4293 rec->content_checked = 1;
4294 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4295 rec->owner_ref_checked = 1;
4297 ret = check_owner_ref(root, rec, buf);
4299 rec->owner_ref_checked = 1;
4303 maybe_free_extent_rec(extent_cache, rec);
4307 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4308 u64 parent, u64 root)
4310 struct list_head *cur = rec->backrefs.next;
4311 struct extent_backref *node;
4312 struct tree_backref *back;
4314 while (cur != &rec->backrefs) {
4315 node = list_entry(cur, struct extent_backref, list);
4319 back = (struct tree_backref *)node;
4321 if (!node->full_backref)
4323 if (parent == back->parent)
4326 if (node->full_backref)
4328 if (back->root == root)
4335 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4336 u64 parent, u64 root)
4338 struct tree_backref *ref = malloc(sizeof(*ref));
4339 memset(&ref->node, 0, sizeof(ref->node));
4341 ref->parent = parent;
4342 ref->node.full_backref = 1;
4345 ref->node.full_backref = 0;
4347 list_add_tail(&ref->node.list, &rec->backrefs);
4352 static struct data_backref *find_data_backref(struct extent_record *rec,
4353 u64 parent, u64 root,
4354 u64 owner, u64 offset,
4356 u64 disk_bytenr, u64 bytes)
4358 struct list_head *cur = rec->backrefs.next;
4359 struct extent_backref *node;
4360 struct data_backref *back;
4362 while (cur != &rec->backrefs) {
4363 node = list_entry(cur, struct extent_backref, list);
4367 back = (struct data_backref *)node;
4369 if (!node->full_backref)
4371 if (parent == back->parent)
4374 if (node->full_backref)
4376 if (back->root == root && back->owner == owner &&
4377 back->offset == offset) {
4378 if (found_ref && node->found_ref &&
4379 (back->bytes != bytes ||
4380 back->disk_bytenr != disk_bytenr))
4389 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4390 u64 parent, u64 root,
4391 u64 owner, u64 offset,
4394 struct data_backref *ref = malloc(sizeof(*ref));
4395 memset(&ref->node, 0, sizeof(ref->node));
4396 ref->node.is_data = 1;
4399 ref->parent = parent;
4402 ref->node.full_backref = 1;
4406 ref->offset = offset;
4407 ref->node.full_backref = 0;
4409 ref->bytes = max_size;
4412 list_add_tail(&ref->node.list, &rec->backrefs);
4413 if (max_size > rec->max_size)
4414 rec->max_size = max_size;
4418 /* Check if the type of the extent matches its chunk */
4419 static void check_extent_type(struct extent_record *rec)
4421 struct btrfs_block_group_cache *bg_cache;
4423 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4427 /* data extent, check chunk directly */
4428 if (!rec->metadata) {
4429 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4430 rec->wrong_chunk_type = 1;
4434 /* metadata extent, check the obvious case first */
4435 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4436 BTRFS_BLOCK_GROUP_METADATA))) {
4437 rec->wrong_chunk_type = 1;
4442 * Check for a SYSTEM extent; as it's also marked as metadata, we can only
4443 * make sure it's a SYSTEM extent by looking at its backref
4445 if (!list_empty(&rec->backrefs)) {
4446 struct extent_backref *node;
4447 struct tree_backref *tback;
4450 node = list_entry(rec->backrefs.next, struct extent_backref,
4452 if (node->is_data) {
4453 /* tree block shouldn't have data backref */
4454 rec->wrong_chunk_type = 1;
4457 tback = container_of(node, struct tree_backref, node);
4459 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4460 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4462 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4463 if (!(bg_cache->flags & bg_type))
4464 rec->wrong_chunk_type = 1;
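/*
 * add_extent_rec() below is the central bookkeeping helper of the extent
 * scan: it either merges the new information (refs from the extent item,
 * parent key/generation, max_size, duplicate ranges) into an existing
 * extent_record, or allocates a fresh one, and flags records that cross a
 * metadata stripe boundary or sit in the wrong kind of block group.
 */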
4468 static int add_extent_rec(struct cache_tree *extent_cache,
4469 struct btrfs_key *parent_key, u64 parent_gen,
4470 u64 start, u64 nr, u64 extent_item_refs,
4471 int is_root, int inc_ref, int set_checked,
4472 int metadata, int extent_rec, u64 max_size)
4474 struct extent_record *rec;
4475 struct cache_extent *cache;
4479 cache = lookup_cache_extent(extent_cache, start, nr);
4481 rec = container_of(cache, struct extent_record, cache);
4485 rec->nr = max(nr, max_size);
4488 * We need to make sure to reset nr to whatever the extent
4489 * record says was the real size; this way we can compare it to
4493 if (start != rec->start || rec->found_rec) {
4494 struct extent_record *tmp;
4497 if (list_empty(&rec->list))
4498 list_add_tail(&rec->list,
4499 &duplicate_extents);
4502 * We have to do this song and dance in case we
4503 * find an extent record that falls inside of
4504 * our current extent record but does not have
4505 * the same objectid.
4507 tmp = malloc(sizeof(*tmp));
4511 tmp->max_size = max_size;
4514 tmp->metadata = metadata;
4515 tmp->extent_item_refs = extent_item_refs;
4516 INIT_LIST_HEAD(&tmp->list);
4517 list_add_tail(&tmp->list, &rec->dups);
4518 rec->num_duplicates++;
4525 if (extent_item_refs && !dup) {
4526 if (rec->extent_item_refs) {
4527 fprintf(stderr, "block %llu rec "
4528 "extent_item_refs %llu, passed %llu\n",
4529 (unsigned long long)start,
4530 (unsigned long long)
4531 rec->extent_item_refs,
4532 (unsigned long long)extent_item_refs);
4534 rec->extent_item_refs = extent_item_refs;
4539 rec->content_checked = 1;
4540 rec->owner_ref_checked = 1;
4544 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4546 rec->parent_generation = parent_gen;
4548 if (rec->max_size < max_size)
4549 rec->max_size = max_size;
4552 * A metadata extent can't cross a stripe_len boundary, otherwise
4553 * the kernel scrubber won't be able to handle it.
4554 * As stripe_len is now fixed to BTRFS_STRIPE_LEN, just check
4557 if (metadata && check_crossing_stripes(rec->start,
4559 rec->crossing_stripes = 1;
4560 check_extent_type(rec);
4561 maybe_free_extent_rec(extent_cache, rec);
4564 rec = malloc(sizeof(*rec));
4566 rec->max_size = max_size;
4567 rec->nr = max(nr, max_size);
4568 rec->found_rec = !!extent_rec;
4569 rec->content_checked = 0;
4570 rec->owner_ref_checked = 0;
4571 rec->num_duplicates = 0;
4572 rec->metadata = metadata;
4573 rec->flag_block_full_backref = -1;
4574 rec->bad_full_backref = 0;
4575 rec->crossing_stripes = 0;
4576 rec->wrong_chunk_type = 0;
4577 INIT_LIST_HEAD(&rec->backrefs);
4578 INIT_LIST_HEAD(&rec->dups);
4579 INIT_LIST_HEAD(&rec->list);
4591 if (extent_item_refs)
4592 rec->extent_item_refs = extent_item_refs;
4594 rec->extent_item_refs = 0;
4597 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4599 memset(&rec->parent_key, 0, sizeof(*parent_key));
4602 rec->parent_generation = parent_gen;
4604 rec->parent_generation = 0;
4606 rec->cache.start = start;
4607 rec->cache.size = nr;
4608 ret = insert_cache_extent(extent_cache, &rec->cache);
4612 rec->content_checked = 1;
4613 rec->owner_ref_checked = 1;
4617 if (check_crossing_stripes(rec->start, rec->max_size))
4618 rec->crossing_stripes = 1;
4619 check_extent_type(rec);
4623 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4624 u64 parent, u64 root, int found_ref)
4626 struct extent_record *rec;
4627 struct tree_backref *back;
4628 struct cache_extent *cache;
4630 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4632 add_extent_rec(extent_cache, NULL, 0, bytenr,
4633 1, 0, 0, 0, 0, 1, 0, 0);
4634 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4639 rec = container_of(cache, struct extent_record, cache);
4640 if (rec->start != bytenr) {
4644 back = find_tree_backref(rec, parent, root);
4646 back = alloc_tree_backref(rec, parent, root);
4649 if (back->node.found_ref) {
4650 fprintf(stderr, "Extent back ref already exists "
4651 "for %llu parent %llu root %llu \n",
4652 (unsigned long long)bytenr,
4653 (unsigned long long)parent,
4654 (unsigned long long)root);
4656 back->node.found_ref = 1;
4658 if (back->node.found_extent_tree) {
4659 fprintf(stderr, "Extent back ref already exists "
4660 "for %llu parent %llu root %llu \n",
4661 (unsigned long long)bytenr,
4662 (unsigned long long)parent,
4663 (unsigned long long)root);
4665 back->node.found_extent_tree = 1;
4667 check_extent_type(rec);
4668 maybe_free_extent_rec(extent_cache, rec);
4672 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4673 u64 parent, u64 root, u64 owner, u64 offset,
4674 u32 num_refs, int found_ref, u64 max_size)
4676 struct extent_record *rec;
4677 struct data_backref *back;
4678 struct cache_extent *cache;
4680 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4682 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4684 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4689 rec = container_of(cache, struct extent_record, cache);
4690 if (rec->max_size < max_size)
4691 rec->max_size = max_size;
4694 * If found_ref is set then max_size is the real size and must match the
4695 * existing refs. So if we have already found a ref then we need to
4696 * make sure that this ref matches the existing one, otherwise we need
4697 * to add a new backref so we can notice that the backrefs don't match
4698 * and we need to figure out who is telling the truth. This is to
4699 * account for that awful fsync bug I introduced where we'd end up with
4700 * a btrfs_file_extent_item that would have its length include multiple
4701 * prealloc extents or point inside of a prealloc extent.
4703 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4706 back = alloc_data_backref(rec, parent, root, owner, offset,
4710 BUG_ON(num_refs != 1);
4711 if (back->node.found_ref)
4712 BUG_ON(back->bytes != max_size);
4713 back->node.found_ref = 1;
4714 back->found_ref += 1;
4715 back->bytes = max_size;
4716 back->disk_bytenr = bytenr;
4718 rec->content_checked = 1;
4719 rec->owner_ref_checked = 1;
4721 if (back->node.found_extent_tree) {
4722 fprintf(stderr, "Extent back ref already exists "
4723 "for %llu parent %llu root %llu "
4724 "owner %llu offset %llu num_refs %lu\n",
4725 (unsigned long long)bytenr,
4726 (unsigned long long)parent,
4727 (unsigned long long)root,
4728 (unsigned long long)owner,
4729 (unsigned long long)offset,
4730 (unsigned long)num_refs);
4732 back->num_refs = num_refs;
4733 back->node.found_extent_tree = 1;
4735 maybe_free_extent_rec(extent_cache, rec);
4739 static int add_pending(struct cache_tree *pending,
4740 struct cache_tree *seen, u64 bytenr, u32 size)
4743 ret = add_cache_extent(seen, bytenr, size);
4746 add_cache_extent(pending, bytenr, size);
4750 static int pick_next_pending(struct cache_tree *pending,
4751 struct cache_tree *reada,
4752 struct cache_tree *nodes,
4753 u64 last, struct block_info *bits, int bits_nr,
4756 unsigned long node_start = last;
4757 struct cache_extent *cache;
4760 cache = search_cache_extent(reada, 0);
4762 bits[0].start = cache->start;
4763 bits[0].size = cache->size;
4768 if (node_start > 32768)
4769 node_start -= 32768;
4771 cache = search_cache_extent(nodes, node_start);
4773 cache = search_cache_extent(nodes, 0);
4776 cache = search_cache_extent(pending, 0);
4781 bits[ret].start = cache->start;
4782 bits[ret].size = cache->size;
4783 cache = next_cache_extent(cache);
4785 } while (cache && ret < bits_nr);
4791 bits[ret].start = cache->start;
4792 bits[ret].size = cache->size;
4793 cache = next_cache_extent(cache);
4795 } while (cache && ret < bits_nr);
4797 if (bits_nr - ret > 8) {
4798 u64 lookup = bits[0].start + bits[0].size;
4799 struct cache_extent *next;
4800 next = search_cache_extent(pending, lookup);
4802 if (next->start - lookup > 32768)
4804 bits[ret].start = next->start;
4805 bits[ret].size = next->size;
4806 lookup = next->start + next->size;
4810 next = next_cache_extent(next);
4818 static void free_chunk_record(struct cache_extent *cache)
4820 struct chunk_record *rec;
4822 rec = container_of(cache, struct chunk_record, cache);
4823 list_del_init(&rec->list);
4824 list_del_init(&rec->dextents);
4828 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4830 cache_tree_free_extents(chunk_cache, free_chunk_record);
4833 static void free_device_record(struct rb_node *node)
4835 struct device_record *rec;
4837 rec = container_of(node, struct device_record, node);
4841 FREE_RB_BASED_TREE(device_cache, free_device_record);
4843 int insert_block_group_record(struct block_group_tree *tree,
4844 struct block_group_record *bg_rec)
4848 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4852 list_add_tail(&bg_rec->list, &tree->block_groups);
4856 static void free_block_group_record(struct cache_extent *cache)
4858 struct block_group_record *rec;
4860 rec = container_of(cache, struct block_group_record, cache);
4861 list_del_init(&rec->list);
4865 void free_block_group_tree(struct block_group_tree *tree)
4867 cache_tree_free_extents(&tree->tree, free_block_group_record);
4870 int insert_device_extent_record(struct device_extent_tree *tree,
4871 struct device_extent_record *de_rec)
4876 * A device extent is a bit different from the other extents, because
4877 * extents which belong to different devices may have the
4878 * same start and size, so we need to use the special extent cache
4879 * search/insert functions.
4881 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4885 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4886 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4890 static void free_device_extent_record(struct cache_extent *cache)
4892 struct device_extent_record *rec;
4894 rec = container_of(cache, struct device_extent_record, cache);
4895 if (!list_empty(&rec->chunk_list))
4896 list_del_init(&rec->chunk_list);
4897 if (!list_empty(&rec->device_list))
4898 list_del_init(&rec->device_list);
4902 void free_device_extent_tree(struct device_extent_tree *tree)
4904 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4907 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4908 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4909 struct extent_buffer *leaf, int slot)
4911 struct btrfs_extent_ref_v0 *ref0;
4912 struct btrfs_key key;
4914 btrfs_item_key_to_cpu(leaf, &key, slot);
4915 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4916 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4917 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4919 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4920 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4926 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4927 struct btrfs_key *key,
4930 struct btrfs_chunk *ptr;
4931 struct chunk_record *rec;
4934 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4935 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4937 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4939 fprintf(stderr, "memory allocation failed\n");
4943 INIT_LIST_HEAD(&rec->list);
4944 INIT_LIST_HEAD(&rec->dextents);
4947 rec->cache.start = key->offset;
4948 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4950 rec->generation = btrfs_header_generation(leaf);
4952 rec->objectid = key->objectid;
4953 rec->type = key->type;
4954 rec->offset = key->offset;
4956 rec->length = rec->cache.size;
4957 rec->owner = btrfs_chunk_owner(leaf, ptr);
4958 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4959 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4960 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4961 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4962 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4963 rec->num_stripes = num_stripes;
4964 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4966 for (i = 0; i < rec->num_stripes; ++i) {
4967 rec->stripes[i].devid =
4968 btrfs_stripe_devid_nr(leaf, ptr, i);
4969 rec->stripes[i].offset =
4970 btrfs_stripe_offset_nr(leaf, ptr, i);
4971 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4972 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4979 static int process_chunk_item(struct cache_tree *chunk_cache,
4980 struct btrfs_key *key, struct extent_buffer *eb,
4983 struct chunk_record *rec;
4986 rec = btrfs_new_chunk_record(eb, key, slot);
4987 ret = insert_cache_extent(chunk_cache, &rec->cache);
4989 fprintf(stderr, "Chunk[%llu, %llu] already exists.\n",
4990 rec->offset, rec->length);
4997 static int process_device_item(struct rb_root *dev_cache,
4998 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5000 struct btrfs_dev_item *ptr;
5001 struct device_record *rec;
5004 ptr = btrfs_item_ptr(eb,
5005 slot, struct btrfs_dev_item);
5007 rec = malloc(sizeof(*rec));
5009 fprintf(stderr, "memory allocation failed\n");
5013 rec->devid = key->offset;
5014 rec->generation = btrfs_header_generation(eb);
5016 rec->objectid = key->objectid;
5017 rec->type = key->type;
5018 rec->offset = key->offset;
5020 rec->devid = btrfs_device_id(eb, ptr);
5021 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5022 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5024 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5026 fprintf(stderr, "Device[%llu] already exists.\n", rec->devid);
5033 struct block_group_record *
5034 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5037 struct btrfs_block_group_item *ptr;
5038 struct block_group_record *rec;
5040 rec = calloc(1, sizeof(*rec));
5042 fprintf(stderr, "memory allocation failed\n");
5046 rec->cache.start = key->objectid;
5047 rec->cache.size = key->offset;
5049 rec->generation = btrfs_header_generation(leaf);
5051 rec->objectid = key->objectid;
5052 rec->type = key->type;
5053 rec->offset = key->offset;
5055 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5056 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5058 INIT_LIST_HEAD(&rec->list);
5063 static int process_block_group_item(struct block_group_tree *block_group_cache,
5064 struct btrfs_key *key,
5065 struct extent_buffer *eb, int slot)
5067 struct block_group_record *rec;
5070 rec = btrfs_new_block_group_record(eb, key, slot);
5071 ret = insert_block_group_record(block_group_cache, rec);
5073 fprintf(stderr, "Block Group[%llu, %llu] already exists.\n",
5074 rec->objectid, rec->offset);
5081 struct device_extent_record *
5082 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5083 struct btrfs_key *key, int slot)
5085 struct device_extent_record *rec;
5086 struct btrfs_dev_extent *ptr;
5088 rec = calloc(1, sizeof(*rec));
5090 fprintf(stderr, "memory allocation failed\n");
5094 rec->cache.objectid = key->objectid;
5095 rec->cache.start = key->offset;
5097 rec->generation = btrfs_header_generation(leaf);
5099 rec->objectid = key->objectid;
5100 rec->type = key->type;
5101 rec->offset = key->offset;
5103 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5104 rec->chunk_objecteid =
5105 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5107 btrfs_dev_extent_chunk_offset(leaf, ptr);
5108 rec->length = btrfs_dev_extent_length(leaf, ptr);
5109 rec->cache.size = rec->length;
5111 INIT_LIST_HEAD(&rec->chunk_list);
5112 INIT_LIST_HEAD(&rec->device_list);
5118 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5119 struct btrfs_key *key, struct extent_buffer *eb,
5122 struct device_extent_record *rec;
5125 rec = btrfs_new_device_extent_record(eb, key, slot);
5126 ret = insert_device_extent_record(dev_extent_cache, rec);
5129 "Device extent[%llu, %llu, %llu] existed.\n",
5130 rec->objectid, rec->offset, rec->length);
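/*
 * Record an EXTENT_ITEM/METADATA_ITEM in the extent cache and walk its
 * inline references, adding the corresponding tree and data backrefs.
 */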
5137 static int process_extent_item(struct btrfs_root *root,
5138 struct cache_tree *extent_cache,
5139 struct extent_buffer *eb, int slot)
5141 struct btrfs_extent_item *ei;
5142 struct btrfs_extent_inline_ref *iref;
5143 struct btrfs_extent_data_ref *dref;
5144 struct btrfs_shared_data_ref *sref;
5145 struct btrfs_key key;
5149 u32 item_size = btrfs_item_size_nr(eb, slot);
5155 btrfs_item_key_to_cpu(eb, &key, slot);
5157 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5159 num_bytes = root->leafsize;
5161 num_bytes = key.offset;
5164 if (item_size < sizeof(*ei)) {
5165 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5166 struct btrfs_extent_item_v0 *ei0;
5167 BUG_ON(item_size != sizeof(*ei0));
5168 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5169 refs = btrfs_extent_refs_v0(eb, ei0);
5173 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5174 num_bytes, refs, 0, 0, 0, metadata, 1,
5178 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5179 refs = btrfs_extent_refs(eb, ei);
5180 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5185 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5186 refs, 0, 0, 0, metadata, 1, num_bytes);
5188 ptr = (unsigned long)(ei + 1);
5189 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5190 key.type == BTRFS_EXTENT_ITEM_KEY)
5191 ptr += sizeof(struct btrfs_tree_block_info);
5193 end = (unsigned long)ei + item_size;
5195 iref = (struct btrfs_extent_inline_ref *)ptr;
5196 type = btrfs_extent_inline_ref_type(eb, iref);
5197 offset = btrfs_extent_inline_ref_offset(eb, iref);
5199 case BTRFS_TREE_BLOCK_REF_KEY:
5200 add_tree_backref(extent_cache, key.objectid,
5203 case BTRFS_SHARED_BLOCK_REF_KEY:
5204 add_tree_backref(extent_cache, key.objectid,
5207 case BTRFS_EXTENT_DATA_REF_KEY:
5208 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5209 add_data_backref(extent_cache, key.objectid, 0,
5210 btrfs_extent_data_ref_root(eb, dref),
5211 btrfs_extent_data_ref_objectid(eb,
5213 btrfs_extent_data_ref_offset(eb, dref),
5214 btrfs_extent_data_ref_count(eb, dref),
5217 case BTRFS_SHARED_DATA_REF_KEY:
5218 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5219 add_data_backref(extent_cache, key.objectid, offset,
5221 btrfs_shared_data_ref_count(eb, sref),
5225 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5226 key.objectid, key.type, num_bytes);
5229 ptr += btrfs_extent_inline_ref_size(type);
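/*
 * Check that the free space cache has an entry exactly covering
 * [offset, offset + bytes), carving out any superblock mirrors that
 * happen to fall inside the range.
 */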
5236 static int check_cache_range(struct btrfs_root *root,
5237 struct btrfs_block_group_cache *cache,
5238 u64 offset, u64 bytes)
5240 struct btrfs_free_space *entry;
5246 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5247 bytenr = btrfs_sb_offset(i);
5248 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5249 cache->key.objectid, bytenr, 0,
5250 &logical, &nr, &stripe_len);
5255 if (logical[nr] + stripe_len <= offset)
5257 if (offset + bytes <= logical[nr])
5259 if (logical[nr] == offset) {
5260 if (stripe_len >= bytes) {
5264 bytes -= stripe_len;
5265 offset += stripe_len;
5266 } else if (logical[nr] < offset) {
5267 if (logical[nr] + stripe_len >=
5272 bytes = (offset + bytes) -
5273 (logical[nr] + stripe_len);
5274 offset = logical[nr] + stripe_len;
5277 * This could be tricky: the super may land in the
5278 * middle of the area we're checking. First
5279 * check the easiest case, where it's at the end.
5281 if (logical[nr] + stripe_len >=
5283 bytes = logical[nr] - offset;
5287 /* Check the left side */
5288 ret = check_cache_range(root, cache,
5290 logical[nr] - offset);
5296 /* Now we continue with the right side */
5297 bytes = (offset + bytes) -
5298 (logical[nr] + stripe_len);
5299 offset = logical[nr] + stripe_len;
5306 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5308 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5309 offset, offset+bytes);
5313 if (entry->offset != offset) {
5314 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5319 if (entry->bytes != bytes) {
5320 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5321 bytes, entry->bytes, offset);
5325 unlink_free_space(cache->free_space_ctl, entry);
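/*
 * Walk the extent tree over this block group and verify that every gap
 * between allocated extents shows up in the free space cache; anything
 * left in the cache afterwards is reported as an error.
 */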
5330 static int verify_space_cache(struct btrfs_root *root,
5331 struct btrfs_block_group_cache *cache)
5333 struct btrfs_path *path;
5334 struct extent_buffer *leaf;
5335 struct btrfs_key key;
5339 path = btrfs_alloc_path();
5343 root = root->fs_info->extent_root;
5345 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5347 key.objectid = last;
5349 key.type = BTRFS_EXTENT_ITEM_KEY;
5351 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5356 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5357 ret = btrfs_next_leaf(root, path);
5365 leaf = path->nodes[0];
5366 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5367 if (key.objectid >= cache->key.offset + cache->key.objectid)
5369 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5370 key.type != BTRFS_METADATA_ITEM_KEY) {
5375 if (last == key.objectid) {
5376 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5377 last = key.objectid + key.offset;
5379 last = key.objectid + root->leafsize;
5384 ret = check_cache_range(root, cache, last,
5385 key.objectid - last);
5388 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5389 last = key.objectid + key.offset;
5391 last = key.objectid + root->leafsize;
5395 if (last < cache->key.objectid + cache->key.offset)
5396 ret = check_cache_range(root, cache, last,
5397 cache->key.objectid +
5398 cache->key.offset - last);
5401 btrfs_free_path(path);
5404 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5405 fprintf(stderr, "There are still entries left in the space "
5413 static int check_space_cache(struct btrfs_root *root)
5415 struct btrfs_block_group_cache *cache;
5416 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5420 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5421 btrfs_super_generation(root->fs_info->super_copy) !=
5422 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5423 printf("cache and super generation don't match, space cache "
5424 "will be invalidated\n");
5428 if (ctx.progress_enabled) {
5429 ctx.tp = TASK_FREE_SPACE;
5430 task_start(ctx.info);
5434 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5438 start = cache->key.objectid + cache->key.offset;
5439 if (!cache->free_space_ctl) {
5440 if (btrfs_init_free_space_ctl(cache,
5441 root->sectorsize)) {
5446 btrfs_remove_free_space_cache(cache);
5449 ret = load_free_space_cache(root->fs_info, cache);
5453 ret = verify_space_cache(root, cache);
5455 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5456 cache->key.objectid);
5461 task_stop(ctx.info);
5463 return error ? -EINVAL : 0;
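/*
 * Read the data covered by a csum item and verify each sector against
 * the stored checksums, trying the other mirrors when a sector does not
 * match.
 */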
5466 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5467 u64 num_bytes, unsigned long leaf_offset,
5468 struct extent_buffer *eb) {
5471 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5473 unsigned long csum_offset;
5477 u64 data_checked = 0;
5483 if (num_bytes % root->sectorsize)
5486 data = malloc(num_bytes);
5490 while (offset < num_bytes) {
5493 read_len = num_bytes - offset;
5494 /* read as much data as we can in one go */
5495 ret = read_extent_data(root, data + offset,
5496 bytenr + offset, &read_len, mirror);
5500 /* verify the checksum of each sector of data */
5501 while (data_checked < read_len) {
5503 tmp = offset + data_checked;
5505 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5506 csum, root->sectorsize);
5507 btrfs_csum_final(csum, (char *)&csum);
5509 csum_offset = leaf_offset +
5510 tmp / root->sectorsize * csum_size;
5511 read_extent_buffer(eb, (char *)&csum_expected,
5512 csum_offset, csum_size);
5513 /* try another mirror */
5514 if (csum != csum_expected) {
5515 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5516 mirror, bytenr + tmp,
5517 csum, csum_expected);
5518 num_copies = btrfs_num_copies(
5519 &root->fs_info->mapping_tree,
5521 if (mirror < num_copies - 1) {
5526 data_checked += root->sectorsize;
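/*
 * Make sure the whole range [bytenr, bytenr + num_bytes) is covered by
 * extent items, recursing when an extent only covers the middle of the
 * range.
 */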
5535 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5538 struct btrfs_path *path;
5539 struct extent_buffer *leaf;
5540 struct btrfs_key key;
5543 path = btrfs_alloc_path();
5545 fprintf(stderr, "Error allocating path\n");
5549 key.objectid = bytenr;
5550 key.type = BTRFS_EXTENT_ITEM_KEY;
5551 key.offset = (u64)-1;
5554 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5557 fprintf(stderr, "Error looking up extent record %d\n", ret);
5558 btrfs_free_path(path);
5561 if (path->slots[0] > 0) {
5564 ret = btrfs_prev_leaf(root, path);
5567 } else if (ret > 0) {
5574 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5577 * Block group items come before extent items if they have the same
5578 * bytenr, so walk back one more just in case. Dear future traveler,
5579 * first congrats on mastering time travel. Now if it's not too much
5580 * trouble could you go back to 2006 and tell Chris to make the
5581 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5582 * EXTENT_ITEM_KEY please?
5584 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5585 if (path->slots[0] > 0) {
5588 ret = btrfs_prev_leaf(root, path);
5591 } else if (ret > 0) {
5596 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5600 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5601 ret = btrfs_next_leaf(root, path);
5603 fprintf(stderr, "Error going to next leaf "
5605 btrfs_free_path(path);
5611 leaf = path->nodes[0];
5612 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5613 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5617 if (key.objectid + key.offset < bytenr) {
5621 if (key.objectid > bytenr + num_bytes)
5624 if (key.objectid == bytenr) {
5625 if (key.offset >= num_bytes) {
5629 num_bytes -= key.offset;
5630 bytenr += key.offset;
5631 } else if (key.objectid < bytenr) {
5632 if (key.objectid + key.offset >= bytenr + num_bytes) {
5636 num_bytes = (bytenr + num_bytes) -
5637 (key.objectid + key.offset);
5638 bytenr = key.objectid + key.offset;
5640 if (key.objectid + key.offset < bytenr + num_bytes) {
5641 u64 new_start = key.objectid + key.offset;
5642 u64 new_bytes = bytenr + num_bytes - new_start;
5645 * Weird case, the extent is in the middle of
5646 * our range, we'll have to search one side
5647 * and then the other. Not sure if this happens
5648 * in real life, but no harm in coding it up
5649 * anyway just in case.
5651 btrfs_release_path(path);
5652 ret = check_extent_exists(root, new_start,
5655 fprintf(stderr, "Right section didn't "
5659 num_bytes = key.objectid - bytenr;
5662 num_bytes = key.objectid - bytenr;
5669 if (num_bytes && !ret) {
5670 fprintf(stderr, "There are no extents for csum range "
5671 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5675 btrfs_free_path(path);
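/*
 * Walk the csum tree and make sure every checksummed range is backed by
 * an extent item; when check_data_csum is set, also verify the data
 * checksums themselves.
 */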
5679 static int check_csums(struct btrfs_root *root)
5681 struct btrfs_path *path;
5682 struct extent_buffer *leaf;
5683 struct btrfs_key key;
5684 u64 offset = 0, num_bytes = 0;
5685 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5689 unsigned long leaf_offset;
5691 root = root->fs_info->csum_root;
5692 if (!extent_buffer_uptodate(root->node)) {
5693 fprintf(stderr, "No valid csum tree found\n");
5697 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5698 key.type = BTRFS_EXTENT_CSUM_KEY;
5701 path = btrfs_alloc_path();
5705 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5707 fprintf(stderr, "Error searching csum tree %d\n", ret);
5708 btrfs_free_path(path);
5712 if (ret > 0 && path->slots[0])
5717 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5718 ret = btrfs_next_leaf(root, path);
5720 fprintf(stderr, "Error going to next leaf "
5727 leaf = path->nodes[0];
5729 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5730 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5735 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5736 csum_size) * root->sectorsize;
5737 if (!check_data_csum)
5738 goto skip_csum_check;
5739 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5740 ret = check_extent_csums(root, key.offset, data_len,
5746 offset = key.offset;
5747 } else if (key.offset != offset + num_bytes) {
5748 ret = check_extent_exists(root, offset, num_bytes);
5750 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5751 "there is no extent record\n",
5752 offset, offset+num_bytes);
5755 offset = key.offset;
5758 num_bytes += data_len;
5762 btrfs_free_path(path);
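/*
 * Return 1 if @key sorts strictly before @drop_key, i.e. the key lies in
 * the part of a partially dropped tree that has already been deleted.
 */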
5766 static int is_dropped_key(struct btrfs_key *key,
5767 struct btrfs_key *drop_key) {
5768 if (key->objectid < drop_key->objectid)
5770 else if (key->objectid == drop_key->objectid) {
5771 if (key->type < drop_key->type)
5773 else if (key->type == drop_key->type) {
5774 if (key->offset < drop_key->offset)
5782 * Here are the rules for FULL_BACKREF.
5784 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5785 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5787 * 3) We cow'ed the block while walking down a reloc tree. It is impossible to tell
5788 * whether this happened after the relocation occurred, since we'll have dropped the
5789 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5790 * no real way to know for sure.
5792 * We process the blocks one root at a time, and we start from the lowest root
5793 * objectid and go to the highest. So we can just lookup the owner backref for
5794 * the record and if we don't find it then we know it doesn't exist and we have
5797 * FIXME: if we ever start reclaiming root objectids then we need to fix this
5798 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5799 * be set or not and then we can check later once we've gathered all the refs.
5801 static int calc_extent_flag(struct btrfs_root *root,
5802 struct cache_tree *extent_cache,
5803 struct extent_buffer *buf,
5804 struct root_item_record *ri,
5807 struct extent_record *rec;
5808 struct cache_extent *cache;
5809 struct tree_backref *tback;
5812 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5813 /* we have added this extent before */
5815 rec = container_of(cache, struct extent_record, cache);
5818 * Except for file/reloc trees, we cannot have
5821 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5826 if (buf->start == ri->bytenr)
5829 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5832 owner = btrfs_header_owner(buf);
5833 if (owner == ri->objectid)
5836 tback = find_tree_backref(rec, 0, owner);
5841 if (rec->flag_block_full_backref != -1 &&
5842 rec->flag_block_full_backref != 0)
5843 rec->bad_full_backref = 1;
5846 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5847 if (rec->flag_block_full_backref != -1 &&
5848 rec->flag_block_full_backref != 1)
5849 rec->bad_full_backref = 1;
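/*
 * Process the next tree block from the pending/nodes queues: read it,
 * sanity check it, record its extent usage and backrefs, and queue any
 * child blocks it points to.
 */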
5853 static int run_next_block(struct btrfs_root *root,
5854 struct block_info *bits,
5857 struct cache_tree *pending,
5858 struct cache_tree *seen,
5859 struct cache_tree *reada,
5860 struct cache_tree *nodes,
5861 struct cache_tree *extent_cache,
5862 struct cache_tree *chunk_cache,
5863 struct rb_root *dev_cache,
5864 struct block_group_tree *block_group_cache,
5865 struct device_extent_tree *dev_extent_cache,
5866 struct root_item_record *ri)
5868 struct extent_buffer *buf;
5869 struct extent_record *rec = NULL;
5880 struct btrfs_key key;
5881 struct cache_extent *cache;
5884 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5885 bits_nr, &reada_bits);
5890 for(i = 0; i < nritems; i++) {
5891 ret = add_cache_extent(reada, bits[i].start,
5896 /* fixme, get the parent transid */
5897 readahead_tree_block(root, bits[i].start,
5901 *last = bits[0].start;
5902 bytenr = bits[0].start;
5903 size = bits[0].size;
5905 cache = lookup_cache_extent(pending, bytenr, size);
5907 remove_cache_extent(pending, cache);
5910 cache = lookup_cache_extent(reada, bytenr, size);
5912 remove_cache_extent(reada, cache);
5915 cache = lookup_cache_extent(nodes, bytenr, size);
5917 remove_cache_extent(nodes, cache);
5920 cache = lookup_cache_extent(extent_cache, bytenr, size);
5922 rec = container_of(cache, struct extent_record, cache);
5923 gen = rec->parent_generation;
5926 /* fixme, get the real parent transid */
5927 buf = read_tree_block(root, bytenr, size, gen);
5928 if (!extent_buffer_uptodate(buf)) {
5929 record_bad_block_io(root->fs_info,
5930 extent_cache, bytenr, size);
5934 nritems = btrfs_header_nritems(buf);
5937 if (!init_extent_tree) {
5938 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5939 btrfs_header_level(buf), 1, NULL,
5942 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5944 fprintf(stderr, "Couldn't calc extent flags\n");
5945 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5950 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5952 fprintf(stderr, "Couldn't calc extent flags\n");
5953 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5957 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5959 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5960 ri->objectid == btrfs_header_owner(buf)) {
5962 * Ok we got to this block from its original owner and
5963 * we have FULL_BACKREF set. Relocation can leave
5964 * converted blocks behind, so this is altogether possible;
5965 * however, it's not possible if the generation > the
5966 * last snapshot, so check for this case.
5968 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5969 btrfs_header_generation(buf) > ri->last_snapshot) {
5970 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5971 rec->bad_full_backref = 1;
5976 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5977 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5978 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5979 rec->bad_full_backref = 1;
5983 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5984 rec->flag_block_full_backref = 1;
5988 rec->flag_block_full_backref = 0;
5990 owner = btrfs_header_owner(buf);
5993 ret = check_block(root, extent_cache, buf, flags);
5997 if (btrfs_is_leaf(buf)) {
5998 btree_space_waste += btrfs_leaf_free_space(root, buf);
5999 for (i = 0; i < nritems; i++) {
6000 struct btrfs_file_extent_item *fi;
6001 btrfs_item_key_to_cpu(buf, &key, i);
6002 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6003 process_extent_item(root, extent_cache, buf,
6007 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6008 process_extent_item(root, extent_cache, buf,
6012 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6014 btrfs_item_size_nr(buf, i);
6017 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6018 process_chunk_item(chunk_cache, &key, buf, i);
6021 if (key.type == BTRFS_DEV_ITEM_KEY) {
6022 process_device_item(dev_cache, &key, buf, i);
6025 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6026 process_block_group_item(block_group_cache,
6030 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6031 process_device_extent_item(dev_extent_cache,
6036 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6037 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6038 process_extent_ref_v0(extent_cache, buf, i);
6045 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6046 add_tree_backref(extent_cache, key.objectid, 0,
6050 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6051 add_tree_backref(extent_cache, key.objectid,
6055 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6056 struct btrfs_extent_data_ref *ref;
6057 ref = btrfs_item_ptr(buf, i,
6058 struct btrfs_extent_data_ref);
6059 add_data_backref(extent_cache,
6061 btrfs_extent_data_ref_root(buf, ref),
6062 btrfs_extent_data_ref_objectid(buf,
6064 btrfs_extent_data_ref_offset(buf, ref),
6065 btrfs_extent_data_ref_count(buf, ref),
6066 0, root->sectorsize);
6069 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6070 struct btrfs_shared_data_ref *ref;
6071 ref = btrfs_item_ptr(buf, i,
6072 struct btrfs_shared_data_ref);
6073 add_data_backref(extent_cache,
6074 key.objectid, key.offset, 0, 0, 0,
6075 btrfs_shared_data_ref_count(buf, ref),
6076 0, root->sectorsize);
6079 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6080 struct bad_item *bad;
6082 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6086 bad = malloc(sizeof(struct bad_item));
6089 INIT_LIST_HEAD(&bad->list);
6090 memcpy(&bad->key, &key,
6091 sizeof(struct btrfs_key));
6092 bad->root_id = owner;
6093 list_add_tail(&bad->list, &delete_items);
6096 if (key.type != BTRFS_EXTENT_DATA_KEY)
6098 fi = btrfs_item_ptr(buf, i,
6099 struct btrfs_file_extent_item);
6100 if (btrfs_file_extent_type(buf, fi) ==
6101 BTRFS_FILE_EXTENT_INLINE)
6103 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6106 data_bytes_allocated +=
6107 btrfs_file_extent_disk_num_bytes(buf, fi);
6108 if (data_bytes_allocated < root->sectorsize) {
6111 data_bytes_referenced +=
6112 btrfs_file_extent_num_bytes(buf, fi);
6113 add_data_backref(extent_cache,
6114 btrfs_file_extent_disk_bytenr(buf, fi),
6115 parent, owner, key.objectid, key.offset -
6116 btrfs_file_extent_offset(buf, fi), 1, 1,
6117 btrfs_file_extent_disk_num_bytes(buf, fi));
6121 struct btrfs_key first_key;
6123 first_key.objectid = 0;
6126 btrfs_item_key_to_cpu(buf, &first_key, 0);
6127 level = btrfs_header_level(buf);
6128 for (i = 0; i < nritems; i++) {
6129 ptr = btrfs_node_blockptr(buf, i);
6130 size = btrfs_level_size(root, level - 1);
6131 btrfs_node_key_to_cpu(buf, &key, i);
6133 if ((level == ri->drop_level)
6134 && is_dropped_key(&key, &ri->drop_key)) {
6138 ret = add_extent_rec(extent_cache, &key,
6139 btrfs_node_ptr_generation(buf, i),
6140 ptr, size, 0, 0, 1, 0, 1, 0,
6144 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6147 add_pending(nodes, seen, ptr, size);
6149 add_pending(pending, seen, ptr, size);
6152 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6153 nritems) * sizeof(struct btrfs_key_ptr);
6155 total_btree_bytes += buf->len;
6156 if (fs_root_objectid(btrfs_header_owner(buf)))
6157 total_fs_tree_bytes += buf->len;
6158 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6159 total_extent_tree_bytes += buf->len;
6160 if (!found_old_backref &&
6161 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6162 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6163 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6164 found_old_backref = 1;
6166 free_extent_buffer(buf);
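/*
 * Queue a tree root for scanning and record its extent along with an
 * initial tree backref.
 */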
6170 static int add_root_to_pending(struct extent_buffer *buf,
6171 struct cache_tree *extent_cache,
6172 struct cache_tree *pending,
6173 struct cache_tree *seen,
6174 struct cache_tree *nodes,
6177 if (btrfs_header_level(buf) > 0)
6178 add_pending(nodes, seen, buf->start, buf->len);
6180 add_pending(pending, seen, buf->start, buf->len);
6181 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6182 0, 1, 1, 0, 1, 0, buf->len);
6184 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6185 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6186 add_tree_backref(extent_cache, buf->start, buf->start,
6189 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6193 /* as we fix the tree, we might be deleting blocks that
6194 * we're tracking for repair. This hook makes sure we
6195 * remove any backrefs for blocks as we are fixing them.
6197 static int free_extent_hook(struct btrfs_trans_handle *trans,
6198 struct btrfs_root *root,
6199 u64 bytenr, u64 num_bytes, u64 parent,
6200 u64 root_objectid, u64 owner, u64 offset,
6203 struct extent_record *rec;
6204 struct cache_extent *cache;
6206 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6208 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6209 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6213 rec = container_of(cache, struct extent_record, cache);
6215 struct data_backref *back;
6216 back = find_data_backref(rec, parent, root_objectid, owner,
6217 offset, 1, bytenr, num_bytes);
6220 if (back->node.found_ref) {
6221 back->found_ref -= refs_to_drop;
6223 rec->refs -= refs_to_drop;
6225 if (back->node.found_extent_tree) {
6226 back->num_refs -= refs_to_drop;
6227 if (rec->extent_item_refs)
6228 rec->extent_item_refs -= refs_to_drop;
6230 if (back->found_ref == 0)
6231 back->node.found_ref = 0;
6232 if (back->num_refs == 0)
6233 back->node.found_extent_tree = 0;
6235 if (!back->node.found_extent_tree && back->node.found_ref) {
6236 list_del(&back->node.list);
6240 struct tree_backref *back;
6241 back = find_tree_backref(rec, parent, root_objectid);
6244 if (back->node.found_ref) {
6247 back->node.found_ref = 0;
6249 if (back->node.found_extent_tree) {
6250 if (rec->extent_item_refs)
6251 rec->extent_item_refs--;
6252 back->node.found_extent_tree = 0;
6254 if (!back->node.found_extent_tree && back->node.found_ref) {
6255 list_del(&back->node.list);
6259 maybe_free_extent_rec(extent_cache, rec);
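/*
 * Delete every extent tree item referring to @bytenr (the extent item
 * itself and all backref item types), updating the block group counters
 * when an extent or metadata item is removed.
 */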
6264 static int delete_extent_records(struct btrfs_trans_handle *trans,
6265 struct btrfs_root *root,
6266 struct btrfs_path *path,
6267 u64 bytenr, u64 new_len)
6269 struct btrfs_key key;
6270 struct btrfs_key found_key;
6271 struct extent_buffer *leaf;
6276 key.objectid = bytenr;
6278 key.offset = (u64)-1;
6281 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6288 if (path->slots[0] == 0)
6294 leaf = path->nodes[0];
6295 slot = path->slots[0];
6297 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6298 if (found_key.objectid != bytenr)
6301 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6302 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6303 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6304 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6305 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6306 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6307 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6308 btrfs_release_path(path);
6309 if (found_key.type == 0) {
6310 if (found_key.offset == 0)
6312 key.offset = found_key.offset - 1;
6313 key.type = found_key.type;
6315 key.type = found_key.type - 1;
6316 key.offset = (u64)-1;
6320 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6321 found_key.objectid, found_key.type, found_key.offset);
6323 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6326 btrfs_release_path(path);
6328 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6329 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6330 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6331 found_key.offset : root->leafsize;
6333 ret = btrfs_update_block_group(trans, root, bytenr,
6340 btrfs_release_path(path);
6345 * for a single backref, this will allocate a new extent
6346 * and add the backref to it.
6348 static int record_extent(struct btrfs_trans_handle *trans,
6349 struct btrfs_fs_info *info,
6350 struct btrfs_path *path,
6351 struct extent_record *rec,
6352 struct extent_backref *back,
6353 int allocated, u64 flags)
6356 struct btrfs_root *extent_root = info->extent_root;
6357 struct extent_buffer *leaf;
6358 struct btrfs_key ins_key;
6359 struct btrfs_extent_item *ei;
6360 struct tree_backref *tback;
6361 struct data_backref *dback;
6362 struct btrfs_tree_block_info *bi;
6365 rec->max_size = max_t(u64, rec->max_size,
6366 info->extent_root->leafsize);
6369 u32 item_size = sizeof(*ei);
6372 item_size += sizeof(*bi);
6374 ins_key.objectid = rec->start;
6375 ins_key.offset = rec->max_size;
6376 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6378 ret = btrfs_insert_empty_item(trans, extent_root, path,
6379 &ins_key, item_size);
6383 leaf = path->nodes[0];
6384 ei = btrfs_item_ptr(leaf, path->slots[0],
6385 struct btrfs_extent_item);
6387 btrfs_set_extent_refs(leaf, ei, 0);
6388 btrfs_set_extent_generation(leaf, ei, rec->generation);
6390 if (back->is_data) {
6391 btrfs_set_extent_flags(leaf, ei,
6392 BTRFS_EXTENT_FLAG_DATA);
6394 struct btrfs_disk_key copy_key;
6396 tback = (struct tree_backref *)back;
6397 bi = (struct btrfs_tree_block_info *)(ei + 1);
6398 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6401 btrfs_set_disk_key_objectid(&copy_key,
6402 rec->info_objectid);
6403 btrfs_set_disk_key_type(&copy_key, 0);
6404 btrfs_set_disk_key_offset(&copy_key, 0);
6406 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6407 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6409 btrfs_set_extent_flags(leaf, ei,
6410 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6413 btrfs_mark_buffer_dirty(leaf);
6414 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6415 rec->max_size, 1, 0);
6418 btrfs_release_path(path);
6421 if (back->is_data) {
6425 dback = (struct data_backref *)back;
6426 if (back->full_backref)
6427 parent = dback->parent;
6431 for (i = 0; i < dback->found_ref; i++) {
6432 /* if parent != 0, we're doing a full backref
6433 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6434 * just makes the backref allocator create a data
6437 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6438 rec->start, rec->max_size,
6442 BTRFS_FIRST_FREE_OBJECTID :
6448 fprintf(stderr, "adding new data backref"
6449 " on %llu %s %llu owner %llu"
6450 " offset %llu found %d\n",
6451 (unsigned long long)rec->start,
6452 back->full_backref ?
6454 back->full_backref ?
6455 (unsigned long long)parent :
6456 (unsigned long long)dback->root,
6457 (unsigned long long)dback->owner,
6458 (unsigned long long)dback->offset,
6463 tback = (struct tree_backref *)back;
6464 if (back->full_backref)
6465 parent = tback->parent;
6469 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6470 rec->start, rec->max_size,
6471 parent, tback->root, 0, 0);
6472 fprintf(stderr, "adding new tree backref on "
6473 "start %llu len %llu parent %llu root %llu\n",
6474 rec->start, rec->max_size, parent, tback->root);
6477 btrfs_release_path(path);
6481 struct extent_entry {
6486 struct list_head list;
6489 static struct extent_entry *find_entry(struct list_head *entries,
6490 u64 bytenr, u64 bytes)
6492 struct extent_entry *entry = NULL;
6494 list_for_each_entry(entry, entries, list) {
6495 if (entry->bytenr == bytenr && entry->bytes == bytes)
6502 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6504 struct extent_entry *entry, *best = NULL, *prev = NULL;
6506 list_for_each_entry(entry, entries, list) {
6513 * If there are as many broken entries as entries then we know
6514 * not to trust this particular entry.
6516 if (entry->broken == entry->count)
6520 * If our current entry == best then we can't be sure our best
6521 * is really the best, so we need to keep searching.
6523 if (best && best->count == entry->count) {
6529 /* Prev == entry, not good enough, have to keep searching */
6530 if (!prev->broken && prev->count == entry->count)
6534 best = (prev->count > entry->count) ? prev : entry;
6535 else if (best->count < entry->count)
6543 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6544 struct data_backref *dback, struct extent_entry *entry)
6546 struct btrfs_trans_handle *trans;
6547 struct btrfs_root *root;
6548 struct btrfs_file_extent_item *fi;
6549 struct extent_buffer *leaf;
6550 struct btrfs_key key;
6554 key.objectid = dback->root;
6555 key.type = BTRFS_ROOT_ITEM_KEY;
6556 key.offset = (u64)-1;
6557 root = btrfs_read_fs_root(info, &key);
6559 fprintf(stderr, "Couldn't find root for our ref\n");
6564 * The backref points to the original offset of the extent if it was
6565 * split, so we need to search down to the offset we have and then walk
6566 * forward until we find the backref we're looking for.
6568 key.objectid = dback->owner;
6569 key.type = BTRFS_EXTENT_DATA_KEY;
6570 key.offset = dback->offset;
6571 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6573 fprintf(stderr, "Error looking up ref %d\n", ret);
6578 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6579 ret = btrfs_next_leaf(root, path);
6581 fprintf(stderr, "Couldn't find our ref, next\n");
6585 leaf = path->nodes[0];
6586 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6587 if (key.objectid != dback->owner ||
6588 key.type != BTRFS_EXTENT_DATA_KEY) {
6589 fprintf(stderr, "Couldn't find our ref, search\n");
6592 fi = btrfs_item_ptr(leaf, path->slots[0],
6593 struct btrfs_file_extent_item);
6594 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6595 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6597 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6602 btrfs_release_path(path);
6604 trans = btrfs_start_transaction(root, 1);
6606 return PTR_ERR(trans);
6609 * Ok we have the key of the file extent we want to fix, now we can cow
6610 * down to the thing and fix it.
6612 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6614 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6615 key.objectid, key.type, key.offset, ret);
6619 fprintf(stderr, "Well that's odd, we just found this key "
6620 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6625 leaf = path->nodes[0];
6626 fi = btrfs_item_ptr(leaf, path->slots[0],
6627 struct btrfs_file_extent_item);
6629 if (btrfs_file_extent_compression(leaf, fi) &&
6630 dback->disk_bytenr != entry->bytenr) {
6631 fprintf(stderr, "Ref doesn't match the record start and is "
6632 "compressed, please take a btrfs-image of this file "
6633 "system and send it to a btrfs developer so they can "
6634 "complete this functionality for bytenr %Lu\n",
6635 dback->disk_bytenr);
6640 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6641 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6642 } else if (dback->disk_bytenr > entry->bytenr) {
6643 u64 off_diff, offset;
6645 off_diff = dback->disk_bytenr - entry->bytenr;
6646 offset = btrfs_file_extent_offset(leaf, fi);
6647 if (dback->disk_bytenr + offset +
6648 btrfs_file_extent_num_bytes(leaf, fi) >
6649 entry->bytenr + entry->bytes) {
6650 fprintf(stderr, "Ref is past the entry end, please "
6651 "take a btrfs-image of this file system and "
6652 "send it to a btrfs developer, ref %Lu\n",
6653 dback->disk_bytenr);
6658 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6659 btrfs_set_file_extent_offset(leaf, fi, offset);
6660 } else if (dback->disk_bytenr < entry->bytenr) {
6663 offset = btrfs_file_extent_offset(leaf, fi);
6664 if (dback->disk_bytenr + offset < entry->bytenr) {
6665 fprintf(stderr, "Ref is before the entry start, please"
6666 " take a btrfs-image of this file system and "
6667 "send it to a btrfs developer, ref %Lu\n",
6668 dback->disk_bytenr);
6673 offset += dback->disk_bytenr;
6674 offset -= entry->bytenr;
6675 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6676 btrfs_set_file_extent_offset(leaf, fi, offset);
6679 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6682 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6683 * only do this if we aren't using compression, otherwise it's a
6686 if (!btrfs_file_extent_compression(leaf, fi))
6687 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6689 printf("ram bytes may be wrong?\n");
6690 btrfs_mark_buffer_dirty(leaf);
6692 err = btrfs_commit_transaction(trans, root);
6693 btrfs_release_path(path);
6694 return ret ? ret : err;
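/*
 * Check that all of the data backrefs attached to this record agree on
 * the disk bytenr and size.  If they don't, pick the best supported
 * value and rewrite the mismatched file extents with repair_ref().
 */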
6697 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6698 struct extent_record *rec)
6700 struct extent_backref *back;
6701 struct data_backref *dback;
6702 struct extent_entry *entry, *best = NULL;
6705 int broken_entries = 0;
6710 * Metadata is easy and the backrefs should always agree on bytenr and
6711 * size, if not we've got bigger issues.
6716 list_for_each_entry(back, &rec->backrefs, list) {
6717 if (back->full_backref || !back->is_data)
6720 dback = (struct data_backref *)back;
6723 * We only pay attention to backrefs that we found a real
6726 if (dback->found_ref == 0)
6730 * For now we only catch when the bytes don't match, not the
6731 * bytenr. We can easily do this at the same time, but I want
6732 * to have a fs image to test on before we just add repair
6733 * functionality willy-nilly so we know we won't screw up the
6737 entry = find_entry(&entries, dback->disk_bytenr,
6740 entry = malloc(sizeof(struct extent_entry));
6745 memset(entry, 0, sizeof(*entry));
6746 entry->bytenr = dback->disk_bytenr;
6747 entry->bytes = dback->bytes;
6748 list_add_tail(&entry->list, &entries);
6753 * If we only have one entry we may think the entries agree when
6754 * in reality they don't, so we have to do some extra checking.
6756 if (dback->disk_bytenr != rec->start ||
6757 dback->bytes != rec->nr || back->broken)
6768 /* Yay all the backrefs agree, carry on good sir */
6769 if (nr_entries <= 1 && !mismatch)
6772 fprintf(stderr, "attempting to repair backref discrepancy for bytenr "
6773 "%Lu\n", rec->start);
6776 * First we want to see if the backrefs can agree amongst themselves who
6777 * is right, so figure out which one of the entries has the highest
6780 best = find_most_right_entry(&entries);
6783 * Ok so we may have an even split between what the backrefs think, so
6784 * this is where we use the extent ref to see what it thinks.
6787 entry = find_entry(&entries, rec->start, rec->nr);
6788 if (!entry && (!broken_entries || !rec->found_rec)) {
6789 fprintf(stderr, "Backrefs don't agree with each other "
6790 "and extent record doesn't agree with anybody,"
6791 " so we can't fix bytenr %Lu bytes %Lu\n",
6792 rec->start, rec->nr);
6795 } else if (!entry) {
6797 * Ok our backrefs were broken, we'll assume this is the
6798 * correct value and add an entry for this range.
6800 entry = malloc(sizeof(struct extent_entry));
6805 memset(entry, 0, sizeof(*entry));
6806 entry->bytenr = rec->start;
6807 entry->bytes = rec->nr;
6808 list_add_tail(&entry->list, &entries);
6812 best = find_most_right_entry(&entries);
6814 fprintf(stderr, "Backrefs and extent record evenly "
6815 "split on who is right, this is going to "
6816 "require user input to fix bytenr %Lu bytes "
6817 "%Lu\n", rec->start, rec->nr);
6824 * I don't think this can happen currently, as we'll abort() if we catch
6825 * this case higher up, but in case somebody removes that check we still can't
6826 * deal with it properly here yet, so just bail out if that's the case.
6828 if (best->bytenr != rec->start) {
6829 fprintf(stderr, "Extent start and backref starts don't match, "
6830 "please use btrfs-image on this file system and send "
6831 "it to a btrfs developer so they can make fsck fix "
6832 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6833 rec->start, rec->nr);
6839 * Ok great we all agreed on an extent record, let's go find the real
6840 * references and fix up the ones that don't match.
6842 list_for_each_entry(back, &rec->backrefs, list) {
6843 if (back->full_backref || !back->is_data)
6846 dback = (struct data_backref *)back;
6849 * Still ignoring backrefs that don't have a real ref attached
6852 if (dback->found_ref == 0)
6855 if (dback->bytes == best->bytes &&
6856 dback->disk_bytenr == best->bytenr)
6859 ret = repair_ref(info, path, dback, best);
6865 * Ok we messed with the actual refs, which means we need to drop our
6866 * entire cache and go back and rescan. I know this is a huge pain and
6867 * adds a lot of extra work, but it's the only way to be safe. Once all
6868 * the backrefs agree we may not need to do anything to the extent
6873 while (!list_empty(&entries)) {
6874 entry = list_entry(entries.next, struct extent_entry, list);
6875 list_del_init(&entry->list);
6881 static int process_duplicates(struct btrfs_root *root,
6882 struct cache_tree *extent_cache,
6883 struct extent_record *rec)
6885 struct extent_record *good, *tmp;
6886 struct cache_extent *cache;
6890 * If we found an extent record for this extent then return, or if we
6891 * have more than one duplicate we are likely going to need to delete
6894 if (rec->found_rec || rec->num_duplicates > 1)
6897 /* Shouldn't happen but just in case */
6898 BUG_ON(!rec->num_duplicates);
6901 * So this happens if we end up with a backref that doesn't match the
6902 * actual extent entry. So either the backref is bad or the extent
6903 * entry is bad. Either way we want to have the extent_record actually
6904 * reflect what we found in the extent_tree, so we need to take the
6905 * duplicate out and use that as the extent_record since the only way we
6906 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6908 remove_cache_extent(extent_cache, &rec->cache);
6910 good = list_entry(rec->dups.next, struct extent_record, list);
6911 list_del_init(&good->list);
6912 INIT_LIST_HEAD(&good->backrefs);
6913 INIT_LIST_HEAD(&good->dups);
6914 good->cache.start = good->start;
6915 good->cache.size = good->nr;
6916 good->content_checked = 0;
6917 good->owner_ref_checked = 0;
6918 good->num_duplicates = 0;
6919 good->refs = rec->refs;
6920 list_splice_init(&rec->backrefs, &good->backrefs);
6922 cache = lookup_cache_extent(extent_cache, good->start,
6926 tmp = container_of(cache, struct extent_record, cache);
6929 * If we find another overlapping extent and its found_rec is
6930 * set then it's a duplicate and we need to try and delete
6933 if (tmp->found_rec || tmp->num_duplicates > 0) {
6934 if (list_empty(&good->list))
6935 list_add_tail(&good->list,
6936 &duplicate_extents);
6937 good->num_duplicates += tmp->num_duplicates + 1;
6938 list_splice_init(&tmp->dups, &good->dups);
6939 list_del_init(&tmp->list);
6940 list_add_tail(&tmp->list, &good->dups);
6941 remove_cache_extent(extent_cache, &tmp->cache);
6946 * Ok we have another extent rec that isn't backed by an extent item,
6947 * so let's just add it to this extent and carry on like we did above.
6949 good->refs += tmp->refs;
6950 list_splice_init(&tmp->backrefs, &good->backrefs);
6951 remove_cache_extent(extent_cache, &tmp->cache);
6954 ret = insert_cache_extent(extent_cache, &good->cache);
6957 return good->num_duplicates ? 0 : 1;
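/*
 * Delete the duplicate extent items that overlap this record, keeping
 * the one that covers all of the others.
 */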
6960 static int delete_duplicate_records(struct btrfs_root *root,
6961 struct extent_record *rec)
6963 struct btrfs_trans_handle *trans;
6964 LIST_HEAD(delete_list);
6965 struct btrfs_path *path;
6966 struct extent_record *tmp, *good, *n;
6969 struct btrfs_key key;
6971 path = btrfs_alloc_path();
6978 /* Find the record that covers all of the duplicates. */
6979 list_for_each_entry(tmp, &rec->dups, list) {
6980 if (good->start < tmp->start)
6982 if (good->nr > tmp->nr)
6985 if (tmp->start + tmp->nr < good->start + good->nr) {
6986 fprintf(stderr, "Ok we have overlapping extents that "
6987 "aren't completely covered by eachother, this "
6988 "is going to require more careful thought. "
6989 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6990 tmp->start, tmp->nr, good->start, good->nr);
6997 list_add_tail(&rec->list, &delete_list);
6999 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7002 list_move_tail(&tmp->list, &delete_list);
7005 root = root->fs_info->extent_root;
7006 trans = btrfs_start_transaction(root, 1);
7007 if (IS_ERR(trans)) {
7008 ret = PTR_ERR(trans);
7012 list_for_each_entry(tmp, &delete_list, list) {
7013 if (tmp->found_rec == 0)
7015 key.objectid = tmp->start;
7016 key.type = BTRFS_EXTENT_ITEM_KEY;
7017 key.offset = tmp->nr;
7019 /* Shouldn't happen but just in case */
7020 if (tmp->metadata) {
7021 fprintf(stderr, "Well this shouldn't happen, extent "
7022 "record overlaps but is metadata? "
7023 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7027 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7033 ret = btrfs_del_item(trans, root, path);
7036 btrfs_release_path(path);
7039 err = btrfs_commit_transaction(trans, root);
7043 while (!list_empty(&delete_list)) {
7044 tmp = list_entry(delete_list.next, struct extent_record, list);
7045 list_del_init(&tmp->list);
7051 while (!list_empty(&rec->dups)) {
7052 tmp = list_entry(rec->dups.next, struct extent_record, list);
7053 list_del_init(&tmp->list);
7057 btrfs_free_path(path);
7059 if (!ret && !nr_del)
7060 rec->num_duplicates = 0;
7062 return ret ? ret : nr_del;
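/*
 * For data backrefs that never found a matching ref, look the file
 * extent up in the owning fs tree and recover its real disk bytenr and
 * size so verify_backrefs() has something to work with.
 */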
7065 static int find_possible_backrefs(struct btrfs_fs_info *info,
7066 struct btrfs_path *path,
7067 struct cache_tree *extent_cache,
7068 struct extent_record *rec)
7070 struct btrfs_root *root;
7071 struct extent_backref *back;
7072 struct data_backref *dback;
7073 struct cache_extent *cache;
7074 struct btrfs_file_extent_item *fi;
7075 struct btrfs_key key;
7079 list_for_each_entry(back, &rec->backrefs, list) {
7080 /* Don't care about full backrefs (poor unloved backrefs) */
7081 if (back->full_backref || !back->is_data)
7084 dback = (struct data_backref *)back;
7086 /* We found this one, we don't need to do a lookup */
7087 if (dback->found_ref)
7090 key.objectid = dback->root;
7091 key.type = BTRFS_ROOT_ITEM_KEY;
7092 key.offset = (u64)-1;
7094 root = btrfs_read_fs_root(info, &key);
7096 /* No root, definitely a bad ref, skip */
7097 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7099 /* Other err, exit */
7101 return PTR_ERR(root);
7103 key.objectid = dback->owner;
7104 key.type = BTRFS_EXTENT_DATA_KEY;
7105 key.offset = dback->offset;
7106 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7108 btrfs_release_path(path);
7111 /* Didn't find it, we can carry on */
7116 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7117 struct btrfs_file_extent_item);
7118 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7119 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7120 btrfs_release_path(path);
7121 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7123 struct extent_record *tmp;
7124 tmp = container_of(cache, struct extent_record, cache);
7127 * If we found an extent record for the bytenr for this
7128 * particular backref then we can't add it to our
7129 * current extent record. We only want to add backrefs
7130 * that don't have a corresponding extent item in the
7131 * extent tree since they likely belong to this record
7132 * and we need to fix it if the bytenrs don't match.
7138 dback->found_ref += 1;
7139 dback->disk_bytenr = bytenr;
7140 dback->bytes = bytes;
7143 * Set this so the verify backref code knows not to trust the
7144 * values in this backref.
7153 * Record orphan data refs into the corresponding root.
7155 * Return 0 if the extent item contains a data ref and it was recorded.
7156 * Return 1 if the extent item contains no useful data ref.
7157 * In that case, it may contain only a shared_dataref or metadata backref,
7158 * or the file extent may exist (this should be handled by the extent bytenr
7160 * Return <0 if something goes wrong.
7162 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7163 struct extent_record *rec)
7165 struct btrfs_key key;
7166 struct btrfs_root *dest_root;
7167 struct extent_backref *back;
7168 struct data_backref *dback;
7169 struct orphan_data_extent *orphan;
7170 struct btrfs_path *path;
7171 int recorded_data_ref = 0;
7176 path = btrfs_alloc_path();
7179 list_for_each_entry(back, &rec->backrefs, list) {
7180 if (back->full_backref || !back->is_data ||
7181 !back->found_extent_tree)
7183 dback = (struct data_backref *)back;
7184 if (dback->found_ref)
7186 key.objectid = dback->root;
7187 key.type = BTRFS_ROOT_ITEM_KEY;
7188 key.offset = (u64)-1;
7190 dest_root = btrfs_read_fs_root(fs_info, &key);
7192 /* For a non-existent root we just skip it */
7193 if (IS_ERR(dest_root) || !dest_root)
7196 key.objectid = dback->owner;
7197 key.type = BTRFS_EXTENT_DATA_KEY;
7198 key.offset = dback->offset;
7200 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7202 * For ret < 0, it's OK since the fs tree may be corrupted;
7203 * we need to record it for inode/file extent rebuild.
7204 * For ret > 0, we record it only for file extent rebuild.
7205 * For ret == 0, the file extent exists but only the bytenr
7206 * mismatches; let the original bytenr fix routine handle it,
7212 orphan = malloc(sizeof(*orphan));
7217 INIT_LIST_HEAD(&orphan->list);
7218 orphan->root = dback->root;
7219 orphan->objectid = dback->owner;
7220 orphan->offset = dback->offset;
7221 orphan->disk_bytenr = rec->cache.start;
7222 orphan->disk_len = rec->cache.size;
7223 list_add(&dest_root->orphan_data_extents, &orphan->list);
7224 recorded_data_ref = 1;
7227 btrfs_free_path(path);
7229 return !recorded_data_ref;
7235 * when an incorrect extent item is found, this will delete
7236 * all of the existing entries for it and recreate them
7237 * based on what the tree scan found.
7239 static int fixup_extent_refs(struct btrfs_fs_info *info,
7240 struct cache_tree *extent_cache,
7241 struct extent_record *rec)
7243 struct btrfs_trans_handle *trans = NULL;
7245 struct btrfs_path *path;
7246 struct list_head *cur = rec->backrefs.next;
7247 struct cache_extent *cache;
7248 struct extent_backref *back;
7252 if (rec->flag_block_full_backref)
7253 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7255 path = btrfs_alloc_path();
7259 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7261 * Sometimes the backrefs themselves are so broken they don't
7262 * get attached to any meaningful rec, so first go back and
7263 * check any of our backrefs that we couldn't find and throw
7264 * them into the list if we find the backref so that
7265 * verify_backrefs can figure out what to do.
7267 ret = find_possible_backrefs(info, path, extent_cache, rec);
7272 /* step one, make sure all of the backrefs agree */
7273 ret = verify_backrefs(info, path, rec);
7277 trans = btrfs_start_transaction(info->extent_root, 1);
7278 if (IS_ERR(trans)) {
7279 ret = PTR_ERR(trans);
7283 /* step two, delete all the existing records */
7284 ret = delete_extent_records(trans, info->extent_root, path,
7285 rec->start, rec->max_size);
7290 /* was this block corrupt? If so, don't add references to it */
7291 cache = lookup_cache_extent(info->corrupt_blocks,
7292 rec->start, rec->max_size);
7298 /* step three, recreate all the refs we did find */
7299 while(cur != &rec->backrefs) {
7300 back = list_entry(cur, struct extent_backref, list);
7304 * if we didn't find any references, don't create a
7307 if (!back->found_ref)
7310 rec->bad_full_backref = 0;
7311 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7319 int err = btrfs_commit_transaction(trans, info->extent_root);
7324 btrfs_free_path(path);
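/*
 * Rewrite the flags of an existing extent item so FULL_BACKREF is set or
 * cleared to match what the tree scan decided.
 */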
7328 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7329 struct extent_record *rec)
7331 struct btrfs_trans_handle *trans;
7332 struct btrfs_root *root = fs_info->extent_root;
7333 struct btrfs_path *path;
7334 struct btrfs_extent_item *ei;
7335 struct btrfs_key key;
7339 key.objectid = rec->start;
7340 if (rec->metadata) {
7341 key.type = BTRFS_METADATA_ITEM_KEY;
7342 key.offset = rec->info_level;
7344 key.type = BTRFS_EXTENT_ITEM_KEY;
7345 key.offset = rec->max_size;
7348 path = btrfs_alloc_path();
7352 trans = btrfs_start_transaction(root, 0);
7353 if (IS_ERR(trans)) {
7354 btrfs_free_path(path);
7355 return PTR_ERR(trans);
7358 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7360 btrfs_free_path(path);
7361 btrfs_commit_transaction(trans, root);
7364 fprintf(stderr, "Didn't find extent for %llu\n",
7365 (unsigned long long)rec->start);
7366 btrfs_free_path(path);
7367 btrfs_commit_transaction(trans, root);
7371 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7372 struct btrfs_extent_item);
7373 flags = btrfs_extent_flags(path->nodes[0], ei);
7374 if (rec->flag_block_full_backref) {
7375 fprintf(stderr, "setting full backref on %llu\n",
7376 (unsigned long long)key.objectid);
7377 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7379 fprintf(stderr, "clearing full backref on %llu\n",
7380 (unsigned long long)key.objectid);
7381 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7383 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7384 btrfs_mark_buffer_dirty(path->nodes[0]);
7385 btrfs_free_path(path);
7386 return btrfs_commit_transaction(trans, root);
7389 /* right now we only prune from the extent allocation tree */
7390 static int prune_one_block(struct btrfs_trans_handle *trans,
7391 struct btrfs_fs_info *info,
7392 struct btrfs_corrupt_block *corrupt)
7395 struct btrfs_path path;
7396 struct extent_buffer *eb;
7400 int level = corrupt->level + 1;
7402 btrfs_init_path(&path);
7404 /* we want to stop at the parent of our busted block */
7405 path.lowest_level = level;
7407 ret = btrfs_search_slot(trans, info->extent_root,
7408 &corrupt->key, &path, -1, 1);
7413 eb = path.nodes[level];
7420 * hopefully the search gave us the block we want to prune,
7421 * let's try that first
7423 slot = path.slots[level];
7424 found = btrfs_node_blockptr(eb, slot);
7425 if (found == corrupt->cache.start)
7428 nritems = btrfs_header_nritems(eb);
7430 /* the search failed, let's scan this node and hope we find it */
7431 for (slot = 0; slot < nritems; slot++) {
7432 found = btrfs_node_blockptr(eb, slot);
7433 if (found == corrupt->cache.start)
7437 * we couldn't find the bad block. TODO: search all the nodes for pointers
7440 if (eb == info->extent_root->node) {
7445 btrfs_release_path(&path);
7450 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7451 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7454 btrfs_release_path(&path);
7458 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7460 struct btrfs_trans_handle *trans = NULL;
7461 struct cache_extent *cache;
7462 struct btrfs_corrupt_block *corrupt;
7465 cache = search_cache_extent(info->corrupt_blocks, 0);
7469 trans = btrfs_start_transaction(info->extent_root, 1);
7471 return PTR_ERR(trans);
7473 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7474 prune_one_block(trans, info, corrupt);
7475 remove_cache_extent(info->corrupt_blocks, cache);
7478 return btrfs_commit_transaction(trans, info->extent_root);
7482 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7484 struct btrfs_block_group_cache *cache;
7489 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7490 &start, &end, EXTENT_DIRTY);
7493 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7499 cache = btrfs_lookup_first_block_group(fs_info, start);
7504 start = cache->key.objectid + cache->key.offset;
7508 static int check_extent_refs(struct btrfs_root *root,
7509 struct cache_tree *extent_cache)
7511 struct extent_record *rec;
7512 struct cache_extent *cache;
7521 * if we're doing a repair, we have to make sure
7522 * we don't allocate from the problem extents.
7523 * In the worst case, this will be all the
7526 cache = search_cache_extent(extent_cache, 0);
7528 rec = container_of(cache, struct extent_record, cache);
7529 set_extent_dirty(root->fs_info->excluded_extents,
7531 rec->start + rec->max_size - 1,
7533 cache = next_cache_extent(cache);
7536 /* pin down all the corrupted blocks too */
7537 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7539 set_extent_dirty(root->fs_info->excluded_extents,
7541 cache->start + cache->size - 1,
7543 cache = next_cache_extent(cache);
7545 prune_corrupt_blocks(root->fs_info);
7546 reset_cached_block_groups(root->fs_info);
7549 reset_cached_block_groups(root->fs_info);
7552 * We need to delete any duplicate entries we find first, otherwise we
7553 * could mess up the extent tree when we have backrefs that actually
7554 * belong to a different extent item and not the weird duplicate one.
7556 while (repair && !list_empty(&duplicate_extents)) {
7557 rec = list_entry(duplicate_extents.next, struct extent_record,
7559 list_del_init(&rec->list);
7561 /* Sometimes we can find a backref before we find an actual
7562 * extent, so we need to process it a little bit to see if there
7563 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7564 * if this is a backref screwup. If we need to delete stuff
7565 * process_duplicates() will return 0, otherwise it will return
7568 if (process_duplicates(root, extent_cache, rec))
7570 ret = delete_duplicate_records(root, rec);
7574 * delete_duplicate_records will return the number of entries
7575 * deleted, so if it's greater than 0 then we know we actually
7576 * did something and we need to remove.
7590 cache = search_cache_extent(extent_cache, 0);
7593 rec = container_of(cache, struct extent_record, cache);
7594 if (rec->num_duplicates) {
7595 fprintf(stderr, "extent item %llu has multiple extent "
7596 "items\n", (unsigned long long)rec->start);
7601 if (rec->refs != rec->extent_item_refs) {
7602 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7603 (unsigned long long)rec->start,
7604 (unsigned long long)rec->nr);
7605 fprintf(stderr, "extent item %llu, found %llu\n",
7606 (unsigned long long)rec->extent_item_refs,
7607 (unsigned long long)rec->refs);
7608 ret = record_orphan_data_extents(root->fs_info, rec);
7615 * we can't use the extent to repair the file
7616 * extent, let the fallback method handle it.
7618 if (!fixed && repair) {
7619 ret = fixup_extent_refs(
7630 if (all_backpointers_checked(rec, 1)) {
7631 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7632 (unsigned long long)rec->start,
7633 (unsigned long long)rec->nr);
7635 if (!fixed && !recorded && repair) {
7636 ret = fixup_extent_refs(root->fs_info,
7645 if (!rec->owner_ref_checked) {
7646 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7647 (unsigned long long)rec->start,
7648 (unsigned long long)rec->nr);
7649 if (!fixed && !recorded && repair) {
7650 ret = fixup_extent_refs(root->fs_info,
7659 if (rec->bad_full_backref) {
7660 fprintf(stderr, "bad full backref, on [%llu]\n",
7661 (unsigned long long)rec->start);
7663 ret = fixup_extent_flags(root->fs_info, rec);
7672 * Although it's not an extent ref's problem, we reuse this
7673 * routine for error reporting.
7674 * No repair function yet.
7676 if (rec->crossing_stripes) {
7678 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7679 rec->start, rec->start + rec->max_size);
7684 if (rec->wrong_chunk_type) {
7686 "bad extent [%llu, %llu), type mismatch with chunk\n",
7687 rec->start, rec->start + rec->max_size);
7692 remove_cache_extent(extent_cache, cache);
7693 free_all_extent_backrefs(rec);
7694 if (!init_extent_tree && repair && (!cur_err || fixed))
7695 clear_extent_dirty(root->fs_info->excluded_extents,
7697 rec->start + rec->max_size - 1,
7703 if (ret && ret != -EAGAIN) {
7704 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7707 struct btrfs_trans_handle *trans;
7709 root = root->fs_info->extent_root;
7710 trans = btrfs_start_transaction(root, 1);
7711 if (IS_ERR(trans)) {
7712 ret = PTR_ERR(trans);
7716 btrfs_fix_block_accounting(trans, root);
7717 ret = btrfs_commit_transaction(trans, root);
7722 fprintf(stderr, "repaired damaged extent references\n");
7728 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7732 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7733 stripe_size = length;
7734 stripe_size /= num_stripes;
7735 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7736 stripe_size = length * 2;
7737 stripe_size /= num_stripes;
7738 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7739 stripe_size = length;
7740 stripe_size /= (num_stripes - 1);
7741 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7742 stripe_size = length;
7743 stripe_size /= (num_stripes - 2);
7745 stripe_size = length;
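/*
 * Worked example with made-up numbers (not taken from any particular
 * filesystem): a RAID10 chunk of length 2GiB with num_stripes = 4 yields
 * stripe_size = 2GiB * 2 / 4 = 1GiB per device, while a RAID5 chunk of the
 * same length and stripe count yields 2GiB / (4 - 1), roughly 683MiB,
 * because one stripe's worth of space is consumed by parity.
 */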
7751 * Check the chunk with its block group/dev list ref:
7752 * Return 0 if all refs seem valid.
7753 * Return 1 if only part of the refs seem valid; a later check is then needed
7754 * to rebuild them, e.g. a missing block group rebuilt from the extent tree.
7755 * Return -1 if essential refs are missing and cannot be rebuilt.
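 *
 * A minimal sketch of the intended use of these return values
 * (check_chunks() below is the real caller):
 *
 *	err = check_chunk_refs(chunk_rec, block_group_cache,
 *			       dev_extent_cache, silent);
 *	if (err == 0)
 *		list_add_tail(&chunk_rec->list, good);
 *	else if (err > 0)
 *		list_add_tail(&chunk_rec->list, rebuild);
 *	else
 *		list_add_tail(&chunk_rec->list, bad);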
7757 static int check_chunk_refs(struct chunk_record *chunk_rec,
7758 struct block_group_tree *block_group_cache,
7759 struct device_extent_tree *dev_extent_cache,
7762 struct cache_extent *block_group_item;
7763 struct block_group_record *block_group_rec;
7764 struct cache_extent *dev_extent_item;
7765 struct device_extent_record *dev_extent_rec;
7769 int metadump_v2 = 0;
7773 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7776 if (block_group_item) {
7777 block_group_rec = container_of(block_group_item,
7778 struct block_group_record,
7780 if (chunk_rec->length != block_group_rec->offset ||
7781 chunk_rec->offset != block_group_rec->objectid ||
7783 chunk_rec->type_flags != block_group_rec->flags)) {
7786 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7787 chunk_rec->objectid,
7792 chunk_rec->type_flags,
7793 block_group_rec->objectid,
7794 block_group_rec->type,
7795 block_group_rec->offset,
7796 block_group_rec->offset,
7797 block_group_rec->objectid,
7798 block_group_rec->flags);
7801 list_del_init(&block_group_rec->list);
7802 chunk_rec->bg_rec = block_group_rec;
7807 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7808 chunk_rec->objectid,
7813 chunk_rec->type_flags);
7820 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7821 chunk_rec->num_stripes);
7822 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7823 devid = chunk_rec->stripes[i].devid;
7824 offset = chunk_rec->stripes[i].offset;
7825 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7826 devid, offset, length);
7827 if (dev_extent_item) {
7828 dev_extent_rec = container_of(dev_extent_item,
7829 struct device_extent_record,
7831 if (dev_extent_rec->objectid != devid ||
7832 dev_extent_rec->offset != offset ||
7833 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7834 dev_extent_rec->length != length) {
7837 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7838 chunk_rec->objectid,
7841 chunk_rec->stripes[i].devid,
7842 chunk_rec->stripes[i].offset,
7843 dev_extent_rec->objectid,
7844 dev_extent_rec->offset,
7845 dev_extent_rec->length);
7848 list_move(&dev_extent_rec->chunk_list,
7849 &chunk_rec->dextents);
7854 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7855 chunk_rec->objectid,
7858 chunk_rec->stripes[i].devid,
7859 chunk_rec->stripes[i].offset);
7866 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7867 int check_chunks(struct cache_tree *chunk_cache,
7868 struct block_group_tree *block_group_cache,
7869 struct device_extent_tree *dev_extent_cache,
7870 struct list_head *good, struct list_head *bad,
7871 struct list_head *rebuild, int silent)
7873 struct cache_extent *chunk_item;
7874 struct chunk_record *chunk_rec;
7875 struct block_group_record *bg_rec;
7876 struct device_extent_record *dext_rec;
7880 chunk_item = first_cache_extent(chunk_cache);
7881 while (chunk_item) {
7882 chunk_rec = container_of(chunk_item, struct chunk_record,
7884 err = check_chunk_refs(chunk_rec, block_group_cache,
7885 dev_extent_cache, silent);
7888 if (err == 0 && good)
7889 list_add_tail(&chunk_rec->list, good);
7890 if (err > 0 && rebuild)
7891 list_add_tail(&chunk_rec->list, rebuild);
7893 list_add_tail(&chunk_rec->list, bad);
7894 chunk_item = next_cache_extent(chunk_item);
7897 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7900 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7908 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7912 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7923 static int check_device_used(struct device_record *dev_rec,
7924 struct device_extent_tree *dext_cache)
7926 struct cache_extent *cache;
7927 struct device_extent_record *dev_extent_rec;
7930 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7932 dev_extent_rec = container_of(cache,
7933 struct device_extent_record,
7935 if (dev_extent_rec->objectid != dev_rec->devid)
7938 list_del_init(&dev_extent_rec->device_list);
7939 total_byte += dev_extent_rec->length;
7940 cache = next_cache_extent(cache);
7943 if (total_byte != dev_rec->byte_used) {
7945 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7946 total_byte, dev_rec->byte_used, dev_rec->objectid,
7947 dev_rec->type, dev_rec->offset);
7954 /* check btrfs_dev_item -> btrfs_dev_extent */
7955 static int check_devices(struct rb_root *dev_cache,
7956 struct device_extent_tree *dev_extent_cache)
7958 struct rb_node *dev_node;
7959 struct device_record *dev_rec;
7960 struct device_extent_record *dext_rec;
7964 dev_node = rb_first(dev_cache);
7966 dev_rec = container_of(dev_node, struct device_record, node);
7967 err = check_device_used(dev_rec, dev_extent_cache);
7971 dev_node = rb_next(dev_node);
7973 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7976 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7977 dext_rec->objectid, dext_rec->offset, dext_rec->length);
7984 static int add_root_item_to_list(struct list_head *head,
7985 u64 objectid, u64 bytenr, u64 last_snapshot,
7986 u8 level, u8 drop_level,
7987 int level_size, struct btrfs_key *drop_key)
7990 struct root_item_record *ri_rec;
7991 ri_rec = malloc(sizeof(*ri_rec));
7994 ri_rec->bytenr = bytenr;
7995 ri_rec->objectid = objectid;
7996 ri_rec->level = level;
7997 ri_rec->level_size = level_size;
7998 ri_rec->drop_level = drop_level;
7999 ri_rec->last_snapshot = last_snapshot;
8001 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8002 list_add_tail(&ri_rec->list, head);
8007 static void free_root_item_list(struct list_head *list)
8009 struct root_item_record *ri_rec;
8011 while (!list_empty(list)) {
8012 ri_rec = list_first_entry(list, struct root_item_record,
8014 list_del_init(&ri_rec->list);
8019 static int deal_root_from_list(struct list_head *list,
8020 struct btrfs_root *root,
8021 struct block_info *bits,
8023 struct cache_tree *pending,
8024 struct cache_tree *seen,
8025 struct cache_tree *reada,
8026 struct cache_tree *nodes,
8027 struct cache_tree *extent_cache,
8028 struct cache_tree *chunk_cache,
8029 struct rb_root *dev_cache,
8030 struct block_group_tree *block_group_cache,
8031 struct device_extent_tree *dev_extent_cache)
8036 while (!list_empty(list)) {
8037 struct root_item_record *rec;
8038 struct extent_buffer *buf;
8039 rec = list_entry(list->next,
8040 struct root_item_record, list);
8042 buf = read_tree_block(root->fs_info->tree_root,
8043 rec->bytenr, rec->level_size, 0);
8044 if (!extent_buffer_uptodate(buf)) {
8045 free_extent_buffer(buf);
8049 add_root_to_pending(buf, extent_cache, pending,
8050 seen, nodes, rec->objectid);
8052 * To rebuild the extent tree, we need to deal with snapshots
8053 * one by one; otherwise we deal with nodes first, which
8054 * can maximize readahead.
8057 ret = run_next_block(root, bits, bits_nr, &last,
8058 pending, seen, reada, nodes,
8059 extent_cache, chunk_cache,
8060 dev_cache, block_group_cache,
8061 dev_extent_cache, rec);
8065 free_extent_buffer(buf);
8066 list_del(&rec->list);
8072 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8073 reada, nodes, extent_cache, chunk_cache,
8074 dev_cache, block_group_cache,
8075 dev_extent_cache, NULL);
8085 static int check_chunks_and_extents(struct btrfs_root *root)
8087 struct rb_root dev_cache;
8088 struct cache_tree chunk_cache;
8089 struct block_group_tree block_group_cache;
8090 struct device_extent_tree dev_extent_cache;
8091 struct cache_tree extent_cache;
8092 struct cache_tree seen;
8093 struct cache_tree pending;
8094 struct cache_tree reada;
8095 struct cache_tree nodes;
8096 struct extent_io_tree excluded_extents;
8097 struct cache_tree corrupt_blocks;
8098 struct btrfs_path path;
8099 struct btrfs_key key;
8100 struct btrfs_key found_key;
8102 struct block_info *bits;
8104 struct extent_buffer *leaf;
8106 struct btrfs_root_item ri;
8107 struct list_head dropping_trees;
8108 struct list_head normal_trees;
8109 struct btrfs_root *root1;
8114 dev_cache = RB_ROOT;
8115 cache_tree_init(&chunk_cache);
8116 block_group_tree_init(&block_group_cache);
8117 device_extent_tree_init(&dev_extent_cache);
8119 cache_tree_init(&extent_cache);
8120 cache_tree_init(&seen);
8121 cache_tree_init(&pending);
8122 cache_tree_init(&nodes);
8123 cache_tree_init(&reada);
8124 cache_tree_init(&corrupt_blocks);
8125 extent_io_tree_init(&excluded_extents);
8126 INIT_LIST_HEAD(&dropping_trees);
8127 INIT_LIST_HEAD(&normal_trees);
8130 root->fs_info->excluded_extents = &excluded_extents;
8131 root->fs_info->fsck_extent_cache = &extent_cache;
8132 root->fs_info->free_extent_hook = free_extent_hook;
8133 root->fs_info->corrupt_blocks = &corrupt_blocks;
8137 bits = malloc(bits_nr * sizeof(struct block_info));
8143 if (ctx.progress_enabled) {
8144 ctx.tp = TASK_EXTENTS;
8145 task_start(ctx.info);
8149 root1 = root->fs_info->tree_root;
8150 level = btrfs_header_level(root1->node);
8151 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8152 root1->node->start, 0, level, 0,
8153 btrfs_level_size(root1, level), NULL);
8156 root1 = root->fs_info->chunk_root;
8157 level = btrfs_header_level(root1->node);
8158 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8159 root1->node->start, 0, level, 0,
8160 btrfs_level_size(root1, level), NULL);
8163 btrfs_init_path(&path);
8166 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8167 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8172 leaf = path.nodes[0];
8173 slot = path.slots[0];
8174 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8175 ret = btrfs_next_leaf(root, &path);
8178 leaf = path.nodes[0];
8179 slot = path.slots[0];
8181 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8182 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8183 unsigned long offset;
8186 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8187 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8188 last_snapshot = btrfs_root_last_snapshot(&ri);
8189 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8190 level = btrfs_root_level(&ri);
8191 level_size = btrfs_level_size(root, level);
8192 ret = add_root_item_to_list(&normal_trees,
8194 btrfs_root_bytenr(&ri),
8195 last_snapshot, level,
8196 0, level_size, NULL);
8200 level = btrfs_root_level(&ri);
8201 level_size = btrfs_level_size(root, level);
8202 objectid = found_key.objectid;
8203 btrfs_disk_key_to_cpu(&found_key,
8205 ret = add_root_item_to_list(&dropping_trees,
8207 btrfs_root_bytenr(&ri),
8208 last_snapshot, level,
8210 level_size, &found_key);
8217 btrfs_release_path(&path);
8220 * check_block can return -EAGAIN if it fixes something; please keep
8221 * this in mind when dealing with return values from these functions. If
8222 * we get -EAGAIN we want to fall through and restart the loop.
8224 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8225 &seen, &reada, &nodes, &extent_cache,
8226 &chunk_cache, &dev_cache, &block_group_cache,
8233 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8234 &pending, &seen, &reada, &nodes,
8235 &extent_cache, &chunk_cache, &dev_cache,
8236 &block_group_cache, &dev_extent_cache);
8243 ret = check_chunks(&chunk_cache, &block_group_cache,
8244 &dev_extent_cache, NULL, NULL, NULL, 0);
8251 ret = check_extent_refs(root, &extent_cache);
8258 ret = check_devices(&dev_cache, &dev_extent_cache);
8263 task_stop(ctx.info);
8265 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8266 extent_io_tree_cleanup(&excluded_extents);
8267 root->fs_info->fsck_extent_cache = NULL;
8268 root->fs_info->free_extent_hook = NULL;
8269 root->fs_info->corrupt_blocks = NULL;
8270 root->fs_info->excluded_extents = NULL;
8273 free_chunk_cache_tree(&chunk_cache);
8274 free_device_cache_tree(&dev_cache);
8275 free_block_group_tree(&block_group_cache);
8276 free_device_extent_tree(&dev_extent_cache);
8277 free_extent_cache_tree(&seen);
8278 free_extent_cache_tree(&pending);
8279 free_extent_cache_tree(&reada);
8280 free_extent_cache_tree(&nodes);
8283 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8284 free_extent_cache_tree(&seen);
8285 free_extent_cache_tree(&pending);
8286 free_extent_cache_tree(&reada);
8287 free_extent_cache_tree(&nodes);
8288 free_chunk_cache_tree(&chunk_cache);
8289 free_block_group_tree(&block_group_cache);
8290 free_device_cache_tree(&dev_cache);
8291 free_device_extent_tree(&dev_extent_cache);
8292 free_extent_record_cache(root->fs_info, &extent_cache);
8293 free_root_item_list(&normal_trees);
8294 free_root_item_list(&dropping_trees);
8295 extent_io_tree_cleanup(&excluded_extents);
8299 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8300 struct btrfs_root *root, int overwrite)
8302 struct extent_buffer *c;
8303 struct extent_buffer *old = root->node;
8306 struct btrfs_disk_key disk_key = {0,0,0};
8312 extent_buffer_get(c);
8315 c = btrfs_alloc_free_block(trans, root,
8316 btrfs_level_size(root, 0),
8317 root->root_key.objectid,
8318 &disk_key, level, 0, 0);
8321 extent_buffer_get(c);
8325 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8326 btrfs_set_header_level(c, level);
8327 btrfs_set_header_bytenr(c, c->start);
8328 btrfs_set_header_generation(c, trans->transid);
8329 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8330 btrfs_set_header_owner(c, root->root_key.objectid);
8332 write_extent_buffer(c, root->fs_info->fsid,
8333 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8335 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8336 btrfs_header_chunk_tree_uuid(c),
8339 btrfs_mark_buffer_dirty(c);
8341 * this can happen in the following cases:
8343 * 1. overwrite of the previous root.
8345 * 2. reinit of the reloc data root; this is because we skipped pinning
8346 * down the reloc data tree before, which means we can allocate the
8347 * same block bytenr here.
8349 if (old->start == c->start) {
8350 btrfs_set_root_generation(&root->root_item,
8352 root->root_item.level = btrfs_header_level(root->node);
8353 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8354 &root->root_key, &root->root_item);
8356 free_extent_buffer(c);
8360 free_extent_buffer(old);
8362 add_root_to_dirty_list(root);
8366 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8367 struct extent_buffer *eb, int tree_root)
8369 struct extent_buffer *tmp;
8370 struct btrfs_root_item *ri;
8371 struct btrfs_key key;
8374 int level = btrfs_header_level(eb);
8380 * If we have pinned this block before, don't pin it again.
8381 * This not only avoids an endless loop on a broken filesystem
8382 * but also gives us some speedup.
8384 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8385 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8388 btrfs_pin_extent(fs_info, eb->start, eb->len);
8390 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8391 nritems = btrfs_header_nritems(eb);
8392 for (i = 0; i < nritems; i++) {
8394 btrfs_item_key_to_cpu(eb, &key, i);
8395 if (key.type != BTRFS_ROOT_ITEM_KEY)
8397 /* Skip the extent root and reloc roots */
8398 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8399 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8400 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8402 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8403 bytenr = btrfs_disk_root_bytenr(eb, ri);
8406 * If at any point we start needing the real root we
8407 * will have to build a stump root for the root we are
8408 * in, but for now this doesn't actually use the root so
8409 * just pass in extent_root.
8411 tmp = read_tree_block(fs_info->extent_root, bytenr,
8413 if (!extent_buffer_uptodate(tmp)) {
8414 fprintf(stderr, "Error reading root block\n");
8417 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8418 free_extent_buffer(tmp);
8422 bytenr = btrfs_node_blockptr(eb, i);
8424 /* If we aren't the tree root don't read the block */
8425 if (level == 1 && !tree_root) {
8426 btrfs_pin_extent(fs_info, bytenr, leafsize);
8430 tmp = read_tree_block(fs_info->extent_root, bytenr,
8432 if (!extent_buffer_uptodate(tmp)) {
8433 fprintf(stderr, "Error reading tree block\n");
8436 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8437 free_extent_buffer(tmp);
8446 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8450 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8454 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8457 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8459 struct btrfs_block_group_cache *cache;
8460 struct btrfs_path *path;
8461 struct extent_buffer *leaf;
8462 struct btrfs_chunk *chunk;
8463 struct btrfs_key key;
8467 path = btrfs_alloc_path();
8472 key.type = BTRFS_CHUNK_ITEM_KEY;
8475 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8477 btrfs_free_path(path);
8482 * We do this in case the block groups were screwed up and had alloc
8483 * bits that aren't actually set on the chunks. This happens with
8484 * restored images every time and could happen in real life I guess.
8486 fs_info->avail_data_alloc_bits = 0;
8487 fs_info->avail_metadata_alloc_bits = 0;
8488 fs_info->avail_system_alloc_bits = 0;
8490 /* First we need to create the in-memory block groups */
8492 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8493 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8495 btrfs_free_path(path);
8503 leaf = path->nodes[0];
8504 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8505 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8510 chunk = btrfs_item_ptr(leaf, path->slots[0],
8511 struct btrfs_chunk);
8512 btrfs_add_block_group(fs_info, 0,
8513 btrfs_chunk_type(leaf, chunk),
8514 key.objectid, key.offset,
8515 btrfs_chunk_length(leaf, chunk));
8516 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8517 key.offset + btrfs_chunk_length(leaf, chunk),
8523 cache = btrfs_lookup_first_block_group(fs_info, start);
8527 start = cache->key.objectid + cache->key.offset;
8530 btrfs_free_path(path);
8534 static int reset_balance(struct btrfs_trans_handle *trans,
8535 struct btrfs_fs_info *fs_info)
8537 struct btrfs_root *root = fs_info->tree_root;
8538 struct btrfs_path *path;
8539 struct extent_buffer *leaf;
8540 struct btrfs_key key;
8541 int del_slot, del_nr = 0;
8545 path = btrfs_alloc_path();
8549 key.objectid = BTRFS_BALANCE_OBJECTID;
8550 key.type = BTRFS_BALANCE_ITEM_KEY;
8553 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8558 goto reinit_data_reloc;
8563 ret = btrfs_del_item(trans, root, path);
8566 btrfs_release_path(path);
8568 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8569 key.type = BTRFS_ROOT_ITEM_KEY;
8572 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8576 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8581 ret = btrfs_del_items(trans, root, path,
8588 btrfs_release_path(path);
8591 ret = btrfs_search_slot(trans, root, &key, path,
8598 leaf = path->nodes[0];
8599 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8600 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8602 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8607 del_slot = path->slots[0];
8616 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8620 btrfs_release_path(path);
8623 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8624 key.type = BTRFS_ROOT_ITEM_KEY;
8625 key.offset = (u64)-1;
8626 root = btrfs_read_fs_root(fs_info, &key);
8628 fprintf(stderr, "Error reading data reloc tree\n");
8629 ret = PTR_ERR(root);
8632 record_root_in_trans(trans, root);
8633 ret = btrfs_fsck_reinit_root(trans, root, 0);
8636 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8638 btrfs_free_path(path);
8642 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8643 struct btrfs_fs_info *fs_info)
8649 * The only reason we don't do this is that right now we're just
8650 * walking the trees we find and pinning down their bytes; we don't look
8651 * at any of the leaves. In order to do mixed groups we'd have to check
8652 * the leaves of any fs roots and pin down the bytes for any file
8653 * extents we find. Not hard but why do it if we don't have to?
8655 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8656 fprintf(stderr, "We don't support re-initing the extent tree "
8657 "for mixed block groups yet, please notify a btrfs "
8658 "developer you want to do this so they can add this "
8659 "functionality.\n");
8664 * first we need to walk all of the trees except the extent tree and pin
8665 * down the bytes that are in use so we don't overwrite any existing
8668 ret = pin_metadata_blocks(fs_info);
8670 fprintf(stderr, "error pinning down used bytes\n");
8675 * Need to drop all the block groups since we're going to recreate all
8678 btrfs_free_block_groups(fs_info);
8679 ret = reset_block_groups(fs_info);
8681 fprintf(stderr, "error resetting the block groups\n");
8685 /* Ok we can allocate now, reinit the extent root */
8686 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8688 fprintf(stderr, "extent root initialization failed\n");
8690 * When the transaction code is updated we should end the
8691 * transaction, but for now progs only knows about commit so
8692 * just return an error.
8698 * Now we have all the in-memory block groups set up so we can make
8699 * allocations properly, and the metadata we care about is safe since we
8700 * pinned all of it above.
8703 struct btrfs_block_group_cache *cache;
8705 cache = btrfs_lookup_first_block_group(fs_info, start);
8708 start = cache->key.objectid + cache->key.offset;
8709 ret = btrfs_insert_item(trans, fs_info->extent_root,
8710 &cache->key, &cache->item,
8711 sizeof(cache->item));
8713 fprintf(stderr, "Error adding block group\n");
8716 btrfs_extent_post_op(trans, fs_info->extent_root);
8719 ret = reset_balance(trans, fs_info);
8721 fprintf(stderr, "error reseting the pending balance\n");
8726 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8728 struct btrfs_path *path;
8729 struct btrfs_trans_handle *trans;
8730 struct btrfs_key key;
8733 printf("Recowing metadata block %llu\n", eb->start);
8734 key.objectid = btrfs_header_owner(eb);
8735 key.type = BTRFS_ROOT_ITEM_KEY;
8736 key.offset = (u64)-1;
8738 root = btrfs_read_fs_root(root->fs_info, &key);
8740 fprintf(stderr, "Couldn't find owner root %llu\n",
8742 return PTR_ERR(root);
8745 path = btrfs_alloc_path();
8749 trans = btrfs_start_transaction(root, 1);
8750 if (IS_ERR(trans)) {
8751 btrfs_free_path(path);
8752 return PTR_ERR(trans);
8755 path->lowest_level = btrfs_header_level(eb);
8756 if (path->lowest_level)
8757 btrfs_node_key_to_cpu(eb, &key, 0);
8759 btrfs_item_key_to_cpu(eb, &key, 0);
8761 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8762 btrfs_commit_transaction(trans, root);
8763 btrfs_free_path(path);
8767 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8769 struct btrfs_path *path;
8770 struct btrfs_trans_handle *trans;
8771 struct btrfs_key key;
8774 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8775 bad->key.type, bad->key.offset);
8776 key.objectid = bad->root_id;
8777 key.type = BTRFS_ROOT_ITEM_KEY;
8778 key.offset = (u64)-1;
8780 root = btrfs_read_fs_root(root->fs_info, &key);
8782 fprintf(stderr, "Couldn't find owner root %llu\n",
8784 return PTR_ERR(root);
8787 path = btrfs_alloc_path();
8791 trans = btrfs_start_transaction(root, 1);
8792 if (IS_ERR(trans)) {
8793 btrfs_free_path(path);
8794 return PTR_ERR(trans);
8797 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8803 ret = btrfs_del_item(trans, root, path);
8805 btrfs_commit_transaction(trans, root);
8806 btrfs_free_path(path);
8810 static int zero_log_tree(struct btrfs_root *root)
8812 struct btrfs_trans_handle *trans;
8815 trans = btrfs_start_transaction(root, 1);
8816 if (IS_ERR(trans)) {
8817 ret = PTR_ERR(trans);
8820 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8821 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8822 ret = btrfs_commit_transaction(trans, root);
8826 static int populate_csum(struct btrfs_trans_handle *trans,
8827 struct btrfs_root *csum_root, char *buf, u64 start,
8834 while (offset < len) {
8835 sectorsize = csum_root->sectorsize;
8836 ret = read_extent_data(csum_root, buf, start + offset,
8840 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8841 start + offset, buf, sectorsize);
8844 offset += sectorsize;
8849 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8850 struct btrfs_root *csum_root,
8851 struct btrfs_root *cur_root)
8853 struct btrfs_path *path;
8854 struct btrfs_key key;
8855 struct extent_buffer *node;
8856 struct btrfs_file_extent_item *fi;
8863 path = btrfs_alloc_path();
8866 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8876 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8879 /* Iterate over all regular file extents and fill in their csums */
8881 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8883 if (key.type != BTRFS_EXTENT_DATA_KEY)
8885 node = path->nodes[0];
8886 slot = path->slots[0];
8887 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8888 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8890 start = btrfs_file_extent_disk_bytenr(node, fi);
8891 len = btrfs_file_extent_disk_num_bytes(node, fi);
8893 ret = populate_csum(trans, csum_root, buf, start, len);
8900 * TODO: if next leaf is corrupted, jump to nearest next valid
8903 ret = btrfs_next_item(cur_root, path);
8913 btrfs_free_path(path);
8918 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8919 struct btrfs_root *csum_root)
8921 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8922 struct btrfs_path *path;
8923 struct btrfs_root *tree_root = fs_info->tree_root;
8924 struct btrfs_root *cur_root;
8925 struct extent_buffer *node;
8926 struct btrfs_key key;
8930 path = btrfs_alloc_path();
8934 key.objectid = BTRFS_FS_TREE_OBJECTID;
8936 key.type = BTRFS_ROOT_ITEM_KEY;
8938 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8947 node = path->nodes[0];
8948 slot = path->slots[0];
8949 btrfs_item_key_to_cpu(node, &key, slot);
8950 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8952 if (key.type != BTRFS_ROOT_ITEM_KEY)
8954 if (!is_fstree(key.objectid))
8956 key.offset = (u64)-1;
8958 cur_root = btrfs_read_fs_root(fs_info, &key);
8959 if (IS_ERR(cur_root) || !cur_root) {
8960 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8964 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8969 ret = btrfs_next_item(tree_root, path);
8979 btrfs_free_path(path);
8983 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8984 struct btrfs_root *csum_root)
8986 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8987 struct btrfs_path *path;
8988 struct btrfs_extent_item *ei;
8989 struct extent_buffer *leaf;
8991 struct btrfs_key key;
8994 path = btrfs_alloc_path();
8999 key.type = BTRFS_EXTENT_ITEM_KEY;
9002 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9004 btrfs_free_path(path);
9008 buf = malloc(csum_root->sectorsize);
9010 btrfs_free_path(path);
9015 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9016 ret = btrfs_next_leaf(extent_root, path);
9024 leaf = path->nodes[0];
9026 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9027 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9032 ei = btrfs_item_ptr(leaf, path->slots[0],
9033 struct btrfs_extent_item);
9034 if (!(btrfs_extent_flags(leaf, ei) &
9035 BTRFS_EXTENT_FLAG_DATA)) {
9040 ret = populate_csum(trans, csum_root, buf, key.objectid,
9047 btrfs_free_path(path);
9053 * Recalculate the csum and put it into the csum tree.
9055 * Extent tree init will wipe out all the extent info, so in that case we
9056 * can't depend on the extent tree and have to use the fs trees instead. If
9057 * search_fs_tree is set, we will use the fs/subvol trees to init the csum tree.
9059 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9060 struct btrfs_root *csum_root,
9064 return fill_csum_tree_from_fs(trans, csum_root);
9066 return fill_csum_tree_from_extent(trans, csum_root);
9069 struct root_item_info {
9070 /* level of the root */
9072 /* number of nodes at this level, must be 1 for a root */
9076 struct cache_extent cache_extent;
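/*
 * Illustrative note (a sketch of how build_roots_info_cache() below keys
 * these entries): for the fs tree, root objectid 5, we would end up with
 * cache_extent.start == 5 and cache_extent.size == 1, and lookups are then
 * done with lookup_cache_extent(roots_info_cache, root_id, 1).
 */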
9079 static struct cache_tree *roots_info_cache = NULL;
9081 static void free_roots_info_cache(void)
9083 if (!roots_info_cache)
9086 while (!cache_tree_empty(roots_info_cache)) {
9087 struct cache_extent *entry;
9088 struct root_item_info *rii;
9090 entry = first_cache_extent(roots_info_cache);
9093 remove_cache_extent(roots_info_cache, entry);
9094 rii = container_of(entry, struct root_item_info, cache_extent);
9098 free(roots_info_cache);
9099 roots_info_cache = NULL;
9102 static int build_roots_info_cache(struct btrfs_fs_info *info)
9105 struct btrfs_key key;
9106 struct extent_buffer *leaf;
9107 struct btrfs_path *path;
9109 if (!roots_info_cache) {
9110 roots_info_cache = malloc(sizeof(*roots_info_cache));
9111 if (!roots_info_cache)
9113 cache_tree_init(roots_info_cache);
9116 path = btrfs_alloc_path();
9121 key.type = BTRFS_EXTENT_ITEM_KEY;
9124 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9127 leaf = path->nodes[0];
9130 struct btrfs_key found_key;
9131 struct btrfs_extent_item *ei;
9132 struct btrfs_extent_inline_ref *iref;
9133 int slot = path->slots[0];
9138 struct cache_extent *entry;
9139 struct root_item_info *rii;
9141 if (slot >= btrfs_header_nritems(leaf)) {
9142 ret = btrfs_next_leaf(info->extent_root, path);
9149 leaf = path->nodes[0];
9150 slot = path->slots[0];
9153 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9155 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9156 found_key.type != BTRFS_METADATA_ITEM_KEY)
9159 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9160 flags = btrfs_extent_flags(leaf, ei);
9162 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9163 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9166 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9167 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9168 level = found_key.offset;
9170 struct btrfs_tree_block_info *info;
9172 info = (struct btrfs_tree_block_info *)(ei + 1);
9173 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9174 level = btrfs_tree_block_level(leaf, info);
9178 * For a root extent, it must be of the following type and the
9179 * first (and only) iref in the item.
9181 type = btrfs_extent_inline_ref_type(leaf, iref);
9182 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9185 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9186 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9188 rii = malloc(sizeof(struct root_item_info));
9193 rii->cache_extent.start = root_id;
9194 rii->cache_extent.size = 1;
9195 rii->level = (u8)-1;
9196 entry = &rii->cache_extent;
9197 ret = insert_cache_extent(roots_info_cache, entry);
9200 rii = container_of(entry, struct root_item_info,
9204 ASSERT(rii->cache_extent.start == root_id);
9205 ASSERT(rii->cache_extent.size == 1);
9207 if (level > rii->level || rii->level == (u8)-1) {
9209 rii->bytenr = found_key.objectid;
9210 rii->gen = btrfs_extent_generation(leaf, ei);
9211 rii->node_count = 1;
9212 } else if (level == rii->level) {
9220 btrfs_free_path(path);
9225 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9226 struct btrfs_path *path,
9227 const struct btrfs_key *root_key,
9228 const int read_only_mode)
9230 const u64 root_id = root_key->objectid;
9231 struct cache_extent *entry;
9232 struct root_item_info *rii;
9233 struct btrfs_root_item ri;
9234 unsigned long offset;
9236 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9239 "Error: could not find extent items for root %llu\n",
9240 root_key->objectid);
9244 rii = container_of(entry, struct root_item_info, cache_extent);
9245 ASSERT(rii->cache_extent.start == root_id);
9246 ASSERT(rii->cache_extent.size == 1);
9248 if (rii->node_count != 1) {
9250 "Error: could not find btree root extent for root %llu\n",
9255 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9256 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9258 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9259 btrfs_root_level(&ri) != rii->level ||
9260 btrfs_root_generation(&ri) != rii->gen) {
9263 * If we're in repair mode but our caller told us to not update
9264 * the root item, i.e. just check if it needs to be updated, don't
9265 * print this message, since the caller will call us again shortly
9266 * for the same root item without read only mode (the caller will
9267 * open a transaction first).
9269 if (!(read_only_mode && repair))
9271 "%sroot item for root %llu,"
9272 " current bytenr %llu, current gen %llu, current level %u,"
9273 " new bytenr %llu, new gen %llu, new level %u\n",
9274 (read_only_mode ? "" : "fixing "),
9276 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9277 btrfs_root_level(&ri),
9278 rii->bytenr, rii->gen, rii->level);
9280 if (btrfs_root_generation(&ri) > rii->gen) {
9282 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9283 root_id, btrfs_root_generation(&ri), rii->gen);
9287 if (!read_only_mode) {
9288 btrfs_set_root_bytenr(&ri, rii->bytenr);
9289 btrfs_set_root_level(&ri, rii->level);
9290 btrfs_set_root_generation(&ri, rii->gen);
9291 write_extent_buffer(path->nodes[0], &ri,
9292 offset, sizeof(ri));
9302 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2)
9303 * caused read-only snapshots to be corrupted if they were created at a moment
9304 * when the source subvolume/snapshot had orphan items. The issue was that the
9305 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9306 * node instead of the post orphan cleanup root node.
9307 * So this function, and its callees, just detects and fixes those cases. Even
9308 * though the regression was for read-only snapshots, this function applies to
9309 * any snapshot/subvolume root.
9310 * This must be run before any other repair code; not doing so makes other
9311 * repair code delete or modify backrefs in the extent tree, for example, which
9312 * will result in an inconsistent fs after repairing the root items.
9314 static int repair_root_items(struct btrfs_fs_info *info)
9316 struct btrfs_path *path = NULL;
9317 struct btrfs_key key;
9318 struct extent_buffer *leaf;
9319 struct btrfs_trans_handle *trans = NULL;
9324 ret = build_roots_info_cache(info);
9328 path = btrfs_alloc_path();
9334 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9335 key.type = BTRFS_ROOT_ITEM_KEY;
9340 * Avoid opening and committing transactions if a leaf doesn't have
9341 * any root items that need to be fixed, so that we avoid rotating
9342 * backup roots unnecessarily.
9345 trans = btrfs_start_transaction(info->tree_root, 1);
9346 if (IS_ERR(trans)) {
9347 ret = PTR_ERR(trans);
9352 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9356 leaf = path->nodes[0];
9359 struct btrfs_key found_key;
9361 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9362 int no_more_keys = find_next_key(path, &key);
9364 btrfs_release_path(path);
9366 ret = btrfs_commit_transaction(trans,
9378 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9380 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9382 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9385 ret = maybe_repair_root_item(info, path, &found_key,
9390 if (!trans && repair) {
9393 btrfs_release_path(path);
9403 free_roots_info_cache();
9404 btrfs_free_path(path);
9406 btrfs_commit_transaction(trans, info->tree_root);
9413 const char * const cmd_check_usage[] = {
9414 "btrfs check [options] <device>",
9415 "Check structural inegrity of a filesystem (unmounted).",
9416 "Check structural inegrity of an unmounted filesystem. Verify internal",
9417 "trees' consistency and item connectivity. In the repair mode try to",
9418 "fix the problems found.",
9419 "WARNING: the repair mode is considered dangerous",
9421 "-s|--super <superblock> use this superblock copy",
9422 "-b|--backup use the backup root copy",
9423 "--repair try to repair the filesystem",
9424 "--readonly run in read-only mode (default)",
9425 "--init-csum-tree create a new CRC tree",
9426 "--init-extent-tree create a new extent tree",
9427 "--check-data-csum verify checkums of data blocks",
9428 "-Q|--qgroup-report print a report on qgroup consistency",
9429 "-E|--subvol-extents <subvolid>",
9430 " print subvolume extents and sharing state",
9431 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9432 "-p|--progress indicate progress",
9436 int cmd_check(int argc, char **argv)
9438 struct cache_tree root_cache;
9439 struct btrfs_root *root;
9440 struct btrfs_fs_info *info;
9443 u64 tree_root_bytenr = 0;
9444 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9447 int init_csum_tree = 0;
9449 int qgroup_report = 0;
9450 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9454 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9455 OPT_CHECK_CSUM, OPT_READONLY };
9456 static const struct option long_options[] = {
9457 { "super", required_argument, NULL, 's' },
9458 { "repair", no_argument, NULL, OPT_REPAIR },
9459 { "readonly", no_argument, NULL, OPT_READONLY },
9460 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9461 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9462 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9463 { "backup", no_argument, NULL, 'b' },
9464 { "subvol-extents", required_argument, NULL, 'E' },
9465 { "qgroup-report", no_argument, NULL, 'Q' },
9466 { "tree-root", required_argument, NULL, 'r' },
9467 { "progress", no_argument, NULL, 'p' },
9471 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9475 case 'a': /* ignored */ break;
9477 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9480 num = arg_strtou64(optarg);
9481 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9483 "ERROR: super mirror should be less than: %d\n",
9484 BTRFS_SUPER_MIRROR_MAX);
9487 bytenr = btrfs_sb_offset(((int)num));
9488 printf("using SB copy %llu, bytenr %llu\n", num,
9489 (unsigned long long)bytenr);
9495 subvolid = arg_strtou64(optarg);
9498 tree_root_bytenr = arg_strtou64(optarg);
9501 ctx.progress_enabled = true;
9505 usage(cmd_check_usage);
9507 printf("enabling repair mode\n");
9509 ctree_flags |= OPEN_CTREE_WRITES;
9515 printf("Creating a new CRC tree\n");
9518 ctree_flags |= OPEN_CTREE_WRITES;
9520 case OPT_INIT_EXTENT:
9521 init_extent_tree = 1;
9522 ctree_flags |= (OPEN_CTREE_WRITES |
9523 OPEN_CTREE_NO_BLOCK_GROUPS);
9526 case OPT_CHECK_CSUM:
9527 check_data_csum = 1;
9531 argc = argc - optind;
9533 if (check_argc_exact(argc, 1))
9534 usage(cmd_check_usage);
9536 if (ctx.progress_enabled) {
9537 ctx.tp = TASK_NOTHING;
9538 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9541 /* This check is the only reason for --readonly to exist */
9542 if (readonly && repair) {
9543 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9548 cache_tree_init(&root_cache);
9550 if((ret = check_mounted(argv[optind])) < 0) {
9551 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9554 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9559 /* only allow partial opening under repair mode */
9561 ctree_flags |= OPEN_CTREE_PARTIAL;
9563 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9566 fprintf(stderr, "Couldn't open file system\n");
9572 root = info->fs_root;
9575 * repair mode will force us to commit the transaction, which
9576 * will make us fail to load the log tree when mounting.
9578 if (repair && btrfs_super_log_root(info->super_copy)) {
9579 ret = ask_user("repair mode will force us to clear out the log tree, are you sure?");
9584 ret = zero_log_tree(root);
9586 fprintf(stderr, "fail to zero log tree\n");
9591 uuid_unparse(info->super_copy->fsid, uuidbuf);
9592 if (qgroup_report) {
9593 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9595 ret = qgroup_verify_all(info);
9597 print_qgroup_report(1);
9601 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9602 subvolid, argv[optind], uuidbuf);
9603 ret = print_extent_state(info, subvolid);
9606 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9608 if (!extent_buffer_uptodate(info->tree_root->node) ||
9609 !extent_buffer_uptodate(info->dev_root->node) ||
9610 !extent_buffer_uptodate(info->chunk_root->node)) {
9611 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9616 if (init_extent_tree || init_csum_tree) {
9617 struct btrfs_trans_handle *trans;
9619 trans = btrfs_start_transaction(info->extent_root, 0);
9620 if (IS_ERR(trans)) {
9621 fprintf(stderr, "Error starting transaction\n");
9622 ret = PTR_ERR(trans);
9626 if (init_extent_tree) {
9627 printf("Creating a new extent tree\n");
9628 ret = reinit_extent_tree(trans, info);
9633 if (init_csum_tree) {
9634 fprintf(stderr, "Reinit crc root\n");
9635 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9637 fprintf(stderr, "crc root initialization failed\n");
9642 ret = fill_csum_tree(trans, info->csum_root,
9645 fprintf(stderr, "crc refilling failed\n");
9650 * Ok now we commit and run the normal fsck, which will add
9651 * extent entries for all of the items it finds.
9653 ret = btrfs_commit_transaction(trans, info->extent_root);
9657 if (!extent_buffer_uptodate(info->extent_root->node)) {
9658 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9662 if (!extent_buffer_uptodate(info->csum_root->node)) {
9663 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9668 if (!ctx.progress_enabled)
9669 fprintf(stderr, "checking extents\n");
9670 ret = check_chunks_and_extents(root);
9672 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9674 ret = repair_root_items(info);
9678 fprintf(stderr, "Fixed %d roots.\n", ret);
9680 } else if (ret > 0) {
9682 "Found %d roots with an outdated root item.\n",
9685 "Please run a filesystem check with the option --repair to fix them.\n");
9690 if (!ctx.progress_enabled)
9691 fprintf(stderr, "checking free space cache\n");
9692 ret = check_space_cache(root);
9697 * We used to have to have these hole extents in between our real
9698 * extents, so if we don't have this flag set we need to make sure there
9699 * are no gaps in the file extents for inodes; otherwise we can just
9700 * ignore it when this happens.
9702 no_holes = btrfs_fs_incompat(root->fs_info,
9703 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9704 if (!ctx.progress_enabled)
9705 fprintf(stderr, "checking fs roots\n");
9706 ret = check_fs_roots(root, &root_cache);
9710 fprintf(stderr, "checking csums\n");
9711 ret = check_csums(root);
9715 fprintf(stderr, "checking root refs\n");
9716 ret = check_root_refs(root, &root_cache);
9720 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9721 struct extent_buffer *eb;
9723 eb = list_first_entry(&root->fs_info->recow_ebs,
9724 struct extent_buffer, recow);
9725 list_del_init(&eb->recow);
9726 ret = recow_extent_buffer(root, eb);
9731 while (!list_empty(&delete_items)) {
9732 struct bad_item *bad;
9734 bad = list_first_entry(&delete_items, struct bad_item, list);
9735 list_del_init(&bad->list);
9737 ret = delete_bad_item(root, bad);
9741 if (info->quota_enabled) {
9743 fprintf(stderr, "checking quota groups\n");
9744 err = qgroup_verify_all(info);
9749 if (!list_empty(&root->fs_info->recow_ebs)) {
9750 fprintf(stderr, "Transid errors in file system\n");
9754 print_qgroup_report(0);
9755 if (found_old_backref) { /*
9756 * there was a disk format change when mixed
9757 * backref was in the testing tree. The old format
9758 * existed for about one week.
9760 printf("\n * Found old mixed backref format. "
9761 "The old format is not supported! *"
9762 "\n * Please mount the FS in readonly mode, "
9763 "backup data and re-format the FS. *\n\n");
9766 printf("found %llu bytes used err is %d\n",
9767 (unsigned long long)bytes_used, ret);
9768 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9769 printf("total tree bytes: %llu\n",
9770 (unsigned long long)total_btree_bytes);
9771 printf("total fs tree bytes: %llu\n",
9772 (unsigned long long)total_fs_tree_bytes);
9773 printf("total extent tree bytes: %llu\n",
9774 (unsigned long long)total_extent_tree_bytes);
9775 printf("btree space waste bytes: %llu\n",
9776 (unsigned long long)btree_space_waste);
9777 printf("file data blocks allocated: %llu\n referenced %llu\n",
9778 (unsigned long long)data_bytes_allocated,
9779 (unsigned long long)data_bytes_referenced);
9781 free_root_recs_tree(&root_cache);
9785 if (ctx.progress_enabled)
9786 task_deinit(ctx.info);