2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
48 TASK_NOTHING, /* have to be the last element */
53 enum task_position tp;
55 struct task_info *info;
/*
 * File-scope state for the whole check run: accumulated space/usage
 * statistics plus the flags driven by the command line (repair,
 * no_holes, init_extent_tree, check_data_csum).
 * NOTE(review): this excerpt is a sampled extraction; declarations
 * between the visible lines may be missing from view.
 */
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
/* 1 when --repair was requested; checked throughout the fix paths */
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
/* shared with the progress-reporting task (see print_status_check) */
74 static struct task_ctx ctx = { 0 };
/*
 * Progress-reporting task body: periodically prints the name of the
 * current task phase plus a spinning work indicator ('.', 'o', 'O',
 * 'o'), until the phase becomes TASK_NOTHING.  print_status_return()
 * below is the matching "is it done" callback (its body is not
 * visible in this excerpt).
 */
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
81 static char *task_position_string[] = {
83 "checking free space cache",
/* wake up once per second to refresh the status line */
87 task_period_start(priv->info, 1000 /* 1s */);
89 if (priv->tp == TASK_NOTHING)
/* '\r' keeps rewriting the same console line */
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
97 task_period_wait(priv->info);
102 static int print_status_return(void *p)
/*
 * In-memory record types used while scanning the extent and fs trees.
 * Each mirrors an on-disk item and carries found_* bits recording which
 * pieces of the cross-referenced metadata were actually seen.
 */
/* common header for data/tree backrefs hanging off an extent_record */
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
/* backref for a data extent (EXTENT_DATA based reference) */
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
/* backref for a metadata (tree block) extent */
149 struct tree_backref {
150 struct extent_backref node;
/* one record per extent found in the extent tree, keyed by its cache */
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
/* one name linking a directory to an inode (dir item/index/inode ref) */
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
/* BTRFS_FT_* value; 8 bits is enough for the on-disk filetype */
189 unsigned int filetype:8;
191 unsigned int ref_type;
/* queue entry describing one root (subvolume) left to walk */
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
/*
 * Bit flags describing inconsistencies found on a single backref
 * (name link); collected in backref->errors and decoded for humans by
 * print_ref_error().
 */
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * File extent hole tracking: each inode_record keeps an rb-tree of
 * (start, len) holes in its file extents.  Helpers below insert (with
 * merging), delete (with splitting), copy and free holes.
 */
223 struct file_extent_hole {
229 /* Compatible function to allow reuse of old codes */
/* Return the start of the lowest hole (first gap) in the tree. */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator: order holes by start; on equal start the longer
 * hole wins so it becomes the merge center (never returns 0, so
 * rb_insert cannot report -EEXIST).
 */
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
260 /* Hole 2 will be merge center */
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
277 hole = malloc(sizeof(*hole));
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* overlap/adjacency with predecessor: absorb it into this hole */
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iterate merge with next holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
303 if (hole->start + hole->len >= next->start) {
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
/*
 * rb_search comparator: data is a key hole whose start is probed
 * against [hole->start, hole->start + hole->len).
 */
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
321 hole = (struct file_extent_hole *)data;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole in the record
335 * This will do the hole split and is much restrict than add.
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
346 struct rb_node *node;
/* find the hole containing 'start'; [start,len) must fit inside it */
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
357 if (start + len > hole->start + hole->len)
361 * Now there will be no overflap, delete the hole and re-add the
362 * split(s) if they exists.
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
/* Duplicate every hole from src into dst (merging as it goes). */
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
/* Erase and free every hole in the tree. */
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
417 node = rb_first(holes);
/*
 * Per-inode aggregation record built while walking an fs tree; the
 * found_* bits and the errors mask (I_ERR_* below) describe what was
 * seen versus what the inode item claims.
 */
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
/* gaps in the file extents (see file_extent_hole helpers above) */
443 struct rb_root holes;
444 struct list_head orphan_extents;
/*
 * Per-inode error bits, decoded for humans by print_inode_error().
 */
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * Remaining record types for the tree walk: root backrefs, cached
 * per-block shared nodes (root_cache/inode_cache), and the walk
 * control holding one shared_node per tree level.
 * NOTE(review): struct names for some fragments below (lines 489/494)
 * are not visible in this excerpt.
 */
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
489 struct cache_extent cache;
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
514 struct btrfs_key key;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark a root dirty in the current transaction (once per transid):
 * note the transid, pin the current node as commit_root and take an
 * extra reference on it.
 */
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
/* commit_root now also owns a reference to this extent buffer */
528 extent_buffer_get(root->node);
/*
 * Map a POSIX inode mode (S_IF* file type bits) to the corresponding
 * on-disk BTRFS_FT_* directory-entry type via a small lookup table.
 */
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparator for device_record nodes: orders by devid.
 */
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode_record, including its backref list, orphan data
 * extents and hole tree, so a shared record can be modified without
 * touching the original.  Returns ERR_PTR(-ENOMEM) on allocation
 * failure; the cleanup path at the end frees everything copied so far.
 */
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct inode_backref *tmp;
570 struct orphan_data_extent *src_orphan;
571 struct orphan_data_extent *dst_orphan;
575 rec = malloc(sizeof(*rec));
577 return ERR_PTR(-ENOMEM);
/* shallow copy first, then reset the embedded containers */
578 memcpy(rec, orig_rec, sizeof(*rec));
580 INIT_LIST_HEAD(&rec->backrefs);
581 INIT_LIST_HEAD(&rec->orphan_extents);
582 rec->holes = RB_ROOT;
584 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* backref carries its name inline after the struct, +1 for NUL */
585 size = sizeof(*orig) + orig->namelen + 1;
586 backref = malloc(size);
591 memcpy(backref, orig, size);
592 list_add_tail(&backref->list, &rec->backrefs);
594 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595 dst_orphan = malloc(sizeof(*dst_orphan));
600 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
603 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* error path: tear down the partially built clone */
609 if (!list_empty(&rec->backrefs))
610 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611 list_del(&orig->list);
615 if (!list_empty(&rec->orphan_extents))
616 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617 list_del(&orig->list);
/*
 * Report the orphan data extents recorded for a tree (no-op if the
 * list is empty).
 */
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
629 struct orphan_data_extent *orphan;
631 if (list_empty(orphan_extents))
633 printf("The following data extent is lost in tree %llu:\n",
635 list_for_each_entry(orphan, orphan_extents, list) {
636 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Decode rec->errors into a human readable line on stderr, then dump
 * orphan extents and file extent holes when the matching bits are set.
 */
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
644 u64 root_objectid = root->root_key.objectid;
645 int errors = rec->errors;
649 /* reloc root errors, we print its corresponding fs root objectid*/
650 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651 root_objectid = root->root_key.offset;
652 fprintf(stderr, "reloc");
654 fprintf(stderr, "root %llu inode %llu errors %x",
655 (unsigned long long) root_objectid,
656 (unsigned long long) rec->ino, rec->errors);
658 if (errors & I_ERR_NO_INODE_ITEM)
659 fprintf(stderr, ", no inode item");
660 if (errors & I_ERR_NO_ORPHAN_ITEM)
661 fprintf(stderr, ", no orphan item");
662 if (errors & I_ERR_DUP_INODE_ITEM)
663 fprintf(stderr, ", dup inode item");
664 if (errors & I_ERR_DUP_DIR_INDEX)
665 fprintf(stderr, ", dup dir index");
666 if (errors & I_ERR_ODD_DIR_ITEM)
667 fprintf(stderr, ", odd dir item");
668 if (errors & I_ERR_ODD_FILE_EXTENT)
669 fprintf(stderr, ", odd file extent");
670 if (errors & I_ERR_BAD_FILE_EXTENT)
671 fprintf(stderr, ", bad file extent");
672 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673 fprintf(stderr, ", file extent overlap");
674 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675 fprintf(stderr, ", file extent discount");
676 if (errors & I_ERR_DIR_ISIZE_WRONG)
677 fprintf(stderr, ", dir isize wrong");
678 if (errors & I_ERR_FILE_NBYTES_WRONG)
679 fprintf(stderr, ", nbytes wrong");
680 if (errors & I_ERR_ODD_CSUM_ITEM)
681 fprintf(stderr, ", odd csum item");
682 if (errors & I_ERR_SOME_CSUM_MISSING)
683 fprintf(stderr, ", some csum missing");
684 if (errors & I_ERR_LINK_COUNT_WRONG)
685 fprintf(stderr, ", link count wrong");
686 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687 fprintf(stderr, ", orphan file extent");
688 fprintf(stderr, "\n");
689 /* Print the orphan extents if needed */
690 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
693 /* Print the holes if needed */
694 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695 struct file_extent_hole *hole;
696 struct rb_node *node;
699 node = rb_first(&rec->holes);
700 fprintf(stderr, "Found file extent holes:\n");
703 hole = rb_entry(node, struct file_extent_hole, node);
704 fprintf(stderr, "\tstart: %llu, len: %llu\n",
705 hole->start, hole->len);
706 node = rb_next(node);
/* empty hole tree: the entire (rounded-up) file is a hole */
709 fprintf(stderr, "\tstart: 0, len: %llu\n",
710 round_up(rec->isize, root->sectorsize));
/*
 * Decode a REF_ERR_* bitmask into a comma separated, human readable
 * suffix on stderr (terminated with a newline).
 */
714 static void print_ref_error(int errors)
716 if (errors & REF_ERR_NO_DIR_ITEM)
717 fprintf(stderr, ", no dir item");
718 if (errors & REF_ERR_NO_DIR_INDEX)
719 fprintf(stderr, ", no dir index");
720 if (errors & REF_ERR_NO_INODE_REF)
721 fprintf(stderr, ", no inode ref");
722 if (errors & REF_ERR_DUP_DIR_ITEM)
723 fprintf(stderr, ", dup dir item");
724 if (errors & REF_ERR_DUP_DIR_INDEX)
725 fprintf(stderr, ", dup dir index");
726 if (errors & REF_ERR_DUP_INODE_REF)
727 fprintf(stderr, ", dup inode ref");
728 if (errors & REF_ERR_INDEX_UNMATCH)
729 fprintf(stderr, ", index unmatch");
730 if (errors & REF_ERR_FILETYPE_UNMATCH)
731 fprintf(stderr, ", filetype unmatch");
732 if (errors & REF_ERR_NAME_TOO_LONG)
733 fprintf(stderr, ", name too long");
734 if (errors & REF_ERR_NO_ROOT_REF)
735 fprintf(stderr, ", no root ref");
736 if (errors & REF_ERR_NO_ROOT_BACKREF)
737 fprintf(stderr, ", no root backref");
738 if (errors & REF_ERR_DUP_ROOT_REF)
739 fprintf(stderr, ", dup root ref");
740 if (errors & REF_ERR_DUP_ROOT_BACKREF)
741 fprintf(stderr, ", dup root backref");
742 fprintf(stderr, "\n");
/*
 * Look up (or lazily create) the inode_record for 'ino' in the cache.
 * When 'mod' is set and the record is shared (refs > 1) it is cloned
 * first so the caller can modify a private copy.  Returns an ERR_PTR
 * on allocation/insert failure.
 */
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
748 struct ptr_node *node;
749 struct cache_extent *cache;
750 struct inode_record *rec = NULL;
753 cache = lookup_cache_extent(inode_cache, ino, 1);
755 node = container_of(cache, struct ptr_node, cache);
/* copy-on-write: don't mutate a record shared with other nodes */
757 if (mod && rec->refs > 1) {
758 node->data = clone_inode_rec(rec);
759 if (IS_ERR(node->data))
/* not cached yet: build a fresh record */
765 rec = calloc(1, sizeof(*rec));
767 return ERR_PTR(-ENOMEM);
769 rec->extent_start = (u64)-1;
771 INIT_LIST_HEAD(&rec->backrefs);
772 INIT_LIST_HEAD(&rec->orphan_extents);
773 rec->holes = RB_ROOT;
775 node = malloc(sizeof(*node));
778 return ERR_PTR(-ENOMEM);
780 node->cache.start = ino;
781 node->cache.size = 1;
784 if (ino == BTRFS_FREE_INO_OBJECTID)
787 ret = insert_cache_extent(inode_cache, &node->cache);
789 return ERR_PTR(-EEXIST);
/*
 * Drain and free every orphan_data_extent on the list.
 */
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
796 struct orphan_data_extent *orphan;
798 while (!list_empty(orphan_extents)) {
799 orphan = list_entry(orphan_extents->next,
800 struct orphan_data_extent, list);
801 list_del(&orphan->list);
/*
 * Release an inode_record: its backrefs, orphan extents and hole tree.
 */
806 static void free_inode_rec(struct inode_record *rec)
808 struct inode_backref *backref;
813 while (!list_empty(&rec->backrefs)) {
814 backref = list_entry(rec->backrefs.next,
815 struct inode_backref, list);
816 list_del(&backref->list);
819 free_orphan_data_extents(&rec->orphan_extents);
820 free_file_extent_holes(&rec->holes);
/*
 * A record may be dropped once it is fully checked, error free, has
 * its inode item, the link count matches and no backrefs remain.
 */
824 static int can_free_inode_rec(struct inode_record *rec)
826 if (!rec->errors && rec->checked && rec->found_inode_item &&
827 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalize a record once its inode item was seen: cross-check each
 * fully found backref (filetype vs inode mode), derive the per-inode
 * error bits (dir size, nbytes, holes, csums), and drop the record
 * from the cache if can_free_inode_rec() says it is clean.
 */
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833 struct inode_record *rec)
835 struct cache_extent *cache;
836 struct inode_backref *tmp, *backref;
837 struct ptr_node *node;
838 unsigned char filetype;
840 if (!rec->found_inode_item)
843 filetype = imode_to_type(rec->imode);
844 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845 if (backref->found_dir_item && backref->found_dir_index) {
846 if (backref->filetype != filetype)
847 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
/* complete, consistent backref: no longer needs tracking */
848 if (!backref->errors && backref->found_inode_ref &&
849 rec->nlink == rec->found_link) {
850 list_del(&backref->list);
856 if (!rec->checked || rec->merging)
859 if (S_ISDIR(rec->imode)) {
860 if (rec->found_size != rec->isize)
861 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862 if (rec->found_file_extent)
863 rec->errors |= I_ERR_ODD_FILE_EXTENT;
864 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865 if (rec->found_dir_item)
866 rec->errors |= I_ERR_ODD_DIR_ITEM;
867 if (rec->found_size != rec->nbytes)
868 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* hole check is skipped for NO_HOLES filesystems */
869 if (rec->nlink > 0 && !no_holes &&
870 (rec->extent_end < rec->isize ||
871 first_extent_gap(&rec->holes) < rec->isize))
872 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
875 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876 if (rec->found_csum_item && rec->nodatasum)
877 rec->errors |= I_ERR_ODD_CSUM_ITEM;
878 if (rec->some_csum_missing && !rec->nodatasum)
879 rec->errors |= I_ERR_SOME_CSUM_MISSING;
882 BUG_ON(rec->refs != 1);
883 if (can_free_inode_rec(rec)) {
884 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885 node = container_of(cache, struct ptr_node, cache);
886 BUG_ON(node->data != rec);
887 remove_cache_extent(inode_cache, &node->cache);
/*
 * Search the root for an orphan item covering 'ino'
 * (BTRFS_ORPHAN_OBJECTID / BTRFS_ORPHAN_ITEM_KEY).
 */
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
895 struct btrfs_path path;
896 struct btrfs_key key;
899 key.objectid = BTRFS_ORPHAN_OBJECTID;
900 key.type = BTRFS_ORPHAN_ITEM_KEY;
903 btrfs_init_path(&path);
904 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905 btrfs_release_path(&path);
/*
 * Handle an INODE_ITEM found during the fs-tree walk: populate the
 * active record (nlink, isize, nbytes, imode, nodatasum) from the
 * on-disk item, flag duplicates, then try to finalize the record.
 */
911 static int process_inode_item(struct extent_buffer *eb,
912 int slot, struct btrfs_key *key,
913 struct shared_node *active_node)
915 struct inode_record *rec;
916 struct btrfs_inode_item *item;
918 rec = active_node->current;
919 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920 if (rec->found_inode_item) {
921 rec->errors |= I_ERR_DUP_INODE_ITEM;
924 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925 rec->nlink = btrfs_inode_nlink(eb, item);
926 rec->isize = btrfs_inode_size(eb, item);
927 rec->nbytes = btrfs_inode_nbytes(eb, item);
928 rec->imode = btrfs_inode_mode(eb, item);
929 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
931 rec->found_inode_item = 1;
933 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref matching (dir, name) on the record, or allocate a
 * new zeroed one (name stored inline, NUL terminated) and queue it.
 * BTRFS_MULTIPLE_OBJECTIDS records never match existing entries.
 * NOTE(review): the malloc result does not appear to be checked in
 * the lines visible here — confirm against the full source.
 */
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
940 int namelen, u64 dir)
942 struct inode_backref *backref;
944 list_for_each_entry(backref, &rec->backrefs, list) {
945 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
947 if (backref->dir != dir || backref->namelen != namelen)
949 if (memcmp(name, backref->name, namelen))
954 backref = malloc(sizeof(*backref) + namelen + 1);
955 memset(backref, 0, sizeof(*backref));
957 backref->namelen = namelen;
958 memcpy(backref->name, name, namelen);
959 backref->name[namelen] = '\0';
960 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one name link for 'ino': fetch/create the inode record and
 * its backref, then set the found_* bit matching the item type
 * (DIR_INDEX, DIR_ITEM, INODE_REF/EXTREF), raising REF_ERR_* bits on
 * duplicates and index/filetype mismatches along the way.
 */
964 static int add_inode_backref(struct cache_tree *inode_cache,
965 u64 ino, u64 dir, u64 index,
966 const char *name, int namelen,
967 int filetype, int itemtype, int errors)
969 struct inode_record *rec;
970 struct inode_backref *backref;
972 rec = get_inode_rec(inode_cache, ino, 1);
974 backref = get_inode_backref(rec, name, namelen, dir);
976 backref->errors |= errors;
977 if (itemtype == BTRFS_DIR_INDEX_KEY) {
978 if (backref->found_dir_index)
979 backref->errors |= REF_ERR_DUP_DIR_INDEX;
980 if (backref->found_inode_ref && backref->index != index)
981 backref->errors |= REF_ERR_INDEX_UNMATCH;
982 if (backref->found_dir_item && backref->filetype != filetype)
983 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
985 backref->index = index;
986 backref->filetype = filetype;
987 backref->found_dir_index = 1;
988 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
990 if (backref->found_dir_item)
991 backref->errors |= REF_ERR_DUP_DIR_ITEM;
992 if (backref->found_dir_index && backref->filetype != filetype)
993 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
995 backref->filetype = filetype;
996 backref->found_dir_item = 1;
997 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
998 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
999 if (backref->found_inode_ref)
1000 backref->errors |= REF_ERR_DUP_INODE_REF;
1001 if (backref->found_dir_index && backref->index != index)
1002 backref->errors |= REF_ERR_INDEX_UNMATCH;
1004 backref->index = index;
1006 backref->ref_type = itemtype;
1007 backref->found_inode_ref = 1;
/* the record may have just become complete */
1012 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold the information gathered in 'src' into 'dst' (same inode seen
 * through different shared nodes): replay src's backrefs into
 * dst_cache, merge found_* bits, link/size counters, extent ranges
 * (flagging overlaps, recording gaps as holes) and the inode item
 * fields themselves (flagging duplicates).
 */
1016 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1017 struct cache_tree *dst_cache)
1019 struct inode_backref *backref;
1024 list_for_each_entry(backref, &src->backrefs, list) {
1025 if (backref->found_dir_index) {
1026 add_inode_backref(dst_cache, dst->ino, backref->dir,
1027 backref->index, backref->name,
1028 backref->namelen, backref->filetype,
1029 BTRFS_DIR_INDEX_KEY, backref->errors);
1031 if (backref->found_dir_item) {
1033 add_inode_backref(dst_cache, dst->ino,
1034 backref->dir, 0, backref->name,
1035 backref->namelen, backref->filetype,
1036 BTRFS_DIR_ITEM_KEY, backref->errors);
1038 if (backref->found_inode_ref) {
1039 add_inode_backref(dst_cache, dst->ino,
1040 backref->dir, backref->index,
1041 backref->name, backref->namelen, 0,
1042 backref->ref_type, backref->errors);
1046 if (src->found_dir_item)
1047 dst->found_dir_item = 1;
1048 if (src->found_file_extent)
1049 dst->found_file_extent = 1;
1050 if (src->found_csum_item)
1051 dst->found_csum_item = 1;
1052 if (src->some_csum_missing)
1053 dst->some_csum_missing = 1;
/* src knows about an earlier gap: take over its hole tree too */
1054 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1055 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1060 BUG_ON(src->found_link < dir_count);
1061 dst->found_link += src->found_link - dir_count;
1062 dst->found_size += src->found_size;
1063 if (src->extent_start != (u64)-1) {
1064 if (dst->extent_start == (u64)-1) {
1065 dst->extent_start = src->extent_start;
1066 dst->extent_end = src->extent_end;
/* ranges overlap -> error; gap between them -> record a hole */
1068 if (dst->extent_end > src->extent_start)
1069 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1070 else if (dst->extent_end < src->extent_start) {
1071 ret = add_file_extent_hole(&dst->holes,
1073 src->extent_start - dst->extent_end);
1075 if (dst->extent_end < src->extent_end)
1076 dst->extent_end = src->extent_end;
1080 dst->errors |= src->errors;
1081 if (src->found_inode_item) {
1082 if (!dst->found_inode_item) {
1083 dst->nlink = src->nlink;
1084 dst->isize = src->isize;
1085 dst->nbytes = src->nbytes;
1086 dst->imode = src->imode;
1087 dst->nodatasum = src->nodatasum;
1088 dst->found_inode_item = 1;
1090 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move every inode record from src_node's caches (root_cache first,
 * then inode_cache) into dst_node, merging with any conflicting record
 * already present in the destination.  Also transfers the notion of
 * the "current" inode being processed when src was further along.
 */
1098 static int splice_shared_node(struct shared_node *src_node,
1099 struct shared_node *dst_node)
1101 struct cache_extent *cache;
1102 struct ptr_node *node, *ins;
1103 struct cache_tree *src, *dst;
1104 struct inode_record *rec, *conflict;
1105 u64 current_ino = 0;
1109 if (--src_node->refs == 0)
1111 if (src_node->current)
1112 current_ino = src_node->current->ino;
1114 src = &src_node->root_cache;
1115 dst = &dst_node->root_cache;
1117 cache = search_cache_extent(src, 0);
1119 node = container_of(cache, struct ptr_node, cache);
/* advance before removing the node from the src tree */
1121 cache = next_cache_extent(cache);
1124 remove_cache_extent(src, &node->cache);
1127 ins = malloc(sizeof(*ins));
1128 ins->cache.start = node->cache.start;
1129 ins->cache.size = node->cache.size;
1133 ret = insert_cache_extent(dst, &ins->cache);
/* destination already tracks this ino: merge instead of insert */
1134 if (ret == -EEXIST) {
1135 conflict = get_inode_rec(dst, rec->ino, 1);
1136 BUG_ON(IS_ERR(conflict));
1137 merge_inode_recs(rec, conflict, dst);
1139 conflict->checked = 1;
1140 if (dst_node->current == conflict)
1141 dst_node->current = NULL;
1143 maybe_free_inode_rec(dst, conflict);
1144 free_inode_rec(rec);
/* second pass: repeat the loop for the inode caches */
1151 if (src == &src_node->root_cache) {
1152 src = &src_node->inode_cache;
1153 dst = &dst_node->inode_cache;
1157 if (current_ino > 0 && (!dst_node->current ||
1158 current_ino > dst_node->current->ino)) {
1159 if (dst_node->current) {
1160 dst_node->current->checked = 1;
1161 maybe_free_inode_rec(dst, dst_node->current);
1163 dst_node->current = get_inode_rec(dst, current_ino, 1);
1164 BUG_ON(IS_ERR(dst_node->current));
/*
 * cache_tree destructor callback: free the inode record held by a
 * ptr_node (used by the FREE_EXTENT_CACHE_BASED_TREE helper below).
 */
1169 static void free_inode_ptr(struct cache_extent *cache)
1171 struct ptr_node *node;
1172 struct inode_record *rec;
1174 node = container_of(cache, struct ptr_node, cache);
1176 free_inode_rec(rec);
1180 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node tracking the tree block at 'bytenr'.
 */
1182 static struct shared_node *find_shared_node(struct cache_tree *shared,
1185 struct cache_extent *cache;
1186 struct shared_node *node;
1188 cache = lookup_cache_extent(shared, bytenr, 1);
1190 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate and register a shared_node for a tree block with the given
 * reference count.
 */
1196 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1199 struct shared_node *node;
1201 node = calloc(1, sizeof(*node));
1204 node->cache.start = bytenr;
1205 node->cache.size = 1;
1206 cache_tree_init(&node->root_cache);
1207 cache_tree_init(&node->inode_cache);
1210 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the walk descends into a (possibly shared) tree block:
 * first visit registers a new shared node and makes it the active
 * level; repeat visits either drop or splice the previously gathered
 * state into the currently active node.
 */
1215 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1216 struct walk_control *wc, int level)
1218 struct shared_node *node;
1219 struct shared_node *dest;
1222 if (level == wc->active_node)
1225 BUG_ON(wc->active_node <= level);
1226 node = find_shared_node(&wc->shared, bytenr);
1228 ret = add_shared_node(&wc->shared, bytenr, refs);
1230 node = find_shared_node(&wc->shared, bytenr);
1231 wc->nodes[level] = node;
1232 wc->active_node = level;
/* dead root (refs == 0): discard instead of splicing */
1236 if (wc->root_level == wc->active_node &&
1237 btrfs_root_refs(&root->root_item) == 0) {
1238 if (--node->refs == 0) {
1239 free_inode_recs_tree(&node->root_cache);
1240 free_inode_recs_tree(&node->inode_cache);
1241 remove_cache_extent(&wc->shared, &node->cache);
1247 dest = wc->nodes[wc->active_node];
1248 splice_shared_node(node, dest);
1249 if (node->refs == 0) {
1250 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): when the walk climbs back above
 * 'level', pop the active shared node and splice its gathered state
 * into the next populated ancestor level.
 */
1256 static int leave_shared_node(struct btrfs_root *root,
1257 struct walk_control *wc, int level)
1259 struct shared_node *node;
1260 struct shared_node *dest;
1263 if (level == wc->root_level)
/* find the nearest populated ancestor to become the new active node */
1266 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1270 BUG_ON(i >= BTRFS_MAX_LEVEL);
1272 node = wc->nodes[wc->active_node];
1273 wc->nodes[wc->active_node] = NULL;
1274 wc->active_node = i;
1276 dest = wc->nodes[wc->active_node];
1277 if (wc->active_node < wc->root_level ||
1278 btrfs_root_refs(&root->root_item) > 0) {
1279 BUG_ON(node->refs <= 1);
1280 splice_shared_node(node, dest);
1282 BUG_ON(node->refs < 2);
/*
 * Returns:
 */
1291 * 1 - if the root with id child_root_id is a child of root parent_root_id
1292 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1293 * has other root(s) as parent(s)
1294 * 2 - if the root child_root_id doesn't have any parent roots
1296 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1299 struct btrfs_path path;
1300 struct btrfs_key key;
1301 struct extent_buffer *leaf;
1305 btrfs_init_path(&path);
/* fast path: a direct ROOT_REF from parent to child */
1307 key.objectid = parent_root_id;
1308 key.type = BTRFS_ROOT_REF_KEY;
1309 key.offset = child_root_id;
1310 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1314 btrfs_release_path(&path);
/* slow path: scan the child's ROOT_BACKREF items for any parent */
1318 key.objectid = child_root_id;
1319 key.type = BTRFS_ROOT_BACKREF_KEY;
1321 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1327 leaf = path.nodes[0];
1328 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1329 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1332 leaf = path.nodes[0];
1335 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1336 if (key.objectid != child_root_id ||
1337 key.type != BTRFS_ROOT_BACKREF_KEY)
1342 if (key.offset == parent_root_id) {
1343 btrfs_release_path(&path);
1350 btrfs_release_path(&path);
1353 return has_parent ? 0 : 2;
/*
 * Handle a DIR_ITEM / DIR_INDEX item: iterate the packed dir entries
 * in the item, and for each one register an inode backref in the
 * matching cache (inode cache for INODE_ITEM targets, root cache for
 * ROOT_ITEM targets, MULTIPLE_OBJECTIDS bucket for anything else).
 * Over-long names are truncated to BTRFS_NAME_LEN and flagged.
 */
1356 static int process_dir_item(struct btrfs_root *root,
1357 struct extent_buffer *eb,
1358 int slot, struct btrfs_key *key,
1359 struct shared_node *active_node)
1369 struct btrfs_dir_item *di;
1370 struct inode_record *rec;
1371 struct cache_tree *root_cache;
1372 struct cache_tree *inode_cache;
1373 struct btrfs_key location;
1374 char namebuf[BTRFS_NAME_LEN];
1376 root_cache = &active_node->root_cache;
1377 inode_cache = &active_node->inode_cache;
1378 rec = active_node->current;
1379 rec->found_dir_item = 1;
1381 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1382 total = btrfs_item_size_nr(eb, slot);
/* one item can contain several packed dir entries */
1383 while (cur < total) {
1385 btrfs_dir_item_key_to_cpu(eb, di, &location);
1386 name_len = btrfs_dir_name_len(eb, di);
1387 data_len = btrfs_dir_data_len(eb, di);
1388 filetype = btrfs_dir_type(eb, di);
/* dir isize accounting counts the name bytes */
1390 rec->found_size += name_len;
1391 if (name_len <= BTRFS_NAME_LEN) {
1395 len = BTRFS_NAME_LEN;
1396 error = REF_ERR_NAME_TOO_LONG;
1398 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1400 if (location.type == BTRFS_INODE_ITEM_KEY) {
1401 add_inode_backref(inode_cache, location.objectid,
1402 key->objectid, key->offset, namebuf,
1403 len, filetype, key->type, error);
1404 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1405 add_inode_backref(root_cache, location.objectid,
1406 key->objectid, key->offset,
1407 namebuf, len, filetype,
1410 fprintf(stderr, "invalid location in dir item %u\n",
1412 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1413 key->objectid, key->offset, namebuf,
1414 len, filetype, key->type, error);
/* advance past this entry's header, name and payload */
1417 len = sizeof(*di) + name_len + data_len;
1418 di = (struct btrfs_dir_item *)((char *)di + len);
/* a DIR_INDEX item must hold exactly one entry */
1421 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1422 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Handle an INODE_REF item: iterate the packed refs (name + index)
 * and register one backref per ref; key->offset is the parent dir.
 * Over-long names are truncated to BTRFS_NAME_LEN and flagged.
 */
1427 static int process_inode_ref(struct extent_buffer *eb,
1428 int slot, struct btrfs_key *key,
1429 struct shared_node *active_node)
1437 struct cache_tree *inode_cache;
1438 struct btrfs_inode_ref *ref;
1439 char namebuf[BTRFS_NAME_LEN];
1441 inode_cache = &active_node->inode_cache;
1443 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1444 total = btrfs_item_size_nr(eb, slot);
1445 while (cur < total) {
1446 name_len = btrfs_inode_ref_name_len(eb, ref);
1447 index = btrfs_inode_ref_index(eb, ref);
1448 if (name_len <= BTRFS_NAME_LEN) {
1452 len = BTRFS_NAME_LEN;
1453 error = REF_ERR_NAME_TOO_LONG;
1455 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1456 add_inode_backref(inode_cache, key->objectid, key->offset,
1457 index, namebuf, len, 0, key->type, error);
1459 len = sizeof(*ref) + name_len;
1460 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Handle an INODE_EXTREF item: like process_inode_ref(), except the
 * parent directory id is stored inside each extref entry rather than
 * in the item key.
 */
1466 static int process_inode_extref(struct extent_buffer *eb,
1467 int slot, struct btrfs_key *key,
1468 struct shared_node *active_node)
1477 struct cache_tree *inode_cache;
1478 struct btrfs_inode_extref *extref;
1479 char namebuf[BTRFS_NAME_LEN];
1481 inode_cache = &active_node->inode_cache;
1483 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1484 total = btrfs_item_size_nr(eb, slot);
1485 while (cur < total) {
1486 name_len = btrfs_inode_extref_name_len(eb, extref);
1487 index = btrfs_inode_extref_index(eb, extref);
1488 parent = btrfs_inode_extref_parent(eb, extref);
1489 if (name_len <= BTRFS_NAME_LEN) {
1493 len = BTRFS_NAME_LEN;
1494 error = REF_ERR_NAME_TOO_LONG;
1496 read_extent_buffer(eb, namebuf,
1497 (unsigned long)(extref + 1), len);
1498 add_inode_backref(inode_cache, key->objectid, parent,
1499 index, namebuf, len, 0, key->type, error);
1501 len = sizeof(*extref) + name_len;
1502 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1509 static int count_csum_range(struct btrfs_root *root, u64 start,
1510 u64 len, u64 *found)
1512 struct btrfs_key key;
1513 struct btrfs_path path;
1514 struct extent_buffer *leaf;
1519 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1521 btrfs_init_path(&path);
1523 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1525 key.type = BTRFS_EXTENT_CSUM_KEY;
1527 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1531 if (ret > 0 && path.slots[0] > 0) {
1532 leaf = path.nodes[0];
1533 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1534 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1535 key.type == BTRFS_EXTENT_CSUM_KEY)
1540 leaf = path.nodes[0];
1541 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1547 leaf = path.nodes[0];
1550 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1551 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1552 key.type != BTRFS_EXTENT_CSUM_KEY)
1555 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1556 if (key.offset >= start + len)
1559 if (key.offset > start)
1562 size = btrfs_item_size_nr(leaf, path.slots[0]);
1563 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1564 if (csum_end > start) {
1565 size = min(csum_end - start, len);
1574 btrfs_release_path(&path);
1580 static int process_file_extent(struct btrfs_root *root,
1581 struct extent_buffer *eb,
1582 int slot, struct btrfs_key *key,
1583 struct shared_node *active_node)
1585 struct inode_record *rec;
1586 struct btrfs_file_extent_item *fi;
1588 u64 disk_bytenr = 0;
1589 u64 extent_offset = 0;
1590 u64 mask = root->sectorsize - 1;
1594 rec = active_node->current;
1595 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1596 rec->found_file_extent = 1;
1598 if (rec->extent_start == (u64)-1) {
1599 rec->extent_start = key->offset;
1600 rec->extent_end = key->offset;
1603 if (rec->extent_end > key->offset)
1604 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1605 else if (rec->extent_end < key->offset) {
1606 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1607 key->offset - rec->extent_end);
1612 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1613 extent_type = btrfs_file_extent_type(eb, fi);
1615 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1616 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1618 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1619 rec->found_size += num_bytes;
1620 num_bytes = (num_bytes + mask) & ~mask;
1621 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1622 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1623 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1624 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1625 extent_offset = btrfs_file_extent_offset(eb, fi);
1626 if (num_bytes == 0 || (num_bytes & mask))
1627 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1628 if (num_bytes + extent_offset >
1629 btrfs_file_extent_ram_bytes(eb, fi))
1630 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1631 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1632 (btrfs_file_extent_compression(eb, fi) ||
1633 btrfs_file_extent_encryption(eb, fi) ||
1634 btrfs_file_extent_other_encoding(eb, fi)))
1635 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1636 if (disk_bytenr > 0)
1637 rec->found_size += num_bytes;
1639 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1641 rec->extent_end = key->offset + num_bytes;
1644 * The data reloc tree will copy full extents into its inode and then
1645 * copy the corresponding csums. Because the extent it copied could be
1646 * a preallocated extent that hasn't been written to yet there may be no
1647 * csums to copy, ergo we won't have csums for our file extent. This is
1648 * ok so just don't bother checking csums if the inode belongs to the
1651 if (disk_bytenr > 0 &&
1652 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1654 if (btrfs_file_extent_compression(eb, fi))
1655 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1657 disk_bytenr += extent_offset;
1659 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1662 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1664 rec->found_csum_item = 1;
1665 if (found < num_bytes)
1666 rec->some_csum_missing = 1;
1667 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1669 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1675 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1676 struct walk_control *wc)
1678 struct btrfs_key key;
1682 struct cache_tree *inode_cache;
1683 struct shared_node *active_node;
1685 if (wc->root_level == wc->active_node &&
1686 btrfs_root_refs(&root->root_item) == 0)
1689 active_node = wc->nodes[wc->active_node];
1690 inode_cache = &active_node->inode_cache;
1691 nritems = btrfs_header_nritems(eb);
1692 for (i = 0; i < nritems; i++) {
1693 btrfs_item_key_to_cpu(eb, &key, i);
1695 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1697 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1700 if (active_node->current == NULL ||
1701 active_node->current->ino < key.objectid) {
1702 if (active_node->current) {
1703 active_node->current->checked = 1;
1704 maybe_free_inode_rec(inode_cache,
1705 active_node->current);
1707 active_node->current = get_inode_rec(inode_cache,
1709 BUG_ON(IS_ERR(active_node->current));
1712 case BTRFS_DIR_ITEM_KEY:
1713 case BTRFS_DIR_INDEX_KEY:
1714 ret = process_dir_item(root, eb, i, &key, active_node);
1716 case BTRFS_INODE_REF_KEY:
1717 ret = process_inode_ref(eb, i, &key, active_node);
1719 case BTRFS_INODE_EXTREF_KEY:
1720 ret = process_inode_extref(eb, i, &key, active_node);
1722 case BTRFS_INODE_ITEM_KEY:
1723 ret = process_inode_item(eb, i, &key, active_node);
1725 case BTRFS_EXTENT_DATA_KEY:
1726 ret = process_file_extent(root, eb, i, &key,
1736 static void reada_walk_down(struct btrfs_root *root,
1737 struct extent_buffer *node, int slot)
1746 level = btrfs_header_level(node);
1750 nritems = btrfs_header_nritems(node);
1751 blocksize = btrfs_level_size(root, level - 1);
1752 for (i = slot; i < nritems; i++) {
1753 bytenr = btrfs_node_blockptr(node, i);
1754 ptr_gen = btrfs_node_ptr_generation(node, i);
1755 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1760 * Check the child node/leaf by the following condition:
1761 * 1. the first item key of the node/leaf should be the same with the one
1763 * 2. block in parent node should match the child node/leaf.
1764 * 3. generation of parent node and child's header should be consistent.
1766 * Or the child node/leaf pointed by the key in parent is not valid.
1768 * We hope to check leaf owner too, but since subvol may share leaves,
1769 * which makes leaf owner check not so strong, key check should be
1770 * sufficient for that case.
1772 static int check_child_node(struct btrfs_root *root,
1773 struct extent_buffer *parent, int slot,
1774 struct extent_buffer *child)
1776 struct btrfs_key parent_key;
1777 struct btrfs_key child_key;
1780 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1781 if (btrfs_header_level(child) == 0)
1782 btrfs_item_key_to_cpu(child, &child_key, 0);
1784 btrfs_node_key_to_cpu(child, &child_key, 0);
1786 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1789 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1790 parent_key.objectid, parent_key.type, parent_key.offset,
1791 child_key.objectid, child_key.type, child_key.offset);
1793 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1795 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1796 btrfs_node_blockptr(parent, slot),
1797 btrfs_header_bytenr(child));
1799 if (btrfs_node_ptr_generation(parent, slot) !=
1800 btrfs_header_generation(child)) {
1802 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1803 btrfs_header_generation(child),
1804 btrfs_node_ptr_generation(parent, slot));
1809 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1810 struct walk_control *wc, int *level)
1812 enum btrfs_tree_block_status status;
1815 struct extent_buffer *next;
1816 struct extent_buffer *cur;
1821 WARN_ON(*level < 0);
1822 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1823 ret = btrfs_lookup_extent_info(NULL, root,
1824 path->nodes[*level]->start,
1825 *level, 1, &refs, NULL);
1832 ret = enter_shared_node(root, path->nodes[*level]->start,
1840 while (*level >= 0) {
1841 WARN_ON(*level < 0);
1842 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1843 cur = path->nodes[*level];
1845 if (btrfs_header_level(cur) != *level)
1848 if (path->slots[*level] >= btrfs_header_nritems(cur))
1851 ret = process_one_leaf(root, cur, wc);
1856 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1857 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1858 blocksize = btrfs_level_size(root, *level - 1);
1859 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1865 ret = enter_shared_node(root, bytenr, refs,
1868 path->slots[*level]++;
1873 next = btrfs_find_tree_block(root, bytenr, blocksize);
1874 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1875 free_extent_buffer(next);
1876 reada_walk_down(root, cur, path->slots[*level]);
1877 next = read_tree_block(root, bytenr, blocksize,
1879 if (!extent_buffer_uptodate(next)) {
1880 struct btrfs_key node_key;
1882 btrfs_node_key_to_cpu(path->nodes[*level],
1884 path->slots[*level]);
1885 btrfs_add_corrupt_extent_record(root->fs_info,
1887 path->nodes[*level]->start,
1888 root->leafsize, *level);
1894 ret = check_child_node(root, cur, path->slots[*level], next);
1900 if (btrfs_is_leaf(next))
1901 status = btrfs_check_leaf(root, NULL, next);
1903 status = btrfs_check_node(root, NULL, next);
1904 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1905 free_extent_buffer(next);
1910 *level = *level - 1;
1911 free_extent_buffer(path->nodes[*level]);
1912 path->nodes[*level] = next;
1913 path->slots[*level] = 0;
1916 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1920 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1921 struct walk_control *wc, int *level)
1924 struct extent_buffer *leaf;
1926 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1927 leaf = path->nodes[i];
1928 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1933 free_extent_buffer(path->nodes[*level]);
1934 path->nodes[*level] = NULL;
1935 BUG_ON(*level > wc->active_node);
1936 if (*level == wc->active_node)
1937 leave_shared_node(root, wc, *level);
1944 static int check_root_dir(struct inode_record *rec)
1946 struct inode_backref *backref;
1949 if (!rec->found_inode_item || rec->errors)
1951 if (rec->nlink != 1 || rec->found_link != 0)
1953 if (list_empty(&rec->backrefs))
1955 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1956 if (!backref->found_inode_ref)
1958 if (backref->index != 0 || backref->namelen != 2 ||
1959 memcmp(backref->name, "..", 2))
1961 if (backref->found_dir_index || backref->found_dir_item)
1968 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1969 struct btrfs_root *root, struct btrfs_path *path,
1970 struct inode_record *rec)
1972 struct btrfs_inode_item *ei;
1973 struct btrfs_key key;
1976 key.objectid = rec->ino;
1977 key.type = BTRFS_INODE_ITEM_KEY;
1978 key.offset = (u64)-1;
1980 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1984 if (!path->slots[0]) {
1991 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1992 if (key.objectid != rec->ino) {
1997 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1998 struct btrfs_inode_item);
1999 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2000 btrfs_mark_buffer_dirty(path->nodes[0]);
2001 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2002 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2003 root->root_key.objectid);
2005 btrfs_release_path(path);
2009 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2010 struct btrfs_root *root,
2011 struct btrfs_path *path,
2012 struct inode_record *rec)
2016 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2017 btrfs_release_path(path);
2019 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2023 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2024 struct btrfs_root *root,
2025 struct btrfs_path *path,
2026 struct inode_record *rec)
2028 struct btrfs_inode_item *ei;
2029 struct btrfs_key key;
2032 key.objectid = rec->ino;
2033 key.type = BTRFS_INODE_ITEM_KEY;
2036 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2043 /* Since ret == 0, no need to check anything */
2044 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2045 struct btrfs_inode_item);
2046 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2047 btrfs_mark_buffer_dirty(path->nodes[0]);
2048 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2049 printf("reset nbytes for ino %llu root %llu\n",
2050 rec->ino, root->root_key.objectid);
2052 btrfs_release_path(path);
2056 static int add_missing_dir_index(struct btrfs_root *root,
2057 struct cache_tree *inode_cache,
2058 struct inode_record *rec,
2059 struct inode_backref *backref)
2061 struct btrfs_path *path;
2062 struct btrfs_trans_handle *trans;
2063 struct btrfs_dir_item *dir_item;
2064 struct extent_buffer *leaf;
2065 struct btrfs_key key;
2066 struct btrfs_disk_key disk_key;
2067 struct inode_record *dir_rec;
2068 unsigned long name_ptr;
2069 u32 data_size = sizeof(*dir_item) + backref->namelen;
2072 path = btrfs_alloc_path();
2076 trans = btrfs_start_transaction(root, 1);
2077 if (IS_ERR(trans)) {
2078 btrfs_free_path(path);
2079 return PTR_ERR(trans);
2082 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2083 (unsigned long long)rec->ino);
2084 key.objectid = backref->dir;
2085 key.type = BTRFS_DIR_INDEX_KEY;
2086 key.offset = backref->index;
2088 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2091 leaf = path->nodes[0];
2092 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2094 disk_key.objectid = cpu_to_le64(rec->ino);
2095 disk_key.type = BTRFS_INODE_ITEM_KEY;
2096 disk_key.offset = 0;
2098 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2099 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2100 btrfs_set_dir_data_len(leaf, dir_item, 0);
2101 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2102 name_ptr = (unsigned long)(dir_item + 1);
2103 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2104 btrfs_mark_buffer_dirty(leaf);
2105 btrfs_free_path(path);
2106 btrfs_commit_transaction(trans, root);
2108 backref->found_dir_index = 1;
2109 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2110 BUG_ON(IS_ERR(dir_rec));
2113 dir_rec->found_size += backref->namelen;
2114 if (dir_rec->found_size == dir_rec->isize &&
2115 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2116 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2117 if (dir_rec->found_size != dir_rec->isize)
2118 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2123 static int delete_dir_index(struct btrfs_root *root,
2124 struct cache_tree *inode_cache,
2125 struct inode_record *rec,
2126 struct inode_backref *backref)
2128 struct btrfs_trans_handle *trans;
2129 struct btrfs_dir_item *di;
2130 struct btrfs_path *path;
2133 path = btrfs_alloc_path();
2137 trans = btrfs_start_transaction(root, 1);
2138 if (IS_ERR(trans)) {
2139 btrfs_free_path(path);
2140 return PTR_ERR(trans);
2144 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2145 (unsigned long long)backref->dir,
2146 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2147 (unsigned long long)root->objectid);
2149 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2150 backref->name, backref->namelen,
2151 backref->index, -1);
2154 btrfs_free_path(path);
2155 btrfs_commit_transaction(trans, root);
2162 ret = btrfs_del_item(trans, root, path);
2164 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2166 btrfs_free_path(path);
2167 btrfs_commit_transaction(trans, root);
2171 static int create_inode_item(struct btrfs_root *root,
2172 struct inode_record *rec,
2173 struct inode_backref *backref, int root_dir)
2175 struct btrfs_trans_handle *trans;
2176 struct btrfs_inode_item inode_item;
2177 time_t now = time(NULL);
2180 trans = btrfs_start_transaction(root, 1);
2181 if (IS_ERR(trans)) {
2182 ret = PTR_ERR(trans);
2186 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2187 "be incomplete, please check permissions and content after "
2188 "the fsck completes.\n", (unsigned long long)root->objectid,
2189 (unsigned long long)rec->ino);
2191 memset(&inode_item, 0, sizeof(inode_item));
2192 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2194 btrfs_set_stack_inode_nlink(&inode_item, 1);
2196 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2197 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2198 if (rec->found_dir_item) {
2199 if (rec->found_file_extent)
2200 fprintf(stderr, "root %llu inode %llu has both a dir "
2201 "item and extents, unsure if it is a dir or a "
2202 "regular file so setting it as a directory\n",
2203 (unsigned long long)root->objectid,
2204 (unsigned long long)rec->ino);
2205 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2206 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2207 } else if (!rec->found_dir_item) {
2208 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2209 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2211 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2212 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2213 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2214 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2215 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2216 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2217 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2218 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2220 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2222 btrfs_commit_transaction(trans, root);
2226 static int repair_inode_backrefs(struct btrfs_root *root,
2227 struct inode_record *rec,
2228 struct cache_tree *inode_cache,
2231 struct inode_backref *tmp, *backref;
2232 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2236 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2237 if (!delete && rec->ino == root_dirid) {
2238 if (!rec->found_inode_item) {
2239 ret = create_inode_item(root, rec, backref, 1);
2246 /* Index 0 for root dir's are special, don't mess with it */
2247 if (rec->ino == root_dirid && backref->index == 0)
2251 ((backref->found_dir_index && !backref->found_inode_ref) ||
2252 (backref->found_dir_index && backref->found_inode_ref &&
2253 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2254 ret = delete_dir_index(root, inode_cache, rec, backref);
2258 list_del(&backref->list);
2262 if (!delete && !backref->found_dir_index &&
2263 backref->found_dir_item && backref->found_inode_ref) {
2264 ret = add_missing_dir_index(root, inode_cache, rec,
2269 if (backref->found_dir_item &&
2270 backref->found_dir_index &&
2271 backref->found_dir_index) {
2272 if (!backref->errors &&
2273 backref->found_inode_ref) {
2274 list_del(&backref->list);
2280 if (!delete && (!backref->found_dir_index &&
2281 !backref->found_dir_item &&
2282 backref->found_inode_ref)) {
2283 struct btrfs_trans_handle *trans;
2284 struct btrfs_key location;
2286 ret = check_dir_conflict(root, backref->name,
2292 * let nlink fixing routine to handle it,
2293 * which can do it better.
2298 location.objectid = rec->ino;
2299 location.type = BTRFS_INODE_ITEM_KEY;
2300 location.offset = 0;
2302 trans = btrfs_start_transaction(root, 1);
2303 if (IS_ERR(trans)) {
2304 ret = PTR_ERR(trans);
2307 fprintf(stderr, "adding missing dir index/item pair "
2309 (unsigned long long)rec->ino);
2310 ret = btrfs_insert_dir_item(trans, root, backref->name,
2312 backref->dir, &location,
2313 imode_to_type(rec->imode),
2316 btrfs_commit_transaction(trans, root);
2320 if (!delete && (backref->found_inode_ref &&
2321 backref->found_dir_index &&
2322 backref->found_dir_item &&
2323 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2324 !rec->found_inode_item)) {
2325 ret = create_inode_item(root, rec, backref, 0);
2332 return ret ? ret : repaired;
2336 * To determine the file type for nlink/inode_item repair
2338 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2339 * Return -ENOENT if file type is not found.
2341 static int find_file_type(struct inode_record *rec, u8 *type)
2343 struct inode_backref *backref;
2345 /* For inode item recovered case */
2346 if (rec->found_inode_item) {
2347 *type = imode_to_type(rec->imode);
2351 list_for_each_entry(backref, &rec->backrefs, list) {
2352 if (backref->found_dir_index || backref->found_dir_item) {
2353 *type = backref->filetype;
2361 * To determine the file name for nlink repair
2363 * Return 0 if file name is found, set name and namelen.
2364 * Return -ENOENT if file name is not found.
2366 static int find_file_name(struct inode_record *rec,
2367 char *name, int *namelen)
2369 struct inode_backref *backref;
2371 list_for_each_entry(backref, &rec->backrefs, list) {
2372 if (backref->found_dir_index || backref->found_dir_item ||
2373 backref->found_inode_ref) {
2374 memcpy(name, backref->name, backref->namelen);
2375 *namelen = backref->namelen;
2382 /* Reset the nlink of the inode to the correct one */
2383 static int reset_nlink(struct btrfs_trans_handle *trans,
2384 struct btrfs_root *root,
2385 struct btrfs_path *path,
2386 struct inode_record *rec)
2388 struct inode_backref *backref;
2389 struct inode_backref *tmp;
2390 struct btrfs_key key;
2391 struct btrfs_inode_item *inode_item;
2394 /* We don't believe this either, reset it and iterate backref */
2395 rec->found_link = 0;
2397 /* Remove all backref including the valid ones */
2398 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2399 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2400 backref->index, backref->name,
2401 backref->namelen, 0);
2405 /* remove invalid backref, so it won't be added back */
2406 if (!(backref->found_dir_index &&
2407 backref->found_dir_item &&
2408 backref->found_inode_ref)) {
2409 list_del(&backref->list);
2416 /* Set nlink to 0 */
2417 key.objectid = rec->ino;
2418 key.type = BTRFS_INODE_ITEM_KEY;
2420 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2427 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2428 struct btrfs_inode_item);
2429 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2430 btrfs_mark_buffer_dirty(path->nodes[0]);
2431 btrfs_release_path(path);
2434 * Add back valid inode_ref/dir_item/dir_index,
2435 * add_link() will handle the nlink inc, so new nlink must be correct
2437 list_for_each_entry(backref, &rec->backrefs, list) {
2438 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2439 backref->name, backref->namelen,
2440 backref->filetype, &backref->index, 1);
2445 btrfs_release_path(path);
2449 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2450 struct btrfs_root *root,
2451 struct btrfs_path *path,
2452 struct inode_record *rec)
2454 char *dir_name = "lost+found";
2455 char namebuf[BTRFS_NAME_LEN] = {0};
2460 int name_recovered = 0;
2461 int type_recovered = 0;
2465 * Get file name and type first before these invalid inode ref
2466 * are deleted by remove_all_invalid_backref()
2468 name_recovered = !find_file_name(rec, namebuf, &namelen);
2469 type_recovered = !find_file_type(rec, &type);
2471 if (!name_recovered) {
2472 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2473 rec->ino, rec->ino);
2474 namelen = count_digits(rec->ino);
2475 sprintf(namebuf, "%llu", rec->ino);
2478 if (!type_recovered) {
2479 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2481 type = BTRFS_FT_REG_FILE;
2485 ret = reset_nlink(trans, root, path, rec);
2488 "Failed to reset nlink for inode %llu: %s\n",
2489 rec->ino, strerror(-ret));
2493 if (rec->found_link == 0) {
2494 lost_found_ino = root->highest_inode;
2495 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2500 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2501 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2504 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2505 dir_name, strerror(-ret));
2508 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2509 namebuf, namelen, type, NULL, 1);
2511 * Add ".INO" suffix several times to handle case where
2512 * "FILENAME.INO" is already taken by another file.
2514 while (ret == -EEXIST) {
2516 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2518 if (namelen + count_digits(rec->ino) + 1 >
2523 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2525 namelen += count_digits(rec->ino) + 1;
2526 ret = btrfs_add_link(trans, root, rec->ino,
2527 lost_found_ino, namebuf,
2528 namelen, type, NULL, 1);
2532 "Failed to link the inode %llu to %s dir: %s\n",
2533 rec->ino, dir_name, strerror(-ret));
2537 * Just increase the found_link, don't actually add the
2538 * backref. This will make things easier and this inode
2539 * record will be freed after the repair is done.
2540 * So fsck will not report problem about this inode.
2543 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2544 namelen, namebuf, dir_name);
2546 printf("Fixed the nlink of inode %llu\n", rec->ino);
2549 * Clear the flag anyway, or we will loop forever for the same inode
2550 * as it will not be removed from the bad inode list and the dead loop
2553 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2554 btrfs_release_path(path);
2559 * Check if there is any normal(reg or prealloc) file extent for given
2561 * This is used to determine the file type when neither its dir_index/item nor
2562 * inode_item exists.
2564 * This will *NOT* report error, if any error happens, just consider it does
2565 * not have any normal file extent.
2567 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2569 struct btrfs_path *path;
2570 struct btrfs_key key;
2571 struct btrfs_key found_key;
2572 struct btrfs_file_extent_item *fi;
2576 path = btrfs_alloc_path();
2580 key.type = BTRFS_EXTENT_DATA_KEY;
2583 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2588 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2589 ret = btrfs_next_leaf(root, path);
2596 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2598 if (found_key.objectid != ino ||
2599 found_key.type != BTRFS_EXTENT_DATA_KEY)
2601 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2602 struct btrfs_file_extent_item);
2603 type = btrfs_file_extent_type(path->nodes[0], fi);
2604 if (type != BTRFS_FILE_EXTENT_INLINE) {
2610 btrfs_free_path(path);
2614 static u32 btrfs_type_to_imode(u8 type)
2616 static u32 imode_by_btrfs_type[] = {
2617 [BTRFS_FT_REG_FILE] = S_IFREG,
2618 [BTRFS_FT_DIR] = S_IFDIR,
2619 [BTRFS_FT_CHRDEV] = S_IFCHR,
2620 [BTRFS_FT_BLKDEV] = S_IFBLK,
2621 [BTRFS_FT_FIFO] = S_IFIFO,
2622 [BTRFS_FT_SOCK] = S_IFSOCK,
2623 [BTRFS_FT_SYMLINK] = S_IFLNK,
2626 return imode_by_btrfs_type[(type)];
2629 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2630 struct btrfs_root *root,
2631 struct btrfs_path *path,
2632 struct inode_record *rec)
2636 int type_recovered = 0;
2639 printf("Trying to rebuild inode:%llu\n", rec->ino);
2641 type_recovered = !find_file_type(rec, &filetype);
2644 * Try to determine inode type if type not found.
2646 * For found regular file extent, it must be FILE.
2647 * For found dir_item/index, it must be DIR.
2649 * For undetermined one, use FILE as fallback.
2652 * 1. If found backref(inode_index/item is already handled) to it,
2654 * Need new inode-inode ref structure to allow search for that.
2656 if (!type_recovered) {
2657 if (rec->found_file_extent &&
2658 find_normal_file_extent(root, rec->ino)) {
2660 filetype = BTRFS_FT_REG_FILE;
2661 } else if (rec->found_dir_item) {
2663 filetype = BTRFS_FT_DIR;
2664 } else if (!list_empty(&rec->orphan_extents)) {
2666 filetype = BTRFS_FT_REG_FILE;
2668 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2671 filetype = BTRFS_FT_REG_FILE;
2675 ret = btrfs_new_inode(trans, root, rec->ino,
2676 mode | btrfs_type_to_imode(filetype));
2681 * Here inode rebuild is done, we only rebuild the inode item,
2682 * don't repair the nlink(like move to lost+found).
2683 * That is the job of nlink repair.
2685 * We just fill the record and return
2687 rec->found_dir_item = 1;
2688 rec->imode = mode | btrfs_type_to_imode(filetype);
2690 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2691 /* Ensure the inode_nlinks repair function will be called */
2692 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2697 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2698 struct btrfs_root *root,
2699 struct btrfs_path *path,
2700 struct inode_record *rec)
2702 struct orphan_data_extent *orphan;
2703 struct orphan_data_extent *tmp;
2706 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2708 * Check for conflicting file extents
2710 * Here we don't know whether the extents is compressed or not,
2711 * so we can only assume it not compressed nor data offset,
2712 * and use its disk_len as extent length.
2714 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2715 orphan->offset, orphan->disk_len, 0);
2716 btrfs_release_path(path);
2721 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2722 orphan->disk_bytenr, orphan->disk_len);
2723 ret = btrfs_free_extent(trans,
2724 root->fs_info->extent_root,
2725 orphan->disk_bytenr, orphan->disk_len,
2726 0, root->objectid, orphan->objectid,
2731 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2732 orphan->offset, orphan->disk_bytenr,
2733 orphan->disk_len, orphan->disk_len);
2737 /* Update file size info */
2738 rec->found_size += orphan->disk_len;
2739 if (rec->found_size == rec->nbytes)
2740 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2742 /* Update the file extent hole info too */
2743 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2747 if (RB_EMPTY_ROOT(&rec->holes))
2748 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2750 list_del(&orphan->list);
2753 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill every recorded file-extent hole of @rec with punched holes
 * (explicit hole extents), then clear I_ERR_FILE_EXTENT_DISCOUNT once
 * the hole tree is empty.  Also handles the degenerate case of a file
 * that lost all of its file extents.
 *
 * NOTE(review): excerpt is missing lines (loop condition, 'ret'
 * declaration, error returns) — verify against the full source.
 */
2758 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2759 struct btrfs_root *root,
2760 struct btrfs_path *path,
2761 struct inode_record *rec)
2763 struct rb_node *node;
2764 struct file_extent_hole *hole;
/* Walk the rb-tree of holes, punching each one into the file tree. */
2768 node = rb_first(&rec->holes);
2772 hole = rb_entry(node, struct file_extent_hole, node);
2773 ret = btrfs_punch_hole(trans, root, rec->ino,
2774 hole->start, hole->len);
2777 ret = del_file_extent_hole(&rec->holes, hole->start,
2781 if (RB_EMPTY_ROOT(&rec->holes))
2782 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2783 node = rb_first(&rec->holes);
2785 /* special case for a file losing all its file extent */
2787 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2788 round_up(rec->isize, root->sectorsize));
2792 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2793 rec->ino, root->objectid);
/*
 * Attempt to repair a single inode record.  Only a whitelisted set of
 * error bits is repairable; anything else returns early.  Repairs are
 * ordered so that earlier fixes (e.g. recreating a missing inode item)
 * make later ones possible; each step runs only while ret == 0.
 */
2798 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2800 struct btrfs_trans_handle *trans;
2801 struct btrfs_path *path;
/* Bail out unless at least one repairable error bit is set. */
2804 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2805 I_ERR_NO_ORPHAN_ITEM |
2806 I_ERR_LINK_COUNT_WRONG |
2807 I_ERR_NO_INODE_ITEM |
2808 I_ERR_FILE_EXTENT_ORPHAN |
2809 I_ERR_FILE_EXTENT_DISCOUNT|
2810 I_ERR_FILE_NBYTES_WRONG)))
2813 path = btrfs_alloc_path();
2818 * For nlink repair, it may create a dir and add link, so
2819 * 2 for parent(256)'s dir_index and dir_item
2820 * 2 for lost+found dir's inode_item and inode_ref
2821 * 1 for the new inode_ref of the file
2822 * 2 for lost+found dir's dir_index and dir_item for the file
/* 7 items reserved, per the accounting in the comment above. */
2824 trans = btrfs_start_transaction(root, 7);
2825 if (IS_ERR(trans)) {
2826 btrfs_free_path(path);
2827 return PTR_ERR(trans);
/* Repairs chain: each later fix is skipped once a step fails. */
2830 if (rec->errors & I_ERR_NO_INODE_ITEM)
2831 ret = repair_inode_no_item(trans, root, path, rec);
2832 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2833 ret = repair_inode_orphan_extent(trans, root, path, rec);
2834 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2835 ret = repair_inode_discount_extent(trans, root, path, rec);
2836 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2837 ret = repair_inode_isize(trans, root, path, rec);
2838 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2839 ret = repair_inode_orphan_item(trans, root, path, rec);
2840 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2841 ret = repair_inode_nlinks(trans, root, path, rec);
2842 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2843 ret = repair_inode_nbytes(trans, root, path, rec);
2844 btrfs_commit_transaction(trans, root);
2845 btrfs_free_path(path);
/*
 * Validate (and optionally repair) every inode record collected for
 * @root.  Handles the root directory specially (recreating it when
 * missing, if repair is enabled), repairs backrefs before inode-level
 * errors, and prints unresolved errors for anything left over.
 * Returns -1 if any error remains, 0 otherwise.
 *
 * NOTE(review): excerpt is missing lines ('ret'/'err'/'stage'/'error'
 * declarations, loop heads, braces) — confirm details in full source.
 */
2849 static int check_inode_recs(struct btrfs_root *root,
2850 struct cache_tree *inode_cache)
2852 struct cache_extent *cache;
2853 struct ptr_node *node;
2854 struct inode_record *rec;
2855 struct inode_backref *backref;
2860 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (no refs) should have no inode records at all. */
2862 if (btrfs_root_refs(&root->root_item) == 0) {
2863 if (!cache_tree_empty(inode_cache))
2864 fprintf(stderr, "warning line %d\n", __LINE__);
2869 * We need to record the highest inode number for later 'lost+found'
2871 * We must select a ino not used/refered by any existing inode, or
2872 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2873 * this may cause 'lost+found' dir has wrong nlinks.
2875 cache = last_cache_extent(inode_cache);
2877 node = container_of(cache, struct ptr_node, cache);
2879 if (rec->ino > root->highest_inode)
2880 root->highest_inode = rec->ino;
2884 * We need to repair backrefs first because we could change some of the
2885 * errors in the inode recs.
2887 * We also need to go through and delete invalid backrefs first and then
2888 * add the correct ones second. We do this because we may get EEXIST
2889 * when adding back the correct index because we hadn't yet deleted the
2892 * For example, if we were missing a dir index then the directories
2893 * isize would be wrong, so if we fixed the isize to what we thought it
2894 * would be and then fixed the backref we'd still have a invalid fs, so
2895 * we need to add back the dir index and then check to see if the isize
2900 if (stage == 3 && !err)
/* Backref-repair pass: only runs in repair mode. */
2903 cache = search_cache_extent(inode_cache, 0);
2904 while (repair && cache) {
2905 node = container_of(cache, struct ptr_node, cache);
2907 cache = next_cache_extent(cache);
2909 /* Need to free everything up and rescan */
2911 remove_cache_extent(inode_cache, &node->cache);
2913 free_inode_rec(rec);
2917 if (list_empty(&rec->backrefs))
2920 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Check the root directory inode itself. */
2934 rec = get_inode_rec(inode_cache, root_dirid, 0);
2935 BUG_ON(IS_ERR(rec));
2937 ret = check_root_dir(rec);
2939 fprintf(stderr, "root %llu root dir %llu error\n",
2940 (unsigned long long)root->root_key.objectid,
2941 (unsigned long long)root_dirid);
2942 print_inode_error(root, rec);
/* Repair path: recreate a missing root dir inside a transaction. */
2947 struct btrfs_trans_handle *trans;
2949 trans = btrfs_start_transaction(root, 1);
2950 if (IS_ERR(trans)) {
2951 err = PTR_ERR(trans);
2956 "root %llu missing its root dir, recreating\n",
2957 (unsigned long long)root->objectid);
2959 ret = btrfs_make_root_dir(trans, root, root_dirid);
2962 btrfs_commit_transaction(trans, root);
2966 fprintf(stderr, "root %llu root dir %llu not found\n",
2967 (unsigned long long)root->root_key.objectid,
2968 (unsigned long long)root_dirid);
/* Main pass: drain the inode cache, checking/repairing each record. */
2972 cache = search_cache_extent(inode_cache, 0);
2975 node = container_of(cache, struct ptr_node, cache);
2977 remove_cache_extent(inode_cache, &node->cache);
/* Root dir and the orphan objectid were handled/are special. */
2979 if (rec->ino == root_dirid ||
2980 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2981 free_inode_rec(rec);
2985 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2986 ret = check_orphan_item(root, rec->ino);
2988 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2989 if (can_free_inode_rec(rec)) {
2990 free_inode_rec(rec);
2995 if (!rec->found_inode_item)
2996 rec->errors |= I_ERR_NO_INODE_ITEM;
2997 if (rec->found_link != rec->nlink)
2998 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3000 ret = try_repair_inode(root, rec);
3001 if (ret == 0 && can_free_inode_rec(rec)) {
3002 free_inode_rec(rec);
/* Unrepaired record: report it and its unresolved backrefs. */
3008 if (!(repair && ret == 0))
3010 print_inode_error(root, rec);
3011 list_for_each_entry(backref, &rec->backrefs, list) {
3012 if (!backref->found_dir_item)
3013 backref->errors |= REF_ERR_NO_DIR_ITEM;
3014 if (!backref->found_dir_index)
3015 backref->errors |= REF_ERR_NO_DIR_INDEX;
3016 if (!backref->found_inode_ref)
3017 backref->errors |= REF_ERR_NO_INODE_REF;
3018 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3019 " namelen %u name %s filetype %d errors %x",
3020 (unsigned long long)backref->dir,
3021 (unsigned long long)backref->index,
3022 backref->namelen, backref->name,
3023 backref->filetype, backref->errors);
3024 print_ref_error(backref->errors);
3026 free_inode_rec(rec);
3028 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record on a cache miss.
 *
 * NOTE(review): the calloc() result is not visibly NULL-checked in this
 * excerpt, and the insert error handling is not visible — confirm.
 */
3031 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3034 struct cache_extent *cache;
3035 struct root_record *rec = NULL;
3038 cache = lookup_cache_extent(root_cache, objectid, 1);
3040 rec = container_of(cache, struct root_record, cache);
/* Miss: build a new record keyed by objectid with size 1. */
3042 rec = calloc(1, sizeof(*rec));
3043 rec->objectid = objectid;
3044 INIT_LIST_HEAD(&rec->backrefs);
3045 rec->cache.start = objectid;
3046 rec->cache.size = 1;
3048 ret = insert_cache_extent(root_cache, &rec->cache);
/*
 * Find the backref on @rec matching (ref_root, dir, name); allocate and
 * append a new one when no match exists.  The name is stored inline
 * after the struct (hence the "+ namelen + 1" allocation) and is
 * NUL-terminated.
 */
3054 static struct root_backref *get_root_backref(struct root_record *rec,
3055 u64 ref_root, u64 dir, u64 index,
3056 const char *name, int namelen)
3058 struct root_backref *backref;
3060 list_for_each_entry(backref, &rec->backrefs, list) {
3061 if (backref->ref_root != ref_root || backref->dir != dir ||
3062 backref->namelen != namelen)
3064 if (memcmp(name, backref->name, namelen))
/* No existing match: create one with the name stored inline. */
3069 backref = calloc(1, sizeof(*backref) + namelen + 1);
3070 backref->ref_root = ref_root;
3072 backref->index = index;
3073 backref->namelen = namelen;
3074 memcpy(backref->name, name, namelen);
3075 backref->name[namelen] = '\0';
3076 list_add_tail(&backref->list, &rec->backrefs);
/*
 * cache_tree destructor callback: free a root_record and every backref
 * hanging off it.  Used via FREE_EXTENT_CACHE_BASED_TREE below to
 * generate free_root_recs_tree().
 */
3080 static void free_root_record(struct cache_extent *cache)
3082 struct root_record *rec;
3083 struct root_backref *backref;
3085 rec = container_of(cache, struct root_record, cache);
3086 while (!list_empty(&rec->backrefs)) {
3087 backref = list_entry(rec->backrefs.next,
3088 struct root_backref, list);
3089 list_del(&backref->list);
3096 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root @root_id coming from @ref_root.  The
 * reference may be seen from up to four item types (DIR_ITEM,
 * DIR_INDEX, ROOT_REF, ROOT_BACKREF); each sighting sets the matching
 * found_* flag, and duplicates/mismatches accumulate in
 * backref->errors.  A root counts as reachable once both a forward
 * root ref and a dir item were seen.
 */
3098 static int add_root_backref(struct cache_tree *root_cache,
3099 u64 root_id, u64 ref_root, u64 dir, u64 index,
3100 const char *name, int namelen,
3101 int item_type, int errors)
3103 struct root_record *rec;
3104 struct root_backref *backref;
3106 rec = get_root_rec(root_cache, root_id);
3107 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3109 backref->errors |= errors;
/* DIR_ITEM carries no index; everything else must agree on it. */
3111 if (item_type != BTRFS_DIR_ITEM_KEY) {
3112 if (backref->found_dir_index || backref->found_back_ref ||
3113 backref->found_forward_ref) {
3114 if (backref->index != index)
3115 backref->errors |= REF_ERR_INDEX_UNMATCH;
3117 backref->index = index;
3121 if (item_type == BTRFS_DIR_ITEM_KEY) {
3122 if (backref->found_forward_ref)
3124 backref->found_dir_item = 1;
3125 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3126 backref->found_dir_index = 1;
3127 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3128 if (backref->found_forward_ref)
3129 backref->errors |= REF_ERR_DUP_ROOT_REF;
3130 else if (backref->found_dir_item)
3132 backref->found_forward_ref = 1;
3133 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3134 if (backref->found_back_ref)
3135 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3136 backref->found_back_ref = 1;
/* Reachable = a forward root ref plus the directory entry exist. */
3141 if (backref->found_forward_ref && backref->found_dir_item)
3142 backref->reachable = 1;
/*
 * Move per-subvolume findings from @src_cache into the global root
 * cache: inode records that are really child-root dir entries become
 * root backrefs in @dst_cache.  The reloc tree is skipped entirely
 * (its records are just freed).
 *
 * NOTE(review): excerpt is missing lines (loop heads, is_child_root()
 * result handling, 'ret' declaration) — confirm in full source.
 */
3146 static int merge_root_recs(struct btrfs_root *root,
3147 struct cache_tree *src_cache,
3148 struct cache_tree *dst_cache)
3150 struct cache_extent *cache;
3151 struct ptr_node *node;
3152 struct inode_record *rec;
3153 struct inode_backref *backref;
3156 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3157 free_inode_recs_tree(src_cache);
3162 cache = search_cache_extent(src_cache, 0);
3165 node = container_of(cache, struct ptr_node, cache);
3167 remove_cache_extent(src_cache, &node->cache);
3170 ret = is_child_root(root, root->objectid, rec->ino);
/* Translate each dir_item/dir_index sighting into a root backref. */
3176 list_for_each_entry(backref, &rec->backrefs, list) {
3177 BUG_ON(backref->found_inode_ref);
3178 if (backref->found_dir_item)
3179 add_root_backref(dst_cache, rec->ino,
3180 root->root_key.objectid, backref->dir,
3181 backref->index, backref->name,
3182 backref->namelen, BTRFS_DIR_ITEM_KEY,
3184 if (backref->found_dir_index)
3185 add_root_backref(dst_cache, rec->ino,
3186 root->root_key.objectid, backref->dir,
3187 backref->index, backref->name,
3188 backref->namelen, BTRFS_DIR_INDEX_KEY,
3192 free_inode_rec(rec);
/*
 * Verify reachability of every fs tree.  First iteratively clears
 * backrefs whose referencing root is itself unreferenced (propagating
 * unreachability; circular references are explicitly NOT detected),
 * then reports unreferenced trees and unresolved backref errors.
 * Returns 1 if any error was found, 0 otherwise.
 *
 * NOTE(review): excerpt is missing lines ('errors'/'ret' declarations,
 * loop heads, the fixed-point loop control) — confirm in full source.
 */
3199 static int check_root_refs(struct btrfs_root *root,
3200 struct cache_tree *root_cache)
3202 struct root_record *rec;
3203 struct root_record *ref_root;
3204 struct root_backref *backref;
3205 struct cache_extent *cache;
/* The top-level fs tree is always considered referenced. */
3211 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3214 /* fixme: this can not detect circular references */
/* Pass 1: drop backrefs coming from roots that are unreferenced. */
3217 cache = search_cache_extent(root_cache, 0);
3221 rec = container_of(cache, struct root_record, cache);
3222 cache = next_cache_extent(cache);
3224 if (rec->found_ref == 0)
3227 list_for_each_entry(backref, &rec->backrefs, list) {
3228 if (!backref->reachable)
3231 ref_root = get_root_rec(root_cache,
3233 if (ref_root->found_ref > 0)
3236 backref->reachable = 0;
3238 if (rec->found_ref == 0)
/* Pass 2: report what is still wrong. */
3244 cache = search_cache_extent(root_cache, 0);
3248 rec = container_of(cache, struct root_record, cache);
3249 cache = next_cache_extent(cache);
3251 if (rec->found_ref == 0 &&
3252 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3253 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3254 ret = check_orphan_item(root->fs_info->tree_root,
3260 * If we don't have a root item then we likely just have
3261 * a dir item in a snapshot for this root but no actual
3262 * ref key or anything so it's meaningless.
3264 if (!rec->found_root_item)
3267 fprintf(stderr, "fs tree %llu not referenced\n",
3268 (unsigned long long)rec->objectid);
3272 if (rec->found_ref > 0 && !rec->found_root_item)
3274 list_for_each_entry(backref, &rec->backrefs, list) {
3275 if (!backref->found_dir_item)
3276 backref->errors |= REF_ERR_NO_DIR_ITEM;
3277 if (!backref->found_dir_index)
3278 backref->errors |= REF_ERR_NO_DIR_INDEX;
3279 if (!backref->found_back_ref)
3280 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3281 if (!backref->found_forward_ref)
3282 backref->errors |= REF_ERR_NO_ROOT_REF;
3283 if (backref->reachable && backref->errors)
3290 fprintf(stderr, "fs tree %llu refs %u %s\n",
3291 (unsigned long long)rec->objectid, rec->found_ref,
3292 rec->found_root_item ? "" : "not found");
3294 list_for_each_entry(backref, &rec->backrefs, list) {
3295 if (!backref->reachable)
3297 if (!backref->errors && rec->found_root_item)
3299 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3300 " index %llu namelen %u name %s errors %x\n",
3301 (unsigned long long)backref->ref_root,
3302 (unsigned long long)backref->dir,
3303 (unsigned long long)backref->index,
3304 backref->namelen, backref->name,
3306 print_ref_error(backref->errors);
3309 return errors > 0 ? 1 : 0;
/*
 * Decode a ROOT_REF/ROOT_BACKREF item at @slot of @eb and feed it into
 * the root cache.  For ROOT_REF the key is (parent, REF, child); for
 * ROOT_BACKREF it is (child, BACKREF, parent) — hence the swapped
 * objectid/offset in the two add_root_backref() calls.  Over-long
 * names are truncated to BTRFS_NAME_LEN and flagged.
 */
3312 static int process_root_ref(struct extent_buffer *eb, int slot,
3313 struct btrfs_key *key,
3314 struct cache_tree *root_cache)
3320 struct btrfs_root_ref *ref;
3321 char namebuf[BTRFS_NAME_LEN];
3324 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3326 dirid = btrfs_root_ref_dirid(eb, ref);
3327 index = btrfs_root_ref_sequence(eb, ref);
3328 name_len = btrfs_root_ref_name_len(eb, ref);
3330 if (name_len <= BTRFS_NAME_LEN) {
3334 len = BTRFS_NAME_LEN;
3335 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the root_ref struct. */
3337 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3339 if (key->type == BTRFS_ROOT_REF_KEY) {
3340 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3341 index, namebuf, len, key->type, error);
3343 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3344 index, namebuf, len, key->type, error);
/*
 * cache_tree destructor callback for btrfs_corrupt_block entries; used
 * below to generate free_corrupt_blocks_tree().
 */
3349 static void free_corrupt_block(struct cache_extent *cache)
3351 struct btrfs_corrupt_block *corrupt;
3353 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3357 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3360 * Repair the btree of the given root.
3362 * The fix is to remove the node key in corrupt_blocks cache_tree.
3363 * and rebalance the tree.
3364 * After the fix, the btree should be writeable.
/*
 * Implementation of the repair described in the comment above: pass 1
 * deletes the node pointers to each corrupt block (ins_len == 0 so no
 * rebalancing happens next to corrupted siblings) and frees the
 * underlying extents; pass 2 re-runs btrfs_search_slot() with
 * ins_len == -1 purely to trigger tree balancing.
 */
3366 static int repair_btree(struct btrfs_root *root,
3367 struct cache_tree *corrupt_blocks)
3369 struct btrfs_trans_handle *trans;
3370 struct btrfs_path *path;
3371 struct btrfs_corrupt_block *corrupt;
3372 struct cache_extent *cache;
3373 struct btrfs_key key;
3378 if (cache_tree_empty(corrupt_blocks))
3381 path = btrfs_alloc_path();
3385 trans = btrfs_start_transaction(root, 1);
3386 if (IS_ERR(trans)) {
3387 ret = PTR_ERR(trans);
3388 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: unlink each corrupt block and free its extent. */
3392 cache = first_cache_extent(corrupt_blocks);
3394 corrupt = container_of(cache, struct btrfs_corrupt_block,
3396 level = corrupt->level;
3397 path->lowest_level = level;
3398 key.objectid = corrupt->key.objectid;
3399 key.type = corrupt->key.type;
3400 key.offset = corrupt->key.offset;
3403 * Here we don't want to do any tree balance, since it may
3404 * cause a balance with corrupted brother leaf/node,
3405 * so ins_len set to 0 here.
3406 * Balance will be done after all corrupt node/leaf is deleted.
3408 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3411 offset = btrfs_node_blockptr(path->nodes[level],
3412 path->slots[level]);
3414 /* Remove the ptr */
3415 ret = btrfs_del_ptr(trans, root, path, level,
3416 path->slots[level]);
3420 * Remove the corresponding extent
3421 * return value is not concerned.
3423 btrfs_release_path(path);
3424 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3425 0, root->root_key.objectid,
3427 cache = next_cache_extent(cache);
3430 /* Balance the btree using btrfs_search_slot() */
3431 cache = first_cache_extent(corrupt_blocks);
3433 corrupt = container_of(cache, struct btrfs_corrupt_block,
3435 memcpy(&key, &corrupt->key, sizeof(key));
3436 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3439 /* return will always >0 since it won't find the item */
3441 btrfs_release_path(path);
3442 cache = next_cache_extent(cache);
3445 btrfs_commit_transaction(trans, root);
3447 btrfs_free_path(path);
/*
 * Fully check one fs/subvolume tree: move pending orphan data extents
 * onto their inode records, validate the root block, walk the whole
 * tree (honoring drop_progress for partially-dropped snapshots),
 * optionally repair corrupted tree blocks, then merge root refs and
 * check all inode records.
 *
 * NOTE(review): excerpt is missing lines ('ret'/'err'/'wret'/'level'
 * declarations, several error checks and braces) — confirm in full
 * source.
 */
3451 static int check_fs_root(struct btrfs_root *root,
3452 struct cache_tree *root_cache,
3453 struct walk_control *wc)
3459 struct btrfs_path path;
3460 struct shared_node root_node;
3461 struct root_record *rec;
3462 struct btrfs_root_item *root_item = &root->root_item;
3463 struct cache_tree corrupt_blocks;
3464 struct orphan_data_extent *orphan;
3465 struct orphan_data_extent *tmp;
3466 enum btrfs_tree_block_status status;
3469 * Reuse the corrupt_block cache tree to record corrupted tree block
3471 * Unlike the usage in extent tree check, here we do it in a per
3472 * fs/subvol tree base.
3474 cache_tree_init(&corrupt_blocks);
3475 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* The reloc tree has no root_record of its own. */
3477 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3478 rec = get_root_rec(root_cache, root->root_key.objectid);
3479 if (btrfs_root_refs(root_item) > 0)
3480 rec->found_root_item = 1;
3483 btrfs_init_path(&path);
3484 memset(&root_node, 0, sizeof(root_node));
3485 cache_tree_init(&root_node.root_cache);
3486 cache_tree_init(&root_node.inode_cache);
3488 /* Move the orphan extent record to corresponding inode_record */
3489 list_for_each_entry_safe(orphan, tmp,
3490 &root->orphan_data_extents, list) {
3491 struct inode_record *inode;
3493 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3495 BUG_ON(IS_ERR(inode));
3496 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3497 list_move(&orphan->list, &inode->orphan_extents);
3500 level = btrfs_header_level(root->node);
3501 memset(wc->nodes, 0, sizeof(wc->nodes));
3502 wc->nodes[level] = &root_node;
3503 wc->active_node = level;
3504 wc->root_level = level;
3506 /* We may not have checked the root block, lets do that now */
3507 if (btrfs_is_leaf(root->node))
3508 status = btrfs_check_leaf(root, NULL, root->node);
3510 status = btrfs_check_node(root, NULL, root->node);
3511 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root: start at the top; dropped root: resume at drop_progress. */
3514 if (btrfs_root_refs(root_item) > 0 ||
3515 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3516 path.nodes[level] = root->node;
3517 extent_buffer_get(root->node);
3518 path.slots[level] = 0;
3520 struct btrfs_key key;
3521 struct btrfs_disk_key found_key;
3523 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3524 level = root_item->drop_level;
3525 path.lowest_level = level;
3526 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3529 btrfs_node_key(path.nodes[level], &found_key,
3531 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3532 sizeof(found_key)));
/* Walk the whole tree, descending and ascending until done. */
3536 wret = walk_down_tree(root, &path, wc, &level);
3542 wret = walk_up_tree(root, &path, wc, &level);
3549 btrfs_release_path(&path);
3551 if (!cache_tree_empty(&corrupt_blocks)) {
3552 struct cache_extent *cache;
3553 struct btrfs_corrupt_block *corrupt;
3555 printf("The following tree block(s) is corrupted in tree %llu:\n",
3556 root->root_key.objectid);
3557 cache = first_cache_extent(&corrupt_blocks);
3559 corrupt = container_of(cache,
3560 struct btrfs_corrupt_block,
3562 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3563 cache->start, corrupt->level,
3564 corrupt->key.objectid, corrupt->key.type,
3565 corrupt->key.offset);
3566 cache = next_cache_extent(cache);
3569 printf("Try to repair the btree for root %llu\n",
3570 root->root_key.objectid);
3571 ret = repair_btree(root, &corrupt_blocks);
3573 fprintf(stderr, "Failed to repair btree: %s\n",
3576 printf("Btree for root %llu is fixed\n",
3577 root->root_key.objectid);
3581 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3585 if (root_node.current) {
3586 root_node.current->checked = 1;
3587 maybe_free_inode_rec(&root_node.inode_cache,
3591 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down the per-root corrupt-block tracking before returning. */
3595 free_corrupt_blocks_tree(&corrupt_blocks);
3596 root->fs_info->corrupt_blocks = NULL;
3597 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return non-zero when @objectid names a tree whose contents are laid
 * out like an fs tree (real fs trees plus the two reloc trees).
 */
3601 static int fs_root_objectid(u64 objectid)
3603 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3604 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3606 return is_fstree(objectid);
/*
 * Iterate all ROOT_ITEMs in the tree root and run check_fs_root() on
 * each fs tree; ROOT_REF/ROOT_BACKREF items are fed into the root
 * cache instead.  If the tree root node changes underneath us (repairs
 * committed a transaction) or a check returns -EAGAIN, the root cache
 * is dropped and the scan restarts from the beginning.
 *
 * NOTE(review): excerpt is missing lines (the 'again:' label,
 * 'ret'/'err' declarations, several error branches) — confirm in full
 * source.
 */
3609 static int check_fs_roots(struct btrfs_root *root,
3610 struct cache_tree *root_cache)
3612 struct btrfs_path path;
3613 struct btrfs_key key;
3614 struct walk_control wc;
3615 struct extent_buffer *leaf, *tree_node;
3616 struct btrfs_root *tmp_root;
3617 struct btrfs_root *tree_root = root->fs_info->tree_root;
3621 if (ctx.progress_enabled) {
3622 ctx.tp = TASK_FS_ROOTS;
3623 task_start(ctx.info);
3627 * Just in case we made any changes to the extent tree that weren't
3628 * reflected into the free space cache yet.
3631 reset_cached_block_groups(root->fs_info);
3632 memset(&wc, 0, sizeof(wc));
3633 cache_tree_init(&wc.shared);
3634 btrfs_init_path(&path);
3639 key.type = BTRFS_ROOT_ITEM_KEY;
3640 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the root node so we can detect concurrent COW below. */
3645 tree_node = tree_root->node;
3647 if (tree_node != tree_root->node) {
3648 free_root_recs_tree(root_cache);
3649 btrfs_release_path(&path);
3652 leaf = path.nodes[0];
3653 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3654 ret = btrfs_next_leaf(tree_root, &path);
3660 leaf = path.nodes[0];
3662 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3663 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3664 fs_root_objectid(key.objectid)) {
/* Reloc roots are not cached; real fs roots are. */
3665 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3666 tmp_root = btrfs_read_fs_root_no_cache(
3667 root->fs_info, &key);
3669 key.offset = (u64)-1;
3670 tmp_root = btrfs_read_fs_root(
3671 root->fs_info, &key);
3673 if (IS_ERR(tmp_root)) {
3677 ret = check_fs_root(tmp_root, root_cache, &wc);
3678 if (ret == -EAGAIN) {
3679 free_root_recs_tree(root_cache);
3680 btrfs_release_path(&path);
3685 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3686 btrfs_free_fs_root(tmp_root);
3687 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3688 key.type == BTRFS_ROOT_BACKREF_KEY) {
3689 process_root_ref(leaf, path.slots[0], &key,
3696 btrfs_release_path(&path);
3698 free_extent_cache_tree(&wc.shared);
3699 if (!cache_tree_empty(&wc.shared))
3700 fprintf(stderr, "warning line %d\n", __LINE__);
3702 task_stop(ctx.info);
/*
 * Cross-check every backref attached to @rec: each must appear in the
 * extent tree, tree backrefs must be referenced back, data backref
 * local counts/bytenr/bytes must match the extent record, and the sum
 * of all refs must equal rec->refs.  Errors are printed only when
 * @print_errs is set; returns non-zero if anything mismatched.
 *
 * NOTE(review): excerpt is missing lines ('err'/'found' declarations,
 * "goto out" style early exits when !print_errs) — confirm in full
 * source.
 */
3707 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3709 struct list_head *cur = rec->backrefs.next;
3710 struct extent_backref *back;
3711 struct tree_backref *tback;
3712 struct data_backref *dback;
3716 while(cur != &rec->backrefs) {
3717 back = list_entry(cur, struct extent_backref, list);
3719 if (!back->found_extent_tree) {
3723 if (back->is_data) {
3724 dback = (struct data_backref *)back;
3725 fprintf(stderr, "Backref %llu %s %llu"
3726 " owner %llu offset %llu num_refs %lu"
3727 " not found in extent tree\n",
3728 (unsigned long long)rec->start,
3729 back->full_backref ?
3731 back->full_backref ?
3732 (unsigned long long)dback->parent:
3733 (unsigned long long)dback->root,
3734 (unsigned long long)dback->owner,
3735 (unsigned long long)dback->offset,
3736 (unsigned long)dback->num_refs);
3738 tback = (struct tree_backref *)back;
3739 fprintf(stderr, "Backref %llu parent %llu"
3740 " root %llu not found in extent tree\n",
3741 (unsigned long long)rec->start,
3742 (unsigned long long)tback->parent,
3743 (unsigned long long)tback->root);
3746 if (!back->is_data && !back->found_ref) {
3750 tback = (struct tree_backref *)back;
3751 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3752 (unsigned long long)rec->start,
3753 back->full_backref ? "parent" : "root",
3754 back->full_backref ?
3755 (unsigned long long)tback->parent :
3756 (unsigned long long)tback->root, back);
3758 if (back->is_data) {
3759 dback = (struct data_backref *)back;
3760 if (dback->found_ref != dback->num_refs) {
3764 fprintf(stderr, "Incorrect local backref count"
3765 " on %llu %s %llu owner %llu"
3766 " offset %llu found %u wanted %u back %p\n",
3767 (unsigned long long)rec->start,
3768 back->full_backref ?
3770 back->full_backref ?
3771 (unsigned long long)dback->parent:
3772 (unsigned long long)dback->root,
3773 (unsigned long long)dback->owner,
3774 (unsigned long long)dback->offset,
3775 dback->found_ref, dback->num_refs, back);
3777 if (dback->disk_bytenr != rec->start) {
3781 fprintf(stderr, "Backref disk bytenr does not"
3782 " match extent record, bytenr=%llu, "
3783 "ref bytenr=%llu\n",
3784 (unsigned long long)rec->start,
3785 (unsigned long long)dback->disk_bytenr);
3788 if (dback->bytes != rec->nr) {
3792 fprintf(stderr, "Backref bytes do not match "
3793 "extent backref, bytenr=%llu, ref "
3794 "bytes=%llu, backref bytes=%llu\n",
3795 (unsigned long long)rec->start,
3796 (unsigned long long)rec->nr,
3797 (unsigned long long)dback->bytes);
/* Tally all refs; they must sum to the extent item's ref count. */
3800 if (!back->is_data) {
3803 dback = (struct data_backref *)back;
3804 found += dback->found_ref;
3807 if (found != rec->refs) {
3811 fprintf(stderr, "Incorrect global backref count "
3812 "on %llu found %llu wanted %llu\n",
3813 (unsigned long long)rec->start,
3814 (unsigned long long)found,
3815 (unsigned long long)rec->refs);
/*
 * Drain and free every backref attached to an extent_record.
 */
3821 static int free_all_extent_backrefs(struct extent_record *rec)
3823 struct extent_backref *back;
3824 struct list_head *cur;
3825 while (!list_empty(&rec->backrefs)) {
3826 cur = rec->backrefs.next;
3827 back = list_entry(cur, struct extent_backref, list);
/*
 * Empty @extent_cache, freeing each extent_record and all of its
 * backrefs.
 */
3834 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3835 struct cache_tree *extent_cache)
3837 struct cache_extent *cache;
3838 struct extent_record *rec;
3841 cache = first_cache_extent(extent_cache);
3844 rec = container_of(cache, struct extent_record, cache);
3845 remove_cache_extent(extent_cache, cache);
3846 free_all_extent_backrefs(rec);
/*
 * Release @rec early if it is fully verified: content and owner
 * checked, ref counts consistent, no duplicates, all backpointers
 * resolved, and none of the "bad" flags set.  Otherwise keep it for
 * later error reporting.
 */
3851 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3852 struct extent_record *rec)
3854 if (rec->content_checked && rec->owner_ref_checked &&
3855 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3856 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3857 !rec->bad_full_backref && !rec->crossing_stripes &&
3858 !rec->wrong_chunk_type) {
3859 remove_cache_extent(extent_cache, &rec->cache);
3860 free_all_extent_backrefs(rec);
3861 list_del_init(&rec->list);
/*
 * Decide whether @buf's header owner is plausible for @rec: first look
 * for a matching non-full tree backref; failing that, search the
 * owner's fs tree one level above @buf and verify the parent node
 * actually points at buf->start.  Returns 0 when the owner checks out,
 * 1 otherwise.
 *
 * NOTE(review): excerpt is missing lines ('found'/'level'/'ret'
 * declarations, leaf-vs-node branch heads) — confirm in full source.
 */
3867 static int check_owner_ref(struct btrfs_root *root,
3868 struct extent_record *rec,
3869 struct extent_buffer *buf)
3871 struct extent_backref *node;
3872 struct tree_backref *back;
3873 struct btrfs_root *ref_root;
3874 struct btrfs_key key;
3875 struct btrfs_path path;
3876 struct extent_buffer *parent;
3881 list_for_each_entry(node, &rec->backrefs, list) {
3884 if (!node->found_ref)
3886 if (node->full_backref)
3888 back = (struct tree_backref *)node;
3889 if (btrfs_header_owner(buf) == back->root)
3892 BUG_ON(rec->is_root);
3894 /* try to find the block by search corresponding fs tree */
3895 key.objectid = btrfs_header_owner(buf);
3896 key.type = BTRFS_ROOT_ITEM_KEY;
3897 key.offset = (u64)-1;
3899 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3900 if (IS_ERR(ref_root))
3903 level = btrfs_header_level(buf);
/* Use buf's first key to locate its slot in the claimed owner tree. */
3905 btrfs_item_key_to_cpu(buf, &key, 0);
3907 btrfs_node_key_to_cpu(buf, &key, 0);
3909 btrfs_init_path(&path);
3910 path.lowest_level = level + 1;
3911 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3915 parent = path.nodes[level + 1];
3916 if (parent && buf->start == btrfs_node_blockptr(parent,
3917 path.slots[level + 1]))
3920 btrfs_release_path(&path);
3921 return found ? 0 : 1;
/*
 * Return whether @rec carries a non-full tree backref rooted at the
 * extent tree, i.e. the block belongs to the extent tree itself.
 */
3924 static int is_extent_tree_record(struct extent_record *rec)
3926 struct list_head *cur = rec->backrefs.next;
3927 struct extent_backref *node;
3928 struct tree_backref *back;
3931 while(cur != &rec->backrefs) {
3932 node = list_entry(cur, struct extent_backref, list);
3936 back = (struct tree_backref *)node;
3937 if (node->full_backref)
3939 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * On a read/IO failure for [start, start+len), record the block as a
 * corrupt extent so the extent-tree rebuild can deal with it — but
 * only for extent-tree blocks (others are handled elsewhere).
 */
3946 static int record_bad_block_io(struct btrfs_fs_info *info,
3947 struct cache_tree *extent_cache,
3950 struct extent_record *rec;
3951 struct cache_extent *cache;
3952 struct btrfs_key key;
3954 cache = lookup_cache_extent(extent_cache, start, len);
3958 rec = container_of(cache, struct extent_record, cache);
3959 if (!is_extent_tree_record(rec))
3962 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3963 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 inside @buf to restore key
 * order.  For internal nodes the whole key_ptr pair is exchanged; for
 * leaves the item headers, item data and keys are all swapped (data is
 * staged through temporary heap buffers).  When slot 0 changes, parent
 * keys are fixed up via btrfs_fixup_low_keys()/
 * btrfs_set_item_key_unsafe().
 */
3966 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3967 struct extent_buffer *buf, int slot)
3969 if (btrfs_header_level(buf)) {
3970 struct btrfs_key_ptr ptr1, ptr2;
3972 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3973 sizeof(struct btrfs_key_ptr));
3974 read_extent_buffer(buf, &ptr2,
3975 btrfs_node_key_ptr_offset(slot + 1),
3976 sizeof(struct btrfs_key_ptr));
3977 write_extent_buffer(buf, &ptr1,
3978 btrfs_node_key_ptr_offset(slot + 1),
3979 sizeof(struct btrfs_key_ptr));
3980 write_extent_buffer(buf, &ptr2,
3981 btrfs_node_key_ptr_offset(slot),
3982 sizeof(struct btrfs_key_ptr));
3984 struct btrfs_disk_key key;
3985 btrfs_node_key(buf, &key, 0);
3986 btrfs_fixup_low_keys(root, path, &key,
3987 btrfs_header_level(buf) + 1);
3990 struct btrfs_item *item1, *item2;
3991 struct btrfs_key k1, k2;
3992 char *item1_data, *item2_data;
3993 u32 item1_offset, item2_offset, item1_size, item2_size;
3995 item1 = btrfs_item_nr(slot);
3996 item2 = btrfs_item_nr(slot + 1);
3997 btrfs_item_key_to_cpu(buf, &k1, slot);
3998 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3999 item1_offset = btrfs_item_offset(buf, item1);
4000 item2_offset = btrfs_item_offset(buf, item2);
4001 item1_size = btrfs_item_size(buf, item1);
4002 item2_size = btrfs_item_size(buf, item2);
4004 item1_data = malloc(item1_size);
4007 item2_data = malloc(item2_size);
/* Stage both payloads, then write each into the other's slot. */
4013 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4014 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4016 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4017 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4021 btrfs_set_item_offset(buf, item1, item2_offset);
4022 btrfs_set_item_offset(buf, item2, item1_offset);
4023 btrfs_set_item_size(buf, item1, item2_size);
4024 btrfs_set_item_size(buf, item2, item1_size);
4026 path->slots[0] = slot;
4027 btrfs_set_item_key_unsafe(root, path, &k2);
4028 path->slots[0] = slot + 1;
4029 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Scan adjacent key pairs in the block at path->lowest_level and swap
 * any pair that is out of order, marking the buffer dirty afterwards.
 * Handles both node (key_ptr) and leaf (item) blocks via swap_values().
 */
4034 static int fix_key_order(struct btrfs_trans_handle *trans,
4035 struct btrfs_root *root,
4036 struct btrfs_path *path)
4038 struct extent_buffer *buf;
4039 struct btrfs_key k1, k2;
4041 int level = path->lowest_level;
4044 buf = path->nodes[level];
4045 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4047 btrfs_node_key_to_cpu(buf, &k1, i);
4048 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4050 btrfs_item_key_to_cpu(buf, &k1, i);
4051 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Already ordered: nothing to do for this pair. */
4053 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4055 ret = swap_values(root, path, buf, i);
4058 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf, but only when its key type
 * is one fsck knows how to regenerate or tolerate losing (dir index,
 * extent items, extent refs).  Shifts the remaining item headers down
 * and fixes up parent keys if slot 0 was removed.
 */
4064 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4065 struct btrfs_root *root,
4066 struct btrfs_path *path,
4067 struct extent_buffer *buf, int slot)
4069 struct btrfs_key key;
4070 int nritems = btrfs_header_nritems(buf);
4072 btrfs_item_key_to_cpu(buf, &key, slot);
4074 /* These are all the keys we can deal with missing. */
4075 if (key.type != BTRFS_DIR_INDEX_KEY &&
4076 key.type != BTRFS_EXTENT_ITEM_KEY &&
4077 key.type != BTRFS_METADATA_ITEM_KEY &&
4078 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4079 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4082 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4083 (unsigned long long)key.objectid, key.type,
4084 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array; data is left in place. */
4085 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4086 btrfs_item_nr_offset(slot + 1),
4087 sizeof(struct btrfs_item) *
4088 (nritems - slot - 1));
4089 btrfs_set_header_nritems(buf, nritems - 1);
4091 struct btrfs_disk_key disk_key;
4093 btrfs_item_key(buf, &disk_key, 0);
4094 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4096 btrfs_mark_buffer_dirty(buf);
/*
 * Repair a leaf whose item data offsets are inconsistent: item 0's data
 * must end at BTRFS_LEAF_DATA_SIZE and each subsequent item must end where
 * the previous item's data begins. Items that run off the end or overlap
 * their neighbor are deleted via delete_bogus_item(); gaps are closed by
 * shifting the item data and updating the stored offset.
 * NOTE(review): error paths, braces, and the final return are elided in
 * this excerpt.
 */
4100 static int fix_item_offset(struct btrfs_trans_handle *trans,
4101 struct btrfs_root *root,
4102 struct btrfs_path *path)
4104 struct extent_buffer *buf;
4108 /* We should only get this for leaves */
4109 BUG_ON(path->lowest_level);
4110 buf = path->nodes[0];
4112 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4113 unsigned int shift = 0, offset;
/* First item: its data must end exactly at the leaf data boundary. */
4115 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4116 BTRFS_LEAF_DATA_SIZE(root)) {
4117 if (btrfs_item_end_nr(buf, i) >
4118 BTRFS_LEAF_DATA_SIZE(root)) {
4119 ret = delete_bogus_item(trans, root, path,
4123 fprintf(stderr, "item is off the end of the "
4124 "leaf, can't fix\n");
4128 shift = BTRFS_LEAF_DATA_SIZE(root) -
4129 btrfs_item_end_nr(buf, i);
/* Later items: must abut the previous item's data start. */
4130 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4131 btrfs_item_offset_nr(buf, i - 1)) {
4132 if (btrfs_item_end_nr(buf, i) >
4133 btrfs_item_offset_nr(buf, i - 1)) {
4134 ret = delete_bogus_item(trans, root, path,
4138 fprintf(stderr, "items overlap, can't fix\n");
4142 shift = btrfs_item_offset_nr(buf, i - 1) -
4143 btrfs_item_end_nr(buf, i);
4148 printf("Shifting item nr %d by %u bytes in block %llu\n",
4149 i, shift, (unsigned long long)buf->start);
4150 offset = btrfs_item_offset_nr(buf, i);
/* Move the item's data up by @shift to close the gap. */
4151 memmove_extent_buffer(buf,
4152 btrfs_leaf_data(buf) + offset + shift,
4153 btrfs_leaf_data(buf) + offset,
4154 btrfs_item_size_nr(buf, i));
4155 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4157 btrfs_mark_buffer_dirty(buf);
4161 * We may have moved things, in which case we want to exit so we don't
4162 * write those changes out. Once we have proper abort functionality in
4163 * progs this can be changed to something nicer.
4170 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4171 * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are repairable here. For each root
 * that references the block (found via btrfs_find_all_roots), start a
 * transaction, COW-search down to the block, and run the matching fixer.
 * NOTE(review): several error-handling lines and gotos are elided in this
 * excerpt; the multiple commit_transaction calls visible below belong to
 * distinct success/error paths.
 */
4173 static int try_to_fix_bad_block(struct btrfs_root *root,
4174 struct extent_buffer *buf,
4175 enum btrfs_tree_block_status status)
4177 struct btrfs_trans_handle *trans;
4178 struct ulist *roots;
4179 struct ulist_node *node;
4180 struct btrfs_root *search_root;
4181 struct btrfs_path *path;
4182 struct ulist_iterator iter;
4183 struct btrfs_key root_key, key;
4186 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4187 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4190 path = btrfs_alloc_path();
/* Collect every root that owns a reference to this block. */
4194 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4197 btrfs_free_path(path);
4201 ULIST_ITER_INIT(&iter);
4202 while ((node = ulist_next(roots, &iter))) {
4203 root_key.objectid = node->val;
4204 root_key.type = BTRFS_ROOT_ITEM_KEY;
4205 root_key.offset = (u64)-1;
4207 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4214 trans = btrfs_start_transaction(search_root, 0);
4215 if (IS_ERR(trans)) {
4216 ret = PTR_ERR(trans);
/* Search with check disabled so we can reach the corrupt block. */
4220 path->lowest_level = btrfs_header_level(buf);
4221 path->skip_check_block = 1;
4222 if (path->lowest_level)
4223 btrfs_node_key_to_cpu(buf, &key, 0);
4225 btrfs_item_key_to_cpu(buf, &key, 0);
4226 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4229 btrfs_commit_transaction(trans, search_root);
4232 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4233 ret = fix_key_order(trans, search_root, path);
4234 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4235 ret = fix_item_offset(trans, search_root, path);
4237 btrfs_commit_transaction(trans, search_root);
4240 btrfs_release_path(path);
4241 btrfs_commit_transaction(trans, search_root);
4244 btrfs_free_path(path);
/*
 * Validate one tree block against its extent record: record its generation,
 * first key and level, run btrfs_check_leaf()/btrfs_check_node(), and if in
 * repair mode try to fix bad blocks on the spot. Marks content/owner-ref
 * checked flags on the extent record as it goes.
 * NOTE(review): braces, the repair-mode guard, and returns are elided in
 * this excerpt.
 */
4248 static int check_block(struct btrfs_root *root,
4249 struct cache_tree *extent_cache,
4250 struct extent_buffer *buf, u64 flags)
4252 struct extent_record *rec;
4253 struct cache_extent *cache;
4254 struct btrfs_key key;
4255 enum btrfs_tree_block_status status;
4259 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4262 rec = container_of(cache, struct extent_record, cache);
4263 rec->generation = btrfs_header_generation(buf);
4265 level = btrfs_header_level(buf);
4266 if (btrfs_header_nritems(buf) > 0) {
/* Remember the block's first key (leaf vs node accessor differs). */
4269 btrfs_item_key_to_cpu(buf, &key, 0);
4271 btrfs_node_key_to_cpu(buf, &key, 0);
4273 rec->info_objectid = key.objectid;
4275 rec->info_level = level;
4277 if (btrfs_is_leaf(buf))
4278 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4280 status = btrfs_check_node(root, &rec->parent_key, buf);
4282 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/* Presumably guarded by the repair flag on an elided line. */
4284 status = try_to_fix_bad_block(root, buf, status);
4285 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4287 fprintf(stderr, "bad block %llu\n",
4288 (unsigned long long)buf->start);
4291 * Signal to callers we need to start the scan over
4292 * again since we'll have cow'ed blocks.
4297 rec->content_checked = 1;
4298 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4299 rec->owner_ref_checked = 1;
4301 ret = check_owner_ref(root, rec, buf);
4303 rec->owner_ref_checked = 1;
/* Drop the record if everything about it has now been verified. */
4307 maybe_free_extent_rec(extent_cache, rec);
/*
 * Linear search of rec->backrefs for a tree backref matching either
 * @parent (full/shared backref) or @root (keyed backref). Returns the
 * match or NULL (return statements elided in this excerpt).
 */
4311 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4312 u64 parent, u64 root)
4314 struct list_head *cur = rec->backrefs.next;
4315 struct extent_backref *node;
4316 struct tree_backref *back;
4318 while(cur != &rec->backrefs) {
4319 node = list_entry(cur, struct extent_backref, list);
/* Skip data backrefs; guard elided in this excerpt. */
4323 back = (struct tree_backref *)node;
/* parent != 0 selects full-backref matching; otherwise match by root. */
4325 if (!node->full_backref)
4327 if (parent == back->parent)
4330 if (node->full_backref)
4332 if (back->root == root)
/*
 * Allocate and link a new tree backref onto rec->backrefs. A nonzero
 * @parent makes it a full backref, otherwise it is keyed by @root.
 * NOTE(review): no NULL check is visible between the malloc and the
 * memset in this excerpt — if none exists on the elided lines this is an
 * unchecked-allocation defect; verify against the full source.
 */
4339 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4340 u64 parent, u64 root)
4342 struct tree_backref *ref = malloc(sizeof(*ref));
4343 memset(&ref->node, 0, sizeof(ref->node));
4345 ref->parent = parent;
4346 ref->node.full_backref = 1;
4349 ref->node.full_backref = 0;
4351 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Find a data backref on @rec matching parent/root/owner/offset. When
 * @found_ref is set and the existing ref was already found on disk, a
 * candidate whose bytes/disk_bytenr disagree is rejected so mismatched
 * refs surface as separate entries (see comment in add_data_backref).
 * Returns the match or NULL (returns elided in this excerpt).
 */
4356 static struct data_backref *find_data_backref(struct extent_record *rec,
4357 u64 parent, u64 root,
4358 u64 owner, u64 offset,
4360 u64 disk_bytenr, u64 bytes)
4362 struct list_head *cur = rec->backrefs.next;
4363 struct extent_backref *node;
4364 struct data_backref *back;
4366 while(cur != &rec->backrefs) {
4367 node = list_entry(cur, struct extent_backref, list);
/* Skip tree backrefs; guard elided in this excerpt. */
4371 back = (struct data_backref *)node;
4373 if (!node->full_backref)
4375 if (parent == back->parent)
4378 if (node->full_backref)
4380 if (back->root == root && back->owner == owner &&
4381 back->offset == offset) {
/* Reject refs whose size/location conflict with what was found. */
4382 if (found_ref && node->found_ref &&
4383 (back->bytes != bytes ||
4384 back->disk_bytenr != disk_bytenr))
/*
 * Allocate and link a new data backref; also raises rec->max_size when the
 * new ref implies a larger extent.
 * NOTE(review): as with alloc_tree_backref, no malloc NULL check is
 * visible in this excerpt — confirm against the full source.
 */
4393 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4394 u64 parent, u64 root,
4395 u64 owner, u64 offset,
4398 struct data_backref *ref = malloc(sizeof(*ref));
4399 memset(&ref->node, 0, sizeof(ref->node));
4400 ref->node.is_data = 1;
/* Nonzero parent -> shared (full) backref; else keyed by root/owner/offset. */
4403 ref->parent = parent;
4406 ref->node.full_backref = 1;
4410 ref->offset = offset;
4411 ref->node.full_backref = 0;
4413 ref->bytes = max_size;
4416 list_add_tail(&ref->node.list, &rec->backrefs);
4417 if (max_size > rec->max_size)
4418 rec->max_size = max_size;
4422 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent lives in a block group whose
 * flags don't match the extent's kind: data extents must be in DATA block
 * groups; metadata in SYSTEM or METADATA. SYSTEM vs METADATA is further
 * disambiguated via the first backref's root (chunk tree => SYSTEM).
 */
4423 static void check_extent_type(struct extent_record *rec)
4425 struct btrfs_block_group_cache *bg_cache;
4427 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4431 /* data extent, check chunk directly*/
4432 if (!rec->metadata) {
4433 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4434 rec->wrong_chunk_type = 1;
4438 /* metadata extent, check the obvious case first */
4439 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4440 BTRFS_BLOCK_GROUP_METADATA))) {
4441 rec->wrong_chunk_type = 1;
4446 * Check SYSTEM extent, as it's also marked as metadata, we can only
4447 * make sure it's a SYSTEM extent by its backref
4449 if (!list_empty(&rec->backrefs)) {
4450 struct extent_backref *node;
4451 struct tree_backref *tback;
4454 node = list_entry(rec->backrefs.next, struct extent_backref,
4456 if (node->is_data) {
4457 /* tree block shouldn't have data backref */
4458 rec->wrong_chunk_type = 1;
4461 tback = container_of(node, struct tree_backref, node);
4463 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4464 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4466 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4467 if (!(bg_cache->flags & bg_type))
4468 rec->wrong_chunk_type = 1;
/*
 * Insert or update the extent record covering [start, start+nr) in
 * @extent_cache. If a record already exists it is updated in place
 * (refs, parent key/gen, max_size); a record at a different start or a
 * second "real" extent item becomes a duplicate queued on rec->dups and
 * the global duplicate_extents list. Otherwise a fresh record is
 * allocated and inserted. Both paths end by validating chunk type and
 * metadata stripe-crossing.
 * NOTE(review): large portions (ref-count bookkeeping, malloc checks,
 * several field assignments, returns) are elided in this excerpt.
 */
4472 static int add_extent_rec(struct cache_tree *extent_cache,
4473 struct btrfs_key *parent_key, u64 parent_gen,
4474 u64 start, u64 nr, u64 extent_item_refs,
4475 int is_root, int inc_ref, int set_checked,
4476 int metadata, int extent_rec, u64 max_size)
4478 struct extent_record *rec;
4479 struct cache_extent *cache;
4483 cache = lookup_cache_extent(extent_cache, start, nr);
/* --- existing record: update in place --- */
4485 rec = container_of(cache, struct extent_record, cache);
4489 rec->nr = max(nr, max_size);
4492 * We need to make sure to reset nr to whatever the extent
4493 * record says was the real size, this way we can compare it to
/* A second extent item, or a differently-placed one, is a duplicate. */
4497 if (start != rec->start || rec->found_rec) {
4498 struct extent_record *tmp;
4501 if (list_empty(&rec->list))
4502 list_add_tail(&rec->list,
4503 &duplicate_extents);
4506 * We have to do this song and dance in case we
4507 * find an extent record that falls inside of
4508 * our current extent record but does not have
4509 * the same objectid.
4511 tmp = malloc(sizeof(*tmp));
4515 tmp->max_size = max_size;
4518 tmp->metadata = metadata;
4519 tmp->extent_item_refs = extent_item_refs;
4520 INIT_LIST_HEAD(&tmp->list);
4521 list_add_tail(&tmp->list, &rec->dups);
4522 rec->num_duplicates++;
4529 if (extent_item_refs && !dup) {
4530 if (rec->extent_item_refs) {
4531 fprintf(stderr, "block %llu rec "
4532 "extent_item_refs %llu, passed %llu\n",
4533 (unsigned long long)start,
4534 (unsigned long long)
4535 rec->extent_item_refs,
4536 (unsigned long long)extent_item_refs);
4538 rec->extent_item_refs = extent_item_refs;
4543 rec->content_checked = 1;
4544 rec->owner_ref_checked = 1;
4548 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4550 rec->parent_generation = parent_gen;
4552 if (rec->max_size < max_size)
4553 rec->max_size = max_size;
4556 * A metadata extent can't cross stripe_len boundary, otherwise
4557 * kernel scrub won't be able to handle it.
4558 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4561 if (metadata && check_crossing_stripes(rec->start,
4563 rec->crossing_stripes = 1;
4564 check_extent_type(rec);
4565 maybe_free_extent_rec(extent_cache, rec);
/* --- no existing record: allocate and initialize a new one --- */
4568 rec = malloc(sizeof(*rec));
4570 rec->max_size = max_size;
4571 rec->nr = max(nr, max_size);
4572 rec->found_rec = !!extent_rec;
4573 rec->content_checked = 0;
4574 rec->owner_ref_checked = 0;
4575 rec->num_duplicates = 0;
4576 rec->metadata = metadata;
/* -1 == "full-backref flag not yet determined". */
4577 rec->flag_block_full_backref = -1;
4578 rec->bad_full_backref = 0;
4579 rec->crossing_stripes = 0;
4580 rec->wrong_chunk_type = 0;
4581 INIT_LIST_HEAD(&rec->backrefs);
4582 INIT_LIST_HEAD(&rec->dups);
4583 INIT_LIST_HEAD(&rec->list);
4595 if (extent_item_refs)
4596 rec->extent_item_refs = extent_item_refs;
4598 rec->extent_item_refs = 0;
4601 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4603 memset(&rec->parent_key, 0, sizeof(*parent_key));
4606 rec->parent_generation = parent_gen;
4608 rec->parent_generation = 0;
4610 rec->cache.start = start;
4611 rec->cache.size = nr;
4612 ret = insert_cache_extent(extent_cache, &rec->cache);
4616 rec->content_checked = 1;
4617 rec->owner_ref_checked = 1;
4621 if (check_crossing_stripes(rec->start, rec->max_size))
4622 rec->crossing_stripes = 1;
4623 check_extent_type(rec);
/*
 * Record a tree-block backref for @bytenr, creating a placeholder extent
 * record first if none exists. @found_ref distinguishes a ref seen while
 * walking the tree (found_ref) from one read out of the extent tree
 * (found_extent_tree); seeing either twice is reported as corruption.
 * NOTE(review): braces, error returns and NULL checks are elided here.
 */
4627 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4628 u64 parent, u64 root, int found_ref)
4630 struct extent_record *rec;
4631 struct tree_backref *back;
4632 struct cache_extent *cache;
4634 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: create a 1-byte placeholder and re-look it up. */
4636 add_extent_rec(extent_cache, NULL, 0, bytenr,
4637 1, 0, 0, 0, 0, 1, 0, 0);
4638 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4643 rec = container_of(cache, struct extent_record, cache);
4644 if (rec->start != bytenr) {
4648 back = find_tree_backref(rec, parent, root);
4650 back = alloc_tree_backref(rec, parent, root);
4653 if (back->node.found_ref) {
4654 fprintf(stderr, "Extent back ref already exists "
4655 "for %llu parent %llu root %llu \n",
4656 (unsigned long long)bytenr,
4657 (unsigned long long)parent,
4658 (unsigned long long)root);
4660 back->node.found_ref = 1;
4662 if (back->node.found_extent_tree) {
4663 fprintf(stderr, "Extent back ref already exists "
4664 "for %llu parent %llu root %llu \n",
4665 (unsigned long long)bytenr,
4666 (unsigned long long)parent,
4667 (unsigned long long)root);
4669 back->node.found_extent_tree = 1;
4671 check_extent_type(rec);
4672 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for @bytenr, creating a placeholder extent record
 * if needed. Mirrors add_tree_backref but also tracks num_refs and the
 * real extent size (max_size) so conflicting file-extent items can be
 * detected. NOTE(review): braces and error returns are elided here.
 */
4676 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4677 u64 parent, u64 root, u64 owner, u64 offset,
4678 u32 num_refs, int found_ref, u64 max_size)
4680 struct extent_record *rec;
4681 struct data_backref *back;
4682 struct cache_extent *cache;
4684 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: create a 1-byte placeholder and re-look it up. */
4686 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4688 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4693 rec = container_of(cache, struct extent_record, cache);
4694 if (rec->max_size < max_size)
4695 rec->max_size = max_size;
4698 * If found_ref is set then max_size is the real size and must match the
4699 * existing refs. So if we have already found a ref then we need to
4700 * make sure that this ref matches the existing one, otherwise we need
4701 * to add a new backref so we can notice that the backrefs don't match
4702 * and we need to figure out who is telling the truth. This is to
4703 * account for that awful fsync bug I introduced where we'd end up with
4704 * a btrfs_file_extent_item that would have its length include multiple
4705 * prealloc extents or point inside of a prealloc extent.
4707 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4710 back = alloc_data_backref(rec, parent, root, owner, offset,
/* A ref found by walking the fs tree always carries exactly one count. */
4714 BUG_ON(num_refs != 1);
4715 if (back->node.found_ref)
4716 BUG_ON(back->bytes != max_size);
4717 back->node.found_ref = 1;
4718 back->found_ref += 1;
4719 back->bytes = max_size;
4720 back->disk_bytenr = bytenr;
4722 rec->content_checked = 1;
4723 rec->owner_ref_checked = 1;
4725 if (back->node.found_extent_tree) {
4726 fprintf(stderr, "Extent back ref already exists "
4727 "for %llu parent %llu root %llu "
4728 "owner %llu offset %llu num_refs %lu\n",
4729 (unsigned long long)bytenr,
4730 (unsigned long long)parent,
4731 (unsigned long long)root,
4732 (unsigned long long)owner,
4733 (unsigned long long)offset,
4734 (unsigned long)num_refs);
4736 back->num_refs = num_refs;
4737 back->node.found_extent_tree = 1;
4739 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for later processing: mark it in @seen
 * first (so it is only queued once) and then add it to @pending.
 * Return path elided in this excerpt.
 */
4743 static int add_pending(struct cache_tree *pending,
4744 struct cache_tree *seen, u64 bytenr, u32 size)
4747 ret = add_cache_extent(seen, bytenr, size);
4750 add_cache_extent(pending, bytenr, size);
/*
 * Select up to @bits_nr extents to process next, filling @bits. Priority:
 * anything queued for readahead, then tree nodes near @last (biased 32K
 * back so nearby blocks batch together), then plain pending extents. When
 * there is room left, extra pending extents close behind the batch are
 * appended to improve locality. NOTE(review): returns, the int ret
 * declaration, and some loop/brace lines are elided in this excerpt.
 */
4754 static int pick_next_pending(struct cache_tree *pending,
4755 struct cache_tree *reada,
4756 struct cache_tree *nodes,
4757 u64 last, struct block_info *bits, int bits_nr,
4760 unsigned long node_start = last;
4761 struct cache_extent *cache;
4764 cache = search_cache_extent(reada, 0);
4766 bits[0].start = cache->start;
4767 bits[0].size = cache->size;
/* Bias the node search 32K back from @last to batch nearby blocks. */
4772 if (node_start > 32768)
4773 node_start -= 32768;
4775 cache = search_cache_extent(nodes, node_start);
4777 cache = search_cache_extent(nodes, 0);
4780 cache = search_cache_extent(pending, 0);
4785 bits[ret].start = cache->start;
4786 bits[ret].size = cache->size;
4787 cache = next_cache_extent(cache);
4789 } while (cache && ret < bits_nr);
4795 bits[ret].start = cache->start;
4796 bits[ret].size = cache->size;
4797 cache = next_cache_extent(cache);
4799 } while (cache && ret < bits_nr);
/* With >8 free slots, also pull in pending extents within 32K behind us. */
4801 if (bits_nr - ret > 8) {
4802 u64 lookup = bits[0].start + bits[0].size;
4803 struct cache_extent *next;
4804 next = search_cache_extent(pending, lookup);
4806 if (next->start - lookup > 32768)
4808 bits[ret].start = next->start;
4809 bits[ret].size = next->size;
4810 lookup = next->start + next->size;
4814 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: unlink a chunk record from its
 * lists before it is freed (the free() call is elided in this excerpt).
 */
4822 static void free_chunk_record(struct cache_extent *cache)
4824 struct chunk_record *rec;
4826 rec = container_of(cache, struct chunk_record, cache);
4827 list_del_init(&rec->list);
4828 list_del_init(&rec->dextents);
/* Free every chunk record in @chunk_cache via free_chunk_record(). */
4832 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4834 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * rb-tree destruction callback: recover the device record from its node
 * (the free() call is elided in this excerpt).
 */
4837 static void free_device_record(struct rb_node *node)
4839 struct device_record *rec;
4841 rec = container_of(node, struct device_record, node);
/*
 * NOTE(review): this line appears to be the body of a device-cache
 * destructor whose signature was elided from this excerpt — it tears
 * down the device rb-tree using free_device_record(); confirm against
 * the full source.
 */
4845 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the block-group cache tree and append it to the
 * tree's block_groups list. Error handling between the two steps is
 * elided in this excerpt.
 */
4847 int insert_block_group_record(struct block_group_tree *tree,
4848 struct block_group_record *bg_rec)
4852 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4856 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: unlink a block-group record from
 * its list before it is freed (free() elided in this excerpt).
 */
4860 static void free_block_group_record(struct cache_extent *cache)
4862 struct block_group_record *rec;
4864 rec = container_of(cache, struct block_group_record, cache);
4865 list_del_init(&rec->list);
/* Free every block-group record via free_block_group_record(). */
4869 void free_block_group_tree(struct block_group_tree *tree)
4871 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device-extent tree and start it on both orphan
 * lists (no owning chunk / no owning device known yet); the record is
 * taken off those lists as its owners are discovered.
 */
4874 int insert_device_extent_record(struct device_extent_tree *tree,
4875 struct device_extent_record *de_rec)
4880 * Device extent is a bit different from the other extents, because
4881 * the extents which belong to the different devices may have the
4882 * same start and size, so we need use the special extent cache
4883 * search/insert functions.
4885 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4889 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4890 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: detach a device-extent record from
 * whichever orphan lists it is still on (free() elided in this excerpt).
 */
4894 static void free_device_extent_record(struct cache_extent *cache)
4896 struct device_extent_record *rec;
4898 rec = container_of(cache, struct device_extent_record, cache);
4899 if (!list_empty(&rec->chunk_list))
4900 list_del_init(&rec->chunk_list);
4901 if (!list_empty(&rec->device_list))
4902 list_del_init(&rec->device_list);
/* Free every device-extent record via free_device_extent_record(). */
4906 void free_device_extent_tree(struct device_extent_tree *tree)
4908 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4911 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Legacy (v0) extent-tree compat: translate an old-format ref item into a
 * tree or data backref depending on its owner objectid. Only compiled
 * when v0 compatibility is enabled.
 */
4912 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4913 struct extent_buffer *leaf, int slot)
4915 struct btrfs_extent_ref_v0 *ref0;
4916 struct btrfs_key key;
4918 btrfs_item_key_to_cpu(leaf, &key, slot);
4919 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
/* Owners below FIRST_FREE_OBJECTID are internal trees => tree backref. */
4920 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4921 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4923 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4924 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at @slot, sized for
 * its stripe count and populated with the item's geometry fields plus a
 * per-stripe devid/offset/uuid copy. exit()s on allocation failure.
 * NOTE(review): the return statement is elided in this excerpt.
 */
4930 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4931 struct btrfs_key *key,
4934 struct btrfs_chunk *ptr;
4935 struct chunk_record *rec;
4938 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4939 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Record size includes the trailing variable-length stripes array. */
4941 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4943 fprintf(stderr, "memory allocation failed\n");
4947 INIT_LIST_HEAD(&rec->list);
4948 INIT_LIST_HEAD(&rec->dextents);
4951 rec->cache.start = key->offset;
4952 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4954 rec->generation = btrfs_header_generation(leaf);
4956 rec->objectid = key->objectid;
4957 rec->type = key->type;
4958 rec->offset = key->offset;
4960 rec->length = rec->cache.size;
4961 rec->owner = btrfs_chunk_owner(leaf, ptr);
4962 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4963 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4964 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4965 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4966 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4967 rec->num_stripes = num_stripes;
4968 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4970 for (i = 0; i < rec->num_stripes; ++i) {
4971 rec->stripes[i].devid =
4972 btrfs_stripe_devid_nr(leaf, ptr, i);
4973 rec->stripes[i].offset =
4974 btrfs_stripe_offset_nr(leaf, ptr, i);
4975 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4976 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse the chunk item at @slot into a chunk_record and insert it into
 * @chunk_cache; a collision means a duplicate chunk and is reported.
 */
4983 static int process_chunk_item(struct cache_tree *chunk_cache,
4984 struct btrfs_key *key, struct extent_buffer *eb,
4987 struct chunk_record *rec;
4990 rec = btrfs_new_chunk_record(eb, key, slot);
4991 ret = insert_cache_extent(chunk_cache, &rec->cache);
4993 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4994 rec->offset, rec->length);
/*
 * Parse a DEV_ITEM into a device_record keyed by devid and insert it into
 * the @dev_cache rb-tree; a collision means a duplicate device entry.
 * NOTE(review): the rec->devid = key->offset assignment visible below is
 * later overwritten by btrfs_device_id(); intermediate lines are elided
 * in this excerpt.
 */
5001 static int process_device_item(struct rb_root *dev_cache,
5002 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5004 struct btrfs_dev_item *ptr;
5005 struct device_record *rec;
5008 ptr = btrfs_item_ptr(eb,
5009 slot, struct btrfs_dev_item);
5011 rec = malloc(sizeof(*rec));
5013 fprintf(stderr, "memory allocation failed\n");
5017 rec->devid = key->offset;
5018 rec->generation = btrfs_header_generation(eb);
5020 rec->objectid = key->objectid;
5021 rec->type = key->type;
5022 rec->offset = key->offset;
5024 rec->devid = btrfs_device_id(eb, ptr);
5025 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5026 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5028 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5030 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at
 * @slot. The cache range mirrors the item key (objectid = start,
 * offset = length). exit()s on allocation failure; the return statement
 * is elided in this excerpt.
 */
5037 struct block_group_record *
5038 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5041 struct btrfs_block_group_item *ptr;
5042 struct block_group_record *rec;
5044 rec = calloc(1, sizeof(*rec));
5046 fprintf(stderr, "memory allocation failed\n");
5050 rec->cache.start = key->objectid;
5051 rec->cache.size = key->offset;
5053 rec->generation = btrfs_header_generation(leaf);
5055 rec->objectid = key->objectid;
5056 rec->type = key->type;
5057 rec->offset = key->offset;
5059 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5060 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5062 INIT_LIST_HEAD(&rec->list);
/*
 * Parse the block-group item at @slot and insert the resulting record
 * into @block_group_cache; a collision means a duplicate block group.
 */
5067 static int process_block_group_item(struct block_group_tree *block_group_cache,
5068 struct btrfs_key *key,
5069 struct extent_buffer *eb, int slot)
5071 struct block_group_record *rec;
5074 rec = btrfs_new_block_group_record(eb, key, slot);
5075 ret = insert_block_group_record(block_group_cache, rec);
5077 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5078 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot. Unlike other caches, the record is keyed by (devid, offset)
 * via cache.objectid + cache.start since extents on different devices
 * may share offsets. exit()s on allocation failure; the return statement
 * is elided in this excerpt.
 */
5085 struct device_extent_record *
5086 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5087 struct btrfs_key *key, int slot)
5089 struct device_extent_record *rec;
5090 struct btrfs_dev_extent *ptr;
5092 rec = calloc(1, sizeof(*rec));
5094 fprintf(stderr, "memory allocation failed\n");
5098 rec->cache.objectid = key->objectid;
5099 rec->cache.start = key->offset;
5101 rec->generation = btrfs_header_generation(leaf);
5103 rec->objectid = key->objectid;
5104 rec->type = key->type;
5105 rec->offset = key->offset;
5107 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5108 rec->chunk_objecteid =
5109 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5111 btrfs_dev_extent_chunk_offset(leaf, ptr);
5112 rec->length = btrfs_dev_extent_length(leaf, ptr);
5113 rec->cache.size = rec->length;
5115 INIT_LIST_HEAD(&rec->chunk_list);
5116 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse the DEV_EXTENT item at @slot and insert the resulting record into
 * @dev_extent_cache; a collision means a duplicate device extent.
 * NOTE(review): the return type line is elided in this excerpt.
 */
5122 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5123 struct btrfs_key *key, struct extent_buffer *eb,
5126 struct device_extent_record *rec;
5129 rec = btrfs_new_device_extent_record(eb, key, slot);
5130 ret = insert_device_extent_record(dev_extent_cache, rec);
5133 "Device extent[%llu, %llu, %llu] existed.\n",
5134 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM from the extent tree: register the
 * extent record itself, then walk its inline refs and register each one
 * as a tree or data backref. Falls back to the v0 item format when the
 * item is too small for the current struct.
 * NOTE(review): switch/while scaffolding, some local declarations and the
 * final return are elided in this excerpt.
 */
5141 static int process_extent_item(struct btrfs_root *root,
5142 struct cache_tree *extent_cache,
5143 struct extent_buffer *eb, int slot)
5145 struct btrfs_extent_item *ei;
5146 struct btrfs_extent_inline_ref *iref;
5147 struct btrfs_extent_data_ref *dref;
5148 struct btrfs_shared_data_ref *sref;
5149 struct btrfs_key key;
5153 u32 item_size = btrfs_item_size_nr(eb, slot);
5159 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys carry level in offset; size is the tree block size. */
5161 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5163 num_bytes = root->leafsize;
5165 num_bytes = key.offset;
/* Item smaller than the modern struct => v0 format (compat only). */
5168 if (item_size < sizeof(*ei)) {
5169 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5170 struct btrfs_extent_item_v0 *ei0;
5171 BUG_ON(item_size != sizeof(*ei0));
5172 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5173 refs = btrfs_extent_refs_v0(eb, ei0);
5177 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5178 num_bytes, refs, 0, 0, 0, metadata, 1,
5182 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5183 refs = btrfs_extent_refs(eb, ei);
5184 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5189 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5190 refs, 0, 0, 0, metadata, 1, num_bytes);
/* Inline refs start after the extent item (and tree_block_info, if any). */
5192 ptr = (unsigned long)(ei + 1);
5193 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5194 key.type == BTRFS_EXTENT_ITEM_KEY)
5195 ptr += sizeof(struct btrfs_tree_block_info);
5197 end = (unsigned long)ei + item_size;
5199 iref = (struct btrfs_extent_inline_ref *)ptr;
5200 type = btrfs_extent_inline_ref_type(eb, iref);
5201 offset = btrfs_extent_inline_ref_offset(eb, iref);
5203 case BTRFS_TREE_BLOCK_REF_KEY:
5204 add_tree_backref(extent_cache, key.objectid,
5207 case BTRFS_SHARED_BLOCK_REF_KEY:
5208 add_tree_backref(extent_cache, key.objectid,
5211 case BTRFS_EXTENT_DATA_REF_KEY:
5212 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5213 add_data_backref(extent_cache, key.objectid, 0,
5214 btrfs_extent_data_ref_root(eb, dref),
5215 btrfs_extent_data_ref_objectid(eb,
5217 btrfs_extent_data_ref_offset(eb, dref),
5218 btrfs_extent_data_ref_count(eb, dref),
5221 case BTRFS_SHARED_DATA_REF_KEY:
5222 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5223 add_data_backref(extent_cache, key.objectid, offset,
5225 btrfs_shared_data_ref_count(eb, sref),
5229 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5230 key.objectid, key.type, num_bytes);
5233 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [offset, offset+bytes) is covered by exactly one free-space
 * entry in @cache's free-space ctl, after carving out any superblock
 * mirror stripes that overlap the range (supers are never in the free
 * space cache). Recurses for the left side when a super lands in the
 * middle. On success the matched entry is unlinked so leftovers can be
 * detected by the caller. NOTE(review): free(logical), several returns
 * and braces are elided in this excerpt.
 */
5240 static int check_cache_range(struct btrfs_root *root,
5241 struct btrfs_block_group_cache *cache,
5242 u64 offset, u64 bytes)
5244 struct btrfs_free_space *entry;
5250 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5251 bytenr = btrfs_sb_offset(i);
/* Map the super copy's logical position(s) within this block group. */
5252 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5253 cache->key.objectid, bytenr, 0,
5254 &logical, &nr, &stripe_len);
5259 if (logical[nr] + stripe_len <= offset)
5261 if (offset + bytes <= logical[nr])
5263 if (logical[nr] == offset) {
5264 if (stripe_len >= bytes) {
5268 bytes -= stripe_len;
5269 offset += stripe_len;
5270 } else if (logical[nr] < offset) {
5271 if (logical[nr] + stripe_len >=
5276 bytes = (offset + bytes) -
5277 (logical[nr] + stripe_len);
5278 offset = logical[nr] + stripe_len;
5281 * Could be tricky, the super may land in the
5282 * middle of the area we're checking. First
5283 * check the easiest case, it's at the end.
5285 if (logical[nr] + stripe_len >=
5287 bytes = logical[nr] - offset;
5291 /* Check the left side */
5292 ret = check_cache_range(root, cache,
5294 logical[nr] - offset);
5300 /* Now we continue with the right side */
5301 bytes = (offset + bytes) -
5302 (logical[nr] + stripe_len);
5303 offset = logical[nr] + stripe_len;
/* The remaining range must match one free-space entry exactly. */
5310 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5312 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5313 offset, offset+bytes);
5317 if (entry->offset != offset) {
5318 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5323 if (entry->bytes != bytes) {
5324 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5325 bytes, entry->bytes, offset);
/* Consume the entry so stragglers can be reported by the caller. */
5329 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check one block group's free-space cache against the extent tree:
 * walk all extent/metadata items in the group's range and assert that
 * every gap between allocated extents appears as a free-space entry
 * (via check_cache_range). Any entries left over afterwards are space
 * the cache claims free but the extent tree does not.
 * NOTE(review): loop scaffolding, error checks and the return are elided
 * in this excerpt.
 */
5334 static int verify_space_cache(struct btrfs_root *root,
5335 struct btrfs_block_group_cache *cache)
5337 struct btrfs_path *path;
5338 struct extent_buffer *leaf;
5339 struct btrfs_key key;
5343 path = btrfs_alloc_path();
5347 root = root->fs_info->extent_root;
/* Never scan below the first superblock. */
5349 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5351 key.objectid = last;
5353 key.type = BTRFS_EXTENT_ITEM_KEY;
5355 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5360 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5361 ret = btrfs_next_leaf(root, path);
5369 leaf = path->nodes[0];
5370 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5371 if (key.objectid >= cache->key.offset + cache->key.objectid)
5373 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5374 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Adjacent extent: just advance past it, no gap to verify. */
5379 if (last == key.objectid) {
5380 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5381 last = key.objectid + key.offset;
5383 last = key.objectid + root->leafsize;
/* Gap between last and this extent must be a free-space entry. */
5388 ret = check_cache_range(root, cache, last,
5389 key.objectid - last);
5392 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5393 last = key.objectid + key.offset;
5395 last = key.objectid + root->leafsize;
/* Tail of the block group past the final extent. */
5399 if (last < cache->key.objectid + cache->key.offset)
5400 ret = check_cache_range(root, cache, last,
5401 cache->key.objectid +
5402 cache->key.offset - last);
5405 btrfs_free_path(path);
5408 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5409 fprintf(stderr, "There are still entries left in the space "
/*
 * Top-level free-space-cache check: bail out early if the cache
 * generation doesn't match the super (kernel will invalidate it anyway),
 * otherwise load and verify the cache of every block group. Returns
 * -EINVAL if any group failed verification.
 * NOTE(review): loop scaffolding, the error accumulation and some guards
 * are elided in this excerpt.
 */
5417 static int check_space_cache(struct btrfs_root *root)
5419 struct btrfs_block_group_cache *cache;
5420 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5424 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5425 btrfs_super_generation(root->fs_info->super_copy) !=
5426 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5427 printf("cache and super generation don't match, space cache "
5428 "will be invalidated\n");
/* Progress reporting for interactive runs. */
5432 if (ctx.progress_enabled) {
5433 ctx.tp = TASK_FREE_SPACE;
5434 task_start(ctx.info);
5438 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5442 start = cache->key.objectid + cache->key.offset;
5443 if (!cache->free_space_ctl) {
5444 if (btrfs_init_free_space_ctl(cache,
5445 root->sectorsize)) {
/* Reset any stale in-memory state before loading from disk. */
5450 btrfs_remove_free_space_cache(cache);
5453 ret = load_free_space_cache(root->fs_info, cache);
5457 ret = verify_space_cache(root, cache);
5459 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5460 cache->key.objectid);
5465 task_stop(ctx.info);
5467 return error ? -EINVAL : 0;
/*
 * Verify data checksums for the extent at @bytenr/@num_bytes against the
 * csum items starting at @leaf_offset in @eb: read the data (retrying
 * other mirrors on csum mismatch), compute a crc per sectorsize block and
 * compare with the stored value.
 * NOTE(review): several declarations (data, csum, mirror, etc.), the
 * free(data) and return are elided in this excerpt; the malloc NULL
 * check may be on an elided line — confirm against the full source.
 */
5470 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5471 u64 num_bytes, unsigned long leaf_offset,
5472 struct extent_buffer *eb) {
5475 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5477 unsigned long csum_offset;
5481 u64 data_checked = 0;
/* Data extents must be sector aligned. */
5487 if (num_bytes % root->sectorsize)
5490 data = malloc(num_bytes);
5494 while (offset < num_bytes) {
5497 read_len = num_bytes - offset;
5498 /* read as much space once a time */
5499 ret = read_extent_data(root, data + offset,
5500 bytenr + offset, &read_len, mirror);
5504 /* verify every 4k data's checksum */
5505 while (data_checked < read_len) {
5507 tmp = offset + data_checked;
5509 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5510 csum, root->sectorsize);
5511 btrfs_csum_final(csum, (char *)&csum);
/* Locate the stored csum for this sector inside the csum item. */
5513 csum_offset = leaf_offset +
5514 tmp / root->sectorsize * csum_size;
5515 read_extent_buffer(eb, (char *)&csum_expected,
5516 csum_offset, csum_size);
5517 /* try another mirror */
5518 if (csum != csum_expected) {
5519 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5520 mirror, bytenr + tmp,
5521 csum, csum_expected);
5522 num_copies = btrfs_num_copies(
5523 &root->fs_info->mapping_tree,
5525 if (mirror < num_copies - 1) {
5530 data_checked += root->sectorsize;
/*
 * Verify that extent items in the extent tree cover the whole byte range
 * [bytenr, bytenr + num_bytes).  The range is shrunk as covering extents
 * are found; a hole in the middle triggers a recursive check of the right
 * half.  Returns non-zero / prints an error when part of a csum'ed range
 * has no extent record.
 *
 * NOTE(review): listing is elided; loop heads, returns and some error
 * branches are missing from view.
 */
5539 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5542 struct btrfs_path *path;
5543 struct extent_buffer *leaf;
5544 struct btrfs_key key;
5547 path = btrfs_alloc_path();
5549 fprintf(stderr, "Error allocing path\n");
/* search for the last possible extent item at this bytenr */
5553 key.objectid = bytenr;
5554 key.type = BTRFS_EXTENT_ITEM_KEY;
5555 key.offset = (u64)-1;
5558 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5561 fprintf(stderr, "Error looking up extent record %d\n", ret);
5562 btrfs_free_path(path);
/* step back one item: search with offset -1 lands just past our key */
5565 if (path->slots[0] > 0) {
5568 ret = btrfs_prev_leaf(root, path);
5571 } else if (ret > 0) {
5578 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5581 * Block group items come before extent items if they have the same
5582 * bytenr, so walk back one more just in case. Dear future traveler,
5583 * first congrats on mastering time travel. Now if it's not too much
5584 * trouble could you go back to 2006 and tell Chris to make the
5585 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5586 * EXTENT_ITEM_KEY please?
5588 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5589 if (path->slots[0] > 0) {
5592 ret = btrfs_prev_leaf(root, path);
5595 } else if (ret > 0) {
5600 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5604 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5605 ret = btrfs_next_leaf(root, path);
5607 fprintf(stderr, "Error going to next leaf "
5609 btrfs_free_path(path);
5615 leaf = path->nodes[0];
5616 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5617 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* extent ends before our range starts: not relevant, keep scanning */
5621 if (key.objectid + key.offset < bytenr) {
/* extent starts past our range: nothing more can cover it */
5625 if (key.objectid > bytenr + num_bytes)
5628 if (key.objectid == bytenr) {
5629 if (key.offset >= num_bytes) {
/* partially covered from the front: shrink the remaining range */
5633 num_bytes -= key.offset;
5634 bytenr += key.offset;
5635 } else if (key.objectid < bytenr) {
5636 if (key.objectid + key.offset >= bytenr + num_bytes) {
5640 num_bytes = (bytenr + num_bytes) -
5641 (key.objectid + key.offset);
5642 bytenr = key.objectid + key.offset;
5644 if (key.objectid + key.offset < bytenr + num_bytes) {
5645 u64 new_start = key.objectid + key.offset;
5646 u64 new_bytes = bytenr + num_bytes - new_start;
5649 * Weird case, the extent is in the middle of
5650 * our range, we'll have to search one side
5651 * and then the other. Not sure if this happens
5652 * in real life, but no harm in coding it up
5653 * anyway just in case.
5655 btrfs_release_path(path);
5656 ret = check_extent_exists(root, new_start,
5659 fprintf(stderr, "Right section didn't "
5663 num_bytes = key.objectid - bytenr;
5666 num_bytes = key.objectid - bytenr;
/* anything left uncovered means csums exist without a backing extent */
5673 if (num_bytes && !ret) {
5674 fprintf(stderr, "There are no extents for csum range "
5675 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5679 btrfs_free_path(path);
/*
 * Walk the whole csum tree.  For every EXTENT_CSUM item, optionally
 * re-verify the data checksums (when check_data_csum is set), and make
 * sure each contiguous csum'ed byte range is backed by an extent record
 * (via check_extent_exists()).
 *
 * NOTE(review): elided listing — the main loop construct, slot advance and
 * final return are not visible here.
 */
5683 static int check_csums(struct btrfs_root *root)
5685 struct btrfs_path *path;
5686 struct extent_buffer *leaf;
5687 struct btrfs_key key;
5688 u64 offset = 0, num_bytes = 0;
5689 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5693 unsigned long leaf_offset;
/* all csums live in the dedicated csum tree, not the tree passed in */
5695 root = root->fs_info->csum_root;
5696 if (!extent_buffer_uptodate(root->node)) {
5697 fprintf(stderr, "No valid csum tree found\n");
5701 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5702 key.type = BTRFS_EXTENT_CSUM_KEY;
5705 path = btrfs_alloc_path();
5709 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5711 fprintf(stderr, "Error searching csum tree %d\n", ret);
5712 btrfs_free_path(path);
5716 if (ret > 0 && path->slots[0])
5721 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5722 ret = btrfs_next_leaf(root, path);
5724 fprintf(stderr, "Error going to next leaf "
5731 leaf = path->nodes[0];
5733 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5734 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* each csum entry covers one sector, so item size determines data length */
5739 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5740 csum_size) * root->sectorsize;
5741 if (!check_data_csum)
5742 goto skip_csum_check;
5743 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5744 ret = check_extent_csums(root, key.offset, data_len,
5750 offset = key.offset;
/* gap between csum items: close out the previous contiguous range */
5751 } else if (key.offset != offset + num_bytes) {
5752 ret = check_extent_exists(root, offset, num_bytes);
5754 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5755 "there is no extent record\n",
5756 offset, offset+num_bytes);
5759 offset = key.offset;
5762 num_bytes += data_len;
5766 btrfs_free_path(path);
/*
 * Return non-zero when @key sorts strictly before @drop_key
 * (objectid, then type, then offset), i.e. the key falls in the
 * already-dropped part of a snapshot being deleted.
 *
 * NOTE(review): the `return 1;` / `return 0;` lines and closing braces are
 * elided from this listing.
 */
5770 static int is_dropped_key(struct btrfs_key *key,
5771 struct btrfs_key *drop_key) {
5772 if (key->objectid < drop_key->objectid)
5774 else if (key->objectid == drop_key->objectid) {
5775 if (key->type < drop_key->type)
5777 else if (key->type == drop_key->type) {
5778 if (key->offset < drop_key->offset)
5786 * Here are the rules for FULL_BACKREF.
5788 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5789 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5791 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5792 * if it happened after the relocation occurred since we'll have dropped the
5793 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5794 * have no real way to know for sure.
5796 * We process the blocks one root at a time, and we start from the lowest root
5797 * objectid and go to the highest. So we can just lookup the owner backref for
5798 * the record and if we don't find it then we know it doesn't exist and we have
5801 * FIXME: if we ever start reclaiming root objectids then we need to fix this
5802 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5803 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether the tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF (per the rules in the comment above) and
 * record the verdict in the matching extent_record, flagging a
 * contradiction with an earlier verdict via rec->bad_full_backref.
 *
 * NOTE(review): elided listing — several returns/labels between the
 * visible lines are missing.
 */
5805 static int calc_extent_flag(struct btrfs_root *root,
5806 struct cache_tree *extent_cache,
5807 struct extent_buffer *buf,
5808 struct root_item_record *ri,
5811 struct extent_record *rec;
5812 struct cache_extent *cache;
5813 struct tree_backref *tback;
5816 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5817 /* we have added this extent before */
5819 rec = container_of(cache, struct extent_record, cache);
5822 * Except file/reloc tree, we can not have
5825 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* the root's own node can never be a shared (full backref) block */
5830 if (buf->start == ri->bytenr)
5833 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5836 owner = btrfs_header_owner(buf);
5837 if (owner == ri->objectid)
/* parent == 0: look for a non-shared backref from the header owner */
5840 tback = find_tree_backref(rec, 0, owner);
/* contradicts an earlier "not full backref" verdict for this record */
5845 if (rec->flag_block_full_backref != -1 &&
5846 rec->flag_block_full_backref != 0)
5847 rec->bad_full_backref = 1;
5850 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* contradicts an earlier "full backref" verdict for this record */
5851 if (rec->flag_block_full_backref != -1 &&
5852 rec->flag_block_full_backref != 1)
5853 rec->bad_full_backref = 1;
/*
 * Core of the metadata scan: pick the next pending tree block, read it,
 * work out its FULL_BACKREF status, sanity-check it, then walk its items.
 * Leaves feed the chunk/device/block-group/extent/backref caches and the
 * global accounting totals; interior nodes queue their children as new
 * pending blocks.
 *
 * NOTE(review): this listing is heavily elided (error paths, some
 * declarations, else-branches and returns are missing between the
 * visible lines).
 */
5857 static int run_next_block(struct btrfs_root *root,
5858 struct block_info *bits,
5861 struct cache_tree *pending,
5862 struct cache_tree *seen,
5863 struct cache_tree *reada,
5864 struct cache_tree *nodes,
5865 struct cache_tree *extent_cache,
5866 struct cache_tree *chunk_cache,
5867 struct rb_root *dev_cache,
5868 struct block_group_tree *block_group_cache,
5869 struct device_extent_tree *dev_extent_cache,
5870 struct root_item_record *ri)
5872 struct extent_buffer *buf;
5873 struct extent_record *rec = NULL;
5884 struct btrfs_key key;
5885 struct cache_extent *cache;
/* choose the next batch of blocks to process and kick off readahead */
5888 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5889 bits_nr, &reada_bits);
5894 for(i = 0; i < nritems; i++) {
5895 ret = add_cache_extent(reada, bits[i].start,
5900 /* fixme, get the parent transid */
5901 readahead_tree_block(root, bits[i].start,
5905 *last = bits[0].start;
5906 bytenr = bits[0].start;
5907 size = bits[0].size;
/* drop the chosen block from all the tracking trees before processing */
5909 cache = lookup_cache_extent(pending, bytenr, size);
5911 remove_cache_extent(pending, cache);
5914 cache = lookup_cache_extent(reada, bytenr, size);
5916 remove_cache_extent(reada, cache);
5919 cache = lookup_cache_extent(nodes, bytenr, size);
5921 remove_cache_extent(nodes, cache);
5924 cache = lookup_cache_extent(extent_cache, bytenr, size);
5926 rec = container_of(cache, struct extent_record, cache);
5927 gen = rec->parent_generation;
5930 /* fixme, get the real parent transid */
5931 buf = read_tree_block(root, bytenr, size, gen);
5932 if (!extent_buffer_uptodate(buf)) {
5933 record_bad_block_io(root->fs_info,
5934 extent_cache, bytenr, size);
5938 nritems = btrfs_header_nritems(buf);
/* get the block's flags from the extent tree, or compute them ourselves */
5941 if (!init_extent_tree) {
5942 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5943 btrfs_header_level(buf), 1, NULL,
5946 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5948 fprintf(stderr, "Couldn't calc extent flags\n");
5949 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5954 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5956 fprintf(stderr, "Couldn't calc extent flags\n");
5957 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5961 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5963 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5964 ri->objectid == btrfs_header_owner(buf)) {
5966 * Ok we got to this block from it's original owner and
5967 * we have FULL_BACKREF set. Relocation can leave
5968 * converted blocks over so this is altogether possible,
5969 * however it's not possible if the generation > the
5970 * last snapshot, so check for this case.
5972 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5973 btrfs_header_generation(buf) > ri->last_snapshot) {
5974 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5975 rec->bad_full_backref = 1;
5980 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5981 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5982 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5983 rec->bad_full_backref = 1;
/* remember the verdict so later passes can spot contradictions */
5987 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5988 rec->flag_block_full_backref = 1;
5992 rec->flag_block_full_backref = 0;
5994 owner = btrfs_header_owner(buf);
5997 ret = check_block(root, extent_cache, buf, flags);
/* leaf: dispatch every item type into the appropriate cache/accounting */
6001 if (btrfs_is_leaf(buf)) {
6002 btree_space_waste += btrfs_leaf_free_space(root, buf);
6003 for (i = 0; i < nritems; i++) {
6004 struct btrfs_file_extent_item *fi;
6005 btrfs_item_key_to_cpu(buf, &key, i);
6006 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6007 process_extent_item(root, extent_cache, buf,
6011 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6012 process_extent_item(root, extent_cache, buf,
6016 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6018 btrfs_item_size_nr(buf, i);
6021 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6022 process_chunk_item(chunk_cache, &key, buf, i);
6025 if (key.type == BTRFS_DEV_ITEM_KEY) {
6026 process_device_item(dev_cache, &key, buf, i);
6029 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6030 process_block_group_item(block_group_cache,
6034 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6035 process_device_extent_item(dev_extent_cache,
6040 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6041 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6042 process_extent_ref_v0(extent_cache, buf, i);
6049 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6050 add_tree_backref(extent_cache, key.objectid, 0,
6054 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6055 add_tree_backref(extent_cache, key.objectid,
6059 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6060 struct btrfs_extent_data_ref *ref;
6061 ref = btrfs_item_ptr(buf, i,
6062 struct btrfs_extent_data_ref);
6063 add_data_backref(extent_cache,
6065 btrfs_extent_data_ref_root(buf, ref),
6066 btrfs_extent_data_ref_objectid(buf,
6068 btrfs_extent_data_ref_offset(buf, ref),
6069 btrfs_extent_data_ref_count(buf, ref),
6070 0, root->sectorsize);
6073 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6074 struct btrfs_shared_data_ref *ref;
6075 ref = btrfs_item_ptr(buf, i,
6076 struct btrfs_shared_data_ref);
6077 add_data_backref(extent_cache,
6078 key.objectid, key.offset, 0, 0, 0,
6079 btrfs_shared_data_ref_count(buf, ref),
6080 0, root->sectorsize);
/* queue unexpected orphan items for deletion during repair */
6083 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6084 struct bad_item *bad;
6086 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6090 bad = malloc(sizeof(struct bad_item));
6093 INIT_LIST_HEAD(&bad->list);
6094 memcpy(&bad->key, &key,
6095 sizeof(struct btrfs_key));
6096 bad->root_id = owner;
6097 list_add_tail(&bad->list, &delete_items);
6100 if (key.type != BTRFS_EXTENT_DATA_KEY)
6102 fi = btrfs_item_ptr(buf, i,
6103 struct btrfs_file_extent_item);
6104 if (btrfs_file_extent_type(buf, fi) ==
6105 BTRFS_FILE_EXTENT_INLINE)
/* holes (disk_bytenr == 0) have no backing extent to reference */
6107 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6110 data_bytes_allocated +=
6111 btrfs_file_extent_disk_num_bytes(buf, fi);
6112 if (data_bytes_allocated < root->sectorsize) {
6115 data_bytes_referenced +=
6116 btrfs_file_extent_num_bytes(buf, fi);
6117 add_data_backref(extent_cache,
6118 btrfs_file_extent_disk_bytenr(buf, fi),
6119 parent, owner, key.objectid, key.offset -
6120 btrfs_file_extent_offset(buf, fi), 1, 1,
6121 btrfs_file_extent_disk_num_bytes(buf, fi));
/* interior node: record and enqueue every child pointer */
6125 struct btrfs_key first_key;
6127 first_key.objectid = 0;
6130 btrfs_item_key_to_cpu(buf, &first_key, 0);
6131 level = btrfs_header_level(buf);
6132 for (i = 0; i < nritems; i++) {
6133 ptr = btrfs_node_blockptr(buf, i);
6134 size = btrfs_level_size(root, level - 1);
6135 btrfs_node_key_to_cpu(buf, &key, i);
/* skip subtrees already dropped by an interrupted snapshot delete */
6137 if ((level == ri->drop_level)
6138 && is_dropped_key(&key, &ri->drop_key)) {
6142 ret = add_extent_rec(extent_cache, &key,
6143 btrfs_node_ptr_generation(buf, i),
6144 ptr, size, 0, 0, 1, 0, 1, 0,
6148 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6151 add_pending(nodes, seen, ptr, size);
6153 add_pending(pending, seen, ptr, size);
6156 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6157 nritems) * sizeof(struct btrfs_key_ptr);
6159 total_btree_bytes += buf->len;
6160 if (fs_root_objectid(btrfs_header_owner(buf)))
6161 total_fs_tree_bytes += buf->len;
6162 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6163 total_extent_tree_bytes += buf->len;
/* detect leftover pre-mixed-backref reloc tree blocks */
6164 if (!found_old_backref &&
6165 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6166 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6167 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6168 found_old_backref = 1;
6170 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root: queue @buf on the nodes tree (interior)
 * or pending tree (leaf), create its extent record, and add the proper
 * tree backref — shared (parent == buf->start) for reloc/old-style trees,
 * otherwise keyed by the owning root @objectid.
 */
6174 static int add_root_to_pending(struct extent_buffer *buf,
6175 struct cache_tree *extent_cache,
6176 struct cache_tree *pending,
6177 struct cache_tree *seen,
6178 struct cache_tree *nodes,
6181 if (btrfs_header_level(buf) > 0)
6182 add_pending(nodes, seen, buf->start, buf->len);
6184 add_pending(pending, seen, buf->start, buf->len);
6185 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6186 0, 1, 1, 0, 1, 0, buf->len);
6188 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6189 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6190 add_tree_backref(extent_cache, buf->start, buf->start,
6193 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6197 /* as we fix the tree, we might be deleting blocks that
6198 * we're tracking for repair. This hook makes sure we
6199 * remove any backrefs for blocks as we are fixing them.
/*
 * Called when repair code frees an extent: mirror the on-disk ref drop in
 * the in-memory fsck caches so we don't later "fix" backrefs for blocks we
 * deleted ourselves.  Handles data backrefs (owner is an inode number,
 * i.e. >= BTRFS_FIRST_FREE_OBJECTID) and tree backrefs separately.
 */
6201 static int free_extent_hook(struct btrfs_trans_handle *trans,
6202 struct btrfs_root *root,
6203 u64 bytenr, u64 num_bytes, u64 parent,
6204 u64 root_objectid, u64 owner, u64 offset,
6207 struct extent_record *rec;
6208 struct cache_extent *cache;
6210 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6212 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6213 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6217 rec = container_of(cache, struct extent_record, cache);
6219 struct data_backref *back;
6220 back = find_data_backref(rec, parent, root_objectid, owner,
6221 offset, 1, bytenr, num_bytes);
6224 if (back->node.found_ref) {
6225 back->found_ref -= refs_to_drop;
6227 rec->refs -= refs_to_drop;
6229 if (back->node.found_extent_tree) {
6230 back->num_refs -= refs_to_drop;
6231 if (rec->extent_item_refs)
6232 rec->extent_item_refs -= refs_to_drop;
6234 if (back->found_ref == 0)
6235 back->node.found_ref = 0;
6236 if (back->num_refs == 0)
6237 back->node.found_extent_tree = 0;
/*
 * NOTE(review): upstream frees the backref only once BOTH found_extent_tree
 * and found_ref are clear; the '!' on found_ref looks lost in this elided
 * listing — verify against the full source.
 */
6239 if (!back->node.found_extent_tree && back->node.found_ref) {
6240 list_del(&back->node.list);
6244 struct tree_backref *back;
6245 back = find_tree_backref(rec, parent, root_objectid);
6248 if (back->node.found_ref) {
6251 back->node.found_ref = 0;
6253 if (back->node.found_extent_tree) {
6254 if (rec->extent_item_refs)
6255 rec->extent_item_refs--;
6256 back->node.found_extent_tree = 0;
/* NOTE(review): same suspected lost '!' as the data-backref branch above */
6258 if (!back->node.found_extent_tree && back->node.found_ref) {
6259 list_del(&back->node.list);
/* drop the whole record if nothing references it any more */
6263 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item (extent/metadata items and
 * all backref item types) recorded for @bytenr, updating block group
 * accounting for the EXTENT/METADATA items that actually carried bytes.
 * Searches repeatedly from (bytenr, 0, -1) walking keys downward.
 */
6268 static int delete_extent_records(struct btrfs_trans_handle *trans,
6269 struct btrfs_root *root,
6270 struct btrfs_path *path,
6271 u64 bytenr, u64 new_len)
6273 struct btrfs_key key;
6274 struct btrfs_key found_key;
6275 struct extent_buffer *leaf;
6280 key.objectid = bytenr;
6282 key.offset = (u64)-1;
6285 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6292 if (path->slots[0] == 0)
6298 leaf = path->nodes[0];
6299 slot = path->slots[0];
6301 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6302 if (found_key.objectid != bytenr)
/* only the extent item itself and backref item types get deleted */
6305 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6306 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6307 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6308 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6309 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6310 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6311 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* uninteresting item: step the search key down and try again */
6312 btrfs_release_path(path);
6313 if (found_key.type == 0) {
6314 if (found_key.offset == 0)
6316 key.offset = found_key.offset - 1;
6317 key.type = found_key.type;
6319 key.type = found_key.type - 1;
6320 key.offset = (u64)-1;
6324 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6325 found_key.objectid, found_key.type, found_key.offset);
6327 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6330 btrfs_release_path(path);
6332 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6333 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM offset is a level, not a size; use the node size */
6334 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6335 found_key.offset : root->leafsize;
6337 ret = btrfs_update_block_group(trans, root, bytenr,
6344 btrfs_release_path(path);
6349 * for a single backref, this will allocate a new extent
6350 * and add the backref to it.
/*
 * Repair helper: (re-)insert the extent item for @rec into the extent tree
 * (unless @allocated says it already exists) and then add the backref
 * described by @back via btrfs_inc_extent_ref(), once per found reference
 * for data backrefs.
 *
 * Fix applied (review): the address-of expressions "&copy_key" had been
 * corrupted into the HTML entity "©_key" by a bad encoding round-trip;
 * restored on all four lines.  Also dropped a stray double semicolon after
 * the copy_key declaration.
 *
 * NOTE(review): this listing is elided (embedded original line numbers with
 * gaps), so some branches/returns are not visible here.
 */
6352 static int record_extent(struct btrfs_trans_handle *trans,
6353 struct btrfs_fs_info *info,
6354 struct btrfs_path *path,
6355 struct extent_record *rec,
6356 struct extent_backref *back,
6357 int allocated, u64 flags)
6360 struct btrfs_root *extent_root = info->extent_root;
6361 struct extent_buffer *leaf;
6362 struct btrfs_key ins_key;
6363 struct btrfs_extent_item *ei;
6364 struct tree_backref *tback;
6365 struct data_backref *dback;
6366 struct btrfs_tree_block_info *bi;
/* metadata extents must be at least one tree block in size */
6369 rec->max_size = max_t(u64, rec->max_size,
6370 info->extent_root->leafsize);
6373 u32 item_size = sizeof(*ei);
6376 item_size += sizeof(*bi);
6378 ins_key.objectid = rec->start;
6379 ins_key.offset = rec->max_size;
6380 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6382 ret = btrfs_insert_empty_item(trans, extent_root, path,
6383 &ins_key, item_size);
6387 leaf = path->nodes[0];
6388 ei = btrfs_item_ptr(leaf, path->slots[0],
6389 struct btrfs_extent_item);
6391 btrfs_set_extent_refs(leaf, ei, 0);
6392 btrfs_set_extent_generation(leaf, ei, rec->generation);
6394 if (back->is_data) {
6395 btrfs_set_extent_flags(leaf, ei,
6396 BTRFS_EXTENT_FLAG_DATA);
6398 struct btrfs_disk_key copy_key;
6400 tback = (struct tree_backref *)back;
6401 bi = (struct btrfs_tree_block_info *)(ei + 1);
6402 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* synthesize the tree_block_info key; type/offset are unknown here */
6405 btrfs_set_disk_key_objectid(&copy_key,
6406 rec->info_objectid);
6407 btrfs_set_disk_key_type(&copy_key, 0);
6408 btrfs_set_disk_key_offset(&copy_key, 0);
6410 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6411 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6413 btrfs_set_extent_flags(leaf, ei,
6414 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6417 btrfs_mark_buffer_dirty(leaf);
6418 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6419 rec->max_size, 1, 0);
6422 btrfs_release_path(path);
6425 if (back->is_data) {
6429 dback = (struct data_backref *)back;
6430 if (back->full_backref)
6431 parent = dback->parent;
/* one inc_extent_ref call per reference we actually found in the fs */
6435 for (i = 0; i < dback->found_ref; i++) {
6436 /* if parent != 0, we're doing a full backref
6437 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6438 * just makes the backref allocator create a data
6441 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6442 rec->start, rec->max_size,
6446 BTRFS_FIRST_FREE_OBJECTID :
6452 fprintf(stderr, "adding new data backref"
6453 " on %llu %s %llu owner %llu"
6454 " offset %llu found %d\n",
6455 (unsigned long long)rec->start,
6456 back->full_backref ?
6458 back->full_backref ?
6459 (unsigned long long)parent :
6460 (unsigned long long)dback->root,
6461 (unsigned long long)dback->owner,
6462 (unsigned long long)dback->offset,
6467 tback = (struct tree_backref *)back;
6468 if (back->full_backref)
6469 parent = tback->parent;
6473 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6474 rec->start, rec->max_size,
6475 parent, tback->root, 0, 0);
6476 fprintf(stderr, "adding new tree backref on "
6477 "start %llu len %llu parent %llu root %llu\n",
6478 rec->start, rec->max_size, parent, tback->root);
6481 btrfs_release_path(path);
/*
 * One candidate (bytenr, bytes) value voted for by a set of data backrefs;
 * linked into a local list by verify_backrefs().  The bytenr/bytes/count/
 * broken fields are elided from this listing.
 */
6485 struct extent_entry {
6490 struct list_head list;
/*
 * Return the extent_entry in @entries matching (@bytenr, @bytes) exactly,
 * or NULL (the no-match return is elided from this listing).
 */
6493 static struct extent_entry *find_entry(struct list_head *entries,
6494 u64 bytenr, u64 bytes)
6496 struct extent_entry *entry = NULL;
6498 list_for_each_entry(entry, entries, list) {
6499 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry with the highest vote count from @entries, ignoring
 * entries whose references are all broken; returns NULL when the top
 * candidates are tied (an even split the caller must break another way).
 */
6506 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6508 struct extent_entry *entry, *best = NULL, *prev = NULL;
6510 list_for_each_entry(entry, entries, list) {
6517 * If there are as many broken entries as entries then we know
6518 * not to trust this particular entry.
6520 if (entry->broken == entry->count)
6524 * If our current entry == best then we can't be sure our best
6525 * is really the best, so we need to keep searching.
6527 if (best && best->count == entry->count) {
6533 /* Prev == entry, not good enough, have to keep searching */
6534 if (!prev->broken && prev->count == entry->count)
6538 best = (prev->count > entry->count) ? prev : entry;
6539 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that its disk_bytenr/disk_num_bytes (and
 * extent offset) agree with the winning extent_entry @entry.  Looks the
 * file extent up via the owning root/inode recorded in @dback, then cows
 * down with a transaction and patches the item in place.  Bails out (with
 * a request for a btrfs-image) on the cases the repair code can't handle:
 * compressed extents whose start moved, or refs outside the entry range.
 *
 * NOTE(review): elided listing — the item-walk loop construct and several
 * error-path returns are not visible here.
 */
6547 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6548 struct data_backref *dback, struct extent_entry *entry)
6550 struct btrfs_trans_handle *trans;
6551 struct btrfs_root *root;
6552 struct btrfs_file_extent_item *fi;
6553 struct extent_buffer *leaf;
6554 struct btrfs_key key;
6558 key.objectid = dback->root;
6559 key.type = BTRFS_ROOT_ITEM_KEY;
6560 key.offset = (u64)-1;
6561 root = btrfs_read_fs_root(info, &key);
6563 fprintf(stderr, "Couldn't find root for our ref\n");
6568 * The backref points to the original offset of the extent if it was
6569 * split, so we need to search down to the offset we have and then walk
6570 * forward until we find the backref we're looking for.
6572 key.objectid = dback->owner;
6573 key.type = BTRFS_EXTENT_DATA_KEY;
6574 key.offset = dback->offset;
6575 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6577 fprintf(stderr, "Error looking up ref %d\n", ret);
6582 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6583 ret = btrfs_next_leaf(root, path);
6585 fprintf(stderr, "Couldn't find our ref, next\n");
6589 leaf = path->nodes[0];
6590 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6591 if (key.objectid != dback->owner ||
6592 key.type != BTRFS_EXTENT_DATA_KEY) {
6593 fprintf(stderr, "Couldn't find our ref, search\n");
6596 fi = btrfs_item_ptr(leaf, path->slots[0],
6597 struct btrfs_file_extent_item);
6598 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6599 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* stop once the item matching the recorded backref is found */
6601 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6606 btrfs_release_path(path);
6608 trans = btrfs_start_transaction(root, 1);
6610 return PTR_ERR(trans);
6613 * Ok we have the key of the file extent we want to fix, now we can cow
6614 * down to the thing and fix it.
6616 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6618 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6619 key.objectid, key.type, key.offset, ret);
6623 fprintf(stderr, "Well that's odd, we just found this key "
6624 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6629 leaf = path->nodes[0];
6630 fi = btrfs_item_ptr(leaf, path->slots[0],
6631 struct btrfs_file_extent_item);
/* compressed extents can't have their start moved safely: give up */
6633 if (btrfs_file_extent_compression(leaf, fi) &&
6634 dback->disk_bytenr != entry->bytenr) {
6635 fprintf(stderr, "Ref doesn't match the record start and is "
6636 "compressed, please take a btrfs-image of this file "
6637 "system and send it to a btrfs developer so they can "
6638 "complete this functionality for bytenr %Lu\n",
6639 dback->disk_bytenr);
/* broken backref: just point it at the agreed-upon extent start */
6644 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6645 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6646 } else if (dback->disk_bytenr > entry->bytenr) {
6647 u64 off_diff, offset;
/* extent start moved left: grow the file-extent offset to compensate */
6649 off_diff = dback->disk_bytenr - entry->bytenr;
6650 offset = btrfs_file_extent_offset(leaf, fi);
6651 if (dback->disk_bytenr + offset +
6652 btrfs_file_extent_num_bytes(leaf, fi) >
6653 entry->bytenr + entry->bytes) {
6654 fprintf(stderr, "Ref is past the entry end, please "
6655 "take a btrfs-image of this file system and "
6656 "send it to a btrfs developer, ref %Lu\n",
6657 dback->disk_bytenr);
6662 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6663 btrfs_set_file_extent_offset(leaf, fi, offset);
6664 } else if (dback->disk_bytenr < entry->bytenr) {
/* extent start moved right: shrink the file-extent offset */
6667 offset = btrfs_file_extent_offset(leaf, fi);
6668 if (dback->disk_bytenr + offset < entry->bytenr) {
6669 fprintf(stderr, "Ref is before the entry start, please"
6670 " take a btrfs-image of this file system and "
6671 "send it to a btrfs developer, ref %Lu\n",
6672 dback->disk_bytenr);
6677 offset += dback->disk_bytenr;
6678 offset -= entry->bytenr;
6679 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6680 btrfs_set_file_extent_offset(leaf, fi, offset);
6683 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6686 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6687 * only do this if we aren't using compression, otherwise it's a
6690 if (!btrfs_file_extent_compression(leaf, fi))
6691 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6693 printf("ram bytes may be wrong?\n");
6694 btrfs_mark_buffer_dirty(leaf);
6696 err = btrfs_commit_transaction(trans, root);
6697 btrfs_release_path(path);
6698 return ret ? ret : err;
/*
 * When the data backrefs of @rec disagree about the extent's (bytenr,
 * bytes), take a vote: collect one extent_entry per distinct value, let
 * the extent record itself break ties, then call repair_ref() on every
 * backref that doesn't match the winner.  Any actual repair invalidates
 * the in-memory caches, so the caller must rescan afterwards.
 *
 * NOTE(review): elided listing — tie-handling branches, some counters and
 * the exit label/cleanup tail are only partially visible.
 */
6701 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6702 struct extent_record *rec)
6704 struct extent_backref *back;
6705 struct data_backref *dback;
6706 struct extent_entry *entry, *best = NULL;
6709 int broken_entries = 0;
6714 * Metadata is easy and the backrefs should always agree on bytenr and
6715 * size, if not we've got bigger issues.
/* pass 1: tally one entry per distinct (bytenr, bytes) the backrefs claim */
6720 list_for_each_entry(back, &rec->backrefs, list) {
6721 if (back->full_backref || !back->is_data)
6724 dback = (struct data_backref *)back;
6727 * We only pay attention to backrefs that we found a real
6730 if (dback->found_ref == 0)
6734 * For now we only catch when the bytes don't match, not the
6735 * bytenr. We can easily do this at the same time, but I want
6736 * to have a fs image to test on before we just add repair
6737 * functionality willy-nilly so we know we won't screw up the
6741 entry = find_entry(&entries, dback->disk_bytenr,
6744 entry = malloc(sizeof(struct extent_entry));
6749 memset(entry, 0, sizeof(*entry));
6750 entry->bytenr = dback->disk_bytenr;
6751 entry->bytes = dback->bytes;
6752 list_add_tail(&entry->list, &entries);
6757 * If we only have one entry we may think the entries agree when
6758 * in reality they don't so we have to do some extra checking.
6760 if (dback->disk_bytenr != rec->start ||
6761 dback->bytes != rec->nr || back->broken)
6772 /* Yay all the backrefs agree, carry on good sir */
6773 if (nr_entries <= 1 && !mismatch)
6776 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6777 "%Lu\n", rec->start);
6780 * First we want to see if the backrefs can agree amongst themselves who
6781 * is right, so figure out which one of the entries has the highest
6784 best = find_most_right_entry(&entries);
6787 * Ok so we may have an even split between what the backrefs think, so
6788 * this is where we use the extent ref to see what it thinks.
6791 entry = find_entry(&entries, rec->start, rec->nr);
6792 if (!entry && (!broken_entries || !rec->found_rec)) {
6793 fprintf(stderr, "Backrefs don't agree with each other "
6794 "and extent record doesn't agree with anybody,"
6795 " so we can't fix bytenr %Lu bytes %Lu\n",
6796 rec->start, rec->nr);
6799 } else if (!entry) {
6801 * Ok our backrefs were broken, we'll assume this is the
6802 * correct value and add an entry for this range.
6804 entry = malloc(sizeof(struct extent_entry));
6809 memset(entry, 0, sizeof(*entry));
6810 entry->bytenr = rec->start;
6811 entry->bytes = rec->nr;
6812 list_add_tail(&entry->list, &entries);
6816 best = find_most_right_entry(&entries);
6818 fprintf(stderr, "Backrefs and extent record evenly "
6819 "split on who is right, this is going to "
6820 "require user input to fix bytenr %Lu bytes "
6821 "%Lu\n", rec->start, rec->nr);
6828 * I don't think this can happen currently as we'll abort() if we catch
6829 * this case higher up, but in case somebody removes that we still can't
6830 * deal with it properly here yet, so just bail out if that's the case.
6832 if (best->bytenr != rec->start) {
6833 fprintf(stderr, "Extent start and backref starts don't match, "
6834 "please use btrfs-image on this file system and send "
6835 "it to a btrfs developer so they can make fsck fix "
6836 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6837 rec->start, rec->nr);
6843 * Ok great we all agreed on an extent record, let's go find the real
6844 * references and fix up the ones that don't match.
/* pass 2: repair every live backref that disagrees with the winner */
6846 list_for_each_entry(back, &rec->backrefs, list) {
6847 if (back->full_backref || !back->is_data)
6850 dback = (struct data_backref *)back;
6853 * Still ignoring backrefs that don't have a real ref attached
6856 if (dback->found_ref == 0)
6859 if (dback->bytes == best->bytes &&
6860 dback->disk_bytenr == best->bytenr)
6863 ret = repair_ref(info, path, dback, best);
6869 * Ok we messed with the actual refs, which means we need to drop our
6870 * entire cache and go back and rescan. I know this is a huge pain and
6871 * adds a lot of extra work, but it's the only way to be safe. Once all
6872 * the backrefs agree we may not need to do anything to the extent
/* free the vote list before returning */
6877 while (!list_empty(&entries)) {
6878 entry = list_entry(entries.next, struct extent_entry, list);
6879 list_del_init(&entry->list);
/*
 * Collapse a record that has exactly one duplicate and no real extent item
 * into the duplicate (which was built from a real BTRFS_EXTENT_ITEM_KEY),
 * merging backrefs and refs from any other overlapping records.  Returns 1
 * when the merge left no remaining duplicates, 0 otherwise.
 *
 * NOTE(review): elided listing — the lookup loop construct and some early
 * returns are not visible here.
 */
6885 static int process_duplicates(struct btrfs_root *root,
6886 struct cache_tree *extent_cache,
6887 struct extent_record *rec)
6889 struct extent_record *good, *tmp;
6890 struct cache_extent *cache;
6894 * If we found a extent record for this extent then return, or if we
6895 * have more than one duplicate we are likely going to need to delete
6898 if (rec->found_rec || rec->num_duplicates > 1)
6901 /* Shouldn't happen but just in case */
6902 BUG_ON(!rec->num_duplicates);
6905 * So this happens if we end up with a backref that doesn't match the
6906 * actual extent entry. So either the backref is bad or the extent
6907 * entry is bad. Either way we want to have the extent_record actually
6908 * reflect what we found in the extent_tree, so we need to take the
6909 * duplicate out and use that as the extent_record since the only way we
6910 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6912 remove_cache_extent(extent_cache, &rec->cache);
/* promote the (single) duplicate to be the canonical record */
6914 good = list_entry(rec->dups.next, struct extent_record, list);
6915 list_del_init(&good->list);
6916 INIT_LIST_HEAD(&good->backrefs);
6917 INIT_LIST_HEAD(&good->dups);
6918 good->cache.start = good->start;
6919 good->cache.size = good->nr;
6920 good->content_checked = 0;
6921 good->owner_ref_checked = 0;
6922 good->num_duplicates = 0;
6923 good->refs = rec->refs;
6924 list_splice_init(&rec->backrefs, &good->backrefs);
6926 cache = lookup_cache_extent(extent_cache, good->start,
6930 tmp = container_of(cache, struct extent_record, cache);
6933 * If we find another overlapping extent and it's found_rec is
6934 * set then it's a duplicate and we need to try and delete
6937 if (tmp->found_rec || tmp->num_duplicates > 0) {
6938 if (list_empty(&good->list))
6939 list_add_tail(&good->list,
6940 &duplicate_extents);
6941 good->num_duplicates += tmp->num_duplicates + 1;
6942 list_splice_init(&tmp->dups, &good->dups);
6943 list_del_init(&tmp->list);
6944 list_add_tail(&tmp->list, &good->dups);
6945 remove_cache_extent(extent_cache, &tmp->cache);
6950 * Ok we have another non extent item backed extent rec, so lets
6951 * just add it to this extent and carry on like we did above.
6953 good->refs += tmp->refs;
6954 list_splice_init(&tmp->backrefs, &good->backrefs);
6955 remove_cache_extent(extent_cache, &tmp->cache);
6958 ret = insert_cache_extent(extent_cache, &good->cache);
6961 return good->num_duplicates ? 0 : 1;
/*
 * Delete the overlapping duplicate EXTENT_ITEMs for @rec from the extent
 * tree, keeping only the record ("good") that covers all of the duplicates.
 * Returns the number of deleted entries (nr_del) on success, negative errno
 * on failure.  Bails out if the duplicates do not fully cover each other.
 * NOTE(review): listing has gaps; code kept verbatim.
 */
6964 static int delete_duplicate_records(struct btrfs_root *root,
6965 struct extent_record *rec)
6967 struct btrfs_trans_handle *trans;
6968 LIST_HEAD(delete_list);
6969 struct btrfs_path *path;
6970 struct extent_record *tmp, *good, *n;
6973 struct btrfs_key key;
6975 path = btrfs_alloc_path();
6982 /* Find the record that covers all of the duplicates. */
6983 list_for_each_entry(tmp, &rec->dups, list) {
6984 if (good->start < tmp->start)
6986 if (good->nr > tmp->nr)
6989 if (tmp->start + tmp->nr < good->start + good->nr) {
6990 fprintf(stderr, "Ok we have overlapping extents that "
6991 "aren't completely covered by eachother, this "
6992 "is going to require more careful thought. "
6993 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6994 tmp->start, tmp->nr, good->start, good->nr);
7001 list_add_tail(&rec->list, &delete_list);
7003 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7006 list_move_tail(&tmp->list, &delete_list);
7009 root = root->fs_info->extent_root;
7010 trans = btrfs_start_transaction(root, 1);
7011 if (IS_ERR(trans)) {
7012 ret = PTR_ERR(trans);
/* Remove each duplicate's EXTENT_ITEM from the extent tree. */
7016 list_for_each_entry(tmp, &delete_list, list) {
7017 if (tmp->found_rec == 0)
7019 key.objectid = tmp->start;
7020 key.type = BTRFS_EXTENT_ITEM_KEY;
7021 key.offset = tmp->nr;
7023 /* Shouldn't happen but just in case */
7024 if (tmp->metadata) {
7025 fprintf(stderr, "Well this shouldn't happen, extent "
7026 "record overlaps but is metadata? "
7027 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7031 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7037 ret = btrfs_del_item(trans, root, path);
7040 btrfs_release_path(path);
7043 err = btrfs_commit_transaction(trans, root);
/* Free the records that were queued for deletion. */
7047 while (!list_empty(&delete_list)) {
7048 tmp = list_entry(delete_list.next, struct extent_record, list);
7049 list_del_init(&tmp->list);
7055 while (!list_empty(&rec->dups)) {
7056 tmp = list_entry(rec->dups.next, struct extent_record, list);
7057 list_del_init(&tmp->list);
7061 btrfs_free_path(path);
7063 if (!ret && !nr_del)
7064 rec->num_duplicates = 0;
7066 return ret ? ret : nr_del;
/*
 * For each data backref of @rec that was not found in the extent tree, look
 * up the referenced file extent in its fs root.  If the on-disk file extent's
 * bytenr has no extent record in @extent_cache, adopt its disk_bytenr/bytes
 * into the backref so verify_backrefs can reconcile it with @rec.
 * NOTE(review): listing has gaps; code kept verbatim.
 */
7069 static int find_possible_backrefs(struct btrfs_fs_info *info,
7070 struct btrfs_path *path,
7071 struct cache_tree *extent_cache,
7072 struct extent_record *rec)
7074 struct btrfs_root *root;
7075 struct extent_backref *back;
7076 struct data_backref *dback;
7077 struct cache_extent *cache;
7078 struct btrfs_file_extent_item *fi;
7079 struct btrfs_key key;
7083 list_for_each_entry(back, &rec->backrefs, list) {
7084 /* Don't care about full backrefs (poor unloved backrefs) */
7085 if (back->full_backref || !back->is_data)
7088 dback = (struct data_backref *)back;
7090 /* We found this one, we don't need to do a lookup */
7091 if (dback->found_ref)
7094 key.objectid = dback->root;
7095 key.type = BTRFS_ROOT_ITEM_KEY;
7096 key.offset = (u64)-1;
7098 root = btrfs_read_fs_root(info, &key);
7100 /* No root, definitely a bad ref, skip */
7101 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7103 /* Other err, exit */
7105 return PTR_ERR(root);
/* Locate the file extent item this backref claims to come from. */
7107 key.objectid = dback->owner;
7108 key.type = BTRFS_EXTENT_DATA_KEY;
7109 key.offset = dback->offset;
7110 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7112 btrfs_release_path(path);
7115 /* Didn't find it, we can carry on */
7120 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7121 struct btrfs_file_extent_item);
7122 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7123 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7124 btrfs_release_path(path);
7125 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7127 struct extent_record *tmp;
7128 tmp = container_of(cache, struct extent_record, cache);
7131 * If we found an extent record for the bytenr for this
7132 * particular backref then we can't add it to our
7133 * current extent record. We only want to add backrefs
7134 * that don't have a corresponding extent item in the
7135 * extent tree since they likely belong to this record
7136 * and we need to fix it if it doesn't match bytenrs.
7142 dback->found_ref += 1;
7143 dback->disk_bytenr = bytenr;
7144 dback->bytes = bytes;
7147 * Set this so the verify backref code knows not to trust the
7148 * values in this backref.
7157 * Record orphan data ref into corresponding root.
7159 * Return 0 if the extent item contains data ref and recorded.
7160 * Return 1 if the extent item contains no useful data ref
7161 * In that case, it may contain only a shared_dataref or metadata backref,
7162 * or the file extent exists (this should be handled by the extent bytenr
7164 * Return <0 if something goes wrong.
7166 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7167 struct extent_record *rec)
7169 struct btrfs_key key;
7170 struct btrfs_root *dest_root;
7171 struct extent_backref *back;
7172 struct data_backref *dback;
7173 struct orphan_data_extent *orphan;
7174 struct btrfs_path *path;
7175 int recorded_data_ref = 0;
7180 path = btrfs_alloc_path();
/* Walk the data backrefs that exist in the extent tree but have no ref. */
7183 list_for_each_entry(back, &rec->backrefs, list) {
7184 if (back->full_backref || !back->is_data ||
7185 !back->found_extent_tree)
7187 dback = (struct data_backref *)back;
7188 if (dback->found_ref)
7190 key.objectid = dback->root;
7191 key.type = BTRFS_ROOT_ITEM_KEY;
7192 key.offset = (u64)-1;
7194 dest_root = btrfs_read_fs_root(fs_info, &key);
7196 /* For non-exist root we just skip it */
7197 if (IS_ERR(dest_root) || !dest_root)
7200 key.objectid = dback->owner;
7201 key.type = BTRFS_EXTENT_DATA_KEY;
7202 key.offset = dback->offset;
7204 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7206 * For ret < 0, it's OK since the fs-tree may be corrupted,
7207 * we need to record it for inode/file extent rebuild.
7208 * For ret > 0, we record it only for file extent rebuild.
7209 * For ret == 0, the file extent exists but only bytenr
7210 * mismatch, let the original bytenr fix routine to handle,
/* Queue an orphan record on the destination root for later rebuild. */
7216 orphan = malloc(sizeof(*orphan));
7221 INIT_LIST_HEAD(&orphan->list);
7222 orphan->root = dback->root;
7223 orphan->objectid = dback->owner;
7224 orphan->offset = dback->offset;
7225 orphan->disk_bytenr = rec->cache.start;
7226 orphan->disk_len = rec->cache.size;
7227 list_add(&dest_root->orphan_data_extents, &orphan->list);
7228 recorded_data_ref = 1;
7231 btrfs_free_path(path);
7233 return !recorded_data_ref;
7239 * when an incorrect extent item is found, this will delete
7240 * all of the existing entries for it and recreate them
7241 * based on what the tree scan found.
7243 static int fixup_extent_refs(struct btrfs_fs_info *info,
7244 struct cache_tree *extent_cache,
7245 struct extent_record *rec)
7247 struct btrfs_trans_handle *trans = NULL;
7249 struct btrfs_path *path;
7250 struct list_head *cur = rec->backrefs.next;
7251 struct cache_extent *cache;
7252 struct extent_backref *back;
7256 if (rec->flag_block_full_backref)
7257 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7259 path = btrfs_alloc_path();
7263 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7265 * Sometimes the backrefs themselves are so broken they don't
7266 * get attached to any meaningful rec, so first go back and
7267 * check any of our backrefs that we couldn't find and throw
7268 * them into the list if we find the backref so that
7269 * verify_backrefs can figure out what to do.
7271 ret = find_possible_backrefs(info, path, extent_cache, rec);
7276 /* step one, make sure all of the backrefs agree */
7277 ret = verify_backrefs(info, path, rec);
7281 trans = btrfs_start_transaction(info->extent_root, 1);
7282 if (IS_ERR(trans)) {
7283 ret = PTR_ERR(trans);
7287 /* step two, delete all the existing records */
7288 ret = delete_extent_records(trans, info->extent_root, path,
7289 rec->start, rec->max_size);
7294 /* was this block corrupt? If so, don't add references to it */
7295 cache = lookup_cache_extent(info->corrupt_blocks,
7296 rec->start, rec->max_size);
7302 /* step three, recreate all the refs we did find */
7303 while(cur != &rec->backrefs) {
7304 back = list_entry(cur, struct extent_backref, list);
7308 * if we didn't find any references, don't create a
7311 if (!back->found_ref)
7314 rec->bad_full_backref = 0;
7315 ret = record_extent(trans, info, path, rec, back, allocated, flags);
/* Commit whatever we managed to rebuild, even on partial failure. */
7323 int err = btrfs_commit_transaction(trans, info->extent_root);
7328 btrfs_free_path(path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item for @rec,
 * according to rec->flag_block_full_backref, inside its own transaction.
 * Returns the commit result, or a negative errno on lookup/start failure.
 */
7332 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7333 struct extent_record *rec)
7335 struct btrfs_trans_handle *trans;
7336 struct btrfs_root *root = fs_info->extent_root;
7337 struct btrfs_path *path;
7338 struct btrfs_extent_item *ei;
7339 struct btrfs_key key;
7343 key.objectid = rec->start;
7344 if (rec->metadata) {
/* Metadata may be keyed by METADATA_ITEM (offset = level). */
7345 key.type = BTRFS_METADATA_ITEM_KEY;
7346 key.offset = rec->info_level;
7348 key.type = BTRFS_EXTENT_ITEM_KEY;
7349 key.offset = rec->max_size;
7352 path = btrfs_alloc_path();
7356 trans = btrfs_start_transaction(root, 0);
7357 if (IS_ERR(trans)) {
7358 btrfs_free_path(path);
7359 return PTR_ERR(trans);
7362 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7364 btrfs_free_path(path);
7365 btrfs_commit_transaction(trans, root);
7368 fprintf(stderr, "Didn't find extent for %llu\n",
7369 (unsigned long long)rec->start);
7370 btrfs_free_path(path);
7371 btrfs_commit_transaction(trans, root);
7375 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7376 struct btrfs_extent_item);
7377 flags = btrfs_extent_flags(path->nodes[0], ei);
7378 if (rec->flag_block_full_backref) {
7379 fprintf(stderr, "setting full backref on %llu\n",
7380 (unsigned long long)key.objectid);
7381 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7383 fprintf(stderr, "clearing full backref on %llu\n",
7384 (unsigned long long)key.objectid);
7385 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7387 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7388 btrfs_mark_buffer_dirty(path->nodes[0]);
7389 btrfs_free_path(path);
7390 return btrfs_commit_transaction(trans, root);
7393 /* right now we only prune from the extent allocation tree */
/*
 * Delete the parent's pointer to one corrupt block: search down to the
 * parent level of the corrupt block, find the slot whose blockptr matches
 * corrupt->cache.start (retrying the search if needed), then del_ptr it.
 * NOTE(review): listing has gaps; code kept verbatim.
 */
7394 static int prune_one_block(struct btrfs_trans_handle *trans,
7395 struct btrfs_fs_info *info,
7396 struct btrfs_corrupt_block *corrupt)
7399 struct btrfs_path path;
7400 struct extent_buffer *eb;
7404 int level = corrupt->level + 1;
7406 btrfs_init_path(&path);
7408 /* we want to stop at the parent to our busted block */
7409 path.lowest_level = level;
7411 ret = btrfs_search_slot(trans, info->extent_root,
7412 &corrupt->key, &path, -1, 1);
7417 eb = path.nodes[level];
7424 * hopefully the search gave us the block we want to prune,
7425 * lets try that first
7427 slot = path.slots[level];
7428 found = btrfs_node_blockptr(eb, slot);
7429 if (found == corrupt->cache.start)
7432 nritems = btrfs_header_nritems(eb);
7434 /* the search failed, lets scan this node and hope we find it */
7435 for (slot = 0; slot < nritems; slot++) {
7436 found = btrfs_node_blockptr(eb, slot);
7437 if (found == corrupt->cache.start)
7441 * we couldn't find the bad block. TODO, search all the nodes for pointers
7444 if (eb == info->extent_root->node) {
7449 btrfs_release_path(&path);
7454 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7455 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7458 btrfs_release_path(&path);
/*
 * Walk fs_info->corrupt_blocks and prune each recorded corrupt block via
 * prune_one_block(), removing it from the cache; commits the transaction.
 * The transaction is started lazily on the first corrupt entry.
 */
7462 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7464 struct btrfs_trans_handle *trans = NULL;
7465 struct cache_extent *cache;
7466 struct btrfs_corrupt_block *corrupt;
7469 cache = search_cache_extent(info->corrupt_blocks, 0);
7473 trans = btrfs_start_transaction(info->extent_root, 1);
7475 return PTR_ERR(trans);
7477 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7478 prune_one_block(trans, info, corrupt);
7479 remove_cache_extent(info->corrupt_blocks, cache);
7482 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear all EXTENT_DIRTY ranges in the free space cache and walk the block
 * groups starting at each cleared range so their cached state is reset.
 * NOTE(review): listing has gaps; the loop structure is not fully visible.
 */
7486 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7488 struct btrfs_block_group_cache *cache;
7493 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7494 &start, &end, EXTENT_DIRTY);
7497 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7503 cache = btrfs_lookup_first_block_group(fs_info, start);
7508 start = cache->key.objectid + cache->key.offset;
/*
 * Main per-extent verification pass: walk every extent_record in
 * @extent_cache, report (and with --repair, fix) duplicate extent items,
 * ref-count mismatches, backpointer mismatches, owner-ref failures, bad
 * full-backref flags, stripe-crossing metadata and wrong chunk types.
 * In repair mode, problem ranges are first pinned via excluded_extents so
 * repairs never allocate from them, and block accounting is fixed up at the
 * end.  check_block can return -EAGAIN when it fixes something; that value
 * is used to restart the scan.
 * NOTE(review): listing has gaps (labels, loop heads, some branches are not
 * visible); code kept verbatim.
 */
7512 static int check_extent_refs(struct btrfs_root *root,
7513 struct cache_tree *extent_cache)
7515 struct extent_record *rec;
7516 struct cache_extent *cache;
7525 * if we're doing a repair, we have to make sure
7526 * we don't allocate from the problem extents.
7527 * In the worst case, this will be all the
7530 cache = search_cache_extent(extent_cache, 0);
7532 rec = container_of(cache, struct extent_record, cache);
7533 set_extent_dirty(root->fs_info->excluded_extents,
7535 rec->start + rec->max_size - 1,
7537 cache = next_cache_extent(cache);
7540 /* pin down all the corrupted blocks too */
7541 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7543 set_extent_dirty(root->fs_info->excluded_extents,
7545 cache->start + cache->size - 1,
7547 cache = next_cache_extent(cache);
7549 prune_corrupt_blocks(root->fs_info);
7550 reset_cached_block_groups(root->fs_info);
7553 reset_cached_block_groups(root->fs_info);
7556 * We need to delete any duplicate entries we find first otherwise we
7557 * could mess up the extent tree when we have backrefs that actually
7558 * belong to a different extent item and not the weird duplicate one.
7560 while (repair && !list_empty(&duplicate_extents)) {
7561 rec = list_entry(duplicate_extents.next, struct extent_record,
7563 list_del_init(&rec->list);
7565 /* Sometimes we can find a backref before we find an actual
7566 * extent, so we need to process it a little bit to see if there
7567 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7568 * if this is a backref screwup. If we need to delete stuff
7569 * process_duplicates() will return 0, otherwise it will return
7572 if (process_duplicates(root, extent_cache, rec))
7574 ret = delete_duplicate_records(root, rec);
7578 * delete_duplicate_records will return the number of entries
7579 * deleted, so if it's greater than 0 then we know we actually
7580 * did something and we need to remove.
/* Main verification loop over every cached extent record. */
7594 cache = search_cache_extent(extent_cache, 0);
7597 rec = container_of(cache, struct extent_record, cache);
7598 if (rec->num_duplicates) {
7599 fprintf(stderr, "extent item %llu has multiple extent "
7600 "items\n", (unsigned long long)rec->start);
7605 if (rec->refs != rec->extent_item_refs) {
7606 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7607 (unsigned long long)rec->start,
7608 (unsigned long long)rec->nr);
7609 fprintf(stderr, "extent item %llu, found %llu\n",
7610 (unsigned long long)rec->extent_item_refs,
7611 (unsigned long long)rec->refs);
7612 ret = record_orphan_data_extents(root->fs_info, rec);
7619 * we can't use the extent to repair file
7620 * extent, let the fallback method handle it.
7622 if (!fixed && repair) {
7623 ret = fixup_extent_refs(
7634 if (all_backpointers_checked(rec, 1)) {
7635 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7636 (unsigned long long)rec->start,
7637 (unsigned long long)rec->nr);
7639 if (!fixed && !recorded && repair) {
7640 ret = fixup_extent_refs(root->fs_info,
7649 if (!rec->owner_ref_checked) {
7650 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7651 (unsigned long long)rec->start,
7652 (unsigned long long)rec->nr);
7653 if (!fixed && !recorded && repair) {
7654 ret = fixup_extent_refs(root->fs_info,
7663 if (rec->bad_full_backref) {
7664 fprintf(stderr, "bad full backref, on [%llu]\n",
7665 (unsigned long long)rec->start);
7667 ret = fixup_extent_flags(root->fs_info, rec);
7676 * Although it's not a extent ref's problem, we reuse this
7677 * routine for error reporting.
7678 * No repair function yet.
7680 if (rec->crossing_stripes) {
7682 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7683 rec->start, rec->start + rec->max_size);
7688 if (rec->wrong_chunk_type) {
7690 "bad extent [%llu, %llu), type mismatch with chunk\n",
7691 rec->start, rec->start + rec->max_size);
7696 remove_cache_extent(extent_cache, cache);
7697 free_all_extent_backrefs(rec);
7698 if (!init_extent_tree && repair && (!cur_err || fixed))
7699 clear_extent_dirty(root->fs_info->excluded_extents,
7701 rec->start + rec->max_size - 1,
7707 if (ret && ret != -EAGAIN) {
7708 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7711 struct btrfs_trans_handle *trans;
7713 root = root->fs_info->extent_root;
7714 trans = btrfs_start_transaction(root, 1);
7715 if (IS_ERR(trans)) {
7716 ret = PTR_ERR(trans);
7720 btrfs_fix_block_accounting(trans, root);
7721 ret = btrfs_commit_transaction(trans, root);
7726 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of @length bytes with
 * @num_stripes stripes, based on the RAID profile bits in @type:
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes   (two copies)
 *   RAID5:  length / (num_stripes - 1) (one parity stripe)
 *   RAID6:  length / (num_stripes - 2) (two parity stripes)
 *   otherwise (single/DUP/RAID1): length as-is
 */
7732 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7736 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7737 stripe_size = length;
7738 stripe_size /= num_stripes;
7739 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7740 stripe_size = length * 2;
7741 stripe_size /= num_stripes;
7742 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7743 stripe_size = length;
7744 stripe_size /= (num_stripes - 1);
7745 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7746 stripe_size = length;
7747 stripe_size /= (num_stripes - 2);
7749 stripe_size = length;
7755 * Check the chunk with its block group/dev list ref:
7756 * Return 0 if all refs seems valid.
7757 * Return 1 if part of refs seems valid, need later check for rebuild ref
7758 * like missing block group and needs to search extent tree to rebuild them.
7759 * Return -1 if essential refs are missing and unable to rebuild.
7761 static int check_chunk_refs(struct chunk_record *chunk_rec,
7762 struct block_group_tree *block_group_cache,
7763 struct device_extent_tree *dev_extent_cache,
7766 struct cache_extent *block_group_item;
7767 struct block_group_record *block_group_rec;
7768 struct cache_extent *dev_extent_item;
7769 struct device_extent_record *dev_extent_rec;
7773 int metadump_v2 = 0;
/* 1) cross-check the chunk against its block group item. */
7777 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7780 if (block_group_item) {
7781 block_group_rec = container_of(block_group_item,
7782 struct block_group_record,
7784 if (chunk_rec->length != block_group_rec->offset ||
7785 chunk_rec->offset != block_group_rec->objectid ||
7787 chunk_rec->type_flags != block_group_rec->flags)) {
7790 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7791 chunk_rec->objectid,
7796 chunk_rec->type_flags,
7797 block_group_rec->objectid,
7798 block_group_rec->type,
7799 block_group_rec->offset,
7800 block_group_rec->offset,
7801 block_group_rec->objectid,
7802 block_group_rec->flags);
7805 list_del_init(&block_group_rec->list);
7806 chunk_rec->bg_rec = block_group_rec;
7811 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7812 chunk_rec->objectid,
7817 chunk_rec->type_flags);
/* 2) cross-check every stripe against its device extent. */
7824 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7825 chunk_rec->num_stripes);
7826 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7827 devid = chunk_rec->stripes[i].devid;
7828 offset = chunk_rec->stripes[i].offset;
7829 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7830 devid, offset, length);
7831 if (dev_extent_item) {
7832 dev_extent_rec = container_of(dev_extent_item,
7833 struct device_extent_record,
7835 if (dev_extent_rec->objectid != devid ||
7836 dev_extent_rec->offset != offset ||
7837 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7838 dev_extent_rec->length != length) {
7841 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7842 chunk_rec->objectid,
7845 chunk_rec->stripes[i].devid,
7846 chunk_rec->stripes[i].offset,
7847 dev_extent_rec->objectid,
7848 dev_extent_rec->offset,
7849 dev_extent_rec->length);
7852 list_move(&dev_extent_rec->chunk_list,
7853 &chunk_rec->dextents);
7858 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7859 chunk_rec->objectid,
7862 chunk_rec->stripes[i].devid,
7863 chunk_rec->stripes[i].offset);
7870 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Verify every cached chunk against its block group and device extents via
 * check_chunk_refs(), sorting each chunk into @good, @rebuild or @bad lists
 * (when the caller provided them), then report orphan block groups and
 * orphan device extents that matched no chunk.  @silent suppresses output.
 */
7871 int check_chunks(struct cache_tree *chunk_cache,
7872 struct block_group_tree *block_group_cache,
7873 struct device_extent_tree *dev_extent_cache,
7874 struct list_head *good, struct list_head *bad,
7875 struct list_head *rebuild, int silent)
7877 struct cache_extent *chunk_item;
7878 struct chunk_record *chunk_rec;
7879 struct block_group_record *bg_rec;
7880 struct device_extent_record *dext_rec;
7884 chunk_item = first_cache_extent(chunk_cache);
7885 while (chunk_item) {
7886 chunk_rec = container_of(chunk_item, struct chunk_record,
7888 err = check_chunk_refs(chunk_rec, block_group_cache,
7889 dev_extent_cache, silent);
7892 if (err == 0 && good)
7893 list_add_tail(&chunk_rec->list, good);
7894 if (err > 0 && rebuild)
7895 list_add_tail(&chunk_rec->list, rebuild);
7897 list_add_tail(&chunk_rec->list, bad);
7898 chunk_item = next_cache_extent(chunk_item);
7901 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7904 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7912 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7916 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all device extents belonging to @dev_rec's devid and
 * compare the total against the device item's bytes_used; report a mismatch.
 */
7927 static int check_device_used(struct device_record *dev_rec,
7928 struct device_extent_tree *dext_cache)
7930 struct cache_extent *cache;
7931 struct device_extent_record *dev_extent_rec;
7934 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7936 dev_extent_rec = container_of(cache,
7937 struct device_extent_record,
/* Stop once the iteration leaves this device's extents. */
7939 if (dev_extent_rec->objectid != dev_rec->devid)
7942 list_del_init(&dev_extent_rec->device_list);
7943 total_byte += dev_extent_rec->length;
7944 cache = next_cache_extent(cache);
7947 if (total_byte != dev_rec->byte_used) {
7949 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7950 total_byte, dev_rec->byte_used, dev_rec->objectid,
7951 dev_rec->type, dev_rec->offset);
7958 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() for every cached device record, then report any
 * device extents that found no owning device.
 */
7959 static int check_devices(struct rb_root *dev_cache,
7960 struct device_extent_tree *dev_extent_cache)
7962 struct rb_node *dev_node;
7963 struct device_record *dev_rec;
7964 struct device_extent_record *dext_rec;
7968 dev_node = rb_first(dev_cache);
7970 dev_rec = container_of(dev_node, struct device_record, node);
7971 err = check_device_used(dev_rec, dev_extent_cache);
7975 dev_node = rb_next(dev_node);
7977 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7980 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7981 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (bytenr, level,
 * drop-progress state) and append it to @head for the later tree walk.
 * @drop_key may be NULL when the root is not mid-deletion.
 */
7988 static int add_root_item_to_list(struct list_head *head,
7989 u64 objectid, u64 bytenr, u64 last_snapshot,
7990 u8 level, u8 drop_level,
7991 int level_size, struct btrfs_key *drop_key)
7994 struct root_item_record *ri_rec;
7995 ri_rec = malloc(sizeof(*ri_rec));
7998 ri_rec->bytenr = bytenr;
7999 ri_rec->objectid = objectid;
8000 ri_rec->level = level;
8001 ri_rec->level_size = level_size;
8002 ri_rec->drop_level = drop_level;
8003 ri_rec->last_snapshot = last_snapshot;
8005 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8006 list_add_tail(&ri_rec->list, head);
/* Unlink and free every root_item_record remaining on @list. */
8011 static void free_root_item_list(struct list_head *list)
8013 struct root_item_record *ri_rec;
8015 while (!list_empty(list)) {
8016 ri_rec = list_first_entry(list, struct root_item_record,
8018 list_del_init(&ri_rec->list);
/*
 * Process every queued tree root on @list: read its node, add it to the
 * pending set, and pump run_next_block() until the per-root work drains,
 * then drain any remaining pending blocks with rec == NULL.
 * NOTE(review): listing has gaps; loop/exit structure is partially hidden.
 */
8023 static int deal_root_from_list(struct list_head *list,
8024 struct btrfs_root *root,
8025 struct block_info *bits,
8027 struct cache_tree *pending,
8028 struct cache_tree *seen,
8029 struct cache_tree *reada,
8030 struct cache_tree *nodes,
8031 struct cache_tree *extent_cache,
8032 struct cache_tree *chunk_cache,
8033 struct rb_root *dev_cache,
8034 struct block_group_tree *block_group_cache,
8035 struct device_extent_tree *dev_extent_cache)
8040 while (!list_empty(list)) {
8041 struct root_item_record *rec;
8042 struct extent_buffer *buf;
8043 rec = list_entry(list->next,
8044 struct root_item_record, list);
8046 buf = read_tree_block(root->fs_info->tree_root,
8047 rec->bytenr, rec->level_size, 0);
8048 if (!extent_buffer_uptodate(buf)) {
8049 free_extent_buffer(buf);
8053 add_root_to_pending(buf, extent_cache, pending,
8054 seen, nodes, rec->objectid);
8056 * To rebuild extent tree, we need deal with snapshot
8057 * one by one, otherwise we deal with node firstly which
8058 * can maximize readahead.
8061 ret = run_next_block(root, bits, bits_nr, &last,
8062 pending, seen, reada, nodes,
8063 extent_cache, chunk_cache,
8064 dev_cache, block_group_cache,
8065 dev_extent_cache, rec);
8069 free_extent_buffer(buf);
8070 list_del(&rec->list);
/* Drain whatever is still pending, no specific root record. */
8076 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8077 reada, nodes, extent_cache, chunk_cache,
8078 dev_cache, block_group_cache,
8079 dev_extent_cache, NULL);
/*
 * Top-level driver for the extent/chunk pass of btrfs check:
 *   - initialize all per-pass caches and install the fsck hooks on fs_info
 *   - queue the tree root, chunk root and every ROOT_ITEM from the tree
 *     root (splitting roots mid-deletion into dropping_trees)
 *   - walk all trees via deal_root_from_list()
 *   - verify chunks (check_chunks), extents (check_extent_refs) and
 *     devices (check_devices)
 *   - tear everything down again
 * check_block can return -EAGAIN when it fixes something; such returns are
 * used to restart the whole loop.
 * NOTE(review): listing has gaps (labels, error paths and some braces are
 * not visible); code kept verbatim.
 */
8089 static int check_chunks_and_extents(struct btrfs_root *root)
8091 struct rb_root dev_cache;
8092 struct cache_tree chunk_cache;
8093 struct block_group_tree block_group_cache;
8094 struct device_extent_tree dev_extent_cache;
8095 struct cache_tree extent_cache;
8096 struct cache_tree seen;
8097 struct cache_tree pending;
8098 struct cache_tree reada;
8099 struct cache_tree nodes;
8100 struct extent_io_tree excluded_extents;
8101 struct cache_tree corrupt_blocks;
8102 struct btrfs_path path;
8103 struct btrfs_key key;
8104 struct btrfs_key found_key;
8106 struct block_info *bits;
8108 struct extent_buffer *leaf;
8110 struct btrfs_root_item ri;
8111 struct list_head dropping_trees;
8112 struct list_head normal_trees;
8113 struct btrfs_root *root1;
8118 dev_cache = RB_ROOT;
8119 cache_tree_init(&chunk_cache);
8120 block_group_tree_init(&block_group_cache);
8121 device_extent_tree_init(&dev_extent_cache);
8123 cache_tree_init(&extent_cache);
8124 cache_tree_init(&seen);
8125 cache_tree_init(&pending);
8126 cache_tree_init(&nodes);
8127 cache_tree_init(&reada);
8128 cache_tree_init(&corrupt_blocks);
8129 extent_io_tree_init(&excluded_extents);
8130 INIT_LIST_HEAD(&dropping_trees);
8131 INIT_LIST_HEAD(&normal_trees);
/* Install fsck-only hooks so allocation/free paths report into our caches. */
8134 root->fs_info->excluded_extents = &excluded_extents;
8135 root->fs_info->fsck_extent_cache = &extent_cache;
8136 root->fs_info->free_extent_hook = free_extent_hook;
8137 root->fs_info->corrupt_blocks = &corrupt_blocks;
8141 bits = malloc(bits_nr * sizeof(struct block_info));
8147 if (ctx.progress_enabled) {
8148 ctx.tp = TASK_EXTENTS;
8149 task_start(ctx.info);
/* Queue the tree root and chunk root first. */
8153 root1 = root->fs_info->tree_root;
8154 level = btrfs_header_level(root1->node);
8155 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8156 root1->node->start, 0, level, 0,
8157 btrfs_level_size(root1, level), NULL);
8160 root1 = root->fs_info->chunk_root;
8161 level = btrfs_header_level(root1->node);
8162 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8163 root1->node->start, 0, level, 0,
8164 btrfs_level_size(root1, level), NULL);
8167 btrfs_init_path(&path);
8170 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8171 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
/* Enumerate every ROOT_ITEM in the tree root. */
8176 leaf = path.nodes[0];
8177 slot = path.slots[0];
8178 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8179 ret = btrfs_next_leaf(root, &path);
8182 leaf = path.nodes[0];
8183 slot = path.slots[0];
8185 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8186 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8187 unsigned long offset;
8190 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8191 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8192 last_snapshot = btrfs_root_last_snapshot(&ri);
8193 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8194 level = btrfs_root_level(&ri);
8195 level_size = btrfs_level_size(root, level);
8196 ret = add_root_item_to_list(&normal_trees,
8198 btrfs_root_bytenr(&ri),
8199 last_snapshot, level,
8200 0, level_size, NULL);
/* drop_progress set: the root is mid-deletion, walk it separately. */
8204 level = btrfs_root_level(&ri);
8205 level_size = btrfs_level_size(root, level);
8206 objectid = found_key.objectid;
8207 btrfs_disk_key_to_cpu(&found_key,
8209 ret = add_root_item_to_list(&dropping_trees,
8211 btrfs_root_bytenr(&ri),
8212 last_snapshot, level,
8214 level_size, &found_key);
8221 btrfs_release_path(&path);
8224 * check_block can return -EAGAIN if it fixes something, please keep
8225 * this in mind when dealing with return values from these functions, if
8226 * we get -EAGAIN we want to fall through and restart the loop.
8228 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8229 &seen, &reada, &nodes, &extent_cache,
8230 &chunk_cache, &dev_cache, &block_group_cache,
8237 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8238 &pending, &seen, &reada, &nodes,
8239 &extent_cache, &chunk_cache, &dev_cache,
8240 &block_group_cache, &dev_extent_cache);
8247 ret = check_chunks(&chunk_cache, &block_group_cache,
8248 &dev_extent_cache, NULL, NULL, NULL, 0);
8255 ret = check_extent_refs(root, &extent_cache);
8262 ret = check_devices(&dev_cache, &dev_extent_cache);
8267 task_stop(ctx.info);
/* Normal teardown: uninstall hooks and free all per-pass caches. */
8269 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8270 extent_io_tree_cleanup(&excluded_extents);
8271 root->fs_info->fsck_extent_cache = NULL;
8272 root->fs_info->free_extent_hook = NULL;
8273 root->fs_info->corrupt_blocks = NULL;
8274 root->fs_info->excluded_extents = NULL;
8277 free_chunk_cache_tree(&chunk_cache);
8278 free_device_cache_tree(&dev_cache);
8279 free_block_group_tree(&block_group_cache);
8280 free_device_extent_tree(&dev_extent_cache);
8281 free_extent_cache_tree(&seen);
8282 free_extent_cache_tree(&pending);
8283 free_extent_cache_tree(&reada);
8284 free_extent_cache_tree(&nodes);
/* Error/retry teardown path: same frees before looping or returning. */
8287 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8288 free_extent_cache_tree(&seen);
8289 free_extent_cache_tree(&pending);
8290 free_extent_cache_tree(&reada);
8291 free_extent_cache_tree(&nodes);
8292 free_chunk_cache_tree(&chunk_cache);
8293 free_block_group_tree(&block_group_cache);
8294 free_device_cache_tree(&dev_cache);
8295 free_device_extent_tree(&dev_extent_cache);
8296 free_extent_record_cache(root->fs_info, &extent_cache);
8297 free_root_item_list(&normal_trees);
8298 free_root_item_list(&dropping_trees);
8299 extent_io_tree_cleanup(&excluded_extents);
/*
 * Replace @root's node with a freshly allocated empty leaf/node: zero the
 * header, stamp level/bytenr/generation/owner and fsid/chunk-tree uuid,
 * then update the root item in the tree root and dirty-list the root.
 * Handles the corner case where the new block lands on the old bytenr.
 * NOTE(review): listing has gaps; code kept verbatim.
 */
8303 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8304 struct btrfs_root *root, int overwrite)
8306 struct extent_buffer *c;
8307 struct extent_buffer *old = root->node;
8310 struct btrfs_disk_key disk_key = {0,0,0};
8316 extent_buffer_get(c);
8319 c = btrfs_alloc_free_block(trans, root,
8320 btrfs_level_size(root, 0),
8321 root->root_key.objectid,
8322 &disk_key, level, 0, 0);
8325 extent_buffer_get(c);
8329 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8330 btrfs_set_header_level(c, level);
8331 btrfs_set_header_bytenr(c, c->start);
8332 btrfs_set_header_generation(c, trans->transid);
8333 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8334 btrfs_set_header_owner(c, root->root_key.objectid);
8336 write_extent_buffer(c, root->fs_info->fsid,
8337 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8339 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8340 btrfs_header_chunk_tree_uuid(c),
8343 btrfs_mark_buffer_dirty(c);
8345 * this case can happen in the following case:
8347 * 1.overwrite previous root.
8349 * 2.reinit reloc data root, this is because we skip pin
8350 * down reloc data tree before which means we can allocate
8351 * same block bytenr here.
8353 if (old->start == c->start) {
8354 btrfs_set_root_generation(&root->root_item,
8356 root->root_item.level = btrfs_header_level(root->node);
8357 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8358 &root->root_key, &root->root_item);
8360 free_extent_buffer(c);
8364 free_extent_buffer(old);
8366 add_root_to_dirty_list(root);
/*
 * Recursively pin every metadata block reachable from @eb so the
 * allocator cannot hand those bytenrs out while fsck rewrites the
 * extent tree.
 *
 * @tree_root: non-zero when walking the tree root; in that case the
 * referenced ROOT_ITEMs are followed into the subtrees they point to
 * (skipping the extent root and reloc roots, which are rebuilt anyway).
 *
 * Blocks already marked EXTENT_DIRTY in pinned_extents are skipped,
 * which both avoids infinite loops on broken trees and speeds things up.
 */
8370 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8371 struct extent_buffer *eb, int tree_root)
8373 struct extent_buffer *tmp;
8374 struct btrfs_root_item *ri;
8375 struct btrfs_key key;
8378 int level = btrfs_header_level(eb);
8384 * If we have pinned this block before, don't pin it again.
8385 * This can not only avoid forever loop with broken filesystem
8386 * but also give us some speedups.
8388 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8389 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8392 btrfs_pin_extent(fs_info, eb->start, eb->len);
8394 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8395 nritems = btrfs_header_nritems(eb);
8396 for (i = 0; i < nritems; i++) {
/* Leaf of the tree root: descend into each subvolume root it references. */
8398 btrfs_item_key_to_cpu(eb, &key, i);
8399 if (key.type != BTRFS_ROOT_ITEM_KEY)
8401 /* Skip the extent root and reloc roots */
8402 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8403 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8404 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8406 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8407 bytenr = btrfs_disk_root_bytenr(eb, ri);
8410 * If at any point we start needing the real root we
8411 * will have to build a stump root for the root we are
8412 * in, but for now this doesn't actually use the root so
8413 * just pass in extent_root.
8415 tmp = read_tree_block(fs_info->extent_root, bytenr,
8417 if (!extent_buffer_uptodate(tmp)) {
8418 fprintf(stderr, "Error reading root block\n");
8421 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8422 free_extent_buffer(tmp);
/* Interior node: recurse into each child pointer. */
8426 bytenr = btrfs_node_blockptr(eb, i);
8428 /* If we aren't the tree root don't read the block */
8429 if (level == 1 && !tree_root) {
/* Children are leaves with no root items: pin without reading. */
8430 btrfs_pin_extent(fs_info, bytenr, leafsize);
8434 tmp = read_tree_block(fs_info->extent_root, bytenr,
8436 if (!extent_buffer_uptodate(tmp)) {
8437 fprintf(stderr, "Error reading tree block\n");
8440 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8441 free_extent_buffer(tmp);
/*
 * Pin all metadata currently in use: first everything under the chunk
 * root, then everything reachable from the tree root (which in turn
 * walks the subvolume trees).  Called before the extent tree is wiped
 * so no live block gets reallocated.
 */
8450 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8454 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8458 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group cache from the chunk tree.
 *
 * Walks every CHUNK_ITEM, creates a matching in-memory block group and
 * marks its whole range free in free_space_cache, then iterates the
 * resulting groups (tail of the loop is elided).  The avail_*_alloc_bits
 * are cleared first because restored/broken images may carry alloc bits
 * that no chunk actually has.
 */
8461 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8463 struct btrfs_block_group_cache *cache;
8464 struct btrfs_path *path;
8465 struct extent_buffer *leaf;
8466 struct btrfs_chunk *chunk;
8467 struct btrfs_key key;
8471 path = btrfs_alloc_path();
8476 key.type = BTRFS_CHUNK_ITEM_KEY;
8479 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8481 btrfs_free_path(path);
8486 * We do this in case the block groups were screwed up and had alloc
8487 * bits that aren't actually set on the chunks. This happens with
8488 * restored images every time and could happen in real life I guess.
8490 fs_info->avail_data_alloc_bits = 0;
8491 fs_info->avail_metadata_alloc_bits = 0;
8492 fs_info->avail_system_alloc_bits = 0;
8494 /* First we need to create the in-memory block groups */
8496 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8497 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8499 btrfs_free_path(path);
8507 leaf = path->nodes[0];
8508 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8509 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8514 chunk = btrfs_item_ptr(leaf, path->slots[0],
8515 struct btrfs_chunk);
/* One block group per chunk; key.offset is the chunk's start bytenr. */
8516 btrfs_add_block_group(fs_info, 0,
8517 btrfs_chunk_type(leaf, chunk),
8518 key.objectid, key.offset,
8519 btrfs_chunk_length(leaf, chunk));
/* The whole chunk range starts out as free space. */
8520 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8521 key.offset + btrfs_chunk_length(leaf, chunk),
8527 cache = btrfs_lookup_first_block_group(fs_info, start);
8531 start = cache->key.objectid + cache->key.offset;
8534 btrfs_free_path(path);
/*
 * Cancel any pending balance operation left on disk.
 *
 * Deletes the BALANCE_ITEM from the tree root, batch-deletes all
 * TREE_RELOC root items, then re-reads and reinitializes the data reloc
 * tree (with a fresh top-level dir) so later repairs start from a clean
 * relocation state.  Several error checks and loop edges are elided.
 */
8538 static int reset_balance(struct btrfs_trans_handle *trans,
8539 struct btrfs_fs_info *fs_info)
8541 struct btrfs_root *root = fs_info->tree_root;
8542 struct btrfs_path *path;
8543 struct extent_buffer *leaf;
8544 struct btrfs_key key;
8545 int del_slot, del_nr = 0;
8549 path = btrfs_alloc_path();
/* Step 1: remove the pending balance item itself. */
8553 key.objectid = BTRFS_BALANCE_OBJECTID;
8554 key.type = BTRFS_BALANCE_ITEM_KEY;
8557 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8562 goto reinit_data_reloc;
8567 ret = btrfs_del_item(trans, root, path);
8570 btrfs_release_path(path);
/* Step 2: delete every TREE_RELOC root item, batching contiguous runs. */
8572 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8573 key.type = BTRFS_ROOT_ITEM_KEY;
8576 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8580 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8585 ret = btrfs_del_items(trans, root, path,
8592 btrfs_release_path(path);
8595 ret = btrfs_search_slot(trans, root, &key, path,
8602 leaf = path->nodes[0];
8603 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8604 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8606 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8611 del_slot = path->slots[0];
8620 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8624 btrfs_release_path(path);
/* Step 3: reinit the data reloc tree with an empty root + root dir. */
8627 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8628 key.type = BTRFS_ROOT_ITEM_KEY;
8629 key.offset = (u64)-1;
8630 root = btrfs_read_fs_root(fs_info, &key);
8632 fprintf(stderr, "Error reading data reloc tree\n");
8633 ret = PTR_ERR(root);
8636 record_root_in_trans(trans, root);
8637 ret = btrfs_fsck_reinit_root(trans, root, 0);
8640 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8642 btrfs_free_path(path);
/*
 * Wipe and rebuild the extent tree from scratch.
 *
 * Sequence: refuse mixed block groups (would need leaf-level scanning),
 * pin every in-use metadata block, drop and recreate the in-memory
 * block groups from the chunk tree, reinit the extent root, re-insert a
 * BLOCK_GROUP_ITEM for each group, and finally reset any pending
 * balance.  The extent records themselves are re-added later by the
 * normal fsck pass.  Returns 0 on success, negative errno on failure.
 */
8646 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8647 struct btrfs_fs_info *fs_info)
8653 * The only reason we don't do this is because right now we're just
8654 * walking the trees we find and pinning down their bytes, we don't look
8655 * at any of the leaves. In order to do mixed groups we'd have to check
8656 * the leaves of any fs roots and pin down the bytes for any file
8657 * extents we find. Not hard but why do it if we don't have to?
8659 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8660 fprintf(stderr, "We don't support re-initing the extent tree "
8661 "for mixed block groups yet, please notify a btrfs "
8662 "developer you want to do this so they can add this "
8663 "functionality.\n");
8668 * first we need to walk all of the trees except the extent tree and pin
8669 * down the bytes that are in use so we don't overwrite any existing
8672 ret = pin_metadata_blocks(fs_info);
8674 fprintf(stderr, "error pinning down used bytes\n");
8679 * Need to drop all the block groups since we're going to recreate all
8682 btrfs_free_block_groups(fs_info);
8683 ret = reset_block_groups(fs_info);
8685 fprintf(stderr, "error resetting the block groups\n");
8689 /* Ok we can allocate now, reinit the extent root */
8690 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8692 fprintf(stderr, "extent root initialization failed\n");
8694 * When the transaction code is updated we should end the
8695 * transaction, but for now progs only knows about commit so
8696 * just return an error.
8702 * Now we have all the in-memory block groups setup so we can make
8703 * allocations properly, and the metadata we care about is safe since we
8704 * pinned all of it above.
8707 struct btrfs_block_group_cache *cache;
8709 cache = btrfs_lookup_first_block_group(fs_info, start);
8712 start = cache->key.objectid + cache->key.offset;
/* Re-insert the on-disk BLOCK_GROUP_ITEM for this group. */
8713 ret = btrfs_insert_item(trans, fs_info->extent_root,
8714 &cache->key, &cache->item,
8715 sizeof(cache->item));
8717 fprintf(stderr, "Error adding block group\n");
8720 btrfs_extent_post_op(trans, fs_info->extent_root);
8723 ret = reset_balance(trans, fs_info);
8725 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of @eb by looking up its owner root and doing a
 * COW-enabled search (cow=1) down to the buffer's level, then committing.
 * Used in repair mode to rewrite blocks with bad transids.
 *
 * Returns the search result, or a negative errno if the owner root
 * cannot be read or resources cannot be allocated.
 */
8730 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8732 struct btrfs_path *path;
8733 struct btrfs_trans_handle *trans;
8734 struct btrfs_key key;
8737 printf("Recowing metadata block %llu\n", eb->start);
/* Resolve the subvolume root that owns this buffer. */
8738 key.objectid = btrfs_header_owner(eb);
8739 key.type = BTRFS_ROOT_ITEM_KEY;
8740 key.offset = (u64)-1;
8742 root = btrfs_read_fs_root(root->fs_info, &key);
8744 fprintf(stderr, "Couldn't find owner root %llu\n",
8746 return PTR_ERR(root);
8749 path = btrfs_alloc_path();
8753 trans = btrfs_start_transaction(root, 1);
8754 if (IS_ERR(trans)) {
8755 btrfs_free_path(path);
8756 return PTR_ERR(trans);
/* Search only down to the buffer's own level so the COW touches it. */
8759 path->lowest_level = btrfs_header_level(eb);
8760 if (path->lowest_level)
8761 btrfs_node_key_to_cpu(eb, &key, 0);
8763 btrfs_item_key_to_cpu(eb, &key, 0);
8765 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8766 btrfs_commit_transaction(trans, root);
8767 btrfs_free_path(path);
/*
 * Delete one recorded bad item (@bad) from the tree identified by
 * bad->root_id: read the owner root, start a transaction, search for
 * the exact key with cow=1 and remove it, then commit.
 *
 * Returns 0 on success or a negative errno (some error checks elided).
 */
8771 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8773 struct btrfs_path *path;
8774 struct btrfs_trans_handle *trans;
8775 struct btrfs_key key;
8778 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8779 bad->key.type, bad->key.offset);
8780 key.objectid = bad->root_id;
8781 key.type = BTRFS_ROOT_ITEM_KEY;
8782 key.offset = (u64)-1;
8784 root = btrfs_read_fs_root(root->fs_info, &key);
8786 fprintf(stderr, "Couldn't find owner root %llu\n",
8788 return PTR_ERR(root);
8791 path = btrfs_alloc_path();
8795 trans = btrfs_start_transaction(root, 1);
8796 if (IS_ERR(trans)) {
8797 btrfs_free_path(path);
8798 return PTR_ERR(trans);
/* ins_len=-1, cow=1: position on the item for deletion. */
8801 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8807 ret = btrfs_del_item(trans, root, path);
8809 btrfs_commit_transaction(trans, root);
8810 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the superblock's log root pointer and
 * level, then committing a transaction to persist the change.  Done in
 * repair mode since a forced commit would break log replay on mount.
 */
8814 static int zero_log_tree(struct btrfs_root *root)
8816 struct btrfs_trans_handle *trans;
8819 trans = btrfs_start_transaction(root, 1);
8820 if (IS_ERR(trans)) {
8821 ret = PTR_ERR(trans);
8824 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8825 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8826 ret = btrfs_commit_transaction(trans, root);
/*
 * (Re)compute checksums for the data extent [start, start + len).
 *
 * Reads one sector at a time into @buf and inserts its csum into the
 * csum tree via btrfs_csum_file_block().  The third argument
 * (start + len) is the extent's end bytenr passed through to the csum
 * insertion helper.  Error handling between iterations is elided.
 */
8830 static int populate_csum(struct btrfs_trans_handle *trans,
8831 struct btrfs_root *csum_root, char *buf, u64 start,
8838 while (offset < len) {
8839 sectorsize = csum_root->sectorsize;
8840 ret = read_extent_data(csum_root, buf, start + offset,
8844 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8845 start + offset, buf, sectorsize);
8848 offset += sectorsize;
/*
 * Populate the csum tree from a single fs/subvolume tree: iterate every
 * EXTENT_DATA item in @cur_root and checksum the disk range of each
 * regular (non-inline, non-prealloc handling elided) file extent.
 *
 * Uses a one-sector scratch buffer sized from the csum root.
 */
8853 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8854 struct btrfs_root *csum_root,
8855 struct btrfs_root *cur_root)
8857 struct btrfs_path *path;
8858 struct btrfs_key key;
8859 struct extent_buffer *node;
8860 struct btrfs_file_extent_item *fi;
8867 path = btrfs_alloc_path();
8870 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8880 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8883 /* Iterate all regular file extents and fill its csum */
8885 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8887 if (key.type != BTRFS_EXTENT_DATA_KEY)
8889 node = path->nodes[0];
8890 slot = path->slots[0];
8891 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
/* Only REG extents have on-disk data to checksum. */
8892 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8894 start = btrfs_file_extent_disk_bytenr(node, fi);
8895 len = btrfs_file_extent_disk_num_bytes(node, fi);
8897 ret = populate_csum(trans, csum_root, buf, start, len);
8904 * TODO: if next leaf is corrupted, jump to nearest next valid
8907 ret = btrfs_next_item(cur_root, path);
8917 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning every fs/subvolume tree.
 *
 * Walks ROOT_ITEMs in the tree root starting at FS_TREE_OBJECTID,
 * filters with is_fstree(), reads each subvolume root and delegates to
 * fill_csum_tree_from_one_fs_root().  Used when the extent tree was
 * reinitialized and cannot be trusted.
 */
8922 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8923 struct btrfs_root *csum_root)
8925 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8926 struct btrfs_path *path;
8927 struct btrfs_root *tree_root = fs_info->tree_root;
8928 struct btrfs_root *cur_root;
8929 struct extent_buffer *node;
8930 struct btrfs_key key;
8934 path = btrfs_alloc_path();
8938 key.objectid = BTRFS_FS_TREE_OBJECTID;
8940 key.type = BTRFS_ROOT_ITEM_KEY;
8942 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8951 node = path->nodes[0];
8952 slot = path->slots[0];
8953 btrfs_item_key_to_cpu(node, &key, slot);
/* Past the last possible subvolume id: done. */
8954 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8956 if (key.type != BTRFS_ROOT_ITEM_KEY)
8958 if (!is_fstree(key.objectid))
8960 key.offset = (u64)-1;
8962 cur_root = btrfs_read_fs_root(fs_info, &key);
8963 if (IS_ERR(cur_root) || !cur_root) {
8964 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8968 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8973 ret = btrfs_next_item(tree_root, path);
8983 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning the extent tree: every EXTENT_ITEM
 * carrying BTRFS_EXTENT_FLAG_DATA is checksummed via populate_csum().
 * Faster than walking fs trees, but only valid when the extent tree is
 * trustworthy.
 */
8987 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8988 struct btrfs_root *csum_root)
8990 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8991 struct btrfs_path *path;
8992 struct btrfs_extent_item *ei;
8993 struct extent_buffer *leaf;
8995 struct btrfs_key key;
8998 path = btrfs_alloc_path();
9003 key.type = BTRFS_EXTENT_ITEM_KEY;
9006 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9008 btrfs_free_path(path);
/* One-sector scratch buffer for reading extent data. */
9012 buf = malloc(csum_root->sectorsize);
9014 btrfs_free_path(path);
9019 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9020 ret = btrfs_next_leaf(extent_root, path);
9028 leaf = path->nodes[0];
9030 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9031 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9036 ei = btrfs_item_ptr(leaf, path->slots[0],
9037 struct btrfs_extent_item);
/* Skip metadata extents; only data extents get csums. */
9038 if (!(btrfs_extent_flags(leaf, ei) &
9039 BTRFS_EXTENT_FLAG_DATA)) {
9044 ret = populate_csum(trans, csum_root, buf, key.objectid,
9051 btrfs_free_path(path);
9057 * Recalculate the csum and put it into the csum tree.
9059 * Extent tree init will wipe out all the extent info, so in that case, we
9060 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9061 * will use fs/subvol trees to init the csum tree.
9063 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9064 struct btrfs_root *csum_root,
/* Extent tree untrusted (e.g. just reinitialized): scan fs trees. */
9068 return fill_csum_tree_from_fs(trans, csum_root);
/* Otherwise the extent tree scan is the cheaper path. */
9070 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Per-root summary built from the extent tree, keyed by root objectid
 * via the embedded cache_extent (start = root id, size = 1).  Records
 * the highest-level tree block seen for the root (its bytenr/gen) so
 * stale ROOT_ITEMs can be detected and repaired.
 */
9073 struct root_item_info {
9074 /* level of the root */
9076 /* number of nodes at this level, must be 1 for a root */
9080 struct cache_extent cache_extent;
/* Lazily allocated cache of root_item_info, indexed by root objectid. */
9083 static struct cache_tree *roots_info_cache = NULL;
/*
 * Tear down roots_info_cache: pop and free every root_item_info entry,
 * then free the tree itself and reset the pointer so the cache can be
 * rebuilt later.  Safe to call when the cache was never created.
 */
9085 static void free_roots_info_cache(void)
9087 if (!roots_info_cache)
9090 while (!cache_tree_empty(roots_info_cache)) {
9091 struct cache_extent *entry;
9092 struct root_item_info *rii;
9094 entry = first_cache_extent(roots_info_cache);
9097 remove_cache_extent(roots_info_cache, entry);
9098 rii = container_of(entry, struct root_item_info, cache_extent);
9102 free(roots_info_cache);
9103 roots_info_cache = NULL;
/*
 * Scan the whole extent tree and record, for every tree root referenced
 * by a TREE_BLOCK_REF, the highest-level block found (bytenr, generation,
 * and a count of blocks at that level).  The result, stored in
 * roots_info_cache, is what maybe_repair_root_item() compares the
 * on-disk ROOT_ITEMs against.
 */
9106 static int build_roots_info_cache(struct btrfs_fs_info *info)
9109 struct btrfs_key key;
9110 struct extent_buffer *leaf;
9111 struct btrfs_path *path;
9113 if (!roots_info_cache) {
9114 roots_info_cache = malloc(sizeof(*roots_info_cache));
9115 if (!roots_info_cache)
9117 cache_tree_init(roots_info_cache);
9120 path = btrfs_alloc_path();
9125 key.type = BTRFS_EXTENT_ITEM_KEY;
9128 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9131 leaf = path->nodes[0];
9134 struct btrfs_key found_key;
9135 struct btrfs_extent_item *ei;
9136 struct btrfs_extent_inline_ref *iref;
9137 int slot = path->slots[0];
9142 struct cache_extent *entry;
9143 struct root_item_info *rii;
9145 if (slot >= btrfs_header_nritems(leaf)) {
9146 ret = btrfs_next_leaf(info->extent_root, path);
9153 leaf = path->nodes[0];
9154 slot = path->slots[0];
9157 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only extent/metadata items can describe tree blocks. */
9159 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9160 found_key.type != BTRFS_METADATA_ITEM_KEY)
9163 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9164 flags = btrfs_extent_flags(leaf, ei);
9166 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9167 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM keys store the level in key.offset; classic
 * EXTENT_ITEMs carry a btrfs_tree_block_info before the irefs.
 */
9170 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9171 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9172 level = found_key.offset;
9174 struct btrfs_tree_block_info *info;
9176 info = (struct btrfs_tree_block_info *)(ei + 1);
9177 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9178 level = btrfs_tree_block_level(leaf, info);
9182 * For a root extent, it must be of the following type and the
9183 * first (and only one) iref in the item.
9185 type = btrfs_extent_inline_ref_type(leaf, iref);
9186 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9189 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9190 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First time we see this root: allocate and insert a fresh record. */
9192 rii = malloc(sizeof(struct root_item_info));
9197 rii->cache_extent.start = root_id;
9198 rii->cache_extent.size = 1;
9199 rii->level = (u8)-1;
9200 entry = &rii->cache_extent;
9201 ret = insert_cache_extent(roots_info_cache, entry);
9204 rii = container_of(entry, struct root_item_info,
9208 ASSERT(rii->cache_extent.start == root_id);
9209 ASSERT(rii->cache_extent.size == 1);
/* Track the highest level seen; a real root has node_count == 1. */
9211 if (level > rii->level || rii->level == (u8)-1) {
9213 rii->bytenr = found_key.objectid;
9214 rii->gen = btrfs_extent_generation(leaf, ei);
9215 rii->node_count = 1;
9216 } else if (level == rii->level) {
9224 btrfs_free_path(path);
/*
 * Check (and in write mode fix) one ROOT_ITEM against the data gathered
 * by build_roots_info_cache().
 *
 * @path must be positioned on the root item; @read_only_mode selects
 * detect-only vs. rewrite.  If the item's bytenr/level/generation do not
 * match the best root block found in the extent tree, the item is
 * reported and, when allowed, rewritten in place.  Refuses to "fix"
 * an item whose generation is NEWER than the found node, since that
 * would regress the root.
 */
9229 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9230 struct btrfs_path *path,
9231 const struct btrfs_key *root_key,
9232 const int read_only_mode)
9234 const u64 root_id = root_key->objectid;
9235 struct cache_extent *entry;
9236 struct root_item_info *rii;
9237 struct btrfs_root_item ri;
9238 unsigned long offset;
9240 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9243 "Error: could not find extent items for root %llu\n",
9244 root_key->objectid);
9248 rii = container_of(entry, struct root_item_info, cache_extent);
9249 ASSERT(rii->cache_extent.start == root_id);
9250 ASSERT(rii->cache_extent.size == 1);
/* A valid root must have exactly one block at its top level. */
9252 if (rii->node_count != 1) {
9254 "Error: could not find btree root extent for root %llu\n",
/* Read the on-disk root item into a local copy for comparison. */
9259 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9260 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9262 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9263 btrfs_root_level(&ri) != rii->level ||
9264 btrfs_root_generation(&ri) != rii->gen) {
9267 * If we're in repair mode but our caller told us to not update
9268 * the root item, i.e. just check if it needs to be updated, don't
9269 * print this message, since the caller will call us again shortly
9270 * for the same root item without read only mode (the caller will
9271 * open a transaction first).
9273 if (!(read_only_mode && repair))
9275 "%sroot item for root %llu,"
9276 " current bytenr %llu, current gen %llu, current level %u,"
9277 " new bytenr %llu, new gen %llu, new level %u\n",
9278 (read_only_mode ? "" : "fixing "),
9280 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9281 btrfs_root_level(&ri),
9282 rii->bytenr, rii->gen, rii->level);
/* Never downgrade to an older root node than the item records. */
9284 if (btrfs_root_generation(&ri) > rii->gen) {
9286 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9287 root_id, btrfs_root_generation(&ri), rii->gen);
9291 if (!read_only_mode) {
9292 btrfs_set_root_bytenr(&ri, rii->bytenr);
9293 btrfs_set_root_level(&ri, rii->level);
9294 btrfs_set_root_generation(&ri, rii->gen);
9295 write_extent_buffer(path->nodes[0], &ri,
9296 offset, sizeof(ri));
9306 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9307 * caused read-only snapshots to be corrupted if they were created at a moment
9308 * when the source subvolume/snapshot had orphan items. The issue was that the
9309 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9310 * node instead of the post orphan cleanup root node.
9311 * So this function, and its callees, just detects and fixes those cases. Even
9312 * though the regression was for read-only snapshots, this function applies to
9313 * any snapshot/subvolume root.
9314 * This must be run before any other repair code - not doing it so, makes other
9315 * repair code delete or modify backrefs in the extent tree for example, which
9316 * will result in an inconsistent fs after repairing the root items.
9318 static int repair_root_items(struct btrfs_fs_info *info)
9320 struct btrfs_path *path = NULL;
9321 struct btrfs_key key;
9322 struct extent_buffer *leaf;
9323 struct btrfs_trans_handle *trans = NULL;
/* Build the per-root summary from the extent tree first. */
9328 ret = build_roots_info_cache(info);
9332 path = btrfs_alloc_path();
9338 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9339 key.type = BTRFS_ROOT_ITEM_KEY;
9344 * Avoid opening and committing transactions if a leaf doesn't have
9345 * any root items that need to be fixed, so that we avoid rotating
9346 * backup roots unnecessarily.
9349 trans = btrfs_start_transaction(info->tree_root, 1);
9350 if (IS_ERR(trans)) {
9351 ret = PTR_ERR(trans);
9356 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9360 leaf = path->nodes[0];
9363 struct btrfs_key found_key;
/* End of leaf: remember the next key, commit, and restart the scan. */
9365 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9366 int no_more_keys = find_next_key(path, &key);
9368 btrfs_release_path(path);
9370 ret = btrfs_commit_transaction(trans,
9382 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9384 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
/* Reloc roots are rebuilt elsewhere; don't touch them here. */
9386 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9389 ret = maybe_repair_root_item(info, path, &found_key,
/* Item needs fixing and we are in repair mode: redo with a trans. */
9394 if (!trans && repair) {
9397 btrfs_release_path(path);
9407 free_roots_info_cache();
9408 btrfs_free_path(path);
9410 btrfs_commit_transaction(trans, info->tree_root);
/* Usage text for `btrfs check`, printed by usage()/help machinery.
 * (Typos fixed: "inegrity" -> "integrity", "checkums" -> "checksums".) */
9417 const char * const cmd_check_usage[] = {
9418 "btrfs check [options] <device>",
9419 "Check structural integrity of a filesystem (unmounted).",
9420 "Check structural integrity of an unmounted filesystem. Verify internal",
9421 "trees' consistency and item connectivity. In the repair mode try to",
9422 "fix the problems found.",
9423 "WARNING: the repair mode is considered dangerous",
9425 "-s|--super <superblock> use this superblock copy",
9426 "-b|--backup use the backup root copy",
9427 "--repair try to repair the filesystem",
9428 "--readonly run in read-only mode (default)",
9429 "--init-csum-tree create a new CRC tree",
9430 "--init-extent-tree create a new extent tree",
9431 "--check-data-csum verify checksums of data blocks",
9432 "-Q|--qgroup-report print a report on qgroup consistency",
9433 "-E|--subvol-extents <subvolid>",
9434 " print subvolume extents and sharing state",
9435 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9436 "-p|--progress indicate progress",
9440 int cmd_check(int argc, char **argv)
9442 struct cache_tree root_cache;
9443 struct btrfs_root *root;
9444 struct btrfs_fs_info *info;
9447 u64 tree_root_bytenr = 0;
9448 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9451 int init_csum_tree = 0;
9453 int qgroup_report = 0;
9454 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9458 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9459 OPT_CHECK_CSUM, OPT_READONLY };
9460 static const struct option long_options[] = {
9461 { "super", required_argument, NULL, 's' },
9462 { "repair", no_argument, NULL, OPT_REPAIR },
9463 { "readonly", no_argument, NULL, OPT_READONLY },
9464 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9465 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9466 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9467 { "backup", no_argument, NULL, 'b' },
9468 { "subvol-extents", required_argument, NULL, 'E' },
9469 { "qgroup-report", no_argument, NULL, 'Q' },
9470 { "tree-root", required_argument, NULL, 'r' },
9471 { "progress", no_argument, NULL, 'p' },
9475 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9479 case 'a': /* ignored */ break;
9481 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9484 num = arg_strtou64(optarg);
9485 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9487 "ERROR: super mirror should be less than: %d\n",
9488 BTRFS_SUPER_MIRROR_MAX);
9491 bytenr = btrfs_sb_offset(((int)num));
9492 printf("using SB copy %llu, bytenr %llu\n", num,
9493 (unsigned long long)bytenr);
9499 subvolid = arg_strtou64(optarg);
9502 tree_root_bytenr = arg_strtou64(optarg);
9505 ctx.progress_enabled = true;
9509 usage(cmd_check_usage);
9511 printf("enabling repair mode\n");
9513 ctree_flags |= OPEN_CTREE_WRITES;
9519 printf("Creating a new CRC tree\n");
9522 ctree_flags |= OPEN_CTREE_WRITES;
9524 case OPT_INIT_EXTENT:
9525 init_extent_tree = 1;
9526 ctree_flags |= (OPEN_CTREE_WRITES |
9527 OPEN_CTREE_NO_BLOCK_GROUPS);
9530 case OPT_CHECK_CSUM:
9531 check_data_csum = 1;
9535 argc = argc - optind;
9537 if (check_argc_exact(argc, 1))
9538 usage(cmd_check_usage);
9540 if (ctx.progress_enabled) {
9541 ctx.tp = TASK_NOTHING;
9542 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9545 /* This check is the only reason for --readonly to exist */
9546 if (readonly && repair) {
9547 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9552 cache_tree_init(&root_cache);
9554 if((ret = check_mounted(argv[optind])) < 0) {
9555 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9558 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9563 /* only allow partial opening under repair mode */
9565 ctree_flags |= OPEN_CTREE_PARTIAL;
9567 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9570 fprintf(stderr, "Couldn't open file system\n");
9576 root = info->fs_root;
9579 * repair mode will force us to commit transaction which
9580 * will make us fail to load log tree when mounting.
9582 if (repair && btrfs_super_log_root(info->super_copy)) {
9583 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9588 ret = zero_log_tree(root);
9590 fprintf(stderr, "fail to zero log tree\n");
9595 uuid_unparse(info->super_copy->fsid, uuidbuf);
9596 if (qgroup_report) {
9597 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9599 ret = qgroup_verify_all(info);
9601 print_qgroup_report(1);
9605 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9606 subvolid, argv[optind], uuidbuf);
9607 ret = print_extent_state(info, subvolid);
9610 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9612 if (!extent_buffer_uptodate(info->tree_root->node) ||
9613 !extent_buffer_uptodate(info->dev_root->node) ||
9614 !extent_buffer_uptodate(info->chunk_root->node)) {
9615 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9620 if (init_extent_tree || init_csum_tree) {
9621 struct btrfs_trans_handle *trans;
9623 trans = btrfs_start_transaction(info->extent_root, 0);
9624 if (IS_ERR(trans)) {
9625 fprintf(stderr, "Error starting transaction\n");
9626 ret = PTR_ERR(trans);
9630 if (init_extent_tree) {
9631 printf("Creating a new extent tree\n");
9632 ret = reinit_extent_tree(trans, info);
9637 if (init_csum_tree) {
9638 fprintf(stderr, "Reinit crc root\n");
9639 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9641 fprintf(stderr, "crc root initialization failed\n");
9646 ret = fill_csum_tree(trans, info->csum_root,
9649 fprintf(stderr, "crc refilling failed\n");
9654 * Ok now we commit and run the normal fsck, which will add
9655 * extent entries for all of the items it finds.
9657 ret = btrfs_commit_transaction(trans, info->extent_root);
9661 if (!extent_buffer_uptodate(info->extent_root->node)) {
9662 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9666 if (!extent_buffer_uptodate(info->csum_root->node)) {
9667 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9672 if (!ctx.progress_enabled)
9673 fprintf(stderr, "checking extents\n");
9674 ret = check_chunks_and_extents(root);
9676 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9678 ret = repair_root_items(info);
9682 fprintf(stderr, "Fixed %d roots.\n", ret);
9684 } else if (ret > 0) {
9686 "Found %d roots with an outdated root item.\n",
9689 "Please run a filesystem check with the option --repair to fix them.\n");
9694 if (!ctx.progress_enabled)
9695 fprintf(stderr, "checking free space cache\n");
9696 ret = check_space_cache(root);
9701 * We used to have to have these hole extents in between our real
9702 * extents so if we don't have this flag set we need to make sure there
9703 * are no gaps in the file extents for inodes, otherwise we can just
9704 * ignore it when this happens.
9706 no_holes = btrfs_fs_incompat(root->fs_info,
9707 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9708 if (!ctx.progress_enabled)
9709 fprintf(stderr, "checking fs roots\n");
9710 ret = check_fs_roots(root, &root_cache);
9714 fprintf(stderr, "checking csums\n");
9715 ret = check_csums(root);
9719 fprintf(stderr, "checking root refs\n");
9720 ret = check_root_refs(root, &root_cache);
9724 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9725 struct extent_buffer *eb;
9727 eb = list_first_entry(&root->fs_info->recow_ebs,
9728 struct extent_buffer, recow);
9729 list_del_init(&eb->recow);
9730 ret = recow_extent_buffer(root, eb);
9735 while (!list_empty(&delete_items)) {
9736 struct bad_item *bad;
9738 bad = list_first_entry(&delete_items, struct bad_item, list);
9739 list_del_init(&bad->list);
9741 ret = delete_bad_item(root, bad);
9745 if (info->quota_enabled) {
9747 fprintf(stderr, "checking quota groups\n");
9748 err = qgroup_verify_all(info);
9753 if (!list_empty(&root->fs_info->recow_ebs)) {
9754 fprintf(stderr, "Transid errors in file system\n");
9758 print_qgroup_report(0);
9759 if (found_old_backref) { /*
9760 * there was a disk format change when mixed
9761 * backref was in testing tree. The old format
9762 * existed about one week.
9764 printf("\n * Found old mixed backref format. "
9765 "The old format is not supported! *"
9766 "\n * Please mount the FS in readonly mode, "
9767 "backup data and re-format the FS. *\n\n");
9770 printf("found %llu bytes used err is %d\n",
9771 (unsigned long long)bytes_used, ret);
9772 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9773 printf("total tree bytes: %llu\n",
9774 (unsigned long long)total_btree_bytes);
9775 printf("total fs tree bytes: %llu\n",
9776 (unsigned long long)total_fs_tree_bytes);
9777 printf("total extent tree bytes: %llu\n",
9778 (unsigned long long)total_extent_tree_bytes);
9779 printf("btree space waste bytes: %llu\n",
9780 (unsigned long long)btree_space_waste);
9781 printf("file data blocks allocated: %llu\n referenced %llu\n",
9782 (unsigned long long)data_bytes_allocated,
9783 (unsigned long long)data_bytes_referenced);
9785 free_root_recs_tree(&root_cache);
9789 if (ctx.progress_enabled)
9790 task_deinit(ctx.info);