2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
48 TASK_NOTHING, /* have to be the last element */
53 enum task_position tp;
55 struct task_info *info;
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
81 static char *task_position_string[] = {
83 "checking free space cache",
87 task_period_start(priv->info, 1000 /* 1s */);
89 if (priv->tp == TASK_NOTHING)
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
97 task_period_wait(priv->info);
102 static int print_status_return(void *p)
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
149 struct tree_backref {
150 struct extent_backref node;
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
189 unsigned int filetype:8;
191 unsigned int ref_type;
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
223 struct file_extent_hole {
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
260 /* Hole 2 will be merge center */
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
277 hole = malloc(sizeof(*hole));
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iterate merge with next holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
303 if (hole->start + hole->len >= next->start) {
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
321 hole = (struct file_extent_hole *)data;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole in the record
335 * This will do the hole split and is much restrict than add.
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
346 struct rb_node *node;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
357 if (start + len > hole->start + hole->len)
361 * Now there will be no overflap, delete the hole and re-add the
362 * split(s) if they exists.
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
417 node = rb_first(holes);
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
443 struct rb_root holes;
444 struct list_head orphan_extents;
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
489 struct cache_extent cache;
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
514 struct btrfs_key key;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
528 extent_buffer_get(root->node);
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct orphan_data_extent *src_orphan;
570 struct orphan_data_extent *dst_orphan;
574 rec = malloc(sizeof(*rec));
575 memcpy(rec, orig_rec, sizeof(*rec));
577 INIT_LIST_HEAD(&rec->backrefs);
578 INIT_LIST_HEAD(&rec->orphan_extents);
579 rec->holes = RB_ROOT;
581 list_for_each_entry(orig, &orig_rec->backrefs, list) {
582 size = sizeof(*orig) + orig->namelen + 1;
583 backref = malloc(size);
584 memcpy(backref, orig, size);
585 list_add_tail(&backref->list, &rec->backrefs);
587 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588 dst_orphan = malloc(sizeof(*dst_orphan));
589 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
591 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
594 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
600 static void print_orphan_data_extents(struct list_head *orphan_extents,
603 struct orphan_data_extent *orphan;
605 if (list_empty(orphan_extents))
607 printf("The following data extent is lost in tree %llu:\n",
609 list_for_each_entry(orphan, orphan_extents, list) {
610 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
611 orphan->objectid, orphan->offset, orphan->disk_bytenr,
616 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
618 u64 root_objectid = root->root_key.objectid;
619 int errors = rec->errors;
623 /* reloc root errors, we print its corresponding fs root objectid*/
624 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
625 root_objectid = root->root_key.offset;
626 fprintf(stderr, "reloc");
628 fprintf(stderr, "root %llu inode %llu errors %x",
629 (unsigned long long) root_objectid,
630 (unsigned long long) rec->ino, rec->errors);
632 if (errors & I_ERR_NO_INODE_ITEM)
633 fprintf(stderr, ", no inode item");
634 if (errors & I_ERR_NO_ORPHAN_ITEM)
635 fprintf(stderr, ", no orphan item");
636 if (errors & I_ERR_DUP_INODE_ITEM)
637 fprintf(stderr, ", dup inode item");
638 if (errors & I_ERR_DUP_DIR_INDEX)
639 fprintf(stderr, ", dup dir index");
640 if (errors & I_ERR_ODD_DIR_ITEM)
641 fprintf(stderr, ", odd dir item");
642 if (errors & I_ERR_ODD_FILE_EXTENT)
643 fprintf(stderr, ", odd file extent");
644 if (errors & I_ERR_BAD_FILE_EXTENT)
645 fprintf(stderr, ", bad file extent");
646 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
647 fprintf(stderr, ", file extent overlap");
648 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
649 fprintf(stderr, ", file extent discount");
650 if (errors & I_ERR_DIR_ISIZE_WRONG)
651 fprintf(stderr, ", dir isize wrong");
652 if (errors & I_ERR_FILE_NBYTES_WRONG)
653 fprintf(stderr, ", nbytes wrong");
654 if (errors & I_ERR_ODD_CSUM_ITEM)
655 fprintf(stderr, ", odd csum item");
656 if (errors & I_ERR_SOME_CSUM_MISSING)
657 fprintf(stderr, ", some csum missing");
658 if (errors & I_ERR_LINK_COUNT_WRONG)
659 fprintf(stderr, ", link count wrong");
660 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
661 fprintf(stderr, ", orphan file extent");
662 fprintf(stderr, "\n");
663 /* Print the orphan extents if needed */
664 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
665 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
667 /* Print the holes if needed */
668 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
669 struct file_extent_hole *hole;
670 struct rb_node *node;
673 node = rb_first(&rec->holes);
674 fprintf(stderr, "Found file extent holes:\n");
677 hole = rb_entry(node, struct file_extent_hole, node);
678 fprintf(stderr, "\tstart: %llu, len: %llu\n",
679 hole->start, hole->len);
680 node = rb_next(node);
683 fprintf(stderr, "\tstart: 0, len: %llu\n",
684 round_up(rec->isize, root->sectorsize));
688 static void print_ref_error(int errors)
690 if (errors & REF_ERR_NO_DIR_ITEM)
691 fprintf(stderr, ", no dir item");
692 if (errors & REF_ERR_NO_DIR_INDEX)
693 fprintf(stderr, ", no dir index");
694 if (errors & REF_ERR_NO_INODE_REF)
695 fprintf(stderr, ", no inode ref");
696 if (errors & REF_ERR_DUP_DIR_ITEM)
697 fprintf(stderr, ", dup dir item");
698 if (errors & REF_ERR_DUP_DIR_INDEX)
699 fprintf(stderr, ", dup dir index");
700 if (errors & REF_ERR_DUP_INODE_REF)
701 fprintf(stderr, ", dup inode ref");
702 if (errors & REF_ERR_INDEX_UNMATCH)
703 fprintf(stderr, ", index unmatch");
704 if (errors & REF_ERR_FILETYPE_UNMATCH)
705 fprintf(stderr, ", filetype unmatch");
706 if (errors & REF_ERR_NAME_TOO_LONG)
707 fprintf(stderr, ", name too long");
708 if (errors & REF_ERR_NO_ROOT_REF)
709 fprintf(stderr, ", no root ref");
710 if (errors & REF_ERR_NO_ROOT_BACKREF)
711 fprintf(stderr, ", no root backref");
712 if (errors & REF_ERR_DUP_ROOT_REF)
713 fprintf(stderr, ", dup root ref");
714 if (errors & REF_ERR_DUP_ROOT_BACKREF)
715 fprintf(stderr, ", dup root backref");
716 fprintf(stderr, "\n");
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
722 struct ptr_node *node;
723 struct cache_extent *cache;
724 struct inode_record *rec = NULL;
727 cache = lookup_cache_extent(inode_cache, ino, 1);
729 node = container_of(cache, struct ptr_node, cache);
731 if (mod && rec->refs > 1) {
732 node->data = clone_inode_rec(rec);
737 rec = calloc(1, sizeof(*rec));
739 rec->extent_start = (u64)-1;
741 INIT_LIST_HEAD(&rec->backrefs);
742 INIT_LIST_HEAD(&rec->orphan_extents);
743 rec->holes = RB_ROOT;
745 node = malloc(sizeof(*node));
746 node->cache.start = ino;
747 node->cache.size = 1;
750 if (ino == BTRFS_FREE_INO_OBJECTID)
753 ret = insert_cache_extent(inode_cache, &node->cache);
759 static void free_orphan_data_extents(struct list_head *orphan_extents)
761 struct orphan_data_extent *orphan;
763 while (!list_empty(orphan_extents)) {
764 orphan = list_entry(orphan_extents->next,
765 struct orphan_data_extent, list);
766 list_del(&orphan->list);
771 static void free_inode_rec(struct inode_record *rec)
773 struct inode_backref *backref;
778 while (!list_empty(&rec->backrefs)) {
779 backref = list_entry(rec->backrefs.next,
780 struct inode_backref, list);
781 list_del(&backref->list);
784 free_orphan_data_extents(&rec->orphan_extents);
785 free_file_extent_holes(&rec->holes);
789 static int can_free_inode_rec(struct inode_record *rec)
791 if (!rec->errors && rec->checked && rec->found_inode_item &&
792 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
797 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
798 struct inode_record *rec)
800 struct cache_extent *cache;
801 struct inode_backref *tmp, *backref;
802 struct ptr_node *node;
803 unsigned char filetype;
805 if (!rec->found_inode_item)
808 filetype = imode_to_type(rec->imode);
809 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
810 if (backref->found_dir_item && backref->found_dir_index) {
811 if (backref->filetype != filetype)
812 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
813 if (!backref->errors && backref->found_inode_ref) {
814 list_del(&backref->list);
820 if (!rec->checked || rec->merging)
823 if (S_ISDIR(rec->imode)) {
824 if (rec->found_size != rec->isize)
825 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
826 if (rec->found_file_extent)
827 rec->errors |= I_ERR_ODD_FILE_EXTENT;
828 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
829 if (rec->found_dir_item)
830 rec->errors |= I_ERR_ODD_DIR_ITEM;
831 if (rec->found_size != rec->nbytes)
832 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
833 if (rec->nlink > 0 && !no_holes &&
834 (rec->extent_end < rec->isize ||
835 first_extent_gap(&rec->holes) < rec->isize))
836 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
839 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
840 if (rec->found_csum_item && rec->nodatasum)
841 rec->errors |= I_ERR_ODD_CSUM_ITEM;
842 if (rec->some_csum_missing && !rec->nodatasum)
843 rec->errors |= I_ERR_SOME_CSUM_MISSING;
846 BUG_ON(rec->refs != 1);
847 if (can_free_inode_rec(rec)) {
848 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
849 node = container_of(cache, struct ptr_node, cache);
850 BUG_ON(node->data != rec);
851 remove_cache_extent(inode_cache, &node->cache);
857 static int check_orphan_item(struct btrfs_root *root, u64 ino)
859 struct btrfs_path path;
860 struct btrfs_key key;
863 key.objectid = BTRFS_ORPHAN_OBJECTID;
864 key.type = BTRFS_ORPHAN_ITEM_KEY;
867 btrfs_init_path(&path);
868 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
869 btrfs_release_path(&path);
875 static int process_inode_item(struct extent_buffer *eb,
876 int slot, struct btrfs_key *key,
877 struct shared_node *active_node)
879 struct inode_record *rec;
880 struct btrfs_inode_item *item;
882 rec = active_node->current;
883 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
884 if (rec->found_inode_item) {
885 rec->errors |= I_ERR_DUP_INODE_ITEM;
888 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
889 rec->nlink = btrfs_inode_nlink(eb, item);
890 rec->isize = btrfs_inode_size(eb, item);
891 rec->nbytes = btrfs_inode_nbytes(eb, item);
892 rec->imode = btrfs_inode_mode(eb, item);
893 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
895 rec->found_inode_item = 1;
897 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
898 maybe_free_inode_rec(&active_node->inode_cache, rec);
902 static struct inode_backref *get_inode_backref(struct inode_record *rec,
904 int namelen, u64 dir)
906 struct inode_backref *backref;
908 list_for_each_entry(backref, &rec->backrefs, list) {
909 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
911 if (backref->dir != dir || backref->namelen != namelen)
913 if (memcmp(name, backref->name, namelen))
918 backref = malloc(sizeof(*backref) + namelen + 1);
919 memset(backref, 0, sizeof(*backref));
921 backref->namelen = namelen;
922 memcpy(backref->name, name, namelen);
923 backref->name[namelen] = '\0';
924 list_add_tail(&backref->list, &rec->backrefs);
928 static int add_inode_backref(struct cache_tree *inode_cache,
929 u64 ino, u64 dir, u64 index,
930 const char *name, int namelen,
931 int filetype, int itemtype, int errors)
933 struct inode_record *rec;
934 struct inode_backref *backref;
936 rec = get_inode_rec(inode_cache, ino, 1);
937 backref = get_inode_backref(rec, name, namelen, dir);
939 backref->errors |= errors;
940 if (itemtype == BTRFS_DIR_INDEX_KEY) {
941 if (backref->found_dir_index)
942 backref->errors |= REF_ERR_DUP_DIR_INDEX;
943 if (backref->found_inode_ref && backref->index != index)
944 backref->errors |= REF_ERR_INDEX_UNMATCH;
945 if (backref->found_dir_item && backref->filetype != filetype)
946 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
948 backref->index = index;
949 backref->filetype = filetype;
950 backref->found_dir_index = 1;
951 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
953 if (backref->found_dir_item)
954 backref->errors |= REF_ERR_DUP_DIR_ITEM;
955 if (backref->found_dir_index && backref->filetype != filetype)
956 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
958 backref->filetype = filetype;
959 backref->found_dir_item = 1;
960 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
961 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
962 if (backref->found_inode_ref)
963 backref->errors |= REF_ERR_DUP_INODE_REF;
964 if (backref->found_dir_index && backref->index != index)
965 backref->errors |= REF_ERR_INDEX_UNMATCH;
967 backref->index = index;
969 backref->ref_type = itemtype;
970 backref->found_inode_ref = 1;
975 maybe_free_inode_rec(inode_cache, rec);
979 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
980 struct cache_tree *dst_cache)
982 struct inode_backref *backref;
987 list_for_each_entry(backref, &src->backrefs, list) {
988 if (backref->found_dir_index) {
989 add_inode_backref(dst_cache, dst->ino, backref->dir,
990 backref->index, backref->name,
991 backref->namelen, backref->filetype,
992 BTRFS_DIR_INDEX_KEY, backref->errors);
994 if (backref->found_dir_item) {
996 add_inode_backref(dst_cache, dst->ino,
997 backref->dir, 0, backref->name,
998 backref->namelen, backref->filetype,
999 BTRFS_DIR_ITEM_KEY, backref->errors);
1001 if (backref->found_inode_ref) {
1002 add_inode_backref(dst_cache, dst->ino,
1003 backref->dir, backref->index,
1004 backref->name, backref->namelen, 0,
1005 backref->ref_type, backref->errors);
1009 if (src->found_dir_item)
1010 dst->found_dir_item = 1;
1011 if (src->found_file_extent)
1012 dst->found_file_extent = 1;
1013 if (src->found_csum_item)
1014 dst->found_csum_item = 1;
1015 if (src->some_csum_missing)
1016 dst->some_csum_missing = 1;
1017 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1018 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1023 BUG_ON(src->found_link < dir_count);
1024 dst->found_link += src->found_link - dir_count;
1025 dst->found_size += src->found_size;
1026 if (src->extent_start != (u64)-1) {
1027 if (dst->extent_start == (u64)-1) {
1028 dst->extent_start = src->extent_start;
1029 dst->extent_end = src->extent_end;
1031 if (dst->extent_end > src->extent_start)
1032 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1033 else if (dst->extent_end < src->extent_start) {
1034 ret = add_file_extent_hole(&dst->holes,
1036 src->extent_start - dst->extent_end);
1038 if (dst->extent_end < src->extent_end)
1039 dst->extent_end = src->extent_end;
1043 dst->errors |= src->errors;
1044 if (src->found_inode_item) {
1045 if (!dst->found_inode_item) {
1046 dst->nlink = src->nlink;
1047 dst->isize = src->isize;
1048 dst->nbytes = src->nbytes;
1049 dst->imode = src->imode;
1050 dst->nodatasum = src->nodatasum;
1051 dst->found_inode_item = 1;
1053 dst->errors |= I_ERR_DUP_INODE_ITEM;
1061 static int splice_shared_node(struct shared_node *src_node,
1062 struct shared_node *dst_node)
1064 struct cache_extent *cache;
1065 struct ptr_node *node, *ins;
1066 struct cache_tree *src, *dst;
1067 struct inode_record *rec, *conflict;
1068 u64 current_ino = 0;
1072 if (--src_node->refs == 0)
1074 if (src_node->current)
1075 current_ino = src_node->current->ino;
1077 src = &src_node->root_cache;
1078 dst = &dst_node->root_cache;
1080 cache = search_cache_extent(src, 0);
1082 node = container_of(cache, struct ptr_node, cache);
1084 cache = next_cache_extent(cache);
1087 remove_cache_extent(src, &node->cache);
1090 ins = malloc(sizeof(*ins));
1091 ins->cache.start = node->cache.start;
1092 ins->cache.size = node->cache.size;
1096 ret = insert_cache_extent(dst, &ins->cache);
1097 if (ret == -EEXIST) {
1098 conflict = get_inode_rec(dst, rec->ino, 1);
1099 merge_inode_recs(rec, conflict, dst);
1101 conflict->checked = 1;
1102 if (dst_node->current == conflict)
1103 dst_node->current = NULL;
1105 maybe_free_inode_rec(dst, conflict);
1106 free_inode_rec(rec);
1113 if (src == &src_node->root_cache) {
1114 src = &src_node->inode_cache;
1115 dst = &dst_node->inode_cache;
1119 if (current_ino > 0 && (!dst_node->current ||
1120 current_ino > dst_node->current->ino)) {
1121 if (dst_node->current) {
1122 dst_node->current->checked = 1;
1123 maybe_free_inode_rec(dst, dst_node->current);
1125 dst_node->current = get_inode_rec(dst, current_ino, 1);
1130 static void free_inode_ptr(struct cache_extent *cache)
1132 struct ptr_node *node;
1133 struct inode_record *rec;
1135 node = container_of(cache, struct ptr_node, cache);
1137 free_inode_rec(rec);
1141 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1143 static struct shared_node *find_shared_node(struct cache_tree *shared,
1146 struct cache_extent *cache;
1147 struct shared_node *node;
1149 cache = lookup_cache_extent(shared, bytenr, 1);
1151 node = container_of(cache, struct shared_node, cache);
1157 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1160 struct shared_node *node;
1162 node = calloc(1, sizeof(*node));
1163 node->cache.start = bytenr;
1164 node->cache.size = 1;
1165 cache_tree_init(&node->root_cache);
1166 cache_tree_init(&node->inode_cache);
1169 ret = insert_cache_extent(shared, &node->cache);
1174 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1175 struct walk_control *wc, int level)
1177 struct shared_node *node;
1178 struct shared_node *dest;
1180 if (level == wc->active_node)
1183 BUG_ON(wc->active_node <= level);
1184 node = find_shared_node(&wc->shared, bytenr);
1186 add_shared_node(&wc->shared, bytenr, refs);
1187 node = find_shared_node(&wc->shared, bytenr);
1188 wc->nodes[level] = node;
1189 wc->active_node = level;
1193 if (wc->root_level == wc->active_node &&
1194 btrfs_root_refs(&root->root_item) == 0) {
1195 if (--node->refs == 0) {
1196 free_inode_recs_tree(&node->root_cache);
1197 free_inode_recs_tree(&node->inode_cache);
1198 remove_cache_extent(&wc->shared, &node->cache);
1204 dest = wc->nodes[wc->active_node];
1205 splice_shared_node(node, dest);
1206 if (node->refs == 0) {
1207 remove_cache_extent(&wc->shared, &node->cache);
1213 static int leave_shared_node(struct btrfs_root *root,
1214 struct walk_control *wc, int level)
1216 struct shared_node *node;
1217 struct shared_node *dest;
1220 if (level == wc->root_level)
1223 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1227 BUG_ON(i >= BTRFS_MAX_LEVEL);
1229 node = wc->nodes[wc->active_node];
1230 wc->nodes[wc->active_node] = NULL;
1231 wc->active_node = i;
1233 dest = wc->nodes[wc->active_node];
1234 if (wc->active_node < wc->root_level ||
1235 btrfs_root_refs(&root->root_item) > 0) {
1236 BUG_ON(node->refs <= 1);
1237 splice_shared_node(node, dest);
1239 BUG_ON(node->refs < 2);
1248 * 1 - if the root with id child_root_id is a child of root parent_root_id
1249 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1250 * has other root(s) as parent(s)
1251 * 2 - if the root child_root_id doesn't have any parent roots
1253 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1256 struct btrfs_path path;
1257 struct btrfs_key key;
1258 struct extent_buffer *leaf;
1262 btrfs_init_path(&path);
1264 key.objectid = parent_root_id;
1265 key.type = BTRFS_ROOT_REF_KEY;
1266 key.offset = child_root_id;
1267 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1271 btrfs_release_path(&path);
1275 key.objectid = child_root_id;
1276 key.type = BTRFS_ROOT_BACKREF_KEY;
1278 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1284 leaf = path.nodes[0];
1285 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1286 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1289 leaf = path.nodes[0];
1292 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1293 if (key.objectid != child_root_id ||
1294 key.type != BTRFS_ROOT_BACKREF_KEY)
1299 if (key.offset == parent_root_id) {
1300 btrfs_release_path(&path);
1307 btrfs_release_path(&path);
1310 return has_parent ? 0 : 2;
1313 static int process_dir_item(struct btrfs_root *root,
1314 struct extent_buffer *eb,
1315 int slot, struct btrfs_key *key,
1316 struct shared_node *active_node)
1326 struct btrfs_dir_item *di;
1327 struct inode_record *rec;
1328 struct cache_tree *root_cache;
1329 struct cache_tree *inode_cache;
1330 struct btrfs_key location;
1331 char namebuf[BTRFS_NAME_LEN];
1333 root_cache = &active_node->root_cache;
1334 inode_cache = &active_node->inode_cache;
1335 rec = active_node->current;
1336 rec->found_dir_item = 1;
1338 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1339 total = btrfs_item_size_nr(eb, slot);
1340 while (cur < total) {
1342 btrfs_dir_item_key_to_cpu(eb, di, &location);
1343 name_len = btrfs_dir_name_len(eb, di);
1344 data_len = btrfs_dir_data_len(eb, di);
1345 filetype = btrfs_dir_type(eb, di);
1347 rec->found_size += name_len;
1348 if (name_len <= BTRFS_NAME_LEN) {
1352 len = BTRFS_NAME_LEN;
1353 error = REF_ERR_NAME_TOO_LONG;
1355 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1357 if (location.type == BTRFS_INODE_ITEM_KEY) {
1358 add_inode_backref(inode_cache, location.objectid,
1359 key->objectid, key->offset, namebuf,
1360 len, filetype, key->type, error);
1361 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1362 add_inode_backref(root_cache, location.objectid,
1363 key->objectid, key->offset,
1364 namebuf, len, filetype,
1367 fprintf(stderr, "invalid location in dir item %u\n",
1369 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1370 key->objectid, key->offset, namebuf,
1371 len, filetype, key->type, error);
1374 len = sizeof(*di) + name_len + data_len;
1375 di = (struct btrfs_dir_item *)((char *)di + len);
1378 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1379 rec->errors |= I_ERR_DUP_DIR_INDEX;
1384 static int process_inode_ref(struct extent_buffer *eb,
1385 int slot, struct btrfs_key *key,
1386 struct shared_node *active_node)
1394 struct cache_tree *inode_cache;
1395 struct btrfs_inode_ref *ref;
1396 char namebuf[BTRFS_NAME_LEN];
1398 inode_cache = &active_node->inode_cache;
1400 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1401 total = btrfs_item_size_nr(eb, slot);
1402 while (cur < total) {
1403 name_len = btrfs_inode_ref_name_len(eb, ref);
1404 index = btrfs_inode_ref_index(eb, ref);
1405 if (name_len <= BTRFS_NAME_LEN) {
1409 len = BTRFS_NAME_LEN;
1410 error = REF_ERR_NAME_TOO_LONG;
1412 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1413 add_inode_backref(inode_cache, key->objectid, key->offset,
1414 index, namebuf, len, 0, key->type, error);
1416 len = sizeof(*ref) + name_len;
1417 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1423 static int process_inode_extref(struct extent_buffer *eb,
1424 int slot, struct btrfs_key *key,
1425 struct shared_node *active_node)
1434 struct cache_tree *inode_cache;
1435 struct btrfs_inode_extref *extref;
1436 char namebuf[BTRFS_NAME_LEN];
1438 inode_cache = &active_node->inode_cache;
1440 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1441 total = btrfs_item_size_nr(eb, slot);
1442 while (cur < total) {
1443 name_len = btrfs_inode_extref_name_len(eb, extref);
1444 index = btrfs_inode_extref_index(eb, extref);
1445 parent = btrfs_inode_extref_parent(eb, extref);
1446 if (name_len <= BTRFS_NAME_LEN) {
1450 len = BTRFS_NAME_LEN;
1451 error = REF_ERR_NAME_TOO_LONG;
1453 read_extent_buffer(eb, namebuf,
1454 (unsigned long)(extref + 1), len);
1455 add_inode_backref(inode_cache, key->objectid, parent,
1456 index, namebuf, len, 0, key->type, error);
1458 len = sizeof(*extref) + name_len;
1459 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1466 static int count_csum_range(struct btrfs_root *root, u64 start,
1467 u64 len, u64 *found)
1469 struct btrfs_key key;
1470 struct btrfs_path path;
1471 struct extent_buffer *leaf;
1476 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1478 btrfs_init_path(&path);
1480 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1482 key.type = BTRFS_EXTENT_CSUM_KEY;
1484 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1488 if (ret > 0 && path.slots[0] > 0) {
1489 leaf = path.nodes[0];
1490 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1491 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1492 key.type == BTRFS_EXTENT_CSUM_KEY)
1497 leaf = path.nodes[0];
1498 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1499 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1504 leaf = path.nodes[0];
1507 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1508 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1509 key.type != BTRFS_EXTENT_CSUM_KEY)
1512 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1513 if (key.offset >= start + len)
1516 if (key.offset > start)
1519 size = btrfs_item_size_nr(leaf, path.slots[0]);
1520 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1521 if (csum_end > start) {
1522 size = min(csum_end - start, len);
1531 btrfs_release_path(&path);
1537 static int process_file_extent(struct btrfs_root *root,
1538 struct extent_buffer *eb,
1539 int slot, struct btrfs_key *key,
1540 struct shared_node *active_node)
1542 struct inode_record *rec;
1543 struct btrfs_file_extent_item *fi;
1545 u64 disk_bytenr = 0;
1546 u64 extent_offset = 0;
1547 u64 mask = root->sectorsize - 1;
1551 rec = active_node->current;
1552 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1553 rec->found_file_extent = 1;
1555 if (rec->extent_start == (u64)-1) {
1556 rec->extent_start = key->offset;
1557 rec->extent_end = key->offset;
1560 if (rec->extent_end > key->offset)
1561 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1562 else if (rec->extent_end < key->offset) {
1563 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1564 key->offset - rec->extent_end);
1569 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1570 extent_type = btrfs_file_extent_type(eb, fi);
1572 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1573 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1575 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1576 rec->found_size += num_bytes;
1577 num_bytes = (num_bytes + mask) & ~mask;
1578 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1579 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1580 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1581 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1582 extent_offset = btrfs_file_extent_offset(eb, fi);
1583 if (num_bytes == 0 || (num_bytes & mask))
1584 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1585 if (num_bytes + extent_offset >
1586 btrfs_file_extent_ram_bytes(eb, fi))
1587 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1588 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1589 (btrfs_file_extent_compression(eb, fi) ||
1590 btrfs_file_extent_encryption(eb, fi) ||
1591 btrfs_file_extent_other_encoding(eb, fi)))
1592 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1593 if (disk_bytenr > 0)
1594 rec->found_size += num_bytes;
1596 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1598 rec->extent_end = key->offset + num_bytes;
1601 * The data reloc tree will copy full extents into its inode and then
1602 * copy the corresponding csums. Because the extent it copied could be
1603 * a preallocated extent that hasn't been written to yet there may be no
1604 * csums to copy, ergo we won't have csums for our file extent. This is
1605 * ok so just don't bother checking csums if the inode belongs to the
1608 if (disk_bytenr > 0 &&
1609 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1611 if (btrfs_file_extent_compression(eb, fi))
1612 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1614 disk_bytenr += extent_offset;
1616 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1619 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1621 rec->found_csum_item = 1;
1622 if (found < num_bytes)
1623 rec->some_csum_missing = 1;
1624 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1626 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1632 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1633 struct walk_control *wc)
1635 struct btrfs_key key;
1639 struct cache_tree *inode_cache;
1640 struct shared_node *active_node;
1642 if (wc->root_level == wc->active_node &&
1643 btrfs_root_refs(&root->root_item) == 0)
1646 active_node = wc->nodes[wc->active_node];
1647 inode_cache = &active_node->inode_cache;
1648 nritems = btrfs_header_nritems(eb);
1649 for (i = 0; i < nritems; i++) {
1650 btrfs_item_key_to_cpu(eb, &key, i);
1652 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1654 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1657 if (active_node->current == NULL ||
1658 active_node->current->ino < key.objectid) {
1659 if (active_node->current) {
1660 active_node->current->checked = 1;
1661 maybe_free_inode_rec(inode_cache,
1662 active_node->current);
1664 active_node->current = get_inode_rec(inode_cache,
1668 case BTRFS_DIR_ITEM_KEY:
1669 case BTRFS_DIR_INDEX_KEY:
1670 ret = process_dir_item(root, eb, i, &key, active_node);
1672 case BTRFS_INODE_REF_KEY:
1673 ret = process_inode_ref(eb, i, &key, active_node);
1675 case BTRFS_INODE_EXTREF_KEY:
1676 ret = process_inode_extref(eb, i, &key, active_node);
1678 case BTRFS_INODE_ITEM_KEY:
1679 ret = process_inode_item(eb, i, &key, active_node);
1681 case BTRFS_EXTENT_DATA_KEY:
1682 ret = process_file_extent(root, eb, i, &key,
1692 static void reada_walk_down(struct btrfs_root *root,
1693 struct extent_buffer *node, int slot)
1702 level = btrfs_header_level(node);
1706 nritems = btrfs_header_nritems(node);
1707 blocksize = btrfs_level_size(root, level - 1);
1708 for (i = slot; i < nritems; i++) {
1709 bytenr = btrfs_node_blockptr(node, i);
1710 ptr_gen = btrfs_node_ptr_generation(node, i);
1711 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1716 * Check the child node/leaf by the following condition:
1717 * 1. the first item key of the node/leaf should be the same with the one
1719 * 2. block in parent node should match the child node/leaf.
1720 * 3. generation of parent node and child's header should be consistent.
1722 * Or the child node/leaf pointed by the key in parent is not valid.
1724 * We hope to check leaf owner too, but since subvol may share leaves,
1725 * which makes leaf owner check not so strong, key check should be
1726 * sufficient enough for that case.
1728 static int check_child_node(struct btrfs_root *root,
1729 struct extent_buffer *parent, int slot,
1730 struct extent_buffer *child)
1732 struct btrfs_key parent_key;
1733 struct btrfs_key child_key;
1736 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1737 if (btrfs_header_level(child) == 0)
1738 btrfs_item_key_to_cpu(child, &child_key, 0);
1740 btrfs_node_key_to_cpu(child, &child_key, 0);
1742 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1745 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1746 parent_key.objectid, parent_key.type, parent_key.offset,
1747 child_key.objectid, child_key.type, child_key.offset);
1749 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1751 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1752 btrfs_node_blockptr(parent, slot),
1753 btrfs_header_bytenr(child));
1755 if (btrfs_node_ptr_generation(parent, slot) !=
1756 btrfs_header_generation(child)) {
1758 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1759 btrfs_header_generation(child),
1760 btrfs_node_ptr_generation(parent, slot));
1765 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1766 struct walk_control *wc, int *level)
1768 enum btrfs_tree_block_status status;
1771 struct extent_buffer *next;
1772 struct extent_buffer *cur;
1777 WARN_ON(*level < 0);
1778 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1779 ret = btrfs_lookup_extent_info(NULL, root,
1780 path->nodes[*level]->start,
1781 *level, 1, &refs, NULL);
1788 ret = enter_shared_node(root, path->nodes[*level]->start,
1796 while (*level >= 0) {
1797 WARN_ON(*level < 0);
1798 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1799 cur = path->nodes[*level];
1801 if (btrfs_header_level(cur) != *level)
1804 if (path->slots[*level] >= btrfs_header_nritems(cur))
1807 ret = process_one_leaf(root, cur, wc);
1812 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1813 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1814 blocksize = btrfs_level_size(root, *level - 1);
1815 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1821 ret = enter_shared_node(root, bytenr, refs,
1824 path->slots[*level]++;
1829 next = btrfs_find_tree_block(root, bytenr, blocksize);
1830 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1831 free_extent_buffer(next);
1832 reada_walk_down(root, cur, path->slots[*level]);
1833 next = read_tree_block(root, bytenr, blocksize,
1835 if (!extent_buffer_uptodate(next)) {
1836 struct btrfs_key node_key;
1838 btrfs_node_key_to_cpu(path->nodes[*level],
1840 path->slots[*level]);
1841 btrfs_add_corrupt_extent_record(root->fs_info,
1843 path->nodes[*level]->start,
1844 root->leafsize, *level);
1850 ret = check_child_node(root, cur, path->slots[*level], next);
1856 if (btrfs_is_leaf(next))
1857 status = btrfs_check_leaf(root, NULL, next);
1859 status = btrfs_check_node(root, NULL, next);
1860 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1861 free_extent_buffer(next);
1866 *level = *level - 1;
1867 free_extent_buffer(path->nodes[*level]);
1868 path->nodes[*level] = next;
1869 path->slots[*level] = 0;
1872 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1876 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1877 struct walk_control *wc, int *level)
1880 struct extent_buffer *leaf;
1882 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1883 leaf = path->nodes[i];
1884 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1889 free_extent_buffer(path->nodes[*level]);
1890 path->nodes[*level] = NULL;
1891 BUG_ON(*level > wc->active_node);
1892 if (*level == wc->active_node)
1893 leave_shared_node(root, wc, *level);
1900 static int check_root_dir(struct inode_record *rec)
1902 struct inode_backref *backref;
1905 if (!rec->found_inode_item || rec->errors)
1907 if (rec->nlink != 1 || rec->found_link != 0)
1909 if (list_empty(&rec->backrefs))
1911 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1912 if (!backref->found_inode_ref)
1914 if (backref->index != 0 || backref->namelen != 2 ||
1915 memcmp(backref->name, "..", 2))
1917 if (backref->found_dir_index || backref->found_dir_item)
1924 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1925 struct btrfs_root *root, struct btrfs_path *path,
1926 struct inode_record *rec)
1928 struct btrfs_inode_item *ei;
1929 struct btrfs_key key;
1932 key.objectid = rec->ino;
1933 key.type = BTRFS_INODE_ITEM_KEY;
1934 key.offset = (u64)-1;
1936 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1940 if (!path->slots[0]) {
1947 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1948 if (key.objectid != rec->ino) {
1953 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1954 struct btrfs_inode_item);
1955 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1956 btrfs_mark_buffer_dirty(path->nodes[0]);
1957 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1958 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1959 root->root_key.objectid);
1961 btrfs_release_path(path);
1965 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1966 struct btrfs_root *root,
1967 struct btrfs_path *path,
1968 struct inode_record *rec)
1972 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1973 btrfs_release_path(path);
1975 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1979 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1980 struct btrfs_root *root,
1981 struct btrfs_path *path,
1982 struct inode_record *rec)
1984 struct btrfs_inode_item *ei;
1985 struct btrfs_key key;
1988 key.objectid = rec->ino;
1989 key.type = BTRFS_INODE_ITEM_KEY;
1992 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1999 /* Since ret == 0, no need to check anything */
2000 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2001 struct btrfs_inode_item);
2002 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2003 btrfs_mark_buffer_dirty(path->nodes[0]);
2004 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2005 printf("reset nbytes for ino %llu root %llu\n",
2006 rec->ino, root->root_key.objectid);
2008 btrfs_release_path(path);
2012 static int add_missing_dir_index(struct btrfs_root *root,
2013 struct cache_tree *inode_cache,
2014 struct inode_record *rec,
2015 struct inode_backref *backref)
2017 struct btrfs_path *path;
2018 struct btrfs_trans_handle *trans;
2019 struct btrfs_dir_item *dir_item;
2020 struct extent_buffer *leaf;
2021 struct btrfs_key key;
2022 struct btrfs_disk_key disk_key;
2023 struct inode_record *dir_rec;
2024 unsigned long name_ptr;
2025 u32 data_size = sizeof(*dir_item) + backref->namelen;
2028 path = btrfs_alloc_path();
2032 trans = btrfs_start_transaction(root, 1);
2033 if (IS_ERR(trans)) {
2034 btrfs_free_path(path);
2035 return PTR_ERR(trans);
2038 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2039 (unsigned long long)rec->ino);
2040 key.objectid = backref->dir;
2041 key.type = BTRFS_DIR_INDEX_KEY;
2042 key.offset = backref->index;
2044 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2047 leaf = path->nodes[0];
2048 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2050 disk_key.objectid = cpu_to_le64(rec->ino);
2051 disk_key.type = BTRFS_INODE_ITEM_KEY;
2052 disk_key.offset = 0;
2054 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2055 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2056 btrfs_set_dir_data_len(leaf, dir_item, 0);
2057 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2058 name_ptr = (unsigned long)(dir_item + 1);
2059 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2060 btrfs_mark_buffer_dirty(leaf);
2061 btrfs_free_path(path);
2062 btrfs_commit_transaction(trans, root);
2064 backref->found_dir_index = 1;
2065 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2068 dir_rec->found_size += backref->namelen;
2069 if (dir_rec->found_size == dir_rec->isize &&
2070 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2071 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2072 if (dir_rec->found_size != dir_rec->isize)
2073 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2078 static int delete_dir_index(struct btrfs_root *root,
2079 struct cache_tree *inode_cache,
2080 struct inode_record *rec,
2081 struct inode_backref *backref)
2083 struct btrfs_trans_handle *trans;
2084 struct btrfs_dir_item *di;
2085 struct btrfs_path *path;
2088 path = btrfs_alloc_path();
2092 trans = btrfs_start_transaction(root, 1);
2093 if (IS_ERR(trans)) {
2094 btrfs_free_path(path);
2095 return PTR_ERR(trans);
2099 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2100 (unsigned long long)backref->dir,
2101 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2102 (unsigned long long)root->objectid);
2104 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2105 backref->name, backref->namelen,
2106 backref->index, -1);
2109 btrfs_free_path(path);
2110 btrfs_commit_transaction(trans, root);
2117 ret = btrfs_del_item(trans, root, path);
2119 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2121 btrfs_free_path(path);
2122 btrfs_commit_transaction(trans, root);
2126 static int create_inode_item(struct btrfs_root *root,
2127 struct inode_record *rec,
2128 struct inode_backref *backref, int root_dir)
2130 struct btrfs_trans_handle *trans;
2131 struct btrfs_inode_item inode_item;
2132 time_t now = time(NULL);
2135 trans = btrfs_start_transaction(root, 1);
2136 if (IS_ERR(trans)) {
2137 ret = PTR_ERR(trans);
2141 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2142 "be incomplete, please check permissions and content after "
2143 "the fsck completes.\n", (unsigned long long)root->objectid,
2144 (unsigned long long)rec->ino);
2146 memset(&inode_item, 0, sizeof(inode_item));
2147 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2149 btrfs_set_stack_inode_nlink(&inode_item, 1);
2151 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2152 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2153 if (rec->found_dir_item) {
2154 if (rec->found_file_extent)
2155 fprintf(stderr, "root %llu inode %llu has both a dir "
2156 "item and extents, unsure if it is a dir or a "
2157 "regular file so setting it as a directory\n",
2158 (unsigned long long)root->objectid,
2159 (unsigned long long)rec->ino);
2160 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2161 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2162 } else if (!rec->found_dir_item) {
2163 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2164 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2166 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2167 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2168 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2169 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2170 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2171 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2172 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2173 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2175 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2177 btrfs_commit_transaction(trans, root);
2181 static int repair_inode_backrefs(struct btrfs_root *root,
2182 struct inode_record *rec,
2183 struct cache_tree *inode_cache,
2186 struct inode_backref *tmp, *backref;
2187 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2191 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2192 if (!delete && rec->ino == root_dirid) {
2193 if (!rec->found_inode_item) {
2194 ret = create_inode_item(root, rec, backref, 1);
2201 /* Index 0 for root dir's are special, don't mess with it */
2202 if (rec->ino == root_dirid && backref->index == 0)
2206 ((backref->found_dir_index && !backref->found_inode_ref) ||
2207 (backref->found_dir_index && backref->found_inode_ref &&
2208 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2209 ret = delete_dir_index(root, inode_cache, rec, backref);
2213 list_del(&backref->list);
2217 if (!delete && !backref->found_dir_index &&
2218 backref->found_dir_item && backref->found_inode_ref) {
2219 ret = add_missing_dir_index(root, inode_cache, rec,
2224 if (backref->found_dir_item &&
2225 backref->found_dir_index &&
2226 backref->found_dir_index) {
2227 if (!backref->errors &&
2228 backref->found_inode_ref) {
2229 list_del(&backref->list);
2235 if (!delete && (!backref->found_dir_index &&
2236 !backref->found_dir_item &&
2237 backref->found_inode_ref)) {
2238 struct btrfs_trans_handle *trans;
2239 struct btrfs_key location;
2241 ret = check_dir_conflict(root, backref->name,
2247 * let nlink fixing routine to handle it,
2248 * which can do it better.
2253 location.objectid = rec->ino;
2254 location.type = BTRFS_INODE_ITEM_KEY;
2255 location.offset = 0;
2257 trans = btrfs_start_transaction(root, 1);
2258 if (IS_ERR(trans)) {
2259 ret = PTR_ERR(trans);
2262 fprintf(stderr, "adding missing dir index/item pair "
2264 (unsigned long long)rec->ino);
2265 ret = btrfs_insert_dir_item(trans, root, backref->name,
2267 backref->dir, &location,
2268 imode_to_type(rec->imode),
2271 btrfs_commit_transaction(trans, root);
2275 if (!delete && (backref->found_inode_ref &&
2276 backref->found_dir_index &&
2277 backref->found_dir_item &&
2278 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2279 !rec->found_inode_item)) {
2280 ret = create_inode_item(root, rec, backref, 0);
2287 return ret ? ret : repaired;
2291 * To determine the file type for nlink/inode_item repair
2293 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2294 * Return -ENOENT if file type is not found.
2296 static int find_file_type(struct inode_record *rec, u8 *type)
2298 struct inode_backref *backref;
2300 /* For inode item recovered case */
2301 if (rec->found_inode_item) {
2302 *type = imode_to_type(rec->imode);
2306 list_for_each_entry(backref, &rec->backrefs, list) {
2307 if (backref->found_dir_index || backref->found_dir_item) {
2308 *type = backref->filetype;
2316 * To determine the file name for nlink repair
2318 * Return 0 if file name is found, set name and namelen.
2319 * Return -ENOENT if file name is not found.
2321 static int find_file_name(struct inode_record *rec,
2322 char *name, int *namelen)
2324 struct inode_backref *backref;
2326 list_for_each_entry(backref, &rec->backrefs, list) {
2327 if (backref->found_dir_index || backref->found_dir_item ||
2328 backref->found_inode_ref) {
2329 memcpy(name, backref->name, backref->namelen);
2330 *namelen = backref->namelen;
2337 /* Reset the nlink of the inode to the correct one */
2338 static int reset_nlink(struct btrfs_trans_handle *trans,
2339 struct btrfs_root *root,
2340 struct btrfs_path *path,
2341 struct inode_record *rec)
2343 struct inode_backref *backref;
2344 struct inode_backref *tmp;
2345 struct btrfs_key key;
2346 struct btrfs_inode_item *inode_item;
2349 /* We don't believe this either, reset it and iterate backref */
2350 rec->found_link = 0;
2352 /* Remove all backref including the valid ones */
2353 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2355 backref->index, backref->name,
2356 backref->namelen, 0);
2360 /* remove invalid backref, so it won't be added back */
2361 if (!(backref->found_dir_index &&
2362 backref->found_dir_item &&
2363 backref->found_inode_ref)) {
2364 list_del(&backref->list);
2371 /* Set nlink to 0 */
2372 key.objectid = rec->ino;
2373 key.type = BTRFS_INODE_ITEM_KEY;
2375 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2382 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2383 struct btrfs_inode_item);
2384 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2385 btrfs_mark_buffer_dirty(path->nodes[0]);
2386 btrfs_release_path(path);
2389 * Add back valid inode_ref/dir_item/dir_index,
2390 * add_link() will handle the nlink inc, so new nlink must be correct
2392 list_for_each_entry(backref, &rec->backrefs, list) {
2393 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2394 backref->name, backref->namelen,
2395 backref->ref_type, &backref->index, 1);
2400 btrfs_release_path(path);
2404 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2405 struct btrfs_root *root,
2406 struct btrfs_path *path,
2407 struct inode_record *rec)
2409 char *dir_name = "lost+found";
2410 char namebuf[BTRFS_NAME_LEN] = {0};
2415 int name_recovered = 0;
2416 int type_recovered = 0;
2420 * Get file name and type first before these invalid inode ref
2421 * are deleted by remove_all_invalid_backref()
2423 name_recovered = !find_file_name(rec, namebuf, &namelen);
2424 type_recovered = !find_file_type(rec, &type);
2426 if (!name_recovered) {
2427 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2428 rec->ino, rec->ino);
2429 namelen = count_digits(rec->ino);
2430 sprintf(namebuf, "%llu", rec->ino);
2433 if (!type_recovered) {
2434 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2436 type = BTRFS_FT_REG_FILE;
2440 ret = reset_nlink(trans, root, path, rec);
2443 "Failed to reset nlink for inode %llu: %s\n",
2444 rec->ino, strerror(-ret));
2448 if (rec->found_link == 0) {
2449 lost_found_ino = root->highest_inode;
2450 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2455 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2456 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2459 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2460 dir_name, strerror(-ret));
2463 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2464 namebuf, namelen, type, NULL, 1);
2466 * Add ".INO" suffix several times to handle case where
2467 * "FILENAME.INO" is already taken by another file.
2469 while (ret == -EEXIST) {
2471 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2473 if (namelen + count_digits(rec->ino) + 1 >
2478 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2480 namelen += count_digits(rec->ino) + 1;
2481 ret = btrfs_add_link(trans, root, rec->ino,
2482 lost_found_ino, namebuf,
2483 namelen, type, NULL, 1);
2487 "Failed to link the inode %llu to %s dir: %s\n",
2488 rec->ino, dir_name, strerror(-ret));
2492 * Just increase the found_link, don't actually add the
2493 * backref. This will make things easier and this inode
2494 * record will be freed after the repair is done.
2495 * So fsck will not report problem about this inode.
2498 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2499 namelen, namebuf, dir_name);
2501 printf("Fixed the nlink of inode %llu\n", rec->ino);
2504 * Clear the flag anyway, or we will loop forever for the same inode
2505 * as it will not be removed from the bad inode list and the dead loop
2508 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2509 btrfs_release_path(path);
2514 * Check if there is any normal(reg or prealloc) file extent for given
2516 * This is used to determine the file type when neither its dir_index/item or
2517 * inode_item exists.
2519 * This will *NOT* report error, if any error happens, just consider it does
2520 * not have any normal file extent.
2522 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2524 struct btrfs_path *path;
2525 struct btrfs_key key;
2526 struct btrfs_key found_key;
2527 struct btrfs_file_extent_item *fi;
2531 path = btrfs_alloc_path();
2535 key.type = BTRFS_EXTENT_DATA_KEY;
2538 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2543 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2544 ret = btrfs_next_leaf(root, path);
2551 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2553 if (found_key.objectid != ino ||
2554 found_key.type != BTRFS_EXTENT_DATA_KEY)
2556 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2557 struct btrfs_file_extent_item);
2558 type = btrfs_file_extent_type(path->nodes[0], fi);
2559 if (type != BTRFS_FILE_EXTENT_INLINE) {
2565 btrfs_free_path(path);
2569 static u32 btrfs_type_to_imode(u8 type)
2571 static u32 imode_by_btrfs_type[] = {
2572 [BTRFS_FT_REG_FILE] = S_IFREG,
2573 [BTRFS_FT_DIR] = S_IFDIR,
2574 [BTRFS_FT_CHRDEV] = S_IFCHR,
2575 [BTRFS_FT_BLKDEV] = S_IFBLK,
2576 [BTRFS_FT_FIFO] = S_IFIFO,
2577 [BTRFS_FT_SOCK] = S_IFSOCK,
2578 [BTRFS_FT_SYMLINK] = S_IFLNK,
2581 return imode_by_btrfs_type[(type)];
2584 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2585 struct btrfs_root *root,
2586 struct btrfs_path *path,
2587 struct inode_record *rec)
2591 int type_recovered = 0;
2594 printf("Trying to rebuild inode:%llu\n", rec->ino);
2596 type_recovered = !find_file_type(rec, &filetype);
2599 * Try to determine inode type if type not found.
2601 * For found regular file extent, it must be FILE.
2602 * For found dir_item/index, it must be DIR.
2604 * For undetermined one, use FILE as fallback.
2607 * 1. If found backref(inode_index/item is already handled) to it,
2609 * Need new inode-inode ref structure to allow search for that.
2611 if (!type_recovered) {
2612 if (rec->found_file_extent &&
2613 find_normal_file_extent(root, rec->ino)) {
2615 filetype = BTRFS_FT_REG_FILE;
2616 } else if (rec->found_dir_item) {
2618 filetype = BTRFS_FT_DIR;
2619 } else if (!list_empty(&rec->orphan_extents)) {
2621 filetype = BTRFS_FT_REG_FILE;
2623 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2626 filetype = BTRFS_FT_REG_FILE;
2630 ret = btrfs_new_inode(trans, root, rec->ino,
2631 mode | btrfs_type_to_imode(filetype));
2636 * Here inode rebuild is done, we only rebuild the inode item,
2637 * don't repair the nlink(like move to lost+found).
2638 * That is the job of nlink repair.
2640 * We just fill the record and return
2642 rec->found_dir_item = 1;
2643 rec->imode = mode | btrfs_type_to_imode(filetype);
2645 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2646 /* Ensure the inode_nlinks repair function will be called */
2647 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2652 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2653 struct btrfs_root *root,
2654 struct btrfs_path *path,
2655 struct inode_record *rec)
2657 struct orphan_data_extent *orphan;
2658 struct orphan_data_extent *tmp;
2661 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2663 * Check for conflicting file extents
2665 * Here we don't know whether the extents is compressed or not,
2666 * so we can only assume it not compressed nor data offset,
2667 * and use its disk_len as extent length.
2669 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2670 orphan->offset, orphan->disk_len, 0);
2671 btrfs_release_path(path);
2676 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2677 orphan->disk_bytenr, orphan->disk_len);
2678 ret = btrfs_free_extent(trans,
2679 root->fs_info->extent_root,
2680 orphan->disk_bytenr, orphan->disk_len,
2681 0, root->objectid, orphan->objectid,
2686 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2687 orphan->offset, orphan->disk_bytenr,
2688 orphan->disk_len, orphan->disk_len);
2692 /* Update file size info */
2693 rec->found_size += orphan->disk_len;
2694 if (rec->found_size == rec->nbytes)
2695 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2697 /* Update the file extent hole info too */
2698 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2702 if (RB_EMPTY_ROOT(&rec->holes))
2703 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2705 list_del(&orphan->list);
2708 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2713 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2714 struct btrfs_root *root,
2715 struct btrfs_path *path,
2716 struct inode_record *rec)
2718 struct rb_node *node;
2719 struct file_extent_hole *hole;
2723 node = rb_first(&rec->holes);
2727 hole = rb_entry(node, struct file_extent_hole, node);
2728 ret = btrfs_punch_hole(trans, root, rec->ino,
2729 hole->start, hole->len);
2732 ret = del_file_extent_hole(&rec->holes, hole->start,
2736 if (RB_EMPTY_ROOT(&rec->holes))
2737 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2738 node = rb_first(&rec->holes);
2740 /* special case for a file losing all its file extent */
2742 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2743 round_up(rec->isize, root->sectorsize));
2747 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2748 rec->ino, root->objectid);
2753 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2755 struct btrfs_trans_handle *trans;
2756 struct btrfs_path *path;
2759 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2760 I_ERR_NO_ORPHAN_ITEM |
2761 I_ERR_LINK_COUNT_WRONG |
2762 I_ERR_NO_INODE_ITEM |
2763 I_ERR_FILE_EXTENT_ORPHAN |
2764 I_ERR_FILE_EXTENT_DISCOUNT|
2765 I_ERR_FILE_NBYTES_WRONG)))
2768 path = btrfs_alloc_path();
2773 * For nlink repair, it may create a dir and add link, so
2774 * 2 for parent(256)'s dir_index and dir_item
2775 * 2 for lost+found dir's inode_item and inode_ref
2776 * 1 for the new inode_ref of the file
2777 * 2 for lost+found dir's dir_index and dir_item for the file
2779 trans = btrfs_start_transaction(root, 7);
2780 if (IS_ERR(trans)) {
2781 btrfs_free_path(path);
2782 return PTR_ERR(trans);
2785 if (rec->errors & I_ERR_NO_INODE_ITEM)
2786 ret = repair_inode_no_item(trans, root, path, rec);
2787 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2788 ret = repair_inode_orphan_extent(trans, root, path, rec);
2789 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2790 ret = repair_inode_discount_extent(trans, root, path, rec);
2791 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2792 ret = repair_inode_isize(trans, root, path, rec);
2793 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2794 ret = repair_inode_orphan_item(trans, root, path, rec);
2795 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2796 ret = repair_inode_nlinks(trans, root, path, rec);
2797 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2798 ret = repair_inode_nbytes(trans, root, path, rec);
2799 btrfs_commit_transaction(trans, root);
2800 btrfs_free_path(path);
2804 static int check_inode_recs(struct btrfs_root *root,
2805 struct cache_tree *inode_cache)
2807 struct cache_extent *cache;
2808 struct ptr_node *node;
2809 struct inode_record *rec;
2810 struct inode_backref *backref;
2815 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2817 if (btrfs_root_refs(&root->root_item) == 0) {
2818 if (!cache_tree_empty(inode_cache))
2819 fprintf(stderr, "warning line %d\n", __LINE__);
2824 * We need to record the highest inode number for later 'lost+found'
2826 * We must select a ino not used/refered by any existing inode, or
2827 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2828 * this may cause 'lost+found' dir has wrong nlinks.
2830 cache = last_cache_extent(inode_cache);
2832 node = container_of(cache, struct ptr_node, cache);
2834 if (rec->ino > root->highest_inode)
2835 root->highest_inode = rec->ino;
2839 * We need to repair backrefs first because we could change some of the
2840 * errors in the inode recs.
2842 * We also need to go through and delete invalid backrefs first and then
2843 * add the correct ones second. We do this because we may get EEXIST
2844 * when adding back the correct index because we hadn't yet deleted the
2847 * For example, if we were missing a dir index then the directories
2848 * isize would be wrong, so if we fixed the isize to what we thought it
2849 * would be and then fixed the backref we'd still have a invalid fs, so
2850 * we need to add back the dir index and then check to see if the isize
2855 if (stage == 3 && !err)
2858 cache = search_cache_extent(inode_cache, 0);
2859 while (repair && cache) {
2860 node = container_of(cache, struct ptr_node, cache);
2862 cache = next_cache_extent(cache);
2864 /* Need to free everything up and rescan */
2866 remove_cache_extent(inode_cache, &node->cache);
2868 free_inode_rec(rec);
2872 if (list_empty(&rec->backrefs))
2875 ret = repair_inode_backrefs(root, rec, inode_cache,
2889 rec = get_inode_rec(inode_cache, root_dirid, 0);
2891 ret = check_root_dir(rec);
2893 fprintf(stderr, "root %llu root dir %llu error\n",
2894 (unsigned long long)root->root_key.objectid,
2895 (unsigned long long)root_dirid);
2896 print_inode_error(root, rec);
2901 struct btrfs_trans_handle *trans;
2903 trans = btrfs_start_transaction(root, 1);
2904 if (IS_ERR(trans)) {
2905 err = PTR_ERR(trans);
2910 "root %llu missing its root dir, recreating\n",
2911 (unsigned long long)root->objectid);
2913 ret = btrfs_make_root_dir(trans, root, root_dirid);
2916 btrfs_commit_transaction(trans, root);
2920 fprintf(stderr, "root %llu root dir %llu not found\n",
2921 (unsigned long long)root->root_key.objectid,
2922 (unsigned long long)root_dirid);
2926 cache = search_cache_extent(inode_cache, 0);
2929 node = container_of(cache, struct ptr_node, cache);
2931 remove_cache_extent(inode_cache, &node->cache);
2933 if (rec->ino == root_dirid ||
2934 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2935 free_inode_rec(rec);
2939 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2940 ret = check_orphan_item(root, rec->ino);
2942 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2943 if (can_free_inode_rec(rec)) {
2944 free_inode_rec(rec);
2949 if (!rec->found_inode_item)
2950 rec->errors |= I_ERR_NO_INODE_ITEM;
2951 if (rec->found_link != rec->nlink)
2952 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2954 ret = try_repair_inode(root, rec);
2955 if (ret == 0 && can_free_inode_rec(rec)) {
2956 free_inode_rec(rec);
2962 if (!(repair && ret == 0))
2964 print_inode_error(root, rec);
2965 list_for_each_entry(backref, &rec->backrefs, list) {
2966 if (!backref->found_dir_item)
2967 backref->errors |= REF_ERR_NO_DIR_ITEM;
2968 if (!backref->found_dir_index)
2969 backref->errors |= REF_ERR_NO_DIR_INDEX;
2970 if (!backref->found_inode_ref)
2971 backref->errors |= REF_ERR_NO_INODE_REF;
2972 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2973 " namelen %u name %s filetype %d errors %x",
2974 (unsigned long long)backref->dir,
2975 (unsigned long long)backref->index,
2976 backref->namelen, backref->name,
2977 backref->filetype, backref->errors);
2978 print_ref_error(backref->errors);
2980 free_inode_rec(rec);
2982 return (error > 0) ? -1 : 0;
2985 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2988 struct cache_extent *cache;
2989 struct root_record *rec = NULL;
2992 cache = lookup_cache_extent(root_cache, objectid, 1);
2994 rec = container_of(cache, struct root_record, cache);
2996 rec = calloc(1, sizeof(*rec));
2997 rec->objectid = objectid;
2998 INIT_LIST_HEAD(&rec->backrefs);
2999 rec->cache.start = objectid;
3000 rec->cache.size = 1;
3002 ret = insert_cache_extent(root_cache, &rec->cache);
3008 static struct root_backref *get_root_backref(struct root_record *rec,
3009 u64 ref_root, u64 dir, u64 index,
3010 const char *name, int namelen)
3012 struct root_backref *backref;
3014 list_for_each_entry(backref, &rec->backrefs, list) {
3015 if (backref->ref_root != ref_root || backref->dir != dir ||
3016 backref->namelen != namelen)
3018 if (memcmp(name, backref->name, namelen))
3023 backref = calloc(1, sizeof(*backref) + namelen + 1);
3024 backref->ref_root = ref_root;
3026 backref->index = index;
3027 backref->namelen = namelen;
3028 memcpy(backref->name, name, namelen);
3029 backref->name[namelen] = '\0';
3030 list_add_tail(&backref->list, &rec->backrefs);
3034 static void free_root_record(struct cache_extent *cache)
3036 struct root_record *rec;
3037 struct root_backref *backref;
3039 rec = container_of(cache, struct root_record, cache);
3040 while (!list_empty(&rec->backrefs)) {
3041 backref = list_entry(rec->backrefs.next,
3042 struct root_backref, list);
3043 list_del(&backref->list);
3050 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3052 static int add_root_backref(struct cache_tree *root_cache,
3053 u64 root_id, u64 ref_root, u64 dir, u64 index,
3054 const char *name, int namelen,
3055 int item_type, int errors)
3057 struct root_record *rec;
3058 struct root_backref *backref;
3060 rec = get_root_rec(root_cache, root_id);
3061 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3063 backref->errors |= errors;
3065 if (item_type != BTRFS_DIR_ITEM_KEY) {
3066 if (backref->found_dir_index || backref->found_back_ref ||
3067 backref->found_forward_ref) {
3068 if (backref->index != index)
3069 backref->errors |= REF_ERR_INDEX_UNMATCH;
3071 backref->index = index;
3075 if (item_type == BTRFS_DIR_ITEM_KEY) {
3076 if (backref->found_forward_ref)
3078 backref->found_dir_item = 1;
3079 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3080 backref->found_dir_index = 1;
3081 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3082 if (backref->found_forward_ref)
3083 backref->errors |= REF_ERR_DUP_ROOT_REF;
3084 else if (backref->found_dir_item)
3086 backref->found_forward_ref = 1;
3087 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3088 if (backref->found_back_ref)
3089 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3090 backref->found_back_ref = 1;
3095 if (backref->found_forward_ref && backref->found_dir_item)
3096 backref->reachable = 1;
3100 static int merge_root_recs(struct btrfs_root *root,
3101 struct cache_tree *src_cache,
3102 struct cache_tree *dst_cache)
3104 struct cache_extent *cache;
3105 struct ptr_node *node;
3106 struct inode_record *rec;
3107 struct inode_backref *backref;
3110 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3111 free_inode_recs_tree(src_cache);
3116 cache = search_cache_extent(src_cache, 0);
3119 node = container_of(cache, struct ptr_node, cache);
3121 remove_cache_extent(src_cache, &node->cache);
3124 ret = is_child_root(root, root->objectid, rec->ino);
3130 list_for_each_entry(backref, &rec->backrefs, list) {
3131 BUG_ON(backref->found_inode_ref);
3132 if (backref->found_dir_item)
3133 add_root_backref(dst_cache, rec->ino,
3134 root->root_key.objectid, backref->dir,
3135 backref->index, backref->name,
3136 backref->namelen, BTRFS_DIR_ITEM_KEY,
3138 if (backref->found_dir_index)
3139 add_root_backref(dst_cache, rec->ino,
3140 root->root_key.objectid, backref->dir,
3141 backref->index, backref->name,
3142 backref->namelen, BTRFS_DIR_INDEX_KEY,
3146 free_inode_rec(rec);
3153 static int check_root_refs(struct btrfs_root *root,
3154 struct cache_tree *root_cache)
3156 struct root_record *rec;
3157 struct root_record *ref_root;
3158 struct root_backref *backref;
3159 struct cache_extent *cache;
3165 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3168 /* fixme: this can not detect circular references */
3171 cache = search_cache_extent(root_cache, 0);
3175 rec = container_of(cache, struct root_record, cache);
3176 cache = next_cache_extent(cache);
3178 if (rec->found_ref == 0)
3181 list_for_each_entry(backref, &rec->backrefs, list) {
3182 if (!backref->reachable)
3185 ref_root = get_root_rec(root_cache,
3187 if (ref_root->found_ref > 0)
3190 backref->reachable = 0;
3192 if (rec->found_ref == 0)
3198 cache = search_cache_extent(root_cache, 0);
3202 rec = container_of(cache, struct root_record, cache);
3203 cache = next_cache_extent(cache);
3205 if (rec->found_ref == 0 &&
3206 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3207 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3208 ret = check_orphan_item(root->fs_info->tree_root,
3214 * If we don't have a root item then we likely just have
3215 * a dir item in a snapshot for this root but no actual
3216 * ref key or anything so it's meaningless.
3218 if (!rec->found_root_item)
3221 fprintf(stderr, "fs tree %llu not referenced\n",
3222 (unsigned long long)rec->objectid);
3226 if (rec->found_ref > 0 && !rec->found_root_item)
3228 list_for_each_entry(backref, &rec->backrefs, list) {
3229 if (!backref->found_dir_item)
3230 backref->errors |= REF_ERR_NO_DIR_ITEM;
3231 if (!backref->found_dir_index)
3232 backref->errors |= REF_ERR_NO_DIR_INDEX;
3233 if (!backref->found_back_ref)
3234 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3235 if (!backref->found_forward_ref)
3236 backref->errors |= REF_ERR_NO_ROOT_REF;
3237 if (backref->reachable && backref->errors)
3244 fprintf(stderr, "fs tree %llu refs %u %s\n",
3245 (unsigned long long)rec->objectid, rec->found_ref,
3246 rec->found_root_item ? "" : "not found");
3248 list_for_each_entry(backref, &rec->backrefs, list) {
3249 if (!backref->reachable)
3251 if (!backref->errors && rec->found_root_item)
3253 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3254 " index %llu namelen %u name %s errors %x\n",
3255 (unsigned long long)backref->ref_root,
3256 (unsigned long long)backref->dir,
3257 (unsigned long long)backref->index,
3258 backref->namelen, backref->name,
3260 print_ref_error(backref->errors);
3263 return errors > 0 ? 1 : 0;
3266 static int process_root_ref(struct extent_buffer *eb, int slot,
3267 struct btrfs_key *key,
3268 struct cache_tree *root_cache)
3274 struct btrfs_root_ref *ref;
3275 char namebuf[BTRFS_NAME_LEN];
3278 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3280 dirid = btrfs_root_ref_dirid(eb, ref);
3281 index = btrfs_root_ref_sequence(eb, ref);
3282 name_len = btrfs_root_ref_name_len(eb, ref);
3284 if (name_len <= BTRFS_NAME_LEN) {
3288 len = BTRFS_NAME_LEN;
3289 error = REF_ERR_NAME_TOO_LONG;
3291 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3293 if (key->type == BTRFS_ROOT_REF_KEY) {
3294 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3295 index, namebuf, len, key->type, error);
3297 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3298 index, namebuf, len, key->type, error);
3303 static void free_corrupt_block(struct cache_extent *cache)
3305 struct btrfs_corrupt_block *corrupt;
3307 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3311 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3314 * Repair the btree of the given root.
3316 * The fix is to remove the node key in corrupt_blocks cache_tree.
3317 * and rebalance the tree.
3318 * After the fix, the btree should be writeable.
3320 static int repair_btree(struct btrfs_root *root,
3321 struct cache_tree *corrupt_blocks)
3323 struct btrfs_trans_handle *trans;
3324 struct btrfs_path *path;
3325 struct btrfs_corrupt_block *corrupt;
3326 struct cache_extent *cache;
3327 struct btrfs_key key;
3332 if (cache_tree_empty(corrupt_blocks))
3335 path = btrfs_alloc_path();
3339 trans = btrfs_start_transaction(root, 1);
3340 if (IS_ERR(trans)) {
3341 ret = PTR_ERR(trans);
3342 fprintf(stderr, "Error starting transaction: %s\n",
3346 cache = first_cache_extent(corrupt_blocks);
3348 corrupt = container_of(cache, struct btrfs_corrupt_block,
3350 level = corrupt->level;
3351 path->lowest_level = level;
3352 key.objectid = corrupt->key.objectid;
3353 key.type = corrupt->key.type;
3354 key.offset = corrupt->key.offset;
3357 * Here we don't want to do any tree balance, since it may
3358 * cause a balance with corrupted brother leaf/node,
3359 * so ins_len set to 0 here.
3360 * Balance will be done after all corrupt node/leaf is deleted.
3362 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3365 offset = btrfs_node_blockptr(path->nodes[level],
3366 path->slots[level]);
3368 /* Remove the ptr */
3369 ret = btrfs_del_ptr(trans, root, path, level,
3370 path->slots[level]);
3374 * Remove the corresponding extent
3375 * return value is not concerned.
3377 btrfs_release_path(path);
3378 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3379 0, root->root_key.objectid,
3381 cache = next_cache_extent(cache);
3384 /* Balance the btree using btrfs_search_slot() */
3385 cache = first_cache_extent(corrupt_blocks);
3387 corrupt = container_of(cache, struct btrfs_corrupt_block,
3389 memcpy(&key, &corrupt->key, sizeof(key));
3390 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3393 /* return will always >0 since it won't find the item */
3395 btrfs_release_path(path);
3396 cache = next_cache_extent(cache);
3399 btrfs_commit_transaction(trans, root);
3401 btrfs_free_path(path);
3405 static int check_fs_root(struct btrfs_root *root,
3406 struct cache_tree *root_cache,
3407 struct walk_control *wc)
3413 struct btrfs_path path;
3414 struct shared_node root_node;
3415 struct root_record *rec;
3416 struct btrfs_root_item *root_item = &root->root_item;
3417 struct cache_tree corrupt_blocks;
3418 struct orphan_data_extent *orphan;
3419 struct orphan_data_extent *tmp;
3420 enum btrfs_tree_block_status status;
3423 * Reuse the corrupt_block cache tree to record corrupted tree block
3425 * Unlike the usage in extent tree check, here we do it in a per
3426 * fs/subvol tree base.
3428 cache_tree_init(&corrupt_blocks);
3429 root->fs_info->corrupt_blocks = &corrupt_blocks;
3431 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3432 rec = get_root_rec(root_cache, root->root_key.objectid);
3433 if (btrfs_root_refs(root_item) > 0)
3434 rec->found_root_item = 1;
3437 btrfs_init_path(&path);
3438 memset(&root_node, 0, sizeof(root_node));
3439 cache_tree_init(&root_node.root_cache);
3440 cache_tree_init(&root_node.inode_cache);
3442 /* Move the orphan extent record to corresponding inode_record */
3443 list_for_each_entry_safe(orphan, tmp,
3444 &root->orphan_data_extents, list) {
3445 struct inode_record *inode;
3447 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3449 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3450 list_move(&orphan->list, &inode->orphan_extents);
3453 level = btrfs_header_level(root->node);
3454 memset(wc->nodes, 0, sizeof(wc->nodes));
3455 wc->nodes[level] = &root_node;
3456 wc->active_node = level;
3457 wc->root_level = level;
3459 /* We may not have checked the root block, lets do that now */
3460 if (btrfs_is_leaf(root->node))
3461 status = btrfs_check_leaf(root, NULL, root->node);
3463 status = btrfs_check_node(root, NULL, root->node);
3464 if (status != BTRFS_TREE_BLOCK_CLEAN)
3467 if (btrfs_root_refs(root_item) > 0 ||
3468 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3469 path.nodes[level] = root->node;
3470 extent_buffer_get(root->node);
3471 path.slots[level] = 0;
3473 struct btrfs_key key;
3474 struct btrfs_disk_key found_key;
3476 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3477 level = root_item->drop_level;
3478 path.lowest_level = level;
3479 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3482 btrfs_node_key(path.nodes[level], &found_key,
3484 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3485 sizeof(found_key)));
3489 wret = walk_down_tree(root, &path, wc, &level);
3495 wret = walk_up_tree(root, &path, wc, &level);
3502 btrfs_release_path(&path);
3504 if (!cache_tree_empty(&corrupt_blocks)) {
3505 struct cache_extent *cache;
3506 struct btrfs_corrupt_block *corrupt;
3508 printf("The following tree block(s) is corrupted in tree %llu:\n",
3509 root->root_key.objectid);
3510 cache = first_cache_extent(&corrupt_blocks);
3512 corrupt = container_of(cache,
3513 struct btrfs_corrupt_block,
3515 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3516 cache->start, corrupt->level,
3517 corrupt->key.objectid, corrupt->key.type,
3518 corrupt->key.offset);
3519 cache = next_cache_extent(cache);
3522 printf("Try to repair the btree for root %llu\n",
3523 root->root_key.objectid);
3524 ret = repair_btree(root, &corrupt_blocks);
3526 fprintf(stderr, "Failed to repair btree: %s\n",
3529 printf("Btree for root %llu is fixed\n",
3530 root->root_key.objectid);
3534 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3538 if (root_node.current) {
3539 root_node.current->checked = 1;
3540 maybe_free_inode_rec(&root_node.inode_cache,
3544 err = check_inode_recs(root, &root_node.inode_cache);
3548 free_corrupt_blocks_tree(&corrupt_blocks);
3549 root->fs_info->corrupt_blocks = NULL;
3550 free_orphan_data_extents(&root->orphan_data_extents);
3554 static int fs_root_objectid(u64 objectid)
3556 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3557 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3559 return is_fstree(objectid);
3562 static int check_fs_roots(struct btrfs_root *root,
3563 struct cache_tree *root_cache)
3565 struct btrfs_path path;
3566 struct btrfs_key key;
3567 struct walk_control wc;
3568 struct extent_buffer *leaf, *tree_node;
3569 struct btrfs_root *tmp_root;
3570 struct btrfs_root *tree_root = root->fs_info->tree_root;
3574 if (ctx.progress_enabled) {
3575 ctx.tp = TASK_FS_ROOTS;
3576 task_start(ctx.info);
3580 * Just in case we made any changes to the extent tree that weren't
3581 * reflected into the free space cache yet.
3584 reset_cached_block_groups(root->fs_info);
3585 memset(&wc, 0, sizeof(wc));
3586 cache_tree_init(&wc.shared);
3587 btrfs_init_path(&path);
3592 key.type = BTRFS_ROOT_ITEM_KEY;
3593 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3598 tree_node = tree_root->node;
3600 if (tree_node != tree_root->node) {
3601 free_root_recs_tree(root_cache);
3602 btrfs_release_path(&path);
3605 leaf = path.nodes[0];
3606 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3607 ret = btrfs_next_leaf(tree_root, &path);
3613 leaf = path.nodes[0];
3615 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3616 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3617 fs_root_objectid(key.objectid)) {
3618 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3619 tmp_root = btrfs_read_fs_root_no_cache(
3620 root->fs_info, &key);
3622 key.offset = (u64)-1;
3623 tmp_root = btrfs_read_fs_root(
3624 root->fs_info, &key);
3626 if (IS_ERR(tmp_root)) {
3630 ret = check_fs_root(tmp_root, root_cache, &wc);
3631 if (ret == -EAGAIN) {
3632 free_root_recs_tree(root_cache);
3633 btrfs_release_path(&path);
3638 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3639 btrfs_free_fs_root(tmp_root);
3640 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3641 key.type == BTRFS_ROOT_BACKREF_KEY) {
3642 process_root_ref(leaf, path.slots[0], &key,
3649 btrfs_release_path(&path);
3651 free_extent_cache_tree(&wc.shared);
3652 if (!cache_tree_empty(&wc.shared))
3653 fprintf(stderr, "warning line %d\n", __LINE__);
3655 task_stop(ctx.info);
3660 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3662 struct list_head *cur = rec->backrefs.next;
3663 struct extent_backref *back;
3664 struct tree_backref *tback;
3665 struct data_backref *dback;
3669 while(cur != &rec->backrefs) {
3670 back = list_entry(cur, struct extent_backref, list);
3672 if (!back->found_extent_tree) {
3676 if (back->is_data) {
3677 dback = (struct data_backref *)back;
3678 fprintf(stderr, "Backref %llu %s %llu"
3679 " owner %llu offset %llu num_refs %lu"
3680 " not found in extent tree\n",
3681 (unsigned long long)rec->start,
3682 back->full_backref ?
3684 back->full_backref ?
3685 (unsigned long long)dback->parent:
3686 (unsigned long long)dback->root,
3687 (unsigned long long)dback->owner,
3688 (unsigned long long)dback->offset,
3689 (unsigned long)dback->num_refs);
3691 tback = (struct tree_backref *)back;
3692 fprintf(stderr, "Backref %llu parent %llu"
3693 " root %llu not found in extent tree\n",
3694 (unsigned long long)rec->start,
3695 (unsigned long long)tback->parent,
3696 (unsigned long long)tback->root);
3699 if (!back->is_data && !back->found_ref) {
3703 tback = (struct tree_backref *)back;
3704 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3705 (unsigned long long)rec->start,
3706 back->full_backref ? "parent" : "root",
3707 back->full_backref ?
3708 (unsigned long long)tback->parent :
3709 (unsigned long long)tback->root, back);
3711 if (back->is_data) {
3712 dback = (struct data_backref *)back;
3713 if (dback->found_ref != dback->num_refs) {
3717 fprintf(stderr, "Incorrect local backref count"
3718 " on %llu %s %llu owner %llu"
3719 " offset %llu found %u wanted %u back %p\n",
3720 (unsigned long long)rec->start,
3721 back->full_backref ?
3723 back->full_backref ?
3724 (unsigned long long)dback->parent:
3725 (unsigned long long)dback->root,
3726 (unsigned long long)dback->owner,
3727 (unsigned long long)dback->offset,
3728 dback->found_ref, dback->num_refs, back);
3730 if (dback->disk_bytenr != rec->start) {
3734 fprintf(stderr, "Backref disk bytenr does not"
3735 " match extent record, bytenr=%llu, "
3736 "ref bytenr=%llu\n",
3737 (unsigned long long)rec->start,
3738 (unsigned long long)dback->disk_bytenr);
3741 if (dback->bytes != rec->nr) {
3745 fprintf(stderr, "Backref bytes do not match "
3746 "extent backref, bytenr=%llu, ref "
3747 "bytes=%llu, backref bytes=%llu\n",
3748 (unsigned long long)rec->start,
3749 (unsigned long long)rec->nr,
3750 (unsigned long long)dback->bytes);
3753 if (!back->is_data) {
3756 dback = (struct data_backref *)back;
3757 found += dback->found_ref;
3760 if (found != rec->refs) {
3764 fprintf(stderr, "Incorrect global backref count "
3765 "on %llu found %llu wanted %llu\n",
3766 (unsigned long long)rec->start,
3767 (unsigned long long)found,
3768 (unsigned long long)rec->refs);
3774 static int free_all_extent_backrefs(struct extent_record *rec)
3776 struct extent_backref *back;
3777 struct list_head *cur;
3778 while (!list_empty(&rec->backrefs)) {
3779 cur = rec->backrefs.next;
3780 back = list_entry(cur, struct extent_backref, list);
3787 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3788 struct cache_tree *extent_cache)
3790 struct cache_extent *cache;
3791 struct extent_record *rec;
3794 cache = first_cache_extent(extent_cache);
3797 rec = container_of(cache, struct extent_record, cache);
3798 remove_cache_extent(extent_cache, cache);
3799 free_all_extent_backrefs(rec);
3804 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3805 struct extent_record *rec)
3807 if (rec->content_checked && rec->owner_ref_checked &&
3808 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3809 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3810 !rec->bad_full_backref && !rec->crossing_stripes &&
3811 !rec->wrong_chunk_type) {
3812 remove_cache_extent(extent_cache, &rec->cache);
3813 free_all_extent_backrefs(rec);
3814 list_del_init(&rec->list);
3820 static int check_owner_ref(struct btrfs_root *root,
3821 struct extent_record *rec,
3822 struct extent_buffer *buf)
3824 struct extent_backref *node;
3825 struct tree_backref *back;
3826 struct btrfs_root *ref_root;
3827 struct btrfs_key key;
3828 struct btrfs_path path;
3829 struct extent_buffer *parent;
3834 list_for_each_entry(node, &rec->backrefs, list) {
3837 if (!node->found_ref)
3839 if (node->full_backref)
3841 back = (struct tree_backref *)node;
3842 if (btrfs_header_owner(buf) == back->root)
3845 BUG_ON(rec->is_root);
3847 /* try to find the block by search corresponding fs tree */
3848 key.objectid = btrfs_header_owner(buf);
3849 key.type = BTRFS_ROOT_ITEM_KEY;
3850 key.offset = (u64)-1;
3852 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3853 if (IS_ERR(ref_root))
3856 level = btrfs_header_level(buf);
3858 btrfs_item_key_to_cpu(buf, &key, 0);
3860 btrfs_node_key_to_cpu(buf, &key, 0);
3862 btrfs_init_path(&path);
3863 path.lowest_level = level + 1;
3864 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3868 parent = path.nodes[level + 1];
3869 if (parent && buf->start == btrfs_node_blockptr(parent,
3870 path.slots[level + 1]))
3873 btrfs_release_path(&path);
3874 return found ? 0 : 1;
3877 static int is_extent_tree_record(struct extent_record *rec)
3879 struct list_head *cur = rec->backrefs.next;
3880 struct extent_backref *node;
3881 struct tree_backref *back;
3884 while(cur != &rec->backrefs) {
3885 node = list_entry(cur, struct extent_backref, list);
3889 back = (struct tree_backref *)node;
3890 if (node->full_backref)
3892 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3899 static int record_bad_block_io(struct btrfs_fs_info *info,
3900 struct cache_tree *extent_cache,
3903 struct extent_record *rec;
3904 struct cache_extent *cache;
3905 struct btrfs_key key;
3907 cache = lookup_cache_extent(extent_cache, start, len);
3911 rec = container_of(cache, struct extent_record, cache);
3912 if (!is_extent_tree_record(rec))
3915 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3916 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3919 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3920 struct extent_buffer *buf, int slot)
3922 if (btrfs_header_level(buf)) {
3923 struct btrfs_key_ptr ptr1, ptr2;
3925 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3926 sizeof(struct btrfs_key_ptr));
3927 read_extent_buffer(buf, &ptr2,
3928 btrfs_node_key_ptr_offset(slot + 1),
3929 sizeof(struct btrfs_key_ptr));
3930 write_extent_buffer(buf, &ptr1,
3931 btrfs_node_key_ptr_offset(slot + 1),
3932 sizeof(struct btrfs_key_ptr));
3933 write_extent_buffer(buf, &ptr2,
3934 btrfs_node_key_ptr_offset(slot),
3935 sizeof(struct btrfs_key_ptr));
3937 struct btrfs_disk_key key;
3938 btrfs_node_key(buf, &key, 0);
3939 btrfs_fixup_low_keys(root, path, &key,
3940 btrfs_header_level(buf) + 1);
3943 struct btrfs_item *item1, *item2;
3944 struct btrfs_key k1, k2;
3945 char *item1_data, *item2_data;
3946 u32 item1_offset, item2_offset, item1_size, item2_size;
3948 item1 = btrfs_item_nr(slot);
3949 item2 = btrfs_item_nr(slot + 1);
3950 btrfs_item_key_to_cpu(buf, &k1, slot);
3951 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3952 item1_offset = btrfs_item_offset(buf, item1);
3953 item2_offset = btrfs_item_offset(buf, item2);
3954 item1_size = btrfs_item_size(buf, item1);
3955 item2_size = btrfs_item_size(buf, item2);
3957 item1_data = malloc(item1_size);
3960 item2_data = malloc(item2_size);
3966 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3967 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3969 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3970 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3974 btrfs_set_item_offset(buf, item1, item2_offset);
3975 btrfs_set_item_offset(buf, item2, item1_offset);
3976 btrfs_set_item_size(buf, item1, item2_size);
3977 btrfs_set_item_size(buf, item2, item1_size);
3979 path->slots[0] = slot;
3980 btrfs_set_item_key_unsafe(root, path, &k2);
3981 path->slots[0] = slot + 1;
3982 btrfs_set_item_key_unsafe(root, path, &k1);
3987 static int fix_key_order(struct btrfs_trans_handle *trans,
3988 struct btrfs_root *root,
3989 struct btrfs_path *path)
3991 struct extent_buffer *buf;
3992 struct btrfs_key k1, k2;
3994 int level = path->lowest_level;
3997 buf = path->nodes[level];
3998 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4000 btrfs_node_key_to_cpu(buf, &k1, i);
4001 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4003 btrfs_item_key_to_cpu(buf, &k1, i);
4004 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4006 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4008 ret = swap_values(root, path, buf, i);
4011 btrfs_mark_buffer_dirty(buf);
4017 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4018 struct btrfs_root *root,
4019 struct btrfs_path *path,
4020 struct extent_buffer *buf, int slot)
4022 struct btrfs_key key;
4023 int nritems = btrfs_header_nritems(buf);
4025 btrfs_item_key_to_cpu(buf, &key, slot);
4027 /* These are all the keys we can deal with missing. */
4028 if (key.type != BTRFS_DIR_INDEX_KEY &&
4029 key.type != BTRFS_EXTENT_ITEM_KEY &&
4030 key.type != BTRFS_METADATA_ITEM_KEY &&
4031 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4032 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4035 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4036 (unsigned long long)key.objectid, key.type,
4037 (unsigned long long)key.offset, slot, buf->start);
4038 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4039 btrfs_item_nr_offset(slot + 1),
4040 sizeof(struct btrfs_item) *
4041 (nritems - slot - 1));
4042 btrfs_set_header_nritems(buf, nritems - 1);
4044 struct btrfs_disk_key disk_key;
4046 btrfs_item_key(buf, &disk_key, 0);
4047 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4049 btrfs_mark_buffer_dirty(buf);
4053 static int fix_item_offset(struct btrfs_trans_handle *trans,
4054 struct btrfs_root *root,
4055 struct btrfs_path *path)
4057 struct extent_buffer *buf;
4061 /* We should only get this for leaves */
4062 BUG_ON(path->lowest_level);
4063 buf = path->nodes[0];
4065 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4066 unsigned int shift = 0, offset;
4068 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4069 BTRFS_LEAF_DATA_SIZE(root)) {
4070 if (btrfs_item_end_nr(buf, i) >
4071 BTRFS_LEAF_DATA_SIZE(root)) {
4072 ret = delete_bogus_item(trans, root, path,
4076 fprintf(stderr, "item is off the end of the "
4077 "leaf, can't fix\n");
4081 shift = BTRFS_LEAF_DATA_SIZE(root) -
4082 btrfs_item_end_nr(buf, i);
4083 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4084 btrfs_item_offset_nr(buf, i - 1)) {
4085 if (btrfs_item_end_nr(buf, i) >
4086 btrfs_item_offset_nr(buf, i - 1)) {
4087 ret = delete_bogus_item(trans, root, path,
4091 fprintf(stderr, "items overlap, can't fix\n");
4095 shift = btrfs_item_offset_nr(buf, i - 1) -
4096 btrfs_item_end_nr(buf, i);
4101 printf("Shifting item nr %d by %u bytes in block %llu\n",
4102 i, shift, (unsigned long long)buf->start);
4103 offset = btrfs_item_offset_nr(buf, i);
4104 memmove_extent_buffer(buf,
4105 btrfs_leaf_data(buf) + offset + shift,
4106 btrfs_leaf_data(buf) + offset,
4107 btrfs_item_size_nr(buf, i));
4108 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4110 btrfs_mark_buffer_dirty(buf);
4114 * We may have moved things, in which case we want to exit so we don't
4115 * write those changes out. Once we have proper abort functionality in
4116 * progs this can be changed to something nicer.
4123 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4124 * then just return -EIO.
4126 static int try_to_fix_bad_block(struct btrfs_root *root,
4127 struct extent_buffer *buf,
4128 enum btrfs_tree_block_status status)
4130 struct btrfs_trans_handle *trans;
4131 struct ulist *roots;
4132 struct ulist_node *node;
4133 struct btrfs_root *search_root;
4134 struct btrfs_path *path;
4135 struct ulist_iterator iter;
4136 struct btrfs_key root_key, key;
4139 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4140 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4143 path = btrfs_alloc_path();
4147 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4150 btrfs_free_path(path);
4154 ULIST_ITER_INIT(&iter);
4155 while ((node = ulist_next(roots, &iter))) {
4156 root_key.objectid = node->val;
4157 root_key.type = BTRFS_ROOT_ITEM_KEY;
4158 root_key.offset = (u64)-1;
4160 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4167 trans = btrfs_start_transaction(search_root, 0);
4168 if (IS_ERR(trans)) {
4169 ret = PTR_ERR(trans);
4173 path->lowest_level = btrfs_header_level(buf);
4174 path->skip_check_block = 1;
4175 if (path->lowest_level)
4176 btrfs_node_key_to_cpu(buf, &key, 0);
4178 btrfs_item_key_to_cpu(buf, &key, 0);
4179 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4182 btrfs_commit_transaction(trans, search_root);
4185 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4186 ret = fix_key_order(trans, search_root, path);
4187 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4188 ret = fix_item_offset(trans, search_root, path);
4190 btrfs_commit_transaction(trans, search_root);
4193 btrfs_release_path(path);
4194 btrfs_commit_transaction(trans, search_root);
4197 btrfs_free_path(path);
4201 static int check_block(struct btrfs_root *root,
4202 struct cache_tree *extent_cache,
4203 struct extent_buffer *buf, u64 flags)
4205 struct extent_record *rec;
4206 struct cache_extent *cache;
4207 struct btrfs_key key;
4208 enum btrfs_tree_block_status status;
4212 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4215 rec = container_of(cache, struct extent_record, cache);
4216 rec->generation = btrfs_header_generation(buf);
4218 level = btrfs_header_level(buf);
4219 if (btrfs_header_nritems(buf) > 0) {
4222 btrfs_item_key_to_cpu(buf, &key, 0);
4224 btrfs_node_key_to_cpu(buf, &key, 0);
4226 rec->info_objectid = key.objectid;
4228 rec->info_level = level;
4230 if (btrfs_is_leaf(buf))
4231 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4233 status = btrfs_check_node(root, &rec->parent_key, buf);
4235 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4237 status = try_to_fix_bad_block(root, buf, status);
4238 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4240 fprintf(stderr, "bad block %llu\n",
4241 (unsigned long long)buf->start);
4244 * Signal to callers we need to start the scan over
4245 * again since we'll have cow'ed blocks.
4250 rec->content_checked = 1;
4251 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4252 rec->owner_ref_checked = 1;
4254 ret = check_owner_ref(root, rec, buf);
4256 rec->owner_ref_checked = 1;
4260 maybe_free_extent_rec(extent_cache, rec);
4264 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4265 u64 parent, u64 root)
4267 struct list_head *cur = rec->backrefs.next;
4268 struct extent_backref *node;
4269 struct tree_backref *back;
4271 while(cur != &rec->backrefs) {
4272 node = list_entry(cur, struct extent_backref, list);
4276 back = (struct tree_backref *)node;
4278 if (!node->full_backref)
4280 if (parent == back->parent)
4283 if (node->full_backref)
4285 if (back->root == root)
4292 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4293 u64 parent, u64 root)
4295 struct tree_backref *ref = malloc(sizeof(*ref));
4296 memset(&ref->node, 0, sizeof(ref->node));
4298 ref->parent = parent;
4299 ref->node.full_backref = 1;
4302 ref->node.full_backref = 0;
4304 list_add_tail(&ref->node.list, &rec->backrefs);
4309 static struct data_backref *find_data_backref(struct extent_record *rec,
4310 u64 parent, u64 root,
4311 u64 owner, u64 offset,
4313 u64 disk_bytenr, u64 bytes)
4315 struct list_head *cur = rec->backrefs.next;
4316 struct extent_backref *node;
4317 struct data_backref *back;
4319 while(cur != &rec->backrefs) {
4320 node = list_entry(cur, struct extent_backref, list);
4324 back = (struct data_backref *)node;
4326 if (!node->full_backref)
4328 if (parent == back->parent)
4331 if (node->full_backref)
4333 if (back->root == root && back->owner == owner &&
4334 back->offset == offset) {
4335 if (found_ref && node->found_ref &&
4336 (back->bytes != bytes ||
4337 back->disk_bytenr != disk_bytenr))
4346 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4347 u64 parent, u64 root,
4348 u64 owner, u64 offset,
4351 struct data_backref *ref = malloc(sizeof(*ref));
4352 memset(&ref->node, 0, sizeof(ref->node));
4353 ref->node.is_data = 1;
4356 ref->parent = parent;
4359 ref->node.full_backref = 1;
4363 ref->offset = offset;
4364 ref->node.full_backref = 0;
4366 ref->bytes = max_size;
4369 list_add_tail(&ref->node.list, &rec->backrefs);
4370 if (max_size > rec->max_size)
4371 rec->max_size = max_size;
4375 /* Check if the type of extent matches with its chunk */
4376 static void check_extent_type(struct extent_record *rec)
4378 struct btrfs_block_group_cache *bg_cache;
4380 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4384 /* data extent, check chunk directly*/
4385 if (!rec->metadata) {
4386 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4387 rec->wrong_chunk_type = 1;
4391 /* metadata extent, check the obvious case first */
4392 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4393 BTRFS_BLOCK_GROUP_METADATA))) {
4394 rec->wrong_chunk_type = 1;
4399 * Check SYSTEM extent, as it's also marked as metadata, we can only
4400 * make sure it's a SYSTEM extent by its backref
4402 if (!list_empty(&rec->backrefs)) {
4403 struct extent_backref *node;
4404 struct tree_backref *tback;
4407 node = list_entry(rec->backrefs.next, struct extent_backref,
4409 if (node->is_data) {
4410 /* tree block shouldn't have data backref */
4411 rec->wrong_chunk_type = 1;
4414 tback = container_of(node, struct tree_backref, node);
4416 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4417 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4419 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4420 if (!(bg_cache->flags & bg_type))
4421 rec->wrong_chunk_type = 1;
4425 static int add_extent_rec(struct cache_tree *extent_cache,
4426 struct btrfs_key *parent_key, u64 parent_gen,
4427 u64 start, u64 nr, u64 extent_item_refs,
4428 int is_root, int inc_ref, int set_checked,
4429 int metadata, int extent_rec, u64 max_size)
4431 struct extent_record *rec;
4432 struct cache_extent *cache;
4436 cache = lookup_cache_extent(extent_cache, start, nr);
4438 rec = container_of(cache, struct extent_record, cache);
4442 rec->nr = max(nr, max_size);
4445 * We need to make sure to reset nr to whatever the extent
4446 * record says was the real size, this way we can compare it to
4450 if (start != rec->start || rec->found_rec) {
4451 struct extent_record *tmp;
4454 if (list_empty(&rec->list))
4455 list_add_tail(&rec->list,
4456 &duplicate_extents);
4459 * We have to do this song and dance in case we
4460 * find an extent record that falls inside of
4461 * our current extent record but does not have
4462 * the same objectid.
4464 tmp = malloc(sizeof(*tmp));
4468 tmp->max_size = max_size;
4471 tmp->metadata = metadata;
4472 tmp->extent_item_refs = extent_item_refs;
4473 INIT_LIST_HEAD(&tmp->list);
4474 list_add_tail(&tmp->list, &rec->dups);
4475 rec->num_duplicates++;
4482 if (extent_item_refs && !dup) {
4483 if (rec->extent_item_refs) {
4484 fprintf(stderr, "block %llu rec "
4485 "extent_item_refs %llu, passed %llu\n",
4486 (unsigned long long)start,
4487 (unsigned long long)
4488 rec->extent_item_refs,
4489 (unsigned long long)extent_item_refs);
4491 rec->extent_item_refs = extent_item_refs;
4496 rec->content_checked = 1;
4497 rec->owner_ref_checked = 1;
4501 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4503 rec->parent_generation = parent_gen;
4505 if (rec->max_size < max_size)
4506 rec->max_size = max_size;
4509 * A metadata extent can't cross stripe_len boundary, otherwise
4510 * kernel scrub won't be able to handle it.
4511 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4514 if (metadata && check_crossing_stripes(rec->start,
4516 rec->crossing_stripes = 1;
4517 check_extent_type(rec);
4518 maybe_free_extent_rec(extent_cache, rec);
4521 rec = malloc(sizeof(*rec));
4523 rec->max_size = max_size;
4524 rec->nr = max(nr, max_size);
4525 rec->found_rec = !!extent_rec;
4526 rec->content_checked = 0;
4527 rec->owner_ref_checked = 0;
4528 rec->num_duplicates = 0;
4529 rec->metadata = metadata;
4530 rec->flag_block_full_backref = -1;
4531 rec->bad_full_backref = 0;
4532 rec->crossing_stripes = 0;
4533 rec->wrong_chunk_type = 0;
4534 INIT_LIST_HEAD(&rec->backrefs);
4535 INIT_LIST_HEAD(&rec->dups);
4536 INIT_LIST_HEAD(&rec->list);
4548 if (extent_item_refs)
4549 rec->extent_item_refs = extent_item_refs;
4551 rec->extent_item_refs = 0;
4554 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4556 memset(&rec->parent_key, 0, sizeof(*parent_key));
4559 rec->parent_generation = parent_gen;
4561 rec->parent_generation = 0;
4563 rec->cache.start = start;
4564 rec->cache.size = nr;
4565 ret = insert_cache_extent(extent_cache, &rec->cache);
4569 rec->content_checked = 1;
4570 rec->owner_ref_checked = 1;
4574 if (check_crossing_stripes(rec->start, rec->max_size))
4575 rec->crossing_stripes = 1;
4576 check_extent_type(rec);
4580 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4581 u64 parent, u64 root, int found_ref)
4583 struct extent_record *rec;
4584 struct tree_backref *back;
4585 struct cache_extent *cache;
4587 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4589 add_extent_rec(extent_cache, NULL, 0, bytenr,
4590 1, 0, 0, 0, 0, 1, 0, 0);
4591 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4596 rec = container_of(cache, struct extent_record, cache);
4597 if (rec->start != bytenr) {
4601 back = find_tree_backref(rec, parent, root);
4603 back = alloc_tree_backref(rec, parent, root);
4606 if (back->node.found_ref) {
4607 fprintf(stderr, "Extent back ref already exists "
4608 "for %llu parent %llu root %llu \n",
4609 (unsigned long long)bytenr,
4610 (unsigned long long)parent,
4611 (unsigned long long)root);
4613 back->node.found_ref = 1;
4615 if (back->node.found_extent_tree) {
4616 fprintf(stderr, "Extent back ref already exists "
4617 "for %llu parent %llu root %llu \n",
4618 (unsigned long long)bytenr,
4619 (unsigned long long)parent,
4620 (unsigned long long)root);
4622 back->node.found_extent_tree = 1;
4624 check_extent_type(rec);
4625 maybe_free_extent_rec(extent_cache, rec);
4629 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4630 u64 parent, u64 root, u64 owner, u64 offset,
4631 u32 num_refs, int found_ref, u64 max_size)
4633 struct extent_record *rec;
4634 struct data_backref *back;
4635 struct cache_extent *cache;
4637 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4639 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4641 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4646 rec = container_of(cache, struct extent_record, cache);
4647 if (rec->max_size < max_size)
4648 rec->max_size = max_size;
4651 * If found_ref is set then max_size is the real size and must match the
4652 * existing refs. So if we have already found a ref then we need to
4653 * make sure that this ref matches the existing one, otherwise we need
4654 * to add a new backref so we can notice that the backrefs don't match
4655 * and we need to figure out who is telling the truth. This is to
4656 * account for that awful fsync bug I introduced where we'd end up with
4657 * a btrfs_file_extent_item that would have its length include multiple
4658 * prealloc extents or point inside of a prealloc extent.
4660 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4663 back = alloc_data_backref(rec, parent, root, owner, offset,
4667 BUG_ON(num_refs != 1);
4668 if (back->node.found_ref)
4669 BUG_ON(back->bytes != max_size);
4670 back->node.found_ref = 1;
4671 back->found_ref += 1;
4672 back->bytes = max_size;
4673 back->disk_bytenr = bytenr;
4675 rec->content_checked = 1;
4676 rec->owner_ref_checked = 1;
4678 if (back->node.found_extent_tree) {
4679 fprintf(stderr, "Extent back ref already exists "
4680 "for %llu parent %llu root %llu "
4681 "owner %llu offset %llu num_refs %lu\n",
4682 (unsigned long long)bytenr,
4683 (unsigned long long)parent,
4684 (unsigned long long)root,
4685 (unsigned long long)owner,
4686 (unsigned long long)offset,
4687 (unsigned long)num_refs);
4689 back->num_refs = num_refs;
4690 back->node.found_extent_tree = 1;
4692 maybe_free_extent_rec(extent_cache, rec);
4696 static int add_pending(struct cache_tree *pending,
4697 struct cache_tree *seen, u64 bytenr, u32 size)
4700 ret = add_cache_extent(seen, bytenr, size);
4703 add_cache_extent(pending, bytenr, size);
4707 static int pick_next_pending(struct cache_tree *pending,
4708 struct cache_tree *reada,
4709 struct cache_tree *nodes,
4710 u64 last, struct block_info *bits, int bits_nr,
4713 unsigned long node_start = last;
4714 struct cache_extent *cache;
4717 cache = search_cache_extent(reada, 0);
4719 bits[0].start = cache->start;
4720 bits[0].size = cache->size;
4725 if (node_start > 32768)
4726 node_start -= 32768;
4728 cache = search_cache_extent(nodes, node_start);
4730 cache = search_cache_extent(nodes, 0);
4733 cache = search_cache_extent(pending, 0);
4738 bits[ret].start = cache->start;
4739 bits[ret].size = cache->size;
4740 cache = next_cache_extent(cache);
4742 } while (cache && ret < bits_nr);
4748 bits[ret].start = cache->start;
4749 bits[ret].size = cache->size;
4750 cache = next_cache_extent(cache);
4752 } while (cache && ret < bits_nr);
4754 if (bits_nr - ret > 8) {
4755 u64 lookup = bits[0].start + bits[0].size;
4756 struct cache_extent *next;
4757 next = search_cache_extent(pending, lookup);
4759 if (next->start - lookup > 32768)
4761 bits[ret].start = next->start;
4762 bits[ret].size = next->size;
4763 lookup = next->start + next->size;
4767 next = next_cache_extent(next);
4775 static void free_chunk_record(struct cache_extent *cache)
4777 struct chunk_record *rec;
4779 rec = container_of(cache, struct chunk_record, cache);
4780 list_del_init(&rec->list);
4781 list_del_init(&rec->dextents);
4785 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4787 cache_tree_free_extents(chunk_cache, free_chunk_record);
4790 static void free_device_record(struct rb_node *node)
4792 struct device_record *rec;
4794 rec = container_of(node, struct device_record, node);
4798 FREE_RB_BASED_TREE(device_cache, free_device_record);
4800 int insert_block_group_record(struct block_group_tree *tree,
4801 struct block_group_record *bg_rec)
4805 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4809 list_add_tail(&bg_rec->list, &tree->block_groups);
4813 static void free_block_group_record(struct cache_extent *cache)
4815 struct block_group_record *rec;
4817 rec = container_of(cache, struct block_group_record, cache);
4818 list_del_init(&rec->list);
4822 void free_block_group_tree(struct block_group_tree *tree)
4824 cache_tree_free_extents(&tree->tree, free_block_group_record);
4827 int insert_device_extent_record(struct device_extent_tree *tree,
4828 struct device_extent_record *de_rec)
4833 * Device extent is a bit different from the other extents, because
4834 * the extents which belong to the different devices may have the
4835 * same start and size, so we need use the special extent cache
4836 * search/insert functions.
4838 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4842 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4843 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4847 static void free_device_extent_record(struct cache_extent *cache)
4849 struct device_extent_record *rec;
4851 rec = container_of(cache, struct device_extent_record, cache);
4852 if (!list_empty(&rec->chunk_list))
4853 list_del_init(&rec->chunk_list);
4854 if (!list_empty(&rec->device_list))
4855 list_del_init(&rec->device_list);
4859 void free_device_extent_tree(struct device_extent_tree *tree)
4861 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4864 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4865 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4866 struct extent_buffer *leaf, int slot)
4868 struct btrfs_extent_ref_v0 *ref0;
4869 struct btrfs_key key;
4871 btrfs_item_key_to_cpu(leaf, &key, slot);
4872 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4873 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4874 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4876 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4877 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4883 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4884 struct btrfs_key *key,
4887 struct btrfs_chunk *ptr;
4888 struct chunk_record *rec;
4891 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4892 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4894 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4896 fprintf(stderr, "memory allocation failed\n");
4900 INIT_LIST_HEAD(&rec->list);
4901 INIT_LIST_HEAD(&rec->dextents);
4904 rec->cache.start = key->offset;
4905 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4907 rec->generation = btrfs_header_generation(leaf);
4909 rec->objectid = key->objectid;
4910 rec->type = key->type;
4911 rec->offset = key->offset;
4913 rec->length = rec->cache.size;
4914 rec->owner = btrfs_chunk_owner(leaf, ptr);
4915 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4916 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4917 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4918 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4919 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4920 rec->num_stripes = num_stripes;
4921 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4923 for (i = 0; i < rec->num_stripes; ++i) {
4924 rec->stripes[i].devid =
4925 btrfs_stripe_devid_nr(leaf, ptr, i);
4926 rec->stripes[i].offset =
4927 btrfs_stripe_offset_nr(leaf, ptr, i);
4928 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4929 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4936 static int process_chunk_item(struct cache_tree *chunk_cache,
4937 struct btrfs_key *key, struct extent_buffer *eb,
4940 struct chunk_record *rec;
4943 rec = btrfs_new_chunk_record(eb, key, slot);
4944 ret = insert_cache_extent(chunk_cache, &rec->cache);
4946 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4947 rec->offset, rec->length);
4954 static int process_device_item(struct rb_root *dev_cache,
4955 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4957 struct btrfs_dev_item *ptr;
4958 struct device_record *rec;
4961 ptr = btrfs_item_ptr(eb,
4962 slot, struct btrfs_dev_item);
4964 rec = malloc(sizeof(*rec));
4966 fprintf(stderr, "memory allocation failed\n");
4970 rec->devid = key->offset;
4971 rec->generation = btrfs_header_generation(eb);
4973 rec->objectid = key->objectid;
4974 rec->type = key->type;
4975 rec->offset = key->offset;
4977 rec->devid = btrfs_device_id(eb, ptr);
4978 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4979 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4981 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4983 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4990 struct block_group_record *
4991 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4994 struct btrfs_block_group_item *ptr;
4995 struct block_group_record *rec;
4997 rec = calloc(1, sizeof(*rec));
4999 fprintf(stderr, "memory allocation failed\n");
5003 rec->cache.start = key->objectid;
5004 rec->cache.size = key->offset;
5006 rec->generation = btrfs_header_generation(leaf);
5008 rec->objectid = key->objectid;
5009 rec->type = key->type;
5010 rec->offset = key->offset;
5012 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5013 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5015 INIT_LIST_HEAD(&rec->list);
5020 static int process_block_group_item(struct block_group_tree *block_group_cache,
5021 struct btrfs_key *key,
5022 struct extent_buffer *eb, int slot)
5024 struct block_group_record *rec;
5027 rec = btrfs_new_block_group_record(eb, key, slot);
5028 ret = insert_block_group_record(block_group_cache, rec);
5030 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5031 rec->objectid, rec->offset);
5038 struct device_extent_record *
5039 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5040 struct btrfs_key *key, int slot)
5042 struct device_extent_record *rec;
5043 struct btrfs_dev_extent *ptr;
5045 rec = calloc(1, sizeof(*rec));
5047 fprintf(stderr, "memory allocation failed\n");
5051 rec->cache.objectid = key->objectid;
5052 rec->cache.start = key->offset;
5054 rec->generation = btrfs_header_generation(leaf);
5056 rec->objectid = key->objectid;
5057 rec->type = key->type;
5058 rec->offset = key->offset;
5060 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5061 rec->chunk_objecteid =
5062 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5064 btrfs_dev_extent_chunk_offset(leaf, ptr);
5065 rec->length = btrfs_dev_extent_length(leaf, ptr);
5066 rec->cache.size = rec->length;
5068 INIT_LIST_HEAD(&rec->chunk_list);
5069 INIT_LIST_HEAD(&rec->device_list);
5075 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5076 struct btrfs_key *key, struct extent_buffer *eb,
5079 struct device_extent_record *rec;
5082 rec = btrfs_new_device_extent_record(eb, key, slot);
5083 ret = insert_device_extent_record(dev_extent_cache, rec);
5086 "Device extent[%llu, %llu, %llu] existed.\n",
5087 rec->objectid, rec->offset, rec->length);
5094 static int process_extent_item(struct btrfs_root *root,
5095 struct cache_tree *extent_cache,
5096 struct extent_buffer *eb, int slot)
5098 struct btrfs_extent_item *ei;
5099 struct btrfs_extent_inline_ref *iref;
5100 struct btrfs_extent_data_ref *dref;
5101 struct btrfs_shared_data_ref *sref;
5102 struct btrfs_key key;
5106 u32 item_size = btrfs_item_size_nr(eb, slot);
5112 btrfs_item_key_to_cpu(eb, &key, slot);
5114 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5116 num_bytes = root->leafsize;
5118 num_bytes = key.offset;
5121 if (item_size < sizeof(*ei)) {
5122 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5123 struct btrfs_extent_item_v0 *ei0;
5124 BUG_ON(item_size != sizeof(*ei0));
5125 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5126 refs = btrfs_extent_refs_v0(eb, ei0);
5130 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5131 num_bytes, refs, 0, 0, 0, metadata, 1,
5135 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5136 refs = btrfs_extent_refs(eb, ei);
5138 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5139 refs, 0, 0, 0, metadata, 1, num_bytes);
5141 ptr = (unsigned long)(ei + 1);
5142 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5143 key.type == BTRFS_EXTENT_ITEM_KEY)
5144 ptr += sizeof(struct btrfs_tree_block_info);
5146 end = (unsigned long)ei + item_size;
5148 iref = (struct btrfs_extent_inline_ref *)ptr;
5149 type = btrfs_extent_inline_ref_type(eb, iref);
5150 offset = btrfs_extent_inline_ref_offset(eb, iref);
5152 case BTRFS_TREE_BLOCK_REF_KEY:
5153 add_tree_backref(extent_cache, key.objectid,
5156 case BTRFS_SHARED_BLOCK_REF_KEY:
5157 add_tree_backref(extent_cache, key.objectid,
5160 case BTRFS_EXTENT_DATA_REF_KEY:
5161 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5162 add_data_backref(extent_cache, key.objectid, 0,
5163 btrfs_extent_data_ref_root(eb, dref),
5164 btrfs_extent_data_ref_objectid(eb,
5166 btrfs_extent_data_ref_offset(eb, dref),
5167 btrfs_extent_data_ref_count(eb, dref),
5170 case BTRFS_SHARED_DATA_REF_KEY:
5171 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5172 add_data_backref(extent_cache, key.objectid, offset,
5174 btrfs_shared_data_ref_count(eb, sref),
5178 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5179 key.objectid, key.type, num_bytes);
5182 ptr += btrfs_extent_inline_ref_size(type);
5189 static int check_cache_range(struct btrfs_root *root,
5190 struct btrfs_block_group_cache *cache,
5191 u64 offset, u64 bytes)
5193 struct btrfs_free_space *entry;
5199 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5200 bytenr = btrfs_sb_offset(i);
5201 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5202 cache->key.objectid, bytenr, 0,
5203 &logical, &nr, &stripe_len);
5208 if (logical[nr] + stripe_len <= offset)
5210 if (offset + bytes <= logical[nr])
5212 if (logical[nr] == offset) {
5213 if (stripe_len >= bytes) {
5217 bytes -= stripe_len;
5218 offset += stripe_len;
5219 } else if (logical[nr] < offset) {
5220 if (logical[nr] + stripe_len >=
5225 bytes = (offset + bytes) -
5226 (logical[nr] + stripe_len);
5227 offset = logical[nr] + stripe_len;
5230 * Could be tricky, the super may land in the
5231 * middle of the area we're checking. First
5232 * check the easiest case, it's at the end.
5234 if (logical[nr] + stripe_len >=
5236 bytes = logical[nr] - offset;
5240 /* Check the left side */
5241 ret = check_cache_range(root, cache,
5243 logical[nr] - offset);
5249 /* Now we continue with the right side */
5250 bytes = (offset + bytes) -
5251 (logical[nr] + stripe_len);
5252 offset = logical[nr] + stripe_len;
5259 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5261 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5262 offset, offset+bytes);
5266 if (entry->offset != offset) {
5267 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5272 if (entry->bytes != bytes) {
5273 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5274 bytes, entry->bytes, offset);
5278 unlink_free_space(cache->free_space_ctl, entry);
5283 static int verify_space_cache(struct btrfs_root *root,
5284 struct btrfs_block_group_cache *cache)
5286 struct btrfs_path *path;
5287 struct extent_buffer *leaf;
5288 struct btrfs_key key;
5292 path = btrfs_alloc_path();
5296 root = root->fs_info->extent_root;
5298 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5300 key.objectid = last;
5302 key.type = BTRFS_EXTENT_ITEM_KEY;
5304 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5309 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5310 ret = btrfs_next_leaf(root, path);
5318 leaf = path->nodes[0];
5319 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5320 if (key.objectid >= cache->key.offset + cache->key.objectid)
5322 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5323 key.type != BTRFS_METADATA_ITEM_KEY) {
5328 if (last == key.objectid) {
5329 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5330 last = key.objectid + key.offset;
5332 last = key.objectid + root->leafsize;
5337 ret = check_cache_range(root, cache, last,
5338 key.objectid - last);
5341 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5342 last = key.objectid + key.offset;
5344 last = key.objectid + root->leafsize;
5348 if (last < cache->key.objectid + cache->key.offset)
5349 ret = check_cache_range(root, cache, last,
5350 cache->key.objectid +
5351 cache->key.offset - last);
5354 btrfs_free_path(path);
5357 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5358 fprintf(stderr, "There are still entries left in the space "
5366 static int check_space_cache(struct btrfs_root *root)
5368 struct btrfs_block_group_cache *cache;
5369 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5373 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5374 btrfs_super_generation(root->fs_info->super_copy) !=
5375 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5376 printf("cache and super generation don't match, space cache "
5377 "will be invalidated\n");
5381 if (ctx.progress_enabled) {
5382 ctx.tp = TASK_FREE_SPACE;
5383 task_start(ctx.info);
5387 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5391 start = cache->key.objectid + cache->key.offset;
5392 if (!cache->free_space_ctl) {
5393 if (btrfs_init_free_space_ctl(cache,
5394 root->sectorsize)) {
5399 btrfs_remove_free_space_cache(cache);
5402 ret = load_free_space_cache(root->fs_info, cache);
5406 ret = verify_space_cache(root, cache);
5408 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5409 cache->key.objectid);
5414 task_stop(ctx.info);
5416 return error ? -EINVAL : 0;
5419 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5420 u64 num_bytes, unsigned long leaf_offset,
5421 struct extent_buffer *eb) {
5424 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5426 unsigned long csum_offset;
5430 u64 data_checked = 0;
5436 if (num_bytes % root->sectorsize)
5439 data = malloc(num_bytes);
5443 while (offset < num_bytes) {
5446 read_len = num_bytes - offset;
5447 /* read as much space once a time */
5448 ret = read_extent_data(root, data + offset,
5449 bytenr + offset, &read_len, mirror);
5453 /* verify every 4k data's checksum */
5454 while (data_checked < read_len) {
5456 tmp = offset + data_checked;
5458 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5459 csum, root->sectorsize);
5460 btrfs_csum_final(csum, (char *)&csum);
5462 csum_offset = leaf_offset +
5463 tmp / root->sectorsize * csum_size;
5464 read_extent_buffer(eb, (char *)&csum_expected,
5465 csum_offset, csum_size);
5466 /* try another mirror */
5467 if (csum != csum_expected) {
5468 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5469 mirror, bytenr + tmp,
5470 csum, csum_expected);
5471 num_copies = btrfs_num_copies(
5472 &root->fs_info->mapping_tree,
5474 if (mirror < num_copies - 1) {
5479 data_checked += root->sectorsize;
5488 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5491 struct btrfs_path *path;
5492 struct extent_buffer *leaf;
5493 struct btrfs_key key;
5496 path = btrfs_alloc_path();
5498 fprintf(stderr, "Error allocing path\n");
5502 key.objectid = bytenr;
5503 key.type = BTRFS_EXTENT_ITEM_KEY;
5504 key.offset = (u64)-1;
5507 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5510 fprintf(stderr, "Error looking up extent record %d\n", ret);
5511 btrfs_free_path(path);
5514 if (path->slots[0] > 0) {
5517 ret = btrfs_prev_leaf(root, path);
5520 } else if (ret > 0) {
5527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5530 * Block group items come before extent items if they have the same
5531 * bytenr, so walk back one more just in case. Dear future traveler,
5532 * first congrats on mastering time travel. Now if it's not too much
5533 * trouble could you go back to 2006 and tell Chris to make the
5534 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5535 * EXTENT_ITEM_KEY please?
5537 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5538 if (path->slots[0] > 0) {
5541 ret = btrfs_prev_leaf(root, path);
5544 } else if (ret > 0) {
5549 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5553 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5554 ret = btrfs_next_leaf(root, path);
5556 fprintf(stderr, "Error going to next leaf "
5558 btrfs_free_path(path);
5564 leaf = path->nodes[0];
5565 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5566 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5570 if (key.objectid + key.offset < bytenr) {
5574 if (key.objectid > bytenr + num_bytes)
5577 if (key.objectid == bytenr) {
5578 if (key.offset >= num_bytes) {
5582 num_bytes -= key.offset;
5583 bytenr += key.offset;
5584 } else if (key.objectid < bytenr) {
5585 if (key.objectid + key.offset >= bytenr + num_bytes) {
5589 num_bytes = (bytenr + num_bytes) -
5590 (key.objectid + key.offset);
5591 bytenr = key.objectid + key.offset;
5593 if (key.objectid + key.offset < bytenr + num_bytes) {
5594 u64 new_start = key.objectid + key.offset;
5595 u64 new_bytes = bytenr + num_bytes - new_start;
5598 * Weird case, the extent is in the middle of
5599 * our range, we'll have to search one side
5600 * and then the other. Not sure if this happens
5601 * in real life, but no harm in coding it up
5602 * anyway just in case.
5604 btrfs_release_path(path);
5605 ret = check_extent_exists(root, new_start,
5608 fprintf(stderr, "Right section didn't "
5612 num_bytes = key.objectid - bytenr;
5615 num_bytes = key.objectid - bytenr;
5622 if (num_bytes && !ret) {
5623 fprintf(stderr, "There are no extents for csum range "
5624 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5628 btrfs_free_path(path);
5632 static int check_csums(struct btrfs_root *root)
5634 struct btrfs_path *path;
5635 struct extent_buffer *leaf;
5636 struct btrfs_key key;
5637 u64 offset = 0, num_bytes = 0;
5638 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5642 unsigned long leaf_offset;
5644 root = root->fs_info->csum_root;
5645 if (!extent_buffer_uptodate(root->node)) {
5646 fprintf(stderr, "No valid csum tree found\n");
5650 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5651 key.type = BTRFS_EXTENT_CSUM_KEY;
5654 path = btrfs_alloc_path();
5658 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5660 fprintf(stderr, "Error searching csum tree %d\n", ret);
5661 btrfs_free_path(path);
5665 if (ret > 0 && path->slots[0])
5670 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5671 ret = btrfs_next_leaf(root, path);
5673 fprintf(stderr, "Error going to next leaf "
5680 leaf = path->nodes[0];
5682 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5683 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5688 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5689 csum_size) * root->sectorsize;
5690 if (!check_data_csum)
5691 goto skip_csum_check;
5692 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5693 ret = check_extent_csums(root, key.offset, data_len,
5699 offset = key.offset;
5700 } else if (key.offset != offset + num_bytes) {
5701 ret = check_extent_exists(root, offset, num_bytes);
5703 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5704 "there is no extent record\n",
5705 offset, offset+num_bytes);
5708 offset = key.offset;
5711 num_bytes += data_len;
5715 btrfs_free_path(path);
5719 static int is_dropped_key(struct btrfs_key *key,
5720 struct btrfs_key *drop_key) {
5721 if (key->objectid < drop_key->objectid)
5723 else if (key->objectid == drop_key->objectid) {
5724 if (key->type < drop_key->type)
5726 else if (key->type == drop_key->type) {
5727 if (key->offset < drop_key->offset)
5735 * Here are the rules for FULL_BACKREF.
5737 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5738 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5740 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5741 * if it happened after the relocation occurred since we'll have dropped the
5742 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5743 * have no real way to know for sure.
5745 * We process the blocks one root at a time, and we start from the lowest root
5746 * objectid and go to the highest. So we can just lookup the owner backref for
5747 * the record and if we don't find it then we know it doesn't exist and we have
5750 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5751 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5752 * be set or not and then we can check later once we've gathered all the refs.
5754 static int calc_extent_flag(struct btrfs_root *root,
5755 struct cache_tree *extent_cache,
5756 struct extent_buffer *buf,
5757 struct root_item_record *ri,
5760 struct extent_record *rec;
5761 struct cache_extent *cache;
5762 struct tree_backref *tback;
5765 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5766 /* we have added this extent before */
5768 rec = container_of(cache, struct extent_record, cache);
5771 * Except file/reloc tree, we can not have
5774 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5779 if (buf->start == ri->bytenr)
5782 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5785 owner = btrfs_header_owner(buf);
5786 if (owner == ri->objectid)
5789 tback = find_tree_backref(rec, 0, owner);
5794 if (rec->flag_block_full_backref != -1 &&
5795 rec->flag_block_full_backref != 0)
5796 rec->bad_full_backref = 1;
5799 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5800 if (rec->flag_block_full_backref != -1 &&
5801 rec->flag_block_full_backref != 1)
5802 rec->bad_full_backref = 1;
5806 static int run_next_block(struct btrfs_root *root,
5807 struct block_info *bits,
5810 struct cache_tree *pending,
5811 struct cache_tree *seen,
5812 struct cache_tree *reada,
5813 struct cache_tree *nodes,
5814 struct cache_tree *extent_cache,
5815 struct cache_tree *chunk_cache,
5816 struct rb_root *dev_cache,
5817 struct block_group_tree *block_group_cache,
5818 struct device_extent_tree *dev_extent_cache,
5819 struct root_item_record *ri)
5821 struct extent_buffer *buf;
5822 struct extent_record *rec = NULL;
5833 struct btrfs_key key;
5834 struct cache_extent *cache;
5837 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5838 bits_nr, &reada_bits);
5843 for(i = 0; i < nritems; i++) {
5844 ret = add_cache_extent(reada, bits[i].start,
5849 /* fixme, get the parent transid */
5850 readahead_tree_block(root, bits[i].start,
5854 *last = bits[0].start;
5855 bytenr = bits[0].start;
5856 size = bits[0].size;
5858 cache = lookup_cache_extent(pending, bytenr, size);
5860 remove_cache_extent(pending, cache);
5863 cache = lookup_cache_extent(reada, bytenr, size);
5865 remove_cache_extent(reada, cache);
5868 cache = lookup_cache_extent(nodes, bytenr, size);
5870 remove_cache_extent(nodes, cache);
5873 cache = lookup_cache_extent(extent_cache, bytenr, size);
5875 rec = container_of(cache, struct extent_record, cache);
5876 gen = rec->parent_generation;
5879 /* fixme, get the real parent transid */
5880 buf = read_tree_block(root, bytenr, size, gen);
5881 if (!extent_buffer_uptodate(buf)) {
5882 record_bad_block_io(root->fs_info,
5883 extent_cache, bytenr, size);
5887 nritems = btrfs_header_nritems(buf);
5890 if (!init_extent_tree) {
5891 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5892 btrfs_header_level(buf), 1, NULL,
5895 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5897 fprintf(stderr, "Couldn't calc extent flags\n");
5898 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5903 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5905 fprintf(stderr, "Couldn't calc extent flags\n");
5906 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5910 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5912 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5913 ri->objectid == btrfs_header_owner(buf)) {
5915 * Ok we got to this block from it's original owner and
5916 * we have FULL_BACKREF set. Relocation can leave
5917 * converted blocks over so this is altogether possible,
5918 * however it's not possible if the generation > the
5919 * last snapshot, so check for this case.
5921 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5922 btrfs_header_generation(buf) > ri->last_snapshot) {
5923 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5924 rec->bad_full_backref = 1;
5929 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5930 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5931 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5932 rec->bad_full_backref = 1;
5936 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5937 rec->flag_block_full_backref = 1;
5941 rec->flag_block_full_backref = 0;
5943 owner = btrfs_header_owner(buf);
5946 ret = check_block(root, extent_cache, buf, flags);
5950 if (btrfs_is_leaf(buf)) {
5951 btree_space_waste += btrfs_leaf_free_space(root, buf);
5952 for (i = 0; i < nritems; i++) {
5953 struct btrfs_file_extent_item *fi;
5954 btrfs_item_key_to_cpu(buf, &key, i);
5955 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5956 process_extent_item(root, extent_cache, buf,
5960 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5961 process_extent_item(root, extent_cache, buf,
5965 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5967 btrfs_item_size_nr(buf, i);
5970 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5971 process_chunk_item(chunk_cache, &key, buf, i);
5974 if (key.type == BTRFS_DEV_ITEM_KEY) {
5975 process_device_item(dev_cache, &key, buf, i);
5978 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5979 process_block_group_item(block_group_cache,
5983 if (key.type == BTRFS_DEV_EXTENT_KEY) {
5984 process_device_extent_item(dev_extent_cache,
5989 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5990 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5991 process_extent_ref_v0(extent_cache, buf, i);
5998 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
5999 add_tree_backref(extent_cache, key.objectid, 0,
6003 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6004 add_tree_backref(extent_cache, key.objectid,
6008 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6009 struct btrfs_extent_data_ref *ref;
6010 ref = btrfs_item_ptr(buf, i,
6011 struct btrfs_extent_data_ref);
6012 add_data_backref(extent_cache,
6014 btrfs_extent_data_ref_root(buf, ref),
6015 btrfs_extent_data_ref_objectid(buf,
6017 btrfs_extent_data_ref_offset(buf, ref),
6018 btrfs_extent_data_ref_count(buf, ref),
6019 0, root->sectorsize);
6022 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6023 struct btrfs_shared_data_ref *ref;
6024 ref = btrfs_item_ptr(buf, i,
6025 struct btrfs_shared_data_ref);
6026 add_data_backref(extent_cache,
6027 key.objectid, key.offset, 0, 0, 0,
6028 btrfs_shared_data_ref_count(buf, ref),
6029 0, root->sectorsize);
6032 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6033 struct bad_item *bad;
6035 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6039 bad = malloc(sizeof(struct bad_item));
6042 INIT_LIST_HEAD(&bad->list);
6043 memcpy(&bad->key, &key,
6044 sizeof(struct btrfs_key));
6045 bad->root_id = owner;
6046 list_add_tail(&bad->list, &delete_items);
6049 if (key.type != BTRFS_EXTENT_DATA_KEY)
6051 fi = btrfs_item_ptr(buf, i,
6052 struct btrfs_file_extent_item);
6053 if (btrfs_file_extent_type(buf, fi) ==
6054 BTRFS_FILE_EXTENT_INLINE)
6056 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6059 data_bytes_allocated +=
6060 btrfs_file_extent_disk_num_bytes(buf, fi);
6061 if (data_bytes_allocated < root->sectorsize) {
6064 data_bytes_referenced +=
6065 btrfs_file_extent_num_bytes(buf, fi);
6066 add_data_backref(extent_cache,
6067 btrfs_file_extent_disk_bytenr(buf, fi),
6068 parent, owner, key.objectid, key.offset -
6069 btrfs_file_extent_offset(buf, fi), 1, 1,
6070 btrfs_file_extent_disk_num_bytes(buf, fi));
6074 struct btrfs_key first_key;
6076 first_key.objectid = 0;
6079 btrfs_item_key_to_cpu(buf, &first_key, 0);
6080 level = btrfs_header_level(buf);
6081 for (i = 0; i < nritems; i++) {
6082 ptr = btrfs_node_blockptr(buf, i);
6083 size = btrfs_level_size(root, level - 1);
6084 btrfs_node_key_to_cpu(buf, &key, i);
6086 if ((level == ri->drop_level)
6087 && is_dropped_key(&key, &ri->drop_key)) {
6091 ret = add_extent_rec(extent_cache, &key,
6092 btrfs_node_ptr_generation(buf, i),
6093 ptr, size, 0, 0, 1, 0, 1, 0,
6097 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6100 add_pending(nodes, seen, ptr, size);
6102 add_pending(pending, seen, ptr, size);
6105 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6106 nritems) * sizeof(struct btrfs_key_ptr);
6108 total_btree_bytes += buf->len;
6109 if (fs_root_objectid(btrfs_header_owner(buf)))
6110 total_fs_tree_bytes += buf->len;
6111 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6112 total_extent_tree_bytes += buf->len;
6113 if (!found_old_backref &&
6114 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6115 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6116 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6117 found_old_backref = 1;
6119 free_extent_buffer(buf);
6123 static int add_root_to_pending(struct extent_buffer *buf,
6124 struct cache_tree *extent_cache,
6125 struct cache_tree *pending,
6126 struct cache_tree *seen,
6127 struct cache_tree *nodes,
6130 if (btrfs_header_level(buf) > 0)
6131 add_pending(nodes, seen, buf->start, buf->len);
6133 add_pending(pending, seen, buf->start, buf->len);
6134 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6135 0, 1, 1, 0, 1, 0, buf->len);
6137 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6138 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6139 add_tree_backref(extent_cache, buf->start, buf->start,
6142 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6146 /* as we fix the tree, we might be deleting blocks that
6147 * we're tracking for repair. This hook makes sure we
6148 * remove any backrefs for blocks as we are fixing them.
6150 static int free_extent_hook(struct btrfs_trans_handle *trans,
6151 struct btrfs_root *root,
6152 u64 bytenr, u64 num_bytes, u64 parent,
6153 u64 root_objectid, u64 owner, u64 offset,
6156 struct extent_record *rec;
6157 struct cache_extent *cache;
6159 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6161 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6162 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6166 rec = container_of(cache, struct extent_record, cache);
6168 struct data_backref *back;
6169 back = find_data_backref(rec, parent, root_objectid, owner,
6170 offset, 1, bytenr, num_bytes);
6173 if (back->node.found_ref) {
6174 back->found_ref -= refs_to_drop;
6176 rec->refs -= refs_to_drop;
6178 if (back->node.found_extent_tree) {
6179 back->num_refs -= refs_to_drop;
6180 if (rec->extent_item_refs)
6181 rec->extent_item_refs -= refs_to_drop;
6183 if (back->found_ref == 0)
6184 back->node.found_ref = 0;
6185 if (back->num_refs == 0)
6186 back->node.found_extent_tree = 0;
6188 if (!back->node.found_extent_tree && back->node.found_ref) {
6189 list_del(&back->node.list);
6193 struct tree_backref *back;
6194 back = find_tree_backref(rec, parent, root_objectid);
6197 if (back->node.found_ref) {
6200 back->node.found_ref = 0;
6202 if (back->node.found_extent_tree) {
6203 if (rec->extent_item_refs)
6204 rec->extent_item_refs--;
6205 back->node.found_extent_tree = 0;
6207 if (!back->node.found_extent_tree && back->node.found_ref) {
6208 list_del(&back->node.list);
6212 maybe_free_extent_rec(extent_cache, rec);
6217 static int delete_extent_records(struct btrfs_trans_handle *trans,
6218 struct btrfs_root *root,
6219 struct btrfs_path *path,
6220 u64 bytenr, u64 new_len)
6222 struct btrfs_key key;
6223 struct btrfs_key found_key;
6224 struct extent_buffer *leaf;
6229 key.objectid = bytenr;
6231 key.offset = (u64)-1;
6234 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6241 if (path->slots[0] == 0)
6247 leaf = path->nodes[0];
6248 slot = path->slots[0];
6250 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6251 if (found_key.objectid != bytenr)
6254 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6255 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6256 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6257 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6258 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6259 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6260 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6261 btrfs_release_path(path);
6262 if (found_key.type == 0) {
6263 if (found_key.offset == 0)
6265 key.offset = found_key.offset - 1;
6266 key.type = found_key.type;
6268 key.type = found_key.type - 1;
6269 key.offset = (u64)-1;
6273 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6274 found_key.objectid, found_key.type, found_key.offset);
6276 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6279 btrfs_release_path(path);
6281 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6282 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6283 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6284 found_key.offset : root->leafsize;
6286 ret = btrfs_update_block_group(trans, root, bytenr,
6293 btrfs_release_path(path);
6298 * for a single backref, this will allocate a new extent
6299 * and add the backref to it.
6301 static int record_extent(struct btrfs_trans_handle *trans,
6302 struct btrfs_fs_info *info,
6303 struct btrfs_path *path,
6304 struct extent_record *rec,
6305 struct extent_backref *back,
6306 int allocated, u64 flags)
6309 struct btrfs_root *extent_root = info->extent_root;
6310 struct extent_buffer *leaf;
6311 struct btrfs_key ins_key;
6312 struct btrfs_extent_item *ei;
6313 struct tree_backref *tback;
6314 struct data_backref *dback;
6315 struct btrfs_tree_block_info *bi;
6318 rec->max_size = max_t(u64, rec->max_size,
6319 info->extent_root->leafsize);
6322 u32 item_size = sizeof(*ei);
6325 item_size += sizeof(*bi);
6327 ins_key.objectid = rec->start;
6328 ins_key.offset = rec->max_size;
6329 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6331 ret = btrfs_insert_empty_item(trans, extent_root, path,
6332 &ins_key, item_size);
6336 leaf = path->nodes[0];
6337 ei = btrfs_item_ptr(leaf, path->slots[0],
6338 struct btrfs_extent_item);
6340 btrfs_set_extent_refs(leaf, ei, 0);
6341 btrfs_set_extent_generation(leaf, ei, rec->generation);
6343 if (back->is_data) {
6344 btrfs_set_extent_flags(leaf, ei,
6345 BTRFS_EXTENT_FLAG_DATA);
6347 struct btrfs_disk_key copy_key;;
6349 tback = (struct tree_backref *)back;
6350 bi = (struct btrfs_tree_block_info *)(ei + 1);
6351 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6354 btrfs_set_disk_key_objectid(©_key,
6355 rec->info_objectid);
6356 btrfs_set_disk_key_type(©_key, 0);
6357 btrfs_set_disk_key_offset(©_key, 0);
6359 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6360 btrfs_set_tree_block_key(leaf, bi, ©_key);
6362 btrfs_set_extent_flags(leaf, ei,
6363 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6366 btrfs_mark_buffer_dirty(leaf);
6367 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6368 rec->max_size, 1, 0);
6371 btrfs_release_path(path);
6374 if (back->is_data) {
6378 dback = (struct data_backref *)back;
6379 if (back->full_backref)
6380 parent = dback->parent;
6384 for (i = 0; i < dback->found_ref; i++) {
6385 /* if parent != 0, we're doing a full backref
6386 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6387 * just makes the backref allocator create a data
6390 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6391 rec->start, rec->max_size,
6395 BTRFS_FIRST_FREE_OBJECTID :
6401 fprintf(stderr, "adding new data backref"
6402 " on %llu %s %llu owner %llu"
6403 " offset %llu found %d\n",
6404 (unsigned long long)rec->start,
6405 back->full_backref ?
6407 back->full_backref ?
6408 (unsigned long long)parent :
6409 (unsigned long long)dback->root,
6410 (unsigned long long)dback->owner,
6411 (unsigned long long)dback->offset,
6416 tback = (struct tree_backref *)back;
6417 if (back->full_backref)
6418 parent = tback->parent;
6422 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6423 rec->start, rec->max_size,
6424 parent, tback->root, 0, 0);
6425 fprintf(stderr, "adding new tree backref on "
6426 "start %llu len %llu parent %llu root %llu\n",
6427 rec->start, rec->max_size, parent, tback->root);
6430 btrfs_release_path(path);
6434 struct extent_entry {
6439 struct list_head list;
6442 static struct extent_entry *find_entry(struct list_head *entries,
6443 u64 bytenr, u64 bytes)
6445 struct extent_entry *entry = NULL;
6447 list_for_each_entry(entry, entries, list) {
6448 if (entry->bytenr == bytenr && entry->bytes == bytes)
6455 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6457 struct extent_entry *entry, *best = NULL, *prev = NULL;
6459 list_for_each_entry(entry, entries, list) {
6466 * If there are as many broken entries as entries then we know
6467 * not to trust this particular entry.
6469 if (entry->broken == entry->count)
6473 * If our current entry == best then we can't be sure our best
6474 * is really the best, so we need to keep searching.
6476 if (best && best->count == entry->count) {
6482 /* Prev == entry, not good enough, have to keep searching */
6483 if (!prev->broken && prev->count == entry->count)
6487 best = (prev->count > entry->count) ? prev : entry;
6488 else if (best->count < entry->count)
6496 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6497 struct data_backref *dback, struct extent_entry *entry)
6499 struct btrfs_trans_handle *trans;
6500 struct btrfs_root *root;
6501 struct btrfs_file_extent_item *fi;
6502 struct extent_buffer *leaf;
6503 struct btrfs_key key;
6507 key.objectid = dback->root;
6508 key.type = BTRFS_ROOT_ITEM_KEY;
6509 key.offset = (u64)-1;
6510 root = btrfs_read_fs_root(info, &key);
6512 fprintf(stderr, "Couldn't find root for our ref\n");
6517 * The backref points to the original offset of the extent if it was
6518 * split, so we need to search down to the offset we have and then walk
6519 * forward until we find the backref we're looking for.
6521 key.objectid = dback->owner;
6522 key.type = BTRFS_EXTENT_DATA_KEY;
6523 key.offset = dback->offset;
6524 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6526 fprintf(stderr, "Error looking up ref %d\n", ret);
6531 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6532 ret = btrfs_next_leaf(root, path);
6534 fprintf(stderr, "Couldn't find our ref, next\n");
6538 leaf = path->nodes[0];
6539 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6540 if (key.objectid != dback->owner ||
6541 key.type != BTRFS_EXTENT_DATA_KEY) {
6542 fprintf(stderr, "Couldn't find our ref, search\n");
6545 fi = btrfs_item_ptr(leaf, path->slots[0],
6546 struct btrfs_file_extent_item);
6547 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6548 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6550 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6555 btrfs_release_path(path);
6557 trans = btrfs_start_transaction(root, 1);
6559 return PTR_ERR(trans);
6562 * Ok we have the key of the file extent we want to fix, now we can cow
6563 * down to the thing and fix it.
6565 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6567 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6568 key.objectid, key.type, key.offset, ret);
6572 fprintf(stderr, "Well that's odd, we just found this key "
6573 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6578 leaf = path->nodes[0];
6579 fi = btrfs_item_ptr(leaf, path->slots[0],
6580 struct btrfs_file_extent_item);
6582 if (btrfs_file_extent_compression(leaf, fi) &&
6583 dback->disk_bytenr != entry->bytenr) {
6584 fprintf(stderr, "Ref doesn't match the record start and is "
6585 "compressed, please take a btrfs-image of this file "
6586 "system and send it to a btrfs developer so they can "
6587 "complete this functionality for bytenr %Lu\n",
6588 dback->disk_bytenr);
6593 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6594 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6595 } else if (dback->disk_bytenr > entry->bytenr) {
6596 u64 off_diff, offset;
6598 off_diff = dback->disk_bytenr - entry->bytenr;
6599 offset = btrfs_file_extent_offset(leaf, fi);
6600 if (dback->disk_bytenr + offset +
6601 btrfs_file_extent_num_bytes(leaf, fi) >
6602 entry->bytenr + entry->bytes) {
6603 fprintf(stderr, "Ref is past the entry end, please "
6604 "take a btrfs-image of this file system and "
6605 "send it to a btrfs developer, ref %Lu\n",
6606 dback->disk_bytenr);
6611 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6612 btrfs_set_file_extent_offset(leaf, fi, offset);
6613 } else if (dback->disk_bytenr < entry->bytenr) {
6616 offset = btrfs_file_extent_offset(leaf, fi);
6617 if (dback->disk_bytenr + offset < entry->bytenr) {
6618 fprintf(stderr, "Ref is before the entry start, please"
6619 " take a btrfs-image of this file system and "
6620 "send it to a btrfs developer, ref %Lu\n",
6621 dback->disk_bytenr);
6626 offset += dback->disk_bytenr;
6627 offset -= entry->bytenr;
6628 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6629 btrfs_set_file_extent_offset(leaf, fi, offset);
6632 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6635 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6636 * only do this if we aren't using compression, otherwise it's a
6639 if (!btrfs_file_extent_compression(leaf, fi))
6640 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6642 printf("ram bytes may be wrong?\n");
6643 btrfs_mark_buffer_dirty(leaf);
6645 err = btrfs_commit_transaction(trans, root);
6646 btrfs_release_path(path);
6647 return ret ? ret : err;
6650 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6651 struct extent_record *rec)
6653 struct extent_backref *back;
6654 struct data_backref *dback;
6655 struct extent_entry *entry, *best = NULL;
6658 int broken_entries = 0;
6663 * Metadata is easy and the backrefs should always agree on bytenr and
6664 * size, if not we've got bigger issues.
6669 list_for_each_entry(back, &rec->backrefs, list) {
6670 if (back->full_backref || !back->is_data)
6673 dback = (struct data_backref *)back;
6676 * We only pay attention to backrefs that we found a real
6679 if (dback->found_ref == 0)
6683 * For now we only catch when the bytes don't match, not the
6684 * bytenr. We can easily do this at the same time, but I want
6685 * to have a fs image to test on before we just add repair
6686 * functionality willy-nilly so we know we won't screw up the
6690 entry = find_entry(&entries, dback->disk_bytenr,
6693 entry = malloc(sizeof(struct extent_entry));
6698 memset(entry, 0, sizeof(*entry));
6699 entry->bytenr = dback->disk_bytenr;
6700 entry->bytes = dback->bytes;
6701 list_add_tail(&entry->list, &entries);
6706 * If we only have on entry we may think the entries agree when
6707 * in reality they don't so we have to do some extra checking.
6709 if (dback->disk_bytenr != rec->start ||
6710 dback->bytes != rec->nr || back->broken)
6721 /* Yay all the backrefs agree, carry on good sir */
6722 if (nr_entries <= 1 && !mismatch)
6725 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6726 "%Lu\n", rec->start);
6729 * First we want to see if the backrefs can agree amongst themselves who
6730 * is right, so figure out which one of the entries has the highest
6733 best = find_most_right_entry(&entries);
6736 * Ok so we may have an even split between what the backrefs think, so
6737 * this is where we use the extent ref to see what it thinks.
6740 entry = find_entry(&entries, rec->start, rec->nr);
6741 if (!entry && (!broken_entries || !rec->found_rec)) {
6742 fprintf(stderr, "Backrefs don't agree with each other "
6743 "and extent record doesn't agree with anybody,"
6744 " so we can't fix bytenr %Lu bytes %Lu\n",
6745 rec->start, rec->nr);
6748 } else if (!entry) {
6750 * Ok our backrefs were broken, we'll assume this is the
6751 * correct value and add an entry for this range.
6753 entry = malloc(sizeof(struct extent_entry));
6758 memset(entry, 0, sizeof(*entry));
6759 entry->bytenr = rec->start;
6760 entry->bytes = rec->nr;
6761 list_add_tail(&entry->list, &entries);
6765 best = find_most_right_entry(&entries);
6767 fprintf(stderr, "Backrefs and extent record evenly "
6768 "split on who is right, this is going to "
6769 "require user input to fix bytenr %Lu bytes "
6770 "%Lu\n", rec->start, rec->nr);
6777 * I don't think this can happen currently as we'll abort() if we catch
6778 * this case higher up, but in case somebody removes that we still can't
6779 * deal with it properly here yet, so just bail out of that's the case.
6781 if (best->bytenr != rec->start) {
6782 fprintf(stderr, "Extent start and backref starts don't match, "
6783 "please use btrfs-image on this file system and send "
6784 "it to a btrfs developer so they can make fsck fix "
6785 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6786 rec->start, rec->nr);
6792 * Ok great we all agreed on an extent record, let's go find the real
6793 * references and fix up the ones that don't match.
6795 list_for_each_entry(back, &rec->backrefs, list) {
6796 if (back->full_backref || !back->is_data)
6799 dback = (struct data_backref *)back;
6802 * Still ignoring backrefs that don't have a real ref attached
6805 if (dback->found_ref == 0)
6808 if (dback->bytes == best->bytes &&
6809 dback->disk_bytenr == best->bytenr)
6812 ret = repair_ref(info, path, dback, best);
6818 * Ok we messed with the actual refs, which means we need to drop our
6819 * entire cache and go back and rescan. I know this is a huge pain and
6820 * adds a lot of extra work, but it's the only way to be safe. Once all
6821 * the backrefs agree we may not need to do anything to the extent
6826 while (!list_empty(&entries)) {
6827 entry = list_entry(entries.next, struct extent_entry, list);
6828 list_del_init(&entry->list);
6834 static int process_duplicates(struct btrfs_root *root,
6835 struct cache_tree *extent_cache,
6836 struct extent_record *rec)
6838 struct extent_record *good, *tmp;
6839 struct cache_extent *cache;
6843 * If we found a extent record for this extent then return, or if we
6844 * have more than one duplicate we are likely going to need to delete
6847 if (rec->found_rec || rec->num_duplicates > 1)
6850 /* Shouldn't happen but just in case */
6851 BUG_ON(!rec->num_duplicates);
6854 * So this happens if we end up with a backref that doesn't match the
6855 * actual extent entry. So either the backref is bad or the extent
6856 * entry is bad. Either way we want to have the extent_record actually
6857 * reflect what we found in the extent_tree, so we need to take the
6858 * duplicate out and use that as the extent_record since the only way we
6859 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6861 remove_cache_extent(extent_cache, &rec->cache);
6863 good = list_entry(rec->dups.next, struct extent_record, list);
6864 list_del_init(&good->list);
6865 INIT_LIST_HEAD(&good->backrefs);
6866 INIT_LIST_HEAD(&good->dups);
6867 good->cache.start = good->start;
6868 good->cache.size = good->nr;
6869 good->content_checked = 0;
6870 good->owner_ref_checked = 0;
6871 good->num_duplicates = 0;
6872 good->refs = rec->refs;
6873 list_splice_init(&rec->backrefs, &good->backrefs);
6875 cache = lookup_cache_extent(extent_cache, good->start,
6879 tmp = container_of(cache, struct extent_record, cache);
6882 * If we find another overlapping extent and it's found_rec is
6883 * set then it's a duplicate and we need to try and delete
6886 if (tmp->found_rec || tmp->num_duplicates > 0) {
6887 if (list_empty(&good->list))
6888 list_add_tail(&good->list,
6889 &duplicate_extents);
6890 good->num_duplicates += tmp->num_duplicates + 1;
6891 list_splice_init(&tmp->dups, &good->dups);
6892 list_del_init(&tmp->list);
6893 list_add_tail(&tmp->list, &good->dups);
6894 remove_cache_extent(extent_cache, &tmp->cache);
6899 * Ok we have another non extent item backed extent rec, so lets
6900 * just add it to this extent and carry on like we did above.
6902 good->refs += tmp->refs;
6903 list_splice_init(&tmp->backrefs, &good->backrefs);
6904 remove_cache_extent(extent_cache, &tmp->cache);
6907 ret = insert_cache_extent(extent_cache, &good->cache);
6910 return good->num_duplicates ? 0 : 1;
6913 static int delete_duplicate_records(struct btrfs_root *root,
6914 struct extent_record *rec)
6916 struct btrfs_trans_handle *trans;
6917 LIST_HEAD(delete_list);
6918 struct btrfs_path *path;
6919 struct extent_record *tmp, *good, *n;
6922 struct btrfs_key key;
6924 path = btrfs_alloc_path();
6931 /* Find the record that covers all of the duplicates. */
6932 list_for_each_entry(tmp, &rec->dups, list) {
6933 if (good->start < tmp->start)
6935 if (good->nr > tmp->nr)
6938 if (tmp->start + tmp->nr < good->start + good->nr) {
6939 fprintf(stderr, "Ok we have overlapping extents that "
6940 "aren't completely covered by eachother, this "
6941 "is going to require more careful thought. "
6942 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6943 tmp->start, tmp->nr, good->start, good->nr);
6950 list_add_tail(&rec->list, &delete_list);
6952 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6955 list_move_tail(&tmp->list, &delete_list);
6958 root = root->fs_info->extent_root;
6959 trans = btrfs_start_transaction(root, 1);
6960 if (IS_ERR(trans)) {
6961 ret = PTR_ERR(trans);
6965 list_for_each_entry(tmp, &delete_list, list) {
6966 if (tmp->found_rec == 0)
6968 key.objectid = tmp->start;
6969 key.type = BTRFS_EXTENT_ITEM_KEY;
6970 key.offset = tmp->nr;
6972 /* Shouldn't happen but just in case */
6973 if (tmp->metadata) {
6974 fprintf(stderr, "Well this shouldn't happen, extent "
6975 "record overlaps but is metadata? "
6976 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6980 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6986 ret = btrfs_del_item(trans, root, path);
6989 btrfs_release_path(path);
6992 err = btrfs_commit_transaction(trans, root);
6996 while (!list_empty(&delete_list)) {
6997 tmp = list_entry(delete_list.next, struct extent_record, list);
6998 list_del_init(&tmp->list);
7004 while (!list_empty(&rec->dups)) {
7005 tmp = list_entry(rec->dups.next, struct extent_record, list);
7006 list_del_init(&tmp->list);
7010 btrfs_free_path(path);
7012 if (!ret && !nr_del)
7013 rec->num_duplicates = 0;
7015 return ret ? ret : nr_del;
7018 static int find_possible_backrefs(struct btrfs_fs_info *info,
7019 struct btrfs_path *path,
7020 struct cache_tree *extent_cache,
7021 struct extent_record *rec)
7023 struct btrfs_root *root;
7024 struct extent_backref *back;
7025 struct data_backref *dback;
7026 struct cache_extent *cache;
7027 struct btrfs_file_extent_item *fi;
7028 struct btrfs_key key;
7032 list_for_each_entry(back, &rec->backrefs, list) {
7033 /* Don't care about full backrefs (poor unloved backrefs) */
7034 if (back->full_backref || !back->is_data)
7037 dback = (struct data_backref *)back;
7039 /* We found this one, we don't need to do a lookup */
7040 if (dback->found_ref)
7043 key.objectid = dback->root;
7044 key.type = BTRFS_ROOT_ITEM_KEY;
7045 key.offset = (u64)-1;
7047 root = btrfs_read_fs_root(info, &key);
7049 /* No root, definitely a bad ref, skip */
7050 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7052 /* Other err, exit */
7054 return PTR_ERR(root);
7056 key.objectid = dback->owner;
7057 key.type = BTRFS_EXTENT_DATA_KEY;
7058 key.offset = dback->offset;
7059 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7061 btrfs_release_path(path);
7064 /* Didn't find it, we can carry on */
7069 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7070 struct btrfs_file_extent_item);
7071 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7072 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7073 btrfs_release_path(path);
7074 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7076 struct extent_record *tmp;
7077 tmp = container_of(cache, struct extent_record, cache);
7080 * If we found an extent record for the bytenr for this
7081 * particular backref then we can't add it to our
7082 * current extent record. We only want to add backrefs
7083 * that don't have a corresponding extent item in the
7084 * extent tree since they likely belong to this record
7085 * and we need to fix it if it doesn't match bytenrs.
7091 dback->found_ref += 1;
7092 dback->disk_bytenr = bytenr;
7093 dback->bytes = bytes;
7096 * Set this so the verify backref code knows not to trust the
7097 * values in this backref.
7106 * Record orphan data ref into corresponding root.
7108 * Return 0 if the extent item contains data ref and recorded.
7109 * Return 1 if the extent item contains no useful data ref
7110 * On that case, it may contains only shared_dataref or metadata backref
7111 * or the file extent exists(this should be handled by the extent bytenr
7113 * Return <0 if something goes wrong.
7115 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7116 struct extent_record *rec)
7118 struct btrfs_key key;
7119 struct btrfs_root *dest_root;
7120 struct extent_backref *back;
7121 struct data_backref *dback;
7122 struct orphan_data_extent *orphan;
7123 struct btrfs_path *path;
7124 int recorded_data_ref = 0;
7129 path = btrfs_alloc_path();
7132 list_for_each_entry(back, &rec->backrefs, list) {
7133 if (back->full_backref || !back->is_data ||
7134 !back->found_extent_tree)
7136 dback = (struct data_backref *)back;
7137 if (dback->found_ref)
7139 key.objectid = dback->root;
7140 key.type = BTRFS_ROOT_ITEM_KEY;
7141 key.offset = (u64)-1;
7143 dest_root = btrfs_read_fs_root(fs_info, &key);
7145 /* For non-exist root we just skip it */
7146 if (IS_ERR(dest_root) || !dest_root)
7149 key.objectid = dback->owner;
7150 key.type = BTRFS_EXTENT_DATA_KEY;
7151 key.offset = dback->offset;
7153 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7155 * For ret < 0, it's OK since the fs-tree may be corrupted,
7156 * we need to record it for inode/file extent rebuild.
7157 * For ret > 0, we record it only for file extent rebuild.
7158 * For ret == 0, the file extent exists but only bytenr
7159 * mismatch, let the original bytenr fix routine to handle,
7165 orphan = malloc(sizeof(*orphan));
7170 INIT_LIST_HEAD(&orphan->list);
7171 orphan->root = dback->root;
7172 orphan->objectid = dback->owner;
7173 orphan->offset = dback->offset;
7174 orphan->disk_bytenr = rec->cache.start;
7175 orphan->disk_len = rec->cache.size;
7176 list_add(&dest_root->orphan_data_extents, &orphan->list);
7177 recorded_data_ref = 1;
7180 btrfs_free_path(path);
7182 return !recorded_data_ref;
7188 * when an incorrect extent item is found, this will delete
7189 * all of the existing entries for it and recreate them
7190 * based on what the tree scan found.
7192 static int fixup_extent_refs(struct btrfs_fs_info *info,
7193 struct cache_tree *extent_cache,
7194 struct extent_record *rec)
7196 struct btrfs_trans_handle *trans = NULL;
7198 struct btrfs_path *path;
7199 struct list_head *cur = rec->backrefs.next;
7200 struct cache_extent *cache;
7201 struct extent_backref *back;
7205 if (rec->flag_block_full_backref)
7206 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7208 path = btrfs_alloc_path();
7212 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7214 * Sometimes the backrefs themselves are so broken they don't
7215 * get attached to any meaningful rec, so first go back and
7216 * check any of our backrefs that we couldn't find and throw
7217 * them into the list if we find the backref so that
7218 * verify_backrefs can figure out what to do.
7220 ret = find_possible_backrefs(info, path, extent_cache, rec);
7225 /* step one, make sure all of the backrefs agree */
7226 ret = verify_backrefs(info, path, rec);
7230 trans = btrfs_start_transaction(info->extent_root, 1);
7231 if (IS_ERR(trans)) {
7232 ret = PTR_ERR(trans);
7236 /* step two, delete all the existing records */
7237 ret = delete_extent_records(trans, info->extent_root, path,
7238 rec->start, rec->max_size);
7243 /* was this block corrupt? If so, don't add references to it */
7244 cache = lookup_cache_extent(info->corrupt_blocks,
7245 rec->start, rec->max_size);
7251 /* step three, recreate all the refs we did find */
7252 while(cur != &rec->backrefs) {
7253 back = list_entry(cur, struct extent_backref, list);
7257 * if we didn't find any references, don't create a
7260 if (!back->found_ref)
7263 rec->bad_full_backref = 0;
7264 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7272 int err = btrfs_commit_transaction(trans, info->extent_root);
7277 btrfs_free_path(path);
7281 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7282 struct extent_record *rec)
7284 struct btrfs_trans_handle *trans;
7285 struct btrfs_root *root = fs_info->extent_root;
7286 struct btrfs_path *path;
7287 struct btrfs_extent_item *ei;
7288 struct btrfs_key key;
7292 key.objectid = rec->start;
7293 if (rec->metadata) {
7294 key.type = BTRFS_METADATA_ITEM_KEY;
7295 key.offset = rec->info_level;
7297 key.type = BTRFS_EXTENT_ITEM_KEY;
7298 key.offset = rec->max_size;
7301 path = btrfs_alloc_path();
7305 trans = btrfs_start_transaction(root, 0);
7306 if (IS_ERR(trans)) {
7307 btrfs_free_path(path);
7308 return PTR_ERR(trans);
7311 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7313 btrfs_free_path(path);
7314 btrfs_commit_transaction(trans, root);
7317 fprintf(stderr, "Didn't find extent for %llu\n",
7318 (unsigned long long)rec->start);
7319 btrfs_free_path(path);
7320 btrfs_commit_transaction(trans, root);
7324 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7325 struct btrfs_extent_item);
7326 flags = btrfs_extent_flags(path->nodes[0], ei);
7327 if (rec->flag_block_full_backref) {
7328 fprintf(stderr, "setting full backref on %llu\n",
7329 (unsigned long long)key.objectid);
7330 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7332 fprintf(stderr, "clearing full backref on %llu\n",
7333 (unsigned long long)key.objectid);
7334 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7336 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7337 btrfs_mark_buffer_dirty(path->nodes[0]);
7338 btrfs_free_path(path);
7339 return btrfs_commit_transaction(trans, root);
7342 /* right now we only prune from the extent allocation tree */
7343 static int prune_one_block(struct btrfs_trans_handle *trans,
7344 struct btrfs_fs_info *info,
7345 struct btrfs_corrupt_block *corrupt)
7348 struct btrfs_path path;
7349 struct extent_buffer *eb;
7353 int level = corrupt->level + 1;
7355 btrfs_init_path(&path);
7357 /* we want to stop at the parent to our busted block */
7358 path.lowest_level = level;
7360 ret = btrfs_search_slot(trans, info->extent_root,
7361 &corrupt->key, &path, -1, 1);
7366 eb = path.nodes[level];
7373 * hopefully the search gave us the block we want to prune,
7374 * lets try that first
7376 slot = path.slots[level];
7377 found = btrfs_node_blockptr(eb, slot);
7378 if (found == corrupt->cache.start)
7381 nritems = btrfs_header_nritems(eb);
7383 /* the search failed, lets scan this node and hope we find it */
7384 for (slot = 0; slot < nritems; slot++) {
7385 found = btrfs_node_blockptr(eb, slot);
7386 if (found == corrupt->cache.start)
7390 * we couldn't find the bad block. TODO, search all the nodes for pointers
7393 if (eb == info->extent_root->node) {
7398 btrfs_release_path(&path);
7403 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7404 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7407 btrfs_release_path(&path);
7411 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7413 struct btrfs_trans_handle *trans = NULL;
7414 struct cache_extent *cache;
7415 struct btrfs_corrupt_block *corrupt;
7418 cache = search_cache_extent(info->corrupt_blocks, 0);
7422 trans = btrfs_start_transaction(info->extent_root, 1);
7424 return PTR_ERR(trans);
7426 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7427 prune_one_block(trans, info, corrupt);
7428 remove_cache_extent(info->corrupt_blocks, cache);
7431 return btrfs_commit_transaction(trans, info->extent_root);
7435 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7437 struct btrfs_block_group_cache *cache;
7442 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7443 &start, &end, EXTENT_DIRTY);
7446 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7452 cache = btrfs_lookup_first_block_group(fs_info, start);
7457 start = cache->key.objectid + cache->key.offset;
7461 static int check_extent_refs(struct btrfs_root *root,
7462 struct cache_tree *extent_cache)
7464 struct extent_record *rec;
7465 struct cache_extent *cache;
7474 * if we're doing a repair, we have to make sure
7475 * we don't allocate from the problem extents.
7476 * In the worst case, this will be all the
7479 cache = search_cache_extent(extent_cache, 0);
7481 rec = container_of(cache, struct extent_record, cache);
7482 set_extent_dirty(root->fs_info->excluded_extents,
7484 rec->start + rec->max_size - 1,
7486 cache = next_cache_extent(cache);
7489 /* pin down all the corrupted blocks too */
7490 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7492 set_extent_dirty(root->fs_info->excluded_extents,
7494 cache->start + cache->size - 1,
7496 cache = next_cache_extent(cache);
7498 prune_corrupt_blocks(root->fs_info);
7499 reset_cached_block_groups(root->fs_info);
7502 reset_cached_block_groups(root->fs_info);
7505 * We need to delete any duplicate entries we find first otherwise we
7506 * could mess up the extent tree when we have backrefs that actually
7507 * belong to a different extent item and not the weird duplicate one.
7509 while (repair && !list_empty(&duplicate_extents)) {
7510 rec = list_entry(duplicate_extents.next, struct extent_record,
7512 list_del_init(&rec->list);
7514 /* Sometimes we can find a backref before we find an actual
7515 * extent, so we need to process it a little bit to see if there
7516 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7517 * if this is a backref screwup. If we need to delete stuff
7518 * process_duplicates() will return 0, otherwise it will return
7521 if (process_duplicates(root, extent_cache, rec))
7523 ret = delete_duplicate_records(root, rec);
7527 * delete_duplicate_records will return the number of entries
7528 * deleted, so if it's greater than 0 then we know we actually
7529 * did something and we need to remove.
7543 cache = search_cache_extent(extent_cache, 0);
7546 rec = container_of(cache, struct extent_record, cache);
7547 if (rec->num_duplicates) {
7548 fprintf(stderr, "extent item %llu has multiple extent "
7549 "items\n", (unsigned long long)rec->start);
7554 if (rec->refs != rec->extent_item_refs) {
7555 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7556 (unsigned long long)rec->start,
7557 (unsigned long long)rec->nr);
7558 fprintf(stderr, "extent item %llu, found %llu\n",
7559 (unsigned long long)rec->extent_item_refs,
7560 (unsigned long long)rec->refs);
7561 ret = record_orphan_data_extents(root->fs_info, rec);
7568 * we can't use the extent to repair file
7569 * extent, let the fallback method handle it.
7571 if (!fixed && repair) {
7572 ret = fixup_extent_refs(
7583 if (all_backpointers_checked(rec, 1)) {
7584 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7585 (unsigned long long)rec->start,
7586 (unsigned long long)rec->nr);
7588 if (!fixed && !recorded && repair) {
7589 ret = fixup_extent_refs(root->fs_info,
7598 if (!rec->owner_ref_checked) {
7599 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7600 (unsigned long long)rec->start,
7601 (unsigned long long)rec->nr);
7602 if (!fixed && !recorded && repair) {
7603 ret = fixup_extent_refs(root->fs_info,
7612 if (rec->bad_full_backref) {
7613 fprintf(stderr, "bad full backref, on [%llu]\n",
7614 (unsigned long long)rec->start);
7616 ret = fixup_extent_flags(root->fs_info, rec);
7625 * Although it's not a extent ref's problem, we reuse this
7626 * routine for error reporting.
7627 * No repair function yet.
7629 if (rec->crossing_stripes) {
7631 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7632 rec->start, rec->start + rec->max_size);
7637 if (rec->wrong_chunk_type) {
7639 "bad extent [%llu, %llu), type mismatch with chunk\n",
7640 rec->start, rec->start + rec->max_size);
7645 remove_cache_extent(extent_cache, cache);
7646 free_all_extent_backrefs(rec);
7647 if (!init_extent_tree && repair && (!cur_err || fixed))
7648 clear_extent_dirty(root->fs_info->excluded_extents,
7650 rec->start + rec->max_size - 1,
7656 if (ret && ret != -EAGAIN) {
7657 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7660 struct btrfs_trans_handle *trans;
7662 root = root->fs_info->extent_root;
7663 trans = btrfs_start_transaction(root, 1);
7664 if (IS_ERR(trans)) {
7665 ret = PTR_ERR(trans);
7669 btrfs_fix_block_accounting(trans, root);
7670 ret = btrfs_commit_transaction(trans, root);
7675 fprintf(stderr, "repaired damaged extent references\n");
7681 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7685 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7686 stripe_size = length;
7687 stripe_size /= num_stripes;
7688 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7689 stripe_size = length * 2;
7690 stripe_size /= num_stripes;
7691 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7692 stripe_size = length;
7693 stripe_size /= (num_stripes - 1);
7694 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7695 stripe_size = length;
7696 stripe_size /= (num_stripes - 2);
7698 stripe_size = length;
7704 * Check the chunk with its block group/dev list ref:
7705 * Return 0 if all refs seems valid.
7706 * Return 1 if part of refs seems valid, need later check for rebuild ref
7707 * like missing block group and needs to search extent tree to rebuild them.
7708 * Return -1 if essential refs are missing and unable to rebuild.
7710 static int check_chunk_refs(struct chunk_record *chunk_rec,
7711 struct block_group_tree *block_group_cache,
7712 struct device_extent_tree *dev_extent_cache,
7715 struct cache_extent *block_group_item;
7716 struct block_group_record *block_group_rec;
7717 struct cache_extent *dev_extent_item;
7718 struct device_extent_record *dev_extent_rec;
7722 int metadump_v2 = 0;
7726 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7729 if (block_group_item) {
7730 block_group_rec = container_of(block_group_item,
7731 struct block_group_record,
7733 if (chunk_rec->length != block_group_rec->offset ||
7734 chunk_rec->offset != block_group_rec->objectid ||
7736 chunk_rec->type_flags != block_group_rec->flags)) {
7739 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7740 chunk_rec->objectid,
7745 chunk_rec->type_flags,
7746 block_group_rec->objectid,
7747 block_group_rec->type,
7748 block_group_rec->offset,
7749 block_group_rec->offset,
7750 block_group_rec->objectid,
7751 block_group_rec->flags);
7754 list_del_init(&block_group_rec->list);
7755 chunk_rec->bg_rec = block_group_rec;
7760 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7761 chunk_rec->objectid,
7766 chunk_rec->type_flags);
7773 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7774 chunk_rec->num_stripes);
7775 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7776 devid = chunk_rec->stripes[i].devid;
7777 offset = chunk_rec->stripes[i].offset;
7778 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7779 devid, offset, length);
7780 if (dev_extent_item) {
7781 dev_extent_rec = container_of(dev_extent_item,
7782 struct device_extent_record,
7784 if (dev_extent_rec->objectid != devid ||
7785 dev_extent_rec->offset != offset ||
7786 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7787 dev_extent_rec->length != length) {
7790 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7791 chunk_rec->objectid,
7794 chunk_rec->stripes[i].devid,
7795 chunk_rec->stripes[i].offset,
7796 dev_extent_rec->objectid,
7797 dev_extent_rec->offset,
7798 dev_extent_rec->length);
7801 list_move(&dev_extent_rec->chunk_list,
7802 &chunk_rec->dextents);
7807 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7808 chunk_rec->objectid,
7811 chunk_rec->stripes[i].devid,
7812 chunk_rec->stripes[i].offset);
7819 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7820 int check_chunks(struct cache_tree *chunk_cache,
7821 struct block_group_tree *block_group_cache,
7822 struct device_extent_tree *dev_extent_cache,
7823 struct list_head *good, struct list_head *bad,
7824 struct list_head *rebuild, int silent)
7826 struct cache_extent *chunk_item;
7827 struct chunk_record *chunk_rec;
7828 struct block_group_record *bg_rec;
7829 struct device_extent_record *dext_rec;
7833 chunk_item = first_cache_extent(chunk_cache);
7834 while (chunk_item) {
7835 chunk_rec = container_of(chunk_item, struct chunk_record,
7837 err = check_chunk_refs(chunk_rec, block_group_cache,
7838 dev_extent_cache, silent);
7841 if (err == 0 && good)
7842 list_add_tail(&chunk_rec->list, good);
7843 if (err > 0 && rebuild)
7844 list_add_tail(&chunk_rec->list, rebuild);
7846 list_add_tail(&chunk_rec->list, bad);
7847 chunk_item = next_cache_extent(chunk_item);
7850 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7853 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7861 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7865 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7876 static int check_device_used(struct device_record *dev_rec,
7877 struct device_extent_tree *dext_cache)
7879 struct cache_extent *cache;
7880 struct device_extent_record *dev_extent_rec;
7883 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7885 dev_extent_rec = container_of(cache,
7886 struct device_extent_record,
7888 if (dev_extent_rec->objectid != dev_rec->devid)
7891 list_del_init(&dev_extent_rec->device_list);
7892 total_byte += dev_extent_rec->length;
7893 cache = next_cache_extent(cache);
7896 if (total_byte != dev_rec->byte_used) {
7898 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7899 total_byte, dev_rec->byte_used, dev_rec->objectid,
7900 dev_rec->type, dev_rec->offset);
7907 /* check btrfs_dev_item -> btrfs_dev_extent */
7908 static int check_devices(struct rb_root *dev_cache,
7909 struct device_extent_tree *dev_extent_cache)
7911 struct rb_node *dev_node;
7912 struct device_record *dev_rec;
7913 struct device_extent_record *dext_rec;
7917 dev_node = rb_first(dev_cache);
7919 dev_rec = container_of(dev_node, struct device_record, node);
7920 err = check_device_used(dev_rec, dev_extent_cache);
7924 dev_node = rb_next(dev_node);
7926 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7929 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7930 dext_rec->objectid, dext_rec->offset, dext_rec->length);
7937 static int add_root_item_to_list(struct list_head *head,
7938 u64 objectid, u64 bytenr, u64 last_snapshot,
7939 u8 level, u8 drop_level,
7940 int level_size, struct btrfs_key *drop_key)
7943 struct root_item_record *ri_rec;
7944 ri_rec = malloc(sizeof(*ri_rec));
7947 ri_rec->bytenr = bytenr;
7948 ri_rec->objectid = objectid;
7949 ri_rec->level = level;
7950 ri_rec->level_size = level_size;
7951 ri_rec->drop_level = drop_level;
7952 ri_rec->last_snapshot = last_snapshot;
7954 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7955 list_add_tail(&ri_rec->list, head);
7960 static void free_root_item_list(struct list_head *list)
7962 struct root_item_record *ri_rec;
7964 while (!list_empty(list)) {
7965 ri_rec = list_first_entry(list, struct root_item_record,
7967 list_del_init(&ri_rec->list);
7972 static int deal_root_from_list(struct list_head *list,
7973 struct btrfs_root *root,
7974 struct block_info *bits,
7976 struct cache_tree *pending,
7977 struct cache_tree *seen,
7978 struct cache_tree *reada,
7979 struct cache_tree *nodes,
7980 struct cache_tree *extent_cache,
7981 struct cache_tree *chunk_cache,
7982 struct rb_root *dev_cache,
7983 struct block_group_tree *block_group_cache,
7984 struct device_extent_tree *dev_extent_cache)
7989 while (!list_empty(list)) {
7990 struct root_item_record *rec;
7991 struct extent_buffer *buf;
7992 rec = list_entry(list->next,
7993 struct root_item_record, list);
7995 buf = read_tree_block(root->fs_info->tree_root,
7996 rec->bytenr, rec->level_size, 0);
7997 if (!extent_buffer_uptodate(buf)) {
7998 free_extent_buffer(buf);
8002 add_root_to_pending(buf, extent_cache, pending,
8003 seen, nodes, rec->objectid);
8005 * To rebuild extent tree, we need deal with snapshot
8006 * one by one, otherwise we deal with node firstly which
8007 * can maximize readahead.
8010 ret = run_next_block(root, bits, bits_nr, &last,
8011 pending, seen, reada, nodes,
8012 extent_cache, chunk_cache,
8013 dev_cache, block_group_cache,
8014 dev_extent_cache, rec);
8018 free_extent_buffer(buf);
8019 list_del(&rec->list);
8025 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8026 reada, nodes, extent_cache, chunk_cache,
8027 dev_cache, block_group_cache,
8028 dev_extent_cache, NULL);
8038 static int check_chunks_and_extents(struct btrfs_root *root)
8040 struct rb_root dev_cache;
8041 struct cache_tree chunk_cache;
8042 struct block_group_tree block_group_cache;
8043 struct device_extent_tree dev_extent_cache;
8044 struct cache_tree extent_cache;
8045 struct cache_tree seen;
8046 struct cache_tree pending;
8047 struct cache_tree reada;
8048 struct cache_tree nodes;
8049 struct extent_io_tree excluded_extents;
8050 struct cache_tree corrupt_blocks;
8051 struct btrfs_path path;
8052 struct btrfs_key key;
8053 struct btrfs_key found_key;
8055 struct block_info *bits;
8057 struct extent_buffer *leaf;
8059 struct btrfs_root_item ri;
8060 struct list_head dropping_trees;
8061 struct list_head normal_trees;
8062 struct btrfs_root *root1;
8067 dev_cache = RB_ROOT;
8068 cache_tree_init(&chunk_cache);
8069 block_group_tree_init(&block_group_cache);
8070 device_extent_tree_init(&dev_extent_cache);
8072 cache_tree_init(&extent_cache);
8073 cache_tree_init(&seen);
8074 cache_tree_init(&pending);
8075 cache_tree_init(&nodes);
8076 cache_tree_init(&reada);
8077 cache_tree_init(&corrupt_blocks);
8078 extent_io_tree_init(&excluded_extents);
8079 INIT_LIST_HEAD(&dropping_trees);
8080 INIT_LIST_HEAD(&normal_trees);
8083 root->fs_info->excluded_extents = &excluded_extents;
8084 root->fs_info->fsck_extent_cache = &extent_cache;
8085 root->fs_info->free_extent_hook = free_extent_hook;
8086 root->fs_info->corrupt_blocks = &corrupt_blocks;
8090 bits = malloc(bits_nr * sizeof(struct block_info));
8096 if (ctx.progress_enabled) {
8097 ctx.tp = TASK_EXTENTS;
8098 task_start(ctx.info);
8102 root1 = root->fs_info->tree_root;
8103 level = btrfs_header_level(root1->node);
8104 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8105 root1->node->start, 0, level, 0,
8106 btrfs_level_size(root1, level), NULL);
8109 root1 = root->fs_info->chunk_root;
8110 level = btrfs_header_level(root1->node);
8111 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8112 root1->node->start, 0, level, 0,
8113 btrfs_level_size(root1, level), NULL);
8116 btrfs_init_path(&path);
8119 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8120 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8125 leaf = path.nodes[0];
8126 slot = path.slots[0];
8127 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8128 ret = btrfs_next_leaf(root, &path);
8131 leaf = path.nodes[0];
8132 slot = path.slots[0];
8134 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8135 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8136 unsigned long offset;
8139 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8140 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8141 last_snapshot = btrfs_root_last_snapshot(&ri);
8142 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8143 level = btrfs_root_level(&ri);
8144 level_size = btrfs_level_size(root, level);
8145 ret = add_root_item_to_list(&normal_trees,
8147 btrfs_root_bytenr(&ri),
8148 last_snapshot, level,
8149 0, level_size, NULL);
8153 level = btrfs_root_level(&ri);
8154 level_size = btrfs_level_size(root, level);
8155 objectid = found_key.objectid;
8156 btrfs_disk_key_to_cpu(&found_key,
8158 ret = add_root_item_to_list(&dropping_trees,
8160 btrfs_root_bytenr(&ri),
8161 last_snapshot, level,
8163 level_size, &found_key);
8170 btrfs_release_path(&path);
8173 * check_block can return -EAGAIN if it fixes something, please keep
8174 * this in mind when dealing with return values from these functions, if
8175 * we get -EAGAIN we want to fall through and restart the loop.
8177 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8178 &seen, &reada, &nodes, &extent_cache,
8179 &chunk_cache, &dev_cache, &block_group_cache,
8186 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8187 &pending, &seen, &reada, &nodes,
8188 &extent_cache, &chunk_cache, &dev_cache,
8189 &block_group_cache, &dev_extent_cache);
8196 ret = check_chunks(&chunk_cache, &block_group_cache,
8197 &dev_extent_cache, NULL, NULL, NULL, 0);
8204 ret = check_extent_refs(root, &extent_cache);
8211 ret = check_devices(&dev_cache, &dev_extent_cache);
8216 task_stop(ctx.info);
8218 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8219 extent_io_tree_cleanup(&excluded_extents);
8220 root->fs_info->fsck_extent_cache = NULL;
8221 root->fs_info->free_extent_hook = NULL;
8222 root->fs_info->corrupt_blocks = NULL;
8223 root->fs_info->excluded_extents = NULL;
8226 free_chunk_cache_tree(&chunk_cache);
8227 free_device_cache_tree(&dev_cache);
8228 free_block_group_tree(&block_group_cache);
8229 free_device_extent_tree(&dev_extent_cache);
8230 free_extent_cache_tree(&seen);
8231 free_extent_cache_tree(&pending);
8232 free_extent_cache_tree(&reada);
8233 free_extent_cache_tree(&nodes);
8236 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8237 free_extent_cache_tree(&seen);
8238 free_extent_cache_tree(&pending);
8239 free_extent_cache_tree(&reada);
8240 free_extent_cache_tree(&nodes);
8241 free_chunk_cache_tree(&chunk_cache);
8242 free_block_group_tree(&block_group_cache);
8243 free_device_cache_tree(&dev_cache);
8244 free_device_extent_tree(&dev_extent_cache);
8245 free_extent_record_cache(root->fs_info, &extent_cache);
8246 free_root_item_list(&normal_trees);
8247 free_root_item_list(&dropping_trees);
8248 extent_io_tree_cleanup(&excluded_extents);
8252 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8253 struct btrfs_root *root, int overwrite)
8255 struct extent_buffer *c;
8256 struct extent_buffer *old = root->node;
8259 struct btrfs_disk_key disk_key = {0,0,0};
8265 extent_buffer_get(c);
8268 c = btrfs_alloc_free_block(trans, root,
8269 btrfs_level_size(root, 0),
8270 root->root_key.objectid,
8271 &disk_key, level, 0, 0);
8274 extent_buffer_get(c);
8278 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8279 btrfs_set_header_level(c, level);
8280 btrfs_set_header_bytenr(c, c->start);
8281 btrfs_set_header_generation(c, trans->transid);
8282 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8283 btrfs_set_header_owner(c, root->root_key.objectid);
8285 write_extent_buffer(c, root->fs_info->fsid,
8286 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8288 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8289 btrfs_header_chunk_tree_uuid(c),
8292 btrfs_mark_buffer_dirty(c);
8294 * this case can happen in the following case:
8296 * 1.overwrite previous root.
8298 * 2.reinit reloc data root, this is because we skip pin
8299 * down reloc data tree before which means we can allocate
8300 * same block bytenr here.
8302 if (old->start == c->start) {
8303 btrfs_set_root_generation(&root->root_item,
8305 root->root_item.level = btrfs_header_level(root->node);
8306 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8307 &root->root_key, &root->root_item);
8309 free_extent_buffer(c);
8313 free_extent_buffer(old);
8315 add_root_to_dirty_list(root);
8319 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8320 struct extent_buffer *eb, int tree_root)
8322 struct extent_buffer *tmp;
8323 struct btrfs_root_item *ri;
8324 struct btrfs_key key;
8327 int level = btrfs_header_level(eb);
8333 * If we have pinned this block before, don't pin it again.
8334 * This can not only avoid forever loop with broken filesystem
8335 * but also give us some speedups.
8337 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8338 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8341 btrfs_pin_extent(fs_info, eb->start, eb->len);
8343 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8344 nritems = btrfs_header_nritems(eb);
8345 for (i = 0; i < nritems; i++) {
8347 btrfs_item_key_to_cpu(eb, &key, i);
8348 if (key.type != BTRFS_ROOT_ITEM_KEY)
8350 /* Skip the extent root and reloc roots */
8351 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8352 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8353 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8355 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8356 bytenr = btrfs_disk_root_bytenr(eb, ri);
8359 * If at any point we start needing the real root we
8360 * will have to build a stump root for the root we are
8361 * in, but for now this doesn't actually use the root so
8362 * just pass in extent_root.
8364 tmp = read_tree_block(fs_info->extent_root, bytenr,
8366 if (!extent_buffer_uptodate(tmp)) {
8367 fprintf(stderr, "Error reading root block\n");
8370 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8371 free_extent_buffer(tmp);
8375 bytenr = btrfs_node_blockptr(eb, i);
8377 /* If we aren't the tree root don't read the block */
8378 if (level == 1 && !tree_root) {
8379 btrfs_pin_extent(fs_info, bytenr, leafsize);
8383 tmp = read_tree_block(fs_info->extent_root, bytenr,
8385 if (!extent_buffer_uptodate(tmp)) {
8386 fprintf(stderr, "Error reading tree block\n");
8389 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8390 free_extent_buffer(tmp);
8399 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8403 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8407 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8410 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8412 struct btrfs_block_group_cache *cache;
8413 struct btrfs_path *path;
8414 struct extent_buffer *leaf;
8415 struct btrfs_chunk *chunk;
8416 struct btrfs_key key;
8420 path = btrfs_alloc_path();
8425 key.type = BTRFS_CHUNK_ITEM_KEY;
8428 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8430 btrfs_free_path(path);
8435 * We do this in case the block groups were screwed up and had alloc
8436 * bits that aren't actually set on the chunks. This happens with
8437 * restored images every time and could happen in real life I guess.
8439 fs_info->avail_data_alloc_bits = 0;
8440 fs_info->avail_metadata_alloc_bits = 0;
8441 fs_info->avail_system_alloc_bits = 0;
8443 /* First we need to create the in-memory block groups */
8445 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8446 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8448 btrfs_free_path(path);
8456 leaf = path->nodes[0];
8457 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8458 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8463 chunk = btrfs_item_ptr(leaf, path->slots[0],
8464 struct btrfs_chunk);
8465 btrfs_add_block_group(fs_info, 0,
8466 btrfs_chunk_type(leaf, chunk),
8467 key.objectid, key.offset,
8468 btrfs_chunk_length(leaf, chunk));
8469 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8470 key.offset + btrfs_chunk_length(leaf, chunk),
8476 cache = btrfs_lookup_first_block_group(fs_info, start);
8480 start = cache->key.objectid + cache->key.offset;
8483 btrfs_free_path(path);
8487 static int reset_balance(struct btrfs_trans_handle *trans,
8488 struct btrfs_fs_info *fs_info)
8490 struct btrfs_root *root = fs_info->tree_root;
8491 struct btrfs_path *path;
8492 struct extent_buffer *leaf;
8493 struct btrfs_key key;
8494 int del_slot, del_nr = 0;
8498 path = btrfs_alloc_path();
8502 key.objectid = BTRFS_BALANCE_OBJECTID;
8503 key.type = BTRFS_BALANCE_ITEM_KEY;
8506 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8511 goto reinit_data_reloc;
8516 ret = btrfs_del_item(trans, root, path);
8519 btrfs_release_path(path);
8521 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8522 key.type = BTRFS_ROOT_ITEM_KEY;
8525 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8529 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8534 ret = btrfs_del_items(trans, root, path,
8541 btrfs_release_path(path);
8544 ret = btrfs_search_slot(trans, root, &key, path,
8551 leaf = path->nodes[0];
8552 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8553 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8555 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8560 del_slot = path->slots[0];
8569 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8573 btrfs_release_path(path);
8576 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8577 key.type = BTRFS_ROOT_ITEM_KEY;
8578 key.offset = (u64)-1;
8579 root = btrfs_read_fs_root(fs_info, &key);
8581 fprintf(stderr, "Error reading data reloc tree\n");
8582 ret = PTR_ERR(root);
8585 record_root_in_trans(trans, root);
8586 ret = btrfs_fsck_reinit_root(trans, root, 0);
8589 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8591 btrfs_free_path(path);
8595 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8596 struct btrfs_fs_info *fs_info)
8602 * The only reason we don't do this is because right now we're just
8603 * walking the trees we find and pinning down their bytes, we don't look
8604 * at any of the leaves. In order to do mixed groups we'd have to check
8605 * the leaves of any fs roots and pin down the bytes for any file
8606 * extents we find. Not hard but why do it if we don't have to?
8608 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8609 fprintf(stderr, "We don't support re-initing the extent tree "
8610 "for mixed block groups yet, please notify a btrfs "
8611 "developer you want to do this so they can add this "
8612 "functionality.\n");
8617 * first we need to walk all of the trees except the extent tree and pin
8618 * down the bytes that are in use so we don't overwrite any existing
8621 ret = pin_metadata_blocks(fs_info);
8623 fprintf(stderr, "error pinning down used bytes\n");
8628 * Need to drop all the block groups since we're going to recreate all
8631 btrfs_free_block_groups(fs_info);
8632 ret = reset_block_groups(fs_info);
8634 fprintf(stderr, "error resetting the block groups\n");
8638 /* Ok we can allocate now, reinit the extent root */
8639 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8641 fprintf(stderr, "extent root initialization failed\n");
8643 * When the transaction code is updated we should end the
8644 * transaction, but for now progs only knows about commit so
8645 * just return an error.
8651 * Now we have all the in-memory block groups setup so we can make
8652 * allocations properly, and the metadata we care about is safe since we
8653 * pinned all of it above.
8656 struct btrfs_block_group_cache *cache;
8658 cache = btrfs_lookup_first_block_group(fs_info, start);
8661 start = cache->key.objectid + cache->key.offset;
8662 ret = btrfs_insert_item(trans, fs_info->extent_root,
8663 &cache->key, &cache->item,
8664 sizeof(cache->item));
8666 fprintf(stderr, "Error adding block group\n");
8669 btrfs_extent_post_op(trans, fs_info->extent_root);
8672 ret = reset_balance(trans, fs_info);
8674 fprintf(stderr, "error reseting the pending balance\n");
8679 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8681 struct btrfs_path *path;
8682 struct btrfs_trans_handle *trans;
8683 struct btrfs_key key;
8686 printf("Recowing metadata block %llu\n", eb->start);
8687 key.objectid = btrfs_header_owner(eb);
8688 key.type = BTRFS_ROOT_ITEM_KEY;
8689 key.offset = (u64)-1;
8691 root = btrfs_read_fs_root(root->fs_info, &key);
8693 fprintf(stderr, "Couldn't find owner root %llu\n",
8695 return PTR_ERR(root);
8698 path = btrfs_alloc_path();
8702 trans = btrfs_start_transaction(root, 1);
8703 if (IS_ERR(trans)) {
8704 btrfs_free_path(path);
8705 return PTR_ERR(trans);
8708 path->lowest_level = btrfs_header_level(eb);
8709 if (path->lowest_level)
8710 btrfs_node_key_to_cpu(eb, &key, 0);
8712 btrfs_item_key_to_cpu(eb, &key, 0);
8714 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8715 btrfs_commit_transaction(trans, root);
8716 btrfs_free_path(path);
8720 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8722 struct btrfs_path *path;
8723 struct btrfs_trans_handle *trans;
8724 struct btrfs_key key;
8727 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8728 bad->key.type, bad->key.offset);
8729 key.objectid = bad->root_id;
8730 key.type = BTRFS_ROOT_ITEM_KEY;
8731 key.offset = (u64)-1;
8733 root = btrfs_read_fs_root(root->fs_info, &key);
8735 fprintf(stderr, "Couldn't find owner root %llu\n",
8737 return PTR_ERR(root);
8740 path = btrfs_alloc_path();
8744 trans = btrfs_start_transaction(root, 1);
8745 if (IS_ERR(trans)) {
8746 btrfs_free_path(path);
8747 return PTR_ERR(trans);
8750 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8756 ret = btrfs_del_item(trans, root, path);
8758 btrfs_commit_transaction(trans, root);
8759 btrfs_free_path(path);
8763 static int zero_log_tree(struct btrfs_root *root)
8765 struct btrfs_trans_handle *trans;
8768 trans = btrfs_start_transaction(root, 1);
8769 if (IS_ERR(trans)) {
8770 ret = PTR_ERR(trans);
8773 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8774 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8775 ret = btrfs_commit_transaction(trans, root);
8779 static int populate_csum(struct btrfs_trans_handle *trans,
8780 struct btrfs_root *csum_root, char *buf, u64 start,
8787 while (offset < len) {
8788 sectorsize = csum_root->sectorsize;
8789 ret = read_extent_data(csum_root, buf, start + offset,
8793 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8794 start + offset, buf, sectorsize);
8797 offset += sectorsize;
8802 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8803 struct btrfs_root *csum_root,
8804 struct btrfs_root *cur_root)
8806 struct btrfs_path *path;
8807 struct btrfs_key key;
8808 struct extent_buffer *node;
8809 struct btrfs_file_extent_item *fi;
8816 path = btrfs_alloc_path();
8819 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8829 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8832 /* Iterate all regular file extents and fill its csum */
8834 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8836 if (key.type != BTRFS_EXTENT_DATA_KEY)
8838 node = path->nodes[0];
8839 slot = path->slots[0];
8840 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8841 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8843 start = btrfs_file_extent_disk_bytenr(node, fi);
8844 len = btrfs_file_extent_disk_num_bytes(node, fi);
8846 ret = populate_csum(trans, csum_root, buf, start, len);
8853 * TODO: if next leaf is corrupted, jump to nearest next valid
8856 ret = btrfs_next_item(cur_root, path);
8866 btrfs_free_path(path);
8871 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8872 struct btrfs_root *csum_root)
8874 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8875 struct btrfs_path *path;
8876 struct btrfs_root *tree_root = fs_info->tree_root;
8877 struct btrfs_root *cur_root;
8878 struct extent_buffer *node;
8879 struct btrfs_key key;
8883 path = btrfs_alloc_path();
8887 key.objectid = BTRFS_FS_TREE_OBJECTID;
8889 key.type = BTRFS_ROOT_ITEM_KEY;
8891 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8900 node = path->nodes[0];
8901 slot = path->slots[0];
8902 btrfs_item_key_to_cpu(node, &key, slot);
8903 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8905 if (key.type != BTRFS_ROOT_ITEM_KEY)
8907 if (!is_fstree(key.objectid))
8909 key.offset = (u64)-1;
8911 cur_root = btrfs_read_fs_root(fs_info, &key);
8912 if (IS_ERR(cur_root) || !cur_root) {
8913 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8917 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8922 ret = btrfs_next_item(tree_root, path);
8932 btrfs_free_path(path);
8936 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8937 struct btrfs_root *csum_root)
8939 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8940 struct btrfs_path *path;
8941 struct btrfs_extent_item *ei;
8942 struct extent_buffer *leaf;
8944 struct btrfs_key key;
8947 path = btrfs_alloc_path();
8952 key.type = BTRFS_EXTENT_ITEM_KEY;
8955 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8957 btrfs_free_path(path);
8961 buf = malloc(csum_root->sectorsize);
8963 btrfs_free_path(path);
8968 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8969 ret = btrfs_next_leaf(extent_root, path);
8977 leaf = path->nodes[0];
8979 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8980 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8985 ei = btrfs_item_ptr(leaf, path->slots[0],
8986 struct btrfs_extent_item);
8987 if (!(btrfs_extent_flags(leaf, ei) &
8988 BTRFS_EXTENT_FLAG_DATA)) {
8993 ret = populate_csum(trans, csum_root, buf, key.objectid,
9000 btrfs_free_path(path);
9006 * Recalculate the csum and put it into the csum tree.
9008 * Extent tree init will wipe out all the extent info, so in that case, we
9009 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9010 * will use fs/subvol trees to init the csum tree.
9012 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9013 struct btrfs_root *csum_root,
9017 return fill_csum_tree_from_fs(trans, csum_root);
9019 return fill_csum_tree_from_extent(trans, csum_root);
9022 struct root_item_info {
9023 /* level of the root */
9025 /* number of nodes at this level, must be 1 for a root */
9029 struct cache_extent cache_extent;
9032 static struct cache_tree *roots_info_cache = NULL;
9034 static void free_roots_info_cache(void)
9036 if (!roots_info_cache)
9039 while (!cache_tree_empty(roots_info_cache)) {
9040 struct cache_extent *entry;
9041 struct root_item_info *rii;
9043 entry = first_cache_extent(roots_info_cache);
9046 remove_cache_extent(roots_info_cache, entry);
9047 rii = container_of(entry, struct root_item_info, cache_extent);
9051 free(roots_info_cache);
9052 roots_info_cache = NULL;
9055 static int build_roots_info_cache(struct btrfs_fs_info *info)
9058 struct btrfs_key key;
9059 struct extent_buffer *leaf;
9060 struct btrfs_path *path;
9062 if (!roots_info_cache) {
9063 roots_info_cache = malloc(sizeof(*roots_info_cache));
9064 if (!roots_info_cache)
9066 cache_tree_init(roots_info_cache);
9069 path = btrfs_alloc_path();
9074 key.type = BTRFS_EXTENT_ITEM_KEY;
9077 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9080 leaf = path->nodes[0];
9083 struct btrfs_key found_key;
9084 struct btrfs_extent_item *ei;
9085 struct btrfs_extent_inline_ref *iref;
9086 int slot = path->slots[0];
9091 struct cache_extent *entry;
9092 struct root_item_info *rii;
9094 if (slot >= btrfs_header_nritems(leaf)) {
9095 ret = btrfs_next_leaf(info->extent_root, path);
9102 leaf = path->nodes[0];
9103 slot = path->slots[0];
9106 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9108 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9109 found_key.type != BTRFS_METADATA_ITEM_KEY)
9112 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9113 flags = btrfs_extent_flags(leaf, ei);
9115 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9116 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9119 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9120 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9121 level = found_key.offset;
9123 struct btrfs_tree_block_info *info;
9125 info = (struct btrfs_tree_block_info *)(ei + 1);
9126 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9127 level = btrfs_tree_block_level(leaf, info);
9131 * For a root extent, it must be of the following type and the
9132 * first (and only one) iref in the item.
9134 type = btrfs_extent_inline_ref_type(leaf, iref);
9135 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9138 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9139 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9141 rii = malloc(sizeof(struct root_item_info));
9146 rii->cache_extent.start = root_id;
9147 rii->cache_extent.size = 1;
9148 rii->level = (u8)-1;
9149 entry = &rii->cache_extent;
9150 ret = insert_cache_extent(roots_info_cache, entry);
9153 rii = container_of(entry, struct root_item_info,
9157 ASSERT(rii->cache_extent.start == root_id);
9158 ASSERT(rii->cache_extent.size == 1);
9160 if (level > rii->level || rii->level == (u8)-1) {
9162 rii->bytenr = found_key.objectid;
9163 rii->gen = btrfs_extent_generation(leaf, ei);
9164 rii->node_count = 1;
9165 } else if (level == rii->level) {
9173 btrfs_free_path(path);
9178 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9179 struct btrfs_path *path,
9180 const struct btrfs_key *root_key,
9181 const int read_only_mode)
9183 const u64 root_id = root_key->objectid;
9184 struct cache_extent *entry;
9185 struct root_item_info *rii;
9186 struct btrfs_root_item ri;
9187 unsigned long offset;
9189 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9192 "Error: could not find extent items for root %llu\n",
9193 root_key->objectid);
9197 rii = container_of(entry, struct root_item_info, cache_extent);
9198 ASSERT(rii->cache_extent.start == root_id);
9199 ASSERT(rii->cache_extent.size == 1);
9201 if (rii->node_count != 1) {
9203 "Error: could not find btree root extent for root %llu\n",
9208 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9209 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9211 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9212 btrfs_root_level(&ri) != rii->level ||
9213 btrfs_root_generation(&ri) != rii->gen) {
9216 * If we're in repair mode but our caller told us to not update
9217 * the root item, i.e. just check if it needs to be updated, don't
9218 * print this message, since the caller will call us again shortly
9219 * for the same root item without read only mode (the caller will
9220 * open a transaction first).
9222 if (!(read_only_mode && repair))
9224 "%sroot item for root %llu,"
9225 " current bytenr %llu, current gen %llu, current level %u,"
9226 " new bytenr %llu, new gen %llu, new level %u\n",
9227 (read_only_mode ? "" : "fixing "),
9229 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9230 btrfs_root_level(&ri),
9231 rii->bytenr, rii->gen, rii->level);
9233 if (btrfs_root_generation(&ri) > rii->gen) {
9235 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9236 root_id, btrfs_root_generation(&ri), rii->gen);
9240 if (!read_only_mode) {
9241 btrfs_set_root_bytenr(&ri, rii->bytenr);
9242 btrfs_set_root_level(&ri, rii->level);
9243 btrfs_set_root_generation(&ri, rii->gen);
9244 write_extent_buffer(path->nodes[0], &ri,
9245 offset, sizeof(ri));
9255 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9256 * caused read-only snapshots to be corrupted if they were created at a moment
9257 * when the source subvolume/snapshot had orphan items. The issue was that the
9258 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9259 * node instead of the post orphan cleanup root node.
9260 * So this function, and its callees, just detects and fixes those cases. Even
9261 * though the regression was for read-only snapshots, this function applies to
9262 * any snapshot/subvolume root.
9263 * This must be run before any other repair code - not doing it so, makes other
9264 * repair code delete or modify backrefs in the extent tree for example, which
9265 * will result in an inconsistent fs after repairing the root items.
9267 static int repair_root_items(struct btrfs_fs_info *info)
9269 struct btrfs_path *path = NULL;
9270 struct btrfs_key key;
9271 struct extent_buffer *leaf;
9272 struct btrfs_trans_handle *trans = NULL;
9277 ret = build_roots_info_cache(info);
9281 path = btrfs_alloc_path();
9287 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9288 key.type = BTRFS_ROOT_ITEM_KEY;
9293 * Avoid opening and committing transactions if a leaf doesn't have
9294 * any root items that need to be fixed, so that we avoid rotating
9295 * backup roots unnecessarily.
9298 trans = btrfs_start_transaction(info->tree_root, 1);
9299 if (IS_ERR(trans)) {
9300 ret = PTR_ERR(trans);
9305 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9309 leaf = path->nodes[0];
9312 struct btrfs_key found_key;
9314 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9315 int no_more_keys = find_next_key(path, &key);
9317 btrfs_release_path(path);
9319 ret = btrfs_commit_transaction(trans,
9331 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9333 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9335 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9338 ret = maybe_repair_root_item(info, path, &found_key,
9343 if (!trans && repair) {
9346 btrfs_release_path(path);
9356 free_roots_info_cache();
9357 btrfs_free_path(path);
9359 btrfs_commit_transaction(trans, info->tree_root);
9366 const char * const cmd_check_usage[] = {
9367 "btrfs check [options] <device>",
9368 "Check structural inegrity of a filesystem (unmounted).",
9369 "Check structural inegrity of an unmounted filesystem. Verify internal",
9370 "trees' consistency and item connectivity. In the repair mode try to",
9371 "fix the problems found.",
9372 "WARNING: the repair mode is considered dangerous",
9374 "-s|--super <superblock> use this superblock copy",
9375 "-b|--backup use the backup root copy",
9376 "--repair try to repair the filesystem",
9377 "--readonly run in read-only mode (default)",
9378 "--init-csum-tree create a new CRC tree",
9379 "--init-extent-tree create a new extent tree",
9380 "--check-data-csum verify checkums of data blocks",
9381 "-Q|--qgroup-report print a report on qgroup consistency",
9382 "-E|--subvol-extents <subvolid>",
9383 " print subvolume extents and sharing state",
9384 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9385 "-p|--progress indicate progress",
9389 int cmd_check(int argc, char **argv)
9391 struct cache_tree root_cache;
9392 struct btrfs_root *root;
9393 struct btrfs_fs_info *info;
9396 u64 tree_root_bytenr = 0;
9397 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9400 int init_csum_tree = 0;
9402 int qgroup_report = 0;
9403 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9407 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9408 OPT_CHECK_CSUM, OPT_READONLY };
9409 static const struct option long_options[] = {
9410 { "super", required_argument, NULL, 's' },
9411 { "repair", no_argument, NULL, OPT_REPAIR },
9412 { "readonly", no_argument, NULL, OPT_READONLY },
9413 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9414 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9415 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9416 { "backup", no_argument, NULL, 'b' },
9417 { "subvol-extents", required_argument, NULL, 'E' },
9418 { "qgroup-report", no_argument, NULL, 'Q' },
9419 { "tree-root", required_argument, NULL, 'r' },
9420 { "progress", no_argument, NULL, 'p' },
9424 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9428 case 'a': /* ignored */ break;
9430 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9433 num = arg_strtou64(optarg);
9434 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9436 "ERROR: super mirror should be less than: %d\n",
9437 BTRFS_SUPER_MIRROR_MAX);
9440 bytenr = btrfs_sb_offset(((int)num));
9441 printf("using SB copy %llu, bytenr %llu\n", num,
9442 (unsigned long long)bytenr);
9448 subvolid = arg_strtou64(optarg);
9451 tree_root_bytenr = arg_strtou64(optarg);
9454 ctx.progress_enabled = true;
9458 usage(cmd_check_usage);
9460 printf("enabling repair mode\n");
9462 ctree_flags |= OPEN_CTREE_WRITES;
9468 printf("Creating a new CRC tree\n");
9471 ctree_flags |= OPEN_CTREE_WRITES;
9473 case OPT_INIT_EXTENT:
9474 init_extent_tree = 1;
9475 ctree_flags |= (OPEN_CTREE_WRITES |
9476 OPEN_CTREE_NO_BLOCK_GROUPS);
9479 case OPT_CHECK_CSUM:
9480 check_data_csum = 1;
9484 argc = argc - optind;
9486 if (check_argc_exact(argc, 1))
9487 usage(cmd_check_usage);
9489 if (ctx.progress_enabled) {
9490 ctx.tp = TASK_NOTHING;
9491 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9494 /* This check is the only reason for --readonly to exist */
9495 if (readonly && repair) {
9496 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9501 cache_tree_init(&root_cache);
9503 if((ret = check_mounted(argv[optind])) < 0) {
9504 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9507 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9512 /* only allow partial opening under repair mode */
9514 ctree_flags |= OPEN_CTREE_PARTIAL;
9516 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9519 fprintf(stderr, "Couldn't open file system\n");
9525 root = info->fs_root;
9528 * repair mode will force us to commit transaction which
9529 * will make us fail to load log tree when mounting.
9531 if (repair && btrfs_super_log_root(info->super_copy)) {
9532 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9537 ret = zero_log_tree(root);
9539 fprintf(stderr, "fail to zero log tree\n");
9544 uuid_unparse(info->super_copy->fsid, uuidbuf);
9545 if (qgroup_report) {
9546 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9548 ret = qgroup_verify_all(info);
9550 print_qgroup_report(1);
9554 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9555 subvolid, argv[optind], uuidbuf);
9556 ret = print_extent_state(info, subvolid);
9559 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9561 if (!extent_buffer_uptodate(info->tree_root->node) ||
9562 !extent_buffer_uptodate(info->dev_root->node) ||
9563 !extent_buffer_uptodate(info->chunk_root->node)) {
9564 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9569 if (init_extent_tree || init_csum_tree) {
9570 struct btrfs_trans_handle *trans;
9572 trans = btrfs_start_transaction(info->extent_root, 0);
9573 if (IS_ERR(trans)) {
9574 fprintf(stderr, "Error starting transaction\n");
9575 ret = PTR_ERR(trans);
9579 if (init_extent_tree) {
9580 printf("Creating a new extent tree\n");
9581 ret = reinit_extent_tree(trans, info);
9586 if (init_csum_tree) {
9587 fprintf(stderr, "Reinit crc root\n");
9588 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9590 fprintf(stderr, "crc root initialization failed\n");
9595 ret = fill_csum_tree(trans, info->csum_root,
9598 fprintf(stderr, "crc refilling failed\n");
9603 * Ok now we commit and run the normal fsck, which will add
9604 * extent entries for all of the items it finds.
9606 ret = btrfs_commit_transaction(trans, info->extent_root);
9610 if (!extent_buffer_uptodate(info->extent_root->node)) {
9611 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9615 if (!extent_buffer_uptodate(info->csum_root->node)) {
9616 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9621 if (!ctx.progress_enabled)
9622 fprintf(stderr, "checking extents\n");
9623 ret = check_chunks_and_extents(root);
9625 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9627 ret = repair_root_items(info);
9631 fprintf(stderr, "Fixed %d roots.\n", ret);
9633 } else if (ret > 0) {
9635 "Found %d roots with an outdated root item.\n",
9638 "Please run a filesystem check with the option --repair to fix them.\n");
9643 if (!ctx.progress_enabled)
9644 fprintf(stderr, "checking free space cache\n");
9645 ret = check_space_cache(root);
9650 * We used to have to have these hole extents in between our real
9651 * extents so if we don't have this flag set we need to make sure there
9652 * are no gaps in the file extents for inodes, otherwise we can just
9653 * ignore it when this happens.
9655 no_holes = btrfs_fs_incompat(root->fs_info,
9656 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9657 if (!ctx.progress_enabled)
9658 fprintf(stderr, "checking fs roots\n");
9659 ret = check_fs_roots(root, &root_cache);
9663 fprintf(stderr, "checking csums\n");
9664 ret = check_csums(root);
9668 fprintf(stderr, "checking root refs\n");
9669 ret = check_root_refs(root, &root_cache);
9673 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9674 struct extent_buffer *eb;
9676 eb = list_first_entry(&root->fs_info->recow_ebs,
9677 struct extent_buffer, recow);
9678 list_del_init(&eb->recow);
9679 ret = recow_extent_buffer(root, eb);
9684 while (!list_empty(&delete_items)) {
9685 struct bad_item *bad;
9687 bad = list_first_entry(&delete_items, struct bad_item, list);
9688 list_del_init(&bad->list);
9690 ret = delete_bad_item(root, bad);
9694 if (info->quota_enabled) {
9696 fprintf(stderr, "checking quota groups\n");
9697 err = qgroup_verify_all(info);
9702 if (!list_empty(&root->fs_info->recow_ebs)) {
9703 fprintf(stderr, "Transid errors in file system\n");
9707 print_qgroup_report(0);
9708 if (found_old_backref) { /*
9709 * there was a disk format change when mixed
9710 * backref was in testing tree. The old format
9711 * existed about one week.
9713 printf("\n * Found old mixed backref format. "
9714 "The old format is not supported! *"
9715 "\n * Please mount the FS in readonly mode, "
9716 "backup data and re-format the FS. *\n\n");
9719 printf("found %llu bytes used err is %d\n",
9720 (unsigned long long)bytes_used, ret);
9721 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9722 printf("total tree bytes: %llu\n",
9723 (unsigned long long)total_btree_bytes);
9724 printf("total fs tree bytes: %llu\n",
9725 (unsigned long long)total_fs_tree_bytes);
9726 printf("total extent tree bytes: %llu\n",
9727 (unsigned long long)total_extent_tree_bytes);
9728 printf("btree space waste bytes: %llu\n",
9729 (unsigned long long)btree_space_waste);
9730 printf("file data blocks allocated: %llu\n referenced %llu\n",
9731 (unsigned long long)data_bytes_allocated,
9732 (unsigned long long)data_bytes_referenced);
9733 printf("%s\n", PACKAGE_STRING);
9735 free_root_recs_tree(&root_cache);
9738 btrfs_close_all_devices();
9740 if (ctx.progress_enabled)
9741 task_deinit(ctx.info);