2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
48 TASK_NOTHING, /* have to be the last element */
53 enum task_position tp;
55 struct task_info *info;
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
81 static char *task_position_string[] = {
83 "checking free space cache",
87 task_period_start(priv->info, 1000 /* 1s */);
89 if (priv->tp == TASK_NOTHING)
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
97 task_period_wait(priv->info);
102 static int print_status_return(void *p)
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
149 struct tree_backref {
150 struct extent_backref node;
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
189 unsigned int filetype:8;
191 unsigned int ref_type;
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
223 struct file_extent_hole {
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
260 /* Hole 2 will be merge center */
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
277 hole = malloc(sizeof(*hole));
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iterate merge with next holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
303 if (hole->start + hole->len >= next->start) {
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
321 hole = (struct file_extent_hole *)data;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole in the record
335 * This will do the hole split and is much restrict than add.
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
346 struct rb_node *node;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
357 if (start + len > hole->start + hole->len)
361 * Now there will be no overflap, delete the hole and re-add the
362 * split(s) if they exists.
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
417 node = rb_first(holes);
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
443 struct rb_root holes;
444 struct list_head orphan_extents;
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
489 struct cache_extent cache;
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
514 struct btrfs_key key;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
528 extent_buffer_get(root->node);
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct orphan_data_extent *src_orphan;
570 struct orphan_data_extent *dst_orphan;
574 rec = malloc(sizeof(*rec));
575 memcpy(rec, orig_rec, sizeof(*rec));
577 INIT_LIST_HEAD(&rec->backrefs);
578 INIT_LIST_HEAD(&rec->orphan_extents);
579 rec->holes = RB_ROOT;
581 list_for_each_entry(orig, &orig_rec->backrefs, list) {
582 size = sizeof(*orig) + orig->namelen + 1;
583 backref = malloc(size);
584 memcpy(backref, orig, size);
585 list_add_tail(&backref->list, &rec->backrefs);
587 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588 dst_orphan = malloc(sizeof(*dst_orphan));
589 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
591 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
594 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
600 static void print_orphan_data_extents(struct list_head *orphan_extents,
603 struct orphan_data_extent *orphan;
605 if (list_empty(orphan_extents))
607 printf("The following data extent is lost in tree %llu:\n",
609 list_for_each_entry(orphan, orphan_extents, list) {
610 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
611 orphan->objectid, orphan->offset, orphan->disk_bytenr,
616 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
618 u64 root_objectid = root->root_key.objectid;
619 int errors = rec->errors;
623 /* reloc root errors, we print its corresponding fs root objectid*/
624 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
625 root_objectid = root->root_key.offset;
626 fprintf(stderr, "reloc");
628 fprintf(stderr, "root %llu inode %llu errors %x",
629 (unsigned long long) root_objectid,
630 (unsigned long long) rec->ino, rec->errors);
632 if (errors & I_ERR_NO_INODE_ITEM)
633 fprintf(stderr, ", no inode item");
634 if (errors & I_ERR_NO_ORPHAN_ITEM)
635 fprintf(stderr, ", no orphan item");
636 if (errors & I_ERR_DUP_INODE_ITEM)
637 fprintf(stderr, ", dup inode item");
638 if (errors & I_ERR_DUP_DIR_INDEX)
639 fprintf(stderr, ", dup dir index");
640 if (errors & I_ERR_ODD_DIR_ITEM)
641 fprintf(stderr, ", odd dir item");
642 if (errors & I_ERR_ODD_FILE_EXTENT)
643 fprintf(stderr, ", odd file extent");
644 if (errors & I_ERR_BAD_FILE_EXTENT)
645 fprintf(stderr, ", bad file extent");
646 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
647 fprintf(stderr, ", file extent overlap");
648 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
649 fprintf(stderr, ", file extent discount");
650 if (errors & I_ERR_DIR_ISIZE_WRONG)
651 fprintf(stderr, ", dir isize wrong");
652 if (errors & I_ERR_FILE_NBYTES_WRONG)
653 fprintf(stderr, ", nbytes wrong");
654 if (errors & I_ERR_ODD_CSUM_ITEM)
655 fprintf(stderr, ", odd csum item");
656 if (errors & I_ERR_SOME_CSUM_MISSING)
657 fprintf(stderr, ", some csum missing");
658 if (errors & I_ERR_LINK_COUNT_WRONG)
659 fprintf(stderr, ", link count wrong");
660 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
661 fprintf(stderr, ", orphan file extent");
662 fprintf(stderr, "\n");
663 /* Print the orphan extents if needed */
664 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
665 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
667 /* Print the holes if needed */
668 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
669 struct file_extent_hole *hole;
670 struct rb_node *node;
673 node = rb_first(&rec->holes);
674 fprintf(stderr, "Found file extent holes:\n");
677 hole = rb_entry(node, struct file_extent_hole, node);
678 fprintf(stderr, "\tstart: %llu, len: %llu\n",
679 hole->start, hole->len);
680 node = rb_next(node);
683 fprintf(stderr, "\tstart: 0, len: %llu\n",
684 round_up(rec->isize, root->sectorsize));
688 static void print_ref_error(int errors)
690 if (errors & REF_ERR_NO_DIR_ITEM)
691 fprintf(stderr, ", no dir item");
692 if (errors & REF_ERR_NO_DIR_INDEX)
693 fprintf(stderr, ", no dir index");
694 if (errors & REF_ERR_NO_INODE_REF)
695 fprintf(stderr, ", no inode ref");
696 if (errors & REF_ERR_DUP_DIR_ITEM)
697 fprintf(stderr, ", dup dir item");
698 if (errors & REF_ERR_DUP_DIR_INDEX)
699 fprintf(stderr, ", dup dir index");
700 if (errors & REF_ERR_DUP_INODE_REF)
701 fprintf(stderr, ", dup inode ref");
702 if (errors & REF_ERR_INDEX_UNMATCH)
703 fprintf(stderr, ", index unmatch");
704 if (errors & REF_ERR_FILETYPE_UNMATCH)
705 fprintf(stderr, ", filetype unmatch");
706 if (errors & REF_ERR_NAME_TOO_LONG)
707 fprintf(stderr, ", name too long");
708 if (errors & REF_ERR_NO_ROOT_REF)
709 fprintf(stderr, ", no root ref");
710 if (errors & REF_ERR_NO_ROOT_BACKREF)
711 fprintf(stderr, ", no root backref");
712 if (errors & REF_ERR_DUP_ROOT_REF)
713 fprintf(stderr, ", dup root ref");
714 if (errors & REF_ERR_DUP_ROOT_BACKREF)
715 fprintf(stderr, ", dup root backref");
716 fprintf(stderr, "\n");
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
722 struct ptr_node *node;
723 struct cache_extent *cache;
724 struct inode_record *rec = NULL;
727 cache = lookup_cache_extent(inode_cache, ino, 1);
729 node = container_of(cache, struct ptr_node, cache);
731 if (mod && rec->refs > 1) {
732 node->data = clone_inode_rec(rec);
737 rec = calloc(1, sizeof(*rec));
739 rec->extent_start = (u64)-1;
741 INIT_LIST_HEAD(&rec->backrefs);
742 INIT_LIST_HEAD(&rec->orphan_extents);
743 rec->holes = RB_ROOT;
745 node = malloc(sizeof(*node));
746 node->cache.start = ino;
747 node->cache.size = 1;
750 if (ino == BTRFS_FREE_INO_OBJECTID)
753 ret = insert_cache_extent(inode_cache, &node->cache);
759 static void free_orphan_data_extents(struct list_head *orphan_extents)
761 struct orphan_data_extent *orphan;
763 while (!list_empty(orphan_extents)) {
764 orphan = list_entry(orphan_extents->next,
765 struct orphan_data_extent, list);
766 list_del(&orphan->list);
771 static void free_inode_rec(struct inode_record *rec)
773 struct inode_backref *backref;
778 while (!list_empty(&rec->backrefs)) {
779 backref = list_entry(rec->backrefs.next,
780 struct inode_backref, list);
781 list_del(&backref->list);
784 free_orphan_data_extents(&rec->orphan_extents);
785 free_file_extent_holes(&rec->holes);
789 static int can_free_inode_rec(struct inode_record *rec)
791 if (!rec->errors && rec->checked && rec->found_inode_item &&
792 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
797 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
798 struct inode_record *rec)
800 struct cache_extent *cache;
801 struct inode_backref *tmp, *backref;
802 struct ptr_node *node;
803 unsigned char filetype;
805 if (!rec->found_inode_item)
808 filetype = imode_to_type(rec->imode);
809 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
810 if (backref->found_dir_item && backref->found_dir_index) {
811 if (backref->filetype != filetype)
812 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
813 if (!backref->errors && backref->found_inode_ref &&
814 rec->nlink == rec->found_link) {
815 list_del(&backref->list);
821 if (!rec->checked || rec->merging)
824 if (S_ISDIR(rec->imode)) {
825 if (rec->found_size != rec->isize)
826 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
827 if (rec->found_file_extent)
828 rec->errors |= I_ERR_ODD_FILE_EXTENT;
829 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
830 if (rec->found_dir_item)
831 rec->errors |= I_ERR_ODD_DIR_ITEM;
832 if (rec->found_size != rec->nbytes)
833 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
834 if (rec->nlink > 0 && !no_holes &&
835 (rec->extent_end < rec->isize ||
836 first_extent_gap(&rec->holes) < rec->isize))
837 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
840 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
841 if (rec->found_csum_item && rec->nodatasum)
842 rec->errors |= I_ERR_ODD_CSUM_ITEM;
843 if (rec->some_csum_missing && !rec->nodatasum)
844 rec->errors |= I_ERR_SOME_CSUM_MISSING;
847 BUG_ON(rec->refs != 1);
848 if (can_free_inode_rec(rec)) {
849 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
850 node = container_of(cache, struct ptr_node, cache);
851 BUG_ON(node->data != rec);
852 remove_cache_extent(inode_cache, &node->cache);
858 static int check_orphan_item(struct btrfs_root *root, u64 ino)
860 struct btrfs_path path;
861 struct btrfs_key key;
864 key.objectid = BTRFS_ORPHAN_OBJECTID;
865 key.type = BTRFS_ORPHAN_ITEM_KEY;
868 btrfs_init_path(&path);
869 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
870 btrfs_release_path(&path);
876 static int process_inode_item(struct extent_buffer *eb,
877 int slot, struct btrfs_key *key,
878 struct shared_node *active_node)
880 struct inode_record *rec;
881 struct btrfs_inode_item *item;
883 rec = active_node->current;
884 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
885 if (rec->found_inode_item) {
886 rec->errors |= I_ERR_DUP_INODE_ITEM;
889 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
890 rec->nlink = btrfs_inode_nlink(eb, item);
891 rec->isize = btrfs_inode_size(eb, item);
892 rec->nbytes = btrfs_inode_nbytes(eb, item);
893 rec->imode = btrfs_inode_mode(eb, item);
894 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
896 rec->found_inode_item = 1;
898 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
899 maybe_free_inode_rec(&active_node->inode_cache, rec);
903 static struct inode_backref *get_inode_backref(struct inode_record *rec,
905 int namelen, u64 dir)
907 struct inode_backref *backref;
909 list_for_each_entry(backref, &rec->backrefs, list) {
910 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
912 if (backref->dir != dir || backref->namelen != namelen)
914 if (memcmp(name, backref->name, namelen))
919 backref = malloc(sizeof(*backref) + namelen + 1);
920 memset(backref, 0, sizeof(*backref));
922 backref->namelen = namelen;
923 memcpy(backref->name, name, namelen);
924 backref->name[namelen] = '\0';
925 list_add_tail(&backref->list, &rec->backrefs);
929 static int add_inode_backref(struct cache_tree *inode_cache,
930 u64 ino, u64 dir, u64 index,
931 const char *name, int namelen,
932 int filetype, int itemtype, int errors)
934 struct inode_record *rec;
935 struct inode_backref *backref;
937 rec = get_inode_rec(inode_cache, ino, 1);
938 backref = get_inode_backref(rec, name, namelen, dir);
940 backref->errors |= errors;
941 if (itemtype == BTRFS_DIR_INDEX_KEY) {
942 if (backref->found_dir_index)
943 backref->errors |= REF_ERR_DUP_DIR_INDEX;
944 if (backref->found_inode_ref && backref->index != index)
945 backref->errors |= REF_ERR_INDEX_UNMATCH;
946 if (backref->found_dir_item && backref->filetype != filetype)
947 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949 backref->index = index;
950 backref->filetype = filetype;
951 backref->found_dir_index = 1;
952 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
954 if (backref->found_dir_item)
955 backref->errors |= REF_ERR_DUP_DIR_ITEM;
956 if (backref->found_dir_index && backref->filetype != filetype)
957 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
959 backref->filetype = filetype;
960 backref->found_dir_item = 1;
961 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
962 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
963 if (backref->found_inode_ref)
964 backref->errors |= REF_ERR_DUP_INODE_REF;
965 if (backref->found_dir_index && backref->index != index)
966 backref->errors |= REF_ERR_INDEX_UNMATCH;
968 backref->index = index;
970 backref->ref_type = itemtype;
971 backref->found_inode_ref = 1;
976 maybe_free_inode_rec(inode_cache, rec);
980 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
981 struct cache_tree *dst_cache)
983 struct inode_backref *backref;
988 list_for_each_entry(backref, &src->backrefs, list) {
989 if (backref->found_dir_index) {
990 add_inode_backref(dst_cache, dst->ino, backref->dir,
991 backref->index, backref->name,
992 backref->namelen, backref->filetype,
993 BTRFS_DIR_INDEX_KEY, backref->errors);
995 if (backref->found_dir_item) {
997 add_inode_backref(dst_cache, dst->ino,
998 backref->dir, 0, backref->name,
999 backref->namelen, backref->filetype,
1000 BTRFS_DIR_ITEM_KEY, backref->errors);
1002 if (backref->found_inode_ref) {
1003 add_inode_backref(dst_cache, dst->ino,
1004 backref->dir, backref->index,
1005 backref->name, backref->namelen, 0,
1006 backref->ref_type, backref->errors);
1010 if (src->found_dir_item)
1011 dst->found_dir_item = 1;
1012 if (src->found_file_extent)
1013 dst->found_file_extent = 1;
1014 if (src->found_csum_item)
1015 dst->found_csum_item = 1;
1016 if (src->some_csum_missing)
1017 dst->some_csum_missing = 1;
1018 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1019 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1024 BUG_ON(src->found_link < dir_count);
1025 dst->found_link += src->found_link - dir_count;
1026 dst->found_size += src->found_size;
1027 if (src->extent_start != (u64)-1) {
1028 if (dst->extent_start == (u64)-1) {
1029 dst->extent_start = src->extent_start;
1030 dst->extent_end = src->extent_end;
1032 if (dst->extent_end > src->extent_start)
1033 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1034 else if (dst->extent_end < src->extent_start) {
1035 ret = add_file_extent_hole(&dst->holes,
1037 src->extent_start - dst->extent_end);
1039 if (dst->extent_end < src->extent_end)
1040 dst->extent_end = src->extent_end;
1044 dst->errors |= src->errors;
1045 if (src->found_inode_item) {
1046 if (!dst->found_inode_item) {
1047 dst->nlink = src->nlink;
1048 dst->isize = src->isize;
1049 dst->nbytes = src->nbytes;
1050 dst->imode = src->imode;
1051 dst->nodatasum = src->nodatasum;
1052 dst->found_inode_item = 1;
1054 dst->errors |= I_ERR_DUP_INODE_ITEM;
1062 static int splice_shared_node(struct shared_node *src_node,
1063 struct shared_node *dst_node)
1065 struct cache_extent *cache;
1066 struct ptr_node *node, *ins;
1067 struct cache_tree *src, *dst;
1068 struct inode_record *rec, *conflict;
1069 u64 current_ino = 0;
1073 if (--src_node->refs == 0)
1075 if (src_node->current)
1076 current_ino = src_node->current->ino;
1078 src = &src_node->root_cache;
1079 dst = &dst_node->root_cache;
1081 cache = search_cache_extent(src, 0);
1083 node = container_of(cache, struct ptr_node, cache);
1085 cache = next_cache_extent(cache);
1088 remove_cache_extent(src, &node->cache);
1091 ins = malloc(sizeof(*ins));
1092 ins->cache.start = node->cache.start;
1093 ins->cache.size = node->cache.size;
1097 ret = insert_cache_extent(dst, &ins->cache);
1098 if (ret == -EEXIST) {
1099 conflict = get_inode_rec(dst, rec->ino, 1);
1100 merge_inode_recs(rec, conflict, dst);
1102 conflict->checked = 1;
1103 if (dst_node->current == conflict)
1104 dst_node->current = NULL;
1106 maybe_free_inode_rec(dst, conflict);
1107 free_inode_rec(rec);
1114 if (src == &src_node->root_cache) {
1115 src = &src_node->inode_cache;
1116 dst = &dst_node->inode_cache;
1120 if (current_ino > 0 && (!dst_node->current ||
1121 current_ino > dst_node->current->ino)) {
1122 if (dst_node->current) {
1123 dst_node->current->checked = 1;
1124 maybe_free_inode_rec(dst, dst_node->current);
1126 dst_node->current = get_inode_rec(dst, current_ino, 1);
1131 static void free_inode_ptr(struct cache_extent *cache)
1133 struct ptr_node *node;
1134 struct inode_record *rec;
1136 node = container_of(cache, struct ptr_node, cache);
1138 free_inode_rec(rec);
1142 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1144 static struct shared_node *find_shared_node(struct cache_tree *shared,
1147 struct cache_extent *cache;
1148 struct shared_node *node;
1150 cache = lookup_cache_extent(shared, bytenr, 1);
1152 node = container_of(cache, struct shared_node, cache);
1158 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1161 struct shared_node *node;
1163 node = calloc(1, sizeof(*node));
1164 node->cache.start = bytenr;
1165 node->cache.size = 1;
1166 cache_tree_init(&node->root_cache);
1167 cache_tree_init(&node->inode_cache);
1170 ret = insert_cache_extent(shared, &node->cache);
1175 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1176 struct walk_control *wc, int level)
1178 struct shared_node *node;
1179 struct shared_node *dest;
1181 if (level == wc->active_node)
1184 BUG_ON(wc->active_node <= level);
1185 node = find_shared_node(&wc->shared, bytenr);
1187 add_shared_node(&wc->shared, bytenr, refs);
1188 node = find_shared_node(&wc->shared, bytenr);
1189 wc->nodes[level] = node;
1190 wc->active_node = level;
1194 if (wc->root_level == wc->active_node &&
1195 btrfs_root_refs(&root->root_item) == 0) {
1196 if (--node->refs == 0) {
1197 free_inode_recs_tree(&node->root_cache);
1198 free_inode_recs_tree(&node->inode_cache);
1199 remove_cache_extent(&wc->shared, &node->cache);
1205 dest = wc->nodes[wc->active_node];
1206 splice_shared_node(node, dest);
1207 if (node->refs == 0) {
1208 remove_cache_extent(&wc->shared, &node->cache);
1214 static int leave_shared_node(struct btrfs_root *root,
1215 struct walk_control *wc, int level)
1217 struct shared_node *node;
1218 struct shared_node *dest;
1221 if (level == wc->root_level)
1224 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1228 BUG_ON(i >= BTRFS_MAX_LEVEL);
1230 node = wc->nodes[wc->active_node];
1231 wc->nodes[wc->active_node] = NULL;
1232 wc->active_node = i;
1234 dest = wc->nodes[wc->active_node];
1235 if (wc->active_node < wc->root_level ||
1236 btrfs_root_refs(&root->root_item) > 0) {
1237 BUG_ON(node->refs <= 1);
1238 splice_shared_node(node, dest);
1240 BUG_ON(node->refs < 2);
1249 * 1 - if the root with id child_root_id is a child of root parent_root_id
1250 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1251 * has other root(s) as parent(s)
1252 * 2 - if the root child_root_id doesn't have any parent roots
1254 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1257 struct btrfs_path path;
1258 struct btrfs_key key;
1259 struct extent_buffer *leaf;
1263 btrfs_init_path(&path);
1265 key.objectid = parent_root_id;
1266 key.type = BTRFS_ROOT_REF_KEY;
1267 key.offset = child_root_id;
1268 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1272 btrfs_release_path(&path);
1276 key.objectid = child_root_id;
1277 key.type = BTRFS_ROOT_BACKREF_KEY;
1279 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1285 leaf = path.nodes[0];
1286 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1287 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1290 leaf = path.nodes[0];
1293 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1294 if (key.objectid != child_root_id ||
1295 key.type != BTRFS_ROOT_BACKREF_KEY)
1300 if (key.offset == parent_root_id) {
1301 btrfs_release_path(&path);
1308 btrfs_release_path(&path);
1311 return has_parent ? 0 : 2;
1314 static int process_dir_item(struct btrfs_root *root,
1315 struct extent_buffer *eb,
1316 int slot, struct btrfs_key *key,
1317 struct shared_node *active_node)
1327 struct btrfs_dir_item *di;
1328 struct inode_record *rec;
1329 struct cache_tree *root_cache;
1330 struct cache_tree *inode_cache;
1331 struct btrfs_key location;
1332 char namebuf[BTRFS_NAME_LEN];
1334 root_cache = &active_node->root_cache;
1335 inode_cache = &active_node->inode_cache;
1336 rec = active_node->current;
1337 rec->found_dir_item = 1;
1339 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1340 total = btrfs_item_size_nr(eb, slot);
1341 while (cur < total) {
1343 btrfs_dir_item_key_to_cpu(eb, di, &location);
1344 name_len = btrfs_dir_name_len(eb, di);
1345 data_len = btrfs_dir_data_len(eb, di);
1346 filetype = btrfs_dir_type(eb, di);
1348 rec->found_size += name_len;
1349 if (name_len <= BTRFS_NAME_LEN) {
1353 len = BTRFS_NAME_LEN;
1354 error = REF_ERR_NAME_TOO_LONG;
1356 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1358 if (location.type == BTRFS_INODE_ITEM_KEY) {
1359 add_inode_backref(inode_cache, location.objectid,
1360 key->objectid, key->offset, namebuf,
1361 len, filetype, key->type, error);
1362 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1363 add_inode_backref(root_cache, location.objectid,
1364 key->objectid, key->offset,
1365 namebuf, len, filetype,
1368 fprintf(stderr, "invalid location in dir item %u\n",
1370 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1371 key->objectid, key->offset, namebuf,
1372 len, filetype, key->type, error);
1375 len = sizeof(*di) + name_len + data_len;
1376 di = (struct btrfs_dir_item *)((char *)di + len);
1379 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1380 rec->errors |= I_ERR_DUP_DIR_INDEX;
1385 static int process_inode_ref(struct extent_buffer *eb,
1386 int slot, struct btrfs_key *key,
1387 struct shared_node *active_node)
1395 struct cache_tree *inode_cache;
1396 struct btrfs_inode_ref *ref;
1397 char namebuf[BTRFS_NAME_LEN];
1399 inode_cache = &active_node->inode_cache;
1401 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1402 total = btrfs_item_size_nr(eb, slot);
1403 while (cur < total) {
1404 name_len = btrfs_inode_ref_name_len(eb, ref);
1405 index = btrfs_inode_ref_index(eb, ref);
1406 if (name_len <= BTRFS_NAME_LEN) {
1410 len = BTRFS_NAME_LEN;
1411 error = REF_ERR_NAME_TOO_LONG;
1413 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1414 add_inode_backref(inode_cache, key->objectid, key->offset,
1415 index, namebuf, len, 0, key->type, error);
1417 len = sizeof(*ref) + name_len;
1418 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1424 static int process_inode_extref(struct extent_buffer *eb,
1425 int slot, struct btrfs_key *key,
1426 struct shared_node *active_node)
1435 struct cache_tree *inode_cache;
1436 struct btrfs_inode_extref *extref;
1437 char namebuf[BTRFS_NAME_LEN];
1439 inode_cache = &active_node->inode_cache;
1441 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1442 total = btrfs_item_size_nr(eb, slot);
1443 while (cur < total) {
1444 name_len = btrfs_inode_extref_name_len(eb, extref);
1445 index = btrfs_inode_extref_index(eb, extref);
1446 parent = btrfs_inode_extref_parent(eb, extref);
1447 if (name_len <= BTRFS_NAME_LEN) {
1451 len = BTRFS_NAME_LEN;
1452 error = REF_ERR_NAME_TOO_LONG;
1454 read_extent_buffer(eb, namebuf,
1455 (unsigned long)(extref + 1), len);
1456 add_inode_backref(inode_cache, key->objectid, parent,
1457 index, namebuf, len, 0, key->type, error);
1459 len = sizeof(*extref) + name_len;
1460 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1467 static int count_csum_range(struct btrfs_root *root, u64 start,
1468 u64 len, u64 *found)
1470 struct btrfs_key key;
1471 struct btrfs_path path;
1472 struct extent_buffer *leaf;
1477 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1479 btrfs_init_path(&path);
1481 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1483 key.type = BTRFS_EXTENT_CSUM_KEY;
1485 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1489 if (ret > 0 && path.slots[0] > 0) {
1490 leaf = path.nodes[0];
1491 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1492 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1493 key.type == BTRFS_EXTENT_CSUM_KEY)
1498 leaf = path.nodes[0];
1499 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1500 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1505 leaf = path.nodes[0];
1508 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1509 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1510 key.type != BTRFS_EXTENT_CSUM_KEY)
1513 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1514 if (key.offset >= start + len)
1517 if (key.offset > start)
1520 size = btrfs_item_size_nr(leaf, path.slots[0]);
1521 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1522 if (csum_end > start) {
1523 size = min(csum_end - start, len);
1532 btrfs_release_path(&path);
1538 static int process_file_extent(struct btrfs_root *root,
1539 struct extent_buffer *eb,
1540 int slot, struct btrfs_key *key,
1541 struct shared_node *active_node)
1543 struct inode_record *rec;
1544 struct btrfs_file_extent_item *fi;
1546 u64 disk_bytenr = 0;
1547 u64 extent_offset = 0;
1548 u64 mask = root->sectorsize - 1;
1552 rec = active_node->current;
1553 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1554 rec->found_file_extent = 1;
1556 if (rec->extent_start == (u64)-1) {
1557 rec->extent_start = key->offset;
1558 rec->extent_end = key->offset;
1561 if (rec->extent_end > key->offset)
1562 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1563 else if (rec->extent_end < key->offset) {
1564 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1565 key->offset - rec->extent_end);
1570 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1571 extent_type = btrfs_file_extent_type(eb, fi);
1573 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1574 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1576 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1577 rec->found_size += num_bytes;
1578 num_bytes = (num_bytes + mask) & ~mask;
1579 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1580 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1581 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1582 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1583 extent_offset = btrfs_file_extent_offset(eb, fi);
1584 if (num_bytes == 0 || (num_bytes & mask))
1585 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1586 if (num_bytes + extent_offset >
1587 btrfs_file_extent_ram_bytes(eb, fi))
1588 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1589 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1590 (btrfs_file_extent_compression(eb, fi) ||
1591 btrfs_file_extent_encryption(eb, fi) ||
1592 btrfs_file_extent_other_encoding(eb, fi)))
1593 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1594 if (disk_bytenr > 0)
1595 rec->found_size += num_bytes;
1597 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1599 rec->extent_end = key->offset + num_bytes;
1602 * The data reloc tree will copy full extents into its inode and then
1603 * copy the corresponding csums. Because the extent it copied could be
1604 * a preallocated extent that hasn't been written to yet there may be no
1605 * csums to copy, ergo we won't have csums for our file extent. This is
1606 * ok so just don't bother checking csums if the inode belongs to the
1609 if (disk_bytenr > 0 &&
1610 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1612 if (btrfs_file_extent_compression(eb, fi))
1613 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1615 disk_bytenr += extent_offset;
1617 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1620 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1622 rec->found_csum_item = 1;
1623 if (found < num_bytes)
1624 rec->some_csum_missing = 1;
1625 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1627 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1633 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1634 struct walk_control *wc)
1636 struct btrfs_key key;
1640 struct cache_tree *inode_cache;
1641 struct shared_node *active_node;
1643 if (wc->root_level == wc->active_node &&
1644 btrfs_root_refs(&root->root_item) == 0)
1647 active_node = wc->nodes[wc->active_node];
1648 inode_cache = &active_node->inode_cache;
1649 nritems = btrfs_header_nritems(eb);
1650 for (i = 0; i < nritems; i++) {
1651 btrfs_item_key_to_cpu(eb, &key, i);
1653 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1655 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1658 if (active_node->current == NULL ||
1659 active_node->current->ino < key.objectid) {
1660 if (active_node->current) {
1661 active_node->current->checked = 1;
1662 maybe_free_inode_rec(inode_cache,
1663 active_node->current);
1665 active_node->current = get_inode_rec(inode_cache,
1669 case BTRFS_DIR_ITEM_KEY:
1670 case BTRFS_DIR_INDEX_KEY:
1671 ret = process_dir_item(root, eb, i, &key, active_node);
1673 case BTRFS_INODE_REF_KEY:
1674 ret = process_inode_ref(eb, i, &key, active_node);
1676 case BTRFS_INODE_EXTREF_KEY:
1677 ret = process_inode_extref(eb, i, &key, active_node);
1679 case BTRFS_INODE_ITEM_KEY:
1680 ret = process_inode_item(eb, i, &key, active_node);
1682 case BTRFS_EXTENT_DATA_KEY:
1683 ret = process_file_extent(root, eb, i, &key,
1693 static void reada_walk_down(struct btrfs_root *root,
1694 struct extent_buffer *node, int slot)
1703 level = btrfs_header_level(node);
1707 nritems = btrfs_header_nritems(node);
1708 blocksize = btrfs_level_size(root, level - 1);
1709 for (i = slot; i < nritems; i++) {
1710 bytenr = btrfs_node_blockptr(node, i);
1711 ptr_gen = btrfs_node_ptr_generation(node, i);
1712 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1717 * Check the child node/leaf by the following condition:
1718 * 1. the first item key of the node/leaf should be the same with the one
1720 * 2. block in parent node should match the child node/leaf.
1721 * 3. generation of parent node and child's header should be consistent.
1723 * Or the child node/leaf pointed by the key in parent is not valid.
1725 * We hope to check leaf owner too, but since subvol may share leaves,
1726 * which makes leaf owner check not so strong, key check should be
1727 * sufficient enough for that case.
1729 static int check_child_node(struct btrfs_root *root,
1730 struct extent_buffer *parent, int slot,
1731 struct extent_buffer *child)
1733 struct btrfs_key parent_key;
1734 struct btrfs_key child_key;
1737 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1738 if (btrfs_header_level(child) == 0)
1739 btrfs_item_key_to_cpu(child, &child_key, 0);
1741 btrfs_node_key_to_cpu(child, &child_key, 0);
1743 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1746 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1747 parent_key.objectid, parent_key.type, parent_key.offset,
1748 child_key.objectid, child_key.type, child_key.offset);
1750 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1752 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1753 btrfs_node_blockptr(parent, slot),
1754 btrfs_header_bytenr(child));
1756 if (btrfs_node_ptr_generation(parent, slot) !=
1757 btrfs_header_generation(child)) {
1759 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1760 btrfs_header_generation(child),
1761 btrfs_node_ptr_generation(parent, slot));
1766 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1767 struct walk_control *wc, int *level)
1769 enum btrfs_tree_block_status status;
1772 struct extent_buffer *next;
1773 struct extent_buffer *cur;
1778 WARN_ON(*level < 0);
1779 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1780 ret = btrfs_lookup_extent_info(NULL, root,
1781 path->nodes[*level]->start,
1782 *level, 1, &refs, NULL);
1789 ret = enter_shared_node(root, path->nodes[*level]->start,
1797 while (*level >= 0) {
1798 WARN_ON(*level < 0);
1799 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1800 cur = path->nodes[*level];
1802 if (btrfs_header_level(cur) != *level)
1805 if (path->slots[*level] >= btrfs_header_nritems(cur))
1808 ret = process_one_leaf(root, cur, wc);
1813 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1814 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1815 blocksize = btrfs_level_size(root, *level - 1);
1816 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1822 ret = enter_shared_node(root, bytenr, refs,
1825 path->slots[*level]++;
1830 next = btrfs_find_tree_block(root, bytenr, blocksize);
1831 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1832 free_extent_buffer(next);
1833 reada_walk_down(root, cur, path->slots[*level]);
1834 next = read_tree_block(root, bytenr, blocksize,
1836 if (!extent_buffer_uptodate(next)) {
1837 struct btrfs_key node_key;
1839 btrfs_node_key_to_cpu(path->nodes[*level],
1841 path->slots[*level]);
1842 btrfs_add_corrupt_extent_record(root->fs_info,
1844 path->nodes[*level]->start,
1845 root->leafsize, *level);
1851 ret = check_child_node(root, cur, path->slots[*level], next);
1857 if (btrfs_is_leaf(next))
1858 status = btrfs_check_leaf(root, NULL, next);
1860 status = btrfs_check_node(root, NULL, next);
1861 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1862 free_extent_buffer(next);
1867 *level = *level - 1;
1868 free_extent_buffer(path->nodes[*level]);
1869 path->nodes[*level] = next;
1870 path->slots[*level] = 0;
1873 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1877 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1878 struct walk_control *wc, int *level)
1881 struct extent_buffer *leaf;
1883 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1884 leaf = path->nodes[i];
1885 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1890 free_extent_buffer(path->nodes[*level]);
1891 path->nodes[*level] = NULL;
1892 BUG_ON(*level > wc->active_node);
1893 if (*level == wc->active_node)
1894 leave_shared_node(root, wc, *level);
1901 static int check_root_dir(struct inode_record *rec)
1903 struct inode_backref *backref;
1906 if (!rec->found_inode_item || rec->errors)
1908 if (rec->nlink != 1 || rec->found_link != 0)
1910 if (list_empty(&rec->backrefs))
1912 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1913 if (!backref->found_inode_ref)
1915 if (backref->index != 0 || backref->namelen != 2 ||
1916 memcmp(backref->name, "..", 2))
1918 if (backref->found_dir_index || backref->found_dir_item)
1925 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1926 struct btrfs_root *root, struct btrfs_path *path,
1927 struct inode_record *rec)
1929 struct btrfs_inode_item *ei;
1930 struct btrfs_key key;
1933 key.objectid = rec->ino;
1934 key.type = BTRFS_INODE_ITEM_KEY;
1935 key.offset = (u64)-1;
1937 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1941 if (!path->slots[0]) {
1948 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1949 if (key.objectid != rec->ino) {
1954 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1955 struct btrfs_inode_item);
1956 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1957 btrfs_mark_buffer_dirty(path->nodes[0]);
1958 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1959 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1960 root->root_key.objectid);
1962 btrfs_release_path(path);
1966 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1967 struct btrfs_root *root,
1968 struct btrfs_path *path,
1969 struct inode_record *rec)
1973 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1974 btrfs_release_path(path);
1976 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1980 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1981 struct btrfs_root *root,
1982 struct btrfs_path *path,
1983 struct inode_record *rec)
1985 struct btrfs_inode_item *ei;
1986 struct btrfs_key key;
1989 key.objectid = rec->ino;
1990 key.type = BTRFS_INODE_ITEM_KEY;
1993 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2000 /* Since ret == 0, no need to check anything */
2001 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2002 struct btrfs_inode_item);
2003 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2004 btrfs_mark_buffer_dirty(path->nodes[0]);
2005 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2006 printf("reset nbytes for ino %llu root %llu\n",
2007 rec->ino, root->root_key.objectid);
2009 btrfs_release_path(path);
2013 static int add_missing_dir_index(struct btrfs_root *root,
2014 struct cache_tree *inode_cache,
2015 struct inode_record *rec,
2016 struct inode_backref *backref)
2018 struct btrfs_path *path;
2019 struct btrfs_trans_handle *trans;
2020 struct btrfs_dir_item *dir_item;
2021 struct extent_buffer *leaf;
2022 struct btrfs_key key;
2023 struct btrfs_disk_key disk_key;
2024 struct inode_record *dir_rec;
2025 unsigned long name_ptr;
2026 u32 data_size = sizeof(*dir_item) + backref->namelen;
2029 path = btrfs_alloc_path();
2033 trans = btrfs_start_transaction(root, 1);
2034 if (IS_ERR(trans)) {
2035 btrfs_free_path(path);
2036 return PTR_ERR(trans);
2039 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2040 (unsigned long long)rec->ino);
2041 key.objectid = backref->dir;
2042 key.type = BTRFS_DIR_INDEX_KEY;
2043 key.offset = backref->index;
2045 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2048 leaf = path->nodes[0];
2049 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2051 disk_key.objectid = cpu_to_le64(rec->ino);
2052 disk_key.type = BTRFS_INODE_ITEM_KEY;
2053 disk_key.offset = 0;
2055 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2056 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2057 btrfs_set_dir_data_len(leaf, dir_item, 0);
2058 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2059 name_ptr = (unsigned long)(dir_item + 1);
2060 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2061 btrfs_mark_buffer_dirty(leaf);
2062 btrfs_free_path(path);
2063 btrfs_commit_transaction(trans, root);
2065 backref->found_dir_index = 1;
2066 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2069 dir_rec->found_size += backref->namelen;
2070 if (dir_rec->found_size == dir_rec->isize &&
2071 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2072 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2073 if (dir_rec->found_size != dir_rec->isize)
2074 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2079 static int delete_dir_index(struct btrfs_root *root,
2080 struct cache_tree *inode_cache,
2081 struct inode_record *rec,
2082 struct inode_backref *backref)
2084 struct btrfs_trans_handle *trans;
2085 struct btrfs_dir_item *di;
2086 struct btrfs_path *path;
2089 path = btrfs_alloc_path();
2093 trans = btrfs_start_transaction(root, 1);
2094 if (IS_ERR(trans)) {
2095 btrfs_free_path(path);
2096 return PTR_ERR(trans);
2100 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2101 (unsigned long long)backref->dir,
2102 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2103 (unsigned long long)root->objectid);
2105 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2106 backref->name, backref->namelen,
2107 backref->index, -1);
2110 btrfs_free_path(path);
2111 btrfs_commit_transaction(trans, root);
2118 ret = btrfs_del_item(trans, root, path);
2120 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2122 btrfs_free_path(path);
2123 btrfs_commit_transaction(trans, root);
2127 static int create_inode_item(struct btrfs_root *root,
2128 struct inode_record *rec,
2129 struct inode_backref *backref, int root_dir)
2131 struct btrfs_trans_handle *trans;
2132 struct btrfs_inode_item inode_item;
2133 time_t now = time(NULL);
2136 trans = btrfs_start_transaction(root, 1);
2137 if (IS_ERR(trans)) {
2138 ret = PTR_ERR(trans);
2142 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2143 "be incomplete, please check permissions and content after "
2144 "the fsck completes.\n", (unsigned long long)root->objectid,
2145 (unsigned long long)rec->ino);
2147 memset(&inode_item, 0, sizeof(inode_item));
2148 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2150 btrfs_set_stack_inode_nlink(&inode_item, 1);
2152 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2153 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2154 if (rec->found_dir_item) {
2155 if (rec->found_file_extent)
2156 fprintf(stderr, "root %llu inode %llu has both a dir "
2157 "item and extents, unsure if it is a dir or a "
2158 "regular file so setting it as a directory\n",
2159 (unsigned long long)root->objectid,
2160 (unsigned long long)rec->ino);
2161 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2162 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2163 } else if (!rec->found_dir_item) {
2164 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2165 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2167 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2168 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2169 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2170 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2171 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2172 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2173 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2174 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2176 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2178 btrfs_commit_transaction(trans, root);
2182 static int repair_inode_backrefs(struct btrfs_root *root,
2183 struct inode_record *rec,
2184 struct cache_tree *inode_cache,
2187 struct inode_backref *tmp, *backref;
2188 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2192 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2193 if (!delete && rec->ino == root_dirid) {
2194 if (!rec->found_inode_item) {
2195 ret = create_inode_item(root, rec, backref, 1);
2202 /* Index 0 for root dir's are special, don't mess with it */
2203 if (rec->ino == root_dirid && backref->index == 0)
2207 ((backref->found_dir_index && !backref->found_inode_ref) ||
2208 (backref->found_dir_index && backref->found_inode_ref &&
2209 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2210 ret = delete_dir_index(root, inode_cache, rec, backref);
2214 list_del(&backref->list);
2218 if (!delete && !backref->found_dir_index &&
2219 backref->found_dir_item && backref->found_inode_ref) {
2220 ret = add_missing_dir_index(root, inode_cache, rec,
2225 if (backref->found_dir_item &&
2226 backref->found_dir_index &&
2227 backref->found_dir_index) {
2228 if (!backref->errors &&
2229 backref->found_inode_ref) {
2230 list_del(&backref->list);
2236 if (!delete && (!backref->found_dir_index &&
2237 !backref->found_dir_item &&
2238 backref->found_inode_ref)) {
2239 struct btrfs_trans_handle *trans;
2240 struct btrfs_key location;
2242 ret = check_dir_conflict(root, backref->name,
2248 * let nlink fixing routine to handle it,
2249 * which can do it better.
2254 location.objectid = rec->ino;
2255 location.type = BTRFS_INODE_ITEM_KEY;
2256 location.offset = 0;
2258 trans = btrfs_start_transaction(root, 1);
2259 if (IS_ERR(trans)) {
2260 ret = PTR_ERR(trans);
2263 fprintf(stderr, "adding missing dir index/item pair "
2265 (unsigned long long)rec->ino);
2266 ret = btrfs_insert_dir_item(trans, root, backref->name,
2268 backref->dir, &location,
2269 imode_to_type(rec->imode),
2272 btrfs_commit_transaction(trans, root);
2276 if (!delete && (backref->found_inode_ref &&
2277 backref->found_dir_index &&
2278 backref->found_dir_item &&
2279 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2280 !rec->found_inode_item)) {
2281 ret = create_inode_item(root, rec, backref, 0);
2288 return ret ? ret : repaired;
2292 * To determine the file type for nlink/inode_item repair
2294 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2295 * Return -ENOENT if file type is not found.
2297 static int find_file_type(struct inode_record *rec, u8 *type)
2299 struct inode_backref *backref;
2301 /* For inode item recovered case */
2302 if (rec->found_inode_item) {
2303 *type = imode_to_type(rec->imode);
2307 list_for_each_entry(backref, &rec->backrefs, list) {
2308 if (backref->found_dir_index || backref->found_dir_item) {
2309 *type = backref->filetype;
2317 * To determine the file name for nlink repair
2319 * Return 0 if file name is found, set name and namelen.
2320 * Return -ENOENT if file name is not found.
2322 static int find_file_name(struct inode_record *rec,
2323 char *name, int *namelen)
2325 struct inode_backref *backref;
2327 list_for_each_entry(backref, &rec->backrefs, list) {
2328 if (backref->found_dir_index || backref->found_dir_item ||
2329 backref->found_inode_ref) {
2330 memcpy(name, backref->name, backref->namelen);
2331 *namelen = backref->namelen;
2338 /* Reset the nlink of the inode to the correct one */
2339 static int reset_nlink(struct btrfs_trans_handle *trans,
2340 struct btrfs_root *root,
2341 struct btrfs_path *path,
2342 struct inode_record *rec)
2344 struct inode_backref *backref;
2345 struct inode_backref *tmp;
2346 struct btrfs_key key;
2347 struct btrfs_inode_item *inode_item;
2350 /* We don't believe this either, reset it and iterate backref */
2351 rec->found_link = 0;
2353 /* Remove all backref including the valid ones */
2354 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2356 backref->index, backref->name,
2357 backref->namelen, 0);
2361 /* remove invalid backref, so it won't be added back */
2362 if (!(backref->found_dir_index &&
2363 backref->found_dir_item &&
2364 backref->found_inode_ref)) {
2365 list_del(&backref->list);
2372 /* Set nlink to 0 */
2373 key.objectid = rec->ino;
2374 key.type = BTRFS_INODE_ITEM_KEY;
2376 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2383 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2384 struct btrfs_inode_item);
2385 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2386 btrfs_mark_buffer_dirty(path->nodes[0]);
2387 btrfs_release_path(path);
2390 * Add back valid inode_ref/dir_item/dir_index,
2391 * add_link() will handle the nlink inc, so new nlink must be correct
2393 list_for_each_entry(backref, &rec->backrefs, list) {
2394 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2395 backref->name, backref->namelen,
2396 backref->filetype, &backref->index, 1);
2401 btrfs_release_path(path);
2405 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2406 struct btrfs_root *root,
2407 struct btrfs_path *path,
2408 struct inode_record *rec)
2410 char *dir_name = "lost+found";
2411 char namebuf[BTRFS_NAME_LEN] = {0};
2416 int name_recovered = 0;
2417 int type_recovered = 0;
2421 * Get file name and type first before these invalid inode ref
2422 * are deleted by remove_all_invalid_backref()
2424 name_recovered = !find_file_name(rec, namebuf, &namelen);
2425 type_recovered = !find_file_type(rec, &type);
2427 if (!name_recovered) {
2428 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2429 rec->ino, rec->ino);
2430 namelen = count_digits(rec->ino);
2431 sprintf(namebuf, "%llu", rec->ino);
2434 if (!type_recovered) {
2435 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2437 type = BTRFS_FT_REG_FILE;
2441 ret = reset_nlink(trans, root, path, rec);
2444 "Failed to reset nlink for inode %llu: %s\n",
2445 rec->ino, strerror(-ret));
2449 if (rec->found_link == 0) {
2450 lost_found_ino = root->highest_inode;
2451 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2456 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2457 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2460 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2461 dir_name, strerror(-ret));
2464 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2465 namebuf, namelen, type, NULL, 1);
2467 * Add ".INO" suffix several times to handle case where
2468 * "FILENAME.INO" is already taken by another file.
2470 while (ret == -EEXIST) {
2472 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2474 if (namelen + count_digits(rec->ino) + 1 >
2479 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2481 namelen += count_digits(rec->ino) + 1;
2482 ret = btrfs_add_link(trans, root, rec->ino,
2483 lost_found_ino, namebuf,
2484 namelen, type, NULL, 1);
2488 "Failed to link the inode %llu to %s dir: %s\n",
2489 rec->ino, dir_name, strerror(-ret));
2493 * Just increase the found_link, don't actually add the
2494 * backref. This will make things easier and this inode
2495 * record will be freed after the repair is done.
2496 * So fsck will not report problem about this inode.
2499 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2500 namelen, namebuf, dir_name);
2502 printf("Fixed the nlink of inode %llu\n", rec->ino);
2505 * Clear the flag anyway, or we will loop forever for the same inode
2506 * as it will not be removed from the bad inode list and the dead loop
2509 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2510 btrfs_release_path(path);
2515 * Check if there is any normal(reg or prealloc) file extent for given
2517 * This is used to determine the file type when neither its dir_index/item or
2518 * inode_item exists.
2520 * This will *NOT* report error, if any error happens, just consider it does
2521 * not have any normal file extent.
2523 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2525 struct btrfs_path *path;
2526 struct btrfs_key key;
2527 struct btrfs_key found_key;
2528 struct btrfs_file_extent_item *fi;
2532 path = btrfs_alloc_path();
2536 key.type = BTRFS_EXTENT_DATA_KEY;
2539 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2544 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2545 ret = btrfs_next_leaf(root, path);
2552 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2554 if (found_key.objectid != ino ||
2555 found_key.type != BTRFS_EXTENT_DATA_KEY)
2557 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2558 struct btrfs_file_extent_item);
2559 type = btrfs_file_extent_type(path->nodes[0], fi);
2560 if (type != BTRFS_FILE_EXTENT_INLINE) {
2566 btrfs_free_path(path);
2570 static u32 btrfs_type_to_imode(u8 type)
2572 static u32 imode_by_btrfs_type[] = {
2573 [BTRFS_FT_REG_FILE] = S_IFREG,
2574 [BTRFS_FT_DIR] = S_IFDIR,
2575 [BTRFS_FT_CHRDEV] = S_IFCHR,
2576 [BTRFS_FT_BLKDEV] = S_IFBLK,
2577 [BTRFS_FT_FIFO] = S_IFIFO,
2578 [BTRFS_FT_SOCK] = S_IFSOCK,
2579 [BTRFS_FT_SYMLINK] = S_IFLNK,
2582 return imode_by_btrfs_type[(type)];
2585 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2586 struct btrfs_root *root,
2587 struct btrfs_path *path,
2588 struct inode_record *rec)
2592 int type_recovered = 0;
2595 printf("Trying to rebuild inode:%llu\n", rec->ino);
2597 type_recovered = !find_file_type(rec, &filetype);
2600 * Try to determine inode type if type not found.
2602 * For found regular file extent, it must be FILE.
2603 * For found dir_item/index, it must be DIR.
2605 * For undetermined one, use FILE as fallback.
2608 * 1. If found backref(inode_index/item is already handled) to it,
2610 * Need new inode-inode ref structure to allow search for that.
2612 if (!type_recovered) {
2613 if (rec->found_file_extent &&
2614 find_normal_file_extent(root, rec->ino)) {
2616 filetype = BTRFS_FT_REG_FILE;
2617 } else if (rec->found_dir_item) {
2619 filetype = BTRFS_FT_DIR;
2620 } else if (!list_empty(&rec->orphan_extents)) {
2622 filetype = BTRFS_FT_REG_FILE;
2624 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2627 filetype = BTRFS_FT_REG_FILE;
2631 ret = btrfs_new_inode(trans, root, rec->ino,
2632 mode | btrfs_type_to_imode(filetype));
2637 * Here inode rebuild is done, we only rebuild the inode item,
2638 * don't repair the nlink(like move to lost+found).
2639 * That is the job of nlink repair.
2641 * We just fill the record and return
2643 rec->found_dir_item = 1;
2644 rec->imode = mode | btrfs_type_to_imode(filetype);
2646 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2647 /* Ensure the inode_nlinks repair function will be called */
2648 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2653 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2654 struct btrfs_root *root,
2655 struct btrfs_path *path,
2656 struct inode_record *rec)
2658 struct orphan_data_extent *orphan;
2659 struct orphan_data_extent *tmp;
2662 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2664 * Check for conflicting file extents
2666 * Here we don't know whether the extents is compressed or not,
2667 * so we can only assume it not compressed nor data offset,
2668 * and use its disk_len as extent length.
2670 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2671 orphan->offset, orphan->disk_len, 0);
2672 btrfs_release_path(path);
2677 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2678 orphan->disk_bytenr, orphan->disk_len);
2679 ret = btrfs_free_extent(trans,
2680 root->fs_info->extent_root,
2681 orphan->disk_bytenr, orphan->disk_len,
2682 0, root->objectid, orphan->objectid,
2687 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2688 orphan->offset, orphan->disk_bytenr,
2689 orphan->disk_len, orphan->disk_len);
2693 /* Update file size info */
2694 rec->found_size += orphan->disk_len;
2695 if (rec->found_size == rec->nbytes)
2696 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2698 /* Update the file extent hole info too */
2699 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2703 if (RB_EMPTY_ROOT(&rec->holes))
2704 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2706 list_del(&orphan->list);
2709 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2714 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2715 struct btrfs_root *root,
2716 struct btrfs_path *path,
2717 struct inode_record *rec)
2719 struct rb_node *node;
2720 struct file_extent_hole *hole;
2724 node = rb_first(&rec->holes);
2728 hole = rb_entry(node, struct file_extent_hole, node);
2729 ret = btrfs_punch_hole(trans, root, rec->ino,
2730 hole->start, hole->len);
2733 ret = del_file_extent_hole(&rec->holes, hole->start,
2737 if (RB_EMPTY_ROOT(&rec->holes))
2738 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2739 node = rb_first(&rec->holes);
2741 /* special case for a file losing all its file extent */
2743 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2744 round_up(rec->isize, root->sectorsize));
2748 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2749 rec->ino, root->objectid);
2754 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2756 struct btrfs_trans_handle *trans;
2757 struct btrfs_path *path;
2760 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2761 I_ERR_NO_ORPHAN_ITEM |
2762 I_ERR_LINK_COUNT_WRONG |
2763 I_ERR_NO_INODE_ITEM |
2764 I_ERR_FILE_EXTENT_ORPHAN |
2765 I_ERR_FILE_EXTENT_DISCOUNT|
2766 I_ERR_FILE_NBYTES_WRONG)))
2769 path = btrfs_alloc_path();
2774 * For nlink repair, it may create a dir and add link, so
2775 * 2 for parent(256)'s dir_index and dir_item
2776 * 2 for lost+found dir's inode_item and inode_ref
2777 * 1 for the new inode_ref of the file
2778 * 2 for lost+found dir's dir_index and dir_item for the file
2780 trans = btrfs_start_transaction(root, 7);
2781 if (IS_ERR(trans)) {
2782 btrfs_free_path(path);
2783 return PTR_ERR(trans);
2786 if (rec->errors & I_ERR_NO_INODE_ITEM)
2787 ret = repair_inode_no_item(trans, root, path, rec);
2788 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2789 ret = repair_inode_orphan_extent(trans, root, path, rec);
2790 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2791 ret = repair_inode_discount_extent(trans, root, path, rec);
2792 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2793 ret = repair_inode_isize(trans, root, path, rec);
2794 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2795 ret = repair_inode_orphan_item(trans, root, path, rec);
2796 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2797 ret = repair_inode_nlinks(trans, root, path, rec);
2798 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2799 ret = repair_inode_nbytes(trans, root, path, rec);
2800 btrfs_commit_transaction(trans, root);
2801 btrfs_free_path(path);
2805 static int check_inode_recs(struct btrfs_root *root,
2806 struct cache_tree *inode_cache)
2808 struct cache_extent *cache;
2809 struct ptr_node *node;
2810 struct inode_record *rec;
2811 struct inode_backref *backref;
2816 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2818 if (btrfs_root_refs(&root->root_item) == 0) {
2819 if (!cache_tree_empty(inode_cache))
2820 fprintf(stderr, "warning line %d\n", __LINE__);
2825 * We need to record the highest inode number for later 'lost+found'
2827 * We must select a ino not used/refered by any existing inode, or
2828 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2829 * this may cause 'lost+found' dir has wrong nlinks.
2831 cache = last_cache_extent(inode_cache);
2833 node = container_of(cache, struct ptr_node, cache);
2835 if (rec->ino > root->highest_inode)
2836 root->highest_inode = rec->ino;
2840 * We need to repair backrefs first because we could change some of the
2841 * errors in the inode recs.
2843 * We also need to go through and delete invalid backrefs first and then
2844 * add the correct ones second. We do this because we may get EEXIST
2845 * when adding back the correct index because we hadn't yet deleted the
2848 * For example, if we were missing a dir index then the directories
2849 * isize would be wrong, so if we fixed the isize to what we thought it
2850 * would be and then fixed the backref we'd still have a invalid fs, so
2851 * we need to add back the dir index and then check to see if the isize
2856 if (stage == 3 && !err)
2859 cache = search_cache_extent(inode_cache, 0);
2860 while (repair && cache) {
2861 node = container_of(cache, struct ptr_node, cache);
2863 cache = next_cache_extent(cache);
2865 /* Need to free everything up and rescan */
2867 remove_cache_extent(inode_cache, &node->cache);
2869 free_inode_rec(rec);
2873 if (list_empty(&rec->backrefs))
2876 ret = repair_inode_backrefs(root, rec, inode_cache,
2890 rec = get_inode_rec(inode_cache, root_dirid, 0);
2892 ret = check_root_dir(rec);
2894 fprintf(stderr, "root %llu root dir %llu error\n",
2895 (unsigned long long)root->root_key.objectid,
2896 (unsigned long long)root_dirid);
2897 print_inode_error(root, rec);
2902 struct btrfs_trans_handle *trans;
2904 trans = btrfs_start_transaction(root, 1);
2905 if (IS_ERR(trans)) {
2906 err = PTR_ERR(trans);
2911 "root %llu missing its root dir, recreating\n",
2912 (unsigned long long)root->objectid);
2914 ret = btrfs_make_root_dir(trans, root, root_dirid);
2917 btrfs_commit_transaction(trans, root);
2921 fprintf(stderr, "root %llu root dir %llu not found\n",
2922 (unsigned long long)root->root_key.objectid,
2923 (unsigned long long)root_dirid);
2927 cache = search_cache_extent(inode_cache, 0);
2930 node = container_of(cache, struct ptr_node, cache);
2932 remove_cache_extent(inode_cache, &node->cache);
2934 if (rec->ino == root_dirid ||
2935 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2936 free_inode_rec(rec);
2940 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2941 ret = check_orphan_item(root, rec->ino);
2943 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2944 if (can_free_inode_rec(rec)) {
2945 free_inode_rec(rec);
2950 if (!rec->found_inode_item)
2951 rec->errors |= I_ERR_NO_INODE_ITEM;
2952 if (rec->found_link != rec->nlink)
2953 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2955 ret = try_repair_inode(root, rec);
2956 if (ret == 0 && can_free_inode_rec(rec)) {
2957 free_inode_rec(rec);
2963 if (!(repair && ret == 0))
2965 print_inode_error(root, rec);
2966 list_for_each_entry(backref, &rec->backrefs, list) {
2967 if (!backref->found_dir_item)
2968 backref->errors |= REF_ERR_NO_DIR_ITEM;
2969 if (!backref->found_dir_index)
2970 backref->errors |= REF_ERR_NO_DIR_INDEX;
2971 if (!backref->found_inode_ref)
2972 backref->errors |= REF_ERR_NO_INODE_REF;
2973 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2974 " namelen %u name %s filetype %d errors %x",
2975 (unsigned long long)backref->dir,
2976 (unsigned long long)backref->index,
2977 backref->namelen, backref->name,
2978 backref->filetype, backref->errors);
2979 print_ref_error(backref->errors);
2981 free_inode_rec(rec);
2983 return (error > 0) ? -1 : 0;
2986 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2989 struct cache_extent *cache;
2990 struct root_record *rec = NULL;
2993 cache = lookup_cache_extent(root_cache, objectid, 1);
2995 rec = container_of(cache, struct root_record, cache);
2997 rec = calloc(1, sizeof(*rec));
2998 rec->objectid = objectid;
2999 INIT_LIST_HEAD(&rec->backrefs);
3000 rec->cache.start = objectid;
3001 rec->cache.size = 1;
3003 ret = insert_cache_extent(root_cache, &rec->cache);
3009 static struct root_backref *get_root_backref(struct root_record *rec,
3010 u64 ref_root, u64 dir, u64 index,
3011 const char *name, int namelen)
3013 struct root_backref *backref;
3015 list_for_each_entry(backref, &rec->backrefs, list) {
3016 if (backref->ref_root != ref_root || backref->dir != dir ||
3017 backref->namelen != namelen)
3019 if (memcmp(name, backref->name, namelen))
3024 backref = calloc(1, sizeof(*backref) + namelen + 1);
3025 backref->ref_root = ref_root;
3027 backref->index = index;
3028 backref->namelen = namelen;
3029 memcpy(backref->name, name, namelen);
3030 backref->name[namelen] = '\0';
3031 list_add_tail(&backref->list, &rec->backrefs);
3035 static void free_root_record(struct cache_extent *cache)
3037 struct root_record *rec;
3038 struct root_backref *backref;
3040 rec = container_of(cache, struct root_record, cache);
3041 while (!list_empty(&rec->backrefs)) {
3042 backref = list_entry(rec->backrefs.next,
3043 struct root_backref, list);
3044 list_del(&backref->list);
3051 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3053 static int add_root_backref(struct cache_tree *root_cache,
3054 u64 root_id, u64 ref_root, u64 dir, u64 index,
3055 const char *name, int namelen,
3056 int item_type, int errors)
3058 struct root_record *rec;
3059 struct root_backref *backref;
3061 rec = get_root_rec(root_cache, root_id);
3062 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3064 backref->errors |= errors;
3066 if (item_type != BTRFS_DIR_ITEM_KEY) {
3067 if (backref->found_dir_index || backref->found_back_ref ||
3068 backref->found_forward_ref) {
3069 if (backref->index != index)
3070 backref->errors |= REF_ERR_INDEX_UNMATCH;
3072 backref->index = index;
3076 if (item_type == BTRFS_DIR_ITEM_KEY) {
3077 if (backref->found_forward_ref)
3079 backref->found_dir_item = 1;
3080 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3081 backref->found_dir_index = 1;
3082 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3083 if (backref->found_forward_ref)
3084 backref->errors |= REF_ERR_DUP_ROOT_REF;
3085 else if (backref->found_dir_item)
3087 backref->found_forward_ref = 1;
3088 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3089 if (backref->found_back_ref)
3090 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3091 backref->found_back_ref = 1;
3096 if (backref->found_forward_ref && backref->found_dir_item)
3097 backref->reachable = 1;
3101 static int merge_root_recs(struct btrfs_root *root,
3102 struct cache_tree *src_cache,
3103 struct cache_tree *dst_cache)
3105 struct cache_extent *cache;
3106 struct ptr_node *node;
3107 struct inode_record *rec;
3108 struct inode_backref *backref;
3111 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3112 free_inode_recs_tree(src_cache);
3117 cache = search_cache_extent(src_cache, 0);
3120 node = container_of(cache, struct ptr_node, cache);
3122 remove_cache_extent(src_cache, &node->cache);
3125 ret = is_child_root(root, root->objectid, rec->ino);
3131 list_for_each_entry(backref, &rec->backrefs, list) {
3132 BUG_ON(backref->found_inode_ref);
3133 if (backref->found_dir_item)
3134 add_root_backref(dst_cache, rec->ino,
3135 root->root_key.objectid, backref->dir,
3136 backref->index, backref->name,
3137 backref->namelen, BTRFS_DIR_ITEM_KEY,
3139 if (backref->found_dir_index)
3140 add_root_backref(dst_cache, rec->ino,
3141 root->root_key.objectid, backref->dir,
3142 backref->index, backref->name,
3143 backref->namelen, BTRFS_DIR_INDEX_KEY,
3147 free_inode_rec(rec);
3154 static int check_root_refs(struct btrfs_root *root,
3155 struct cache_tree *root_cache)
3157 struct root_record *rec;
3158 struct root_record *ref_root;
3159 struct root_backref *backref;
3160 struct cache_extent *cache;
3166 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3169 /* fixme: this can not detect circular references */
3172 cache = search_cache_extent(root_cache, 0);
3176 rec = container_of(cache, struct root_record, cache);
3177 cache = next_cache_extent(cache);
3179 if (rec->found_ref == 0)
3182 list_for_each_entry(backref, &rec->backrefs, list) {
3183 if (!backref->reachable)
3186 ref_root = get_root_rec(root_cache,
3188 if (ref_root->found_ref > 0)
3191 backref->reachable = 0;
3193 if (rec->found_ref == 0)
3199 cache = search_cache_extent(root_cache, 0);
3203 rec = container_of(cache, struct root_record, cache);
3204 cache = next_cache_extent(cache);
3206 if (rec->found_ref == 0 &&
3207 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3208 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3209 ret = check_orphan_item(root->fs_info->tree_root,
3215 * If we don't have a root item then we likely just have
3216 * a dir item in a snapshot for this root but no actual
3217 * ref key or anything so it's meaningless.
3219 if (!rec->found_root_item)
3222 fprintf(stderr, "fs tree %llu not referenced\n",
3223 (unsigned long long)rec->objectid);
3227 if (rec->found_ref > 0 && !rec->found_root_item)
3229 list_for_each_entry(backref, &rec->backrefs, list) {
3230 if (!backref->found_dir_item)
3231 backref->errors |= REF_ERR_NO_DIR_ITEM;
3232 if (!backref->found_dir_index)
3233 backref->errors |= REF_ERR_NO_DIR_INDEX;
3234 if (!backref->found_back_ref)
3235 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3236 if (!backref->found_forward_ref)
3237 backref->errors |= REF_ERR_NO_ROOT_REF;
3238 if (backref->reachable && backref->errors)
3245 fprintf(stderr, "fs tree %llu refs %u %s\n",
3246 (unsigned long long)rec->objectid, rec->found_ref,
3247 rec->found_root_item ? "" : "not found");
3249 list_for_each_entry(backref, &rec->backrefs, list) {
3250 if (!backref->reachable)
3252 if (!backref->errors && rec->found_root_item)
3254 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3255 " index %llu namelen %u name %s errors %x\n",
3256 (unsigned long long)backref->ref_root,
3257 (unsigned long long)backref->dir,
3258 (unsigned long long)backref->index,
3259 backref->namelen, backref->name,
3261 print_ref_error(backref->errors);
3264 return errors > 0 ? 1 : 0;
3267 static int process_root_ref(struct extent_buffer *eb, int slot,
3268 struct btrfs_key *key,
3269 struct cache_tree *root_cache)
3275 struct btrfs_root_ref *ref;
3276 char namebuf[BTRFS_NAME_LEN];
3279 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3281 dirid = btrfs_root_ref_dirid(eb, ref);
3282 index = btrfs_root_ref_sequence(eb, ref);
3283 name_len = btrfs_root_ref_name_len(eb, ref);
3285 if (name_len <= BTRFS_NAME_LEN) {
3289 len = BTRFS_NAME_LEN;
3290 error = REF_ERR_NAME_TOO_LONG;
3292 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3294 if (key->type == BTRFS_ROOT_REF_KEY) {
3295 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3296 index, namebuf, len, key->type, error);
3298 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3299 index, namebuf, len, key->type, error);
3304 static void free_corrupt_block(struct cache_extent *cache)
3306 struct btrfs_corrupt_block *corrupt;
3308 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3312 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3315 * Repair the btree of the given root.
3317 * The fix is to remove the node key in corrupt_blocks cache_tree.
3318 * and rebalance the tree.
3319 * After the fix, the btree should be writeable.
3321 static int repair_btree(struct btrfs_root *root,
3322 struct cache_tree *corrupt_blocks)
3324 struct btrfs_trans_handle *trans;
3325 struct btrfs_path *path;
3326 struct btrfs_corrupt_block *corrupt;
3327 struct cache_extent *cache;
3328 struct btrfs_key key;
3333 if (cache_tree_empty(corrupt_blocks))
3336 path = btrfs_alloc_path();
3340 trans = btrfs_start_transaction(root, 1);
3341 if (IS_ERR(trans)) {
3342 ret = PTR_ERR(trans);
3343 fprintf(stderr, "Error starting transaction: %s\n",
3347 cache = first_cache_extent(corrupt_blocks);
3349 corrupt = container_of(cache, struct btrfs_corrupt_block,
3351 level = corrupt->level;
3352 path->lowest_level = level;
3353 key.objectid = corrupt->key.objectid;
3354 key.type = corrupt->key.type;
3355 key.offset = corrupt->key.offset;
3358 * Here we don't want to do any tree balance, since it may
3359 * cause a balance with corrupted brother leaf/node,
3360 * so ins_len set to 0 here.
3361 * Balance will be done after all corrupt node/leaf is deleted.
3363 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3366 offset = btrfs_node_blockptr(path->nodes[level],
3367 path->slots[level]);
3369 /* Remove the ptr */
3370 ret = btrfs_del_ptr(trans, root, path, level,
3371 path->slots[level]);
3375 * Remove the corresponding extent
3376 * return value is not concerned.
3378 btrfs_release_path(path);
3379 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3380 0, root->root_key.objectid,
3382 cache = next_cache_extent(cache);
3385 /* Balance the btree using btrfs_search_slot() */
3386 cache = first_cache_extent(corrupt_blocks);
3388 corrupt = container_of(cache, struct btrfs_corrupt_block,
3390 memcpy(&key, &corrupt->key, sizeof(key));
3391 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3394 /* return will always >0 since it won't find the item */
3396 btrfs_release_path(path);
3397 cache = next_cache_extent(cache);
3400 btrfs_commit_transaction(trans, root);
3402 btrfs_free_path(path);
3406 static int check_fs_root(struct btrfs_root *root,
3407 struct cache_tree *root_cache,
3408 struct walk_control *wc)
3414 struct btrfs_path path;
3415 struct shared_node root_node;
3416 struct root_record *rec;
3417 struct btrfs_root_item *root_item = &root->root_item;
3418 struct cache_tree corrupt_blocks;
3419 struct orphan_data_extent *orphan;
3420 struct orphan_data_extent *tmp;
3421 enum btrfs_tree_block_status status;
3424 * Reuse the corrupt_block cache tree to record corrupted tree block
3426 * Unlike the usage in extent tree check, here we do it in a per
3427 * fs/subvol tree base.
3429 cache_tree_init(&corrupt_blocks);
3430 root->fs_info->corrupt_blocks = &corrupt_blocks;
3432 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3433 rec = get_root_rec(root_cache, root->root_key.objectid);
3434 if (btrfs_root_refs(root_item) > 0)
3435 rec->found_root_item = 1;
3438 btrfs_init_path(&path);
3439 memset(&root_node, 0, sizeof(root_node));
3440 cache_tree_init(&root_node.root_cache);
3441 cache_tree_init(&root_node.inode_cache);
3443 /* Move the orphan extent record to corresponding inode_record */
3444 list_for_each_entry_safe(orphan, tmp,
3445 &root->orphan_data_extents, list) {
3446 struct inode_record *inode;
3448 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3450 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3451 list_move(&orphan->list, &inode->orphan_extents);
3454 level = btrfs_header_level(root->node);
3455 memset(wc->nodes, 0, sizeof(wc->nodes));
3456 wc->nodes[level] = &root_node;
3457 wc->active_node = level;
3458 wc->root_level = level;
3460 /* We may not have checked the root block, lets do that now */
3461 if (btrfs_is_leaf(root->node))
3462 status = btrfs_check_leaf(root, NULL, root->node);
3464 status = btrfs_check_node(root, NULL, root->node);
3465 if (status != BTRFS_TREE_BLOCK_CLEAN)
3468 if (btrfs_root_refs(root_item) > 0 ||
3469 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3470 path.nodes[level] = root->node;
3471 extent_buffer_get(root->node);
3472 path.slots[level] = 0;
3474 struct btrfs_key key;
3475 struct btrfs_disk_key found_key;
3477 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3478 level = root_item->drop_level;
3479 path.lowest_level = level;
3480 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3483 btrfs_node_key(path.nodes[level], &found_key,
3485 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3486 sizeof(found_key)));
3490 wret = walk_down_tree(root, &path, wc, &level);
3496 wret = walk_up_tree(root, &path, wc, &level);
3503 btrfs_release_path(&path);
3505 if (!cache_tree_empty(&corrupt_blocks)) {
3506 struct cache_extent *cache;
3507 struct btrfs_corrupt_block *corrupt;
3509 printf("The following tree block(s) is corrupted in tree %llu:\n",
3510 root->root_key.objectid);
3511 cache = first_cache_extent(&corrupt_blocks);
3513 corrupt = container_of(cache,
3514 struct btrfs_corrupt_block,
3516 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3517 cache->start, corrupt->level,
3518 corrupt->key.objectid, corrupt->key.type,
3519 corrupt->key.offset);
3520 cache = next_cache_extent(cache);
3523 printf("Try to repair the btree for root %llu\n",
3524 root->root_key.objectid);
3525 ret = repair_btree(root, &corrupt_blocks);
3527 fprintf(stderr, "Failed to repair btree: %s\n",
3530 printf("Btree for root %llu is fixed\n",
3531 root->root_key.objectid);
3535 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3539 if (root_node.current) {
3540 root_node.current->checked = 1;
3541 maybe_free_inode_rec(&root_node.inode_cache,
3545 err = check_inode_recs(root, &root_node.inode_cache);
3549 free_corrupt_blocks_tree(&corrupt_blocks);
3550 root->fs_info->corrupt_blocks = NULL;
3551 free_orphan_data_extents(&root->orphan_data_extents);
3555 static int fs_root_objectid(u64 objectid)
3557 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3558 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3560 return is_fstree(objectid);
3563 static int check_fs_roots(struct btrfs_root *root,
3564 struct cache_tree *root_cache)
3566 struct btrfs_path path;
3567 struct btrfs_key key;
3568 struct walk_control wc;
3569 struct extent_buffer *leaf, *tree_node;
3570 struct btrfs_root *tmp_root;
3571 struct btrfs_root *tree_root = root->fs_info->tree_root;
3575 if (ctx.progress_enabled) {
3576 ctx.tp = TASK_FS_ROOTS;
3577 task_start(ctx.info);
3581 * Just in case we made any changes to the extent tree that weren't
3582 * reflected into the free space cache yet.
3585 reset_cached_block_groups(root->fs_info);
3586 memset(&wc, 0, sizeof(wc));
3587 cache_tree_init(&wc.shared);
3588 btrfs_init_path(&path);
3593 key.type = BTRFS_ROOT_ITEM_KEY;
3594 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3599 tree_node = tree_root->node;
3601 if (tree_node != tree_root->node) {
3602 free_root_recs_tree(root_cache);
3603 btrfs_release_path(&path);
3606 leaf = path.nodes[0];
3607 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3608 ret = btrfs_next_leaf(tree_root, &path);
3614 leaf = path.nodes[0];
3616 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3617 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3618 fs_root_objectid(key.objectid)) {
3619 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3620 tmp_root = btrfs_read_fs_root_no_cache(
3621 root->fs_info, &key);
3623 key.offset = (u64)-1;
3624 tmp_root = btrfs_read_fs_root(
3625 root->fs_info, &key);
3627 if (IS_ERR(tmp_root)) {
3631 ret = check_fs_root(tmp_root, root_cache, &wc);
3632 if (ret == -EAGAIN) {
3633 free_root_recs_tree(root_cache);
3634 btrfs_release_path(&path);
3639 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3640 btrfs_free_fs_root(tmp_root);
3641 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3642 key.type == BTRFS_ROOT_BACKREF_KEY) {
3643 process_root_ref(leaf, path.slots[0], &key,
3650 btrfs_release_path(&path);
3652 free_extent_cache_tree(&wc.shared);
3653 if (!cache_tree_empty(&wc.shared))
3654 fprintf(stderr, "warning line %d\n", __LINE__);
3656 task_stop(ctx.info);
3661 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3663 struct list_head *cur = rec->backrefs.next;
3664 struct extent_backref *back;
3665 struct tree_backref *tback;
3666 struct data_backref *dback;
3670 while(cur != &rec->backrefs) {
3671 back = list_entry(cur, struct extent_backref, list);
3673 if (!back->found_extent_tree) {
3677 if (back->is_data) {
3678 dback = (struct data_backref *)back;
3679 fprintf(stderr, "Backref %llu %s %llu"
3680 " owner %llu offset %llu num_refs %lu"
3681 " not found in extent tree\n",
3682 (unsigned long long)rec->start,
3683 back->full_backref ?
3685 back->full_backref ?
3686 (unsigned long long)dback->parent:
3687 (unsigned long long)dback->root,
3688 (unsigned long long)dback->owner,
3689 (unsigned long long)dback->offset,
3690 (unsigned long)dback->num_refs);
3692 tback = (struct tree_backref *)back;
3693 fprintf(stderr, "Backref %llu parent %llu"
3694 " root %llu not found in extent tree\n",
3695 (unsigned long long)rec->start,
3696 (unsigned long long)tback->parent,
3697 (unsigned long long)tback->root);
3700 if (!back->is_data && !back->found_ref) {
3704 tback = (struct tree_backref *)back;
3705 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3706 (unsigned long long)rec->start,
3707 back->full_backref ? "parent" : "root",
3708 back->full_backref ?
3709 (unsigned long long)tback->parent :
3710 (unsigned long long)tback->root, back);
3712 if (back->is_data) {
3713 dback = (struct data_backref *)back;
3714 if (dback->found_ref != dback->num_refs) {
3718 fprintf(stderr, "Incorrect local backref count"
3719 " on %llu %s %llu owner %llu"
3720 " offset %llu found %u wanted %u back %p\n",
3721 (unsigned long long)rec->start,
3722 back->full_backref ?
3724 back->full_backref ?
3725 (unsigned long long)dback->parent:
3726 (unsigned long long)dback->root,
3727 (unsigned long long)dback->owner,
3728 (unsigned long long)dback->offset,
3729 dback->found_ref, dback->num_refs, back);
3731 if (dback->disk_bytenr != rec->start) {
3735 fprintf(stderr, "Backref disk bytenr does not"
3736 " match extent record, bytenr=%llu, "
3737 "ref bytenr=%llu\n",
3738 (unsigned long long)rec->start,
3739 (unsigned long long)dback->disk_bytenr);
3742 if (dback->bytes != rec->nr) {
3746 fprintf(stderr, "Backref bytes do not match "
3747 "extent backref, bytenr=%llu, ref "
3748 "bytes=%llu, backref bytes=%llu\n",
3749 (unsigned long long)rec->start,
3750 (unsigned long long)rec->nr,
3751 (unsigned long long)dback->bytes);
3754 if (!back->is_data) {
3757 dback = (struct data_backref *)back;
3758 found += dback->found_ref;
3761 if (found != rec->refs) {
3765 fprintf(stderr, "Incorrect global backref count "
3766 "on %llu found %llu wanted %llu\n",
3767 (unsigned long long)rec->start,
3768 (unsigned long long)found,
3769 (unsigned long long)rec->refs);
3775 static int free_all_extent_backrefs(struct extent_record *rec)
3777 struct extent_backref *back;
3778 struct list_head *cur;
3779 while (!list_empty(&rec->backrefs)) {
3780 cur = rec->backrefs.next;
3781 back = list_entry(cur, struct extent_backref, list);
3788 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3789 struct cache_tree *extent_cache)
3791 struct cache_extent *cache;
3792 struct extent_record *rec;
3795 cache = first_cache_extent(extent_cache);
3798 rec = container_of(cache, struct extent_record, cache);
3799 remove_cache_extent(extent_cache, cache);
3800 free_all_extent_backrefs(rec);
3805 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3806 struct extent_record *rec)
3808 if (rec->content_checked && rec->owner_ref_checked &&
3809 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3810 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3811 !rec->bad_full_backref && !rec->crossing_stripes &&
3812 !rec->wrong_chunk_type) {
3813 remove_cache_extent(extent_cache, &rec->cache);
3814 free_all_extent_backrefs(rec);
3815 list_del_init(&rec->list);
3821 static int check_owner_ref(struct btrfs_root *root,
3822 struct extent_record *rec,
3823 struct extent_buffer *buf)
3825 struct extent_backref *node;
3826 struct tree_backref *back;
3827 struct btrfs_root *ref_root;
3828 struct btrfs_key key;
3829 struct btrfs_path path;
3830 struct extent_buffer *parent;
3835 list_for_each_entry(node, &rec->backrefs, list) {
3838 if (!node->found_ref)
3840 if (node->full_backref)
3842 back = (struct tree_backref *)node;
3843 if (btrfs_header_owner(buf) == back->root)
3846 BUG_ON(rec->is_root);
3848 /* try to find the block by search corresponding fs tree */
3849 key.objectid = btrfs_header_owner(buf);
3850 key.type = BTRFS_ROOT_ITEM_KEY;
3851 key.offset = (u64)-1;
3853 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3854 if (IS_ERR(ref_root))
3857 level = btrfs_header_level(buf);
3859 btrfs_item_key_to_cpu(buf, &key, 0);
3861 btrfs_node_key_to_cpu(buf, &key, 0);
3863 btrfs_init_path(&path);
3864 path.lowest_level = level + 1;
3865 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3869 parent = path.nodes[level + 1];
3870 if (parent && buf->start == btrfs_node_blockptr(parent,
3871 path.slots[level + 1]))
3874 btrfs_release_path(&path);
3875 return found ? 0 : 1;
3878 static int is_extent_tree_record(struct extent_record *rec)
3880 struct list_head *cur = rec->backrefs.next;
3881 struct extent_backref *node;
3882 struct tree_backref *back;
3885 while(cur != &rec->backrefs) {
3886 node = list_entry(cur, struct extent_backref, list);
3890 back = (struct tree_backref *)node;
3891 if (node->full_backref)
3893 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3900 static int record_bad_block_io(struct btrfs_fs_info *info,
3901 struct cache_tree *extent_cache,
3904 struct extent_record *rec;
3905 struct cache_extent *cache;
3906 struct btrfs_key key;
3908 cache = lookup_cache_extent(extent_cache, start, len);
3912 rec = container_of(cache, struct extent_record, cache);
3913 if (!is_extent_tree_record(rec))
3916 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3917 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3920 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3921 struct extent_buffer *buf, int slot)
3923 if (btrfs_header_level(buf)) {
3924 struct btrfs_key_ptr ptr1, ptr2;
3926 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3927 sizeof(struct btrfs_key_ptr));
3928 read_extent_buffer(buf, &ptr2,
3929 btrfs_node_key_ptr_offset(slot + 1),
3930 sizeof(struct btrfs_key_ptr));
3931 write_extent_buffer(buf, &ptr1,
3932 btrfs_node_key_ptr_offset(slot + 1),
3933 sizeof(struct btrfs_key_ptr));
3934 write_extent_buffer(buf, &ptr2,
3935 btrfs_node_key_ptr_offset(slot),
3936 sizeof(struct btrfs_key_ptr));
3938 struct btrfs_disk_key key;
3939 btrfs_node_key(buf, &key, 0);
3940 btrfs_fixup_low_keys(root, path, &key,
3941 btrfs_header_level(buf) + 1);
3944 struct btrfs_item *item1, *item2;
3945 struct btrfs_key k1, k2;
3946 char *item1_data, *item2_data;
3947 u32 item1_offset, item2_offset, item1_size, item2_size;
3949 item1 = btrfs_item_nr(slot);
3950 item2 = btrfs_item_nr(slot + 1);
3951 btrfs_item_key_to_cpu(buf, &k1, slot);
3952 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3953 item1_offset = btrfs_item_offset(buf, item1);
3954 item2_offset = btrfs_item_offset(buf, item2);
3955 item1_size = btrfs_item_size(buf, item1);
3956 item2_size = btrfs_item_size(buf, item2);
3958 item1_data = malloc(item1_size);
3961 item2_data = malloc(item2_size);
3967 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3968 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3970 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3971 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3975 btrfs_set_item_offset(buf, item1, item2_offset);
3976 btrfs_set_item_offset(buf, item2, item1_offset);
3977 btrfs_set_item_size(buf, item1, item2_size);
3978 btrfs_set_item_size(buf, item2, item1_size);
3980 path->slots[0] = slot;
3981 btrfs_set_item_key_unsafe(root, path, &k2);
3982 path->slots[0] = slot + 1;
3983 btrfs_set_item_key_unsafe(root, path, &k1);
3988 static int fix_key_order(struct btrfs_trans_handle *trans,
3989 struct btrfs_root *root,
3990 struct btrfs_path *path)
3992 struct extent_buffer *buf;
3993 struct btrfs_key k1, k2;
3995 int level = path->lowest_level;
3998 buf = path->nodes[level];
3999 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4001 btrfs_node_key_to_cpu(buf, &k1, i);
4002 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4004 btrfs_item_key_to_cpu(buf, &k1, i);
4005 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4007 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4009 ret = swap_values(root, path, buf, i);
4012 btrfs_mark_buffer_dirty(buf);
4018 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4019 struct btrfs_root *root,
4020 struct btrfs_path *path,
4021 struct extent_buffer *buf, int slot)
4023 struct btrfs_key key;
4024 int nritems = btrfs_header_nritems(buf);
4026 btrfs_item_key_to_cpu(buf, &key, slot);
4028 /* These are all the keys we can deal with missing. */
4029 if (key.type != BTRFS_DIR_INDEX_KEY &&
4030 key.type != BTRFS_EXTENT_ITEM_KEY &&
4031 key.type != BTRFS_METADATA_ITEM_KEY &&
4032 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4033 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4036 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4037 (unsigned long long)key.objectid, key.type,
4038 (unsigned long long)key.offset, slot, buf->start);
4039 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4040 btrfs_item_nr_offset(slot + 1),
4041 sizeof(struct btrfs_item) *
4042 (nritems - slot - 1));
4043 btrfs_set_header_nritems(buf, nritems - 1);
4045 struct btrfs_disk_key disk_key;
4047 btrfs_item_key(buf, &disk_key, 0);
4048 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4050 btrfs_mark_buffer_dirty(buf);
4054 static int fix_item_offset(struct btrfs_trans_handle *trans,
4055 struct btrfs_root *root,
4056 struct btrfs_path *path)
4058 struct extent_buffer *buf;
4062 /* We should only get this for leaves */
4063 BUG_ON(path->lowest_level);
4064 buf = path->nodes[0];
4066 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4067 unsigned int shift = 0, offset;
4069 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4070 BTRFS_LEAF_DATA_SIZE(root)) {
4071 if (btrfs_item_end_nr(buf, i) >
4072 BTRFS_LEAF_DATA_SIZE(root)) {
4073 ret = delete_bogus_item(trans, root, path,
4077 fprintf(stderr, "item is off the end of the "
4078 "leaf, can't fix\n");
4082 shift = BTRFS_LEAF_DATA_SIZE(root) -
4083 btrfs_item_end_nr(buf, i);
4084 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4085 btrfs_item_offset_nr(buf, i - 1)) {
4086 if (btrfs_item_end_nr(buf, i) >
4087 btrfs_item_offset_nr(buf, i - 1)) {
4088 ret = delete_bogus_item(trans, root, path,
4092 fprintf(stderr, "items overlap, can't fix\n");
4096 shift = btrfs_item_offset_nr(buf, i - 1) -
4097 btrfs_item_end_nr(buf, i);
4102 printf("Shifting item nr %d by %u bytes in block %llu\n",
4103 i, shift, (unsigned long long)buf->start);
4104 offset = btrfs_item_offset_nr(buf, i);
4105 memmove_extent_buffer(buf,
4106 btrfs_leaf_data(buf) + offset + shift,
4107 btrfs_leaf_data(buf) + offset,
4108 btrfs_item_size_nr(buf, i));
4109 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4111 btrfs_mark_buffer_dirty(buf);
4115 * We may have moved things, in which case we want to exit so we don't
4116 * write those changes out. Once we have proper abort functionality in
4117 * progs this can be changed to something nicer.
4124 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4125 * then just return -EIO.
4127 static int try_to_fix_bad_block(struct btrfs_root *root,
4128 struct extent_buffer *buf,
4129 enum btrfs_tree_block_status status)
4131 struct btrfs_trans_handle *trans;
4132 struct ulist *roots;
4133 struct ulist_node *node;
4134 struct btrfs_root *search_root;
4135 struct btrfs_path *path;
4136 struct ulist_iterator iter;
4137 struct btrfs_key root_key, key;
4140 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4141 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4144 path = btrfs_alloc_path();
4148 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4151 btrfs_free_path(path);
4155 ULIST_ITER_INIT(&iter);
4156 while ((node = ulist_next(roots, &iter))) {
4157 root_key.objectid = node->val;
4158 root_key.type = BTRFS_ROOT_ITEM_KEY;
4159 root_key.offset = (u64)-1;
4161 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4168 trans = btrfs_start_transaction(search_root, 0);
4169 if (IS_ERR(trans)) {
4170 ret = PTR_ERR(trans);
4174 path->lowest_level = btrfs_header_level(buf);
4175 path->skip_check_block = 1;
4176 if (path->lowest_level)
4177 btrfs_node_key_to_cpu(buf, &key, 0);
4179 btrfs_item_key_to_cpu(buf, &key, 0);
4180 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4183 btrfs_commit_transaction(trans, search_root);
4186 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4187 ret = fix_key_order(trans, search_root, path);
4188 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4189 ret = fix_item_offset(trans, search_root, path);
4191 btrfs_commit_transaction(trans, search_root);
4194 btrfs_release_path(path);
4195 btrfs_commit_transaction(trans, search_root);
4198 btrfs_free_path(path);
4202 static int check_block(struct btrfs_root *root,
4203 struct cache_tree *extent_cache,
4204 struct extent_buffer *buf, u64 flags)
4206 struct extent_record *rec;
4207 struct cache_extent *cache;
4208 struct btrfs_key key;
4209 enum btrfs_tree_block_status status;
4213 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4216 rec = container_of(cache, struct extent_record, cache);
4217 rec->generation = btrfs_header_generation(buf);
4219 level = btrfs_header_level(buf);
4220 if (btrfs_header_nritems(buf) > 0) {
4223 btrfs_item_key_to_cpu(buf, &key, 0);
4225 btrfs_node_key_to_cpu(buf, &key, 0);
4227 rec->info_objectid = key.objectid;
4229 rec->info_level = level;
4231 if (btrfs_is_leaf(buf))
4232 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4234 status = btrfs_check_node(root, &rec->parent_key, buf);
4236 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4238 status = try_to_fix_bad_block(root, buf, status);
4239 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4241 fprintf(stderr, "bad block %llu\n",
4242 (unsigned long long)buf->start);
4245 * Signal to callers we need to start the scan over
4246 * again since we'll have cow'ed blocks.
4251 rec->content_checked = 1;
4252 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4253 rec->owner_ref_checked = 1;
4255 ret = check_owner_ref(root, rec, buf);
4257 rec->owner_ref_checked = 1;
4261 maybe_free_extent_rec(extent_cache, rec);
4265 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4266 u64 parent, u64 root)
4268 struct list_head *cur = rec->backrefs.next;
4269 struct extent_backref *node;
4270 struct tree_backref *back;
4272 while(cur != &rec->backrefs) {
4273 node = list_entry(cur, struct extent_backref, list);
4277 back = (struct tree_backref *)node;
4279 if (!node->full_backref)
4281 if (parent == back->parent)
4284 if (node->full_backref)
4286 if (back->root == root)
4293 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4294 u64 parent, u64 root)
4296 struct tree_backref *ref = malloc(sizeof(*ref));
4297 memset(&ref->node, 0, sizeof(ref->node));
4299 ref->parent = parent;
4300 ref->node.full_backref = 1;
4303 ref->node.full_backref = 0;
4305 list_add_tail(&ref->node.list, &rec->backrefs);
4310 static struct data_backref *find_data_backref(struct extent_record *rec,
4311 u64 parent, u64 root,
4312 u64 owner, u64 offset,
4314 u64 disk_bytenr, u64 bytes)
4316 struct list_head *cur = rec->backrefs.next;
4317 struct extent_backref *node;
4318 struct data_backref *back;
4320 while(cur != &rec->backrefs) {
4321 node = list_entry(cur, struct extent_backref, list);
4325 back = (struct data_backref *)node;
4327 if (!node->full_backref)
4329 if (parent == back->parent)
4332 if (node->full_backref)
4334 if (back->root == root && back->owner == owner &&
4335 back->offset == offset) {
4336 if (found_ref && node->found_ref &&
4337 (back->bytes != bytes ||
4338 back->disk_bytenr != disk_bytenr))
4347 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4348 u64 parent, u64 root,
4349 u64 owner, u64 offset,
4352 struct data_backref *ref = malloc(sizeof(*ref));
4353 memset(&ref->node, 0, sizeof(ref->node));
4354 ref->node.is_data = 1;
4357 ref->parent = parent;
4360 ref->node.full_backref = 1;
4364 ref->offset = offset;
4365 ref->node.full_backref = 0;
4367 ref->bytes = max_size;
4370 list_add_tail(&ref->node.list, &rec->backrefs);
4371 if (max_size > rec->max_size)
4372 rec->max_size = max_size;
4376 /* Check if the type of extent matches with its chunk */
4377 static void check_extent_type(struct extent_record *rec)
4379 struct btrfs_block_group_cache *bg_cache;
4381 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4385 /* data extent, check chunk directly*/
4386 if (!rec->metadata) {
4387 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4388 rec->wrong_chunk_type = 1;
4392 /* metadata extent, check the obvious case first */
4393 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4394 BTRFS_BLOCK_GROUP_METADATA))) {
4395 rec->wrong_chunk_type = 1;
4400 * Check SYSTEM extent, as it's also marked as metadata, we can only
4401 * make sure it's a SYSTEM extent by its backref
4403 if (!list_empty(&rec->backrefs)) {
4404 struct extent_backref *node;
4405 struct tree_backref *tback;
4408 node = list_entry(rec->backrefs.next, struct extent_backref,
4410 if (node->is_data) {
4411 /* tree block shouldn't have data backref */
4412 rec->wrong_chunk_type = 1;
4415 tback = container_of(node, struct tree_backref, node);
4417 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4418 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4420 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4421 if (!(bg_cache->flags & bg_type))
4422 rec->wrong_chunk_type = 1;
4426 static int add_extent_rec(struct cache_tree *extent_cache,
4427 struct btrfs_key *parent_key, u64 parent_gen,
4428 u64 start, u64 nr, u64 extent_item_refs,
4429 int is_root, int inc_ref, int set_checked,
4430 int metadata, int extent_rec, u64 max_size)
4432 struct extent_record *rec;
4433 struct cache_extent *cache;
4437 cache = lookup_cache_extent(extent_cache, start, nr);
4439 rec = container_of(cache, struct extent_record, cache);
4443 rec->nr = max(nr, max_size);
4446 * We need to make sure to reset nr to whatever the extent
4447 * record says was the real size, this way we can compare it to
4451 if (start != rec->start || rec->found_rec) {
4452 struct extent_record *tmp;
4455 if (list_empty(&rec->list))
4456 list_add_tail(&rec->list,
4457 &duplicate_extents);
4460 * We have to do this song and dance in case we
4461 * find an extent record that falls inside of
4462 * our current extent record but does not have
4463 * the same objectid.
4465 tmp = malloc(sizeof(*tmp));
4469 tmp->max_size = max_size;
4472 tmp->metadata = metadata;
4473 tmp->extent_item_refs = extent_item_refs;
4474 INIT_LIST_HEAD(&tmp->list);
4475 list_add_tail(&tmp->list, &rec->dups);
4476 rec->num_duplicates++;
4483 if (extent_item_refs && !dup) {
4484 if (rec->extent_item_refs) {
4485 fprintf(stderr, "block %llu rec "
4486 "extent_item_refs %llu, passed %llu\n",
4487 (unsigned long long)start,
4488 (unsigned long long)
4489 rec->extent_item_refs,
4490 (unsigned long long)extent_item_refs);
4492 rec->extent_item_refs = extent_item_refs;
4497 rec->content_checked = 1;
4498 rec->owner_ref_checked = 1;
4502 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4504 rec->parent_generation = parent_gen;
4506 if (rec->max_size < max_size)
4507 rec->max_size = max_size;
4510 * A metadata extent can't cross stripe_len boundary, otherwise
4511 * kernel scrub won't be able to handle it.
4512 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4515 if (metadata && check_crossing_stripes(rec->start,
4517 rec->crossing_stripes = 1;
4518 check_extent_type(rec);
4519 maybe_free_extent_rec(extent_cache, rec);
4522 rec = malloc(sizeof(*rec));
4524 rec->max_size = max_size;
4525 rec->nr = max(nr, max_size);
4526 rec->found_rec = !!extent_rec;
4527 rec->content_checked = 0;
4528 rec->owner_ref_checked = 0;
4529 rec->num_duplicates = 0;
4530 rec->metadata = metadata;
4531 rec->flag_block_full_backref = -1;
4532 rec->bad_full_backref = 0;
4533 rec->crossing_stripes = 0;
4534 rec->wrong_chunk_type = 0;
4535 INIT_LIST_HEAD(&rec->backrefs);
4536 INIT_LIST_HEAD(&rec->dups);
4537 INIT_LIST_HEAD(&rec->list);
4549 if (extent_item_refs)
4550 rec->extent_item_refs = extent_item_refs;
4552 rec->extent_item_refs = 0;
4555 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4557 memset(&rec->parent_key, 0, sizeof(*parent_key));
4560 rec->parent_generation = parent_gen;
4562 rec->parent_generation = 0;
4564 rec->cache.start = start;
4565 rec->cache.size = nr;
4566 ret = insert_cache_extent(extent_cache, &rec->cache);
4570 rec->content_checked = 1;
4571 rec->owner_ref_checked = 1;
4575 if (check_crossing_stripes(rec->start, rec->max_size))
4576 rec->crossing_stripes = 1;
4577 check_extent_type(rec);
4581 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4582 u64 parent, u64 root, int found_ref)
4584 struct extent_record *rec;
4585 struct tree_backref *back;
4586 struct cache_extent *cache;
4588 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4590 add_extent_rec(extent_cache, NULL, 0, bytenr,
4591 1, 0, 0, 0, 0, 1, 0, 0);
4592 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4597 rec = container_of(cache, struct extent_record, cache);
4598 if (rec->start != bytenr) {
4602 back = find_tree_backref(rec, parent, root);
4604 back = alloc_tree_backref(rec, parent, root);
4607 if (back->node.found_ref) {
4608 fprintf(stderr, "Extent back ref already exists "
4609 "for %llu parent %llu root %llu \n",
4610 (unsigned long long)bytenr,
4611 (unsigned long long)parent,
4612 (unsigned long long)root);
4614 back->node.found_ref = 1;
4616 if (back->node.found_extent_tree) {
4617 fprintf(stderr, "Extent back ref already exists "
4618 "for %llu parent %llu root %llu \n",
4619 (unsigned long long)bytenr,
4620 (unsigned long long)parent,
4621 (unsigned long long)root);
4623 back->node.found_extent_tree = 1;
4625 check_extent_type(rec);
4626 maybe_free_extent_rec(extent_cache, rec);
4630 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4631 u64 parent, u64 root, u64 owner, u64 offset,
4632 u32 num_refs, int found_ref, u64 max_size)
4634 struct extent_record *rec;
4635 struct data_backref *back;
4636 struct cache_extent *cache;
4638 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4640 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4642 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4647 rec = container_of(cache, struct extent_record, cache);
4648 if (rec->max_size < max_size)
4649 rec->max_size = max_size;
4652 * If found_ref is set then max_size is the real size and must match the
4653 * existing refs. So if we have already found a ref then we need to
4654 * make sure that this ref matches the existing one, otherwise we need
4655 * to add a new backref so we can notice that the backrefs don't match
4656 * and we need to figure out who is telling the truth. This is to
4657 * account for that awful fsync bug I introduced where we'd end up with
4658 * a btrfs_file_extent_item that would have its length include multiple
4659 * prealloc extents or point inside of a prealloc extent.
4661 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4664 back = alloc_data_backref(rec, parent, root, owner, offset,
4668 BUG_ON(num_refs != 1);
4669 if (back->node.found_ref)
4670 BUG_ON(back->bytes != max_size);
4671 back->node.found_ref = 1;
4672 back->found_ref += 1;
4673 back->bytes = max_size;
4674 back->disk_bytenr = bytenr;
4676 rec->content_checked = 1;
4677 rec->owner_ref_checked = 1;
4679 if (back->node.found_extent_tree) {
4680 fprintf(stderr, "Extent back ref already exists "
4681 "for %llu parent %llu root %llu "
4682 "owner %llu offset %llu num_refs %lu\n",
4683 (unsigned long long)bytenr,
4684 (unsigned long long)parent,
4685 (unsigned long long)root,
4686 (unsigned long long)owner,
4687 (unsigned long long)offset,
4688 (unsigned long)num_refs);
4690 back->num_refs = num_refs;
4691 back->node.found_extent_tree = 1;
4693 maybe_free_extent_rec(extent_cache, rec);
4697 static int add_pending(struct cache_tree *pending,
4698 struct cache_tree *seen, u64 bytenr, u32 size)
4701 ret = add_cache_extent(seen, bytenr, size);
4704 add_cache_extent(pending, bytenr, size);
4708 static int pick_next_pending(struct cache_tree *pending,
4709 struct cache_tree *reada,
4710 struct cache_tree *nodes,
4711 u64 last, struct block_info *bits, int bits_nr,
4714 unsigned long node_start = last;
4715 struct cache_extent *cache;
4718 cache = search_cache_extent(reada, 0);
4720 bits[0].start = cache->start;
4721 bits[0].size = cache->size;
4726 if (node_start > 32768)
4727 node_start -= 32768;
4729 cache = search_cache_extent(nodes, node_start);
4731 cache = search_cache_extent(nodes, 0);
4734 cache = search_cache_extent(pending, 0);
4739 bits[ret].start = cache->start;
4740 bits[ret].size = cache->size;
4741 cache = next_cache_extent(cache);
4743 } while (cache && ret < bits_nr);
4749 bits[ret].start = cache->start;
4750 bits[ret].size = cache->size;
4751 cache = next_cache_extent(cache);
4753 } while (cache && ret < bits_nr);
4755 if (bits_nr - ret > 8) {
4756 u64 lookup = bits[0].start + bits[0].size;
4757 struct cache_extent *next;
4758 next = search_cache_extent(pending, lookup);
4760 if (next->start - lookup > 32768)
4762 bits[ret].start = next->start;
4763 bits[ret].size = next->size;
4764 lookup = next->start + next->size;
4768 next = next_cache_extent(next);
4776 static void free_chunk_record(struct cache_extent *cache)
4778 struct chunk_record *rec;
4780 rec = container_of(cache, struct chunk_record, cache);
4781 list_del_init(&rec->list);
4782 list_del_init(&rec->dextents);
4786 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4788 cache_tree_free_extents(chunk_cache, free_chunk_record);
4791 static void free_device_record(struct rb_node *node)
4793 struct device_record *rec;
4795 rec = container_of(node, struct device_record, node);
4799 FREE_RB_BASED_TREE(device_cache, free_device_record);
4801 int insert_block_group_record(struct block_group_tree *tree,
4802 struct block_group_record *bg_rec)
4806 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4810 list_add_tail(&bg_rec->list, &tree->block_groups);
4814 static void free_block_group_record(struct cache_extent *cache)
4816 struct block_group_record *rec;
4818 rec = container_of(cache, struct block_group_record, cache);
4819 list_del_init(&rec->list);
4823 void free_block_group_tree(struct block_group_tree *tree)
4825 cache_tree_free_extents(&tree->tree, free_block_group_record);
4828 int insert_device_extent_record(struct device_extent_tree *tree,
4829 struct device_extent_record *de_rec)
4834 * Device extent is a bit different from the other extents, because
4835 * the extents which belong to the different devices may have the
4836 * same start and size, so we need use the special extent cache
4837 * search/insert functions.
4839 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4843 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4844 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4848 static void free_device_extent_record(struct cache_extent *cache)
4850 struct device_extent_record *rec;
4852 rec = container_of(cache, struct device_extent_record, cache);
4853 if (!list_empty(&rec->chunk_list))
4854 list_del_init(&rec->chunk_list);
4855 if (!list_empty(&rec->device_list))
4856 list_del_init(&rec->device_list);
4860 void free_device_extent_tree(struct device_extent_tree *tree)
4862 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4865 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4866 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4867 struct extent_buffer *leaf, int slot)
4869 struct btrfs_extent_ref_v0 *ref0;
4870 struct btrfs_key key;
4872 btrfs_item_key_to_cpu(leaf, &key, slot);
4873 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4874 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4875 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4877 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4878 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4884 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4885 struct btrfs_key *key,
4888 struct btrfs_chunk *ptr;
4889 struct chunk_record *rec;
4892 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4893 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4895 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4897 fprintf(stderr, "memory allocation failed\n");
4901 INIT_LIST_HEAD(&rec->list);
4902 INIT_LIST_HEAD(&rec->dextents);
4905 rec->cache.start = key->offset;
4906 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4908 rec->generation = btrfs_header_generation(leaf);
4910 rec->objectid = key->objectid;
4911 rec->type = key->type;
4912 rec->offset = key->offset;
4914 rec->length = rec->cache.size;
4915 rec->owner = btrfs_chunk_owner(leaf, ptr);
4916 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4917 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4918 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4919 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4920 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4921 rec->num_stripes = num_stripes;
4922 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4924 for (i = 0; i < rec->num_stripes; ++i) {
4925 rec->stripes[i].devid =
4926 btrfs_stripe_devid_nr(leaf, ptr, i);
4927 rec->stripes[i].offset =
4928 btrfs_stripe_offset_nr(leaf, ptr, i);
4929 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4930 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4937 static int process_chunk_item(struct cache_tree *chunk_cache,
4938 struct btrfs_key *key, struct extent_buffer *eb,
4941 struct chunk_record *rec;
4944 rec = btrfs_new_chunk_record(eb, key, slot);
4945 ret = insert_cache_extent(chunk_cache, &rec->cache);
4947 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4948 rec->offset, rec->length);
4955 static int process_device_item(struct rb_root *dev_cache,
4956 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4958 struct btrfs_dev_item *ptr;
4959 struct device_record *rec;
4962 ptr = btrfs_item_ptr(eb,
4963 slot, struct btrfs_dev_item);
4965 rec = malloc(sizeof(*rec));
4967 fprintf(stderr, "memory allocation failed\n");
4971 rec->devid = key->offset;
4972 rec->generation = btrfs_header_generation(eb);
4974 rec->objectid = key->objectid;
4975 rec->type = key->type;
4976 rec->offset = key->offset;
4978 rec->devid = btrfs_device_id(eb, ptr);
4979 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4980 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4982 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4984 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4991 struct block_group_record *
4992 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4995 struct btrfs_block_group_item *ptr;
4996 struct block_group_record *rec;
4998 rec = calloc(1, sizeof(*rec));
5000 fprintf(stderr, "memory allocation failed\n");
5004 rec->cache.start = key->objectid;
5005 rec->cache.size = key->offset;
5007 rec->generation = btrfs_header_generation(leaf);
5009 rec->objectid = key->objectid;
5010 rec->type = key->type;
5011 rec->offset = key->offset;
5013 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5014 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5016 INIT_LIST_HEAD(&rec->list);
5021 static int process_block_group_item(struct block_group_tree *block_group_cache,
5022 struct btrfs_key *key,
5023 struct extent_buffer *eb, int slot)
5025 struct block_group_record *rec;
5028 rec = btrfs_new_block_group_record(eb, key, slot);
5029 ret = insert_block_group_record(block_group_cache, rec);
5031 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5032 rec->objectid, rec->offset);
5039 struct device_extent_record *
5040 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5041 struct btrfs_key *key, int slot)
5043 struct device_extent_record *rec;
5044 struct btrfs_dev_extent *ptr;
5046 rec = calloc(1, sizeof(*rec));
5048 fprintf(stderr, "memory allocation failed\n");
5052 rec->cache.objectid = key->objectid;
5053 rec->cache.start = key->offset;
5055 rec->generation = btrfs_header_generation(leaf);
5057 rec->objectid = key->objectid;
5058 rec->type = key->type;
5059 rec->offset = key->offset;
5061 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5062 rec->chunk_objecteid =
5063 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5065 btrfs_dev_extent_chunk_offset(leaf, ptr);
5066 rec->length = btrfs_dev_extent_length(leaf, ptr);
5067 rec->cache.size = rec->length;
5069 INIT_LIST_HEAD(&rec->chunk_list);
5070 INIT_LIST_HEAD(&rec->device_list);
5076 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5077 struct btrfs_key *key, struct extent_buffer *eb,
5080 struct device_extent_record *rec;
5083 rec = btrfs_new_device_extent_record(eb, key, slot);
5084 ret = insert_device_extent_record(dev_extent_cache, rec);
5087 "Device extent[%llu, %llu, %llu] existed.\n",
5088 rec->objectid, rec->offset, rec->length);
5095 static int process_extent_item(struct btrfs_root *root,
5096 struct cache_tree *extent_cache,
5097 struct extent_buffer *eb, int slot)
5099 struct btrfs_extent_item *ei;
5100 struct btrfs_extent_inline_ref *iref;
5101 struct btrfs_extent_data_ref *dref;
5102 struct btrfs_shared_data_ref *sref;
5103 struct btrfs_key key;
5107 u32 item_size = btrfs_item_size_nr(eb, slot);
5113 btrfs_item_key_to_cpu(eb, &key, slot);
5115 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5117 num_bytes = root->leafsize;
5119 num_bytes = key.offset;
5122 if (item_size < sizeof(*ei)) {
5123 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5124 struct btrfs_extent_item_v0 *ei0;
5125 BUG_ON(item_size != sizeof(*ei0));
5126 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5127 refs = btrfs_extent_refs_v0(eb, ei0);
5131 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5132 num_bytes, refs, 0, 0, 0, metadata, 1,
5136 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5137 refs = btrfs_extent_refs(eb, ei);
5138 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5143 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5144 refs, 0, 0, 0, metadata, 1, num_bytes);
5146 ptr = (unsigned long)(ei + 1);
5147 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5148 key.type == BTRFS_EXTENT_ITEM_KEY)
5149 ptr += sizeof(struct btrfs_tree_block_info);
5151 end = (unsigned long)ei + item_size;
5153 iref = (struct btrfs_extent_inline_ref *)ptr;
5154 type = btrfs_extent_inline_ref_type(eb, iref);
5155 offset = btrfs_extent_inline_ref_offset(eb, iref);
5157 case BTRFS_TREE_BLOCK_REF_KEY:
5158 add_tree_backref(extent_cache, key.objectid,
5161 case BTRFS_SHARED_BLOCK_REF_KEY:
5162 add_tree_backref(extent_cache, key.objectid,
5165 case BTRFS_EXTENT_DATA_REF_KEY:
5166 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5167 add_data_backref(extent_cache, key.objectid, 0,
5168 btrfs_extent_data_ref_root(eb, dref),
5169 btrfs_extent_data_ref_objectid(eb,
5171 btrfs_extent_data_ref_offset(eb, dref),
5172 btrfs_extent_data_ref_count(eb, dref),
5175 case BTRFS_SHARED_DATA_REF_KEY:
5176 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5177 add_data_backref(extent_cache, key.objectid, offset,
5179 btrfs_shared_data_ref_count(eb, sref),
5183 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5184 key.objectid, key.type, num_bytes);
5187 ptr += btrfs_extent_inline_ref_size(type);
5194 static int check_cache_range(struct btrfs_root *root,
5195 struct btrfs_block_group_cache *cache,
5196 u64 offset, u64 bytes)
5198 struct btrfs_free_space *entry;
5204 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5205 bytenr = btrfs_sb_offset(i);
5206 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5207 cache->key.objectid, bytenr, 0,
5208 &logical, &nr, &stripe_len);
5213 if (logical[nr] + stripe_len <= offset)
5215 if (offset + bytes <= logical[nr])
5217 if (logical[nr] == offset) {
5218 if (stripe_len >= bytes) {
5222 bytes -= stripe_len;
5223 offset += stripe_len;
5224 } else if (logical[nr] < offset) {
5225 if (logical[nr] + stripe_len >=
5230 bytes = (offset + bytes) -
5231 (logical[nr] + stripe_len);
5232 offset = logical[nr] + stripe_len;
5235 * Could be tricky, the super may land in the
5236 * middle of the area we're checking. First
5237 * check the easiest case, it's at the end.
5239 if (logical[nr] + stripe_len >=
5241 bytes = logical[nr] - offset;
5245 /* Check the left side */
5246 ret = check_cache_range(root, cache,
5248 logical[nr] - offset);
5254 /* Now we continue with the right side */
5255 bytes = (offset + bytes) -
5256 (logical[nr] + stripe_len);
5257 offset = logical[nr] + stripe_len;
5264 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5266 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5267 offset, offset+bytes);
5271 if (entry->offset != offset) {
5272 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5277 if (entry->bytes != bytes) {
5278 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5279 bytes, entry->bytes, offset);
5283 unlink_free_space(cache->free_space_ctl, entry);
5288 static int verify_space_cache(struct btrfs_root *root,
5289 struct btrfs_block_group_cache *cache)
5291 struct btrfs_path *path;
5292 struct extent_buffer *leaf;
5293 struct btrfs_key key;
5297 path = btrfs_alloc_path();
5301 root = root->fs_info->extent_root;
5303 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5305 key.objectid = last;
5307 key.type = BTRFS_EXTENT_ITEM_KEY;
5309 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5314 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5315 ret = btrfs_next_leaf(root, path);
5323 leaf = path->nodes[0];
5324 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5325 if (key.objectid >= cache->key.offset + cache->key.objectid)
5327 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5328 key.type != BTRFS_METADATA_ITEM_KEY) {
5333 if (last == key.objectid) {
5334 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5335 last = key.objectid + key.offset;
5337 last = key.objectid + root->leafsize;
5342 ret = check_cache_range(root, cache, last,
5343 key.objectid - last);
5346 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5347 last = key.objectid + key.offset;
5349 last = key.objectid + root->leafsize;
5353 if (last < cache->key.objectid + cache->key.offset)
5354 ret = check_cache_range(root, cache, last,
5355 cache->key.objectid +
5356 cache->key.offset - last);
5359 btrfs_free_path(path);
5362 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5363 fprintf(stderr, "There are still entries left in the space "
5371 static int check_space_cache(struct btrfs_root *root)
5373 struct btrfs_block_group_cache *cache;
5374 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5378 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5379 btrfs_super_generation(root->fs_info->super_copy) !=
5380 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5381 printf("cache and super generation don't match, space cache "
5382 "will be invalidated\n");
5386 if (ctx.progress_enabled) {
5387 ctx.tp = TASK_FREE_SPACE;
5388 task_start(ctx.info);
5392 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5396 start = cache->key.objectid + cache->key.offset;
5397 if (!cache->free_space_ctl) {
5398 if (btrfs_init_free_space_ctl(cache,
5399 root->sectorsize)) {
5404 btrfs_remove_free_space_cache(cache);
5407 ret = load_free_space_cache(root->fs_info, cache);
5411 ret = verify_space_cache(root, cache);
5413 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5414 cache->key.objectid);
5419 task_stop(ctx.info);
5421 return error ? -EINVAL : 0;
5424 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5425 u64 num_bytes, unsigned long leaf_offset,
5426 struct extent_buffer *eb) {
5429 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5431 unsigned long csum_offset;
5435 u64 data_checked = 0;
5441 if (num_bytes % root->sectorsize)
5444 data = malloc(num_bytes);
5448 while (offset < num_bytes) {
5451 read_len = num_bytes - offset;
5452 /* read as much space once a time */
5453 ret = read_extent_data(root, data + offset,
5454 bytenr + offset, &read_len, mirror);
5458 /* verify every 4k data's checksum */
5459 while (data_checked < read_len) {
5461 tmp = offset + data_checked;
5463 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5464 csum, root->sectorsize);
5465 btrfs_csum_final(csum, (char *)&csum);
5467 csum_offset = leaf_offset +
5468 tmp / root->sectorsize * csum_size;
5469 read_extent_buffer(eb, (char *)&csum_expected,
5470 csum_offset, csum_size);
5471 /* try another mirror */
5472 if (csum != csum_expected) {
5473 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5474 mirror, bytenr + tmp,
5475 csum, csum_expected);
5476 num_copies = btrfs_num_copies(
5477 &root->fs_info->mapping_tree,
5479 if (mirror < num_copies - 1) {
5484 data_checked += root->sectorsize;
5493 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5496 struct btrfs_path *path;
5497 struct extent_buffer *leaf;
5498 struct btrfs_key key;
5501 path = btrfs_alloc_path();
5503 fprintf(stderr, "Error allocing path\n");
5507 key.objectid = bytenr;
5508 key.type = BTRFS_EXTENT_ITEM_KEY;
5509 key.offset = (u64)-1;
5512 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5515 fprintf(stderr, "Error looking up extent record %d\n", ret);
5516 btrfs_free_path(path);
5519 if (path->slots[0] > 0) {
5522 ret = btrfs_prev_leaf(root, path);
5525 } else if (ret > 0) {
5532 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5535 * Block group items come before extent items if they have the same
5536 * bytenr, so walk back one more just in case. Dear future traveler,
5537 * first congrats on mastering time travel. Now if it's not too much
5538 * trouble could you go back to 2006 and tell Chris to make the
5539 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5540 * EXTENT_ITEM_KEY please?
5542 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5543 if (path->slots[0] > 0) {
5546 ret = btrfs_prev_leaf(root, path);
5549 } else if (ret > 0) {
5554 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5558 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5559 ret = btrfs_next_leaf(root, path);
5561 fprintf(stderr, "Error going to next leaf "
5563 btrfs_free_path(path);
5569 leaf = path->nodes[0];
5570 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5571 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5575 if (key.objectid + key.offset < bytenr) {
5579 if (key.objectid > bytenr + num_bytes)
5582 if (key.objectid == bytenr) {
5583 if (key.offset >= num_bytes) {
5587 num_bytes -= key.offset;
5588 bytenr += key.offset;
5589 } else if (key.objectid < bytenr) {
5590 if (key.objectid + key.offset >= bytenr + num_bytes) {
5594 num_bytes = (bytenr + num_bytes) -
5595 (key.objectid + key.offset);
5596 bytenr = key.objectid + key.offset;
5598 if (key.objectid + key.offset < bytenr + num_bytes) {
5599 u64 new_start = key.objectid + key.offset;
5600 u64 new_bytes = bytenr + num_bytes - new_start;
5603 * Weird case, the extent is in the middle of
5604 * our range, we'll have to search one side
5605 * and then the other. Not sure if this happens
5606 * in real life, but no harm in coding it up
5607 * anyway just in case.
5609 btrfs_release_path(path);
5610 ret = check_extent_exists(root, new_start,
5613 fprintf(stderr, "Right section didn't "
5617 num_bytes = key.objectid - bytenr;
5620 num_bytes = key.objectid - bytenr;
5627 if (num_bytes && !ret) {
5628 fprintf(stderr, "There are no extents for csum range "
5629 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5633 btrfs_free_path(path);
5637 static int check_csums(struct btrfs_root *root)
5639 struct btrfs_path *path;
5640 struct extent_buffer *leaf;
5641 struct btrfs_key key;
5642 u64 offset = 0, num_bytes = 0;
5643 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5647 unsigned long leaf_offset;
5649 root = root->fs_info->csum_root;
5650 if (!extent_buffer_uptodate(root->node)) {
5651 fprintf(stderr, "No valid csum tree found\n");
5655 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5656 key.type = BTRFS_EXTENT_CSUM_KEY;
5659 path = btrfs_alloc_path();
5663 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5665 fprintf(stderr, "Error searching csum tree %d\n", ret);
5666 btrfs_free_path(path);
5670 if (ret > 0 && path->slots[0])
5675 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5676 ret = btrfs_next_leaf(root, path);
5678 fprintf(stderr, "Error going to next leaf "
5685 leaf = path->nodes[0];
5687 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5688 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5693 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5694 csum_size) * root->sectorsize;
5695 if (!check_data_csum)
5696 goto skip_csum_check;
5697 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5698 ret = check_extent_csums(root, key.offset, data_len,
5704 offset = key.offset;
5705 } else if (key.offset != offset + num_bytes) {
5706 ret = check_extent_exists(root, offset, num_bytes);
5708 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5709 "there is no extent record\n",
5710 offset, offset+num_bytes);
5713 offset = key.offset;
5716 num_bytes += data_len;
5720 btrfs_free_path(path);
5724 static int is_dropped_key(struct btrfs_key *key,
5725 struct btrfs_key *drop_key) {
5726 if (key->objectid < drop_key->objectid)
5728 else if (key->objectid == drop_key->objectid) {
5729 if (key->type < drop_key->type)
5731 else if (key->type == drop_key->type) {
5732 if (key->offset < drop_key->offset)
5740 * Here are the rules for FULL_BACKREF.
5742 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5743 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5745 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5746 * if it happened after the relocation occurred since we'll have dropped the
5747 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5748 * have no real way to know for sure.
5750 * We process the blocks one root at a time, and we start from the lowest root
5751 * objectid and go to the highest. So we can just lookup the owner backref for
5752 * the record and if we don't find it then we know it doesn't exist and we have
5755 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5756 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5757 * be set or not and then we can check later once we've gathered all the refs.
5759 static int calc_extent_flag(struct btrfs_root *root,
5760 struct cache_tree *extent_cache,
5761 struct extent_buffer *buf,
5762 struct root_item_record *ri,
5765 struct extent_record *rec;
5766 struct cache_extent *cache;
5767 struct tree_backref *tback;
5770 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5771 /* we have added this extent before */
5773 rec = container_of(cache, struct extent_record, cache);
5776 * Except file/reloc tree, we can not have
5779 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5784 if (buf->start == ri->bytenr)
5787 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5790 owner = btrfs_header_owner(buf);
5791 if (owner == ri->objectid)
5794 tback = find_tree_backref(rec, 0, owner);
5799 if (rec->flag_block_full_backref != -1 &&
5800 rec->flag_block_full_backref != 0)
5801 rec->bad_full_backref = 1;
5804 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5805 if (rec->flag_block_full_backref != -1 &&
5806 rec->flag_block_full_backref != 1)
5807 rec->bad_full_backref = 1;
5811 static int run_next_block(struct btrfs_root *root,
5812 struct block_info *bits,
5815 struct cache_tree *pending,
5816 struct cache_tree *seen,
5817 struct cache_tree *reada,
5818 struct cache_tree *nodes,
5819 struct cache_tree *extent_cache,
5820 struct cache_tree *chunk_cache,
5821 struct rb_root *dev_cache,
5822 struct block_group_tree *block_group_cache,
5823 struct device_extent_tree *dev_extent_cache,
5824 struct root_item_record *ri)
5826 struct extent_buffer *buf;
5827 struct extent_record *rec = NULL;
5838 struct btrfs_key key;
5839 struct cache_extent *cache;
5842 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5843 bits_nr, &reada_bits);
5848 for(i = 0; i < nritems; i++) {
5849 ret = add_cache_extent(reada, bits[i].start,
5854 /* fixme, get the parent transid */
5855 readahead_tree_block(root, bits[i].start,
5859 *last = bits[0].start;
5860 bytenr = bits[0].start;
5861 size = bits[0].size;
5863 cache = lookup_cache_extent(pending, bytenr, size);
5865 remove_cache_extent(pending, cache);
5868 cache = lookup_cache_extent(reada, bytenr, size);
5870 remove_cache_extent(reada, cache);
5873 cache = lookup_cache_extent(nodes, bytenr, size);
5875 remove_cache_extent(nodes, cache);
5878 cache = lookup_cache_extent(extent_cache, bytenr, size);
5880 rec = container_of(cache, struct extent_record, cache);
5881 gen = rec->parent_generation;
5884 /* fixme, get the real parent transid */
5885 buf = read_tree_block(root, bytenr, size, gen);
5886 if (!extent_buffer_uptodate(buf)) {
5887 record_bad_block_io(root->fs_info,
5888 extent_cache, bytenr, size);
5892 nritems = btrfs_header_nritems(buf);
5895 if (!init_extent_tree) {
5896 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5897 btrfs_header_level(buf), 1, NULL,
5900 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5902 fprintf(stderr, "Couldn't calc extent flags\n");
5903 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5908 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5910 fprintf(stderr, "Couldn't calc extent flags\n");
5911 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5915 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5917 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5918 ri->objectid == btrfs_header_owner(buf)) {
5920 * Ok we got to this block from it's original owner and
5921 * we have FULL_BACKREF set. Relocation can leave
5922 * converted blocks over so this is altogether possible,
5923 * however it's not possible if the generation > the
5924 * last snapshot, so check for this case.
5926 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5927 btrfs_header_generation(buf) > ri->last_snapshot) {
5928 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5929 rec->bad_full_backref = 1;
5934 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5935 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5936 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5937 rec->bad_full_backref = 1;
5941 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5942 rec->flag_block_full_backref = 1;
5946 rec->flag_block_full_backref = 0;
5948 owner = btrfs_header_owner(buf);
5951 ret = check_block(root, extent_cache, buf, flags);
5955 if (btrfs_is_leaf(buf)) {
5956 btree_space_waste += btrfs_leaf_free_space(root, buf);
5957 for (i = 0; i < nritems; i++) {
5958 struct btrfs_file_extent_item *fi;
5959 btrfs_item_key_to_cpu(buf, &key, i);
5960 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5961 process_extent_item(root, extent_cache, buf,
5965 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5966 process_extent_item(root, extent_cache, buf,
5970 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5972 btrfs_item_size_nr(buf, i);
5975 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5976 process_chunk_item(chunk_cache, &key, buf, i);
5979 if (key.type == BTRFS_DEV_ITEM_KEY) {
5980 process_device_item(dev_cache, &key, buf, i);
5983 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5984 process_block_group_item(block_group_cache,
5988 if (key.type == BTRFS_DEV_EXTENT_KEY) {
5989 process_device_extent_item(dev_extent_cache,
5994 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5995 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5996 process_extent_ref_v0(extent_cache, buf, i);
6003 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6004 add_tree_backref(extent_cache, key.objectid, 0,
6008 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6009 add_tree_backref(extent_cache, key.objectid,
6013 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6014 struct btrfs_extent_data_ref *ref;
6015 ref = btrfs_item_ptr(buf, i,
6016 struct btrfs_extent_data_ref);
6017 add_data_backref(extent_cache,
6019 btrfs_extent_data_ref_root(buf, ref),
6020 btrfs_extent_data_ref_objectid(buf,
6022 btrfs_extent_data_ref_offset(buf, ref),
6023 btrfs_extent_data_ref_count(buf, ref),
6024 0, root->sectorsize);
6027 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6028 struct btrfs_shared_data_ref *ref;
6029 ref = btrfs_item_ptr(buf, i,
6030 struct btrfs_shared_data_ref);
6031 add_data_backref(extent_cache,
6032 key.objectid, key.offset, 0, 0, 0,
6033 btrfs_shared_data_ref_count(buf, ref),
6034 0, root->sectorsize);
6037 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6038 struct bad_item *bad;
6040 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6044 bad = malloc(sizeof(struct bad_item));
6047 INIT_LIST_HEAD(&bad->list);
6048 memcpy(&bad->key, &key,
6049 sizeof(struct btrfs_key));
6050 bad->root_id = owner;
6051 list_add_tail(&bad->list, &delete_items);
6054 if (key.type != BTRFS_EXTENT_DATA_KEY)
6056 fi = btrfs_item_ptr(buf, i,
6057 struct btrfs_file_extent_item);
6058 if (btrfs_file_extent_type(buf, fi) ==
6059 BTRFS_FILE_EXTENT_INLINE)
6061 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6064 data_bytes_allocated +=
6065 btrfs_file_extent_disk_num_bytes(buf, fi);
6066 if (data_bytes_allocated < root->sectorsize) {
6069 data_bytes_referenced +=
6070 btrfs_file_extent_num_bytes(buf, fi);
6071 add_data_backref(extent_cache,
6072 btrfs_file_extent_disk_bytenr(buf, fi),
6073 parent, owner, key.objectid, key.offset -
6074 btrfs_file_extent_offset(buf, fi), 1, 1,
6075 btrfs_file_extent_disk_num_bytes(buf, fi));
6079 struct btrfs_key first_key;
6081 first_key.objectid = 0;
6084 btrfs_item_key_to_cpu(buf, &first_key, 0);
6085 level = btrfs_header_level(buf);
6086 for (i = 0; i < nritems; i++) {
6087 ptr = btrfs_node_blockptr(buf, i);
6088 size = btrfs_level_size(root, level - 1);
6089 btrfs_node_key_to_cpu(buf, &key, i);
6091 if ((level == ri->drop_level)
6092 && is_dropped_key(&key, &ri->drop_key)) {
6096 ret = add_extent_rec(extent_cache, &key,
6097 btrfs_node_ptr_generation(buf, i),
6098 ptr, size, 0, 0, 1, 0, 1, 0,
6102 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6105 add_pending(nodes, seen, ptr, size);
6107 add_pending(pending, seen, ptr, size);
6110 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6111 nritems) * sizeof(struct btrfs_key_ptr);
6113 total_btree_bytes += buf->len;
6114 if (fs_root_objectid(btrfs_header_owner(buf)))
6115 total_fs_tree_bytes += buf->len;
6116 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6117 total_extent_tree_bytes += buf->len;
6118 if (!found_old_backref &&
6119 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6120 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6121 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6122 found_old_backref = 1;
6124 free_extent_buffer(buf);
6128 static int add_root_to_pending(struct extent_buffer *buf,
6129 struct cache_tree *extent_cache,
6130 struct cache_tree *pending,
6131 struct cache_tree *seen,
6132 struct cache_tree *nodes,
6135 if (btrfs_header_level(buf) > 0)
6136 add_pending(nodes, seen, buf->start, buf->len);
6138 add_pending(pending, seen, buf->start, buf->len);
6139 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6140 0, 1, 1, 0, 1, 0, buf->len);
6142 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6143 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6144 add_tree_backref(extent_cache, buf->start, buf->start,
6147 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6151 /* as we fix the tree, we might be deleting blocks that
6152 * we're tracking for repair. This hook makes sure we
6153 * remove any backrefs for blocks as we are fixing them.
6155 static int free_extent_hook(struct btrfs_trans_handle *trans,
6156 struct btrfs_root *root,
6157 u64 bytenr, u64 num_bytes, u64 parent,
6158 u64 root_objectid, u64 owner, u64 offset,
6161 struct extent_record *rec;
6162 struct cache_extent *cache;
6164 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6166 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6167 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6171 rec = container_of(cache, struct extent_record, cache);
6173 struct data_backref *back;
6174 back = find_data_backref(rec, parent, root_objectid, owner,
6175 offset, 1, bytenr, num_bytes);
6178 if (back->node.found_ref) {
6179 back->found_ref -= refs_to_drop;
6181 rec->refs -= refs_to_drop;
6183 if (back->node.found_extent_tree) {
6184 back->num_refs -= refs_to_drop;
6185 if (rec->extent_item_refs)
6186 rec->extent_item_refs -= refs_to_drop;
6188 if (back->found_ref == 0)
6189 back->node.found_ref = 0;
6190 if (back->num_refs == 0)
6191 back->node.found_extent_tree = 0;
6193 if (!back->node.found_extent_tree && back->node.found_ref) {
6194 list_del(&back->node.list);
6198 struct tree_backref *back;
6199 back = find_tree_backref(rec, parent, root_objectid);
6202 if (back->node.found_ref) {
6205 back->node.found_ref = 0;
6207 if (back->node.found_extent_tree) {
6208 if (rec->extent_item_refs)
6209 rec->extent_item_refs--;
6210 back->node.found_extent_tree = 0;
6212 if (!back->node.found_extent_tree && back->node.found_ref) {
6213 list_del(&back->node.list);
6217 maybe_free_extent_rec(extent_cache, rec);
6222 static int delete_extent_records(struct btrfs_trans_handle *trans,
6223 struct btrfs_root *root,
6224 struct btrfs_path *path,
6225 u64 bytenr, u64 new_len)
6227 struct btrfs_key key;
6228 struct btrfs_key found_key;
6229 struct extent_buffer *leaf;
6234 key.objectid = bytenr;
6236 key.offset = (u64)-1;
6239 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6246 if (path->slots[0] == 0)
6252 leaf = path->nodes[0];
6253 slot = path->slots[0];
6255 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6256 if (found_key.objectid != bytenr)
6259 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6260 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6261 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6262 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6263 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6264 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6265 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6266 btrfs_release_path(path);
6267 if (found_key.type == 0) {
6268 if (found_key.offset == 0)
6270 key.offset = found_key.offset - 1;
6271 key.type = found_key.type;
6273 key.type = found_key.type - 1;
6274 key.offset = (u64)-1;
6278 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6279 found_key.objectid, found_key.type, found_key.offset);
6281 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6284 btrfs_release_path(path);
6286 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6287 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6288 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6289 found_key.offset : root->leafsize;
6291 ret = btrfs_update_block_group(trans, root, bytenr,
6298 btrfs_release_path(path);
6303 * for a single backref, this will allocate a new extent
6304 * and add the backref to it.
6306 static int record_extent(struct btrfs_trans_handle *trans,
6307 struct btrfs_fs_info *info,
6308 struct btrfs_path *path,
6309 struct extent_record *rec,
6310 struct extent_backref *back,
6311 int allocated, u64 flags)
6314 struct btrfs_root *extent_root = info->extent_root;
6315 struct extent_buffer *leaf;
6316 struct btrfs_key ins_key;
6317 struct btrfs_extent_item *ei;
6318 struct tree_backref *tback;
6319 struct data_backref *dback;
6320 struct btrfs_tree_block_info *bi;
6323 rec->max_size = max_t(u64, rec->max_size,
6324 info->extent_root->leafsize);
6327 u32 item_size = sizeof(*ei);
6330 item_size += sizeof(*bi);
6332 ins_key.objectid = rec->start;
6333 ins_key.offset = rec->max_size;
6334 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6336 ret = btrfs_insert_empty_item(trans, extent_root, path,
6337 &ins_key, item_size);
6341 leaf = path->nodes[0];
6342 ei = btrfs_item_ptr(leaf, path->slots[0],
6343 struct btrfs_extent_item);
6345 btrfs_set_extent_refs(leaf, ei, 0);
6346 btrfs_set_extent_generation(leaf, ei, rec->generation);
6348 if (back->is_data) {
6349 btrfs_set_extent_flags(leaf, ei,
6350 BTRFS_EXTENT_FLAG_DATA);
6352 struct btrfs_disk_key copy_key;;
6354 tback = (struct tree_backref *)back;
6355 bi = (struct btrfs_tree_block_info *)(ei + 1);
6356 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6359 btrfs_set_disk_key_objectid(©_key,
6360 rec->info_objectid);
6361 btrfs_set_disk_key_type(©_key, 0);
6362 btrfs_set_disk_key_offset(©_key, 0);
6364 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6365 btrfs_set_tree_block_key(leaf, bi, ©_key);
6367 btrfs_set_extent_flags(leaf, ei,
6368 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6371 btrfs_mark_buffer_dirty(leaf);
6372 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6373 rec->max_size, 1, 0);
6376 btrfs_release_path(path);
6379 if (back->is_data) {
6383 dback = (struct data_backref *)back;
6384 if (back->full_backref)
6385 parent = dback->parent;
6389 for (i = 0; i < dback->found_ref; i++) {
6390 /* if parent != 0, we're doing a full backref
6391 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6392 * just makes the backref allocator create a data
6395 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6396 rec->start, rec->max_size,
6400 BTRFS_FIRST_FREE_OBJECTID :
6406 fprintf(stderr, "adding new data backref"
6407 " on %llu %s %llu owner %llu"
6408 " offset %llu found %d\n",
6409 (unsigned long long)rec->start,
6410 back->full_backref ?
6412 back->full_backref ?
6413 (unsigned long long)parent :
6414 (unsigned long long)dback->root,
6415 (unsigned long long)dback->owner,
6416 (unsigned long long)dback->offset,
6421 tback = (struct tree_backref *)back;
6422 if (back->full_backref)
6423 parent = tback->parent;
6427 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6428 rec->start, rec->max_size,
6429 parent, tback->root, 0, 0);
6430 fprintf(stderr, "adding new tree backref on "
6431 "start %llu len %llu parent %llu root %llu\n",
6432 rec->start, rec->max_size, parent, tback->root);
6435 btrfs_release_path(path);
6439 struct extent_entry {
6444 struct list_head list;
6447 static struct extent_entry *find_entry(struct list_head *entries,
6448 u64 bytenr, u64 bytes)
6450 struct extent_entry *entry = NULL;
6452 list_for_each_entry(entry, entries, list) {
6453 if (entry->bytenr == bytenr && entry->bytes == bytes)
6460 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6462 struct extent_entry *entry, *best = NULL, *prev = NULL;
6464 list_for_each_entry(entry, entries, list) {
6471 * If there are as many broken entries as entries then we know
6472 * not to trust this particular entry.
6474 if (entry->broken == entry->count)
6478 * If our current entry == best then we can't be sure our best
6479 * is really the best, so we need to keep searching.
6481 if (best && best->count == entry->count) {
6487 /* Prev == entry, not good enough, have to keep searching */
6488 if (!prev->broken && prev->count == entry->count)
6492 best = (prev->count > entry->count) ? prev : entry;
6493 else if (best->count < entry->count)
6501 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6502 struct data_backref *dback, struct extent_entry *entry)
6504 struct btrfs_trans_handle *trans;
6505 struct btrfs_root *root;
6506 struct btrfs_file_extent_item *fi;
6507 struct extent_buffer *leaf;
6508 struct btrfs_key key;
6512 key.objectid = dback->root;
6513 key.type = BTRFS_ROOT_ITEM_KEY;
6514 key.offset = (u64)-1;
6515 root = btrfs_read_fs_root(info, &key);
6517 fprintf(stderr, "Couldn't find root for our ref\n");
6522 * The backref points to the original offset of the extent if it was
6523 * split, so we need to search down to the offset we have and then walk
6524 * forward until we find the backref we're looking for.
6526 key.objectid = dback->owner;
6527 key.type = BTRFS_EXTENT_DATA_KEY;
6528 key.offset = dback->offset;
6529 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6531 fprintf(stderr, "Error looking up ref %d\n", ret);
6536 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6537 ret = btrfs_next_leaf(root, path);
6539 fprintf(stderr, "Couldn't find our ref, next\n");
6543 leaf = path->nodes[0];
6544 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6545 if (key.objectid != dback->owner ||
6546 key.type != BTRFS_EXTENT_DATA_KEY) {
6547 fprintf(stderr, "Couldn't find our ref, search\n");
6550 fi = btrfs_item_ptr(leaf, path->slots[0],
6551 struct btrfs_file_extent_item);
6552 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6553 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6555 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6560 btrfs_release_path(path);
6562 trans = btrfs_start_transaction(root, 1);
6564 return PTR_ERR(trans);
6567 * Ok we have the key of the file extent we want to fix, now we can cow
6568 * down to the thing and fix it.
6570 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6572 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6573 key.objectid, key.type, key.offset, ret);
6577 fprintf(stderr, "Well that's odd, we just found this key "
6578 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6583 leaf = path->nodes[0];
6584 fi = btrfs_item_ptr(leaf, path->slots[0],
6585 struct btrfs_file_extent_item);
6587 if (btrfs_file_extent_compression(leaf, fi) &&
6588 dback->disk_bytenr != entry->bytenr) {
6589 fprintf(stderr, "Ref doesn't match the record start and is "
6590 "compressed, please take a btrfs-image of this file "
6591 "system and send it to a btrfs developer so they can "
6592 "complete this functionality for bytenr %Lu\n",
6593 dback->disk_bytenr);
6598 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6599 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6600 } else if (dback->disk_bytenr > entry->bytenr) {
6601 u64 off_diff, offset;
6603 off_diff = dback->disk_bytenr - entry->bytenr;
6604 offset = btrfs_file_extent_offset(leaf, fi);
6605 if (dback->disk_bytenr + offset +
6606 btrfs_file_extent_num_bytes(leaf, fi) >
6607 entry->bytenr + entry->bytes) {
6608 fprintf(stderr, "Ref is past the entry end, please "
6609 "take a btrfs-image of this file system and "
6610 "send it to a btrfs developer, ref %Lu\n",
6611 dback->disk_bytenr);
6616 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6617 btrfs_set_file_extent_offset(leaf, fi, offset);
6618 } else if (dback->disk_bytenr < entry->bytenr) {
6621 offset = btrfs_file_extent_offset(leaf, fi);
6622 if (dback->disk_bytenr + offset < entry->bytenr) {
6623 fprintf(stderr, "Ref is before the entry start, please"
6624 " take a btrfs-image of this file system and "
6625 "send it to a btrfs developer, ref %Lu\n",
6626 dback->disk_bytenr);
6631 offset += dback->disk_bytenr;
6632 offset -= entry->bytenr;
6633 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6634 btrfs_set_file_extent_offset(leaf, fi, offset);
6637 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6640 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6641 * only do this if we aren't using compression, otherwise it's a
6644 if (!btrfs_file_extent_compression(leaf, fi))
6645 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6647 printf("ram bytes may be wrong?\n");
6648 btrfs_mark_buffer_dirty(leaf);
6650 err = btrfs_commit_transaction(trans, root);
6651 btrfs_release_path(path);
6652 return ret ? ret : err;
6655 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6656 struct extent_record *rec)
6658 struct extent_backref *back;
6659 struct data_backref *dback;
6660 struct extent_entry *entry, *best = NULL;
6663 int broken_entries = 0;
6668 * Metadata is easy and the backrefs should always agree on bytenr and
6669 * size, if not we've got bigger issues.
6674 list_for_each_entry(back, &rec->backrefs, list) {
6675 if (back->full_backref || !back->is_data)
6678 dback = (struct data_backref *)back;
6681 * We only pay attention to backrefs that we found a real
6684 if (dback->found_ref == 0)
6688 * For now we only catch when the bytes don't match, not the
6689 * bytenr. We can easily do this at the same time, but I want
6690 * to have a fs image to test on before we just add repair
6691 * functionality willy-nilly so we know we won't screw up the
6695 entry = find_entry(&entries, dback->disk_bytenr,
6698 entry = malloc(sizeof(struct extent_entry));
6703 memset(entry, 0, sizeof(*entry));
6704 entry->bytenr = dback->disk_bytenr;
6705 entry->bytes = dback->bytes;
6706 list_add_tail(&entry->list, &entries);
6711 * If we only have on entry we may think the entries agree when
6712 * in reality they don't so we have to do some extra checking.
6714 if (dback->disk_bytenr != rec->start ||
6715 dback->bytes != rec->nr || back->broken)
6726 /* Yay all the backrefs agree, carry on good sir */
6727 if (nr_entries <= 1 && !mismatch)
6730 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6731 "%Lu\n", rec->start);
6734 * First we want to see if the backrefs can agree amongst themselves who
6735 * is right, so figure out which one of the entries has the highest
6738 best = find_most_right_entry(&entries);
6741 * Ok so we may have an even split between what the backrefs think, so
6742 * this is where we use the extent ref to see what it thinks.
6745 entry = find_entry(&entries, rec->start, rec->nr);
6746 if (!entry && (!broken_entries || !rec->found_rec)) {
6747 fprintf(stderr, "Backrefs don't agree with each other "
6748 "and extent record doesn't agree with anybody,"
6749 " so we can't fix bytenr %Lu bytes %Lu\n",
6750 rec->start, rec->nr);
6753 } else if (!entry) {
6755 * Ok our backrefs were broken, we'll assume this is the
6756 * correct value and add an entry for this range.
6758 entry = malloc(sizeof(struct extent_entry));
6763 memset(entry, 0, sizeof(*entry));
6764 entry->bytenr = rec->start;
6765 entry->bytes = rec->nr;
6766 list_add_tail(&entry->list, &entries);
6770 best = find_most_right_entry(&entries);
6772 fprintf(stderr, "Backrefs and extent record evenly "
6773 "split on who is right, this is going to "
6774 "require user input to fix bytenr %Lu bytes "
6775 "%Lu\n", rec->start, rec->nr);
6782 * I don't think this can happen currently as we'll abort() if we catch
6783 * this case higher up, but in case somebody removes that we still can't
6784 * deal with it properly here yet, so just bail out of that's the case.
6786 if (best->bytenr != rec->start) {
6787 fprintf(stderr, "Extent start and backref starts don't match, "
6788 "please use btrfs-image on this file system and send "
6789 "it to a btrfs developer so they can make fsck fix "
6790 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6791 rec->start, rec->nr);
6797 * Ok great we all agreed on an extent record, let's go find the real
6798 * references and fix up the ones that don't match.
6800 list_for_each_entry(back, &rec->backrefs, list) {
6801 if (back->full_backref || !back->is_data)
6804 dback = (struct data_backref *)back;
6807 * Still ignoring backrefs that don't have a real ref attached
6810 if (dback->found_ref == 0)
6813 if (dback->bytes == best->bytes &&
6814 dback->disk_bytenr == best->bytenr)
6817 ret = repair_ref(info, path, dback, best);
6823 * Ok we messed with the actual refs, which means we need to drop our
6824 * entire cache and go back and rescan. I know this is a huge pain and
6825 * adds a lot of extra work, but it's the only way to be safe. Once all
6826 * the backrefs agree we may not need to do anything to the extent
6831 while (!list_empty(&entries)) {
6832 entry = list_entry(entries.next, struct extent_entry, list);
6833 list_del_init(&entry->list);
6839 static int process_duplicates(struct btrfs_root *root,
6840 struct cache_tree *extent_cache,
6841 struct extent_record *rec)
6843 struct extent_record *good, *tmp;
6844 struct cache_extent *cache;
6848 * If we found a extent record for this extent then return, or if we
6849 * have more than one duplicate we are likely going to need to delete
6852 if (rec->found_rec || rec->num_duplicates > 1)
6855 /* Shouldn't happen but just in case */
6856 BUG_ON(!rec->num_duplicates);
6859 * So this happens if we end up with a backref that doesn't match the
6860 * actual extent entry. So either the backref is bad or the extent
6861 * entry is bad. Either way we want to have the extent_record actually
6862 * reflect what we found in the extent_tree, so we need to take the
6863 * duplicate out and use that as the extent_record since the only way we
6864 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6866 remove_cache_extent(extent_cache, &rec->cache);
6868 good = list_entry(rec->dups.next, struct extent_record, list);
6869 list_del_init(&good->list);
6870 INIT_LIST_HEAD(&good->backrefs);
6871 INIT_LIST_HEAD(&good->dups);
6872 good->cache.start = good->start;
6873 good->cache.size = good->nr;
6874 good->content_checked = 0;
6875 good->owner_ref_checked = 0;
6876 good->num_duplicates = 0;
6877 good->refs = rec->refs;
6878 list_splice_init(&rec->backrefs, &good->backrefs);
6880 cache = lookup_cache_extent(extent_cache, good->start,
6884 tmp = container_of(cache, struct extent_record, cache);
6887 * If we find another overlapping extent and it's found_rec is
6888 * set then it's a duplicate and we need to try and delete
6891 if (tmp->found_rec || tmp->num_duplicates > 0) {
6892 if (list_empty(&good->list))
6893 list_add_tail(&good->list,
6894 &duplicate_extents);
6895 good->num_duplicates += tmp->num_duplicates + 1;
6896 list_splice_init(&tmp->dups, &good->dups);
6897 list_del_init(&tmp->list);
6898 list_add_tail(&tmp->list, &good->dups);
6899 remove_cache_extent(extent_cache, &tmp->cache);
6904 * Ok we have another non extent item backed extent rec, so lets
6905 * just add it to this extent and carry on like we did above.
6907 good->refs += tmp->refs;
6908 list_splice_init(&tmp->backrefs, &good->backrefs);
6909 remove_cache_extent(extent_cache, &tmp->cache);
6912 ret = insert_cache_extent(extent_cache, &good->cache);
6915 return good->num_duplicates ? 0 : 1;
6918 static int delete_duplicate_records(struct btrfs_root *root,
6919 struct extent_record *rec)
6921 struct btrfs_trans_handle *trans;
6922 LIST_HEAD(delete_list);
6923 struct btrfs_path *path;
6924 struct extent_record *tmp, *good, *n;
6927 struct btrfs_key key;
6929 path = btrfs_alloc_path();
6936 /* Find the record that covers all of the duplicates. */
6937 list_for_each_entry(tmp, &rec->dups, list) {
6938 if (good->start < tmp->start)
6940 if (good->nr > tmp->nr)
6943 if (tmp->start + tmp->nr < good->start + good->nr) {
6944 fprintf(stderr, "Ok we have overlapping extents that "
6945 "aren't completely covered by eachother, this "
6946 "is going to require more careful thought. "
6947 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6948 tmp->start, tmp->nr, good->start, good->nr);
6955 list_add_tail(&rec->list, &delete_list);
6957 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6960 list_move_tail(&tmp->list, &delete_list);
6963 root = root->fs_info->extent_root;
6964 trans = btrfs_start_transaction(root, 1);
6965 if (IS_ERR(trans)) {
6966 ret = PTR_ERR(trans);
6970 list_for_each_entry(tmp, &delete_list, list) {
6971 if (tmp->found_rec == 0)
6973 key.objectid = tmp->start;
6974 key.type = BTRFS_EXTENT_ITEM_KEY;
6975 key.offset = tmp->nr;
6977 /* Shouldn't happen but just in case */
6978 if (tmp->metadata) {
6979 fprintf(stderr, "Well this shouldn't happen, extent "
6980 "record overlaps but is metadata? "
6981 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6985 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6991 ret = btrfs_del_item(trans, root, path);
6994 btrfs_release_path(path);
6997 err = btrfs_commit_transaction(trans, root);
7001 while (!list_empty(&delete_list)) {
7002 tmp = list_entry(delete_list.next, struct extent_record, list);
7003 list_del_init(&tmp->list);
7009 while (!list_empty(&rec->dups)) {
7010 tmp = list_entry(rec->dups.next, struct extent_record, list);
7011 list_del_init(&tmp->list);
7015 btrfs_free_path(path);
7017 if (!ret && !nr_del)
7018 rec->num_duplicates = 0;
7020 return ret ? ret : nr_del;
7023 static int find_possible_backrefs(struct btrfs_fs_info *info,
7024 struct btrfs_path *path,
7025 struct cache_tree *extent_cache,
7026 struct extent_record *rec)
7028 struct btrfs_root *root;
7029 struct extent_backref *back;
7030 struct data_backref *dback;
7031 struct cache_extent *cache;
7032 struct btrfs_file_extent_item *fi;
7033 struct btrfs_key key;
7037 list_for_each_entry(back, &rec->backrefs, list) {
7038 /* Don't care about full backrefs (poor unloved backrefs) */
7039 if (back->full_backref || !back->is_data)
7042 dback = (struct data_backref *)back;
7044 /* We found this one, we don't need to do a lookup */
7045 if (dback->found_ref)
7048 key.objectid = dback->root;
7049 key.type = BTRFS_ROOT_ITEM_KEY;
7050 key.offset = (u64)-1;
7052 root = btrfs_read_fs_root(info, &key);
7054 /* No root, definitely a bad ref, skip */
7055 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7057 /* Other err, exit */
7059 return PTR_ERR(root);
7061 key.objectid = dback->owner;
7062 key.type = BTRFS_EXTENT_DATA_KEY;
7063 key.offset = dback->offset;
7064 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7066 btrfs_release_path(path);
7069 /* Didn't find it, we can carry on */
7074 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7075 struct btrfs_file_extent_item);
7076 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7077 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7078 btrfs_release_path(path);
7079 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7081 struct extent_record *tmp;
7082 tmp = container_of(cache, struct extent_record, cache);
7085 * If we found an extent record for the bytenr for this
7086 * particular backref then we can't add it to our
7087 * current extent record. We only want to add backrefs
7088 * that don't have a corresponding extent item in the
7089 * extent tree since they likely belong to this record
7090 * and we need to fix it if it doesn't match bytenrs.
7096 dback->found_ref += 1;
7097 dback->disk_bytenr = bytenr;
7098 dback->bytes = bytes;
7101 * Set this so the verify backref code knows not to trust the
7102 * values in this backref.
7111 * Record orphan data ref into corresponding root.
7113 * Return 0 if the extent item contains data ref and recorded.
7114 * Return 1 if the extent item contains no useful data ref
7115 * On that case, it may contains only shared_dataref or metadata backref
7116 * or the file extent exists(this should be handled by the extent bytenr
7118 * Return <0 if something goes wrong.
7120 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7121 struct extent_record *rec)
7123 struct btrfs_key key;
7124 struct btrfs_root *dest_root;
7125 struct extent_backref *back;
7126 struct data_backref *dback;
7127 struct orphan_data_extent *orphan;
7128 struct btrfs_path *path;
7129 int recorded_data_ref = 0;
7134 path = btrfs_alloc_path();
7137 list_for_each_entry(back, &rec->backrefs, list) {
7138 if (back->full_backref || !back->is_data ||
7139 !back->found_extent_tree)
7141 dback = (struct data_backref *)back;
7142 if (dback->found_ref)
7144 key.objectid = dback->root;
7145 key.type = BTRFS_ROOT_ITEM_KEY;
7146 key.offset = (u64)-1;
7148 dest_root = btrfs_read_fs_root(fs_info, &key);
7150 /* For non-exist root we just skip it */
7151 if (IS_ERR(dest_root) || !dest_root)
7154 key.objectid = dback->owner;
7155 key.type = BTRFS_EXTENT_DATA_KEY;
7156 key.offset = dback->offset;
7158 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7160 * For ret < 0, it's OK since the fs-tree may be corrupted,
7161 * we need to record it for inode/file extent rebuild.
7162 * For ret > 0, we record it only for file extent rebuild.
7163 * For ret == 0, the file extent exists but only bytenr
7164 * mismatch, let the original bytenr fix routine to handle,
7170 orphan = malloc(sizeof(*orphan));
7175 INIT_LIST_HEAD(&orphan->list);
7176 orphan->root = dback->root;
7177 orphan->objectid = dback->owner;
7178 orphan->offset = dback->offset;
7179 orphan->disk_bytenr = rec->cache.start;
7180 orphan->disk_len = rec->cache.size;
7181 list_add(&dest_root->orphan_data_extents, &orphan->list);
7182 recorded_data_ref = 1;
7185 btrfs_free_path(path);
7187 return !recorded_data_ref;
7193 * when an incorrect extent item is found, this will delete
7194 * all of the existing entries for it and recreate them
7195 * based on what the tree scan found.
7197 static int fixup_extent_refs(struct btrfs_fs_info *info,
7198 struct cache_tree *extent_cache,
7199 struct extent_record *rec)
7201 struct btrfs_trans_handle *trans = NULL;
7203 struct btrfs_path *path;
7204 struct list_head *cur = rec->backrefs.next;
7205 struct cache_extent *cache;
7206 struct extent_backref *back;
7210 if (rec->flag_block_full_backref)
7211 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7213 path = btrfs_alloc_path();
7217 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7219 * Sometimes the backrefs themselves are so broken they don't
7220 * get attached to any meaningful rec, so first go back and
7221 * check any of our backrefs that we couldn't find and throw
7222 * them into the list if we find the backref so that
7223 * verify_backrefs can figure out what to do.
7225 ret = find_possible_backrefs(info, path, extent_cache, rec);
7230 /* step one, make sure all of the backrefs agree */
7231 ret = verify_backrefs(info, path, rec);
7235 trans = btrfs_start_transaction(info->extent_root, 1);
7236 if (IS_ERR(trans)) {
7237 ret = PTR_ERR(trans);
7241 /* step two, delete all the existing records */
7242 ret = delete_extent_records(trans, info->extent_root, path,
7243 rec->start, rec->max_size);
7248 /* was this block corrupt? If so, don't add references to it */
7249 cache = lookup_cache_extent(info->corrupt_blocks,
7250 rec->start, rec->max_size);
7256 /* step three, recreate all the refs we did find */
7257 while(cur != &rec->backrefs) {
7258 back = list_entry(cur, struct extent_backref, list);
7262 * if we didn't find any references, don't create a
7265 if (!back->found_ref)
7268 rec->bad_full_backref = 0;
7269 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7277 int err = btrfs_commit_transaction(trans, info->extent_root);
7282 btrfs_free_path(path);
7286 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7287 struct extent_record *rec)
7289 struct btrfs_trans_handle *trans;
7290 struct btrfs_root *root = fs_info->extent_root;
7291 struct btrfs_path *path;
7292 struct btrfs_extent_item *ei;
7293 struct btrfs_key key;
7297 key.objectid = rec->start;
7298 if (rec->metadata) {
7299 key.type = BTRFS_METADATA_ITEM_KEY;
7300 key.offset = rec->info_level;
7302 key.type = BTRFS_EXTENT_ITEM_KEY;
7303 key.offset = rec->max_size;
7306 path = btrfs_alloc_path();
7310 trans = btrfs_start_transaction(root, 0);
7311 if (IS_ERR(trans)) {
7312 btrfs_free_path(path);
7313 return PTR_ERR(trans);
7316 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7318 btrfs_free_path(path);
7319 btrfs_commit_transaction(trans, root);
7322 fprintf(stderr, "Didn't find extent for %llu\n",
7323 (unsigned long long)rec->start);
7324 btrfs_free_path(path);
7325 btrfs_commit_transaction(trans, root);
7329 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7330 struct btrfs_extent_item);
7331 flags = btrfs_extent_flags(path->nodes[0], ei);
7332 if (rec->flag_block_full_backref) {
7333 fprintf(stderr, "setting full backref on %llu\n",
7334 (unsigned long long)key.objectid);
7335 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7337 fprintf(stderr, "clearing full backref on %llu\n",
7338 (unsigned long long)key.objectid);
7339 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7341 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7342 btrfs_mark_buffer_dirty(path->nodes[0]);
7343 btrfs_free_path(path);
7344 return btrfs_commit_transaction(trans, root);
7347 /* right now we only prune from the extent allocation tree */
7348 static int prune_one_block(struct btrfs_trans_handle *trans,
7349 struct btrfs_fs_info *info,
7350 struct btrfs_corrupt_block *corrupt)
7353 struct btrfs_path path;
7354 struct extent_buffer *eb;
7358 int level = corrupt->level + 1;
7360 btrfs_init_path(&path);
7362 /* we want to stop at the parent to our busted block */
7363 path.lowest_level = level;
7365 ret = btrfs_search_slot(trans, info->extent_root,
7366 &corrupt->key, &path, -1, 1);
7371 eb = path.nodes[level];
7378 * hopefully the search gave us the block we want to prune,
7379 * lets try that first
7381 slot = path.slots[level];
7382 found = btrfs_node_blockptr(eb, slot);
7383 if (found == corrupt->cache.start)
7386 nritems = btrfs_header_nritems(eb);
7388 /* the search failed, lets scan this node and hope we find it */
7389 for (slot = 0; slot < nritems; slot++) {
7390 found = btrfs_node_blockptr(eb, slot);
7391 if (found == corrupt->cache.start)
7395 * we couldn't find the bad block. TODO, search all the nodes for pointers
7398 if (eb == info->extent_root->node) {
7403 btrfs_release_path(&path);
7408 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7409 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7412 btrfs_release_path(&path);
7416 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7418 struct btrfs_trans_handle *trans = NULL;
7419 struct cache_extent *cache;
7420 struct btrfs_corrupt_block *corrupt;
7423 cache = search_cache_extent(info->corrupt_blocks, 0);
7427 trans = btrfs_start_transaction(info->extent_root, 1);
7429 return PTR_ERR(trans);
7431 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7432 prune_one_block(trans, info, corrupt);
7433 remove_cache_extent(info->corrupt_blocks, cache);
7436 return btrfs_commit_transaction(trans, info->extent_root);
7440 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7442 struct btrfs_block_group_cache *cache;
7447 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7448 &start, &end, EXTENT_DIRTY);
7451 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7457 cache = btrfs_lookup_first_block_group(fs_info, start);
7462 start = cache->key.objectid + cache->key.offset;
7466 static int check_extent_refs(struct btrfs_root *root,
7467 struct cache_tree *extent_cache)
7469 struct extent_record *rec;
7470 struct cache_extent *cache;
7479 * if we're doing a repair, we have to make sure
7480 * we don't allocate from the problem extents.
7481 * In the worst case, this will be all the
7484 cache = search_cache_extent(extent_cache, 0);
7486 rec = container_of(cache, struct extent_record, cache);
7487 set_extent_dirty(root->fs_info->excluded_extents,
7489 rec->start + rec->max_size - 1,
7491 cache = next_cache_extent(cache);
7494 /* pin down all the corrupted blocks too */
7495 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7497 set_extent_dirty(root->fs_info->excluded_extents,
7499 cache->start + cache->size - 1,
7501 cache = next_cache_extent(cache);
7503 prune_corrupt_blocks(root->fs_info);
7504 reset_cached_block_groups(root->fs_info);
7507 reset_cached_block_groups(root->fs_info);
7510 * We need to delete any duplicate entries we find first otherwise we
7511 * could mess up the extent tree when we have backrefs that actually
7512 * belong to a different extent item and not the weird duplicate one.
7514 while (repair && !list_empty(&duplicate_extents)) {
7515 rec = list_entry(duplicate_extents.next, struct extent_record,
7517 list_del_init(&rec->list);
7519 /* Sometimes we can find a backref before we find an actual
7520 * extent, so we need to process it a little bit to see if there
7521 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7522 * if this is a backref screwup. If we need to delete stuff
7523 * process_duplicates() will return 0, otherwise it will return
7526 if (process_duplicates(root, extent_cache, rec))
7528 ret = delete_duplicate_records(root, rec);
7532 * delete_duplicate_records will return the number of entries
7533 * deleted, so if it's greater than 0 then we know we actually
7534 * did something and we need to remove.
7548 cache = search_cache_extent(extent_cache, 0);
7551 rec = container_of(cache, struct extent_record, cache);
7552 if (rec->num_duplicates) {
7553 fprintf(stderr, "extent item %llu has multiple extent "
7554 "items\n", (unsigned long long)rec->start);
7559 if (rec->refs != rec->extent_item_refs) {
7560 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7561 (unsigned long long)rec->start,
7562 (unsigned long long)rec->nr);
7563 fprintf(stderr, "extent item %llu, found %llu\n",
7564 (unsigned long long)rec->extent_item_refs,
7565 (unsigned long long)rec->refs);
7566 ret = record_orphan_data_extents(root->fs_info, rec);
7573 * we can't use the extent to repair file
7574 * extent, let the fallback method handle it.
7576 if (!fixed && repair) {
7577 ret = fixup_extent_refs(
7588 if (all_backpointers_checked(rec, 1)) {
7589 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7590 (unsigned long long)rec->start,
7591 (unsigned long long)rec->nr);
7593 if (!fixed && !recorded && repair) {
7594 ret = fixup_extent_refs(root->fs_info,
7603 if (!rec->owner_ref_checked) {
7604 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7605 (unsigned long long)rec->start,
7606 (unsigned long long)rec->nr);
7607 if (!fixed && !recorded && repair) {
7608 ret = fixup_extent_refs(root->fs_info,
7617 if (rec->bad_full_backref) {
7618 fprintf(stderr, "bad full backref, on [%llu]\n",
7619 (unsigned long long)rec->start);
7621 ret = fixup_extent_flags(root->fs_info, rec);
7630 * Although it's not a extent ref's problem, we reuse this
7631 * routine for error reporting.
7632 * No repair function yet.
7634 if (rec->crossing_stripes) {
7636 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7637 rec->start, rec->start + rec->max_size);
7642 if (rec->wrong_chunk_type) {
7644 "bad extent [%llu, %llu), type mismatch with chunk\n",
7645 rec->start, rec->start + rec->max_size);
7650 remove_cache_extent(extent_cache, cache);
7651 free_all_extent_backrefs(rec);
7652 if (!init_extent_tree && repair && (!cur_err || fixed))
7653 clear_extent_dirty(root->fs_info->excluded_extents,
7655 rec->start + rec->max_size - 1,
7661 if (ret && ret != -EAGAIN) {
7662 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7665 struct btrfs_trans_handle *trans;
7667 root = root->fs_info->extent_root;
7668 trans = btrfs_start_transaction(root, 1);
7669 if (IS_ERR(trans)) {
7670 ret = PTR_ERR(trans);
7674 btrfs_fix_block_accounting(trans, root);
7675 ret = btrfs_commit_transaction(trans, root);
7680 fprintf(stderr, "repaired damaged extent references\n");
7686 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7690 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7691 stripe_size = length;
7692 stripe_size /= num_stripes;
7693 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7694 stripe_size = length * 2;
7695 stripe_size /= num_stripes;
7696 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7697 stripe_size = length;
7698 stripe_size /= (num_stripes - 1);
7699 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7700 stripe_size = length;
7701 stripe_size /= (num_stripes - 2);
7703 stripe_size = length;
7709 * Check the chunk with its block group/dev list ref:
7710 * Return 0 if all refs seems valid.
7711 * Return 1 if part of refs seems valid, need later check for rebuild ref
7712 * like missing block group and needs to search extent tree to rebuild them.
7713 * Return -1 if essential refs are missing and unable to rebuild.
7715 static int check_chunk_refs(struct chunk_record *chunk_rec,
7716 struct block_group_tree *block_group_cache,
7717 struct device_extent_tree *dev_extent_cache,
7720 struct cache_extent *block_group_item;
7721 struct block_group_record *block_group_rec;
7722 struct cache_extent *dev_extent_item;
7723 struct device_extent_record *dev_extent_rec;
7727 int metadump_v2 = 0;
7731 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7734 if (block_group_item) {
7735 block_group_rec = container_of(block_group_item,
7736 struct block_group_record,
7738 if (chunk_rec->length != block_group_rec->offset ||
7739 chunk_rec->offset != block_group_rec->objectid ||
7741 chunk_rec->type_flags != block_group_rec->flags)) {
7744 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7745 chunk_rec->objectid,
7750 chunk_rec->type_flags,
7751 block_group_rec->objectid,
7752 block_group_rec->type,
7753 block_group_rec->offset,
7754 block_group_rec->offset,
7755 block_group_rec->objectid,
7756 block_group_rec->flags);
7759 list_del_init(&block_group_rec->list);
7760 chunk_rec->bg_rec = block_group_rec;
7765 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7766 chunk_rec->objectid,
7771 chunk_rec->type_flags);
7778 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7779 chunk_rec->num_stripes);
7780 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7781 devid = chunk_rec->stripes[i].devid;
7782 offset = chunk_rec->stripes[i].offset;
7783 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7784 devid, offset, length);
7785 if (dev_extent_item) {
7786 dev_extent_rec = container_of(dev_extent_item,
7787 struct device_extent_record,
7789 if (dev_extent_rec->objectid != devid ||
7790 dev_extent_rec->offset != offset ||
7791 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7792 dev_extent_rec->length != length) {
7795 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7796 chunk_rec->objectid,
7799 chunk_rec->stripes[i].devid,
7800 chunk_rec->stripes[i].offset,
7801 dev_extent_rec->objectid,
7802 dev_extent_rec->offset,
7803 dev_extent_rec->length);
7806 list_move(&dev_extent_rec->chunk_list,
7807 &chunk_rec->dextents);
7812 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7813 chunk_rec->objectid,
7816 chunk_rec->stripes[i].devid,
7817 chunk_rec->stripes[i].offset);
7824 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7825 int check_chunks(struct cache_tree *chunk_cache,
7826 struct block_group_tree *block_group_cache,
7827 struct device_extent_tree *dev_extent_cache,
7828 struct list_head *good, struct list_head *bad,
7829 struct list_head *rebuild, int silent)
7831 struct cache_extent *chunk_item;
7832 struct chunk_record *chunk_rec;
7833 struct block_group_record *bg_rec;
7834 struct device_extent_record *dext_rec;
7838 chunk_item = first_cache_extent(chunk_cache);
7839 while (chunk_item) {
7840 chunk_rec = container_of(chunk_item, struct chunk_record,
7842 err = check_chunk_refs(chunk_rec, block_group_cache,
7843 dev_extent_cache, silent);
7846 if (err == 0 && good)
7847 list_add_tail(&chunk_rec->list, good);
7848 if (err > 0 && rebuild)
7849 list_add_tail(&chunk_rec->list, rebuild);
7851 list_add_tail(&chunk_rec->list, bad);
7852 chunk_item = next_cache_extent(chunk_item);
7855 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7858 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7866 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7870 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7881 static int check_device_used(struct device_record *dev_rec,
7882 struct device_extent_tree *dext_cache)
7884 struct cache_extent *cache;
7885 struct device_extent_record *dev_extent_rec;
7888 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7890 dev_extent_rec = container_of(cache,
7891 struct device_extent_record,
7893 if (dev_extent_rec->objectid != dev_rec->devid)
7896 list_del_init(&dev_extent_rec->device_list);
7897 total_byte += dev_extent_rec->length;
7898 cache = next_cache_extent(cache);
7901 if (total_byte != dev_rec->byte_used) {
7903 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7904 total_byte, dev_rec->byte_used, dev_rec->objectid,
7905 dev_rec->type, dev_rec->offset);
7912 /* check btrfs_dev_item -> btrfs_dev_extent */
7913 static int check_devices(struct rb_root *dev_cache,
7914 struct device_extent_tree *dev_extent_cache)
7916 struct rb_node *dev_node;
7917 struct device_record *dev_rec;
7918 struct device_extent_record *dext_rec;
7922 dev_node = rb_first(dev_cache);
7924 dev_rec = container_of(dev_node, struct device_record, node);
7925 err = check_device_used(dev_rec, dev_extent_cache);
7929 dev_node = rb_next(dev_node);
7931 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7934 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7935 dext_rec->objectid, dext_rec->offset, dext_rec->length);
7942 static int add_root_item_to_list(struct list_head *head,
7943 u64 objectid, u64 bytenr, u64 last_snapshot,
7944 u8 level, u8 drop_level,
7945 int level_size, struct btrfs_key *drop_key)
7948 struct root_item_record *ri_rec;
7949 ri_rec = malloc(sizeof(*ri_rec));
7952 ri_rec->bytenr = bytenr;
7953 ri_rec->objectid = objectid;
7954 ri_rec->level = level;
7955 ri_rec->level_size = level_size;
7956 ri_rec->drop_level = drop_level;
7957 ri_rec->last_snapshot = last_snapshot;
7959 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7960 list_add_tail(&ri_rec->list, head);
7965 static void free_root_item_list(struct list_head *list)
7967 struct root_item_record *ri_rec;
7969 while (!list_empty(list)) {
7970 ri_rec = list_first_entry(list, struct root_item_record,
7972 list_del_init(&ri_rec->list);
7977 static int deal_root_from_list(struct list_head *list,
7978 struct btrfs_root *root,
7979 struct block_info *bits,
7981 struct cache_tree *pending,
7982 struct cache_tree *seen,
7983 struct cache_tree *reada,
7984 struct cache_tree *nodes,
7985 struct cache_tree *extent_cache,
7986 struct cache_tree *chunk_cache,
7987 struct rb_root *dev_cache,
7988 struct block_group_tree *block_group_cache,
7989 struct device_extent_tree *dev_extent_cache)
7994 while (!list_empty(list)) {
7995 struct root_item_record *rec;
7996 struct extent_buffer *buf;
7997 rec = list_entry(list->next,
7998 struct root_item_record, list);
8000 buf = read_tree_block(root->fs_info->tree_root,
8001 rec->bytenr, rec->level_size, 0);
8002 if (!extent_buffer_uptodate(buf)) {
8003 free_extent_buffer(buf);
8007 add_root_to_pending(buf, extent_cache, pending,
8008 seen, nodes, rec->objectid);
8010 * To rebuild extent tree, we need deal with snapshot
8011 * one by one, otherwise we deal with node firstly which
8012 * can maximize readahead.
8015 ret = run_next_block(root, bits, bits_nr, &last,
8016 pending, seen, reada, nodes,
8017 extent_cache, chunk_cache,
8018 dev_cache, block_group_cache,
8019 dev_extent_cache, rec);
8023 free_extent_buffer(buf);
8024 list_del(&rec->list);
8030 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8031 reada, nodes, extent_cache, chunk_cache,
8032 dev_cache, block_group_cache,
8033 dev_extent_cache, NULL);
8043 static int check_chunks_and_extents(struct btrfs_root *root)
8045 struct rb_root dev_cache;
8046 struct cache_tree chunk_cache;
8047 struct block_group_tree block_group_cache;
8048 struct device_extent_tree dev_extent_cache;
8049 struct cache_tree extent_cache;
8050 struct cache_tree seen;
8051 struct cache_tree pending;
8052 struct cache_tree reada;
8053 struct cache_tree nodes;
8054 struct extent_io_tree excluded_extents;
8055 struct cache_tree corrupt_blocks;
8056 struct btrfs_path path;
8057 struct btrfs_key key;
8058 struct btrfs_key found_key;
8060 struct block_info *bits;
8062 struct extent_buffer *leaf;
8064 struct btrfs_root_item ri;
8065 struct list_head dropping_trees;
8066 struct list_head normal_trees;
8067 struct btrfs_root *root1;
8072 dev_cache = RB_ROOT;
8073 cache_tree_init(&chunk_cache);
8074 block_group_tree_init(&block_group_cache);
8075 device_extent_tree_init(&dev_extent_cache);
8077 cache_tree_init(&extent_cache);
8078 cache_tree_init(&seen);
8079 cache_tree_init(&pending);
8080 cache_tree_init(&nodes);
8081 cache_tree_init(&reada);
8082 cache_tree_init(&corrupt_blocks);
8083 extent_io_tree_init(&excluded_extents);
8084 INIT_LIST_HEAD(&dropping_trees);
8085 INIT_LIST_HEAD(&normal_trees);
8088 root->fs_info->excluded_extents = &excluded_extents;
8089 root->fs_info->fsck_extent_cache = &extent_cache;
8090 root->fs_info->free_extent_hook = free_extent_hook;
8091 root->fs_info->corrupt_blocks = &corrupt_blocks;
8095 bits = malloc(bits_nr * sizeof(struct block_info));
8101 if (ctx.progress_enabled) {
8102 ctx.tp = TASK_EXTENTS;
8103 task_start(ctx.info);
8107 root1 = root->fs_info->tree_root;
8108 level = btrfs_header_level(root1->node);
8109 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8110 root1->node->start, 0, level, 0,
8111 btrfs_level_size(root1, level), NULL);
8114 root1 = root->fs_info->chunk_root;
8115 level = btrfs_header_level(root1->node);
8116 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8117 root1->node->start, 0, level, 0,
8118 btrfs_level_size(root1, level), NULL);
8121 btrfs_init_path(&path);
8124 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8125 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8130 leaf = path.nodes[0];
8131 slot = path.slots[0];
8132 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8133 ret = btrfs_next_leaf(root, &path);
8136 leaf = path.nodes[0];
8137 slot = path.slots[0];
8139 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8140 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8141 unsigned long offset;
8144 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8145 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8146 last_snapshot = btrfs_root_last_snapshot(&ri);
8147 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8148 level = btrfs_root_level(&ri);
8149 level_size = btrfs_level_size(root, level);
8150 ret = add_root_item_to_list(&normal_trees,
8152 btrfs_root_bytenr(&ri),
8153 last_snapshot, level,
8154 0, level_size, NULL);
8158 level = btrfs_root_level(&ri);
8159 level_size = btrfs_level_size(root, level);
8160 objectid = found_key.objectid;
8161 btrfs_disk_key_to_cpu(&found_key,
8163 ret = add_root_item_to_list(&dropping_trees,
8165 btrfs_root_bytenr(&ri),
8166 last_snapshot, level,
8168 level_size, &found_key);
8175 btrfs_release_path(&path);
8178 * check_block can return -EAGAIN if it fixes something, please keep
8179 * this in mind when dealing with return values from these functions, if
8180 * we get -EAGAIN we want to fall through and restart the loop.
8182 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8183 &seen, &reada, &nodes, &extent_cache,
8184 &chunk_cache, &dev_cache, &block_group_cache,
8191 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8192 &pending, &seen, &reada, &nodes,
8193 &extent_cache, &chunk_cache, &dev_cache,
8194 &block_group_cache, &dev_extent_cache);
8201 ret = check_chunks(&chunk_cache, &block_group_cache,
8202 &dev_extent_cache, NULL, NULL, NULL, 0);
8209 ret = check_extent_refs(root, &extent_cache);
8216 ret = check_devices(&dev_cache, &dev_extent_cache);
8221 task_stop(ctx.info);
8223 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8224 extent_io_tree_cleanup(&excluded_extents);
8225 root->fs_info->fsck_extent_cache = NULL;
8226 root->fs_info->free_extent_hook = NULL;
8227 root->fs_info->corrupt_blocks = NULL;
8228 root->fs_info->excluded_extents = NULL;
8231 free_chunk_cache_tree(&chunk_cache);
8232 free_device_cache_tree(&dev_cache);
8233 free_block_group_tree(&block_group_cache);
8234 free_device_extent_tree(&dev_extent_cache);
8235 free_extent_cache_tree(&seen);
8236 free_extent_cache_tree(&pending);
8237 free_extent_cache_tree(&reada);
8238 free_extent_cache_tree(&nodes);
8241 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8242 free_extent_cache_tree(&seen);
8243 free_extent_cache_tree(&pending);
8244 free_extent_cache_tree(&reada);
8245 free_extent_cache_tree(&nodes);
8246 free_chunk_cache_tree(&chunk_cache);
8247 free_block_group_tree(&block_group_cache);
8248 free_device_cache_tree(&dev_cache);
8249 free_device_extent_tree(&dev_extent_cache);
8250 free_extent_record_cache(root->fs_info, &extent_cache);
8251 free_root_item_list(&normal_trees);
8252 free_root_item_list(&dropping_trees);
8253 extent_io_tree_cleanup(&excluded_extents);
8257 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8258 struct btrfs_root *root, int overwrite)
8260 struct extent_buffer *c;
8261 struct extent_buffer *old = root->node;
8264 struct btrfs_disk_key disk_key = {0,0,0};
8270 extent_buffer_get(c);
8273 c = btrfs_alloc_free_block(trans, root,
8274 btrfs_level_size(root, 0),
8275 root->root_key.objectid,
8276 &disk_key, level, 0, 0);
8279 extent_buffer_get(c);
8283 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8284 btrfs_set_header_level(c, level);
8285 btrfs_set_header_bytenr(c, c->start);
8286 btrfs_set_header_generation(c, trans->transid);
8287 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8288 btrfs_set_header_owner(c, root->root_key.objectid);
8290 write_extent_buffer(c, root->fs_info->fsid,
8291 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8293 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8294 btrfs_header_chunk_tree_uuid(c),
8297 btrfs_mark_buffer_dirty(c);
8299 * this case can happen in the following case:
8301 * 1.overwrite previous root.
8303 * 2.reinit reloc data root, this is because we skip pin
8304 * down reloc data tree before which means we can allocate
8305 * same block bytenr here.
8307 if (old->start == c->start) {
8308 btrfs_set_root_generation(&root->root_item,
8310 root->root_item.level = btrfs_header_level(root->node);
8311 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8312 &root->root_key, &root->root_item);
8314 free_extent_buffer(c);
8318 free_extent_buffer(old);
8320 add_root_to_dirty_list(root);
8324 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8325 struct extent_buffer *eb, int tree_root)
8327 struct extent_buffer *tmp;
8328 struct btrfs_root_item *ri;
8329 struct btrfs_key key;
8332 int level = btrfs_header_level(eb);
8338 * If we have pinned this block before, don't pin it again.
8339 * This can not only avoid forever loop with broken filesystem
8340 * but also give us some speedups.
8342 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8343 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8346 btrfs_pin_extent(fs_info, eb->start, eb->len);
8348 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8349 nritems = btrfs_header_nritems(eb);
8350 for (i = 0; i < nritems; i++) {
8352 btrfs_item_key_to_cpu(eb, &key, i);
8353 if (key.type != BTRFS_ROOT_ITEM_KEY)
8355 /* Skip the extent root and reloc roots */
8356 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8357 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8358 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8360 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8361 bytenr = btrfs_disk_root_bytenr(eb, ri);
8364 * If at any point we start needing the real root we
8365 * will have to build a stump root for the root we are
8366 * in, but for now this doesn't actually use the root so
8367 * just pass in extent_root.
8369 tmp = read_tree_block(fs_info->extent_root, bytenr,
8371 if (!extent_buffer_uptodate(tmp)) {
8372 fprintf(stderr, "Error reading root block\n");
8375 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8376 free_extent_buffer(tmp);
8380 bytenr = btrfs_node_blockptr(eb, i);
8382 /* If we aren't the tree root don't read the block */
8383 if (level == 1 && !tree_root) {
8384 btrfs_pin_extent(fs_info, bytenr, leafsize);
8388 tmp = read_tree_block(fs_info->extent_root, bytenr,
8390 if (!extent_buffer_uptodate(tmp)) {
8391 fprintf(stderr, "Error reading tree block\n");
8394 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8395 free_extent_buffer(tmp);
8404 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8408 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8412 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8415 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8417 struct btrfs_block_group_cache *cache;
8418 struct btrfs_path *path;
8419 struct extent_buffer *leaf;
8420 struct btrfs_chunk *chunk;
8421 struct btrfs_key key;
8425 path = btrfs_alloc_path();
8430 key.type = BTRFS_CHUNK_ITEM_KEY;
8433 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8435 btrfs_free_path(path);
8440 * We do this in case the block groups were screwed up and had alloc
8441 * bits that aren't actually set on the chunks. This happens with
8442 * restored images every time and could happen in real life I guess.
8444 fs_info->avail_data_alloc_bits = 0;
8445 fs_info->avail_metadata_alloc_bits = 0;
8446 fs_info->avail_system_alloc_bits = 0;
8448 /* First we need to create the in-memory block groups */
8450 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8451 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8453 btrfs_free_path(path);
8461 leaf = path->nodes[0];
8462 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8463 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8468 chunk = btrfs_item_ptr(leaf, path->slots[0],
8469 struct btrfs_chunk);
8470 btrfs_add_block_group(fs_info, 0,
8471 btrfs_chunk_type(leaf, chunk),
8472 key.objectid, key.offset,
8473 btrfs_chunk_length(leaf, chunk));
8474 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8475 key.offset + btrfs_chunk_length(leaf, chunk),
8481 cache = btrfs_lookup_first_block_group(fs_info, start);
8485 start = cache->key.objectid + cache->key.offset;
8488 btrfs_free_path(path);
8492 static int reset_balance(struct btrfs_trans_handle *trans,
8493 struct btrfs_fs_info *fs_info)
8495 struct btrfs_root *root = fs_info->tree_root;
8496 struct btrfs_path *path;
8497 struct extent_buffer *leaf;
8498 struct btrfs_key key;
8499 int del_slot, del_nr = 0;
8503 path = btrfs_alloc_path();
8507 key.objectid = BTRFS_BALANCE_OBJECTID;
8508 key.type = BTRFS_BALANCE_ITEM_KEY;
8511 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8516 goto reinit_data_reloc;
8521 ret = btrfs_del_item(trans, root, path);
8524 btrfs_release_path(path);
8526 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8527 key.type = BTRFS_ROOT_ITEM_KEY;
8530 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8534 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8539 ret = btrfs_del_items(trans, root, path,
8546 btrfs_release_path(path);
8549 ret = btrfs_search_slot(trans, root, &key, path,
8556 leaf = path->nodes[0];
8557 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8558 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8560 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8565 del_slot = path->slots[0];
8574 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8578 btrfs_release_path(path);
8581 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8582 key.type = BTRFS_ROOT_ITEM_KEY;
8583 key.offset = (u64)-1;
8584 root = btrfs_read_fs_root(fs_info, &key);
8586 fprintf(stderr, "Error reading data reloc tree\n");
8587 ret = PTR_ERR(root);
8590 record_root_in_trans(trans, root);
8591 ret = btrfs_fsck_reinit_root(trans, root, 0);
8594 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8596 btrfs_free_path(path);
8600 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8601 struct btrfs_fs_info *fs_info)
8607 * The only reason we don't do this is because right now we're just
8608 * walking the trees we find and pinning down their bytes, we don't look
8609 * at any of the leaves. In order to do mixed groups we'd have to check
8610 * the leaves of any fs roots and pin down the bytes for any file
8611 * extents we find. Not hard but why do it if we don't have to?
8613 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8614 fprintf(stderr, "We don't support re-initing the extent tree "
8615 "for mixed block groups yet, please notify a btrfs "
8616 "developer you want to do this so they can add this "
8617 "functionality.\n");
8622 * first we need to walk all of the trees except the extent tree and pin
8623 * down the bytes that are in use so we don't overwrite any existing
8626 ret = pin_metadata_blocks(fs_info);
8628 fprintf(stderr, "error pinning down used bytes\n");
8633 * Need to drop all the block groups since we're going to recreate all
8636 btrfs_free_block_groups(fs_info);
8637 ret = reset_block_groups(fs_info);
8639 fprintf(stderr, "error resetting the block groups\n");
8643 /* Ok we can allocate now, reinit the extent root */
8644 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8646 fprintf(stderr, "extent root initialization failed\n");
8648 * When the transaction code is updated we should end the
8649 * transaction, but for now progs only knows about commit so
8650 * just return an error.
8656 * Now we have all the in-memory block groups setup so we can make
8657 * allocations properly, and the metadata we care about is safe since we
8658 * pinned all of it above.
8661 struct btrfs_block_group_cache *cache;
8663 cache = btrfs_lookup_first_block_group(fs_info, start);
8666 start = cache->key.objectid + cache->key.offset;
8667 ret = btrfs_insert_item(trans, fs_info->extent_root,
8668 &cache->key, &cache->item,
8669 sizeof(cache->item));
8671 fprintf(stderr, "Error adding block group\n");
8674 btrfs_extent_post_op(trans, fs_info->extent_root);
8677 ret = reset_balance(trans, fs_info);
8679 fprintf(stderr, "error reseting the pending balance\n");
8684 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8686 struct btrfs_path *path;
8687 struct btrfs_trans_handle *trans;
8688 struct btrfs_key key;
8691 printf("Recowing metadata block %llu\n", eb->start);
8692 key.objectid = btrfs_header_owner(eb);
8693 key.type = BTRFS_ROOT_ITEM_KEY;
8694 key.offset = (u64)-1;
8696 root = btrfs_read_fs_root(root->fs_info, &key);
8698 fprintf(stderr, "Couldn't find owner root %llu\n",
8700 return PTR_ERR(root);
8703 path = btrfs_alloc_path();
8707 trans = btrfs_start_transaction(root, 1);
8708 if (IS_ERR(trans)) {
8709 btrfs_free_path(path);
8710 return PTR_ERR(trans);
8713 path->lowest_level = btrfs_header_level(eb);
8714 if (path->lowest_level)
8715 btrfs_node_key_to_cpu(eb, &key, 0);
8717 btrfs_item_key_to_cpu(eb, &key, 0);
8719 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8720 btrfs_commit_transaction(trans, root);
8721 btrfs_free_path(path);
8725 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8727 struct btrfs_path *path;
8728 struct btrfs_trans_handle *trans;
8729 struct btrfs_key key;
8732 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8733 bad->key.type, bad->key.offset);
8734 key.objectid = bad->root_id;
8735 key.type = BTRFS_ROOT_ITEM_KEY;
8736 key.offset = (u64)-1;
8738 root = btrfs_read_fs_root(root->fs_info, &key);
8740 fprintf(stderr, "Couldn't find owner root %llu\n",
8742 return PTR_ERR(root);
8745 path = btrfs_alloc_path();
8749 trans = btrfs_start_transaction(root, 1);
8750 if (IS_ERR(trans)) {
8751 btrfs_free_path(path);
8752 return PTR_ERR(trans);
8755 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8761 ret = btrfs_del_item(trans, root, path);
8763 btrfs_commit_transaction(trans, root);
8764 btrfs_free_path(path);
8768 static int zero_log_tree(struct btrfs_root *root)
8770 struct btrfs_trans_handle *trans;
8773 trans = btrfs_start_transaction(root, 1);
8774 if (IS_ERR(trans)) {
8775 ret = PTR_ERR(trans);
8778 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8779 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8780 ret = btrfs_commit_transaction(trans, root);
8784 static int populate_csum(struct btrfs_trans_handle *trans,
8785 struct btrfs_root *csum_root, char *buf, u64 start,
8792 while (offset < len) {
8793 sectorsize = csum_root->sectorsize;
8794 ret = read_extent_data(csum_root, buf, start + offset,
8798 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8799 start + offset, buf, sectorsize);
8802 offset += sectorsize;
8807 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8808 struct btrfs_root *csum_root,
8809 struct btrfs_root *cur_root)
8811 struct btrfs_path *path;
8812 struct btrfs_key key;
8813 struct extent_buffer *node;
8814 struct btrfs_file_extent_item *fi;
8821 path = btrfs_alloc_path();
8824 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8834 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8837 /* Iterate all regular file extents and fill its csum */
8839 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8841 if (key.type != BTRFS_EXTENT_DATA_KEY)
8843 node = path->nodes[0];
8844 slot = path->slots[0];
8845 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8846 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8848 start = btrfs_file_extent_disk_bytenr(node, fi);
8849 len = btrfs_file_extent_disk_num_bytes(node, fi);
8851 ret = populate_csum(trans, csum_root, buf, start, len);
8858 * TODO: if next leaf is corrupted, jump to nearest next valid
8861 ret = btrfs_next_item(cur_root, path);
8871 btrfs_free_path(path);
8876 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8877 struct btrfs_root *csum_root)
8879 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8880 struct btrfs_path *path;
8881 struct btrfs_root *tree_root = fs_info->tree_root;
8882 struct btrfs_root *cur_root;
8883 struct extent_buffer *node;
8884 struct btrfs_key key;
8888 path = btrfs_alloc_path();
8892 key.objectid = BTRFS_FS_TREE_OBJECTID;
8894 key.type = BTRFS_ROOT_ITEM_KEY;
8896 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8905 node = path->nodes[0];
8906 slot = path->slots[0];
8907 btrfs_item_key_to_cpu(node, &key, slot);
8908 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8910 if (key.type != BTRFS_ROOT_ITEM_KEY)
8912 if (!is_fstree(key.objectid))
8914 key.offset = (u64)-1;
8916 cur_root = btrfs_read_fs_root(fs_info, &key);
8917 if (IS_ERR(cur_root) || !cur_root) {
8918 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8922 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8927 ret = btrfs_next_item(tree_root, path);
8937 btrfs_free_path(path);
8941 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8942 struct btrfs_root *csum_root)
8944 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8945 struct btrfs_path *path;
8946 struct btrfs_extent_item *ei;
8947 struct extent_buffer *leaf;
8949 struct btrfs_key key;
8952 path = btrfs_alloc_path();
8957 key.type = BTRFS_EXTENT_ITEM_KEY;
8960 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8962 btrfs_free_path(path);
8966 buf = malloc(csum_root->sectorsize);
8968 btrfs_free_path(path);
8973 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8974 ret = btrfs_next_leaf(extent_root, path);
8982 leaf = path->nodes[0];
8984 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8985 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8990 ei = btrfs_item_ptr(leaf, path->slots[0],
8991 struct btrfs_extent_item);
8992 if (!(btrfs_extent_flags(leaf, ei) &
8993 BTRFS_EXTENT_FLAG_DATA)) {
8998 ret = populate_csum(trans, csum_root, buf, key.objectid,
9005 btrfs_free_path(path);
9011 * Recalculate the csum and put it into the csum tree.
9013 * Extent tree init will wipe out all the extent info, so in that case, we
9014 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9015 * will use fs/subvol trees to init the csum tree.
9017 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9018 struct btrfs_root *csum_root,
9022 return fill_csum_tree_from_fs(trans, csum_root);
9024 return fill_csum_tree_from_extent(trans, csum_root);
9027 struct root_item_info {
9028 /* level of the root */
9030 /* number of nodes at this level, must be 1 for a root */
9034 struct cache_extent cache_extent;
9037 static struct cache_tree *roots_info_cache = NULL;
9039 static void free_roots_info_cache(void)
9041 if (!roots_info_cache)
9044 while (!cache_tree_empty(roots_info_cache)) {
9045 struct cache_extent *entry;
9046 struct root_item_info *rii;
9048 entry = first_cache_extent(roots_info_cache);
9051 remove_cache_extent(roots_info_cache, entry);
9052 rii = container_of(entry, struct root_item_info, cache_extent);
9056 free(roots_info_cache);
9057 roots_info_cache = NULL;
9060 static int build_roots_info_cache(struct btrfs_fs_info *info)
9063 struct btrfs_key key;
9064 struct extent_buffer *leaf;
9065 struct btrfs_path *path;
9067 if (!roots_info_cache) {
9068 roots_info_cache = malloc(sizeof(*roots_info_cache));
9069 if (!roots_info_cache)
9071 cache_tree_init(roots_info_cache);
9074 path = btrfs_alloc_path();
9079 key.type = BTRFS_EXTENT_ITEM_KEY;
9082 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9085 leaf = path->nodes[0];
9088 struct btrfs_key found_key;
9089 struct btrfs_extent_item *ei;
9090 struct btrfs_extent_inline_ref *iref;
9091 int slot = path->slots[0];
9096 struct cache_extent *entry;
9097 struct root_item_info *rii;
9099 if (slot >= btrfs_header_nritems(leaf)) {
9100 ret = btrfs_next_leaf(info->extent_root, path);
9107 leaf = path->nodes[0];
9108 slot = path->slots[0];
9111 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9113 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9114 found_key.type != BTRFS_METADATA_ITEM_KEY)
9117 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9118 flags = btrfs_extent_flags(leaf, ei);
9120 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9121 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9124 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9125 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9126 level = found_key.offset;
9128 struct btrfs_tree_block_info *info;
9130 info = (struct btrfs_tree_block_info *)(ei + 1);
9131 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9132 level = btrfs_tree_block_level(leaf, info);
9136 * For a root extent, it must be of the following type and the
9137 * first (and only one) iref in the item.
9139 type = btrfs_extent_inline_ref_type(leaf, iref);
9140 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9143 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9144 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9146 rii = malloc(sizeof(struct root_item_info));
9151 rii->cache_extent.start = root_id;
9152 rii->cache_extent.size = 1;
9153 rii->level = (u8)-1;
9154 entry = &rii->cache_extent;
9155 ret = insert_cache_extent(roots_info_cache, entry);
9158 rii = container_of(entry, struct root_item_info,
9162 ASSERT(rii->cache_extent.start == root_id);
9163 ASSERT(rii->cache_extent.size == 1);
9165 if (level > rii->level || rii->level == (u8)-1) {
9167 rii->bytenr = found_key.objectid;
9168 rii->gen = btrfs_extent_generation(leaf, ei);
9169 rii->node_count = 1;
9170 } else if (level == rii->level) {
9178 btrfs_free_path(path);
9183 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9184 struct btrfs_path *path,
9185 const struct btrfs_key *root_key,
9186 const int read_only_mode)
9188 const u64 root_id = root_key->objectid;
9189 struct cache_extent *entry;
9190 struct root_item_info *rii;
9191 struct btrfs_root_item ri;
9192 unsigned long offset;
9194 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9197 "Error: could not find extent items for root %llu\n",
9198 root_key->objectid);
9202 rii = container_of(entry, struct root_item_info, cache_extent);
9203 ASSERT(rii->cache_extent.start == root_id);
9204 ASSERT(rii->cache_extent.size == 1);
9206 if (rii->node_count != 1) {
9208 "Error: could not find btree root extent for root %llu\n",
9213 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9214 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9216 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9217 btrfs_root_level(&ri) != rii->level ||
9218 btrfs_root_generation(&ri) != rii->gen) {
9221 * If we're in repair mode but our caller told us to not update
9222 * the root item, i.e. just check if it needs to be updated, don't
9223 * print this message, since the caller will call us again shortly
9224 * for the same root item without read only mode (the caller will
9225 * open a transaction first).
9227 if (!(read_only_mode && repair))
9229 "%sroot item for root %llu,"
9230 " current bytenr %llu, current gen %llu, current level %u,"
9231 " new bytenr %llu, new gen %llu, new level %u\n",
9232 (read_only_mode ? "" : "fixing "),
9234 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9235 btrfs_root_level(&ri),
9236 rii->bytenr, rii->gen, rii->level);
9238 if (btrfs_root_generation(&ri) > rii->gen) {
9240 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9241 root_id, btrfs_root_generation(&ri), rii->gen);
9245 if (!read_only_mode) {
9246 btrfs_set_root_bytenr(&ri, rii->bytenr);
9247 btrfs_set_root_level(&ri, rii->level);
9248 btrfs_set_root_generation(&ri, rii->gen);
9249 write_extent_buffer(path->nodes[0], &ri,
9250 offset, sizeof(ri));
9260 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9261 * caused read-only snapshots to be corrupted if they were created at a moment
9262 * when the source subvolume/snapshot had orphan items. The issue was that the
9263 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9264 * node instead of the post orphan cleanup root node.
9265 * So this function, and its callees, just detects and fixes those cases. Even
9266 * though the regression was for read-only snapshots, this function applies to
9267 * any snapshot/subvolume root.
9268 * This must be run before any other repair code - not doing it so, makes other
9269 * repair code delete or modify backrefs in the extent tree for example, which
9270 * will result in an inconsistent fs after repairing the root items.
9272 static int repair_root_items(struct btrfs_fs_info *info)
9274 struct btrfs_path *path = NULL;
9275 struct btrfs_key key;
9276 struct extent_buffer *leaf;
9277 struct btrfs_trans_handle *trans = NULL;
9282 ret = build_roots_info_cache(info);
9286 path = btrfs_alloc_path();
9292 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9293 key.type = BTRFS_ROOT_ITEM_KEY;
9298 * Avoid opening and committing transactions if a leaf doesn't have
9299 * any root items that need to be fixed, so that we avoid rotating
9300 * backup roots unnecessarily.
9303 trans = btrfs_start_transaction(info->tree_root, 1);
9304 if (IS_ERR(trans)) {
9305 ret = PTR_ERR(trans);
9310 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9314 leaf = path->nodes[0];
9317 struct btrfs_key found_key;
9319 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9320 int no_more_keys = find_next_key(path, &key);
9322 btrfs_release_path(path);
9324 ret = btrfs_commit_transaction(trans,
9336 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9338 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9340 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9343 ret = maybe_repair_root_item(info, path, &found_key,
9348 if (!trans && repair) {
9351 btrfs_release_path(path);
9361 free_roots_info_cache();
9362 btrfs_free_path(path);
9364 btrfs_commit_transaction(trans, info->tree_root);
9371 const char * const cmd_check_usage[] = {
9372 "btrfs check [options] <device>",
9373 "Check structural inegrity of a filesystem (unmounted).",
9374 "Check structural inegrity of an unmounted filesystem. Verify internal",
9375 "trees' consistency and item connectivity. In the repair mode try to",
9376 "fix the problems found.",
9377 "WARNING: the repair mode is considered dangerous",
9379 "-s|--super <superblock> use this superblock copy",
9380 "-b|--backup use the backup root copy",
9381 "--repair try to repair the filesystem",
9382 "--readonly run in read-only mode (default)",
9383 "--init-csum-tree create a new CRC tree",
9384 "--init-extent-tree create a new extent tree",
9385 "--check-data-csum verify checkums of data blocks",
9386 "-Q|--qgroup-report print a report on qgroup consistency",
9387 "-E|--subvol-extents <subvolid>",
9388 " print subvolume extents and sharing state",
9389 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9390 "-p|--progress indicate progress",
9394 int cmd_check(int argc, char **argv)
9396 struct cache_tree root_cache;
9397 struct btrfs_root *root;
9398 struct btrfs_fs_info *info;
9401 u64 tree_root_bytenr = 0;
9402 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9405 int init_csum_tree = 0;
9407 int qgroup_report = 0;
9408 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9412 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9413 OPT_CHECK_CSUM, OPT_READONLY };
9414 static const struct option long_options[] = {
9415 { "super", required_argument, NULL, 's' },
9416 { "repair", no_argument, NULL, OPT_REPAIR },
9417 { "readonly", no_argument, NULL, OPT_READONLY },
9418 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9419 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9420 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9421 { "backup", no_argument, NULL, 'b' },
9422 { "subvol-extents", required_argument, NULL, 'E' },
9423 { "qgroup-report", no_argument, NULL, 'Q' },
9424 { "tree-root", required_argument, NULL, 'r' },
9425 { "progress", no_argument, NULL, 'p' },
9429 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9433 case 'a': /* ignored */ break;
9435 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9438 num = arg_strtou64(optarg);
9439 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9441 "ERROR: super mirror should be less than: %d\n",
9442 BTRFS_SUPER_MIRROR_MAX);
9445 bytenr = btrfs_sb_offset(((int)num));
9446 printf("using SB copy %llu, bytenr %llu\n", num,
9447 (unsigned long long)bytenr);
9453 subvolid = arg_strtou64(optarg);
9456 tree_root_bytenr = arg_strtou64(optarg);
9459 ctx.progress_enabled = true;
9463 usage(cmd_check_usage);
9465 printf("enabling repair mode\n");
9467 ctree_flags |= OPEN_CTREE_WRITES;
9473 printf("Creating a new CRC tree\n");
9476 ctree_flags |= OPEN_CTREE_WRITES;
9478 case OPT_INIT_EXTENT:
9479 init_extent_tree = 1;
9480 ctree_flags |= (OPEN_CTREE_WRITES |
9481 OPEN_CTREE_NO_BLOCK_GROUPS);
9484 case OPT_CHECK_CSUM:
9485 check_data_csum = 1;
9489 argc = argc - optind;
9491 if (check_argc_exact(argc, 1))
9492 usage(cmd_check_usage);
9494 if (ctx.progress_enabled) {
9495 ctx.tp = TASK_NOTHING;
9496 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9499 /* This check is the only reason for --readonly to exist */
9500 if (readonly && repair) {
9501 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9506 cache_tree_init(&root_cache);
9508 if((ret = check_mounted(argv[optind])) < 0) {
9509 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9512 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9517 /* only allow partial opening under repair mode */
9519 ctree_flags |= OPEN_CTREE_PARTIAL;
9521 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9524 fprintf(stderr, "Couldn't open file system\n");
9530 root = info->fs_root;
9533 * repair mode will force us to commit transaction which
9534 * will make us fail to load log tree when mounting.
9536 if (repair && btrfs_super_log_root(info->super_copy)) {
9537 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9542 ret = zero_log_tree(root);
9544 fprintf(stderr, "fail to zero log tree\n");
9549 uuid_unparse(info->super_copy->fsid, uuidbuf);
9550 if (qgroup_report) {
9551 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9553 ret = qgroup_verify_all(info);
9555 print_qgroup_report(1);
9559 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9560 subvolid, argv[optind], uuidbuf);
9561 ret = print_extent_state(info, subvolid);
9564 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9566 if (!extent_buffer_uptodate(info->tree_root->node) ||
9567 !extent_buffer_uptodate(info->dev_root->node) ||
9568 !extent_buffer_uptodate(info->chunk_root->node)) {
9569 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9574 if (init_extent_tree || init_csum_tree) {
9575 struct btrfs_trans_handle *trans;
9577 trans = btrfs_start_transaction(info->extent_root, 0);
9578 if (IS_ERR(trans)) {
9579 fprintf(stderr, "Error starting transaction\n");
9580 ret = PTR_ERR(trans);
9584 if (init_extent_tree) {
9585 printf("Creating a new extent tree\n");
9586 ret = reinit_extent_tree(trans, info);
9591 if (init_csum_tree) {
9592 fprintf(stderr, "Reinit crc root\n");
9593 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9595 fprintf(stderr, "crc root initialization failed\n");
9600 ret = fill_csum_tree(trans, info->csum_root,
9603 fprintf(stderr, "crc refilling failed\n");
9608 * Ok now we commit and run the normal fsck, which will add
9609 * extent entries for all of the items it finds.
9611 ret = btrfs_commit_transaction(trans, info->extent_root);
9615 if (!extent_buffer_uptodate(info->extent_root->node)) {
9616 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9620 if (!extent_buffer_uptodate(info->csum_root->node)) {
9621 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9626 if (!ctx.progress_enabled)
9627 fprintf(stderr, "checking extents\n");
9628 ret = check_chunks_and_extents(root);
9630 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9632 ret = repair_root_items(info);
9636 fprintf(stderr, "Fixed %d roots.\n", ret);
9638 } else if (ret > 0) {
9640 "Found %d roots with an outdated root item.\n",
9643 "Please run a filesystem check with the option --repair to fix them.\n");
9648 if (!ctx.progress_enabled)
9649 fprintf(stderr, "checking free space cache\n");
9650 ret = check_space_cache(root);
9655 * We used to have to have these hole extents in between our real
9656 * extents so if we don't have this flag set we need to make sure there
9657 * are no gaps in the file extents for inodes, otherwise we can just
9658 * ignore it when this happens.
9660 no_holes = btrfs_fs_incompat(root->fs_info,
9661 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9662 if (!ctx.progress_enabled)
9663 fprintf(stderr, "checking fs roots\n");
9664 ret = check_fs_roots(root, &root_cache);
9668 fprintf(stderr, "checking csums\n");
9669 ret = check_csums(root);
9673 fprintf(stderr, "checking root refs\n");
9674 ret = check_root_refs(root, &root_cache);
9678 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9679 struct extent_buffer *eb;
9681 eb = list_first_entry(&root->fs_info->recow_ebs,
9682 struct extent_buffer, recow);
9683 list_del_init(&eb->recow);
9684 ret = recow_extent_buffer(root, eb);
9689 while (!list_empty(&delete_items)) {
9690 struct bad_item *bad;
9692 bad = list_first_entry(&delete_items, struct bad_item, list);
9693 list_del_init(&bad->list);
9695 ret = delete_bad_item(root, bad);
9699 if (info->quota_enabled) {
9701 fprintf(stderr, "checking quota groups\n");
9702 err = qgroup_verify_all(info);
9707 if (!list_empty(&root->fs_info->recow_ebs)) {
9708 fprintf(stderr, "Transid errors in file system\n");
9712 print_qgroup_report(0);
9713 if (found_old_backref) { /*
9714 * there was a disk format change when mixed
9715 * backref was in testing tree. The old format
9716 * existed about one week.
9718 printf("\n * Found old mixed backref format. "
9719 "The old format is not supported! *"
9720 "\n * Please mount the FS in readonly mode, "
9721 "backup data and re-format the FS. *\n\n");
9724 printf("found %llu bytes used err is %d\n",
9725 (unsigned long long)bytes_used, ret);
9726 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9727 printf("total tree bytes: %llu\n",
9728 (unsigned long long)total_btree_bytes);
9729 printf("total fs tree bytes: %llu\n",
9730 (unsigned long long)total_fs_tree_bytes);
9731 printf("total extent tree bytes: %llu\n",
9732 (unsigned long long)total_extent_tree_bytes);
9733 printf("btree space waste bytes: %llu\n",
9734 (unsigned long long)btree_space_waste);
9735 printf("file data blocks allocated: %llu\n referenced %llu\n",
9736 (unsigned long long)data_bytes_allocated,
9737 (unsigned long long)data_bytes_referenced);
9739 free_root_recs_tree(&root_cache);
9743 if (ctx.progress_enabled)
9744 task_deinit(ctx.info);