2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
/*
 * NOTE(review): this chunk is a damaged extraction — interior lines are
 * missing throughout and each surviving line keeps its original line
 * number as a literal prefix. Comments describe only what is visible.
 */
/* Fragment of the background-status task state (enum + context struct). */
48 TASK_NOTHING, /* have to be the last element */
53 enum task_position tp;
55 struct task_info *info;
/* Global accounting totals accumulated over the whole check run. */
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
/* Mode flags, presumably set from command-line options — TODO confirm. */
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
/*
 * Periodic progress printer, run as a background task: once per second
 * it prints the current phase string with a spinning work indicator.
 * NOTE(review): loop structure and several lines are missing from this
 * extraction; the visible calls show a 1s period and a '\r'-rewritten
 * status line.
 */
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
81 static char *task_position_string[] = {
83 "checking free space cache",
87 task_period_start(priv->info, 1000 /* 1s */);
/* TASK_NOTHING is the sentinel meaning "no phase active" — skip output. */
89 if (priv->tp == TASK_NOTHING)
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
97 task_period_wait(priv->info);
/* Companion callback for the task framework; body not visible here. */
102 static int print_status_return(void *p)
/*
 * In-memory record types used while cross-checking the extent tree
 * against the fs trees. All are fragmentary in this extraction — many
 * members are missing between the visible lines.
 */
/* Common base for extent back-references (data and tree variants). */
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
/* Data-extent backref: embeds the common node; other fields not visible. */
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
/* Tree-block backref: embeds the common node; other fields not visible. */
149 struct tree_backref {
150 struct extent_backref node;
/* One record per extent found; caches what the trees claim about it. */
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
/* Status bits filled in as evidence for this extent is gathered. */
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
/* One directory-entry / inode-ref sighting for an inode. */
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
189 unsigned int filetype:8;
191 unsigned int ref_type;
/* Per-root work item; drop_key relates to snapshot drop progress —
 * TODO confirm against the full source. */
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
/*
 * Bitmask of problems detected for a single inode/root back-reference;
 * OR-ed into backref->errors and decoded by print_ref_error().
 */
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/* A hole in a file's extent coverage, kept in an rb-tree keyed by start.
 * Fields (start/len/node) are missing from this extraction. */
223 struct file_extent_hole {
229 /* Compatible function to allow reuse of old codes */
/* Return the start of the lowest hole; the empty-tree return value is on
 * a line missing here (upstream returns (u64)-1 — TODO confirm). */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for file_extent_hole nodes: orders by start, then
 * by len so that equal-start holes still get a deterministic order and
 * the larger hole becomes the merge center. Return statements are on
 * lines missing from this extraction.
 */
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
260 /* Hole 2 will be merge center */
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
/* Inserts [start,len) (parameters on a missing line) then coalesces with
 * the previous hole and any number of following holes that touch it. */
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
/* NOTE(review): malloc result check is on a line missing here. */
277 hole = malloc(sizeof(*hole));
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* Previous hole overlaps or abuts: absorb it into the new hole. */
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iterate merge with next holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
303 if (hole->start + hole->len >= next->start) {
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
/*
 * rb_search() callback: tests whether the search key's start (taken from
 * the file_extent_hole passed via *data) falls inside the candidate
 * node's [start, start+len) range. Return statements are on missing lines.
 */
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
321 hole = (struct file_extent_hole *)data;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole in the record
335 * This will do the hole split and is much restrict than add.
/* Removes range [start, start+len) from an existing hole: the hole is
 * located by range search, erased, and up to two remainder pieces
 * (before and after the deleted range) are re-added. */
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
346 struct rb_node *node;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
/* Deleted range must not extend past the found hole's end. */
357 if (start + len > hole->start + hole->len)
361 * Now there will be no overflap, delete the hole and re-add the
362 * split(s) if they exists.
/* Leading remainder: part of the hole before the deleted range. */
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
/* Trailing remainder: part of the hole after the deleted range. */
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Copy every hole from tree 'src' into tree 'dst', relying on
 * add_file_extent_hole() to merge overlapping/adjacent ranges in dst.
 */
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
/*
 * Drain the hole tree: repeatedly detach the first node and free it
 * (the free() call itself is on a line missing from this extraction).
 */
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
417 node = rb_first(holes);
/*
 * Aggregated evidence about a single inode gathered while walking a
 * fs tree; 'errors' (declared on a missing line) accumulates I_ERR_*
 * bits decoded by print_inode_error(). Several numeric members
 * (ino, nlink, isize, nbytes, ...) are missing from this extraction.
 */
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
443 struct rb_root holes;
444 struct list_head orphan_extents;
/* Per-inode error bits; OR-ed into rec->errors. */
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/* Root-reference sighting (ROOT_REF/ROOT_BACKREF items); fragmentary. */
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
/* NOTE(review): the struct headers for the following three fragments
 * (presumably root_record, ptr_node and shared_node) are on lines
 * missing from this extraction. */
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
489 struct cache_extent cache;
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
/* State for the recursive tree walk over shared nodes. */
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
514 struct btrfs_key key;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark 'root' dirty in the current transaction exactly once per transid:
 * snapshot its node as commit_root and take an extra buffer reference.
 */
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
528 extent_buffer_get(root->node);
/*
 * Map a POSIX st_mode file-type (S_IFMT bits) to the corresponding
 * BTRFS_FT_* directory-entry type via a static lookup table.
 */
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparator for device_record nodes, ordered by devid.
 * The return statements are on lines missing from this extraction.
 */
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode_record: shallow memcpy of the scalar fields, then
 * fresh copies of every backref, every orphan data extent, and the hole
 * tree. Caller owns (and must free) the returned record.
 * NOTE(review): malloc results are used unchecked on the visible lines;
 * error checks may be on the missing lines — TODO confirm.
 */
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct orphan_data_extent *src_orphan;
570 struct orphan_data_extent *dst_orphan;
574 rec = malloc(sizeof(*rec));
575 memcpy(rec, orig_rec, sizeof(*rec));
/* Re-init the embedded containers so the clone owns its own lists. */
577 INIT_LIST_HEAD(&rec->backrefs);
578 INIT_LIST_HEAD(&rec->orphan_extents);
579 rec->holes = RB_ROOT;
/* Backref is a flexible-size object: name is stored inline after it. */
581 list_for_each_entry(orig, &orig_rec->backrefs, list) {
582 size = sizeof(*orig) + orig->namelen + 1;
583 backref = malloc(size);
584 memcpy(backref, orig, size);
585 list_add_tail(&backref->list, &rec->backrefs);
587 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588 dst_orphan = malloc(sizeof(*dst_orphan));
589 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
591 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
594 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/*
 * Report every orphan data extent attached to an inode record; silent
 * when the list is empty.
 */
600 static void print_orphan_data_extents(struct list_head *orphan_extents,
603 struct orphan_data_extent *orphan;
605 if (list_empty(orphan_extents))
607 printf("The following data extent is lost in tree %llu:\n",
609 list_for_each_entry(orphan, orphan_extents, list) {
610 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
611 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Decode rec->errors into a human-readable stderr line, then print the
 * orphan extents and/or file-extent holes when the corresponding error
 * bits are set.
 */
616 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
618 u64 root_objectid = root->root_key.objectid;
619 int errors = rec->errors;
623 /* reloc root errors, we print its corresponding fs root objectid*/
624 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
625 root_objectid = root->root_key.offset;
626 fprintf(stderr, "reloc");
628 fprintf(stderr, "root %llu inode %llu errors %x",
629 (unsigned long long) root_objectid,
630 (unsigned long long) rec->ino, rec->errors);
/* One clause per I_ERR_* bit, in bit order. */
632 if (errors & I_ERR_NO_INODE_ITEM)
633 fprintf(stderr, ", no inode item");
634 if (errors & I_ERR_NO_ORPHAN_ITEM)
635 fprintf(stderr, ", no orphan item");
636 if (errors & I_ERR_DUP_INODE_ITEM)
637 fprintf(stderr, ", dup inode item");
638 if (errors & I_ERR_DUP_DIR_INDEX)
639 fprintf(stderr, ", dup dir index");
640 if (errors & I_ERR_ODD_DIR_ITEM)
641 fprintf(stderr, ", odd dir item");
642 if (errors & I_ERR_ODD_FILE_EXTENT)
643 fprintf(stderr, ", odd file extent");
644 if (errors & I_ERR_BAD_FILE_EXTENT)
645 fprintf(stderr, ", bad file extent");
646 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
647 fprintf(stderr, ", file extent overlap");
648 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
649 fprintf(stderr, ", file extent discount");
650 if (errors & I_ERR_DIR_ISIZE_WRONG)
651 fprintf(stderr, ", dir isize wrong");
652 if (errors & I_ERR_FILE_NBYTES_WRONG)
653 fprintf(stderr, ", nbytes wrong");
654 if (errors & I_ERR_ODD_CSUM_ITEM)
655 fprintf(stderr, ", odd csum item");
656 if (errors & I_ERR_SOME_CSUM_MISSING)
657 fprintf(stderr, ", some csum missing");
658 if (errors & I_ERR_LINK_COUNT_WRONG)
659 fprintf(stderr, ", link count wrong");
660 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
661 fprintf(stderr, ", orphan file extent");
662 fprintf(stderr, "\n");
663 /* Print the orphan extents if needed */
664 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
665 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
667 /* Print the holes if needed */
668 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
669 struct file_extent_hole *hole;
670 struct rb_node *node;
673 node = rb_first(&rec->holes);
674 fprintf(stderr, "Found file extent holes:\n");
677 hole = rb_entry(node, struct file_extent_hole, node);
678 fprintf(stderr, "\tstart: %llu, len: %llu\n",
679 hole->start, hole->len);
680 node = rb_next(node);
/* Empty hole tree here means the whole file is a hole. */
683 fprintf(stderr, "\tstart: 0, len: %llu\n",
684 round_up(rec->isize, root->sectorsize));
/*
 * Decode a REF_ERR_* bitmask into comma-separated clauses on stderr,
 * one per bit, in bit order, followed by a newline.
 */
688 static void print_ref_error(int errors)
690 if (errors & REF_ERR_NO_DIR_ITEM)
691 fprintf(stderr, ", no dir item");
692 if (errors & REF_ERR_NO_DIR_INDEX)
693 fprintf(stderr, ", no dir index");
694 if (errors & REF_ERR_NO_INODE_REF)
695 fprintf(stderr, ", no inode ref");
696 if (errors & REF_ERR_DUP_DIR_ITEM)
697 fprintf(stderr, ", dup dir item");
698 if (errors & REF_ERR_DUP_DIR_INDEX)
699 fprintf(stderr, ", dup dir index");
700 if (errors & REF_ERR_DUP_INODE_REF)
701 fprintf(stderr, ", dup inode ref");
702 if (errors & REF_ERR_INDEX_UNMATCH)
703 fprintf(stderr, ", index unmatch");
704 if (errors & REF_ERR_FILETYPE_UNMATCH)
705 fprintf(stderr, ", filetype unmatch");
706 if (errors & REF_ERR_NAME_TOO_LONG)
707 fprintf(stderr, ", name too long");
708 if (errors & REF_ERR_NO_ROOT_REF)
709 fprintf(stderr, ", no root ref");
710 if (errors & REF_ERR_NO_ROOT_BACKREF)
711 fprintf(stderr, ", no root backref");
712 if (errors & REF_ERR_DUP_ROOT_REF)
713 fprintf(stderr, ", dup root ref");
714 if (errors & REF_ERR_DUP_ROOT_BACKREF)
715 fprintf(stderr, ", dup root backref");
716 fprintf(stderr, "\n");
/*
 * Look up (or create on miss) the inode_record for 'ino' in the cache.
 * With 'mod' set and a shared record (refs > 1), the record is cloned
 * first so the caller can modify it privately (copy-on-write).
 * NOTE(review): several lines (refs accounting, return) are missing
 * from this extraction.
 */
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
722 struct ptr_node *node;
723 struct cache_extent *cache;
724 struct inode_record *rec = NULL;
727 cache = lookup_cache_extent(inode_cache, ino, 1);
729 node = container_of(cache, struct ptr_node, cache);
731 if (mod && rec->refs > 1) {
732 node->data = clone_inode_rec(rec);
/* Cache miss: build a fresh, zeroed record keyed by ino. */
737 rec = calloc(1, sizeof(*rec));
739 rec->extent_start = (u64)-1;
741 INIT_LIST_HEAD(&rec->backrefs);
742 INIT_LIST_HEAD(&rec->orphan_extents);
743 rec->holes = RB_ROOT;
745 node = malloc(sizeof(*node));
746 node->cache.start = ino;
747 node->cache.size = 1;
/* The free-inode cache inode gets special-cased — body not visible. */
750 if (ino == BTRFS_FREE_INO_OBJECTID)
753 ret = insert_cache_extent(inode_cache, &node->cache);
/*
 * Detach and release every orphan_data_extent on the list (the free()
 * call is on a line missing from this extraction).
 */
759 static void free_orphan_data_extents(struct list_head *orphan_extents)
761 struct orphan_data_extent *orphan;
763 while (!list_empty(orphan_extents)) {
764 orphan = list_entry(orphan_extents->next,
765 struct orphan_data_extent, list);
766 list_del(&orphan->list);
/*
 * Release an inode_record: drop every queued backref, then the orphan
 * extents and the hole tree. Refcount handling is on missing lines.
 */
771 static void free_inode_rec(struct inode_record *rec)
773 struct inode_backref *backref;
778 while (!list_empty(&rec->backrefs)) {
779 backref = list_entry(rec->backrefs.next,
780 struct inode_backref, list);
781 list_del(&backref->list);
784 free_orphan_data_extents(&rec->orphan_extents);
785 free_file_extent_holes(&rec->holes);
/* True when the record is fully verified and error-free, so it can be
 * dropped early to bound memory use. */
789 static int can_free_inode_rec(struct inode_record *rec)
791 if (!rec->errors && rec->checked && rec->found_inode_item &&
792 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalize an inode record: drop fully-verified backrefs, derive the
 * mode-dependent error bits (dir size, file nbytes, extent holes, csum
 * consistency), and free the record from the cache when it is clean.
 */
797 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
798 struct inode_record *rec)
800 struct cache_extent *cache;
801 struct inode_backref *tmp, *backref;
802 struct ptr_node *node;
803 unsigned char filetype;
805 if (!rec->found_inode_item)
808 filetype = imode_to_type(rec->imode);
/* A backref is complete once dir item + dir index + inode ref agree;
 * completed, error-free backrefs are released immediately. */
809 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
810 if (backref->found_dir_item && backref->found_dir_index) {
811 if (backref->filetype != filetype)
812 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
813 if (!backref->errors && backref->found_inode_ref) {
814 list_del(&backref->list);
820 if (!rec->checked || rec->merging)
823 if (S_ISDIR(rec->imode)) {
824 if (rec->found_size != rec->isize)
825 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
826 if (rec->found_file_extent)
827 rec->errors |= I_ERR_ODD_FILE_EXTENT;
828 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
829 if (rec->found_dir_item)
830 rec->errors |= I_ERR_ODD_DIR_ITEM;
831 if (rec->found_size != rec->nbytes)
832 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Hole/coverage checks only apply when the fs records holes at all. */
833 if (rec->nlink > 0 && !no_holes &&
834 (rec->extent_end < rec->isize ||
835 first_extent_gap(&rec->holes) < rec->isize))
836 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
839 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
840 if (rec->found_csum_item && rec->nodatasum)
841 rec->errors |= I_ERR_ODD_CSUM_ITEM;
842 if (rec->some_csum_missing && !rec->nodatasum)
843 rec->errors |= I_ERR_SOME_CSUM_MISSING;
846 BUG_ON(rec->refs != 1);
847 if (can_free_inode_rec(rec)) {
848 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
849 node = container_of(cache, struct ptr_node, cache);
850 BUG_ON(node->data != rec);
851 remove_cache_extent(inode_cache, &node->cache);
/*
 * Probe the tree for an orphan item covering 'ino' (key offset, set on
 * a missing line — TODO confirm). Read-only search; path released
 * before returning the search result.
 */
857 static int check_orphan_item(struct btrfs_root *root, u64 ino)
859 struct btrfs_path path;
860 struct btrfs_key key;
863 key.objectid = BTRFS_ORPHAN_OBJECTID;
864 key.type = BTRFS_ORPHAN_ITEM_KEY;
867 btrfs_init_path(&path);
868 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
869 btrfs_release_path(&path);
/*
 * Fold an INODE_ITEM into the currently-active inode record: flag a
 * duplicate if one was already seen, otherwise copy nlink/isize/nbytes/
 * imode and the NODATASUM flag, then let maybe_free_inode_rec() decide
 * whether the record is complete.
 */
875 static int process_inode_item(struct extent_buffer *eb,
876 int slot, struct btrfs_key *key,
877 struct shared_node *active_node)
879 struct inode_record *rec;
880 struct btrfs_inode_item *item;
882 rec = active_node->current;
883 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
884 if (rec->found_inode_item) {
885 rec->errors |= I_ERR_DUP_INODE_ITEM;
888 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
889 rec->nlink = btrfs_inode_nlink(eb, item);
890 rec->isize = btrfs_inode_size(eb, item);
891 rec->nbytes = btrfs_inode_nbytes(eb, item);
892 rec->imode = btrfs_inode_mode(eb, item);
893 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
895 rec->found_inode_item = 1;
/* The condition guarding this orphan-item error is on a missing line. */
897 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
898 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref matching (dir, name) on this record, or allocate a
 * fresh zeroed one with the name stored inline (NUL-terminated) and
 * append it. Multiple-objectid records short-circuit the name match.
 */
902 static struct inode_backref *get_inode_backref(struct inode_record *rec,
904 int namelen, u64 dir)
906 struct inode_backref *backref;
908 list_for_each_entry(backref, &rec->backrefs, list) {
909 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
911 if (backref->dir != dir || backref->namelen != namelen)
913 if (memcmp(name, backref->name, namelen))
/* No match: allocate with room for the inline name + NUL. */
918 backref = malloc(sizeof(*backref) + namelen + 1);
919 memset(backref, 0, sizeof(*backref));
921 backref->namelen = namelen;
922 memcpy(backref->name, name, namelen);
923 backref->name[namelen] = '\0';
924 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one sighting of a name pointing at inode 'ino': the item type
 * (DIR_INDEX / DIR_ITEM / INODE_REF / INODE_EXTREF) selects which
 * backref fields to fill, and cross-checks against fields already set
 * by earlier sightings raise REF_ERR_* bits.
 */
928 static int add_inode_backref(struct cache_tree *inode_cache,
929 u64 ino, u64 dir, u64 index,
930 const char *name, int namelen,
931 int filetype, int itemtype, int errors)
933 struct inode_record *rec;
934 struct inode_backref *backref;
936 rec = get_inode_rec(inode_cache, ino, 1);
937 backref = get_inode_backref(rec, name, namelen, dir);
939 backref->errors |= errors;
940 if (itemtype == BTRFS_DIR_INDEX_KEY) {
941 if (backref->found_dir_index)
942 backref->errors |= REF_ERR_DUP_DIR_INDEX;
943 if (backref->found_inode_ref && backref->index != index)
944 backref->errors |= REF_ERR_INDEX_UNMATCH;
945 if (backref->found_dir_item && backref->filetype != filetype)
946 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
948 backref->index = index;
949 backref->filetype = filetype;
950 backref->found_dir_index = 1;
951 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
953 if (backref->found_dir_item)
954 backref->errors |= REF_ERR_DUP_DIR_ITEM;
955 if (backref->found_dir_index && backref->filetype != filetype)
956 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
958 backref->filetype = filetype;
959 backref->found_dir_item = 1;
960 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
961 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
962 if (backref->found_inode_ref)
963 backref->errors |= REF_ERR_DUP_INODE_REF;
964 if (backref->found_dir_index && backref->index != index)
965 backref->errors |= REF_ERR_INDEX_UNMATCH;
967 backref->index = index;
969 backref->ref_type = itemtype;
970 backref->found_inode_ref = 1;
975 maybe_free_inode_rec(inode_cache, rec);
/*
 * Merge 'src' into 'dst' when the same inode was seen via two shared
 * subtrees: replay src's backrefs into dst_cache, OR in the found_*
 * flags and errors, combine link/size counters, stitch the extent
 * coverage (recording overlap errors or holes at the seam), and copy
 * the inode-item fields if dst lacks them.
 */
979 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
980 struct cache_tree *dst_cache)
982 struct inode_backref *backref;
/* Replay each kind of sighting src held, so dst_cache re-runs the same
 * cross-checks as the original scan. */
987 list_for_each_entry(backref, &src->backrefs, list) {
988 if (backref->found_dir_index) {
989 add_inode_backref(dst_cache, dst->ino, backref->dir,
990 backref->index, backref->name,
991 backref->namelen, backref->filetype,
992 BTRFS_DIR_INDEX_KEY, backref->errors);
994 if (backref->found_dir_item) {
996 add_inode_backref(dst_cache, dst->ino,
997 backref->dir, 0, backref->name,
998 backref->namelen, backref->filetype,
999 BTRFS_DIR_ITEM_KEY, backref->errors);
1001 if (backref->found_inode_ref) {
1002 add_inode_backref(dst_cache, dst->ino,
1003 backref->dir, backref->index,
1004 backref->name, backref->namelen, 0,
1005 backref->ref_type, backref->errors);
1009 if (src->found_dir_item)
1010 dst->found_dir_item = 1;
1011 if (src->found_file_extent)
1012 dst->found_file_extent = 1;
1013 if (src->found_csum_item)
1014 dst->found_csum_item = 1;
1015 if (src->some_csum_missing)
1016 dst->some_csum_missing = 1;
/* Keep whichever hole set starts lower; copying merges the ranges. */
1017 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1018 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1023 BUG_ON(src->found_link < dir_count);
1024 dst->found_link += src->found_link - dir_count;
1025 dst->found_size += src->found_size;
1026 if (src->extent_start != (u64)-1) {
1027 if (dst->extent_start == (u64)-1) {
1028 dst->extent_start = src->extent_start;
1029 dst->extent_end = src->extent_end;
/* Coverage seam: overlap is an error, a gap becomes a hole. */
1031 if (dst->extent_end > src->extent_start)
1032 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1033 else if (dst->extent_end < src->extent_start) {
1034 ret = add_file_extent_hole(&dst->holes,
1036 src->extent_start - dst->extent_end);
1038 if (dst->extent_end < src->extent_end)
1039 dst->extent_end = src->extent_end;
1043 dst->errors |= src->errors;
1044 if (src->found_inode_item) {
1045 if (!dst->found_inode_item) {
1046 dst->nlink = src->nlink;
1047 dst->isize = src->isize;
1048 dst->nbytes = src->nbytes;
1049 dst->imode = src->imode;
1050 dst->nodatasum = src->nodatasum;
1051 dst->found_inode_item = 1;
1053 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move every inode record from src_node's caches into dst_node's,
 * processing root_cache first and then inode_cache. Records that
 * collide (-EEXIST on insert) are merged via merge_inode_recs() and the
 * source record freed. Finally dst_node's "current" inode pointer is
 * advanced to src's current inode if it was further along.
 */
1061 static int splice_shared_node(struct shared_node *src_node,
1062 struct shared_node *dst_node)
1064 struct cache_extent *cache;
1065 struct ptr_node *node, *ins;
1066 struct cache_tree *src, *dst;
1067 struct inode_record *rec, *conflict;
1068 u64 current_ino = 0;
1072 if (--src_node->refs == 0)
1074 if (src_node->current)
1075 current_ino = src_node->current->ino;
1077 src = &src_node->root_cache;
1078 dst = &dst_node->root_cache;
1080 cache = search_cache_extent(src, 0);
1082 node = container_of(cache, struct ptr_node, cache);
/* Advance before removal so iteration survives the detach. */
1084 cache = next_cache_extent(cache);
1087 remove_cache_extent(src, &node->cache);
1090 ins = malloc(sizeof(*ins));
1091 ins->cache.start = node->cache.start;
1092 ins->cache.size = node->cache.size;
1096 ret = insert_cache_extent(dst, &ins->cache);
1097 if (ret == -EEXIST) {
/* Same inode already tracked in dst: merge and drop the source. */
1098 conflict = get_inode_rec(dst, rec->ino, 1);
1099 merge_inode_recs(rec, conflict, dst);
1101 conflict->checked = 1;
1102 if (dst_node->current == conflict)
1103 dst_node->current = NULL;
1105 maybe_free_inode_rec(dst, conflict);
1106 free_inode_rec(rec);
/* After the root cache, repeat the same loop for the inode cache. */
1113 if (src == &src_node->root_cache) {
1114 src = &src_node->inode_cache;
1115 dst = &dst_node->inode_cache;
1119 if (current_ino > 0 && (!dst_node->current ||
1120 current_ino > dst_node->current->ino)) {
1121 if (dst_node->current) {
1122 dst_node->current->checked = 1;
1123 maybe_free_inode_rec(dst, dst_node->current);
1125 dst_node->current = get_inode_rec(dst, current_ino, 1);
/*
 * cache-tree destructor callback: free the inode record held by a
 * ptr_node (the node free itself is on a missing line).
 */
1130 static void free_inode_ptr(struct cache_extent *cache)
1132 struct ptr_node *node;
1133 struct inode_record *rec;
1135 node = container_of(cache, struct ptr_node, cache);
1137 free_inode_rec(rec);
/* Generates free_inode_recs_tree() over a cache tree of ptr_nodes. */
1141 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/* Look up the shared_node covering 'bytenr', or NULL (return lines
 * missing from this extraction). */
1143 static struct shared_node *find_shared_node(struct cache_tree *shared,
1146 struct cache_extent *cache;
1147 struct shared_node *node;
1149 cache = lookup_cache_extent(shared, bytenr, 1);
1151 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node keyed by 'bytenr' with empty root and
 * inode caches, and insert it into the shared tree. The refs assignment
 * is on a line missing from this extraction.
 */
1157 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1160 struct shared_node *node;
1162 node = calloc(1, sizeof(*node));
1163 node->cache.start = bytenr;
1164 node->cache.size = 1;
1165 cache_tree_init(&node->root_cache);
1166 cache_tree_init(&node->inode_cache);
1169 ret = insert_cache_extent(shared, &node->cache);
/*
 * Entering a tree node shared between roots during the walk: register
 * it as a shared_node the first time, or — on revisits — splice its
 * accumulated records into the currently active node. Dead roots
 * (refs == 0) get their per-node caches torn down instead.
 */
1174 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1175 struct walk_control *wc, int level)
1177 struct shared_node *node;
1178 struct shared_node *dest;
1180 if (level == wc->active_node)
1183 BUG_ON(wc->active_node <= level);
1184 node = find_shared_node(&wc->shared, bytenr);
/* First visit: create the shared node and make this level active. */
1186 add_shared_node(&wc->shared, bytenr, refs);
1187 node = find_shared_node(&wc->shared, bytenr);
1188 wc->nodes[level] = node;
1189 wc->active_node = level;
/* Root being deleted: discard rather than merge its records. */
1193 if (wc->root_level == wc->active_node &&
1194 btrfs_root_refs(&root->root_item) == 0) {
1195 if (--node->refs == 0) {
1196 free_inode_recs_tree(&node->root_cache);
1197 free_inode_recs_tree(&node->inode_cache);
1198 remove_cache_extent(&wc->shared, &node->cache);
1204 dest = wc->nodes[wc->active_node];
1205 splice_shared_node(node, dest);
1206 if (node->refs == 0) {
1207 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Leaving a shared node on the way back up: find the next shared node
 * above 'level', make it active, and splice the departing node's
 * records into it (unless the ref invariants say otherwise).
 */
1213 static int leave_shared_node(struct btrfs_root *root,
1214 struct walk_control *wc, int level)
1216 struct shared_node *node;
1217 struct shared_node *dest;
1220 if (level == wc->root_level)
/* Scan upward for the nearest tracked level. */
1223 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1227 BUG_ON(i >= BTRFS_MAX_LEVEL);
1229 node = wc->nodes[wc->active_node];
1230 wc->nodes[wc->active_node] = NULL;
1231 wc->active_node = i;
1233 dest = wc->nodes[wc->active_node];
1234 if (wc->active_node < wc->root_level ||
1235 btrfs_root_refs(&root->root_item) > 0) {
1236 BUG_ON(node->refs <= 1);
1237 splice_shared_node(node, dest);
1239 BUG_ON(node->refs < 2);
1248 * 1 - if the root with id child_root_id is a child of root parent_root_id
1249 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1250 * has other root(s) as parent(s)
1251 * 2 - if the root child_root_id doesn't have any parent roots
/* Checks the tree root for a ROOT_REF (parent -> child), then scans the
 * child's ROOT_BACKREF items, walking leaves until a matching parent is
 * found or the backrefs run out. */
1253 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1256 struct btrfs_path path;
1257 struct btrfs_key key;
1258 struct extent_buffer *leaf;
1262 btrfs_init_path(&path);
1264 key.objectid = parent_root_id;
1265 key.type = BTRFS_ROOT_REF_KEY;
1266 key.offset = child_root_id;
1267 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1271 btrfs_release_path(&path);
/* Direct ref not found: enumerate the child's backrefs instead. */
1275 key.objectid = child_root_id;
1276 key.type = BTRFS_ROOT_BACKREF_KEY;
1278 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1284 leaf = path.nodes[0];
1285 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1286 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1289 leaf = path.nodes[0];
1292 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1293 if (key.objectid != child_root_id ||
1294 key.type != BTRFS_ROOT_BACKREF_KEY)
1299 if (key.offset == parent_root_id) {
1300 btrfs_release_path(&path);
1307 btrfs_release_path(&path);
1310 return has_parent ? 0 : 2;
/*
 * Walk every btrfs_dir_item packed into this leaf slot: each entry's
 * name is read (truncated to BTRFS_NAME_LEN with a NAME_TOO_LONG
 * error), and a backref is recorded in the inode cache, the root cache
 * (for subvolume entries), or a MULTIPLE_OBJECTIDS catch-all for
 * invalid location keys. Duplicate DIR_INDEX items per key are flagged.
 */
1313 static int process_dir_item(struct btrfs_root *root,
1314 struct extent_buffer *eb,
1315 int slot, struct btrfs_key *key,
1316 struct shared_node *active_node)
1326 struct btrfs_dir_item *di;
1327 struct inode_record *rec;
1328 struct cache_tree *root_cache;
1329 struct cache_tree *inode_cache;
1330 struct btrfs_key location;
1331 char namebuf[BTRFS_NAME_LEN];
1333 root_cache = &active_node->root_cache;
1334 inode_cache = &active_node->inode_cache;
1335 rec = active_node->current;
1336 rec->found_dir_item = 1;
1338 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1339 total = btrfs_item_size_nr(eb, slot);
1340 while (cur < total) {
1342 btrfs_dir_item_key_to_cpu(eb, di, &location);
1343 name_len = btrfs_dir_name_len(eb, di);
1344 data_len = btrfs_dir_data_len(eb, di);
1345 filetype = btrfs_dir_type(eb, di);
/* Directory isize is the sum of entry name lengths. */
1347 rec->found_size += name_len;
1348 if (name_len <= BTRFS_NAME_LEN) {
1352 len = BTRFS_NAME_LEN;
1353 error = REF_ERR_NAME_TOO_LONG;
1355 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1357 if (location.type == BTRFS_INODE_ITEM_KEY) {
1358 add_inode_backref(inode_cache, location.objectid,
1359 key->objectid, key->offset, namebuf,
1360 len, filetype, key->type, error);
1361 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1362 add_inode_backref(root_cache, location.objectid,
1363 key->objectid, key->offset,
1364 namebuf, len, filetype,
1367 fprintf(stderr, "invalid location in dir item %u\n",
1369 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1370 key->objectid, key->offset, namebuf,
1371 len, filetype, key->type, error);
/* Advance past this entry's header, name and payload. */
1374 len = sizeof(*di) + name_len + data_len;
1375 di = (struct btrfs_dir_item *)((char *)di + len);
1378 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1379 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed into this slot and record an
 * inode backref for each (key->objectid = inode, key->offset = parent
 * dir). Over-long names are truncated and flagged.
 */
1384 static int process_inode_ref(struct extent_buffer *eb,
1385 int slot, struct btrfs_key *key,
1386 struct shared_node *active_node)
1394 struct cache_tree *inode_cache;
1395 struct btrfs_inode_ref *ref;
1396 char namebuf[BTRFS_NAME_LEN];
1398 inode_cache = &active_node->inode_cache;
1400 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1401 total = btrfs_item_size_nr(eb, slot);
1402 while (cur < total) {
1403 name_len = btrfs_inode_ref_name_len(eb, ref);
1404 index = btrfs_inode_ref_index(eb, ref);
1405 if (name_len <= BTRFS_NAME_LEN) {
1409 len = BTRFS_NAME_LEN;
1410 error = REF_ERR_NAME_TOO_LONG;
1412 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1413 add_inode_backref(inode_cache, key->objectid, key->offset,
1414 index, namebuf, len, 0, key->type, error);
/* Advance past this ref's header and inline name. */
1416 len = sizeof(*ref) + name_len;
1417 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every backref packed inside one INODE_EXTREF item into the active
 * shared node's inode cache.  Unlike INODE_REF, the parent directory is
 * stored in the extref itself rather than in the key offset.
 *
 * NOTE(review): interior lines (declarations, loop bookkeeping, return) are
 * elided from this listing.
 */
1423 static int process_inode_extref(struct extent_buffer *eb,
1424 int slot, struct btrfs_key *key,
1425 struct shared_node *active_node)
1434 struct cache_tree *inode_cache;
1435 struct btrfs_inode_extref *extref;
1436 char namebuf[BTRFS_NAME_LEN];
1438 inode_cache = &active_node->inode_cache;
1440 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1441 total = btrfs_item_size_nr(eb, slot);
/* Multiple extrefs can be packed into a single item; walk them all. */
1442 while (cur < total) {
1443 name_len = btrfs_inode_extref_name_len(eb, extref);
1444 index = btrfs_inode_extref_index(eb, extref);
/* Parent dir comes from the extref payload, not from key->offset. */
1445 parent = btrfs_inode_extref_parent(eb, extref);
1446 if (name_len <= BTRFS_NAME_LEN) {
/* Overlong name: truncate and flag the backref. */
1450 len = BTRFS_NAME_LEN;
1451 error = REF_ERR_NAME_TOO_LONG;
1453 read_extent_buffer(eb, namebuf,
1454 (unsigned long)(extref + 1), len);
1455 add_inode_backref(inode_cache, key->objectid, parent,
1456 index, namebuf, len, 0, key->type, error);
/* Step over this extref header plus its name bytes. */
1458 len = sizeof(*extref) + name_len;
1459 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the logical range [start, start + len) are covered
 * by checksum items in the csum tree, accumulating the answer into *found.
 *
 * NOTE(review): this listing is elided -- the search key offset setup, loop
 * structure, several break/advance statements and the return are missing;
 * comments describe only the visible lines.
 */
1466 static int count_csum_range(struct btrfs_root *root, u64 start,
1467 u64 len, u64 *found)
1469 struct btrfs_key key;
1470 struct btrfs_path path;
1471 struct extent_buffer *leaf;
1476 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1478 btrfs_init_path(&path);
/* All csum items live under the fixed EXTENT_CSUM objectid/type. */
1480 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1482 key.type = BTRFS_EXTENT_CSUM_KEY;
1484 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the previous item may still be a csum item whose range
 * extends over `start`, so step back one slot if it qualifies.
 */
1488 if (ret > 0 && path.slots[0] > 0) {
1489 leaf = path.nodes[0];
1490 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1491 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1492 key.type == BTRFS_EXTENT_CSUM_KEY)
1497 leaf = path.nodes[0];
/* Ran off the end of this leaf: move to the next one. */
1498 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1499 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1504 leaf = path.nodes[0];
1507 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop as soon as we leave the csum-item key space. */
1508 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1509 key.type != BTRFS_EXTENT_CSUM_KEY)
1512 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts at or past the end of our range: nothing more to count. */
1513 if (key.offset >= start + len)
1516 if (key.offset > start)
/* Each csum entry covers one sector; derive the byte range it spans. */
1519 size = btrfs_item_size_nr(leaf, path.slots[0]);
1520 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1521 if (csum_end > start) {
1522 size = min(csum_end - start, len);
1531 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the inode currently being checked:
 * detect overlaps/holes between consecutive extents, sanity-check the
 * extent fields, accumulate found_size, and verify csum coverage for
 * regular (non-prealloc) extents.
 *
 * NOTE(review): this listing is elided -- several declarations, error
 * returns and closing braces are missing; comments describe only the
 * visible lines.
 */
1537 static int process_file_extent(struct btrfs_root *root,
1538 struct extent_buffer *eb,
1539 int slot, struct btrfs_key *key,
1540 struct shared_node *active_node)
1542 struct inode_record *rec;
1543 struct btrfs_file_extent_item *fi;
1545 u64 disk_bytenr = 0;
1546 u64 extent_offset = 0;
/* sectorsize is a power of two, so this mask rounds to sector boundaries. */
1547 u64 mask = root->sectorsize - 1;
1551 rec = active_node->current;
1552 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1553 rec->found_file_extent = 1;
/* First extent seen for this inode: start tracking the covered range. */
1555 if (rec->extent_start == (u64)-1) {
1556 rec->extent_start = key->offset;
1557 rec->extent_end = key->offset;
/* Previous extent runs past this one's start: overlap error. */
1560 if (rec->extent_end > key->offset)
1561 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
/* Gap between extents: remember it as a hole for later repair/checks. */
1562 else if (rec->extent_end < key->offset) {
1563 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1564 key->offset - rec->extent_end);
1569 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1570 extent_type = btrfs_file_extent_type(eb, fi);
1572 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1573 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1575 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1576 rec->found_size += num_bytes;
/* Round inline length up to a sector for extent_end accounting. */
1577 num_bytes = (num_bytes + mask) & ~mask;
1578 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1579 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1580 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1581 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1582 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Regular extents must be non-empty and sector aligned. */
1583 if (num_bytes == 0 || (num_bytes & mask))
1584 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the extent's ram_bytes. */
1585 if (num_bytes + extent_offset >
1586 btrfs_file_extent_ram_bytes(eb, fi))
1587 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents can be neither compressed nor encoded. */
1588 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1589 (btrfs_file_extent_compression(eb, fi) ||
1590 btrfs_file_extent_encryption(eb, fi) ||
1591 btrfs_file_extent_other_encoding(eb, fi)))
1592 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 means a hole; only real extents add to found_size. */
1593 if (disk_bytenr > 0)
1594 rec->found_size += num_bytes;
1596 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1598 rec->extent_end = key->offset + num_bytes;
1601 * The data reloc tree will copy full extents into its inode and then
1602 * copy the corresponding csums. Because the extent it copied could be
1603 * a preallocated extent that hasn't been written to yet there may be no
1604 * csums to copy, ergo we won't have csums for our file extent. This is
1605 * ok so just don't bother checking csums if the inode belongs to the
1608 if (disk_bytenr > 0 &&
1609 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID {
1611 if (btrfs_file_extent_compression(eb, fi))
/* Compressed extents are checksummed over their on-disk bytes. */
1612 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1614 disk_bytenr += extent_offset;
1616 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1619 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1621 rec->found_csum_item = 1;
1622 if (found < num_bytes)
1623 rec->some_csum_missing = 1;
/* Prealloc extents must have *no* csums; finding any is an error. */
1624 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1626 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Walk every item of a leaf of an fs tree, grouping consecutive items by
 * inode into the active shared node's current inode record and dispatching
 * each item to the matching process_* handler.
 *
 * NOTE(review): this listing is elided -- declarations, `continue`/`break`
 * statements, the switch scaffolding and the return are partly missing.
 */
1632 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1633 struct walk_control *wc)
1635 struct btrfs_key key;
1639 struct cache_tree *inode_cache;
1640 struct shared_node *active_node;
/* Skip leaves of dead roots (refs == 0) at the top shared level. */
1642 if (wc->root_level == wc->active_node &&
1643 btrfs_root_refs(&root->root_item) == 0)
1646 active_node = wc->nodes[wc->active_node];
1647 inode_cache = &active_node->inode_cache;
1648 nritems = btrfs_header_nritems(eb);
1649 for (i = 0; i < nritems; i++) {
1650 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space-cache and orphan items are not per-inode metadata. */
1652 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1654 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Items are key-ordered, so a larger objectid means we finished the
 * previous inode: mark it checked and switch the current record.
 */
1657 if (active_node->current == NULL ||
1658 active_node->current->ino < key.objectid) {
1659 if (active_node->current) {
1660 active_node->current->checked = 1;
1661 maybe_free_inode_rec(inode_cache,
1662 active_node->current);
1664 active_node->current = get_inode_rec(inode_cache,
/* Dispatch by item type to the dedicated handler. */
1668 case BTRFS_DIR_ITEM_KEY:
1669 case BTRFS_DIR_INDEX_KEY:
1670 ret = process_dir_item(root, eb, i, &key, active_node);
1672 case BTRFS_INODE_REF_KEY:
1673 ret = process_inode_ref(eb, i, &key, active_node);
1675 case BTRFS_INODE_EXTREF_KEY:
1676 ret = process_inode_extref(eb, i, &key, active_node);
1678 case BTRFS_INODE_ITEM_KEY:
1679 ret = process_inode_item(eb, i, &key, active_node);
1681 case BTRFS_EXTENT_DATA_KEY:
1682 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all children of `node` from `slot` onward, so the
 * subsequent synchronous read_tree_block() calls hit warm cache.
 *
 * NOTE(review): declarations of i/bytenr/ptr_gen/level/etc. and the early
 * return for leaves are elided from this listing.
 */
1692 static void reada_walk_down(struct btrfs_root *root,
1693 struct extent_buffer *node, int slot)
1702 level = btrfs_header_level(node);
1706 nritems = btrfs_header_nritems(node);
/* Children sit one level below this node. */
1707 blocksize = btrfs_level_size(root, level - 1);
1708 for (i = slot; i < nritems; i++) {
1709 bytenr = btrfs_node_blockptr(node, i);
1710 ptr_gen = btrfs_node_ptr_generation(node, i);
1711 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1716 * Check the child node/leaf by the following condition:
1717 * 1. the first item key of the node/leaf should be the same as the one
1719 * 2. block in parent node should match the child node/leaf.
1720 * 3. generation of parent node and child's header should be consistent.
1722 * Or the child node/leaf pointed by the key in parent is not valid.
1724 * We hope to check leaf owner too, but since subvol may share leaves,
1725 * which makes leaf owner check not so strong, key check should be
1726 * sufficient for that case.
/*
 * Verify a child block against its parent pointer: first key, block number
 * and generation must all agree (see the comment block above).
 *
 * NOTE(review): the `ret` declaration, the `ret = -EINVAL` assignments
 * inside each mismatch branch and the final return are elided here.
 */
1728 static int check_child_node(struct btrfs_root *root,
1729 struct extent_buffer *parent, int slot,
1730 struct extent_buffer *child)
1732 struct btrfs_key parent_key;
1733 struct btrfs_key child_key;
1736 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Level 0 is a leaf: its first key is an item key, not a node key. */
1737 if (btrfs_header_level(child) == 0)
1738 btrfs_item_key_to_cpu(child, &child_key, 0);
1740 btrfs_node_key_to_cpu(child, &child_key, 0);
/* Condition 1: parent's key for this slot must equal child's first key. */
1742 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1745 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1746 parent_key.objectid, parent_key.type, parent_key.offset,
1747 child_key.objectid, child_key.type, child_key.offset);
/* Condition 2: the block the child claims to be must match the pointer. */
1749 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1751 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1752 btrfs_node_blockptr(parent, slot),
1753 btrfs_header_bytenr(child));
/* Condition 3: generations recorded in parent and child must agree. */
1755 if (btrfs_node_ptr_generation(parent, slot) !=
1756 btrfs_header_generation(child)) {
1758 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1759 btrfs_header_generation(child),
1760 btrfs_node_ptr_generation(parent, slot));
/*
 * Descend one step at a time from the current path position, processing
 * each leaf via process_one_leaf() and validating every intermediate node
 * (cache lookup, readahead, check_child_node, btrfs_check_node/leaf).
 * Shared subtrees (refs > 1) are entered via enter_shared_node() and
 * skipped if already covered.
 *
 * NOTE(review): this listing is elided -- error-handling branches, the
 * shared-node `continue` paths and several closing braces are missing;
 * comments describe only the visible lines.
 */
1765 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1766 struct walk_control *wc, int *level)
1768 enum btrfs_tree_block_status status;
1771 struct extent_buffer *next;
1772 struct extent_buffer *cur;
1777 WARN_ON(*level < 0);
1778 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Look up the refcount of the current block to detect shared subtrees. */
1779 ret = btrfs_lookup_extent_info(NULL, root,
1780 path->nodes[*level]->start,
1781 *level, 1, &refs, NULL);
1788 ret = enter_shared_node(root, path->nodes[*level]->start,
1796 while (*level >= 0) {
1797 WARN_ON(*level < 0);
1798 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1799 cur = path->nodes[*level];
/* Header level must match where the path thinks this block sits. */
1801 if (btrfs_header_level(cur) != *level)
/* Exhausted this node's slots: let walk_up_tree pop a level. */
1804 if (path->slots[*level] >= btrfs_header_nritems(cur))
1807 ret = process_one_leaf(root, cur, wc);
1812 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1813 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1814 blocksize = btrfs_level_size(root, *level - 1);
/* Check the child's refcount: shared children may be skippable. */
1815 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1821 ret = enter_shared_node(root, bytenr, refs,
/* Child already handled through another path: skip this slot. */
1824 path->slots[*level]++;
1829 next = btrfs_find_tree_block(root, bytenr, blocksize);
/* Not cached (or stale): prefetch siblings, then read synchronously. */
1830 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1831 free_extent_buffer(next);
1832 reada_walk_down(root, cur, path->slots[*level]);
1833 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record the parent block as corrupt. */
1835 if (!extent_buffer_uptodate(next)) {
1836 struct btrfs_key node_key;
1838 btrfs_node_key_to_cpu(path->nodes[*level],
1840 path->slots[*level]);
1841 btrfs_add_corrupt_extent_record(root->fs_info,
1843 path->nodes[*level]->start,
1844 root->leafsize, *level);
1850 ret = check_child_node(root, cur, path->slots[*level], next);
/* Structural self-check of the child block's own contents. */
1856 if (btrfs_is_leaf(next))
1857 status = btrfs_check_leaf(root, NULL, next);
1859 status = btrfs_check_node(root, NULL, next);
1860 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1861 free_extent_buffer(next);
/* Descend: the child becomes the path node one level down. */
1866 *level = *level - 1;
1867 free_extent_buffer(path->nodes[*level]);
1868 path->nodes[*level] = next;
1869 path->slots[*level] = 0;
1872 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Pop finished levels off the path: for each fully-consumed node, release
 * its buffer, leave the shared node if we own it at this level, and stop
 * at the first level that still has unvisited slots.
 *
 * NOTE(review): the slot-increment/early-return body of the if, the
 * level bookkeeping and the final return are elided from this listing.
 */
1876 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1877 struct walk_control *wc, int *level)
1880 struct extent_buffer *leaf;
1882 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1883 leaf = path->nodes[i];
/* This level still has slots left: resume descending from here. */
1884 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1889 free_extent_buffer(path->nodes[*level]);
1890 path->nodes[*level] = NULL;
1891 BUG_ON(*level > wc->active_node);
/* Leaving the subtree rooted at the active shared node. */
1892 if (*level == wc->active_node)
1893 leave_shared_node(root, wc, *level);
/*
 * Validate the root directory inode record: it must have a clean inode
 * item, nlink 1 with no counted links, and exactly the special ".."
 * self backref at index 0 with no dir item/index entries.
 *
 * NOTE(review): the ret declaration, the goto/`fail` label and the return
 * statements are elided from this listing.
 */
1900 static int check_root_dir(struct inode_record *rec)
1902 struct inode_backref *backref;
1905 if (!rec->found_inode_item || rec->errors)
1907 if (rec->nlink != 1 || rec->found_link != 0)
1909 if (list_empty(&rec->backrefs))
1911 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1912 if (!backref->found_inode_ref)
/* The sole backref must be index 0 with the literal name "..". */
1914 if (backref->index != 0 || backref->namelen != 2 ||
1915 memcmp(backref->name, "..", 2))
/* The root dir has no dir_item/dir_index pointing at itself. */
1917 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite a directory inode's size field to the byte total fsck computed
 * (rec->found_size) and clear I_ERR_DIR_ISIZE_WRONG.
 *
 * NOTE(review): the error-handling branches after the search (ret checks,
 * goto out) and the final return are elided from this listing.
 */
1924 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1925 struct btrfs_root *root, struct btrfs_path *path,
1926 struct inode_record *rec)
1928 struct btrfs_inode_item *ei;
1929 struct btrfs_key key;
/* offset = -1 positions the search just past the inode item. */
1932 key.objectid = rec->ino;
1933 key.type = BTRFS_INODE_ITEM_KEY;
1934 key.offset = (u64)-1;
1936 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Can't step back to the previous slot from slot 0. */
1940 if (!path->slots[0]) {
1947 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Previous item must actually belong to our inode. */
1948 if (key.objectid != rec->ino) {
1953 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1954 struct btrfs_inode_item);
1955 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1956 btrfs_mark_buffer_dirty(path->nodes[0]);
1957 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1958 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1959 root->root_key.objectid);
1961 btrfs_release_path(path);
/*
 * Insert the missing orphan item for rec->ino and, on success, clear
 * I_ERR_NO_ORPHAN_ITEM.  (The ret check guarding the error-flag clear
 * and the return are elided from this listing.)
 */
1965 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1966 struct btrfs_root *root,
1967 struct btrfs_path *path,
1968 struct inode_record *rec)
1972 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1973 btrfs_release_path(path);
1975 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite an inode's nbytes field to fsck's computed rec->found_size and
 * clear I_ERR_FILE_NBYTES_WRONG.
 *
 * NOTE(review): key.offset assignment, ret/error handling after the
 * search and the final return are elided from this listing.
 */
1979 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1980 struct btrfs_root *root,
1981 struct btrfs_path *path,
1982 struct inode_record *rec)
1984 struct btrfs_inode_item *ei;
1985 struct btrfs_key key;
1988 key.objectid = rec->ino;
1989 key.type = BTRFS_INODE_ITEM_KEY;
1992 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1999 /* Since ret == 0, no need to check anything */
2000 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2001 struct btrfs_inode_item);
2002 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2003 btrfs_mark_buffer_dirty(path->nodes[0]);
2004 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2005 printf("reset nbytes for ino %llu root %llu\n",
2006 rec->ino, root->root_key.objectid);
2008 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item a backref says is missing: build the dir item
 * payload (key pointing at the inode, type from the inode's mode, the
 * backref's name), then update the parent dir's record so its isize error
 * state stays consistent.
 *
 * NOTE(review): error handling after btrfs_insert_empty_item, the NULL
 * check of btrfs_alloc_path and the final return are elided.
 */
2012 static int add_missing_dir_index(struct btrfs_root *root,
2013 struct cache_tree *inode_cache,
2014 struct inode_record *rec,
2015 struct inode_backref *backref)
2017 struct btrfs_path *path;
2018 struct btrfs_trans_handle *trans;
2019 struct btrfs_dir_item *dir_item;
2020 struct extent_buffer *leaf;
2021 struct btrfs_key key;
2022 struct btrfs_disk_key disk_key;
2023 struct inode_record *dir_rec;
2024 unsigned long name_ptr;
/* Item payload is the dir_item header followed by the name bytes. */
2025 u32 data_size = sizeof(*dir_item) + backref->namelen;
2028 path = btrfs_alloc_path();
2032 trans = btrfs_start_transaction(root, 1);
2033 if (IS_ERR(trans)) {
2034 btrfs_free_path(path);
2035 return PTR_ERR(trans);
2038 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2039 (unsigned long long)rec->ino);
/* The DIR_INDEX key lives in the parent dir, keyed by the ref's index. */
2040 key.objectid = backref->dir;
2041 key.type = BTRFS_DIR_INDEX_KEY;
2042 key.offset = backref->index;
2044 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2047 leaf = path->nodes[0];
2048 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The embedded key points back at the child inode's INODE_ITEM. */
2050 disk_key.objectid = cpu_to_le64(rec->ino);
2051 disk_key.type = BTRFS_INODE_ITEM_KEY;
2052 disk_key.offset = 0;
2054 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2055 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2056 btrfs_set_dir_data_len(leaf, dir_item, 0);
2057 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2058 name_ptr = (unsigned long)(dir_item + 1);
2059 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2060 btrfs_mark_buffer_dirty(leaf);
2061 btrfs_free_path(path);
2062 btrfs_commit_transaction(trans, root);
2064 backref->found_dir_index = 1;
/* Keep the parent dir's in-memory size bookkeeping in sync. */
2065 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2068 dir_rec->found_size += backref->namelen;
2069 if (dir_rec->found_size == dir_rec->isize &&
2070 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2071 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2072 if (dir_rec->found_size != dir_rec->isize)
2073 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Remove a bad DIR_INDEX entry described by `backref`: look up the dir
 * index in the parent directory and delete either the whole item or just
 * this one name inside it.
 *
 * NOTE(review): the NULL check of btrfs_alloc_path, the IS_ERR(di) branch
 * and the final return are elided; `inode_cache` and `rec` are unused in
 * the visible lines -- presumably kept for signature symmetry with
 * add_missing_dir_index; confirm against the full source.
 */
2078 static int delete_dir_index(struct btrfs_root *root,
2079 struct cache_tree *inode_cache,
2080 struct inode_record *rec,
2081 struct inode_backref *backref)
2083 struct btrfs_trans_handle *trans;
2084 struct btrfs_dir_item *di;
2085 struct btrfs_path *path;
2088 path = btrfs_alloc_path();
2092 trans = btrfs_start_transaction(root, 1);
2093 if (IS_ERR(trans)) {
2094 btrfs_free_path(path);
2095 return PTR_ERR(trans);
2099 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2100 (unsigned long long)backref->dir,
2101 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2102 (unsigned long long)root->objectid);
/* -1 asks the lookup to prepare the item for deletion. */
2104 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2105 backref->name, backref->namelen,
2106 backref->index, -1);
2109 btrfs_free_path(path);
2110 btrfs_commit_transaction(trans, root);
/* Item holds only this entry: delete it whole; else strip one name. */
2117 ret = btrfs_del_item(trans, root, path);
2119 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2121 btrfs_free_path(path);
2122 btrfs_commit_transaction(trans, root);
/*
 * Recreate a lost inode item from what fsck discovered: nlink from the
 * counted links (or 1 for a root dir), size/mode guessed from whether dir
 * entries or extents were found, timestamps set to "now".
 *
 * NOTE(review): the `root_dir` special-case branches, the size assignment
 * inside the dir path and the final return are partly elided; the
 * `backref` parameter is unused in the visible lines.
 */
2126 static int create_inode_item(struct btrfs_root *root,
2127 struct inode_record *rec,
2128 struct inode_backref *backref, int root_dir)
2130 struct btrfs_trans_handle *trans;
2131 struct btrfs_inode_item inode_item;
2132 time_t now = time(NULL);
2135 trans = btrfs_start_transaction(root, 1);
2136 if (IS_ERR(trans)) {
2137 ret = PTR_ERR(trans);
2141 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2142 "be incomplete, please check permissions and content after "
2143 "the fsck completes.\n", (unsigned long long)root->objectid,
2144 (unsigned long long)rec->ino);
2146 memset(&inode_item, 0, sizeof(inode_item));
2147 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2149 btrfs_set_stack_inode_nlink(&inode_item, 1);
/* Non-root-dir case: trust the link count fsck accumulated. */
2151 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2152 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2153 if (rec->found_dir_item) {
/* Ambiguous inode (dir entries AND extents): prefer directory. */
2154 if (rec->found_file_extent)
2155 fprintf(stderr, "root %llu inode %llu has both a dir "
2156 "item and extents, unsure if it is a dir or a "
2157 "regular file so setting it as a directory\n",
2158 (unsigned long long)root->objectid,
2159 (unsigned long long)rec->ino);
2160 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2161 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2162 } else if (!rec->found_dir_item) {
2163 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2164 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* Timestamps cannot be recovered; stamp everything with "now". */
2166 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2167 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2168 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2169 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2170 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2171 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2172 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2173 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2175 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2177 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of an inode record and repair each inconsistency:
 * delete mismatched dir indexes, add missing ones, insert a missing
 * dir_item/dir_index pair for a lone inode_ref, and recreate the inode
 * item when only complete backrefs exist.  Returns a positive "repaired"
 * count or a negative error.
 *
 * NOTE(review): this listing is elided -- the `delete`/`repaired`
 * declarations, several ret checks, `continue` statements and closing
 * braces are missing; comments describe only the visible lines.
 */
2181 static int repair_inode_backrefs(struct btrfs_root *root,
2182 struct inode_record *rec,
2183 struct cache_tree *inode_cache,
2186 struct inode_backref *tmp, *backref;
2187 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2191 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root dir missing its inode item: recreate it before anything else. */
2192 if (!delete && rec->ino == root_dirid) {
2193 if (!rec->found_inode_item) {
2194 ret = create_inode_item(root, rec, backref, 1);
2201 /* Index 0 for root dir's are special, don't mess with it */
2202 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index without an inode ref, or one whose index disagrees
 * with the inode ref, is bogus: drop it.
 */
2206 ((backref->found_dir_index && !backref->found_inode_ref) ||
2207 (backref->found_dir_index && backref->found_inode_ref &&
2208 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2209 ret = delete_dir_index(root, inode_cache, rec, backref);
2213 list_del(&backref->list);
/* dir_item + inode_ref present but dir_index missing: add it. */
2217 if (!delete && !backref->found_dir_index &&
2218 backref->found_dir_item && backref->found_inode_ref) {
2219 ret = add_missing_dir_index(root, inode_cache, rec,
/* Fully consistent backref: nothing to fix, drop it from the list. */
2224 if (backref->found_dir_item &&
2225 backref->found_dir_index &&
2226 backref->found_dir_index) {
2227 if (!backref->errors &&
2228 backref->found_inode_ref) {
2229 list_del(&backref->list);
/* Only the inode_ref exists: synthesize the dir_item/index pair. */
2235 if (!delete && (!backref->found_dir_index &&
2236 !backref->found_dir_item &&
2237 backref->found_inode_ref)) {
2238 struct btrfs_trans_handle *trans;
2239 struct btrfs_key location;
/* Bail out to the nlink repair path on a name conflict. */
2241 ret = check_dir_conflict(root, backref->name,
2247 * let the nlink fixing routine handle it,
2248 * which can do it better.
2253 location.objectid = rec->ino;
2254 location.type = BTRFS_INODE_ITEM_KEY;
2255 location.offset = 0;
2257 trans = btrfs_start_transaction(root, 1);
2258 if (IS_ERR(trans)) {
2259 ret = PTR_ERR(trans);
2262 fprintf(stderr, "adding missing dir index/item pair "
2264 (unsigned long long)rec->ino);
2265 ret = btrfs_insert_dir_item(trans, root, backref->name,
2267 backref->dir, &location,
2268 imode_to_type(rec->imode),
2271 btrfs_commit_transaction(trans, root);
/* Complete, error-free backrefs but no inode item: recreate it. */
2275 if (!delete && (backref->found_inode_ref &&
2276 backref->found_dir_index &&
2277 backref->found_dir_item &&
2278 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2279 !rec->found_inode_item)) {
2280 ret = create_inode_item(root, rec, backref, 0);
2287 return ret ? ret : repaired;
2291 * To determine the file type for nlink/inode_item repair
2293 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2294 * Return -ENOENT if file type is not found.
/*
 * Determine the BTRFS_FT_* file type for nlink/inode_item repair: prefer
 * the recovered inode item's mode, otherwise take the filetype recorded by
 * any dir_item/dir_index backref.  (The `return 0;` after each hit and the
 * trailing `return -ENOENT;` are elided from this listing -- see the
 * comment block above for the contract.)
 */
2296 static int find_file_type(struct inode_record *rec, u8 *type)
2298 struct inode_backref *backref;
2300 /* For inode item recovered case */
2301 if (rec->found_inode_item) {
2302 *type = imode_to_type(rec->imode);
2306 list_for_each_entry(backref, &rec->backrefs, list) {
2307 if (backref->found_dir_index || backref->found_dir_item) {
2308 *type = backref->filetype;
2316 * To determine the file name for nlink repair
2318 * Return 0 if file name is found, set name and namelen.
2319 * Return -ENOENT if file name is not found.
/*
 * Recover a usable file name for nlink repair from any backref that
 * carries one (dir_index, dir_item or inode_ref).  Copies the name into
 * `name` and stores its length in *namelen.  (The `return 0;` on success
 * and trailing `return -ENOENT;` are elided from this listing.)
 */
2321 static int find_file_name(struct inode_record *rec,
2322 char *name, int *namelen)
2324 struct inode_backref *backref;
2326 list_for_each_entry(backref, &rec->backrefs, list) {
2327 if (backref->found_dir_index || backref->found_dir_item ||
2328 backref->found_inode_ref) {
2329 memcpy(name, backref->name, backref->namelen);
2330 *namelen = backref->namelen;
/* Reset the nlink of the inode to the correct one */
/*
 * Strategy: unlink *every* backref (dropping invalid ones for good), zero
 * the on-disk nlink, then re-add only the fully valid backrefs via
 * btrfs_add_link(), which increments nlink for each -- leaving the count
 * consistent with what actually exists.
 *
 * NOTE(review): ret checks/goto-out paths and the key.offset assignment
 * are elided from this listing.
 */
2338 static int reset_nlink(struct btrfs_trans_handle *trans,
2339 struct btrfs_root *root,
2340 struct btrfs_path *path,
2341 struct inode_record *rec)
2343 struct inode_backref *backref;
2344 struct inode_backref *tmp;
2345 struct btrfs_key key;
2346 struct btrfs_inode_item *inode_item;
2349 /* We don't believe this either, reset it and iterate backref */
2350 rec->found_link = 0;
2352 /* Remove all backref including the valid ones */
2353 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2355 backref->index, backref->name,
2356 backref->namelen, 0);
2360 /* remove invalid backref, so it won't be added back */
2361 if (!(backref->found_dir_index &&
2362 backref->found_dir_item &&
2363 backref->found_inode_ref)) {
2364 list_del(&backref->list);
2371 /* Set nlink to 0 */
2372 key.objectid = rec->ino;
2373 key.type = BTRFS_INODE_ITEM_KEY;
2375 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2382 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2383 struct btrfs_inode_item);
2384 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2385 btrfs_mark_buffer_dirty(path->nodes[0]);
2386 btrfs_release_path(path);
2389 * Add back valid inode_ref/dir_item/dir_index,
2390 * add_link() will handle the nlink inc, so new nlink must be correct
2392 list_for_each_entry(backref, &rec->backrefs, list) {
2393 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2394 backref->name, backref->namelen,
2395 backref->ref_type, &backref->index, 1);
2400 btrfs_release_path(path);
/*
 * Fix a wrong link count: recover a name/type for the inode, reset nlink
 * from the valid backrefs, and if no link remains, move the inode into a
 * "lost+found" directory (creating it if needed), retrying with ".INO"
 * suffixes on name collisions.  Clears I_ERR_LINK_COUNT_WRONG on exit.
 *
 * NOTE(review): several declarations (type, namelen, lost_found_ino),
 * ret checks/goto-out paths and closing braces are elided from this
 * listing; comments describe only the visible lines.
 */
2404 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2405 struct btrfs_root *root,
2406 struct btrfs_path *path,
2407 struct inode_record *rec)
2409 char *dir_name = "lost+found";
2410 char namebuf[BTRFS_NAME_LEN] = {0};
2415 int name_recovered = 0;
2416 int type_recovered = 0;
2420 * Get file name and type first before these invalid inode ref
2421 * are deleted by remove_all_invalid_backref()
2423 name_recovered = !find_file_name(rec, namebuf, &namelen);
2424 type_recovered = !find_file_type(rec, &type);
/* No recoverable name: fall back to the inode number as the name. */
2426 if (!name_recovered) {
2427 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2428 rec->ino, rec->ino);
2429 namelen = count_digits(rec->ino);
2430 sprintf(namebuf, "%llu", rec->ino);
2433 if (!type_recovered) {
2434 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2436 type = BTRFS_FT_REG_FILE;
/* Rebuild nlink from the set of valid backrefs. */
2440 ret = reset_nlink(trans, root, path, rec);
2443 "Failed to reset nlink for inode %llu: %s\n",
2444 rec->ino, strerror(-ret));
/* Orphaned inode (no links at all): park it under lost+found. */
2448 if (rec->found_link == 0) {
2449 lost_found_ino = root->highest_inode;
2450 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2455 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2456 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2459 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2460 dir_name, strerror(-ret));
2463 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2464 namebuf, namelen, type, NULL, 1);
2466 * Add ".INO" suffix several times to handle case where
2467 * "FILENAME.INO" is already taken by another file.
2469 while (ret == -EEXIST) {
2471 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2473 if (namelen + count_digits(rec->ino) + 1 >
2478 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2480 namelen += count_digits(rec->ino) + 1;
2481 ret = btrfs_add_link(trans, root, rec->ino,
2482 lost_found_ino, namebuf,
2483 namelen, type, NULL, 1);
2487 "Failed to link the inode %llu to %s dir: %s\n",
2488 rec->ino, dir_name, strerror(-ret));
2492 * Just increase the found_link, don't actually add the
2493 * backref. This will make things easier and this inode
2494 * record will be freed after the repair is done.
2495 * So fsck will not report problem about this inode.
2498 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2499 namelen, namebuf, dir_name);
2501 printf("Fixed the nlink of inode %llu\n", rec->ino);
2504 * Clear the flag anyway, or we will loop forever for the same inode
2505 * as it will not be removed from the bad inode list and the dead loop
2508 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2509 btrfs_release_path(path);
2514 * Check if there is any normal (reg or prealloc) file extent for the given
2516 * This is used to determine the file type when neither its dir_index/item nor
2517 * inode_item exists.
2519 * This will *NOT* report error, if any error happens, just consider it does
2520 * not have any normal file extent.
/*
 * Scan the fs tree for any non-inline EXTENT_DATA item belonging to `ino`;
 * used as a hint that the inode was a regular file (see comment above).
 * Errors are swallowed -- absence of extents is simply reported.
 *
 * NOTE(review): key.objectid/offset setup, the loop/`goto out` scaffolding,
 * the `ret = 1` on a hit and the return are elided from this listing.
 */
2522 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2524 struct btrfs_path *path;
2525 struct btrfs_key key;
2526 struct btrfs_key found_key;
2527 struct btrfs_file_extent_item *fi;
2531 path = btrfs_alloc_path();
2535 key.type = BTRFS_EXTENT_DATA_KEY;
2538 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* Landed past the end of a leaf: advance to the next one. */
2543 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2544 ret = btrfs_next_leaf(root, path);
2551 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* Left this inode's EXTENT_DATA key space: stop looking. */
2553 if (found_key.objectid != ino ||
2554 found_key.type != BTRFS_EXTENT_DATA_KEY)
2556 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2557 struct btrfs_file_extent_item);
2558 type = btrfs_file_extent_type(path->nodes[0], fi);
/* Any REG/PREALLOC extent counts as "normal". */
2559 if (type != BTRFS_FILE_EXTENT_INLINE) {
2565 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry type to the corresponding S_IF* mode
 * bits via a static lookup table.  NOTE(review): the table assumes `type`
 * is a valid BTRFS_FT_* value -- out-of-range input would index past the
 * array; confirm callers validate it.
 */
2569 static u32 btrfs_type_to_imode(u8 type)
2571 static u32 imode_by_btrfs_type[] = {
2572 [BTRFS_FT_REG_FILE] = S_IFREG,
2573 [BTRFS_FT_DIR] = S_IFDIR,
2574 [BTRFS_FT_CHRDEV] = S_IFCHR,
2575 [BTRFS_FT_BLKDEV] = S_IFBLK,
2576 [BTRFS_FT_FIFO] = S_IFIFO,
2577 [BTRFS_FT_SOCK] = S_IFSOCK,
2578 [BTRFS_FT_SYMLINK] = S_IFLNK,
2581 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item: infer the file type from backrefs, file
 * extents or orphan extents (falling back to regular file), create a new
 * inode item, then mark the record so the nlink repair runs next.
 *
 * NOTE(review): declarations of ret/filetype/mode, ret checks after
 * btrfs_new_inode and the final return are elided from this listing.
 */
2584 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2585 struct btrfs_root *root,
2586 struct btrfs_path *path,
2587 struct inode_record *rec)
2591 int type_recovered = 0;
2594 printf("Trying to rebuild inode:%llu\n", rec->ino);
2596 type_recovered = !find_file_type(rec, &filetype);
2599 * Try to determine inode type if type not found.
2601 * For found regular file extent, it must be FILE.
2602 * For found dir_item/index, it must be DIR.
2604 * For undetermined one, use FILE as fallback.
2607 * 1. If found backref(inode_index/item is already handled) to it,
2609 * Need new inode-inode ref structure to allow search for that.
2611 if (!type_recovered) {
2612 if (rec->found_file_extent &&
2613 find_normal_file_extent(root, rec->ino)) {
2615 filetype = BTRFS_FT_REG_FILE;
2616 } else if (rec->found_dir_item) {
2618 filetype = BTRFS_FT_DIR;
/* Orphan data extents also imply a regular file. */
2619 } else if (!list_empty(&rec->orphan_extents)) {
2621 filetype = BTRFS_FT_REG_FILE;
2623 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2626 filetype = BTRFS_FT_REG_FILE;
2630 ret = btrfs_new_inode(trans, root, rec->ino,
2631 mode | btrfs_type_to_imode(filetype));
2636 * Here inode rebuild is done, we only rebuild the inode item,
2637 * don't repair the nlink(like move to lost+found).
2638 * That is the job of nlink repair.
2640 * We just fill the record and return
2642 rec->found_dir_item = 1;
2643 rec->imode = mode | btrfs_type_to_imode(filetype);
2645 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2646 /* Ensure the inode_nlinks repair function will be called */
2647 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach each orphan data extent of the inode: if it conflicts with an
 * existing file extent, free it; otherwise insert a file extent item for
 * it, then update size/hole bookkeeping and clear the related error bits.
 *
 * NOTE(review): ret checks/goto-out paths, the free() of `orphan` after
 * list_del and the final return are elided from this listing.
 */
2652 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2653 struct btrfs_root *root,
2654 struct btrfs_path *path,
2655 struct inode_record *rec)
2657 struct orphan_data_extent *orphan;
2658 struct orphan_data_extent *tmp;
2661 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2663 * Check for conflicting file extents
2665 * Here we don't know whether the extents is compressed or not,
2666 * so we can only assume it not compressed nor data offset,
2667 * and use its disk_len as extent length.
2669 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2670 orphan->offset, orphan->disk_len, 0);
2671 btrfs_release_path(path);
/* Conflict found: drop the orphan extent rather than the real one. */
2676 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2677 orphan->disk_bytenr, orphan->disk_len);
2678 ret = btrfs_free_extent(trans,
2679 root->fs_info->extent_root,
2680 orphan->disk_bytenr, orphan->disk_len,
2681 0, root->objectid, orphan->objectid,
/* No conflict: insert the file extent pointing at the orphan data. */
2686 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2687 orphan->offset, orphan->disk_bytenr,
2688 orphan->disk_len, orphan->disk_len);
2692 /* Update file size info */
2693 rec->found_size += orphan->disk_len;
2694 if (rec->found_size == rec->nbytes)
2695 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2697 /* Update the file extent hole info too */
2698 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2702 if (RB_EMPTY_ROOT(&rec->holes))
2703 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2705 list_del(&orphan->list);
2708 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill every recorded file-extent hole of the inode by punching explicit
 * hole extents, clearing I_ERR_FILE_EXTENT_DISCOUNT when the hole tree
 * empties.  An inode with no extents at all gets one hole spanning its
 * whole (rounded-up) isize.
 *
 * NOTE(review): the `found` flag, ret checks/goto-out paths, the loop
 * scaffolding and the final return are elided from this listing.
 */
2713 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2714 struct btrfs_root *root,
2715 struct btrfs_path *path,
2716 struct inode_record *rec)
2718 struct rb_node *node;
2719 struct file_extent_hole *hole;
2723 node = rb_first(&rec->holes);
2727 hole = rb_entry(node, struct file_extent_hole, node);
2728 ret = btrfs_punch_hole(trans, root, rec->ino,
2729 hole->start, hole->len);
/* Remove the repaired hole from the record and re-fetch the first. */
2732 ret = del_file_extent_hole(&rec->holes, hole->start,
2736 if (RB_EMPTY_ROOT(&rec->holes))
2737 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2738 node = rb_first(&rec->holes);
2740 /* special case for a file losing all its file extent */
2742 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2743 round_up(rec->isize, root->sectorsize));
2747 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2748 rec->ino, root->objectid);
/*
 * try_repair_inode - dispatch repairs for every repairable error bit set
 * on an inode record, inside a single transaction.
 *
 * Bails out early unless at least one of the error bits handled below is
 * set.  Each repair helper only runs if all previous ones succeeded
 * (the "!ret &&" chaining), so one failure stops the whole sequence.
 */
2753 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2755 struct btrfs_trans_handle *trans;
2756 struct btrfs_path *path;
/* nothing we know how to fix -> do not even start a transaction */
2759 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2760 I_ERR_NO_ORPHAN_ITEM |
2761 I_ERR_LINK_COUNT_WRONG |
2762 I_ERR_NO_INODE_ITEM |
2763 I_ERR_FILE_EXTENT_ORPHAN |
2764 I_ERR_FILE_EXTENT_DISCOUNT|
2765 I_ERR_FILE_NBYTES_WRONG)))
2768 path = btrfs_alloc_path();
/*
 * Reservation of 7 items, itemized below:
 */
2773 * For nlink repair, it may create a dir and add link, so
2774 * 2 for parent(256)'s dir_index and dir_item
2775 * 2 for lost+found dir's inode_item and inode_ref
2776 * 1 for the new inode_ref of the file
2777 * 2 for lost+found dir's dir_index and dir_item for the file
2779 trans = btrfs_start_transaction(root, 7);
2780 if (IS_ERR(trans)) {
2781 btrfs_free_path(path);
2782 return PTR_ERR(trans);
/* missing inode item is repaired first; presumably the later repairs
 * rely on the inode item existing -- TODO confirm against full source */
2785 if (rec->errors & I_ERR_NO_INODE_ITEM)
2786 ret = repair_inode_no_item(trans, root, path, rec);
2787 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2788 ret = repair_inode_orphan_extent(trans, root, path, rec);
2789 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2790 ret = repair_inode_discount_extent(trans, root, path, rec);
2791 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2792 ret = repair_inode_isize(trans, root, path, rec);
2793 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2794 ret = repair_inode_orphan_item(trans, root, path, rec);
2795 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2796 ret = repair_inode_nlinks(trans, root, path, rec);
2797 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2798 ret = repair_inode_nbytes(trans, root, path, rec);
/* commit is unconditional here; its return value is not checked */
2799 btrfs_commit_transaction(trans, root);
2800 btrfs_free_path(path);
/*
 * check_inode_recs - verify (and optionally repair) every inode record
 * collected for one fs/subvolume tree.
 *
 * Phases visible here:
 *   1. orphan roots: warn if records exist at all;
 *   2. track the highest inode number (reserved for later 'lost+found');
 *   3. with --repair: fix inode backrefs first, in stages;
 *   4. validate / recreate the root directory inode;
 *   5. scan all remaining records, set derived error bits, try repair,
 *      and print unresolved backrefs for whatever is still broken.
 */
2804 static int check_inode_recs(struct btrfs_root *root,
2805 struct cache_tree *inode_cache)
2807 struct cache_extent *cache;
2808 struct ptr_node *node;
2809 struct inode_record *rec;
2810 struct inode_backref *backref;
2815 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* an unreferenced (deleted) root should have produced no inode recs */
2817 if (btrfs_root_refs(&root->root_item) == 0) {
2818 if (!cache_tree_empty(inode_cache))
2819 fprintf(stderr, "warning line %d\n", __LINE__);
2824 * We need to record the highest inode number for later 'lost+found'
2826 * We must select a ino not used/refered by any existing inode, or
2827 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2828 * this may cause 'lost+found' dir has wrong nlinks.
/* cache tree is keyed by ino, so the last extent holds the largest */
2830 cache = last_cache_extent(inode_cache);
2832 node = container_of(cache, struct ptr_node, cache);
2834 if (rec->ino > root->highest_inode)
2835 root->highest_inode = rec->ino;
2839 * We need to repair backrefs first because we could change some of the
2840 * errors in the inode recs.
2842 * We also need to go through and delete invalid backrefs first and then
2843 * add the correct ones second. We do this because we may get EEXIST
2844 * when adding back the correct index because we hadn't yet deleted the
2847 * For example, if we were missing a dir index then the directories
2848 * isize would be wrong, so if we fixed the isize to what we thought it
2849 * would be and then fixed the backref we'd still have a invalid fs, so
2850 * we need to add back the dir index and then check to see if the isize
/* staged backref repair loop; stage 3 with no errors means done */
2855 if (stage == 3 && !err)
2858 cache = search_cache_extent(inode_cache, 0);
2859 while (repair && cache) {
2860 node = container_of(cache, struct ptr_node, cache);
2862 cache = next_cache_extent(cache);
2864 /* Need to free everything up and rescan */
2866 remove_cache_extent(inode_cache, &node->cache);
2868 free_inode_rec(rec);
/* records with no backrefs have nothing to repair in this pass */
2872 if (list_empty(&rec->backrefs))
2875 ret = repair_inode_backrefs(root, rec, inode_cache,
/* validate the root directory inode of this tree */
2889 rec = get_inode_rec(inode_cache, root_dirid, 0);
2891 ret = check_root_dir(rec);
2893 fprintf(stderr, "root %llu root dir %llu error\n",
2894 (unsigned long long)root->root_key.objectid,
2895 (unsigned long long)root_dirid);
2896 print_inode_error(root, rec);
/* root dir record missing entirely: with --repair, recreate it */
2901 struct btrfs_trans_handle *trans;
2903 trans = btrfs_start_transaction(root, 1);
2904 if (IS_ERR(trans)) {
2905 err = PTR_ERR(trans);
2910 "root %llu missing its root dir, recreating\n",
2911 (unsigned long long)root->objectid);
2913 ret = btrfs_make_root_dir(trans, root, root_dirid);
2916 btrfs_commit_transaction(trans, root);
2920 fprintf(stderr, "root %llu root dir %llu not found\n",
2921 (unsigned long long)root->root_key.objectid,
2922 (unsigned long long)root_dirid);
/* main scan: consume every remaining inode record */
2926 cache = search_cache_extent(inode_cache, 0);
2929 node = container_of(cache, struct ptr_node, cache);
2931 remove_cache_extent(inode_cache, &node->cache);
/* root dir and orphan objectid were already handled above */
2933 if (rec->ino == root_dirid ||
2934 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2935 free_inode_rec(rec);
/* a present orphan item downgrades the NO_ORPHAN_ITEM error */
2939 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2940 ret = check_orphan_item(root, rec->ino);
2942 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2943 if (can_free_inode_rec(rec)) {
2944 free_inode_rec(rec);
/* derive remaining error bits from what was (not) found */
2949 if (!rec->found_inode_item)
2950 rec->errors |= I_ERR_NO_INODE_ITEM;
2951 if (rec->found_link != rec->nlink)
2952 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2954 ret = try_repair_inode(root, rec);
2955 if (ret == 0 && can_free_inode_rec(rec)) {
2956 free_inode_rec(rec);
/* not repaired: report the inode and all unresolved backrefs */
2962 if (!(repair && ret == 0))
2964 print_inode_error(root, rec);
2965 list_for_each_entry(backref, &rec->backrefs, list) {
2966 if (!backref->found_dir_item)
2967 backref->errors |= REF_ERR_NO_DIR_ITEM;
2968 if (!backref->found_dir_index)
2969 backref->errors |= REF_ERR_NO_DIR_INDEX;
2970 if (!backref->found_inode_ref)
2971 backref->errors |= REF_ERR_NO_INODE_REF;
2972 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2973 " namelen %u name %s filetype %d errors %x",
2974 (unsigned long long)backref->dir,
2975 (unsigned long long)backref->index,
2976 backref->namelen, backref->name,
2977 backref->filetype, backref->errors);
2978 print_ref_error(backref->errors);
2980 free_inode_rec(rec);
2982 return (error > 0) ? -1 : 0;
/*
 * get_root_rec - look up the root_record for @objectid in @root_cache,
 * creating and inserting a fresh zeroed record if none exists yet.
 * NOTE(review): the calloc() result is not visibly NULL-checked in this
 * elided view -- confirm against the full source.
 */
2985 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2988 struct cache_extent *cache;
2989 struct root_record *rec = NULL;
/* exact lookup: records occupy [objectid, objectid + 1) */
2992 cache = lookup_cache_extent(root_cache, objectid, 1);
2994 rec = container_of(cache, struct root_record, cache);
2996 rec = calloc(1, sizeof(*rec));
2997 rec->objectid = objectid;
2998 INIT_LIST_HEAD(&rec->backrefs);
2999 rec->cache.start = objectid;
3000 rec->cache.size = 1;
3002 ret = insert_cache_extent(root_cache, &rec->cache);
/*
 * get_root_backref - find the backref on @rec matching (ref_root, dir,
 * name); allocate, initialize and append a new one when absent.
 * The name is stored in the flexible tail of the allocation and is
 * always NUL-terminated.
 */
3008 static struct root_backref *get_root_backref(struct root_record *rec,
3009 u64 ref_root, u64 dir, u64 index,
3010 const char *name, int namelen)
3012 struct root_backref *backref;
3014 list_for_each_entry(backref, &rec->backrefs, list) {
3015 if (backref->ref_root != ref_root || backref->dir != dir ||
3016 backref->namelen != namelen)
3018 if (memcmp(name, backref->name, namelen))
/* no match found: allocate record plus room for name + NUL */
3023 backref = calloc(1, sizeof(*backref) + namelen + 1);
3024 backref->ref_root = ref_root;
3026 backref->index = index;
3027 backref->namelen = namelen;
3028 memcpy(backref->name, name, namelen);
3029 backref->name[namelen] = '\0';
3030 list_add_tail(&backref->list, &rec->backrefs);
/*
 * free_root_record - destructor for a root_record stored in a cache
 * tree: releases every queued backref.  Used below to instantiate the
 * tree-wide free routine via FREE_EXTENT_CACHE_BASED_TREE.
 */
3034 static void free_root_record(struct cache_extent *cache)
3036 struct root_record *rec;
3037 struct root_backref *backref;
3039 rec = container_of(cache, struct root_record, cache);
3040 while (!list_empty(&rec->backrefs)) {
3041 backref = list_entry(rec->backrefs.next,
3042 struct root_backref, list);
3043 list_del(&backref->list);
/* generates free_root_recs_tree() for cache trees of root_records */
3050 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * add_root_backref - record one piece of evidence (DIR_ITEM, DIR_INDEX,
 * ROOT_REF or ROOT_BACKREF key) tying @root_id to @ref_root/@dir/@name.
 *
 * Flags inconsistencies between the evidence sources (index mismatch,
 * duplicate refs) in backref->errors.  A backref becomes "reachable"
 * once both the forward root ref and the dir item have been seen.
 */
3052 static int add_root_backref(struct cache_tree *root_cache,
3053 u64 root_id, u64 ref_root, u64 dir, u64 index,
3054 const char *name, int namelen,
3055 int item_type, int errors)
3057 struct root_record *rec;
3058 struct root_backref *backref;
3060 rec = get_root_rec(root_cache, root_id);
3061 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3063 backref->errors |= errors;
/* DIR_ITEM keys carry no index, so only cross-check it for others */
3065 if (item_type != BTRFS_DIR_ITEM_KEY) {
3066 if (backref->found_dir_index || backref->found_back_ref ||
3067 backref->found_forward_ref) {
3068 if (backref->index != index)
3069 backref->errors |= REF_ERR_INDEX_UNMATCH;
3071 backref->index = index;
/* mark which kind of evidence this call represents */
3075 if (item_type == BTRFS_DIR_ITEM_KEY) {
3076 if (backref->found_forward_ref)
3078 backref->found_dir_item = 1;
3079 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3080 backref->found_dir_index = 1;
3081 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3082 if (backref->found_forward_ref)
3083 backref->errors |= REF_ERR_DUP_ROOT_REF;
3084 else if (backref->found_dir_item)
3086 backref->found_forward_ref = 1;
3087 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3088 if (backref->found_back_ref)
3089 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3090 backref->found_back_ref = 1;
3095 if (backref->found_forward_ref && backref->found_dir_item)
3096 backref->reachable = 1;
/*
 * merge_root_recs - move per-tree root-reference records from
 * @src_cache into the global @dst_cache as root backrefs.
 *
 * For the reloc tree the records are simply discarded.  Each inode
 * record in src_cache describes a subvolume reference (rec->ino is the
 * referenced root id) and is translated into DIR_ITEM / DIR_INDEX
 * evidence via add_root_backref().
 */
3100 static int merge_root_recs(struct btrfs_root *root,
3101 struct cache_tree *src_cache,
3102 struct cache_tree *dst_cache)
3104 struct cache_extent *cache;
3105 struct ptr_node *node;
3106 struct inode_record *rec;
3107 struct inode_backref *backref;
/* reloc tree contributes nothing; free and bail out */
3110 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3111 free_inode_recs_tree(src_cache);
3116 cache = search_cache_extent(src_cache, 0);
3119 node = container_of(cache, struct ptr_node, cache);
3121 remove_cache_extent(src_cache, &node->cache);
3124 ret = is_child_root(root, root->objectid, rec->ino);
3130 list_for_each_entry(backref, &rec->backrefs, list) {
/* root refs never come from INODE_REF items */
3131 BUG_ON(backref->found_inode_ref);
3132 if (backref->found_dir_item)
3133 add_root_backref(dst_cache, rec->ino,
3134 root->root_key.objectid, backref->dir,
3135 backref->index, backref->name,
3136 backref->namelen, BTRFS_DIR_ITEM_KEY,
3138 if (backref->found_dir_index)
3139 add_root_backref(dst_cache, rec->ino,
3140 root->root_key.objectid, backref->dir,
3141 backref->index, backref->name,
3142 backref->namelen, BTRFS_DIR_INDEX_KEY,
3146 free_inode_rec(rec);
/*
 * check_root_refs - verify that every fs tree recorded in @root_cache
 * is reachable from the top-level fs tree.
 *
 * Pass 1 iteratively clears "reachable" on backrefs whose referencing
 * root itself lost all refs, until a fixed point is reached.  Pass 2
 * reports roots with found_ref == 0 (unless they are orphans or have
 * no root item at all) and prints unresolved backrefs.
 */
3153 static int check_root_refs(struct btrfs_root *root,
3154 struct cache_tree *root_cache)
3156 struct root_record *rec;
3157 struct root_record *ref_root;
3158 struct root_backref *backref;
3159 struct cache_extent *cache;
/* the top-level fs tree is reachable by definition */
3165 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3168 /* fixme: this can not detect circular references */
/* pass 1: propagate unreachability until nothing changes */
3171 cache = search_cache_extent(root_cache, 0);
3175 rec = container_of(cache, struct root_record, cache);
3176 cache = next_cache_extent(cache);
3178 if (rec->found_ref == 0)
3181 list_for_each_entry(backref, &rec->backrefs, list) {
3182 if (!backref->reachable)
3185 ref_root = get_root_rec(root_cache,
3187 if (ref_root->found_ref > 0)
/* referencing root is itself unreachable: drop this ref */
3190 backref->reachable = 0;
3192 if (rec->found_ref == 0)
/* pass 2: report the losers */
3198 cache = search_cache_extent(root_cache, 0);
3202 rec = container_of(cache, struct root_record, cache);
3203 cache = next_cache_extent(cache);
3205 if (rec->found_ref == 0 &&
3206 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3207 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
/* an orphan item in the tree root legitimizes a ref-less root */
3208 ret = check_orphan_item(root->fs_info->tree_root,
3214 * If we don't have a root item then we likely just have
3215 * a dir item in a snapshot for this root but no actual
3216 * ref key or anything so it's meaningless.
3218 if (!rec->found_root_item)
3221 fprintf(stderr, "fs tree %llu not referenced\n",
3222 (unsigned long long)rec->objectid);
/* referenced but the ROOT_ITEM itself is missing */
3226 if (rec->found_ref > 0 && !rec->found_root_item)
3228 list_for_each_entry(backref, &rec->backrefs, list) {
3229 if (!backref->found_dir_item)
3230 backref->errors |= REF_ERR_NO_DIR_ITEM;
3231 if (!backref->found_dir_index)
3232 backref->errors |= REF_ERR_NO_DIR_INDEX;
3233 if (!backref->found_back_ref)
3234 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3235 if (!backref->found_forward_ref)
3236 backref->errors |= REF_ERR_NO_ROOT_REF;
3237 if (backref->reachable && backref->errors)
3244 fprintf(stderr, "fs tree %llu refs %u %s\n",
3245 (unsigned long long)rec->objectid, rec->found_ref,
3246 rec->found_root_item ? "" : "not found");
3248 list_for_each_entry(backref, &rec->backrefs, list) {
3249 if (!backref->reachable)
3251 if (!backref->errors && rec->found_root_item)
3253 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3254 " index %llu namelen %u name %s errors %x\n",
3255 (unsigned long long)backref->ref_root,
3256 (unsigned long long)backref->dir,
3257 (unsigned long long)backref->index,
3258 backref->namelen, backref->name,
3260 print_ref_error(backref->errors);
3263 return errors > 0 ? 1 : 0;
/*
 * process_root_ref - decode a ROOT_REF or ROOT_BACKREF item at @slot of
 * @eb and feed it into the root cache via add_root_backref().
 *
 * For ROOT_REF, key.objectid is the referencing root and key.offset the
 * referenced one; ROOT_BACKREF has them swapped, hence the two calls.
 */
3266 static int process_root_ref(struct extent_buffer *eb, int slot,
3267 struct btrfs_key *key,
3268 struct cache_tree *root_cache)
3274 struct btrfs_root_ref *ref;
3275 char namebuf[BTRFS_NAME_LEN];
3278 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3280 dirid = btrfs_root_ref_dirid(eb, ref);
3281 index = btrfs_root_ref_sequence(eb, ref);
3282 name_len = btrfs_root_ref_name_len(eb, ref);
/* clamp over-long names to the buffer and flag the error;
 * NOTE(review): the in-bounds branch body is elided in this view */
3284 if (name_len <= BTRFS_NAME_LEN) {
3288 len = BTRFS_NAME_LEN;
3289 error = REF_ERR_NAME_TOO_LONG;
/* name bytes follow the fixed-size root_ref struct in the item */
3291 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3293 if (key->type == BTRFS_ROOT_REF_KEY) {
3294 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3295 index, namebuf, len, key->type, error);
3297 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3298 index, namebuf, len, key->type, error);
/*
 * free_corrupt_block - destructor for btrfs_corrupt_block cache entries;
 * the macro below instantiates free_corrupt_blocks_tree() from it.
 */
3303 static void free_corrupt_block(struct cache_extent *cache)
3305 struct btrfs_corrupt_block *corrupt;
3307 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3311 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3314 * Repair the btree of the given root.
3316 * The fix is to remove the node key in corrupt_blocks cache_tree.
3317 * and rebalance the tree.
3318 * After the fix, the btree should be writeable.
/*
 * Two phases: (1) for each recorded corrupt block, delete its parent
 * pointer with btrfs_del_ptr() and free the tree-block extent;
 * (2) re-run btrfs_search_slot() with ins_len = -1 over the same keys
 * so the deletion path rebalances the tree.
 */
3320 static int repair_btree(struct btrfs_root *root,
3321 struct cache_tree *corrupt_blocks)
3323 struct btrfs_trans_handle *trans;
3324 struct btrfs_path *path;
3325 struct btrfs_corrupt_block *corrupt;
3326 struct cache_extent *cache;
3327 struct btrfs_key key;
3332 if (cache_tree_empty(corrupt_blocks))
3335 path = btrfs_alloc_path();
3339 trans = btrfs_start_transaction(root, 1);
3340 if (IS_ERR(trans)) {
3341 ret = PTR_ERR(trans);
3342 fprintf(stderr, "Error starting transaction: %s\n",
/* phase 1: unlink every recorded corrupt block from its parent */
3346 cache = first_cache_extent(corrupt_blocks);
3348 corrupt = container_of(cache, struct btrfs_corrupt_block,
3350 level = corrupt->level;
3351 path->lowest_level = level;
3352 key.objectid = corrupt->key.objectid;
3353 key.type = corrupt->key.type;
3354 key.offset = corrupt->key.offset;
3357 * Here we don't want to do any tree balance, since it may
3358 * cause a balance with corrupted brother leaf/node,
3359 * so ins_len set to 0 here.
3360 * Balance will be done after all corrupt node/leaf is deleted.
3362 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3365 offset = btrfs_node_blockptr(path->nodes[level],
3366 path->slots[level]);
3368 /* Remove the ptr */
3369 ret = btrfs_del_ptr(trans, root, path, level,
3370 path->slots[level]);
3374 * Remove the corresponding extent
3375 * return value is not concerned.
3377 btrfs_release_path(path);
3378 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3379 0, root->root_key.objectid,
3381 cache = next_cache_extent(cache);
3384 /* Balance the btree using btrfs_search_slot() */
/* phase 2: searching with ins_len = -1 triggers rebalancing */
3385 cache = first_cache_extent(corrupt_blocks);
3387 corrupt = container_of(cache, struct btrfs_corrupt_block,
3389 memcpy(&key, &corrupt->key, sizeof(key));
3390 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3393 /* return will always >0 since it won't find the item */
3395 btrfs_release_path(path);
3396 cache = next_cache_extent(cache);
3399 btrfs_commit_transaction(trans, root);
3401 btrfs_free_path(path);
/*
 * check_fs_root - walk one fs/subvolume tree, collecting inode and root
 * records and recording corrupt tree blocks for optional repair.
 *
 * Visible flow: register the root record, move pending orphan data
 * extents onto their inode records, sanity-check the root node, walk
 * the tree (resuming from drop_progress for half-deleted snapshots),
 * optionally repair recorded corrupt blocks, then merge root records
 * and check the collected inode records.
 */
3405 static int check_fs_root(struct btrfs_root *root,
3406 struct cache_tree *root_cache,
3407 struct walk_control *wc)
3413 struct btrfs_path path;
3414 struct shared_node root_node;
3415 struct root_record *rec;
3416 struct btrfs_root_item *root_item = &root->root_item;
3417 struct cache_tree corrupt_blocks;
3418 struct orphan_data_extent *orphan;
3419 struct orphan_data_extent *tmp;
3420 enum btrfs_tree_block_status status;
3423 * Reuse the corrupt_block cache tree to record corrupted tree block
3425 * Unlike the usage in extent tree check, here we do it in a per
3426 * fs/subvol tree base.
3428 cache_tree_init(&corrupt_blocks);
3429 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* the reloc tree does not get a root record */
3431 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3432 rec = get_root_rec(root_cache, root->root_key.objectid);
3433 if (btrfs_root_refs(root_item) > 0)
3434 rec->found_root_item = 1;
3437 btrfs_init_path(&path);
3438 memset(&root_node, 0, sizeof(root_node));
3439 cache_tree_init(&root_node.root_cache);
3440 cache_tree_init(&root_node.inode_cache);
3442 /* Move the orphan extent record to corresponding inode_record */
3443 list_for_each_entry_safe(orphan, tmp,
3444 &root->orphan_data_extents, list) {
3445 struct inode_record *inode;
3447 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3449 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3450 list_move(&orphan->list, &inode->orphan_extents);
3453 level = btrfs_header_level(root->node);
3454 memset(wc->nodes, 0, sizeof(wc->nodes));
3455 wc->nodes[level] = &root_node;
3456 wc->active_node = level;
3457 wc->root_level = level;
3459 /* We may not have checked the root block, lets do that now */
3460 if (btrfs_is_leaf(root->node))
3461 status = btrfs_check_leaf(root, NULL, root->node);
3463 status = btrfs_check_node(root, NULL, root->node);
3464 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* start from the top, or resume from drop_progress for a root
 * that is mid-deletion */
3467 if (btrfs_root_refs(root_item) > 0 ||
3468 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3469 path.nodes[level] = root->node;
3470 extent_buffer_get(root->node);
3471 path.slots[level] = 0;
3473 struct btrfs_key key;
3474 struct btrfs_disk_key found_key;
3476 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3477 level = root_item->drop_level;
3478 path.lowest_level = level;
3479 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3482 btrfs_node_key(path.nodes[level], &found_key,
3484 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3485 sizeof(found_key)));
/* main walk: alternate down/up until the tree is consumed */
3489 wret = walk_down_tree(root, &path, wc, &level);
3495 wret = walk_up_tree(root, &path, wc, &level);
3502 btrfs_release_path(&path);
/* report recorded corruption, and attempt repair if requested */
3504 if (!cache_tree_empty(&corrupt_blocks)) {
3505 struct cache_extent *cache;
3506 struct btrfs_corrupt_block *corrupt;
3508 printf("The following tree block(s) is corrupted in tree %llu:\n",
3509 root->root_key.objectid);
3510 cache = first_cache_extent(&corrupt_blocks);
3512 corrupt = container_of(cache,
3513 struct btrfs_corrupt_block,
3515 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3516 cache->start, corrupt->level,
3517 corrupt->key.objectid, corrupt->key.type,
3518 corrupt->key.offset);
3519 cache = next_cache_extent(cache);
3522 printf("Try to repair the btree for root %llu\n",
3523 root->root_key.objectid);
3524 ret = repair_btree(root, &corrupt_blocks);
3526 fprintf(stderr, "Failed to repair btree: %s\n",
3529 printf("Btree for root %llu is fixed\n",
3530 root->root_key.objectid);
3534 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3538 if (root_node.current) {
3539 root_node.current->checked = 1;
3540 maybe_free_inode_rec(&root_node.inode_cache,
3544 err = check_inode_recs(root, &root_node.inode_cache);
/* always clear the fs_info hook and drop leftover orphan extents */
3548 free_corrupt_blocks_tree(&corrupt_blocks);
3549 root->fs_info->corrupt_blocks = NULL;
3550 free_orphan_data_extents(&root->orphan_data_extents);
3554 static int fs_root_objectid(u64 objectid)
3556 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3557 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3559 return is_fstree(objectid);
/*
 * check_fs_roots - iterate every ROOT_ITEM in the tree of tree roots
 * and run check_fs_root() on each fs/subvolume tree; ROOT_REF and
 * ROOT_BACKREF items are fed into the root cache instead.
 *
 * If the tree root node changes underneath us (a repair rewrote it),
 * or check_fs_root() returns -EAGAIN, the collected records are freed
 * and the scan restarts from the beginning.
 */
3562 static int check_fs_roots(struct btrfs_root *root,
3563 struct cache_tree *root_cache)
3565 struct btrfs_path path;
3566 struct btrfs_key key;
3567 struct walk_control wc;
3568 struct extent_buffer *leaf, *tree_node;
3569 struct btrfs_root *tmp_root;
3570 struct btrfs_root *tree_root = root->fs_info->tree_root;
3574 if (ctx.progress_enabled) {
3575 ctx.tp = TASK_FS_ROOTS;
3576 task_start(ctx.info);
3580 * Just in case we made any changes to the extent tree that weren't
3581 * reflected into the free space cache yet.
3584 reset_cached_block_groups(root->fs_info);
3585 memset(&wc, 0, sizeof(wc));
3586 cache_tree_init(&wc.shared);
3587 btrfs_init_path(&path);
3592 key.type = BTRFS_ROOT_ITEM_KEY;
3593 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* remember the root node so we can detect concurrent rewrites */
3598 tree_node = tree_root->node;
3600 if (tree_node != tree_root->node) {
3601 free_root_recs_tree(root_cache);
3602 btrfs_release_path(&path);
3605 leaf = path.nodes[0];
3606 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3607 ret = btrfs_next_leaf(tree_root, &path);
3613 leaf = path.nodes[0];
3615 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3616 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3617 fs_root_objectid(key.objectid)) {
/* reloc roots are read uncached and freed right after the check */
3618 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3619 tmp_root = btrfs_read_fs_root_no_cache(
3620 root->fs_info, &key);
3622 key.offset = (u64)-1;
3623 tmp_root = btrfs_read_fs_root(
3624 root->fs_info, &key);
3626 if (IS_ERR(tmp_root)) {
3630 ret = check_fs_root(tmp_root, root_cache, &wc);
3631 if (ret == -EAGAIN) {
3632 free_root_recs_tree(root_cache);
3633 btrfs_release_path(&path);
3638 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3639 btrfs_free_fs_root(tmp_root);
3640 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3641 key.type == BTRFS_ROOT_BACKREF_KEY) {
3642 process_root_ref(leaf, path.slots[0], &key,
3649 btrfs_release_path(&path);
3651 free_extent_cache_tree(&wc.shared);
3652 if (!cache_tree_empty(&wc.shared))
3653 fprintf(stderr, "warning line %d\n", __LINE__);
3655 task_stop(ctx.info);
/*
 * all_backpointers_checked - cross-check every backref attached to an
 * extent record; returns non-zero when something is inconsistent
 * (optionally printing each problem when @print_errs is set).
 *
 * Checks per backref: present in the extent tree; tree backrefs were
 * actually referenced; data backrefs have matching ref counts, disk
 * bytenr and byte length.  Finally the sum of found data refs is
 * compared with the extent item's global ref count.
 */
3660 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3662 struct list_head *cur = rec->backrefs.next;
3663 struct extent_backref *back;
3664 struct tree_backref *tback;
3665 struct data_backref *dback;
3669 while(cur != &rec->backrefs) {
3670 back = list_entry(cur, struct extent_backref, list);
/* backref seen from the reference side but not in the extent tree */
3672 if (!back->found_extent_tree) {
3676 if (back->is_data) {
3677 dback = (struct data_backref *)back;
3678 fprintf(stderr, "Backref %llu %s %llu"
3679 " owner %llu offset %llu num_refs %lu"
3680 " not found in extent tree\n",
3681 (unsigned long long)rec->start,
3682 back->full_backref ?
3684 back->full_backref ?
3685 (unsigned long long)dback->parent:
3686 (unsigned long long)dback->root,
3687 (unsigned long long)dback->owner,
3688 (unsigned long long)dback->offset,
3689 (unsigned long)dback->num_refs);
3691 tback = (struct tree_backref *)back;
3692 fprintf(stderr, "Backref %llu parent %llu"
3693 " root %llu not found in extent tree\n",
3694 (unsigned long long)rec->start,
3695 (unsigned long long)tback->parent,
3696 (unsigned long long)tback->root);
/* tree backref present in the extent tree but never referenced */
3699 if (!back->is_data && !back->found_ref) {
3703 tback = (struct tree_backref *)back;
3704 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3705 (unsigned long long)rec->start,
3706 back->full_backref ? "parent" : "root",
3707 back->full_backref ?
3708 (unsigned long long)tback->parent :
3709 (unsigned long long)tback->root, back);
3711 if (back->is_data) {
3712 dback = (struct data_backref *)back;
/* per-backref ref count mismatch */
3713 if (dback->found_ref != dback->num_refs) {
3717 fprintf(stderr, "Incorrect local backref count"
3718 " on %llu %s %llu owner %llu"
3719 " offset %llu found %u wanted %u back %p\n",
3720 (unsigned long long)rec->start,
3721 back->full_backref ?
3723 back->full_backref ?
3724 (unsigned long long)dback->parent:
3725 (unsigned long long)dback->root,
3726 (unsigned long long)dback->owner,
3727 (unsigned long long)dback->offset,
3728 dback->found_ref, dback->num_refs, back);
3730 if (dback->disk_bytenr != rec->start) {
3734 fprintf(stderr, "Backref disk bytenr does not"
3735 " match extent record, bytenr=%llu, "
3736 "ref bytenr=%llu\n",
3737 (unsigned long long)rec->start,
3738 (unsigned long long)dback->disk_bytenr);
3741 if (dback->bytes != rec->nr) {
3745 fprintf(stderr, "Backref bytes do not match "
3746 "extent backref, bytenr=%llu, ref "
3747 "bytes=%llu, backref bytes=%llu\n",
3748 (unsigned long long)rec->start,
3749 (unsigned long long)rec->nr,
3750 (unsigned long long)dback->bytes);
/* accumulate found refs to compare against the extent item */
3753 if (!back->is_data) {
3756 dback = (struct data_backref *)back;
3757 found += dback->found_ref;
3760 if (found != rec->refs) {
3764 fprintf(stderr, "Incorrect global backref count "
3765 "on %llu found %llu wanted %llu\n",
3766 (unsigned long long)rec->start,
3767 (unsigned long long)found,
3768 (unsigned long long)rec->refs);
/*
 * free_all_extent_backrefs - detach and release every backref queued on
 * an extent record.
 */
3774 static int free_all_extent_backrefs(struct extent_record *rec)
3776 struct extent_backref *back;
3777 struct list_head *cur;
3778 while (!list_empty(&rec->backrefs)) {
3779 cur = rec->backrefs.next;
3780 back = list_entry(cur, struct extent_backref, list);
/*
 * free_extent_record_cache - drain @extent_cache, freeing each extent
 * record together with its backrefs.
 */
3787 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3788 struct cache_tree *extent_cache)
3790 struct cache_extent *cache;
3791 struct extent_record *rec;
3794 cache = first_cache_extent(extent_cache);
3797 rec = container_of(cache, struct extent_record, cache);
3798 remove_cache_extent(extent_cache, cache);
3799 free_all_extent_backrefs(rec);
/*
 * maybe_free_extent_rec - release an extent record early when every
 * check on it has already passed (content and owner verified, ref
 * counts match, no duplicates, no full-backref / stripe / chunk-type
 * problems).  Keeping only unresolved records bounds memory use.
 */
3804 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3805 struct extent_record *rec)
3807 if (rec->content_checked && rec->owner_ref_checked &&
3808 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3809 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3810 !rec->bad_full_backref && !rec->crossing_stripes &&
3811 !rec->wrong_chunk_type) {
3812 remove_cache_extent(extent_cache, &rec->cache);
3813 free_all_extent_backrefs(rec);
3814 list_del_init(&rec->list);
/*
 * check_owner_ref - verify that the owner recorded in a tree block's
 * header really references the block.  Returns 0 when ownership is
 * confirmed, 1 otherwise.
 *
 * Fast path: a non-full backref matching header_owner on the record.
 * Slow path: read the owning fs tree and search for the block's first
 * key one level up; ownership holds if the parent slot points at
 * buf->start.
 */
3820 static int check_owner_ref(struct btrfs_root *root,
3821 struct extent_record *rec,
3822 struct extent_buffer *buf)
3824 struct extent_backref *node;
3825 struct tree_backref *back;
3826 struct btrfs_root *ref_root;
3827 struct btrfs_key key;
3828 struct btrfs_path path;
3829 struct extent_buffer *parent;
3834 list_for_each_entry(node, &rec->backrefs, list) {
3837 if (!node->found_ref)
3839 if (node->full_backref)
3841 back = (struct tree_backref *)node;
3842 if (btrfs_header_owner(buf) == back->root)
/* a root block would have matched above; reaching here means
 * the record cannot be a root */
3845 BUG_ON(rec->is_root);
3847 /* try to find the block by search corresponding fs tree */
3848 key.objectid = btrfs_header_owner(buf);
3849 key.type = BTRFS_ROOT_ITEM_KEY;
3850 key.offset = (u64)-1;
3852 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3853 if (IS_ERR(ref_root))
3856 level = btrfs_header_level(buf);
/* use the block's first key as the search anchor */
3858 btrfs_item_key_to_cpu(buf, &key, 0);
3860 btrfs_node_key_to_cpu(buf, &key, 0);
3862 btrfs_init_path(&path);
3863 path.lowest_level = level + 1;
3864 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3868 parent = path.nodes[level + 1];
3869 if (parent && buf->start == btrfs_node_blockptr(parent,
3870 path.slots[level + 1]))
3873 btrfs_release_path(&path);
3874 return found ? 0 : 1;
/*
 * is_extent_tree_record - report whether any non-full tree backref on
 * @rec points at the extent tree itself.
 */
3877 static int is_extent_tree_record(struct extent_record *rec)
3879 struct list_head *cur = rec->backrefs.next;
3880 struct extent_backref *node;
3881 struct tree_backref *back;
3884 while(cur != &rec->backrefs) {
3885 node = list_entry(cur, struct extent_backref, list);
3889 back = (struct tree_backref *)node;
/* full backrefs carry a parent bytenr, not a root id: skip them */
3890 if (node->full_backref)
3892 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * record_bad_block_io - after a read error on [start, start+len), mark
 * the affected extent-tree block as corrupt so it can be repaired,
 * using the parent key stashed on the extent record.
 */
3899 static int record_bad_block_io(struct btrfs_fs_info *info,
3900 struct cache_tree *extent_cache,
3903 struct extent_record *rec;
3904 struct cache_extent *cache;
3905 struct btrfs_key key;
3907 cache = lookup_cache_extent(extent_cache, start, len);
3911 rec = container_of(cache, struct extent_record, cache);
/* only blocks belonging to the extent tree are recorded here */
3912 if (!is_extent_tree_record(rec))
3915 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3916 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * swap_values - exchange the entries at @slot and @slot+1 of @buf,
 * used by fix_key_order() to bubble mis-ordered keys into place.
 *
 * Node case: swap the two key pointers wholesale; if slot 0 changed,
 * propagate the new low key to the parents.  Leaf case: swap item data
 * through temporary heap buffers, swap the offset/size headers, then
 * rewrite both keys via btrfs_set_item_key_unsafe().
 */
3919 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3920 struct extent_buffer *buf, int slot)
3922 if (btrfs_header_level(buf)) {
3923 struct btrfs_key_ptr ptr1, ptr2;
3925 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3926 sizeof(struct btrfs_key_ptr));
3927 read_extent_buffer(buf, &ptr2,
3928 btrfs_node_key_ptr_offset(slot + 1),
3929 sizeof(struct btrfs_key_ptr));
3930 write_extent_buffer(buf, &ptr1,
3931 btrfs_node_key_ptr_offset(slot + 1),
3932 sizeof(struct btrfs_key_ptr));
3933 write_extent_buffer(buf, &ptr2,
3934 btrfs_node_key_ptr_offset(slot),
3935 sizeof(struct btrfs_key_ptr));
/* slot 0 changed: fix the key copies held by the parents */
3937 struct btrfs_disk_key key;
3938 btrfs_node_key(buf, &key, 0);
3939 btrfs_fixup_low_keys(root, path, &key,
3940 btrfs_header_level(buf) + 1);
3943 struct btrfs_item *item1, *item2;
3944 struct btrfs_key k1, k2;
3945 char *item1_data, *item2_data;
3946 u32 item1_offset, item2_offset, item1_size, item2_size;
3948 item1 = btrfs_item_nr(slot);
3949 item2 = btrfs_item_nr(slot + 1);
3950 btrfs_item_key_to_cpu(buf, &k1, slot);
3951 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3952 item1_offset = btrfs_item_offset(buf, item1);
3953 item2_offset = btrfs_item_offset(buf, item2);
3954 item1_size = btrfs_item_size(buf, item1);
3955 item2_size = btrfs_item_size(buf, item2);
3957 item1_data = malloc(item1_size);
3960 item2_data = malloc(item2_size);
/* swap the payloads: each is rewritten at the other's offset */
3966 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3967 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3969 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3970 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* swap the item headers to match the relocated data */
3974 btrfs_set_item_offset(buf, item1, item2_offset);
3975 btrfs_set_item_offset(buf, item2, item1_offset);
3976 btrfs_set_item_size(buf, item1, item2_size);
3977 btrfs_set_item_size(buf, item2, item1_size);
3979 path->slots[0] = slot;
3980 btrfs_set_item_key_unsafe(root, path, &k2);
3981 path->slots[0] = slot + 1;
3982 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * fix_key_order - single-pass scan of the block at path->lowest_level,
 * swapping any adjacent pair of keys that is out of order and marking
 * the buffer dirty after each swap.
 */
3987 static int fix_key_order(struct btrfs_trans_handle *trans,
3988 struct btrfs_root *root,
3989 struct btrfs_path *path)
3991 struct extent_buffer *buf;
3992 struct btrfs_key k1, k2;
3994 int level = path->lowest_level;
3997 buf = path->nodes[level];
3998 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* nodes and leaves store keys differently; read accordingly */
4000 btrfs_node_key_to_cpu(buf, &k1, i);
4001 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4003 btrfs_item_key_to_cpu(buf, &k1, i);
4004 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* already ordered: nothing to do for this pair */
4006 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4008 ret = swap_values(root, path, buf, i);
4011 btrfs_mark_buffer_dirty(buf);
/*
 * delete_bogus_item - drop the item at @slot from leaf @buf when its
 * key type is one we can safely lose (keys that check/repair can
 * regenerate or tolerate missing).  Removes the item header by
 * memmove, decrements nritems, and fixes the parents' low key when
 * slot 0 was removed.
 */
4017 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4018 struct btrfs_root *root,
4019 struct btrfs_path *path,
4020 struct extent_buffer *buf, int slot)
4022 struct btrfs_key key;
4023 int nritems = btrfs_header_nritems(buf);
4025 btrfs_item_key_to_cpu(buf, &key, slot);
4027 /* These are all the keys we can deal with missing. */
4028 if (key.type != BTRFS_DIR_INDEX_KEY &&
4029 key.type != BTRFS_EXTENT_ITEM_KEY &&
4030 key.type != BTRFS_METADATA_ITEM_KEY &&
4031 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4032 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4035 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4036 (unsigned long long)key.objectid, key.type,
4037 (unsigned long long)key.offset, slot, buf->start);
/* shift the following item headers down over the removed one */
4038 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4039 btrfs_item_nr_offset(slot + 1),
4040 sizeof(struct btrfs_item) *
4041 (nritems - slot - 1));
4042 btrfs_set_header_nritems(buf, nritems - 1);
/* first item removed: parents hold this leaf's low key, update it */
4044 struct btrfs_disk_key disk_key;
4046 btrfs_item_key(buf, &disk_key, 0);
4047 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4049 btrfs_mark_buffer_dirty(buf);
/*
 * fix_item_offset - repair a leaf whose item data offsets do not pack
 * tightly from the end of the leaf.
 *
 * Item 0 must end exactly at BTRFS_LEAF_DATA_SIZE and each later item
 * must end where its predecessor's data begins.  Items that overshoot
 * are deleted via delete_bogus_item(); items that undershoot are
 * shifted up by the gap.
 */
4053 static int fix_item_offset(struct btrfs_trans_handle *trans,
4054 struct btrfs_root *root,
4055 struct btrfs_path *path)
4057 struct extent_buffer *buf;
4061 /* We should only get this for leaves */
4062 BUG_ON(path->lowest_level);
4063 buf = path->nodes[0];
4065 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4066 unsigned int shift = 0, offset;
4068 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4069 BTRFS_LEAF_DATA_SIZE(root)) {
/* item 0 ends past the leaf: data is off the end, delete it */
4070 if (btrfs_item_end_nr(buf, i) >
4071 BTRFS_LEAF_DATA_SIZE(root)) {
4072 ret = delete_bogus_item(trans, root, path,
4076 fprintf(stderr, "item is off the end of the "
4077 "leaf, can't fix\n");
4081 shift = BTRFS_LEAF_DATA_SIZE(root) -
4082 btrfs_item_end_nr(buf, i);
4083 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4084 btrfs_item_offset_nr(buf, i - 1)) {
/* this item's data overlaps the previous item's: delete it */
4085 if (btrfs_item_end_nr(buf, i) >
4086 btrfs_item_offset_nr(buf, i - 1)) {
4087 ret = delete_bogus_item(trans, root, path,
4091 fprintf(stderr, "items overlap, can't fix\n");
4095 shift = btrfs_item_offset_nr(buf, i - 1) -
4096 btrfs_item_end_nr(buf, i);
4101 printf("Shifting item nr %d by %u bytes in block %llu\n",
4102 i, shift, (unsigned long long)buf->start);
4103 offset = btrfs_item_offset_nr(buf, i);
/* move the data up by the gap and record the new offset */
4104 memmove_extent_buffer(buf,
4105 btrfs_leaf_data(buf) + offset + shift,
4106 btrfs_leaf_data(buf) + offset,
4107 btrfs_item_size_nr(buf, i));
4108 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4110 btrfs_mark_buffer_dirty(buf);
4114 * We may have moved things, in which case we want to exit so we don't
4115 * write those changes out. Once we have proper abort functionality in
4116 * progs this can be changed to something nicer.
4123 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4124 * then just return -EIO.
/*
 * Only bad key order and invalid item offsets are repairable here.  The
 * strategy: find every root that references @buf (btrfs_find_all_roots),
 * then for each root start a transaction and btrfs_search_slot() down to the
 * block with skip_check_block set so the search COWs a path to it, then run
 * fix_key_order() / fix_item_offset() on the COW'ed copy and commit.
 *
 * NOTE(review): loop-exit/error-path lines are elided in this excerpt;
 * the ordering of the commit calls below reflects separate error/success
 * paths that are not fully visible.
 */
4126 static int try_to_fix_bad_block(struct btrfs_root *root,
4127 struct extent_buffer *buf,
4128 enum btrfs_tree_block_status status)
4130 struct btrfs_trans_handle *trans;
4131 struct ulist *roots;
4132 struct ulist_node *node;
4133 struct btrfs_root *search_root;
4134 struct btrfs_path *path;
4135 struct ulist_iterator iter;
4136 struct btrfs_key root_key, key;
/* Anything other than these two failure modes is not fixable here. */
4139 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4140 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4143 path = btrfs_alloc_path();
4147 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4150 btrfs_free_path(path);
4154 ULIST_ITER_INIT(&iter);
4155 while ((node = ulist_next(roots, &iter))) {
4156 root_key.objectid = node->val;
4157 root_key.type = BTRFS_ROOT_ITEM_KEY;
4158 root_key.offset = (u64)-1;
4160 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4167 trans = btrfs_start_transaction(search_root, 0);
4168 if (IS_ERR(trans)) {
4169 ret = PTR_ERR(trans);
/* Search down to the damaged block itself, skipping the block checks
 * that would otherwise reject it before we can repair it. */
4173 path->lowest_level = btrfs_header_level(buf);
4174 path->skip_check_block = 1;
4175 if (path->lowest_level)
4176 btrfs_node_key_to_cpu(buf, &key, 0);
4178 btrfs_item_key_to_cpu(buf, &key, 0);
4179 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4182 btrfs_commit_transaction(trans, search_root);
4185 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4186 ret = fix_key_order(trans, search_root, path);
4187 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4188 ret = fix_item_offset(trans, search_root, path);
4190 btrfs_commit_transaction(trans, search_root);
4193 btrfs_release_path(path);
4194 btrfs_commit_transaction(trans, search_root);
4197 btrfs_free_path(path);
/*
 * Validate one tree block against its extent record in @extent_cache:
 * record the block's generation, first key and level into the record, run
 * btrfs_check_leaf()/btrfs_check_node(), and on failure attempt repair via
 * try_to_fix_bad_block().  Marks content_checked / owner_ref_checked and
 * may free the record if it is now fully resolved.
 *
 * NOTE(review): error paths and the repair gating condition around line
 * 4237 are elided in this excerpt (presumably gated on the global `repair`
 * flag — confirm against full source).
 */
4201 static int check_block(struct btrfs_root *root,
4202 struct cache_tree *extent_cache,
4203 struct extent_buffer *buf, u64 flags)
4205 struct extent_record *rec;
4206 struct cache_extent *cache;
4207 struct btrfs_key key;
4208 enum btrfs_tree_block_status status;
4212 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4215 rec = container_of(cache, struct extent_record, cache);
4216 rec->generation = btrfs_header_generation(buf);
4218 level = btrfs_header_level(buf);
4219 if (btrfs_header_nritems(buf) > 0) {
/* Leaf vs node determines which key accessor is valid for slot 0. */
4222 btrfs_item_key_to_cpu(buf, &key, 0);
4224 btrfs_node_key_to_cpu(buf, &key, 0);
4226 rec->info_objectid = key.objectid;
4228 rec->info_level = level;
4230 if (btrfs_is_leaf(buf))
4231 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4233 status = btrfs_check_node(root, &rec->parent_key, buf);
4235 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4237 status = try_to_fix_bad_block(root, buf, status);
4238 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4240 fprintf(stderr, "bad block %llu\n",
4241 (unsigned long long)buf->start);
4244 * Signal to callers we need to start the scan over
4245 * again since we'll have cow'ed blocks.
4250 rec->content_checked = 1;
4251 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4252 rec->owner_ref_checked = 1;
4254 ret = check_owner_ref(root, rec, buf);
4256 rec->owner_ref_checked = 1;
4260 maybe_free_extent_rec(extent_cache, rec);
/*
 * Linear search of rec->backrefs for a tree backref matching either
 * @parent (full/shared backref) or @root (keyed backref).  A nonzero
 * @parent selects full-backref matching; otherwise match by root.
 * Returns the match, or (per the elided tail) presumably NULL when no
 * backref matches — confirm against full source.
 */
4264 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4265 u64 parent, u64 root)
4267 struct list_head *cur = rec->backrefs.next;
4268 struct extent_backref *node;
4269 struct tree_backref *back;
4271 while(cur != &rec->backrefs) {
4272 node = list_entry(cur, struct extent_backref, list);
4276 back = (struct tree_backref *)node;
4278 if (!node->full_backref)
4280 if (parent == back->parent)
4283 if (node->full_backref)
4285 if (back->root == root)
/*
 * Allocate a new tree backref, zero its embedded extent_backref node,
 * mark it full-backref when @parent is nonzero (otherwise keyed by @root),
 * and append it to rec->backrefs.
 *
 * NOTE(review): no NULL check on malloc() is visible before the memset —
 * the check may sit on an elided line; verify against full source.
 */
4292 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4293 u64 parent, u64 root)
4295 struct tree_backref *ref = malloc(sizeof(*ref));
4296 memset(&ref->node, 0, sizeof(ref->node));
4298 ref->parent = parent;
4299 ref->node.full_backref = 1;
4302 ref->node.full_backref = 0;
4304 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Linear search of rec->backrefs for a data backref.  Full backrefs match
 * on @parent; keyed backrefs match on (root, owner, offset).  When
 * @found_ref is set and the candidate already has found_ref, a mismatch in
 * bytes/disk_bytenr disqualifies it — this forces callers to allocate a
 * second backref so conflicting on-disk refs become visible (see the long
 * comment in add_data_backref).
 */
4309 static struct data_backref *find_data_backref(struct extent_record *rec,
4310 u64 parent, u64 root,
4311 u64 owner, u64 offset,
4313 u64 disk_bytenr, u64 bytes)
4315 struct list_head *cur = rec->backrefs.next;
4316 struct extent_backref *node;
4317 struct data_backref *back;
4319 while(cur != &rec->backrefs) {
4320 node = list_entry(cur, struct extent_backref, list);
4324 back = (struct data_backref *)node;
4326 if (!node->full_backref)
4328 if (parent == back->parent)
4331 if (node->full_backref)
4333 if (back->root == root && back->owner == owner &&
4334 back->offset == offset) {
4335 if (found_ref && node->found_ref &&
4336 (back->bytes != bytes ||
4337 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a new data backref for @rec: full-backref when @parent is
 * nonzero, otherwise keyed by (root, owner, offset).  Appends to
 * rec->backrefs and raises rec->max_size when this ref is larger.
 *
 * NOTE(review): no NULL check on malloc() is visible before the memset —
 * the check may sit on an elided line; verify against full source.
 */
4346 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4347 u64 parent, u64 root,
4348 u64 owner, u64 offset,
4351 struct data_backref *ref = malloc(sizeof(*ref));
4352 memset(&ref->node, 0, sizeof(ref->node));
4353 ref->node.is_data = 1;
4356 ref->parent = parent;
4359 ref->node.full_backref = 1;
4363 ref->offset = offset;
4364 ref->node.full_backref = 0;
4366 ref->bytes = max_size;
4369 list_add_tail(&ref->node.list, &rec->backrefs);
4370 if (max_size > rec->max_size)
4371 rec->max_size = max_size;
4375 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's data/metadata/system nature
 * disagrees with the flags of the block group containing it.  SYSTEM
 * extents can only be told apart from METADATA via a chunk-tree backref.
 */
4376 static void check_extent_type(struct extent_record *rec)
4378 struct btrfs_block_group_cache *bg_cache;
4380 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4384 /* data extent, check chunk directly*/
4385 if (!rec->metadata) {
4386 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4387 rec->wrong_chunk_type = 1;
4391 /* metadata extent, check the obvious case first */
4392 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4393 BTRFS_BLOCK_GROUP_METADATA))) {
4394 rec->wrong_chunk_type = 1;
4399 * Check SYSTEM extent, as it's also marked as metadata, we can only
4400 * make sure it's a SYSTEM extent by its backref
4402 if (!list_empty(&rec->backrefs)) {
4403 struct extent_backref *node;
4404 struct tree_backref *tback;
4407 node = list_entry(rec->backrefs.next, struct extent_backref,
4409 if (node->is_data) {
4410 /* tree block shouldn't have data backref */
4411 rec->wrong_chunk_type = 1;
4414 tback = container_of(node, struct tree_backref, node);
/* A chunk-tree owner means this tree block lives in a SYSTEM chunk. */
4416 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4417 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4419 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4420 if (!(bg_cache->flags & bg_type))
4421 rec->wrong_chunk_type = 1;
/*
 * Insert a new extent record into @extent_cache, or merge the supplied
 * information into an existing overlapping record.  Handles:
 *  - duplicate/mismatched records (queued on rec->dups and the global
 *    duplicate_extents list for later repair),
 *  - merging extent_item_refs, parent key/generation, max_size,
 *  - flagging metadata extents that cross a stripe boundary
 *    (kernel scrub cannot handle those),
 *  - chunk-type validation via check_extent_type().
 *
 * NOTE(review): large portions (dup-record field copies, allocation-failure
 * handling, several returns) are elided in this excerpt.
 */
4425 static int add_extent_rec(struct cache_tree *extent_cache,
4426 struct btrfs_key *parent_key, u64 parent_gen,
4427 u64 start, u64 nr, u64 extent_item_refs,
4428 int is_root, int inc_ref, int set_checked,
4429 int metadata, int extent_rec, u64 max_size)
4431 struct extent_record *rec;
4432 struct cache_extent *cache;
4436 cache = lookup_cache_extent(extent_cache, start, nr);
4438 rec = container_of(cache, struct extent_record, cache);
4442 rec->nr = max(nr, max_size);
4445 * We need to make sure to reset nr to whatever the extent
4446 * record says was the real size, this way we can compare it to
4450 if (start != rec->start || rec->found_rec) {
4451 struct extent_record *tmp;
4454 if (list_empty(&rec->list))
4455 list_add_tail(&rec->list,
4456 &duplicate_extents);
4459 * We have to do this song and dance in case we
4460 * find an extent record that falls inside of
4461 * our current extent record but does not have
4462 * the same objectid.
4464 tmp = malloc(sizeof(*tmp));
4468 tmp->max_size = max_size;
4471 tmp->metadata = metadata;
4472 tmp->extent_item_refs = extent_item_refs;
4473 INIT_LIST_HEAD(&tmp->list);
4474 list_add_tail(&tmp->list, &rec->dups);
4475 rec->num_duplicates++;
4482 if (extent_item_refs && !dup) {
4483 if (rec->extent_item_refs) {
4484 fprintf(stderr, "block %llu rec "
4485 "extent_item_refs %llu, passed %llu\n",
4486 (unsigned long long)start,
4487 (unsigned long long)
4488 rec->extent_item_refs,
4489 (unsigned long long)extent_item_refs);
4491 rec->extent_item_refs = extent_item_refs;
4496 rec->content_checked = 1;
4497 rec->owner_ref_checked = 1;
4501 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4503 rec->parent_generation = parent_gen;
4505 if (rec->max_size < max_size)
4506 rec->max_size = max_size;
4509 * A metadata extent can't cross stripe_len boundary, otherwise
4510 * kernel scrub won't be able to handle it.
4511 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4514 if (metadata && check_crossing_stripes(rec->start,
4516 rec->crossing_stripes = 1;
4517 check_extent_type(rec);
4518 maybe_free_extent_rec(extent_cache, rec);
/* No existing record covered this range: build a fresh one. */
4521 rec = malloc(sizeof(*rec));
4523 rec->max_size = max_size;
4524 rec->nr = max(nr, max_size);
4525 rec->found_rec = !!extent_rec;
4526 rec->content_checked = 0;
4527 rec->owner_ref_checked = 0;
4528 rec->num_duplicates = 0;
4529 rec->metadata = metadata;
4530 rec->flag_block_full_backref = -1;
4531 rec->bad_full_backref = 0;
4532 rec->crossing_stripes = 0;
4533 rec->wrong_chunk_type = 0;
4534 INIT_LIST_HEAD(&rec->backrefs);
4535 INIT_LIST_HEAD(&rec->dups);
4536 INIT_LIST_HEAD(&rec->list);
4548 if (extent_item_refs)
4549 rec->extent_item_refs = extent_item_refs;
4551 rec->extent_item_refs = 0;
4554 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4556 memset(&rec->parent_key, 0, sizeof(*parent_key));
4559 rec->parent_generation = parent_gen;
4561 rec->parent_generation = 0;
4563 rec->cache.start = start;
4564 rec->cache.size = nr;
4565 ret = insert_cache_extent(extent_cache, &rec->cache);
4569 rec->content_checked = 1;
4570 rec->owner_ref_checked = 1;
4574 if (check_crossing_stripes(rec->start, rec->max_size))
4575 rec->crossing_stripes = 1;
4576 check_extent_type(rec);
/*
 * Record a tree-block backref for @bytenr: create a placeholder extent
 * record if none exists, then find or allocate a tree_backref for
 * (parent, root).  @found_ref distinguishes a ref found by walking the
 * tree (found_ref) from one read out of the extent tree
 * (found_extent_tree); duplicates of either kind are reported.
 */
4580 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4581 u64 parent, u64 root, int found_ref)
4583 struct extent_record *rec;
4584 struct tree_backref *back;
4585 struct cache_extent *cache;
4587 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: insert a minimal placeholder, then re-lookup. */
4589 add_extent_rec(extent_cache, NULL, 0, bytenr,
4590 1, 0, 0, 0, 0, 1, 0, 0);
4591 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4596 rec = container_of(cache, struct extent_record, cache);
4597 if (rec->start != bytenr) {
4601 back = find_tree_backref(rec, parent, root);
4603 back = alloc_tree_backref(rec, parent, root);
4606 if (back->node.found_ref) {
4607 fprintf(stderr, "Extent back ref already exists "
4608 "for %llu parent %llu root %llu \n",
4609 (unsigned long long)bytenr,
4610 (unsigned long long)parent,
4611 (unsigned long long)root);
4613 back->node.found_ref = 1;
4615 if (back->node.found_extent_tree) {
4616 fprintf(stderr, "Extent back ref already exists "
4617 "for %llu parent %llu root %llu \n",
4618 (unsigned long long)bytenr,
4619 (unsigned long long)parent,
4620 (unsigned long long)root);
4622 back->node.found_extent_tree = 1;
4624 check_extent_type(rec);
4625 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data-extent backref for @bytenr, creating a placeholder extent
 * record if needed.  When @found_ref is set, @max_size is the real extent
 * size and must match any previously found ref (mismatches force a new
 * backref so the conflict is detected later — see the in-code comment).
 */
4629 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4630 u64 parent, u64 root, u64 owner, u64 offset,
4631 u32 num_refs, int found_ref, u64 max_size)
4633 struct extent_record *rec;
4634 struct data_backref *back;
4635 struct cache_extent *cache;
4637 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4639 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4641 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4646 rec = container_of(cache, struct extent_record, cache);
4647 if (rec->max_size < max_size)
4648 rec->max_size = max_size;
4651 * If found_ref is set then max_size is the real size and must match the
4652 * existing refs. So if we have already found a ref then we need to
4653 * make sure that this ref matches the existing one, otherwise we need
4654 * to add a new backref so we can notice that the backrefs don't match
4655 * and we need to figure out who is telling the truth. This is to
4656 * account for that awful fsync bug I introduced where we'd end up with
4657 * a btrfs_file_extent_item that would have its length include multiple
4658 * prealloc extents or point inside of a prealloc extent.
4660 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4663 back = alloc_data_backref(rec, parent, root, owner, offset,
4667 BUG_ON(num_refs != 1);
4668 if (back->node.found_ref)
4669 BUG_ON(back->bytes != max_size);
4670 back->node.found_ref = 1;
4671 back->found_ref += 1;
4672 back->bytes = max_size;
4673 back->disk_bytenr = bytenr;
4675 rec->content_checked = 1;
4676 rec->owner_ref_checked = 1;
4678 if (back->node.found_extent_tree) {
4679 fprintf(stderr, "Extent back ref already exists "
4680 "for %llu parent %llu root %llu "
4681 "owner %llu offset %llu num_refs %lu\n",
4682 (unsigned long long)bytenr,
4683 (unsigned long long)parent,
4684 (unsigned long long)root,
4685 (unsigned long long)owner,
4686 (unsigned long long)offset,
4687 (unsigned long)num_refs);
4689 back->num_refs = num_refs;
4690 back->node.found_extent_tree = 1;
4692 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for later processing.  The @seen tree
 * deduplicates: only ranges not yet seen are added to @pending.
 */
4696 static int add_pending(struct cache_tree *pending,
4697 struct cache_tree *seen, u64 bytenr, u32 size)
4700 ret = add_cache_extent(seen, bytenr, size);
4703 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (up to @bits_nr entries) with the next batch of block ranges
 * to read.  Priority: a pending readahead entry from @reada first; then
 * tree nodes from @nodes near @last (backing up 32K to catch neighbours);
 * then plain @pending entries.  When more than 8 slots remain, additional
 * nearby @pending entries (within 32K of the running end) are appended as
 * opportunistic readahead.
 */
4707 static int pick_next_pending(struct cache_tree *pending,
4708 struct cache_tree *reada,
4709 struct cache_tree *nodes,
4710 u64 last, struct block_info *bits, int bits_nr,
4713 unsigned long node_start = last;
4714 struct cache_extent *cache;
4717 cache = search_cache_extent(reada, 0);
4719 bits[0].start = cache->start;
4720 bits[0].size = cache->size;
4725 if (node_start > 32768)
4726 node_start -= 32768;
4728 cache = search_cache_extent(nodes, node_start);
4730 cache = search_cache_extent(nodes, 0);
4733 cache = search_cache_extent(pending, 0);
4738 bits[ret].start = cache->start;
4739 bits[ret].size = cache->size;
4740 cache = next_cache_extent(cache);
4742 } while (cache && ret < bits_nr);
4748 bits[ret].start = cache->start;
4749 bits[ret].size = cache->size;
4750 cache = next_cache_extent(cache);
4752 } while (cache && ret < bits_nr);
/* Spare capacity: pull in contiguous-ish pending ranges as readahead. */
4754 if (bits_nr - ret > 8) {
4755 u64 lookup = bits[0].start + bits[0].size;
4756 struct cache_extent *next;
4757 next = search_cache_extent(pending, lookup);
4759 if (next->start - lookup > 32768)
4761 bits[ret].start = next->start;
4762 bits[ret].size = next->size;
4763 lookup = next->start + next->size;
4767 next = next_cache_extent(next);
/* Destructor callback: unlink a chunk_record from its lists (free of the
 * record itself is on an elided line — confirm against full source). */
4775 static void free_chunk_record(struct cache_extent *cache)
4777 struct chunk_record *rec;
4779 rec = container_of(cache, struct chunk_record, cache);
4780 list_del_init(&rec->list);
4781 list_del_init(&rec->dextents);
/* Free every chunk_record in @chunk_cache via free_chunk_record(). */
4785 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4787 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* Destructor callback for a device_record stored in an rb-tree. */
4790 static void free_device_record(struct rb_node *node)
4792 struct device_record *rec;
4794 rec = container_of(node, struct device_record, node);
/* NOTE(review): the line below belongs to a separate free_device_cache()
 * wrapper whose declaration is elided in this excerpt. */
4798 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the block-group cache tree and, on success, onto the
 * tree's block_groups list.
 */
4800 int insert_block_group_record(struct block_group_tree *tree,
4801 struct block_group_record *bg_rec)
4805 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4809 list_add_tail(&bg_rec->list, &tree->block_groups);
/* Destructor callback: unlink a block_group_record from its list (free of
 * the record itself is on an elided line — confirm against full source). */
4813 static void free_block_group_record(struct cache_extent *cache)
4815 struct block_group_record *rec;
4817 rec = container_of(cache, struct block_group_record, cache);
4818 list_del_init(&rec->list);
/* Free every block_group_record in @tree via free_block_group_record(). */
4822 void free_block_group_tree(struct block_group_tree *tree)
4824 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device-extent tree.  Uses insert_cache_extent2()
 * because extents on different devices can share start/size; on success the
 * record starts on both "orphan" lists until matched to a chunk and device.
 */
4827 int insert_device_extent_record(struct device_extent_tree *tree,
4828 struct device_extent_record *de_rec)
4833 * Device extent is a bit different from the other extents, because
4834 * the extents which belong to the different devices may have the
4835 * same start and size, so we need use the special extent cache
4836 * search/insert functions.
4838 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4842 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4843 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/* Destructor callback: detach a device_extent_record from the chunk and
 * device lists it may still be on. */
4847 static void free_device_extent_record(struct cache_extent *cache)
4849 struct device_extent_record *rec;
4851 rec = container_of(cache, struct device_extent_record, cache);
4852 if (!list_empty(&rec->chunk_list))
4853 list_del_init(&rec->chunk_list);
4854 if (!list_empty(&rec->device_list))
4855 list_del_init(&rec->device_list);
/* Free every device_extent_record in @tree. */
4859 void free_device_extent_tree(struct device_extent_tree *tree)
4861 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert a v0-format extent ref item into a backref: objectids below
 * BTRFS_FIRST_FREE_OBJECTID are tree roots (tree backref), everything else
 * is a data ref.  key.offset is the parent bytenr in both cases.
 */
4865 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4866 struct extent_buffer *leaf, int slot)
4868 struct btrfs_extent_ref_v0 *ref0;
4869 struct btrfs_key key;
4871 btrfs_item_key_to_cpu(leaf, &key, slot);
4872 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4873 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4874 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4876 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4877 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at @slot of @leaf.
 * The record is calloc'ed with room for the chunk's stripe array
 * (btrfs_chunk_record_size(num_stripes)); on allocation failure an error
 * is printed (the exit/return after it is elided from this excerpt).
 */
4883 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4884 struct btrfs_key *key,
4887 struct btrfs_chunk *ptr;
4888 struct chunk_record *rec;
4891 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4892 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4894 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4896 fprintf(stderr, "memory allocation failed\n");
4900 INIT_LIST_HEAD(&rec->list);
4901 INIT_LIST_HEAD(&rec->dextents);
/* Chunk logical start/length index the record in the cache tree. */
4904 rec->cache.start = key->offset;
4905 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4907 rec->generation = btrfs_header_generation(leaf);
4909 rec->objectid = key->objectid;
4910 rec->type = key->type;
4911 rec->offset = key->offset;
4913 rec->length = rec->cache.size;
4914 rec->owner = btrfs_chunk_owner(leaf, ptr);
4915 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4916 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4917 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4918 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4919 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4920 rec->num_stripes = num_stripes;
4921 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4923 for (i = 0; i < rec->num_stripes; ++i) {
4924 rec->stripes[i].devid =
4925 btrfs_stripe_devid_nr(leaf, ptr, i);
4926 rec->stripes[i].offset =
4927 btrfs_stripe_offset_nr(leaf, ptr, i);
4928 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4929 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse the chunk item at @slot into a chunk_record and insert it into
 * @chunk_cache; duplicates are reported.
 */
4936 static int process_chunk_item(struct cache_tree *chunk_cache,
4937 struct btrfs_key *key, struct extent_buffer *eb,
4940 struct chunk_record *rec;
4943 rec = btrfs_new_chunk_record(eb, key, slot);
4944 ret = insert_cache_extent(chunk_cache, &rec->cache);
4946 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4947 rec->offset, rec->length);
/*
 * Parse a DEV_ITEM at @slot into a device_record and insert it into the
 * rb-tree @dev_cache keyed by devid; duplicates are reported.
 */
4954 static int process_device_item(struct rb_root *dev_cache,
4955 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4957 struct btrfs_dev_item *ptr;
4958 struct device_record *rec;
4961 ptr = btrfs_item_ptr(eb,
4962 slot, struct btrfs_dev_item);
4964 rec = malloc(sizeof(*rec));
4966 fprintf(stderr, "memory allocation failed\n");
4970 rec->devid = key->offset;
4971 rec->generation = btrfs_header_generation(eb);
4973 rec->objectid = key->objectid;
4974 rec->type = key->type;
4975 rec->offset = key->offset;
/* devid from the item body overrides the key-derived value above. */
4977 rec->devid = btrfs_device_id(eb, ptr);
4978 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4979 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4981 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4983 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at
 * @slot of @leaf.  On allocation failure an error is printed (the
 * exit/return after it is elided from this excerpt).
 */
4990 struct block_group_record *
4991 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4994 struct btrfs_block_group_item *ptr;
4995 struct block_group_record *rec;
4997 rec = calloc(1, sizeof(*rec));
4999 fprintf(stderr, "memory allocation failed\n");
5003 rec->cache.start = key->objectid;
5004 rec->cache.size = key->offset;
5006 rec->generation = btrfs_header_generation(leaf);
5008 rec->objectid = key->objectid;
5009 rec->type = key->type;
5010 rec->offset = key->offset;
5012 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5013 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5015 INIT_LIST_HEAD(&rec->list);
/*
 * Parse the block group item at @slot into a record and insert it into
 * @block_group_cache; duplicates are reported.
 */
5020 static int process_block_group_item(struct block_group_tree *block_group_cache,
5021 struct btrfs_key *key,
5022 struct extent_buffer *eb, int slot)
5024 struct block_group_record *rec;
5027 rec = btrfs_new_block_group_record(eb, key, slot);
5028 ret = insert_block_group_record(block_group_cache, rec);
5030 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5031 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot of @leaf.  The cache key uses (devid, physical offset, length).
 * On allocation failure an error is printed (the exit/return after it is
 * elided from this excerpt).
 */
5038 struct device_extent_record *
5039 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5040 struct btrfs_key *key, int slot)
5042 struct device_extent_record *rec;
5043 struct btrfs_dev_extent *ptr;
5045 rec = calloc(1, sizeof(*rec));
5047 fprintf(stderr, "memory allocation failed\n");
5051 rec->cache.objectid = key->objectid;
5052 rec->cache.start = key->offset;
5054 rec->generation = btrfs_header_generation(leaf);
5056 rec->objectid = key->objectid;
5057 rec->type = key->type;
5058 rec->offset = key->offset;
5060 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5061 rec->chunk_objecteid =
5062 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5064 btrfs_dev_extent_chunk_offset(leaf, ptr);
5065 rec->length = btrfs_dev_extent_length(leaf, ptr);
5066 rec->cache.size = rec->length;
5068 INIT_LIST_HEAD(&rec->chunk_list);
5069 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse the device extent item at @slot into a record and insert it into
 * @dev_extent_cache; duplicates are reported.
 */
5075 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5076 struct btrfs_key *key, struct extent_buffer *eb,
5079 struct device_extent_record *rec;
5082 rec = btrfs_new_device_extent_record(eb, key, slot);
5083 ret = insert_device_extent_record(dev_extent_cache, rec);
5086 "Device extent[%llu, %llu, %llu] existed.\n",
5087 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM at @slot: register the extent
 * record itself (handling the v0 compat format when item_size is too
 * small), then walk the trailing inline refs and register each as a tree
 * or data backref.  METADATA_ITEMs implicitly span one leafsize; an
 * EXTENT_ITEM with the TREE_BLOCK flag carries a btrfs_tree_block_info
 * header before its inline refs.
 */
5094 static int process_extent_item(struct btrfs_root *root,
5095 struct cache_tree *extent_cache,
5096 struct extent_buffer *eb, int slot)
5098 struct btrfs_extent_item *ei;
5099 struct btrfs_extent_inline_ref *iref;
5100 struct btrfs_extent_data_ref *dref;
5101 struct btrfs_shared_data_ref *sref;
5102 struct btrfs_key key;
5106 u32 item_size = btrfs_item_size_nr(eb, slot);
5112 btrfs_item_key_to_cpu(eb, &key, slot);
5114 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5116 num_bytes = root->leafsize;
5118 num_bytes = key.offset;
5121 if (item_size < sizeof(*ei)) {
5122 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5123 struct btrfs_extent_item_v0 *ei0;
5124 BUG_ON(item_size != sizeof(*ei0));
5125 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5126 refs = btrfs_extent_refs_v0(eb, ei0);
5130 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5131 num_bytes, refs, 0, 0, 0, metadata, 1,
5135 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5136 refs = btrfs_extent_refs(eb, ei);
5137 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5142 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5143 refs, 0, 0, 0, metadata, 1, num_bytes);
5145 ptr = (unsigned long)(ei + 1);
5146 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5147 key.type == BTRFS_EXTENT_ITEM_KEY)
5148 ptr += sizeof(struct btrfs_tree_block_info);
5150 end = (unsigned long)ei + item_size;
/* Walk the inline refs packed after the extent item header. */
5152 iref = (struct btrfs_extent_inline_ref *)ptr;
5153 type = btrfs_extent_inline_ref_type(eb, iref);
5154 offset = btrfs_extent_inline_ref_offset(eb, iref);
5156 case BTRFS_TREE_BLOCK_REF_KEY:
5157 add_tree_backref(extent_cache, key.objectid,
5160 case BTRFS_SHARED_BLOCK_REF_KEY:
5161 add_tree_backref(extent_cache, key.objectid,
5164 case BTRFS_EXTENT_DATA_REF_KEY:
5165 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5166 add_data_backref(extent_cache, key.objectid, 0,
5167 btrfs_extent_data_ref_root(eb, dref),
5168 btrfs_extent_data_ref_objectid(eb,
5170 btrfs_extent_data_ref_offset(eb, dref),
5171 btrfs_extent_data_ref_count(eb, dref),
5174 case BTRFS_SHARED_DATA_REF_KEY:
5175 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5176 add_data_backref(extent_cache, key.objectid, offset,
5178 btrfs_shared_data_ref_count(eb, sref),
5182 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5183 key.objectid, key.type, num_bytes);
5186 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the range [offset, offset+bytes) of block group @cache is
 * fully covered by a single free-space entry of exactly that range, after
 * first carving out any superblock mirror stripes that overlap it
 * (superblock copies are never free space).  The matched entry is unlinked
 * from the free-space ctl so leftover entries can be detected later.
 *
 * NOTE(review): recursion-return checks and several braces are elided in
 * this excerpt.
 */
5193 static int check_cache_range(struct btrfs_root *root,
5194 struct btrfs_block_group_cache *cache,
5195 u64 offset, u64 bytes)
5197 struct btrfs_free_space *entry;
5203 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5204 bytenr = btrfs_sb_offset(i);
5205 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5206 cache->key.objectid, bytenr, 0,
5207 &logical, &nr, &stripe_len);
5212 if (logical[nr] + stripe_len <= offset)
5214 if (offset + bytes <= logical[nr])
5216 if (logical[nr] == offset) {
5217 if (stripe_len >= bytes) {
5221 bytes -= stripe_len;
5222 offset += stripe_len;
5223 } else if (logical[nr] < offset) {
5224 if (logical[nr] + stripe_len >=
5229 bytes = (offset + bytes) -
5230 (logical[nr] + stripe_len);
5231 offset = logical[nr] + stripe_len;
5234 * Could be tricky, the super may land in the
5235 * middle of the area we're checking. First
5236 * check the easiest case, it's at the end.
5238 if (logical[nr] + stripe_len >=
5240 bytes = logical[nr] - offset;
5244 /* Check the left side */
5245 ret = check_cache_range(root, cache,
5247 logical[nr] - offset);
5253 /* Now we continue with the right side */
5254 bytes = (offset + bytes) -
5255 (logical[nr] + stripe_len);
5256 offset = logical[nr] + stripe_len;
5263 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5265 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5266 offset, offset+bytes);
5270 if (entry->offset != offset) {
5271 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5276 if (entry->bytes != bytes) {
5277 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5278 bytes, entry->bytes, offset);
/* Consume the entry so verify_space_cache() can detect leftovers. */
5282 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free-space cache of @cache against the extent
 * tree: walk all EXTENT_ITEM/METADATA_ITEMs inside the block group and
 * require every gap between allocated extents to match a free-space entry
 * exactly (via check_cache_range).  Any free-space entries left afterwards
 * mean the cache claims space that is actually allocated.
 */
5287 static int verify_space_cache(struct btrfs_root *root,
5288 struct btrfs_block_group_cache *cache)
5290 struct btrfs_path *path;
5291 struct extent_buffer *leaf;
5292 struct btrfs_key key;
5296 path = btrfs_alloc_path();
5300 root = root->fs_info->extent_root;
/* Never scan below the first superblock copy. */
5302 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5304 key.objectid = last;
5306 key.type = BTRFS_EXTENT_ITEM_KEY;
5308 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5313 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5314 ret = btrfs_next_leaf(root, path);
5322 leaf = path->nodes[0];
5323 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5324 if (key.objectid >= cache->key.offset + cache->key.objectid)
5326 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5327 key.type != BTRFS_METADATA_ITEM_KEY) {
5332 if (last == key.objectid) {
5333 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5334 last = key.objectid + key.offset;
/* METADATA_ITEM offset is a tree level, not a size: advance by leafsize. */
5336 last = key.objectid + root->leafsize;
5341 ret = check_cache_range(root, cache, last,
5342 key.objectid - last);
5345 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5346 last = key.objectid + key.offset;
5348 last = key.objectid + root->leafsize;
/* Trailing free region after the last allocated extent. */
5352 if (last < cache->key.objectid + cache->key.offset)
5353 ret = check_cache_range(root, cache, last,
5354 cache->key.objectid +
5355 cache->key.offset - last);
5358 btrfs_free_path(path);
5361 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5362 fprintf(stderr, "There are still entries left in the space "
/*
 * Walk every block group, load its on-disk free-space cache and verify it
 * with verify_space_cache().  A cache generation that doesn't match the
 * super generation means the whole cache would be invalidated on mount, so
 * that is only reported.  Returns -EINVAL when any cache fails to verify.
 */
5370 static int check_space_cache(struct btrfs_root *root)
5372 struct btrfs_block_group_cache *cache;
5373 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5377 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5378 btrfs_super_generation(root->fs_info->super_copy) !=
5379 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5380 printf("cache and super generation don't match, space cache "
5381 "will be invalidated\n");
5385 if (ctx.progress_enabled) {
5386 ctx.tp = TASK_FREE_SPACE;
5387 task_start(ctx.info);
5391 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5395 start = cache->key.objectid + cache->key.offset;
5396 if (!cache->free_space_ctl) {
5397 if (btrfs_init_free_space_ctl(cache,
5398 root->sectorsize)) {
/* Drop any stale in-memory entries before (re)loading from disk. */
5403 btrfs_remove_free_space_cache(cache);
5406 ret = load_free_space_cache(root->fs_info, cache);
5410 ret = verify_space_cache(root, cache);
5412 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5413 cache->key.objectid);
5418 task_stop(ctx.info);
5420 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data starting at @bytenr and verify each sectorsize
 * chunk against the csum bytes stored in @eb at @leaf_offset.  On a
 * mismatch, retry the chunk from the next mirror before giving up.
 * @num_bytes must be sector-aligned.
 *
 * NOTE(review): several control-flow lines (mirror-retry jump, loop
 * resets, return) are elided in this excerpt.
 */
5423 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5424 u64 num_bytes, unsigned long leaf_offset,
5425 struct extent_buffer *eb) {
5428 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5430 unsigned long csum_offset;
5434 u64 data_checked = 0;
5440 if (num_bytes % root->sectorsize)
5443 data = malloc(num_bytes);
5447 while (offset < num_bytes) {
5450 read_len = num_bytes - offset;
5451 /* read as much space once a time */
5452 ret = read_extent_data(root, data + offset,
5453 bytenr + offset, &read_len, mirror);
5457 /* verify every 4k data's checksum */
5458 while (data_checked < read_len) {
5460 tmp = offset + data_checked;
5462 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5463 csum, root->sectorsize);
5464 btrfs_csum_final(csum, (char *)&csum);
5466 csum_offset = leaf_offset +
5467 tmp / root->sectorsize * csum_size;
5468 read_extent_buffer(eb, (char *)&csum_expected,
5469 csum_offset, csum_size);
5470 /* try another mirror */
5471 if (csum != csum_expected) {
5472 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5473 mirror, bytenr + tmp,
5474 csum, csum_expected);
5475 num_copies = btrfs_num_copies(
5476 &root->fs_info->mapping_tree,
5478 if (mirror < num_copies - 1) {
5483 data_checked += root->sectorsize;
/*
 * Confirm that the range [bytenr, bytenr+num_bytes) is fully covered by
 * EXTENT_ITEMs in the extent tree.  Walks backwards first (extents can
 * begin before @bytenr, and block-group items sort before extent items at
 * the same bytenr), then forwards, trimming the remaining uncovered range
 * as overlapping extents are found; an extent landing in the middle of the
 * range recurses on the right half.  Reports any csum range left with no
 * backing extent.
 */
5492 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5495 struct btrfs_path *path;
5496 struct extent_buffer *leaf;
5497 struct btrfs_key key;
5500 path = btrfs_alloc_path();
5502 fprintf(stderr, "Error allocing path\n");
5506 key.objectid = bytenr;
5507 key.type = BTRFS_EXTENT_ITEM_KEY;
5508 key.offset = (u64)-1;
5511 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5514 fprintf(stderr, "Error looking up extent record %d\n", ret);
5515 btrfs_free_path(path);
5518 if (path->slots[0] > 0) {
5521 ret = btrfs_prev_leaf(root, path);
5524 } else if (ret > 0) {
5531 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5534 * Block group items come before extent items if they have the same
5535 * bytenr, so walk back one more just in case. Dear future traveler,
5536 * first congrats on mastering time travel. Now if it's not too much
5537 * trouble could you go back to 2006 and tell Chris to make the
5538 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5539 * EXTENT_ITEM_KEY please?
5541 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5542 if (path->slots[0] > 0) {
5545 ret = btrfs_prev_leaf(root, path);
5548 } else if (ret > 0) {
5553 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5557 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5558 ret = btrfs_next_leaf(root, path);
5560 fprintf(stderr, "Error going to next leaf "
5562 btrfs_free_path(path);
5568 leaf = path->nodes[0];
5569 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5570 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5574 if (key.objectid + key.offset < bytenr) {
5578 if (key.objectid > bytenr + num_bytes)
/* Trim the uncovered range by however this extent overlaps it. */
5581 if (key.objectid == bytenr) {
5582 if (key.offset >= num_bytes) {
5586 num_bytes -= key.offset;
5587 bytenr += key.offset;
5588 } else if (key.objectid < bytenr) {
5589 if (key.objectid + key.offset >= bytenr + num_bytes) {
5593 num_bytes = (bytenr + num_bytes) -
5594 (key.objectid + key.offset);
5595 bytenr = key.objectid + key.offset;
5597 if (key.objectid + key.offset < bytenr + num_bytes) {
5598 u64 new_start = key.objectid + key.offset;
5599 u64 new_bytes = bytenr + num_bytes - new_start;
5602 * Weird case, the extent is in the middle of
5603 * our range, we'll have to search one side
5604 * and then the other. Not sure if this happens
5605 * in real life, but no harm in coding it up
5606 * anyway just in case.
5608 btrfs_release_path(path);
5609 ret = check_extent_exists(root, new_start,
5612 fprintf(stderr, "Right section didn't "
5616 num_bytes = key.objectid - bytenr;
5619 num_bytes = key.objectid - bytenr;
5626 if (num_bytes && !ret) {
5627 fprintf(stderr, "There are no extents for csum range "
5628 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5632 btrfs_free_path(path);
/*
 * Walk the whole csum tree.  For each EXTENT_CSUM item, optionally verify
 * the data checksums on disk (when check_data_csum is set), and confirm
 * that every contiguous csum range is backed by extent records via
 * check_extent_exists().
 *
 * NOTE(review): some original source lines are elided in this listing, so
 * loop boundaries and a few error paths are not visible.
 */
5636 static int check_csums(struct btrfs_root *root)
5638 	struct btrfs_path *path;
5639 	struct extent_buffer *leaf;
5640 	struct btrfs_key key;
5641 	u64 offset = 0, num_bytes = 0;
5642 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5646 	unsigned long leaf_offset;
/* All csum items live in the dedicated csum tree. */
5648 	root = root->fs_info->csum_root;
5649 	if (!extent_buffer_uptodate(root->node)) {
5650 		fprintf(stderr, "No valid csum tree found\n");
5654 	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5655 	key.type = BTRFS_EXTENT_CSUM_KEY;
5658 	path = btrfs_alloc_path();
5662 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5664 		fprintf(stderr, "Error searching csum tree %d\n", ret);
5665 		btrfs_free_path(path);
5669 	if (ret > 0 && path->slots[0])
5674 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5675 		ret = btrfs_next_leaf(root, path);
5677 			fprintf(stderr, "Error going to next leaf "
5684 	leaf = path->nodes[0];
5686 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5687 	if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum item covers (item_size / csum_size) sectors of data. */
5692 	data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5693 		    csum_size) * root->sectorsize;
5694 	if (!check_data_csum)
5695 		goto skip_csum_check;
5696 	leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5697 	ret = check_extent_csums(root, key.offset, data_len,
5703 		offset = key.offset;
/* Gap between csum ranges: verify the accumulated range had extents. */
5704 	} else if (key.offset != offset + num_bytes) {
5705 		ret = check_extent_exists(root, offset, num_bytes);
5707 			fprintf(stderr, "Csum exists for %Lu-%Lu but "
5708 				"there is no extent record\n",
5709 				offset, offset+num_bytes);
5712 		offset = key.offset;
5715 	num_bytes += data_len;
5719 	btrfs_free_path(path);
/*
 * Return whether @key sorts strictly before @drop_key (objectid, then type,
 * then offset) — i.e. the key falls in the already-dropped part of a
 * partially deleted snapshot and should be skipped.
 *
 * NOTE(review): the actual return statements are elided in this listing;
 * only the comparison ladder is visible.
 */
5723 static int is_dropped_key(struct btrfs_key *key,
5724 			  struct btrfs_key *drop_key) {
5725 	if (key->objectid < drop_key->objectid)
5727 	else if (key->objectid == drop_key->objectid) {
5728 		if (key->type < drop_key->type)
5730 		else if (key->type == drop_key->type) {
5731 			if (key->offset < drop_key->offset)
5739 * Here are the rules for FULL_BACKREF.
5741 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5742 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5744 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5745 * if it happened after the relocation occurred since we'll have dropped the
5746 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5747 * have no real way to know for sure.
5749 * We process the blocks one root at a time, and we start from the lowest root
5750 * objectid and go to the highest. So we can just lookup the owner backref for
5751 * the record and if we don't find it then we know it doesn't exist and we have
5754 * FIXME: if we ever start reclaiming root objectids then we need to fix this
5755 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5756 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether the extent record for @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF (see the rules in the comment above), and
 * flag a mismatch with any previously recorded decision via
 * rec->bad_full_backref.
 *
 * NOTE(review): several original source lines are elided in this listing.
 */
5758 static int calc_extent_flag(struct btrfs_root *root,
5759 			   struct cache_tree *extent_cache,
5760 			   struct extent_buffer *buf,
5761 			   struct root_item_record *ri,
5764 	struct extent_record *rec;
5765 	struct cache_extent *cache;
5766 	struct tree_backref *tback;
5769 	cache = lookup_cache_extent(extent_cache, buf->start, 1);
5770 	/* we have added this extent before */
5772 	rec = container_of(cache, struct extent_record, cache);
5775 	 * Except file/reloc tree, we can not have
/* Non-file trees (objectid below FIRST_FREE) never use FULL_BACKREF. */
5778 	if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* A tree root block is always referenced by its own root item. */
5783 	if (buf->start == ri->bytenr)
5786 	if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5789 	owner = btrfs_header_owner(buf);
5790 	if (owner == ri->objectid)
/* Owner no longer points at us: FULL_BACKREF unless an owner backref exists. */
5793 	tback = find_tree_backref(rec, 0, owner);
/* Record disagreement with an earlier full-backref determination. */
5798 	if (rec->flag_block_full_backref != -1 &&
5799 	    rec->flag_block_full_backref != 0)
5800 		rec->bad_full_backref = 1;
5803 	*flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5804 	if (rec->flag_block_full_backref != -1 &&
5805 	    rec->flag_block_full_backref != 1)
5806 		rec->bad_full_backref = 1;
/*
 * Core of the extent-tree scan: pick the next pending tree block, read it,
 * determine its FULL_BACKREF status, sanity-check it, then record every item
 * it contains (extent items, csums, chunks, devices, block groups, backrefs,
 * file extents) into the various caches, and queue its children.
 *
 * NOTE(review): this listing elides many original source lines (the embedded
 * numbers are original file line numbers); error paths, some declarations
 * and closing braces are not visible here.
 */
5810 static int run_next_block(struct btrfs_root *root,
5811 			  struct block_info *bits,
5814 			  struct cache_tree *pending,
5815 			  struct cache_tree *seen,
5816 			  struct cache_tree *reada,
5817 			  struct cache_tree *nodes,
5818 			  struct cache_tree *extent_cache,
5819 			  struct cache_tree *chunk_cache,
5820 			  struct rb_root *dev_cache,
5821 			  struct block_group_tree *block_group_cache,
5822 			  struct device_extent_tree *dev_extent_cache,
5823 			  struct root_item_record *ri)
5825 	struct extent_buffer *buf;
5826 	struct extent_record *rec = NULL;
5837 	struct btrfs_key key;
5838 	struct cache_extent *cache;
/* Choose the next batch of blocks and kick off readahead for them. */
5841 	nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5842 				    bits_nr, &reada_bits);
5847 	for(i = 0; i < nritems; i++) {
5848 		ret = add_cache_extent(reada, bits[i].start,
5853 		/* fixme, get the parent transid */
5854 		readahead_tree_block(root, bits[i].start,
5858 	*last = bits[0].start;
5859 	bytenr = bits[0].start;
5860 	size = bits[0].size;
/* Drop this block from the pending/reada/nodes trackers. */
5862 	cache = lookup_cache_extent(pending, bytenr, size);
5864 		remove_cache_extent(pending, cache);
5867 	cache = lookup_cache_extent(reada, bytenr, size);
5869 		remove_cache_extent(reada, cache);
5872 	cache = lookup_cache_extent(nodes, bytenr, size);
5874 		remove_cache_extent(nodes, cache);
5877 	cache = lookup_cache_extent(extent_cache, bytenr, size);
5879 		rec = container_of(cache, struct extent_record, cache);
5880 		gen = rec->parent_generation;
5883 	/* fixme, get the real parent transid */
5884 	buf = read_tree_block(root, bytenr, size, gen);
5885 	if (!extent_buffer_uptodate(buf)) {
5886 		record_bad_block_io(root->fs_info,
5887 				    extent_cache, bytenr, size);
5891 	nritems = btrfs_header_nritems(buf);
/* Get the extent flags: from the extent tree if trusted, else computed. */
5894 	if (!init_extent_tree) {
5895 		ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5896 				       btrfs_header_level(buf), 1, NULL,
5899 			ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5901 				fprintf(stderr, "Couldn't calc extent flags\n");
5902 				flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5907 		ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5909 			fprintf(stderr, "Couldn't calc extent flags\n");
5910 			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Cross-check the FULL_BACKREF flag against how we reached this block. */
5914 	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5916 		    ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5917 		    ri->objectid == btrfs_header_owner(buf)) {
5919 			 * Ok we got to this block from it's original owner and
5920 			 * we have FULL_BACKREF set.  Relocation can leave
5921 			 * converted blocks over so this is altogether possible,
5922 			 * however it's not possible if the generation > the
5923 			 * last snapshot, so check for this case.
5925 			if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5926 			    btrfs_header_generation(buf) > ri->last_snapshot) {
5927 				flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5928 				rec->bad_full_backref = 1;
5933 		    (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5934 		     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5935 			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5936 			rec->bad_full_backref = 1;
5940 	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5941 		rec->flag_block_full_backref = 1;
5945 		rec->flag_block_full_backref = 0;
5947 	owner = btrfs_header_owner(buf);
5950 	ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch on every item type and record what we find. */
5954 	if (btrfs_is_leaf(buf)) {
5955 		btree_space_waste += btrfs_leaf_free_space(root, buf);
5956 		for (i = 0; i < nritems; i++) {
5957 			struct btrfs_file_extent_item *fi;
5958 			btrfs_item_key_to_cpu(buf, &key, i);
5959 			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5960 				process_extent_item(root, extent_cache, buf,
5964 			if (key.type == BTRFS_METADATA_ITEM_KEY) {
5965 				process_extent_item(root, extent_cache, buf,
5969 			if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5971 					btrfs_item_size_nr(buf, i);
5974 			if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5975 				process_chunk_item(chunk_cache, &key, buf, i);
5978 			if (key.type == BTRFS_DEV_ITEM_KEY) {
5979 				process_device_item(dev_cache, &key, buf, i);
5982 			if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5983 				process_block_group_item(block_group_cache,
5987 			if (key.type == BTRFS_DEV_EXTENT_KEY) {
5988 				process_device_extent_item(dev_extent_cache,
5993 			if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5994 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5995 				process_extent_ref_v0(extent_cache, buf, i);
6002 			if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6003 				add_tree_backref(extent_cache, key.objectid, 0,
6007 			if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6008 				add_tree_backref(extent_cache, key.objectid,
6012 			if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6013 				struct btrfs_extent_data_ref *ref;
6014 				ref = btrfs_item_ptr(buf, i,
6015 						struct btrfs_extent_data_ref);
6016 				add_data_backref(extent_cache,
6018 					btrfs_extent_data_ref_root(buf, ref),
6019 					btrfs_extent_data_ref_objectid(buf,
6021 					btrfs_extent_data_ref_offset(buf, ref),
6022 					btrfs_extent_data_ref_count(buf, ref),
6023 					0, root->sectorsize);
6026 			if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6027 				struct btrfs_shared_data_ref *ref;
6028 				ref = btrfs_item_ptr(buf, i,
6029 						struct btrfs_shared_data_ref);
6030 				add_data_backref(extent_cache,
6031 					key.objectid, key.offset, 0, 0, 0,
6032 					btrfs_shared_data_ref_count(buf, ref),
6033 					0, root->sectorsize);
/* Orphan items get queued on delete_items for later cleanup. */
6036 			if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6037 				struct bad_item *bad;
6039 				if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6043 				bad = malloc(sizeof(struct bad_item));
6046 				INIT_LIST_HEAD(&bad->list);
6047 				memcpy(&bad->key, &key,
6048 				       sizeof(struct btrfs_key));
6049 				bad->root_id = owner;
6050 				list_add_tail(&bad->list, &delete_items);
/* Regular (non-inline, non-hole) file extents contribute data backrefs. */
6053 			if (key.type != BTRFS_EXTENT_DATA_KEY)
6055 			fi = btrfs_item_ptr(buf, i,
6056 					    struct btrfs_file_extent_item);
6057 			if (btrfs_file_extent_type(buf, fi) ==
6058 			    BTRFS_FILE_EXTENT_INLINE)
6060 			if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6063 			data_bytes_allocated +=
6064 				btrfs_file_extent_disk_num_bytes(buf, fi);
6065 			if (data_bytes_allocated < root->sectorsize) {
6068 			data_bytes_referenced +=
6069 				btrfs_file_extent_num_bytes(buf, fi);
6070 			add_data_backref(extent_cache,
6071 				btrfs_file_extent_disk_bytenr(buf, fi),
6072 				parent, owner, key.objectid, key.offset -
6073 				btrfs_file_extent_offset(buf, fi), 1, 1,
6074 				btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record each child pointer and queue it for scanning. */
6078 		struct btrfs_key first_key;
6080 		first_key.objectid = 0;
6083 			btrfs_item_key_to_cpu(buf, &first_key, 0);
6084 		level = btrfs_header_level(buf);
6085 		for (i = 0; i < nritems; i++) {
6086 			ptr = btrfs_node_blockptr(buf, i);
6087 			size = btrfs_level_size(root, level - 1);
6088 			btrfs_node_key_to_cpu(buf, &key, i);
6090 			if ((level == ri->drop_level)
6091 			    && is_dropped_key(&key, &ri->drop_key)) {
6095 			ret = add_extent_rec(extent_cache, &key,
6096 					     btrfs_node_ptr_generation(buf, i),
6097 					     ptr, size, 0, 0, 1, 0, 1, 0,
6101 			add_tree_backref(extent_cache, ptr, parent, owner, 1);
6104 				add_pending(nodes, seen, ptr, size);
6106 				add_pending(pending, seen, ptr, size);
6109 		btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6110 				      nritems) * sizeof(struct btrfs_key_ptr);
/* Accounting totals and old-backref detection. */
6112 	total_btree_bytes += buf->len;
6113 	if (fs_root_objectid(btrfs_header_owner(buf)))
6114 		total_fs_tree_bytes += buf->len;
6115 	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6116 		total_extent_tree_bytes += buf->len;
6117 	if (!found_old_backref &&
6118 	    btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6119 	    btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6120 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6121 		found_old_backref = 1;
6123 	free_extent_buffer(buf);
/*
 * Seed the block scan with a tree root: queue it on the nodes or pending
 * cache (by level), create its extent record, and add the appropriate tree
 * backref (shared for reloc/old-backref-rev roots, owner-based otherwise).
 *
 * NOTE(review): some original source lines are elided in this listing.
 */
6127 static int add_root_to_pending(struct extent_buffer *buf,
6128 			       struct cache_tree *extent_cache,
6129 			       struct cache_tree *pending,
6130 			       struct cache_tree *seen,
6131 			       struct cache_tree *nodes,
6134 	if (btrfs_header_level(buf) > 0)
6135 		add_pending(nodes, seen, buf->start, buf->len);
6137 		add_pending(pending, seen, buf->start, buf->len);
6138 	add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6139 		       0, 1, 1, 0, 1, 0, buf->len);
6141 	if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6142 	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6143 		add_tree_backref(extent_cache, buf->start, buf->start,
6146 		add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6150 /* as we fix the tree, we might be deleting blocks that
6151 * we're tracking for repair. This hook makes sure we
6152 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called when repair code frees an extent: keep the in-memory
 * extent_record/backref bookkeeping in sync by decrementing the matching
 * backref's counters and dropping the backref once neither the extent tree
 * nor any real reference still records it.
 *
 * BUGFIX(review): both unlink conditions tested "&& back->node.found_ref",
 * which would remove a backref that is still referenced.  The preceding
 * code only zeroes found_ref/found_extent_tree when their counts hit zero,
 * so the backref must be unlinked only when BOTH are clear — the condition
 * must be "&& !back->node.found_ref".
 *
 * NOTE(review): this listing elides some original source lines (the embedded
 * numbers are original file line numbers).
 */
6154 static int free_extent_hook(struct btrfs_trans_handle *trans,
6155 			    struct btrfs_root *root,
6156 			    u64 bytenr, u64 num_bytes, u64 parent,
6157 			    u64 root_objectid, u64 owner, u64 offset,
6160 	struct extent_record *rec;
6161 	struct cache_extent *cache;
6163 	struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Data extents are owned by inode numbers (>= FIRST_FREE_OBJECTID). */
6165 	is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6166 	cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6170 	rec = container_of(cache, struct extent_record, cache);
6172 		struct data_backref *back;
6173 		back = find_data_backref(rec, parent, root_objectid, owner,
6174 					 offset, 1, bytenr, num_bytes);
6177 		if (back->node.found_ref) {
6178 			back->found_ref -= refs_to_drop;
6180 				rec->refs -= refs_to_drop;
6182 		if (back->node.found_extent_tree) {
6183 			back->num_refs -= refs_to_drop;
6184 			if (rec->extent_item_refs)
6185 				rec->extent_item_refs -= refs_to_drop;
6187 		if (back->found_ref == 0)
6188 			back->node.found_ref = 0;
6189 		if (back->num_refs == 0)
6190 			back->node.found_extent_tree = 0;
/* Unlink only when no trace of the backref remains anywhere. */
6192 		if (!back->node.found_extent_tree && !back->node.found_ref) {
6193 			list_del(&back->node.list);
6197 		struct tree_backref *back;
6198 		back = find_tree_backref(rec, parent, root_objectid);
6201 		if (back->node.found_ref) {
6204 			back->node.found_ref = 0;
6206 		if (back->node.found_extent_tree) {
6207 			if (rec->extent_item_refs)
6208 				rec->extent_item_refs--;
6209 			back->node.found_extent_tree = 0;
6211 		if (!back->node.found_extent_tree && !back->node.found_ref) {
6212 			list_del(&back->node.list);
6216 	maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item (extent/metadata items and
 * all backref item types) keyed at @bytenr, walking the key space backwards
 * from (bytenr, -1, -1), and update block-group accounting for the removed
 * extent/metadata items.
 *
 * NOTE(review): some original source lines are elided in this listing.
 */
6221 static int delete_extent_records(struct btrfs_trans_handle *trans,
6222 				struct btrfs_root *root,
6223 				struct btrfs_path *path,
6224 				u64 bytenr, u64 new_len)
6226 	struct btrfs_key key;
6227 	struct btrfs_key found_key;
6228 	struct extent_buffer *leaf;
6233 	key.objectid = bytenr;
6235 	key.offset = (u64)-1;
6238 	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6245 		if (path->slots[0] == 0)
6251 	leaf = path->nodes[0];
6252 	slot = path->slots[0];
6254 	btrfs_item_key_to_cpu(leaf, &found_key, slot);
6255 	if (found_key.objectid != bytenr)
/* Skip item types we don't delete; step the search key backwards. */
6258 	if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6259 	    found_key.type != BTRFS_METADATA_ITEM_KEY &&
6260 	    found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6261 	    found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6262 	    found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6263 	    found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6264 	    found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6265 		btrfs_release_path(path);
6266 		if (found_key.type == 0) {
6267 			if (found_key.offset == 0)
6269 			key.offset = found_key.offset - 1;
6270 			key.type = found_key.type;
6272 		key.type = found_key.type - 1;
6273 		key.offset = (u64)-1;
6277 	fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6278 		found_key.objectid, found_key.type, found_key.offset);
6280 	ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6283 	btrfs_release_path(path);
/* Removing an extent/metadata item frees space in its block group. */
6285 	if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6286 	    found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM offset is the level, not a byte count; use leafsize. */
6287 		u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6288 			found_key.offset : root->leafsize;
6290 		ret = btrfs_update_block_group(trans, root, bytenr,
6297 	btrfs_release_path(path);
6302 * for a single backref, this will allocate a new extent
6303 * and add the backref to it.
/*
 * Repair helper: for a single backref, (re)create the extent item in the
 * extent tree when @allocated says it is missing, then add the backref
 * itself via btrfs_inc_extent_ref(), logging what was added.
 *
 * BUGFIX(review): "struct btrfs_disk_key copy_key;;" had a stray second
 * semicolon, and four calls passed "©_key" — an HTML-entity mangling of
 * "&copy_key" ("&copy;" rendered as the © sign) — restored here.
 *
 * NOTE(review): this listing elides some original source lines (the embedded
 * numbers are original file line numbers).
 */
6305 static int record_extent(struct btrfs_trans_handle *trans,
6306 			 struct btrfs_fs_info *info,
6307 			 struct btrfs_path *path,
6308 			 struct extent_record *rec,
6309 			 struct extent_backref *back,
6310 			 int allocated, u64 flags)
6313 	struct btrfs_root *extent_root = info->extent_root;
6314 	struct extent_buffer *leaf;
6315 	struct btrfs_key ins_key;
6316 	struct btrfs_extent_item *ei;
6317 	struct tree_backref *tback;
6318 	struct data_backref *dback;
6319 	struct btrfs_tree_block_info *bi;
6322 		rec->max_size = max_t(u64, rec->max_size,
6323 				    info->extent_root->leafsize);
/* Metadata extent items carry an extra btrfs_tree_block_info payload. */
6326 		u32 item_size = sizeof(*ei);
6329 			item_size += sizeof(*bi);
6331 		ins_key.objectid = rec->start;
6332 		ins_key.offset = rec->max_size;
6333 		ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6335 		ret = btrfs_insert_empty_item(trans, extent_root, path,
6336 					      &ins_key, item_size);
6340 		leaf = path->nodes[0];
6341 		ei = btrfs_item_ptr(leaf, path->slots[0],
6342 				    struct btrfs_extent_item);
6344 		btrfs_set_extent_refs(leaf, ei, 0);
6345 		btrfs_set_extent_generation(leaf, ei, rec->generation);
6347 		if (back->is_data) {
6348 			btrfs_set_extent_flags(leaf, ei,
6349 					       BTRFS_EXTENT_FLAG_DATA);
6351 			struct btrfs_disk_key copy_key;
6353 			tback = (struct tree_backref *)back;
6354 			bi = (struct btrfs_tree_block_info *)(ei + 1);
6355 			memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* We don't know the real first key of the block; record zeros. */
6358 			btrfs_set_disk_key_objectid(&copy_key,
6359 						    rec->info_objectid);
6360 			btrfs_set_disk_key_type(&copy_key, 0);
6361 			btrfs_set_disk_key_offset(&copy_key, 0);
6363 			btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6364 			btrfs_set_tree_block_key(leaf, bi, &copy_key);
6366 			btrfs_set_extent_flags(leaf, ei,
6367 					BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6370 		btrfs_mark_buffer_dirty(leaf);
6371 		ret = btrfs_update_block_group(trans, extent_root, rec->start,
6372 					       rec->max_size, 1, 0);
6375 		btrfs_release_path(path);
/* Now add the backref itself, once per reference we actually found. */
6378 	if (back->is_data) {
6382 		dback = (struct data_backref *)back;
6383 		if (back->full_backref)
6384 			parent = dback->parent;
6388 		for (i = 0; i < dback->found_ref; i++) {
6389 			/* if parent != 0, we're doing a full backref
6390 			 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6391 			 * just makes the backref allocator create a data
6394 			ret = btrfs_inc_extent_ref(trans, info->extent_root,
6395 						   rec->start, rec->max_size,
6399 						   BTRFS_FIRST_FREE_OBJECTID :
6405 		fprintf(stderr, "adding new data backref"
6406 				" on %llu %s %llu owner %llu"
6407 				" offset %llu found %d\n",
6408 				(unsigned long long)rec->start,
6409 				back->full_backref ?
6411 				back->full_backref ?
6412 				(unsigned long long)parent :
6413 				(unsigned long long)dback->root,
6414 				(unsigned long long)dback->owner,
6415 				(unsigned long long)dback->offset,
6420 		tback = (struct tree_backref *)back;
6421 		if (back->full_backref)
6422 			parent = tback->parent;
6426 		ret = btrfs_inc_extent_ref(trans, info->extent_root,
6427 					   rec->start, rec->max_size,
6428 					   parent, tback->root, 0, 0);
6429 		fprintf(stderr, "adding new tree backref on "
6430 			"start %llu len %llu parent %llu root %llu\n",
6431 			rec->start, rec->max_size, parent, tback->root);
6434 	btrfs_release_path(path);
/*
 * One candidate (bytenr, bytes) pairing voted for by data backrefs during
 * verify_backrefs(); kept on a local list and tallied to pick the majority.
 * NOTE(review): the counter fields (bytenr/bytes/count/broken) are elided
 * from this listing.
 */
6438 struct extent_entry {
6443 	struct list_head list;
/*
 * Linear scan of @entries for the extent_entry matching (bytenr, bytes);
 * NOTE(review): the return statements are elided in this listing.
 */
6446 static struct extent_entry *find_entry(struct list_head *entries,
6447 				       u64 bytenr, u64 bytes)
6449 	struct extent_entry *entry = NULL;
6451 	list_for_each_entry(entry, entries, list) {
6452 		if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry with the highest vote count, ignoring entries whose votes
 * are all "broken", and refusing to choose (returning no clear winner) when
 * the top candidates are tied.
 *
 * NOTE(review): some original source lines are elided in this listing.
 */
6459 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6461 	struct extent_entry *entry, *best = NULL, *prev = NULL;
6463 	list_for_each_entry(entry, entries, list) {
6470 		 * If there are as many broken entries as entries then we know
6471 		 * not to trust this particular entry.
6473 		if (entry->broken == entry->count)
6477 		 * If our current entry == best then we can't be sure our best
6478 		 * is really the best, so we need to keep searching.
6480 		if (best && best->count == entry->count) {
6486 		/* Prev == entry, not good enough, have to keep searching */
6487 		if (!prev->broken && prev->count == entry->count)
6491 			best = (prev->count > entry->count) ? prev : entry;
6492 		else if (best->count < entry->count)
/*
 * Repair a single file-extent reference so it agrees with the winning
 * extent @entry: locate the file extent item in its owning fs root, then
 * rewrite disk_bytenr/offset/disk_num_bytes (and ram_bytes when not
 * compressed) inside a transaction.  Cases it cannot fix safely (compressed
 * mismatches, refs outside the entry's range) are reported with a request
 * for a btrfs-image.
 *
 * NOTE(review): this listing elides some original source lines (the embedded
 * numbers are original file line numbers).
 */
6500 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6501 		      struct data_backref *dback, struct extent_entry *entry)
6503 	struct btrfs_trans_handle *trans;
6504 	struct btrfs_root *root;
6505 	struct btrfs_file_extent_item *fi;
6506 	struct extent_buffer *leaf;
6507 	struct btrfs_key key;
6511 	key.objectid = dback->root;
6512 	key.type = BTRFS_ROOT_ITEM_KEY;
6513 	key.offset = (u64)-1;
6514 	root = btrfs_read_fs_root(info, &key);
6516 		fprintf(stderr, "Couldn't find root for our ref\n");
6521 	 * The backref points to the original offset of the extent if it was
6522 	 * split, so we need to search down to the offset we have and then walk
6523 	 * forward until we find the backref we're looking for.
6525 	key.objectid = dback->owner;
6526 	key.type = BTRFS_EXTENT_DATA_KEY;
6527 	key.offset = dback->offset;
6528 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6530 		fprintf(stderr, "Error looking up ref %d\n", ret);
6535 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6536 		ret = btrfs_next_leaf(root, path);
6538 			fprintf(stderr, "Couldn't find our ref, next\n");
6542 	leaf = path->nodes[0];
6543 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6544 	if (key.objectid != dback->owner ||
6545 	    key.type != BTRFS_EXTENT_DATA_KEY) {
6546 		fprintf(stderr, "Couldn't find our ref, search\n");
6549 	fi = btrfs_item_ptr(leaf, path->slots[0],
6550 			    struct btrfs_file_extent_item);
6551 	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6552 	bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6554 	if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6559 	btrfs_release_path(path);
6561 	trans = btrfs_start_transaction(root, 1);
6563 		return PTR_ERR(trans);
6566 	 * Ok we have the key of the file extent we want to fix, now we can cow
6567 	 * down to the thing and fix it.
6569 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6571 		fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6572 			key.objectid, key.type, key.offset, ret);
6576 		fprintf(stderr, "Well that's odd, we just found this key "
6577 			"[%Lu, %u, %Lu]\n", key.objectid, key.type,
6582 	leaf = path->nodes[0];
6583 	fi = btrfs_item_ptr(leaf, path->slots[0],
6584 			    struct btrfs_file_extent_item);
6586 	if (btrfs_file_extent_compression(leaf, fi) &&
6587 	    dback->disk_bytenr != entry->bytenr) {
6588 		fprintf(stderr, "Ref doesn't match the record start and is "
6589 			"compressed, please take a btrfs-image of this file "
6590 			"system and send it to a btrfs developer so they can "
6591 			"complete this functionality for bytenr %Lu\n",
6592 			dback->disk_bytenr);
6597 	if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6598 		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6599 	} else if (dback->disk_bytenr > entry->bytenr) {
6600 		u64 off_diff, offset;
/* Ref starts inside the entry: fold the difference into the file offset. */
6602 		off_diff = dback->disk_bytenr - entry->bytenr;
6603 		offset = btrfs_file_extent_offset(leaf, fi);
6604 		if (dback->disk_bytenr + offset +
6605 		    btrfs_file_extent_num_bytes(leaf, fi) >
6606 		    entry->bytenr + entry->bytes) {
6607 			fprintf(stderr, "Ref is past the entry end, please "
6608 				"take a btrfs-image of this file system and "
6609 				"send it to a btrfs developer, ref %Lu\n",
6610 				dback->disk_bytenr);
6615 		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6616 		btrfs_set_file_extent_offset(leaf, fi, offset);
6617 	} else if (dback->disk_bytenr < entry->bytenr) {
6620 		offset = btrfs_file_extent_offset(leaf, fi);
6621 		if (dback->disk_bytenr + offset < entry->bytenr) {
6622 			fprintf(stderr, "Ref is before the entry start, please"
6623 				" take a btrfs-image of this file system and "
6624 				"send it to a btrfs developer, ref %Lu\n",
6625 				dback->disk_bytenr);
/* Rebase the file offset onto the entry's start. */
6630 		offset += dback->disk_bytenr;
6631 		offset -= entry->bytenr;
6632 		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6633 		btrfs_set_file_extent_offset(leaf, fi, offset);
6636 	btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6639 	 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6640 	 * only do this if we aren't using compression, otherwise it's a
6643 	if (!btrfs_file_extent_compression(leaf, fi))
6644 		btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6646 		printf("ram bytes may be wrong?\n");
6647 	btrfs_mark_buffer_dirty(leaf);
6649 	err = btrfs_commit_transaction(trans, root);
6650 	btrfs_release_path(path);
6651 	return ret ? ret : err;
/*
 * For a data extent record whose backrefs disagree on (bytenr, bytes):
 * collect each distinct pairing into an extent_entry vote list, pick the
 * majority winner (using the extent record itself as tiebreaker), then run
 * repair_ref() on every backref that doesn't match the winner.
 *
 * NOTE(review): this listing elides some original source lines (the embedded
 * numbers are original file line numbers).
 */
6654 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6655 			   struct extent_record *rec)
6657 	struct extent_backref *back;
6658 	struct data_backref *dback;
6659 	struct extent_entry *entry, *best = NULL;
6662 	int broken_entries = 0;
6667 	 * Metadata is easy and the backrefs should always agree on bytenr and
6668 	 * size, if not we've got bigger issues.
/* Tally votes from every full data backref with a real ref found. */
6673 	list_for_each_entry(back, &rec->backrefs, list) {
6674 		if (back->full_backref || !back->is_data)
6677 		dback = (struct data_backref *)back;
6680 		 * We only pay attention to backrefs that we found a real
6683 		if (dback->found_ref == 0)
6687 		 * For now we only catch when the bytes don't match, not the
6688 		 * bytenr.  We can easily do this at the same time, but I want
6689 		 * to have a fs image to test on before we just add repair
6690 		 * functionality willy-nilly so we know we won't screw up the
6694 		entry = find_entry(&entries, dback->disk_bytenr,
6697 			entry = malloc(sizeof(struct extent_entry));
6702 			memset(entry, 0, sizeof(*entry));
6703 			entry->bytenr = dback->disk_bytenr;
6704 			entry->bytes = dback->bytes;
6705 			list_add_tail(&entry->list, &entries);
6710 		 * If we only have on entry we may think the entries agree when
6711 		 * in reality they don't so we have to do some extra checking.
6713 		if (dback->disk_bytenr != rec->start ||
6714 		    dback->bytes != rec->nr || back->broken)
6725 	/* Yay all the backrefs agree, carry on good sir */
6726 	if (nr_entries <= 1 && !mismatch)
6729 	fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6730 		"%Lu\n", rec->start);
6733 	 * First we want to see if the backrefs can agree amongst themselves who
6734 	 * is right, so figure out which one of the entries has the highest
6737 	best = find_most_right_entry(&entries);
6740 	 * Ok so we may have an even split between what the backrefs think, so
6741 	 * this is where we use the extent ref to see what it thinks.
6744 		entry = find_entry(&entries, rec->start, rec->nr);
6745 		if (!entry && (!broken_entries || !rec->found_rec)) {
6746 			fprintf(stderr, "Backrefs don't agree with each other "
6747 				"and extent record doesn't agree with anybody,"
6748 				" so we can't fix bytenr %Lu bytes %Lu\n",
6749 				rec->start, rec->nr);
6752 		} else if (!entry) {
6754 			 * Ok our backrefs were broken, we'll assume this is the
6755 			 * correct value and add an entry for this range.
6757 			entry = malloc(sizeof(struct extent_entry));
6762 			memset(entry, 0, sizeof(*entry));
6763 			entry->bytenr = rec->start;
6764 			entry->bytes = rec->nr;
6765 			list_add_tail(&entry->list, &entries);
6769 		best = find_most_right_entry(&entries);
6771 			fprintf(stderr, "Backrefs and extent record evenly "
6772 				"split on who is right, this is going to "
6773 				"require user input to fix bytenr %Lu bytes "
6774 				"%Lu\n", rec->start, rec->nr);
6781 	 * I don't think this can happen currently as we'll abort() if we catch
6782 	 * this case higher up, but in case somebody removes that we still can't
6783 	 * deal with it properly here yet, so just bail out of that's the case.
6785 	if (best->bytenr != rec->start) {
6786 		fprintf(stderr, "Extent start and backref starts don't match, "
6787 			"please use btrfs-image on this file system and send "
6788 			"it to a btrfs developer so they can make fsck fix "
6789 			"this particular case.  bytenr is %Lu, bytes is %Lu\n",
6790 			rec->start, rec->nr);
6796 	 * Ok great we all agreed on an extent record, let's go find the real
6797 	 * references and fix up the ones that don't match.
6799 	list_for_each_entry(back, &rec->backrefs, list) {
6800 		if (back->full_backref || !back->is_data)
6803 		dback = (struct data_backref *)back;
6806 		 * Still ignoring backrefs that don't have a real ref attached
6809 		if (dback->found_ref == 0)
6812 		if (dback->bytes == best->bytes &&
6813 		    dback->disk_bytenr == best->bytenr)
6816 		ret = repair_ref(info, path, dback, best);
6822 	 * Ok we messed with the actual refs, which means we need to drop our
6823 	 * entire cache and go back and rescan.  I know this is a huge pain and
6824 	 * adds a lot of extra work, but it's the only way to be safe.  Once all
6825 	 * the backrefs agree we may not need to do anything to the extent
/* Free the vote list before returning. */
6830 	while (!list_empty(&entries)) {
6831 		entry = list_entry(entries.next, struct extent_entry, list);
6832 		list_del_init(&entry->list);
/*
 * Collapse a duplicate-extent situation: when @rec has no real extent item
 * of its own, promote the duplicate that does (the one found via a real
 * BTRFS_EXTENT_ITEM_KEY) to be the canonical extent_record, absorbing any
 * other overlapping records' refs, backrefs and dup lists.  Returns 0 when
 * duplicates remain to be deleted, nonzero when the situation is resolved.
 *
 * NOTE(review): some original source lines are elided in this listing.
 */
6838 static int process_duplicates(struct btrfs_root *root,
6839 			      struct cache_tree *extent_cache,
6840 			      struct extent_record *rec)
6842 	struct extent_record *good, *tmp;
6843 	struct cache_extent *cache;
6847 	 * If we found a extent record for this extent then return, or if we
6848 	 * have more than one duplicate we are likely going to need to delete
6851 	if (rec->found_rec || rec->num_duplicates > 1)
6854 	/* Shouldn't happen but just in case */
6855 	BUG_ON(!rec->num_duplicates);
6858 	 * So this happens if we end up with a backref that doesn't match the
6859 	 * actual extent entry.  So either the backref is bad or the extent
6860 	 * entry is bad.  Either way we want to have the extent_record actually
6861 	 * reflect what we found in the extent_tree, so we need to take the
6862 	 * duplicate out and use that as the extent_record since the only way we
6863 	 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6865 	remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate (the one backed by a real extent item). */
6867 	good = list_entry(rec->dups.next, struct extent_record, list);
6868 	list_del_init(&good->list);
6869 	INIT_LIST_HEAD(&good->backrefs);
6870 	INIT_LIST_HEAD(&good->dups);
6871 	good->cache.start = good->start;
6872 	good->cache.size = good->nr;
6873 	good->content_checked = 0;
6874 	good->owner_ref_checked = 0;
6875 	good->num_duplicates = 0;
6876 	good->refs = rec->refs;
6877 	list_splice_init(&rec->backrefs, &good->backrefs);
/* Absorb any further overlapping records into the promoted one. */
6879 		cache = lookup_cache_extent(extent_cache, good->start,
6883 		tmp = container_of(cache, struct extent_record, cache);
6886 		 * If we find another overlapping extent and it's found_rec is
6887 		 * set then it's a duplicate and we need to try and delete
6890 		if (tmp->found_rec || tmp->num_duplicates > 0) {
6891 			if (list_empty(&good->list))
6892 				list_add_tail(&good->list,
6893 					      &duplicate_extents);
6894 			good->num_duplicates += tmp->num_duplicates + 1;
6895 			list_splice_init(&tmp->dups, &good->dups);
6896 			list_del_init(&tmp->list);
6897 			list_add_tail(&tmp->list, &good->dups);
6898 			remove_cache_extent(extent_cache, &tmp->cache);
6903 		 * Ok we have another non extent item backed extent rec, so lets
6904 		 * just add it to this extent and carry on like we did above.
6906 		good->refs += tmp->refs;
6907 		list_splice_init(&tmp->backrefs, &good->backrefs);
6908 		remove_cache_extent(extent_cache, &tmp->cache);
6911 	ret = insert_cache_extent(extent_cache, &good->cache);
6914 	return good->num_duplicates ? 0 : 1;
/*
 * Delete the on-disk EXTENT_ITEMs for a set of duplicate extent records
 * collected by process_duplicates().  Picks the record that fully covers
 * all of its duplicates as the keeper, deletes the rest from the extent
 * root inside a transaction, and frees every record on the dup list.
 *
 * Returns the number of items deleted (>= 0) on success, negative errno
 * on failure.  NOTE(review): some original lines are elided here (error
 * labels, frees); comments describe only the visible code.
 */
6917 static int delete_duplicate_records(struct btrfs_root *root,
6918 struct extent_record *rec)
6920 struct btrfs_trans_handle *trans;
6921 LIST_HEAD(delete_list);
6922 struct btrfs_path *path;
6923 struct extent_record *tmp, *good, *n;
6926 struct btrfs_key key;
6928 path = btrfs_alloc_path();
6935 /* Find the record that covers all of the duplicates. */
6936 list_for_each_entry(tmp, &rec->dups, list) {
6937 if (good->start < tmp->start)
6939 if (good->nr > tmp->nr)
/* Partial overlap is beyond what this routine can repair; bail out. */
6942 if (tmp->start + tmp->nr < good->start + good->nr) {
6943 fprintf(stderr, "Ok we have overlapping extents that "
6944 "aren't completely covered by eachother, this "
6945 "is going to require more careful thought. "
6946 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6947 tmp->start, tmp->nr, good->start, good->nr);
/* NOTE(review): "eachother" typo above is user-visible output; left
 * untouched here since this is a documentation-only pass. */
6954 list_add_tail(&rec->list, &delete_list);
/* Queue every duplicate except the keeper for deletion. */
6956 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6959 list_move_tail(&tmp->list, &delete_list);
6962 root = root->fs_info->extent_root;
6963 trans = btrfs_start_transaction(root, 1);
6964 if (IS_ERR(trans)) {
6965 ret = PTR_ERR(trans);
6969 list_for_each_entry(tmp, &delete_list, list) {
6970 if (tmp->found_rec == 0)
6972 key.objectid = tmp->start;
6973 key.type = BTRFS_EXTENT_ITEM_KEY;
6974 key.offset = tmp->nr;
6976 /* Shouldn't happen but just in case */
6977 if (tmp->metadata) {
6978 fprintf(stderr, "Well this shouldn't happen, extent "
6979 "record overlaps but is metadata? "
6980 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6984 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6990 ret = btrfs_del_item(trans, root, path);
6993 btrfs_release_path(path);
6996 err = btrfs_commit_transaction(trans, root);
/* Free the queued records; list_del before the (elided) free call. */
7000 while (!list_empty(&delete_list)) {
7001 tmp = list_entry(delete_list.next, struct extent_record, list);
7002 list_del_init(&tmp->list);
7008 while (!list_empty(&rec->dups)) {
7009 tmp = list_entry(rec->dups.next, struct extent_record, list);
7010 list_del_init(&tmp->list);
7014 btrfs_free_path(path);
7016 if (!ret && !nr_del)
7017 rec->num_duplicates = 0;
7019 return ret ? ret : nr_del;
/*
 * For data backrefs we saw in the extent tree but never matched to a
 * file extent (found_ref == 0), look up the referencing file extent in
 * its fs tree and, if found, record the on-disk bytenr/bytes it points
 * at.  verify_backrefs() later uses these values to decide how to fix
 * the extent record.
 *
 * Returns 0 on success, negative errno on lookup failure.
 */
7022 static int find_possible_backrefs(struct btrfs_fs_info *info,
7023 struct btrfs_path *path,
7024 struct cache_tree *extent_cache,
7025 struct extent_record *rec)
7027 struct btrfs_root *root;
7028 struct extent_backref *back;
7029 struct data_backref *dback;
7030 struct cache_extent *cache;
7031 struct btrfs_file_extent_item *fi;
7032 struct btrfs_key key;
7036 list_for_each_entry(back, &rec->backrefs, list) {
7037 /* Don't care about full backrefs (poor unloved backrefs) */
7038 if (back->full_backref || !back->is_data)
7041 dback = (struct data_backref *)back;
7043 /* We found this one, we don't need to do a lookup */
7044 if (dback->found_ref)
/* Resolve the fs tree the backref claims to belong to. */
7047 key.objectid = dback->root;
7048 key.type = BTRFS_ROOT_ITEM_KEY;
7049 key.offset = (u64)-1;
7051 root = btrfs_read_fs_root(info, &key);
7053 /* No root, definitely a bad ref, skip */
7054 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7056 /* Other err, exit */
7058 return PTR_ERR(root);
/* Look up the exact file extent item the backref describes. */
7060 key.objectid = dback->owner;
7061 key.type = BTRFS_EXTENT_DATA_KEY;
7062 key.offset = dback->offset;
7063 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7065 btrfs_release_path(path);
7068 /* Didn't find it, we can carry on */
7073 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7074 struct btrfs_file_extent_item);
7075 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7076 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7077 btrfs_release_path(path);
7078 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7080 struct extent_record *tmp;
7081 tmp = container_of(cache, struct extent_record, cache);
7084 * If we found an extent record for the bytenr for this
7085 * particular backref then we can't add it to our
7086 * current extent record. We only want to add backrefs
7087 * that don't have a corresponding extent item in the
7088 * extent tree since they likely belong to this record
7089 * and we need to fix it if it doesn't match bytenrs.
/* Attach what the file extent actually points at. */
7095 dback->found_ref += 1;
7096 dback->disk_bytenr = bytenr;
7097 dback->bytes = bytes;
7100 * Set this so the verify backref code knows not to trust the
7101 * values in this backref.
7110 * Record orphan data ref into corresponding root.
7112 * Return 0 if the extent item contains data ref and recorded.
7113 * Return 1 if the extent item contains no useful data ref
7114 * In that case, it may contain only shared_dataref or metadata backref
7115 * or the file extent exists(this should be handled by the extent bytenr
7117 * Return <0 if something goes wrong.
7119 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7120 struct extent_record *rec)
7122 struct btrfs_key key;
7123 struct btrfs_root *dest_root;
7124 struct extent_backref *back;
7125 struct data_backref *dback;
7126 struct orphan_data_extent *orphan;
7127 struct btrfs_path *path;
7128 int recorded_data_ref = 0;
7133 path = btrfs_alloc_path();
/* Walk backrefs that exist in the extent tree but had no matching ref. */
7136 list_for_each_entry(back, &rec->backrefs, list) {
7137 if (back->full_backref || !back->is_data ||
7138 !back->found_extent_tree)
7140 dback = (struct data_backref *)back;
7141 if (dback->found_ref)
7143 key.objectid = dback->root;
7144 key.type = BTRFS_ROOT_ITEM_KEY;
7145 key.offset = (u64)-1;
7147 dest_root = btrfs_read_fs_root(fs_info, &key);
7149 /* For non-exist root we just skip it */
7150 if (IS_ERR(dest_root) || !dest_root)
7153 key.objectid = dback->owner;
7154 key.type = BTRFS_EXTENT_DATA_KEY;
7155 key.offset = dback->offset;
7157 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7159 * For ret < 0, it's OK since the fs-tree may be corrupted,
7160 * we need to record it for inode/file extent rebuild.
7161 * For ret > 0, we record it only for file extent rebuild.
7162 * For ret == 0, the file extent exists but only bytenr
7163 * mismatch, let the original bytenr fix routine to handle,
/* Build an orphan record and hang it off the destination root. */
7169 orphan = malloc(sizeof(*orphan));
7174 INIT_LIST_HEAD(&orphan->list);
7175 orphan->root = dback->root;
7176 orphan->objectid = dback->owner;
7177 orphan->offset = dback->offset;
7178 orphan->disk_bytenr = rec->cache.start;
7179 orphan->disk_len = rec->cache.size;
7180 list_add(&dest_root->orphan_data_extents, &orphan->list);
7181 recorded_data_ref = 1;
7184 btrfs_free_path(path);
/* 0 if at least one orphan data ref was recorded, 1 otherwise. */
7186 return !recorded_data_ref;
7192 * when an incorrect extent item is found, this will delete
7193 * all of the existing entries for it and recreate them
7194 * based on what the tree scan found.
7196 static int fixup_extent_refs(struct btrfs_fs_info *info,
7197 struct cache_tree *extent_cache,
7198 struct extent_record *rec)
7200 struct btrfs_trans_handle *trans = NULL;
7202 struct btrfs_path *path;
7203 struct list_head *cur = rec->backrefs.next;
7204 struct cache_extent *cache;
7205 struct extent_backref *back;
/* Preserve the full-backref flag when re-recording the extent. */
7209 if (rec->flag_block_full_backref)
7210 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7212 path = btrfs_alloc_path();
7216 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7218 * Sometimes the backrefs themselves are so broken they don't
7219 * get attached to any meaningful rec, so first go back and
7220 * check any of our backrefs that we couldn't find and throw
7221 * them into the list if we find the backref so that
7222 * verify_backrefs can figure out what to do.
7224 ret = find_possible_backrefs(info, path, extent_cache, rec);
7229 /* step one, make sure all of the backrefs agree */
7230 ret = verify_backrefs(info, path, rec);
7234 trans = btrfs_start_transaction(info->extent_root, 1);
7235 if (IS_ERR(trans)) {
7236 ret = PTR_ERR(trans);
7240 /* step two, delete all the existing records */
7241 ret = delete_extent_records(trans, info->extent_root, path,
7242 rec->start, rec->max_size);
7247 /* was this block corrupt? If so, don't add references to it */
7248 cache = lookup_cache_extent(info->corrupt_blocks,
7249 rec->start, rec->max_size);
7255 /* step three, recreate all the refs we did find */
7256 while(cur != &rec->backrefs) {
7257 back = list_entry(cur, struct extent_backref, list);
7261 * if we didn't find any references, don't create a
7264 if (!back->found_ref)
7267 rec->bad_full_backref = 0;
7268 ret = record_extent(trans, info, path, rec, back, allocated, flags);
/* Commit regardless; the (elided) exit path reconciles ret vs. err. */
7276 int err = btrfs_commit_transaction(trans, info->extent_root);
7281 btrfs_free_path(path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on an on-disk extent item
 * so it matches what the scan recorded (rec->flag_block_full_backref).
 *
 * Returns 0 on success, negative errno on failure; always commits the
 * transaction it opened before returning.
 */
7285 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7286 struct extent_record *rec)
7288 struct btrfs_trans_handle *trans;
7289 struct btrfs_root *root = fs_info->extent_root;
7290 struct btrfs_path *path;
7291 struct btrfs_extent_item *ei;
7292 struct btrfs_key key;
/* Metadata extents may use the skinny METADATA_ITEM key (offset = level). */
7296 key.objectid = rec->start;
7297 if (rec->metadata) {
7298 key.type = BTRFS_METADATA_ITEM_KEY;
7299 key.offset = rec->info_level;
7301 key.type = BTRFS_EXTENT_ITEM_KEY;
7302 key.offset = rec->max_size;
7305 path = btrfs_alloc_path();
7309 trans = btrfs_start_transaction(root, 0);
7310 if (IS_ERR(trans)) {
7311 btrfs_free_path(path);
7312 return PTR_ERR(trans);
7315 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7317 btrfs_free_path(path);
7318 btrfs_commit_transaction(trans, root);
7321 fprintf(stderr, "Didn't find extent for %llu\n",
7322 (unsigned long long)rec->start);
7323 btrfs_free_path(path);
7324 btrfs_commit_transaction(trans, root);
7328 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7329 struct btrfs_extent_item);
7330 flags = btrfs_extent_flags(path->nodes[0], ei);
7331 if (rec->flag_block_full_backref) {
7332 fprintf(stderr, "setting full backref on %llu\n",
7333 (unsigned long long)key.objectid);
7334 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7336 fprintf(stderr, "clearing full backref on %llu\n",
7337 (unsigned long long)key.objectid);
7338 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7340 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7341 btrfs_mark_buffer_dirty(path->nodes[0]);
7342 btrfs_free_path(path);
7343 return btrfs_commit_transaction(trans, root);
7346 /* right now we only prune from the extent allocation tree */
/*
 * Remove the node pointer that references one corrupt block: search the
 * extent root down to the corrupt block's parent level, locate the slot
 * whose blockptr matches the corrupt bytenr, and delete that pointer.
 */
7347 static int prune_one_block(struct btrfs_trans_handle *trans,
7348 struct btrfs_fs_info *info,
7349 struct btrfs_corrupt_block *corrupt)
7352 struct btrfs_path path;
7353 struct extent_buffer *eb;
7357 int level = corrupt->level + 1;
7359 btrfs_init_path(&path);
7361 /* we want to stop at the parent to our busted block */
7362 path.lowest_level = level;
7364 ret = btrfs_search_slot(trans, info->extent_root,
7365 &corrupt->key, &path, -1, 1);
7370 eb = path.nodes[level];
7377 * hopefully the search gave us the block we want to prune,
7378 * lets try that first
7380 slot = path.slots[level];
7381 found = btrfs_node_blockptr(eb, slot);
7382 if (found == corrupt->cache.start)
7385 nritems = btrfs_header_nritems(eb);
7387 /* the search failed, lets scan this node and hope we find it */
7388 for (slot = 0; slot < nritems; slot++) {
7389 found = btrfs_node_blockptr(eb, slot);
7390 if (found == corrupt->cache.start)
7394 * we couldn't find the bad block. TODO, search all the nodes for pointers
7397 if (eb == info->extent_root->node) {
7402 btrfs_release_path(&path);
7407 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7408 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7411 btrfs_release_path(&path);
/*
 * Drain fs_info->corrupt_blocks: prune the pointer to each corrupt
 * block inside a single transaction, then commit.  The transaction is
 * started lazily on the first corrupt block found.
 */
7415 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7417 struct btrfs_trans_handle *trans = NULL;
7418 struct cache_extent *cache;
7419 struct btrfs_corrupt_block *corrupt;
7422 cache = search_cache_extent(info->corrupt_blocks, 0);
7426 trans = btrfs_start_transaction(info->extent_root, 1);
7428 return PTR_ERR(trans);
7430 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* NOTE(review): prune_one_block()'s return value is ignored here. */
7431 prune_one_block(trans, info, corrupt);
7432 remove_cache_extent(info->corrupt_blocks, cache);
7435 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the in-memory free space cache: walk all EXTENT_DIRTY ranges in
 * fs_info->free_space_cache, clear them, and step through the block
 * groups covering each range so they can be re-cached later.
 */
7439 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7441 struct btrfs_block_group_cache *cache;
7446 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7447 &start, &end, EXTENT_DIRTY);
7450 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7456 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group's range. */
7461 start = cache->key.objectid + cache->key.offset;
/*
 * Final pass over every extent_record collected by the scan: report and
 * (in repair mode) fix duplicates, ref-count mismatches, backpointer
 * mismatches, unchecked owner refs, bad full-backref flags, and
 * report stripe-crossing / wrong-chunk-type metadata.
 *
 * In repair mode the problem extents (and corrupt blocks) are first
 * marked dirty in excluded_extents so repairs never allocate from them.
 * NOTE(review): many original lines are elided in this listing (loop
 * exits, ret handling); comments describe only the visible code.
 */
7465 static int check_extent_refs(struct btrfs_root *root,
7466 struct cache_tree *extent_cache)
7468 struct extent_record *rec;
7469 struct cache_extent *cache;
7478 * if we're doing a repair, we have to make sure
7479 * we don't allocate from the problem extents.
7480 * In the worst case, this will be all the
7483 cache = search_cache_extent(extent_cache, 0);
7485 rec = container_of(cache, struct extent_record, cache);
7486 set_extent_dirty(root->fs_info->excluded_extents,
7488 rec->start + rec->max_size - 1,
7490 cache = next_cache_extent(cache);
7493 /* pin down all the corrupted blocks too */
7494 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7496 set_extent_dirty(root->fs_info->excluded_extents,
7498 cache->start + cache->size - 1,
7500 cache = next_cache_extent(cache);
7502 prune_corrupt_blocks(root->fs_info);
7503 reset_cached_block_groups(root->fs_info);
7506 reset_cached_block_groups(root->fs_info);
7509 * We need to delete any duplicate entries we find first otherwise we
7510 * could mess up the extent tree when we have backrefs that actually
7511 * belong to a different extent item and not the weird duplicate one.
7513 while (repair && !list_empty(&duplicate_extents)) {
7514 rec = list_entry(duplicate_extents.next, struct extent_record,
7516 list_del_init(&rec->list);
7518 /* Sometimes we can find a backref before we find an actual
7519 * extent, so we need to process it a little bit to see if there
7520 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7521 * if this is a backref screwup. If we need to delete stuff
7522 * process_duplicates() will return 0, otherwise it will return
7525 if (process_duplicates(root, extent_cache, rec))
7527 ret = delete_duplicate_records(root, rec);
7531 * delete_duplicate_records will return the number of entries
7532 * deleted, so if it's greater than 0 then we know we actually
7533 * did something and we need to remove.
/* Main reporting/repair loop over every remaining extent record. */
7547 cache = search_cache_extent(extent_cache, 0);
7550 rec = container_of(cache, struct extent_record, cache);
7551 if (rec->num_duplicates) {
7552 fprintf(stderr, "extent item %llu has multiple extent "
7553 "items\n", (unsigned long long)rec->start);
7558 if (rec->refs != rec->extent_item_refs) {
7559 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7560 (unsigned long long)rec->start,
7561 (unsigned long long)rec->nr);
7562 fprintf(stderr, "extent item %llu, found %llu\n",
7563 (unsigned long long)rec->extent_item_refs,
7564 (unsigned long long)rec->refs);
7565 ret = record_orphan_data_extents(root->fs_info, rec);
7572 * we can't use the extent to repair file
7573 * extent, let the fallback method handle it.
7575 if (!fixed && repair) {
7576 ret = fixup_extent_refs(
7587 if (all_backpointers_checked(rec, 1)) {
7588 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7589 (unsigned long long)rec->start,
7590 (unsigned long long)rec->nr);
7592 if (!fixed && !recorded && repair) {
7593 ret = fixup_extent_refs(root->fs_info,
7602 if (!rec->owner_ref_checked) {
7603 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7604 (unsigned long long)rec->start,
7605 (unsigned long long)rec->nr);
7606 if (!fixed && !recorded && repair) {
7607 ret = fixup_extent_refs(root->fs_info,
7616 if (rec->bad_full_backref) {
7617 fprintf(stderr, "bad full backref, on [%llu]\n",
7618 (unsigned long long)rec->start);
7620 ret = fixup_extent_flags(root->fs_info, rec);
7629 * Although it's not a extent ref's problem, we reuse this
7630 * routine for error reporting.
7631 * No repair function yet.
7633 if (rec->crossing_stripes) {
7635 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7636 rec->start, rec->start + rec->max_size);
7641 if (rec->wrong_chunk_type) {
7643 "bad extent [%llu, %llu), type mismatch with chunk\n",
7644 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it and un-exclude its range if clean. */
7649 remove_cache_extent(extent_cache, cache);
7650 free_all_extent_backrefs(rec);
7651 if (!init_extent_tree && repair && (!cur_err || fixed))
7652 clear_extent_dirty(root->fs_info->excluded_extents,
7654 rec->start + rec->max_size - 1,
7660 if (ret && ret != -EAGAIN) {
7661 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7664 struct btrfs_trans_handle *trans;
7666 root = root->fs_info->extent_root;
7667 trans = btrfs_start_transaction(root, 1);
7668 if (IS_ERR(trans)) {
7669 ret = PTR_ERR(trans);
/* Re-derive block group accounting after the repairs above. */
7673 btrfs_fix_block_accounting(trans, root);
7674 ret = btrfs_commit_transaction(trans, root);
7679 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of @length bytes
 * spread over @num_stripes stripes, based on the RAID profile in @type:
 * RAID0 divides across all stripes, RAID10 stores two copies, RAID5/6
 * reserve one/two parity stripes, and every other profile (single, DUP,
 * RAID1) uses the full length on each device.
 */
7685 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7689 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7690 stripe_size = length;
7691 stripe_size /= num_stripes;
7692 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7693 stripe_size = length * 2;
7694 stripe_size /= num_stripes;
7695 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7696 stripe_size = length;
7697 stripe_size /= (num_stripes - 1)/* one parity stripe */;
7698 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7699 stripe_size = length;
7700 stripe_size /= (num_stripes - 2)/* two parity stripes */;
7702 stripe_size = length;
7708 * Check the chunk with its block group/dev list ref:
7709 * Return 0 if all refs seems valid.
7710 * Return 1 if part of refs seems valid, need later check for rebuild ref
7711 * like missing block group and needs to search extent tree to rebuild them.
7712 * Return -1 if essential refs are missing and unable to rebuild.
7714 static int check_chunk_refs(struct chunk_record *chunk_rec,
7715 struct block_group_tree *block_group_cache,
7716 struct device_extent_tree *dev_extent_cache,
7719 struct cache_extent *block_group_item;
7720 struct block_group_record *block_group_rec;
7721 struct cache_extent *dev_extent_item;
7722 struct device_extent_record *dev_extent_rec;
7726 int metadump_v2 = 0;
/* Step 1: the chunk must have a matching block group item. */
7730 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7733 if (block_group_item) {
7734 block_group_rec = container_of(block_group_item,
7735 struct block_group_record,
7737 if (chunk_rec->length != block_group_rec->offset ||
7738 chunk_rec->offset != block_group_rec->objectid ||
7740 chunk_rec->type_flags != block_group_rec->flags)) {
7743 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7744 chunk_rec->objectid,
7749 chunk_rec->type_flags,
7750 block_group_rec->objectid,
7751 block_group_rec->type,
7752 block_group_rec->offset,
7753 block_group_rec->offset,
7754 block_group_rec->objectid,
7755 block_group_rec->flags);
/* Claim the block group record for this chunk. */
7758 list_del_init(&block_group_rec->list);
7759 chunk_rec->bg_rec = block_group_rec;
7764 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7765 chunk_rec->objectid,
7770 chunk_rec->type_flags);
/* Step 2: every stripe must have a matching dev extent. */
7777 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7778 chunk_rec->num_stripes);
7779 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7780 devid = chunk_rec->stripes[i].devid;
7781 offset = chunk_rec->stripes[i].offset;
7782 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7783 devid, offset, length);
7784 if (dev_extent_item) {
7785 dev_extent_rec = container_of(dev_extent_item,
7786 struct device_extent_record,
7788 if (dev_extent_rec->objectid != devid ||
7789 dev_extent_rec->offset != offset ||
7790 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7791 dev_extent_rec->length != length) {
7794 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
/* NOTE(review): "dismatch" typo above is user-visible output; left
 * untouched in this documentation-only pass. */
7795 chunk_rec->objectid,
7798 chunk_rec->stripes[i].devid,
7799 chunk_rec->stripes[i].offset,
7800 dev_extent_rec->objectid,
7801 dev_extent_rec->offset,
7802 dev_extent_rec->length);
7805 list_move(&dev_extent_rec->chunk_list,
7806 &chunk_rec->dextents);
7811 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7812 chunk_rec->objectid,
7815 chunk_rec->stripes[i].devid,
7816 chunk_rec->stripes[i].offset);
7823 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Cross-check every chunk against block groups and dev extents via
 * check_chunk_refs(), sorting chunks onto @good / @rebuild / @bad lists
 * (each list pointer may be NULL).  Afterwards report any block groups
 * or dev extents that no chunk claimed.  @silent suppresses messages.
 */
7824 int check_chunks(struct cache_tree *chunk_cache,
7825 struct block_group_tree *block_group_cache,
7826 struct device_extent_tree *dev_extent_cache,
7827 struct list_head *good, struct list_head *bad,
7828 struct list_head *rebuild, int silent)
7830 struct cache_extent *chunk_item;
7831 struct chunk_record *chunk_rec;
7832 struct block_group_record *bg_rec;
7833 struct device_extent_record *dext_rec;
7837 chunk_item = first_cache_extent(chunk_cache);
7838 while (chunk_item) {
7839 chunk_rec = container_of(chunk_item, struct chunk_record,
7841 err = check_chunk_refs(chunk_rec, block_group_cache,
7842 dev_extent_cache, silent);
7845 if (err == 0 && good)
7846 list_add_tail(&chunk_rec->list, good);
7847 if (err > 0 && rebuild)
7848 list_add_tail(&chunk_rec->list, rebuild);
7850 list_add_tail(&chunk_rec->list, bad);
7851 chunk_item = next_cache_extent(chunk_item);
/* Anything left on these lists was never matched to a chunk. */
7854 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7857 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7865 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7869 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Verify one device's byte_used against the sum of its dev extents:
 * walk all dev extents belonging to dev_rec->devid, total their
 * lengths, and complain if the total disagrees with the dev item.
 */
7880 static int check_device_used(struct device_record *dev_rec,
7881 struct device_extent_tree *dext_cache)
7883 struct cache_extent *cache;
7884 struct device_extent_record *dev_extent_rec;
7887 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7889 dev_extent_rec = container_of(cache,
7890 struct device_extent_record,
/* Stop once iteration crosses into another device's extents. */
7892 if (dev_extent_rec->objectid != dev_rec->devid)
7895 list_del_init(&dev_extent_rec->device_list);
7896 total_byte += dev_extent_rec->length;
7897 cache = next_cache_extent(cache);
7900 if (total_byte != dev_rec->byte_used) {
7902 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7903 total_byte, dev_rec->byte_used, dev_rec->objectid,
7904 dev_rec->type, dev_rec->offset);
7911 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() for every device record, then report dev
 * extents whose device item was never found.
 */
7912 static int check_devices(struct rb_root *dev_cache,
7913 struct device_extent_tree *dev_extent_cache)
7915 struct rb_node *dev_node;
7916 struct device_record *dev_rec;
7917 struct device_extent_record *dext_rec;
7921 dev_node = rb_first(dev_cache);
7923 dev_rec = container_of(dev_node, struct device_record, node);
7924 err = check_device_used(dev_rec, dev_extent_cache);
7928 dev_node = rb_next(dev_node);
7930 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7933 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7934 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root to scan and
 * append it to @head.  @drop_key/@drop_level describe an in-progress
 * snapshot drop (may be NULL/0 for normal trees).
 * Caller frees entries via free_root_item_list().
 */
7941 static int add_root_item_to_list(struct list_head *head,
7942 u64 objectid, u64 bytenr, u64 last_snapshot,
7943 u8 level, u8 drop_level,
7944 int level_size, struct btrfs_key *drop_key)
7947 struct root_item_record *ri_rec;
7948 ri_rec = malloc(sizeof(*ri_rec));
7951 ri_rec->bytenr = bytenr;
7952 ri_rec->objectid = objectid;
7953 ri_rec->level = level;
7954 ri_rec->level_size = level_size;
7955 ri_rec->drop_level = drop_level;
7956 ri_rec->last_snapshot = last_snapshot;
7958 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7959 list_add_tail(&ri_rec->list, head);
/* Free every root_item_record remaining on @list. */
7964 static void free_root_item_list(struct list_head *list)
7966 struct root_item_record *ri_rec;
7968 while (!list_empty(list)) {
7969 ri_rec = list_first_entry(list, struct root_item_record,
7971 list_del_init(&ri_rec->list);
/*
 * Scan every tree root queued on @list: read each root block, seed the
 * pending/seen/nodes caches via add_root_to_pending(), and drive
 * run_next_block() until the traversal for that root is exhausted,
 * populating the extent/chunk/device/block-group caches as it goes.
 * A final run_next_block() loop drains anything still pending.
 */
7976 static int deal_root_from_list(struct list_head *list,
7977 struct btrfs_root *root,
7978 struct block_info *bits,
7980 struct cache_tree *pending,
7981 struct cache_tree *seen,
7982 struct cache_tree *reada,
7983 struct cache_tree *nodes,
7984 struct cache_tree *extent_cache,
7985 struct cache_tree *chunk_cache,
7986 struct rb_root *dev_cache,
7987 struct block_group_tree *block_group_cache,
7988 struct device_extent_tree *dev_extent_cache)
7993 while (!list_empty(list)) {
7994 struct root_item_record *rec;
7995 struct extent_buffer *buf;
7996 rec = list_entry(list->next,
7997 struct root_item_record, list);
7999 buf = read_tree_block(root->fs_info->tree_root,
8000 rec->bytenr, rec->level_size, 0);
8001 if (!extent_buffer_uptodate(buf)) {
8002 free_extent_buffer(buf);
8006 add_root_to_pending(buf, extent_cache, pending,
8007 seen, nodes, rec->objectid);
8009 * To rebuild extent tree, we need deal with snapshot
8010 * one by one, otherwise we deal with node firstly which
8011 * can maximize readahead.
8014 ret = run_next_block(root, bits, bits_nr, &last,
8015 pending, seen, reada, nodes,
8016 extent_cache, chunk_cache,
8017 dev_cache, block_group_cache,
8018 dev_extent_cache, rec);
8022 free_extent_buffer(buf);
8023 list_del(&rec->list);
/* Drain any blocks still pending after all roots were processed. */
8029 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8030 reada, nodes, extent_cache, chunk_cache,
8031 dev_cache, block_group_cache,
8032 dev_extent_cache, NULL);
/*
 * Top-level pass 1 of fsck: walk every tree (tree root, chunk root, all
 * fs/snapshot roots found via ROOT_ITEMs, including half-dropped ones),
 * building caches of extents, chunks, devices, dev extents and block
 * groups, then cross-check them with check_chunks(), check_extent_refs()
 * and check_devices().
 *
 * The helpers can return -EAGAIN after fixing something, in which case
 * the (elided) control flow tears the caches down and restarts the
 * whole scan.  NOTE(review): many original lines are elided here; the
 * comments describe only the visible code.
 */
8042 static int check_chunks_and_extents(struct btrfs_root *root)
8044 struct rb_root dev_cache;
8045 struct cache_tree chunk_cache;
8046 struct block_group_tree block_group_cache;
8047 struct device_extent_tree dev_extent_cache;
8048 struct cache_tree extent_cache;
8049 struct cache_tree seen;
8050 struct cache_tree pending;
8051 struct cache_tree reada;
8052 struct cache_tree nodes;
8053 struct extent_io_tree excluded_extents;
8054 struct cache_tree corrupt_blocks;
8055 struct btrfs_path path;
8056 struct btrfs_key key;
8057 struct btrfs_key found_key;
8059 struct block_info *bits;
8061 struct extent_buffer *leaf;
8063 struct btrfs_root_item ri;
8064 struct list_head dropping_trees;
8065 struct list_head normal_trees;
8066 struct btrfs_root *root1;
8071 dev_cache = RB_ROOT;
8072 cache_tree_init(&chunk_cache);
8073 block_group_tree_init(&block_group_cache);
8074 device_extent_tree_init(&dev_extent_cache);
8076 cache_tree_init(&extent_cache);
8077 cache_tree_init(&seen);
8078 cache_tree_init(&pending);
8079 cache_tree_init(&nodes);
8080 cache_tree_init(&reada);
8081 cache_tree_init(&corrupt_blocks);
8082 extent_io_tree_init(&excluded_extents);
8083 INIT_LIST_HEAD(&dropping_trees);
8084 INIT_LIST_HEAD(&normal_trees);
/* Hook our scratch structures into fs_info so helpers can reach them. */
8087 root->fs_info->excluded_extents = &excluded_extents;
8088 root->fs_info->fsck_extent_cache = &extent_cache;
8089 root->fs_info->free_extent_hook = free_extent_hook;
8090 root->fs_info->corrupt_blocks = &corrupt_blocks;
8094 bits = malloc(bits_nr * sizeof(struct block_info));
8100 if (ctx.progress_enabled) {
8101 ctx.tp = TASK_EXTENTS;
8102 task_start(ctx.info);
/* Queue the tree root and chunk root for scanning. */
8106 root1 = root->fs_info->tree_root;
8107 level = btrfs_header_level(root1->node);
8108 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8109 root1->node->start, 0, level, 0,
8110 btrfs_level_size(root1, level), NULL);
8113 root1 = root->fs_info->chunk_root;
8114 level = btrfs_header_level(root1->node);
8115 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8116 root1->node->start, 0, level, 0,
8117 btrfs_level_size(root1, level), NULL);
/* Enumerate every ROOT_ITEM in the tree root and queue its tree. */
8120 btrfs_init_path(&path);
8123 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8124 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8129 leaf = path.nodes[0];
8130 slot = path.slots[0];
8131 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8132 ret = btrfs_next_leaf(root, &path);
8135 leaf = path.nodes[0];
8136 slot = path.slots[0];
8138 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8139 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8140 unsigned long offset;
8143 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8144 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8145 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress == 0 means the tree is not mid-deletion. */
8146 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8147 level = btrfs_root_level(&ri);
8148 level_size = btrfs_level_size(root, level);
8149 ret = add_root_item_to_list(&normal_trees,
8151 btrfs_root_bytenr(&ri),
8152 last_snapshot, level,
8153 0, level_size, NULL);
8157 level = btrfs_root_level(&ri);
8158 level_size = btrfs_level_size(root, level);
8159 objectid = found_key.objectid;
8160 btrfs_disk_key_to_cpu(&found_key,
8162 ret = add_root_item_to_list(&dropping_trees,
8164 btrfs_root_bytenr(&ri),
8165 last_snapshot, level,
8167 level_size, &found_key);
8174 btrfs_release_path(&path);
8177 * check_block can return -EAGAIN if it fixes something, please keep
8178 * this in mind when dealing with return values from these functions, if
8179 * we get -EAGAIN we want to fall through and restart the loop.
8181 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8182 &seen, &reada, &nodes, &extent_cache,
8183 &chunk_cache, &dev_cache, &block_group_cache,
8190 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8191 &pending, &seen, &reada, &nodes,
8192 &extent_cache, &chunk_cache, &dev_cache,
8193 &block_group_cache, &dev_extent_cache);
/* Cross-check phase: chunks, then extent refs, then devices. */
8200 ret = check_chunks(&chunk_cache, &block_group_cache,
8201 &dev_extent_cache, NULL, NULL, NULL, 0);
8208 ret = check_extent_refs(root, &extent_cache);
8215 ret = check_devices(&dev_cache, &dev_extent_cache);
8220 task_stop(ctx.info);
/* Normal teardown: unhook from fs_info and free all caches. */
8222 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8223 extent_io_tree_cleanup(&excluded_extents);
8224 root->fs_info->fsck_extent_cache = NULL;
8225 root->fs_info->free_extent_hook = NULL;
8226 root->fs_info->corrupt_blocks = NULL;
8227 root->fs_info->excluded_extents = NULL;
8230 free_chunk_cache_tree(&chunk_cache);
8231 free_device_cache_tree(&dev_cache);
8232 free_block_group_tree(&block_group_cache);
8233 free_device_extent_tree(&dev_extent_cache);
8234 free_extent_cache_tree(&seen);
8235 free_extent_cache_tree(&pending);
8236 free_extent_cache_tree(&reada);
8237 free_extent_cache_tree(&nodes);
/* -EAGAIN teardown path: free everything so the scan can restart. */
8240 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8241 free_extent_cache_tree(&seen);
8242 free_extent_cache_tree(&pending);
8243 free_extent_cache_tree(&reada);
8244 free_extent_cache_tree(&nodes);
8245 free_chunk_cache_tree(&chunk_cache);
8246 free_block_group_tree(&block_group_cache);
8247 free_device_cache_tree(&dev_cache);
8248 free_device_extent_tree(&dev_extent_cache);
8249 free_extent_record_cache(root->fs_info, &extent_cache);
8250 free_root_item_list(&normal_trees);
8251 free_root_item_list(&dropping_trees);
8252 extent_io_tree_cleanup(&excluded_extents);
/*
 * Replace @root's node with a fresh empty leaf (or reuse the old block
 * when @overwrite), initialize its header, and update the root item in
 * the tree root unless the new block landed on the same bytenr as the
 * old one.  Used when rebuilding a damaged tree from scratch.
 */
8256 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8257 struct btrfs_root *root, int overwrite)
8259 struct extent_buffer *c;
8260 struct extent_buffer *old = root->node;
8263 struct btrfs_disk_key disk_key = {0,0,0};
8269 extent_buffer_get(c);
8272 c = btrfs_alloc_free_block(trans, root,
8273 btrfs_level_size(root, 0),
8274 root->root_key.objectid,
8275 &disk_key, level, 0, 0);
/* Fall back to reusing the old root block (elided error branch). */
8278 extent_buffer_get(c);
/* Initialize a pristine header for the new root block. */
8282 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8283 btrfs_set_header_level(c, level);
8284 btrfs_set_header_bytenr(c, c->start);
8285 btrfs_set_header_generation(c, trans->transid);
8286 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8287 btrfs_set_header_owner(c, root->root_key.objectid);
8289 write_extent_buffer(c, root->fs_info->fsid,
8290 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8292 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8293 btrfs_header_chunk_tree_uuid(c),
8296 btrfs_mark_buffer_dirty(c);
8298 * this case can happen in the following case:
8300 * 1.overwrite previous root.
8302 * 2.reinit reloc data root, this is because we skip pin
8303 * down reloc data tree before which means we can allocate
8304 * same block bytenr here.
8306 if (old->start == c->start) {
8307 btrfs_set_root_generation(&root->root_item,
8309 root->root_item.level = btrfs_header_level(root->node);
8310 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8311 &root->root_key, &root->root_item);
8313 free_extent_buffer(c);
8317 free_extent_buffer(old);
8319 add_root_to_dirty_list(root);
/*
 * Recursively pin every metadata block reachable from @eb so that later
 * allocations (during extent tree re-init) cannot overwrite live metadata.
 *
 * @tree_root: non-zero when @eb belongs to the tree root; in that case
 * leaves are scanned for ROOT_ITEMs and each referenced subtree is pinned
 * too (except the extent root and the reloc roots, which are rebuilt).
 * Already-pinned ranges are skipped to avoid loops on broken filesystems.
 *
 * NOTE(review): error-path lines are elided in this excerpt — confirm
 * return handling against full source.
 */
8323 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8324 struct extent_buffer *eb, int tree_root)
8326 struct extent_buffer *tmp;
8327 struct btrfs_root_item *ri;
8328 struct btrfs_key key;
8331 int level = btrfs_header_level(eb);
8337 * If we have pinned this block before, don't pin it again.
8338 * This can not only avoid forever loop with broken filesystem
8339 * but also give us some speedups.
8341 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8342 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8345 btrfs_pin_extent(fs_info, eb->start, eb->len);
8347 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8348 nritems = btrfs_header_nritems(eb);
8349 for (i = 0; i < nritems; i++) {
/* leaf of the tree root: walk the root items it contains */
8351 btrfs_item_key_to_cpu(eb, &key, i);
8352 if (key.type != BTRFS_ROOT_ITEM_KEY)
8354 /* Skip the extent root and reloc roots */
8355 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8356 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8357 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8359 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8360 bytenr = btrfs_disk_root_bytenr(eb, ri);
8363 * If at any point we start needing the real root we
8364 * will have to build a stump root for the root we are
8365 * in, but for now this doesn't actually use the root so
8366 * just pass in extent_root.
8368 tmp = read_tree_block(fs_info->extent_root, bytenr,
8370 if (!extent_buffer_uptodate(tmp)) {
8371 fprintf(stderr, "Error reading root block\n");
8374 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8375 free_extent_buffer(tmp);
/* interior node: recurse (or, one level above leaves, just pin) */
8379 bytenr = btrfs_node_blockptr(eb, i);
8381 /* If we aren't the tree root don't read the block */
8382 if (level == 1 && !tree_root) {
8383 btrfs_pin_extent(fs_info, bytenr, leafsize);
8387 tmp = read_tree_block(fs_info->extent_root, bytenr,
8389 if (!extent_buffer_uptodate(tmp)) {
8390 fprintf(stderr, "Error reading tree block\n");
8393 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8394 free_extent_buffer(tmp);
/*
 * Pin all metadata reachable from the chunk root and the tree root
 * (the latter with tree_root=1 so referenced subvolume trees are pinned
 * as well).  Used before re-initializing the extent tree.
 */
8403 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8407 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8411 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group state from the chunk tree.
 *
 * Walks every CHUNK_ITEM in the chunk root, creates a matching in-memory
 * block group (flags taken from the chunk type) and marks its range dirty
 * in the free space cache.  Afterwards iterates the created block groups
 * (lookup loop at the bottom; its body is partially elided here) so
 * allocations can proceed.  The avail_*_alloc_bits are cleared first in
 * case stale block groups advertised alloc bits the chunks don't have.
 */
8414 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8416 struct btrfs_block_group_cache *cache;
8417 struct btrfs_path *path;
8418 struct extent_buffer *leaf;
8419 struct btrfs_chunk *chunk;
8420 struct btrfs_key key;
8424 path = btrfs_alloc_path();
8429 key.type = BTRFS_CHUNK_ITEM_KEY;
8432 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8434 btrfs_free_path(path);
8439 * We do this in case the block groups were screwed up and had alloc
8440 * bits that aren't actually set on the chunks. This happens with
8441 * restored images every time and could happen in real life I guess.
8443 fs_info->avail_data_alloc_bits = 0;
8444 fs_info->avail_metadata_alloc_bits = 0;
8445 fs_info->avail_system_alloc_bits = 0;
8447 /* First we need to create the in-memory block groups */
8449 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8450 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8452 btrfs_free_path(path);
8460 leaf = path->nodes[0];
8461 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8462 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8467 chunk = btrfs_item_ptr(leaf, path->slots[0],
8468 struct btrfs_chunk);
/* one block group per chunk; key.offset is the chunk's start bytenr */
8469 btrfs_add_block_group(fs_info, 0,
8470 btrfs_chunk_type(leaf, chunk),
8471 key.objectid, key.offset,
8472 btrfs_chunk_length(leaf, chunk));
8473 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8474 key.offset + btrfs_chunk_length(leaf, chunk),
/* second pass over the freshly created block groups */
8480 cache = btrfs_lookup_first_block_group(fs_info, start);
8484 start = cache->key.objectid + cache->key.offset;
8487 btrfs_free_path(path);
/*
 * Remove any pending balance state and reloc roots left in the tree root,
 * then re-initialize the data reloc tree.
 *
 * Steps: delete the BALANCE_ITEM if present; delete all items belonging
 * to BTRFS_TREE_RELOC_OBJECTID (batched with del_slot/del_nr); finally
 * read the data reloc root, reinit it via btrfs_fsck_reinit_root() and
 * recreate its root directory.
 *
 * NOTE(review): loop structure and several error checks are elided in
 * this excerpt — confirm control flow against full source.
 */
8491 static int reset_balance(struct btrfs_trans_handle *trans,
8492 struct btrfs_fs_info *fs_info)
8494 struct btrfs_root *root = fs_info->tree_root;
8495 struct btrfs_path *path;
8496 struct extent_buffer *leaf;
8497 struct btrfs_key key;
8498 int del_slot, del_nr = 0;
8502 path = btrfs_alloc_path();
/* 1) drop the pending balance item, if any */
8506 key.objectid = BTRFS_BALANCE_OBJECTID;
8507 key.type = BTRFS_BALANCE_ITEM_KEY;
8510 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8515 goto reinit_data_reloc;
8520 ret = btrfs_del_item(trans, root, path);
8523 btrfs_release_path(path);
/* 2) delete every item of the tree reloc root */
8525 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8526 key.type = BTRFS_ROOT_ITEM_KEY;
8529 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8533 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8538 ret = btrfs_del_items(trans, root, path,
8545 btrfs_release_path(path);
8548 ret = btrfs_search_slot(trans, root, &key, path,
8555 leaf = path->nodes[0];
8556 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8557 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8559 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8564 del_slot = path->slots[0];
8573 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8577 btrfs_release_path(path);
/* 3) reinit the data reloc tree and give it a fresh root dir */
8580 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8581 key.type = BTRFS_ROOT_ITEM_KEY;
8582 key.offset = (u64)-1;
8583 root = btrfs_read_fs_root(fs_info, &key);
8585 fprintf(stderr, "Error reading data reloc tree\n");
8586 ret = PTR_ERR(root);
8589 record_root_in_trans(trans, root);
8590 ret = btrfs_fsck_reinit_root(trans, root, 0);
8593 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8595 btrfs_free_path(path);
/*
 * Rebuild the extent tree from scratch (--init-extent-tree).
 *
 * Refuses to run on mixed block group filesystems (file extents would
 * also need pinning).  Otherwise: pin all metadata so new allocations
 * cannot clobber it, drop and recreate the in-memory block groups from
 * the chunk tree, reinit the extent root, re-insert a BLOCK_GROUP_ITEM
 * for every block group, and finally clear any pending balance state.
 *
 * NOTE(review): loop structure and several error checks are elided in
 * this excerpt — confirm control flow against full source.
 */
8599 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8600 struct btrfs_fs_info *fs_info)
8606 * The only reason we don't do this is because right now we're just
8607 * walking the trees we find and pinning down their bytes, we don't look
8608 * at any of the leaves. In order to do mixed groups we'd have to check
8609 * the leaves of any fs roots and pin down the bytes for any file
8610 * extents we find. Not hard but why do it if we don't have to?
8612 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8613 fprintf(stderr, "We don't support re-initing the extent tree "
8614 "for mixed block groups yet, please notify a btrfs "
8615 "developer you want to do this so they can add this "
8616 "functionality.\n");
8621 * first we need to walk all of the trees except the extent tree and pin
8622 * down the bytes that are in use so we don't overwrite any existing
8625 ret = pin_metadata_blocks(fs_info);
8627 fprintf(stderr, "error pinning down used bytes\n");
8632 * Need to drop all the block groups since we're going to recreate all
8635 btrfs_free_block_groups(fs_info);
8636 ret = reset_block_groups(fs_info);
8638 fprintf(stderr, "error resetting the block groups\n");
8642 /* Ok we can allocate now, reinit the extent root */
8643 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8645 fprintf(stderr, "extent root initialization failed\n");
8647 * When the transaction code is updated we should end the
8648 * transaction, but for now progs only knows about commit so
8649 * just return an error.
8655 * Now we have all the in-memory block groups setup so we can make
8656 * allocations properly, and the metadata we care about is safe since we
8657 * pinned all of it above.
8660 struct btrfs_block_group_cache *cache;
8662 cache = btrfs_lookup_first_block_group(fs_info, start);
8665 start = cache->key.objectid + cache->key.offset;
/* put a BLOCK_GROUP_ITEM for this group back into the extent tree */
8666 ret = btrfs_insert_item(trans, fs_info->extent_root,
8667 &cache->key, &cache->item,
8668 sizeof(cache->item));
8670 fprintf(stderr, "Error adding block group\n");
8673 btrfs_extent_post_op(trans, fs_info->extent_root);
8676 ret = reset_balance(trans, fs_info);
8678 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a COW of metadata block @eb by searching down to it with a
 * transaction held: look up its owner root from the header, set the
 * path's lowest_level to the block's level and search for its first key
 * with cow=1, which rewrites the block.  Used to repair blocks flagged
 * for re-COW (e.g. transid problems).
 */
8683 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8685 struct btrfs_path *path;
8686 struct btrfs_trans_handle *trans;
8687 struct btrfs_key key;
8690 printf("Recowing metadata block %llu\n", eb->start);
8691 key.objectid = btrfs_header_owner(eb);
8692 key.type = BTRFS_ROOT_ITEM_KEY;
8693 key.offset = (u64)-1;
8695 root = btrfs_read_fs_root(root->fs_info, &key);
8697 fprintf(stderr, "Couldn't find owner root %llu\n",
8699 return PTR_ERR(root);
8702 path = btrfs_alloc_path();
8706 trans = btrfs_start_transaction(root, 1);
8707 if (IS_ERR(trans)) {
8708 btrfs_free_path(path);
8709 return PTR_ERR(trans);
/* stop the search at the block's own level so it gets COWed */
8712 path->lowest_level = btrfs_header_level(eb);
8713 if (path->lowest_level)
8714 btrfs_node_key_to_cpu(eb, &key, 0);
8716 btrfs_item_key_to_cpu(eb, &key, 0);
8718 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8719 btrfs_commit_transaction(trans, root);
8720 btrfs_free_path(path);
/*
 * Delete a single known-bad item (recorded on the global delete_items
 * list) from its owning root: read the root identified by bad->root_id,
 * search for bad->key with ins_len=-1/cow=1 and remove it inside a
 * one-shot transaction.
 */
8724 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8726 struct btrfs_path *path;
8727 struct btrfs_trans_handle *trans;
8728 struct btrfs_key key;
8731 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8732 bad->key.type, bad->key.offset);
8733 key.objectid = bad->root_id;
8734 key.type = BTRFS_ROOT_ITEM_KEY;
8735 key.offset = (u64)-1;
8737 root = btrfs_read_fs_root(root->fs_info, &key);
8739 fprintf(stderr, "Couldn't find owner root %llu\n",
8741 return PTR_ERR(root);
8744 path = btrfs_alloc_path();
8748 trans = btrfs_start_transaction(root, 1);
8749 if (IS_ERR(trans)) {
8750 btrfs_free_path(path);
8751 return PTR_ERR(trans);
8754 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8760 ret = btrfs_del_item(trans, root, path);
8762 btrfs_commit_transaction(trans, root);
8763 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the log root pointer and level in the
 * superblock and committing; prevents a stale log from being replayed
 * after repair has committed transactions.
 */
8767 static int zero_log_tree(struct btrfs_root *root)
8769 struct btrfs_trans_handle *trans;
8772 trans = btrfs_start_transaction(root, 1);
8773 if (IS_ERR(trans)) {
8774 ret = PTR_ERR(trans);
8777 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8778 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8779 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data extent [start, start+len):
 * read it back sector by sector into @buf and feed each sector to
 * btrfs_csum_file_block() (start+len acts as the extent end boundary).
 * Assumes @buf holds at least one sector — TODO confirm against callers.
 */
8783 static int populate_csum(struct btrfs_trans_handle *trans,
8784 struct btrfs_root *csum_root, char *buf, u64 start,
8791 while (offset < len) {
8792 sectorsize = csum_root->sectorsize;
8793 ret = read_extent_data(csum_root, buf, start + offset,
8797 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8798 start + offset, buf, sectorsize);
8801 offset += sectorsize;
/*
 * Walk one fs/subvolume root and generate csums for every regular
 * (non-inline, non-prealloc) file extent it references: iterate all
 * EXTENT_DATA items, take disk_bytenr/disk_num_bytes of REG extents and
 * hand them to populate_csum().  Advances with btrfs_next_item().
 */
8806 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8807 struct btrfs_root *csum_root,
8808 struct btrfs_root *cur_root)
8810 struct btrfs_path *path;
8811 struct btrfs_key key;
8812 struct extent_buffer *node;
8813 struct btrfs_file_extent_item *fi;
8820 path = btrfs_alloc_path();
/* one sector's worth of scratch space for populate_csum() */
8823 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8833 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8836 /* Iterate all regular file extents and fill its csum */
8838 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8840 if (key.type != BTRFS_EXTENT_DATA_KEY)
8842 node = path->nodes[0];
8843 slot = path->slots[0];
8844 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8845 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8847 start = btrfs_file_extent_disk_bytenr(node, fi);
8848 len = btrfs_file_extent_disk_num_bytes(node, fi);
8850 ret = populate_csum(trans, csum_root, buf, start, len);
8857 * TODO: if next leaf is corrupted, jump to nearest next valid
8860 ret = btrfs_next_item(cur_root, path);
8870 btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking every fs/subvolume tree: iterate the
 * ROOT_ITEMs in the tree root starting at FS_TREE_OBJECTID, read each
 * fstree root and delegate to fill_csum_tree_from_one_fs_root().
 * Used when the extent tree cannot be trusted (e.g. --init-extent-tree).
 */
8875 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8876 struct btrfs_root *csum_root)
8878 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8879 struct btrfs_path *path;
8880 struct btrfs_root *tree_root = fs_info->tree_root;
8881 struct btrfs_root *cur_root;
8882 struct extent_buffer *node;
8883 struct btrfs_key key;
8887 path = btrfs_alloc_path();
8891 key.objectid = BTRFS_FS_TREE_OBJECTID;
8893 key.type = BTRFS_ROOT_ITEM_KEY;
8895 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8904 node = path->nodes[0];
8905 slot = path->slots[0];
8906 btrfs_item_key_to_cpu(node, &key, slot);
8907 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8909 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* only genuine fs/subvolume trees carry file extents */
8911 if (!is_fstree(key.objectid))
8913 key.offset = (u64)-1;
8915 cur_root = btrfs_read_fs_root(fs_info, &key);
8916 if (IS_ERR(cur_root) || !cur_root) {
8917 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8921 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8926 ret = btrfs_next_item(tree_root, path);
8936 btrfs_free_path(path);
/*
 * Rebuild the csum tree from the extent tree: iterate EXTENT_ITEMs,
 * skip those without BTRFS_EXTENT_FLAG_DATA, and checksum each data
 * extent (key.objectid = bytenr, key.offset = length) via
 * populate_csum().  Faster than walking fs trees, but requires a
 * trustworthy extent tree.
 */
8940 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8941 struct btrfs_root *csum_root)
8943 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8944 struct btrfs_path *path;
8945 struct btrfs_extent_item *ei;
8946 struct extent_buffer *leaf;
8948 struct btrfs_key key;
8951 path = btrfs_alloc_path();
8956 key.type = BTRFS_EXTENT_ITEM_KEY;
8959 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8961 btrfs_free_path(path);
8965 buf = malloc(csum_root->sectorsize);
8967 btrfs_free_path(path);
8972 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8973 ret = btrfs_next_leaf(extent_root, path);
8981 leaf = path->nodes[0];
8983 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8984 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8989 ei = btrfs_item_ptr(leaf, path->slots[0],
8990 struct btrfs_extent_item);
/* metadata extents carry no data csums */
8991 if (!(btrfs_extent_flags(leaf, ei) &
8992 BTRFS_EXTENT_FLAG_DATA)) {
8997 ret = populate_csum(trans, csum_root, buf, key.objectid,
9004 btrfs_free_path(path);
9010 * Recalculate the csum and put it into the csum tree.
9012 * Extent tree init will wipe out all the extent info, so in that case, we
9013 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9014 * will use fs/subvol trees to init the csum tree.
/* Dispatcher: choose the csum rebuild source based on search_fs_tree. */
9016 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9017 struct btrfs_root *csum_root,
9021 return fill_csum_tree_from_fs(trans, csum_root);
9023 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Per-root summary gathered from the extent tree, used to validate and
 * repair ROOT_ITEMs (see build_roots_info_cache / maybe_repair_root_item).
 * Keyed in roots_info_cache by root objectid (cache_extent.start).
 */
9026 struct root_item_info {
9027 /* level of the root */
9029 /* number of nodes at this level, must be 1 for a root */
9033 struct cache_extent cache_extent;
/* lazily allocated in build_roots_info_cache(), freed after repair */
9036 static struct cache_tree *roots_info_cache = NULL;
/*
 * Tear down roots_info_cache: pop every cached entry, free its
 * root_item_info container, then free the tree itself and reset the
 * pointer so the cache can be rebuilt later.  No-op if never built.
 */
9038 static void free_roots_info_cache(void)
9040 if (!roots_info_cache)
9043 while (!cache_tree_empty(roots_info_cache)) {
9044 struct cache_extent *entry;
9045 struct root_item_info *rii;
9047 entry = first_cache_extent(roots_info_cache);
9050 remove_cache_extent(roots_info_cache, entry);
9051 rii = container_of(entry, struct root_item_info, cache_extent);
9055 free(roots_info_cache);
9056 roots_info_cache = NULL;
/*
 * Scan the extent tree and, for every tree block with a TREE_BLOCK_REF
 * inline backref, record per-root info (highest level seen, its bytenr,
 * generation, and how many nodes exist at that level) in
 * roots_info_cache.  A root extent must be the single highest-level node
 * of its tree, which maybe_repair_root_item() later checks against the
 * on-disk ROOT_ITEM.
 */
9059 static int build_roots_info_cache(struct btrfs_fs_info *info)
9062 struct btrfs_key key;
9063 struct extent_buffer *leaf;
9064 struct btrfs_path *path;
9066 if (!roots_info_cache) {
9067 roots_info_cache = malloc(sizeof(*roots_info_cache));
9068 if (!roots_info_cache)
9070 cache_tree_init(roots_info_cache);
9073 path = btrfs_alloc_path();
9078 key.type = BTRFS_EXTENT_ITEM_KEY;
9081 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9084 leaf = path->nodes[0];
9087 struct btrfs_key found_key;
9088 struct btrfs_extent_item *ei;
9089 struct btrfs_extent_inline_ref *iref;
9090 int slot = path->slots[0];
9095 struct cache_extent *entry;
9096 struct root_item_info *rii;
9098 if (slot >= btrfs_header_nritems(leaf)) {
9099 ret = btrfs_next_leaf(info->extent_root, path);
9106 leaf = path->nodes[0];
9107 slot = path->slots[0];
9110 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9112 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9113 found_key.type != BTRFS_METADATA_ITEM_KEY)
9116 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9117 flags = btrfs_extent_flags(leaf, ei);
/* only tree blocks are of interest here */
9119 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9120 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* METADATA_ITEM encodes the level in key.offset; EXTENT_ITEM
 * carries a tree_block_info before the inline refs */
9123 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9124 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9125 level = found_key.offset;
9127 struct btrfs_tree_block_info *info;
9129 info = (struct btrfs_tree_block_info *)(ei + 1);
9130 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9131 level = btrfs_tree_block_level(leaf, info);
9135 * For a root extent, it must be of the following type and the
9136 * first (and only one) iref in the item.
9138 type = btrfs_extent_inline_ref_type(leaf, iref);
9139 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9142 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9143 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* first sighting of this root: create a fresh record */
9145 rii = malloc(sizeof(struct root_item_info));
9150 rii->cache_extent.start = root_id;
9151 rii->cache_extent.size = 1;
9152 rii->level = (u8)-1;
9153 entry = &rii->cache_extent;
9154 ret = insert_cache_extent(roots_info_cache, entry);
9157 rii = container_of(entry, struct root_item_info,
9161 ASSERT(rii->cache_extent.start == root_id);
9162 ASSERT(rii->cache_extent.size == 1);
/* track only the highest level; count nodes seen at that level */
9164 if (level > rii->level || rii->level == (u8)-1) {
9166 rii->bytenr = found_key.objectid;
9167 rii->gen = btrfs_extent_generation(leaf, ei);
9168 rii->node_count = 1;
9169 } else if (level == rii->level) {
9177 btrfs_free_path(path);
/*
 * Compare the on-disk ROOT_ITEM at path->nodes[0]/slots[0] against the
 * info gathered in roots_info_cache and, unless @read_only_mode, rewrite
 * bytenr/level/generation in place when they disagree.  Refuses to
 * "fix" a root item whose generation is newer than the found root node.
 * Returns (per callers in repair_root_items) whether a fix was
 * needed/applied — exact return values are elided in this excerpt.
 */
9182 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9183 struct btrfs_path *path,
9184 const struct btrfs_key *root_key,
9185 const int read_only_mode)
9187 const u64 root_id = root_key->objectid;
9188 struct cache_extent *entry;
9189 struct root_item_info *rii;
9190 struct btrfs_root_item ri;
9191 unsigned long offset;
9193 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9196 "Error: could not find extent items for root %llu\n",
9197 root_key->objectid);
9201 rii = container_of(entry, struct root_item_info, cache_extent);
9202 ASSERT(rii->cache_extent.start == root_id);
9203 ASSERT(rii->cache_extent.size == 1);
/* a valid root must be the unique node at its level */
9205 if (rii->node_count != 1) {
9207 "Error: could not find btree root extent for root %llu\n",
9212 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9213 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9215 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9216 btrfs_root_level(&ri) != rii->level ||
9217 btrfs_root_generation(&ri) != rii->gen) {
9220 * If we're in repair mode but our caller told us to not update
9221 * the root item, i.e. just check if it needs to be updated, don't
9222 * print this message, since the caller will call us again shortly
9223 * for the same root item without read only mode (the caller will
9224 * open a transaction first).
9226 if (!(read_only_mode && repair))
9228 "%sroot item for root %llu,"
9229 " current bytenr %llu, current gen %llu, current level %u,"
9230 " new bytenr %llu, new gen %llu, new level %u\n",
9231 (read_only_mode ? "" : "fixing "),
9233 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9234 btrfs_root_level(&ri),
9235 rii->bytenr, rii->gen, rii->level);
/* never regress a root item to an older generation */
9237 if (btrfs_root_generation(&ri) > rii->gen) {
9239 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9240 root_id, btrfs_root_generation(&ri), rii->gen);
9244 if (!read_only_mode) {
9245 btrfs_set_root_bytenr(&ri, rii->bytenr);
9246 btrfs_set_root_level(&ri, rii->level);
9247 btrfs_set_root_generation(&ri, rii->gen);
9248 write_extent_buffer(path->nodes[0], &ri,
9249 offset, sizeof(ri));
9259 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9260 * caused read-only snapshots to be corrupted if they were created at a moment
9261 * when the source subvolume/snapshot had orphan items. The issue was that the
9262 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9263 * node instead of the post orphan cleanup root node.
9264 * So this function, and its callees, just detects and fixes those cases. Even
9265 * though the regression was for read-only snapshots, this function applies to
9266 * any snapshot/subvolume root.
9267 * This must be run before any other repair code - if it is not run first,
9268 * other repair code may delete or modify backrefs in the extent tree, which
9269 * will result in an inconsistent fs after repairing the root items.
/*
 * Walks ROOT_ITEMs starting at FIRST_FREE_OBJECTID, calling
 * maybe_repair_root_item() for each (skipping reloc roots).  Transactions
 * are only opened when a leaf actually contains an item to fix, to avoid
 * rotating backup roots needlessly.  Returns the number of bad/fixed
 * roots, or a negative error — TODO confirm, some lines elided.
 */
9271 static int repair_root_items(struct btrfs_fs_info *info)
9273 struct btrfs_path *path = NULL;
9274 struct btrfs_key key;
9275 struct extent_buffer *leaf;
9276 struct btrfs_trans_handle *trans = NULL;
9281 ret = build_roots_info_cache(info);
9285 path = btrfs_alloc_path();
9291 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9292 key.type = BTRFS_ROOT_ITEM_KEY;
9297 * Avoid opening and committing transactions if a leaf doesn't have
9298 * any root items that need to be fixed, so that we avoid rotating
9299 * backup roots unnecessarily.
9302 trans = btrfs_start_transaction(info->tree_root, 1);
9303 if (IS_ERR(trans)) {
9304 ret = PTR_ERR(trans);
9309 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9313 leaf = path->nodes[0];
9316 struct btrfs_key found_key;
9318 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9319 int no_more_keys = find_next_key(path, &key);
9321 btrfs_release_path(path);
9323 ret = btrfs_commit_transaction(trans,
9335 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9337 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9339 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9342 ret = maybe_repair_root_item(info, path, &found_key,
/* item needs fixing: reopen the leaf under a transaction */
9347 if (!trans && repair) {
9350 btrfs_release_path(path);
9360 free_roots_info_cache();
9361 btrfs_free_path(path);
9363 btrfs_commit_transaction(trans, info->tree_root);
9370 const char * const cmd_check_usage[] = {
9371 "btrfs check [options] <device>",
9372 "Check structural inegrity of a filesystem (unmounted).",
9373 "Check structural inegrity of an unmounted filesystem. Verify internal",
9374 "trees' consistency and item connectivity. In the repair mode try to",
9375 "fix the problems found.",
9376 "WARNING: the repair mode is considered dangerous",
9378 "-s|--super <superblock> use this superblock copy",
9379 "-b|--backup use the backup root copy",
9380 "--repair try to repair the filesystem",
9381 "--readonly run in read-only mode (default)",
9382 "--init-csum-tree create a new CRC tree",
9383 "--init-extent-tree create a new extent tree",
9384 "--check-data-csum verify checkums of data blocks",
9385 "-Q|--qgroup-report print a report on qgroup consistency",
9386 "-E|--subvol-extents <subvolid>",
9387 " print subvolume extents and sharing state",
9388 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9389 "-p|--progress indicate progress",
9393 int cmd_check(int argc, char **argv)
9395 struct cache_tree root_cache;
9396 struct btrfs_root *root;
9397 struct btrfs_fs_info *info;
9400 u64 tree_root_bytenr = 0;
9401 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9404 int init_csum_tree = 0;
9406 int qgroup_report = 0;
9407 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9411 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9412 OPT_CHECK_CSUM, OPT_READONLY };
9413 static const struct option long_options[] = {
9414 { "super", required_argument, NULL, 's' },
9415 { "repair", no_argument, NULL, OPT_REPAIR },
9416 { "readonly", no_argument, NULL, OPT_READONLY },
9417 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9418 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9419 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9420 { "backup", no_argument, NULL, 'b' },
9421 { "subvol-extents", required_argument, NULL, 'E' },
9422 { "qgroup-report", no_argument, NULL, 'Q' },
9423 { "tree-root", required_argument, NULL, 'r' },
9424 { "progress", no_argument, NULL, 'p' },
9428 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9432 case 'a': /* ignored */ break;
9434 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9437 num = arg_strtou64(optarg);
9438 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9440 "ERROR: super mirror should be less than: %d\n",
9441 BTRFS_SUPER_MIRROR_MAX);
9444 bytenr = btrfs_sb_offset(((int)num));
9445 printf("using SB copy %llu, bytenr %llu\n", num,
9446 (unsigned long long)bytenr);
9452 subvolid = arg_strtou64(optarg);
9455 tree_root_bytenr = arg_strtou64(optarg);
9458 ctx.progress_enabled = true;
9462 usage(cmd_check_usage);
9464 printf("enabling repair mode\n");
9466 ctree_flags |= OPEN_CTREE_WRITES;
9472 printf("Creating a new CRC tree\n");
9475 ctree_flags |= OPEN_CTREE_WRITES;
9477 case OPT_INIT_EXTENT:
9478 init_extent_tree = 1;
9479 ctree_flags |= (OPEN_CTREE_WRITES |
9480 OPEN_CTREE_NO_BLOCK_GROUPS);
9483 case OPT_CHECK_CSUM:
9484 check_data_csum = 1;
9488 argc = argc - optind;
9490 if (check_argc_exact(argc, 1))
9491 usage(cmd_check_usage);
9493 if (ctx.progress_enabled) {
9494 ctx.tp = TASK_NOTHING;
9495 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9498 /* This check is the only reason for --readonly to exist */
9499 if (readonly && repair) {
9500 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9505 cache_tree_init(&root_cache);
9507 if((ret = check_mounted(argv[optind])) < 0) {
9508 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9511 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9516 /* only allow partial opening under repair mode */
9518 ctree_flags |= OPEN_CTREE_PARTIAL;
9520 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9523 fprintf(stderr, "Couldn't open file system\n");
9529 root = info->fs_root;
9532 * repair mode will force us to commit transaction which
9533 * will make us fail to load log tree when mounting.
9535 if (repair && btrfs_super_log_root(info->super_copy)) {
9536 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9541 ret = zero_log_tree(root);
9543 fprintf(stderr, "fail to zero log tree\n");
9548 uuid_unparse(info->super_copy->fsid, uuidbuf);
9549 if (qgroup_report) {
9550 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9552 ret = qgroup_verify_all(info);
9554 print_qgroup_report(1);
9558 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9559 subvolid, argv[optind], uuidbuf);
9560 ret = print_extent_state(info, subvolid);
9563 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9565 if (!extent_buffer_uptodate(info->tree_root->node) ||
9566 !extent_buffer_uptodate(info->dev_root->node) ||
9567 !extent_buffer_uptodate(info->chunk_root->node)) {
9568 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9573 if (init_extent_tree || init_csum_tree) {
9574 struct btrfs_trans_handle *trans;
9576 trans = btrfs_start_transaction(info->extent_root, 0);
9577 if (IS_ERR(trans)) {
9578 fprintf(stderr, "Error starting transaction\n");
9579 ret = PTR_ERR(trans);
9583 if (init_extent_tree) {
9584 printf("Creating a new extent tree\n");
9585 ret = reinit_extent_tree(trans, info);
9590 if (init_csum_tree) {
9591 fprintf(stderr, "Reinit crc root\n");
9592 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9594 fprintf(stderr, "crc root initialization failed\n");
9599 ret = fill_csum_tree(trans, info->csum_root,
9602 fprintf(stderr, "crc refilling failed\n");
9607 * Ok now we commit and run the normal fsck, which will add
9608 * extent entries for all of the items it finds.
9610 ret = btrfs_commit_transaction(trans, info->extent_root);
9614 if (!extent_buffer_uptodate(info->extent_root->node)) {
9615 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9619 if (!extent_buffer_uptodate(info->csum_root->node)) {
9620 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9625 if (!ctx.progress_enabled)
9626 fprintf(stderr, "checking extents\n");
9627 ret = check_chunks_and_extents(root);
9629 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9631 ret = repair_root_items(info);
9635 fprintf(stderr, "Fixed %d roots.\n", ret);
9637 } else if (ret > 0) {
9639 "Found %d roots with an outdated root item.\n",
9642 "Please run a filesystem check with the option --repair to fix them.\n");
9647 if (!ctx.progress_enabled)
9648 fprintf(stderr, "checking free space cache\n");
9649 ret = check_space_cache(root);
9654 * We used to have to have these hole extents in between our real
9655 * extents so if we don't have this flag set we need to make sure there
9656 * are no gaps in the file extents for inodes, otherwise we can just
9657 * ignore it when this happens.
9659 no_holes = btrfs_fs_incompat(root->fs_info,
9660 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9661 if (!ctx.progress_enabled)
9662 fprintf(stderr, "checking fs roots\n");
9663 ret = check_fs_roots(root, &root_cache);
9667 fprintf(stderr, "checking csums\n");
9668 ret = check_csums(root);
9672 fprintf(stderr, "checking root refs\n");
9673 ret = check_root_refs(root, &root_cache);
9677 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9678 struct extent_buffer *eb;
9680 eb = list_first_entry(&root->fs_info->recow_ebs,
9681 struct extent_buffer, recow);
9682 list_del_init(&eb->recow);
9683 ret = recow_extent_buffer(root, eb);
9688 while (!list_empty(&delete_items)) {
9689 struct bad_item *bad;
9691 bad = list_first_entry(&delete_items, struct bad_item, list);
9692 list_del_init(&bad->list);
9694 ret = delete_bad_item(root, bad);
9698 if (info->quota_enabled) {
9700 fprintf(stderr, "checking quota groups\n");
9701 err = qgroup_verify_all(info);
9706 if (!list_empty(&root->fs_info->recow_ebs)) {
9707 fprintf(stderr, "Transid errors in file system\n");
9711 print_qgroup_report(0);
9712 if (found_old_backref) { /*
9713 * there was a disk format change when mixed
9714 * backref was in testing tree. The old format
9715 * existed about one week.
9717 printf("\n * Found old mixed backref format. "
9718 "The old format is not supported! *"
9719 "\n * Please mount the FS in readonly mode, "
9720 "backup data and re-format the FS. *\n\n");
9723 printf("found %llu bytes used err is %d\n",
9724 (unsigned long long)bytes_used, ret);
9725 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9726 printf("total tree bytes: %llu\n",
9727 (unsigned long long)total_btree_bytes);
9728 printf("total fs tree bytes: %llu\n",
9729 (unsigned long long)total_fs_tree_bytes);
9730 printf("total extent tree bytes: %llu\n",
9731 (unsigned long long)total_extent_tree_bytes);
9732 printf("btree space waste bytes: %llu\n",
9733 (unsigned long long)btree_space_waste);
9734 printf("file data blocks allocated: %llu\n referenced %llu\n",
9735 (unsigned long long)data_bytes_allocated,
9736 (unsigned long long)data_bytes_referenced);
9738 free_root_recs_tree(&root_cache);
9742 if (ctx.progress_enabled)
9743 task_deinit(ctx.info);