2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
/*
 * NOTE(review): excerpted listing — the enclosing enum task_position and
 * struct task_ctx headers, plus intermediate lines, are missing here.
 */
48 TASK_NOTHING, /* have to be the last element */
/* Fields of the progress-reporting context (struct task_ctx). */
53 enum task_position tp;
55 struct task_info *info;
/* File-global byte/space accounting accumulated during the check. */
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
/* Presumably set when a legacy-format extent backref is seen — TODO confirm. */
66 static int found_old_backref = 0;
/* Work lists shared across the whole check pass. */
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
/* Mode flags, presumably driven by command-line options — TODO confirm. */
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
/* Global fs_info handle and the shared progress context. */
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
/*
 * Progress-indicator thread body: periodically redraws the current task
 * description with a spinner character.
 * NOTE(review): loop structure, 'count' declaration and returns are
 * missing from this excerpt.
 */
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
/* Spinner glyphs, cycled via count % 4 in the printf below. */
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Human-readable labels indexed by enum task_position (priv->tp). */
81 static char *task_position_string[] = {
83 "checking free space cache",
/* Start a 1000 ms periodic timer for this task. */
87 task_period_start(priv->info, 1000 /* 1s */);
/* TASK_NOTHING means there is nothing to report. */
89 if (priv->tp == TASK_NOTHING)
/* '\r' keeps the status on one console line. */
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
/* Sleep until the next period tick before redrawing. */
97 task_period_wait(priv->info);
/* Callback deciding whether the status thread keeps running — body not shown. */
102 static int print_status_return(void *p)
/*
 * In-memory record types used while cross-checking the extent tree
 * against fs trees. NOTE(review): several struct bodies are truncated
 * in this excerpt.
 */
/* Common header for a backref found for an extent (data or tree). */
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
/* Backref of a data extent; embeds the common extent_backref node. */
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
/* Backref of a tree block; embeds the common extent_backref node. */
149 struct tree_backref {
150 struct extent_backref node;
/* Aggregated state for one extent found during the scan. */
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
/* One directory-entry/inode-ref linkage found for an inode. */
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
189 unsigned int filetype:8;
191 unsigned int ref_type;
/* Queued root item to be walked later. */
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
/* Bit flags recording inconsistencies of a single backref. */
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * File-extent hole tracking: holes per inode are kept in an rb-tree of
 * struct file_extent_hole keyed by start offset. NOTE(review): excerpted
 * listing — braces, returns and some statements are missing below.
 */
223 struct file_extent_hole {
229 /* Compatible function to allow reuse of old codes */
/* Return the start of the first (lowest-offset) hole in the tree. */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
/* No holes recorded — result for the empty tree is not visible here. */
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/* rb-tree comparator ordering holes by start, then by length. */
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
260 /* Hole 2 will be merge center */
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
/* Insert [start, start+len) and coalesce with adjacent/overlapping holes. */
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
/* NOTE(review): malloc result check is not visible in this excerpt. */
277 hole = malloc(sizeof(*hole));
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* Previous hole touches or overlaps us: absorb it and drop it. */
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iterate merge with next holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
/* Absorb each following hole that touches or overlaps the merged span. */
303 if (hole->start + hole->len >= next->start) {
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
/* rb_search callback: locate the hole containing offset 'start' (in *data). */
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
321 hole = (struct file_extent_hole *)data;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
/* start falls inside this hole — match. */
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole in the record
335 * This will do the hole split and is much restrict than add.
/* Remove [start, start+len); re-adds leftover fragments on either side. */
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
346 struct rb_node *node;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
/* Range must not extend past the found hole's end. */
357 if (start + len > hole->start + hole->len)
361 * Now there will be no overflap, delete the hole and re-add the
362 * split(s) if they exists.
/* Leftover fragment before the deleted range. */
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
/* Leftover fragment after the deleted range. */
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
/* Copy every hole from src into dst (merging handled by add). */
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
/* Erase and free every hole in the tree. */
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
417 node = rb_first(holes);
/*
 * Per-inode aggregate record built while walking an fs tree, plus the
 * shared-node/walk-control machinery used for subvolume tree walking.
 * NOTE(review): several struct headers/members are missing from this
 * excerpt (e.g. the structs at original lines 481, 489, 494 lack names).
 */
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
/* Holes in the file extent coverage; see file_extent_hole helpers. */
443 struct rb_root holes;
444 struct list_head orphan_extents;
/* Bit flags recording inconsistencies of a single inode. */
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/* Linkage state of one subvolume root (dir item/index + root ref pairs). */
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
/* Struct headers for the following member groups are missing here. */
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
489 struct cache_extent cache;
/* Presumably struct shared_node: caches keyed by tree-block bytenr. */
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
/* Walk state: shared-node cache plus one slot per possible tree level. */
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
514 struct btrfs_key key;
516 struct list_head list;
/* Forward declaration — defined later in the file. */
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark a root as participating in the current transaction: flag it
 * dirty-tracked and pin its current node as the commit root.
 */
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
/* Only do the bookkeeping once per transaction. */
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
/* Take an extra reference so commit_root outlives node replacement. */
528 extent_buffer_get(root->node);
/*
 * Map an inode mode (S_IF* bits) to the corresponding BTRFS_FT_* dir
 * entry type via a lookup table indexed by (imode & S_IFMT) >> S_SHIFT.
 */
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/* rb-tree comparator for device records, ordered by devid. */
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode_record: duplicates the scalar fields, then clones
 * the backref list, the orphan-extent list and the holes tree so the
 * clone owns its own copies.
 * NOTE(review): malloc results are not checked in the visible lines.
 */
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct orphan_data_extent *src_orphan;
570 struct orphan_data_extent *dst_orphan;
574 rec = malloc(sizeof(*rec));
575 memcpy(rec, orig_rec, sizeof(*rec));
/* Re-initialize embedded containers so they don't alias the original. */
577 INIT_LIST_HEAD(&rec->backrefs);
578 INIT_LIST_HEAD(&rec->orphan_extents);
579 rec->holes = RB_ROOT;
/* Backrefs carry a trailing name, hence the variable allocation size. */
581 list_for_each_entry(orig, &orig_rec->backrefs, list) {
582 size = sizeof(*orig) + orig->namelen + 1;
583 backref = malloc(size);
584 memcpy(backref, orig, size);
585 list_add_tail(&backref->list, &rec->backrefs);
587 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588 dst_orphan = malloc(sizeof(*dst_orphan));
589 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
591 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
594 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/*
 * Print every orphan data extent attached to an inode record, for the
 * given tree (root objectid formatted into the header line).
 */
600 static void print_orphan_data_extents(struct list_head *orphan_extents,
603 struct orphan_data_extent *orphan;
605 if (list_empty(orphan_extents))
607 printf("The following data extent is lost in tree %llu:\n",
609 list_for_each_entry(orphan, orphan_extents, list) {
610 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
611 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Decode rec->errors into a human-readable report on stderr, one clause
 * per I_ERR_* bit, then dump orphan extents and hole ranges if those
 * error bits are set.
 */
616 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
618 u64 root_objectid = root->root_key.objectid;
619 int errors = rec->errors;
623 /* reloc root errors, we print its corresponding fs root objectid*/
624 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
625 root_objectid = root->root_key.offset;
626 fprintf(stderr, "reloc");
628 fprintf(stderr, "root %llu inode %llu errors %x",
629 (unsigned long long) root_objectid,
630 (unsigned long long) rec->ino, rec->errors);
/* One clause per error bit; order mirrors the I_ERR_* definitions. */
632 if (errors & I_ERR_NO_INODE_ITEM)
633 fprintf(stderr, ", no inode item");
634 if (errors & I_ERR_NO_ORPHAN_ITEM)
635 fprintf(stderr, ", no orphan item");
636 if (errors & I_ERR_DUP_INODE_ITEM)
637 fprintf(stderr, ", dup inode item");
638 if (errors & I_ERR_DUP_DIR_INDEX)
639 fprintf(stderr, ", dup dir index");
640 if (errors & I_ERR_ODD_DIR_ITEM)
641 fprintf(stderr, ", odd dir item");
642 if (errors & I_ERR_ODD_FILE_EXTENT)
643 fprintf(stderr, ", odd file extent");
644 if (errors & I_ERR_BAD_FILE_EXTENT)
645 fprintf(stderr, ", bad file extent");
646 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
647 fprintf(stderr, ", file extent overlap");
648 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
649 fprintf(stderr, ", file extent discount");
650 if (errors & I_ERR_DIR_ISIZE_WRONG)
651 fprintf(stderr, ", dir isize wrong");
652 if (errors & I_ERR_FILE_NBYTES_WRONG)
653 fprintf(stderr, ", nbytes wrong");
654 if (errors & I_ERR_ODD_CSUM_ITEM)
655 fprintf(stderr, ", odd csum item");
656 if (errors & I_ERR_SOME_CSUM_MISSING)
657 fprintf(stderr, ", some csum missing");
658 if (errors & I_ERR_LINK_COUNT_WRONG)
659 fprintf(stderr, ", link count wrong");
660 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
661 fprintf(stderr, ", orphan file extent");
662 fprintf(stderr, "\n");
663 /* Print the orphan extents if needed */
664 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
665 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
667 /* Print the holes if needed */
668 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
669 struct file_extent_hole *hole;
670 struct rb_node *node;
673 node = rb_first(&rec->holes);
674 fprintf(stderr, "Found file extent holes:\n");
677 hole = rb_entry(node, struct file_extent_hole, node);
678 fprintf(stderr, "\tstart: %llu, len: %llu\n",
679 hole->start, hole->len);
680 node = rb_next(node);
/* Empty holes tree here presumably means the whole file is a hole. */
683 fprintf(stderr, "\tstart: 0, len: %llu\n",
684 round_up(rec->isize, root->sectorsize));
/*
 * Decode a REF_ERR_* bitmask into a comma-separated report on stderr.
 * Caller is expected to have printed the identifying prefix already.
 */
688 static void print_ref_error(int errors)
690 if (errors & REF_ERR_NO_DIR_ITEM)
691 fprintf(stderr, ", no dir item");
692 if (errors & REF_ERR_NO_DIR_INDEX)
693 fprintf(stderr, ", no dir index");
694 if (errors & REF_ERR_NO_INODE_REF)
695 fprintf(stderr, ", no inode ref");
696 if (errors & REF_ERR_DUP_DIR_ITEM)
697 fprintf(stderr, ", dup dir item");
698 if (errors & REF_ERR_DUP_DIR_INDEX)
699 fprintf(stderr, ", dup dir index");
700 if (errors & REF_ERR_DUP_INODE_REF)
701 fprintf(stderr, ", dup inode ref");
702 if (errors & REF_ERR_INDEX_UNMATCH)
703 fprintf(stderr, ", index unmatch");
704 if (errors & REF_ERR_FILETYPE_UNMATCH)
705 fprintf(stderr, ", filetype unmatch");
706 if (errors & REF_ERR_NAME_TOO_LONG)
707 fprintf(stderr, ", name too long");
708 if (errors & REF_ERR_NO_ROOT_REF)
709 fprintf(stderr, ", no root ref");
710 if (errors & REF_ERR_NO_ROOT_BACKREF)
711 fprintf(stderr, ", no root backref");
712 if (errors & REF_ERR_DUP_ROOT_REF)
713 fprintf(stderr, ", dup root ref");
714 if (errors & REF_ERR_DUP_ROOT_BACKREF)
715 fprintf(stderr, ", dup root backref");
716 fprintf(stderr, "\n");
/*
 * Look up (or lazily create) the inode_record for 'ino' in the cache.
 * With 'mod' set and a shared record (refs > 1), the record is cloned
 * first so the caller gets a private copy to modify.
 */
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
722 struct ptr_node *node;
723 struct cache_extent *cache;
724 struct inode_record *rec = NULL;
727 cache = lookup_cache_extent(inode_cache, ino, 1);
729 node = container_of(cache, struct ptr_node, cache);
/* Copy-on-write: don't mutate a record still shared with other nodes. */
731 if (mod && rec->refs > 1) {
732 node->data = clone_inode_rec(rec);
/* Cache miss: build a fresh record with empty containers. */
737 rec = calloc(1, sizeof(*rec));
739 rec->extent_start = (u64)-1;
741 INIT_LIST_HEAD(&rec->backrefs);
742 INIT_LIST_HEAD(&rec->orphan_extents);
743 rec->holes = RB_ROOT;
745 node = malloc(sizeof(*node));
746 node->cache.start = ino;
747 node->cache.size = 1;
/* The free-space cache inode is special-cased — handling not visible here. */
750 if (ino == BTRFS_FREE_INO_OBJECTID)
753 ret = insert_cache_extent(inode_cache, &node->cache);
/* Free every orphan_data_extent queued on the list. */
759 static void free_orphan_data_extents(struct list_head *orphan_extents)
761 struct orphan_data_extent *orphan;
763 while (!list_empty(orphan_extents)) {
764 orphan = list_entry(orphan_extents->next,
765 struct orphan_data_extent, list);
766 list_del(&orphan->list);
/* Release an inode_record: backrefs, orphan extents, holes tree. */
771 static void free_inode_rec(struct inode_record *rec)
773 struct inode_backref *backref;
778 while (!list_empty(&rec->backrefs)) {
779 backref = list_entry(rec->backrefs.next,
780 struct inode_backref, list);
781 list_del(&backref->list);
784 free_orphan_data_extents(&rec->orphan_extents);
785 free_file_extent_holes(&rec->holes);
/* A record can be dropped once fully checked, error-free and consistent. */
789 static int can_free_inode_rec(struct inode_record *rec)
791 if (!rec->errors && rec->checked && rec->found_inode_item &&
792 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Final per-inode validation: prune fully-matched backrefs, derive the
 * remaining I_ERR_* bits from the collected state, then drop the record
 * from the cache if can_free_inode_rec() allows it.
 */
797 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
798 struct inode_record *rec)
800 struct cache_extent *cache;
801 struct inode_backref *tmp, *backref;
802 struct ptr_node *node;
803 unsigned char filetype;
/* Nothing to validate until the inode item itself was seen. */
805 if (!rec->found_inode_item)
808 filetype = imode_to_type(rec->imode);
/* A backref with dir item + dir index + inode ref and no errors is done. */
809 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
810 if (backref->found_dir_item && backref->found_dir_index) {
811 if (backref->filetype != filetype)
812 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
813 if (!backref->errors && backref->found_inode_ref) {
814 list_del(&backref->list);
/* Defer final checks while still scanning or merging shared nodes. */
820 if (!rec->checked || rec->merging)
/* Directory sanity: isize must equal summed name lengths; no extents. */
823 if (S_ISDIR(rec->imode)) {
824 if (rec->found_size != rec->isize)
825 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
826 if (rec->found_file_extent)
827 rec->errors |= I_ERR_ODD_FILE_EXTENT;
828 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
829 if (rec->found_dir_item)
830 rec->errors |= I_ERR_ODD_DIR_ITEM;
831 if (rec->found_size != rec->nbytes)
832 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Holes below isize are errors unless the fs uses the no-holes feature. */
833 if (rec->nlink > 0 && !no_holes &&
834 (rec->extent_end < rec->isize ||
835 first_extent_gap(&rec->holes) < rec->isize))
836 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
/* Checksum consistency vs the inode's NODATASUM flag. */
839 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
840 if (rec->found_csum_item && rec->nodatasum)
841 rec->errors |= I_ERR_ODD_CSUM_ITEM;
842 if (rec->some_csum_missing && !rec->nodatasum)
843 rec->errors |= I_ERR_SOME_CSUM_MISSING;
846 BUG_ON(rec->refs != 1);
847 if (can_free_inode_rec(rec)) {
848 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
849 node = container_of(cache, struct ptr_node, cache);
850 BUG_ON(node->data != rec);
851 remove_cache_extent(inode_cache, &node->cache);
/*
 * Check whether an orphan item exists for 'ino': search the root for
 * (BTRFS_ORPHAN_OBJECTID, BTRFS_ORPHAN_ITEM_KEY, ino).
 */
857 static int check_orphan_item(struct btrfs_root *root, u64 ino)
859 struct btrfs_path path;
860 struct btrfs_key key;
863 key.objectid = BTRFS_ORPHAN_OBJECTID;
864 key.type = BTRFS_ORPHAN_ITEM_KEY;
867 btrfs_init_path(&path);
868 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
869 btrfs_release_path(&path);
/*
 * Record the fields of an INODE_ITEM into the active node's current
 * inode record; duplicate items are flagged as I_ERR_DUP_INODE_ITEM.
 */
875 static int process_inode_item(struct extent_buffer *eb,
876 int slot, struct btrfs_key *key,
877 struct shared_node *active_node)
879 struct inode_record *rec;
880 struct btrfs_inode_item *item;
882 rec = active_node->current;
/* The walker must already point 'current' at this objectid. */
883 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
884 if (rec->found_inode_item) {
885 rec->errors |= I_ERR_DUP_INODE_ITEM;
888 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
889 rec->nlink = btrfs_inode_nlink(eb, item);
890 rec->isize = btrfs_inode_size(eb, item);
891 rec->nbytes = btrfs_inode_nbytes(eb, item);
892 rec->imode = btrfs_inode_mode(eb, item);
893 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
895 rec->found_inode_item = 1;
/* Condition guarding this error is not visible in the excerpt. */
897 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
898 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref matching (dir, name) on an inode record, or allocate
 * a new zeroed one (with the name copied in) and append it.
 */
902 static struct inode_backref *get_inode_backref(struct inode_record *rec,
904 int namelen, u64 dir)
906 struct inode_backref *backref;
908 list_for_each_entry(backref, &rec->backrefs, list) {
/* Multiple-objectid placeholder records match any backref. */
909 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
911 if (backref->dir != dir || backref->namelen != namelen)
913 if (memcmp(name, backref->name, namelen))
/* Not found: allocate with room for the NUL-terminated name. */
918 backref = malloc(sizeof(*backref) + namelen + 1);
919 memset(backref, 0, sizeof(*backref));
921 backref->namelen = namelen;
922 memcpy(backref->name, name, namelen);
923 backref->name[namelen] = '\0';
924 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one directory linkage (DIR_ITEM, DIR_INDEX, INODE_REF or
 * INODE_EXTREF) on the inode's backref, cross-checking index and
 * filetype consistency between the item kinds already seen.
 */
928 static int add_inode_backref(struct cache_tree *inode_cache,
929 u64 ino, u64 dir, u64 index,
930 const char *name, int namelen,
931 int filetype, int itemtype, int errors)
933 struct inode_record *rec;
934 struct inode_backref *backref;
936 rec = get_inode_rec(inode_cache, ino, 1);
937 backref = get_inode_backref(rec, name, namelen, dir);
939 backref->errors |= errors;
940 if (itemtype == BTRFS_DIR_INDEX_KEY) {
941 if (backref->found_dir_index)
942 backref->errors |= REF_ERR_DUP_DIR_INDEX;
/* Index must agree with the inode ref seen earlier (and vice versa). */
943 if (backref->found_inode_ref && backref->index != index)
944 backref->errors |= REF_ERR_INDEX_UNMATCH;
945 if (backref->found_dir_item && backref->filetype != filetype)
946 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
948 backref->index = index;
949 backref->filetype = filetype;
950 backref->found_dir_index = 1;
951 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
953 if (backref->found_dir_item)
954 backref->errors |= REF_ERR_DUP_DIR_ITEM;
955 if (backref->found_dir_index && backref->filetype != filetype)
956 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
958 backref->filetype = filetype;
959 backref->found_dir_item = 1;
960 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
961 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
962 if (backref->found_inode_ref)
963 backref->errors |= REF_ERR_DUP_INODE_REF;
964 if (backref->found_dir_index && backref->index != index)
965 backref->errors |= REF_ERR_INDEX_UNMATCH;
967 backref->index = index;
969 backref->ref_type = itemtype;
970 backref->found_inode_ref = 1;
/* Record may now be complete enough to discard. */
975 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold the state of 'src' into 'dst' when the same inode was reached
 * through different shared tree nodes: replay src's backrefs into
 * dst_cache, merge counters, extent coverage, holes and error bits.
 * NOTE(review): 'dir_count' accounting lines are missing in this excerpt.
 */
979 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
980 struct cache_tree *dst_cache)
982 struct inode_backref *backref;
/* Re-add each of src's linkages so dst's cross-checks run again. */
987 list_for_each_entry(backref, &src->backrefs, list) {
988 if (backref->found_dir_index) {
989 add_inode_backref(dst_cache, dst->ino, backref->dir,
990 backref->index, backref->name,
991 backref->namelen, backref->filetype,
992 BTRFS_DIR_INDEX_KEY, backref->errors);
994 if (backref->found_dir_item) {
996 add_inode_backref(dst_cache, dst->ino,
997 backref->dir, 0, backref->name,
998 backref->namelen, backref->filetype,
999 BTRFS_DIR_ITEM_KEY, backref->errors);
1001 if (backref->found_inode_ref) {
1002 add_inode_backref(dst_cache, dst->ino,
1003 backref->dir, backref->index,
1004 backref->name, backref->namelen, 0,
1005 backref->ref_type, backref->errors);
/* Merge boolean discovery flags. */
1009 if (src->found_dir_item)
1010 dst->found_dir_item = 1;
1011 if (src->found_file_extent)
1012 dst->found_file_extent = 1;
1013 if (src->found_csum_item)
1014 dst->found_csum_item = 1;
1015 if (src->some_csum_missing)
1016 dst->some_csum_missing = 1;
/* Import src's holes when they start earlier than dst's first gap. */
1017 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1018 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1023 BUG_ON(src->found_link < dir_count);
1024 dst->found_link += src->found_link - dir_count;
1025 dst->found_size += src->found_size;
/* Merge extent coverage; detect overlaps and record new gaps as holes. */
1026 if (src->extent_start != (u64)-1) {
1027 if (dst->extent_start == (u64)-1) {
1028 dst->extent_start = src->extent_start;
1029 dst->extent_end = src->extent_end;
1031 if (dst->extent_end > src->extent_start)
1032 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1033 else if (dst->extent_end < src->extent_start) {
1034 ret = add_file_extent_hole(&dst->holes,
1036 src->extent_start - dst->extent_end);
1038 if (dst->extent_end < src->extent_end)
1039 dst->extent_end = src->extent_end;
1043 dst->errors |= src->errors;
/* Only one inode item may exist; a second one is a duplicate error. */
1044 if (src->found_inode_item) {
1045 if (!dst->found_inode_item) {
1046 dst->nlink = src->nlink;
1047 dst->isize = src->isize;
1048 dst->nbytes = src->nbytes;
1049 dst->imode = src->imode;
1050 dst->nodatasum = src->nodatasum;
1051 dst->found_inode_item = 1;
1053 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move all inode records from src_node's caches (root_cache first, then
 * inode_cache) into dst_node, merging on collision, and keep dst_node's
 * 'current' pointer consistent.
 */
1061 static int splice_shared_node(struct shared_node *src_node,
1062 struct shared_node *dst_node)
1064 struct cache_extent *cache;
1065 struct ptr_node *node, *ins;
1066 struct cache_tree *src, *dst;
1067 struct inode_record *rec, *conflict;
1068 u64 current_ino = 0;
/* Last reference: records can be moved instead of copied. */
1072 if (--src_node->refs == 0)
1074 if (src_node->current)
1075 current_ino = src_node->current->ino;
1077 src = &src_node->root_cache;
1078 dst = &dst_node->root_cache;
/* Walk every cache entry in src. */
1080 cache = search_cache_extent(src, 0);
1082 node = container_of(cache, struct ptr_node, cache);
1084 cache = next_cache_extent(cache);
1087 remove_cache_extent(src, &node->cache);
/* Copy path: wrap the record in a fresh ptr_node for dst. */
1090 ins = malloc(sizeof(*ins));
1091 ins->cache.start = node->cache.start;
1092 ins->cache.size = node->cache.size;
1096 ret = insert_cache_extent(dst, &ins->cache);
/* Same ino already in dst: merge the two records and drop ours. */
1097 if (ret == -EEXIST) {
1098 conflict = get_inode_rec(dst, rec->ino, 1);
1099 merge_inode_recs(rec, conflict, dst);
1101 conflict->checked = 1;
1102 if (dst_node->current == conflict)
1103 dst_node->current = NULL;
1105 maybe_free_inode_rec(dst, conflict);
1106 free_inode_rec(rec);
/* After the root cache, repeat the same loop for the inode cache. */
1113 if (src == &src_node->root_cache) {
1114 src = &src_node->inode_cache;
1115 dst = &dst_node->inode_cache;
/* Advance dst's 'current' if src had progressed further. */
1119 if (current_ino > 0 && (!dst_node->current ||
1120 current_ino > dst_node->current->ino)) {
1121 if (dst_node->current) {
1122 dst_node->current->checked = 1;
1123 maybe_free_inode_rec(dst, dst_node->current);
1125 dst_node->current = get_inode_rec(dst, current_ino, 1);
/* cache_tree free callback: release the ptr_node and its inode record. */
1130 static void free_inode_ptr(struct cache_extent *cache)
1132 struct ptr_node *node;
1133 struct inode_record *rec;
1135 node = container_of(cache, struct ptr_node, cache);
1137 free_inode_rec(rec);
/* Generates free_inode_recs_tree() over a cache tree of ptr_nodes. */
1141 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Shared-node bookkeeping: tree blocks referenced by multiple roots are
 * tracked in wc->shared so their per-inode state is spliced together
 * when the walk leaves them.
 */
/* Look up the shared_node tracked for a tree block bytenr, if any. */
1143 static struct shared_node *find_shared_node(struct cache_tree *shared,
1146 struct cache_extent *cache;
1147 struct shared_node *node;
1149 cache = lookup_cache_extent(shared, bytenr, 1);
1151 node = container_of(cache, struct shared_node, cache);
/* Create and insert a shared_node for 'bytenr' with 'refs' references. */
1157 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1160 struct shared_node *node;
1162 node = calloc(1, sizeof(*node));
1163 node->cache.start = bytenr;
1164 node->cache.size = 1;
1165 cache_tree_init(&node->root_cache);
1166 cache_tree_init(&node->inode_cache);
1169 ret = insert_cache_extent(shared, &node->cache);
/*
 * Enter a shared tree block at 'level': register it (or splice its
 * previously collected state into the currently active node).
 */
1174 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1175 struct walk_control *wc, int level)
1177 struct shared_node *node;
1178 struct shared_node *dest;
1180 if (level == wc->active_node)
1183 BUG_ON(wc->active_node <= level);
1184 node = find_shared_node(&wc->shared, bytenr);
/* First visit: start tracking this block and make it the active node. */
1186 add_shared_node(&wc->shared, bytenr, refs);
1187 node = find_shared_node(&wc->shared, bytenr);
1188 wc->nodes[level] = node;
1189 wc->active_node = level;
/* Dead root (zero refs): discard the node's collected state. */
1193 if (wc->root_level == wc->active_node &&
1194 btrfs_root_refs(&root->root_item) == 0) {
1195 if (--node->refs == 0) {
1196 free_inode_recs_tree(&node->root_cache);
1197 free_inode_recs_tree(&node->inode_cache);
1198 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise merge the revisited node's state into the active one. */
1204 dest = wc->nodes[wc->active_node];
1205 splice_shared_node(node, dest);
1206 if (node->refs == 0) {
1207 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Leave a shared node: pop it from wc->nodes, reactivate the next
 * tracked ancestor and splice the departing node's state upward.
 */
1213 static int leave_shared_node(struct btrfs_root *root,
1214 struct walk_control *wc, int level)
1216 struct shared_node *node;
1217 struct shared_node *dest;
1220 if (level == wc->root_level)
/* Find the nearest tracked node above this level. */
1223 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1227 BUG_ON(i >= BTRFS_MAX_LEVEL);
1229 node = wc->nodes[wc->active_node];
1230 wc->nodes[wc->active_node] = NULL;
1231 wc->active_node = i;
1233 dest = wc->nodes[wc->active_node];
1234 if (wc->active_node < wc->root_level ||
1235 btrfs_root_refs(&root->root_item) > 0) {
1236 BUG_ON(node->refs <= 1);
1237 splice_shared_node(node, dest);
1239 BUG_ON(node->refs < 2);
/*
 * Returns (per the original comment below):
 */
1248 * 1 - if the root with id child_root_id is a child of root parent_root_id
1249 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1250 * has other root(s) as parent(s)
1251 * 2 - if the root child_root_id doesn't have any parent roots
1253 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1256 struct btrfs_path path;
1257 struct btrfs_key key;
1258 struct extent_buffer *leaf;
1262 btrfs_init_path(&path);
/* Fast path: look for a direct ROOT_REF (parent -> child) item. */
1264 key.objectid = parent_root_id;
1265 key.type = BTRFS_ROOT_REF_KEY;
1266 key.offset = child_root_id;
1267 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1271 btrfs_release_path(&path);
/* Slow path: scan all ROOT_BACKREF items of the child. */
1275 key.objectid = child_root_id;
1276 key.type = BTRFS_ROOT_BACKREF_KEY;
1278 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1284 leaf = path.nodes[0];
1285 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1286 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1289 leaf = path.nodes[0];
1292 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the child's backref items — stop scanning. */
1293 if (key.objectid != child_root_id ||
1294 key.type != BTRFS_ROOT_BACKREF_KEY)
1299 if (key.offset == parent_root_id) {
1300 btrfs_release_path(&path);
1307 btrfs_release_path(&path);
1310 return has_parent ? 0 : 2;
/*
 * Walk every btrfs_dir_item packed into one DIR_ITEM/DIR_INDEX leaf
 * item, recording a backref for each name in either the inode cache
 * (INODE_ITEM targets) or the root cache (ROOT_ITEM targets).
 * NOTE(review): loop counters (cur/total/nritems) and error resets are
 * missing from this excerpt.
 */
1313 static int process_dir_item(struct btrfs_root *root,
1314 struct extent_buffer *eb,
1315 int slot, struct btrfs_key *key,
1316 struct shared_node *active_node)
1326 struct btrfs_dir_item *di;
1327 struct inode_record *rec;
1328 struct cache_tree *root_cache;
1329 struct cache_tree *inode_cache;
1330 struct btrfs_key location;
1331 char namebuf[BTRFS_NAME_LEN];
1333 root_cache = &active_node->root_cache;
1334 inode_cache = &active_node->inode_cache;
1335 rec = active_node->current;
1336 rec->found_dir_item = 1;
1338 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1339 total = btrfs_item_size_nr(eb, slot);
/* Multiple dir items can share one leaf item; iterate over all. */
1340 while (cur < total) {
1342 btrfs_dir_item_key_to_cpu(eb, di, &location);
1343 name_len = btrfs_dir_name_len(eb, di);
1344 data_len = btrfs_dir_data_len(eb, di);
1345 filetype = btrfs_dir_type(eb, di);
/* Directory isize is defined as the sum of entry name lengths. */
1347 rec->found_size += name_len;
1348 if (name_len <= BTRFS_NAME_LEN) {
/* Clamp over-long names and flag the error. */
1352 len = BTRFS_NAME_LEN;
1353 error = REF_ERR_NAME_TOO_LONG;
1355 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1357 if (location.type == BTRFS_INODE_ITEM_KEY) {
1358 add_inode_backref(inode_cache, location.objectid,
1359 key->objectid, key->offset, namebuf,
1360 len, filetype, key->type, error);
1361 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1362 add_inode_backref(root_cache, location.objectid,
1363 key->objectid, key->offset,
1364 namebuf, len, filetype,
/* Unknown location type: file it under the multi-objectid record. */
1367 fprintf(stderr, "invalid location in dir item %u\n",
1369 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1370 key->objectid, key->offset, namebuf,
1371 len, filetype, key->type, error);
/* Advance to the next packed dir item. */
1374 len = sizeof(*di) + name_len + data_len;
1375 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must contain exactly one entry. */
1378 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1379 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every btrfs_inode_ref packed into item 'slot' of leaf 'eb' as an
 * inode backref in the active shared node's inode cache.
 *
 * The item key is (inode objectid, INODE_REF, parent dir objectid), so
 * key->objectid is the child inode and key->offset the parent directory.
 * Over-long names are clamped to BTRFS_NAME_LEN and flagged with
 * REF_ERR_NAME_TOO_LONG.
 */
1384 static int process_inode_ref(struct extent_buffer *eb,
1385 int slot, struct btrfs_key *key,
1386 struct shared_node *active_node)
1394 struct cache_tree *inode_cache;
1395 struct btrfs_inode_ref *ref;
1396 char namebuf[BTRFS_NAME_LEN];
1398 inode_cache = &active_node->inode_cache;
1400 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1401 total = btrfs_item_size_nr(eb, slot);
/* An INODE_REF item may hold several refs back-to-back; walk them all. */
1402 while (cur < total) {
1403 name_len = btrfs_inode_ref_name_len(eb, ref);
1404 index = btrfs_inode_ref_index(eb, ref);
/* Clamp oversized names and remember the error on the backref. */
1405 if (name_len <= BTRFS_NAME_LEN) {
1409 len = BTRFS_NAME_LEN;
1410 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes immediately follow the fixed-size ref structure. */
1412 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1413 add_inode_backref(inode_cache, key->objectid, key->offset,
1414 index, namebuf, len, 0, key->type, error);
/* Advance past this ref (header + name) to the next packed one. */
1416 len = sizeof(*ref) + name_len;
1417 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for extended refs (INODE_EXTREF items),
 * which are used when a normal INODE_REF item would overflow.  Unlike the
 * plain ref, the parent directory objectid is stored inside each extref
 * (key->offset is a name hash for extrefs), so it is read from the item.
 */
1423 static int process_inode_extref(struct extent_buffer *eb,
1424 int slot, struct btrfs_key *key,
1425 struct shared_node *active_node)
1434 struct cache_tree *inode_cache;
1435 struct btrfs_inode_extref *extref;
1436 char namebuf[BTRFS_NAME_LEN];
1438 inode_cache = &active_node->inode_cache;
1440 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1441 total = btrfs_item_size_nr(eb, slot);
/* Multiple extrefs can be packed into one item; walk them all. */
1442 while (cur < total) {
1443 name_len = btrfs_inode_extref_name_len(eb, extref);
1444 index = btrfs_inode_extref_index(eb, extref);
/* Parent dir comes from the extref itself, not from the key offset. */
1445 parent = btrfs_inode_extref_parent(eb, extref);
1446 if (name_len <= BTRFS_NAME_LEN) {
1450 len = BTRFS_NAME_LEN;
1451 error = REF_ERR_NAME_TOO_LONG;
/* Name bytes follow the fixed-size extref structure. */
1453 read_extent_buffer(eb, namebuf,
1454 (unsigned long)(extref + 1), len);
1455 add_inode_backref(inode_cache, key->objectid, parent,
1456 index, namebuf, len, 0, key->type, error);
/* Step to the next packed extref. */
1458 len = sizeof(*extref) + name_len;
1459 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the logical range [start, start + len) are
 * covered by checksum items in the csum tree; the covered byte count is
 * returned through *found.  Used to decide whether a file extent has
 * complete, partial, or no checksums.
 */
1466 static int count_csum_range(struct btrfs_root *root, u64 start,
1467 u64 len, u64 *found)
1469 struct btrfs_key key;
1470 struct btrfs_path path;
1471 struct extent_buffer *leaf;
/* Csum item payload is a run of csum_size entries, one per sector. */
1476 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1478 btrfs_init_path(&path);
1480 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1482 key.type = BTRFS_EXTENT_CSUM_KEY;
1484 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * If the search landed past the wanted offset, the previous item may
 * still be a csum item whose run extends into our range — step back.
 */
1488 if (ret > 0 && path.slots[0] > 0) {
1489 leaf = path.nodes[0];
1490 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1491 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1492 key.type == BTRFS_EXTENT_CSUM_KEY)
/* Walk forward item by item, crossing leaf boundaries as needed. */
1497 leaf = path.nodes[0];
1498 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1499 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1504 leaf = path.nodes[0];
1507 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop as soon as we leave the csum key space. */
1508 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1509 key.type != BTRFS_EXTENT_CSUM_KEY)
1512 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts at or beyond the end of our range — done. */
1513 if (key.offset >= start + len)
1516 if (key.offset > start)
/* Each csum_size bytes of item payload covers one sector of data. */
1519 size = btrfs_item_size_nr(leaf, path.slots[0]);
1520 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1521 if (csum_end > start) {
/* Only count the overlap with [start, start + len). */
1522 size = min(csum_end - start, len);
1531 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the inode currently being collected
 * (active_node->current) and fold its information into the inode record:
 * extent coverage (holes / overlaps), found_size accounting, basic sanity
 * of the extent fields, and checksum presence for regular extents.
 */
1537 static int process_file_extent(struct btrfs_root *root,
1538 struct extent_buffer *eb,
1539 int slot, struct btrfs_key *key,
1540 struct shared_node *active_node)
1542 struct inode_record *rec;
1543 struct btrfs_file_extent_item *fi;
1545 u64 disk_bytenr = 0;
1546 u64 extent_offset = 0;
/* Sector-alignment mask; sectorsize is a power of two. */
1547 u64 mask = root->sectorsize - 1;
1551 rec = active_node->current;
/* Items are processed in key order, so this must be the current inode. */
1552 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1553 rec->found_file_extent = 1;
/* First extent seen for this inode: start tracking coverage here. */
1555 if (rec->extent_start == (u64)-1) {
1556 rec->extent_start = key->offset;
1557 rec->extent_end = key->offset;
/* Overlap with the previous extent is an error; a gap is a hole. */
1560 if (rec->extent_end > key->offset)
1561 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1562 else if (rec->extent_end < key->offset) {
1563 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1564 key->offset - rec->extent_end);
1569 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1570 extent_type = btrfs_file_extent_type(eb, fi);
1572 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1573 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1575 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1576 rec->found_size += num_bytes;
/* Round inline length up to a sector for extent_end tracking. */
1577 num_bytes = (num_bytes + mask) & ~mask;
1578 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1579 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1580 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1581 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1582 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Regular extents must be non-empty and sector aligned. */
1583 if (num_bytes == 0 || (num_bytes & mask))
1584 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the extent's ram size. */
1585 if (num_bytes + extent_offset >
1586 btrfs_file_extent_ram_bytes(eb, fi))
1587 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents cannot be compressed/encrypted/encoded. */
1588 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1589 (btrfs_file_extent_compression(eb, fi) ||
1590 btrfs_file_extent_encryption(eb, fi) ||
1591 btrfs_file_extent_other_encoding(eb, fi)))
1592 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 marks a hole extent: no bytes on disk. */
1593 if (disk_bytenr > 0)
1594 rec->found_size += num_bytes;
1596 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1598 rec->extent_end = key->offset + num_bytes;
1601 * The data reloc tree will copy full extents into its inode and then
1602 * copy the corresponding csums. Because the extent it copied could be
1603 * a preallocated extent that hasn't been written to yet there may be no
1604 * csums to copy, ergo we won't have csums for our file extent. This is
1605 * ok so just don't bother checking csums if the inode belongs to the
1608 if (disk_bytenr > 0 &&
1609 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID {
1611 if (btrfs_file_extent_compression(eb, fi))
/* Compressed: csums cover the on-disk (compressed) bytes. */
1612 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1614 disk_bytenr += extent_offset;
1616 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1619 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1621 rec->found_csum_item = 1;
1622 if (found < num_bytes)
1623 rec->some_csum_missing = 1;
/* Prealloc extents must NOT have csums; finding any is an error. */
1624 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1626 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Scan every item of leaf 'eb' and dispatch it to the matching
 * process_*() helper, building inode records in the active shared node.
 * Inode records are finalized (checked/freed) as soon as the item keys
 * move past their objectid, relying on the leaf's sorted key order.
 */
1632 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1633 struct walk_control *wc)
1635 struct btrfs_key key;
1639 struct cache_tree *inode_cache;
1640 struct shared_node *active_node;
/* Skip leaves of dead roots (refs == 0) at the top walk level. */
1642 if (wc->root_level == wc->active_node &&
1643 btrfs_root_refs(&root->root_item) == 0)
1646 active_node = wc->nodes[wc->active_node];
1647 inode_cache = &active_node->inode_cache;
1648 nritems = btrfs_header_nritems(eb);
1649 for (i = 0; i < nritems; i++) {
1650 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space-cache inodes and orphan items aren't inode metadata. */
1652 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1654 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Keys are sorted, so once the objectid advances the previous
 * inode record is complete; mark it checked and switch records.
 */
1657 if (active_node->current == NULL ||
1658 active_node->current->ino < key.objectid) {
1659 if (active_node->current) {
1660 active_node->current->checked = 1;
1661 maybe_free_inode_rec(inode_cache,
1662 active_node->current);
1664 active_node->current = get_inode_rec(inode_cache,
/* Dispatch on item type. */
1668 case BTRFS_DIR_ITEM_KEY:
1669 case BTRFS_DIR_INDEX_KEY:
1670 ret = process_dir_item(root, eb, i, &key, active_node);
1672 case BTRFS_INODE_REF_KEY:
1673 ret = process_inode_ref(eb, i, &key, active_node);
1675 case BTRFS_INODE_EXTREF_KEY:
1676 ret = process_inode_extref(eb, i, &key, active_node);
1678 case BTRFS_INODE_ITEM_KEY:
1679 ret = process_inode_item(eb, i, &key, active_node);
1681 case BTRFS_EXTENT_DATA_KEY:
1682 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of 'node' from 'slot' onward, so
 * the subsequent synchronous read_tree_block() calls in walk_down_tree()
 * hit warm caches.  Best effort only — no errors are reported.
 */
1692 static void reada_walk_down(struct btrfs_root *root,
1693 struct extent_buffer *node, int slot)
1702 level = btrfs_header_level(node);
1706 nritems = btrfs_header_nritems(node);
/* Children live one level below this node. */
1707 blocksize = btrfs_level_size(root, level - 1);
1708 for (i = slot; i < nritems; i++) {
1709 bytenr = btrfs_node_blockptr(node, i);
1710 ptr_gen = btrfs_node_ptr_generation(node, i);
1711 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1716 * Check the child node/leaf by the following condition:
1717 * 1. the first item key of the node/leaf should be the same with the one
1719 * 2. block in parent node should match the child node/leaf.
1720 * 3. generation of parent node and child's header should be consistent.
1722 * Or the child node/leaf pointed by the key in parent is not valid.
1724 * We hope to check leaf owner too, but since subvol may share leaves,
1725 * which makes leaf owner check not so strong, key check should be
1726 * sufficient enough for that case.
1728 static int check_child_node(struct btrfs_root *root,
1729 struct extent_buffer *parent, int slot,
1730 struct extent_buffer *child)
1732 struct btrfs_key parent_key;
1733 struct btrfs_key child_key;
/* Key the parent claims for this child slot. */
1736 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* First key actually stored in the child (item key for leaves). */
1737 if (btrfs_header_level(child) == 0)
1738 btrfs_item_key_to_cpu(child, &child_key, 0);
1740 btrfs_node_key_to_cpu(child, &child_key, 0);
/* Condition 1: parent's pointer key must equal child's first key. */
1742 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1745 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1746 parent_key.objectid, parent_key.type, parent_key.offset,
1747 child_key.objectid, child_key.type, child_key.offset);
/* Condition 2: child's header bytenr must match the parent pointer. */
1749 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1751 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1752 btrfs_node_blockptr(parent, slot),
1753 btrfs_header_bytenr(child));
/* Condition 3: generations recorded in parent and child must agree. */
1755 if (btrfs_node_ptr_generation(parent, slot) !=
1756 btrfs_header_generation(child)) {
1758 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1759 btrfs_header_generation(child),
1760 btrfs_node_ptr_generation(parent, slot));
/*
 * Descend the tree along 'path', processing every leaf with
 * process_one_leaf().  Shared subtrees (extent refs > 1) are entered via
 * enter_shared_node() so each shared block is only fully processed once
 * across all roots.  Child blocks are read with readahead, then verified
 * against their parent (check_child_node) and sanity-checked
 * (btrfs_check_leaf/node) before descending into them.
 */
1765 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1766 struct walk_control *wc, int *level)
1768 enum btrfs_tree_block_status status;
1771 struct extent_buffer *next;
1772 struct extent_buffer *cur;
1777 WARN_ON(*level < 0);
1778 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Fetch the extent refcount of the current block to spot sharing. */
1779 ret = btrfs_lookup_extent_info(NULL, root,
1780 path->nodes[*level]->start,
1781 *level, 1, &refs, NULL);
1788 ret = enter_shared_node(root, path->nodes[*level]->start,
1796 while (*level >= 0) {
1797 WARN_ON(*level < 0);
1798 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1799 cur = path->nodes[*level];
/* Header level must match where we think we are in the path. */
1801 if (btrfs_header_level(cur) != *level)
/* Exhausted this node's slots; caller walks back up. */
1804 if (path->slots[*level] >= btrfs_header_nritems(cur))
1807 ret = process_one_leaf(root, cur, wc);
1812 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1813 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1814 blocksize = btrfs_level_size(root, *level - 1);
/* Check the child's refcount; shared children are deferred. */
1815 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1821 ret = enter_shared_node(root, bytenr, refs,
/* Child already (being) processed via sharing: skip over it. */
1824 path->slots[*level]++;
1829 next = btrfs_find_tree_block(root, bytenr, blocksize);
1830 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1831 free_extent_buffer(next);
/* Prefetch the remaining siblings before the blocking read. */
1832 reada_walk_down(root, cur, path->slots[*level]);
1833 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record the parent as a corrupt extent. */
1835 if (!extent_buffer_uptodate(next)) {
1836 struct btrfs_key node_key;
1838 btrfs_node_key_to_cpu(path->nodes[*level],
1840 path->slots[*level]);
1841 btrfs_add_corrupt_extent_record(root->fs_info,
1843 path->nodes[*level]->start,
1844 root->leafsize, *level);
/* Verify child matches parent's key/bytenr/generation. */
1850 ret = check_child_node(root, cur, path->slots[*level], next);
1856 if (btrfs_is_leaf(next))
1857 status = btrfs_check_leaf(root, NULL, next);
1859 status = btrfs_check_node(root, NULL, next);
1860 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1861 free_extent_buffer(next);
/* Descend: install the child as the new current node. */
1866 *level = *level - 1;
1867 free_extent_buffer(path->nodes[*level]);
1868 path->nodes[*level] = next;
1869 path->slots[*level] = 0;
1872 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Walk back up the path after walk_down_tree() exhausted a subtree:
 * pop fully-consumed levels, releasing their buffers and leaving any
 * shared node we had entered, until a level with remaining slots is
 * found (or the root is reached).
 */
1876 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1877 struct walk_control *wc, int *level)
1880 struct extent_buffer *leaf;
1882 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1883 leaf = path->nodes[i];
/* This level still has unvisited slots: resume descent from here. */
1884 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Level is exhausted: drop its buffer and pop the shared node. */
1889 free_extent_buffer(path->nodes[*level]);
1890 path->nodes[*level] = NULL;
1891 BUG_ON(*level > wc->active_node);
1892 if (*level == wc->active_node)
1893 leave_shared_node(root, wc, *level);
/*
 * Validate the root directory inode record of a tree: it must have a
 * clean inode item, nlink 1 with no counted links, and exactly one
 * backref — the ".." self reference with index 0 and no dir item/index
 * (the root dir is not listed in any directory).
 */
1900 static int check_root_dir(struct inode_record *rec)
1902 struct inode_backref *backref;
1905 if (!rec->found_inode_item || rec->errors)
1907 if (rec->nlink != 1 || rec->found_link != 0)
1909 if (list_empty(&rec->backrefs))
/* Only the first backref is examined; it must be the ".." ref. */
1911 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1912 if (!backref->found_inode_ref)
1914 if (backref->index != 0 || backref->namelen != 2 ||
1915 memcmp(backref->name, "..", 2))
/* The root dir must not appear as an entry in any directory. */
1917 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair I_ERR_DIR_ISIZE_WRONG: rewrite a directory inode's size field
 * to the byte total accumulated from its dir entries (rec->found_size).
 * Locates the INODE_ITEM by searching for key (ino, INODE_ITEM, -1) and
 * stepping back one slot.
 */
1924 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1925 struct btrfs_root *root, struct btrfs_path *path,
1926 struct inode_record *rec)
1928 struct btrfs_inode_item *ei;
1929 struct btrfs_key key;
1932 key.objectid = rec->ino;
1933 key.type = BTRFS_INODE_ITEM_KEY;
/* offset -1: land just after the inode item, then step back. */
1934 key.offset = (u64)-1;
1936 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Nothing before slot 0 to step back to — item not found. */
1940 if (!path->slots[0]) {
1947 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1948 if (key.objectid != rec->ino) {
1953 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1954 struct btrfs_inode_item);
1955 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1956 btrfs_mark_buffer_dirty(path->nodes[0]);
/* Error fixed; clear the flag so it is not reported again. */
1957 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1958 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1959 root->root_key.objectid);
1961 btrfs_release_path(path);
/*
 * Repair I_ERR_NO_ORPHAN_ITEM: insert the missing orphan item for an
 * unlinked inode so a later mount can finish removing it.
 */
1965 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1966 struct btrfs_root *root,
1967 struct btrfs_path *path,
1968 struct inode_record *rec)
1972 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1973 btrfs_release_path(path);
1975 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair I_ERR_FILE_NBYTES_WRONG: rewrite the inode item's nbytes field
 * to the byte total accumulated from its file extents (rec->found_size).
 */
1979 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1980 struct btrfs_root *root,
1981 struct btrfs_path *path,
1982 struct inode_record *rec)
1984 struct btrfs_inode_item *ei;
1985 struct btrfs_key key;
1988 key.objectid = rec->ino;
1989 key.type = BTRFS_INODE_ITEM_KEY;
1992 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1999 /* Since ret == 0, no need to check anything */
2000 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2001 struct btrfs_inode_item);
2002 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2003 btrfs_mark_buffer_dirty(path->nodes[0]);
/* Fixed; clear the flag so the error is not reported again. */
2004 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2005 printf("reset nbytes for ino %llu root %llu\n",
2006 rec->ino, root->root_key.objectid);
2008 btrfs_release_path(path);
/*
 * Repair a backref that has a DIR_ITEM and INODE_REF but no DIR_INDEX:
 * build and insert the missing (dir, DIR_INDEX, index) item inside its
 * own transaction, then update the in-memory backref and the parent
 * directory's size accounting.
 */
2012 static int add_missing_dir_index(struct btrfs_root *root,
2013 struct cache_tree *inode_cache,
2014 struct inode_record *rec,
2015 struct inode_backref *backref)
2017 struct btrfs_path *path;
2018 struct btrfs_trans_handle *trans;
2019 struct btrfs_dir_item *dir_item;
2020 struct extent_buffer *leaf;
2021 struct btrfs_key key;
2022 struct btrfs_disk_key disk_key;
2023 struct inode_record *dir_rec;
2024 unsigned long name_ptr;
/* Item payload: dir_item header followed by the name bytes. */
2025 u32 data_size = sizeof(*dir_item) + backref->namelen;
2028 path = btrfs_alloc_path();
2032 trans = btrfs_start_transaction(root, 1);
2033 if (IS_ERR(trans)) {
2034 btrfs_free_path(path);
2035 return PTR_ERR(trans);
2038 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2039 (unsigned long long)rec->ino);
2040 key.objectid = backref->dir;
2041 key.type = BTRFS_DIR_INDEX_KEY;
2042 key.offset = backref->index;
2044 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2047 leaf = path->nodes[0];
2048 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* Embedded location key points the entry at the child inode. */
2050 disk_key.objectid = cpu_to_le64(rec->ino);
2051 disk_key.type = BTRFS_INODE_ITEM_KEY;
2052 disk_key.offset = 0;
2054 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2055 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2056 btrfs_set_dir_data_len(leaf, dir_item, 0);
2057 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2058 name_ptr = (unsigned long)(dir_item + 1);
2059 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2060 btrfs_mark_buffer_dirty(leaf);
2061 btrfs_free_path(path);
2062 btrfs_commit_transaction(trans, root);
/* Keep the in-memory state consistent with what we just inserted. */
2064 backref->found_dir_index = 1;
2065 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
/* The new entry changes the parent dir's computed size. */
2068 dir_rec->found_size += backref->namelen;
2069 if (dir_rec->found_size == dir_rec->isize &&
2070 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2071 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2072 if (dir_rec->found_size != dir_rec->isize)
2073 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete a bad DIR_INDEX entry identified by 'backref': look it up by
 * (dir, name, index) and remove either the whole item or just this name
 * from a shared item, all inside its own transaction.
 */
2078 static int delete_dir_index(struct btrfs_root *root,
2079 struct cache_tree *inode_cache,
2080 struct inode_record *rec,
2081 struct inode_backref *backref)
2083 struct btrfs_trans_handle *trans;
2084 struct btrfs_dir_item *di;
2085 struct btrfs_path *path;
2088 path = btrfs_alloc_path();
2092 trans = btrfs_start_transaction(root, 1);
2093 if (IS_ERR(trans)) {
2094 btrfs_free_path(path);
2095 return PTR_ERR(trans);
2099 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2100 (unsigned long long)backref->dir,
2101 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2102 (unsigned long long)root->objectid);
/* mod -1: look up the index entry for deletion. */
2104 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2105 backref->name, backref->namelen,
2106 backref->index, -1);
/* Entry not found (or error): nothing to delete, just commit. */
2109 btrfs_free_path(path);
2110 btrfs_commit_transaction(trans, root);
/* Whole item vs. one name within a multi-entry item. */
2117 ret = btrfs_del_item(trans, root, path);
2119 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2121 btrfs_free_path(path);
2122 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing INODE_ITEM for 'rec' from whatever the backrefs and
 * extents tell us: directories (dir items found) get S_IFDIR with size
 * from found_size; everything else is assumed to be a regular file with
 * size from extent_end.  Permissions default to 0755 and timestamps to
 * "now" — hence the warning to the user to re-check the result.
 */
2126 static int create_inode_item(struct btrfs_root *root,
2127 struct inode_record *rec,
2128 struct inode_backref *backref, int root_dir)
2130 struct btrfs_trans_handle *trans;
2131 struct btrfs_inode_item inode_item;
2132 time_t now = time(NULL);
2135 trans = btrfs_start_transaction(root, 1);
2136 if (IS_ERR(trans)) {
2137 ret = PTR_ERR(trans);
2141 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2142 "be incomplete, please check permissions and content after "
2143 "the fsck completes.\n", (unsigned long long)root->objectid,
2144 (unsigned long long)rec->ino);
2146 memset(&inode_item, 0, sizeof(inode_item));
2147 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink: 1 for the root-dir special case, else the counted links. */
2149 btrfs_set_stack_inode_nlink(&inode_item, 1);
2151 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2152 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2153 if (rec->found_dir_item) {
2154 if (rec->found_file_extent)
2155 fprintf(stderr, "root %llu inode %llu has both a dir "
2156 "item and extents, unsure if it is a dir or a "
2157 "regular file so setting it as a directory\n",
2158 (unsigned long long)root->objectid,
2159 (unsigned long long)rec->ino);
2160 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2161 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/*
 * NOTE(review): this condition is always true after the 'if' above
 * failed — possibly a branch was meant to be different; confirm
 * against upstream btrfs-progs.
 */
2162 } else if (!rec->found_dir_item) {
2163 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2164 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* Fresh timestamps; otime is left at epoch. */
2166 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2167 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2168 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2169 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2170 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2171 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2172 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2173 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2175 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2177 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of 'rec' and repair each inconsistency it can:
 * delete orphaned or mismatched dir indexes, add missing dir indexes,
 * drop fully-consistent backrefs from the list, insert missing dir
 * item/index pairs for lone inode refs, and recreate a missing inode
 * item.  Returns a negative errno, or the number of repairs performed.
 */
2181 static int repair_inode_backrefs(struct btrfs_root *root,
2182 struct inode_record *rec,
2183 struct cache_tree *inode_cache,
2186 struct inode_backref *tmp, *backref;
2187 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2191 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root dir with no inode item: recreate it first. */
2192 if (!delete && rec->ino == root_dirid) {
2193 if (!rec->found_inode_item) {
2194 ret = create_inode_item(root, rec, backref, 1);
2201 /* Index 0 for root dir's are special, don't mess with it */
2202 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without inode ref, or with a mismatched index: drop it. */
2206 ((backref->found_dir_index && !backref->found_inode_ref) ||
2207 (backref->found_dir_index && backref->found_inode_ref &&
2208 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2209 ret = delete_dir_index(root, inode_cache, rec, backref);
2213 list_del(&backref->list);
/* Dir item + inode ref but no dir index: insert the index. */
2217 if (!delete && !backref->found_dir_index &&
2218 backref->found_dir_item && backref->found_inode_ref) {
2219 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * NOTE(review): 'found_dir_index' is tested twice below; the third
 * condition was presumably meant to check something else (e.g.
 * found_inode_ref) — confirm against upstream btrfs-progs.
 */
2224 if (backref->found_dir_item &&
2225 backref->found_dir_index &&
2226 backref->found_dir_index) {
/* Fully consistent backref: nothing left to track. */
2227 if (!backref->errors &&
2228 backref->found_inode_ref) {
2229 list_del(&backref->list);
/* Only an inode ref exists: add the dir item/index pair. */
2235 if (!delete && (!backref->found_dir_index &&
2236 !backref->found_dir_item &&
2237 backref->found_inode_ref)) {
2238 struct btrfs_trans_handle *trans;
2239 struct btrfs_key location;
/* Bail if the name already exists with a conflicting target. */
2241 ret = check_dir_conflict(root, backref->name,
2247 * let nlink fixing routine to handle it,
2248 * which can do it better.
2253 location.objectid = rec->ino;
2254 location.type = BTRFS_INODE_ITEM_KEY;
2255 location.offset = 0;
2257 trans = btrfs_start_transaction(root, 1);
2258 if (IS_ERR(trans)) {
2259 ret = PTR_ERR(trans);
2262 fprintf(stderr, "adding missing dir index/item pair "
2264 (unsigned long long)rec->ino);
2265 ret = btrfs_insert_dir_item(trans, root, backref->name,
2267 backref->dir, &location,
2268 imode_to_type(rec->imode),
2271 btrfs_commit_transaction(trans, root);
/* Complete, error-free backref but no inode item: recreate it. */
2275 if (!delete && (backref->found_inode_ref &&
2276 backref->found_dir_index &&
2277 backref->found_dir_item &&
2278 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2279 !rec->found_inode_item)) {
2280 ret = create_inode_item(root, rec, backref, 0);
2287 return ret ? ret : repaired;
2291 * To determine the file type for nlink/inode_item repair
2293 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2294 * Return -ENOENT if file type is not found.
2296 static int find_file_type(struct inode_record *rec, u8 *type)
2298 struct inode_backref *backref;
2300 /* For inode item recovered case */
2301 if (rec->found_inode_item) {
/* Inode item is authoritative: derive the type from its mode. */
2302 *type = imode_to_type(rec->imode);
/* Otherwise use the filetype recorded in any dir entry backref. */
2306 list_for_each_entry(backref, &rec->backrefs, list) {
2307 if (backref->found_dir_index || backref->found_dir_item) {
2308 *type = backref->filetype;
2316 * To determine the file name for nlink repair
2318 * Return 0 if file name is found, set name and namelen.
2319 * Return -ENOENT if file name is not found.
2321 static int find_file_name(struct inode_record *rec,
2322 char *name, int *namelen)
2324 struct inode_backref *backref;
/* Any backref that carries a name (dir entry or inode ref) will do. */
2326 list_for_each_entry(backref, &rec->backrefs, list) {
2327 if (backref->found_dir_index || backref->found_dir_item ||
2328 backref->found_inode_ref) {
2329 memcpy(name, backref->name, backref->namelen);
2330 *namelen = backref->namelen;
2337 /* Reset the nlink of the inode to the correct one */
2338 static int reset_nlink(struct btrfs_trans_handle *trans,
2339 struct btrfs_root *root,
2340 struct btrfs_path *path,
2341 struct inode_record *rec)
2343 struct inode_backref *backref;
2344 struct inode_backref *tmp;
2345 struct btrfs_key key;
2346 struct btrfs_inode_item *inode_item;
2349 /* We don't believe this either, reset it and iterate backref */
2350 rec->found_link = 0;
2352 /* Remove all backref including the valid ones */
2353 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2355 backref->index, backref->name,
2356 backref->namelen, 0);
2360 /* remove invalid backref, so it won't be added back */
2361 if (!(backref->found_dir_index &&
2362 backref->found_dir_item &&
2363 backref->found_inode_ref)) {
2364 list_del(&backref->list);
2371 /* Set nlink to 0 */
2372 key.objectid = rec->ino;
2373 key.type = BTRFS_INODE_ITEM_KEY;
2375 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2382 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2383 struct btrfs_inode_item);
2384 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2385 btrfs_mark_buffer_dirty(path->nodes[0]);
2386 btrfs_release_path(path);
2389 * Add back valid inode_ref/dir_item/dir_index,
2390 * add_link() will handle the nlink inc, so new nlink must be correct
2392 list_for_each_entry(backref, &rec->backrefs, list) {
2393 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2394 backref->name, backref->namelen,
2395 backref->ref_type, &backref->index, 1);
2400 btrfs_release_path(path);
/*
 * Repair I_ERR_LINK_COUNT_WRONG: recover a usable name and type, reset
 * the link count from the valid backrefs via reset_nlink(), and — if the
 * inode ends up with no links at all — re-link it under a 'lost+found'
 * directory, appending ".<ino>" suffixes on name collisions.
 */
2404 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2405 struct btrfs_root *root,
2406 struct btrfs_path *path,
2407 struct inode_record *rec)
2409 char *dir_name = "lost+found";
2410 char namebuf[BTRFS_NAME_LEN] = {0};
2415 int name_recovered = 0;
2416 int type_recovered = 0;
2420 * Get file name and type first before these invalid inode ref
2421 * are deleted by remove_all_invalid_backref()
2423 name_recovered = !find_file_name(rec, namebuf, &namelen);
2424 type_recovered = !find_file_type(rec, &type);
/* No name anywhere: fall back to the inode number as the file name. */
2426 if (!name_recovered) {
2427 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2428 rec->ino, rec->ino);
2429 namelen = count_digits(rec->ino);
2430 sprintf(namebuf, "%llu", rec->ino);
2433 if (!type_recovered) {
2434 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2436 type = BTRFS_FT_REG_FILE;
/* Rebuild nlink strictly from the surviving valid backrefs. */
2440 ret = reset_nlink(trans, root, path, rec);
2443 "Failed to reset nlink for inode %llu: %s\n",
2444 rec->ino, strerror(-ret));
/* Orphaned inode: give it a home under lost+found. */
2448 if (rec->found_link == 0) {
2449 lost_found_ino = root->highest_inode;
2450 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2455 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2456 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2459 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2460 dir_name, strerror(-ret));
2463 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2464 namebuf, namelen, type, NULL, 1);
2466 * Add ".INO" suffix several times to handle case where
2467 * "FILENAME.INO" is already taken by another file.
2469 while (ret == -EEXIST) {
2471 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2473 if (namelen + count_digits(rec->ino) + 1 >
2478 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2480 namelen += count_digits(rec->ino) + 1;
2481 ret = btrfs_add_link(trans, root, rec->ino,
2482 lost_found_ino, namebuf,
2483 namelen, type, NULL, 1);
2487 "Failed to link the inode %llu to %s dir: %s\n",
2488 rec->ino, dir_name, strerror(-ret));
2492 * Just increase the found_link, don't actually add the
2493 * backref. This will make things easier and this inode
2494 * record will be freed after the repair is done.
2495 * So fsck will not report problem about this inode.
2498 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2499 namelen, namebuf, dir_name);
2501 printf("Fixed the nlink of inode %llu\n", rec->ino);
2504 * Clear the flag anyway, or we will loop forever for the same inode
2505 * as it will not be removed from the bad inode list and the dead loop
2508 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2509 btrfs_release_path(path);
2514 * Check if there is any normal(reg or prealloc) file extent for given
2516 * This is used to determine the file type when neither its dir_index/item or
2517 * inode_item exists.
2519 * This will *NOT* report error, if any error happens, just consider it does
2520 * not have any normal file extent.
2522 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2524 struct btrfs_path *path;
2525 struct btrfs_key key;
2526 struct btrfs_key found_key;
2527 struct btrfs_file_extent_item *fi;
2531 path = btrfs_alloc_path();
2535 key.type = BTRFS_EXTENT_DATA_KEY;
2538 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* Landed past the leaf's last item: advance to the next leaf. */
2543 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2544 ret = btrfs_next_leaf(root, path);
2551 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* Stop once we leave this inode's EXTENT_DATA key space. */
2553 if (found_key.objectid != ino ||
2554 found_key.type != BTRFS_EXTENT_DATA_KEY)
2556 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2557 struct btrfs_file_extent_item);
2558 type = btrfs_file_extent_type(path->nodes[0], fi);
/* Any non-inline extent (REG or PREALLOC) counts as "normal". */
2559 if (type != BTRFS_FILE_EXTENT_INLINE) {
2565 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry file type to the corresponding
 * S_IF* mode bits.  Note: 'type' is used as an array index without a
 * range check — callers must pass a valid BTRFS_FT_* value.
 */
2569 static u32 btrfs_type_to_imode(u8 type)
2571 static u32 imode_by_btrfs_type[] = {
2572 [BTRFS_FT_REG_FILE] = S_IFREG,
2573 [BTRFS_FT_DIR] = S_IFDIR,
2574 [BTRFS_FT_CHRDEV] = S_IFCHR,
2575 [BTRFS_FT_BLKDEV] = S_IFBLK,
2576 [BTRFS_FT_FIFO] = S_IFIFO,
2577 [BTRFS_FT_SOCK] = S_IFSOCK,
2578 [BTRFS_FT_SYMLINK] = S_IFLNK,
2581 return imode_by_btrfs_type[(type)];
/*
 * Repair I_ERR_NO_INODE_ITEM: rebuild a minimal inode item.  The file
 * type is inferred from backrefs, then from the presence of normal file
 * extents, dir items, or orphan extents; regular file is the final
 * fallback.  Afterwards the record is marked I_ERR_LINK_COUNT_WRONG so
 * the nlink repair pass re-links it properly.
 */
2584 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2585 struct btrfs_root *root,
2586 struct btrfs_path *path,
2587 struct inode_record *rec)
2591 int type_recovered = 0;
2594 printf("Trying to rebuild inode:%llu\n", rec->ino);
2596 type_recovered = !find_file_type(rec, &filetype);
2599 * Try to determine inode type if type not found.
2601 * For found regular file extent, it must be FILE.
2602 * For found dir_item/index, it must be DIR.
2604 * For undetermined one, use FILE as fallback.
2607 * 1. If found backref(inode_index/item is already handled) to it,
2609 * Need new inode-inode ref structure to allow search for that.
2611 if (!type_recovered) {
2612 if (rec->found_file_extent &&
2613 find_normal_file_extent(root, rec->ino)) {
2615 filetype = BTRFS_FT_REG_FILE;
2616 } else if (rec->found_dir_item) {
2618 filetype = BTRFS_FT_DIR;
/* Orphan data extents imply it held file data at some point. */
2619 } else if (!list_empty(&rec->orphan_extents)) {
2621 filetype = BTRFS_FT_REG_FILE;
2623 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2626 filetype = BTRFS_FT_REG_FILE;
2630 ret = btrfs_new_inode(trans, root, rec->ino,
2631 mode | btrfs_type_to_imode(filetype));
2636 * Here inode rebuild is done, we only rebuild the inode item,
2637 * don't repair the nlink(like move to lost+found).
2638 * That is the job of nlink repair.
2640 * We just fill the record and return
2642 rec->found_dir_item = 1;
2643 rec->imode = mode | btrfs_type_to_imode(filetype);
2645 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2646 /* Ensure the inode_nlinks repair function will be called */
2647 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Repair I_ERR_FILE_EXTENT_ORPHAN: for each recorded orphan data extent,
 * either free it (if it conflicts with an existing file extent) or
 * re-insert it as a file extent of the inode, updating the record's size
 * and hole bookkeeping along the way.
 */
2652 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2653 struct btrfs_root *root,
2654 struct btrfs_path *path,
2655 struct inode_record *rec)
2657 struct orphan_data_extent *orphan;
2658 struct orphan_data_extent *tmp;
2661 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2663 * Check for conflicting file extents
2665 * Here we don't know whether the extents is compressed or not,
2666 * so we can only assume it not compressed nor data offset,
2667 * and use its disk_len as extent length.
2669 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2670 orphan->offset, orphan->disk_len, 0);
2671 btrfs_release_path(path);
/* Conflict: drop the orphan's extent back to the allocator. */
2676 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2677 orphan->disk_bytenr, orphan->disk_len);
2678 ret = btrfs_free_extent(trans,
2679 root->fs_info->extent_root,
2680 orphan->disk_bytenr, orphan->disk_len,
2681 0, root->objectid, orphan->objectid,
/* No conflict: re-create the file extent for this inode. */
2686 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2687 orphan->offset, orphan->disk_bytenr,
2688 orphan->disk_len, orphan->disk_len);
2692 /* Update file size info */
2693 rec->found_size += orphan->disk_len;
2694 if (rec->found_size == rec->nbytes)
2695 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2697 /* Update the file extent hole info too */
2698 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2702 if (RB_EMPTY_ROOT(&rec->holes))
2703 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2705 list_del(&orphan->list);
2708 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Repair I_ERR_FILE_EXTENT_DISCOUNT: punch an explicit hole extent for
 * every gap recorded in rec->holes, so the file's extent map becomes
 * contiguous, then clear the error once the hole tree is empty.
 */
2713 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2714 struct btrfs_root *root,
2715 struct btrfs_path *path,
2716 struct inode_record *rec)
2718 struct rb_node *node;
2719 struct file_extent_hole *hole;
2723 node = rb_first(&rec->holes);
2727 hole = rb_entry(node, struct file_extent_hole, node);
2728 ret = btrfs_punch_hole(trans, root, rec->ino,
2729 hole->start, hole->len);
/* Hole now represented on disk; drop it from the in-memory tree. */
2732 ret = del_file_extent_hole(&rec->holes, hole->start,
2736 if (RB_EMPTY_ROOT(&rec->holes))
2737 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2738 node = rb_first(&rec->holes);
2740 /* special case for a file losing all its file extent */
2742 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2743 round_up(rec->isize, root->sectorsize));
2747 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2748 rec->ino, root->objectid);
/*
 * Attempt to repair a single inode record inside one transaction.
 *
 * Only the error bits listed in the mask below are repairable here; any
 * other error makes this function bail out early (elided line).  Repairs
 * are ordered so that structural fixes (missing inode item, orphan/
 * discount extents) happen before size/link-count fixes, and each step
 * only runs if the previous one succeeded (ret == 0).
 *
 * Returns 0 on success or a negative errno from the first failing step.
 */
2753 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2755 	struct btrfs_trans_handle *trans;
2756 	struct btrfs_path *path;
	/* Bail unless at least one repairable error bit is set. */
2759 	if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2760 	                     I_ERR_NO_ORPHAN_ITEM |
2761 	                     I_ERR_LINK_COUNT_WRONG |
2762 			     I_ERR_NO_INODE_ITEM |
2763 			     I_ERR_FILE_EXTENT_ORPHAN |
2764 			     I_ERR_FILE_EXTENT_DISCOUNT|
2765 			     I_ERR_FILE_NBYTES_WRONG)))
2768 	path = btrfs_alloc_path();
2773 	 * For nlink repair, it may create a dir and add link, so
2774 	 * 2 for parent(256)'s dir_index and dir_item
2775 	 * 2 for lost+found dir's inode_item and inode_ref
2776 	 * 1 for the new inode_ref of the file
2777 	 * 2 for lost+found dir's dir_index and dir_item for the file
	 * (7 reserved items total, matching the worst case above)
2779 	trans = btrfs_start_transaction(root, 7);
2780 	if (IS_ERR(trans)) {
2781 		btrfs_free_path(path);
2782 		return PTR_ERR(trans);
	/* Each repair step runs only if all previous ones succeeded. */
2785 	if (rec->errors & I_ERR_NO_INODE_ITEM)
2786 		ret = repair_inode_no_item(trans, root, path, rec);
2787 	if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2788 		ret = repair_inode_orphan_extent(trans, root, path, rec);
2789 	if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2790 		ret = repair_inode_discount_extent(trans, root, path, rec);
2791 	if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2792 		ret = repair_inode_isize(trans, root, path, rec);
2793 	if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2794 		ret = repair_inode_orphan_item(trans, root, path, rec);
2795 	if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2796 		ret = repair_inode_nlinks(trans, root, path, rec);
2797 	if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2798 		ret = repair_inode_nbytes(trans, root, path, rec);
	/* NOTE(review): commit return value is ignored here — TODO confirm
	 * the full source behaves the same. */
2799 	btrfs_commit_transaction(trans, root);
2800 	btrfs_free_path(path);
/*
 * Validate (and, when --repair is enabled, try to fix) every inode
 * record collected for one fs root.
 *
 * Phases visible here:
 *   1. Record the highest used inode number (needed later when creating
 *      entries under 'lost+found').
 *   2. In repair mode, iterate backref repairs in stages, rescanning the
 *      cache when a repair invalidates previously collected state.
 *   3. Check/recreate the root directory inode.
 *   4. Walk all remaining inode records, set derived error bits, repair
 *      where possible, and print what could not be resolved.
 *
 * Returns 0 when clean, -1 when unresolved errors remain.
 */
2804 static int check_inode_recs(struct btrfs_root *root,
2805 			    struct cache_tree *inode_cache)
2807 	struct cache_extent *cache;
2808 	struct ptr_node *node;
2809 	struct inode_record *rec;
2810 	struct inode_backref *backref;
2815 	u64 root_dirid = btrfs_root_dirid(&root->root_item);
	/* A dead root (refs == 0) should have an empty inode cache. */
2817 	if (btrfs_root_refs(&root->root_item) == 0) {
2818 		if (!cache_tree_empty(inode_cache))
2819 			fprintf(stderr, "warning line %d\n", __LINE__);
2824 	 * We need to record the highest inode number for later 'lost+found'
2826 	 * We must select an ino not used/referred to by any existing inode, or
2827 	 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2828 	 * this may cause 'lost+found' dir has wrong nlinks.
2830 	cache = last_cache_extent(inode_cache);
2832 		node = container_of(cache, struct ptr_node, cache);
2834 		if (rec->ino > root->highest_inode)
2835 			root->highest_inode = rec->ino;
2839 	 * We need to repair backrefs first because we could change some of the
2840 	 * errors in the inode recs.
2842 	 * We also need to go through and delete invalid backrefs first and then
2843 	 * add the correct ones second. We do this because we may get EEXIST
2844 	 * when adding back the correct index because we hadn't yet deleted the
2847 	 * For example, if we were missing a dir index then the directories
2848 	 * isize would be wrong, so if we fixed the isize to what we thought it
2849 	 * would be and then fixed the backref we'd still have an invalid fs, so
2850 	 * we need to add back the dir index and then check to see if the isize
	/* Staged backref-repair loop; stage 3 with no error means done. */
2855 	if (stage == 3 && !err)
2858 	cache = search_cache_extent(inode_cache, 0);
2859 	while (repair && cache) {
2860 		node = container_of(cache, struct ptr_node, cache);
2862 		cache = next_cache_extent(cache);
2864 		/* Need to free everything up and rescan */
2866 			remove_cache_extent(inode_cache, &node->cache);
2868 			free_inode_rec(rec);
2872 		if (list_empty(&rec->backrefs))
2875 		ret = repair_inode_backrefs(root, rec, inode_cache,
	/* Check (and possibly recreate) the root directory inode. */
2889 	rec = get_inode_rec(inode_cache, root_dirid, 0);
2891 		ret = check_root_dir(rec);
2893 			fprintf(stderr, "root %llu root dir %llu error\n",
2894 				(unsigned long long)root->root_key.objectid,
2895 				(unsigned long long)root_dirid);
2896 			print_inode_error(root, rec);
2901 			struct btrfs_trans_handle *trans;
2903 			trans = btrfs_start_transaction(root, 1);
2904 			if (IS_ERR(trans)) {
2905 				err = PTR_ERR(trans);
2910 				"root %llu missing its root dir, recreating\n",
2911 				(unsigned long long)root->objectid);
2913 			ret = btrfs_make_root_dir(trans, root, root_dirid);
2916 			btrfs_commit_transaction(trans, root);
2920 		fprintf(stderr, "root %llu root dir %llu not found\n",
2921 			(unsigned long long)root->root_key.objectid,
2922 			(unsigned long long)root_dirid);
	/* Main pass: drain the cache, checking every remaining record. */
2926 	cache = search_cache_extent(inode_cache, 0);
2929 		node = container_of(cache, struct ptr_node, cache);
2931 		remove_cache_extent(inode_cache, &node->cache);
		/* Root dir and orphan objectid records need no further check. */
2933 		if (rec->ino == root_dirid ||
2934 		    rec->ino == BTRFS_ORPHAN_OBJECTID) {
2935 			free_inode_rec(rec);
		/* Zero-nlink inodes are fine if an orphan item exists. */
2939 		if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2940 			ret = check_orphan_item(root, rec->ino);
2942 				rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2943 			if (can_free_inode_rec(rec)) {
2944 				free_inode_rec(rec);
2949 		if (!rec->found_inode_item)
2950 			rec->errors |= I_ERR_NO_INODE_ITEM;
2951 		if (rec->found_link != rec->nlink)
2952 			rec->errors |= I_ERR_LINK_COUNT_WRONG;
2954 			ret = try_repair_inode(root, rec);
2955 			if (ret == 0 && can_free_inode_rec(rec)) {
2956 				free_inode_rec(rec);
2962 		if (!(repair && ret == 0))
2964 		print_inode_error(root, rec);
2965 		list_for_each_entry(backref, &rec->backrefs, list) {
2966 			if (!backref->found_dir_item)
2967 				backref->errors |= REF_ERR_NO_DIR_ITEM;
2968 			if (!backref->found_dir_index)
2969 				backref->errors |= REF_ERR_NO_DIR_INDEX;
2970 			if (!backref->found_inode_ref)
2971 				backref->errors |= REF_ERR_NO_INODE_REF;
2972 			fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2973 				" namelen %u name %s filetype %d errors %x",
2974 				(unsigned long long)backref->dir,
2975 				(unsigned long long)backref->index,
2976 				backref->namelen, backref->name,
2977 				backref->filetype, backref->errors);
2978 			print_ref_error(backref->errors);
2980 		free_inode_rec(rec);
2982 	return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record on a cache miss.
 *
 * NOTE(review): the calloc() result is used without a NULL check on the
 * lines visible here — matches the file's abort-on-OOM style elsewhere,
 * but TODO confirm against the full source.
 */
2985 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2988 	struct cache_extent *cache;
2989 	struct root_record *rec = NULL;
2992 	cache = lookup_cache_extent(root_cache, objectid, 1);
2994 		rec = container_of(cache, struct root_record, cache);
2996 		rec = calloc(1, sizeof(*rec));
2997 		rec->objectid = objectid;
2998 		INIT_LIST_HEAD(&rec->backrefs);
2999 		rec->cache.start = objectid;
3000 		rec->cache.size = 1;
3002 		ret = insert_cache_extent(root_cache, &rec->cache);
/*
 * Find the backref on @rec matching (ref_root, dir, name); allocate,
 * initialize and queue a new one when none matches.  The name is stored
 * inline after the struct (calloc over-allocates namelen + 1 bytes and
 * NUL-terminates).
 *
 * NOTE(review): calloc() result is not NULL-checked on the visible
 * lines — consistent with the surrounding code's style.
 */
3008 static struct root_backref *get_root_backref(struct root_record *rec,
3009 					     u64 ref_root, u64 dir, u64 index,
3010 					     const char *name, int namelen)
3012 	struct root_backref *backref;
	/* Match on ref_root + dir + exact name. */
3014 	list_for_each_entry(backref, &rec->backrefs, list) {
3015 		if (backref->ref_root != ref_root || backref->dir != dir ||
3016 		    backref->namelen != namelen)
3018 		if (memcmp(name, backref->name, namelen))
	/* No match: create a new backref with the name stored inline. */
3023 	backref = calloc(1, sizeof(*backref) + namelen + 1);
3024 	backref->ref_root = ref_root;
3026 	backref->index = index;
3027 	backref->namelen = namelen;
3028 	memcpy(backref->name, name, namelen);
3029 	backref->name[namelen] = '\0';
3030 	list_add_tail(&backref->list, &rec->backrefs);
/*
 * cache_tree destructor callback: release every queued backref of a
 * root_record (and, presumably, the record itself on an elided line).
 */
3034 static void free_root_record(struct cache_extent *cache)
3036 	struct root_record *rec;
3037 	struct root_backref *backref;
3039 	rec = container_of(cache, struct root_record, cache);
3040 	while (!list_empty(&rec->backrefs)) {
3041 		backref = list_entry(rec->backrefs.next,
3042 				     struct root_backref, list);
3043 		list_del(&backref->list);
/* Generates free_root_recs_tree() for whole-cache teardown. */
3050 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence about how root @root_id is referenced:
 * a DIR_ITEM / DIR_INDEX in the parent tree, or a ROOT_REF / ROOT_BACKREF
 * in the root tree.  Conflicting or duplicate evidence sets REF_ERR_*
 * bits on the backref.  A root becomes "reachable" once both a forward
 * root ref and the parent dir item have been seen.
 */
3052 static int add_root_backref(struct cache_tree *root_cache,
3053 			    u64 root_id, u64 ref_root, u64 dir, u64 index,
3054 			    const char *name, int namelen,
3055 			    int item_type, int errors)
3057 	struct root_record *rec;
3058 	struct root_backref *backref;
3060 	rec = get_root_rec(root_cache, root_id);
3061 	backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3063 	backref->errors |= errors;
	/* DIR_ITEM carries no index; every other item type must agree on it. */
3065 	if (item_type != BTRFS_DIR_ITEM_KEY) {
3066 		if (backref->found_dir_index || backref->found_back_ref ||
3067 		    backref->found_forward_ref) {
3068 			if (backref->index != index)
3069 				backref->errors |= REF_ERR_INDEX_UNMATCH;
3071 			backref->index = index;
3075 	if (item_type == BTRFS_DIR_ITEM_KEY) {
3076 		if (backref->found_forward_ref)
3078 		backref->found_dir_item = 1;
3079 	} else if (item_type == BTRFS_DIR_INDEX_KEY) {
3080 		backref->found_dir_index = 1;
3081 	} else if (item_type == BTRFS_ROOT_REF_KEY) {
3082 		if (backref->found_forward_ref)
3083 			backref->errors |= REF_ERR_DUP_ROOT_REF;
3084 		else if (backref->found_dir_item)
3086 		backref->found_forward_ref = 1;
3087 	} else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3088 		if (backref->found_back_ref)
3089 			backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3090 		backref->found_back_ref = 1;
	/* Reachable = forward ref in root tree + dir item in parent. */
3095 	if (backref->found_forward_ref && backref->found_dir_item)
3096 		backref->reachable = 1;
/*
 * Move inode records that actually describe child subvolume roots from a
 * per-root cache (@src_cache) into the global root cache (@dst_cache),
 * converting their dir item/index backrefs into root backrefs.
 * Records belonging to the reloc tree are simply discarded.
 */
3100 static int merge_root_recs(struct btrfs_root *root,
3101 			   struct cache_tree *src_cache,
3102 			   struct cache_tree *dst_cache)
3104 	struct cache_extent *cache;
3105 	struct ptr_node *node;
3106 	struct inode_record *rec;
3107 	struct inode_backref *backref;
	/* Reloc tree records carry no root info worth keeping. */
3110 	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3111 		free_inode_recs_tree(src_cache);
3116 	cache = search_cache_extent(src_cache, 0);
3119 		node = container_of(cache, struct ptr_node, cache);
3121 		remove_cache_extent(src_cache, &node->cache);
		/* Only records whose ino is a child root become root backrefs. */
3124 		ret = is_child_root(root, root->objectid, rec->ino);
3130 		list_for_each_entry(backref, &rec->backrefs, list) {
3131 			BUG_ON(backref->found_inode_ref);
3132 			if (backref->found_dir_item)
3133 				add_root_backref(dst_cache, rec->ino,
3134 					root->root_key.objectid, backref->dir,
3135 					backref->index, backref->name,
3136 					backref->namelen, BTRFS_DIR_ITEM_KEY,
3138 			if (backref->found_dir_index)
3139 				add_root_backref(dst_cache, rec->ino,
3140 					root->root_key.objectid, backref->dir,
3141 					backref->index, backref->name,
3142 					backref->namelen, BTRFS_DIR_INDEX_KEY,
3146 		free_inode_rec(rec);
/*
 * Cross-check the collected root records/backrefs.
 *
 * Pass 1 (iterated to a fixpoint, loop header elided): starting from the
 * top-level FS tree, clear "reachable" on backrefs whose referencing
 * root is itself unreferenced, propagating unreachability.
 * Pass 2: report every fs tree with no remaining references (unless it
 * only has an orphan item or lacks a root item entirely) and print each
 * unresolved backref with its accumulated REF_ERR_* bits.
 *
 * Returns 1 when any error was found, 0 otherwise.
 * Known limitation, per the original comment: circular references are
 * not detected.
 */
3153 static int check_root_refs(struct btrfs_root *root,
3154 			   struct cache_tree *root_cache)
3156 	struct root_record *rec;
3157 	struct root_record *ref_root;
3158 	struct root_backref *backref;
3159 	struct cache_extent *cache;
	/* The top-level FS tree is implicitly referenced. */
3165 	rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3168 	/* fixme: this can not detect circular references */
3171 		cache = search_cache_extent(root_cache, 0);
3175 			rec = container_of(cache, struct root_record, cache);
3176 			cache = next_cache_extent(cache);
3178 			if (rec->found_ref == 0)
3181 			list_for_each_entry(backref, &rec->backrefs, list) {
3182 				if (!backref->reachable)
3185 				ref_root = get_root_rec(root_cache,
3187 				if (ref_root->found_ref > 0)
				/* Referencing root is dead: drop this ref too. */
3190 				backref->reachable = 0;
3192 					if (rec->found_ref == 0)
	/* Reporting pass over all remaining records. */
3198 	cache = search_cache_extent(root_cache, 0);
3202 		rec = container_of(cache, struct root_record, cache);
3203 		cache = next_cache_extent(cache);
3205 		if (rec->found_ref == 0 &&
3206 		    rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3207 		    rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
			/* Deleted-but-orphaned subvolumes are fine. */
3208 			ret = check_orphan_item(root->fs_info->tree_root,
3214 			 * If we don't have a root item then we likely just have
3215 			 * a dir item in a snapshot for this root but no actual
3216 			 * ref key or anything so it's meaningless.
3218 			if (!rec->found_root_item)
3221 			fprintf(stderr, "fs tree %llu not referenced\n",
3222 				(unsigned long long)rec->objectid);
3226 		if (rec->found_ref > 0 && !rec->found_root_item)
3228 		list_for_each_entry(backref, &rec->backrefs, list) {
3229 			if (!backref->found_dir_item)
3230 				backref->errors |= REF_ERR_NO_DIR_ITEM;
3231 			if (!backref->found_dir_index)
3232 				backref->errors |= REF_ERR_NO_DIR_INDEX;
3233 			if (!backref->found_back_ref)
3234 				backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3235 			if (!backref->found_forward_ref)
3236 				backref->errors |= REF_ERR_NO_ROOT_REF;
3237 			if (backref->reachable && backref->errors)
3244 		fprintf(stderr, "fs tree %llu refs %u %s\n",
3245 			(unsigned long long)rec->objectid, rec->found_ref,
3246 			 rec->found_root_item ? "" : "not found");
3248 		list_for_each_entry(backref, &rec->backrefs, list) {
3249 			if (!backref->reachable)
3251 			if (!backref->errors && rec->found_root_item)
3253 			fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3254 				" index %llu namelen %u name %s errors %x\n",
3255 				(unsigned long long)backref->ref_root,
3256 				(unsigned long long)backref->dir,
3257 				(unsigned long long)backref->index,
3258 				backref->namelen, backref->name,
3260 			print_ref_error(backref->errors);
3263 	return errors > 0 ? 1 : 0;
/*
 * Parse one ROOT_REF / ROOT_BACKREF item from the root tree and feed it
 * into the root cache.  Over-long names are truncated to BTRFS_NAME_LEN
 * and flagged with REF_ERR_NAME_TOO_LONG.  For ROOT_REF the key is
 * (parent, REF, child); for ROOT_BACKREF it is (child, BACKREF, parent)
 * — hence the swapped objectid/offset in the two add_root_backref()
 * calls.
 */
3266 static int process_root_ref(struct extent_buffer *eb, int slot,
3267 			    struct btrfs_key *key,
3268 			    struct cache_tree *root_cache)
3274 	struct btrfs_root_ref *ref;
3275 	char namebuf[BTRFS_NAME_LEN];
3278 	ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3280 	dirid = btrfs_root_ref_dirid(eb, ref);
3281 	index = btrfs_root_ref_sequence(eb, ref);
3282 	name_len = btrfs_root_ref_name_len(eb, ref);
3284 	if (name_len <= BTRFS_NAME_LEN) {
3288 		len = BTRFS_NAME_LEN;
3289 		error = REF_ERR_NAME_TOO_LONG;
	/* Name bytes follow the fixed-size struct inside the item. */
3291 	read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3293 	if (key->type == BTRFS_ROOT_REF_KEY) {
3294 		add_root_backref(root_cache, key->offset, key->objectid, dirid,
3295 				 index, namebuf, len, key->type, error);
3297 		add_root_backref(root_cache, key->objectid, key->offset, dirid,
3298 				 index, namebuf, len, key->type, error);
/* cache_tree destructor callback for btrfs_corrupt_block entries. */
3303 static void free_corrupt_block(struct cache_extent *cache)
3305 	struct btrfs_corrupt_block *corrupt;
3307 	corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree() for whole-cache teardown. */
3311 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3314  * Repair the btree of the given root.
3316  * The fix is to remove the node key in corrupt_blocks cache_tree.
3317  * and rebalance the tree.
3318  * After the fix, the btree should be writeable.
 *
 * Two passes inside one transaction:
 *   1. For each recorded corrupt block, search down to its level with
 *      ins_len == 0 (no balancing against possibly-corrupt siblings),
 *      delete the node pointer, and free the underlying extent.
 *   2. Re-run btrfs_search_slot() with ins_len == -1 on each deleted key
 *      purely to trigger the deferred tree balancing.
3320 static int repair_btree(struct btrfs_root *root,
3321 			struct cache_tree *corrupt_blocks)
3323 	struct btrfs_trans_handle *trans;
3324 	struct btrfs_path *path;
3325 	struct btrfs_corrupt_block *corrupt;
3326 	struct cache_extent *cache;
3327 	struct btrfs_key key;
	/* Nothing recorded: nothing to repair. */
3332 	if (cache_tree_empty(corrupt_blocks))
3335 	path = btrfs_alloc_path();
3339 	trans = btrfs_start_transaction(root, 1);
3340 	if (IS_ERR(trans)) {
3341 		ret = PTR_ERR(trans);
3342 		fprintf(stderr, "Error starting transaction: %s\n",
	/* Pass 1: unlink each corrupt block and release its extent. */
3346 	cache = first_cache_extent(corrupt_blocks);
3348 		corrupt = container_of(cache, struct btrfs_corrupt_block,
3350 		level = corrupt->level;
3351 		path->lowest_level = level;
3352 		key.objectid = corrupt->key.objectid;
3353 		key.type = corrupt->key.type;
3354 		key.offset = corrupt->key.offset;
3357 		 * Here we don't want to do any tree balance, since it may
3358 		 * cause a balance with corrupted brother leaf/node,
3359 		 * so ins_len set to 0 here.
3360 		 * Balance will be done after all corrupt node/leaf is deleted.
3362 		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3365 		offset = btrfs_node_blockptr(path->nodes[level],
3366 					     path->slots[level]);
3368 		/* Remove the ptr */
3369 		ret = btrfs_del_ptr(trans, root, path, level,
3370 				    path->slots[level]);
3374 		 * Remove the corresponding extent
3375 		 * return value is not concerned.
3377 		btrfs_release_path(path);
3378 		ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3379 					0, root->root_key.objectid,
3381 		cache = next_cache_extent(cache);
3384 	/* Balance the btree using btrfs_search_slot() */
3385 	cache = first_cache_extent(corrupt_blocks);
3387 		corrupt = container_of(cache, struct btrfs_corrupt_block,
3389 		memcpy(&key, &corrupt->key, sizeof(key));
3390 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3393 		/* return will always >0 since it won't find the item */
3395 		btrfs_release_path(path);
3396 		cache = next_cache_extent(cache);
3399 	btrfs_commit_transaction(trans, root);
3401 	btrfs_free_path(path);
/*
 * Check one fs/subvolume tree:
 *   - register its root record (except for reloc trees),
 *   - attach pending orphan data extents to their inode records,
 *   - sanity-check the root node itself, then walk the whole tree with
 *     walk_down_tree()/walk_up_tree(), honoring drop_progress for roots
 *     that were mid-deletion,
 *   - report (and in repair mode try to fix, via repair_btree()) any
 *     corrupt blocks collected during the walk,
 *   - merge discovered child-root info into @root_cache and validate all
 *     inode records.
 *
 * Uses fs_info->corrupt_blocks as a per-root scratch tree; it is reset
 * to NULL before returning.
 */
3405 static int check_fs_root(struct btrfs_root *root,
3406 			 struct cache_tree *root_cache,
3407 			 struct walk_control *wc)
3413 	struct btrfs_path path;
3414 	struct shared_node root_node;
3415 	struct root_record *rec;
3416 	struct btrfs_root_item *root_item = &root->root_item;
3417 	struct cache_tree corrupt_blocks;
3418 	struct orphan_data_extent *orphan;
3419 	struct orphan_data_extent *tmp;
3420 	enum btrfs_tree_block_status status;
3423 	 * Reuse the corrupt_block cache tree to record corrupted tree block
3425 	 * Unlike the usage in extent tree check, here we do it in a per
3426 	 * fs/subvol tree base.
3428 	cache_tree_init(&corrupt_blocks);
3429 	root->fs_info->corrupt_blocks = &corrupt_blocks;
3431 	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3432 		rec = get_root_rec(root_cache, root->root_key.objectid);
3433 		if (btrfs_root_refs(root_item) > 0)
3434 			rec->found_root_item = 1;
3437 	btrfs_init_path(&path);
3438 	memset(&root_node, 0, sizeof(root_node));
3439 	cache_tree_init(&root_node.root_cache);
3440 	cache_tree_init(&root_node.inode_cache);
3442 	/* Move the orphan extent record to corresponding inode_record */
3443 	list_for_each_entry_safe(orphan, tmp,
3444 				 &root->orphan_data_extents, list) {
3445 		struct inode_record *inode;
3447 		inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3449 		inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3450 		list_move(&orphan->list, &inode->orphan_extents);
3453 	level = btrfs_header_level(root->node);
3454 	memset(wc->nodes, 0, sizeof(wc->nodes));
3455 	wc->nodes[level] = &root_node;
3456 	wc->active_node = level;
3457 	wc->root_level = level;
3459 	/* We may not have checked the root block, lets do that now */
3460 	if (btrfs_is_leaf(root->node))
3461 		status = btrfs_check_leaf(root, NULL, root->node);
3463 		status = btrfs_check_node(root, NULL, root->node);
3464 	if (status != BTRFS_TREE_BLOCK_CLEAN)
	/* Live root: start at the top.  Dropping root: resume from
	 * drop_progress so we only walk what still exists. */
3467 	if (btrfs_root_refs(root_item) > 0 ||
3468 	    btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3469 		path.nodes[level] = root->node;
3470 		extent_buffer_get(root->node);
3471 		path.slots[level] = 0;
3473 		struct btrfs_key key;
3474 		struct btrfs_disk_key found_key;
3476 		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3477 		level = root_item->drop_level;
3478 		path.lowest_level = level;
3479 		wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3482 		btrfs_node_key(path.nodes[level], &found_key,
3484 		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3485 			sizeof(found_key)));
	/* Full depth-first walk of the tree (loop structure elided). */
3489 		wret = walk_down_tree(root, &path, wc, &level);
3495 		wret = walk_up_tree(root, &path, wc, &level);
3502 	btrfs_release_path(&path);
3504 	if (!cache_tree_empty(&corrupt_blocks)) {
3505 		struct cache_extent *cache;
3506 		struct btrfs_corrupt_block *corrupt;
3508 		printf("The following tree block(s) is corrupted in tree %llu:\n",
3509 		       root->root_key.objectid);
3510 		cache = first_cache_extent(&corrupt_blocks);
3512 			corrupt = container_of(cache,
3513 					       struct btrfs_corrupt_block,
3515 			printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3516 			       cache->start, corrupt->level,
3517 			       corrupt->key.objectid, corrupt->key.type,
3518 			       corrupt->key.offset);
3519 			cache = next_cache_extent(cache);
		/* Repair mode: attempt in-place btree surgery. */
3522 		printf("Try to repair the btree for root %llu\n",
3523 		       root->root_key.objectid);
3524 		ret = repair_btree(root, &corrupt_blocks);
3526 			fprintf(stderr, "Failed to repair btree: %s\n",
3529 			printf("Btree for root %llu is fixed\n",
3530 			       root->root_key.objectid);
3534 	err = merge_root_recs(root, &root_node.root_cache, root_cache);
3538 	if (root_node.current) {
3539 		root_node.current->checked = 1;
3540 		maybe_free_inode_rec(&root_node.inode_cache,
3544 	err = check_inode_recs(root, &root_node.inode_cache);
	/* Clear per-root scratch state before returning. */
3548 	free_corrupt_blocks_tree(&corrupt_blocks);
3549 	root->fs_info->corrupt_blocks = NULL;
3550 	free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return nonzero when @objectid names a tree whose contents are
 * fs-tree-like and thus must be checked: the reloc trees plus everything
 * is_fstree() accepts.
 */
3554 static int fs_root_objectid(u64 objectid)
3556 	if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3557 	    objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3559 	return is_fstree(objectid);
/*
 * Iterate over every ROOT_ITEM in the tree root and run check_fs_root()
 * on each fs/subvolume tree; ROOT_REF/ROOT_BACKREF items are fed to
 * process_root_ref() along the way.
 *
 * Repairs made while checking can COW the tree root; when its node
 * pointer changes (tree_node snapshot differs), or when check_fs_root()
 * returns -EAGAIN, all collected root records are dropped and the whole
 * scan restarts (restart target elided in this chunk).
 */
3562 static int check_fs_roots(struct btrfs_root *root,
3563 			  struct cache_tree *root_cache)
3565 	struct btrfs_path path;
3566 	struct btrfs_key key;
3567 	struct walk_control wc;
3568 	struct extent_buffer *leaf, *tree_node;
3569 	struct btrfs_root *tmp_root;
3570 	struct btrfs_root *tree_root = root->fs_info->tree_root;
3574 	if (ctx.progress_enabled) {
3575 		ctx.tp = TASK_FS_ROOTS;
3576 		task_start(ctx.info);
3580 	 * Just in case we made any changes to the extent tree that weren't
3581 	 * reflected into the free space cache yet.
3584 		reset_cached_block_groups(root->fs_info);
3585 	memset(&wc, 0, sizeof(wc));
3586 	cache_tree_init(&wc.shared);
3587 	btrfs_init_path(&path);
3592 	key.type = BTRFS_ROOT_ITEM_KEY;
3593 	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	/* Snapshot the root node: if a repair COWs it, we must rescan. */
3598 	tree_node = tree_root->node;
3600 		if (tree_node != tree_root->node) {
3601 			free_root_recs_tree(root_cache);
3602 			btrfs_release_path(&path);
3605 		leaf = path.nodes[0];
3606 		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3607 			ret = btrfs_next_leaf(tree_root, &path);
3613 			leaf = path.nodes[0];
3615 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3616 		if (key.type == BTRFS_ROOT_ITEM_KEY &&
3617 		    fs_root_objectid(key.objectid)) {
			/* Reloc roots are read uncached; others via cache. */
3618 			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3619 				tmp_root = btrfs_read_fs_root_no_cache(
3620 						root->fs_info, &key);
3622 				key.offset = (u64)-1;
3623 				tmp_root = btrfs_read_fs_root(
3624 						root->fs_info, &key);
3626 			if (IS_ERR(tmp_root)) {
3630 			ret = check_fs_root(tmp_root, root_cache, &wc);
3631 			if (ret == -EAGAIN) {
3632 				free_root_recs_tree(root_cache);
3633 				btrfs_release_path(&path);
3638 			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3639 				btrfs_free_fs_root(tmp_root);
3640 		} else if (key.type == BTRFS_ROOT_REF_KEY ||
3641 			   key.type == BTRFS_ROOT_BACKREF_KEY) {
3642 			process_root_ref(leaf, path.slots[0], &key,
3649 	btrfs_release_path(&path);
3651 	free_extent_cache_tree(&wc.shared);
	/* A non-empty shared cache here indicates leftover state — warn. */
3652 	if (!cache_tree_empty(&wc.shared))
3653 		fprintf(stderr, "warning line %d\n", __LINE__);
3655 		task_stop(ctx.info);
/*
 * Verify that every backref attached to @rec is internally consistent:
 * present in the extent tree, actually referenced (tree refs), with
 * matching ref counts, disk bytenr and byte length (data refs), and that
 * the per-backref ref counts sum to the extent item's global refcount.
 *
 * When @print_errs is set each mismatch is reported to stderr.
 * Returns nonzero if anything is wrong (the err accumulation lines are
 * elided here), so callers like maybe_free_extent_rec() use it with
 * print_errs == 0 as a pure predicate.
 */
3660 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3662 	struct list_head *cur = rec->backrefs.next;
3663 	struct extent_backref *back;
3664 	struct tree_backref *tback;
3665 	struct data_backref *dback;
3669 	while(cur != &rec->backrefs) {
3670 		back = list_entry(cur, struct extent_backref, list);
		/* Backref never seen in the extent tree at all. */
3672 		if (!back->found_extent_tree) {
3676 			if (back->is_data) {
3677 				dback = (struct data_backref *)back;
3678 				fprintf(stderr, "Backref %llu %s %llu"
3679 					" owner %llu offset %llu num_refs %lu"
3680 					" not found in extent tree\n",
3681 					(unsigned long long)rec->start,
3682 					back->full_backref ?
3684 					back->full_backref ?
3685 					(unsigned long long)dback->parent:
3686 					(unsigned long long)dback->root,
3687 					(unsigned long long)dback->owner,
3688 					(unsigned long long)dback->offset,
3689 					(unsigned long)dback->num_refs);
3691 				tback = (struct tree_backref *)back;
3692 				fprintf(stderr, "Backref %llu parent %llu"
3693 					" root %llu not found in extent tree\n",
3694 					(unsigned long long)rec->start,
3695 					(unsigned long long)tback->parent,
3696 					(unsigned long long)tback->root);
		/* Tree backref present in the extent tree but the block
		 * itself never referenced it back. */
3699 		if (!back->is_data && !back->found_ref) {
3703 			tback = (struct tree_backref *)back;
3704 			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3705 				(unsigned long long)rec->start,
3706 				back->full_backref ? "parent" : "root",
3707 				back->full_backref ?
3708 				(unsigned long long)tback->parent :
3709 				(unsigned long long)tback->root, back);
3711 		if (back->is_data) {
3712 			dback = (struct data_backref *)back;
			/* Local refcount of this data backref must match. */
3713 			if (dback->found_ref != dback->num_refs) {
3717 				fprintf(stderr, "Incorrect local backref count"
3718 					" on %llu %s %llu owner %llu"
3719 					" offset %llu found %u wanted %u back %p\n",
3720 					(unsigned long long)rec->start,
3721 					back->full_backref ?
3723 					back->full_backref ?
3724 					(unsigned long long)dback->parent:
3725 					(unsigned long long)dback->root,
3726 					(unsigned long long)dback->owner,
3727 					(unsigned long long)dback->offset,
3728 					dback->found_ref, dback->num_refs, back);
3730 			if (dback->disk_bytenr != rec->start) {
3734 				fprintf(stderr, "Backref disk bytenr does not"
3735 					" match extent record, bytenr=%llu, "
3736 					"ref bytenr=%llu\n",
3737 					(unsigned long long)rec->start,
3738 					(unsigned long long)dback->disk_bytenr);
3741 			if (dback->bytes != rec->nr) {
3745 				fprintf(stderr, "Backref bytes do not match "
3746 					"extent backref, bytenr=%llu, ref "
3747 					"bytes=%llu, backref bytes=%llu\n",
3748 					(unsigned long long)rec->start,
3749 					(unsigned long long)rec->nr,
3750 					(unsigned long long)dback->bytes);
		/* Accumulate total found refs: tree refs count once
		 * (elided), data refs contribute found_ref. */
3753 		if (!back->is_data) {
3756 			dback = (struct data_backref *)back;
3757 			found += dback->found_ref;
3760 	if (found != rec->refs) {
3764 		fprintf(stderr, "Incorrect global backref count "
3765 			"on %llu found %llu wanted %llu\n",
3766 			(unsigned long long)rec->start,
3767 			(unsigned long long)found,
3768 			(unsigned long long)rec->refs);
/*
 * Detach and free every backref queued on an extent record (the
 * list_del/free lines sit between the visible lines).
 */
3774 static int free_all_extent_backrefs(struct extent_record *rec)
3776 	struct extent_backref *back;
3777 	struct list_head *cur;
3778 	while (!list_empty(&rec->backrefs)) {
3779 		cur = rec->backrefs.next;
3780 		back = list_entry(cur, struct extent_backref, list);
/*
 * Drain @extent_cache completely, freeing each extent record and all of
 * its backrefs.
 */
3787 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3788 				     struct cache_tree *extent_cache)
3790 	struct cache_extent *cache;
3791 	struct extent_record *rec;
3794 		cache = first_cache_extent(extent_cache);
3797 		rec = container_of(cache, struct extent_record, cache);
3798 		remove_cache_extent(extent_cache, cache);
3799 		free_all_extent_backrefs(rec);
/*
 * Free an extent record early if it has been fully validated: content
 * and owner checked, refcounts matching and positive, no duplicates, all
 * backpointers consistent, and none of the structural error flags set.
 * Otherwise the record stays cached for later error reporting.
 */
3804 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3805 				 struct extent_record *rec)
3807 	if (rec->content_checked && rec->owner_ref_checked &&
3808 	    rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3809 	    rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3810 	    !rec->bad_full_backref && !rec->crossing_stripes &&
3811 	    !rec->wrong_chunk_type) {
3812 		remove_cache_extent(extent_cache, &rec->cache);
3813 		free_all_extent_backrefs(rec);
3814 		list_del_init(&rec->list);
/*
 * Verify that the owner stamped in @buf's header is legitimate: either a
 * non-full tree backref with a matching root exists on @rec, or the
 * block can actually be found at the expected slot when searching the
 * claimed owner tree one level up.
 *
 * Returns 0 when the owner checks out, 1 otherwise.
 */
3820 static int check_owner_ref(struct btrfs_root *root,
3821 			    struct extent_record *rec,
3822 			    struct extent_buffer *buf)
3824 	struct extent_backref *node;
3825 	struct tree_backref *back;
3826 	struct btrfs_root *ref_root;
3827 	struct btrfs_key key;
3828 	struct btrfs_path path;
3829 	struct extent_buffer *parent;
	/* Fast path: an existing root backref already names this owner. */
3834 	list_for_each_entry(node, &rec->backrefs, list) {
3837 		if (!node->found_ref)
3839 		if (node->full_backref)
3841 		back = (struct tree_backref *)node;
3842 		if (btrfs_header_owner(buf) == back->root)
3845 	BUG_ON(rec->is_root);
3847 	/* try to find the block by search corresponding fs tree */
3848 	key.objectid = btrfs_header_owner(buf);
3849 	key.type = BTRFS_ROOT_ITEM_KEY;
3850 	key.offset = (u64)-1;
3852 	ref_root = btrfs_read_fs_root(root->fs_info, &key);
3853 	if (IS_ERR(ref_root))
	/* Use the block's first key to locate it inside the owner tree. */
3856 	level = btrfs_header_level(buf);
3858 		btrfs_item_key_to_cpu(buf, &key, 0);
3860 		btrfs_node_key_to_cpu(buf, &key, 0);
3862 	btrfs_init_path(&path);
3863 	path.lowest_level = level + 1;
3864 	ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
	/* Owner confirmed if the parent node points at this very block. */
3868 	parent = path.nodes[level + 1];
3869 	if (parent && buf->start == btrfs_node_blockptr(parent,
3870 						path.slots[level + 1]))
3873 	btrfs_release_path(&path);
3874 	return found ? 0 : 1;
/*
 * Return whether @rec carries a non-full tree backref rooted at the
 * extent tree, i.e. the recorded block belongs to the extent tree
 * itself.  (Return statements sit on elided lines.)
 */
3877 static int is_extent_tree_record(struct extent_record *rec)
3879 	struct list_head *cur = rec->backrefs.next;
3880 	struct extent_backref *node;
3881 	struct tree_backref *back;
3884 	while(cur != &rec->backrefs) {
3885 		node = list_entry(cur, struct extent_backref, list);
3889 		back = (struct tree_backref *)node;
3890 		if (node->full_backref)
3892 		if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * An I/O error occurred reading [start, start+len).  If the range maps
 * to an extent-tree block, record it as a corrupt extent so a later
 * repair pass (using the parent key) can deal with it.
 */
3899 static int record_bad_block_io(struct btrfs_fs_info *info,
3900 			       struct cache_tree *extent_cache,
3903 	struct extent_record *rec;
3904 	struct cache_extent *cache;
3905 	struct btrfs_key key;
3907 	cache = lookup_cache_extent(extent_cache, start, len);
3911 	rec = container_of(cache, struct extent_record, cache);
	/* Only extent-tree blocks are handled here. */
3912 	if (!is_extent_tree_record(rec))
3915 	btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3916 	return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap two adjacent entries (@slot and @slot + 1) in @buf to restore key
 * ordering.  For internal nodes the key_ptrs are exchanged wholesale;
 * for leaves both the item data and the item headers' offset/size fields
 * are swapped, then the keys themselves via
 * btrfs_set_item_key_unsafe().  When slot 0 changes, the parent keys are
 * fixed up through the path.
 */
3919 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3920 		       struct extent_buffer *buf, int slot)
3922 	if (btrfs_header_level(buf)) {
3923 		struct btrfs_key_ptr ptr1, ptr2;
		/* Internal node: swap the two key_ptrs in place. */
3925 		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3926 				   sizeof(struct btrfs_key_ptr));
3927 		read_extent_buffer(buf, &ptr2,
3928 				   btrfs_node_key_ptr_offset(slot + 1),
3929 				   sizeof(struct btrfs_key_ptr));
3930 		write_extent_buffer(buf, &ptr1,
3931 				    btrfs_node_key_ptr_offset(slot + 1),
3932 				    sizeof(struct btrfs_key_ptr));
3933 		write_extent_buffer(buf, &ptr2,
3934 				    btrfs_node_key_ptr_offset(slot),
3935 				    sizeof(struct btrfs_key_ptr));
			/* Slot 0 changed: propagate the new low key upward. */
3937 			struct btrfs_disk_key key;
3938 			btrfs_node_key(buf, &key, 0);
3939 			btrfs_fixup_low_keys(root, path, &key,
3940 					     btrfs_header_level(buf) + 1);
3943 		struct btrfs_item *item1, *item2;
3944 		struct btrfs_key k1, k2;
3945 		char *item1_data, *item2_data;
3946 		u32 item1_offset, item2_offset, item1_size, item2_size;
3948 		item1 = btrfs_item_nr(slot);
3949 		item2 = btrfs_item_nr(slot + 1);
3950 		btrfs_item_key_to_cpu(buf, &k1, slot);
3951 		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3952 		item1_offset = btrfs_item_offset(buf, item1);
3953 		item2_offset = btrfs_item_offset(buf, item2);
3954 		item1_size = btrfs_item_size(buf, item1);
3955 		item2_size = btrfs_item_size(buf, item2);
3957 		item1_data = malloc(item1_size);
3960 		item2_data = malloc(item2_size);
		/* Swap the item payloads through temporary buffers. */
3966 		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3967 		read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3969 		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3970 		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		/* Swap the headers' offset/size so they track the moved data. */
3974 		btrfs_set_item_offset(buf, item1, item2_offset);
3975 		btrfs_set_item_offset(buf, item2, item1_offset);
3976 		btrfs_set_item_size(buf, item1, item2_size);
3977 		btrfs_set_item_size(buf, item2, item1_size);
3979 		path->slots[0] = slot;
3980 		btrfs_set_item_key_unsafe(root, path, &k2);
3981 		path->slots[0] = slot + 1;
3982 		btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Restore ascending key order in the block at path->lowest_level by
 * swapping each adjacent out-of-order pair via swap_values() and marking
 * the buffer dirty.  A single pass over adjacent pairs; keys already in
 * order (comp < 0) are skipped.
 */
3987 static int fix_key_order(struct btrfs_trans_handle *trans,
3988 			 struct btrfs_root *root,
3989 			 struct btrfs_path *path)
3991 	struct extent_buffer *buf;
3992 	struct btrfs_key k1, k2;
3994 	int level = path->lowest_level;
3997 	buf = path->nodes[level];
3998 	for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
		/* Node vs. leaf use different key accessors. */
4000 			btrfs_node_key_to_cpu(buf, &k1, i);
4001 			btrfs_node_key_to_cpu(buf, &k2, i + 1);
4003 			btrfs_item_key_to_cpu(buf, &k1, i);
4004 			btrfs_item_key_to_cpu(buf, &k2, i + 1);
4006 		if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4008 		ret = swap_values(root, path, buf, i);
4011 		btrfs_mark_buffer_dirty(buf);
/*
 * Remove a corrupt item from a leaf by shifting the later item headers
 * down over it and decrementing nritems — but only for key types whose
 * loss is recoverable by a later fsck pass (dir indexes, extent items
 * and refs).  Fixes up the parent's low key if slot 0 was removed.
 */
4017 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4018 			     struct btrfs_root *root,
4019 			     struct btrfs_path *path,
4020 			     struct extent_buffer *buf, int slot)
4022 	struct btrfs_key key;
4023 	int nritems = btrfs_header_nritems(buf);
4025 	btrfs_item_key_to_cpu(buf, &key, slot);
4027 	/* These are all the keys we can deal with missing. */
4028 	if (key.type != BTRFS_DIR_INDEX_KEY &&
4029 	    key.type != BTRFS_EXTENT_ITEM_KEY &&
4030 	    key.type != BTRFS_METADATA_ITEM_KEY &&
4031 	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4032 	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
4035 	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4036 	       (unsigned long long)key.objectid, key.type,
4037 	       (unsigned long long)key.offset, slot, buf->start);
	/* Shift later item headers down one slot; data area untouched. */
4038 	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4039 			      btrfs_item_nr_offset(slot + 1),
4040 			      sizeof(struct btrfs_item) *
4041 			      (nritems - slot - 1));
4042 	btrfs_set_header_nritems(buf, nritems - 1);
		/* Slot 0 removed: parent's low key must be refreshed. */
4044 		struct btrfs_disk_key disk_key;
4046 		btrfs_item_key(buf, &disk_key, 0);
4047 		btrfs_fixup_low_keys(root, path, &disk_key, 1);
4049 	btrfs_mark_buffer_dirty(buf);
/*
 * Repair a leaf whose item data offsets are inconsistent.  Walks the
 * items in order; whenever an item's data does not butt up against the
 * previous item's data (or the leaf end for item 0), either the bogus
 * item is deleted (when its end overshoots) or its data is shifted into
 * place and the header offset adjusted.
 */
4053 static int fix_item_offset(struct btrfs_trans_handle *trans,
4054 			   struct btrfs_root *root,
4055 			   struct btrfs_path *path)
4057 	struct extent_buffer *buf;
4061 	/* We should only get this for leaves */
4062 	BUG_ON(path->lowest_level);
4063 	buf = path->nodes[0];
4065 	for (i = 0; i < btrfs_header_nritems(buf); i++) {
4066 		unsigned int shift = 0, offset;
		/* Item 0: data must end exactly at the leaf data end. */
4068 		if (i == 0 && btrfs_item_end_nr(buf, i) !=
4069 		    BTRFS_LEAF_DATA_SIZE(root)) {
4070 			if (btrfs_item_end_nr(buf, i) >
4071 			    BTRFS_LEAF_DATA_SIZE(root)) {
4072 				ret = delete_bogus_item(trans, root, path,
4076 				fprintf(stderr, "item is off the end of the "
4077 					"leaf, can't fix\n");
4081 			shift = BTRFS_LEAF_DATA_SIZE(root) -
4082 				btrfs_item_end_nr(buf, i);
		/* Item i: data must end where item i-1's data starts. */
4083 		} else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4084 			   btrfs_item_offset_nr(buf, i - 1)) {
4085 			if (btrfs_item_end_nr(buf, i) >
4086 			    btrfs_item_offset_nr(buf, i - 1)) {
4087 				ret = delete_bogus_item(trans, root, path,
4091 				fprintf(stderr, "items overlap, can't fix\n");
4095 			shift = btrfs_item_offset_nr(buf, i - 1) -
4096 				btrfs_item_end_nr(buf, i);
4101 		printf("Shifting item nr %d by %u bytes in block %llu\n",
4102 		       i, shift, (unsigned long long)buf->start);
4103 		offset = btrfs_item_offset_nr(buf, i);
		/* Move the data up by 'shift' and record the new offset. */
4104 		memmove_extent_buffer(buf,
4105 				      btrfs_leaf_data(buf) + offset + shift,
4106 				      btrfs_leaf_data(buf) + offset,
4107 				      btrfs_item_size_nr(buf, i));
4108 		btrfs_set_item_offset(buf, btrfs_item_nr(i),
4110 		btrfs_mark_buffer_dirty(buf);
4114 	 * We may have moved things, in which case we want to exit so we don't
4115 	 * write those changes out. Once we have proper abort functionality in
4116 	 * progs this can be changed to something nicer.
4123 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4124 * then just return -EIO.
/*
 * Only two failure classes are repairable: bad key order (fixed by
 * fix_key_order) and invalid item offsets (fixed by fix_item_offset).
 * For each root that references the block (found via backref walking),
 * a transaction is started, the block is located with a search that
 * skips the usual block checks, the fix is applied, and the
 * transaction is committed.
 *
 * NOTE(review): several error checks between the visible lines are
 * elided in this excerpt; code left byte-identical.
 */
4126 static int try_to_fix_bad_block(struct btrfs_root *root,
4127 struct extent_buffer *buf,
4128 enum btrfs_tree_block_status status)
4130 struct btrfs_trans_handle *trans;
4131 struct ulist *roots;
4132 struct ulist_node *node;
4133 struct btrfs_root *search_root;
4134 struct btrfs_path *path;
4135 struct ulist_iterator iter;
4136 struct btrfs_key root_key, key;
/* Anything other than these two statuses cannot be repaired here. */
4139 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4140 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4143 path = btrfs_alloc_path();
/* Collect every root that holds a reference to this block. */
4147 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4150 btrfs_free_path(path);
4154 ULIST_ITER_INIT(&iter);
4155 while ((node = ulist_next(roots, &iter))) {
4156 root_key.objectid = node->val;
4157 root_key.type = BTRFS_ROOT_ITEM_KEY;
4158 root_key.offset = (u64)-1;
4160 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4167 trans = btrfs_start_transaction(search_root, 0);
4168 if (IS_ERR(trans)) {
4169 ret = PTR_ERR(trans);
/*
 * Search down to the bad block's own level with block checking
 * disabled, so we can reach the corrupt buffer to repair it.
 */
4173 path->lowest_level = btrfs_header_level(buf);
4174 path->skip_check_block = 1;
4175 if (path->lowest_level)
4176 btrfs_node_key_to_cpu(buf, &key, 0);
4178 btrfs_item_key_to_cpu(buf, &key, 0);
4179 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4182 btrfs_commit_transaction(trans, search_root);
4185 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4186 ret = fix_key_order(trans, search_root, path);
4187 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4188 ret = fix_item_offset(trans, search_root, path);
4190 btrfs_commit_transaction(trans, search_root);
4193 btrfs_release_path(path);
4194 btrfs_commit_transaction(trans, search_root);
4197 btrfs_free_path(path);
/*
 * Validate one tree block against its extent record.
 *
 * Looks up the block's extent_record in the cache, records the block's
 * generation, first key and level, then runs btrfs_check_leaf() or
 * btrfs_check_node() depending on the block type.  If the check fails
 * and repair mode is on (condition elided in this excerpt), a repair is
 * attempted via try_to_fix_bad_block().  On success the record is
 * marked content-checked (and owner-ref-checked for full-backref
 * blocks, otherwise check_owner_ref() decides).
 */
4201 static int check_block(struct btrfs_root *root,
4202 struct cache_tree *extent_cache,
4203 struct extent_buffer *buf, u64 flags)
4205 struct extent_record *rec;
4206 struct cache_extent *cache;
4207 struct btrfs_key key;
4208 enum btrfs_tree_block_status status;
4212 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4215 rec = container_of(cache, struct extent_record, cache);
4216 rec->generation = btrfs_header_generation(buf);
4218 level = btrfs_header_level(buf);
4219 if (btrfs_header_nritems(buf) > 0) {
/* Record the block's first key for later parent-key cross checks. */
4222 btrfs_item_key_to_cpu(buf, &key, 0);
4224 btrfs_node_key_to_cpu(buf, &key, 0);
4226 rec->info_objectid = key.objectid;
4228 rec->info_level = level;
4230 if (btrfs_is_leaf(buf))
4231 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4233 status = btrfs_check_node(root, &rec->parent_key, buf);
4235 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/* presumably gated on the global 'repair' flag — line elided, confirm */
4237 status = try_to_fix_bad_block(root, buf, status);
4238 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4240 fprintf(stderr, "bad block %llu\n",
4241 (unsigned long long)buf->start);
4244 * Signal to callers we need to start the scan over
4245 * again since we'll have cow'ed blocks.
4250 rec->content_checked = 1;
4251 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4252 rec->owner_ref_checked = 1;
4254 ret = check_owner_ref(root, rec, buf);
4256 rec->owner_ref_checked = 1;
4260 maybe_free_extent_rec(extent_cache, rec);
/*
 * Find an existing tree backref on an extent record.
 *
 * Full backrefs match on parent bytenr; non-full (keyed) backrefs match
 * on root objectid.  Walks rec->backrefs linearly and returns the first
 * match, or NULL (return statements elided in this excerpt).
 */
4264 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4265 u64 parent, u64 root)
4267 struct list_head *cur = rec->backrefs.next;
4268 struct extent_backref *node;
4269 struct tree_backref *back;
4271 while(cur != &rec->backrefs) {
4272 node = list_entry(cur, struct extent_backref, list);
/* Skip data backrefs; only tree backrefs are candidates here. */
4276 back = (struct tree_backref *)node;
4278 if (!node->full_backref)
4280 if (parent == back->parent)
4283 if (node->full_backref)
4285 if (back->root == root)
/*
 * Allocate a new tree backref and link it onto rec->backrefs.
 *
 * If 'parent' is non-zero the ref is a full backref keyed by parent
 * bytenr, otherwise it is keyed by root objectid (the branch lines are
 * elided in this excerpt).
 *
 * NOTE(review): malloc() result is memset() without a visible NULL
 * check — a check may exist on an elided line; confirm upstream.
 */
4292 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4293 u64 parent, u64 root)
4295 struct tree_backref *ref = malloc(sizeof(*ref));
4296 memset(&ref->node, 0, sizeof(ref->node));
4298 ref->parent = parent;
4299 ref->node.full_backref = 1;
4302 ref->node.full_backref = 0;
4304 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Find an existing data backref on an extent record.
 *
 * Full backrefs match on parent bytenr; keyed backrefs match on
 * (root, owner, offset).  When found_ref is set and the candidate has
 * already been seen as a real ref, the on-disk size/bytenr must also
 * match — otherwise the caller allocates a fresh backref so mismatching
 * file extents can be detected later.
 */
4309 static struct data_backref *find_data_backref(struct extent_record *rec,
4310 u64 parent, u64 root,
4311 u64 owner, u64 offset,
4313 u64 disk_bytenr, u64 bytes)
4315 struct list_head *cur = rec->backrefs.next;
4316 struct extent_backref *node;
4317 struct data_backref *back;
4319 while(cur != &rec->backrefs) {
4320 node = list_entry(cur, struct extent_backref, list);
/* Skip tree backrefs; only data backrefs are candidates here. */
4324 back = (struct data_backref *)node;
4326 if (!node->full_backref)
4328 if (parent == back->parent)
4331 if (node->full_backref)
4333 if (back->root == root && back->owner == owner &&
4334 back->offset == offset) {
/* Same key but different extent geometry: not a match. */
4335 if (found_ref && node->found_ref &&
4336 (back->bytes != bytes ||
4337 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a new data backref and link it onto rec->backrefs.
 *
 * Non-zero 'parent' makes it a full backref; otherwise it is keyed by
 * (root, owner, offset).  The record's max_size is raised to cover the
 * new ref's max_size.
 *
 * NOTE(review): malloc() result is memset() without a visible NULL
 * check — a check may exist on an elided line; confirm upstream.
 */
4346 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4347 u64 parent, u64 root,
4348 u64 owner, u64 offset,
4351 struct data_backref *ref = malloc(sizeof(*ref));
4352 memset(&ref->node, 0, sizeof(ref->node));
4353 ref->node.is_data = 1;
4356 ref->parent = parent;
4359 ref->node.full_backref = 1;
4363 ref->offset = offset;
4364 ref->node.full_backref = 0;
4366 ref->bytes = max_size;
4369 list_add_tail(&ref->node.list, &rec->backrefs);
4370 if (max_size > rec->max_size)
4371 rec->max_size = max_size;
4375 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent lives in a block group of
 * the wrong flavour: data extents must be in DATA block groups;
 * metadata extents must be in METADATA or SYSTEM groups, and SYSTEM
 * membership is disambiguated by looking at the first tree backref
 * (chunk-tree root => SYSTEM, anything else => METADATA).
 */
4376 static void check_extent_type(struct extent_record *rec)
4378 struct btrfs_block_group_cache *bg_cache;
4380 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4384 /* data extent, check chunk directly*/
4385 if (!rec->metadata) {
4386 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4387 rec->wrong_chunk_type = 1;
4391 /* metadata extent, check the obvious case first */
4392 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4393 BTRFS_BLOCK_GROUP_METADATA))) {
4394 rec->wrong_chunk_type = 1;
4399 * Check SYSTEM extent, as it's also marked as metadata, we can only
4400 * make sure it's a SYSTEM extent by its backref
4402 if (!list_empty(&rec->backrefs)) {
4403 struct extent_backref *node;
4404 struct tree_backref *tback;
4407 node = list_entry(rec->backrefs.next, struct extent_backref,
4409 if (node->is_data) {
4410 /* tree block shouldn't have data backref */
4411 rec->wrong_chunk_type = 1;
4414 tback = container_of(node, struct tree_backref, node);
4416 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4417 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4419 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4420 if (!(bg_cache->flags & bg_type))
4421 rec->wrong_chunk_type = 1;
/*
 * Insert or update an extent record in the extent cache.
 *
 * If a record already covers [start, nr) it is updated in place:
 * duplicate/overlapping on-disk extent items are queued as "dups" on
 * the record (and the record is put on the global duplicate_extents
 * list), reference counts, parent key/generation and max_size are
 * merged, and metadata extents are flagged if they cross a stripe
 * boundary.  Otherwise a fresh record is allocated, fully initialized,
 * and inserted into the cache tree.
 *
 * NOTE(review): many lines (dup bookkeeping, allocation checks, early
 * returns) are elided in this excerpt; code left byte-identical.
 */
4425 static int add_extent_rec(struct cache_tree *extent_cache,
4426 struct btrfs_key *parent_key, u64 parent_gen,
4427 u64 start, u64 nr, u64 extent_item_refs,
4428 int is_root, int inc_ref, int set_checked,
4429 int metadata, int extent_rec, u64 max_size)
4431 struct extent_record *rec;
4432 struct cache_extent *cache;
4436 cache = lookup_cache_extent(extent_cache, start, nr);
4438 rec = container_of(cache, struct extent_record, cache);
4442 rec->nr = max(nr, max_size);
4445 * We need to make sure to reset nr to whatever the extent
4446 * record says was the real size, this way we can compare it to
/* A second extent item for the same range: track it as a duplicate. */
4450 if (start != rec->start || rec->found_rec) {
4451 struct extent_record *tmp;
4454 if (list_empty(&rec->list))
4455 list_add_tail(&rec->list,
4456 &duplicate_extents);
4459 * We have to do this song and dance in case we
4460 * find an extent record that falls inside of
4461 * our current extent record but does not have
4462 * the same objectid.
4464 tmp = malloc(sizeof(*tmp));
4468 tmp->max_size = max_size;
4471 tmp->metadata = metadata;
4472 tmp->extent_item_refs = extent_item_refs;
4473 INIT_LIST_HEAD(&tmp->list);
4474 list_add_tail(&tmp->list, &rec->dups);
4475 rec->num_duplicates++;
4482 if (extent_item_refs && !dup) {
4483 if (rec->extent_item_refs) {
/* Two extent items disagree about refcount — report, keep the new one. */
4484 fprintf(stderr, "block %llu rec "
4485 "extent_item_refs %llu, passed %llu\n",
4486 (unsigned long long)start,
4487 (unsigned long long)
4488 rec->extent_item_refs,
4489 (unsigned long long)extent_item_refs);
4491 rec->extent_item_refs = extent_item_refs;
4496 rec->content_checked = 1;
4497 rec->owner_ref_checked = 1;
4501 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4503 rec->parent_generation = parent_gen;
4505 if (rec->max_size < max_size)
4506 rec->max_size = max_size;
4509 * A metadata extent can't cross stripe_len boundary, otherwise
4510 * kernel scrub won't be able to handle it.
4511 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4514 if (metadata && check_crossing_stripes(rec->start,
4516 rec->crossing_stripes = 1;
4517 check_extent_type(rec);
4518 maybe_free_extent_rec(extent_cache, rec);
/* No existing record: allocate and initialize a new one. */
4521 rec = malloc(sizeof(*rec));
4523 rec->max_size = max_size;
4524 rec->nr = max(nr, max_size);
4525 rec->found_rec = !!extent_rec;
4526 rec->content_checked = 0;
4527 rec->owner_ref_checked = 0;
4528 rec->num_duplicates = 0;
4529 rec->metadata = metadata;
4530 rec->flag_block_full_backref = -1;
4531 rec->bad_full_backref = 0;
4532 rec->crossing_stripes = 0;
4533 rec->wrong_chunk_type = 0;
4534 INIT_LIST_HEAD(&rec->backrefs);
4535 INIT_LIST_HEAD(&rec->dups);
4536 INIT_LIST_HEAD(&rec->list);
4548 if (extent_item_refs)
4549 rec->extent_item_refs = extent_item_refs;
4551 rec->extent_item_refs = 0;
4554 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4556 memset(&rec->parent_key, 0, sizeof(*parent_key));
4559 rec->parent_generation = parent_gen;
4561 rec->parent_generation = 0;
4563 rec->cache.start = start;
4564 rec->cache.size = nr;
4565 ret = insert_cache_extent(extent_cache, &rec->cache);
4569 rec->content_checked = 1;
4570 rec->owner_ref_checked = 1;
4574 if (check_crossing_stripes(rec->start, rec->max_size))
4575 rec->crossing_stripes = 1;
4576 check_extent_type(rec);
/*
 * Record a tree-block backref for 'bytenr'.
 *
 * Ensures an extent record exists (creating a 1-byte placeholder via
 * add_extent_rec() if needed), then finds or allocates the matching
 * tree backref.  'found_ref' distinguishes refs seen while walking the
 * trees themselves from refs read out of the extent tree; seeing the
 * same kind twice is reported as a duplicate.
 */
4580 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4581 u64 parent, u64 root, int found_ref)
4583 struct extent_record *rec;
4584 struct tree_backref *back;
4585 struct cache_extent *cache;
4587 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: create a placeholder so the backref has a home. */
4589 add_extent_rec(extent_cache, NULL, 0, bytenr,
4590 1, 0, 0, 0, 0, 1, 0, 0);
4591 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4596 rec = container_of(cache, struct extent_record, cache);
4597 if (rec->start != bytenr) {
4601 back = find_tree_backref(rec, parent, root);
4603 back = alloc_tree_backref(rec, parent, root);
4606 if (back->node.found_ref) {
4607 fprintf(stderr, "Extent back ref already exists "
4608 "for %llu parent %llu root %llu \n",
4609 (unsigned long long)bytenr,
4610 (unsigned long long)parent,
4611 (unsigned long long)root);
4613 back->node.found_ref = 1;
4615 if (back->node.found_extent_tree) {
4616 fprintf(stderr, "Extent back ref already exists "
4617 "for %llu parent %llu root %llu \n",
4618 (unsigned long long)bytenr,
4619 (unsigned long long)parent,
4620 (unsigned long long)root);
4622 back->node.found_extent_tree = 1;
4624 check_extent_type(rec);
4625 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data-extent backref for 'bytenr'.
 *
 * Like add_tree_backref() but for file data: ensures an extent record
 * exists, finds or allocates the matching data backref, and merges in
 * either a "found while walking fs trees" ref (found_ref, where
 * max_size is the real extent size) or a refcount read from the extent
 * tree (num_refs).  Duplicate extent-tree refs are reported.
 */
4629 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4630 u64 parent, u64 root, u64 owner, u64 offset,
4631 u32 num_refs, int found_ref, u64 max_size)
4633 struct extent_record *rec;
4634 struct data_backref *back;
4635 struct cache_extent *cache;
4637 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4639 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4641 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4646 rec = container_of(cache, struct extent_record, cache);
4647 if (rec->max_size < max_size)
4648 rec->max_size = max_size;
4651 * If found_ref is set then max_size is the real size and must match the
4652 * existing refs. So if we have already found a ref then we need to
4653 * make sure that this ref matches the existing one, otherwise we need
4654 * to add a new backref so we can notice that the backrefs don't match
4655 * and we need to figure out who is telling the truth. This is to
4656 * account for that awful fsync bug I introduced where we'd end up with
4657 * a btrfs_file_extent_item that would have its length include multiple
4658 * prealloc extents or point inside of a prealloc extent.
4660 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4663 back = alloc_data_backref(rec, parent, root, owner, offset,
4667 BUG_ON(num_refs != 1);
4668 if (back->node.found_ref)
4669 BUG_ON(back->bytes != max_size);
4670 back->node.found_ref = 1;
4671 back->found_ref += 1;
4672 back->bytes = max_size;
4673 back->disk_bytenr = bytenr;
4675 rec->content_checked = 1;
4676 rec->owner_ref_checked = 1;
4678 if (back->node.found_extent_tree) {
4679 fprintf(stderr, "Extent back ref already exists "
4680 "for %llu parent %llu root %llu "
4681 "owner %llu offset %llu num_refs %lu\n",
4682 (unsigned long long)bytenr,
4683 (unsigned long long)parent,
4684 (unsigned long long)root,
4685 (unsigned long long)owner,
4686 (unsigned long long)offset,
4687 (unsigned long)num_refs);
4689 back->num_refs = num_refs;
4690 back->node.found_extent_tree = 1;
4692 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue a block for later processing, once only.
 *
 * Insertion into 'seen' acts as the dedup check: if the extent was
 * already seen, the pending queue is not touched.
 */
4696 static int add_pending(struct cache_tree *pending,
4697 struct cache_tree *seen, u64 bytenr, u32 size)
4700 ret = add_cache_extent(seen, bytenr, size);
4703 add_cache_extent(pending, bytenr, size);
/*
 * Pick the next batch of up to 'bits_nr' blocks to read.
 *
 * Priority: (1) anything in the readahead tree; (2) tree nodes near the
 * last position (searching from ~32K before 'last' to favour sequential
 * I/O); (3) plain pending extents.  When picking from 'nodes' and there
 * is room left, additional nearby pending extents (within 32K gaps) are
 * appended so reads can be batched.  Returns the number of bits filled
 * (return paths elided in this excerpt).
 */
4707 static int pick_next_pending(struct cache_tree *pending,
4708 struct cache_tree *reada,
4709 struct cache_tree *nodes,
4710 u64 last, struct block_info *bits, int bits_nr,
4713 unsigned long node_start = last;
4714 struct cache_extent *cache;
/* Readahead requests take absolute priority, one at a time. */
4717 cache = search_cache_extent(reada, 0);
4719 bits[0].start = cache->start;
4720 bits[0].size = cache->size;
/* Back up a little so we keep reading roughly sequentially. */
4725 if (node_start > 32768)
4726 node_start -= 32768;
4728 cache = search_cache_extent(nodes, node_start);
4730 cache = search_cache_extent(nodes, 0);
4733 cache = search_cache_extent(pending, 0);
4738 bits[ret].start = cache->start;
4739 bits[ret].size = cache->size;
4740 cache = next_cache_extent(cache);
4742 } while (cache && ret < bits_nr);
4748 bits[ret].start = cache->start;
4749 bits[ret].size = cache->size;
4750 cache = next_cache_extent(cache);
4752 } while (cache && ret < bits_nr);
/* Room to spare: pull in nearby pending extents to batch the I/O. */
4754 if (bits_nr - ret > 8) {
4755 u64 lookup = bits[0].start + bits[0].size;
4756 struct cache_extent *next;
4757 next = search_cache_extent(pending, lookup);
4759 if (next->start - lookup > 32768)
4761 bits[ret].start = next->start;
4762 bits[ret].size = next->size;
4763 lookup = next->start + next->size;
4767 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: unlink a chunk record from its
 * lists before it is freed (the free() call is elided in this excerpt).
 */
4775 static void free_chunk_record(struct cache_extent *cache)
4777 struct chunk_record *rec;
4779 rec = container_of(cache, struct chunk_record, cache);
4780 list_del_init(&rec->list);
4781 list_del_init(&rec->dextents);
/* Free every chunk record in the cache tree. */
4785 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4787 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * rb-tree destructor callback: recover the device_record from its
 * rb_node (the free() call is elided in this excerpt).
 */
4790 static void free_device_record(struct rb_node *node)
4792 struct device_record *rec;
4794 rec = container_of(node, struct device_record, node);
/* Tear down the whole device cache rb-tree, freeing each record. */
4798 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert a block-group record into the tree's cache and, on success,
 * append it to the tree's block_groups list (error path elided).
 */
4800 int insert_block_group_record(struct block_group_tree *tree,
4801 struct block_group_record *bg_rec)
4805 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4809 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: unlink a block-group record
 * before it is freed (the free() call is elided in this excerpt).
 */
4813 static void free_block_group_record(struct cache_extent *cache)
4815 struct block_group_record *rec;
4817 rec = container_of(cache, struct block_group_record, cache);
4818 list_del_init(&rec->list);
/* Free every block-group record in the tree. */
4822 void free_block_group_tree(struct block_group_tree *tree)
4824 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert a device-extent record, then put it on both orphan lists;
 * it is removed from them once its chunk/device owners are matched up.
 */
4827 int insert_device_extent_record(struct device_extent_tree *tree,
4828 struct device_extent_record *de_rec)
4833 * Device extent is a bit different from the other extents, because
4834 * the extents which belong to the different devices may have the
4835 * same start and size, so we need use the special extent cache
4836 * search/insert functions.
4838 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4842 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4843 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: unlink a device-extent record
 * from whichever lists it is still on (the free() call is elided).
 */
4847 static void free_device_extent_record(struct cache_extent *cache)
4849 struct device_extent_record *rec;
4851 rec = container_of(cache, struct device_extent_record, cache);
4852 if (!list_empty(&rec->chunk_list))
4853 list_del_init(&rec->chunk_list);
4854 if (!list_empty(&rec->device_list))
4855 list_del_init(&rec->device_list);
/* Free every device-extent record in the tree. */
4859 void free_device_extent_tree(struct device_extent_tree *tree)
4861 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 (pre-2.6.31 format) extent ref item: refs owned by tree
 * roots (objectid below FIRST_FREE_OBJECTID) become tree backrefs,
 * everything else becomes a data backref with the v0 refcount.
 */
4865 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4866 struct extent_buffer *leaf, int slot)
4868 struct btrfs_extent_ref_v0 *ref0;
4869 struct btrfs_key key;
4871 btrfs_item_key_to_cpu(leaf, &key, slot);
4872 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4873 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4874 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4876 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4877 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from a CHUNK_ITEM leaf entry.
 *
 * Allocates a variable-size record (header plus one stripe entry per
 * on-disk stripe), copies all chunk fields and per-stripe devid/offset/
 * dev_uuid, and keys the cache entry by the chunk's logical start
 * (key->offset) and length.  Exits the program on allocation failure.
 */
4883 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4884 struct btrfs_key *key,
4887 struct btrfs_chunk *ptr;
4888 struct chunk_record *rec;
4891 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4892 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4894 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4896 fprintf(stderr, "memory allocation failed\n");
4900 INIT_LIST_HEAD(&rec->list);
4901 INIT_LIST_HEAD(&rec->dextents);
4904 rec->cache.start = key->offset;
4905 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4907 rec->generation = btrfs_header_generation(leaf);
4909 rec->objectid = key->objectid;
4910 rec->type = key->type;
4911 rec->offset = key->offset;
4913 rec->length = rec->cache.size;
4914 rec->owner = btrfs_chunk_owner(leaf, ptr);
4915 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4916 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4917 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4918 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4919 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4920 rec->num_stripes = num_stripes;
4921 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy each stripe's device id, physical offset and device UUID. */
4923 for (i = 0; i < rec->num_stripes; ++i) {
4924 rec->stripes[i].devid =
4925 btrfs_stripe_devid_nr(leaf, ptr, i);
4926 rec->stripes[i].offset =
4927 btrfs_stripe_offset_nr(leaf, ptr, i);
4928 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4929 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse one CHUNK_ITEM and insert the resulting record into the chunk
 * cache; duplicates are reported (the record free on the error path is
 * elided in this excerpt).
 */
4936 static int process_chunk_item(struct cache_tree *chunk_cache,
4937 struct btrfs_key *key, struct extent_buffer *eb,
4940 struct chunk_record *rec;
4943 rec = btrfs_new_chunk_record(eb, key, slot);
4944 ret = insert_cache_extent(chunk_cache, &rec->cache);
4946 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4947 rec->offset, rec->length);
/*
 * Parse one DEV_ITEM into a device_record and insert it into the
 * rb-tree device cache keyed by devid; duplicates are reported.
 * Exits the program on allocation failure.
 */
4954 static int process_device_item(struct rb_root *dev_cache,
4955 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4957 struct btrfs_dev_item *ptr;
4958 struct device_record *rec;
4961 ptr = btrfs_item_ptr(eb,
4962 slot, struct btrfs_dev_item);
4964 rec = malloc(sizeof(*rec));
4966 fprintf(stderr, "memory allocation failed\n");
4970 rec->devid = key->offset;
4971 rec->generation = btrfs_header_generation(eb);
4973 rec->objectid = key->objectid;
4974 rec->type = key->type;
4975 rec->offset = key->offset;
/* devid is re-read from the item itself, superseding key->offset. */
4977 rec->devid = btrfs_device_id(eb, ptr);
4978 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4979 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4981 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4983 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from a BLOCK_GROUP_ITEM,
 * keyed by the group's start (key->objectid) and length (key->offset).
 * Exits the program on allocation failure.
 */
4990 struct block_group_record *
4991 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4994 struct btrfs_block_group_item *ptr;
4995 struct block_group_record *rec;
4997 rec = calloc(1, sizeof(*rec));
4999 fprintf(stderr, "memory allocation failed\n");
5003 rec->cache.start = key->objectid;
5004 rec->cache.size = key->offset;
5006 rec->generation = btrfs_header_generation(leaf);
5008 rec->objectid = key->objectid;
5009 rec->type = key->type;
5010 rec->offset = key->offset;
5012 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5013 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5015 INIT_LIST_HEAD(&rec->list);
/*
 * Parse one BLOCK_GROUP_ITEM and insert the record into the
 * block-group tree; duplicates are reported (the record free on the
 * error path is elided in this excerpt).
 */
5020 static int process_block_group_item(struct block_group_tree *block_group_cache,
5021 struct btrfs_key *key,
5022 struct extent_buffer *eb, int slot)
5024 struct block_group_record *rec;
5027 rec = btrfs_new_block_group_record(eb, key, slot);
5028 ret = insert_block_group_record(block_group_cache, rec);
5030 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5031 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from a DEV_EXTENT item,
 * keyed by (devid, physical offset, length).  Exits the program on
 * allocation failure.
 */
5038 struct device_extent_record *
5039 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5040 struct btrfs_key *key, int slot)
5042 struct device_extent_record *rec;
5043 struct btrfs_dev_extent *ptr;
5045 rec = calloc(1, sizeof(*rec));
5047 fprintf(stderr, "memory allocation failed\n");
5051 rec->cache.objectid = key->objectid;
5052 rec->cache.start = key->offset;
5054 rec->generation = btrfs_header_generation(leaf);
5056 rec->objectid = key->objectid;
5057 rec->type = key->type;
5058 rec->offset = key->offset;
5060 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5061 rec->chunk_objecteid =
5062 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5064 btrfs_dev_extent_chunk_offset(leaf, ptr);
5065 rec->length = btrfs_dev_extent_length(leaf, ptr);
5066 rec->cache.size = rec->length;
5068 INIT_LIST_HEAD(&rec->chunk_list);
5069 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse one DEV_EXTENT item and insert the record into the
 * device-extent tree; duplicates are reported (record free on the
 * error path is elided in this excerpt).
 */
5075 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5076 struct btrfs_key *key, struct extent_buffer *eb,
5079 struct device_extent_record *rec;
5082 rec = btrfs_new_device_extent_record(eb, key, slot);
5083 ret = insert_device_extent_record(dev_extent_cache, rec);
5086 "Device extent[%llu, %llu, %llu] existed.\n",
5087 rec->objectid, rec->offset, rec->length);
/*
 * Parse one EXTENT_ITEM / METADATA_ITEM from the extent tree.
 *
 * Registers the extent in the cache (handling the v0 item format when
 * compiled in), then walks the inline refs, turning each into a tree
 * or data backref.  For METADATA_ITEM the length is the tree-block size
 * (leafsize) rather than key.offset.
 */
5094 static int process_extent_item(struct btrfs_root *root,
5095 struct cache_tree *extent_cache,
5096 struct extent_buffer *eb, int slot)
5098 struct btrfs_extent_item *ei;
5099 struct btrfs_extent_inline_ref *iref;
5100 struct btrfs_extent_data_ref *dref;
5101 struct btrfs_shared_data_ref *sref;
5102 struct btrfs_key key;
5106 u32 item_size = btrfs_item_size_nr(eb, slot);
5112 btrfs_item_key_to_cpu(eb, &key, slot);
5114 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* Skinny metadata item: offset is the level, size is one tree block. */
5116 num_bytes = root->leafsize;
5118 num_bytes = key.offset;
/* Old v0 extent items are smaller than the current struct. */
5121 if (item_size < sizeof(*ei)) {
5122 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5123 struct btrfs_extent_item_v0 *ei0;
5124 BUG_ON(item_size != sizeof(*ei0));
5125 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5126 refs = btrfs_extent_refs_v0(eb, ei0);
5130 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5131 num_bytes, refs, 0, 0, 0, metadata, 1,
5135 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5136 refs = btrfs_extent_refs(eb, ei);
5138 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5139 refs, 0, 0, 0, metadata, 1, num_bytes);
/* Walk the inline refs that follow the extent item. */
5141 ptr = (unsigned long)(ei + 1);
5142 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5143 key.type == BTRFS_EXTENT_ITEM_KEY)
5144 ptr += sizeof(struct btrfs_tree_block_info);
5146 end = (unsigned long)ei + item_size;
5148 iref = (struct btrfs_extent_inline_ref *)ptr;
5149 type = btrfs_extent_inline_ref_type(eb, iref);
5150 offset = btrfs_extent_inline_ref_offset(eb, iref);
5152 case BTRFS_TREE_BLOCK_REF_KEY:
5153 add_tree_backref(extent_cache, key.objectid,
5156 case BTRFS_SHARED_BLOCK_REF_KEY:
5157 add_tree_backref(extent_cache, key.objectid,
5160 case BTRFS_EXTENT_DATA_REF_KEY:
5161 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5162 add_data_backref(extent_cache, key.objectid, 0,
5163 btrfs_extent_data_ref_root(eb, dref),
5164 btrfs_extent_data_ref_objectid(eb,
5166 btrfs_extent_data_ref_offset(eb, dref),
5167 btrfs_extent_data_ref_count(eb, dref),
5170 case BTRFS_SHARED_DATA_REF_KEY:
5171 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5172 add_data_backref(extent_cache, key.objectid, offset,
5174 btrfs_shared_data_ref_count(eb, sref),
5178 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5179 key.objectid, key.type, num_bytes);
5182 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [offset, offset+bytes) is recorded as free in the block
 * group's free-space cache.
 *
 * First carves out any superblock mirror locations that land inside the
 * range (the super's stripe is never free space), recursing when a
 * mirror splits the range in two.  Then the remaining range must be
 * covered by exactly one free-space entry with matching offset and
 * size; that entry is consumed via unlink_free_space() so that a fully
 * verified cache ends up empty.
 */
5189 static int check_cache_range(struct btrfs_root *root,
5190 struct btrfs_block_group_cache *cache,
5191 u64 offset, u64 bytes)
5193 struct btrfs_free_space *entry;
5199 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5200 bytenr = btrfs_sb_offset(i);
/* Map the superblock mirror to its logical position in this group. */
5201 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5202 cache->key.objectid, bytenr, 0,
5203 &logical, &nr, &stripe_len);
5208 if (logical[nr] + stripe_len <= offset)
5210 if (offset + bytes <= logical[nr])
5212 if (logical[nr] == offset) {
5213 if (stripe_len >= bytes) {
5217 bytes -= stripe_len;
5218 offset += stripe_len;
5219 } else if (logical[nr] < offset) {
5220 if (logical[nr] + stripe_len >=
5225 bytes = (offset + bytes) -
5226 (logical[nr] + stripe_len);
5227 offset = logical[nr] + stripe_len;
5230 * Could be tricky, the super may land in the
5231 * middle of the area we're checking. First
5232 * check the easiest case, it's at the end.
5234 if (logical[nr] + stripe_len >=
5236 bytes = logical[nr] - offset;
5240 /* Check the left side */
5241 ret = check_cache_range(root, cache,
5243 logical[nr] - offset);
5249 /* Now we continue with the right side */
5250 bytes = (offset + bytes) -
5251 (logical[nr] + stripe_len);
5252 offset = logical[nr] + stripe_len;
5259 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5261 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5262 offset, offset+bytes);
5266 if (entry->offset != offset) {
5267 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5272 if (entry->bytes != bytes) {
5273 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5274 bytes, entry->bytes, offset);
/* Consume the verified entry so leftovers can be detected later. */
5278 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check one block group's free-space cache against the extent
 * tree.
 *
 * Walks all EXTENT/METADATA items inside the group; every gap between
 * consecutive allocated extents must appear as free space
 * (check_cache_range), and after the walk the cache must be fully
 * consumed — any leftover entries mean the cache claims space free that
 * the extent tree never accounted for.
 */
5283 static int verify_space_cache(struct btrfs_root *root,
5284 struct btrfs_block_group_cache *cache)
5286 struct btrfs_path *path;
5287 struct extent_buffer *leaf;
5288 struct btrfs_key key;
5292 path = btrfs_alloc_path();
5296 root = root->fs_info->extent_root;
/* Never look below the first superblock; that area is never free. */
5298 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5300 key.objectid = last;
5302 key.type = BTRFS_EXTENT_ITEM_KEY;
5304 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5309 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5310 ret = btrfs_next_leaf(root, path);
5318 leaf = path->nodes[0];
5319 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5320 if (key.objectid >= cache->key.offset + cache->key.objectid)
5322 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5323 key.type != BTRFS_METADATA_ITEM_KEY) {
5328 if (last == key.objectid) {
5329 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5330 last = key.objectid + key.offset;
/* Metadata items cover exactly one tree block. */
5332 last = key.objectid + root->leafsize;
/* Gap between 'last' and this extent must be cached as free. */
5337 ret = check_cache_range(root, cache, last,
5338 key.objectid - last);
5341 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5342 last = key.objectid + key.offset;
5344 last = key.objectid + root->leafsize;
/* Tail of the block group after the last extent. */
5348 if (last < cache->key.objectid + cache->key.offset)
5349 ret = check_cache_range(root, cache, last,
5350 cache->key.objectid +
5351 cache->key.offset - last);
5354 btrfs_free_path(path);
5357 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5358 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free-space cache of every block group.
 *
 * If the cache generation in the superblock does not match the fs
 * generation, the caches would be invalidated on mount anyway, so
 * checking is pointless (the early return is elided in this excerpt).
 * Otherwise each group's cache is loaded and verified; returns -EINVAL
 * if any group failed verification.
 */
5366 static int check_space_cache(struct btrfs_root *root)
5368 struct btrfs_block_group_cache *cache;
5369 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5373 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5374 btrfs_super_generation(root->fs_info->super_copy) !=
5375 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5376 printf("cache and super generation don't match, space cache "
5377 "will be invalidated\n");
5381 if (ctx.progress_enabled) {
5382 ctx.tp = TASK_FREE_SPACE;
5383 task_start(ctx.info);
5387 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5391 start = cache->key.objectid + cache->key.offset;
5392 if (!cache->free_space_ctl) {
5393 if (btrfs_init_free_space_ctl(cache,
5394 root->sectorsize)) {
/* Drop any stale entries before (re)loading from disk. */
5399 btrfs_remove_free_space_cache(cache);
5402 ret = load_free_space_cache(root->fs_info, cache);
5406 ret = verify_space_cache(root, cache);
5408 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5409 cache->key.objectid);
5414 task_stop(ctx.info);
5416 return error ? -EINVAL : 0;
/*
 * Verify the data checksums for one csum item's range.
 *
 * Reads the extent data (retrying other mirrors on mismatch), computes
 * a CRC per sectorsize chunk, and compares it against the expected
 * checksum stored at the corresponding offset inside the csum item in
 * 'eb'.  The range must be sector-aligned.
 *
 * NOTE(review): the mirror-retry bookkeeping and cleanup/return lines
 * are elided in this excerpt; code left byte-identical.
 */
5419 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5420 u64 num_bytes, unsigned long leaf_offset,
5421 struct extent_buffer *eb) {
5424 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5426 unsigned long csum_offset;
5430 u64 data_checked = 0;
5436 if (num_bytes % root->sectorsize)
5439 data = malloc(num_bytes);
5443 while (offset < num_bytes) {
5446 read_len = num_bytes - offset;
5447 /* read as much space once a time */
5448 ret = read_extent_data(root, data + offset,
5449 bytenr + offset, &read_len, mirror);
5453 /* verify every 4k data's checksum */
5454 while (data_checked < read_len) {
5456 tmp = offset + data_checked;
5458 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5459 csum, root->sectorsize);
5460 btrfs_csum_final(csum, (char *)&csum);
/* Expected csum lives in the csum item, one per sector. */
5462 csum_offset = leaf_offset +
5463 tmp / root->sectorsize * csum_size;
5464 read_extent_buffer(eb, (char *)&csum_expected,
5465 csum_offset, csum_size);
5466 /* try another mirror */
5467 if (csum != csum_expected) {
5468 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5469 mirror, bytenr + tmp,
5470 csum, csum_expected);
5471 num_copies = btrfs_num_copies(
5472 &root->fs_info->mapping_tree,
5474 if (mirror < num_copies - 1) {
5479 data_checked += root->sectorsize;
/*
 * Verify that the whole range [bytenr, bytenr+num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * Seeks to the last extent item at or before bytenr (stepping back over
 * block-group and ref items, which sort after EXTENT_ITEM at the same
 * objectid), then walks forward trimming the queried range as each
 * overlapping extent is found.  An extent strictly inside the range
 * splits it: the right half is checked recursively, the left half
 * continues in the loop.  Any residue means csums exist for
 * unallocated space.
 */
5488 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5491 struct btrfs_path *path;
5492 struct extent_buffer *leaf;
5493 struct btrfs_key key;
5496 path = btrfs_alloc_path();
5498 fprintf(stderr, "Error allocing path\n");
5502 key.objectid = bytenr;
5503 key.type = BTRFS_EXTENT_ITEM_KEY;
5504 key.offset = (u64)-1;
5507 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5510 fprintf(stderr, "Error looking up extent record %d\n", ret);
5511 btrfs_free_path(path);
5514 if (path->slots[0] > 0) {
5517 ret = btrfs_prev_leaf(root, path);
5520 } else if (ret > 0) {
5527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5530 * Block group items come before extent items if they have the same
5531 * bytenr, so walk back one more just in case. Dear future traveler,
5532 * first congrats on mastering time travel. Now if it's not too much
5533 * trouble could you go back to 2006 and tell Chris to make the
5534 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5535 * EXTENT_ITEM_KEY please?
5537 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5538 if (path->slots[0] > 0) {
5541 ret = btrfs_prev_leaf(root, path);
5544 } else if (ret > 0) {
5549 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5553 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5554 ret = btrfs_next_leaf(root, path);
5556 fprintf(stderr, "Error going to next leaf "
5558 btrfs_free_path(path);
5564 leaf = path->nodes[0];
5565 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5566 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5570 if (key.objectid + key.offset < bytenr) {
5574 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at bytenr: trim the covered prefix. */
5577 if (key.objectid == bytenr) {
5578 if (key.offset >= num_bytes) {
5582 num_bytes -= key.offset;
5583 bytenr += key.offset;
/* Extent starts before bytenr: trim up to its end. */
5584 } else if (key.objectid < bytenr) {
5585 if (key.objectid + key.offset >= bytenr + num_bytes) {
5589 num_bytes = (bytenr + num_bytes) -
5590 (key.objectid + key.offset);
5591 bytenr = key.objectid + key.offset;
5593 if (key.objectid + key.offset < bytenr + num_bytes) {
5594 u64 new_start = key.objectid + key.offset;
5595 u64 new_bytes = bytenr + num_bytes - new_start;
5598 * Weird case, the extent is in the middle of
5599 * our range, we'll have to search one side
5600 * and then the other. Not sure if this happens
5601 * in real life, but no harm in coding it up
5602 * anyway just in case.
5604 btrfs_release_path(path);
5605 ret = check_extent_exists(root, new_start,
5608 fprintf(stderr, "Right section didn't "
5612 num_bytes = key.objectid - bytenr;
5615 num_bytes = key.objectid - bytenr;
5622 if (num_bytes && !ret) {
5623 fprintf(stderr, "There are no extents for csum range "
5624 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5628 btrfs_free_path(path);
/*
 * check_csums: iterate every EXTENT_CSUM item in the csum tree.  For
 * each contiguous run of csums, verify a backing extent record exists
 * (check_extent_exists); when the global check_data_csum flag is set,
 * also re-read the data and verify the checksums (check_extent_csums).
 *
 * NOTE(review): interior lines are elided in this copy; loop structure
 * and several error paths between the visible statements are missing.
 */
5632 static int check_csums(struct btrfs_root *root)
5634 struct btrfs_path *path;
5635 struct extent_buffer *leaf;
5636 struct btrfs_key key;
5637 u64 offset = 0, num_bytes = 0;
5638 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5642 unsigned long leaf_offset;
/* All csum items live in the dedicated csum tree. */
5644 root = root->fs_info->csum_root;
5645 if (!extent_buffer_uptodate(root->node)) {
5646 fprintf(stderr, "No valid csum tree found\n");
5650 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5651 key.type = BTRFS_EXTENT_CSUM_KEY;
5654 path = btrfs_alloc_path();
5658 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5660 fprintf(stderr, "Error searching csum tree %d\n", ret);
5661 btrfs_free_path(path);
5665 if (ret > 0 && path->slots[0])
5670 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5671 ret = btrfs_next_leaf(root, path);
5673 fprintf(stderr, "Error going to next leaf "
5680 leaf = path->nodes[0];
5682 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5683 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum item covers (item_size / csum_size) sectors of data. */
5688 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5689 csum_size) * root->sectorsize;
5690 if (!check_data_csum)
5691 goto skip_csum_check;
5692 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5693 ret = check_extent_csums(root, key.offset, data_len,
/* Track contiguous csum coverage; key.offset is the data bytenr. */
5699 offset = key.offset;
5700 } else if (key.offset != offset + num_bytes) {
/* Gap in csum coverage: the previous run must map to real extents. */
5701 ret = check_extent_exists(root, offset, num_bytes);
5703 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5704 "there is no extent record\n",
5705 offset, offset+num_bytes);
5708 offset = key.offset;
5711 num_bytes += data_len;
5715 btrfs_free_path(path);
/*
 * is_dropped_key: compare *key against the drop_key of a partially
 * deleted snapshot, ordering by objectid, then type, then offset.
 * The visible comparisons test strictly-less-than at each level; the
 * return statements are elided in this copy — presumably returns
 * non-zero when the key sorts before drop_key (already dropped),
 * 0 otherwise.  TODO(review): confirm against upstream.
 */
5719 static int is_dropped_key(struct btrfs_key *key,
5720 struct btrfs_key *drop_key) {
5721 if (key->objectid < drop_key->objectid)
5723 else if (key->objectid == drop_key->objectid) {
5724 if (key->type < drop_key->type)
5726 else if (key->type == drop_key->type) {
5727 if (key->offset < drop_key->offset)
5735 * Here are the rules for FULL_BACKREF.
5737 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5738 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5740 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5741 * if it happened after the relocation occurred since we'll have dropped the
5742 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5743 * have no real way to know for sure.
5745 * We process the blocks one root at a time, and we start from the lowest root
5746 * objectid and go to the highest. So we can just lookup the owner backref for
5747 * the record and if we don't find it then we know it doesn't exist and we have
5750 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5751 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5752 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag: decide whether the tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, per the rules documented above this
 * function, and record any disagreement with a previously cached
 * decision in rec->bad_full_backref.
 *
 * NOTE(review): interior lines are elided in this copy; the early
 * returns between the visible conditionals are not shown.
 */
5754 static int calc_extent_flag(struct btrfs_root *root,
5755 struct cache_tree *extent_cache,
5756 struct extent_buffer *buf,
5757 struct root_item_record *ri,
5760 struct extent_record *rec;
5761 struct cache_extent *cache;
5762 struct tree_backref *tback;
5765 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5766 /* we have added this extent before */
5768 rec = container_of(cache, struct extent_record, cache);
5771 * Except file/reloc tree, we can not have
5774 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Root node of the tree we are walking: owner is trivially correct. */
5779 if (buf->start == ri->bytenr)
/* Rule 1: RELOC header flag implies FULL_BACKREF. */
5782 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5785 owner = btrfs_header_owner(buf);
5786 if (owner == ri->objectid)
/* Owner no longer points at us: look for the owner's backref. */
5789 tback = find_tree_backref(rec, 0, owner);
/* Flag a conflict with an earlier (cached) full-backref verdict. */
5794 if (rec->flag_block_full_backref != -1 &&
5795 rec->flag_block_full_backref != 0)
5796 rec->bad_full_backref = 1;
5799 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5800 if (rec->flag_block_full_backref != -1 &&
5801 rec->flag_block_full_backref != 1)
5802 rec->bad_full_backref = 1;
/*
 * run_next_block: process one pending tree block from the breadth-first
 * scan.  Picks the next bytenr (issuing readahead for upcoming blocks),
 * reads it, resolves its FULL_BACKREF status, sanity-checks it, and
 * then, for a leaf, dispatches each item by key.type into the various
 * caches (extents, chunks, devices, block groups, dev extents, backrefs,
 * csum byte accounting, orphans); for an interior node, records child
 * extents/backrefs and queues the children.  Also accumulates the
 * global btree/fs-tree/extent-tree byte counters.
 *
 * NOTE(review): interior lines are elided in this copy (declarations of
 * ret/i/nritems/bytenr/size/gen/flags/owner/parent, several else/return
 * paths, and loop closing braces are missing); comments below describe
 * only the visible statements.
 */
5806 static int run_next_block(struct btrfs_root *root,
5807 struct block_info *bits,
5810 struct cache_tree *pending,
5811 struct cache_tree *seen,
5812 struct cache_tree *reada,
5813 struct cache_tree *nodes,
5814 struct cache_tree *extent_cache,
5815 struct cache_tree *chunk_cache,
5816 struct rb_root *dev_cache,
5817 struct block_group_tree *block_group_cache,
5818 struct device_extent_tree *dev_extent_cache,
5819 struct root_item_record *ri)
5821 struct extent_buffer *buf;
5822 struct extent_record *rec = NULL;
5833 struct btrfs_key key;
5834 struct cache_extent *cache;
/* Choose the next batch of blocks to visit and start readahead. */
5837 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5838 bits_nr, &reada_bits);
5843 for(i = 0; i < nritems; i++) {
5844 ret = add_cache_extent(reada, bits[i].start,
5849 /* fixme, get the parent transid */
5850 readahead_tree_block(root, bits[i].start,
5854 *last = bits[0].start;
5855 bytenr = bits[0].start;
5856 size = bits[0].size;
/* This block is being visited now: drop it from the work queues. */
5858 cache = lookup_cache_extent(pending, bytenr, size);
5860 remove_cache_extent(pending, cache);
5863 cache = lookup_cache_extent(reada, bytenr, size);
5865 remove_cache_extent(reada, cache);
5868 cache = lookup_cache_extent(nodes, bytenr, size);
5870 remove_cache_extent(nodes, cache);
5873 cache = lookup_cache_extent(extent_cache, bytenr, size);
5875 rec = container_of(cache, struct extent_record, cache);
5876 gen = rec->parent_generation;
5879 /* fixme, get the real parent transid */
5880 buf = read_tree_block(root, bytenr, size, gen);
5881 if (!extent_buffer_uptodate(buf)) {
5882 record_bad_block_io(root->fs_info,
5883 extent_cache, bytenr, size);
5887 nritems = btrfs_header_nritems(buf);
/*
 * Determine the block's extent flags: from the extent tree when it is
 * trusted, otherwise recompute via calc_extent_flag().
 */
5890 if (!init_extent_tree) {
5891 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5892 btrfs_header_level(buf), 1, NULL,
5895 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5897 fprintf(stderr, "Couldn't calc extent flags\n");
5898 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5903 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5905 fprintf(stderr, "Couldn't calc extent flags\n");
5906 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Cross-check FULL_BACKREF against how we reached this block. */
5910 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5912 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5913 ri->objectid == btrfs_header_owner(buf)) {
5915 * Ok we got to this block from it's original owner and
5916 * we have FULL_BACKREF set. Relocation can leave
5917 * converted blocks over so this is altogether possible,
5918 * however it's not possible if the generation > the
5919 * last snapshot, so check for this case.
5921 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5922 btrfs_header_generation(buf) > ri->last_snapshot) {
5923 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5924 rec->bad_full_backref = 1;
5929 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5930 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5931 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5932 rec->bad_full_backref = 1;
5936 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5937 rec->flag_block_full_backref = 1;
5941 rec->flag_block_full_backref = 0;
5943 owner = btrfs_header_owner(buf);
5946 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item by key.type. */
5950 if (btrfs_is_leaf(buf)) {
5951 btree_space_waste += btrfs_leaf_free_space(root, buf);
5952 for (i = 0; i < nritems; i++) {
5953 struct btrfs_file_extent_item *fi;
5954 btrfs_item_key_to_cpu(buf, &key, i);
5955 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5956 process_extent_item(root, extent_cache, buf,
5960 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5961 process_extent_item(root, extent_cache, buf,
5965 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5967 btrfs_item_size_nr(buf, i);
5970 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5971 process_chunk_item(chunk_cache, &key, buf, i);
5974 if (key.type == BTRFS_DEV_ITEM_KEY) {
5975 process_device_item(dev_cache, &key, buf, i);
5978 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5979 process_block_group_item(block_group_cache,
5983 if (key.type == BTRFS_DEV_EXTENT_KEY) {
5984 process_device_extent_item(dev_extent_cache,
5989 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5990 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5991 process_extent_ref_v0(extent_cache, buf, i);
5998 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
5999 add_tree_backref(extent_cache, key.objectid, 0,
6003 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6004 add_tree_backref(extent_cache, key.objectid,
6008 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6009 struct btrfs_extent_data_ref *ref;
6010 ref = btrfs_item_ptr(buf, i,
6011 struct btrfs_extent_data_ref);
6012 add_data_backref(extent_cache,
6014 btrfs_extent_data_ref_root(buf, ref),
6015 btrfs_extent_data_ref_objectid(buf,
6017 btrfs_extent_data_ref_offset(buf, ref),
6018 btrfs_extent_data_ref_count(buf, ref),
6019 0, root->sectorsize);
6022 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6023 struct btrfs_shared_data_ref *ref;
6024 ref = btrfs_item_ptr(buf, i,
6025 struct btrfs_shared_data_ref);
6026 add_data_backref(extent_cache,
6027 key.objectid, key.offset, 0, 0, 0,
6028 btrfs_shared_data_ref_count(buf, ref),
6029 0, root->sectorsize);
/* Orphans are queued on delete_items for later repair/removal. */
6032 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6033 struct bad_item *bad;
6035 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6039 bad = malloc(sizeof(struct bad_item));
6042 INIT_LIST_HEAD(&bad->list);
6043 memcpy(&bad->key, &key,
6044 sizeof(struct btrfs_key));
6045 bad->root_id = owner;
6046 list_add_tail(&bad->list, &delete_items);
6049 if (key.type != BTRFS_EXTENT_DATA_KEY)
/* Regular (non-inline, non-hole) file extent: account + backref. */
6051 fi = btrfs_item_ptr(buf, i,
6052 struct btrfs_file_extent_item);
6053 if (btrfs_file_extent_type(buf, fi) ==
6054 BTRFS_FILE_EXTENT_INLINE)
6056 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6059 data_bytes_allocated +=
6060 btrfs_file_extent_disk_num_bytes(buf, fi);
6061 if (data_bytes_allocated < root->sectorsize) {
6064 data_bytes_referenced +=
6065 btrfs_file_extent_num_bytes(buf, fi);
6066 add_data_backref(extent_cache,
6067 btrfs_file_extent_disk_bytenr(buf, fi),
6068 parent, owner, key.objectid, key.offset -
6069 btrfs_file_extent_offset(buf, fi), 1, 1,
6070 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Interior node: record children and queue them for the scan. */
6074 struct btrfs_key first_key;
6076 first_key.objectid = 0;
6079 btrfs_item_key_to_cpu(buf, &first_key, 0);
6080 level = btrfs_header_level(buf);
6081 for (i = 0; i < nritems; i++) {
6082 ptr = btrfs_node_blockptr(buf, i);
6083 size = btrfs_level_size(root, level - 1);
6084 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children already dropped by an interrupted snapshot delete. */
6086 if ((level == ri->drop_level)
6087 && is_dropped_key(&key, &ri->drop_key)) {
6091 ret = add_extent_rec(extent_cache, &key,
6092 btrfs_node_ptr_generation(buf, i),
6093 ptr, size, 0, 0, 1, 0, 1, 0,
6097 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6100 add_pending(nodes, seen, ptr, size);
6102 add_pending(pending, seen, ptr, size);
6105 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6106 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting of metadata bytes by owning tree. */
6108 total_btree_bytes += buf->len;
6109 if (fs_root_objectid(btrfs_header_owner(buf)))
6110 total_fs_tree_bytes += buf->len;
6111 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6112 total_extent_tree_bytes += buf->len;
6113 if (!found_old_backref &&
6114 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6115 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6116 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6117 found_old_backref = 1;
6119 free_extent_buffer(buf);
/*
 * add_root_to_pending: seed the block scan with a tree root.  Queues
 * @buf on the nodes (interior) or pending (leaf) worklist, creates its
 * extent record, and adds the appropriate tree backref: self-parented
 * (full backref) for reloc roots and pre-mixed-backref-rev blocks,
 * otherwise a root-keyed backref for @objectid.
 *
 * NOTE(review): interior lines are elided in this copy (e.g. the tail
 * of the parameter list and the return).
 */
6123 static int add_root_to_pending(struct extent_buffer *buf,
6124 struct cache_tree *extent_cache,
6125 struct cache_tree *pending,
6126 struct cache_tree *seen,
6127 struct cache_tree *nodes,
6130 if (btrfs_header_level(buf) > 0)
6131 add_pending(nodes, seen, buf->start, buf->len);
6133 add_pending(pending, seen, buf->start, buf->len);
6134 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6135 0, 1, 1, 0, 1, 0, buf->len);
6137 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6138 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6139 add_tree_backref(extent_cache, buf->start, buf->start,
6142 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6146 /* as we fix the tree, we might be deleting blocks that
6147 * we're tracking for repair. This hook makes sure we
6148 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook: called while repair code frees extents so that the
 * fsck's in-memory extent cache stays in sync.  Looks up the cached
 * extent record for [bytenr, num_bytes), decrements the matching data
 * or tree backref, clears the found_ref/found_extent_tree flags once
 * their counts reach zero, unlinks fully-dead backrefs, and finally
 * lets maybe_free_extent_rec() drop the record if nothing is left.
 *
 * Fix: the backref-unlink conditions previously read
 * "!found_extent_tree && found_ref", which would unlink a backref that
 * still has a live reference; the surrounding logic (flags cleared only
 * when the counts hit zero) shows both flags must be clear, so the
 * missing '!' on found_ref is added in both branches.
 *
 * NOTE(review): interior lines are elided in this copy of the file
 * (declarations of is_data, several braces/returns); only the visible
 * statements are touched.
 */
6150 static int free_extent_hook(struct btrfs_trans_handle *trans,
6151 struct btrfs_root *root,
6152 u64 bytenr, u64 num_bytes, u64 parent,
6153 u64 root_objectid, u64 owner, u64 offset,
6156 struct extent_record *rec;
6157 struct cache_extent *cache;
6159 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Data extents are owned by inode objectids >= FIRST_FREE. */
6161 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6162 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6166 rec = container_of(cache, struct extent_record, cache);
6168 struct data_backref *back;
6169 back = find_data_backref(rec, parent, root_objectid, owner,
6170 offset, 1, bytenr, num_bytes);
6173 if (back->node.found_ref) {
6174 back->found_ref -= refs_to_drop;
6176 rec->refs -= refs_to_drop;
6178 if (back->node.found_extent_tree) {
6179 back->num_refs -= refs_to_drop;
6180 if (rec->extent_item_refs)
6181 rec->extent_item_refs -= refs_to_drop;
6183 if (back->found_ref == 0)
6184 back->node.found_ref = 0;
6185 if (back->num_refs == 0)
6186 back->node.found_extent_tree = 0;
/* Only unlink the backref once both sides are fully gone. */
6188 if (!back->node.found_extent_tree && !back->node.found_ref) {
6189 list_del(&back->node.list);
6193 struct tree_backref *back;
6194 back = find_tree_backref(rec, parent, root_objectid);
6197 if (back->node.found_ref) {
6200 back->node.found_ref = 0;
6202 if (back->node.found_extent_tree) {
6203 if (rec->extent_item_refs)
6204 rec->extent_item_refs--;
6205 back->node.found_extent_tree = 0;
/* Same rule for tree backrefs: both flags must be clear. */
6207 if (!back->node.found_extent_tree && !back->node.found_ref) {
6208 list_del(&back->node.list);
6212 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records: repair helper that removes every extent-tree
 * item keyed at @bytenr (EXTENT_ITEM, METADATA_ITEM and all the *_REF
 * item types), searching backwards from (bytenr, -1) and restarting
 * after each deletion.  When an EXTENT_ITEM/METADATA_ITEM is removed,
 * the block group usage is updated to give the space back.
 *
 * NOTE(review): interior lines are elided in this copy (loop structure,
 * ret/slot declarations, several break/return paths); comments describe
 * only the visible statements.
 */
6217 static int delete_extent_records(struct btrfs_trans_handle *trans,
6218 struct btrfs_root *root,
6219 struct btrfs_path *path,
6220 u64 bytenr, u64 new_len)
6222 struct btrfs_key key;
6223 struct btrfs_key found_key;
6224 struct extent_buffer *leaf;
6229 key.objectid = bytenr;
6231 key.offset = (u64)-1;
6234 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6241 if (path->slots[0] == 0)
6247 leaf = path->nodes[0];
6248 slot = path->slots[0];
6250 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6251 if (found_key.objectid != bytenr)
/* Only extent-tree item types tied to this bytenr are deleted. */
6254 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6255 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6256 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6257 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6258 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6259 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6260 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Not a deletable type: step the search key backwards and retry. */
6261 btrfs_release_path(path);
6262 if (found_key.type == 0) {
6263 if (found_key.offset == 0)
6265 key.offset = found_key.offset - 1;
6266 key.type = found_key.type;
6268 key.type = found_key.type - 1;
6269 key.offset = (u64)-1;
6273 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6274 found_key.objectid, found_key.type, found_key.offset);
6276 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6279 btrfs_release_path(path);
/* Return the freed bytes to the block group accounting. */
6281 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6282 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6283 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6284 found_key.offset : root->leafsize;
6286 ret = btrfs_update_block_group(trans, root, bytenr,
6293 btrfs_release_path(path);
6298 * for a single backref, this will allocate a new extent
6299 * and add the backref to it.
/*
 * record_extent: repair helper — for an extent record with a single
 * backref, (re)insert the EXTENT_ITEM into the extent tree (unless it
 * was already @allocated) and then add the backref itself via
 * btrfs_inc_extent_ref(), one call per found reference.
 *
 * Fix: the four '&copy_key' argument expressions had been corrupted by
 * an HTML-entity mangling into '©_key' (U+00A9), which is not valid C;
 * restored to '&copy_key'.  Also dropped a stray double semicolon on
 * the copy_key declaration.
 *
 * NOTE(review): interior lines are elided in this copy (ret/parent/i
 * declarations, the !allocated guard, several closing braces); only
 * the visible statements are documented and touched.
 */
6301 static int record_extent(struct btrfs_trans_handle *trans,
6302 struct btrfs_fs_info *info,
6303 struct btrfs_path *path,
6304 struct extent_record *rec,
6305 struct extent_backref *back,
6306 int allocated, u64 flags)
6309 struct btrfs_root *extent_root = info->extent_root;
6310 struct extent_buffer *leaf;
6311 struct btrfs_key ins_key;
6312 struct btrfs_extent_item *ei;
6313 struct tree_backref *tback;
6314 struct data_backref *dback;
6315 struct btrfs_tree_block_info *bi;
6318 rec->max_size = max_t(u64, rec->max_size,
6319 info->extent_root->leafsize);
/* Metadata items carry an extra btrfs_tree_block_info payload. */
6322 u32 item_size = sizeof(*ei);
6325 item_size += sizeof(*bi);
6327 ins_key.objectid = rec->start;
6328 ins_key.offset = rec->max_size;
6329 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6331 ret = btrfs_insert_empty_item(trans, extent_root, path,
6332 &ins_key, item_size);
6336 leaf = path->nodes[0];
6337 ei = btrfs_item_ptr(leaf, path->slots[0],
6338 struct btrfs_extent_item);
/* refs start at 0; each btrfs_inc_extent_ref() below bumps them. */
6340 btrfs_set_extent_refs(leaf, ei, 0);
6341 btrfs_set_extent_generation(leaf, ei, rec->generation);
6343 if (back->is_data) {
6344 btrfs_set_extent_flags(leaf, ei,
6345 BTRFS_EXTENT_FLAG_DATA);
6347 struct btrfs_disk_key copy_key;
6349 tback = (struct tree_backref *)back;
6350 bi = (struct btrfs_tree_block_info *)(ei + 1);
6351 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* Record the first key / level of the tree block in the item. */
6354 btrfs_set_disk_key_objectid(&copy_key,
6355 rec->info_objectid);
6356 btrfs_set_disk_key_type(&copy_key, 0);
6357 btrfs_set_disk_key_offset(&copy_key, 0);
6359 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6360 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6362 btrfs_set_extent_flags(leaf, ei,
6363 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6366 btrfs_mark_buffer_dirty(leaf);
6367 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6368 rec->max_size, 1, 0);
6371 btrfs_release_path(path);
6374 if (back->is_data) {
6378 dback = (struct data_backref *)back;
6379 if (back->full_backref)
6380 parent = dback->parent;
/* Add one ref per reference actually found in the fs trees. */
6384 for (i = 0; i < dback->found_ref; i++) {
6385 /* if parent != 0, we're doing a full backref
6386 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6387 * just makes the backref allocator create a data
6390 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6391 rec->start, rec->max_size,
6395 BTRFS_FIRST_FREE_OBJECTID :
6401 fprintf(stderr, "adding new data backref"
6402 " on %llu %s %llu owner %llu"
6403 " offset %llu found %d\n",
6404 (unsigned long long)rec->start,
6405 back->full_backref ?
6407 back->full_backref ?
6408 (unsigned long long)parent :
6409 (unsigned long long)dback->root,
6410 (unsigned long long)dback->owner,
6411 (unsigned long long)dback->offset,
6416 tback = (struct tree_backref *)back;
6417 if (back->full_backref)
6418 parent = tback->parent;
6422 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6423 rec->start, rec->max_size,
6424 parent, tback->root, 0, 0);
6425 fprintf(stderr, "adding new tree backref on "
6426 "start %llu len %llu parent %llu root %llu\n",
6427 rec->start, rec->max_size, parent, tback->root);
6432 btrfs_release_path(path);
/*
 * extent_entry: one candidate (bytenr, bytes) pairing collected while
 * reconciling disagreeing data backrefs (see find_entry /
 * find_most_right_entry / verify_backrefs, which also reference
 * bytenr, bytes, count and broken fields — elided in this copy).
 */
6436 struct extent_entry {
6441 struct list_head list;
/*
 * find_entry: linear search of @entries for the extent_entry whose
 * bytenr and bytes both match; the return statements are elided in
 * this copy (presumably returns the match, or NULL if none).
 */
6444 static struct extent_entry *find_entry(struct list_head *entries,
6445 u64 bytenr, u64 bytes)
6447 struct extent_entry *entry = NULL;
6449 list_for_each_entry(entry, entries, list) {
6450 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry: pick the most plausible extent_entry from the
 * candidate list — the one backed by the highest reference count,
 * skipping entries whose refs are all broken, and refusing to choose
 * (visible tie-handling below) when two candidates are equally
 * supported.
 *
 * NOTE(review): interior lines are elided in this copy; the returns
 * and the tie bookkeeping between visible lines are not shown.
 */
6457 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6459 struct extent_entry *entry, *best = NULL, *prev = NULL;
6461 list_for_each_entry(entry, entries, list) {
6468 * If there are as many broken entries as entries then we know
6469 * not to trust this particular entry.
6471 if (entry->broken == entry->count)
6475 * If our current entry == best then we can't be sure our best
6476 * is really the best, so we need to keep searching.
6478 if (best && best->count == entry->count) {
6484 /* Prev == entry, not good enough, have to keep searching */
6485 if (!prev->broken && prev->count == entry->count)
6489 best = (prev->count > entry->count) ? prev : entry;
6490 else if (best->count < entry->count)
/*
 * repair_ref: rewrite one file-extent item so that its disk_bytenr /
 * disk_num_bytes / offset agree with the agreed-upon extent @entry.
 * Reads the owning fs root from @dback, walks to the file extent the
 * backref describes, then cows down in a transaction and adjusts the
 * item.  Cases it refuses to touch (compressed refs that moved, refs
 * outside the entry) print a request for a btrfs-image instead.
 *
 * NOTE(review): interior lines are elided in this copy (declarations
 * of ret/err/bytenr/bytes, the walk-forward loop body, several goto
 * labels); comments describe only the visible statements.
 */
6498 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6499 struct data_backref *dback, struct extent_entry *entry)
6501 struct btrfs_trans_handle *trans;
6502 struct btrfs_root *root;
6503 struct btrfs_file_extent_item *fi;
6504 struct extent_buffer *leaf;
6505 struct btrfs_key key;
/* Resolve the fs root that owns the broken reference. */
6509 key.objectid = dback->root;
6510 key.type = BTRFS_ROOT_ITEM_KEY;
6511 key.offset = (u64)-1;
6512 root = btrfs_read_fs_root(info, &key);
6514 fprintf(stderr, "Couldn't find root for our ref\n");
6519 * The backref points to the original offset of the extent if it was
6520 * split, so we need to search down to the offset we have and then walk
6521 * forward until we find the backref we're looking for.
6523 key.objectid = dback->owner;
6524 key.type = BTRFS_EXTENT_DATA_KEY;
6525 key.offset = dback->offset;
6526 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6528 fprintf(stderr, "Error looking up ref %d\n", ret);
6533 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6534 ret = btrfs_next_leaf(root, path);
6536 fprintf(stderr, "Couldn't find our ref, next\n");
6540 leaf = path->nodes[0];
6541 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6542 if (key.objectid != dback->owner ||
6543 key.type != BTRFS_EXTENT_DATA_KEY) {
6544 fprintf(stderr, "Couldn't find our ref, search\n");
6547 fi = btrfs_item_ptr(leaf, path->slots[0],
6548 struct btrfs_file_extent_item);
6549 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6550 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Stop once the on-disk item matches the backref being repaired. */
6552 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6557 btrfs_release_path(path);
6559 trans = btrfs_start_transaction(root, 1);
6561 return PTR_ERR(trans);
6564 * Ok we have the key of the file extent we want to fix, now we can cow
6565 * down to the thing and fix it.
6567 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6569 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6570 key.objectid, key.type, key.offset, ret);
6574 fprintf(stderr, "Well that's odd, we just found this key "
6575 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6580 leaf = path->nodes[0];
6581 fi = btrfs_item_ptr(leaf, path->slots[0],
6582 struct btrfs_file_extent_item);
/* Can't recompute offsets for compressed extents that moved. */
6584 if (btrfs_file_extent_compression(leaf, fi) &&
6585 dback->disk_bytenr != entry->bytenr) {
6586 fprintf(stderr, "Ref doesn't match the record start and is "
6587 "compressed, please take a btrfs-image of this file "
6588 "system and send it to a btrfs developer so they can "
6589 "complete this functionality for bytenr %Lu\n",
6590 dback->disk_bytenr);
/* Broken ref with wrong bytenr: just point it at the entry. */
6595 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6596 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6597 } else if (dback->disk_bytenr > entry->bytenr) {
/* Ref starts inside the entry: fold the difference into offset. */
6598 u64 off_diff, offset;
6600 off_diff = dback->disk_bytenr - entry->bytenr;
6601 offset = btrfs_file_extent_offset(leaf, fi);
6602 if (dback->disk_bytenr + offset +
6603 btrfs_file_extent_num_bytes(leaf, fi) >
6604 entry->bytenr + entry->bytes) {
6605 fprintf(stderr, "Ref is past the entry end, please "
6606 "take a btrfs-image of this file system and "
6607 "send it to a btrfs developer, ref %Lu\n",
6608 dback->disk_bytenr);
6613 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6614 btrfs_set_file_extent_offset(leaf, fi, offset);
6615 } else if (dback->disk_bytenr < entry->bytenr) {
/* Ref starts before the entry: shrink offset by the gap. */
6618 offset = btrfs_file_extent_offset(leaf, fi);
6619 if (dback->disk_bytenr + offset < entry->bytenr) {
6620 fprintf(stderr, "Ref is before the entry start, please"
6621 " take a btrfs-image of this file system and "
6622 "send it to a btrfs developer, ref %Lu\n",
6623 dback->disk_bytenr);
6628 offset += dback->disk_bytenr;
6629 offset -= entry->bytenr;
6630 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6631 btrfs_set_file_extent_offset(leaf, fi, offset);
6634 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6637 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6638 * only do this if we aren't using compression, otherwise it's a
6641 if (!btrfs_file_extent_compression(leaf, fi))
6642 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6644 printf("ram bytes may be wrong?\n");
6645 btrfs_mark_buffer_dirty(leaf);
6647 err = btrfs_commit_transaction(trans, root);
6648 btrfs_release_path(path);
6649 return ret ? ret : err;
/*
 * verify_backrefs: for a data extent record whose backrefs disagree on
 * (bytenr, bytes), build a list of candidate extent_entry pairings,
 * elect a winner (find_most_right_entry, tie-broken by the extent
 * record itself), and then call repair_ref() on every backref that
 * doesn't match the winner.
 *
 * NOTE(review): interior lines are elided in this copy (declarations
 * of ret/nr_entries/mismatch, entry->count / entry->broken updates,
 * goto labels and returns); comments describe the visible statements.
 */
6652 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6653 struct extent_record *rec)
6655 struct extent_backref *back;
6656 struct data_backref *dback;
6657 struct extent_entry *entry, *best = NULL;
6660 int broken_entries = 0;
6665 * Metadata is easy and the backrefs should always agree on bytenr and
6666 * size, if not we've got bigger issues.
/* Pass 1: collect one entry per distinct (bytenr, bytes) pairing. */
6671 list_for_each_entry(back, &rec->backrefs, list) {
6672 if (back->full_backref || !back->is_data)
6675 dback = (struct data_backref *)back;
6678 * We only pay attention to backrefs that we found a real
6681 if (dback->found_ref == 0)
6685 * For now we only catch when the bytes don't match, not the
6686 * bytenr. We can easily do this at the same time, but I want
6687 * to have a fs image to test on before we just add repair
6688 * functionality willy-nilly so we know we won't screw up the
6692 entry = find_entry(&entries, dback->disk_bytenr,
6695 entry = malloc(sizeof(struct extent_entry));
6700 memset(entry, 0, sizeof(*entry));
6701 entry->bytenr = dback->disk_bytenr;
6702 entry->bytes = dback->bytes;
6703 list_add_tail(&entry->list, &entries);
6708 * If we only have one entry we may think the entries agree when
6709 * in reality they don't so we have to do some extra checking.
6711 if (dback->disk_bytenr != rec->start ||
6712 dback->bytes != rec->nr || back->broken)
6723 /* Yay all the backrefs agree, carry on good sir */
6724 if (nr_entries <= 1 && !mismatch)
6727 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6728 "%Lu\n", rec->start);
6731 * First we want to see if the backrefs can agree amongst themselves who
6732 * is right, so figure out which one of the entries has the highest
6735 best = find_most_right_entry(&entries);
6738 * Ok so we may have an even split between what the backrefs think, so
6739 * this is where we use the extent ref to see what it thinks.
6742 entry = find_entry(&entries, rec->start, rec->nr);
6743 if (!entry && (!broken_entries || !rec->found_rec)) {
6744 fprintf(stderr, "Backrefs don't agree with each other "
6745 "and extent record doesn't agree with anybody,"
6746 " so we can't fix bytenr %Lu bytes %Lu\n",
6747 rec->start, rec->nr);
6750 } else if (!entry) {
6752 * Ok our backrefs were broken, we'll assume this is the
6753 * correct value and add an entry for this range.
6755 entry = malloc(sizeof(struct extent_entry));
6760 memset(entry, 0, sizeof(*entry));
6761 entry->bytenr = rec->start;
6762 entry->bytes = rec->nr;
6763 list_add_tail(&entry->list, &entries);
6767 best = find_most_right_entry(&entries);
6769 fprintf(stderr, "Backrefs and extent record evenly "
6770 "split on who is right, this is going to "
6771 "require user input to fix bytenr %Lu bytes "
6772 "%Lu\n", rec->start, rec->nr);
6779 * I don't think this can happen currently as we'll abort() if we catch
6780 * this case higher up, but in case somebody removes that we still can't
6781 * deal with it properly here yet, so just bail out if that's the case.
6783 if (best->bytenr != rec->start) {
6784 fprintf(stderr, "Extent start and backref starts don't match, "
6785 "please use btrfs-image on this file system and send "
6786 "it to a btrfs developer so they can make fsck fix "
6787 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6788 rec->start, rec->nr);
6794 * Ok great we all agreed on an extent record, let's go find the real
6795 * references and fix up the ones that don't match.
/* Pass 2: repair every live backref that disagrees with the winner. */
6797 list_for_each_entry(back, &rec->backrefs, list) {
6798 if (back->full_backref || !back->is_data)
6801 dback = (struct data_backref *)back;
6804 * Still ignoring backrefs that don't have a real ref attached
6807 if (dback->found_ref == 0)
6810 if (dback->bytes == best->bytes &&
6811 dback->disk_bytenr == best->bytenr)
6814 ret = repair_ref(info, path, dback, best);
6820 * Ok we messed with the actual refs, which means we need to drop our
6821 * entire cache and go back and rescan. I know this is a huge pain and
6822 * adds a lot of extra work, but it's the only way to be safe. Once all
6823 * the backrefs agree we may not need to do anything to the extent
/* Free the candidate list before returning. */
6828 while (!list_empty(&entries)) {
6829 entry = list_entry(entries.next, struct extent_entry, list);
6830 list_del_init(&entry->list);
/*
 * process_duplicates: when an extent record has exactly one duplicate
 * and no real extent item of its own, replace it in the cache with the
 * duplicate (which came from a real EXTENT_ITEM), merging in the
 * original's backrefs/refs and absorbing any further overlapping
 * records.  Returns 0 when duplicates remain to delete, 1 otherwise
 * (visible at the final return).
 *
 * NOTE(review): interior lines are elided in this copy (ret/nr_cycle
 * style declarations and some loop/brace structure).
 */
6836 static int process_duplicates(struct btrfs_root *root,
6837 struct cache_tree *extent_cache,
6838 struct extent_record *rec)
6840 struct extent_record *good, *tmp;
6841 struct cache_extent *cache;
6845 * If we found an extent record for this extent then return, or if we
6846 * have more than one duplicate we are likely going to need to delete
6849 if (rec->found_rec || rec->num_duplicates > 1)
6852 /* Shouldn't happen but just in case */
6853 BUG_ON(!rec->num_duplicates);
6856 * So this happens if we end up with a backref that doesn't match the
6857 * actual extent entry. So either the backref is bad or the extent
6858 * entry is bad. Either way we want to have the extent_record actually
6859 * reflect what we found in the extent_tree, so we need to take the
6860 * duplicate out and use that as the extent_record since the only way we
6861 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6863 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the (single) duplicate to be the canonical record. */
6865 good = list_entry(rec->dups.next, struct extent_record, list);
6866 list_del_init(&good->list);
6867 INIT_LIST_HEAD(&good->backrefs);
6868 INIT_LIST_HEAD(&good->dups);
6869 good->cache.start = good->start;
6870 good->cache.size = good->nr;
6871 good->content_checked = 0;
6872 good->owner_ref_checked = 0;
6873 good->num_duplicates = 0;
6874 good->refs = rec->refs;
6875 list_splice_init(&rec->backrefs, &good->backrefs);
/* Absorb any records still overlapping the promoted range. */
6877 cache = lookup_cache_extent(extent_cache, good->start,
6881 tmp = container_of(cache, struct extent_record, cache);
6884 * If we find another overlapping extent and its found_rec is
6885 * set then it's a duplicate and we need to try and delete
6888 if (tmp->found_rec || tmp->num_duplicates > 0) {
6889 if (list_empty(&good->list))
6890 list_add_tail(&good->list,
6891 &duplicate_extents);
6892 good->num_duplicates += tmp->num_duplicates + 1;
6893 list_splice_init(&tmp->dups, &good->dups);
6894 list_del_init(&tmp->list);
6895 list_add_tail(&tmp->list, &good->dups);
6896 remove_cache_extent(extent_cache, &tmp->cache);
6901 * Ok we have another non extent item backed extent rec, so lets
6902 * just add it to this extent and carry on like we did above.
6904 good->refs += tmp->refs;
6905 list_splice_init(&tmp->backrefs, &good->backrefs);
6906 remove_cache_extent(extent_cache, &tmp->cache);
6909 ret = insert_cache_extent(extent_cache, &good->cache);
6912 return good->num_duplicates ? 0 : 1;
/*
 * Delete the redundant EXTENT_ITEMs collected on rec->dups from the
 * extent tree inside one transaction.
 * Returns the number of items deleted on success, or a negative errno.
 * Returning 0 (nothing deleted, no error) also clears num_duplicates.
 */
6915 static int delete_duplicate_records(struct btrfs_root *root,
6916 struct extent_record *rec)
6918 struct btrfs_trans_handle *trans;
6919 LIST_HEAD(delete_list);
6920 struct btrfs_path *path;
6921 struct extent_record *tmp, *good, *n;
6924 struct btrfs_key key;
6926 path = btrfs_alloc_path();
6933 /* Find the record that covers all of the duplicates. */
6934 list_for_each_entry(tmp, &rec->dups, list) {
6935 if (good->start < tmp->start)
6937 if (good->nr > tmp->nr)
/*
 * Every duplicate must be fully contained in "good"; partial
 * overlaps cannot be repaired by simple deletion.
 */
6940 if (tmp->start + tmp->nr < good->start + good->nr) {
/*
 * NOTE(review): the message reads like two [start-end] ranges but the
 * arguments are (start, nr) pairs, i.e. lengths are printed where end
 * offsets are implied — confirm intent before relying on this output.
 */
6941 fprintf(stderr, "Ok we have overlapping extents that "
6942 "aren't completely covered by eachother, this "
6943 "is going to require more careful thought. "
6944 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6945 tmp->start, tmp->nr, good->start, good->nr);
6952 list_add_tail(&rec->list, &delete_list);
/* Move everything except the surviving record onto delete_list. */
6954 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6957 list_move_tail(&tmp->list, &delete_list);
6960 root = root->fs_info->extent_root;
6961 trans = btrfs_start_transaction(root, 1);
6962 if (IS_ERR(trans)) {
6963 ret = PTR_ERR(trans);
6967 list_for_each_entry(tmp, &delete_list, list) {
/* Records never seen as real extent items have nothing to delete. */
6968 if (tmp->found_rec == 0)
6970 key.objectid = tmp->start;
6971 key.type = BTRFS_EXTENT_ITEM_KEY;
6972 key.offset = tmp->nr;
6974 /* Shouldn't happen but just in case */
6975 if (tmp->metadata) {
6976 fprintf(stderr, "Well this shouldn't happen, extent "
6977 "record overlaps but is metadata? "
6978 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6982 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6988 ret = btrfs_del_item(trans, root, path);
6991 btrfs_release_path(path);
6994 err = btrfs_commit_transaction(trans, root);
/* Free the extent records queued above; the items are gone on disk. */
6998 while (!list_empty(&delete_list)) {
6999 tmp = list_entry(delete_list.next, struct extent_record, list);
7000 list_del_init(&tmp->list);
7006 while (!list_empty(&rec->dups)) {
7007 tmp = list_entry(rec->dups.next, struct extent_record, list);
7008 list_del_init(&tmp->list);
7012 btrfs_free_path(path);
7014 if (!ret && !nr_del)
7015 rec->num_duplicates = 0;
/* Positive return = count of deleted items, caller treats it as "did work". */
7017 return ret ? ret : nr_del;
/*
 * For data backrefs that were recorded in the extent tree but never seen
 * while scanning fs trees (found_ref == 0), look up the referenced file
 * extent in its owning root. If it exists and its disk bytenr does not
 * collide with another known extent record, remember the on-disk
 * bytenr/bytes on the backref so verify_backrefs can reconcile them.
 */
7020 static int find_possible_backrefs(struct btrfs_fs_info *info,
7021 struct btrfs_path *path,
7022 struct cache_tree *extent_cache,
7023 struct extent_record *rec)
7025 struct btrfs_root *root;
7026 struct extent_backref *back;
7027 struct data_backref *dback;
7028 struct cache_extent *cache;
7029 struct btrfs_file_extent_item *fi;
7030 struct btrfs_key key;
7034 list_for_each_entry(back, &rec->backrefs, list) {
7035 /* Don't care about full backrefs (poor unloved backrefs) */
7036 if (back->full_backref || !back->is_data)
7039 dback = (struct data_backref *)back;
7041 /* We found this one, we don't need to do a lookup */
7042 if (dback->found_ref)
/* Resolve the subvolume root that owns this backref. */
7045 key.objectid = dback->root;
7046 key.type = BTRFS_ROOT_ITEM_KEY;
7047 key.offset = (u64)-1;
7049 root = btrfs_read_fs_root(info, &key);
7051 /* No root, definitely a bad ref, skip */
7052 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7054 /* Other err, exit */
7056 return PTR_ERR(root);
/* Look for the file extent item the backref claims to exist. */
7058 key.objectid = dback->owner;
7059 key.type = BTRFS_EXTENT_DATA_KEY;
7060 key.offset = dback->offset;
7061 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7063 btrfs_release_path(path);
7066 /* Didn't find it, we can carry on */
7071 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7072 struct btrfs_file_extent_item);
7073 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7074 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7075 btrfs_release_path(path);
7076 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7078 struct extent_record *tmp;
7079 tmp = container_of(cache, struct extent_record, cache);
7082 * If we found an extent record for the bytenr for this
7083 * particular backref then we can't add it to our
7084 * current extent record. We only want to add backrefs
7085 * that don't have a corresponding extent item in the
7086 * extent tree since they likely belong to this record
7087 * and we need to fix it if it doesn't match bytenrs.
/* Claim the ref for this record using the on-disk values found. */
7093 dback->found_ref += 1;
7094 dback->disk_bytenr = bytenr;
7095 dback->bytes = bytes;
7098 * Set this so the verify backref code knows not to trust the
7099 * values in this backref.
7108 * Record orphan data ref into corresponding root.
7110 * Return 0 if the extent item contains data ref and recorded.
7111 * Return 1 if the extent item contains no useful data ref
7112 * In that case, it may contain only shared_dataref or metadata backref
7113 * or the file extent exists(this should be handled by the extent bytenr
7115 * Return <0 if something goes wrong.
/*
 * Walk rec's data backrefs and, for each one whose file extent cannot be
 * found intact in its owning fs tree, allocate an orphan_data_extent and
 * queue it on that root's orphan_data_extents list for later inode/file
 * extent rebuild. See the block comment above for the return contract:
 * 0 if at least one orphan ref was recorded, 1 if none, <0 on error.
 */
7117 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7118 struct extent_record *rec)
7120 struct btrfs_key key;
7121 struct btrfs_root *dest_root;
7122 struct extent_backref *back;
7123 struct data_backref *dback;
7124 struct orphan_data_extent *orphan;
7125 struct btrfs_path *path;
7126 int recorded_data_ref = 0;
7131 path = btrfs_alloc_path();
7134 list_for_each_entry(back, &rec->backrefs, list) {
/* Only data backrefs that the extent tree knows about are candidates. */
7135 if (back->full_backref || !back->is_data ||
7136 !back->found_extent_tree)
7138 dback = (struct data_backref *)back;
/* The fs tree already references this extent; nothing to rebuild. */
7139 if (dback->found_ref)
7141 key.objectid = dback->root;
7142 key.type = BTRFS_ROOT_ITEM_KEY;
7143 key.offset = (u64)-1;
7145 dest_root = btrfs_read_fs_root(fs_info, &key);
7147 /* For non-exist root we just skip it */
7148 if (IS_ERR(dest_root) || !dest_root)
7151 key.objectid = dback->owner;
7152 key.type = BTRFS_EXTENT_DATA_KEY;
7153 key.offset = dback->offset;
7155 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7157 * For ret < 0, it's OK since the fs-tree may be corrupted,
7158 * we need to record it for inode/file extent rebuild.
7159 * For ret > 0, we record it only for file extent rebuild.
7160 * For ret == 0, the file extent exists but only bytenr
7161 * mismatch, let the original bytenr fix routine to handle,
7167 orphan = malloc(sizeof(*orphan));
/* Record enough state to recreate the file extent later. */
7172 INIT_LIST_HEAD(&orphan->list);
7173 orphan->root = dback->root;
7174 orphan->objectid = dback->owner;
7175 orphan->offset = dback->offset;
7176 orphan->disk_bytenr = rec->cache.start;
7177 orphan->disk_len = rec->cache.size;
/* Ownership of orphan transfers to dest_root's list here. */
7178 list_add(&dest_root->orphan_data_extents, &orphan->list);
7179 recorded_data_ref = 1;
7182 btrfs_free_path(path);
7184 return !recorded_data_ref;
7190 * when an incorrect extent item is found, this will delete
7191 * all of the existing entries for it and recreate them
7192 * based on what the tree scan found.
/*
 * Repair one broken extent record: verify/normalize its backrefs, delete
 * every existing extent-tree item for the range inside a transaction, and
 * re-create items only for backrefs actually found during the scan.
 * Blocks listed in info->corrupt_blocks get their items deleted but not
 * re-created. Returns 0 or a negative errno (commit error wins if set).
 */
7194 static int fixup_extent_refs(struct btrfs_fs_info *info,
7195 struct cache_tree *extent_cache,
7196 struct extent_record *rec)
7198 struct btrfs_trans_handle *trans = NULL;
7200 struct btrfs_path *path;
7201 struct list_head *cur = rec->backrefs.next;
7202 struct cache_extent *cache;
7203 struct extent_backref *back;
/* Preserve the FULL_BACKREF flag when re-creating metadata items. */
7207 if (rec->flag_block_full_backref)
7208 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7210 path = btrfs_alloc_path();
7214 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7216 * Sometimes the backrefs themselves are so broken they don't
7217 * get attached to any meaningful rec, so first go back and
7218 * check any of our backrefs that we couldn't find and throw
7219 * them into the list if we find the backref so that
7220 * verify_backrefs can figure out what to do.
7222 ret = find_possible_backrefs(info, path, extent_cache, rec);
7227 /* step one, make sure all of the backrefs agree */
7228 ret = verify_backrefs(info, path, rec);
7232 trans = btrfs_start_transaction(info->extent_root, 1);
7233 if (IS_ERR(trans)) {
7234 ret = PTR_ERR(trans);
7238 /* step two, delete all the existing records */
7239 ret = delete_extent_records(trans, info->extent_root, path,
7240 rec->start, rec->max_size);
7245 /* was this block corrupt? If so, don't add references to it */
7246 cache = lookup_cache_extent(info->corrupt_blocks,
7247 rec->start, rec->max_size);
7253 /* step three, recreate all the refs we did find */
7254 while(cur != &rec->backrefs) {
7255 back = list_entry(cur, struct extent_backref, list);
7259 * if we didn't find any references, don't create a
7262 if (!back->found_ref)
/* The rewrite below makes the on-disk state authoritative again. */
7265 rec->bad_full_backref = 0;
7266 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7274 int err = btrfs_commit_transaction(trans, info->extent_root);
7279 btrfs_free_path(path);
/*
 * Toggle BTRFS_BLOCK_FLAG_FULL_BACKREF on a single extent item so the
 * on-disk flags match what the scan determined (rec->flag_block_full_backref).
 * Does its own transaction; returns the commit result or a negative errno.
 */
7283 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7284 struct extent_record *rec)
7286 struct btrfs_trans_handle *trans;
7287 struct btrfs_root *root = fs_info->extent_root;
7288 struct btrfs_path *path;
7289 struct btrfs_extent_item *ei;
7290 struct btrfs_key key;
7294 key.objectid = rec->start;
/* Metadata may be keyed either way; METADATA_ITEM keys carry the level. */
7295 if (rec->metadata) {
7296 key.type = BTRFS_METADATA_ITEM_KEY;
7297 key.offset = rec->info_level;
7299 key.type = BTRFS_EXTENT_ITEM_KEY;
7300 key.offset = rec->max_size;
7303 path = btrfs_alloc_path();
7307 trans = btrfs_start_transaction(root, 0);
7308 if (IS_ERR(trans)) {
7309 btrfs_free_path(path);
7310 return PTR_ERR(trans);
7313 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Search failure paths still commit to tear the transaction down. */
7315 btrfs_free_path(path);
7316 btrfs_commit_transaction(trans, root);
7319 fprintf(stderr, "Didn't find extent for %llu\n",
7320 (unsigned long long)rec->start);
7321 btrfs_free_path(path);
7322 btrfs_commit_transaction(trans, root);
7326 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7327 struct btrfs_extent_item);
7328 flags = btrfs_extent_flags(path->nodes[0], ei);
7329 if (rec->flag_block_full_backref) {
7330 fprintf(stderr, "setting full backref on %llu\n",
7331 (unsigned long long)key.objectid);
7332 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7334 fprintf(stderr, "clearing full backref on %llu\n",
7335 (unsigned long long)key.objectid);
7336 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7338 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7339 btrfs_mark_buffer_dirty(path->nodes[0]);
7340 btrfs_free_path(path);
7341 return btrfs_commit_transaction(trans, root);
7344 /* right now we only prune from the extent allocation tree */
/*
 * Remove the parent pointer to one corrupt block from the extent
 * allocation tree. Searches down to the parent level of the corrupt
 * block, locates the slot whose blockptr matches, and deletes it.
 * Gives up (without modifying anything) if the pointer cannot be found
 * in the parent node reached by the search.
 */
7345 static int prune_one_block(struct btrfs_trans_handle *trans,
7346 struct btrfs_fs_info *info,
7347 struct btrfs_corrupt_block *corrupt)
7350 struct btrfs_path path;
7351 struct extent_buffer *eb;
/* We need the node ABOVE the corrupt block, hence level + 1. */
7355 int level = corrupt->level + 1;
7357 btrfs_init_path(&path);
7359 /* we want to stop at the parent to our busted block */
7360 path.lowest_level = level;
7362 ret = btrfs_search_slot(trans, info->extent_root,
7363 &corrupt->key, &path, -1, 1);
7368 eb = path.nodes[level];
7375 * hopefully the search gave us the block we want to prune,
7376 * lets try that first
7378 slot = path.slots[level];
7379 found = btrfs_node_blockptr(eb, slot);
7380 if (found == corrupt->cache.start)
7383 nritems = btrfs_header_nritems(eb);
7385 /* the search failed, lets scan this node and hope we find it */
7386 for (slot = 0; slot < nritems; slot++) {
7387 found = btrfs_node_blockptr(eb, slot);
7388 if (found == corrupt->cache.start)
7392 * we couldn't find the bad block. TODO, search all the nodes for pointers
7395 if (eb == info->extent_root->node) {
7400 btrfs_release_path(&path);
7405 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7406 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7409 btrfs_release_path(&path);
/*
 * Drain info->corrupt_blocks, pruning the parent pointer to each corrupt
 * block via prune_one_block(). The transaction is started lazily on the
 * first entry and committed once at the end.
 * NOTE(review): prune_one_block()'s return value is ignored here —
 * presumably best-effort by design; confirm before changing.
 */
7413 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7415 struct btrfs_trans_handle *trans = NULL;
7416 struct cache_extent *cache;
7417 struct btrfs_corrupt_block *corrupt;
7420 cache = search_cache_extent(info->corrupt_blocks, 0);
7424 trans = btrfs_start_transaction(info->extent_root, 1);
7426 return PTR_ERR(trans);
7428 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7429 prune_one_block(trans, info, corrupt);
7430 remove_cache_extent(info->corrupt_blocks, cache);
7433 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the in-memory free space cache state: drop every EXTENT_DIRTY
 * range from fs_info->free_space_cache and walk the block groups so the
 * per-group cached state is reset, forcing re-caching on next use.
 */
7437 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7439 struct btrfs_block_group_cache *cache;
7444 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7445 &start, &end, EXTENT_DIRTY);
7448 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7454 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group to find the next one. */
7459 start = cache->key.objectid + cache->key.offset;
/*
 * Final pass over every extent record gathered during the scan: report
 * (and, with --repair, fix) duplicate extent items, ref-count mismatches,
 * backpointer mismatches, unchecked owner refs, bad full-backref flags,
 * stripe-crossing metadata and chunk-type mismatches. With repair enabled
 * the problem ranges are first pinned in excluded_extents so new
 * allocations cannot land on them while we rewrite the extent tree.
 * Returns 0, -EAGAIN (a fix requires a rescan), or another negative errno.
 */
7463 static int check_extent_refs(struct btrfs_root *root,
7464 struct cache_tree *extent_cache)
7466 struct extent_record *rec;
7467 struct cache_extent *cache;
7476 * if we're doing a repair, we have to make sure
7477 * we don't allocate from the problem extents.
7478 * In the worst case, this will be all the
7481 cache = search_cache_extent(extent_cache, 0);
7483 rec = container_of(cache, struct extent_record, cache);
7484 set_extent_dirty(root->fs_info->excluded_extents,
7486 rec->start + rec->max_size - 1,
7488 cache = next_cache_extent(cache);
7491 /* pin down all the corrupted blocks too */
7492 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7494 set_extent_dirty(root->fs_info->excluded_extents,
7496 cache->start + cache->size - 1,
7498 cache = next_cache_extent(cache);
7500 prune_corrupt_blocks(root->fs_info);
7501 reset_cached_block_groups(root->fs_info);
7504 reset_cached_block_groups(root->fs_info);
7507 * We need to delete any duplicate entries we find first otherwise we
7508 * could mess up the extent tree when we have backrefs that actually
7509 * belong to a different extent item and not the weird duplicate one.
7511 while (repair && !list_empty(&duplicate_extents)) {
7512 rec = list_entry(duplicate_extents.next, struct extent_record,
7514 list_del_init(&rec->list);
7516 /* Sometimes we can find a backref before we find an actual
7517 * extent, so we need to process it a little bit to see if there
7518 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7519 * if this is a backref screwup. If we need to delete stuff
7520 * process_duplicates() will return 0, otherwise it will return
7523 if (process_duplicates(root, extent_cache, rec))
7525 ret = delete_duplicate_records(root, rec);
7529 * delete_duplicate_records will return the number of entries
7530 * deleted, so if it's greater than 0 then we know we actually
7531 * did something and we need to remove.
/* Main reporting/repair loop over every record in the cache. */
7545 cache = search_cache_extent(extent_cache, 0);
7548 rec = container_of(cache, struct extent_record, cache);
7549 if (rec->num_duplicates) {
7550 fprintf(stderr, "extent item %llu has multiple extent "
7551 "items\n", (unsigned long long)rec->start);
/* Ref count recorded in the extent item disagrees with refs found. */
7556 if (rec->refs != rec->extent_item_refs) {
7557 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7558 (unsigned long long)rec->start,
7559 (unsigned long long)rec->nr);
7560 fprintf(stderr, "extent item %llu, found %llu\n",
7561 (unsigned long long)rec->extent_item_refs,
7562 (unsigned long long)rec->refs);
7563 ret = record_orphan_data_extents(root->fs_info, rec);
7570 * we can't use the extent to repair file
7571 * extent, let the fallback method handle it.
7573 if (!fixed && repair) {
7574 ret = fixup_extent_refs(
7585 if (all_backpointers_checked(rec, 1)) {
7586 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7587 (unsigned long long)rec->start,
7588 (unsigned long long)rec->nr);
7590 if (!fixed && !recorded && repair) {
7591 ret = fixup_extent_refs(root->fs_info,
7600 if (!rec->owner_ref_checked) {
7601 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7602 (unsigned long long)rec->start,
7603 (unsigned long long)rec->nr);
7604 if (!fixed && !recorded && repair) {
7605 ret = fixup_extent_refs(root->fs_info,
7614 if (rec->bad_full_backref) {
7615 fprintf(stderr, "bad full backref, on [%llu]\n",
7616 (unsigned long long)rec->start);
7618 ret = fixup_extent_flags(root->fs_info, rec);
7627 * Although it's not a extent ref's problem, we reuse this
7628 * routine for error reporting.
7629 * No repair function yet.
7631 if (rec->crossing_stripes) {
7633 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7634 rec->start, rec->start + rec->max_size);
7639 if (rec->wrong_chunk_type) {
7641 "bad extent [%llu, %llu), type mismatch with chunk\n",
7642 rec->start, rec->start + rec->max_size);
/* Done with this record; drop it and unpin the range if it is clean. */
7647 remove_cache_extent(extent_cache, cache);
7648 free_all_extent_backrefs(rec);
7649 if (!init_extent_tree && repair && (!cur_err || fixed))
7650 clear_extent_dirty(root->fs_info->excluded_extents,
7652 rec->start + rec->max_size - 1,
7658 if (ret && ret != -EAGAIN) {
7659 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7662 struct btrfs_trans_handle *trans;
7664 root = root->fs_info->extent_root;
7665 trans = btrfs_start_transaction(root, 1);
7666 if (IS_ERR(trans)) {
7667 ret = PTR_ERR(trans);
/* Recompute bytes_used etc. after rewriting the extent tree. */
7671 btrfs_fix_block_accounting(trans, root);
7672 ret = btrfs_commit_transaction(trans, root);
7677 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of the given RAID
 * profile: RAID0 divides by all stripes, RAID10 by stripe pairs,
 * RAID5/6 exclude one/two parity stripes; anything else (single/DUP/
 * RAID1) stores the full length on each device.
 */
7683 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7687 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7688 stripe_size = length;
7689 stripe_size /= num_stripes;
7690 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7691 stripe_size = length * 2;
7692 stripe_size /= num_stripes;
7693 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
/* One stripe holds parity, so data is spread over num_stripes - 1. */
7694 stripe_size = length;
7695 stripe_size /= (num_stripes - 1);
7696 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
/* Two parity stripes for RAID6. */
7697 stripe_size = length;
7698 stripe_size /= (num_stripes - 2);
7700 stripe_size = length;
7706 * Check the chunk with its block group/dev list ref:
7707 * Return 0 if all refs seems valid.
7708 * Return 1 if part of refs seems valid, need later check for rebuild ref
7709 * like missing block group and needs to search extent tree to rebuild them.
7710 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk against its block group item and its per-stripe
 * device extents (see the return-value contract in the comment above).
 * Matching records are detached from the orphan lists and linked to the
 * chunk so the leftovers can later be reported as orphans.
 */
7712 static int check_chunk_refs(struct chunk_record *chunk_rec,
7713 struct block_group_tree *block_group_cache,
7714 struct device_extent_tree *dev_extent_cache,
7717 struct cache_extent *block_group_item;
7718 struct block_group_record *block_group_rec;
7719 struct cache_extent *dev_extent_item;
7720 struct device_extent_record *dev_extent_rec;
7724 int metadump_v2 = 0;
/* 1) The chunk must have a matching block group item. */
7728 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7731 if (block_group_item) {
7732 block_group_rec = container_of(block_group_item,
7733 struct block_group_record,
7735 if (chunk_rec->length != block_group_rec->offset ||
7736 chunk_rec->offset != block_group_rec->objectid ||
7738 chunk_rec->type_flags != block_group_rec->flags)) {
7741 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7742 chunk_rec->objectid,
7747 chunk_rec->type_flags,
7748 block_group_rec->objectid,
7749 block_group_rec->type,
7750 block_group_rec->offset,
7751 block_group_rec->offset,
7752 block_group_rec->objectid,
7753 block_group_rec->flags);
/* Matched: take the bg record off the orphan list and attach it. */
7756 list_del_init(&block_group_rec->list);
7757 chunk_rec->bg_rec = block_group_rec;
7762 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7763 chunk_rec->objectid,
7768 chunk_rec->type_flags);
/* 2) Every stripe must have a matching device extent of stripe length. */
7775 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7776 chunk_rec->num_stripes);
7777 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7778 devid = chunk_rec->stripes[i].devid;
7779 offset = chunk_rec->stripes[i].offset;
7780 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7781 devid, offset, length);
7782 if (dev_extent_item) {
7783 dev_extent_rec = container_of(dev_extent_item,
7784 struct device_extent_record,
7786 if (dev_extent_rec->objectid != devid ||
7787 dev_extent_rec->offset != offset ||
7788 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7789 dev_extent_rec->length != length) {
7792 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7793 chunk_rec->objectid,
7796 chunk_rec->stripes[i].devid,
7797 chunk_rec->stripes[i].offset,
7798 dev_extent_rec->objectid,
7799 dev_extent_rec->offset,
7800 dev_extent_rec->length);
/* Matched stripe: claim the dev extent for this chunk. */
7803 list_move(&dev_extent_rec->chunk_list,
7804 &chunk_rec->dextents);
7809 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7810 chunk_rec->objectid,
7813 chunk_rec->stripes[i].devid,
7814 chunk_rec->stripes[i].offset);
7821 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Validate every chunk via check_chunk_refs() and distribute the chunk
 * records onto the caller's good/bad/rebuild lists (each may be NULL).
 * Afterwards, any block groups or device extents still on the orphan
 * lists had no owning chunk and are reported (unless silent).
 */
7822 int check_chunks(struct cache_tree *chunk_cache,
7823 struct block_group_tree *block_group_cache,
7824 struct device_extent_tree *dev_extent_cache,
7825 struct list_head *good, struct list_head *bad,
7826 struct list_head *rebuild, int silent)
7828 struct cache_extent *chunk_item;
7829 struct chunk_record *chunk_rec;
7830 struct block_group_record *bg_rec;
7831 struct device_extent_record *dext_rec;
7835 chunk_item = first_cache_extent(chunk_cache);
7836 while (chunk_item) {
7837 chunk_rec = container_of(chunk_item, struct chunk_record,
7839 err = check_chunk_refs(chunk_rec, block_group_cache,
7840 dev_extent_cache, silent);
/* err == 0: fully referenced; err > 0: partially, may be rebuilt. */
7843 if (err == 0 && good)
7844 list_add_tail(&chunk_rec->list, good);
7845 if (err > 0 && rebuild)
7846 list_add_tail(&chunk_rec->list, rebuild);
7848 list_add_tail(&chunk_rec->list, bad);
7849 chunk_item = next_cache_extent(chunk_item);
/* Anything left on the orphan lists was never claimed by a chunk. */
7852 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7855 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7863 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7867 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all device extents belonging to one device and
 * compare against the byte_used recorded in its dev item. Matched
 * extents are removed from the no-device orphan list along the way.
 */
7878 static int check_device_used(struct device_record *dev_rec,
7879 struct device_extent_tree *dext_cache)
7881 struct cache_extent *cache;
7882 struct device_extent_record *dev_extent_rec;
7885 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7887 dev_extent_rec = container_of(cache,
7888 struct device_extent_record,
/* The tree is keyed by (devid, offset); stop at the next device. */
7890 if (dev_extent_rec->objectid != dev_rec->devid)
7893 list_del_init(&dev_extent_rec->device_list);
7894 total_byte += dev_extent_rec->length;
7895 cache = next_cache_extent(cache);
7898 if (total_byte != dev_rec->byte_used) {
7900 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7901 total_byte, dev_rec->byte_used, dev_rec->objectid,
7902 dev_rec->type, dev_rec->offset);
7909 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device in the rb-tree cache, then
 * report any device extents whose device item was never found.
 */
7910 static int check_devices(struct rb_root *dev_cache,
7911 struct device_extent_tree *dev_extent_cache)
7913 struct rb_node *dev_node;
7914 struct device_record *dev_rec;
7915 struct device_extent_record *dext_rec;
7919 dev_node = rb_first(dev_cache);
7921 dev_rec = container_of(dev_node, struct device_record, node);
7922 err = check_device_used(dev_rec, dev_extent_cache);
7926 dev_node = rb_next(dev_node);
/* Device extents still on this orphan list have no matching dev item. */
7928 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7931 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7932 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (bytenr, level,
 * node size, snapshot/drop state) and append it to the given list.
 * drop_key may be NULL when the root is not mid-deletion.
 * The record is freed later by free_root_item_list() or the consumer.
 */
7939 static int add_root_item_to_list(struct list_head *head,
7940 u64 objectid, u64 bytenr, u64 last_snapshot,
7941 u8 level, u8 drop_level,
7942 int level_size, struct btrfs_key *drop_key)
7945 struct root_item_record *ri_rec;
7946 ri_rec = malloc(sizeof(*ri_rec));
7949 ri_rec->bytenr = bytenr;
7950 ri_rec->objectid = objectid;
7951 ri_rec->level = level;
7952 ri_rec->level_size = level_size;
7953 ri_rec->drop_level = drop_level;
7954 ri_rec->last_snapshot = last_snapshot;
7956 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7957 list_add_tail(&ri_rec->list, head);
/* Free every root_item_record queued on the list, emptying it. */
7962 static void free_root_item_list(struct list_head *list)
7964 struct root_item_record *ri_rec;
7966 while (!list_empty(list)) {
7967 ri_rec = list_first_entry(list, struct root_item_record,
7969 list_del_init(&ri_rec->list);
/*
 * Process each queued tree root in turn: read its root node, seed the
 * pending/seen/nodes caches via add_root_to_pending(), and run
 * run_next_block() until that root's blocks are exhausted, filling the
 * extent/chunk/device/block-group caches. A final run_next_block() loop
 * (with a NULL root record) drains whatever remains pending.
 * Returns 0 or a negative errno from block processing.
 */
7974 static int deal_root_from_list(struct list_head *list,
7975 struct btrfs_root *root,
7976 struct block_info *bits,
7978 struct cache_tree *pending,
7979 struct cache_tree *seen,
7980 struct cache_tree *reada,
7981 struct cache_tree *nodes,
7982 struct cache_tree *extent_cache,
7983 struct cache_tree *chunk_cache,
7984 struct rb_root *dev_cache,
7985 struct block_group_tree *block_group_cache,
7986 struct device_extent_tree *dev_extent_cache)
7991 while (!list_empty(list)) {
7992 struct root_item_record *rec;
7993 struct extent_buffer *buf;
7994 rec = list_entry(list->next,
7995 struct root_item_record, list);
7997 buf = read_tree_block(root->fs_info->tree_root,
7998 rec->bytenr, rec->level_size, 0);
7999 if (!extent_buffer_uptodate(buf)) {
8000 free_extent_buffer(buf);
8004 add_root_to_pending(buf, extent_cache, pending,
8005 seen, nodes, rec->objectid);
8007 * To rebuild extent tree, we need deal with snapshot
8008 * one by one, otherwise we deal with node firstly which
8009 * can maximize readahead.
8012 ret = run_next_block(root, bits, bits_nr, &last,
8013 pending, seen, reada, nodes,
8014 extent_cache, chunk_cache,
8015 dev_cache, block_group_cache,
8016 dev_extent_cache, rec);
8020 free_extent_buffer(buf);
8021 list_del(&rec->list);
/* Drain any blocks still pending after all roots were consumed. */
8027 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8028 reada, nodes, extent_cache, chunk_cache,
8029 dev_cache, block_group_cache,
8030 dev_extent_cache, NULL);
/*
 * Top-level driver for the extent/chunk phase of fsck. Builds all the
 * per-run caches, queues the tree root, chunk root and every ROOT_ITEM
 * found in the tree root (split into normal vs. mid-drop snapshots),
 * scans them with deal_root_from_list(), then cross-checks chunks,
 * extent refs and devices. A -EAGAIN from the checkers (something was
 * repaired) tears everything down and restarts the whole scan.
 */
8040 static int check_chunks_and_extents(struct btrfs_root *root)
8042 struct rb_root dev_cache;
8043 struct cache_tree chunk_cache;
8044 struct block_group_tree block_group_cache;
8045 struct device_extent_tree dev_extent_cache;
8046 struct cache_tree extent_cache;
8047 struct cache_tree seen;
8048 struct cache_tree pending;
8049 struct cache_tree reada;
8050 struct cache_tree nodes;
8051 struct extent_io_tree excluded_extents;
8052 struct cache_tree corrupt_blocks;
8053 struct btrfs_path path;
8054 struct btrfs_key key;
8055 struct btrfs_key found_key;
8057 struct block_info *bits;
8059 struct extent_buffer *leaf;
8061 struct btrfs_root_item ri;
8062 struct list_head dropping_trees;
8063 struct list_head normal_trees;
8064 struct btrfs_root *root1;
8069 dev_cache = RB_ROOT;
8070 cache_tree_init(&chunk_cache);
8071 block_group_tree_init(&block_group_cache);
8072 device_extent_tree_init(&dev_extent_cache);
8074 cache_tree_init(&extent_cache);
8075 cache_tree_init(&seen);
8076 cache_tree_init(&pending);
8077 cache_tree_init(&nodes);
8078 cache_tree_init(&reada);
8079 cache_tree_init(&corrupt_blocks);
8080 extent_io_tree_init(&excluded_extents);
8081 INIT_LIST_HEAD(&dropping_trees);
8082 INIT_LIST_HEAD(&normal_trees);
/* Publish the per-run caches through fs_info for hooks/repair code. */
8085 root->fs_info->excluded_extents = &excluded_extents;
8086 root->fs_info->fsck_extent_cache = &extent_cache;
8087 root->fs_info->free_extent_hook = free_extent_hook;
8088 root->fs_info->corrupt_blocks = &corrupt_blocks;
8092 bits = malloc(bits_nr * sizeof(struct block_info));
8098 if (ctx.progress_enabled) {
8099 ctx.tp = TASK_EXTENTS;
8100 task_start(ctx.info);
/* Seed the scan with the tree root and the chunk root. */
8104 root1 = root->fs_info->tree_root;
8105 level = btrfs_header_level(root1->node);
8106 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8107 root1->node->start, 0, level, 0,
8108 btrfs_level_size(root1, level), NULL);
8111 root1 = root->fs_info->chunk_root;
8112 level = btrfs_header_level(root1->node);
8113 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8114 root1->node->start, 0, level, 0,
8115 btrfs_level_size(root1, level), NULL);
/* Walk every ROOT_ITEM in the tree root and queue it for scanning. */
8118 btrfs_init_path(&path);
8121 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8122 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8127 leaf = path.nodes[0];
8128 slot = path.slots[0];
8129 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8130 ret = btrfs_next_leaf(root, &path);
8133 leaf = path.nodes[0];
8134 slot = path.slots[0];
8136 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8137 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8138 unsigned long offset;
8141 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8142 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8143 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress == 0 means the root is not being deleted. */
8144 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8145 level = btrfs_root_level(&ri);
8146 level_size = btrfs_level_size(root, level);
8147 ret = add_root_item_to_list(&normal_trees,
8149 btrfs_root_bytenr(&ri),
8150 last_snapshot, level,
8151 0, level_size, NULL);
/* A root mid-deletion: carry its drop key so the walk can resume. */
8155 level = btrfs_root_level(&ri);
8156 level_size = btrfs_level_size(root, level);
8157 objectid = found_key.objectid;
8158 btrfs_disk_key_to_cpu(&found_key,
8160 ret = add_root_item_to_list(&dropping_trees,
8162 btrfs_root_bytenr(&ri),
8163 last_snapshot, level,
8165 level_size, &found_key);
8172 btrfs_release_path(&path);
8175 * check_block can return -EAGAIN if it fixes something, please keep
8176 * this in mind when dealing with return values from these functions, if
8177 * we get -EAGAIN we want to fall through and restart the loop.
8179 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8180 &seen, &reada, &nodes, &extent_cache,
8181 &chunk_cache, &dev_cache, &block_group_cache,
8188 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8189 &pending, &seen, &reada, &nodes,
8190 &extent_cache, &chunk_cache, &dev_cache,
8191 &block_group_cache, &dev_extent_cache);
8198 ret = check_chunks(&chunk_cache, &block_group_cache,
8199 &dev_extent_cache, NULL, NULL, NULL, 0);
8206 ret = check_extent_refs(root, &extent_cache);
8213 ret = check_devices(&dev_cache, &dev_extent_cache);
8218 task_stop(ctx.info);
/* Normal teardown: unhook the per-run caches from fs_info. */
8220 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8221 extent_io_tree_cleanup(&excluded_extents);
8222 root->fs_info->fsck_extent_cache = NULL;
8223 root->fs_info->free_extent_hook = NULL;
8224 root->fs_info->corrupt_blocks = NULL;
8225 root->fs_info->excluded_extents = NULL;
8228 free_chunk_cache_tree(&chunk_cache);
8229 free_device_cache_tree(&dev_cache);
8230 free_block_group_tree(&block_group_cache);
8231 free_device_extent_tree(&dev_extent_cache);
8232 free_extent_cache_tree(&seen);
8233 free_extent_cache_tree(&pending);
8234 free_extent_cache_tree(&reada);
8235 free_extent_cache_tree(&nodes);
/* Restart teardown (-EAGAIN path): drop everything and loop again. */
8238 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8239 free_extent_cache_tree(&seen);
8240 free_extent_cache_tree(&pending);
8241 free_extent_cache_tree(&reada);
8242 free_extent_cache_tree(&nodes);
8243 free_chunk_cache_tree(&chunk_cache);
8244 free_block_group_tree(&block_group_cache);
8245 free_device_cache_tree(&dev_cache);
8246 free_device_extent_tree(&dev_extent_cache);
8247 free_extent_record_cache(root->fs_info, &extent_cache);
8248 free_root_item_list(&normal_trees);
8249 free_root_item_list(&dropping_trees);
8250 extent_io_tree_cleanup(&excluded_extents);
/*
 * Replace a tree root's node with a fresh empty leaf so the tree can be
 * rebuilt from scratch. With overwrite set, the existing node may be
 * reused; otherwise a new block is allocated, a zeroed header is written
 * (level, bytenr, generation, owner, fsid, chunk-tree uuid), and the
 * root item is updated if the new block landed on the same bytenr.
 */
8254 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8255 struct btrfs_root *root, int overwrite)
8257 struct extent_buffer *c;
8258 struct extent_buffer *old = root->node;
8261 struct btrfs_disk_key disk_key = {0,0,0};
8267 extent_buffer_get(c);
8270 c = btrfs_alloc_free_block(trans, root,
8271 btrfs_level_size(root, 0),
8272 root->root_key.objectid,
8273 &disk_key, level, 0, 0);
/* Allocation failed: fall back to reusing the old node. */
8276 extent_buffer_get(c);
/* Initialize a pristine header for the new (empty) root node. */
8280 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8281 btrfs_set_header_level(c, level);
8282 btrfs_set_header_bytenr(c, c->start);
8283 btrfs_set_header_generation(c, trans->transid);
8284 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8285 btrfs_set_header_owner(c, root->root_key.objectid);
8287 write_extent_buffer(c, root->fs_info->fsid,
8288 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8290 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8291 btrfs_header_chunk_tree_uuid(c),
8294 btrfs_mark_buffer_dirty(c);
8296 * this case can happen in the following case:
8298 * 1.overwrite previous root.
8300 * 2.reinit reloc data root, this is because we skip pin
8301 * down reloc data tree before which means we can allocate
8302 * same block bytenr here.
8304 if (old->start == c->start) {
8305 btrfs_set_root_generation(&root->root_item,
8307 root->root_item.level = btrfs_header_level(root->node);
8308 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8309 &root->root_key, &root->root_item);
8311 free_extent_buffer(c);
8315 free_extent_buffer(old);
8317 add_root_to_dirty_list(root);
/*
 * Recursively pin every metadata block reachable from @eb so that a
 * subsequent extent-tree rebuild cannot hand those bytes out again.
 *
 * @tree_root: non-zero when @eb belongs to the tree of tree roots; in
 * that case ROOT_ITEMs found in leaves are followed into their trees.
 * When walking an ordinary tree, level-1 nodes just pin their children's
 * bytenrs without reading the leaves.
 *
 * NOTE(review): loop-control lines (continue/goto/brace lines) are
 * elided in this excerpt.
 */
static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
				struct extent_buffer *eb, int tree_root)
	struct extent_buffer *tmp;
	struct btrfs_root_item *ri;
	struct btrfs_key key;
	int level = btrfs_header_level(eb);
	 * If we have pinned this block before, don't pin it again.
	 * This can not only avoid forever loop with broken filesystem
	 * but also give us some speedups.
	if (test_range_bit(&fs_info->pinned_extents, eb->start,
			   eb->start + eb->len - 1, EXTENT_DIRTY, 0))
	btrfs_pin_extent(fs_info, eb->start, eb->len);
	leafsize = btrfs_super_leafsize(fs_info->super_copy);
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		/* leaf of the tree root: follow ROOT_ITEMs into subtrees */
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_ROOT_ITEM_KEY)
		/* Skip the extent root and reloc roots */
		if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
		    key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
		ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
		bytenr = btrfs_disk_root_bytenr(eb, ri);
		 * If at any point we start needing the real root we
		 * will have to build a stump root for the root we are
		 * in, but for now this doesn't actually use the root so
		 * just pass in extent_root.
		tmp = read_tree_block(fs_info->extent_root, bytenr,
		if (!extent_buffer_uptodate(tmp)) {
			fprintf(stderr, "Error reading root block\n");
		/* recurse into the referenced tree as a non-tree-root */
		ret = pin_down_tree_blocks(fs_info, tmp, 0);
		free_extent_buffer(tmp);
		/* interior node: pin (and maybe descend into) each child */
		bytenr = btrfs_node_blockptr(eb, i);
		/* If we aren't the tree root don't read the block */
		if (level == 1 && !tree_root) {
			btrfs_pin_extent(fs_info, bytenr, leafsize);
		tmp = read_tree_block(fs_info->extent_root, bytenr,
		if (!extent_buffer_uptodate(tmp)) {
			fprintf(stderr, "Error reading tree block\n");
		ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
		free_extent_buffer(tmp);
/*
 * Pin all metadata in use: everything reachable from the chunk tree plus
 * everything reachable from the tree of tree roots (which, with
 * tree_root=1, recurses through ROOT_ITEMs into every other tree).
 * Must run before the extent tree is wiped and rebuilt.
 */
static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
	ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
	/* tree_root=1: follow root items into all subordinate trees */
	return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group cache directly from the chunk tree.
 *
 * Called after btrfs_free_block_groups() during extent-tree re-init:
 * every CHUNK_ITEM gets a fresh in-memory block group and its whole range
 * is marked free, so allocations work before the on-disk extent tree has
 * been repopulated.
 */
static int reset_block_groups(struct btrfs_fs_info *fs_info)
	struct btrfs_block_group_cache *cache;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	path = btrfs_alloc_path();
	key.type = BTRFS_CHUNK_ITEM_KEY;
	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
		btrfs_free_path(path);
	 * We do this in case the block groups were screwed up and had alloc
	 * bits that aren't actually set on the chunks. This happens with
	 * restored images every time and could happen in real life I guess.
	fs_info->avail_data_alloc_bits = 0;
	fs_info->avail_metadata_alloc_bits = 0;
	fs_info->avail_system_alloc_bits = 0;
	/* First we need to create the in-memory block groups */
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(fs_info->chunk_root, path);
				btrfs_free_path(path);
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_CHUNK_ITEM_KEY) {
		chunk = btrfs_item_ptr(leaf, path->slots[0],
				       struct btrfs_chunk);
		/* one in-memory block group per chunk item */
		btrfs_add_block_group(fs_info, 0,
				      btrfs_chunk_type(leaf, chunk),
				      key.objectid, key.offset,
				      btrfs_chunk_length(leaf, chunk));
		/* treat the chunk's whole range as free space for now */
		set_extent_dirty(&fs_info->free_space_cache, key.offset,
				 key.offset + btrfs_chunk_length(leaf, chunk),
		cache = btrfs_lookup_first_block_group(fs_info, start);
		start = cache->key.objectid + cache->key.offset;
	btrfs_free_path(path);
/*
 * Discard any pending balance state so the rebuilt extent tree is not
 * confused by a half-finished balance: delete the BALANCE_ITEM, delete
 * every TREE_RELOC root item, then re-initialize the data reloc tree.
 *
 * NOTE(review): loop bodies and several error/goto lines are elided in
 * this excerpt.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info)
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int del_slot, del_nr = 0;
	path = btrfs_alloc_path();
	/* step 1: remove the balance status item, if present */
	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		goto reinit_data_reloc;
	ret = btrfs_del_item(trans, root, path);
	btrfs_release_path(path);
	/* step 2: delete all TREE_RELOC root items */
	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_del_items(trans, root, path,
	btrfs_release_path(path);
	ret = btrfs_search_slot(trans, root, &key, path,
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		del_slot = path->slots[0];
	/* batch-delete the contiguous run of reloc root items */
	ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
	btrfs_release_path(path);
	/* step 3: give the data reloc tree a fresh empty root */
	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(fs_info, &key);
		fprintf(stderr, "Error reading data reloc tree\n");
		ret = PTR_ERR(root);
	record_root_in_trans(trans, root);
	ret = btrfs_fsck_reinit_root(trans, root, 0);
	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
	btrfs_free_path(path);
8597 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8598 struct btrfs_fs_info *fs_info)
8604 * The only reason we don't do this is because right now we're just
8605 * walking the trees we find and pinning down their bytes, we don't look
8606 * at any of the leaves. In order to do mixed groups we'd have to check
8607 * the leaves of any fs roots and pin down the bytes for any file
8608 * extents we find. Not hard but why do it if we don't have to?
8610 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8611 fprintf(stderr, "We don't support re-initing the extent tree "
8612 "for mixed block groups yet, please notify a btrfs "
8613 "developer you want to do this so they can add this "
8614 "functionality.\n");
8619 * first we need to walk all of the trees except the extent tree and pin
8620 * down the bytes that are in use so we don't overwrite any existing
8623 ret = pin_metadata_blocks(fs_info);
8625 fprintf(stderr, "error pinning down used bytes\n");
8630 * Need to drop all the block groups since we're going to recreate all
8633 btrfs_free_block_groups(fs_info);
8634 ret = reset_block_groups(fs_info);
8636 fprintf(stderr, "error resetting the block groups\n");
8640 /* Ok we can allocate now, reinit the extent root */
8641 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8643 fprintf(stderr, "extent root initialization failed\n");
8645 * When the transaction code is updated we should end the
8646 * transaction, but for now progs only knows about commit so
8647 * just return an error.
8653 * Now we have all the in-memory block groups setup so we can make
8654 * allocations properly, and the metadata we care about is safe since we
8655 * pinned all of it above.
8658 struct btrfs_block_group_cache *cache;
8660 cache = btrfs_lookup_first_block_group(fs_info, start);
8663 start = cache->key.objectid + cache->key.offset;
8664 ret = btrfs_insert_item(trans, fs_info->extent_root,
8665 &cache->key, &cache->item,
8666 sizeof(cache->item));
8668 fprintf(stderr, "Error adding block group\n");
8671 btrfs_extent_post_op(trans, fs_info->extent_root);
8674 ret = reset_balance(trans, fs_info);
8676 fprintf(stderr, "error reseting the pending balance\n");
/*
 * Force @eb to be rewritten (COWed) by its owning root: resolve the
 * owner from the block header, then run a write search (cow=1) down to
 * @eb's level starting from its first key, which makes the tree code
 * rewrite the block with a fresh generation.  Used when repairing
 * transid-mismatched blocks collected in fs_info->recow_ebs.
 */
static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	printf("Recowing metadata block %llu\n", eb->start);
	key.objectid = btrfs_header_owner(eb);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(root->fs_info, &key);
		fprintf(stderr, "Couldn't find owner root %llu\n",
		return PTR_ERR(root);
	path = btrfs_alloc_path();
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	/* descend only to @eb's level; take its first key as target */
	path->lowest_level = btrfs_header_level(eb);
	if (path->lowest_level)
		btrfs_node_key_to_cpu(eb, &key, 0);
		btrfs_item_key_to_cpu(eb, &key, 0);
	/* cow=1 search rewrites every block on the path, including @eb */
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Delete one item previously recorded as bad (in the delete_items list):
 * look up the root it lives in by @bad->root_id, search for @bad->key
 * with intent to delete (ins_len=-1, cow=1) and remove it, committing
 * the transaction afterwards.
 */
static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
	       bad->key.type, bad->key.offset);
	key.objectid = bad->root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(root->fs_info, &key);
		fprintf(stderr, "Couldn't find owner root %llu\n",
		return PTR_ERR(root);
	path = btrfs_alloc_path();
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	/* find the exact bad key and drop it */
	ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
	ret = btrfs_del_item(trans, root, path);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Clear the log tree pointers in the superblock (log root bytenr and
 * level both set to 0) and commit, discarding any pending log replay.
 * Called before repair so a forced commit can't later clash with an
 * unreplayable log.
 */
static int zero_log_tree(struct btrfs_root *root)
	struct btrfs_trans_handle *trans;
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
	btrfs_set_super_log_root(root->fs_info->super_copy, 0);
	btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
	ret = btrfs_commit_transaction(trans, root);
/*
 * (Re)create csum items for the data extent [@start, @start + len), one
 * sector at a time: read each sector through @buf and hand it to
 * btrfs_csum_file_block().
 *
 * NOTE(review): the third argument passed to btrfs_csum_file_block() is
 * start + len (the extent's end); its exact meaning is defined by the
 * callee, which is not visible here — confirm against ctree.h.
 */
static int populate_csum(struct btrfs_trans_handle *trans,
			 struct btrfs_root *csum_root, char *buf, u64 start,
	while (offset < len) {
		sectorsize = csum_root->sectorsize;
		/* read one sector of data into the scratch buffer */
		ret = read_extent_data(csum_root, buf, start + offset,
		/* checksum it and insert/extend the csum item */
		ret = btrfs_csum_file_block(trans, csum_root, start + len,
					    start + offset, buf, sectorsize);
		offset += sectorsize;
/*
 * Walk one fs/subvolume tree and rebuild csum items for every regular
 * (BTRFS_FILE_EXTENT_REG) file extent it references.  Inline and other
 * extent types are skipped; each REG extent's on-disk range is fed to
 * populate_csum().
 */
static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
					   struct btrfs_root *csum_root,
					   struct btrfs_root *cur_root)
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *node;
	struct btrfs_file_extent_item *fi;
	path = btrfs_alloc_path();
	/* one sector of scratch space reused by populate_csum() */
	buf = malloc(cur_root->fs_info->csum_root->sectorsize);
	ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
	/* Iterate all regular file extents and fill its csum */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
		node = path->nodes[0];
		slot = path->slots[0];
		fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
		/* on-disk location and length of the data extent */
		start = btrfs_file_extent_disk_bytenr(node, fi);
		len = btrfs_file_extent_disk_num_bytes(node, fi);
		ret = populate_csum(trans, csum_root, buf, start, len);
		 * TODO: if next leaf is corrupted, jump to nearest next valid
		ret = btrfs_next_item(cur_root, path);
	btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking every fs/subvolume tree: iterate the
 * ROOT_ITEMs in the tree of tree roots, and for each fs tree (objectid
 * in the fs-tree range, per is_fstree()) run
 * fill_csum_tree_from_one_fs_root().  Used when the extent tree has been
 * re-initialized and cannot be trusted as a data-extent source.
 */
static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
				  struct btrfs_root *csum_root)
	struct btrfs_fs_info *fs_info = csum_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *cur_root;
	struct extent_buffer *node;
	struct btrfs_key key;
	path = btrfs_alloc_path();
	/* start from the first possible fs tree */
	key.objectid = BTRFS_FS_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
		node = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(node, &key, slot);
		/* past the last possible subvolume id: done */
		if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
		if (key.type != BTRFS_ROOT_ITEM_KEY)
		if (!is_fstree(key.objectid))
		key.offset = (u64)-1;
		cur_root = btrfs_read_fs_root(fs_info, &key);
		if (IS_ERR(cur_root) || !cur_root) {
			fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
		ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
		ret = btrfs_next_item(tree_root, path);
	btrfs_free_path(path);
/*
 * Rebuild the csum tree from the extent tree: iterate EXTENT_ITEMs,
 * skip anything without BTRFS_EXTENT_FLAG_DATA, and populate csums for
 * each data extent (key.objectid = bytenr, key.offset = length).
 * Preferred source when the extent tree is intact.
 */
static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *csum_root)
	struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	path = btrfs_alloc_path();
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
		btrfs_free_path(path);
	/* per-sector scratch buffer for populate_csum() */
	buf = malloc(csum_root->sectorsize);
		btrfs_free_path(path);
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(extent_root, path);
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_ITEM_KEY) {
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_extent_item);
		/* metadata extents carry no csums: skip non-DATA */
		if (!(btrfs_extent_flags(leaf, ei) &
		      BTRFS_EXTENT_FLAG_DATA)) {
		ret = populate_csum(trans, csum_root, buf, key.objectid,
	btrfs_free_path(path);
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case, we
 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
 * will use fs/subvol trees to init the csum tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
		/* extent tree untrustworthy: walk fs trees instead */
		return fill_csum_tree_from_fs(trans, csum_root);
	return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Per-root summary reconstructed from the extent tree's tree-block
 * backrefs; keyed by root objectid via the embedded cache_extent.
 * Used by repair_root_items() to detect root items pointing at stale
 * root nodes.
 */
struct root_item_info {
	/* level of the root */
	/* number of nodes at this level, must be 1 for a root */
	struct cache_extent cache_extent;
/* lazily allocated in build_roots_info_cache(), torn down by free_roots_info_cache() */
static struct cache_tree *roots_info_cache = NULL;
/*
 * Tear down roots_info_cache: pop every cached root_item_info, free it,
 * then free the tree itself and reset the pointer so the cache can be
 * rebuilt later.  Safe to call when the cache was never built.
 */
static void free_roots_info_cache(void)
	if (!roots_info_cache)
	while (!cache_tree_empty(roots_info_cache)) {
		struct cache_extent *entry;
		struct root_item_info *rii;
		entry = first_cache_extent(roots_info_cache);
		remove_cache_extent(roots_info_cache, entry);
		rii = container_of(entry, struct root_item_info, cache_extent);
	free(roots_info_cache);
	roots_info_cache = NULL;
/*
 * Scan the extent tree and record, for every tree root it can identify,
 * the highest-level tree block referenced by a TREE_BLOCK_REF backref:
 * that block's bytenr, generation and level.  The results go into
 * roots_info_cache, which maybe_repair_root_item() later compares
 * against the on-disk root items.
 *
 * NOTE(review): several continue/error lines inside the loop are elided
 * in this excerpt.
 */
static int build_roots_info_cache(struct btrfs_fs_info *info)
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	if (!roots_info_cache) {
		roots_info_cache = malloc(sizeof(*roots_info_cache));
		if (!roots_info_cache)
		cache_tree_init(roots_info_cache);
	path = btrfs_alloc_path();
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
	leaf = path->nodes[0];
		struct btrfs_key found_key;
		struct btrfs_extent_item *ei;
		struct btrfs_extent_inline_ref *iref;
		int slot = path->slots[0];
		struct cache_extent *entry;
		struct root_item_info *rii;
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(info->extent_root, path);
			leaf = path->nodes[0];
			slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		/* only tree blocks are of interest here */
		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY)
		ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
		flags = btrfs_extent_flags(leaf, ei);
		if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
		    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
		/*
		 * METADATA_ITEM encodes the level in the key offset;
		 * EXTENT_ITEM carries a tree_block_info before the refs.
		 */
		if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
			iref = (struct btrfs_extent_inline_ref *)(ei + 1);
			level = found_key.offset;
			struct btrfs_tree_block_info *info;
			info = (struct btrfs_tree_block_info *)(ei + 1);
			iref = (struct btrfs_extent_inline_ref *)(info + 1);
			level = btrfs_tree_block_level(leaf, info);
		 * For a root extent, it must be of the following type and the
		 * first (and only one) iref in the item.
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (type != BTRFS_TREE_BLOCK_REF_KEY)
		root_id = btrfs_extent_inline_ref_offset(leaf, iref);
		/* first sighting of this root: create its cache entry */
		entry = lookup_cache_extent(roots_info_cache, root_id, 1);
			rii = malloc(sizeof(struct root_item_info));
			rii->cache_extent.start = root_id;
			rii->cache_extent.size = 1;
			rii->level = (u8)-1;	/* "no level seen yet" sentinel */
			entry = &rii->cache_extent;
			ret = insert_cache_extent(roots_info_cache, entry);
			rii = container_of(entry, struct root_item_info,
		ASSERT(rii->cache_extent.start == root_id);
		ASSERT(rii->cache_extent.size == 1);
		/* a higher level supersedes what we recorded before */
		if (level > rii->level || rii->level == (u8)-1) {
			rii->bytenr = found_key.objectid;
			rii->gen = btrfs_extent_generation(leaf, ei);
			rii->node_count = 1;
		} else if (level == rii->level) {
	btrfs_free_path(path);
/*
 * Compare the on-disk root item at path->slots[0] against the data
 * gathered by build_roots_info_cache() for the same root, and — unless
 * @read_only_mode — rewrite the item's bytenr/level/generation in place
 * when they disagree.  A root whose cached node_count != 1 is ambiguous
 * and reported as an error instead.
 *
 * NOTE(review): the return statements are elided in this excerpt; the
 * caller (repair_root_items) appears to count repaired roots, so the
 * return value presumably distinguishes "fixed" from "untouched" —
 * confirm against the full source.
 */
static int maybe_repair_root_item(struct btrfs_fs_info *info,
				  struct btrfs_path *path,
				  const struct btrfs_key *root_key,
				  const int read_only_mode)
	const u64 root_id = root_key->objectid;
	struct cache_extent *entry;
	struct root_item_info *rii;
	struct btrfs_root_item ri;
	unsigned long offset;
	entry = lookup_cache_extent(roots_info_cache, root_id, 1);
			"Error: could not find extent items for root %llu\n",
			root_key->objectid);
	rii = container_of(entry, struct root_item_info, cache_extent);
	ASSERT(rii->cache_extent.start == root_id);
	ASSERT(rii->cache_extent.size == 1);
	if (rii->node_count != 1) {
			"Error: could not find btree root extent for root %llu\n",
	/* copy the raw root item out of the leaf for inspection */
	offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
	read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
	if (btrfs_root_bytenr(&ri) != rii->bytenr ||
	    btrfs_root_level(&ri) != rii->level ||
	    btrfs_root_generation(&ri) != rii->gen) {
		 * If we're in repair mode but our caller told us to not update
		 * the root item, i.e. just check if it needs to be updated, don't
		 * print this message, since the caller will call us again shortly
		 * for the same root item without read only mode (the caller will
		 * open a transaction first).
		if (!(read_only_mode && repair))
				"%sroot item for root %llu,"
				" current bytenr %llu, current gen %llu, current level %u,"
				" new bytenr %llu, new gen %llu, new level %u\n",
				(read_only_mode ? "" : "fixing "),
				btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
				btrfs_root_level(&ri),
				rii->bytenr, rii->gen, rii->level);
		/* never roll a root item backwards to an older generation */
		if (btrfs_root_generation(&ri) > rii->gen) {
				"root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
				root_id, btrfs_root_generation(&ri), rii->gen);
		if (!read_only_mode) {
			btrfs_set_root_bytenr(&ri, rii->bytenr);
			btrfs_set_root_level(&ri, rii->level);
			btrfs_set_root_generation(&ri, rii->gen);
			write_extent_buffer(path->nodes[0], &ri,
					    offset, sizeof(ri));
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detects and fixes those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code - not doing it so, makes other
 * repair code delete or modify backrefs in the extent tree for example, which
 * will result in an inconsistent fs after repairing the root items.
 */
static int repair_root_items(struct btrfs_fs_info *info)
	struct btrfs_path *path = NULL;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_trans_handle *trans = NULL;
	/* collect the real root node locations from the extent tree */
	ret = build_roots_info_cache(info);
	path = btrfs_alloc_path();
	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	 * Avoid opening and committing transactions if a leaf doesn't have
	 * any root items that need to be fixed, so that we avoid rotating
	 * backup roots unnecessarily.
		trans = btrfs_start_transaction(info->tree_root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
		ret = btrfs_search_slot(trans, info->tree_root, &key, path,
		leaf = path->nodes[0];
			struct btrfs_key found_key;
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				int no_more_keys = find_next_key(path, &key);
				btrfs_release_path(path);
					ret = btrfs_commit_transaction(trans,
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			if (found_key.type != BTRFS_ROOT_ITEM_KEY)
			/* reloc roots are managed elsewhere: skip */
			if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			ret = maybe_repair_root_item(info, path, &found_key,
			/* found a fixable item in read-only pass: redo in a transaction */
			if (!trans && repair) {
				btrfs_release_path(path);
	free_roots_info_cache();
	btrfs_free_path(path);
		btrfs_commit_transaction(trans, info->tree_root);
9368 const char * const cmd_check_usage[] = {
9369 "btrfs check [options] <device>",
9370 "Check structural inegrity of a filesystem (unmounted).",
9371 "Check structural inegrity of an unmounted filesystem. Verify internal",
9372 "trees' consistency and item connectivity. In the repair mode try to",
9373 "fix the problems found.",
9374 "WARNING: the repair mode is considered dangerous",
9376 "-s|--super <superblock> use this superblock copy",
9377 "-b|--backup use the backup root copy",
9378 "--repair try to repair the filesystem",
9379 "--readonly run in read-only mode (default)",
9380 "--init-csum-tree create a new CRC tree",
9381 "--init-extent-tree create a new extent tree",
9382 "--check-data-csum verify checkums of data blocks",
9383 "-Q|--qgroup-report print a report on qgroup consistency",
9384 "-E|--subvol-extents <subvolid>",
9385 " print subvolume extents and sharing state",
9386 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9387 "-p|--progress indicate progress",
9391 int cmd_check(int argc, char **argv)
9393 struct cache_tree root_cache;
9394 struct btrfs_root *root;
9395 struct btrfs_fs_info *info;
9398 u64 tree_root_bytenr = 0;
9399 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9402 int init_csum_tree = 0;
9404 int qgroup_report = 0;
9405 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9409 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9410 OPT_CHECK_CSUM, OPT_READONLY };
9411 static const struct option long_options[] = {
9412 { "super", required_argument, NULL, 's' },
9413 { "repair", no_argument, NULL, OPT_REPAIR },
9414 { "readonly", no_argument, NULL, OPT_READONLY },
9415 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9416 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9417 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9418 { "backup", no_argument, NULL, 'b' },
9419 { "subvol-extents", required_argument, NULL, 'E' },
9420 { "qgroup-report", no_argument, NULL, 'Q' },
9421 { "tree-root", required_argument, NULL, 'r' },
9422 { "progress", no_argument, NULL, 'p' },
9426 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9430 case 'a': /* ignored */ break;
9432 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9435 num = arg_strtou64(optarg);
9436 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9438 "ERROR: super mirror should be less than: %d\n",
9439 BTRFS_SUPER_MIRROR_MAX);
9442 bytenr = btrfs_sb_offset(((int)num));
9443 printf("using SB copy %llu, bytenr %llu\n", num,
9444 (unsigned long long)bytenr);
9450 subvolid = arg_strtou64(optarg);
9453 tree_root_bytenr = arg_strtou64(optarg);
9456 ctx.progress_enabled = true;
9460 usage(cmd_check_usage);
9462 printf("enabling repair mode\n");
9464 ctree_flags |= OPEN_CTREE_WRITES;
9470 printf("Creating a new CRC tree\n");
9473 ctree_flags |= OPEN_CTREE_WRITES;
9475 case OPT_INIT_EXTENT:
9476 init_extent_tree = 1;
9477 ctree_flags |= (OPEN_CTREE_WRITES |
9478 OPEN_CTREE_NO_BLOCK_GROUPS);
9481 case OPT_CHECK_CSUM:
9482 check_data_csum = 1;
9486 argc = argc - optind;
9488 if (check_argc_exact(argc, 1))
9489 usage(cmd_check_usage);
9491 if (ctx.progress_enabled) {
9492 ctx.tp = TASK_NOTHING;
9493 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9496 /* This check is the only reason for --readonly to exist */
9497 if (readonly && repair) {
9498 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9503 cache_tree_init(&root_cache);
9505 if((ret = check_mounted(argv[optind])) < 0) {
9506 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9509 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9514 /* only allow partial opening under repair mode */
9516 ctree_flags |= OPEN_CTREE_PARTIAL;
9518 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9521 fprintf(stderr, "Couldn't open file system\n");
9527 root = info->fs_root;
9530 * repair mode will force us to commit transaction which
9531 * will make us fail to load log tree when mounting.
9533 if (repair && btrfs_super_log_root(info->super_copy)) {
9534 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9539 ret = zero_log_tree(root);
9541 fprintf(stderr, "fail to zero log tree\n");
9546 uuid_unparse(info->super_copy->fsid, uuidbuf);
9547 if (qgroup_report) {
9548 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9550 ret = qgroup_verify_all(info);
9552 print_qgroup_report(1);
9556 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9557 subvolid, argv[optind], uuidbuf);
9558 ret = print_extent_state(info, subvolid);
9561 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9563 if (!extent_buffer_uptodate(info->tree_root->node) ||
9564 !extent_buffer_uptodate(info->dev_root->node) ||
9565 !extent_buffer_uptodate(info->chunk_root->node)) {
9566 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9571 if (init_extent_tree || init_csum_tree) {
9572 struct btrfs_trans_handle *trans;
9574 trans = btrfs_start_transaction(info->extent_root, 0);
9575 if (IS_ERR(trans)) {
9576 fprintf(stderr, "Error starting transaction\n");
9577 ret = PTR_ERR(trans);
9581 if (init_extent_tree) {
9582 printf("Creating a new extent tree\n");
9583 ret = reinit_extent_tree(trans, info);
9588 if (init_csum_tree) {
9589 fprintf(stderr, "Reinit crc root\n");
9590 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9592 fprintf(stderr, "crc root initialization failed\n");
9597 ret = fill_csum_tree(trans, info->csum_root,
9600 fprintf(stderr, "crc refilling failed\n");
9605 * Ok now we commit and run the normal fsck, which will add
9606 * extent entries for all of the items it finds.
9608 ret = btrfs_commit_transaction(trans, info->extent_root);
9612 if (!extent_buffer_uptodate(info->extent_root->node)) {
9613 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9617 if (!extent_buffer_uptodate(info->csum_root->node)) {
9618 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9623 if (!ctx.progress_enabled)
9624 fprintf(stderr, "checking extents\n");
9625 ret = check_chunks_and_extents(root);
9627 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9629 ret = repair_root_items(info);
9633 fprintf(stderr, "Fixed %d roots.\n", ret);
9635 } else if (ret > 0) {
9637 "Found %d roots with an outdated root item.\n",
9640 "Please run a filesystem check with the option --repair to fix them.\n");
9645 if (!ctx.progress_enabled)
9646 fprintf(stderr, "checking free space cache\n");
9647 ret = check_space_cache(root);
9652 * We used to have to have these hole extents in between our real
9653 * extents so if we don't have this flag set we need to make sure there
9654 * are no gaps in the file extents for inodes, otherwise we can just
9655 * ignore it when this happens.
9657 no_holes = btrfs_fs_incompat(root->fs_info,
9658 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9659 if (!ctx.progress_enabled)
9660 fprintf(stderr, "checking fs roots\n");
9661 ret = check_fs_roots(root, &root_cache);
9665 fprintf(stderr, "checking csums\n");
9666 ret = check_csums(root);
9670 fprintf(stderr, "checking root refs\n");
9671 ret = check_root_refs(root, &root_cache);
9675 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9676 struct extent_buffer *eb;
9678 eb = list_first_entry(&root->fs_info->recow_ebs,
9679 struct extent_buffer, recow);
9680 list_del_init(&eb->recow);
9681 ret = recow_extent_buffer(root, eb);
9686 while (!list_empty(&delete_items)) {
9687 struct bad_item *bad;
9689 bad = list_first_entry(&delete_items, struct bad_item, list);
9690 list_del_init(&bad->list);
9692 ret = delete_bad_item(root, bad);
9696 if (info->quota_enabled) {
9698 fprintf(stderr, "checking quota groups\n");
9699 err = qgroup_verify_all(info);
9704 if (!list_empty(&root->fs_info->recow_ebs)) {
9705 fprintf(stderr, "Transid errors in file system\n");
9709 print_qgroup_report(0);
9710 if (found_old_backref) { /*
9711 * there was a disk format change when mixed
9712 * backref was in testing tree. The old format
9713 * existed about one week.
9715 printf("\n * Found old mixed backref format. "
9716 "The old format is not supported! *"
9717 "\n * Please mount the FS in readonly mode, "
9718 "backup data and re-format the FS. *\n\n");
9721 printf("found %llu bytes used err is %d\n",
9722 (unsigned long long)bytes_used, ret);
9723 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9724 printf("total tree bytes: %llu\n",
9725 (unsigned long long)total_btree_bytes);
9726 printf("total fs tree bytes: %llu\n",
9727 (unsigned long long)total_fs_tree_bytes);
9728 printf("total extent tree bytes: %llu\n",
9729 (unsigned long long)total_extent_tree_bytes);
9730 printf("btree space waste bytes: %llu\n",
9731 (unsigned long long)btree_space_waste);
9732 printf("file data blocks allocated: %llu\n referenced %llu\n",
9733 (unsigned long long)data_bytes_allocated,
9734 (unsigned long long)data_bytes_referenced);
9735 printf("%s\n", PACKAGE_STRING);
9737 free_root_recs_tree(&root_cache);
9740 btrfs_close_all_devices();
9742 if (ctx.progress_enabled)
9743 task_deinit(ctx.info);