2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
48 TASK_NOTHING, /* have to be the last element */
53 enum task_position tp;
55 struct task_info *info;
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
81 static char *task_position_string[] = {
83 "checking free space cache",
87 task_period_start(priv->info, 1000 /* 1s */);
89 if (priv->tp == TASK_NOTHING)
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
97 task_period_wait(priv->info);
102 static int print_status_return(void *p)
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
119 struct data_backref {
120 struct extent_backref node;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
149 struct tree_backref {
150 struct extent_backref node;
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
167 u64 extent_item_refs;
169 u64 parent_generation;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
189 unsigned int filetype:8;
191 unsigned int ref_type;
198 struct root_item_record {
199 struct list_head list;
206 struct btrfs_key drop_key;
209 #define REF_ERR_NO_DIR_ITEM (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX (1 << 1)
211 #define REF_ERR_NO_INODE_REF (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
214 #define REF_ERR_DUP_INODE_REF (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
223 struct file_extent_hole {
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
251 if (hole1->start < hole2->start)
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
260 /* Hole 2 will be merge center */
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
270 static int add_file_extent_hole(struct rb_root *holes,
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
277 hole = malloc(sizeof(*hole));
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
297 /* iterate merge with next holes */
299 if (!rb_next(&hole->node))
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
303 if (hole->start + hole->len >= next->start) {
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
307 rb_erase(&next->node, holes);
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
321 hole = (struct file_extent_hole *)data;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
327 if (start >= hole->start && start < hole->start + hole->len)
333 * Delete a hole in the record
335 * This will do the hole split and is much restrict than add.
337 static int del_file_extent_hole(struct rb_root *holes,
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
346 struct rb_node *node;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
356 hole = rb_entry(node, struct file_extent_hole, node);
357 if (start + len > hole->start + hole->len)
361 * Now there will be no overflap, delete the hole and re-add the
362 * split(s) if they exists.
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
374 rb_erase(node, holes);
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
382 ret = add_file_extent_hole(holes, next_start, next_len);
389 static int copy_file_extent_holes(struct rb_root *dst,
392 struct file_extent_hole *hole;
393 struct rb_node *node;
396 node = rb_first(src);
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
402 node = rb_next(node);
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
417 node = rb_first(holes);
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
443 struct rb_root holes;
444 struct list_head orphan_extents;
449 #define I_ERR_NO_INODE_ITEM (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
489 struct cache_extent cache;
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
514 struct btrfs_key key;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
528 extent_buffer_get(root->node);
532 static u8 imode_to_type(u32 imode)
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
558 else if (rec1->devid < rec2->devid)
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct orphan_data_extent *src_orphan;
570 struct orphan_data_extent *dst_orphan;
574 rec = malloc(sizeof(*rec));
575 memcpy(rec, orig_rec, sizeof(*rec));
577 INIT_LIST_HEAD(&rec->backrefs);
578 INIT_LIST_HEAD(&rec->orphan_extents);
579 rec->holes = RB_ROOT;
581 list_for_each_entry(orig, &orig_rec->backrefs, list) {
582 size = sizeof(*orig) + orig->namelen + 1;
583 backref = malloc(size);
584 memcpy(backref, orig, size);
585 list_add_tail(&backref->list, &rec->backrefs);
587 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588 dst_orphan = malloc(sizeof(*dst_orphan));
589 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
591 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
594 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
600 static void print_orphan_data_extents(struct list_head *orphan_extents,
603 struct orphan_data_extent *orphan;
605 if (list_empty(orphan_extents))
607 printf("The following data extent is lost in tree %llu:\n",
609 list_for_each_entry(orphan, orphan_extents, list) {
610 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
611 orphan->objectid, orphan->offset, orphan->disk_bytenr,
616 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
618 u64 root_objectid = root->root_key.objectid;
619 int errors = rec->errors;
623 /* reloc root errors, we print its corresponding fs root objectid*/
624 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
625 root_objectid = root->root_key.offset;
626 fprintf(stderr, "reloc");
628 fprintf(stderr, "root %llu inode %llu errors %x",
629 (unsigned long long) root_objectid,
630 (unsigned long long) rec->ino, rec->errors);
632 if (errors & I_ERR_NO_INODE_ITEM)
633 fprintf(stderr, ", no inode item");
634 if (errors & I_ERR_NO_ORPHAN_ITEM)
635 fprintf(stderr, ", no orphan item");
636 if (errors & I_ERR_DUP_INODE_ITEM)
637 fprintf(stderr, ", dup inode item");
638 if (errors & I_ERR_DUP_DIR_INDEX)
639 fprintf(stderr, ", dup dir index");
640 if (errors & I_ERR_ODD_DIR_ITEM)
641 fprintf(stderr, ", odd dir item");
642 if (errors & I_ERR_ODD_FILE_EXTENT)
643 fprintf(stderr, ", odd file extent");
644 if (errors & I_ERR_BAD_FILE_EXTENT)
645 fprintf(stderr, ", bad file extent");
646 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
647 fprintf(stderr, ", file extent overlap");
648 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
649 fprintf(stderr, ", file extent discount");
650 if (errors & I_ERR_DIR_ISIZE_WRONG)
651 fprintf(stderr, ", dir isize wrong");
652 if (errors & I_ERR_FILE_NBYTES_WRONG)
653 fprintf(stderr, ", nbytes wrong");
654 if (errors & I_ERR_ODD_CSUM_ITEM)
655 fprintf(stderr, ", odd csum item");
656 if (errors & I_ERR_SOME_CSUM_MISSING)
657 fprintf(stderr, ", some csum missing");
658 if (errors & I_ERR_LINK_COUNT_WRONG)
659 fprintf(stderr, ", link count wrong");
660 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
661 fprintf(stderr, ", orphan file extent");
662 fprintf(stderr, "\n");
663 /* Print the orphan extents if needed */
664 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
665 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
667 /* Print the holes if needed */
668 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
669 struct file_extent_hole *hole;
670 struct rb_node *node;
673 node = rb_first(&rec->holes);
674 fprintf(stderr, "Found file extent holes:\n");
677 hole = rb_entry(node, struct file_extent_hole, node);
678 fprintf(stderr, "\tstart: %llu, len: %llu\n",
679 hole->start, hole->len);
680 node = rb_next(node);
683 fprintf(stderr, "\tstart: 0, len: %llu\n",
684 round_up(rec->isize, root->sectorsize));
688 static void print_ref_error(int errors)
690 if (errors & REF_ERR_NO_DIR_ITEM)
691 fprintf(stderr, ", no dir item");
692 if (errors & REF_ERR_NO_DIR_INDEX)
693 fprintf(stderr, ", no dir index");
694 if (errors & REF_ERR_NO_INODE_REF)
695 fprintf(stderr, ", no inode ref");
696 if (errors & REF_ERR_DUP_DIR_ITEM)
697 fprintf(stderr, ", dup dir item");
698 if (errors & REF_ERR_DUP_DIR_INDEX)
699 fprintf(stderr, ", dup dir index");
700 if (errors & REF_ERR_DUP_INODE_REF)
701 fprintf(stderr, ", dup inode ref");
702 if (errors & REF_ERR_INDEX_UNMATCH)
703 fprintf(stderr, ", index unmatch");
704 if (errors & REF_ERR_FILETYPE_UNMATCH)
705 fprintf(stderr, ", filetype unmatch");
706 if (errors & REF_ERR_NAME_TOO_LONG)
707 fprintf(stderr, ", name too long");
708 if (errors & REF_ERR_NO_ROOT_REF)
709 fprintf(stderr, ", no root ref");
710 if (errors & REF_ERR_NO_ROOT_BACKREF)
711 fprintf(stderr, ", no root backref");
712 if (errors & REF_ERR_DUP_ROOT_REF)
713 fprintf(stderr, ", dup root ref");
714 if (errors & REF_ERR_DUP_ROOT_BACKREF)
715 fprintf(stderr, ", dup root backref");
716 fprintf(stderr, "\n");
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
722 struct ptr_node *node;
723 struct cache_extent *cache;
724 struct inode_record *rec = NULL;
727 cache = lookup_cache_extent(inode_cache, ino, 1);
729 node = container_of(cache, struct ptr_node, cache);
731 if (mod && rec->refs > 1) {
732 node->data = clone_inode_rec(rec);
737 rec = calloc(1, sizeof(*rec));
739 return ERR_PTR(-ENOMEM);
741 rec->extent_start = (u64)-1;
743 INIT_LIST_HEAD(&rec->backrefs);
744 INIT_LIST_HEAD(&rec->orphan_extents);
745 rec->holes = RB_ROOT;
747 node = malloc(sizeof(*node));
750 return ERR_PTR(-ENOMEM);
752 node->cache.start = ino;
753 node->cache.size = 1;
756 if (ino == BTRFS_FREE_INO_OBJECTID)
759 ret = insert_cache_extent(inode_cache, &node->cache);
761 return ERR_PTR(-EEXIST);
766 static void free_orphan_data_extents(struct list_head *orphan_extents)
768 struct orphan_data_extent *orphan;
770 while (!list_empty(orphan_extents)) {
771 orphan = list_entry(orphan_extents->next,
772 struct orphan_data_extent, list);
773 list_del(&orphan->list);
778 static void free_inode_rec(struct inode_record *rec)
780 struct inode_backref *backref;
785 while (!list_empty(&rec->backrefs)) {
786 backref = list_entry(rec->backrefs.next,
787 struct inode_backref, list);
788 list_del(&backref->list);
791 free_orphan_data_extents(&rec->orphan_extents);
792 free_file_extent_holes(&rec->holes);
796 static int can_free_inode_rec(struct inode_record *rec)
798 if (!rec->errors && rec->checked && rec->found_inode_item &&
799 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
804 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
805 struct inode_record *rec)
807 struct cache_extent *cache;
808 struct inode_backref *tmp, *backref;
809 struct ptr_node *node;
810 unsigned char filetype;
812 if (!rec->found_inode_item)
815 filetype = imode_to_type(rec->imode);
816 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
817 if (backref->found_dir_item && backref->found_dir_index) {
818 if (backref->filetype != filetype)
819 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
820 if (!backref->errors && backref->found_inode_ref &&
821 rec->nlink == rec->found_link) {
822 list_del(&backref->list);
828 if (!rec->checked || rec->merging)
831 if (S_ISDIR(rec->imode)) {
832 if (rec->found_size != rec->isize)
833 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
834 if (rec->found_file_extent)
835 rec->errors |= I_ERR_ODD_FILE_EXTENT;
836 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
837 if (rec->found_dir_item)
838 rec->errors |= I_ERR_ODD_DIR_ITEM;
839 if (rec->found_size != rec->nbytes)
840 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
841 if (rec->nlink > 0 && !no_holes &&
842 (rec->extent_end < rec->isize ||
843 first_extent_gap(&rec->holes) < rec->isize))
844 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
847 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
848 if (rec->found_csum_item && rec->nodatasum)
849 rec->errors |= I_ERR_ODD_CSUM_ITEM;
850 if (rec->some_csum_missing && !rec->nodatasum)
851 rec->errors |= I_ERR_SOME_CSUM_MISSING;
854 BUG_ON(rec->refs != 1);
855 if (can_free_inode_rec(rec)) {
856 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
857 node = container_of(cache, struct ptr_node, cache);
858 BUG_ON(node->data != rec);
859 remove_cache_extent(inode_cache, &node->cache);
865 static int check_orphan_item(struct btrfs_root *root, u64 ino)
867 struct btrfs_path path;
868 struct btrfs_key key;
871 key.objectid = BTRFS_ORPHAN_OBJECTID;
872 key.type = BTRFS_ORPHAN_ITEM_KEY;
875 btrfs_init_path(&path);
876 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
877 btrfs_release_path(&path);
883 static int process_inode_item(struct extent_buffer *eb,
884 int slot, struct btrfs_key *key,
885 struct shared_node *active_node)
887 struct inode_record *rec;
888 struct btrfs_inode_item *item;
890 rec = active_node->current;
891 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
892 if (rec->found_inode_item) {
893 rec->errors |= I_ERR_DUP_INODE_ITEM;
896 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
897 rec->nlink = btrfs_inode_nlink(eb, item);
898 rec->isize = btrfs_inode_size(eb, item);
899 rec->nbytes = btrfs_inode_nbytes(eb, item);
900 rec->imode = btrfs_inode_mode(eb, item);
901 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
903 rec->found_inode_item = 1;
905 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
906 maybe_free_inode_rec(&active_node->inode_cache, rec);
910 static struct inode_backref *get_inode_backref(struct inode_record *rec,
912 int namelen, u64 dir)
914 struct inode_backref *backref;
916 list_for_each_entry(backref, &rec->backrefs, list) {
917 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
919 if (backref->dir != dir || backref->namelen != namelen)
921 if (memcmp(name, backref->name, namelen))
926 backref = malloc(sizeof(*backref) + namelen + 1);
927 memset(backref, 0, sizeof(*backref));
929 backref->namelen = namelen;
930 memcpy(backref->name, name, namelen);
931 backref->name[namelen] = '\0';
932 list_add_tail(&backref->list, &rec->backrefs);
936 static int add_inode_backref(struct cache_tree *inode_cache,
937 u64 ino, u64 dir, u64 index,
938 const char *name, int namelen,
939 int filetype, int itemtype, int errors)
941 struct inode_record *rec;
942 struct inode_backref *backref;
944 rec = get_inode_rec(inode_cache, ino, 1);
946 backref = get_inode_backref(rec, name, namelen, dir);
948 backref->errors |= errors;
949 if (itemtype == BTRFS_DIR_INDEX_KEY) {
950 if (backref->found_dir_index)
951 backref->errors |= REF_ERR_DUP_DIR_INDEX;
952 if (backref->found_inode_ref && backref->index != index)
953 backref->errors |= REF_ERR_INDEX_UNMATCH;
954 if (backref->found_dir_item && backref->filetype != filetype)
955 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
957 backref->index = index;
958 backref->filetype = filetype;
959 backref->found_dir_index = 1;
960 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
962 if (backref->found_dir_item)
963 backref->errors |= REF_ERR_DUP_DIR_ITEM;
964 if (backref->found_dir_index && backref->filetype != filetype)
965 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
967 backref->filetype = filetype;
968 backref->found_dir_item = 1;
969 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
970 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
971 if (backref->found_inode_ref)
972 backref->errors |= REF_ERR_DUP_INODE_REF;
973 if (backref->found_dir_index && backref->index != index)
974 backref->errors |= REF_ERR_INDEX_UNMATCH;
976 backref->index = index;
978 backref->ref_type = itemtype;
979 backref->found_inode_ref = 1;
984 maybe_free_inode_rec(inode_cache, rec);
988 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
989 struct cache_tree *dst_cache)
991 struct inode_backref *backref;
996 list_for_each_entry(backref, &src->backrefs, list) {
997 if (backref->found_dir_index) {
998 add_inode_backref(dst_cache, dst->ino, backref->dir,
999 backref->index, backref->name,
1000 backref->namelen, backref->filetype,
1001 BTRFS_DIR_INDEX_KEY, backref->errors);
1003 if (backref->found_dir_item) {
1005 add_inode_backref(dst_cache, dst->ino,
1006 backref->dir, 0, backref->name,
1007 backref->namelen, backref->filetype,
1008 BTRFS_DIR_ITEM_KEY, backref->errors);
1010 if (backref->found_inode_ref) {
1011 add_inode_backref(dst_cache, dst->ino,
1012 backref->dir, backref->index,
1013 backref->name, backref->namelen, 0,
1014 backref->ref_type, backref->errors);
1018 if (src->found_dir_item)
1019 dst->found_dir_item = 1;
1020 if (src->found_file_extent)
1021 dst->found_file_extent = 1;
1022 if (src->found_csum_item)
1023 dst->found_csum_item = 1;
1024 if (src->some_csum_missing)
1025 dst->some_csum_missing = 1;
1026 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1027 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1032 BUG_ON(src->found_link < dir_count);
1033 dst->found_link += src->found_link - dir_count;
1034 dst->found_size += src->found_size;
1035 if (src->extent_start != (u64)-1) {
1036 if (dst->extent_start == (u64)-1) {
1037 dst->extent_start = src->extent_start;
1038 dst->extent_end = src->extent_end;
1040 if (dst->extent_end > src->extent_start)
1041 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1042 else if (dst->extent_end < src->extent_start) {
1043 ret = add_file_extent_hole(&dst->holes,
1045 src->extent_start - dst->extent_end);
1047 if (dst->extent_end < src->extent_end)
1048 dst->extent_end = src->extent_end;
1052 dst->errors |= src->errors;
1053 if (src->found_inode_item) {
1054 if (!dst->found_inode_item) {
1055 dst->nlink = src->nlink;
1056 dst->isize = src->isize;
1057 dst->nbytes = src->nbytes;
1058 dst->imode = src->imode;
1059 dst->nodatasum = src->nodatasum;
1060 dst->found_inode_item = 1;
1062 dst->errors |= I_ERR_DUP_INODE_ITEM;
1070 static int splice_shared_node(struct shared_node *src_node,
1071 struct shared_node *dst_node)
1073 struct cache_extent *cache;
1074 struct ptr_node *node, *ins;
1075 struct cache_tree *src, *dst;
1076 struct inode_record *rec, *conflict;
1077 u64 current_ino = 0;
1081 if (--src_node->refs == 0)
1083 if (src_node->current)
1084 current_ino = src_node->current->ino;
1086 src = &src_node->root_cache;
1087 dst = &dst_node->root_cache;
1089 cache = search_cache_extent(src, 0);
1091 node = container_of(cache, struct ptr_node, cache);
1093 cache = next_cache_extent(cache);
1096 remove_cache_extent(src, &node->cache);
1099 ins = malloc(sizeof(*ins));
1100 ins->cache.start = node->cache.start;
1101 ins->cache.size = node->cache.size;
1105 ret = insert_cache_extent(dst, &ins->cache);
1106 if (ret == -EEXIST) {
1107 conflict = get_inode_rec(dst, rec->ino, 1);
1108 BUG_ON(IS_ERR(conflict));
1109 merge_inode_recs(rec, conflict, dst);
1111 conflict->checked = 1;
1112 if (dst_node->current == conflict)
1113 dst_node->current = NULL;
1115 maybe_free_inode_rec(dst, conflict);
1116 free_inode_rec(rec);
1123 if (src == &src_node->root_cache) {
1124 src = &src_node->inode_cache;
1125 dst = &dst_node->inode_cache;
1129 if (current_ino > 0 && (!dst_node->current ||
1130 current_ino > dst_node->current->ino)) {
1131 if (dst_node->current) {
1132 dst_node->current->checked = 1;
1133 maybe_free_inode_rec(dst, dst_node->current);
1135 dst_node->current = get_inode_rec(dst, current_ino, 1);
1136 BUG_ON(IS_ERR(dst_node->current));
1141 static void free_inode_ptr(struct cache_extent *cache)
1143 struct ptr_node *node;
1144 struct inode_record *rec;
1146 node = container_of(cache, struct ptr_node, cache);
1148 free_inode_rec(rec);
1152 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1154 static struct shared_node *find_shared_node(struct cache_tree *shared,
1157 struct cache_extent *cache;
1158 struct shared_node *node;
1160 cache = lookup_cache_extent(shared, bytenr, 1);
1162 node = container_of(cache, struct shared_node, cache);
1168 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1171 struct shared_node *node;
1173 node = calloc(1, sizeof(*node));
1174 node->cache.start = bytenr;
1175 node->cache.size = 1;
1176 cache_tree_init(&node->root_cache);
1177 cache_tree_init(&node->inode_cache);
1180 ret = insert_cache_extent(shared, &node->cache);
1185 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1186 struct walk_control *wc, int level)
1188 struct shared_node *node;
1189 struct shared_node *dest;
1191 if (level == wc->active_node)
1194 BUG_ON(wc->active_node <= level);
1195 node = find_shared_node(&wc->shared, bytenr);
1197 add_shared_node(&wc->shared, bytenr, refs);
1198 node = find_shared_node(&wc->shared, bytenr);
1199 wc->nodes[level] = node;
1200 wc->active_node = level;
1204 if (wc->root_level == wc->active_node &&
1205 btrfs_root_refs(&root->root_item) == 0) {
1206 if (--node->refs == 0) {
1207 free_inode_recs_tree(&node->root_cache);
1208 free_inode_recs_tree(&node->inode_cache);
1209 remove_cache_extent(&wc->shared, &node->cache);
1215 dest = wc->nodes[wc->active_node];
1216 splice_shared_node(node, dest);
1217 if (node->refs == 0) {
1218 remove_cache_extent(&wc->shared, &node->cache);
1224 static int leave_shared_node(struct btrfs_root *root,
1225 struct walk_control *wc, int level)
1227 struct shared_node *node;
1228 struct shared_node *dest;
1231 if (level == wc->root_level)
1234 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1238 BUG_ON(i >= BTRFS_MAX_LEVEL);
1240 node = wc->nodes[wc->active_node];
1241 wc->nodes[wc->active_node] = NULL;
1242 wc->active_node = i;
1244 dest = wc->nodes[wc->active_node];
1245 if (wc->active_node < wc->root_level ||
1246 btrfs_root_refs(&root->root_item) > 0) {
1247 BUG_ON(node->refs <= 1);
1248 splice_shared_node(node, dest);
1250 BUG_ON(node->refs < 2);
1259 * 1 - if the root with id child_root_id is a child of root parent_root_id
1260 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1261 * has other root(s) as parent(s)
1262 * 2 - if the root child_root_id doesn't have any parent roots
1264 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1267 struct btrfs_path path;
1268 struct btrfs_key key;
1269 struct extent_buffer *leaf;
1273 btrfs_init_path(&path);
1275 key.objectid = parent_root_id;
1276 key.type = BTRFS_ROOT_REF_KEY;
1277 key.offset = child_root_id;
1278 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1282 btrfs_release_path(&path);
1286 key.objectid = child_root_id;
1287 key.type = BTRFS_ROOT_BACKREF_KEY;
1289 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1295 leaf = path.nodes[0];
1296 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1297 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1300 leaf = path.nodes[0];
1303 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1304 if (key.objectid != child_root_id ||
1305 key.type != BTRFS_ROOT_BACKREF_KEY)
1310 if (key.offset == parent_root_id) {
1311 btrfs_release_path(&path);
1318 btrfs_release_path(&path);
1321 return has_parent ? 0 : 2;
1324 static int process_dir_item(struct btrfs_root *root,
1325 struct extent_buffer *eb,
1326 int slot, struct btrfs_key *key,
1327 struct shared_node *active_node)
1337 struct btrfs_dir_item *di;
1338 struct inode_record *rec;
1339 struct cache_tree *root_cache;
1340 struct cache_tree *inode_cache;
1341 struct btrfs_key location;
1342 char namebuf[BTRFS_NAME_LEN];
1344 root_cache = &active_node->root_cache;
1345 inode_cache = &active_node->inode_cache;
1346 rec = active_node->current;
1347 rec->found_dir_item = 1;
1349 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1350 total = btrfs_item_size_nr(eb, slot);
1351 while (cur < total) {
1353 btrfs_dir_item_key_to_cpu(eb, di, &location);
1354 name_len = btrfs_dir_name_len(eb, di);
1355 data_len = btrfs_dir_data_len(eb, di);
1356 filetype = btrfs_dir_type(eb, di);
1358 rec->found_size += name_len;
1359 if (name_len <= BTRFS_NAME_LEN) {
1363 len = BTRFS_NAME_LEN;
1364 error = REF_ERR_NAME_TOO_LONG;
1366 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1368 if (location.type == BTRFS_INODE_ITEM_KEY) {
1369 add_inode_backref(inode_cache, location.objectid,
1370 key->objectid, key->offset, namebuf,
1371 len, filetype, key->type, error);
1372 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1373 add_inode_backref(root_cache, location.objectid,
1374 key->objectid, key->offset,
1375 namebuf, len, filetype,
1378 fprintf(stderr, "invalid location in dir item %u\n",
1380 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1381 key->objectid, key->offset, namebuf,
1382 len, filetype, key->type, error);
1385 len = sizeof(*di) + name_len + data_len;
1386 di = (struct btrfs_dir_item *)((char *)di + len);
1389 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1390 rec->errors |= I_ERR_DUP_DIR_INDEX;
1395 static int process_inode_ref(struct extent_buffer *eb,
1396 int slot, struct btrfs_key *key,
1397 struct shared_node *active_node)
1405 struct cache_tree *inode_cache;
1406 struct btrfs_inode_ref *ref;
1407 char namebuf[BTRFS_NAME_LEN];
1409 inode_cache = &active_node->inode_cache;
1411 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1412 total = btrfs_item_size_nr(eb, slot);
1413 while (cur < total) {
1414 name_len = btrfs_inode_ref_name_len(eb, ref);
1415 index = btrfs_inode_ref_index(eb, ref);
1416 if (name_len <= BTRFS_NAME_LEN) {
1420 len = BTRFS_NAME_LEN;
1421 error = REF_ERR_NAME_TOO_LONG;
1423 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1424 add_inode_backref(inode_cache, key->objectid, key->offset,
1425 index, namebuf, len, 0, key->type, error);
1427 len = sizeof(*ref) + name_len;
1428 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1434 static int process_inode_extref(struct extent_buffer *eb,
1435 int slot, struct btrfs_key *key,
1436 struct shared_node *active_node)
1445 struct cache_tree *inode_cache;
1446 struct btrfs_inode_extref *extref;
1447 char namebuf[BTRFS_NAME_LEN];
1449 inode_cache = &active_node->inode_cache;
1451 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1452 total = btrfs_item_size_nr(eb, slot);
1453 while (cur < total) {
1454 name_len = btrfs_inode_extref_name_len(eb, extref);
1455 index = btrfs_inode_extref_index(eb, extref);
1456 parent = btrfs_inode_extref_parent(eb, extref);
1457 if (name_len <= BTRFS_NAME_LEN) {
1461 len = BTRFS_NAME_LEN;
1462 error = REF_ERR_NAME_TOO_LONG;
1464 read_extent_buffer(eb, namebuf,
1465 (unsigned long)(extref + 1), len);
1466 add_inode_backref(inode_cache, key->objectid, parent,
1467 index, namebuf, len, 0, key->type, error);
1469 len = sizeof(*extref) + name_len;
1470 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1477 static int count_csum_range(struct btrfs_root *root, u64 start,
1478 u64 len, u64 *found)
1480 struct btrfs_key key;
1481 struct btrfs_path path;
1482 struct extent_buffer *leaf;
1487 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1489 btrfs_init_path(&path);
1491 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1493 key.type = BTRFS_EXTENT_CSUM_KEY;
1495 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1499 if (ret > 0 && path.slots[0] > 0) {
1500 leaf = path.nodes[0];
1501 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1502 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1503 key.type == BTRFS_EXTENT_CSUM_KEY)
1508 leaf = path.nodes[0];
1509 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1510 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1515 leaf = path.nodes[0];
1518 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1519 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1520 key.type != BTRFS_EXTENT_CSUM_KEY)
1523 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1524 if (key.offset >= start + len)
1527 if (key.offset > start)
1530 size = btrfs_item_size_nr(leaf, path.slots[0]);
1531 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1532 if (csum_end > start) {
1533 size = min(csum_end - start, len);
1542 btrfs_release_path(&path);
1548 static int process_file_extent(struct btrfs_root *root,
1549 struct extent_buffer *eb,
1550 int slot, struct btrfs_key *key,
1551 struct shared_node *active_node)
1553 struct inode_record *rec;
1554 struct btrfs_file_extent_item *fi;
1556 u64 disk_bytenr = 0;
1557 u64 extent_offset = 0;
1558 u64 mask = root->sectorsize - 1;
1562 rec = active_node->current;
1563 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1564 rec->found_file_extent = 1;
1566 if (rec->extent_start == (u64)-1) {
1567 rec->extent_start = key->offset;
1568 rec->extent_end = key->offset;
1571 if (rec->extent_end > key->offset)
1572 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1573 else if (rec->extent_end < key->offset) {
1574 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1575 key->offset - rec->extent_end);
1580 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1581 extent_type = btrfs_file_extent_type(eb, fi);
1583 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1584 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1586 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1587 rec->found_size += num_bytes;
1588 num_bytes = (num_bytes + mask) & ~mask;
1589 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1590 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1591 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1592 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1593 extent_offset = btrfs_file_extent_offset(eb, fi);
1594 if (num_bytes == 0 || (num_bytes & mask))
1595 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1596 if (num_bytes + extent_offset >
1597 btrfs_file_extent_ram_bytes(eb, fi))
1598 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1599 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1600 (btrfs_file_extent_compression(eb, fi) ||
1601 btrfs_file_extent_encryption(eb, fi) ||
1602 btrfs_file_extent_other_encoding(eb, fi)))
1603 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1604 if (disk_bytenr > 0)
1605 rec->found_size += num_bytes;
1607 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1609 rec->extent_end = key->offset + num_bytes;
1612 * The data reloc tree will copy full extents into its inode and then
1613 * copy the corresponding csums. Because the extent it copied could be
1614 * a preallocated extent that hasn't been written to yet there may be no
1615 * csums to copy, ergo we won't have csums for our file extent. This is
1616 * ok so just don't bother checking csums if the inode belongs to the
1619 if (disk_bytenr > 0 &&
1620 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1622 if (btrfs_file_extent_compression(eb, fi))
1623 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1625 disk_bytenr += extent_offset;
1627 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1630 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1632 rec->found_csum_item = 1;
1633 if (found < num_bytes)
1634 rec->some_csum_missing = 1;
1635 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1637 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1643 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1644 struct walk_control *wc)
1646 struct btrfs_key key;
1650 struct cache_tree *inode_cache;
1651 struct shared_node *active_node;
1653 if (wc->root_level == wc->active_node &&
1654 btrfs_root_refs(&root->root_item) == 0)
1657 active_node = wc->nodes[wc->active_node];
1658 inode_cache = &active_node->inode_cache;
1659 nritems = btrfs_header_nritems(eb);
1660 for (i = 0; i < nritems; i++) {
1661 btrfs_item_key_to_cpu(eb, &key, i);
1663 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1665 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1668 if (active_node->current == NULL ||
1669 active_node->current->ino < key.objectid) {
1670 if (active_node->current) {
1671 active_node->current->checked = 1;
1672 maybe_free_inode_rec(inode_cache,
1673 active_node->current);
1675 active_node->current = get_inode_rec(inode_cache,
1677 BUG_ON(IS_ERR(active_node->current));
1680 case BTRFS_DIR_ITEM_KEY:
1681 case BTRFS_DIR_INDEX_KEY:
1682 ret = process_dir_item(root, eb, i, &key, active_node);
1684 case BTRFS_INODE_REF_KEY:
1685 ret = process_inode_ref(eb, i, &key, active_node);
1687 case BTRFS_INODE_EXTREF_KEY:
1688 ret = process_inode_extref(eb, i, &key, active_node);
1690 case BTRFS_INODE_ITEM_KEY:
1691 ret = process_inode_item(eb, i, &key, active_node);
1693 case BTRFS_EXTENT_DATA_KEY:
1694 ret = process_file_extent(root, eb, i, &key,
1704 static void reada_walk_down(struct btrfs_root *root,
1705 struct extent_buffer *node, int slot)
1714 level = btrfs_header_level(node);
1718 nritems = btrfs_header_nritems(node);
1719 blocksize = btrfs_level_size(root, level - 1);
1720 for (i = slot; i < nritems; i++) {
1721 bytenr = btrfs_node_blockptr(node, i);
1722 ptr_gen = btrfs_node_ptr_generation(node, i);
1723 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1728 * Check the child node/leaf by the following condition:
1729 * 1. the first item key of the node/leaf should be the same with the one
1731 * 2. block in parent node should match the child node/leaf.
1732 * 3. generation of parent node and child's header should be consistent.
1734 * Or the child node/leaf pointed by the key in parent is not valid.
1736 * We hope to check leaf owner too, but since subvol may share leaves,
1737 * which makes leaf owner check not so strong, key check should be
1738 * sufficient enough for that case.
1740 static int check_child_node(struct btrfs_root *root,
1741 struct extent_buffer *parent, int slot,
1742 struct extent_buffer *child)
1744 struct btrfs_key parent_key;
1745 struct btrfs_key child_key;
1748 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1749 if (btrfs_header_level(child) == 0)
1750 btrfs_item_key_to_cpu(child, &child_key, 0);
1752 btrfs_node_key_to_cpu(child, &child_key, 0);
1754 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1757 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1758 parent_key.objectid, parent_key.type, parent_key.offset,
1759 child_key.objectid, child_key.type, child_key.offset);
1761 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1763 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1764 btrfs_node_blockptr(parent, slot),
1765 btrfs_header_bytenr(child));
1767 if (btrfs_node_ptr_generation(parent, slot) !=
1768 btrfs_header_generation(child)) {
1770 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1771 btrfs_header_generation(child),
1772 btrfs_node_ptr_generation(parent, slot));
1777 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1778 struct walk_control *wc, int *level)
1780 enum btrfs_tree_block_status status;
1783 struct extent_buffer *next;
1784 struct extent_buffer *cur;
1789 WARN_ON(*level < 0);
1790 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1791 ret = btrfs_lookup_extent_info(NULL, root,
1792 path->nodes[*level]->start,
1793 *level, 1, &refs, NULL);
1800 ret = enter_shared_node(root, path->nodes[*level]->start,
1808 while (*level >= 0) {
1809 WARN_ON(*level < 0);
1810 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1811 cur = path->nodes[*level];
1813 if (btrfs_header_level(cur) != *level)
1816 if (path->slots[*level] >= btrfs_header_nritems(cur))
1819 ret = process_one_leaf(root, cur, wc);
1824 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1825 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1826 blocksize = btrfs_level_size(root, *level - 1);
1827 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1833 ret = enter_shared_node(root, bytenr, refs,
1836 path->slots[*level]++;
1841 next = btrfs_find_tree_block(root, bytenr, blocksize);
1842 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1843 free_extent_buffer(next);
1844 reada_walk_down(root, cur, path->slots[*level]);
1845 next = read_tree_block(root, bytenr, blocksize,
1847 if (!extent_buffer_uptodate(next)) {
1848 struct btrfs_key node_key;
1850 btrfs_node_key_to_cpu(path->nodes[*level],
1852 path->slots[*level]);
1853 btrfs_add_corrupt_extent_record(root->fs_info,
1855 path->nodes[*level]->start,
1856 root->leafsize, *level);
1862 ret = check_child_node(root, cur, path->slots[*level], next);
1868 if (btrfs_is_leaf(next))
1869 status = btrfs_check_leaf(root, NULL, next);
1871 status = btrfs_check_node(root, NULL, next);
1872 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1873 free_extent_buffer(next);
1878 *level = *level - 1;
1879 free_extent_buffer(path->nodes[*level]);
1880 path->nodes[*level] = next;
1881 path->slots[*level] = 0;
1884 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1888 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1889 struct walk_control *wc, int *level)
1892 struct extent_buffer *leaf;
1894 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1895 leaf = path->nodes[i];
1896 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1901 free_extent_buffer(path->nodes[*level]);
1902 path->nodes[*level] = NULL;
1903 BUG_ON(*level > wc->active_node);
1904 if (*level == wc->active_node)
1905 leave_shared_node(root, wc, *level);
1912 static int check_root_dir(struct inode_record *rec)
1914 struct inode_backref *backref;
1917 if (!rec->found_inode_item || rec->errors)
1919 if (rec->nlink != 1 || rec->found_link != 0)
1921 if (list_empty(&rec->backrefs))
1923 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1924 if (!backref->found_inode_ref)
1926 if (backref->index != 0 || backref->namelen != 2 ||
1927 memcmp(backref->name, "..", 2))
1929 if (backref->found_dir_index || backref->found_dir_item)
1936 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1937 struct btrfs_root *root, struct btrfs_path *path,
1938 struct inode_record *rec)
1940 struct btrfs_inode_item *ei;
1941 struct btrfs_key key;
1944 key.objectid = rec->ino;
1945 key.type = BTRFS_INODE_ITEM_KEY;
1946 key.offset = (u64)-1;
1948 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1952 if (!path->slots[0]) {
1959 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1960 if (key.objectid != rec->ino) {
1965 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1966 struct btrfs_inode_item);
1967 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1968 btrfs_mark_buffer_dirty(path->nodes[0]);
1969 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1970 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1971 root->root_key.objectid);
1973 btrfs_release_path(path);
1977 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1978 struct btrfs_root *root,
1979 struct btrfs_path *path,
1980 struct inode_record *rec)
1984 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1985 btrfs_release_path(path);
1987 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1991 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1992 struct btrfs_root *root,
1993 struct btrfs_path *path,
1994 struct inode_record *rec)
1996 struct btrfs_inode_item *ei;
1997 struct btrfs_key key;
2000 key.objectid = rec->ino;
2001 key.type = BTRFS_INODE_ITEM_KEY;
2004 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2011 /* Since ret == 0, no need to check anything */
2012 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2013 struct btrfs_inode_item);
2014 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2015 btrfs_mark_buffer_dirty(path->nodes[0]);
2016 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2017 printf("reset nbytes for ino %llu root %llu\n",
2018 rec->ino, root->root_key.objectid);
2020 btrfs_release_path(path);
2024 static int add_missing_dir_index(struct btrfs_root *root,
2025 struct cache_tree *inode_cache,
2026 struct inode_record *rec,
2027 struct inode_backref *backref)
2029 struct btrfs_path *path;
2030 struct btrfs_trans_handle *trans;
2031 struct btrfs_dir_item *dir_item;
2032 struct extent_buffer *leaf;
2033 struct btrfs_key key;
2034 struct btrfs_disk_key disk_key;
2035 struct inode_record *dir_rec;
2036 unsigned long name_ptr;
2037 u32 data_size = sizeof(*dir_item) + backref->namelen;
2040 path = btrfs_alloc_path();
2044 trans = btrfs_start_transaction(root, 1);
2045 if (IS_ERR(trans)) {
2046 btrfs_free_path(path);
2047 return PTR_ERR(trans);
2050 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2051 (unsigned long long)rec->ino);
2052 key.objectid = backref->dir;
2053 key.type = BTRFS_DIR_INDEX_KEY;
2054 key.offset = backref->index;
2056 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2059 leaf = path->nodes[0];
2060 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2062 disk_key.objectid = cpu_to_le64(rec->ino);
2063 disk_key.type = BTRFS_INODE_ITEM_KEY;
2064 disk_key.offset = 0;
2066 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2067 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2068 btrfs_set_dir_data_len(leaf, dir_item, 0);
2069 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2070 name_ptr = (unsigned long)(dir_item + 1);
2071 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2072 btrfs_mark_buffer_dirty(leaf);
2073 btrfs_free_path(path);
2074 btrfs_commit_transaction(trans, root);
2076 backref->found_dir_index = 1;
2077 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2078 BUG_ON(IS_ERR(dir_rec));
2081 dir_rec->found_size += backref->namelen;
2082 if (dir_rec->found_size == dir_rec->isize &&
2083 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2084 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2085 if (dir_rec->found_size != dir_rec->isize)
2086 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2091 static int delete_dir_index(struct btrfs_root *root,
2092 struct cache_tree *inode_cache,
2093 struct inode_record *rec,
2094 struct inode_backref *backref)
2096 struct btrfs_trans_handle *trans;
2097 struct btrfs_dir_item *di;
2098 struct btrfs_path *path;
2101 path = btrfs_alloc_path();
2105 trans = btrfs_start_transaction(root, 1);
2106 if (IS_ERR(trans)) {
2107 btrfs_free_path(path);
2108 return PTR_ERR(trans);
2112 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2113 (unsigned long long)backref->dir,
2114 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2115 (unsigned long long)root->objectid);
2117 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2118 backref->name, backref->namelen,
2119 backref->index, -1);
2122 btrfs_free_path(path);
2123 btrfs_commit_transaction(trans, root);
2130 ret = btrfs_del_item(trans, root, path);
2132 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2134 btrfs_free_path(path);
2135 btrfs_commit_transaction(trans, root);
2139 static int create_inode_item(struct btrfs_root *root,
2140 struct inode_record *rec,
2141 struct inode_backref *backref, int root_dir)
2143 struct btrfs_trans_handle *trans;
2144 struct btrfs_inode_item inode_item;
2145 time_t now = time(NULL);
2148 trans = btrfs_start_transaction(root, 1);
2149 if (IS_ERR(trans)) {
2150 ret = PTR_ERR(trans);
2154 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2155 "be incomplete, please check permissions and content after "
2156 "the fsck completes.\n", (unsigned long long)root->objectid,
2157 (unsigned long long)rec->ino);
2159 memset(&inode_item, 0, sizeof(inode_item));
2160 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2162 btrfs_set_stack_inode_nlink(&inode_item, 1);
2164 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2165 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2166 if (rec->found_dir_item) {
2167 if (rec->found_file_extent)
2168 fprintf(stderr, "root %llu inode %llu has both a dir "
2169 "item and extents, unsure if it is a dir or a "
2170 "regular file so setting it as a directory\n",
2171 (unsigned long long)root->objectid,
2172 (unsigned long long)rec->ino);
2173 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2174 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2175 } else if (!rec->found_dir_item) {
2176 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2177 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2179 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2180 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2181 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2182 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2183 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2184 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2185 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2186 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2188 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2190 btrfs_commit_transaction(trans, root);
2194 static int repair_inode_backrefs(struct btrfs_root *root,
2195 struct inode_record *rec,
2196 struct cache_tree *inode_cache,
2199 struct inode_backref *tmp, *backref;
2200 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2204 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2205 if (!delete && rec->ino == root_dirid) {
2206 if (!rec->found_inode_item) {
2207 ret = create_inode_item(root, rec, backref, 1);
2214 /* Index 0 for root dir's are special, don't mess with it */
2215 if (rec->ino == root_dirid && backref->index == 0)
2219 ((backref->found_dir_index && !backref->found_inode_ref) ||
2220 (backref->found_dir_index && backref->found_inode_ref &&
2221 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2222 ret = delete_dir_index(root, inode_cache, rec, backref);
2226 list_del(&backref->list);
2230 if (!delete && !backref->found_dir_index &&
2231 backref->found_dir_item && backref->found_inode_ref) {
2232 ret = add_missing_dir_index(root, inode_cache, rec,
2237 if (backref->found_dir_item &&
2238 backref->found_dir_index &&
2239 backref->found_dir_index) {
2240 if (!backref->errors &&
2241 backref->found_inode_ref) {
2242 list_del(&backref->list);
2248 if (!delete && (!backref->found_dir_index &&
2249 !backref->found_dir_item &&
2250 backref->found_inode_ref)) {
2251 struct btrfs_trans_handle *trans;
2252 struct btrfs_key location;
2254 ret = check_dir_conflict(root, backref->name,
2260 * let nlink fixing routine to handle it,
2261 * which can do it better.
2266 location.objectid = rec->ino;
2267 location.type = BTRFS_INODE_ITEM_KEY;
2268 location.offset = 0;
2270 trans = btrfs_start_transaction(root, 1);
2271 if (IS_ERR(trans)) {
2272 ret = PTR_ERR(trans);
2275 fprintf(stderr, "adding missing dir index/item pair "
2277 (unsigned long long)rec->ino);
2278 ret = btrfs_insert_dir_item(trans, root, backref->name,
2280 backref->dir, &location,
2281 imode_to_type(rec->imode),
2284 btrfs_commit_transaction(trans, root);
2288 if (!delete && (backref->found_inode_ref &&
2289 backref->found_dir_index &&
2290 backref->found_dir_item &&
2291 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2292 !rec->found_inode_item)) {
2293 ret = create_inode_item(root, rec, backref, 0);
2300 return ret ? ret : repaired;
2304 * To determine the file type for nlink/inode_item repair
2306 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2307 * Return -ENOENT if file type is not found.
2309 static int find_file_type(struct inode_record *rec, u8 *type)
2311 struct inode_backref *backref;
2313 /* For inode item recovered case */
2314 if (rec->found_inode_item) {
2315 *type = imode_to_type(rec->imode);
2319 list_for_each_entry(backref, &rec->backrefs, list) {
2320 if (backref->found_dir_index || backref->found_dir_item) {
2321 *type = backref->filetype;
2329 * To determine the file name for nlink repair
2331 * Return 0 if file name is found, set name and namelen.
2332 * Return -ENOENT if file name is not found.
2334 static int find_file_name(struct inode_record *rec,
2335 char *name, int *namelen)
2337 struct inode_backref *backref;
2339 list_for_each_entry(backref, &rec->backrefs, list) {
2340 if (backref->found_dir_index || backref->found_dir_item ||
2341 backref->found_inode_ref) {
2342 memcpy(name, backref->name, backref->namelen);
2343 *namelen = backref->namelen;
2350 /* Reset the nlink of the inode to the correct one */
2351 static int reset_nlink(struct btrfs_trans_handle *trans,
2352 struct btrfs_root *root,
2353 struct btrfs_path *path,
2354 struct inode_record *rec)
2356 struct inode_backref *backref;
2357 struct inode_backref *tmp;
2358 struct btrfs_key key;
2359 struct btrfs_inode_item *inode_item;
2362 /* We don't believe this either, reset it and iterate backref */
2363 rec->found_link = 0;
2365 /* Remove all backref including the valid ones */
2366 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2367 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2368 backref->index, backref->name,
2369 backref->namelen, 0);
2373 /* remove invalid backref, so it won't be added back */
2374 if (!(backref->found_dir_index &&
2375 backref->found_dir_item &&
2376 backref->found_inode_ref)) {
2377 list_del(&backref->list);
2384 /* Set nlink to 0 */
2385 key.objectid = rec->ino;
2386 key.type = BTRFS_INODE_ITEM_KEY;
2388 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2395 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2396 struct btrfs_inode_item);
2397 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2398 btrfs_mark_buffer_dirty(path->nodes[0]);
2399 btrfs_release_path(path);
2402 * Add back valid inode_ref/dir_item/dir_index,
2403 * add_link() will handle the nlink inc, so new nlink must be correct
2405 list_for_each_entry(backref, &rec->backrefs, list) {
2406 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2407 backref->name, backref->namelen,
2408 backref->filetype, &backref->index, 1);
2413 btrfs_release_path(path);
2417 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2418 struct btrfs_root *root,
2419 struct btrfs_path *path,
2420 struct inode_record *rec)
2422 char *dir_name = "lost+found";
2423 char namebuf[BTRFS_NAME_LEN] = {0};
2428 int name_recovered = 0;
2429 int type_recovered = 0;
2433 * Get file name and type first before these invalid inode ref
2434 * are deleted by remove_all_invalid_backref()
2436 name_recovered = !find_file_name(rec, namebuf, &namelen);
2437 type_recovered = !find_file_type(rec, &type);
2439 if (!name_recovered) {
2440 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2441 rec->ino, rec->ino);
2442 namelen = count_digits(rec->ino);
2443 sprintf(namebuf, "%llu", rec->ino);
2446 if (!type_recovered) {
2447 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2449 type = BTRFS_FT_REG_FILE;
2453 ret = reset_nlink(trans, root, path, rec);
2456 "Failed to reset nlink for inode %llu: %s\n",
2457 rec->ino, strerror(-ret));
2461 if (rec->found_link == 0) {
2462 lost_found_ino = root->highest_inode;
2463 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2468 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2469 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2472 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2473 dir_name, strerror(-ret));
2476 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2477 namebuf, namelen, type, NULL, 1);
2479 * Add ".INO" suffix several times to handle case where
2480 * "FILENAME.INO" is already taken by another file.
2482 while (ret == -EEXIST) {
2484 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2486 if (namelen + count_digits(rec->ino) + 1 >
2491 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2493 namelen += count_digits(rec->ino) + 1;
2494 ret = btrfs_add_link(trans, root, rec->ino,
2495 lost_found_ino, namebuf,
2496 namelen, type, NULL, 1);
2500 "Failed to link the inode %llu to %s dir: %s\n",
2501 rec->ino, dir_name, strerror(-ret));
2505 * Just increase the found_link, don't actually add the
2506 * backref. This will make things easier and this inode
2507 * record will be freed after the repair is done.
2508 * So fsck will not report problem about this inode.
2511 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2512 namelen, namebuf, dir_name);
2514 printf("Fixed the nlink of inode %llu\n", rec->ino);
2517 * Clear the flag anyway, or we will loop forever for the same inode
2518 * as it will not be removed from the bad inode list and the dead loop
2521 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2522 btrfs_release_path(path);
2527 * Check if there is any normal(reg or prealloc) file extent for given
2529 * This is used to determine the file type when neither its dir_index/item or
2530 * inode_item exists.
2532 * This will *NOT* report error, if any error happens, just consider it does
2533 * not have any normal file extent.
2535 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2537 struct btrfs_path *path;
2538 struct btrfs_key key;
2539 struct btrfs_key found_key;
2540 struct btrfs_file_extent_item *fi;
2544 path = btrfs_alloc_path();
2548 key.type = BTRFS_EXTENT_DATA_KEY;
2551 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2556 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2557 ret = btrfs_next_leaf(root, path);
2564 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2566 if (found_key.objectid != ino ||
2567 found_key.type != BTRFS_EXTENT_DATA_KEY)
2569 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2570 struct btrfs_file_extent_item);
2571 type = btrfs_file_extent_type(path->nodes[0], fi);
2572 if (type != BTRFS_FILE_EXTENT_INLINE) {
2578 btrfs_free_path(path);
2582 static u32 btrfs_type_to_imode(u8 type)
2584 static u32 imode_by_btrfs_type[] = {
2585 [BTRFS_FT_REG_FILE] = S_IFREG,
2586 [BTRFS_FT_DIR] = S_IFDIR,
2587 [BTRFS_FT_CHRDEV] = S_IFCHR,
2588 [BTRFS_FT_BLKDEV] = S_IFBLK,
2589 [BTRFS_FT_FIFO] = S_IFIFO,
2590 [BTRFS_FT_SOCK] = S_IFSOCK,
2591 [BTRFS_FT_SYMLINK] = S_IFLNK,
2594 return imode_by_btrfs_type[(type)];
2597 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2598 struct btrfs_root *root,
2599 struct btrfs_path *path,
2600 struct inode_record *rec)
2604 int type_recovered = 0;
2607 printf("Trying to rebuild inode:%llu\n", rec->ino);
2609 type_recovered = !find_file_type(rec, &filetype);
2612 * Try to determine inode type if type not found.
2614 * For found regular file extent, it must be FILE.
2615 * For found dir_item/index, it must be DIR.
2617 * For undetermined one, use FILE as fallback.
2620 * 1. If found backref(inode_index/item is already handled) to it,
2622 * Need new inode-inode ref structure to allow search for that.
2624 if (!type_recovered) {
2625 if (rec->found_file_extent &&
2626 find_normal_file_extent(root, rec->ino)) {
2628 filetype = BTRFS_FT_REG_FILE;
2629 } else if (rec->found_dir_item) {
2631 filetype = BTRFS_FT_DIR;
2632 } else if (!list_empty(&rec->orphan_extents)) {
2634 filetype = BTRFS_FT_REG_FILE;
2636 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2639 filetype = BTRFS_FT_REG_FILE;
2643 ret = btrfs_new_inode(trans, root, rec->ino,
2644 mode | btrfs_type_to_imode(filetype));
2649 * Here inode rebuild is done, we only rebuild the inode item,
2650 * don't repair the nlink(like move to lost+found).
2651 * That is the job of nlink repair.
2653 * We just fill the record and return
2655 rec->found_dir_item = 1;
2656 rec->imode = mode | btrfs_type_to_imode(filetype);
2658 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2659 /* Ensure the inode_nlinks repair function will be called */
2660 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2665 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2666 struct btrfs_root *root,
2667 struct btrfs_path *path,
2668 struct inode_record *rec)
2670 struct orphan_data_extent *orphan;
2671 struct orphan_data_extent *tmp;
2674 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2676 * Check for conflicting file extents
2678 * Here we don't know whether the extents is compressed or not,
2679 * so we can only assume it not compressed nor data offset,
2680 * and use its disk_len as extent length.
2682 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2683 orphan->offset, orphan->disk_len, 0);
2684 btrfs_release_path(path);
2689 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2690 orphan->disk_bytenr, orphan->disk_len);
2691 ret = btrfs_free_extent(trans,
2692 root->fs_info->extent_root,
2693 orphan->disk_bytenr, orphan->disk_len,
2694 0, root->objectid, orphan->objectid,
2699 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2700 orphan->offset, orphan->disk_bytenr,
2701 orphan->disk_len, orphan->disk_len);
2705 /* Update file size info */
2706 rec->found_size += orphan->disk_len;
2707 if (rec->found_size == rec->nbytes)
2708 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2710 /* Update the file extent hole info too */
2711 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2715 if (RB_EMPTY_ROOT(&rec->holes))
2716 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2718 list_del(&orphan->list);
2721 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2726 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2727 struct btrfs_root *root,
2728 struct btrfs_path *path,
2729 struct inode_record *rec)
2731 struct rb_node *node;
2732 struct file_extent_hole *hole;
2736 node = rb_first(&rec->holes);
2740 hole = rb_entry(node, struct file_extent_hole, node);
2741 ret = btrfs_punch_hole(trans, root, rec->ino,
2742 hole->start, hole->len);
2745 ret = del_file_extent_hole(&rec->holes, hole->start,
2749 if (RB_EMPTY_ROOT(&rec->holes))
2750 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2751 node = rb_first(&rec->holes);
2753 /* special case for a file losing all its file extent */
2755 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2756 round_up(rec->isize, root->sectorsize));
2760 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2761 rec->ino, root->objectid);
2766 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2768 struct btrfs_trans_handle *trans;
2769 struct btrfs_path *path;
2772 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2773 I_ERR_NO_ORPHAN_ITEM |
2774 I_ERR_LINK_COUNT_WRONG |
2775 I_ERR_NO_INODE_ITEM |
2776 I_ERR_FILE_EXTENT_ORPHAN |
2777 I_ERR_FILE_EXTENT_DISCOUNT|
2778 I_ERR_FILE_NBYTES_WRONG)))
2781 path = btrfs_alloc_path();
2786 * For nlink repair, it may create a dir and add link, so
2787 * 2 for parent(256)'s dir_index and dir_item
2788 * 2 for lost+found dir's inode_item and inode_ref
2789 * 1 for the new inode_ref of the file
2790 * 2 for lost+found dir's dir_index and dir_item for the file
2792 trans = btrfs_start_transaction(root, 7);
2793 if (IS_ERR(trans)) {
2794 btrfs_free_path(path);
2795 return PTR_ERR(trans);
2798 if (rec->errors & I_ERR_NO_INODE_ITEM)
2799 ret = repair_inode_no_item(trans, root, path, rec);
2800 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2801 ret = repair_inode_orphan_extent(trans, root, path, rec);
2802 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2803 ret = repair_inode_discount_extent(trans, root, path, rec);
2804 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2805 ret = repair_inode_isize(trans, root, path, rec);
2806 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2807 ret = repair_inode_orphan_item(trans, root, path, rec);
2808 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2809 ret = repair_inode_nlinks(trans, root, path, rec);
2810 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2811 ret = repair_inode_nbytes(trans, root, path, rec);
2812 btrfs_commit_transaction(trans, root);
2813 btrfs_free_path(path);
2817 static int check_inode_recs(struct btrfs_root *root,
2818 struct cache_tree *inode_cache)
2820 struct cache_extent *cache;
2821 struct ptr_node *node;
2822 struct inode_record *rec;
2823 struct inode_backref *backref;
2828 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2830 if (btrfs_root_refs(&root->root_item) == 0) {
2831 if (!cache_tree_empty(inode_cache))
2832 fprintf(stderr, "warning line %d\n", __LINE__);
2837 * We need to record the highest inode number for later 'lost+found'
2839 * We must select a ino not used/refered by any existing inode, or
2840 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2841 * this may cause 'lost+found' dir has wrong nlinks.
2843 cache = last_cache_extent(inode_cache);
2845 node = container_of(cache, struct ptr_node, cache);
2847 if (rec->ino > root->highest_inode)
2848 root->highest_inode = rec->ino;
2852 * We need to repair backrefs first because we could change some of the
2853 * errors in the inode recs.
2855 * We also need to go through and delete invalid backrefs first and then
2856 * add the correct ones second. We do this because we may get EEXIST
2857 * when adding back the correct index because we hadn't yet deleted the
2860 * For example, if we were missing a dir index then the directories
2861 * isize would be wrong, so if we fixed the isize to what we thought it
2862 * would be and then fixed the backref we'd still have a invalid fs, so
2863 * we need to add back the dir index and then check to see if the isize
2868 if (stage == 3 && !err)
2871 cache = search_cache_extent(inode_cache, 0);
2872 while (repair && cache) {
2873 node = container_of(cache, struct ptr_node, cache);
2875 cache = next_cache_extent(cache);
2877 /* Need to free everything up and rescan */
2879 remove_cache_extent(inode_cache, &node->cache);
2881 free_inode_rec(rec);
2885 if (list_empty(&rec->backrefs))
2888 ret = repair_inode_backrefs(root, rec, inode_cache,
2902 rec = get_inode_rec(inode_cache, root_dirid, 0);
2903 BUG_ON(IS_ERR(rec));
2905 ret = check_root_dir(rec);
2907 fprintf(stderr, "root %llu root dir %llu error\n",
2908 (unsigned long long)root->root_key.objectid,
2909 (unsigned long long)root_dirid);
2910 print_inode_error(root, rec);
2915 struct btrfs_trans_handle *trans;
2917 trans = btrfs_start_transaction(root, 1);
2918 if (IS_ERR(trans)) {
2919 err = PTR_ERR(trans);
2924 "root %llu missing its root dir, recreating\n",
2925 (unsigned long long)root->objectid);
2927 ret = btrfs_make_root_dir(trans, root, root_dirid);
2930 btrfs_commit_transaction(trans, root);
2934 fprintf(stderr, "root %llu root dir %llu not found\n",
2935 (unsigned long long)root->root_key.objectid,
2936 (unsigned long long)root_dirid);
2940 cache = search_cache_extent(inode_cache, 0);
2943 node = container_of(cache, struct ptr_node, cache);
2945 remove_cache_extent(inode_cache, &node->cache);
2947 if (rec->ino == root_dirid ||
2948 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2949 free_inode_rec(rec);
2953 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2954 ret = check_orphan_item(root, rec->ino);
2956 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2957 if (can_free_inode_rec(rec)) {
2958 free_inode_rec(rec);
2963 if (!rec->found_inode_item)
2964 rec->errors |= I_ERR_NO_INODE_ITEM;
2965 if (rec->found_link != rec->nlink)
2966 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2968 ret = try_repair_inode(root, rec);
2969 if (ret == 0 && can_free_inode_rec(rec)) {
2970 free_inode_rec(rec);
2976 if (!(repair && ret == 0))
2978 print_inode_error(root, rec);
2979 list_for_each_entry(backref, &rec->backrefs, list) {
2980 if (!backref->found_dir_item)
2981 backref->errors |= REF_ERR_NO_DIR_ITEM;
2982 if (!backref->found_dir_index)
2983 backref->errors |= REF_ERR_NO_DIR_INDEX;
2984 if (!backref->found_inode_ref)
2985 backref->errors |= REF_ERR_NO_INODE_REF;
2986 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2987 " namelen %u name %s filetype %d errors %x",
2988 (unsigned long long)backref->dir,
2989 (unsigned long long)backref->index,
2990 backref->namelen, backref->name,
2991 backref->filetype, backref->errors);
2992 print_ref_error(backref->errors);
2994 free_inode_rec(rec);
2996 return (error > 0) ? -1 : 0;
2999 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3002 struct cache_extent *cache;
3003 struct root_record *rec = NULL;
3006 cache = lookup_cache_extent(root_cache, objectid, 1);
3008 rec = container_of(cache, struct root_record, cache);
3010 rec = calloc(1, sizeof(*rec));
3011 rec->objectid = objectid;
3012 INIT_LIST_HEAD(&rec->backrefs);
3013 rec->cache.start = objectid;
3014 rec->cache.size = 1;
3016 ret = insert_cache_extent(root_cache, &rec->cache);
3022 static struct root_backref *get_root_backref(struct root_record *rec,
3023 u64 ref_root, u64 dir, u64 index,
3024 const char *name, int namelen)
3026 struct root_backref *backref;
3028 list_for_each_entry(backref, &rec->backrefs, list) {
3029 if (backref->ref_root != ref_root || backref->dir != dir ||
3030 backref->namelen != namelen)
3032 if (memcmp(name, backref->name, namelen))
3037 backref = calloc(1, sizeof(*backref) + namelen + 1);
3038 backref->ref_root = ref_root;
3040 backref->index = index;
3041 backref->namelen = namelen;
3042 memcpy(backref->name, name, namelen);
3043 backref->name[namelen] = '\0';
3044 list_add_tail(&backref->list, &rec->backrefs);
3048 static void free_root_record(struct cache_extent *cache)
3050 struct root_record *rec;
3051 struct root_backref *backref;
3053 rec = container_of(cache, struct root_record, cache);
3054 while (!list_empty(&rec->backrefs)) {
3055 backref = list_entry(rec->backrefs.next,
3056 struct root_backref, list);
3057 list_del(&backref->list);
3064 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3066 static int add_root_backref(struct cache_tree *root_cache,
3067 u64 root_id, u64 ref_root, u64 dir, u64 index,
3068 const char *name, int namelen,
3069 int item_type, int errors)
3071 struct root_record *rec;
3072 struct root_backref *backref;
3074 rec = get_root_rec(root_cache, root_id);
3075 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3077 backref->errors |= errors;
3079 if (item_type != BTRFS_DIR_ITEM_KEY) {
3080 if (backref->found_dir_index || backref->found_back_ref ||
3081 backref->found_forward_ref) {
3082 if (backref->index != index)
3083 backref->errors |= REF_ERR_INDEX_UNMATCH;
3085 backref->index = index;
3089 if (item_type == BTRFS_DIR_ITEM_KEY) {
3090 if (backref->found_forward_ref)
3092 backref->found_dir_item = 1;
3093 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3094 backref->found_dir_index = 1;
3095 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3096 if (backref->found_forward_ref)
3097 backref->errors |= REF_ERR_DUP_ROOT_REF;
3098 else if (backref->found_dir_item)
3100 backref->found_forward_ref = 1;
3101 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3102 if (backref->found_back_ref)
3103 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3104 backref->found_back_ref = 1;
3109 if (backref->found_forward_ref && backref->found_dir_item)
3110 backref->reachable = 1;
3114 static int merge_root_recs(struct btrfs_root *root,
3115 struct cache_tree *src_cache,
3116 struct cache_tree *dst_cache)
3118 struct cache_extent *cache;
3119 struct ptr_node *node;
3120 struct inode_record *rec;
3121 struct inode_backref *backref;
3124 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3125 free_inode_recs_tree(src_cache);
3130 cache = search_cache_extent(src_cache, 0);
3133 node = container_of(cache, struct ptr_node, cache);
3135 remove_cache_extent(src_cache, &node->cache);
3138 ret = is_child_root(root, root->objectid, rec->ino);
3144 list_for_each_entry(backref, &rec->backrefs, list) {
3145 BUG_ON(backref->found_inode_ref);
3146 if (backref->found_dir_item)
3147 add_root_backref(dst_cache, rec->ino,
3148 root->root_key.objectid, backref->dir,
3149 backref->index, backref->name,
3150 backref->namelen, BTRFS_DIR_ITEM_KEY,
3152 if (backref->found_dir_index)
3153 add_root_backref(dst_cache, rec->ino,
3154 root->root_key.objectid, backref->dir,
3155 backref->index, backref->name,
3156 backref->namelen, BTRFS_DIR_INDEX_KEY,
3160 free_inode_rec(rec);
3167 static int check_root_refs(struct btrfs_root *root,
3168 struct cache_tree *root_cache)
3170 struct root_record *rec;
3171 struct root_record *ref_root;
3172 struct root_backref *backref;
3173 struct cache_extent *cache;
3179 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3182 /* fixme: this can not detect circular references */
3185 cache = search_cache_extent(root_cache, 0);
3189 rec = container_of(cache, struct root_record, cache);
3190 cache = next_cache_extent(cache);
3192 if (rec->found_ref == 0)
3195 list_for_each_entry(backref, &rec->backrefs, list) {
3196 if (!backref->reachable)
3199 ref_root = get_root_rec(root_cache,
3201 if (ref_root->found_ref > 0)
3204 backref->reachable = 0;
3206 if (rec->found_ref == 0)
3212 cache = search_cache_extent(root_cache, 0);
3216 rec = container_of(cache, struct root_record, cache);
3217 cache = next_cache_extent(cache);
3219 if (rec->found_ref == 0 &&
3220 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3221 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3222 ret = check_orphan_item(root->fs_info->tree_root,
3228 * If we don't have a root item then we likely just have
3229 * a dir item in a snapshot for this root but no actual
3230 * ref key or anything so it's meaningless.
3232 if (!rec->found_root_item)
3235 fprintf(stderr, "fs tree %llu not referenced\n",
3236 (unsigned long long)rec->objectid);
3240 if (rec->found_ref > 0 && !rec->found_root_item)
3242 list_for_each_entry(backref, &rec->backrefs, list) {
3243 if (!backref->found_dir_item)
3244 backref->errors |= REF_ERR_NO_DIR_ITEM;
3245 if (!backref->found_dir_index)
3246 backref->errors |= REF_ERR_NO_DIR_INDEX;
3247 if (!backref->found_back_ref)
3248 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3249 if (!backref->found_forward_ref)
3250 backref->errors |= REF_ERR_NO_ROOT_REF;
3251 if (backref->reachable && backref->errors)
3258 fprintf(stderr, "fs tree %llu refs %u %s\n",
3259 (unsigned long long)rec->objectid, rec->found_ref,
3260 rec->found_root_item ? "" : "not found");
3262 list_for_each_entry(backref, &rec->backrefs, list) {
3263 if (!backref->reachable)
3265 if (!backref->errors && rec->found_root_item)
3267 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3268 " index %llu namelen %u name %s errors %x\n",
3269 (unsigned long long)backref->ref_root,
3270 (unsigned long long)backref->dir,
3271 (unsigned long long)backref->index,
3272 backref->namelen, backref->name,
3274 print_ref_error(backref->errors);
3277 return errors > 0 ? 1 : 0;
3280 static int process_root_ref(struct extent_buffer *eb, int slot,
3281 struct btrfs_key *key,
3282 struct cache_tree *root_cache)
3288 struct btrfs_root_ref *ref;
3289 char namebuf[BTRFS_NAME_LEN];
3292 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3294 dirid = btrfs_root_ref_dirid(eb, ref);
3295 index = btrfs_root_ref_sequence(eb, ref);
3296 name_len = btrfs_root_ref_name_len(eb, ref);
3298 if (name_len <= BTRFS_NAME_LEN) {
3302 len = BTRFS_NAME_LEN;
3303 error = REF_ERR_NAME_TOO_LONG;
3305 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3307 if (key->type == BTRFS_ROOT_REF_KEY) {
3308 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3309 index, namebuf, len, key->type, error);
3311 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3312 index, namebuf, len, key->type, error);
3317 static void free_corrupt_block(struct cache_extent *cache)
3319 struct btrfs_corrupt_block *corrupt;
3321 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3325 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3328 * Repair the btree of the given root.
3330 * The fix is to remove the node key in corrupt_blocks cache_tree.
3331 * and rebalance the tree.
3332 * After the fix, the btree should be writeable.
3334 static int repair_btree(struct btrfs_root *root,
3335 struct cache_tree *corrupt_blocks)
3337 struct btrfs_trans_handle *trans;
3338 struct btrfs_path *path;
3339 struct btrfs_corrupt_block *corrupt;
3340 struct cache_extent *cache;
3341 struct btrfs_key key;
3346 if (cache_tree_empty(corrupt_blocks))
3349 path = btrfs_alloc_path();
3353 trans = btrfs_start_transaction(root, 1);
3354 if (IS_ERR(trans)) {
3355 ret = PTR_ERR(trans);
3356 fprintf(stderr, "Error starting transaction: %s\n",
3360 cache = first_cache_extent(corrupt_blocks);
3362 corrupt = container_of(cache, struct btrfs_corrupt_block,
3364 level = corrupt->level;
3365 path->lowest_level = level;
3366 key.objectid = corrupt->key.objectid;
3367 key.type = corrupt->key.type;
3368 key.offset = corrupt->key.offset;
3371 * Here we don't want to do any tree balance, since it may
3372 * cause a balance with corrupted brother leaf/node,
3373 * so ins_len set to 0 here.
3374 * Balance will be done after all corrupt node/leaf is deleted.
3376 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3379 offset = btrfs_node_blockptr(path->nodes[level],
3380 path->slots[level]);
3382 /* Remove the ptr */
3383 ret = btrfs_del_ptr(trans, root, path, level,
3384 path->slots[level]);
3388 * Remove the corresponding extent
3389 * return value is not concerned.
3391 btrfs_release_path(path);
3392 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3393 0, root->root_key.objectid,
3395 cache = next_cache_extent(cache);
3398 /* Balance the btree using btrfs_search_slot() */
3399 cache = first_cache_extent(corrupt_blocks);
3401 corrupt = container_of(cache, struct btrfs_corrupt_block,
3403 memcpy(&key, &corrupt->key, sizeof(key));
3404 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3407 /* return will always >0 since it won't find the item */
3409 btrfs_release_path(path);
3410 cache = next_cache_extent(cache);
3413 btrfs_commit_transaction(trans, root);
3415 btrfs_free_path(path);
3419 static int check_fs_root(struct btrfs_root *root,
3420 struct cache_tree *root_cache,
3421 struct walk_control *wc)
3427 struct btrfs_path path;
3428 struct shared_node root_node;
3429 struct root_record *rec;
3430 struct btrfs_root_item *root_item = &root->root_item;
3431 struct cache_tree corrupt_blocks;
3432 struct orphan_data_extent *orphan;
3433 struct orphan_data_extent *tmp;
3434 enum btrfs_tree_block_status status;
3437 * Reuse the corrupt_block cache tree to record corrupted tree block
3439 * Unlike the usage in extent tree check, here we do it in a per
3440 * fs/subvol tree base.
3442 cache_tree_init(&corrupt_blocks);
3443 root->fs_info->corrupt_blocks = &corrupt_blocks;
3445 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3446 rec = get_root_rec(root_cache, root->root_key.objectid);
3447 if (btrfs_root_refs(root_item) > 0)
3448 rec->found_root_item = 1;
3451 btrfs_init_path(&path);
3452 memset(&root_node, 0, sizeof(root_node));
3453 cache_tree_init(&root_node.root_cache);
3454 cache_tree_init(&root_node.inode_cache);
3456 /* Move the orphan extent record to corresponding inode_record */
3457 list_for_each_entry_safe(orphan, tmp,
3458 &root->orphan_data_extents, list) {
3459 struct inode_record *inode;
3461 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3463 BUG_ON(IS_ERR(inode));
3464 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3465 list_move(&orphan->list, &inode->orphan_extents);
3468 level = btrfs_header_level(root->node);
3469 memset(wc->nodes, 0, sizeof(wc->nodes));
3470 wc->nodes[level] = &root_node;
3471 wc->active_node = level;
3472 wc->root_level = level;
3474 /* We may not have checked the root block, lets do that now */
3475 if (btrfs_is_leaf(root->node))
3476 status = btrfs_check_leaf(root, NULL, root->node);
3478 status = btrfs_check_node(root, NULL, root->node);
3479 if (status != BTRFS_TREE_BLOCK_CLEAN)
3482 if (btrfs_root_refs(root_item) > 0 ||
3483 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3484 path.nodes[level] = root->node;
3485 extent_buffer_get(root->node);
3486 path.slots[level] = 0;
3488 struct btrfs_key key;
3489 struct btrfs_disk_key found_key;
3491 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3492 level = root_item->drop_level;
3493 path.lowest_level = level;
3494 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3497 btrfs_node_key(path.nodes[level], &found_key,
3499 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3500 sizeof(found_key)));
3504 wret = walk_down_tree(root, &path, wc, &level);
3510 wret = walk_up_tree(root, &path, wc, &level);
3517 btrfs_release_path(&path);
3519 if (!cache_tree_empty(&corrupt_blocks)) {
3520 struct cache_extent *cache;
3521 struct btrfs_corrupt_block *corrupt;
3523 printf("The following tree block(s) is corrupted in tree %llu:\n",
3524 root->root_key.objectid);
3525 cache = first_cache_extent(&corrupt_blocks);
3527 corrupt = container_of(cache,
3528 struct btrfs_corrupt_block,
3530 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3531 cache->start, corrupt->level,
3532 corrupt->key.objectid, corrupt->key.type,
3533 corrupt->key.offset);
3534 cache = next_cache_extent(cache);
3537 printf("Try to repair the btree for root %llu\n",
3538 root->root_key.objectid);
3539 ret = repair_btree(root, &corrupt_blocks);
3541 fprintf(stderr, "Failed to repair btree: %s\n",
3544 printf("Btree for root %llu is fixed\n",
3545 root->root_key.objectid);
3549 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3553 if (root_node.current) {
3554 root_node.current->checked = 1;
3555 maybe_free_inode_rec(&root_node.inode_cache,
3559 err = check_inode_recs(root, &root_node.inode_cache);
3563 free_corrupt_blocks_tree(&corrupt_blocks);
3564 root->fs_info->corrupt_blocks = NULL;
3565 free_orphan_data_extents(&root->orphan_data_extents);
3569 static int fs_root_objectid(u64 objectid)
3571 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3572 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3574 return is_fstree(objectid);
3577 static int check_fs_roots(struct btrfs_root *root,
3578 struct cache_tree *root_cache)
3580 struct btrfs_path path;
3581 struct btrfs_key key;
3582 struct walk_control wc;
3583 struct extent_buffer *leaf, *tree_node;
3584 struct btrfs_root *tmp_root;
3585 struct btrfs_root *tree_root = root->fs_info->tree_root;
3589 if (ctx.progress_enabled) {
3590 ctx.tp = TASK_FS_ROOTS;
3591 task_start(ctx.info);
3595 * Just in case we made any changes to the extent tree that weren't
3596 * reflected into the free space cache yet.
3599 reset_cached_block_groups(root->fs_info);
3600 memset(&wc, 0, sizeof(wc));
3601 cache_tree_init(&wc.shared);
3602 btrfs_init_path(&path);
3607 key.type = BTRFS_ROOT_ITEM_KEY;
3608 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3613 tree_node = tree_root->node;
3615 if (tree_node != tree_root->node) {
3616 free_root_recs_tree(root_cache);
3617 btrfs_release_path(&path);
3620 leaf = path.nodes[0];
3621 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3622 ret = btrfs_next_leaf(tree_root, &path);
3628 leaf = path.nodes[0];
3630 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3631 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3632 fs_root_objectid(key.objectid)) {
3633 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3634 tmp_root = btrfs_read_fs_root_no_cache(
3635 root->fs_info, &key);
3637 key.offset = (u64)-1;
3638 tmp_root = btrfs_read_fs_root(
3639 root->fs_info, &key);
3641 if (IS_ERR(tmp_root)) {
3645 ret = check_fs_root(tmp_root, root_cache, &wc);
3646 if (ret == -EAGAIN) {
3647 free_root_recs_tree(root_cache);
3648 btrfs_release_path(&path);
3653 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3654 btrfs_free_fs_root(tmp_root);
3655 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3656 key.type == BTRFS_ROOT_BACKREF_KEY) {
3657 process_root_ref(leaf, path.slots[0], &key,
3664 btrfs_release_path(&path);
3666 free_extent_cache_tree(&wc.shared);
3667 if (!cache_tree_empty(&wc.shared))
3668 fprintf(stderr, "warning line %d\n", __LINE__);
3670 task_stop(ctx.info);
3675 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3677 struct list_head *cur = rec->backrefs.next;
3678 struct extent_backref *back;
3679 struct tree_backref *tback;
3680 struct data_backref *dback;
3684 while(cur != &rec->backrefs) {
3685 back = list_entry(cur, struct extent_backref, list);
3687 if (!back->found_extent_tree) {
3691 if (back->is_data) {
3692 dback = (struct data_backref *)back;
3693 fprintf(stderr, "Backref %llu %s %llu"
3694 " owner %llu offset %llu num_refs %lu"
3695 " not found in extent tree\n",
3696 (unsigned long long)rec->start,
3697 back->full_backref ?
3699 back->full_backref ?
3700 (unsigned long long)dback->parent:
3701 (unsigned long long)dback->root,
3702 (unsigned long long)dback->owner,
3703 (unsigned long long)dback->offset,
3704 (unsigned long)dback->num_refs);
3706 tback = (struct tree_backref *)back;
3707 fprintf(stderr, "Backref %llu parent %llu"
3708 " root %llu not found in extent tree\n",
3709 (unsigned long long)rec->start,
3710 (unsigned long long)tback->parent,
3711 (unsigned long long)tback->root);
3714 if (!back->is_data && !back->found_ref) {
3718 tback = (struct tree_backref *)back;
3719 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3720 (unsigned long long)rec->start,
3721 back->full_backref ? "parent" : "root",
3722 back->full_backref ?
3723 (unsigned long long)tback->parent :
3724 (unsigned long long)tback->root, back);
3726 if (back->is_data) {
3727 dback = (struct data_backref *)back;
3728 if (dback->found_ref != dback->num_refs) {
3732 fprintf(stderr, "Incorrect local backref count"
3733 " on %llu %s %llu owner %llu"
3734 " offset %llu found %u wanted %u back %p\n",
3735 (unsigned long long)rec->start,
3736 back->full_backref ?
3738 back->full_backref ?
3739 (unsigned long long)dback->parent:
3740 (unsigned long long)dback->root,
3741 (unsigned long long)dback->owner,
3742 (unsigned long long)dback->offset,
3743 dback->found_ref, dback->num_refs, back);
3745 if (dback->disk_bytenr != rec->start) {
3749 fprintf(stderr, "Backref disk bytenr does not"
3750 " match extent record, bytenr=%llu, "
3751 "ref bytenr=%llu\n",
3752 (unsigned long long)rec->start,
3753 (unsigned long long)dback->disk_bytenr);
3756 if (dback->bytes != rec->nr) {
3760 fprintf(stderr, "Backref bytes do not match "
3761 "extent backref, bytenr=%llu, ref "
3762 "bytes=%llu, backref bytes=%llu\n",
3763 (unsigned long long)rec->start,
3764 (unsigned long long)rec->nr,
3765 (unsigned long long)dback->bytes);
3768 if (!back->is_data) {
3771 dback = (struct data_backref *)back;
3772 found += dback->found_ref;
3775 if (found != rec->refs) {
3779 fprintf(stderr, "Incorrect global backref count "
3780 "on %llu found %llu wanted %llu\n",
3781 (unsigned long long)rec->start,
3782 (unsigned long long)found,
3783 (unsigned long long)rec->refs);
3789 static int free_all_extent_backrefs(struct extent_record *rec)
3791 struct extent_backref *back;
3792 struct list_head *cur;
3793 while (!list_empty(&rec->backrefs)) {
3794 cur = rec->backrefs.next;
3795 back = list_entry(cur, struct extent_backref, list);
3802 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3803 struct cache_tree *extent_cache)
3805 struct cache_extent *cache;
3806 struct extent_record *rec;
3809 cache = first_cache_extent(extent_cache);
3812 rec = container_of(cache, struct extent_record, cache);
3813 remove_cache_extent(extent_cache, cache);
3814 free_all_extent_backrefs(rec);
3819 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3820 struct extent_record *rec)
3822 if (rec->content_checked && rec->owner_ref_checked &&
3823 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3824 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3825 !rec->bad_full_backref && !rec->crossing_stripes &&
3826 !rec->wrong_chunk_type) {
3827 remove_cache_extent(extent_cache, &rec->cache);
3828 free_all_extent_backrefs(rec);
3829 list_del_init(&rec->list);
3835 static int check_owner_ref(struct btrfs_root *root,
3836 struct extent_record *rec,
3837 struct extent_buffer *buf)
3839 struct extent_backref *node;
3840 struct tree_backref *back;
3841 struct btrfs_root *ref_root;
3842 struct btrfs_key key;
3843 struct btrfs_path path;
3844 struct extent_buffer *parent;
3849 list_for_each_entry(node, &rec->backrefs, list) {
3852 if (!node->found_ref)
3854 if (node->full_backref)
3856 back = (struct tree_backref *)node;
3857 if (btrfs_header_owner(buf) == back->root)
3860 BUG_ON(rec->is_root);
3862 /* try to find the block by search corresponding fs tree */
3863 key.objectid = btrfs_header_owner(buf);
3864 key.type = BTRFS_ROOT_ITEM_KEY;
3865 key.offset = (u64)-1;
3867 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3868 if (IS_ERR(ref_root))
3871 level = btrfs_header_level(buf);
3873 btrfs_item_key_to_cpu(buf, &key, 0);
3875 btrfs_node_key_to_cpu(buf, &key, 0);
3877 btrfs_init_path(&path);
3878 path.lowest_level = level + 1;
3879 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3883 parent = path.nodes[level + 1];
3884 if (parent && buf->start == btrfs_node_blockptr(parent,
3885 path.slots[level + 1]))
3888 btrfs_release_path(&path);
3889 return found ? 0 : 1;
3892 static int is_extent_tree_record(struct extent_record *rec)
3894 struct list_head *cur = rec->backrefs.next;
3895 struct extent_backref *node;
3896 struct tree_backref *back;
3899 while(cur != &rec->backrefs) {
3900 node = list_entry(cur, struct extent_backref, list);
3904 back = (struct tree_backref *)node;
3905 if (node->full_backref)
3907 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3914 static int record_bad_block_io(struct btrfs_fs_info *info,
3915 struct cache_tree *extent_cache,
3918 struct extent_record *rec;
3919 struct cache_extent *cache;
3920 struct btrfs_key key;
3922 cache = lookup_cache_extent(extent_cache, start, len);
3926 rec = container_of(cache, struct extent_record, cache);
3927 if (!is_extent_tree_record(rec))
3930 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3931 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3934 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3935 struct extent_buffer *buf, int slot)
3937 if (btrfs_header_level(buf)) {
3938 struct btrfs_key_ptr ptr1, ptr2;
3940 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3941 sizeof(struct btrfs_key_ptr));
3942 read_extent_buffer(buf, &ptr2,
3943 btrfs_node_key_ptr_offset(slot + 1),
3944 sizeof(struct btrfs_key_ptr));
3945 write_extent_buffer(buf, &ptr1,
3946 btrfs_node_key_ptr_offset(slot + 1),
3947 sizeof(struct btrfs_key_ptr));
3948 write_extent_buffer(buf, &ptr2,
3949 btrfs_node_key_ptr_offset(slot),
3950 sizeof(struct btrfs_key_ptr));
3952 struct btrfs_disk_key key;
3953 btrfs_node_key(buf, &key, 0);
3954 btrfs_fixup_low_keys(root, path, &key,
3955 btrfs_header_level(buf) + 1);
3958 struct btrfs_item *item1, *item2;
3959 struct btrfs_key k1, k2;
3960 char *item1_data, *item2_data;
3961 u32 item1_offset, item2_offset, item1_size, item2_size;
3963 item1 = btrfs_item_nr(slot);
3964 item2 = btrfs_item_nr(slot + 1);
3965 btrfs_item_key_to_cpu(buf, &k1, slot);
3966 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3967 item1_offset = btrfs_item_offset(buf, item1);
3968 item2_offset = btrfs_item_offset(buf, item2);
3969 item1_size = btrfs_item_size(buf, item1);
3970 item2_size = btrfs_item_size(buf, item2);
3972 item1_data = malloc(item1_size);
3975 item2_data = malloc(item2_size);
3981 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3982 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3984 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3985 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3989 btrfs_set_item_offset(buf, item1, item2_offset);
3990 btrfs_set_item_offset(buf, item2, item1_offset);
3991 btrfs_set_item_size(buf, item1, item2_size);
3992 btrfs_set_item_size(buf, item2, item1_size);
3994 path->slots[0] = slot;
3995 btrfs_set_item_key_unsafe(root, path, &k2);
3996 path->slots[0] = slot + 1;
3997 btrfs_set_item_key_unsafe(root, path, &k1);
4002 static int fix_key_order(struct btrfs_trans_handle *trans,
4003 struct btrfs_root *root,
4004 struct btrfs_path *path)
4006 struct extent_buffer *buf;
4007 struct btrfs_key k1, k2;
4009 int level = path->lowest_level;
4012 buf = path->nodes[level];
4013 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4015 btrfs_node_key_to_cpu(buf, &k1, i);
4016 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4018 btrfs_item_key_to_cpu(buf, &k1, i);
4019 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4021 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4023 ret = swap_values(root, path, buf, i);
4026 btrfs_mark_buffer_dirty(buf);
4032 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4033 struct btrfs_root *root,
4034 struct btrfs_path *path,
4035 struct extent_buffer *buf, int slot)
4037 struct btrfs_key key;
4038 int nritems = btrfs_header_nritems(buf);
4040 btrfs_item_key_to_cpu(buf, &key, slot);
4042 /* These are all the keys we can deal with missing. */
4043 if (key.type != BTRFS_DIR_INDEX_KEY &&
4044 key.type != BTRFS_EXTENT_ITEM_KEY &&
4045 key.type != BTRFS_METADATA_ITEM_KEY &&
4046 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4047 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4050 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4051 (unsigned long long)key.objectid, key.type,
4052 (unsigned long long)key.offset, slot, buf->start);
4053 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4054 btrfs_item_nr_offset(slot + 1),
4055 sizeof(struct btrfs_item) *
4056 (nritems - slot - 1));
4057 btrfs_set_header_nritems(buf, nritems - 1);
4059 struct btrfs_disk_key disk_key;
4061 btrfs_item_key(buf, &disk_key, 0);
4062 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4064 btrfs_mark_buffer_dirty(buf);
4068 static int fix_item_offset(struct btrfs_trans_handle *trans,
4069 struct btrfs_root *root,
4070 struct btrfs_path *path)
4072 struct extent_buffer *buf;
4076 /* We should only get this for leaves */
4077 BUG_ON(path->lowest_level);
4078 buf = path->nodes[0];
4080 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4081 unsigned int shift = 0, offset;
4083 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4084 BTRFS_LEAF_DATA_SIZE(root)) {
4085 if (btrfs_item_end_nr(buf, i) >
4086 BTRFS_LEAF_DATA_SIZE(root)) {
4087 ret = delete_bogus_item(trans, root, path,
4091 fprintf(stderr, "item is off the end of the "
4092 "leaf, can't fix\n");
4096 shift = BTRFS_LEAF_DATA_SIZE(root) -
4097 btrfs_item_end_nr(buf, i);
4098 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4099 btrfs_item_offset_nr(buf, i - 1)) {
4100 if (btrfs_item_end_nr(buf, i) >
4101 btrfs_item_offset_nr(buf, i - 1)) {
4102 ret = delete_bogus_item(trans, root, path,
4106 fprintf(stderr, "items overlap, can't fix\n");
4110 shift = btrfs_item_offset_nr(buf, i - 1) -
4111 btrfs_item_end_nr(buf, i);
4116 printf("Shifting item nr %d by %u bytes in block %llu\n",
4117 i, shift, (unsigned long long)buf->start);
4118 offset = btrfs_item_offset_nr(buf, i);
4119 memmove_extent_buffer(buf,
4120 btrfs_leaf_data(buf) + offset + shift,
4121 btrfs_leaf_data(buf) + offset,
4122 btrfs_item_size_nr(buf, i));
4123 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4125 btrfs_mark_buffer_dirty(buf);
4129 * We may have moved things, in which case we want to exit so we don't
4130 * write those changes out. Once we have proper abort functionality in
4131 * progs this can be changed to something nicer.
4138 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4139 * then just return -EIO.
4141 static int try_to_fix_bad_block(struct btrfs_root *root,
4142 struct extent_buffer *buf,
4143 enum btrfs_tree_block_status status)
4145 struct btrfs_trans_handle *trans;
4146 struct ulist *roots;
4147 struct ulist_node *node;
4148 struct btrfs_root *search_root;
4149 struct btrfs_path *path;
4150 struct ulist_iterator iter;
4151 struct btrfs_key root_key, key;
4154 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4155 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4158 path = btrfs_alloc_path();
4162 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4165 btrfs_free_path(path);
4169 ULIST_ITER_INIT(&iter);
4170 while ((node = ulist_next(roots, &iter))) {
4171 root_key.objectid = node->val;
4172 root_key.type = BTRFS_ROOT_ITEM_KEY;
4173 root_key.offset = (u64)-1;
4175 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4182 trans = btrfs_start_transaction(search_root, 0);
4183 if (IS_ERR(trans)) {
4184 ret = PTR_ERR(trans);
4188 path->lowest_level = btrfs_header_level(buf);
4189 path->skip_check_block = 1;
4190 if (path->lowest_level)
4191 btrfs_node_key_to_cpu(buf, &key, 0);
4193 btrfs_item_key_to_cpu(buf, &key, 0);
4194 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4197 btrfs_commit_transaction(trans, search_root);
4200 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4201 ret = fix_key_order(trans, search_root, path);
4202 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4203 ret = fix_item_offset(trans, search_root, path);
4205 btrfs_commit_transaction(trans, search_root);
4208 btrfs_release_path(path);
4209 btrfs_commit_transaction(trans, search_root);
4212 btrfs_free_path(path);
4216 static int check_block(struct btrfs_root *root,
4217 struct cache_tree *extent_cache,
4218 struct extent_buffer *buf, u64 flags)
4220 struct extent_record *rec;
4221 struct cache_extent *cache;
4222 struct btrfs_key key;
4223 enum btrfs_tree_block_status status;
4227 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4230 rec = container_of(cache, struct extent_record, cache);
4231 rec->generation = btrfs_header_generation(buf);
4233 level = btrfs_header_level(buf);
4234 if (btrfs_header_nritems(buf) > 0) {
4237 btrfs_item_key_to_cpu(buf, &key, 0);
4239 btrfs_node_key_to_cpu(buf, &key, 0);
4241 rec->info_objectid = key.objectid;
4243 rec->info_level = level;
4245 if (btrfs_is_leaf(buf))
4246 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4248 status = btrfs_check_node(root, &rec->parent_key, buf);
4250 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4252 status = try_to_fix_bad_block(root, buf, status);
4253 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4255 fprintf(stderr, "bad block %llu\n",
4256 (unsigned long long)buf->start);
4259 * Signal to callers we need to start the scan over
4260 * again since we'll have cow'ed blocks.
4265 rec->content_checked = 1;
4266 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4267 rec->owner_ref_checked = 1;
4269 ret = check_owner_ref(root, rec, buf);
4271 rec->owner_ref_checked = 1;
4275 maybe_free_extent_rec(extent_cache, rec);
4279 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4280 u64 parent, u64 root)
4282 struct list_head *cur = rec->backrefs.next;
4283 struct extent_backref *node;
4284 struct tree_backref *back;
4286 while(cur != &rec->backrefs) {
4287 node = list_entry(cur, struct extent_backref, list);
4291 back = (struct tree_backref *)node;
4293 if (!node->full_backref)
4295 if (parent == back->parent)
4298 if (node->full_backref)
4300 if (back->root == root)
4307 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4308 u64 parent, u64 root)
4310 struct tree_backref *ref = malloc(sizeof(*ref));
4311 memset(&ref->node, 0, sizeof(ref->node));
4313 ref->parent = parent;
4314 ref->node.full_backref = 1;
4317 ref->node.full_backref = 0;
4319 list_add_tail(&ref->node.list, &rec->backrefs);
4324 static struct data_backref *find_data_backref(struct extent_record *rec,
4325 u64 parent, u64 root,
4326 u64 owner, u64 offset,
4328 u64 disk_bytenr, u64 bytes)
4330 struct list_head *cur = rec->backrefs.next;
4331 struct extent_backref *node;
4332 struct data_backref *back;
4334 while(cur != &rec->backrefs) {
4335 node = list_entry(cur, struct extent_backref, list);
4339 back = (struct data_backref *)node;
4341 if (!node->full_backref)
4343 if (parent == back->parent)
4346 if (node->full_backref)
4348 if (back->root == root && back->owner == owner &&
4349 back->offset == offset) {
4350 if (found_ref && node->found_ref &&
4351 (back->bytes != bytes ||
4352 back->disk_bytenr != disk_bytenr))
4361 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4362 u64 parent, u64 root,
4363 u64 owner, u64 offset,
4366 struct data_backref *ref = malloc(sizeof(*ref));
4367 memset(&ref->node, 0, sizeof(ref->node));
4368 ref->node.is_data = 1;
4371 ref->parent = parent;
4374 ref->node.full_backref = 1;
4378 ref->offset = offset;
4379 ref->node.full_backref = 0;
4381 ref->bytes = max_size;
4384 list_add_tail(&ref->node.list, &rec->backrefs);
4385 if (max_size > rec->max_size)
4386 rec->max_size = max_size;
4390 /* Check if the type of extent matches with its chunk */
4391 static void check_extent_type(struct extent_record *rec)
4393 struct btrfs_block_group_cache *bg_cache;
4395 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4399 /* data extent, check chunk directly*/
4400 if (!rec->metadata) {
4401 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4402 rec->wrong_chunk_type = 1;
4406 /* metadata extent, check the obvious case first */
4407 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4408 BTRFS_BLOCK_GROUP_METADATA))) {
4409 rec->wrong_chunk_type = 1;
4414 * Check SYSTEM extent, as it's also marked as metadata, we can only
4415 * make sure it's a SYSTEM extent by its backref
4417 if (!list_empty(&rec->backrefs)) {
4418 struct extent_backref *node;
4419 struct tree_backref *tback;
4422 node = list_entry(rec->backrefs.next, struct extent_backref,
4424 if (node->is_data) {
4425 /* tree block shouldn't have data backref */
4426 rec->wrong_chunk_type = 1;
4429 tback = container_of(node, struct tree_backref, node);
4431 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4432 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4434 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4435 if (!(bg_cache->flags & bg_type))
4436 rec->wrong_chunk_type = 1;
4440 static int add_extent_rec(struct cache_tree *extent_cache,
4441 struct btrfs_key *parent_key, u64 parent_gen,
4442 u64 start, u64 nr, u64 extent_item_refs,
4443 int is_root, int inc_ref, int set_checked,
4444 int metadata, int extent_rec, u64 max_size)
4446 struct extent_record *rec;
4447 struct cache_extent *cache;
4451 cache = lookup_cache_extent(extent_cache, start, nr);
4453 rec = container_of(cache, struct extent_record, cache);
4457 rec->nr = max(nr, max_size);
4460 * We need to make sure to reset nr to whatever the extent
4461 * record says was the real size, this way we can compare it to
4465 if (start != rec->start || rec->found_rec) {
4466 struct extent_record *tmp;
4469 if (list_empty(&rec->list))
4470 list_add_tail(&rec->list,
4471 &duplicate_extents);
4474 * We have to do this song and dance in case we
4475 * find an extent record that falls inside of
4476 * our current extent record but does not have
4477 * the same objectid.
4479 tmp = malloc(sizeof(*tmp));
4483 tmp->max_size = max_size;
4486 tmp->metadata = metadata;
4487 tmp->extent_item_refs = extent_item_refs;
4488 INIT_LIST_HEAD(&tmp->list);
4489 list_add_tail(&tmp->list, &rec->dups);
4490 rec->num_duplicates++;
4497 if (extent_item_refs && !dup) {
4498 if (rec->extent_item_refs) {
4499 fprintf(stderr, "block %llu rec "
4500 "extent_item_refs %llu, passed %llu\n",
4501 (unsigned long long)start,
4502 (unsigned long long)
4503 rec->extent_item_refs,
4504 (unsigned long long)extent_item_refs);
4506 rec->extent_item_refs = extent_item_refs;
4511 rec->content_checked = 1;
4512 rec->owner_ref_checked = 1;
4516 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4518 rec->parent_generation = parent_gen;
4520 if (rec->max_size < max_size)
4521 rec->max_size = max_size;
4524 * A metadata extent can't cross stripe_len boundary, otherwise
4525 * kernel scrub won't be able to handle it.
4526 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4529 if (metadata && check_crossing_stripes(rec->start,
4531 rec->crossing_stripes = 1;
4532 check_extent_type(rec);
4533 maybe_free_extent_rec(extent_cache, rec);
4536 rec = malloc(sizeof(*rec));
4538 rec->max_size = max_size;
4539 rec->nr = max(nr, max_size);
4540 rec->found_rec = !!extent_rec;
4541 rec->content_checked = 0;
4542 rec->owner_ref_checked = 0;
4543 rec->num_duplicates = 0;
4544 rec->metadata = metadata;
4545 rec->flag_block_full_backref = -1;
4546 rec->bad_full_backref = 0;
4547 rec->crossing_stripes = 0;
4548 rec->wrong_chunk_type = 0;
4549 INIT_LIST_HEAD(&rec->backrefs);
4550 INIT_LIST_HEAD(&rec->dups);
4551 INIT_LIST_HEAD(&rec->list);
4563 if (extent_item_refs)
4564 rec->extent_item_refs = extent_item_refs;
4566 rec->extent_item_refs = 0;
4569 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4571 memset(&rec->parent_key, 0, sizeof(*parent_key));
4574 rec->parent_generation = parent_gen;
4576 rec->parent_generation = 0;
4578 rec->cache.start = start;
4579 rec->cache.size = nr;
4580 ret = insert_cache_extent(extent_cache, &rec->cache);
4584 rec->content_checked = 1;
4585 rec->owner_ref_checked = 1;
4589 if (check_crossing_stripes(rec->start, rec->max_size))
4590 rec->crossing_stripes = 1;
4591 check_extent_type(rec);
4595 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4596 u64 parent, u64 root, int found_ref)
4598 struct extent_record *rec;
4599 struct tree_backref *back;
4600 struct cache_extent *cache;
4602 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4604 add_extent_rec(extent_cache, NULL, 0, bytenr,
4605 1, 0, 0, 0, 0, 1, 0, 0);
4606 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4611 rec = container_of(cache, struct extent_record, cache);
4612 if (rec->start != bytenr) {
4616 back = find_tree_backref(rec, parent, root);
4618 back = alloc_tree_backref(rec, parent, root);
4621 if (back->node.found_ref) {
4622 fprintf(stderr, "Extent back ref already exists "
4623 "for %llu parent %llu root %llu \n",
4624 (unsigned long long)bytenr,
4625 (unsigned long long)parent,
4626 (unsigned long long)root);
4628 back->node.found_ref = 1;
4630 if (back->node.found_extent_tree) {
4631 fprintf(stderr, "Extent back ref already exists "
4632 "for %llu parent %llu root %llu \n",
4633 (unsigned long long)bytenr,
4634 (unsigned long long)parent,
4635 (unsigned long long)root);
4637 back->node.found_extent_tree = 1;
4639 check_extent_type(rec);
4640 maybe_free_extent_rec(extent_cache, rec);
4644 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4645 u64 parent, u64 root, u64 owner, u64 offset,
4646 u32 num_refs, int found_ref, u64 max_size)
4648 struct extent_record *rec;
4649 struct data_backref *back;
4650 struct cache_extent *cache;
4652 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4654 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4656 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4661 rec = container_of(cache, struct extent_record, cache);
4662 if (rec->max_size < max_size)
4663 rec->max_size = max_size;
4666 * If found_ref is set then max_size is the real size and must match the
4667 * existing refs. So if we have already found a ref then we need to
4668 * make sure that this ref matches the existing one, otherwise we need
4669 * to add a new backref so we can notice that the backrefs don't match
4670 * and we need to figure out who is telling the truth. This is to
4671 * account for that awful fsync bug I introduced where we'd end up with
4672 * a btrfs_file_extent_item that would have its length include multiple
4673 * prealloc extents or point inside of a prealloc extent.
4675 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4678 back = alloc_data_backref(rec, parent, root, owner, offset,
4682 BUG_ON(num_refs != 1);
4683 if (back->node.found_ref)
4684 BUG_ON(back->bytes != max_size);
4685 back->node.found_ref = 1;
4686 back->found_ref += 1;
4687 back->bytes = max_size;
4688 back->disk_bytenr = bytenr;
4690 rec->content_checked = 1;
4691 rec->owner_ref_checked = 1;
4693 if (back->node.found_extent_tree) {
4694 fprintf(stderr, "Extent back ref already exists "
4695 "for %llu parent %llu root %llu "
4696 "owner %llu offset %llu num_refs %lu\n",
4697 (unsigned long long)bytenr,
4698 (unsigned long long)parent,
4699 (unsigned long long)root,
4700 (unsigned long long)owner,
4701 (unsigned long long)offset,
4702 (unsigned long)num_refs);
4704 back->num_refs = num_refs;
4705 back->node.found_extent_tree = 1;
4707 maybe_free_extent_rec(extent_cache, rec);
4711 static int add_pending(struct cache_tree *pending,
4712 struct cache_tree *seen, u64 bytenr, u32 size)
4715 ret = add_cache_extent(seen, bytenr, size);
4718 add_cache_extent(pending, bytenr, size);
4722 static int pick_next_pending(struct cache_tree *pending,
4723 struct cache_tree *reada,
4724 struct cache_tree *nodes,
4725 u64 last, struct block_info *bits, int bits_nr,
4728 unsigned long node_start = last;
4729 struct cache_extent *cache;
4732 cache = search_cache_extent(reada, 0);
4734 bits[0].start = cache->start;
4735 bits[0].size = cache->size;
4740 if (node_start > 32768)
4741 node_start -= 32768;
4743 cache = search_cache_extent(nodes, node_start);
4745 cache = search_cache_extent(nodes, 0);
4748 cache = search_cache_extent(pending, 0);
4753 bits[ret].start = cache->start;
4754 bits[ret].size = cache->size;
4755 cache = next_cache_extent(cache);
4757 } while (cache && ret < bits_nr);
4763 bits[ret].start = cache->start;
4764 bits[ret].size = cache->size;
4765 cache = next_cache_extent(cache);
4767 } while (cache && ret < bits_nr);
4769 if (bits_nr - ret > 8) {
4770 u64 lookup = bits[0].start + bits[0].size;
4771 struct cache_extent *next;
4772 next = search_cache_extent(pending, lookup);
4774 if (next->start - lookup > 32768)
4776 bits[ret].start = next->start;
4777 bits[ret].size = next->size;
4778 lookup = next->start + next->size;
4782 next = next_cache_extent(next);
4790 static void free_chunk_record(struct cache_extent *cache)
4792 struct chunk_record *rec;
4794 rec = container_of(cache, struct chunk_record, cache);
4795 list_del_init(&rec->list);
4796 list_del_init(&rec->dextents);
4800 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4802 cache_tree_free_extents(chunk_cache, free_chunk_record);
4805 static void free_device_record(struct rb_node *node)
4807 struct device_record *rec;
4809 rec = container_of(node, struct device_record, node);
4813 FREE_RB_BASED_TREE(device_cache, free_device_record);
4815 int insert_block_group_record(struct block_group_tree *tree,
4816 struct block_group_record *bg_rec)
4820 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4824 list_add_tail(&bg_rec->list, &tree->block_groups);
4828 static void free_block_group_record(struct cache_extent *cache)
4830 struct block_group_record *rec;
4832 rec = container_of(cache, struct block_group_record, cache);
4833 list_del_init(&rec->list);
4837 void free_block_group_tree(struct block_group_tree *tree)
4839 cache_tree_free_extents(&tree->tree, free_block_group_record);
4842 int insert_device_extent_record(struct device_extent_tree *tree,
4843 struct device_extent_record *de_rec)
4848 * Device extent is a bit different from the other extents, because
4849 * the extents which belong to the different devices may have the
4850 * same start and size, so we need use the special extent cache
4851 * search/insert functions.
4853 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4857 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4858 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4862 static void free_device_extent_record(struct cache_extent *cache)
4864 struct device_extent_record *rec;
4866 rec = container_of(cache, struct device_extent_record, cache);
4867 if (!list_empty(&rec->chunk_list))
4868 list_del_init(&rec->chunk_list);
4869 if (!list_empty(&rec->device_list))
4870 list_del_init(&rec->device_list);
4874 void free_device_extent_tree(struct device_extent_tree *tree)
4876 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4879 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4880 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4881 struct extent_buffer *leaf, int slot)
4883 struct btrfs_extent_ref_v0 *ref0;
4884 struct btrfs_key key;
4886 btrfs_item_key_to_cpu(leaf, &key, slot);
4887 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4888 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4889 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4891 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4892 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4898 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4899 struct btrfs_key *key,
4902 struct btrfs_chunk *ptr;
4903 struct chunk_record *rec;
4906 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4907 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4909 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4911 fprintf(stderr, "memory allocation failed\n");
4915 INIT_LIST_HEAD(&rec->list);
4916 INIT_LIST_HEAD(&rec->dextents);
4919 rec->cache.start = key->offset;
4920 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4922 rec->generation = btrfs_header_generation(leaf);
4924 rec->objectid = key->objectid;
4925 rec->type = key->type;
4926 rec->offset = key->offset;
4928 rec->length = rec->cache.size;
4929 rec->owner = btrfs_chunk_owner(leaf, ptr);
4930 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4931 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4932 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4933 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4934 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4935 rec->num_stripes = num_stripes;
4936 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4938 for (i = 0; i < rec->num_stripes; ++i) {
4939 rec->stripes[i].devid =
4940 btrfs_stripe_devid_nr(leaf, ptr, i);
4941 rec->stripes[i].offset =
4942 btrfs_stripe_offset_nr(leaf, ptr, i);
4943 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4944 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4951 static int process_chunk_item(struct cache_tree *chunk_cache,
4952 struct btrfs_key *key, struct extent_buffer *eb,
4955 struct chunk_record *rec;
4958 rec = btrfs_new_chunk_record(eb, key, slot);
4959 ret = insert_cache_extent(chunk_cache, &rec->cache);
4961 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4962 rec->offset, rec->length);
4969 static int process_device_item(struct rb_root *dev_cache,
4970 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4972 struct btrfs_dev_item *ptr;
4973 struct device_record *rec;
4976 ptr = btrfs_item_ptr(eb,
4977 slot, struct btrfs_dev_item);
4979 rec = malloc(sizeof(*rec));
4981 fprintf(stderr, "memory allocation failed\n");
4985 rec->devid = key->offset;
4986 rec->generation = btrfs_header_generation(eb);
4988 rec->objectid = key->objectid;
4989 rec->type = key->type;
4990 rec->offset = key->offset;
4992 rec->devid = btrfs_device_id(eb, ptr);
4993 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4994 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4996 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4998 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5005 struct block_group_record *
5006 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5009 struct btrfs_block_group_item *ptr;
5010 struct block_group_record *rec;
5012 rec = calloc(1, sizeof(*rec));
5014 fprintf(stderr, "memory allocation failed\n");
5018 rec->cache.start = key->objectid;
5019 rec->cache.size = key->offset;
5021 rec->generation = btrfs_header_generation(leaf);
5023 rec->objectid = key->objectid;
5024 rec->type = key->type;
5025 rec->offset = key->offset;
5027 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5028 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5030 INIT_LIST_HEAD(&rec->list);
5035 static int process_block_group_item(struct block_group_tree *block_group_cache,
5036 struct btrfs_key *key,
5037 struct extent_buffer *eb, int slot)
5039 struct block_group_record *rec;
5042 rec = btrfs_new_block_group_record(eb, key, slot);
5043 ret = insert_block_group_record(block_group_cache, rec);
5045 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5046 rec->objectid, rec->offset);
5053 struct device_extent_record *
5054 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5055 struct btrfs_key *key, int slot)
5057 struct device_extent_record *rec;
5058 struct btrfs_dev_extent *ptr;
5060 rec = calloc(1, sizeof(*rec));
5062 fprintf(stderr, "memory allocation failed\n");
5066 rec->cache.objectid = key->objectid;
5067 rec->cache.start = key->offset;
5069 rec->generation = btrfs_header_generation(leaf);
5071 rec->objectid = key->objectid;
5072 rec->type = key->type;
5073 rec->offset = key->offset;
5075 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5076 rec->chunk_objecteid =
5077 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5079 btrfs_dev_extent_chunk_offset(leaf, ptr);
5080 rec->length = btrfs_dev_extent_length(leaf, ptr);
5081 rec->cache.size = rec->length;
5083 INIT_LIST_HEAD(&rec->chunk_list);
5084 INIT_LIST_HEAD(&rec->device_list);
5090 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5091 struct btrfs_key *key, struct extent_buffer *eb,
5094 struct device_extent_record *rec;
5097 rec = btrfs_new_device_extent_record(eb, key, slot);
5098 ret = insert_device_extent_record(dev_extent_cache, rec);
5101 "Device extent[%llu, %llu, %llu] existed.\n",
5102 rec->objectid, rec->offset, rec->length);
5109 static int process_extent_item(struct btrfs_root *root,
5110 struct cache_tree *extent_cache,
5111 struct extent_buffer *eb, int slot)
5113 struct btrfs_extent_item *ei;
5114 struct btrfs_extent_inline_ref *iref;
5115 struct btrfs_extent_data_ref *dref;
5116 struct btrfs_shared_data_ref *sref;
5117 struct btrfs_key key;
5121 u32 item_size = btrfs_item_size_nr(eb, slot);
5127 btrfs_item_key_to_cpu(eb, &key, slot);
5129 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5131 num_bytes = root->leafsize;
5133 num_bytes = key.offset;
5136 if (item_size < sizeof(*ei)) {
5137 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5138 struct btrfs_extent_item_v0 *ei0;
5139 BUG_ON(item_size != sizeof(*ei0));
5140 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5141 refs = btrfs_extent_refs_v0(eb, ei0);
5145 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5146 num_bytes, refs, 0, 0, 0, metadata, 1,
5150 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5151 refs = btrfs_extent_refs(eb, ei);
5152 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5157 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5158 refs, 0, 0, 0, metadata, 1, num_bytes);
5160 ptr = (unsigned long)(ei + 1);
5161 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5162 key.type == BTRFS_EXTENT_ITEM_KEY)
5163 ptr += sizeof(struct btrfs_tree_block_info);
5165 end = (unsigned long)ei + item_size;
5167 iref = (struct btrfs_extent_inline_ref *)ptr;
5168 type = btrfs_extent_inline_ref_type(eb, iref);
5169 offset = btrfs_extent_inline_ref_offset(eb, iref);
5171 case BTRFS_TREE_BLOCK_REF_KEY:
5172 add_tree_backref(extent_cache, key.objectid,
5175 case BTRFS_SHARED_BLOCK_REF_KEY:
5176 add_tree_backref(extent_cache, key.objectid,
5179 case BTRFS_EXTENT_DATA_REF_KEY:
5180 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5181 add_data_backref(extent_cache, key.objectid, 0,
5182 btrfs_extent_data_ref_root(eb, dref),
5183 btrfs_extent_data_ref_objectid(eb,
5185 btrfs_extent_data_ref_offset(eb, dref),
5186 btrfs_extent_data_ref_count(eb, dref),
5189 case BTRFS_SHARED_DATA_REF_KEY:
5190 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5191 add_data_backref(extent_cache, key.objectid, offset,
5193 btrfs_shared_data_ref_count(eb, sref),
5197 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5198 key.objectid, key.type, num_bytes);
5201 ptr += btrfs_extent_inline_ref_size(type);
5208 static int check_cache_range(struct btrfs_root *root,
5209 struct btrfs_block_group_cache *cache,
5210 u64 offset, u64 bytes)
5212 struct btrfs_free_space *entry;
5218 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5219 bytenr = btrfs_sb_offset(i);
5220 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5221 cache->key.objectid, bytenr, 0,
5222 &logical, &nr, &stripe_len);
5227 if (logical[nr] + stripe_len <= offset)
5229 if (offset + bytes <= logical[nr])
5231 if (logical[nr] == offset) {
5232 if (stripe_len >= bytes) {
5236 bytes -= stripe_len;
5237 offset += stripe_len;
5238 } else if (logical[nr] < offset) {
5239 if (logical[nr] + stripe_len >=
5244 bytes = (offset + bytes) -
5245 (logical[nr] + stripe_len);
5246 offset = logical[nr] + stripe_len;
5249 * Could be tricky, the super may land in the
5250 * middle of the area we're checking. First
5251 * check the easiest case, it's at the end.
5253 if (logical[nr] + stripe_len >=
5255 bytes = logical[nr] - offset;
5259 /* Check the left side */
5260 ret = check_cache_range(root, cache,
5262 logical[nr] - offset);
5268 /* Now we continue with the right side */
5269 bytes = (offset + bytes) -
5270 (logical[nr] + stripe_len);
5271 offset = logical[nr] + stripe_len;
5278 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5280 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5281 offset, offset+bytes);
5285 if (entry->offset != offset) {
5286 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5291 if (entry->bytes != bytes) {
5292 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5293 bytes, entry->bytes, offset);
5297 unlink_free_space(cache->free_space_ctl, entry);
5302 static int verify_space_cache(struct btrfs_root *root,
5303 struct btrfs_block_group_cache *cache)
5305 struct btrfs_path *path;
5306 struct extent_buffer *leaf;
5307 struct btrfs_key key;
5311 path = btrfs_alloc_path();
5315 root = root->fs_info->extent_root;
5317 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5319 key.objectid = last;
5321 key.type = BTRFS_EXTENT_ITEM_KEY;
5323 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5328 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5329 ret = btrfs_next_leaf(root, path);
5337 leaf = path->nodes[0];
5338 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5339 if (key.objectid >= cache->key.offset + cache->key.objectid)
5341 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5342 key.type != BTRFS_METADATA_ITEM_KEY) {
5347 if (last == key.objectid) {
5348 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5349 last = key.objectid + key.offset;
5351 last = key.objectid + root->leafsize;
5356 ret = check_cache_range(root, cache, last,
5357 key.objectid - last);
5360 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5361 last = key.objectid + key.offset;
5363 last = key.objectid + root->leafsize;
5367 if (last < cache->key.objectid + cache->key.offset)
5368 ret = check_cache_range(root, cache, last,
5369 cache->key.objectid +
5370 cache->key.offset - last);
5373 btrfs_free_path(path);
5376 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5377 fprintf(stderr, "There are still entries left in the space "
5385 static int check_space_cache(struct btrfs_root *root)
5387 struct btrfs_block_group_cache *cache;
5388 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5392 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5393 btrfs_super_generation(root->fs_info->super_copy) !=
5394 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5395 printf("cache and super generation don't match, space cache "
5396 "will be invalidated\n");
5400 if (ctx.progress_enabled) {
5401 ctx.tp = TASK_FREE_SPACE;
5402 task_start(ctx.info);
5406 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5410 start = cache->key.objectid + cache->key.offset;
5411 if (!cache->free_space_ctl) {
5412 if (btrfs_init_free_space_ctl(cache,
5413 root->sectorsize)) {
5418 btrfs_remove_free_space_cache(cache);
5421 ret = load_free_space_cache(root->fs_info, cache);
5425 ret = verify_space_cache(root, cache);
5427 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5428 cache->key.objectid);
5433 task_stop(ctx.info);
5435 return error ? -EINVAL : 0;
5438 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5439 u64 num_bytes, unsigned long leaf_offset,
5440 struct extent_buffer *eb) {
5443 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5445 unsigned long csum_offset;
5449 u64 data_checked = 0;
5455 if (num_bytes % root->sectorsize)
5458 data = malloc(num_bytes);
5462 while (offset < num_bytes) {
5465 read_len = num_bytes - offset;
5466 /* read as much space once a time */
5467 ret = read_extent_data(root, data + offset,
5468 bytenr + offset, &read_len, mirror);
5472 /* verify every 4k data's checksum */
5473 while (data_checked < read_len) {
5475 tmp = offset + data_checked;
5477 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5478 csum, root->sectorsize);
5479 btrfs_csum_final(csum, (char *)&csum);
5481 csum_offset = leaf_offset +
5482 tmp / root->sectorsize * csum_size;
5483 read_extent_buffer(eb, (char *)&csum_expected,
5484 csum_offset, csum_size);
5485 /* try another mirror */
5486 if (csum != csum_expected) {
5487 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5488 mirror, bytenr + tmp,
5489 csum, csum_expected);
5490 num_copies = btrfs_num_copies(
5491 &root->fs_info->mapping_tree,
5493 if (mirror < num_copies - 1) {
5498 data_checked += root->sectorsize;
5507 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5510 struct btrfs_path *path;
5511 struct extent_buffer *leaf;
5512 struct btrfs_key key;
5515 path = btrfs_alloc_path();
5517 fprintf(stderr, "Error allocing path\n");
5521 key.objectid = bytenr;
5522 key.type = BTRFS_EXTENT_ITEM_KEY;
5523 key.offset = (u64)-1;
5526 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5529 fprintf(stderr, "Error looking up extent record %d\n", ret);
5530 btrfs_free_path(path);
5533 if (path->slots[0] > 0) {
5536 ret = btrfs_prev_leaf(root, path);
5539 } else if (ret > 0) {
5546 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5549 * Block group items come before extent items if they have the same
5550 * bytenr, so walk back one more just in case. Dear future traveler,
5551 * first congrats on mastering time travel. Now if it's not too much
5552 * trouble could you go back to 2006 and tell Chris to make the
5553 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5554 * EXTENT_ITEM_KEY please?
5556 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5557 if (path->slots[0] > 0) {
5560 ret = btrfs_prev_leaf(root, path);
5563 } else if (ret > 0) {
5568 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5572 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5573 ret = btrfs_next_leaf(root, path);
5575 fprintf(stderr, "Error going to next leaf "
5577 btrfs_free_path(path);
5583 leaf = path->nodes[0];
5584 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5585 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5589 if (key.objectid + key.offset < bytenr) {
5593 if (key.objectid > bytenr + num_bytes)
5596 if (key.objectid == bytenr) {
5597 if (key.offset >= num_bytes) {
5601 num_bytes -= key.offset;
5602 bytenr += key.offset;
5603 } else if (key.objectid < bytenr) {
5604 if (key.objectid + key.offset >= bytenr + num_bytes) {
5608 num_bytes = (bytenr + num_bytes) -
5609 (key.objectid + key.offset);
5610 bytenr = key.objectid + key.offset;
5612 if (key.objectid + key.offset < bytenr + num_bytes) {
5613 u64 new_start = key.objectid + key.offset;
5614 u64 new_bytes = bytenr + num_bytes - new_start;
5617 * Weird case, the extent is in the middle of
5618 * our range, we'll have to search one side
5619 * and then the other. Not sure if this happens
5620 * in real life, but no harm in coding it up
5621 * anyway just in case.
5623 btrfs_release_path(path);
5624 ret = check_extent_exists(root, new_start,
5627 fprintf(stderr, "Right section didn't "
5631 num_bytes = key.objectid - bytenr;
5634 num_bytes = key.objectid - bytenr;
5641 if (num_bytes && !ret) {
5642 fprintf(stderr, "There are no extents for csum range "
5643 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5647 btrfs_free_path(path);
5651 static int check_csums(struct btrfs_root *root)
5653 struct btrfs_path *path;
5654 struct extent_buffer *leaf;
5655 struct btrfs_key key;
5656 u64 offset = 0, num_bytes = 0;
5657 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5661 unsigned long leaf_offset;
5663 root = root->fs_info->csum_root;
5664 if (!extent_buffer_uptodate(root->node)) {
5665 fprintf(stderr, "No valid csum tree found\n");
5669 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5670 key.type = BTRFS_EXTENT_CSUM_KEY;
5673 path = btrfs_alloc_path();
5677 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5679 fprintf(stderr, "Error searching csum tree %d\n", ret);
5680 btrfs_free_path(path);
5684 if (ret > 0 && path->slots[0])
5689 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5690 ret = btrfs_next_leaf(root, path);
5692 fprintf(stderr, "Error going to next leaf "
5699 leaf = path->nodes[0];
5701 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5702 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5707 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5708 csum_size) * root->sectorsize;
5709 if (!check_data_csum)
5710 goto skip_csum_check;
5711 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5712 ret = check_extent_csums(root, key.offset, data_len,
5718 offset = key.offset;
5719 } else if (key.offset != offset + num_bytes) {
5720 ret = check_extent_exists(root, offset, num_bytes);
5722 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5723 "there is no extent record\n",
5724 offset, offset+num_bytes);
5727 offset = key.offset;
5730 num_bytes += data_len;
5734 btrfs_free_path(path);
5738 static int is_dropped_key(struct btrfs_key *key,
5739 struct btrfs_key *drop_key) {
5740 if (key->objectid < drop_key->objectid)
5742 else if (key->objectid == drop_key->objectid) {
5743 if (key->type < drop_key->type)
5745 else if (key->type == drop_key->type) {
5746 if (key->offset < drop_key->offset)
5754 * Here are the rules for FULL_BACKREF.
5756 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5757 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5759 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5760 * if it happened after the relocation occurred since we'll have dropped the
5761 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5762 * have no real way to know for sure.
5764 * We process the blocks one root at a time, and we start from the lowest root
5765 * objectid and go to the highest. So we can just lookup the owner backref for
5766 * the record and if we don't find it then we know it doesn't exist and we have
5769 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5770 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5771 * be set or not and then we can check later once we've gathered all the refs.
5773 static int calc_extent_flag(struct btrfs_root *root,
5774 struct cache_tree *extent_cache,
5775 struct extent_buffer *buf,
5776 struct root_item_record *ri,
5779 struct extent_record *rec;
5780 struct cache_extent *cache;
5781 struct tree_backref *tback;
5784 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5785 /* we have added this extent before */
5787 rec = container_of(cache, struct extent_record, cache);
5790 * Except file/reloc tree, we can not have
5793 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5798 if (buf->start == ri->bytenr)
5801 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5804 owner = btrfs_header_owner(buf);
5805 if (owner == ri->objectid)
5808 tback = find_tree_backref(rec, 0, owner);
5813 if (rec->flag_block_full_backref != -1 &&
5814 rec->flag_block_full_backref != 0)
5815 rec->bad_full_backref = 1;
5818 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5819 if (rec->flag_block_full_backref != -1 &&
5820 rec->flag_block_full_backref != 1)
5821 rec->bad_full_backref = 1;
5825 static int run_next_block(struct btrfs_root *root,
5826 struct block_info *bits,
5829 struct cache_tree *pending,
5830 struct cache_tree *seen,
5831 struct cache_tree *reada,
5832 struct cache_tree *nodes,
5833 struct cache_tree *extent_cache,
5834 struct cache_tree *chunk_cache,
5835 struct rb_root *dev_cache,
5836 struct block_group_tree *block_group_cache,
5837 struct device_extent_tree *dev_extent_cache,
5838 struct root_item_record *ri)
5840 struct extent_buffer *buf;
5841 struct extent_record *rec = NULL;
5852 struct btrfs_key key;
5853 struct cache_extent *cache;
5856 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5857 bits_nr, &reada_bits);
5862 for(i = 0; i < nritems; i++) {
5863 ret = add_cache_extent(reada, bits[i].start,
5868 /* fixme, get the parent transid */
5869 readahead_tree_block(root, bits[i].start,
5873 *last = bits[0].start;
5874 bytenr = bits[0].start;
5875 size = bits[0].size;
5877 cache = lookup_cache_extent(pending, bytenr, size);
5879 remove_cache_extent(pending, cache);
5882 cache = lookup_cache_extent(reada, bytenr, size);
5884 remove_cache_extent(reada, cache);
5887 cache = lookup_cache_extent(nodes, bytenr, size);
5889 remove_cache_extent(nodes, cache);
5892 cache = lookup_cache_extent(extent_cache, bytenr, size);
5894 rec = container_of(cache, struct extent_record, cache);
5895 gen = rec->parent_generation;
5898 /* fixme, get the real parent transid */
5899 buf = read_tree_block(root, bytenr, size, gen);
5900 if (!extent_buffer_uptodate(buf)) {
5901 record_bad_block_io(root->fs_info,
5902 extent_cache, bytenr, size);
5906 nritems = btrfs_header_nritems(buf);
5909 if (!init_extent_tree) {
5910 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5911 btrfs_header_level(buf), 1, NULL,
5914 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5916 fprintf(stderr, "Couldn't calc extent flags\n");
5917 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5922 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5924 fprintf(stderr, "Couldn't calc extent flags\n");
5925 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5929 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5931 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5932 ri->objectid == btrfs_header_owner(buf)) {
5934 * Ok we got to this block from it's original owner and
5935 * we have FULL_BACKREF set. Relocation can leave
5936 * converted blocks over so this is altogether possible,
5937 * however it's not possible if the generation > the
5938 * last snapshot, so check for this case.
5940 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5941 btrfs_header_generation(buf) > ri->last_snapshot) {
5942 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5943 rec->bad_full_backref = 1;
5948 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5949 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5950 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5951 rec->bad_full_backref = 1;
5955 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5956 rec->flag_block_full_backref = 1;
5960 rec->flag_block_full_backref = 0;
5962 owner = btrfs_header_owner(buf);
5965 ret = check_block(root, extent_cache, buf, flags);
5969 if (btrfs_is_leaf(buf)) {
5970 btree_space_waste += btrfs_leaf_free_space(root, buf);
5971 for (i = 0; i < nritems; i++) {
5972 struct btrfs_file_extent_item *fi;
5973 btrfs_item_key_to_cpu(buf, &key, i);
5974 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5975 process_extent_item(root, extent_cache, buf,
5979 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5980 process_extent_item(root, extent_cache, buf,
5984 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5986 btrfs_item_size_nr(buf, i);
5989 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5990 process_chunk_item(chunk_cache, &key, buf, i);
5993 if (key.type == BTRFS_DEV_ITEM_KEY) {
5994 process_device_item(dev_cache, &key, buf, i);
5997 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5998 process_block_group_item(block_group_cache,
6002 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6003 process_device_extent_item(dev_extent_cache,
6008 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6009 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6010 process_extent_ref_v0(extent_cache, buf, i);
6017 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6018 add_tree_backref(extent_cache, key.objectid, 0,
6022 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6023 add_tree_backref(extent_cache, key.objectid,
6027 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6028 struct btrfs_extent_data_ref *ref;
6029 ref = btrfs_item_ptr(buf, i,
6030 struct btrfs_extent_data_ref);
6031 add_data_backref(extent_cache,
6033 btrfs_extent_data_ref_root(buf, ref),
6034 btrfs_extent_data_ref_objectid(buf,
6036 btrfs_extent_data_ref_offset(buf, ref),
6037 btrfs_extent_data_ref_count(buf, ref),
6038 0, root->sectorsize);
6041 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6042 struct btrfs_shared_data_ref *ref;
6043 ref = btrfs_item_ptr(buf, i,
6044 struct btrfs_shared_data_ref);
6045 add_data_backref(extent_cache,
6046 key.objectid, key.offset, 0, 0, 0,
6047 btrfs_shared_data_ref_count(buf, ref),
6048 0, root->sectorsize);
6051 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6052 struct bad_item *bad;
6054 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6058 bad = malloc(sizeof(struct bad_item));
6061 INIT_LIST_HEAD(&bad->list);
6062 memcpy(&bad->key, &key,
6063 sizeof(struct btrfs_key));
6064 bad->root_id = owner;
6065 list_add_tail(&bad->list, &delete_items);
6068 if (key.type != BTRFS_EXTENT_DATA_KEY)
6070 fi = btrfs_item_ptr(buf, i,
6071 struct btrfs_file_extent_item);
6072 if (btrfs_file_extent_type(buf, fi) ==
6073 BTRFS_FILE_EXTENT_INLINE)
6075 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6078 data_bytes_allocated +=
6079 btrfs_file_extent_disk_num_bytes(buf, fi);
6080 if (data_bytes_allocated < root->sectorsize) {
6083 data_bytes_referenced +=
6084 btrfs_file_extent_num_bytes(buf, fi);
6085 add_data_backref(extent_cache,
6086 btrfs_file_extent_disk_bytenr(buf, fi),
6087 parent, owner, key.objectid, key.offset -
6088 btrfs_file_extent_offset(buf, fi), 1, 1,
6089 btrfs_file_extent_disk_num_bytes(buf, fi));
6093 struct btrfs_key first_key;
6095 first_key.objectid = 0;
6098 btrfs_item_key_to_cpu(buf, &first_key, 0);
6099 level = btrfs_header_level(buf);
6100 for (i = 0; i < nritems; i++) {
6101 ptr = btrfs_node_blockptr(buf, i);
6102 size = btrfs_level_size(root, level - 1);
6103 btrfs_node_key_to_cpu(buf, &key, i);
6105 if ((level == ri->drop_level)
6106 && is_dropped_key(&key, &ri->drop_key)) {
6110 ret = add_extent_rec(extent_cache, &key,
6111 btrfs_node_ptr_generation(buf, i),
6112 ptr, size, 0, 0, 1, 0, 1, 0,
6116 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6119 add_pending(nodes, seen, ptr, size);
6121 add_pending(pending, seen, ptr, size);
6124 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6125 nritems) * sizeof(struct btrfs_key_ptr);
6127 total_btree_bytes += buf->len;
6128 if (fs_root_objectid(btrfs_header_owner(buf)))
6129 total_fs_tree_bytes += buf->len;
6130 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6131 total_extent_tree_bytes += buf->len;
6132 if (!found_old_backref &&
6133 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6134 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6135 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6136 found_old_backref = 1;
6138 free_extent_buffer(buf);
6142 static int add_root_to_pending(struct extent_buffer *buf,
6143 struct cache_tree *extent_cache,
6144 struct cache_tree *pending,
6145 struct cache_tree *seen,
6146 struct cache_tree *nodes,
6149 if (btrfs_header_level(buf) > 0)
6150 add_pending(nodes, seen, buf->start, buf->len);
6152 add_pending(pending, seen, buf->start, buf->len);
6153 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6154 0, 1, 1, 0, 1, 0, buf->len);
6156 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6157 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6158 add_tree_backref(extent_cache, buf->start, buf->start,
6161 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6165 /* as we fix the tree, we might be deleting blocks that
6166 * we're tracking for repair. This hook makes sure we
6167 * remove any backrefs for blocks as we are fixing them.
6169 static int free_extent_hook(struct btrfs_trans_handle *trans,
6170 struct btrfs_root *root,
6171 u64 bytenr, u64 num_bytes, u64 parent,
6172 u64 root_objectid, u64 owner, u64 offset,
6175 struct extent_record *rec;
6176 struct cache_extent *cache;
6178 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6180 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6181 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6185 rec = container_of(cache, struct extent_record, cache);
6187 struct data_backref *back;
6188 back = find_data_backref(rec, parent, root_objectid, owner,
6189 offset, 1, bytenr, num_bytes);
6192 if (back->node.found_ref) {
6193 back->found_ref -= refs_to_drop;
6195 rec->refs -= refs_to_drop;
6197 if (back->node.found_extent_tree) {
6198 back->num_refs -= refs_to_drop;
6199 if (rec->extent_item_refs)
6200 rec->extent_item_refs -= refs_to_drop;
6202 if (back->found_ref == 0)
6203 back->node.found_ref = 0;
6204 if (back->num_refs == 0)
6205 back->node.found_extent_tree = 0;
6207 if (!back->node.found_extent_tree && back->node.found_ref) {
6208 list_del(&back->node.list);
6212 struct tree_backref *back;
6213 back = find_tree_backref(rec, parent, root_objectid);
6216 if (back->node.found_ref) {
6219 back->node.found_ref = 0;
6221 if (back->node.found_extent_tree) {
6222 if (rec->extent_item_refs)
6223 rec->extent_item_refs--;
6224 back->node.found_extent_tree = 0;
6226 if (!back->node.found_extent_tree && back->node.found_ref) {
6227 list_del(&back->node.list);
6231 maybe_free_extent_rec(extent_cache, rec);
6236 static int delete_extent_records(struct btrfs_trans_handle *trans,
6237 struct btrfs_root *root,
6238 struct btrfs_path *path,
6239 u64 bytenr, u64 new_len)
6241 struct btrfs_key key;
6242 struct btrfs_key found_key;
6243 struct extent_buffer *leaf;
6248 key.objectid = bytenr;
6250 key.offset = (u64)-1;
6253 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6260 if (path->slots[0] == 0)
6266 leaf = path->nodes[0];
6267 slot = path->slots[0];
6269 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6270 if (found_key.objectid != bytenr)
6273 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6274 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6275 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6276 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6277 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6278 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6279 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6280 btrfs_release_path(path);
6281 if (found_key.type == 0) {
6282 if (found_key.offset == 0)
6284 key.offset = found_key.offset - 1;
6285 key.type = found_key.type;
6287 key.type = found_key.type - 1;
6288 key.offset = (u64)-1;
6292 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6293 found_key.objectid, found_key.type, found_key.offset);
6295 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6298 btrfs_release_path(path);
6300 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6301 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6302 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6303 found_key.offset : root->leafsize;
6305 ret = btrfs_update_block_group(trans, root, bytenr,
6312 btrfs_release_path(path);
6317 * for a single backref, this will allocate a new extent
6318 * and add the backref to it.
6320 static int record_extent(struct btrfs_trans_handle *trans,
6321 struct btrfs_fs_info *info,
6322 struct btrfs_path *path,
6323 struct extent_record *rec,
6324 struct extent_backref *back,
6325 int allocated, u64 flags)
6328 struct btrfs_root *extent_root = info->extent_root;
6329 struct extent_buffer *leaf;
6330 struct btrfs_key ins_key;
6331 struct btrfs_extent_item *ei;
6332 struct tree_backref *tback;
6333 struct data_backref *dback;
6334 struct btrfs_tree_block_info *bi;
6337 rec->max_size = max_t(u64, rec->max_size,
6338 info->extent_root->leafsize);
6341 u32 item_size = sizeof(*ei);
6344 item_size += sizeof(*bi);
6346 ins_key.objectid = rec->start;
6347 ins_key.offset = rec->max_size;
6348 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6350 ret = btrfs_insert_empty_item(trans, extent_root, path,
6351 &ins_key, item_size);
6355 leaf = path->nodes[0];
6356 ei = btrfs_item_ptr(leaf, path->slots[0],
6357 struct btrfs_extent_item);
6359 btrfs_set_extent_refs(leaf, ei, 0);
6360 btrfs_set_extent_generation(leaf, ei, rec->generation);
6362 if (back->is_data) {
6363 btrfs_set_extent_flags(leaf, ei,
6364 BTRFS_EXTENT_FLAG_DATA);
6366 struct btrfs_disk_key copy_key;;
6368 tback = (struct tree_backref *)back;
6369 bi = (struct btrfs_tree_block_info *)(ei + 1);
6370 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6373 btrfs_set_disk_key_objectid(©_key,
6374 rec->info_objectid);
6375 btrfs_set_disk_key_type(©_key, 0);
6376 btrfs_set_disk_key_offset(©_key, 0);
6378 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6379 btrfs_set_tree_block_key(leaf, bi, ©_key);
6381 btrfs_set_extent_flags(leaf, ei,
6382 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6385 btrfs_mark_buffer_dirty(leaf);
6386 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6387 rec->max_size, 1, 0);
6390 btrfs_release_path(path);
6393 if (back->is_data) {
6397 dback = (struct data_backref *)back;
6398 if (back->full_backref)
6399 parent = dback->parent;
6403 for (i = 0; i < dback->found_ref; i++) {
6404 /* if parent != 0, we're doing a full backref
6405 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6406 * just makes the backref allocator create a data
6409 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6410 rec->start, rec->max_size,
6414 BTRFS_FIRST_FREE_OBJECTID :
6420 fprintf(stderr, "adding new data backref"
6421 " on %llu %s %llu owner %llu"
6422 " offset %llu found %d\n",
6423 (unsigned long long)rec->start,
6424 back->full_backref ?
6426 back->full_backref ?
6427 (unsigned long long)parent :
6428 (unsigned long long)dback->root,
6429 (unsigned long long)dback->owner,
6430 (unsigned long long)dback->offset,
6435 tback = (struct tree_backref *)back;
6436 if (back->full_backref)
6437 parent = tback->parent;
6441 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6442 rec->start, rec->max_size,
6443 parent, tback->root, 0, 0);
6444 fprintf(stderr, "adding new tree backref on "
6445 "start %llu len %llu parent %llu root %llu\n",
6446 rec->start, rec->max_size, parent, tback->root);
6449 btrfs_release_path(path);
6453 struct extent_entry {
6458 struct list_head list;
6461 static struct extent_entry *find_entry(struct list_head *entries,
6462 u64 bytenr, u64 bytes)
6464 struct extent_entry *entry = NULL;
6466 list_for_each_entry(entry, entries, list) {
6467 if (entry->bytenr == bytenr && entry->bytes == bytes)
6474 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6476 struct extent_entry *entry, *best = NULL, *prev = NULL;
6478 list_for_each_entry(entry, entries, list) {
6485 * If there are as many broken entries as entries then we know
6486 * not to trust this particular entry.
6488 if (entry->broken == entry->count)
6492 * If our current entry == best then we can't be sure our best
6493 * is really the best, so we need to keep searching.
6495 if (best && best->count == entry->count) {
6501 /* Prev == entry, not good enough, have to keep searching */
6502 if (!prev->broken && prev->count == entry->count)
6506 best = (prev->count > entry->count) ? prev : entry;
6507 else if (best->count < entry->count)
6515 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6516 struct data_backref *dback, struct extent_entry *entry)
6518 struct btrfs_trans_handle *trans;
6519 struct btrfs_root *root;
6520 struct btrfs_file_extent_item *fi;
6521 struct extent_buffer *leaf;
6522 struct btrfs_key key;
6526 key.objectid = dback->root;
6527 key.type = BTRFS_ROOT_ITEM_KEY;
6528 key.offset = (u64)-1;
6529 root = btrfs_read_fs_root(info, &key);
6531 fprintf(stderr, "Couldn't find root for our ref\n");
6536 * The backref points to the original offset of the extent if it was
6537 * split, so we need to search down to the offset we have and then walk
6538 * forward until we find the backref we're looking for.
6540 key.objectid = dback->owner;
6541 key.type = BTRFS_EXTENT_DATA_KEY;
6542 key.offset = dback->offset;
6543 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6545 fprintf(stderr, "Error looking up ref %d\n", ret);
6550 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6551 ret = btrfs_next_leaf(root, path);
6553 fprintf(stderr, "Couldn't find our ref, next\n");
6557 leaf = path->nodes[0];
6558 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6559 if (key.objectid != dback->owner ||
6560 key.type != BTRFS_EXTENT_DATA_KEY) {
6561 fprintf(stderr, "Couldn't find our ref, search\n");
6564 fi = btrfs_item_ptr(leaf, path->slots[0],
6565 struct btrfs_file_extent_item);
6566 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6567 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6569 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6574 btrfs_release_path(path);
6576 trans = btrfs_start_transaction(root, 1);
6578 return PTR_ERR(trans);
6581 * Ok we have the key of the file extent we want to fix, now we can cow
6582 * down to the thing and fix it.
6584 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6586 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6587 key.objectid, key.type, key.offset, ret);
6591 fprintf(stderr, "Well that's odd, we just found this key "
6592 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6597 leaf = path->nodes[0];
6598 fi = btrfs_item_ptr(leaf, path->slots[0],
6599 struct btrfs_file_extent_item);
6601 if (btrfs_file_extent_compression(leaf, fi) &&
6602 dback->disk_bytenr != entry->bytenr) {
6603 fprintf(stderr, "Ref doesn't match the record start and is "
6604 "compressed, please take a btrfs-image of this file "
6605 "system and send it to a btrfs developer so they can "
6606 "complete this functionality for bytenr %Lu\n",
6607 dback->disk_bytenr);
6612 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6613 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6614 } else if (dback->disk_bytenr > entry->bytenr) {
6615 u64 off_diff, offset;
6617 off_diff = dback->disk_bytenr - entry->bytenr;
6618 offset = btrfs_file_extent_offset(leaf, fi);
6619 if (dback->disk_bytenr + offset +
6620 btrfs_file_extent_num_bytes(leaf, fi) >
6621 entry->bytenr + entry->bytes) {
6622 fprintf(stderr, "Ref is past the entry end, please "
6623 "take a btrfs-image of this file system and "
6624 "send it to a btrfs developer, ref %Lu\n",
6625 dback->disk_bytenr);
6630 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6631 btrfs_set_file_extent_offset(leaf, fi, offset);
6632 } else if (dback->disk_bytenr < entry->bytenr) {
6635 offset = btrfs_file_extent_offset(leaf, fi);
6636 if (dback->disk_bytenr + offset < entry->bytenr) {
6637 fprintf(stderr, "Ref is before the entry start, please"
6638 " take a btrfs-image of this file system and "
6639 "send it to a btrfs developer, ref %Lu\n",
6640 dback->disk_bytenr);
6645 offset += dback->disk_bytenr;
6646 offset -= entry->bytenr;
6647 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6648 btrfs_set_file_extent_offset(leaf, fi, offset);
6651 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6654 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6655 * only do this if we aren't using compression, otherwise it's a
6658 if (!btrfs_file_extent_compression(leaf, fi))
6659 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6661 printf("ram bytes may be wrong?\n");
6662 btrfs_mark_buffer_dirty(leaf);
6664 err = btrfs_commit_transaction(trans, root);
6665 btrfs_release_path(path);
6666 return ret ? ret : err;
6669 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6670 struct extent_record *rec)
6672 struct extent_backref *back;
6673 struct data_backref *dback;
6674 struct extent_entry *entry, *best = NULL;
6677 int broken_entries = 0;
6682 * Metadata is easy and the backrefs should always agree on bytenr and
6683 * size, if not we've got bigger issues.
6688 list_for_each_entry(back, &rec->backrefs, list) {
6689 if (back->full_backref || !back->is_data)
6692 dback = (struct data_backref *)back;
6695 * We only pay attention to backrefs that we found a real
6698 if (dback->found_ref == 0)
6702 * For now we only catch when the bytes don't match, not the
6703 * bytenr. We can easily do this at the same time, but I want
6704 * to have a fs image to test on before we just add repair
6705 * functionality willy-nilly so we know we won't screw up the
6709 entry = find_entry(&entries, dback->disk_bytenr,
6712 entry = malloc(sizeof(struct extent_entry));
6717 memset(entry, 0, sizeof(*entry));
6718 entry->bytenr = dback->disk_bytenr;
6719 entry->bytes = dback->bytes;
6720 list_add_tail(&entry->list, &entries);
6725 * If we only have on entry we may think the entries agree when
6726 * in reality they don't so we have to do some extra checking.
6728 if (dback->disk_bytenr != rec->start ||
6729 dback->bytes != rec->nr || back->broken)
6740 /* Yay all the backrefs agree, carry on good sir */
6741 if (nr_entries <= 1 && !mismatch)
6744 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6745 "%Lu\n", rec->start);
6748 * First we want to see if the backrefs can agree amongst themselves who
6749 * is right, so figure out which one of the entries has the highest
6752 best = find_most_right_entry(&entries);
6755 * Ok so we may have an even split between what the backrefs think, so
6756 * this is where we use the extent ref to see what it thinks.
6759 entry = find_entry(&entries, rec->start, rec->nr);
6760 if (!entry && (!broken_entries || !rec->found_rec)) {
6761 fprintf(stderr, "Backrefs don't agree with each other "
6762 "and extent record doesn't agree with anybody,"
6763 " so we can't fix bytenr %Lu bytes %Lu\n",
6764 rec->start, rec->nr);
6767 } else if (!entry) {
6769 * Ok our backrefs were broken, we'll assume this is the
6770 * correct value and add an entry for this range.
6772 entry = malloc(sizeof(struct extent_entry));
6777 memset(entry, 0, sizeof(*entry));
6778 entry->bytenr = rec->start;
6779 entry->bytes = rec->nr;
6780 list_add_tail(&entry->list, &entries);
6784 best = find_most_right_entry(&entries);
6786 fprintf(stderr, "Backrefs and extent record evenly "
6787 "split on who is right, this is going to "
6788 "require user input to fix bytenr %Lu bytes "
6789 "%Lu\n", rec->start, rec->nr);
6796 * I don't think this can happen currently as we'll abort() if we catch
6797 * this case higher up, but in case somebody removes that we still can't
6798 * deal with it properly here yet, so just bail out of that's the case.
6800 if (best->bytenr != rec->start) {
6801 fprintf(stderr, "Extent start and backref starts don't match, "
6802 "please use btrfs-image on this file system and send "
6803 "it to a btrfs developer so they can make fsck fix "
6804 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6805 rec->start, rec->nr);
6811 * Ok great we all agreed on an extent record, let's go find the real
6812 * references and fix up the ones that don't match.
6814 list_for_each_entry(back, &rec->backrefs, list) {
6815 if (back->full_backref || !back->is_data)
6818 dback = (struct data_backref *)back;
6821 * Still ignoring backrefs that don't have a real ref attached
6824 if (dback->found_ref == 0)
6827 if (dback->bytes == best->bytes &&
6828 dback->disk_bytenr == best->bytenr)
6831 ret = repair_ref(info, path, dback, best);
6837 * Ok we messed with the actual refs, which means we need to drop our
6838 * entire cache and go back and rescan. I know this is a huge pain and
6839 * adds a lot of extra work, but it's the only way to be safe. Once all
6840 * the backrefs agree we may not need to do anything to the extent
6845 while (!list_empty(&entries)) {
6846 entry = list_entry(entries.next, struct extent_entry, list);
6847 list_del_init(&entry->list);
6853 static int process_duplicates(struct btrfs_root *root,
6854 struct cache_tree *extent_cache,
6855 struct extent_record *rec)
6857 struct extent_record *good, *tmp;
6858 struct cache_extent *cache;
6862 * If we found a extent record for this extent then return, or if we
6863 * have more than one duplicate we are likely going to need to delete
6866 if (rec->found_rec || rec->num_duplicates > 1)
6869 /* Shouldn't happen but just in case */
6870 BUG_ON(!rec->num_duplicates);
6873 * So this happens if we end up with a backref that doesn't match the
6874 * actual extent entry. So either the backref is bad or the extent
6875 * entry is bad. Either way we want to have the extent_record actually
6876 * reflect what we found in the extent_tree, so we need to take the
6877 * duplicate out and use that as the extent_record since the only way we
6878 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6880 remove_cache_extent(extent_cache, &rec->cache);
6882 good = list_entry(rec->dups.next, struct extent_record, list);
6883 list_del_init(&good->list);
6884 INIT_LIST_HEAD(&good->backrefs);
6885 INIT_LIST_HEAD(&good->dups);
6886 good->cache.start = good->start;
6887 good->cache.size = good->nr;
6888 good->content_checked = 0;
6889 good->owner_ref_checked = 0;
6890 good->num_duplicates = 0;
6891 good->refs = rec->refs;
6892 list_splice_init(&rec->backrefs, &good->backrefs);
6894 cache = lookup_cache_extent(extent_cache, good->start,
6898 tmp = container_of(cache, struct extent_record, cache);
6901 * If we find another overlapping extent and it's found_rec is
6902 * set then it's a duplicate and we need to try and delete
6905 if (tmp->found_rec || tmp->num_duplicates > 0) {
6906 if (list_empty(&good->list))
6907 list_add_tail(&good->list,
6908 &duplicate_extents);
6909 good->num_duplicates += tmp->num_duplicates + 1;
6910 list_splice_init(&tmp->dups, &good->dups);
6911 list_del_init(&tmp->list);
6912 list_add_tail(&tmp->list, &good->dups);
6913 remove_cache_extent(extent_cache, &tmp->cache);
6918 * Ok we have another non extent item backed extent rec, so lets
6919 * just add it to this extent and carry on like we did above.
6921 good->refs += tmp->refs;
6922 list_splice_init(&tmp->backrefs, &good->backrefs);
6923 remove_cache_extent(extent_cache, &tmp->cache);
6926 ret = insert_cache_extent(extent_cache, &good->cache);
6929 return good->num_duplicates ? 0 : 1;
6932 static int delete_duplicate_records(struct btrfs_root *root,
6933 struct extent_record *rec)
6935 struct btrfs_trans_handle *trans;
6936 LIST_HEAD(delete_list);
6937 struct btrfs_path *path;
6938 struct extent_record *tmp, *good, *n;
6941 struct btrfs_key key;
6943 path = btrfs_alloc_path();
6950 /* Find the record that covers all of the duplicates. */
6951 list_for_each_entry(tmp, &rec->dups, list) {
6952 if (good->start < tmp->start)
6954 if (good->nr > tmp->nr)
6957 if (tmp->start + tmp->nr < good->start + good->nr) {
6958 fprintf(stderr, "Ok we have overlapping extents that "
6959 "aren't completely covered by eachother, this "
6960 "is going to require more careful thought. "
6961 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6962 tmp->start, tmp->nr, good->start, good->nr);
6969 list_add_tail(&rec->list, &delete_list);
6971 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6974 list_move_tail(&tmp->list, &delete_list);
6977 root = root->fs_info->extent_root;
6978 trans = btrfs_start_transaction(root, 1);
6979 if (IS_ERR(trans)) {
6980 ret = PTR_ERR(trans);
6984 list_for_each_entry(tmp, &delete_list, list) {
6985 if (tmp->found_rec == 0)
6987 key.objectid = tmp->start;
6988 key.type = BTRFS_EXTENT_ITEM_KEY;
6989 key.offset = tmp->nr;
6991 /* Shouldn't happen but just in case */
6992 if (tmp->metadata) {
6993 fprintf(stderr, "Well this shouldn't happen, extent "
6994 "record overlaps but is metadata? "
6995 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6999 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7005 ret = btrfs_del_item(trans, root, path);
7008 btrfs_release_path(path);
7011 err = btrfs_commit_transaction(trans, root);
7015 while (!list_empty(&delete_list)) {
7016 tmp = list_entry(delete_list.next, struct extent_record, list);
7017 list_del_init(&tmp->list);
7023 while (!list_empty(&rec->dups)) {
7024 tmp = list_entry(rec->dups.next, struct extent_record, list);
7025 list_del_init(&tmp->list);
7029 btrfs_free_path(path);
7031 if (!ret && !nr_del)
7032 rec->num_duplicates = 0;
7034 return ret ? ret : nr_del;
7037 static int find_possible_backrefs(struct btrfs_fs_info *info,
7038 struct btrfs_path *path,
7039 struct cache_tree *extent_cache,
7040 struct extent_record *rec)
7042 struct btrfs_root *root;
7043 struct extent_backref *back;
7044 struct data_backref *dback;
7045 struct cache_extent *cache;
7046 struct btrfs_file_extent_item *fi;
7047 struct btrfs_key key;
7051 list_for_each_entry(back, &rec->backrefs, list) {
7052 /* Don't care about full backrefs (poor unloved backrefs) */
7053 if (back->full_backref || !back->is_data)
7056 dback = (struct data_backref *)back;
7058 /* We found this one, we don't need to do a lookup */
7059 if (dback->found_ref)
7062 key.objectid = dback->root;
7063 key.type = BTRFS_ROOT_ITEM_KEY;
7064 key.offset = (u64)-1;
7066 root = btrfs_read_fs_root(info, &key);
7068 /* No root, definitely a bad ref, skip */
7069 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7071 /* Other err, exit */
7073 return PTR_ERR(root);
7075 key.objectid = dback->owner;
7076 key.type = BTRFS_EXTENT_DATA_KEY;
7077 key.offset = dback->offset;
7078 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7080 btrfs_release_path(path);
7083 /* Didn't find it, we can carry on */
7088 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7089 struct btrfs_file_extent_item);
7090 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7091 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7092 btrfs_release_path(path);
7093 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7095 struct extent_record *tmp;
7096 tmp = container_of(cache, struct extent_record, cache);
7099 * If we found an extent record for the bytenr for this
7100 * particular backref then we can't add it to our
7101 * current extent record. We only want to add backrefs
7102 * that don't have a corresponding extent item in the
7103 * extent tree since they likely belong to this record
7104 * and we need to fix it if it doesn't match bytenrs.
7110 dback->found_ref += 1;
7111 dback->disk_bytenr = bytenr;
7112 dback->bytes = bytes;
7115 * Set this so the verify backref code knows not to trust the
7116 * values in this backref.
7125 * Record orphan data ref into corresponding root.
7127 * Return 0 if the extent item contains data ref and recorded.
7128 * Return 1 if the extent item contains no useful data ref
7129 * On that case, it may contains only shared_dataref or metadata backref
7130 * or the file extent exists(this should be handled by the extent bytenr
7132 * Return <0 if something goes wrong.
7134 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7135 struct extent_record *rec)
7137 struct btrfs_key key;
7138 struct btrfs_root *dest_root;
7139 struct extent_backref *back;
7140 struct data_backref *dback;
7141 struct orphan_data_extent *orphan;
7142 struct btrfs_path *path;
7143 int recorded_data_ref = 0;
7148 path = btrfs_alloc_path();
7151 list_for_each_entry(back, &rec->backrefs, list) {
7152 if (back->full_backref || !back->is_data ||
7153 !back->found_extent_tree)
7155 dback = (struct data_backref *)back;
7156 if (dback->found_ref)
7158 key.objectid = dback->root;
7159 key.type = BTRFS_ROOT_ITEM_KEY;
7160 key.offset = (u64)-1;
7162 dest_root = btrfs_read_fs_root(fs_info, &key);
7164 /* For non-exist root we just skip it */
7165 if (IS_ERR(dest_root) || !dest_root)
7168 key.objectid = dback->owner;
7169 key.type = BTRFS_EXTENT_DATA_KEY;
7170 key.offset = dback->offset;
7172 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7174 * For ret < 0, it's OK since the fs-tree may be corrupted,
7175 * we need to record it for inode/file extent rebuild.
7176 * For ret > 0, we record it only for file extent rebuild.
7177 * For ret == 0, the file extent exists but only bytenr
7178 * mismatch, let the original bytenr fix routine to handle,
7184 orphan = malloc(sizeof(*orphan));
7189 INIT_LIST_HEAD(&orphan->list);
7190 orphan->root = dback->root;
7191 orphan->objectid = dback->owner;
7192 orphan->offset = dback->offset;
7193 orphan->disk_bytenr = rec->cache.start;
7194 orphan->disk_len = rec->cache.size;
7195 list_add(&dest_root->orphan_data_extents, &orphan->list);
7196 recorded_data_ref = 1;
7199 btrfs_free_path(path);
7201 return !recorded_data_ref;
7207 * when an incorrect extent item is found, this will delete
7208 * all of the existing entries for it and recreate them
7209 * based on what the tree scan found.
7211 static int fixup_extent_refs(struct btrfs_fs_info *info,
7212 struct cache_tree *extent_cache,
7213 struct extent_record *rec)
7215 struct btrfs_trans_handle *trans = NULL;
7217 struct btrfs_path *path;
7218 struct list_head *cur = rec->backrefs.next;
7219 struct cache_extent *cache;
7220 struct extent_backref *back;
7224 if (rec->flag_block_full_backref)
7225 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7227 path = btrfs_alloc_path();
7231 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7233 * Sometimes the backrefs themselves are so broken they don't
7234 * get attached to any meaningful rec, so first go back and
7235 * check any of our backrefs that we couldn't find and throw
7236 * them into the list if we find the backref so that
7237 * verify_backrefs can figure out what to do.
7239 ret = find_possible_backrefs(info, path, extent_cache, rec);
7244 /* step one, make sure all of the backrefs agree */
7245 ret = verify_backrefs(info, path, rec);
7249 trans = btrfs_start_transaction(info->extent_root, 1);
7250 if (IS_ERR(trans)) {
7251 ret = PTR_ERR(trans);
7255 /* step two, delete all the existing records */
7256 ret = delete_extent_records(trans, info->extent_root, path,
7257 rec->start, rec->max_size);
7262 /* was this block corrupt? If so, don't add references to it */
7263 cache = lookup_cache_extent(info->corrupt_blocks,
7264 rec->start, rec->max_size);
7270 /* step three, recreate all the refs we did find */
7271 while(cur != &rec->backrefs) {
7272 back = list_entry(cur, struct extent_backref, list);
7276 * if we didn't find any references, don't create a
7279 if (!back->found_ref)
7282 rec->bad_full_backref = 0;
7283 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7291 int err = btrfs_commit_transaction(trans, info->extent_root);
7296 btrfs_free_path(path);
7300 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7301 struct extent_record *rec)
7303 struct btrfs_trans_handle *trans;
7304 struct btrfs_root *root = fs_info->extent_root;
7305 struct btrfs_path *path;
7306 struct btrfs_extent_item *ei;
7307 struct btrfs_key key;
7311 key.objectid = rec->start;
7312 if (rec->metadata) {
7313 key.type = BTRFS_METADATA_ITEM_KEY;
7314 key.offset = rec->info_level;
7316 key.type = BTRFS_EXTENT_ITEM_KEY;
7317 key.offset = rec->max_size;
7320 path = btrfs_alloc_path();
7324 trans = btrfs_start_transaction(root, 0);
7325 if (IS_ERR(trans)) {
7326 btrfs_free_path(path);
7327 return PTR_ERR(trans);
7330 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7332 btrfs_free_path(path);
7333 btrfs_commit_transaction(trans, root);
7336 fprintf(stderr, "Didn't find extent for %llu\n",
7337 (unsigned long long)rec->start);
7338 btrfs_free_path(path);
7339 btrfs_commit_transaction(trans, root);
7343 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7344 struct btrfs_extent_item);
7345 flags = btrfs_extent_flags(path->nodes[0], ei);
7346 if (rec->flag_block_full_backref) {
7347 fprintf(stderr, "setting full backref on %llu\n",
7348 (unsigned long long)key.objectid);
7349 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7351 fprintf(stderr, "clearing full backref on %llu\n",
7352 (unsigned long long)key.objectid);
7353 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7355 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7356 btrfs_mark_buffer_dirty(path->nodes[0]);
7357 btrfs_free_path(path);
7358 return btrfs_commit_transaction(trans, root);
7361 /* right now we only prune from the extent allocation tree */
7362 static int prune_one_block(struct btrfs_trans_handle *trans,
7363 struct btrfs_fs_info *info,
7364 struct btrfs_corrupt_block *corrupt)
7367 struct btrfs_path path;
7368 struct extent_buffer *eb;
7372 int level = corrupt->level + 1;
7374 btrfs_init_path(&path);
7376 /* we want to stop at the parent to our busted block */
7377 path.lowest_level = level;
7379 ret = btrfs_search_slot(trans, info->extent_root,
7380 &corrupt->key, &path, -1, 1);
7385 eb = path.nodes[level];
7392 * hopefully the search gave us the block we want to prune,
7393 * lets try that first
7395 slot = path.slots[level];
7396 found = btrfs_node_blockptr(eb, slot);
7397 if (found == corrupt->cache.start)
7400 nritems = btrfs_header_nritems(eb);
7402 /* the search failed, lets scan this node and hope we find it */
7403 for (slot = 0; slot < nritems; slot++) {
7404 found = btrfs_node_blockptr(eb, slot);
7405 if (found == corrupt->cache.start)
7409 * we couldn't find the bad block. TODO, search all the nodes for pointers
7412 if (eb == info->extent_root->node) {
7417 btrfs_release_path(&path);
7422 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7423 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7426 btrfs_release_path(&path);
7430 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7432 struct btrfs_trans_handle *trans = NULL;
7433 struct cache_extent *cache;
7434 struct btrfs_corrupt_block *corrupt;
7437 cache = search_cache_extent(info->corrupt_blocks, 0);
7441 trans = btrfs_start_transaction(info->extent_root, 1);
7443 return PTR_ERR(trans);
7445 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7446 prune_one_block(trans, info, corrupt);
7447 remove_cache_extent(info->corrupt_blocks, cache);
7450 return btrfs_commit_transaction(trans, info->extent_root);
7454 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7456 struct btrfs_block_group_cache *cache;
7461 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7462 &start, &end, EXTENT_DIRTY);
7465 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7471 cache = btrfs_lookup_first_block_group(fs_info, start);
7476 start = cache->key.objectid + cache->key.offset;
7480 static int check_extent_refs(struct btrfs_root *root,
7481 struct cache_tree *extent_cache)
7483 struct extent_record *rec;
7484 struct cache_extent *cache;
7493 * if we're doing a repair, we have to make sure
7494 * we don't allocate from the problem extents.
7495 * In the worst case, this will be all the
7498 cache = search_cache_extent(extent_cache, 0);
7500 rec = container_of(cache, struct extent_record, cache);
7501 set_extent_dirty(root->fs_info->excluded_extents,
7503 rec->start + rec->max_size - 1,
7505 cache = next_cache_extent(cache);
7508 /* pin down all the corrupted blocks too */
7509 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7511 set_extent_dirty(root->fs_info->excluded_extents,
7513 cache->start + cache->size - 1,
7515 cache = next_cache_extent(cache);
7517 prune_corrupt_blocks(root->fs_info);
7518 reset_cached_block_groups(root->fs_info);
7521 reset_cached_block_groups(root->fs_info);
7524 * We need to delete any duplicate entries we find first otherwise we
7525 * could mess up the extent tree when we have backrefs that actually
7526 * belong to a different extent item and not the weird duplicate one.
7528 while (repair && !list_empty(&duplicate_extents)) {
7529 rec = list_entry(duplicate_extents.next, struct extent_record,
7531 list_del_init(&rec->list);
7533 /* Sometimes we can find a backref before we find an actual
7534 * extent, so we need to process it a little bit to see if there
7535 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7536 * if this is a backref screwup. If we need to delete stuff
7537 * process_duplicates() will return 0, otherwise it will return
7540 if (process_duplicates(root, extent_cache, rec))
7542 ret = delete_duplicate_records(root, rec);
7546 * delete_duplicate_records will return the number of entries
7547 * deleted, so if it's greater than 0 then we know we actually
7548 * did something and we need to remove.
7562 cache = search_cache_extent(extent_cache, 0);
7565 rec = container_of(cache, struct extent_record, cache);
7566 if (rec->num_duplicates) {
7567 fprintf(stderr, "extent item %llu has multiple extent "
7568 "items\n", (unsigned long long)rec->start);
7573 if (rec->refs != rec->extent_item_refs) {
7574 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7575 (unsigned long long)rec->start,
7576 (unsigned long long)rec->nr);
7577 fprintf(stderr, "extent item %llu, found %llu\n",
7578 (unsigned long long)rec->extent_item_refs,
7579 (unsigned long long)rec->refs);
7580 ret = record_orphan_data_extents(root->fs_info, rec);
7587 * we can't use the extent to repair file
7588 * extent, let the fallback method handle it.
7590 if (!fixed && repair) {
7591 ret = fixup_extent_refs(
7602 if (all_backpointers_checked(rec, 1)) {
7603 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7604 (unsigned long long)rec->start,
7605 (unsigned long long)rec->nr);
7607 if (!fixed && !recorded && repair) {
7608 ret = fixup_extent_refs(root->fs_info,
7617 if (!rec->owner_ref_checked) {
7618 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7619 (unsigned long long)rec->start,
7620 (unsigned long long)rec->nr);
7621 if (!fixed && !recorded && repair) {
7622 ret = fixup_extent_refs(root->fs_info,
7631 if (rec->bad_full_backref) {
7632 fprintf(stderr, "bad full backref, on [%llu]\n",
7633 (unsigned long long)rec->start);
7635 ret = fixup_extent_flags(root->fs_info, rec);
7644 * Although it's not a extent ref's problem, we reuse this
7645 * routine for error reporting.
7646 * No repair function yet.
7648 if (rec->crossing_stripes) {
7650 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7651 rec->start, rec->start + rec->max_size);
7656 if (rec->wrong_chunk_type) {
7658 "bad extent [%llu, %llu), type mismatch with chunk\n",
7659 rec->start, rec->start + rec->max_size);
7664 remove_cache_extent(extent_cache, cache);
7665 free_all_extent_backrefs(rec);
7666 if (!init_extent_tree && repair && (!cur_err || fixed))
7667 clear_extent_dirty(root->fs_info->excluded_extents,
7669 rec->start + rec->max_size - 1,
7675 if (ret && ret != -EAGAIN) {
7676 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7679 struct btrfs_trans_handle *trans;
7681 root = root->fs_info->extent_root;
7682 trans = btrfs_start_transaction(root, 1);
7683 if (IS_ERR(trans)) {
7684 ret = PTR_ERR(trans);
7688 btrfs_fix_block_accounting(trans, root);
7689 ret = btrfs_commit_transaction(trans, root);
7694 fprintf(stderr, "repaired damaged extent references\n");
7700 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7704 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7705 stripe_size = length;
7706 stripe_size /= num_stripes;
7707 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7708 stripe_size = length * 2;
7709 stripe_size /= num_stripes;
7710 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7711 stripe_size = length;
7712 stripe_size /= (num_stripes - 1);
7713 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7714 stripe_size = length;
7715 stripe_size /= (num_stripes - 2);
7717 stripe_size = length;
7723 * Check the chunk with its block group/dev list ref:
7724 * Return 0 if all refs seems valid.
7725 * Return 1 if part of refs seems valid, need later check for rebuild ref
7726 * like missing block group and needs to search extent tree to rebuild them.
7727 * Return -1 if essential refs are missing and unable to rebuild.
7729 static int check_chunk_refs(struct chunk_record *chunk_rec,
7730 struct block_group_tree *block_group_cache,
7731 struct device_extent_tree *dev_extent_cache,
7734 struct cache_extent *block_group_item;
7735 struct block_group_record *block_group_rec;
7736 struct cache_extent *dev_extent_item;
7737 struct device_extent_record *dev_extent_rec;
7741 int metadump_v2 = 0;
7745 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7748 if (block_group_item) {
7749 block_group_rec = container_of(block_group_item,
7750 struct block_group_record,
7752 if (chunk_rec->length != block_group_rec->offset ||
7753 chunk_rec->offset != block_group_rec->objectid ||
7755 chunk_rec->type_flags != block_group_rec->flags)) {
7758 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7759 chunk_rec->objectid,
7764 chunk_rec->type_flags,
7765 block_group_rec->objectid,
7766 block_group_rec->type,
7767 block_group_rec->offset,
7768 block_group_rec->offset,
7769 block_group_rec->objectid,
7770 block_group_rec->flags);
7773 list_del_init(&block_group_rec->list);
7774 chunk_rec->bg_rec = block_group_rec;
7779 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7780 chunk_rec->objectid,
7785 chunk_rec->type_flags);
7792 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7793 chunk_rec->num_stripes);
7794 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7795 devid = chunk_rec->stripes[i].devid;
7796 offset = chunk_rec->stripes[i].offset;
7797 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7798 devid, offset, length);
7799 if (dev_extent_item) {
7800 dev_extent_rec = container_of(dev_extent_item,
7801 struct device_extent_record,
7803 if (dev_extent_rec->objectid != devid ||
7804 dev_extent_rec->offset != offset ||
7805 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7806 dev_extent_rec->length != length) {
7809 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7810 chunk_rec->objectid,
7813 chunk_rec->stripes[i].devid,
7814 chunk_rec->stripes[i].offset,
7815 dev_extent_rec->objectid,
7816 dev_extent_rec->offset,
7817 dev_extent_rec->length);
7820 list_move(&dev_extent_rec->chunk_list,
7821 &chunk_rec->dextents);
7826 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7827 chunk_rec->objectid,
7830 chunk_rec->stripes[i].devid,
7831 chunk_rec->stripes[i].offset);
7838 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7839 int check_chunks(struct cache_tree *chunk_cache,
7840 struct block_group_tree *block_group_cache,
7841 struct device_extent_tree *dev_extent_cache,
7842 struct list_head *good, struct list_head *bad,
7843 struct list_head *rebuild, int silent)
7845 struct cache_extent *chunk_item;
7846 struct chunk_record *chunk_rec;
7847 struct block_group_record *bg_rec;
7848 struct device_extent_record *dext_rec;
7852 chunk_item = first_cache_extent(chunk_cache);
7853 while (chunk_item) {
7854 chunk_rec = container_of(chunk_item, struct chunk_record,
7856 err = check_chunk_refs(chunk_rec, block_group_cache,
7857 dev_extent_cache, silent);
7860 if (err == 0 && good)
7861 list_add_tail(&chunk_rec->list, good);
7862 if (err > 0 && rebuild)
7863 list_add_tail(&chunk_rec->list, rebuild);
7865 list_add_tail(&chunk_rec->list, bad);
7866 chunk_item = next_cache_extent(chunk_item);
7869 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7872 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7880 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7884 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7895 static int check_device_used(struct device_record *dev_rec,
7896 struct device_extent_tree *dext_cache)
7898 struct cache_extent *cache;
7899 struct device_extent_record *dev_extent_rec;
7902 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7904 dev_extent_rec = container_of(cache,
7905 struct device_extent_record,
7907 if (dev_extent_rec->objectid != dev_rec->devid)
7910 list_del_init(&dev_extent_rec->device_list);
7911 total_byte += dev_extent_rec->length;
7912 cache = next_cache_extent(cache);
7915 if (total_byte != dev_rec->byte_used) {
7917 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7918 total_byte, dev_rec->byte_used, dev_rec->objectid,
7919 dev_rec->type, dev_rec->offset);
7926 /* check btrfs_dev_item -> btrfs_dev_extent */
7927 static int check_devices(struct rb_root *dev_cache,
7928 struct device_extent_tree *dev_extent_cache)
7930 struct rb_node *dev_node;
7931 struct device_record *dev_rec;
7932 struct device_extent_record *dext_rec;
7936 dev_node = rb_first(dev_cache);
7938 dev_rec = container_of(dev_node, struct device_record, node);
7939 err = check_device_used(dev_rec, dev_extent_cache);
7943 dev_node = rb_next(dev_node);
7945 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7948 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7949 dext_rec->objectid, dext_rec->offset, dext_rec->length);
7956 static int add_root_item_to_list(struct list_head *head,
7957 u64 objectid, u64 bytenr, u64 last_snapshot,
7958 u8 level, u8 drop_level,
7959 int level_size, struct btrfs_key *drop_key)
7962 struct root_item_record *ri_rec;
7963 ri_rec = malloc(sizeof(*ri_rec));
7966 ri_rec->bytenr = bytenr;
7967 ri_rec->objectid = objectid;
7968 ri_rec->level = level;
7969 ri_rec->level_size = level_size;
7970 ri_rec->drop_level = drop_level;
7971 ri_rec->last_snapshot = last_snapshot;
7973 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7974 list_add_tail(&ri_rec->list, head);
7979 static void free_root_item_list(struct list_head *list)
7981 struct root_item_record *ri_rec;
7983 while (!list_empty(list)) {
7984 ri_rec = list_first_entry(list, struct root_item_record,
7986 list_del_init(&ri_rec->list);
7991 static int deal_root_from_list(struct list_head *list,
7992 struct btrfs_root *root,
7993 struct block_info *bits,
7995 struct cache_tree *pending,
7996 struct cache_tree *seen,
7997 struct cache_tree *reada,
7998 struct cache_tree *nodes,
7999 struct cache_tree *extent_cache,
8000 struct cache_tree *chunk_cache,
8001 struct rb_root *dev_cache,
8002 struct block_group_tree *block_group_cache,
8003 struct device_extent_tree *dev_extent_cache)
8008 while (!list_empty(list)) {
8009 struct root_item_record *rec;
8010 struct extent_buffer *buf;
8011 rec = list_entry(list->next,
8012 struct root_item_record, list);
8014 buf = read_tree_block(root->fs_info->tree_root,
8015 rec->bytenr, rec->level_size, 0);
8016 if (!extent_buffer_uptodate(buf)) {
8017 free_extent_buffer(buf);
8021 add_root_to_pending(buf, extent_cache, pending,
8022 seen, nodes, rec->objectid);
8024 * To rebuild extent tree, we need deal with snapshot
8025 * one by one, otherwise we deal with node firstly which
8026 * can maximize readahead.
8029 ret = run_next_block(root, bits, bits_nr, &last,
8030 pending, seen, reada, nodes,
8031 extent_cache, chunk_cache,
8032 dev_cache, block_group_cache,
8033 dev_extent_cache, rec);
8037 free_extent_buffer(buf);
8038 list_del(&rec->list);
8044 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8045 reada, nodes, extent_cache, chunk_cache,
8046 dev_cache, block_group_cache,
8047 dev_extent_cache, NULL);
8057 static int check_chunks_and_extents(struct btrfs_root *root)
8059 struct rb_root dev_cache;
8060 struct cache_tree chunk_cache;
8061 struct block_group_tree block_group_cache;
8062 struct device_extent_tree dev_extent_cache;
8063 struct cache_tree extent_cache;
8064 struct cache_tree seen;
8065 struct cache_tree pending;
8066 struct cache_tree reada;
8067 struct cache_tree nodes;
8068 struct extent_io_tree excluded_extents;
8069 struct cache_tree corrupt_blocks;
8070 struct btrfs_path path;
8071 struct btrfs_key key;
8072 struct btrfs_key found_key;
8074 struct block_info *bits;
8076 struct extent_buffer *leaf;
8078 struct btrfs_root_item ri;
8079 struct list_head dropping_trees;
8080 struct list_head normal_trees;
8081 struct btrfs_root *root1;
8086 dev_cache = RB_ROOT;
8087 cache_tree_init(&chunk_cache);
8088 block_group_tree_init(&block_group_cache);
8089 device_extent_tree_init(&dev_extent_cache);
8091 cache_tree_init(&extent_cache);
8092 cache_tree_init(&seen);
8093 cache_tree_init(&pending);
8094 cache_tree_init(&nodes);
8095 cache_tree_init(&reada);
8096 cache_tree_init(&corrupt_blocks);
8097 extent_io_tree_init(&excluded_extents);
8098 INIT_LIST_HEAD(&dropping_trees);
8099 INIT_LIST_HEAD(&normal_trees);
8102 root->fs_info->excluded_extents = &excluded_extents;
8103 root->fs_info->fsck_extent_cache = &extent_cache;
8104 root->fs_info->free_extent_hook = free_extent_hook;
8105 root->fs_info->corrupt_blocks = &corrupt_blocks;
8109 bits = malloc(bits_nr * sizeof(struct block_info));
8115 if (ctx.progress_enabled) {
8116 ctx.tp = TASK_EXTENTS;
8117 task_start(ctx.info);
8121 root1 = root->fs_info->tree_root;
8122 level = btrfs_header_level(root1->node);
8123 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8124 root1->node->start, 0, level, 0,
8125 btrfs_level_size(root1, level), NULL);
8128 root1 = root->fs_info->chunk_root;
8129 level = btrfs_header_level(root1->node);
8130 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8131 root1->node->start, 0, level, 0,
8132 btrfs_level_size(root1, level), NULL);
8135 btrfs_init_path(&path);
8138 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8139 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8144 leaf = path.nodes[0];
8145 slot = path.slots[0];
8146 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8147 ret = btrfs_next_leaf(root, &path);
8150 leaf = path.nodes[0];
8151 slot = path.slots[0];
8153 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8154 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8155 unsigned long offset;
8158 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8159 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8160 last_snapshot = btrfs_root_last_snapshot(&ri);
8161 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8162 level = btrfs_root_level(&ri);
8163 level_size = btrfs_level_size(root, level);
8164 ret = add_root_item_to_list(&normal_trees,
8166 btrfs_root_bytenr(&ri),
8167 last_snapshot, level,
8168 0, level_size, NULL);
8172 level = btrfs_root_level(&ri);
8173 level_size = btrfs_level_size(root, level);
8174 objectid = found_key.objectid;
8175 btrfs_disk_key_to_cpu(&found_key,
8177 ret = add_root_item_to_list(&dropping_trees,
8179 btrfs_root_bytenr(&ri),
8180 last_snapshot, level,
8182 level_size, &found_key);
8189 btrfs_release_path(&path);
8192 * check_block can return -EAGAIN if it fixes something, please keep
8193 * this in mind when dealing with return values from these functions, if
8194 * we get -EAGAIN we want to fall through and restart the loop.
8196 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8197 &seen, &reada, &nodes, &extent_cache,
8198 &chunk_cache, &dev_cache, &block_group_cache,
8205 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8206 &pending, &seen, &reada, &nodes,
8207 &extent_cache, &chunk_cache, &dev_cache,
8208 &block_group_cache, &dev_extent_cache);
8215 ret = check_chunks(&chunk_cache, &block_group_cache,
8216 &dev_extent_cache, NULL, NULL, NULL, 0);
8223 ret = check_extent_refs(root, &extent_cache);
8230 ret = check_devices(&dev_cache, &dev_extent_cache);
8235 task_stop(ctx.info);
8237 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8238 extent_io_tree_cleanup(&excluded_extents);
8239 root->fs_info->fsck_extent_cache = NULL;
8240 root->fs_info->free_extent_hook = NULL;
8241 root->fs_info->corrupt_blocks = NULL;
8242 root->fs_info->excluded_extents = NULL;
8245 free_chunk_cache_tree(&chunk_cache);
8246 free_device_cache_tree(&dev_cache);
8247 free_block_group_tree(&block_group_cache);
8248 free_device_extent_tree(&dev_extent_cache);
8249 free_extent_cache_tree(&seen);
8250 free_extent_cache_tree(&pending);
8251 free_extent_cache_tree(&reada);
8252 free_extent_cache_tree(&nodes);
8255 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8256 free_extent_cache_tree(&seen);
8257 free_extent_cache_tree(&pending);
8258 free_extent_cache_tree(&reada);
8259 free_extent_cache_tree(&nodes);
8260 free_chunk_cache_tree(&chunk_cache);
8261 free_block_group_tree(&block_group_cache);
8262 free_device_cache_tree(&dev_cache);
8263 free_device_extent_tree(&dev_extent_cache);
8264 free_extent_record_cache(root->fs_info, &extent_cache);
8265 free_root_item_list(&normal_trees);
8266 free_root_item_list(&dropping_trees);
8267 extent_io_tree_cleanup(&excluded_extents);
8271 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8272 struct btrfs_root *root, int overwrite)
8274 struct extent_buffer *c;
8275 struct extent_buffer *old = root->node;
8278 struct btrfs_disk_key disk_key = {0,0,0};
8284 extent_buffer_get(c);
8287 c = btrfs_alloc_free_block(trans, root,
8288 btrfs_level_size(root, 0),
8289 root->root_key.objectid,
8290 &disk_key, level, 0, 0);
8293 extent_buffer_get(c);
8297 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8298 btrfs_set_header_level(c, level);
8299 btrfs_set_header_bytenr(c, c->start);
8300 btrfs_set_header_generation(c, trans->transid);
8301 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8302 btrfs_set_header_owner(c, root->root_key.objectid);
8304 write_extent_buffer(c, root->fs_info->fsid,
8305 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8307 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8308 btrfs_header_chunk_tree_uuid(c),
8311 btrfs_mark_buffer_dirty(c);
8313 * this case can happen in the following case:
8315 * 1.overwrite previous root.
8317 * 2.reinit reloc data root, this is because we skip pin
8318 * down reloc data tree before which means we can allocate
8319 * same block bytenr here.
8321 if (old->start == c->start) {
8322 btrfs_set_root_generation(&root->root_item,
8324 root->root_item.level = btrfs_header_level(root->node);
8325 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8326 &root->root_key, &root->root_item);
8328 free_extent_buffer(c);
8332 free_extent_buffer(old);
8334 add_root_to_dirty_list(root);
8338 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8339 struct extent_buffer *eb, int tree_root)
8341 struct extent_buffer *tmp;
8342 struct btrfs_root_item *ri;
8343 struct btrfs_key key;
8346 int level = btrfs_header_level(eb);
8352 * If we have pinned this block before, don't pin it again.
8353 * This can not only avoid forever loop with broken filesystem
8354 * but also give us some speedups.
8356 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8357 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8360 btrfs_pin_extent(fs_info, eb->start, eb->len);
8362 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8363 nritems = btrfs_header_nritems(eb);
8364 for (i = 0; i < nritems; i++) {
8366 btrfs_item_key_to_cpu(eb, &key, i);
8367 if (key.type != BTRFS_ROOT_ITEM_KEY)
8369 /* Skip the extent root and reloc roots */
8370 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8371 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8372 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8374 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8375 bytenr = btrfs_disk_root_bytenr(eb, ri);
8378 * If at any point we start needing the real root we
8379 * will have to build a stump root for the root we are
8380 * in, but for now this doesn't actually use the root so
8381 * just pass in extent_root.
8383 tmp = read_tree_block(fs_info->extent_root, bytenr,
8385 if (!extent_buffer_uptodate(tmp)) {
8386 fprintf(stderr, "Error reading root block\n");
8389 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8390 free_extent_buffer(tmp);
8394 bytenr = btrfs_node_blockptr(eb, i);
8396 /* If we aren't the tree root don't read the block */
8397 if (level == 1 && !tree_root) {
8398 btrfs_pin_extent(fs_info, bytenr, leafsize);
8402 tmp = read_tree_block(fs_info->extent_root, bytenr,
8404 if (!extent_buffer_uptodate(tmp)) {
8405 fprintf(stderr, "Error reading tree block\n");
8408 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8409 free_extent_buffer(tmp);
8418 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8422 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8426 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8429 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8431 struct btrfs_block_group_cache *cache;
8432 struct btrfs_path *path;
8433 struct extent_buffer *leaf;
8434 struct btrfs_chunk *chunk;
8435 struct btrfs_key key;
8439 path = btrfs_alloc_path();
8444 key.type = BTRFS_CHUNK_ITEM_KEY;
8447 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8449 btrfs_free_path(path);
8454 * We do this in case the block groups were screwed up and had alloc
8455 * bits that aren't actually set on the chunks. This happens with
8456 * restored images every time and could happen in real life I guess.
8458 fs_info->avail_data_alloc_bits = 0;
8459 fs_info->avail_metadata_alloc_bits = 0;
8460 fs_info->avail_system_alloc_bits = 0;
8462 /* First we need to create the in-memory block groups */
8464 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8465 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8467 btrfs_free_path(path);
8475 leaf = path->nodes[0];
8476 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8477 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8482 chunk = btrfs_item_ptr(leaf, path->slots[0],
8483 struct btrfs_chunk);
8484 btrfs_add_block_group(fs_info, 0,
8485 btrfs_chunk_type(leaf, chunk),
8486 key.objectid, key.offset,
8487 btrfs_chunk_length(leaf, chunk));
8488 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8489 key.offset + btrfs_chunk_length(leaf, chunk),
8495 cache = btrfs_lookup_first_block_group(fs_info, start);
8499 start = cache->key.objectid + cache->key.offset;
8502 btrfs_free_path(path);
8506 static int reset_balance(struct btrfs_trans_handle *trans,
8507 struct btrfs_fs_info *fs_info)
8509 struct btrfs_root *root = fs_info->tree_root;
8510 struct btrfs_path *path;
8511 struct extent_buffer *leaf;
8512 struct btrfs_key key;
8513 int del_slot, del_nr = 0;
8517 path = btrfs_alloc_path();
8521 key.objectid = BTRFS_BALANCE_OBJECTID;
8522 key.type = BTRFS_BALANCE_ITEM_KEY;
8525 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8530 goto reinit_data_reloc;
8535 ret = btrfs_del_item(trans, root, path);
8538 btrfs_release_path(path);
8540 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8541 key.type = BTRFS_ROOT_ITEM_KEY;
8544 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8548 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8553 ret = btrfs_del_items(trans, root, path,
8560 btrfs_release_path(path);
8563 ret = btrfs_search_slot(trans, root, &key, path,
8570 leaf = path->nodes[0];
8571 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8572 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8574 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8579 del_slot = path->slots[0];
8588 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8592 btrfs_release_path(path);
8595 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8596 key.type = BTRFS_ROOT_ITEM_KEY;
8597 key.offset = (u64)-1;
8598 root = btrfs_read_fs_root(fs_info, &key);
8600 fprintf(stderr, "Error reading data reloc tree\n");
8601 ret = PTR_ERR(root);
8604 record_root_in_trans(trans, root);
8605 ret = btrfs_fsck_reinit_root(trans, root, 0);
8608 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8610 btrfs_free_path(path);
8614 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8615 struct btrfs_fs_info *fs_info)
8621 * The only reason we don't do this is because right now we're just
8622 * walking the trees we find and pinning down their bytes, we don't look
8623 * at any of the leaves. In order to do mixed groups we'd have to check
8624 * the leaves of any fs roots and pin down the bytes for any file
8625 * extents we find. Not hard but why do it if we don't have to?
8627 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8628 fprintf(stderr, "We don't support re-initing the extent tree "
8629 "for mixed block groups yet, please notify a btrfs "
8630 "developer you want to do this so they can add this "
8631 "functionality.\n");
8636 * first we need to walk all of the trees except the extent tree and pin
8637 * down the bytes that are in use so we don't overwrite any existing
8640 ret = pin_metadata_blocks(fs_info);
8642 fprintf(stderr, "error pinning down used bytes\n");
8647 * Need to drop all the block groups since we're going to recreate all
8650 btrfs_free_block_groups(fs_info);
8651 ret = reset_block_groups(fs_info);
8653 fprintf(stderr, "error resetting the block groups\n");
8657 /* Ok we can allocate now, reinit the extent root */
8658 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8660 fprintf(stderr, "extent root initialization failed\n");
8662 * When the transaction code is updated we should end the
8663 * transaction, but for now progs only knows about commit so
8664 * just return an error.
8670 * Now we have all the in-memory block groups setup so we can make
8671 * allocations properly, and the metadata we care about is safe since we
8672 * pinned all of it above.
8675 struct btrfs_block_group_cache *cache;
8677 cache = btrfs_lookup_first_block_group(fs_info, start);
8680 start = cache->key.objectid + cache->key.offset;
8681 ret = btrfs_insert_item(trans, fs_info->extent_root,
8682 &cache->key, &cache->item,
8683 sizeof(cache->item));
8685 fprintf(stderr, "Error adding block group\n");
8688 btrfs_extent_post_op(trans, fs_info->extent_root);
8691 ret = reset_balance(trans, fs_info);
8693 fprintf(stderr, "error reseting the pending balance\n");
8698 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8700 struct btrfs_path *path;
8701 struct btrfs_trans_handle *trans;
8702 struct btrfs_key key;
8705 printf("Recowing metadata block %llu\n", eb->start);
8706 key.objectid = btrfs_header_owner(eb);
8707 key.type = BTRFS_ROOT_ITEM_KEY;
8708 key.offset = (u64)-1;
8710 root = btrfs_read_fs_root(root->fs_info, &key);
8712 fprintf(stderr, "Couldn't find owner root %llu\n",
8714 return PTR_ERR(root);
8717 path = btrfs_alloc_path();
8721 trans = btrfs_start_transaction(root, 1);
8722 if (IS_ERR(trans)) {
8723 btrfs_free_path(path);
8724 return PTR_ERR(trans);
8727 path->lowest_level = btrfs_header_level(eb);
8728 if (path->lowest_level)
8729 btrfs_node_key_to_cpu(eb, &key, 0);
8731 btrfs_item_key_to_cpu(eb, &key, 0);
8733 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8734 btrfs_commit_transaction(trans, root);
8735 btrfs_free_path(path);
8739 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8741 struct btrfs_path *path;
8742 struct btrfs_trans_handle *trans;
8743 struct btrfs_key key;
8746 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8747 bad->key.type, bad->key.offset);
8748 key.objectid = bad->root_id;
8749 key.type = BTRFS_ROOT_ITEM_KEY;
8750 key.offset = (u64)-1;
8752 root = btrfs_read_fs_root(root->fs_info, &key);
8754 fprintf(stderr, "Couldn't find owner root %llu\n",
8756 return PTR_ERR(root);
8759 path = btrfs_alloc_path();
8763 trans = btrfs_start_transaction(root, 1);
8764 if (IS_ERR(trans)) {
8765 btrfs_free_path(path);
8766 return PTR_ERR(trans);
8769 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8775 ret = btrfs_del_item(trans, root, path);
8777 btrfs_commit_transaction(trans, root);
8778 btrfs_free_path(path);
8782 static int zero_log_tree(struct btrfs_root *root)
8784 struct btrfs_trans_handle *trans;
8787 trans = btrfs_start_transaction(root, 1);
8788 if (IS_ERR(trans)) {
8789 ret = PTR_ERR(trans);
8792 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8793 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8794 ret = btrfs_commit_transaction(trans, root);
8798 static int populate_csum(struct btrfs_trans_handle *trans,
8799 struct btrfs_root *csum_root, char *buf, u64 start,
8806 while (offset < len) {
8807 sectorsize = csum_root->sectorsize;
8808 ret = read_extent_data(csum_root, buf, start + offset,
8812 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8813 start + offset, buf, sectorsize);
8816 offset += sectorsize;
8821 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8822 struct btrfs_root *csum_root,
8823 struct btrfs_root *cur_root)
8825 struct btrfs_path *path;
8826 struct btrfs_key key;
8827 struct extent_buffer *node;
8828 struct btrfs_file_extent_item *fi;
8835 path = btrfs_alloc_path();
8838 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8848 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8851 /* Iterate all regular file extents and fill its csum */
8853 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8855 if (key.type != BTRFS_EXTENT_DATA_KEY)
8857 node = path->nodes[0];
8858 slot = path->slots[0];
8859 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8860 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8862 start = btrfs_file_extent_disk_bytenr(node, fi);
8863 len = btrfs_file_extent_disk_num_bytes(node, fi);
8865 ret = populate_csum(trans, csum_root, buf, start, len);
8872 * TODO: if next leaf is corrupted, jump to nearest next valid
8875 ret = btrfs_next_item(cur_root, path);
8885 btrfs_free_path(path);
8890 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8891 struct btrfs_root *csum_root)
8893 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8894 struct btrfs_path *path;
8895 struct btrfs_root *tree_root = fs_info->tree_root;
8896 struct btrfs_root *cur_root;
8897 struct extent_buffer *node;
8898 struct btrfs_key key;
8902 path = btrfs_alloc_path();
8906 key.objectid = BTRFS_FS_TREE_OBJECTID;
8908 key.type = BTRFS_ROOT_ITEM_KEY;
8910 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8919 node = path->nodes[0];
8920 slot = path->slots[0];
8921 btrfs_item_key_to_cpu(node, &key, slot);
8922 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8924 if (key.type != BTRFS_ROOT_ITEM_KEY)
8926 if (!is_fstree(key.objectid))
8928 key.offset = (u64)-1;
8930 cur_root = btrfs_read_fs_root(fs_info, &key);
8931 if (IS_ERR(cur_root) || !cur_root) {
8932 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8936 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8941 ret = btrfs_next_item(tree_root, path);
8951 btrfs_free_path(path);
8955 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8956 struct btrfs_root *csum_root)
8958 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8959 struct btrfs_path *path;
8960 struct btrfs_extent_item *ei;
8961 struct extent_buffer *leaf;
8963 struct btrfs_key key;
8966 path = btrfs_alloc_path();
8971 key.type = BTRFS_EXTENT_ITEM_KEY;
8974 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8976 btrfs_free_path(path);
8980 buf = malloc(csum_root->sectorsize);
8982 btrfs_free_path(path);
8987 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8988 ret = btrfs_next_leaf(extent_root, path);
8996 leaf = path->nodes[0];
8998 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8999 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9004 ei = btrfs_item_ptr(leaf, path->slots[0],
9005 struct btrfs_extent_item);
9006 if (!(btrfs_extent_flags(leaf, ei) &
9007 BTRFS_EXTENT_FLAG_DATA)) {
9012 ret = populate_csum(trans, csum_root, buf, key.objectid,
9019 btrfs_free_path(path);
9025 * Recalculate the csum and put it into the csum tree.
9027 * Extent tree init will wipe out all the extent info, so in that case, we
9028 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9029 * will use fs/subvol trees to init the csum tree.
9031 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9032 struct btrfs_root *csum_root,
9036 return fill_csum_tree_from_fs(trans, csum_root);
9038 return fill_csum_tree_from_extent(trans, csum_root);
9041 struct root_item_info {
9042 /* level of the root */
9044 /* number of nodes at this level, must be 1 for a root */
9048 struct cache_extent cache_extent;
9051 static struct cache_tree *roots_info_cache = NULL;
9053 static void free_roots_info_cache(void)
9055 if (!roots_info_cache)
9058 while (!cache_tree_empty(roots_info_cache)) {
9059 struct cache_extent *entry;
9060 struct root_item_info *rii;
9062 entry = first_cache_extent(roots_info_cache);
9065 remove_cache_extent(roots_info_cache, entry);
9066 rii = container_of(entry, struct root_item_info, cache_extent);
9070 free(roots_info_cache);
9071 roots_info_cache = NULL;
9074 static int build_roots_info_cache(struct btrfs_fs_info *info)
9077 struct btrfs_key key;
9078 struct extent_buffer *leaf;
9079 struct btrfs_path *path;
9081 if (!roots_info_cache) {
9082 roots_info_cache = malloc(sizeof(*roots_info_cache));
9083 if (!roots_info_cache)
9085 cache_tree_init(roots_info_cache);
9088 path = btrfs_alloc_path();
9093 key.type = BTRFS_EXTENT_ITEM_KEY;
9096 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9099 leaf = path->nodes[0];
9102 struct btrfs_key found_key;
9103 struct btrfs_extent_item *ei;
9104 struct btrfs_extent_inline_ref *iref;
9105 int slot = path->slots[0];
9110 struct cache_extent *entry;
9111 struct root_item_info *rii;
9113 if (slot >= btrfs_header_nritems(leaf)) {
9114 ret = btrfs_next_leaf(info->extent_root, path);
9121 leaf = path->nodes[0];
9122 slot = path->slots[0];
9125 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9127 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9128 found_key.type != BTRFS_METADATA_ITEM_KEY)
9131 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9132 flags = btrfs_extent_flags(leaf, ei);
9134 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9135 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9138 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9139 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9140 level = found_key.offset;
9142 struct btrfs_tree_block_info *info;
9144 info = (struct btrfs_tree_block_info *)(ei + 1);
9145 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9146 level = btrfs_tree_block_level(leaf, info);
9150 * For a root extent, it must be of the following type and the
9151 * first (and only one) iref in the item.
9153 type = btrfs_extent_inline_ref_type(leaf, iref);
9154 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9157 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9158 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9160 rii = malloc(sizeof(struct root_item_info));
9165 rii->cache_extent.start = root_id;
9166 rii->cache_extent.size = 1;
9167 rii->level = (u8)-1;
9168 entry = &rii->cache_extent;
9169 ret = insert_cache_extent(roots_info_cache, entry);
9172 rii = container_of(entry, struct root_item_info,
9176 ASSERT(rii->cache_extent.start == root_id);
9177 ASSERT(rii->cache_extent.size == 1);
9179 if (level > rii->level || rii->level == (u8)-1) {
9181 rii->bytenr = found_key.objectid;
9182 rii->gen = btrfs_extent_generation(leaf, ei);
9183 rii->node_count = 1;
9184 } else if (level == rii->level) {
9192 btrfs_free_path(path);
9197 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9198 struct btrfs_path *path,
9199 const struct btrfs_key *root_key,
9200 const int read_only_mode)
9202 const u64 root_id = root_key->objectid;
9203 struct cache_extent *entry;
9204 struct root_item_info *rii;
9205 struct btrfs_root_item ri;
9206 unsigned long offset;
9208 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9211 "Error: could not find extent items for root %llu\n",
9212 root_key->objectid);
9216 rii = container_of(entry, struct root_item_info, cache_extent);
9217 ASSERT(rii->cache_extent.start == root_id);
9218 ASSERT(rii->cache_extent.size == 1);
9220 if (rii->node_count != 1) {
9222 "Error: could not find btree root extent for root %llu\n",
9227 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9228 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9230 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9231 btrfs_root_level(&ri) != rii->level ||
9232 btrfs_root_generation(&ri) != rii->gen) {
9235 * If we're in repair mode but our caller told us to not update
9236 * the root item, i.e. just check if it needs to be updated, don't
9237 * print this message, since the caller will call us again shortly
9238 * for the same root item without read only mode (the caller will
9239 * open a transaction first).
9241 if (!(read_only_mode && repair))
9243 "%sroot item for root %llu,"
9244 " current bytenr %llu, current gen %llu, current level %u,"
9245 " new bytenr %llu, new gen %llu, new level %u\n",
9246 (read_only_mode ? "" : "fixing "),
9248 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9249 btrfs_root_level(&ri),
9250 rii->bytenr, rii->gen, rii->level);
9252 if (btrfs_root_generation(&ri) > rii->gen) {
9254 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9255 root_id, btrfs_root_generation(&ri), rii->gen);
9259 if (!read_only_mode) {
9260 btrfs_set_root_bytenr(&ri, rii->bytenr);
9261 btrfs_set_root_level(&ri, rii->level);
9262 btrfs_set_root_generation(&ri, rii->gen);
9263 write_extent_buffer(path->nodes[0], &ri,
9264 offset, sizeof(ri));
9274 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9275 * caused read-only snapshots to be corrupted if they were created at a moment
9276 * when the source subvolume/snapshot had orphan items. The issue was that the
9277 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9278 * node instead of the post orphan cleanup root node.
9279 * So this function, and its callees, just detects and fixes those cases. Even
9280 * though the regression was for read-only snapshots, this function applies to
9281 * any snapshot/subvolume root.
9282 * This must be run before any other repair code - not doing it so, makes other
9283 * repair code delete or modify backrefs in the extent tree for example, which
9284 * will result in an inconsistent fs after repairing the root items.
9286 static int repair_root_items(struct btrfs_fs_info *info)
9288 struct btrfs_path *path = NULL;
9289 struct btrfs_key key;
9290 struct extent_buffer *leaf;
9291 struct btrfs_trans_handle *trans = NULL;
9296 ret = build_roots_info_cache(info);
9300 path = btrfs_alloc_path();
9306 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9307 key.type = BTRFS_ROOT_ITEM_KEY;
9312 * Avoid opening and committing transactions if a leaf doesn't have
9313 * any root items that need to be fixed, so that we avoid rotating
9314 * backup roots unnecessarily.
9317 trans = btrfs_start_transaction(info->tree_root, 1);
9318 if (IS_ERR(trans)) {
9319 ret = PTR_ERR(trans);
9324 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9328 leaf = path->nodes[0];
9331 struct btrfs_key found_key;
9333 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9334 int no_more_keys = find_next_key(path, &key);
9336 btrfs_release_path(path);
9338 ret = btrfs_commit_transaction(trans,
9350 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9352 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9354 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9357 ret = maybe_repair_root_item(info, path, &found_key,
9362 if (!trans && repair) {
9365 btrfs_release_path(path);
9375 free_roots_info_cache();
9376 btrfs_free_path(path);
9378 btrfs_commit_transaction(trans, info->tree_root);
9385 const char * const cmd_check_usage[] = {
9386 "btrfs check [options] <device>",
9387 "Check structural inegrity of a filesystem (unmounted).",
9388 "Check structural inegrity of an unmounted filesystem. Verify internal",
9389 "trees' consistency and item connectivity. In the repair mode try to",
9390 "fix the problems found.",
9391 "WARNING: the repair mode is considered dangerous",
9393 "-s|--super <superblock> use this superblock copy",
9394 "-b|--backup use the backup root copy",
9395 "--repair try to repair the filesystem",
9396 "--readonly run in read-only mode (default)",
9397 "--init-csum-tree create a new CRC tree",
9398 "--init-extent-tree create a new extent tree",
9399 "--check-data-csum verify checkums of data blocks",
9400 "-Q|--qgroup-report print a report on qgroup consistency",
9401 "-E|--subvol-extents <subvolid>",
9402 " print subvolume extents and sharing state",
9403 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9404 "-p|--progress indicate progress",
9408 int cmd_check(int argc, char **argv)
9410 struct cache_tree root_cache;
9411 struct btrfs_root *root;
9412 struct btrfs_fs_info *info;
9415 u64 tree_root_bytenr = 0;
9416 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9419 int init_csum_tree = 0;
9421 int qgroup_report = 0;
9422 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9426 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9427 OPT_CHECK_CSUM, OPT_READONLY };
9428 static const struct option long_options[] = {
9429 { "super", required_argument, NULL, 's' },
9430 { "repair", no_argument, NULL, OPT_REPAIR },
9431 { "readonly", no_argument, NULL, OPT_READONLY },
9432 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9433 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9434 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9435 { "backup", no_argument, NULL, 'b' },
9436 { "subvol-extents", required_argument, NULL, 'E' },
9437 { "qgroup-report", no_argument, NULL, 'Q' },
9438 { "tree-root", required_argument, NULL, 'r' },
9439 { "progress", no_argument, NULL, 'p' },
9443 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9447 case 'a': /* ignored */ break;
9449 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9452 num = arg_strtou64(optarg);
9453 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9455 "ERROR: super mirror should be less than: %d\n",
9456 BTRFS_SUPER_MIRROR_MAX);
9459 bytenr = btrfs_sb_offset(((int)num));
9460 printf("using SB copy %llu, bytenr %llu\n", num,
9461 (unsigned long long)bytenr);
9467 subvolid = arg_strtou64(optarg);
9470 tree_root_bytenr = arg_strtou64(optarg);
9473 ctx.progress_enabled = true;
9477 usage(cmd_check_usage);
9479 printf("enabling repair mode\n");
9481 ctree_flags |= OPEN_CTREE_WRITES;
9487 printf("Creating a new CRC tree\n");
9490 ctree_flags |= OPEN_CTREE_WRITES;
9492 case OPT_INIT_EXTENT:
9493 init_extent_tree = 1;
9494 ctree_flags |= (OPEN_CTREE_WRITES |
9495 OPEN_CTREE_NO_BLOCK_GROUPS);
9498 case OPT_CHECK_CSUM:
9499 check_data_csum = 1;
9503 argc = argc - optind;
9505 if (check_argc_exact(argc, 1))
9506 usage(cmd_check_usage);
9508 if (ctx.progress_enabled) {
9509 ctx.tp = TASK_NOTHING;
9510 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9513 /* This check is the only reason for --readonly to exist */
9514 if (readonly && repair) {
9515 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9520 cache_tree_init(&root_cache);
9522 if((ret = check_mounted(argv[optind])) < 0) {
9523 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9526 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9531 /* only allow partial opening under repair mode */
9533 ctree_flags |= OPEN_CTREE_PARTIAL;
9535 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9538 fprintf(stderr, "Couldn't open file system\n");
9544 root = info->fs_root;
9547 * repair mode will force us to commit transaction which
9548 * will make us fail to load log tree when mounting.
9550 if (repair && btrfs_super_log_root(info->super_copy)) {
9551 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9556 ret = zero_log_tree(root);
9558 fprintf(stderr, "fail to zero log tree\n");
9563 uuid_unparse(info->super_copy->fsid, uuidbuf);
9564 if (qgroup_report) {
9565 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9567 ret = qgroup_verify_all(info);
9569 print_qgroup_report(1);
9573 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9574 subvolid, argv[optind], uuidbuf);
9575 ret = print_extent_state(info, subvolid);
9578 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9580 if (!extent_buffer_uptodate(info->tree_root->node) ||
9581 !extent_buffer_uptodate(info->dev_root->node) ||
9582 !extent_buffer_uptodate(info->chunk_root->node)) {
9583 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9588 if (init_extent_tree || init_csum_tree) {
9589 struct btrfs_trans_handle *trans;
9591 trans = btrfs_start_transaction(info->extent_root, 0);
9592 if (IS_ERR(trans)) {
9593 fprintf(stderr, "Error starting transaction\n");
9594 ret = PTR_ERR(trans);
9598 if (init_extent_tree) {
9599 printf("Creating a new extent tree\n");
9600 ret = reinit_extent_tree(trans, info);
9605 if (init_csum_tree) {
9606 fprintf(stderr, "Reinit crc root\n");
9607 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9609 fprintf(stderr, "crc root initialization failed\n");
9614 ret = fill_csum_tree(trans, info->csum_root,
9617 fprintf(stderr, "crc refilling failed\n");
9622 * Ok now we commit and run the normal fsck, which will add
9623 * extent entries for all of the items it finds.
9625 ret = btrfs_commit_transaction(trans, info->extent_root);
9629 if (!extent_buffer_uptodate(info->extent_root->node)) {
9630 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9634 if (!extent_buffer_uptodate(info->csum_root->node)) {
9635 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9640 if (!ctx.progress_enabled)
9641 fprintf(stderr, "checking extents\n");
9642 ret = check_chunks_and_extents(root);
9644 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9646 ret = repair_root_items(info);
9650 fprintf(stderr, "Fixed %d roots.\n", ret);
9652 } else if (ret > 0) {
9654 "Found %d roots with an outdated root item.\n",
9657 "Please run a filesystem check with the option --repair to fix them.\n");
9662 if (!ctx.progress_enabled)
9663 fprintf(stderr, "checking free space cache\n");
9664 ret = check_space_cache(root);
9669 * We used to have to have these hole extents in between our real
9670 * extents so if we don't have this flag set we need to make sure there
9671 * are no gaps in the file extents for inodes, otherwise we can just
9672 * ignore it when this happens.
9674 no_holes = btrfs_fs_incompat(root->fs_info,
9675 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9676 if (!ctx.progress_enabled)
9677 fprintf(stderr, "checking fs roots\n");
9678 ret = check_fs_roots(root, &root_cache);
9682 fprintf(stderr, "checking csums\n");
9683 ret = check_csums(root);
9687 fprintf(stderr, "checking root refs\n");
9688 ret = check_root_refs(root, &root_cache);
9692 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9693 struct extent_buffer *eb;
9695 eb = list_first_entry(&root->fs_info->recow_ebs,
9696 struct extent_buffer, recow);
9697 list_del_init(&eb->recow);
9698 ret = recow_extent_buffer(root, eb);
9703 while (!list_empty(&delete_items)) {
9704 struct bad_item *bad;
9706 bad = list_first_entry(&delete_items, struct bad_item, list);
9707 list_del_init(&bad->list);
9709 ret = delete_bad_item(root, bad);
9713 if (info->quota_enabled) {
9715 fprintf(stderr, "checking quota groups\n");
9716 err = qgroup_verify_all(info);
9721 if (!list_empty(&root->fs_info->recow_ebs)) {
9722 fprintf(stderr, "Transid errors in file system\n");
9726 print_qgroup_report(0);
9727 if (found_old_backref) { /*
9728 * there was a disk format change when mixed
9729 * backref was in testing tree. The old format
9730 * existed about one week.
9732 printf("\n * Found old mixed backref format. "
9733 "The old format is not supported! *"
9734 "\n * Please mount the FS in readonly mode, "
9735 "backup data and re-format the FS. *\n\n");
9738 printf("found %llu bytes used err is %d\n",
9739 (unsigned long long)bytes_used, ret);
9740 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9741 printf("total tree bytes: %llu\n",
9742 (unsigned long long)total_btree_bytes);
9743 printf("total fs tree bytes: %llu\n",
9744 (unsigned long long)total_fs_tree_bytes);
9745 printf("total extent tree bytes: %llu\n",
9746 (unsigned long long)total_extent_tree_bytes);
9747 printf("btree space waste bytes: %llu\n",
9748 (unsigned long long)btree_space_waste);
9749 printf("file data blocks allocated: %llu\n referenced %llu\n",
9750 (unsigned long long)data_bytes_allocated,
9751 (unsigned long long)data_bytes_referenced);
9753 free_root_recs_tree(&root_cache);
9757 if (ctx.progress_enabled)
9758 task_deinit(ctx.info);