2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02110-1301, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
54 TASK_NOTHING, /* have to be the last element */
59 enum task_position tp;
61 struct task_info *info;
/* Accumulated statistics, printed as a summary at the end of a check run. */
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
/* Work lists shared by the repair code paths. */
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
/* Command-line flags: rebuild the extent tree / verify data checksums. */
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
/* Filesystem under check and the progress-reporting task context. */
77 struct btrfs_fs_info *global_info;
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
/* Which checker implementation to run (original in-memory vs lowmem). */
81 enum btrfs_check_mode {
85 	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
88 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
92 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94 struct data_backref *back1 = to_data_backref(ext1);
95 struct data_backref *back2 = to_data_backref(ext2);
97 WARN_ON(!ext1->is_data);
98 WARN_ON(!ext2->is_data);
100 /* parent and root are a union, so this covers both */
101 if (back1->parent > back2->parent)
103 if (back1->parent < back2->parent)
106 /* This is a full backref and the parents match. */
107 if (back1->node.full_backref)
110 if (back1->owner > back2->owner)
112 if (back1->owner < back2->owner)
115 if (back1->offset > back2->offset)
117 if (back1->offset < back2->offset)
120 if (back1->found_ref && back2->found_ref) {
121 if (back1->disk_bytenr > back2->disk_bytenr)
123 if (back1->disk_bytenr < back2->disk_bytenr)
126 if (back1->bytes > back2->bytes)
128 if (back1->bytes < back2->bytes)
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
137 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139 struct tree_backref *back1 = to_tree_backref(ext1);
140 struct tree_backref *back2 = to_tree_backref(ext2);
142 WARN_ON(ext1->is_data);
143 WARN_ON(ext2->is_data);
145 /* parent and root are a union, so this covers both */
146 if (back1->parent > back2->parent)
148 if (back1->parent < back2->parent)
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
156 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
159 if (ext1->is_data > ext2->is_data)
162 if (ext1->is_data < ext2->is_data)
165 if (ext1->full_backref > ext2->full_backref)
167 if (ext1->full_backref < ext2->full_backref)
171 return compare_data_backref(node1, node2);
173 return compare_tree_backref(node1, node2);
177 static void *print_status_check(void *p)
179 struct task_ctx *priv = p;
180 const char work_indicator[] = { '.', 'o', 'O', 'o' };
182 static char *task_position_string[] = {
184 "checking free space cache",
188 task_period_start(priv->info, 1000 /* 1s */);
190 if (priv->tp == TASK_NOTHING)
194 printf("%s [%c]\r", task_position_string[priv->tp],
195 work_indicator[count % 4]);
198 task_period_wait(priv->info);
203 static int print_status_return(void *p)
211 static enum btrfs_check_mode parse_check_mode(const char *str)
213 if (strcmp(str, "lowmem") == 0)
214 return CHECK_MODE_LOWMEM;
215 if (strcmp(str, "orig") == 0)
216 return CHECK_MODE_ORIGINAL;
217 if (strcmp(str, "original") == 0)
218 return CHECK_MODE_ORIGINAL;
220 return CHECK_MODE_UNKNOWN;
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
226 struct file_extent_hole *hole;
228 if (RB_EMPTY_ROOT(holes))
231 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
237 struct file_extent_hole *hole1;
238 struct file_extent_hole *hole2;
240 hole1 = rb_entry(node1, struct file_extent_hole, node);
241 hole2 = rb_entry(node2, struct file_extent_hole, node);
243 if (hole1->start > hole2->start)
245 if (hole1->start < hole2->start)
247 /* Now hole1->start == hole2->start */
248 if (hole1->len >= hole2->len)
250 * Hole 1 will be merge center
251 * Same hole will be merged later
254 /* Hole 2 will be merge center */
259 * Add a hole to the record
261 * This will do hole merge for copy_file_extent_holes(),
262 * which will ensure there won't be continuous holes.
264 static int add_file_extent_hole(struct rb_root *holes,
267 struct file_extent_hole *hole;
268 struct file_extent_hole *prev = NULL;
269 struct file_extent_hole *next = NULL;
271 hole = malloc(sizeof(*hole));
276 /* Since compare will not return 0, no -EEXIST will happen */
277 rb_insert(holes, &hole->node, compare_hole);
279 /* simple merge with previous hole */
280 if (rb_prev(&hole->node))
281 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
283 if (prev && prev->start + prev->len >= hole->start) {
284 hole->len = hole->start + hole->len - prev->start;
285 hole->start = prev->start;
286 rb_erase(&prev->node, holes);
291 /* iterate merge with next holes */
293 if (!rb_next(&hole->node))
295 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
297 if (hole->start + hole->len >= next->start) {
298 if (hole->start + hole->len <= next->start + next->len)
299 hole->len = next->start + next->len -
301 rb_erase(&next->node, holes);
310 static int compare_hole_range(struct rb_node *node, void *data)
312 struct file_extent_hole *hole;
315 hole = (struct file_extent_hole *)data;
318 hole = rb_entry(node, struct file_extent_hole, node);
319 if (start < hole->start)
321 if (start >= hole->start && start < hole->start + hole->len)
327 * Delete a hole in the record
329 * This will do the hole split and is much restrict than add.
331 static int del_file_extent_hole(struct rb_root *holes,
334 struct file_extent_hole *hole;
335 struct file_extent_hole tmp;
340 struct rb_node *node;
347 node = rb_search(holes, &tmp, compare_hole_range, NULL);
350 hole = rb_entry(node, struct file_extent_hole, node);
351 if (start + len > hole->start + hole->len)
355 * Now there will be no overlap, delete the hole and re-add the
356 * split(s) if they exists.
358 if (start > hole->start) {
359 prev_start = hole->start;
360 prev_len = start - hole->start;
363 if (hole->start + hole->len > start + len) {
364 next_start = start + len;
365 next_len = hole->start + hole->len - start - len;
368 rb_erase(node, holes);
371 ret = add_file_extent_hole(holes, prev_start, prev_len);
376 ret = add_file_extent_hole(holes, next_start, next_len);
383 static int copy_file_extent_holes(struct rb_root *dst,
386 struct file_extent_hole *hole;
387 struct rb_node *node;
390 node = rb_first(src);
392 hole = rb_entry(node, struct file_extent_hole, node);
393 ret = add_file_extent_hole(dst, hole->start, hole->len);
396 node = rb_next(node);
401 static void free_file_extent_holes(struct rb_root *holes)
403 struct rb_node *node;
404 struct file_extent_hole *hole;
406 node = rb_first(holes);
408 hole = rb_entry(node, struct file_extent_hole, node);
409 rb_erase(node, holes);
411 node = rb_first(holes);
415 static void record_root_in_trans(struct btrfs_trans_handle *trans,
416 struct btrfs_root *root)
418 if (root->last_trans != trans->transid) {
419 root->track_dirty = 1;
420 root->last_trans = trans->transid;
421 root->commit_root = root->node;
422 extent_buffer_get(root->node);
426 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
428 struct device_record *rec1;
429 struct device_record *rec2;
431 rec1 = rb_entry(node1, struct device_record, node);
432 rec2 = rb_entry(node2, struct device_record, node);
433 if (rec1->devid > rec2->devid)
435 else if (rec1->devid < rec2->devid)
441 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
443 struct inode_record *rec;
444 struct inode_backref *backref;
445 struct inode_backref *orig;
446 struct inode_backref *tmp;
447 struct orphan_data_extent *src_orphan;
448 struct orphan_data_extent *dst_orphan;
453 rec = malloc(sizeof(*rec));
455 return ERR_PTR(-ENOMEM);
456 memcpy(rec, orig_rec, sizeof(*rec));
458 INIT_LIST_HEAD(&rec->backrefs);
459 INIT_LIST_HEAD(&rec->orphan_extents);
460 rec->holes = RB_ROOT;
462 list_for_each_entry(orig, &orig_rec->backrefs, list) {
463 size = sizeof(*orig) + orig->namelen + 1;
464 backref = malloc(size);
469 memcpy(backref, orig, size);
470 list_add_tail(&backref->list, &rec->backrefs);
472 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
473 dst_orphan = malloc(sizeof(*dst_orphan));
478 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
479 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
481 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
488 rb = rb_first(&rec->holes);
490 struct file_extent_hole *hole;
492 hole = rb_entry(rb, struct file_extent_hole, node);
498 if (!list_empty(&rec->backrefs))
499 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
500 list_del(&orig->list);
504 if (!list_empty(&rec->orphan_extents))
505 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
506 list_del(&orig->list);
515 static void print_orphan_data_extents(struct list_head *orphan_extents,
518 struct orphan_data_extent *orphan;
520 if (list_empty(orphan_extents))
522 printf("The following data extent is lost in tree %llu:\n",
524 list_for_each_entry(orphan, orphan_extents, list) {
525 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
526 orphan->objectid, orphan->offset, orphan->disk_bytenr,
531 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
533 u64 root_objectid = root->root_key.objectid;
534 int errors = rec->errors;
538 /* reloc root errors, we print its corresponding fs root objectid*/
539 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
540 root_objectid = root->root_key.offset;
541 fprintf(stderr, "reloc");
543 fprintf(stderr, "root %llu inode %llu errors %x",
544 (unsigned long long) root_objectid,
545 (unsigned long long) rec->ino, rec->errors);
547 if (errors & I_ERR_NO_INODE_ITEM)
548 fprintf(stderr, ", no inode item");
549 if (errors & I_ERR_NO_ORPHAN_ITEM)
550 fprintf(stderr, ", no orphan item");
551 if (errors & I_ERR_DUP_INODE_ITEM)
552 fprintf(stderr, ", dup inode item");
553 if (errors & I_ERR_DUP_DIR_INDEX)
554 fprintf(stderr, ", dup dir index");
555 if (errors & I_ERR_ODD_DIR_ITEM)
556 fprintf(stderr, ", odd dir item");
557 if (errors & I_ERR_ODD_FILE_EXTENT)
558 fprintf(stderr, ", odd file extent");
559 if (errors & I_ERR_BAD_FILE_EXTENT)
560 fprintf(stderr, ", bad file extent");
561 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
562 fprintf(stderr, ", file extent overlap");
563 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
564 fprintf(stderr, ", file extent discount");
565 if (errors & I_ERR_DIR_ISIZE_WRONG)
566 fprintf(stderr, ", dir isize wrong");
567 if (errors & I_ERR_FILE_NBYTES_WRONG)
568 fprintf(stderr, ", nbytes wrong");
569 if (errors & I_ERR_ODD_CSUM_ITEM)
570 fprintf(stderr, ", odd csum item");
571 if (errors & I_ERR_SOME_CSUM_MISSING)
572 fprintf(stderr, ", some csum missing");
573 if (errors & I_ERR_LINK_COUNT_WRONG)
574 fprintf(stderr, ", link count wrong");
575 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
576 fprintf(stderr, ", orphan file extent");
577 fprintf(stderr, "\n");
578 /* Print the orphan extents if needed */
579 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
580 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
582 /* Print the holes if needed */
583 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
584 struct file_extent_hole *hole;
585 struct rb_node *node;
588 node = rb_first(&rec->holes);
589 fprintf(stderr, "Found file extent holes:\n");
592 hole = rb_entry(node, struct file_extent_hole, node);
593 fprintf(stderr, "\tstart: %llu, len: %llu\n",
594 hole->start, hole->len);
595 node = rb_next(node);
598 fprintf(stderr, "\tstart: 0, len: %llu\n",
600 root->fs_info->sectorsize));
604 static void print_ref_error(int errors)
606 if (errors & REF_ERR_NO_DIR_ITEM)
607 fprintf(stderr, ", no dir item");
608 if (errors & REF_ERR_NO_DIR_INDEX)
609 fprintf(stderr, ", no dir index");
610 if (errors & REF_ERR_NO_INODE_REF)
611 fprintf(stderr, ", no inode ref");
612 if (errors & REF_ERR_DUP_DIR_ITEM)
613 fprintf(stderr, ", dup dir item");
614 if (errors & REF_ERR_DUP_DIR_INDEX)
615 fprintf(stderr, ", dup dir index");
616 if (errors & REF_ERR_DUP_INODE_REF)
617 fprintf(stderr, ", dup inode ref");
618 if (errors & REF_ERR_INDEX_UNMATCH)
619 fprintf(stderr, ", index mismatch");
620 if (errors & REF_ERR_FILETYPE_UNMATCH)
621 fprintf(stderr, ", filetype mismatch");
622 if (errors & REF_ERR_NAME_TOO_LONG)
623 fprintf(stderr, ", name too long");
624 if (errors & REF_ERR_NO_ROOT_REF)
625 fprintf(stderr, ", no root ref");
626 if (errors & REF_ERR_NO_ROOT_BACKREF)
627 fprintf(stderr, ", no root backref");
628 if (errors & REF_ERR_DUP_ROOT_REF)
629 fprintf(stderr, ", dup root ref");
630 if (errors & REF_ERR_DUP_ROOT_BACKREF)
631 fprintf(stderr, ", dup root backref");
632 fprintf(stderr, "\n");
635 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638 struct ptr_node *node;
639 struct cache_extent *cache;
640 struct inode_record *rec = NULL;
643 cache = lookup_cache_extent(inode_cache, ino, 1);
645 node = container_of(cache, struct ptr_node, cache);
647 if (mod && rec->refs > 1) {
648 node->data = clone_inode_rec(rec);
649 if (IS_ERR(node->data))
655 rec = calloc(1, sizeof(*rec));
657 return ERR_PTR(-ENOMEM);
659 rec->extent_start = (u64)-1;
661 INIT_LIST_HEAD(&rec->backrefs);
662 INIT_LIST_HEAD(&rec->orphan_extents);
663 rec->holes = RB_ROOT;
665 node = malloc(sizeof(*node));
668 return ERR_PTR(-ENOMEM);
670 node->cache.start = ino;
671 node->cache.size = 1;
674 if (ino == BTRFS_FREE_INO_OBJECTID)
677 ret = insert_cache_extent(inode_cache, &node->cache);
679 return ERR_PTR(-EEXIST);
684 static void free_orphan_data_extents(struct list_head *orphan_extents)
686 struct orphan_data_extent *orphan;
688 while (!list_empty(orphan_extents)) {
689 orphan = list_entry(orphan_extents->next,
690 struct orphan_data_extent, list);
691 list_del(&orphan->list);
696 static void free_inode_rec(struct inode_record *rec)
698 struct inode_backref *backref;
703 while (!list_empty(&rec->backrefs)) {
704 backref = to_inode_backref(rec->backrefs.next);
705 list_del(&backref->list);
708 free_orphan_data_extents(&rec->orphan_extents);
709 free_file_extent_holes(&rec->holes);
713 static int can_free_inode_rec(struct inode_record *rec)
715 if (!rec->errors && rec->checked && rec->found_inode_item &&
716 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
721 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
722 struct inode_record *rec)
724 struct cache_extent *cache;
725 struct inode_backref *tmp, *backref;
726 struct ptr_node *node;
729 if (!rec->found_inode_item)
732 filetype = imode_to_type(rec->imode);
733 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
734 if (backref->found_dir_item && backref->found_dir_index) {
735 if (backref->filetype != filetype)
736 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
737 if (!backref->errors && backref->found_inode_ref &&
738 rec->nlink == rec->found_link) {
739 list_del(&backref->list);
745 if (!rec->checked || rec->merging)
748 if (S_ISDIR(rec->imode)) {
749 if (rec->found_size != rec->isize)
750 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
751 if (rec->found_file_extent)
752 rec->errors |= I_ERR_ODD_FILE_EXTENT;
753 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
754 if (rec->found_dir_item)
755 rec->errors |= I_ERR_ODD_DIR_ITEM;
756 if (rec->found_size != rec->nbytes)
757 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
758 if (rec->nlink > 0 && !no_holes &&
759 (rec->extent_end < rec->isize ||
760 first_extent_gap(&rec->holes) < rec->isize))
761 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
765 if (rec->found_csum_item && rec->nodatasum)
766 rec->errors |= I_ERR_ODD_CSUM_ITEM;
767 if (rec->some_csum_missing && !rec->nodatasum)
768 rec->errors |= I_ERR_SOME_CSUM_MISSING;
771 BUG_ON(rec->refs != 1);
772 if (can_free_inode_rec(rec)) {
773 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
774 node = container_of(cache, struct ptr_node, cache);
775 BUG_ON(node->data != rec);
776 remove_cache_extent(inode_cache, &node->cache);
782 static int check_orphan_item(struct btrfs_root *root, u64 ino)
784 struct btrfs_path path;
785 struct btrfs_key key;
788 key.objectid = BTRFS_ORPHAN_OBJECTID;
789 key.type = BTRFS_ORPHAN_ITEM_KEY;
792 btrfs_init_path(&path);
793 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
794 btrfs_release_path(&path);
800 static int process_inode_item(struct extent_buffer *eb,
801 int slot, struct btrfs_key *key,
802 struct shared_node *active_node)
804 struct inode_record *rec;
805 struct btrfs_inode_item *item;
807 rec = active_node->current;
808 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
809 if (rec->found_inode_item) {
810 rec->errors |= I_ERR_DUP_INODE_ITEM;
813 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
814 rec->nlink = btrfs_inode_nlink(eb, item);
815 rec->isize = btrfs_inode_size(eb, item);
816 rec->nbytes = btrfs_inode_nbytes(eb, item);
817 rec->imode = btrfs_inode_mode(eb, item);
818 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
820 rec->found_inode_item = 1;
822 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
823 maybe_free_inode_rec(&active_node->inode_cache, rec);
827 static struct inode_backref *get_inode_backref(struct inode_record *rec,
829 int namelen, u64 dir)
831 struct inode_backref *backref;
833 list_for_each_entry(backref, &rec->backrefs, list) {
834 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
836 if (backref->dir != dir || backref->namelen != namelen)
838 if (memcmp(name, backref->name, namelen))
843 backref = malloc(sizeof(*backref) + namelen + 1);
846 memset(backref, 0, sizeof(*backref));
848 backref->namelen = namelen;
849 memcpy(backref->name, name, namelen);
850 backref->name[namelen] = '\0';
851 list_add_tail(&backref->list, &rec->backrefs);
855 static int add_inode_backref(struct cache_tree *inode_cache,
856 u64 ino, u64 dir, u64 index,
857 const char *name, int namelen,
858 u8 filetype, u8 itemtype, int errors)
860 struct inode_record *rec;
861 struct inode_backref *backref;
863 rec = get_inode_rec(inode_cache, ino, 1);
865 backref = get_inode_backref(rec, name, namelen, dir);
868 backref->errors |= errors;
869 if (itemtype == BTRFS_DIR_INDEX_KEY) {
870 if (backref->found_dir_index)
871 backref->errors |= REF_ERR_DUP_DIR_INDEX;
872 if (backref->found_inode_ref && backref->index != index)
873 backref->errors |= REF_ERR_INDEX_UNMATCH;
874 if (backref->found_dir_item && backref->filetype != filetype)
875 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
877 backref->index = index;
878 backref->filetype = filetype;
879 backref->found_dir_index = 1;
880 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
882 if (backref->found_dir_item)
883 backref->errors |= REF_ERR_DUP_DIR_ITEM;
884 if (backref->found_dir_index && backref->filetype != filetype)
885 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
887 backref->filetype = filetype;
888 backref->found_dir_item = 1;
889 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
890 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
891 if (backref->found_inode_ref)
892 backref->errors |= REF_ERR_DUP_INODE_REF;
893 if (backref->found_dir_index && backref->index != index)
894 backref->errors |= REF_ERR_INDEX_UNMATCH;
896 backref->index = index;
898 backref->ref_type = itemtype;
899 backref->found_inode_ref = 1;
904 maybe_free_inode_rec(inode_cache, rec);
908 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
909 struct cache_tree *dst_cache)
911 struct inode_backref *backref;
916 list_for_each_entry(backref, &src->backrefs, list) {
917 if (backref->found_dir_index) {
918 add_inode_backref(dst_cache, dst->ino, backref->dir,
919 backref->index, backref->name,
920 backref->namelen, backref->filetype,
921 BTRFS_DIR_INDEX_KEY, backref->errors);
923 if (backref->found_dir_item) {
925 add_inode_backref(dst_cache, dst->ino,
926 backref->dir, 0, backref->name,
927 backref->namelen, backref->filetype,
928 BTRFS_DIR_ITEM_KEY, backref->errors);
930 if (backref->found_inode_ref) {
931 add_inode_backref(dst_cache, dst->ino,
932 backref->dir, backref->index,
933 backref->name, backref->namelen, 0,
934 backref->ref_type, backref->errors);
938 if (src->found_dir_item)
939 dst->found_dir_item = 1;
940 if (src->found_file_extent)
941 dst->found_file_extent = 1;
942 if (src->found_csum_item)
943 dst->found_csum_item = 1;
944 if (src->some_csum_missing)
945 dst->some_csum_missing = 1;
946 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
947 ret = copy_file_extent_holes(&dst->holes, &src->holes);
952 BUG_ON(src->found_link < dir_count);
953 dst->found_link += src->found_link - dir_count;
954 dst->found_size += src->found_size;
955 if (src->extent_start != (u64)-1) {
956 if (dst->extent_start == (u64)-1) {
957 dst->extent_start = src->extent_start;
958 dst->extent_end = src->extent_end;
960 if (dst->extent_end > src->extent_start)
961 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
962 else if (dst->extent_end < src->extent_start) {
963 ret = add_file_extent_hole(&dst->holes,
965 src->extent_start - dst->extent_end);
967 if (dst->extent_end < src->extent_end)
968 dst->extent_end = src->extent_end;
972 dst->errors |= src->errors;
973 if (src->found_inode_item) {
974 if (!dst->found_inode_item) {
975 dst->nlink = src->nlink;
976 dst->isize = src->isize;
977 dst->nbytes = src->nbytes;
978 dst->imode = src->imode;
979 dst->nodatasum = src->nodatasum;
980 dst->found_inode_item = 1;
982 dst->errors |= I_ERR_DUP_INODE_ITEM;
990 static int splice_shared_node(struct shared_node *src_node,
991 struct shared_node *dst_node)
993 struct cache_extent *cache;
994 struct ptr_node *node, *ins;
995 struct cache_tree *src, *dst;
996 struct inode_record *rec, *conflict;
1001 if (--src_node->refs == 0)
1003 if (src_node->current)
1004 current_ino = src_node->current->ino;
1006 src = &src_node->root_cache;
1007 dst = &dst_node->root_cache;
1009 cache = search_cache_extent(src, 0);
1011 node = container_of(cache, struct ptr_node, cache);
1013 cache = next_cache_extent(cache);
1016 remove_cache_extent(src, &node->cache);
1019 ins = malloc(sizeof(*ins));
1021 ins->cache.start = node->cache.start;
1022 ins->cache.size = node->cache.size;
1026 ret = insert_cache_extent(dst, &ins->cache);
1027 if (ret == -EEXIST) {
1028 conflict = get_inode_rec(dst, rec->ino, 1);
1029 BUG_ON(IS_ERR(conflict));
1030 merge_inode_recs(rec, conflict, dst);
1032 conflict->checked = 1;
1033 if (dst_node->current == conflict)
1034 dst_node->current = NULL;
1036 maybe_free_inode_rec(dst, conflict);
1037 free_inode_rec(rec);
1044 if (src == &src_node->root_cache) {
1045 src = &src_node->inode_cache;
1046 dst = &dst_node->inode_cache;
1050 if (current_ino > 0 && (!dst_node->current ||
1051 current_ino > dst_node->current->ino)) {
1052 if (dst_node->current) {
1053 dst_node->current->checked = 1;
1054 maybe_free_inode_rec(dst, dst_node->current);
1056 dst_node->current = get_inode_rec(dst, current_ino, 1);
1057 BUG_ON(IS_ERR(dst_node->current));
1062 static void free_inode_ptr(struct cache_extent *cache)
1064 struct ptr_node *node;
1065 struct inode_record *rec;
1067 node = container_of(cache, struct ptr_node, cache);
1069 free_inode_rec(rec);
1073 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1075 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078 struct cache_extent *cache;
1079 struct shared_node *node;
1081 cache = lookup_cache_extent(shared, bytenr, 1);
1083 node = container_of(cache, struct shared_node, cache);
1089 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 struct shared_node *node;
1094 node = calloc(1, sizeof(*node));
1097 node->cache.start = bytenr;
1098 node->cache.size = 1;
1099 cache_tree_init(&node->root_cache);
1100 cache_tree_init(&node->inode_cache);
1103 ret = insert_cache_extent(shared, &node->cache);
1108 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1109 struct walk_control *wc, int level)
1111 struct shared_node *node;
1112 struct shared_node *dest;
1115 if (level == wc->active_node)
1118 BUG_ON(wc->active_node <= level);
1119 node = find_shared_node(&wc->shared, bytenr);
1121 ret = add_shared_node(&wc->shared, bytenr, refs);
1123 node = find_shared_node(&wc->shared, bytenr);
1124 wc->nodes[level] = node;
1125 wc->active_node = level;
1129 if (wc->root_level == wc->active_node &&
1130 btrfs_root_refs(&root->root_item) == 0) {
1131 if (--node->refs == 0) {
1132 free_inode_recs_tree(&node->root_cache);
1133 free_inode_recs_tree(&node->inode_cache);
1134 remove_cache_extent(&wc->shared, &node->cache);
1140 dest = wc->nodes[wc->active_node];
1141 splice_shared_node(node, dest);
1142 if (node->refs == 0) {
1143 remove_cache_extent(&wc->shared, &node->cache);
1149 static int leave_shared_node(struct btrfs_root *root,
1150 struct walk_control *wc, int level)
1152 struct shared_node *node;
1153 struct shared_node *dest;
1156 if (level == wc->root_level)
1159 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1163 BUG_ON(i >= BTRFS_MAX_LEVEL);
1165 node = wc->nodes[wc->active_node];
1166 wc->nodes[wc->active_node] = NULL;
1167 wc->active_node = i;
1169 dest = wc->nodes[wc->active_node];
1170 if (wc->active_node < wc->root_level ||
1171 btrfs_root_refs(&root->root_item) > 0) {
1172 BUG_ON(node->refs <= 1);
1173 splice_shared_node(node, dest);
1175 BUG_ON(node->refs < 2);
1184 * 1 - if the root with id child_root_id is a child of root parent_root_id
1185 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1186 * has other root(s) as parent(s)
1187 * 2 - if the root child_root_id doesn't have any parent roots
1189 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192 struct btrfs_path path;
1193 struct btrfs_key key;
1194 struct extent_buffer *leaf;
1198 btrfs_init_path(&path);
1200 key.objectid = parent_root_id;
1201 key.type = BTRFS_ROOT_REF_KEY;
1202 key.offset = child_root_id;
1203 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1207 btrfs_release_path(&path);
1211 key.objectid = child_root_id;
1212 key.type = BTRFS_ROOT_BACKREF_KEY;
1214 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1220 leaf = path.nodes[0];
1221 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1222 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225 leaf = path.nodes[0];
1228 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1229 if (key.objectid != child_root_id ||
1230 key.type != BTRFS_ROOT_BACKREF_KEY)
1235 if (key.offset == parent_root_id) {
1236 btrfs_release_path(&path);
1243 btrfs_release_path(&path);
1246 return has_parent ? 0 : 2;
1249 static int process_dir_item(struct extent_buffer *eb,
1250 int slot, struct btrfs_key *key,
1251 struct shared_node *active_node)
1261 struct btrfs_dir_item *di;
1262 struct inode_record *rec;
1263 struct cache_tree *root_cache;
1264 struct cache_tree *inode_cache;
1265 struct btrfs_key location;
1266 char namebuf[BTRFS_NAME_LEN];
1268 root_cache = &active_node->root_cache;
1269 inode_cache = &active_node->inode_cache;
1270 rec = active_node->current;
1271 rec->found_dir_item = 1;
1273 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1274 total = btrfs_item_size_nr(eb, slot);
1275 while (cur < total) {
1277 btrfs_dir_item_key_to_cpu(eb, di, &location);
1278 name_len = btrfs_dir_name_len(eb, di);
1279 data_len = btrfs_dir_data_len(eb, di);
1280 filetype = btrfs_dir_type(eb, di);
1282 rec->found_size += name_len;
1283 if (cur + sizeof(*di) + name_len > total ||
1284 name_len > BTRFS_NAME_LEN) {
1285 error = REF_ERR_NAME_TOO_LONG;
1287 if (cur + sizeof(*di) > total)
1289 len = min_t(u32, total - cur - sizeof(*di),
1296 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1298 if (key->type == BTRFS_DIR_ITEM_KEY &&
1299 key->offset != btrfs_name_hash(namebuf, len)) {
1300 rec->errors |= I_ERR_ODD_DIR_ITEM;
1301 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1302 key->objectid, key->offset, namebuf, len, filetype,
1303 key->offset, btrfs_name_hash(namebuf, len));
1306 if (location.type == BTRFS_INODE_ITEM_KEY) {
1307 add_inode_backref(inode_cache, location.objectid,
1308 key->objectid, key->offset, namebuf,
1309 len, filetype, key->type, error);
1310 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1311 add_inode_backref(root_cache, location.objectid,
1312 key->objectid, key->offset,
1313 namebuf, len, filetype,
1317 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1318 location.type, key->objectid, key->offset);
1319 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1320 key->objectid, key->offset, namebuf,
1321 len, filetype, key->type, error);
1324 len = sizeof(*di) + name_len + data_len;
1325 di = (struct btrfs_dir_item *)((char *)di + len);
1328 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1329 rec->errors |= I_ERR_DUP_DIR_INDEX;
1334 static int process_inode_ref(struct extent_buffer *eb,
1335 int slot, struct btrfs_key *key,
1336 struct shared_node *active_node)
1344 struct cache_tree *inode_cache;
1345 struct btrfs_inode_ref *ref;
1346 char namebuf[BTRFS_NAME_LEN];
1348 inode_cache = &active_node->inode_cache;
1350 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1351 total = btrfs_item_size_nr(eb, slot);
1352 while (cur < total) {
1353 name_len = btrfs_inode_ref_name_len(eb, ref);
1354 index = btrfs_inode_ref_index(eb, ref);
1356 /* inode_ref + namelen should not cross item boundary */
1357 if (cur + sizeof(*ref) + name_len > total ||
1358 name_len > BTRFS_NAME_LEN) {
1359 if (total < cur + sizeof(*ref))
1362 /* Still try to read out the remaining part */
1363 len = min_t(u32, total - cur - sizeof(*ref),
1365 error = REF_ERR_NAME_TOO_LONG;
1371 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1372 add_inode_backref(inode_cache, key->objectid, key->offset,
1373 index, namebuf, len, 0, key->type, error);
1375 len = sizeof(*ref) + name_len;
1376 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Like process_inode_ref() but for INODE_EXTREF items: the parent dir is
 * read from the extref payload itself (btrfs_inode_extref_parent) rather
 * than from key->offset, which for extrefs is a name hash.
 */
1382 static int process_inode_extref(struct extent_buffer *eb,
1383 int slot, struct btrfs_key *key,
1384 struct shared_node *active_node)
1393 struct cache_tree *inode_cache;
1394 struct btrfs_inode_extref *extref;
1395 char namebuf[BTRFS_NAME_LEN];
1397 inode_cache = &active_node->inode_cache;
1399 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1400 total = btrfs_item_size_nr(eb, slot);
/* Several extrefs may be packed into one item; walk them all. */
1401 while (cur < total) {
1402 name_len = btrfs_inode_extref_name_len(eb, extref);
1403 index = btrfs_inode_extref_index(eb, extref);
1404 parent = btrfs_inode_extref_parent(eb, extref);
1405 if (name_len <= BTRFS_NAME_LEN) {
/* Name too long: clamp the copy and flag the backref. */
1409 len = BTRFS_NAME_LEN;
1410 error = REF_ERR_NAME_TOO_LONG;
1412 read_extent_buffer(eb, namebuf,
1413 (unsigned long)(extref + 1), len);
1414 add_inode_backref(inode_cache, key->objectid, parent,
1415 index, namebuf, len, 0, key->type, error);
/* Step to the next packed extref. */
1417 len = sizeof(*extref) + name_len;
1418 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Validate one EXTENT_DATA item of the current inode record: track the
 * covered range (found_size/extent_end), detect overlaps and holes, and
 * verify csum coverage.  Problems are accumulated as I_ERR_* bits in
 * rec->errors rather than returned directly.
 */
1425 static int process_file_extent(struct btrfs_root *root,
1426 struct extent_buffer *eb,
1427 int slot, struct btrfs_key *key,
1428 struct shared_node *active_node)
1430 struct inode_record *rec;
1431 struct btrfs_file_extent_item *fi;
1433 u64 disk_bytenr = 0;
1434 u64 extent_offset = 0;
/* mask for rounding up to the sector size (sectorsize is a power of 2) */
1435 u64 mask = root->fs_info->sectorsize - 1;
1439 rec = active_node->current;
1440 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1441 rec->found_file_extent = 1;
/* First extent for this inode: start tracking from key->offset. */
1443 if (rec->extent_start == (u64)-1) {
1444 rec->extent_start = key->offset;
1445 rec->extent_end = key->offset;
/* Previous extent end past this offset => overlap; before it => hole. */
1448 if (rec->extent_end > key->offset)
1449 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1450 else if (rec->extent_end < key->offset) {
1451 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1452 key->offset - rec->extent_end);
1457 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1458 extent_type = btrfs_file_extent_type(eb, fi);
1460 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1461 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1463 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1464 rec->found_size += num_bytes;
/* Round the inline length up to a full sector for extent_end. */
1465 num_bytes = (num_bytes + mask) & ~mask;
1466 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1467 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1468 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1469 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1470 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Regular extents must be non-empty and sector aligned. */
1471 if (num_bytes == 0 || (num_bytes & mask))
1472 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1473 if (num_bytes + extent_offset >
1474 btrfs_file_extent_ram_bytes(eb, fi))
1475 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Prealloc extents cannot be compressed/encrypted/encoded. */
1476 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1477 (btrfs_file_extent_compression(eb, fi) ||
1478 btrfs_file_extent_encryption(eb, fi) ||
1479 btrfs_file_extent_other_encoding(eb, fi)))
1480 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 is a hole extent; it contributes no size. */
1481 if (disk_bytenr > 0)
1482 rec->found_size += num_bytes;
1484 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1486 rec->extent_end = key->offset + num_bytes;
1489 * The data reloc tree will copy full extents into its inode and then
1490 * copy the corresponding csums. Because the extent it copied could be
1491 * a preallocated extent that hasn't been written to yet there may be no
1492 * csums to copy, ergo we won't have csums for our file extent. This is
1493 * ok so just don't bother checking csums if the inode belongs to the
1496 if (disk_bytenr > 0 &&
1497 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Csums cover on-disk (compressed) bytes, not logical bytes. */
1499 if (btrfs_file_extent_compression(eb, fi))
1500 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1502 disk_bytenr += extent_offset;
1504 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
/* REG extents should be fully csummed; PREALLOC must have none. */
1508 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1510 rec->found_csum_item = 1;
1511 if (found < num_bytes)
1512 rec->some_csum_missing = 1;
1513 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1515 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Scan every item of leaf @eb and dispatch it to the per-type processing
 * helpers, keeping wc->nodes[wc->active_node]->current pointed at the
 * inode record the items belong to.
 */
1521 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1522 struct walk_control *wc)
1524 struct btrfs_key key;
1528 struct cache_tree *inode_cache;
1529 struct shared_node *active_node;
/* Dead root (refs == 0): nothing reachable to verify at top level. */
1531 if (wc->root_level == wc->active_node &&
1532 btrfs_root_refs(&root->root_item) == 0)
1535 active_node = wc->nodes[wc->active_node];
1536 inode_cache = &active_node->inode_cache;
1537 nritems = btrfs_header_nritems(eb);
1538 for (i = 0; i < nritems; i++) {
1539 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space cache and orphan items are not inode metadata; skip. */
1541 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1543 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Keys are sorted, so a larger objectid means a new inode begins:
 * finish the previous record and fetch/create the next one. */
1546 if (active_node->current == NULL ||
1547 active_node->current->ino < key.objectid) {
1548 if (active_node->current) {
1549 active_node->current->checked = 1;
1550 maybe_free_inode_rec(inode_cache,
1551 active_node->current);
1553 active_node->current = get_inode_rec(inode_cache,
1555 BUG_ON(IS_ERR(active_node->current));
1558 case BTRFS_DIR_ITEM_KEY:
1559 case BTRFS_DIR_INDEX_KEY:
1560 ret = process_dir_item(eb, i, &key, active_node);
1562 case BTRFS_INODE_REF_KEY:
1563 ret = process_inode_ref(eb, i, &key, active_node);
1565 case BTRFS_INODE_EXTREF_KEY:
1566 ret = process_inode_extref(eb, i, &key, active_node);
1568 case BTRFS_INODE_ITEM_KEY:
1569 ret = process_inode_item(eb, i, &key, active_node);
1571 case BTRFS_EXTENT_DATA_KEY:
1572 ret = process_file_extent(root, eb, i, &key,
/*
 * Walk down the tree along @path, processing each leaf via
 * process_one_leaf() and validating every child node before descending.
 * @nrefs caches the extent refcount per level so repeated visits to the
 * same block avoid a second extent-tree lookup; blocks with extra refs
 * are entered as shared nodes and may be skipped if already checked.
 */
1582 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1583 struct walk_control *wc, int *level,
1584 struct node_refs *nrefs)
1586 enum btrfs_tree_block_status status;
1589 struct btrfs_fs_info *fs_info = root->fs_info;
1590 struct extent_buffer *next;
1591 struct extent_buffer *cur;
1595 WARN_ON(*level < 0);
1596 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refcount if this block was looked up before. */
1598 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1599 refs = nrefs->refs[*level];
1602 ret = btrfs_lookup_extent_info(NULL, root,
1603 path->nodes[*level]->start,
1604 *level, 1, &refs, NULL);
1609 nrefs->bytenr[*level] = path->nodes[*level]->start;
1610 nrefs->refs[*level] = refs;
/* Shared block (refs > 1, presumably): track it as a shared node. */
1614 ret = enter_shared_node(root, path->nodes[*level]->start,
1622 while (*level >= 0) {
1623 WARN_ON(*level < 0);
1624 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1625 cur = path->nodes[*level];
/* Header level must match the level we think we are on. */
1627 if (btrfs_header_level(cur) != *level)
/* Slot exhausted: the caller's walk_up_tree advances to a sibling. */
1630 if (path->slots[*level] >= btrfs_header_nritems(cur))
1633 ret = process_one_leaf(root, cur, wc);
1638 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1639 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme as above, for the child level. */
1641 if (bytenr == nrefs->bytenr[*level - 1]) {
1642 refs = nrefs->refs[*level - 1];
1644 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1645 *level - 1, 1, &refs, NULL);
1649 nrefs->bytenr[*level - 1] = bytenr;
1650 nrefs->refs[*level - 1] = refs;
1655 ret = enter_shared_node(root, bytenr, refs,
/* Shared child already checked elsewhere: skip descending into it. */
1658 path->slots[*level]++;
1663 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
/* Not cached or stale generation: read it (with readahead) from disk. */
1664 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1665 free_extent_buffer(next);
1666 reada_walk_down(root, cur, path->slots[*level]);
1667 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Unreadable child: record a corrupt-extent entry for the parent. */
1668 if (!extent_buffer_uptodate(next)) {
1669 struct btrfs_key node_key;
1671 btrfs_node_key_to_cpu(path->nodes[*level],
1673 path->slots[*level]);
1674 btrfs_add_corrupt_extent_record(root->fs_info,
1676 path->nodes[*level]->start,
1677 root->fs_info->nodesize,
1684 ret = check_child_node(cur, path->slots[*level], next);
1686 free_extent_buffer(next);
/* Sanity-check the block structure before descending into it. */
1691 if (btrfs_is_leaf(next))
1692 status = btrfs_check_leaf(root, NULL, next);
1694 status = btrfs_check_node(root, NULL, next);
1695 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1696 free_extent_buffer(next);
/* Descend one level: replace the path node at the child level. */
1701 *level = *level - 1;
1702 free_extent_buffer(path->nodes[*level]);
1703 path->nodes[*level] = next;
1704 path->slots[*level] = 0;
1707 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Pop finished levels off @path until a node with an unvisited sibling
 * slot is found, releasing buffers and leaving shared nodes on the way up.
 */
1711 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1712 struct walk_control *wc, int *level)
1715 struct extent_buffer *leaf;
1717 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1718 leaf = path->nodes[i];
/* Still a next slot at this level: resume walking from here. */
1719 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* This level is exhausted: drop its buffer and keep climbing. */
1724 free_extent_buffer(path->nodes[*level]);
1725 path->nodes[*level] = NULL;
1726 BUG_ON(*level > wc->active_node);
1727 if (*level == wc->active_node)
1728 leave_shared_node(root, wc, *level);
/*
 * Verify that @rec looks like a valid root directory inode: inode item
 * present with no errors, nlink 1, no child links, and exactly a ".."
 * inode ref at index 0 with no dir item/index entries.
 */
1735 static int check_root_dir(struct inode_record *rec)
1737 struct inode_backref *backref;
1740 if (!rec->found_inode_item || rec->errors)
1742 if (rec->nlink != 1 || rec->found_link != 0)
1744 if (list_empty(&rec->backrefs))
1746 backref = to_inode_backref(rec->backrefs.next)
1747 if (!backref->found_inode_ref)
/* Root dir's sole backref must be the ".." self reference. */
1749 if (backref->index != 0 || backref->namelen != 2 ||
1750 memcmp(backref->name, "..", 2))
1752 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the on-disk i_size of directory @rec->ino to the size computed
 * from its dir entries (rec->found_size), then clear I_ERR_DIR_ISIZE_WRONG.
 */
1759 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1760 struct btrfs_root *root, struct btrfs_path *path,
1761 struct inode_record *rec)
1763 struct btrfs_inode_item *ei;
1764 struct btrfs_key key;
/* offset (u64)-1 + the search below lands just after the inode item. */
1767 key.objectid = rec->ino;
1768 key.type = BTRFS_INODE_ITEM_KEY;
1769 key.offset = (u64)-1;
1771 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1775 if (!path->slots[0]) {
/* Confirm the previous slot really is this inode's item. */
1782 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1783 if (key.objectid != rec->ino) {
1788 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1789 struct btrfs_inode_item);
1790 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1791 btrfs_mark_buffer_dirty(path->nodes[0]);
1792 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1793 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1794 root->root_key.objectid);
1796 btrfs_release_path(path);
/*
 * Insert the missing orphan item for @rec->ino and clear
 * I_ERR_NO_ORPHAN_ITEM on success (error path not visible here).
 */
1800 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1801 struct btrfs_root *root,
1802 struct btrfs_path *path,
1803 struct inode_record *rec)
1807 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1808 btrfs_release_path(path);
1810 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the inode item's nbytes field to rec->found_size (the byte
 * count derived from the file extents found) and clear
 * I_ERR_FILE_NBYTES_WRONG.
 */
1814 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1815 struct btrfs_root *root,
1816 struct btrfs_path *path,
1817 struct inode_record *rec)
1819 struct btrfs_inode_item *ei;
1820 struct btrfs_key key;
1823 key.objectid = rec->ino;
1824 key.type = BTRFS_INODE_ITEM_KEY;
1827 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1834 /* Since ret == 0, no need to check anything */
1835 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1836 struct btrfs_inode_item);
1837 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
1838 btrfs_mark_buffer_dirty(path->nodes[0]);
1839 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
1840 printf("reset nbytes for ino %llu root %llu\n",
1841 rec->ino, root->root_key.objectid);
1843 btrfs_release_path(path);
/*
 * Re-create a missing DIR_INDEX item for @backref in directory
 * @backref->dir, pointing at inode @rec->ino, inside its own transaction.
 * Afterwards update the parent directory's inode record so its computed
 * isize reflects the added name.
 */
1847 static int add_missing_dir_index(struct btrfs_root *root,
1848 struct cache_tree *inode_cache,
1849 struct inode_record *rec,
1850 struct inode_backref *backref)
1852 struct btrfs_path path;
1853 struct btrfs_trans_handle *trans;
1854 struct btrfs_dir_item *dir_item;
1855 struct extent_buffer *leaf;
1856 struct btrfs_key key;
1857 struct btrfs_disk_key disk_key;
1858 struct inode_record *dir_rec;
1859 unsigned long name_ptr;
/* dir_item struct is followed inline by the name bytes. */
1860 u32 data_size = sizeof(*dir_item) + backref->namelen;
1863 trans = btrfs_start_transaction(root, 1);
1865 return PTR_ERR(trans);
1867 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1868 (unsigned long long)rec->ino);
1870 btrfs_init_path(&path);
1871 key.objectid = backref->dir;
1872 key.type = BTRFS_DIR_INDEX_KEY;
1873 key.offset = backref->index;
1874 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
1877 leaf = path.nodes[0];
1878 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The dir item's embedded location key points at the child inode. */
1880 disk_key.objectid = cpu_to_le64(rec->ino);
1881 disk_key.type = BTRFS_INODE_ITEM_KEY;
1882 disk_key.offset = 0;
1884 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1885 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1886 btrfs_set_dir_data_len(leaf, dir_item, 0);
1887 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
1888 name_ptr = (unsigned long)(dir_item + 1);
1889 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
1890 btrfs_mark_buffer_dirty(leaf);
1891 btrfs_release_path(&path);
1892 btrfs_commit_transaction(trans, root);
1894 backref->found_dir_index = 1;
1895 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
1896 BUG_ON(IS_ERR(dir_rec));
/* Adding a name grows the directory's logical size; re-evaluate the
 * isize-mismatch flag on the parent record. */
1899 dir_rec->found_size += backref->namelen;
1900 if (dir_rec->found_size == dir_rec->isize &&
1901 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
1902 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1903 if (dir_rec->found_size != dir_rec->isize)
1904 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the bad DIR_INDEX entry described by @backref from @root,
 * in its own transaction.
 */
1909 static int delete_dir_index(struct btrfs_root *root,
1910 struct inode_backref *backref)
1912 struct btrfs_trans_handle *trans;
1913 struct btrfs_dir_item *di;
1914 struct btrfs_path path;
1917 trans = btrfs_start_transaction(root, 1);
1919 return PTR_ERR(trans);
1921 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1922 (unsigned long long)backref->dir,
1923 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
1924 (unsigned long long)root->objectid);
1926 btrfs_init_path(&path);
/* mod -1 asks the lookup to prepare the path for deletion. */
1927 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
1928 backref->name, backref->namelen,
1929 backref->index, -1);
/* Lookup failure path: nothing to delete, just commit and bail. */
1932 btrfs_release_path(&path);
1933 btrfs_commit_transaction(trans, root);
/* di == NULL presumably means delete the whole item, otherwise only
 * the one name inside it — TODO confirm against missing branch. */
1940 ret = btrfs_del_item(trans, root, &path);
1942 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
1944 btrfs_release_path(&path);
1945 btrfs_commit_transaction(trans, root);
/*
 * Create a fresh inode item for @rec whose inode item is missing.  The
 * mode is guessed: directory if any dir item was found (even alongside
 * extents, with a warning), regular file otherwise.  @root_dir forces
 * nlink to 1 for the tree's root directory.
 */
1949 static int create_inode_item(struct btrfs_root *root,
1950 struct inode_record *rec, int root_dir)
1952 struct btrfs_trans_handle *trans;
1958 trans = btrfs_start_transaction(root, 1);
1959 if (IS_ERR(trans)) {
1960 ret = PTR_ERR(trans);
1964 nlink = root_dir ? 1 : rec->found_link;
1965 if (rec->found_dir_item) {
/* Conflicting evidence (dir item + file extents): prefer directory. */
1966 if (rec->found_file_extent)
1967 fprintf(stderr, "root %llu inode %llu has both a dir "
1968 "item and extents, unsure if it is a dir or a "
1969 "regular file so setting it as a directory\n",
1970 (unsigned long long)root->objectid,
1971 (unsigned long long)rec->ino);
1972 mode = S_IFDIR | 0755;
1973 size = rec->found_size;
1974 } else if (!rec->found_dir_item) {
1975 size = rec->extent_end;
1976 mode = S_IFREG | 0755;
1979 ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
1981 btrfs_commit_transaction(trans, root);
/*
 * Walk @rec's backrefs and repair each inconsistency: delete bad dir
 * indexes, add missing dir index/item pairs, and recreate the inode item
 * when only refs exist.  Returns a negative error, or the number of
 * repairs made (repaired) when ret stays 0.
 */
1985 static int repair_inode_backrefs(struct btrfs_root *root,
1986 struct inode_record *rec,
1987 struct cache_tree *inode_cache,
1990 struct inode_backref *tmp, *backref;
1991 u64 root_dirid = btrfs_root_dirid(&root->root_item);
1995 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The tree's root dir with no inode item: rebuild it first. */
1996 if (!delete && rec->ino == root_dirid) {
1997 if (!rec->found_inode_item) {
1998 ret = create_inode_item(root, rec, 1);
2005 /* Index 0 for root dir's are special, don't mess with it */
2006 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without a matching inode ref (or with a mismatched
 * index) is bogus: remove it. */
2010 ((backref->found_dir_index && !backref->found_inode_ref) ||
2011 (backref->found_dir_index && backref->found_inode_ref &&
2012 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2013 ret = delete_dir_index(root, backref);
2017 list_del(&backref->list);
/* Have dir item + inode ref but no dir index: insert the index. */
2022 if (!delete && !backref->found_dir_index &&
2023 backref->found_dir_item && backref->found_inode_ref) {
2024 ret = add_missing_dir_index(root, inode_cache, rec,
/* Fully consistent backref: drop it from the to-fix list. */
2029 if (backref->found_dir_item &&
2030 backref->found_dir_index) {
2031 if (!backref->errors &&
2032 backref->found_inode_ref) {
2033 list_del(&backref->list);
/* Only an inode ref exists: recreate both dir entries, unless a
 * conflicting name is present (left for the nlink repair pass). */
2040 if (!delete && (!backref->found_dir_index &&
2041 !backref->found_dir_item &&
2042 backref->found_inode_ref)) {
2043 struct btrfs_trans_handle *trans;
2044 struct btrfs_key location;
2046 ret = check_dir_conflict(root, backref->name,
2052 * let nlink fixing routine to handle it,
2053 * which can do it better.
2058 location.objectid = rec->ino;
2059 location.type = BTRFS_INODE_ITEM_KEY;
2060 location.offset = 0;
2062 trans = btrfs_start_transaction(root, 1);
2063 if (IS_ERR(trans)) {
2064 ret = PTR_ERR(trans);
2067 fprintf(stderr, "adding missing dir index/item pair "
2069 (unsigned long long)rec->ino);
2070 ret = btrfs_insert_dir_item(trans, root, backref->name,
2072 backref->dir, &location,
2073 imode_to_type(rec->imode),
2076 btrfs_commit_transaction(trans, root);
/* All three refs consistent but the inode item is gone: rebuild it. */
2080 if (!delete && (backref->found_inode_ref &&
2081 backref->found_dir_index &&
2082 backref->found_dir_item &&
2083 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2084 !rec->found_inode_item)) {
2085 ret = create_inode_item(root, rec, 0);
2092 return ret ? ret : repaired;
2096 * To determine the file type for nlink/inode_item repair
2098 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2099 * Return -ENOENT if file type is not found.
2101 static int find_file_type(struct inode_record *rec, u8 *type)
2103 struct inode_backref *backref;
2105 /* For inode item recovered case */
2106 if (rec->found_inode_item) {
2107 *type = imode_to_type(rec->imode);
/* Otherwise fall back to the filetype stored in any dir entry. */
2111 list_for_each_entry(backref, &rec->backrefs, list) {
2112 if (backref->found_dir_index || backref->found_dir_item) {
2113 *type = backref->filetype;
2121 * To determine the file name for nlink repair
2123 * Return 0 if file name is found, set name and namelen.
2124 * Return -ENOENT if file name is not found.
2126 static int find_file_name(struct inode_record *rec,
2127 char *name, int *namelen)
2129 struct inode_backref *backref;
/* Any backref that carries a name (dir entry or inode ref) will do. */
2131 list_for_each_entry(backref, &rec->backrefs, list) {
2132 if (backref->found_dir_index || backref->found_dir_item ||
2133 backref->found_inode_ref) {
2134 memcpy(name, backref->name, backref->namelen);
2135 *namelen = backref->namelen;
2142 /* Reset the nlink of the inode to the correct one */
2143 static int reset_nlink(struct btrfs_trans_handle *trans,
2144 struct btrfs_root *root,
2145 struct btrfs_path *path,
2146 struct inode_record *rec)
2148 struct inode_backref *backref;
2149 struct inode_backref *tmp;
2150 struct btrfs_key key;
2151 struct btrfs_inode_item *inode_item;
2154 /* We don't believe this either, reset it and iterate backref */
2155 rec->found_link = 0;
2157 /* Remove all backref including the valid ones */
2158 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2159 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2160 backref->index, backref->name,
2161 backref->namelen, 0);
2165 /* remove invalid backref, so it won't be added back */
2166 if (!(backref->found_dir_index &&
2167 backref->found_dir_item &&
2168 backref->found_inode_ref)) {
2169 list_del(&backref->list);
2176 /* Set nlink to 0 */
2177 key.objectid = rec->ino;
2178 key.type = BTRFS_INODE_ITEM_KEY;
2180 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2187 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2188 struct btrfs_inode_item);
2189 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2190 btrfs_mark_buffer_dirty(path->nodes[0]);
2191 btrfs_release_path(path);
2194 * Add back valid inode_ref/dir_item/dir_index,
2195 * add_link() will handle the nlink inc, so new nlink must be correct
2197 list_for_each_entry(backref, &rec->backrefs, list) {
2198 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2199 backref->name, backref->namelen,
2200 backref->filetype, &backref->index, 1, 0);
2205 btrfs_release_path(path);
/*
 * Fix a wrong link count: rebuild all links from valid backrefs via
 * reset_nlink(), and if no link survives, re-attach the inode under
 * lost+found using a recovered (or synthesized) name and type.
 * I_ERR_LINK_COUNT_WRONG is always cleared to avoid looping on the same
 * inode.
 */
2209 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2210 struct btrfs_root *root,
2211 struct btrfs_path *path,
2212 struct inode_record *rec)
2214 char namebuf[BTRFS_NAME_LEN] = {0};
2217 int name_recovered = 0;
2218 int type_recovered = 0;
2222 * Get file name and type first before these invalid inode ref
2223 * are deleted by remove_all_invalid_backref()
2225 name_recovered = !find_file_name(rec, namebuf, &namelen);
2226 type_recovered = !find_file_type(rec, &type);
/* No usable name anywhere: fall back to the inode number itself. */
2228 if (!name_recovered) {
2229 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2230 rec->ino, rec->ino);
2231 namelen = count_digits(rec->ino);
2232 sprintf(namebuf, "%llu", rec->ino);
2235 if (!type_recovered) {
2236 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2238 type = BTRFS_FT_REG_FILE;
2242 ret = reset_nlink(trans, root, path, rec);
2245 "Failed to reset nlink for inode %llu: %s\n",
2246 rec->ino, strerror(-ret));
/* Completely unreferenced after reset: park it in lost+found. */
2250 if (rec->found_link == 0) {
2251 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2252 namebuf, namelen, type,
2253 (u64 *)&rec->found_link);
2257 printf("Fixed the nlink of inode %llu\n", rec->ino);
2260 * Clear the flag anyway, or we will loop forever for the same inode
2261 * as it will not be removed from the bad inode list and the dead loop
2264 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2265 btrfs_release_path(path);
2270 * Check if there is any normal(reg or prealloc) file extent for given
2272 * This is used to determine the file type when neither its dir_index/item or
2273 * inode_item exists.
2275 * This will *NOT* report error, if any error happens, just consider it does
2276 * not have any normal file extent.
2278 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2280 struct btrfs_path path;
2281 struct btrfs_key key;
2282 struct btrfs_key found_key;
2283 struct btrfs_file_extent_item *fi;
2287 btrfs_init_path(&path);
2289 key.type = BTRFS_EXTENT_DATA_KEY;
2292 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Search may land one past the leaf's items; step to the next leaf. */
2297 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2298 ret = btrfs_next_leaf(root, &path);
/* Stop as soon as we leave this inode's EXTENT_DATA key range. */
2305 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2307 if (found_key.objectid != ino ||
2308 found_key.type != BTRFS_EXTENT_DATA_KEY)
2310 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2311 struct btrfs_file_extent_item);
/* Any non-inline extent counts as a "normal" (reg/prealloc) extent. */
2312 type = btrfs_file_extent_type(path.nodes[0], fi);
2313 if (type != BTRFS_FILE_EXTENT_INLINE) {
2319 btrfs_release_path(&path);
/*
 * Map a BTRFS_FT_* directory-entry file type to the corresponding
 * S_IF* bits of an inode mode.
 */
2323 static u32 btrfs_type_to_imode(u8 type)
2325 static u32 imode_by_btrfs_type[] = {
2326 [BTRFS_FT_REG_FILE] = S_IFREG,
2327 [BTRFS_FT_DIR] = S_IFDIR,
2328 [BTRFS_FT_CHRDEV] = S_IFCHR,
2329 [BTRFS_FT_BLKDEV] = S_IFBLK,
2330 [BTRFS_FT_FIFO] = S_IFIFO,
2331 [BTRFS_FT_SOCK] = S_IFSOCK,
2332 [BTRFS_FT_SYMLINK] = S_IFLNK,
2335 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.  The file type is recovered from
 * backrefs when possible; otherwise it is inferred from found extents /
 * dir items, defaulting to a regular file.  nlink repair is deliberately
 * left to the I_ERR_LINK_COUNT_WRONG pass, which is forced on below.
 */
2338 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2339 struct btrfs_root *root,
2340 struct btrfs_path *path,
2341 struct inode_record *rec)
2345 int type_recovered = 0;
2348 printf("Trying to rebuild inode:%llu\n", rec->ino);
2350 type_recovered = !find_file_type(rec, &filetype);
2353 * Try to determine inode type if type not found.
2355 * For found regular file extent, it must be FILE.
2356 * For found dir_item/index, it must be DIR.
2358 * For undetermined one, use FILE as fallback.
2361 * 1. If found backref(inode_index/item is already handled) to it,
2363 * Need new inode-inode ref structure to allow search for that.
2365 if (!type_recovered) {
2366 if (rec->found_file_extent &&
2367 find_normal_file_extent(root, rec->ino)) {
2369 filetype = BTRFS_FT_REG_FILE;
2370 } else if (rec->found_dir_item) {
2372 filetype = BTRFS_FT_DIR;
/* Orphan data extents imply file data, hence a regular file. */
2373 } else if (!list_empty(&rec->orphan_extents)) {
2375 filetype = BTRFS_FT_REG_FILE;
2377 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2380 filetype = BTRFS_FT_REG_FILE;
2384 ret = btrfs_new_inode(trans, root, rec->ino,
2385 mode | btrfs_type_to_imode(filetype));
2390 * Here inode rebuild is done, we only rebuild the inode item,
2391 * don't repair the nlink(like move to lost+found).
2392 * That is the job of nlink repair.
2394 * We just fill the record and return
2396 rec->found_dir_item = 1;
2397 rec->imode = mode | btrfs_type_to_imode(filetype);
2399 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2400 /* Ensure the inode_nlinks repair function will be called */
2401 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach each recorded orphan data extent to @rec's inode as a file
 * extent.  Extents that would conflict with existing file extents are
 * freed instead of inserted.  Size/nbytes and hole bookkeeping on @rec
 * are updated as extents are restored.
 */
2406 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2407 struct btrfs_root *root,
2408 struct btrfs_path *path,
2409 struct inode_record *rec)
2411 struct orphan_data_extent *orphan;
2412 struct orphan_data_extent *tmp;
2415 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2417 * Check for conflicting file extents
2419 * Here we don't know whether the extents is compressed or not,
2420 * so we can only assume it not compressed nor data offset,
2421 * and use its disk_len as extent length.
2423 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2424 orphan->offset, orphan->disk_len, 0);
2425 btrfs_release_path(path);
/* Conflict: drop the orphan extent's reference instead of linking. */
2430 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2431 orphan->disk_bytenr, orphan->disk_len);
2432 ret = btrfs_free_extent(trans,
2433 root->fs_info->extent_root,
2434 orphan->disk_bytenr, orphan->disk_len,
2435 0, root->objectid, orphan->objectid,
2440 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2441 orphan->offset, orphan->disk_bytenr,
2442 orphan->disk_len, orphan->disk_len);
2446 /* Update file size info */
2447 rec->found_size += orphan->disk_len;
2448 if (rec->found_size == rec->nbytes)
2449 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2451 /* Update the file extent hole info too */
2452 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2456 if (RB_EMPTY_ROOT(&rec->holes))
2457 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2459 list_del(&orphan->list);
2462 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill every recorded file-extent hole of @rec's inode by punching an
 * explicit hole extent over it, then clear I_ERR_FILE_EXTENT_DISCOUNT.
 * A file that lost all of its extents gets one hole over [0, isize).
 */
2467 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2468 struct btrfs_root *root,
2469 struct btrfs_path *path,
2470 struct inode_record *rec)
2472 struct rb_node *node;
2473 struct file_extent_hole *hole;
2477 node = rb_first(&rec->holes);
2481 hole = rb_entry(node, struct file_extent_hole, node);
2482 ret = btrfs_punch_hole(trans, root, rec->ino,
2483 hole->start, hole->len);
/* Remove the fixed hole from the tree and fetch the next one. */
2486 ret = del_file_extent_hole(&rec->holes, hole->start,
2490 if (RB_EMPTY_ROOT(&rec->holes))
2491 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2492 node = rb_first(&rec->holes);
2494 /* special case for a file losing all its file extent */
2496 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2497 round_up(rec->isize,
2498 root->fs_info->sectorsize));
2502 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2503 rec->ino, root->objectid);
/*
 * Dispatch all repairable I_ERR_* conditions of @rec to their repair
 * helpers inside a single transaction.  Order matters: the inode item
 * must exist before extents/isize/nlink can be fixed, and each later
 * step runs only while ret is still 0.
 */
2508 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2510 struct btrfs_trans_handle *trans;
2511 struct btrfs_path path;
/* Bail out early when none of the repairable error bits are set. */
2514 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2515 I_ERR_NO_ORPHAN_ITEM |
2516 I_ERR_LINK_COUNT_WRONG |
2517 I_ERR_NO_INODE_ITEM |
2518 I_ERR_FILE_EXTENT_ORPHAN |
2519 I_ERR_FILE_EXTENT_DISCOUNT|
2520 I_ERR_FILE_NBYTES_WRONG)))
2524 * For nlink repair, it may create a dir and add link, so
2525 * 2 for parent(256)'s dir_index and dir_item
2526 * 2 for lost+found dir's inode_item and inode_ref
2527 * 1 for the new inode_ref of the file
2528 * 2 for lost+found dir's dir_index and dir_item for the file
2530 trans = btrfs_start_transaction(root, 7);
2532 return PTR_ERR(trans);
2534 btrfs_init_path(&path);
2535 if (rec->errors & I_ERR_NO_INODE_ITEM)
2536 ret = repair_inode_no_item(trans, root, &path, rec);
2537 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2538 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2539 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2540 ret = repair_inode_discount_extent(trans, root, &path, rec);
2541 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2542 ret = repair_inode_isize(trans, root, &path, rec);
2543 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2544 ret = repair_inode_orphan_item(trans, root, &path, rec);
2545 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2546 ret = repair_inode_nlinks(trans, root, &path, rec);
2547 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2548 ret = repair_inode_nbytes(trans, root, &path, rec);
2549 btrfs_commit_transaction(trans, root);
2550 btrfs_release_path(&path);
/*
 * Final per-root pass over the collected inode records: first repair
 * backrefs (iterating in stages because fixes can invalidate earlier
 * analysis), then verify/recreate the root directory, and finally check,
 * repair, and report every remaining inode record.  Returns -1 if any
 * unrepaired error remains, 0 otherwise.
 */
2554 static int check_inode_recs(struct btrfs_root *root,
2555 struct cache_tree *inode_cache)
2557 struct cache_extent *cache;
2558 struct ptr_node *node;
2559 struct inode_record *rec;
2560 struct inode_backref *backref;
2565 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* Dead root: its cache should already be empty. */
2567 if (btrfs_root_refs(&root->root_item) == 0) {
2568 if (!cache_tree_empty(inode_cache))
2569 fprintf(stderr, "warning line %d\n", __LINE__);
2574 * We need to repair backrefs first because we could change some of the
2575 * errors in the inode recs.
2577 * We also need to go through and delete invalid backrefs first and then
2578 * add the correct ones second. We do this because we may get EEXIST
2579 * when adding back the correct index because we hadn't yet deleted the
2582 * For example, if we were missing a dir index then the directories
2583 * isize would be wrong, so if we fixed the isize to what we thought it
2584 * would be and then fixed the backref we'd still have a invalid fs, so
2585 * we need to add back the dir index and then check to see if the isize
2590 if (stage == 3 && !err)
2593 cache = search_cache_extent(inode_cache, 0);
2594 while (repair && cache) {
2595 node = container_of(cache, struct ptr_node, cache);
2597 cache = next_cache_extent(cache);
2599 /* Need to free everything up and rescan */
2601 remove_cache_extent(inode_cache, &node->cache);
2603 free_inode_rec(rec);
2607 if (list_empty(&rec->backrefs))
2610 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Validate the root directory record; recreate it when repairing. */
2624 rec = get_inode_rec(inode_cache, root_dirid, 0);
2625 BUG_ON(IS_ERR(rec));
2627 ret = check_root_dir(rec);
2629 fprintf(stderr, "root %llu root dir %llu error\n",
2630 (unsigned long long)root->root_key.objectid,
2631 (unsigned long long)root_dirid);
2632 print_inode_error(root, rec);
2637 struct btrfs_trans_handle *trans;
2639 trans = btrfs_start_transaction(root, 1);
2640 if (IS_ERR(trans)) {
2641 err = PTR_ERR(trans);
2646 "root %llu missing its root dir, recreating\n",
2647 (unsigned long long)root->objectid);
2649 ret = btrfs_make_root_dir(trans, root, root_dirid);
2652 btrfs_commit_transaction(trans, root);
2656 fprintf(stderr, "root %llu root dir %llu not found\n",
2657 (unsigned long long)root->root_key.objectid,
2658 (unsigned long long)root_dirid);
/* Drain the cache: check each remaining record, repair if enabled. */
2662 cache = search_cache_extent(inode_cache, 0);
2665 node = container_of(cache, struct ptr_node, cache);
2667 remove_cache_extent(inode_cache, &node->cache);
/* Root dir and orphan placeholder were handled above; drop them. */
2669 if (rec->ino == root_dirid ||
2670 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2671 free_inode_rec(rec);
/* A real orphan item may exist; re-check before reporting. */
2675 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2676 ret = check_orphan_item(root, rec->ino);
2678 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2679 if (can_free_inode_rec(rec)) {
2680 free_inode_rec(rec);
2685 if (!rec->found_inode_item)
2686 rec->errors |= I_ERR_NO_INODE_ITEM;
2687 if (rec->found_link != rec->nlink)
2688 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2690 ret = try_repair_inode(root, rec);
2691 if (ret == 0 && can_free_inode_rec(rec)) {
2692 free_inode_rec(rec);
/* Report what could not be (or was not) repaired. */
2698 if (!(repair && ret == 0))
2700 print_inode_error(root, rec);
2701 list_for_each_entry(backref, &rec->backrefs, list) {
2702 if (!backref->found_dir_item)
2703 backref->errors |= REF_ERR_NO_DIR_ITEM;
2704 if (!backref->found_dir_index)
2705 backref->errors |= REF_ERR_NO_DIR_INDEX;
2706 if (!backref->found_inode_ref)
2707 backref->errors |= REF_ERR_NO_INODE_REF;
2708 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2709 " namelen %u name %s filetype %d errors %x",
2710 (unsigned long long)backref->dir,
2711 (unsigned long long)backref->index,
2712 backref->namelen, backref->name,
2713 backref->filetype, backref->errors);
2714 print_ref_error(backref->errors);
2716 free_inode_rec(rec);
2718 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh one (keyed by objectid, size 1) on a cache miss.
 * Returns ERR_PTR on allocation or insertion failure.
 */
2721 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2724 struct cache_extent *cache;
2725 struct root_record *rec = NULL;
2728 cache = lookup_cache_extent(root_cache, objectid, 1);
2730 rec = container_of(cache, struct root_record, cache);
2732 rec = calloc(1, sizeof(*rec));
2734 return ERR_PTR(-ENOMEM);
2735 rec->objectid = objectid;
2736 INIT_LIST_HEAD(&rec->backrefs);
2737 rec->cache.start = objectid;
2738 rec->cache.size = 1;
2740 ret = insert_cache_extent(root_cache, &rec->cache);
/* Insertion can only collide with a concurrent/duplicate objectid. */
2742 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (ref_root, dir, name); allocate and
 * append a new one when absent.  The name is stored inline after the
 * struct (hence the "+ namelen + 1" allocation) and NUL-terminated.
 */
2747 static struct root_backref *get_root_backref(struct root_record *rec,
2748 u64 ref_root, u64 dir, u64 index,
2749 const char *name, int namelen)
2751 struct root_backref *backref;
2753 list_for_each_entry(backref, &rec->backrefs, list) {
2754 if (backref->ref_root != ref_root || backref->dir != dir ||
2755 backref->namelen != namelen)
2757 if (memcmp(name, backref->name, namelen))
2762 backref = calloc(1, sizeof(*backref) + namelen + 1);
2765 backref->ref_root = ref_root;
2767 backref->index = index;
2768 backref->namelen = namelen;
2769 memcpy(backref->name, name, namelen);
2770 backref->name[namelen] = '\0';
2771 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for a root_record cache entry: drain and free every backref
 * hanging off rec->backrefs, then (in elided lines) free the record
 * itself.  Used as the per-node callback for the tree-wide free below.
 */
2775 static void free_root_record(struct cache_extent *cache)
2777 struct root_record *rec;
2778 struct root_backref *backref;
2780 rec = container_of(cache, struct root_record, cache);
2781 while (!list_empty(&rec->backrefs)) {
2782 backref = to_root_backref(rec->backrefs.next);
2783 list_del(&backref->list);
/* Generates free_root_recs_tree() over a cache_tree of root_records */
2790 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root @root_id coming from @ref_root, found as a
 * DIR_ITEM, DIR_INDEX, ROOT_REF or ROOT_BACKREF item.  The per-item
 * "found_*" flags are accumulated on the (root_id, ref_root, dir, name)
 * backref; duplicate forward/back refs and index mismatches are flagged
 * as errors rather than failing.  A root becomes "reachable" once both a
 * forward ref and a dir item have been seen.
 */
2792 static int add_root_backref(struct cache_tree *root_cache,
2793 u64 root_id, u64 ref_root, u64 dir, u64 index,
2794 const char *name, int namelen,
2795 int item_type, int errors)
2797 struct root_record *rec;
2798 struct root_backref *backref;
2800 rec = get_root_rec(root_cache, root_id);
2801 BUG_ON(IS_ERR(rec));
2802 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2805 backref->errors |= errors;
/* DIR_ITEM carries no index; only cross-check it for the other types */
2807 if (item_type != BTRFS_DIR_ITEM_KEY) {
2808 if (backref->found_dir_index || backref->found_back_ref ||
2809 backref->found_forward_ref) {
2810 if (backref->index != index)
2811 backref->errors |= REF_ERR_INDEX_UNMATCH;
2813 backref->index = index;
2817 if (item_type == BTRFS_DIR_ITEM_KEY) {
2818 if (backref->found_forward_ref)
2820 backref->found_dir_item = 1;
2821 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2822 backref->found_dir_index = 1;
2823 } else if (item_type == BTRFS_ROOT_REF_KEY) {
2824 if (backref->found_forward_ref)
2825 backref->errors |= REF_ERR_DUP_ROOT_REF;
2826 else if (backref->found_dir_item)
2828 backref->found_forward_ref = 1;
2829 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2830 if (backref->found_back_ref)
2831 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2832 backref->found_back_ref = 1;
2837 if (backref->found_forward_ref && backref->found_dir_item)
2838 backref->reachable = 1;
/*
 * Drain the per-subvolume inode-record cache (@src_cache) and convert the
 * inode records that are actually child-root references into root
 * backrefs in @dst_cache.  Reloc tree records are simply discarded since
 * they duplicate the real fs tree.  Each consumed inode_record is freed.
 * NOTE(review): the loop structure and the use of is_child_root()'s
 * result are partly elided in this excerpt.
 */
2842 static int merge_root_recs(struct btrfs_root *root,
2843 struct cache_tree *src_cache,
2844 struct cache_tree *dst_cache)
2846 struct cache_extent *cache;
2847 struct ptr_node *node;
2848 struct inode_record *rec;
2849 struct inode_backref *backref;
2852 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2853 free_inode_recs_tree(src_cache);
2858 cache = search_cache_extent(src_cache, 0);
2861 node = container_of(cache, struct ptr_node, cache);
2863 remove_cache_extent(src_cache, &node->cache);
2866 ret = is_child_root(root, root->objectid, rec->ino);
2872 list_for_each_entry(backref, &rec->backrefs, list) {
/* child roots are referenced by dir items/indexes, never inode refs */
2873 BUG_ON(backref->found_inode_ref);
2874 if (backref->found_dir_item)
2875 add_root_backref(dst_cache, rec->ino,
2876 root->root_key.objectid, backref->dir,
2877 backref->index, backref->name,
2878 backref->namelen, BTRFS_DIR_ITEM_KEY,
2880 if (backref->found_dir_index)
2881 add_root_backref(dst_cache, rec->ino,
2882 root->root_key.objectid, backref->dir,
2883 backref->index, backref->name,
2884 backref->namelen, BTRFS_DIR_INDEX_KEY,
2888 free_inode_rec(rec);
/*
 * Verify that every fs/subvolume root in @root_cache is reachable.
 * Pass 1 repeatedly clears "reachable" on backrefs whose referencing
 * root itself has no references, iterating until a fixpoint (the loop
 * cannot detect circular references, per the fixme).  Pass 2 reports
 * unreferenced roots (unless only a stale orphan/dir item exists) and
 * prints any unresolved backref errors.  Returns 1 if errors were found.
 */
2895 static int check_root_refs(struct btrfs_root *root,
2896 struct cache_tree *root_cache)
2898 struct root_record *rec;
2899 struct root_record *ref_root;
2900 struct root_backref *backref;
2901 struct cache_extent *cache;
/* the top-level fs tree is implicitly referenced by the superblock */
2907 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
2908 BUG_ON(IS_ERR(rec));
2911 /* fixme: this can not detect circular references */
2914 cache = search_cache_extent(root_cache, 0);
2918 rec = container_of(cache, struct root_record, cache);
2919 cache = next_cache_extent(cache);
2921 if (rec->found_ref == 0)
2924 list_for_each_entry(backref, &rec->backrefs, list) {
2925 if (!backref->reachable)
2928 ref_root = get_root_rec(root_cache,
2930 BUG_ON(IS_ERR(ref_root));
2931 if (ref_root->found_ref > 0)
2934 backref->reachable = 0;
2936 if (rec->found_ref == 0)
/* second pass: report every root left without a reachable reference */
2942 cache = search_cache_extent(root_cache, 0);
2946 rec = container_of(cache, struct root_record, cache);
2947 cache = next_cache_extent(cache);
2949 if (rec->found_ref == 0 &&
2950 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2951 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
2952 ret = check_orphan_item(root->fs_info->tree_root,
2958 * If we don't have a root item then we likely just have
2959 * a dir item in a snapshot for this root but no actual
2960 * ref key or anything so it's meaningless.
2962 if (!rec->found_root_item)
2965 fprintf(stderr, "fs tree %llu not referenced\n",
2966 (unsigned long long)rec->objectid);
2970 if (rec->found_ref > 0 && !rec->found_root_item)
2972 list_for_each_entry(backref, &rec->backrefs, list) {
2973 if (!backref->found_dir_item)
2974 backref->errors |= REF_ERR_NO_DIR_ITEM;
2975 if (!backref->found_dir_index)
2976 backref->errors |= REF_ERR_NO_DIR_INDEX;
2977 if (!backref->found_back_ref)
2978 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
2979 if (!backref->found_forward_ref)
2980 backref->errors |= REF_ERR_NO_ROOT_REF;
2981 if (backref->reachable && backref->errors)
2988 fprintf(stderr, "fs tree %llu refs %u %s\n",
2989 (unsigned long long)rec->objectid, rec->found_ref,
2990 rec->found_root_item ? "" : "not found");
2992 list_for_each_entry(backref, &rec->backrefs, list) {
2993 if (!backref->reachable)
2995 if (!backref->errors && rec->found_root_item)
2997 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
2998 " index %llu namelen %u name %s errors %x\n",
2999 (unsigned long long)backref->ref_root,
3000 (unsigned long long)backref->dir,
3001 (unsigned long long)backref->index,
3002 backref->namelen, backref->name,
3004 print_ref_error(backref->errors);
3007 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF / ROOT_BACKREF item at @slot of @eb and feed it
 * into add_root_backref().  For ROOT_REF the key is (parent, REF, child);
 * for ROOT_BACKREF it is (child, BACKREF, parent) — hence the swapped
 * objectid/offset arguments in the two branches.  Over-long names are
 * truncated to BTRFS_NAME_LEN and flagged REF_ERR_NAME_TOO_LONG.
 */
3010 static int process_root_ref(struct extent_buffer *eb, int slot,
3011 struct btrfs_key *key,
3012 struct cache_tree *root_cache)
3018 struct btrfs_root_ref *ref;
3019 char namebuf[BTRFS_NAME_LEN];
3022 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3024 dirid = btrfs_root_ref_dirid(eb, ref);
3025 index = btrfs_root_ref_sequence(eb, ref);
3026 name_len = btrfs_root_ref_name_len(eb, ref);
3028 if (name_len <= BTRFS_NAME_LEN) {
3032 len = BTRFS_NAME_LEN;
3033 error = REF_ERR_NAME_TOO_LONG;
/* the name is stored inline immediately after the root_ref struct */
3035 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3037 if (key->type == BTRFS_ROOT_REF_KEY) {
3038 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3039 index, namebuf, len, key->type, error);
3041 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3042 index, namebuf, len, key->type, error);
/* Destructor callback for one btrfs_corrupt_block cache entry. */
3047 static void free_corrupt_block(struct cache_extent *cache)
3049 struct btrfs_corrupt_block *corrupt;
3051 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree() over a tree of corrupt blocks */
3055 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3058 * Repair the btree of the given root.
3060 * The fix is to remove the node key in corrupt_blocks cache_tree.
3061 * and rebalance the tree.
3062 * After the fix, the btree should be writeable.
/*
 * Two phases inside one transaction: first delete every pointer to a
 * corrupt block (ins_len 0, no balancing, so bad siblings are never
 * touched) and free the corresponding extent; then re-search each key
 * with ins_len -1 purely to trigger btrfs_search_slot()'s balancing.
 * NOTE(review): error-path lines are elided in this excerpt.
 */
3064 static int repair_btree(struct btrfs_root *root,
3065 struct cache_tree *corrupt_blocks)
3067 struct btrfs_trans_handle *trans;
3068 struct btrfs_path path;
3069 struct btrfs_corrupt_block *corrupt;
3070 struct cache_extent *cache;
3071 struct btrfs_key key;
3076 if (cache_tree_empty(corrupt_blocks))
3079 trans = btrfs_start_transaction(root, 1);
3080 if (IS_ERR(trans)) {
3081 ret = PTR_ERR(trans);
3082 fprintf(stderr, "Error starting transaction: %s\n",
3086 btrfs_init_path(&path);
3087 cache = first_cache_extent(corrupt_blocks);
3089 corrupt = container_of(cache, struct btrfs_corrupt_block,
3091 level = corrupt->level;
3092 path.lowest_level = level;
3093 key.objectid = corrupt->key.objectid;
3094 key.type = corrupt->key.type;
3095 key.offset = corrupt->key.offset;
3098 * Here we don't want to do any tree balance, since it may
3099 * cause a balance with corrupted brother leaf/node,
3100 * so ins_len set to 0 here.
3101 * Balance will be done after all corrupt node/leaf is deleted.
3103 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3106 offset = btrfs_node_blockptr(path.nodes[level],
3109 /* Remove the ptr */
3110 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3114 * Remove the corresponding extent
3115 * return value is not concerned.
3117 btrfs_release_path(&path);
3118 ret = btrfs_free_extent(trans, root, offset,
3119 root->fs_info->nodesize, 0,
3120 root->root_key.objectid, level - 1, 0);
3121 cache = next_cache_extent(cache);
3124 /* Balance the btree using btrfs_search_slot() */
3125 cache = first_cache_extent(corrupt_blocks);
3127 corrupt = container_of(cache, struct btrfs_corrupt_block,
3129 memcpy(&key, &corrupt->key, sizeof(key));
3130 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3133 /* return will always >0 since it won't find the item */
3135 btrfs_release_path(&path);
3136 cache = next_cache_extent(cache);
3139 btrfs_commit_transaction(trans, root);
3140 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree: walk it top-down collecting inode and
 * root records, validating each block on the way, then merge root refs
 * into @root_cache and verify the collected inode records.  Corrupt
 * blocks found during the walk are gathered in a local cache_tree and,
 * when repair is enabled (elided condition), handed to repair_btree().
 * A partially-dropped snapshot resumes the walk from drop_progress.
 * NOTE(review): many error/cleanup lines are elided in this excerpt.
 */
3144 static int check_fs_root(struct btrfs_root *root,
3145 struct cache_tree *root_cache,
3146 struct walk_control *wc)
3152 struct btrfs_path path;
3153 struct shared_node root_node;
3154 struct root_record *rec;
3155 struct btrfs_root_item *root_item = &root->root_item;
3156 struct cache_tree corrupt_blocks;
3157 struct orphan_data_extent *orphan;
3158 struct orphan_data_extent *tmp;
3159 enum btrfs_tree_block_status status;
3160 struct node_refs nrefs;
3163 * Reuse the corrupt_block cache tree to record corrupted tree block
3165 * Unlike the usage in extent tree check, here we do it in a per
3166 * fs/subvol tree base.
3168 cache_tree_init(&corrupt_blocks);
3169 root->fs_info->corrupt_blocks = &corrupt_blocks;
3171 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3172 rec = get_root_rec(root_cache, root->root_key.objectid);
3173 BUG_ON(IS_ERR(rec));
3174 if (btrfs_root_refs(root_item) > 0)
3175 rec->found_root_item = 1;
3178 btrfs_init_path(&path);
3179 memset(&root_node, 0, sizeof(root_node));
3180 cache_tree_init(&root_node.root_cache);
3181 cache_tree_init(&root_node.inode_cache);
3182 memset(&nrefs, 0, sizeof(nrefs));
3184 /* Move the orphan extent record to corresponding inode_record */
3185 list_for_each_entry_safe(orphan, tmp,
3186 &root->orphan_data_extents, list) {
3187 struct inode_record *inode;
3189 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3191 BUG_ON(IS_ERR(inode));
3192 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3193 list_move(&orphan->list, &inode->orphan_extents);
3196 level = btrfs_header_level(root->node);
3197 memset(wc->nodes, 0, sizeof(wc->nodes));
3198 wc->nodes[level] = &root_node;
3199 wc->active_node = level;
3200 wc->root_level = level;
3202 /* We may not have checked the root block, lets do that now */
3203 if (btrfs_is_leaf(root->node))
3204 status = btrfs_check_leaf(root, NULL, root->node);
3206 status = btrfs_check_node(root, NULL, root->node);
3207 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* normal root: start from the top; dropping root: resume mid-drop */
3210 if (btrfs_root_refs(root_item) > 0 ||
3211 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3212 path.nodes[level] = root->node;
3213 extent_buffer_get(root->node);
3214 path.slots[level] = 0;
3216 struct btrfs_key key;
3217 struct btrfs_disk_key found_key;
3219 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3220 level = root_item->drop_level;
3221 path.lowest_level = level;
3222 if (level > btrfs_header_level(root->node) ||
3223 level >= BTRFS_MAX_LEVEL) {
3224 error("ignoring invalid drop level: %u", level);
3227 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3230 btrfs_node_key(path.nodes[level], &found_key,
3232 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3233 sizeof(found_key)));
3237 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3243 wret = walk_up_tree(root, &path, wc, &level);
3250 btrfs_release_path(&path);
3252 if (!cache_tree_empty(&corrupt_blocks)) {
3253 struct cache_extent *cache;
3254 struct btrfs_corrupt_block *corrupt;
3256 printf("The following tree block(s) is corrupted in tree %llu:\n",
3257 root->root_key.objectid);
3258 cache = first_cache_extent(&corrupt_blocks);
3260 corrupt = container_of(cache,
3261 struct btrfs_corrupt_block,
3263 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3264 cache->start, corrupt->level,
3265 corrupt->key.objectid, corrupt->key.type,
3266 corrupt->key.offset);
3267 cache = next_cache_extent(cache);
3270 printf("Try to repair the btree for root %llu\n",
3271 root->root_key.objectid);
3272 ret = repair_btree(root, &corrupt_blocks);
3274 fprintf(stderr, "Failed to repair btree: %s\n",
3277 printf("Btree for root %llu is fixed\n",
3278 root->root_key.objectid);
3282 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3286 if (root_node.current) {
3287 root_node.current->checked = 1;
3288 maybe_free_inode_rec(&root_node.inode_cache,
3292 err = check_inode_recs(root, &root_node.inode_cache);
/* never leave a dangling pointer to our stack-local cache_tree */
3296 free_corrupt_blocks_tree(&corrupt_blocks);
3297 root->fs_info->corrupt_blocks = NULL;
3298 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Iterate all ROOT_ITEMs in the tree of tree roots and run
 * check_fs_root() on every fs/subvolume root; ROOT_REF/ROOT_BACKREF
 * items are fed to process_root_ref().  If the tree-root node changes
 * under us (repairs COWed it) or a check returns -EAGAIN, the collected
 * records are discarded and the scan restarts (restart jump is elided in
 * this excerpt).  Reloc roots are read uncached and freed after use.
 */
3302 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3303 struct cache_tree *root_cache)
3305 struct btrfs_path path;
3306 struct btrfs_key key;
3307 struct walk_control wc;
3308 struct extent_buffer *leaf, *tree_node;
3309 struct btrfs_root *tmp_root;
3310 struct btrfs_root *tree_root = fs_info->tree_root;
3314 if (ctx.progress_enabled) {
3315 ctx.tp = TASK_FS_ROOTS;
3316 task_start(ctx.info);
3320 * Just in case we made any changes to the extent tree that weren't
3321 * reflected into the free space cache yet.
3324 reset_cached_block_groups(fs_info);
3325 memset(&wc, 0, sizeof(wc));
3326 cache_tree_init(&wc.shared);
3327 btrfs_init_path(&path);
3332 key.type = BTRFS_ROOT_ITEM_KEY;
3333 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* remember the root node so we can detect a COW mid-scan */
3338 tree_node = tree_root->node;
3340 if (tree_node != tree_root->node) {
3341 free_root_recs_tree(root_cache);
3342 btrfs_release_path(&path);
3345 leaf = path.nodes[0];
3346 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3347 ret = btrfs_next_leaf(tree_root, &path);
3353 leaf = path.nodes[0];
3355 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3356 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3357 fs_root_objectid(key.objectid)) {
3358 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3359 tmp_root = btrfs_read_fs_root_no_cache(
3362 key.offset = (u64)-1;
3363 tmp_root = btrfs_read_fs_root(
3366 if (IS_ERR(tmp_root)) {
3370 ret = check_fs_root(tmp_root, root_cache, &wc);
3371 if (ret == -EAGAIN) {
3372 free_root_recs_tree(root_cache);
3373 btrfs_release_path(&path);
3378 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3379 btrfs_free_fs_root(tmp_root);
3380 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3381 key.type == BTRFS_ROOT_BACKREF_KEY) {
3382 process_root_ref(leaf, path.slots[0], &key,
3389 btrfs_release_path(&path);
3391 free_extent_cache_tree(&wc.shared);
3392 if (!cache_tree_empty(&wc.shared))
3393 fprintf(stderr, "warning line %d\n", __LINE__);
3395 task_stop(ctx.info);
/*
 * rbtree-based lookup of a tree backref on @rec: builds a stack "match"
 * key (full backref keyed by @parent when parent != 0, otherwise keyed
 * by @root — the root branch is elided here) and searches the record's
 * backref_tree.  Returns NULL when absent.
 */
3400 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3401 u64 parent, u64 root)
3403 struct rb_node *node;
3404 struct tree_backref *back = NULL;
3405 struct tree_backref match = {
3412 match.parent = parent;
3413 match.node.full_backref = 1;
3418 node = rb_search(&rec->backref_tree, &match.node.node,
3419 (rb_compare_keys)compare_extent_backref, NULL);
3421 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * rbtree-based lookup of a data backref on @rec, keyed either by
 * @parent (full backref) or by (@root, @owner, @offset); found_ref,
 * disk_bytenr and bytes participate in the comparison via the match
 * template.  Returns NULL when absent.  NOTE(review): the non-parent
 * initialisation branch is elided in this excerpt.
 */
3426 static struct data_backref *find_data_backref(struct extent_record *rec,
3427 u64 parent, u64 root,
3428 u64 owner, u64 offset,
3430 u64 disk_bytenr, u64 bytes)
3432 struct rb_node *node;
3433 struct data_backref *back = NULL;
3434 struct data_backref match = {
3441 .found_ref = found_ref,
3442 .disk_bytenr = disk_bytenr,
3446 match.parent = parent;
3447 match.node.full_backref = 1;
3452 node = rb_search(&rec->backref_tree, &match.node.node,
3453 (rb_compare_keys)compare_extent_backref, NULL);
3455 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Dispatch fs-roots checking to the mode-specific implementation:
 * lowmem mode uses check_fs_roots_v2(), otherwise the original
 * check_fs_roots() above.
 */
3460 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
3461 struct cache_tree *root_cache)
3465 if (!ctx.progress_enabled)
3466 fprintf(stderr, "checking fs roots\n");
3467 if (check_mode == CHECK_MODE_LOWMEM)
3468 ret = check_fs_roots_v2(fs_info);
3470 ret = check_fs_roots(fs_info, root_cache);
/*
 * Cross-check every backref of @rec against the extent tree: each must
 * have been found in the extent tree, tree backrefs must be referenced
 * back, data backrefs must agree on ref counts / disk_bytenr / length,
 * and the summed per-backref counts must equal rec->refs.  With
 * @print_errs the mismatches are reported on stderr.  Returns non-zero
 * (elided "goto out" paths) when any backpointer fails the check.
 */
3475 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3477 struct extent_backref *back, *tmp;
3478 struct tree_backref *tback;
3479 struct data_backref *dback;
3483 rbtree_postorder_for_each_entry_safe(back, tmp,
3484 &rec->backref_tree, node) {
3485 if (!back->found_extent_tree) {
3489 if (back->is_data) {
3490 dback = to_data_backref(back);
3491 fprintf(stderr, "Data backref %llu %s %llu"
3492 " owner %llu offset %llu num_refs %lu"
3493 " not found in extent tree\n",
3494 (unsigned long long)rec->start,
3495 back->full_backref ?
3497 back->full_backref ?
3498 (unsigned long long)dback->parent:
3499 (unsigned long long)dback->root,
3500 (unsigned long long)dback->owner,
3501 (unsigned long long)dback->offset,
3502 (unsigned long)dback->num_refs);
3504 tback = to_tree_backref(back);
3505 fprintf(stderr, "Tree backref %llu parent %llu"
3506 " root %llu not found in extent tree\n",
3507 (unsigned long long)rec->start,
3508 (unsigned long long)tback->parent,
3509 (unsigned long long)tback->root);
3512 if (!back->is_data && !back->found_ref) {
3516 tback = to_tree_backref(back);
3517 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3518 (unsigned long long)rec->start,
3519 back->full_backref ? "parent" : "root",
3520 back->full_backref ?
3521 (unsigned long long)tback->parent :
3522 (unsigned long long)tback->root, back);
3524 if (back->is_data) {
3525 dback = to_data_backref(back);
3526 if (dback->found_ref != dback->num_refs) {
3530 fprintf(stderr, "Incorrect local backref count"
3531 " on %llu %s %llu owner %llu"
3532 " offset %llu found %u wanted %u back %p\n",
3533 (unsigned long long)rec->start,
3534 back->full_backref ?
3536 back->full_backref ?
3537 (unsigned long long)dback->parent:
3538 (unsigned long long)dback->root,
3539 (unsigned long long)dback->owner,
3540 (unsigned long long)dback->offset,
3541 dback->found_ref, dback->num_refs, back);
3543 if (dback->disk_bytenr != rec->start) {
3547 fprintf(stderr, "Backref disk bytenr does not"
3548 " match extent record, bytenr=%llu, "
3549 "ref bytenr=%llu\n",
3550 (unsigned long long)rec->start,
3551 (unsigned long long)dback->disk_bytenr);
3554 if (dback->bytes != rec->nr) {
3558 fprintf(stderr, "Backref bytes do not match "
3559 "extent backref, bytenr=%llu, ref "
3560 "bytes=%llu, backref bytes=%llu\n",
3561 (unsigned long long)rec->start,
3562 (unsigned long long)rec->nr,
3563 (unsigned long long)dback->bytes);
/* accumulate the global found count (tree backrefs count 1 each) */
3566 if (!back->is_data) {
3569 dback = to_data_backref(back);
3570 found += dback->found_ref;
3573 if (found != rec->refs) {
3577 fprintf(stderr, "Incorrect global backref count "
3578 "on %llu found %llu wanted %llu\n",
3579 (unsigned long long)rec->start,
3580 (unsigned long long)found,
3581 (unsigned long long)rec->refs);
/* rb_free_nodes() callback: free the extent_backref embedding @node. */
3587 static void __free_one_backref(struct rb_node *node)
3589 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Release every backref attached to an extent_record. */
3594 static void free_all_extent_backrefs(struct extent_record *rec)
3596 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache: pop every extent_record, free its backrefs and
 * (in elided lines) the record itself.
 */
3599 static void free_extent_record_cache(struct cache_tree *extent_cache)
3601 struct cache_extent *cache;
3602 struct extent_record *rec;
3605 cache = first_cache_extent(extent_cache);
3608 rec = container_of(cache, struct extent_record, cache);
3609 remove_cache_extent(extent_cache, cache);
3610 free_all_extent_backrefs(rec);
/*
 * Release @rec from @extent_cache early when it is fully verified:
 * content and owner checked, ref counts consistent, no duplicates and
 * none of the "bad" flags set.  Keeping only unresolved records bounds
 * memory use during the extent scan.
 */
3615 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3616 struct extent_record *rec)
3618 if (rec->content_checked && rec->owner_ref_checked &&
3619 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3620 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3621 rec->bad_full_backref && !rec->crossing_stripes &&
3622 !rec->wrong_chunk_type) {
3623 remove_cache_extent(extent_cache, &rec->cache);
3624 free_all_extent_backrefs(rec);
3625 list_del_init(&rec->list);
/*
 * Verify that the root recorded as owner of @buf really references it:
 * first look for a matching non-full tree backref on @rec; failing that,
 * read the owner's fs tree and search for @buf's first key at
 * lowest_level = level + 1, confirming the parent node points at
 * buf->start.  Returns 0 when the ownership checks out, 1 otherwise.
 */
3631 static int check_owner_ref(struct btrfs_root *root,
3632 struct extent_record *rec,
3633 struct extent_buffer *buf)
3635 struct extent_backref *node, *tmp;
3636 struct tree_backref *back;
3637 struct btrfs_root *ref_root;
3638 struct btrfs_key key;
3639 struct btrfs_path path;
3640 struct extent_buffer *parent;
3645 rbtree_postorder_for_each_entry_safe(node, tmp,
3646 &rec->backref_tree, node) {
3649 if (!node->found_ref)
3651 if (node->full_backref)
3653 back = to_tree_backref(node);
3654 if (btrfs_header_owner(buf) == back->root)
3657 BUG_ON(rec->is_root);
3659 /* try to find the block by search corresponding fs tree */
3660 key.objectid = btrfs_header_owner(buf);
3661 key.type = BTRFS_ROOT_ITEM_KEY;
3662 key.offset = (u64)-1;
3664 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3665 if (IS_ERR(ref_root))
3668 level = btrfs_header_level(buf);
3670 btrfs_item_key_to_cpu(buf, &key, 0);
3672 btrfs_node_key_to_cpu(buf, &key, 0);
3674 btrfs_init_path(&path);
3675 path.lowest_level = level + 1;
3676 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3680 parent = path.nodes[level + 1];
3681 if (parent && buf->start == btrfs_node_blockptr(parent,
3682 path.slots[level + 1]))
3685 btrfs_release_path(&path);
3686 return found ? 0 : 1;
3689 static int is_extent_tree_record(struct extent_record *rec)
3691 struct extent_backref *node, *tmp;
3692 struct tree_backref *back;
3695 rbtree_postorder_for_each_entry_safe(node, tmp,
3696 &rec->backref_tree, node) {
3699 back = to_tree_backref(node);
3700 if (node->full_backref)
3702 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * When a read of [start, start+len) failed, record it as a corrupt
 * extent — but only if the block belongs to the extent tree (other
 * trees' I/O errors are handled elsewhere; elided early return).
 */
3709 static int record_bad_block_io(struct btrfs_fs_info *info,
3710 struct cache_tree *extent_cache,
3713 struct extent_record *rec;
3714 struct cache_extent *cache;
3715 struct btrfs_key key;
3717 cache = lookup_cache_extent(extent_cache, start, len);
3721 rec = container_of(cache, struct extent_record, cache);
3722 if (!is_extent_tree_record(rec))
3725 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3726 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap items/pointers at @slot and @slot+1 in @buf to restore key order.
 * Nodes: swap the two key_ptrs wholesale; if slot 0 changed, propagate
 * the new first key upward via btrfs_fixup_low_keys().  Leaves: swap the
 * item data through temporary heap buffers, swap offsets/sizes, then fix
 * each item's key with btrfs_set_item_key_unsafe().  NOTE(review):
 * malloc-failure handling lines are elided in this excerpt.
 */
3729 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3730 struct extent_buffer *buf, int slot)
3732 if (btrfs_header_level(buf)) {
3733 struct btrfs_key_ptr ptr1, ptr2;
3735 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3736 sizeof(struct btrfs_key_ptr));
3737 read_extent_buffer(buf, &ptr2,
3738 btrfs_node_key_ptr_offset(slot + 1),
3739 sizeof(struct btrfs_key_ptr));
3740 write_extent_buffer(buf, &ptr1,
3741 btrfs_node_key_ptr_offset(slot + 1),
3742 sizeof(struct btrfs_key_ptr));
3743 write_extent_buffer(buf, &ptr2,
3744 btrfs_node_key_ptr_offset(slot),
3745 sizeof(struct btrfs_key_ptr));
3747 struct btrfs_disk_key key;
3748 btrfs_node_key(buf, &key, 0);
3749 btrfs_fixup_low_keys(root, path, &key,
3750 btrfs_header_level(buf) + 1);
3753 struct btrfs_item *item1, *item2;
3754 struct btrfs_key k1, k2;
3755 char *item1_data, *item2_data;
3756 u32 item1_offset, item2_offset, item1_size, item2_size;
3758 item1 = btrfs_item_nr(slot);
3759 item2 = btrfs_item_nr(slot + 1);
3760 btrfs_item_key_to_cpu(buf, &k1, slot);
3761 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3762 item1_offset = btrfs_item_offset(buf, item1);
3763 item2_offset = btrfs_item_offset(buf, item2);
3764 item1_size = btrfs_item_size(buf, item1);
3765 item2_size = btrfs_item_size(buf, item2);
3767 item1_data = malloc(item1_size);
3770 item2_data = malloc(item2_size);
3776 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3777 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* cross-write: item1's data lands where item2's was, and vice versa */
3779 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3780 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3784 btrfs_set_item_offset(buf, item1, item2_offset);
3785 btrfs_set_item_offset(buf, item2, item1_offset);
3786 btrfs_set_item_size(buf, item1, item2_size);
3787 btrfs_set_item_size(buf, item2, item1_size);
3789 path->slots[0] = slot;
3790 btrfs_set_item_key_unsafe(root, path, &k2);
3791 path->slots[0] = slot + 1;
3792 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Bubble mis-ordered keys in the block at path->lowest_level back into
 * sorted order: scan adjacent pairs and swap_values() any inversion,
 * marking the buffer dirty afterwards.  NOTE(review): the level-based
 * node-vs-leaf branch and loop-restart lines are partly elided here.
 */
3797 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
3799 struct extent_buffer *buf;
3800 struct btrfs_key k1, k2;
3802 int level = path->lowest_level;
3805 buf = path->nodes[level];
3806 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3808 btrfs_node_key_to_cpu(buf, &k1, i);
3809 btrfs_node_key_to_cpu(buf, &k2, i + 1);
3811 btrfs_item_key_to_cpu(buf, &k1, i);
3812 btrfs_item_key_to_cpu(buf, &k2, i + 1);
3814 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
3816 ret = swap_values(root, path, buf, i);
3819 btrfs_mark_buffer_dirty(buf);
/*
 * Remove a structurally-broken item at @slot of leaf @buf, but only for
 * key types whose loss is recoverable (dir index, extent items, tree
 * block/extent data refs).  The item header array is shifted down with
 * memmove and nritems decremented; when slot 0 was removed the new first
 * key is propagated upward.  Item DATA is not reclaimed here — only the
 * header slot is dropped.
 */
3825 static int delete_bogus_item(struct btrfs_root *root,
3826 struct btrfs_path *path,
3827 struct extent_buffer *buf, int slot)
3829 struct btrfs_key key;
3830 int nritems = btrfs_header_nritems(buf);
3832 btrfs_item_key_to_cpu(buf, &key, slot);
3834 /* These are all the keys we can deal with missing. */
3835 if (key.type != BTRFS_DIR_INDEX_KEY &&
3836 key.type != BTRFS_EXTENT_ITEM_KEY &&
3837 key.type != BTRFS_METADATA_ITEM_KEY &&
3838 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3839 key.type != BTRFS_EXTENT_DATA_REF_KEY)
3842 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3843 (unsigned long long)key.objectid, key.type,
3844 (unsigned long long)key.offset, slot, buf->start);
3845 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
3846 btrfs_item_nr_offset(slot + 1),
3847 sizeof(struct btrfs_item) *
3848 (nritems - slot - 1));
3849 btrfs_set_header_nritems(buf, nritems - 1);
3851 struct btrfs_disk_key disk_key;
3853 btrfs_item_key(buf, &disk_key, 0);
3854 btrfs_fixup_low_keys(root, path, &disk_key, 1);
3856 btrfs_mark_buffer_dirty(buf);
/*
 * Repair leaf item offsets so data regions tile the leaf contiguously
 * from the end: item 0 must end at LEAF_DATA_SIZE and each item i must
 * end where item i-1's data begins.  Items extending past their bound
 * are deleted via delete_bogus_item(); gaps are closed by shifting the
 * item data up by the computed amount.  NOTE(review): loop-restart and
 * error-exit lines are elided in this excerpt.
 */
3860 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
3862 struct extent_buffer *buf;
3866 /* We should only get this for leaves */
3867 BUG_ON(path->lowest_level);
3868 buf = path->nodes[0];
3870 for (i = 0; i < btrfs_header_nritems(buf); i++) {
3871 unsigned int shift = 0, offset;
3873 if (i == 0 && btrfs_item_end_nr(buf, i) !=
3874 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3875 if (btrfs_item_end_nr(buf, i) >
3876 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3877 ret = delete_bogus_item(root, path, buf, i);
3880 fprintf(stderr, "item is off the end of the "
3881 "leaf, can't fix\n");
3885 shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
3886 btrfs_item_end_nr(buf, i);
3887 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
3888 btrfs_item_offset_nr(buf, i - 1)) {
3889 if (btrfs_item_end_nr(buf, i) >
3890 btrfs_item_offset_nr(buf, i - 1)) {
3891 ret = delete_bogus_item(root, path, buf, i);
3894 fprintf(stderr, "items overlap, can't fix\n");
3898 shift = btrfs_item_offset_nr(buf, i - 1) -
3899 btrfs_item_end_nr(buf, i);
3904 printf("Shifting item nr %d by %u bytes in block %llu\n",
3905 i, shift, (unsigned long long)buf->start);
3906 offset = btrfs_item_offset_nr(buf, i);
3907 memmove_extent_buffer(buf,
3908 btrfs_leaf_data(buf) + offset + shift,
3909 btrfs_leaf_data(buf) + offset,
3910 btrfs_item_size_nr(buf, i));
3911 btrfs_set_item_offset(buf, btrfs_item_nr(i),
3913 btrfs_mark_buffer_dirty(buf);
3917 * We may have moved things, in which case we want to exit so we don't
3918 * write those changes out. Once we have proper abort functionality in
3919 * progs this can be changed to something nicer.
3926 * Attempt to fix basic block failures. If we can't fix it for whatever reason
3927 * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are repairable.  For each root
 * that references @buf (via btrfs_find_all_roots), open a transaction,
 * COW down to the block with skip_check_block set, and run the matching
 * fixer (fix_key_order / fix_item_offset); each attempt commits its own
 * transaction.  NOTE(review): the roots-ulist release and final return
 * are elided in this excerpt.
 */
3929 static int try_to_fix_bad_block(struct btrfs_root *root,
3930 struct extent_buffer *buf,
3931 enum btrfs_tree_block_status status)
3933 struct btrfs_trans_handle *trans;
3934 struct ulist *roots;
3935 struct ulist_node *node;
3936 struct btrfs_root *search_root;
3937 struct btrfs_path path;
3938 struct ulist_iterator iter;
3939 struct btrfs_key root_key, key;
3942 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
3943 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3946 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
3950 btrfs_init_path(&path);
3951 ULIST_ITER_INIT(&iter);
3952 while ((node = ulist_next(roots, &iter))) {
3953 root_key.objectid = node->val;
3954 root_key.type = BTRFS_ROOT_ITEM_KEY;
3955 root_key.offset = (u64)-1;
3957 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
3964 trans = btrfs_start_transaction(search_root, 0);
3965 if (IS_ERR(trans)) {
3966 ret = PTR_ERR(trans);
3970 path.lowest_level = btrfs_header_level(buf);
/* the block is known-bad; don't let search re-validate and bail */
3971 path.skip_check_block = 1;
3972 if (path.lowest_level)
3973 btrfs_node_key_to_cpu(buf, &key, 0);
3975 btrfs_item_key_to_cpu(buf, &key, 0);
3976 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
3979 btrfs_commit_transaction(trans, search_root);
3982 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
3983 ret = fix_key_order(search_root, &path);
3984 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3985 ret = fix_item_offset(search_root, &path);
3987 btrfs_commit_transaction(trans, search_root);
3990 btrfs_release_path(&path);
3991 btrfs_commit_transaction(trans, search_root);
3994 btrfs_release_path(&path);
/*
 * Validate one tree block during the extent scan: stash its generation,
 * first key and level on the matching extent_record, run the structural
 * leaf/node checks, and in repair mode (elided condition) attempt
 * try_to_fix_bad_block() — a successful fix returns -EAGAIN (per the
 * comment) so the caller restarts the scan over COWed blocks.  Full-
 * backref blocks skip the owner check; otherwise check_owner_ref()
 * decides owner_ref_checked.  Finishes by maybe freeing the record.
 */
3998 static int check_block(struct btrfs_root *root,
3999 struct cache_tree *extent_cache,
4000 struct extent_buffer *buf, u64 flags)
4002 struct extent_record *rec;
4003 struct cache_extent *cache;
4004 struct btrfs_key key;
4005 enum btrfs_tree_block_status status;
4009 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4012 rec = container_of(cache, struct extent_record, cache);
4013 rec->generation = btrfs_header_generation(buf);
4015 level = btrfs_header_level(buf);
4016 if (btrfs_header_nritems(buf) > 0) {
4019 btrfs_item_key_to_cpu(buf, &key, 0);
4021 btrfs_node_key_to_cpu(buf, &key, 0);
4023 rec->info_objectid = key.objectid;
4025 rec->info_level = level;
4027 if (btrfs_is_leaf(buf))
4028 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4030 status = btrfs_check_node(root, &rec->parent_key, buf);
4032 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4034 status = try_to_fix_bad_block(root, buf, status);
4035 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4037 fprintf(stderr, "bad block %llu\n",
4038 (unsigned long long)buf->start);
4041 * Signal to callers we need to start the scan over
4042 * again since we'll have cowed blocks.
4047 rec->content_checked = 1;
4048 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4049 rec->owner_ref_checked = 1;
4051 ret = check_owner_ref(root, rec, buf);
4053 rec->owner_ref_checked = 1;
4057 maybe_free_extent_rec(extent_cache, rec);
/*
 * List-based variant of find_tree_backref (alternate build of the rbtree
 * version above — presumably selected by a preprocessor conditional not
 * visible here).  Walks rec->backrefs matching either @parent (full
 * backref) or @root (normal backref), skipping data backrefs.
 */
4062 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4063 u64 parent, u64 root)
4065 struct list_head *cur = rec->backrefs.next;
4066 struct extent_backref *node;
4067 struct tree_backref *back;
4069 while(cur != &rec->backrefs) {
4070 node = to_extent_backref(cur);
4074 back = to_tree_backref(node);
4076 if (!node->full_backref)
4078 if (parent == back->parent)
4081 if (node->full_backref)
4083 if (back->root == root)
/*
 * Allocate a zeroed tree_backref for @rec, keyed by @parent as a full
 * backref when parent != 0, otherwise by @root (root branch partly
 * elided).  NOTE(review): malloc-failure and insertion lines are elided
 * in this excerpt.
 */
4091 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4092 u64 parent, u64 root)
4094 struct tree_backref *ref = malloc(sizeof(*ref));
4098 memset(&ref->node, 0, sizeof(ref->node));
4100 ref->parent = parent;
4101 ref->node.full_backref = 1;
4104 ref->node.full_backref = 0;
/*
 * List-based variant of find_data_backref (alternate build of the
 * rbtree version above).  A full backref matches on @parent; otherwise
 * the (root, owner, offset) triple must match, and when both sides have
 * found_ref set the bytes/disk_bytenr must also agree or the candidate
 * is rejected.
 */
4111 static struct data_backref *find_data_backref(struct extent_record *rec,
4112 u64 parent, u64 root,
4113 u64 owner, u64 offset,
4115 u64 disk_bytenr, u64 bytes)
4117 struct list_head *cur = rec->backrefs.next;
4118 struct extent_backref *node;
4119 struct data_backref *back;
4121 while(cur != &rec->backrefs) {
4122 node = to_extent_backref(cur);
4126 back = to_data_backref(node);
4128 if (!node->full_backref)
4130 if (parent == back->parent)
4133 if (node->full_backref)
4135 if (back->root == root && back->owner == owner &&
4136 back->offset == offset) {
4137 if (found_ref && node->found_ref &&
4138 (back->bytes != bytes ||
4139 back->disk_bytenr != disk_bytenr))
/*
 * alloc_data_backref: allocate a zero-initialized data backref for @rec,
 * marked is_data.  Sets either parent+full_backref or (presumably)
 * root/owner/offset for the keyed form, records max_size as the ref's
 * byte count, and widens rec->max_size if this ref is larger.
 * NOTE(review): truncated — a max_size parameter is implied by the body
 * but its declaration line, the malloc NULL-check, root/owner assignments
 * and the return are not visible.
 */
4149 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4150 u64 parent, u64 root,
4151 u64 owner, u64 offset,
4154 struct data_backref *ref = malloc(sizeof(*ref));
4158 memset(&ref->node, 0, sizeof(ref->node));
4159 ref->node.is_data = 1;
4162 ref->parent = parent;
4165 ref->node.full_backref = 1;
4169 ref->offset = offset;
4170 ref->node.full_backref = 0;
4172 ref->bytes = max_size;
/* Track the largest backref size seen on this extent record. */
4175 if (max_size > rec->max_size)
4176 rec->max_size = max_size;
4180 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type = 1 when the extent's kind (data vs
 * metadata vs system) disagrees with the flags of the block group it
 * lives in.  SYSTEM extents can only be distinguished from METADATA by
 * inspecting the first backref's owning root (chunk tree => SYSTEM).
 * NOTE(review): truncated — the bg_cache NULL-check, early returns and
 * the bg_type declaration are not visible in this span.
 */
4181 static void check_extent_type(struct extent_record *rec)
4183 struct btrfs_block_group_cache *bg_cache;
4185 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4189 /* data extent, check chunk directly*/
4190 if (!rec->metadata) {
4191 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4192 rec->wrong_chunk_type = 1;
4196 /* metadata extent, check the obvious case first */
4197 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4198 BTRFS_BLOCK_GROUP_METADATA))) {
4199 rec->wrong_chunk_type = 1;
4204 * Check SYSTEM extent, as it's also marked as metadata, we can only
4205 * make sure it's a SYSTEM extent by its backref
4207 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4208 struct extent_backref *node;
4209 struct tree_backref *tback;
4212 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4213 if (node->is_data) {
4214 /* tree block shouldn't have data backref */
4215 rec->wrong_chunk_type = 1;
4218 tback = container_of(node, struct tree_backref, node);
/* Chunk-tree-owned blocks must live in a SYSTEM block group. */
4220 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4221 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4223 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4224 if (!(bg_cache->flags & bg_type))
4225 rec->wrong_chunk_type = 1;
/*
4230 * Allocate a new extent record, fill default values from @tmpl and insert int
4231 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4232 * the cache, otherwise it fails.
 * (review: "insert int" above presumably means "insert into".)
 * NOTE(review): truncated — the malloc NULL-check, the handling of the
 * insert_cache_extent() result, the metadata/crossing-stripes condition
 * and the return are not visible in this span.
 */
4234 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4235 struct extent_record *tmpl)
4237 struct extent_record *rec;
/* max_size == 0 would make the record meaningless. */
4240 BUG_ON(tmpl->max_size == 0);
4241 rec = malloc(sizeof(*rec));
/* Copy caller-supplied fields; everything else gets a sane default. */
4244 rec->start = tmpl->start;
4245 rec->max_size = tmpl->max_size;
4246 rec->nr = max(tmpl->nr, tmpl->max_size);
4247 rec->found_rec = tmpl->found_rec;
4248 rec->content_checked = tmpl->content_checked;
4249 rec->owner_ref_checked = tmpl->owner_ref_checked;
4250 rec->num_duplicates = 0;
4251 rec->metadata = tmpl->metadata;
4252 rec->flag_block_full_backref = FLAG_UNSET;
4253 rec->bad_full_backref = 0;
4254 rec->crossing_stripes = 0;
4255 rec->wrong_chunk_type = 0;
4256 rec->is_root = tmpl->is_root;
4257 rec->refs = tmpl->refs;
4258 rec->extent_item_refs = tmpl->extent_item_refs;
4259 rec->parent_generation = tmpl->parent_generation;
4260 INIT_LIST_HEAD(&rec->backrefs);
4261 INIT_LIST_HEAD(&rec->dups);
4262 INIT_LIST_HEAD(&rec->list);
4263 rec->backref_tree = RB_ROOT;
4264 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4265 rec->cache.start = tmpl->start;
4266 rec->cache.size = tmpl->nr;
4267 ret = insert_cache_extent(extent_cache, &rec->cache);
/* Global accounting of bytes covered by extent records. */
4272 bytes_used += rec->nr;
4275 rec->crossing_stripes = check_crossing_stripes(global_info,
4276 rec->start, global_info->nodesize);
4277 check_extent_type(rec);
/*
4282 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
 * some are merged with an existing record:
4284 * - refs - if found, increase refs
4285 * - is_root - if found, set
4286 * - content_checked - if found, set
4287 * - owner_ref_checked - if found, set
 *
4289 * If not found, create a new one, initialize and insert.
 * NOTE(review): truncated — declarations of ret/dup, several closing
 * braces, the duplicate-record field copies and the returns are not
 * visible in this span.
 */
4291 static int add_extent_rec(struct cache_tree *extent_cache,
4292 struct extent_record *tmpl)
4294 struct extent_record *rec;
4295 struct cache_extent *cache;
4299 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4301 rec = container_of(cache, struct extent_record, cache);
4305 rec->nr = max(tmpl->nr, tmpl->max_size);
/*
4308 * We need to make sure to reset nr to whatever the extent
4309 * record says was the real size, this way we can compare it to
 * the backrefs (presumably; the rest of this comment is cut off).
 */
4312 if (tmpl->found_rec) {
/* A second on-disk record for the same range => duplicate. */
4313 if (tmpl->start != rec->start || rec->found_rec) {
4314 struct extent_record *tmp;
4317 if (list_empty(&rec->list))
4318 list_add_tail(&rec->list,
4319 &duplicate_extents);
/*
4322 * We have to do this song and dance in case we
4323 * find an extent record that falls inside of
4324 * our current extent record but does not have
4325 * the same objectid.
 */
4327 tmp = malloc(sizeof(*tmp));
4330 tmp->start = tmpl->start;
4331 tmp->max_size = tmpl->max_size;
4334 tmp->metadata = tmpl->metadata;
4335 tmp->extent_item_refs = tmpl->extent_item_refs;
4336 INIT_LIST_HEAD(&tmp->list);
4337 list_add_tail(&tmp->list, &rec->dups);
4338 rec->num_duplicates++;
/* Merge scalar fields from @tmpl into the existing record. */
4345 if (tmpl->extent_item_refs && !dup) {
4346 if (rec->extent_item_refs) {
4347 fprintf(stderr, "block %llu rec "
4348 "extent_item_refs %llu, passed %llu\n",
4349 (unsigned long long)tmpl->start,
4350 (unsigned long long)
4351 rec->extent_item_refs,
4352 (unsigned long long)tmpl->extent_item_refs);
4354 rec->extent_item_refs = tmpl->extent_item_refs;
4358 if (tmpl->content_checked)
4359 rec->content_checked = 1;
4360 if (tmpl->owner_ref_checked)
4361 rec->owner_ref_checked = 1;
4362 memcpy(&rec->parent_key, &tmpl->parent_key,
4363 sizeof(tmpl->parent_key));
4364 if (tmpl->parent_generation)
4365 rec->parent_generation = tmpl->parent_generation;
4366 if (rec->max_size < tmpl->max_size)
4367 rec->max_size = tmpl->max_size;
/*
4370 * A metadata extent can't cross stripe_len boundary, otherwise
4371 * kernel scrub won't be able to handle it.
4372 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
 * against that (comment continuation not visible).
 */
4376 rec->crossing_stripes = check_crossing_stripes(
4377 global_info, rec->start,
4378 global_info->nodesize);
4379 check_extent_type(rec);
4380 maybe_free_extent_rec(extent_cache, rec);
/* Not found: fall back to a fresh insert. */
4384 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * add_tree_backref: record a tree-block backref (parent- or root-keyed)
 * for @bytenr in @extent_cache, creating a stub extent record first if
 * none covers the byte.  found_ref distinguishes "seen while walking the
 * tree" from "seen in the extent tree item"; seeing either twice prints
 * a warning.  New backrefs are inserted into rec->backref_tree.
 * NOTE(review): truncated — ret declaration, tmpl metadata/nr/max_size
 * setup, error returns, the alloc NULL-check, the `insert = true` path
 * and the final return are not visible.
 */
4389 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4390 u64 parent, u64 root, int found_ref)
4392 struct extent_record *rec;
4393 struct tree_backref *back;
4394 struct cache_extent *cache;
4396 bool insert = false;
4398 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: insert a minimal placeholder so the backref attaches. */
4400 struct extent_record tmpl;
4402 memset(&tmpl, 0, sizeof(tmpl));
4403 tmpl.start = bytenr;
4408 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4412 /* really a bug in cache_extent implement now */
4413 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4418 rec = container_of(cache, struct extent_record, cache);
4419 if (rec->start != bytenr) {
/*
4421 * Several cause, from unaligned bytenr to over lapping extents
 * (comment continuation not visible).
 */
4426 back = find_tree_backref(rec, parent, root);
4428 back = alloc_tree_backref(rec, parent, root);
4435 if (back->node.found_ref) {
4436 fprintf(stderr, "Extent back ref already exists "
4437 "for %llu parent %llu root %llu \n",
4438 (unsigned long long)bytenr,
4439 (unsigned long long)parent,
4440 (unsigned long long)root);
4442 back->node.found_ref = 1;
4444 if (back->node.found_extent_tree) {
4445 fprintf(stderr, "Extent back ref already exists "
4446 "for %llu parent %llu root %llu \n",
4447 (unsigned long long)bytenr,
4448 (unsigned long long)parent,
4449 (unsigned long long)root);
4451 back->node.found_extent_tree = 1;
/* Duplicate key insertion would indicate a logic error upstream. */
4454 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4455 compare_extent_backref));
4456 check_extent_type(rec);
4457 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_data_backref: record a data-extent backref for @bytenr, creating a
 * stub extent record first if needed.  For found_ref the ref's byte count
 * must equal @max_size; a change in bytes/disk_bytenr forces a reinsert
 * into the rbtree since those fields participate in the compare.
 * NOTE(review): truncated — ret declaration, tmpl.nr/metadata setup,
 * error returns, the alloc NULL-check, `insert = true` assignments and
 * the final return are not visible.
 */
4461 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4462 u64 parent, u64 root, u64 owner, u64 offset,
4463 u32 num_refs, int found_ref, u64 max_size)
4465 struct extent_record *rec;
4466 struct data_backref *back;
4467 struct cache_extent *cache;
4469 bool insert = false;
4471 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: create a minimal one to hang the backref on. */
4473 struct extent_record tmpl;
4475 memset(&tmpl, 0, sizeof(tmpl));
4476 tmpl.start = bytenr;
4478 tmpl.max_size = max_size;
4480 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4484 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4489 rec = container_of(cache, struct extent_record, cache);
4490 if (rec->max_size < max_size)
4491 rec->max_size = max_size;
/*
4494 * If found_ref is set then max_size is the real size and must match the
4495 * existing refs. So if we have already found a ref then we need to
4496 * make sure that this ref matches the existing one, otherwise we need
4497 * to add a new backref so we can notice that the backrefs don't match
4498 * and we need to figure out who is telling the truth. This is to
4499 * account for that awful fsync bug I introduced where we'd end up with
4500 * a btrfs_file_extent_item that would have its length include multiple
4501 * prealloc extents or point inside of a prealloc extent.
 */
4503 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4506 back = alloc_data_backref(rec, parent, root, owner, offset,
/* Walking the fs tree finds each file-extent ref exactly once. */
4513 BUG_ON(num_refs != 1);
4514 if (back->node.found_ref)
4515 BUG_ON(back->bytes != max_size);
4516 back->node.found_ref = 1;
4517 back->found_ref += 1;
4518 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
4519 back->bytes = max_size;
4520 back->disk_bytenr = bytenr;
4522 /* Need to reinsert if not already in the tree */
4524 rb_erase(&back->node.node, &rec->backref_tree);
/* Data refs found via fs tree walk imply contents were validated. */
4529 rec->content_checked = 1;
4530 rec->owner_ref_checked = 1;
4532 if (back->node.found_extent_tree) {
4533 fprintf(stderr, "Extent back ref already exists "
4534 "for %llu parent %llu root %llu "
4535 "owner %llu offset %llu num_refs %lu\n",
4536 (unsigned long long)bytenr,
4537 (unsigned long long)parent,
4538 (unsigned long long)root,
4539 (unsigned long long)owner,
4540 (unsigned long long)offset,
4541 (unsigned long)num_refs);
4543 back->num_refs = num_refs;
4544 back->node.found_extent_tree = 1;
4547 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4548 compare_extent_backref));
4550 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_pending: mark @bytenr/@size as seen; if it was not already in
 * @seen, also queue it in @pending for later processing.
 * NOTE(review): truncated — the ret declaration, the early return when
 * the extent was already seen, and the final return are not visible.
 */
4554 static int add_pending(struct cache_tree *pending,
4555 struct cache_tree *seen, u64 bytenr, u32 size)
4558 ret = add_cache_extent(seen, bytenr, size);
4561 add_cache_extent(pending, bytenr, size);
/*
 * pick_next_pending: fill @bits (capacity @bits_nr) with the next batch
 * of block ranges to read, preferring readahead entries, then nodes near
 * @last (backing up 32K to catch close-by neighbours), then the pending
 * tree.  When plenty of slots remain it opportunistically appends nearby
 * pending extents (within 32K gaps) to batch the I/O.
 * NOTE(review): truncated — the ret initialization/increment, several
 * loop braces and the return statements are not visible.
 */
4565 static int pick_next_pending(struct cache_tree *pending,
4566 struct cache_tree *reada,
4567 struct cache_tree *nodes,
4568 u64 last, struct block_info *bits, int bits_nr,
4571 unsigned long node_start = last;
4572 struct cache_extent *cache;
/* Readahead entries take absolute priority. */
4575 cache = search_cache_extent(reada, 0);
4577 bits[0].start = cache->start;
4578 bits[0].size = cache->size;
/* Back up a little so neighbours just before @last are included. */
4583 if (node_start > 32768)
4584 node_start -= 32768;
4586 cache = search_cache_extent(nodes, node_start);
4588 cache = search_cache_extent(nodes, 0);
4591 cache = search_cache_extent(pending, 0);
4596 bits[ret].start = cache->start;
4597 bits[ret].size = cache->size;
4598 cache = next_cache_extent(cache);
4600 } while (cache && ret < bits_nr);
4606 bits[ret].start = cache->start;
4607 bits[ret].size = cache->size;
4608 cache = next_cache_extent(cache);
4610 } while (cache && ret < bits_nr);
/* With spare capacity, pull in nearby pending extents too. */
4612 if (bits_nr - ret > 8) {
4613 u64 lookup = bits[0].start + bits[0].size;
4614 struct cache_extent *next;
4615 next = search_cache_extent(pending, lookup);
/* Stop batching once the gap exceeds 32K. */
4617 if (next->start - lookup > 32768)
4619 bits[ret].start = next->start;
4620 bits[ret].size = next->size;
4621 lookup = next->start + next->size;
4625 next = next_cache_extent(next);
/*
 * free_chunk_record: cache_tree destructor callback — unlink the chunk
 * record from its lists before it is released.
 * NOTE(review): the free(rec) call and closing brace are not visible.
 */
4633 static void free_chunk_record(struct cache_extent *cache)
4635 struct chunk_record *rec;
4637 rec = container_of(cache, struct chunk_record, cache);
4638 list_del_init(&rec->list);
4639 list_del_init(&rec->dextents);
/* Free all chunk records held in @chunk_cache via free_chunk_record(). */
4643 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4645 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * free_device_record: rbtree destructor callback for device records.
 * NOTE(review): the free(rec) call is not visible; the trailing
 * FREE_RB_BASED_TREE line belongs to a sibling free_device_cache()-style
 * function whose definition line is missing from this span.
 */
4648 static void free_device_record(struct rb_node *node)
4650 struct device_record *rec;
4652 rec = container_of(node, struct device_record, node);
4656 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * insert_block_group_record: add @bg_rec to the tree's extent cache and,
 * on success, append it to the tree's block_groups list.
 * NOTE(review): the ret declaration, error return and final return are
 * not visible in this span.
 */
4658 int insert_block_group_record(struct block_group_tree *tree,
4659 struct block_group_record *bg_rec)
4663 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4667 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * free_block_group_record: cache_tree destructor callback — unlink the
 * block group record before release.
 * NOTE(review): the free(rec) call is not visible.
 */
4671 static void free_block_group_record(struct cache_extent *cache)
4673 struct block_group_record *rec;
4675 rec = container_of(cache, struct block_group_record, cache);
4676 list_del_init(&rec->list);
/* Free every block group record via free_block_group_record(). */
4680 void free_block_group_tree(struct block_group_tree *tree)
4682 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * insert_device_extent_record: add @de_rec to the device-extent tree and
 * put it on both orphan lists until a matching chunk/device is found.
 * NOTE(review): the ret declaration, error return and final return are
 * not visible in this span.
 */
4685 int insert_device_extent_record(struct device_extent_tree *tree,
4686 struct device_extent_record *de_rec)
/*
4691 * Device extent is a bit different from the other extents, because
4692 * the extents which belong to the different devices may have the
4693 * same start and size, so we need use the special extent cache
4694 * search/insert functions.
 */
4696 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4700 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4701 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * free_device_extent_record: cache_tree destructor callback — unlink the
 * record from the chunk/device orphan lists if still queued.
 * NOTE(review): the free(rec) call is not visible.
 */
4705 static void free_device_extent_record(struct cache_extent *cache)
4707 struct device_extent_record *rec;
4709 rec = container_of(cache, struct device_extent_record, cache);
4710 if (!list_empty(&rec->chunk_list))
4711 list_del_init(&rec->chunk_list);
4712 if (!list_empty(&rec->device_list))
4713 list_del_init(&rec->device_list);
/* Free every device extent record via free_device_extent_record(). */
4717 void free_device_extent_tree(struct device_extent_tree *tree)
4719 cache_tree_free_extents(&tree->tree, free_device_extent_record);
4722 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * process_extent_ref_v0: translate a v0 extent ref item into a tree or
 * data backref.  v0 refs with objectid below BTRFS_FIRST_FREE_OBJECTID
 * belong to internal trees (tree backref); the rest are data backrefs.
 * NOTE(review): compat-only code (#ifdef); the ret declaration, the else
 * branch brace and the final return are not visible.
 */
4723 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4724 struct extent_buffer *leaf, int slot)
4726 struct btrfs_extent_ref_v0 *ref0;
4727 struct btrfs_key key;
4730 btrfs_item_key_to_cpu(leaf, &key, slot);
4731 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4732 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4733 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
4736 ret = add_data_backref(extent_cache, key.objectid, key.offset,
4737 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * btrfs_new_chunk_record: build an in-memory chunk record from the
 * CHUNK_ITEM at @slot in @leaf.  The allocation is sized for the item's
 * stripe count; on allocation failure an error is printed (the visible
 * text suggests the process then exits, but the exit call is cut off).
 * NOTE(review): truncated — num_stripes/i declarations, cache.objectid
 * assignment, the uuid copy length and the return are not visible.
 */
4743 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4744 struct btrfs_key *key,
4747 struct btrfs_chunk *ptr;
4748 struct chunk_record *rec;
4751 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4752 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Record size includes the flexible stripe array. */
4754 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4756 fprintf(stderr, "memory allocation failed\n");
4760 INIT_LIST_HEAD(&rec->list);
4761 INIT_LIST_HEAD(&rec->dextents);
4764 rec->cache.start = key->offset;
4765 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4767 rec->generation = btrfs_header_generation(leaf);
4769 rec->objectid = key->objectid;
4770 rec->type = key->type;
4771 rec->offset = key->offset;
4773 rec->length = rec->cache.size;
4774 rec->owner = btrfs_chunk_owner(leaf, ptr);
4775 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4776 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4777 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4778 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4779 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4780 rec->num_stripes = num_stripes;
4781 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy per-stripe devid/offset/uuid out of the leaf. */
4783 for (i = 0; i < rec->num_stripes; ++i) {
4784 rec->stripes[i].devid =
4785 btrfs_stripe_devid_nr(leaf, ptr, i);
4786 rec->stripes[i].offset =
4787 btrfs_stripe_offset_nr(leaf, ptr, i);
4788 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4789 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * process_chunk_item: validate the CHUNK_ITEM at @slot and insert its
 * record into @chunk_cache; duplicate chunks are reported.
 * NOTE(review): truncated — ret declaration, the validity-failure return
 * and the final return are not visible.
 */
4796 static int process_chunk_item(struct cache_tree *chunk_cache,
4797 struct btrfs_key *key, struct extent_buffer *eb,
4800 struct chunk_record *rec;
4801 struct btrfs_chunk *chunk;
4804 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
/*
4806 * Do extra check for this chunk item,
 *
4808 * It's still possible one can craft a leaf with CHUNK_ITEM, with
4809 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
4810 * and owner<->key_type check.
 * (review: "onwer" above presumably means "owner".)
 */
4812 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
4815 error("chunk(%llu, %llu) is not valid, ignore it",
4816 key->offset, btrfs_chunk_length(eb, chunk));
4819 rec = btrfs_new_chunk_record(eb, key, slot);
4820 ret = insert_cache_extent(chunk_cache, &rec->cache);
4822 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4823 rec->offset, rec->length);
/*
 * process_device_item: build a device record from the DEV_ITEM at @slot
 * and insert it into @dev_cache keyed by devid; duplicates are reported.
 * NOTE(review): truncated — ret declaration, the allocation-failure exit
 * and the final return are not visible.  rec->devid is assigned twice
 * (key->offset, then btrfs_device_id); the second assignment is the one
 * that sticks.
 */
4830 static int process_device_item(struct rb_root *dev_cache,
4831 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4833 struct btrfs_dev_item *ptr;
4834 struct device_record *rec;
4837 ptr = btrfs_item_ptr(eb,
4838 slot, struct btrfs_dev_item);
4840 rec = malloc(sizeof(*rec));
4842 fprintf(stderr, "memory allocation failed\n");
4846 rec->devid = key->offset;
4847 rec->generation = btrfs_header_generation(eb);
4849 rec->objectid = key->objectid;
4850 rec->type = key->type;
4851 rec->offset = key->offset;
4853 rec->devid = btrfs_device_id(eb, ptr);
4854 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4855 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4857 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4859 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * btrfs_new_block_group_record: build an in-memory block group record
 * from the BLOCK_GROUP_ITEM at @slot; the cache range is
 * [key->objectid, key->objectid + key->offset).
 * NOTE(review): truncated — the allocation-failure exit and the return
 * are not visible.
 */
4866 struct block_group_record *
4867 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4870 struct btrfs_block_group_item *ptr;
4871 struct block_group_record *rec;
4873 rec = calloc(1, sizeof(*rec));
4875 fprintf(stderr, "memory allocation failed\n");
4879 rec->cache.start = key->objectid;
4880 rec->cache.size = key->offset;
4882 rec->generation = btrfs_header_generation(leaf);
4884 rec->objectid = key->objectid;
4885 rec->type = key->type;
4886 rec->offset = key->offset;
4888 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
4889 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
4891 INIT_LIST_HEAD(&rec->list);
/*
 * process_block_group_item: insert the block group at @slot into
 * @block_group_cache; duplicates are reported to stderr.
 * NOTE(review): truncated — ret declaration and final return are not
 * visible.
 */
4896 static int process_block_group_item(struct block_group_tree *block_group_cache,
4897 struct btrfs_key *key,
4898 struct extent_buffer *eb, int slot)
4900 struct block_group_record *rec;
4903 rec = btrfs_new_block_group_record(eb, key, slot);
4904 ret = insert_block_group_record(block_group_cache, rec);
4906 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
4907 rec->objectid, rec->offset);
/*
 * btrfs_new_device_extent_record: build an in-memory device extent
 * record from the DEV_EXTENT item at @slot.  The cache key uses
 * (objectid = devid, start = physical offset, size = length) since
 * extents on different devices may share start/size.
 * NOTE(review): truncated — the allocation-failure exit, the
 * chunk_offset field name and the return are not visible.
 */
4914 struct device_extent_record *
4915 btrfs_new_device_extent_record(struct extent_buffer *leaf,
4916 struct btrfs_key *key, int slot)
4918 struct device_extent_record *rec;
4919 struct btrfs_dev_extent *ptr;
4921 rec = calloc(1, sizeof(*rec));
4923 fprintf(stderr, "memory allocation failed\n");
4927 rec->cache.objectid = key->objectid;
4928 rec->cache.start = key->offset;
4930 rec->generation = btrfs_header_generation(leaf);
4932 rec->objectid = key->objectid;
4933 rec->type = key->type;
4934 rec->offset = key->offset;
4936 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
4937 rec->chunk_objecteid =
4938 btrfs_dev_extent_chunk_objectid(leaf, ptr);
4940 btrfs_dev_extent_chunk_offset(leaf, ptr);
4941 rec->length = btrfs_dev_extent_length(leaf, ptr);
4942 rec->cache.size = rec->length;
4944 INIT_LIST_HEAD(&rec->chunk_list);
4945 INIT_LIST_HEAD(&rec->device_list);
/*
 * process_device_extent_item: insert the device extent at @slot into
 * @dev_extent_cache; duplicates are reported.
 * NOTE(review): truncated — the return-type line of the definition, the
 * ret declaration, the fprintf(stderr, ...) opener for the duplicate
 * message and the final return are not visible.
 */
4951 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
4952 struct btrfs_key *key, struct extent_buffer *eb,
4955 struct device_extent_record *rec;
4958 rec = btrfs_new_device_extent_record(eb, key, slot);
4959 ret = insert_device_extent_record(dev_extent_cache, rec);
4962 "Device extent[%llu, %llu, %llu] existed.\n",
4963 rec->objectid, rec->offset, rec->length);
/*
 * process_extent_item: parse an EXTENT_ITEM or METADATA_ITEM at @slot,
 * create/merge its extent record, then walk the inline refs and record a
 * tree or data backref for each.  Handles the v0 compat format when
 * built with BTRFS_COMPAT_EXTENT_TREE_V0.  Invalid alignment/length
 * extents are reported and (presumably) skipped — the returns after each
 * error() are not visible.
 * NOTE(review): heavily truncated — declarations of ptr/end/type/offset/
 * num_bytes/refs/metadata/ret, the while-loop header over inline refs,
 * the switch statement opener, break statements and the final return are
 * all missing from this span.
 */
4970 static int process_extent_item(struct btrfs_root *root,
4971 struct cache_tree *extent_cache,
4972 struct extent_buffer *eb, int slot)
4974 struct btrfs_extent_item *ei;
4975 struct btrfs_extent_inline_ref *iref;
4976 struct btrfs_extent_data_ref *dref;
4977 struct btrfs_shared_data_ref *sref;
4978 struct btrfs_key key;
4979 struct extent_record tmpl;
4984 u32 item_size = btrfs_item_size_nr(eb, slot);
4990 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM offset is tree level; length is implicitly nodesize. */
4992 if (key.type == BTRFS_METADATA_ITEM_KEY) {
4994 num_bytes = root->fs_info->nodesize;
4996 num_bytes = key.offset;
4999 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
5000 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5001 key.objectid, root->fs_info->sectorsize);
/* Items smaller than a modern extent item are v0 format (or corrupt). */
5004 if (item_size < sizeof(*ei)) {
5005 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5006 struct btrfs_extent_item_v0 *ei0;
5007 if (item_size != sizeof(*ei0)) {
5009 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
5010 key.objectid, key.type, key.offset,
5011 btrfs_header_bytenr(eb), slot);
5014 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5015 refs = btrfs_extent_refs_v0(eb, ei0);
5019 memset(&tmpl, 0, sizeof(tmpl));
5020 tmpl.start = key.objectid;
5021 tmpl.nr = num_bytes;
5022 tmpl.extent_item_refs = refs;
5023 tmpl.metadata = metadata;
5025 tmpl.max_size = num_bytes;
5027 return add_extent_rec(extent_cache, &tmpl);
5030 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5031 refs = btrfs_extent_refs(eb, ei);
5032 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Sanity-check lengths before recording anything. */
5036 if (metadata && num_bytes != root->fs_info->nodesize) {
5037 error("ignore invalid metadata extent, length %llu does not equal to %u",
5038 num_bytes, root->fs_info->nodesize);
5041 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
5042 error("ignore invalid data extent, length %llu is not aligned to %u",
5043 num_bytes, root->fs_info->sectorsize);
5047 memset(&tmpl, 0, sizeof(tmpl));
5048 tmpl.start = key.objectid;
5049 tmpl.nr = num_bytes;
5050 tmpl.extent_item_refs = refs;
5051 tmpl.metadata = metadata;
5053 tmpl.max_size = num_bytes;
5054 add_extent_rec(extent_cache, &tmpl);
/* Walk the inline refs that follow the extent item. */
5056 ptr = (unsigned long)(ei + 1);
5057 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5058 key.type == BTRFS_EXTENT_ITEM_KEY)
5059 ptr += sizeof(struct btrfs_tree_block_info);
5061 end = (unsigned long)ei + item_size;
5063 iref = (struct btrfs_extent_inline_ref *)ptr;
5064 type = btrfs_extent_inline_ref_type(eb, iref);
5065 offset = btrfs_extent_inline_ref_offset(eb, iref);
5067 case BTRFS_TREE_BLOCK_REF_KEY:
5068 ret = add_tree_backref(extent_cache, key.objectid,
5072 "add_tree_backref failed (extent items tree block): %s",
5075 case BTRFS_SHARED_BLOCK_REF_KEY:
5076 ret = add_tree_backref(extent_cache, key.objectid,
5080 "add_tree_backref failed (extent items shared block): %s",
5083 case BTRFS_EXTENT_DATA_REF_KEY:
5084 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5085 add_data_backref(extent_cache, key.objectid, 0,
5086 btrfs_extent_data_ref_root(eb, dref),
5087 btrfs_extent_data_ref_objectid(eb,
5089 btrfs_extent_data_ref_offset(eb, dref),
5090 btrfs_extent_data_ref_count(eb, dref),
5093 case BTRFS_SHARED_DATA_REF_KEY:
5094 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5095 add_data_backref(extent_cache, key.objectid, offset,
5097 btrfs_shared_data_ref_count(eb, sref),
5101 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5102 key.objectid, key.type, num_bytes);
5105 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range: verify the free-space cache of @cache has an entry
 * exactly covering [offset, offset+bytes).  Superblock mirror stripes
 * inside the range are carved out first (recursing for the left side
 * when a super lands in the middle); the matched entry is then unlinked
 * so leftover entries can be detected by the caller.
 * NOTE(review): truncated — declarations of logical/nr/stripe_len/
 * bytenr/i/ret, the per-stripe loop header, free(logical) cleanup,
 * several returns and braces are not visible.
 */
5112 static int check_cache_range(struct btrfs_root *root,
5113 struct btrfs_block_group_cache *cache,
5114 u64 offset, u64 bytes)
5116 struct btrfs_free_space *entry;
/* Map each superblock mirror into this block group's logical space. */
5122 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5123 bytenr = btrfs_sb_offset(i);
5124 ret = btrfs_rmap_block(root->fs_info,
5125 cache->key.objectid, bytenr, 0,
5126 &logical, &nr, &stripe_len);
/* Skip stripes that don't intersect [offset, offset+bytes). */
5131 if (logical[nr] + stripe_len <= offset)
5133 if (offset + bytes <= logical[nr])
5135 if (logical[nr] == offset) {
5136 if (stripe_len >= bytes) {
5140 bytes -= stripe_len;
5141 offset += stripe_len;
5142 } else if (logical[nr] < offset) {
5143 if (logical[nr] + stripe_len >=
5148 bytes = (offset + bytes) -
5149 (logical[nr] + stripe_len);
5150 offset = logical[nr] + stripe_len;
/*
5153 * Could be tricky, the super may land in the
5154 * middle of the area we're checking. First
5155 * check the easiest case, it's at the end.
 */
5157 if (logical[nr] + stripe_len >=
5159 bytes = logical[nr] - offset;
5163 /* Check the left side */
5164 ret = check_cache_range(root, cache,
5166 logical[nr] - offset);
5172 /* Now we continue with the right side */
5173 bytes = (offset + bytes) -
5174 (logical[nr] + stripe_len);
5175 offset = logical[nr] + stripe_len;
/* The remaining range must match one free-space entry exactly. */
5182 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5184 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5185 offset, offset+bytes);
5189 if (entry->offset != offset) {
5190 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5195 if (entry->bytes != bytes) {
5196 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5197 bytes, entry->bytes, offset);
/* Consume the entry so stragglers are detectable afterwards. */
5201 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache: cross-check a block group's loaded free-space
 * cache against the extent tree.  Walks all extent/metadata items in the
 * group and asserts every gap between allocated extents appears in the
 * cache (via check_cache_range); any entries left over afterwards are an
 * error.
 * NOTE(review): truncated — declarations of last/ret, error handling
 * after search/next_leaf, path.slots advance, loop braces and the final
 * return are not visible.
 */
5206 static int verify_space_cache(struct btrfs_root *root,
5207 struct btrfs_block_group_cache *cache)
5209 struct btrfs_path path;
5210 struct extent_buffer *leaf;
5211 struct btrfs_key key;
/* Extent items live in the extent tree regardless of @root. */
5215 root = root->fs_info->extent_root;
/* Never check below the first superblock copy. */
5217 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5219 btrfs_init_path(&path);
5220 key.objectid = last;
5222 key.type = BTRFS_EXTENT_ITEM_KEY;
5223 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5228 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5229 ret = btrfs_next_leaf(root, &path);
5237 leaf = path.nodes[0];
5238 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop at the end of this block group. */
5239 if (key.objectid >= cache->key.offset + cache->key.objectid)
5241 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5242 key.type != BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM length is nodesize; EXTENT_ITEM carries key.offset. */
5247 if (last == key.objectid) {
5248 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5249 last = key.objectid + key.offset;
5251 last = key.objectid + root->fs_info->nodesize;
/* The gap [last, key.objectid) must be recorded as free space. */
5256 ret = check_cache_range(root, cache, last,
5257 key.objectid - last);
5260 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5261 last = key.objectid + key.offset;
5263 last = key.objectid + root->fs_info->nodesize;
/* Trailing free space up to the end of the group. */
5267 if (last < cache->key.objectid + cache->key.offset)
5268 ret = check_cache_range(root, cache, last,
5269 cache->key.objectid +
5270 cache->key.offset - last);
5273 btrfs_release_path(&path);
/* check_cache_range unlinked matches; leftovers mean extra entries. */
5276 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5277 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache: iterate every block group and validate its free
 * space data, loading from the free space tree when the FREE_SPACE_TREE
 * compat_ro bit is set, otherwise from the v1 space cache.  A cache
 * generation mismatch with the superblock merely invalidates the cache.
 * Returns -EINVAL if any group failed verification.
 * NOTE(review): truncated — declarations of ret/error, loop header over
 * block groups, error-path breaks/continues and braces are not visible.
 */
5285 static int check_space_cache(struct btrfs_root *root)
5287 struct btrfs_block_group_cache *cache;
/* Start past the primary superblock. */
5288 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5292 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5293 btrfs_super_generation(root->fs_info->super_copy) !=
5294 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5295 printf("cache and super generation don't match, space cache "
5296 "will be invalidated\n");
/* Progress reporting for interactive runs. */
5300 if (ctx.progress_enabled) {
5301 ctx.tp = TASK_FREE_SPACE;
5302 task_start(ctx.info);
5306 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5310 start = cache->key.objectid + cache->key.offset;
5311 if (!cache->free_space_ctl) {
5312 if (btrfs_init_free_space_ctl(cache,
5313 root->fs_info->sectorsize)) {
5318 btrfs_remove_free_space_cache(cache);
5321 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
5322 ret = exclude_super_stripes(root, cache);
5324 fprintf(stderr, "could not exclude super stripes: %s\n",
5329 ret = load_free_space_tree(root->fs_info, cache);
5330 free_excluded_extents(root, cache);
5332 fprintf(stderr, "could not load free space tree: %s\n",
5339 ret = load_free_space_cache(root->fs_info, cache);
5344 ret = verify_space_cache(root, cache);
5346 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5347 cache->key.objectid);
5352 task_stop(ctx.info);
5354 return error ? -EINVAL : 0;
/*
 * check_extent_csums: read @num_bytes of data at @bytenr and verify each
 * sectorsize chunk against the checksums stored in @eb starting at
 * @leaf_offset.  On mismatch, retries the remaining mirrors before
 * giving up; mismatches are reported to stderr.
 * NOTE(review): truncated — declarations of data/offset/mirror/csum/
 * csum_expected/read_len/num_copies/ret, the csum seed initialization,
 * the mirror-retry control flow, free(data) and the return are not
 * visible.
 */
5357 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5358 u64 num_bytes, unsigned long leaf_offset,
5359 struct extent_buffer *eb) {
5361 struct btrfs_fs_info *fs_info = root->fs_info;
5363 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
5365 unsigned long csum_offset;
5369 u64 data_checked = 0;
/* Checksums only cover whole sectors. */
5375 if (num_bytes % fs_info->sectorsize)
5378 data = malloc(num_bytes);
5382 while (offset < num_bytes) {
5385 read_len = num_bytes - offset;
5386 /* read as much space once a time */
5387 ret = read_extent_data(fs_info, data + offset,
5388 bytenr + offset, &read_len, mirror);
5392 /* verify every 4k data's checksum */
5393 while (data_checked < read_len) {
5395 tmp = offset + data_checked;
5397 csum = btrfs_csum_data((char *)data + tmp,
5398 csum, fs_info->sectorsize);
5399 btrfs_csum_final(csum, (u8 *)&csum);
/* Locate this sector's stored checksum inside the csum item. */
5401 csum_offset = leaf_offset +
5402 tmp / fs_info->sectorsize * csum_size;
5403 read_extent_buffer(eb, (char *)&csum_expected,
5404 csum_offset, csum_size);
5405 /* try another mirror */
5406 if (csum != csum_expected) {
5407 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5408 mirror, bytenr + tmp,
5409 csum, csum_expected);
5410 num_copies = btrfs_num_copies(root->fs_info,
5412 if (mirror < num_copies - 1) {
5417 data_checked += fs_info->sectorsize;
/*
 * check_extent_exists: confirm the whole range [bytenr, bytenr+num_bytes)
 * is covered by EXTENT_ITEMs in the extent tree, walking backward first
 * (BLOCK_GROUP/ref keys sort above EXTENT_ITEM for the same bytenr) and
 * then forward, trimming the uncovered remainder as extents are found.
 * Recurses when a found extent splits the range in two.  Reports any
 * uncovered remainder.
 * NOTE(review): truncated — the num_bytes parameter line, ret
 * declaration, several break/continue/goto-out statements, slot
 * decrements/advances and the final return are not visible.
 */
5426 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5429 struct btrfs_path path;
5430 struct extent_buffer *leaf;
5431 struct btrfs_key key;
5434 btrfs_init_path(&path);
5435 key.objectid = bytenr;
5436 key.type = BTRFS_EXTENT_ITEM_KEY;
/* offset = -1 positions us just past any item at this bytenr. */
5437 key.offset = (u64)-1;
5440 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
5443 fprintf(stderr, "Error looking up extent record %d\n", ret);
5444 btrfs_release_path(&path);
5447 if (path.slots[0] > 0) {
5450 ret = btrfs_prev_leaf(root, &path);
5453 } else if (ret > 0) {
5460 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/*
5463 * Block group items come before extent items if they have the same
5464 * bytenr, so walk back one more just in case. Dear future traveller,
5465 * first congrats on mastering time travel. Now if it's not too much
5466 * trouble could you go back to 2006 and tell Chris to make the
5467 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5468 * EXTENT_ITEM_KEY please?
 */
5470 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5471 if (path.slots[0] > 0) {
5474 ret = btrfs_prev_leaf(root, &path);
5477 } else if (ret > 0) {
5482 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Forward scan: consume extents that overlap our range. */
5486 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5487 ret = btrfs_next_leaf(root, &path);
5489 fprintf(stderr, "Error going to next leaf "
5491 btrfs_release_path(&path);
5497 leaf = path.nodes[0];
5498 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5499 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5503 if (key.objectid + key.offset < bytenr) {
5507 if (key.objectid > bytenr + num_bytes)
/* Trim the uncovered remainder depending on overlap shape. */
5510 if (key.objectid == bytenr) {
5511 if (key.offset >= num_bytes) {
5515 num_bytes -= key.offset;
5516 bytenr += key.offset;
5517 } else if (key.objectid < bytenr) {
5518 if (key.objectid + key.offset >= bytenr + num_bytes) {
5522 num_bytes = (bytenr + num_bytes) -
5523 (key.objectid + key.offset);
5524 bytenr = key.objectid + key.offset;
5526 if (key.objectid + key.offset < bytenr + num_bytes) {
5527 u64 new_start = key.objectid + key.offset;
5528 u64 new_bytes = bytenr + num_bytes - new_start;
/*
5531 * Weird case, the extent is in the middle of
5532 * our range, we'll have to search one side
5533 * and then the other. Not sure if this happens
5534 * in real life, but no harm in coding it up
5535 * anyway just in case.
 */
5537 btrfs_release_path(&path);
5538 ret = check_extent_exists(root, new_start,
5541 fprintf(stderr, "Right section didn't "
5545 num_bytes = key.objectid - bytenr;
5548 num_bytes = key.objectid - bytenr;
/* Anything still uncovered means csums point at missing extents. */
5555 if (num_bytes && !ret) {
5556 fprintf(stderr, "There are no extents for csum range "
5557 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5561 btrfs_release_path(&path);
/*
 * check_csums: walk the whole csum tree.  For each EXTENT_CSUM item,
 * optionally verify the data checksums (when check_data_csum is set via
 * check_extent_csums) and coalesce adjacent csum ranges; whenever a gap
 * appears, confirm the accumulated range is backed by real extents via
 * check_extent_exists.
 * NOTE(review): truncated — declarations of ret/err/data_len, the main
 * loop header, path.slots advance, the skip_csum_check label and the
 * final return are not visible.
 */
5565 static int check_csums(struct btrfs_root *root)
5567 struct btrfs_path path;
5568 struct extent_buffer *leaf;
5569 struct btrfs_key key;
5570 u64 offset = 0, num_bytes = 0;
5571 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5575 unsigned long leaf_offset;
/* Csums live in the dedicated csum tree. */
5577 root = root->fs_info->csum_root;
5578 if (!extent_buffer_uptodate(root->node)) {
5579 fprintf(stderr, "No valid csum tree found\n");
5583 btrfs_init_path(&path);
5584 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5585 key.type = BTRFS_EXTENT_CSUM_KEY;
5587 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5589 fprintf(stderr, "Error searching csum tree %d\n", ret);
5590 btrfs_release_path(&path);
5594 if (ret > 0 && path.slots[0])
5599 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5600 ret = btrfs_next_leaf(root, &path);
5602 fprintf(stderr, "Error going to next leaf "
5609 leaf = path.nodes[0];
5611 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5612 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum entry covers one sector; item size => data length. */
5617 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
5618 csum_size) * root->fs_info->sectorsize;
5619 if (!check_data_csum)
5620 goto skip_csum_check;
5621 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5622 ret = check_extent_csums(root, key.offset, data_len,
/* Coalesce contiguous csum ranges; verify on discontinuity. */
5628 offset = key.offset;
5629 } else if (key.offset != offset + num_bytes) {
5630 ret = check_extent_exists(root, offset, num_bytes);
5632 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5633 "there is no extent record\n",
5634 offset, offset+num_bytes);
5637 offset = key.offset;
5640 num_bytes += data_len;
5644 btrfs_release_path(&path);
/*
 * is_dropped_key() - tell whether @key sorts strictly before @drop_key.
 *
 * Used while walking a partially-dropped snapshot: keys below the
 * recorded drop progress key have already been deleted and must be
 * skipped.  Compares (objectid, type, offset) lexicographically; the
 * return statements are on sampled-out lines of this listing —
 * presumably 1 for "dropped", 0 otherwise (TODO confirm in full file).
 */
5648 static int is_dropped_key(struct btrfs_key *key,
5649 struct btrfs_key *drop_key) {
5650 if (key->objectid < drop_key->objectid)
5652 else if (key->objectid == drop_key->objectid) {
5653 if (key->type < drop_key->type)
5655 else if (key->type == drop_key->type) {
5656 if (key->offset < drop_key->offset)
5664 * Here are the rules for FULL_BACKREF.
5666 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5667 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5669 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
5670 * if it happened after the relocation occurred since we'll have dropped the
5671 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5672 * have no real way to know for sure.
5674 * We process the blocks one root at a time, and we start from the lowest root
5675 * objectid and go to the highest. So we can just lookup the owner backref for
5676 * the record and if we don't find it then we know it doesn't exist and we have
5679 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5680 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5681 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag() - decide whether @buf should carry FULL_BACKREF.
 * See the block comment above for the three FULL_BACKREF rules.
 *
 * @extent_cache: cache of extent records gathered so far
 * @buf:          the tree block being examined
 * @ri:           the root we reached this block from
 * Sets BTRFS_BLOCK_FLAG_FULL_BACKREF in *flags when needed and marks
 * rec->bad_full_backref when the cached answer disagrees.
 */
5683 static int calc_extent_flag(struct cache_tree *extent_cache,
5684 struct extent_buffer *buf,
5685 struct root_item_record *ri,
5688 struct extent_record *rec;
5689 struct cache_extent *cache;
5690 struct tree_backref *tback;
5693 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5694 /* we have added this extent before */
5698 rec = container_of(cache, struct extent_record, cache);
5701 * Except file/reloc tree, we can not have
5704 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Root node of the tree we came from: never FULL_BACKREF. */
5709 if (buf->start == ri->bytenr)
/* Relocated block: FULL_BACKREF by rule 1. */
5712 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5715 owner = btrfs_header_owner(buf);
/* Still owned by the root we came from: normal backref. */
5716 if (owner == ri->objectid)
/* Owner changed; an owner backref decides (rule 2). */
5719 tback = find_tree_backref(rec, 0, owner);
/* Record disagreement with a previously computed answer. */
5724 if (rec->flag_block_full_backref != FLAG_UNSET &&
5725 rec->flag_block_full_backref != 0)
5726 rec->bad_full_backref = 1;
5729 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5730 if (rec->flag_block_full_backref != FLAG_UNSET &&
5731 rec->flag_block_full_backref != 1)
5732 rec->bad_full_backref = 1;
/*
 * report_mismatch_key_root() - print a diagnostic for a key type found
 * in a tree that cannot legally contain it; both the key type and the
 * root objectid are printed in human-readable form.
 */
5736 static void report_mismatch_key_root(u8 key_type, u64 rootid)
5738 fprintf(stderr, "Invalid key type(");
5739 print_key_type(stderr, 0, key_type);
5740 fprintf(stderr, ") found in root(");
5741 print_objectid(stderr, rootid, 0);
5742 fprintf(stderr, ")\n");
5746 * Check if the key is valid with its extent buffer.
5748 * This is a early check in case invalid key exists in a extent buffer
5749 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * check_type_with_root() - validate that @key_type may appear in the
 * tree identified by @rootid.  Early sanity filter for leaf items;
 * reports (report_mismatch_key_root) and fails for impossible
 * combinations such as a CHUNK_ITEM outside the chunk tree.
 */
5752 static int check_type_with_root(u64 rootid, u8 key_type)
5755 /* Only valid in chunk tree */
5756 case BTRFS_DEV_ITEM_KEY:
5757 case BTRFS_CHUNK_ITEM_KEY:
5758 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
5761 /* valid in csum and log tree */
/*
 * Fixed: this case label used to read BTRFS_CSUM_TREE_OBJECTID (a tree
 * objectid, value 7) instead of the key type BTRFS_EXTENT_CSUM_KEY, so
 * the arm could never match a real key and csum items were never
 * validated against their containing tree.
 */
5762 case BTRFS_EXTENT_CSUM_KEY:
5763 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
5767 case BTRFS_EXTENT_ITEM_KEY:
5768 case BTRFS_METADATA_ITEM_KEY:
5769 case BTRFS_BLOCK_GROUP_ITEM_KEY:
5770 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
5773 case BTRFS_ROOT_ITEM_KEY:
5774 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
5777 case BTRFS_DEV_EXTENT_KEY:
5778 if (rootid != BTRFS_DEV_TREE_OBJECTID)
5784 report_mismatch_key_root(key_type, rootid);
/*
 * run_next_block() - process one tree block of the extent-tree scan.
 *
 * Picks the next block to handle from the pending/reada/nodes caches
 * (pick_next_pending), issues readahead for upcoming blocks, reads the
 * chosen block, works out its FULL_BACKREF status, sanity-checks it
 * (check_block), then records everything of interest found inside:
 * extent/metadata items, csum byte counts, chunk/device/block-group/
 * dev-extent items, tree and data backrefs, and orphan items queued
 * for deletion.  Non-leaf blocks queue their children into the
 * nodes/pending caches for later iterations.
 *
 * Returns 0 on success; error paths live on sampled-out lines of this
 * listing.
 */
5788 static int run_next_block(struct btrfs_root *root,
5789 struct block_info *bits,
5792 struct cache_tree *pending,
5793 struct cache_tree *seen,
5794 struct cache_tree *reada,
5795 struct cache_tree *nodes,
5796 struct cache_tree *extent_cache,
5797 struct cache_tree *chunk_cache,
5798 struct rb_root *dev_cache,
5799 struct block_group_tree *block_group_cache,
5800 struct device_extent_tree *dev_extent_cache,
5801 struct root_item_record *ri)
5803 struct btrfs_fs_info *fs_info = root->fs_info;
5804 struct extent_buffer *buf;
5805 struct extent_record *rec = NULL;
5816 struct btrfs_key key;
5817 struct cache_extent *cache;
/* Choose the next batch; bits[0] is the block handled this call. */
5820 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5821 bits_nr, &reada_bits);
5826 for(i = 0; i < nritems; i++) {
5827 ret = add_cache_extent(reada, bits[i].start,
5832 /* fixme, get the parent transid */
5833 readahead_tree_block(fs_info, bits[i].start, 0);
5836 *last = bits[0].start;
5837 bytenr = bits[0].start;
5838 size = bits[0].size;
/* The block is now being processed: drop it from all queues. */
5840 cache = lookup_cache_extent(pending, bytenr, size);
5842 remove_cache_extent(pending, cache);
5845 cache = lookup_cache_extent(reada, bytenr, size);
5847 remove_cache_extent(reada, cache);
5850 cache = lookup_cache_extent(nodes, bytenr, size);
5852 remove_cache_extent(nodes, cache);
5855 cache = lookup_cache_extent(extent_cache, bytenr, size);
5857 rec = container_of(cache, struct extent_record, cache);
5858 gen = rec->parent_generation;
5861 /* fixme, get the real parent transid */
5862 buf = read_tree_block(root->fs_info, bytenr, gen);
5863 if (!extent_buffer_uptodate(buf)) {
5864 record_bad_block_io(root->fs_info,
5865 extent_cache, bytenr, size);
5869 nritems = btrfs_header_nritems(buf);
/*
 * Prefer the on-disk extent flags; fall back to computing them when
 * the lookup fails or when we are rebuilding the extent tree.
 */
5872 if (!init_extent_tree) {
5873 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5874 btrfs_header_level(buf), 1, NULL,
5877 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5879 fprintf(stderr, "Couldn't calc extent flags\n");
5880 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5885 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5887 fprintf(stderr, "Couldn't calc extent flags\n");
5888 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5892 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5894 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5895 ri->objectid == btrfs_header_owner(buf)) {
5897 * Ok we got to this block from it's original owner and
5898 * we have FULL_BACKREF set. Relocation can leave
5899 * converted blocks over so this is altogether possible,
5900 * however it's not possible if the generation > the
5901 * last snapshot, so check for this case.
5903 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5904 btrfs_header_generation(buf) > ri->last_snapshot) {
5905 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5906 rec->bad_full_backref = 1;
5911 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5912 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5913 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5914 rec->bad_full_backref = 1;
5918 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5919 rec->flag_block_full_backref = 1;
5923 rec->flag_block_full_backref = 0;
5925 owner = btrfs_header_owner(buf);
5928 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: inspect each item; node: queue the children. */
5932 if (btrfs_is_leaf(buf)) {
5933 btree_space_waste += btrfs_leaf_free_space(root, buf);
5934 for (i = 0; i < nritems; i++) {
5935 struct btrfs_file_extent_item *fi;
5936 btrfs_item_key_to_cpu(buf, &key, i);
5938 * Check key type against the leaf owner.
5939 * Could filter quite a lot of early error if
5942 if (check_type_with_root(btrfs_header_owner(buf),
5944 fprintf(stderr, "ignoring invalid key\n");
5947 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5948 process_extent_item(root, extent_cache, buf,
5952 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5953 process_extent_item(root, extent_cache, buf,
5957 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5959 btrfs_item_size_nr(buf, i);
5962 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5963 process_chunk_item(chunk_cache, &key, buf, i);
5966 if (key.type == BTRFS_DEV_ITEM_KEY) {
5967 process_device_item(dev_cache, &key, buf, i);
5970 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5971 process_block_group_item(block_group_cache,
5975 if (key.type == BTRFS_DEV_EXTENT_KEY) {
5976 process_device_extent_item(dev_extent_cache,
/* Legacy v0 refs only when built with compat support. */
5981 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5982 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5983 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree backref: root is key.offset, no parent. */
5990 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
5991 ret = add_tree_backref(extent_cache,
5992 key.objectid, 0, key.offset, 0);
5995 "add_tree_backref failed (leaf tree block): %s",
/* Shared tree backref: parent is key.offset, no root. */
5999 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6000 ret = add_tree_backref(extent_cache,
6001 key.objectid, key.offset, 0, 0);
6004 "add_tree_backref failed (leaf shared block): %s",
6008 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6009 struct btrfs_extent_data_ref *ref;
6010 ref = btrfs_item_ptr(buf, i,
6011 struct btrfs_extent_data_ref);
6012 add_data_backref(extent_cache,
6014 btrfs_extent_data_ref_root(buf, ref),
6015 btrfs_extent_data_ref_objectid(buf,
6017 btrfs_extent_data_ref_offset(buf, ref),
6018 btrfs_extent_data_ref_count(buf, ref),
6019 0, root->fs_info->sectorsize);
6022 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6023 struct btrfs_shared_data_ref *ref;
6024 ref = btrfs_item_ptr(buf, i,
6025 struct btrfs_shared_data_ref);
6026 add_data_backref(extent_cache,
6027 key.objectid, key.offset, 0, 0, 0,
6028 btrfs_shared_data_ref_count(buf, ref),
6029 0, root->fs_info->sectorsize);
/* Orphan items are remembered on delete_items for later repair. */
6032 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6033 struct bad_item *bad;
6035 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6039 bad = malloc(sizeof(struct bad_item));
6042 INIT_LIST_HEAD(&bad->list);
6043 memcpy(&bad->key, &key,
6044 sizeof(struct btrfs_key));
6045 bad->root_id = owner;
6046 list_add_tail(&bad->list, &delete_items);
6049 if (key.type != BTRFS_EXTENT_DATA_KEY)
6051 fi = btrfs_item_ptr(buf, i,
6052 struct btrfs_file_extent_item);
/* Inline extents and holes allocate no disk space. */
6053 if (btrfs_file_extent_type(buf, fi) ==
6054 BTRFS_FILE_EXTENT_INLINE)
6056 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6059 data_bytes_allocated +=
6060 btrfs_file_extent_disk_num_bytes(buf, fi);
6061 if (data_bytes_allocated < root->fs_info->sectorsize) {
6064 data_bytes_referenced +=
6065 btrfs_file_extent_num_bytes(buf, fi);
6066 add_data_backref(extent_cache,
6067 btrfs_file_extent_disk_bytenr(buf, fi),
6068 parent, owner, key.objectid, key.offset -
6069 btrfs_file_extent_offset(buf, fi), 1, 1,
6070 btrfs_file_extent_disk_num_bytes(buf, fi));
6074 struct btrfs_key first_key;
6076 first_key.objectid = 0;
6079 btrfs_item_key_to_cpu(buf, &first_key, 0);
6080 level = btrfs_header_level(buf);
6081 for (i = 0; i < nritems; i++) {
6082 struct extent_record tmpl;
6084 ptr = btrfs_node_blockptr(buf, i);
6085 size = root->fs_info->nodesize;
6086 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children already deleted by a partial snapshot drop. */
6088 if ((level == ri->drop_level)
6089 && is_dropped_key(&key, &ri->drop_key)) {
6094 memset(&tmpl, 0, sizeof(tmpl));
6095 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6096 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6101 tmpl.max_size = size;
6102 ret = add_extent_rec(extent_cache, &tmpl);
6106 ret = add_tree_backref(extent_cache, ptr, parent,
6110 "add_tree_backref failed (non-leaf block): %s",
6116 add_pending(nodes, seen, ptr, size);
6118 add_pending(pending, seen, ptr, size);
/* Account the unused key-pointer slots of this node. */
6121 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
6122 nritems) * sizeof(struct btrfs_key_ptr);
6124 total_btree_bytes += buf->len;
6125 if (fs_root_objectid(btrfs_header_owner(buf)))
6126 total_fs_tree_bytes += buf->len;
6127 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6128 total_extent_tree_bytes += buf->len;
6130 free_extent_buffer(buf);
/*
 * add_root_to_pending() - seed the scan with a tree root block.
 *
 * Queues @buf into nodes (internal node) or pending (leaf), creates an
 * extent record for it, and adds the initial tree backref: a shared
 * (parent-based) backref for reloc trees and pre-mixed-backref
 * filesystems, otherwise a keyed backref on @objectid.
 */
6134 static int add_root_to_pending(struct extent_buffer *buf,
6135 struct cache_tree *extent_cache,
6136 struct cache_tree *pending,
6137 struct cache_tree *seen,
6138 struct cache_tree *nodes,
6141 struct extent_record tmpl;
6144 if (btrfs_header_level(buf) > 0)
6145 add_pending(nodes, seen, buf->start, buf->len);
6147 add_pending(pending, seen, buf->start, buf->len);
6149 memset(&tmpl, 0, sizeof(tmpl));
6150 tmpl.start = buf->start;
6155 tmpl.max_size = buf->len;
6156 add_extent_rec(extent_cache, &tmpl);
6158 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6159 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6160 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6163 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6168 /* as we fix the tree, we might be deleting blocks that
6169 * we're tracking for repair. This hook makes sure we
6170 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook() - called when repair code frees an extent so the
 * in-memory fsck bookkeeping stays in sync (see comment above).
 *
 * Finds the extent record covering [@bytenr, @bytenr + @num_bytes) in
 * fs_info->fsck_extent_cache and drops @refs_to_drop references from
 * the matching data or tree backref; a backref with no remaining state
 * is unlinked from the record's backref tree and freed.
 *
 * Fix: both erase conditions used to read
 *   (!found_extent_tree && found_ref)
 * which discarded a backref while a real reference was still recorded.
 * A backref must only be erased once NEITHER side references it:
 *   (!found_extent_tree && !found_ref).
 */
6172 static int free_extent_hook(struct btrfs_trans_handle *trans,
6173 struct btrfs_root *root,
6174 u64 bytenr, u64 num_bytes, u64 parent,
6175 u64 root_objectid, u64 owner, u64 offset,
6178 struct extent_record *rec;
6179 struct cache_extent *cache;
6181 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Data extents are owned by inode numbers (>= FIRST_FREE_OBJECTID). */
6183 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6184 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6188 rec = container_of(cache, struct extent_record, cache);
6190 struct data_backref *back;
6191 back = find_data_backref(rec, parent, root_objectid, owner,
6192 offset, 1, bytenr, num_bytes);
6195 if (back->node.found_ref) {
6196 back->found_ref -= refs_to_drop;
6198 rec->refs -= refs_to_drop;
6200 if (back->node.found_extent_tree) {
6201 back->num_refs -= refs_to_drop;
6202 if (rec->extent_item_refs)
6203 rec->extent_item_refs -= refs_to_drop;
6205 if (back->found_ref == 0)
6206 back->node.found_ref = 0;
6207 if (back->num_refs == 0)
6208 back->node.found_extent_tree = 0;
/* Erase only when no references of either kind remain. */
6210 if (!back->node.found_extent_tree && !back->node.found_ref) {
6211 rb_erase(&back->node.node, &rec->backref_tree);
6215 struct tree_backref *back;
6216 back = find_tree_backref(rec, parent, root_objectid);
6219 if (back->node.found_ref) {
6222 back->node.found_ref = 0;
6224 if (back->node.found_extent_tree) {
6225 if (rec->extent_item_refs)
6226 rec->extent_item_refs--;
6227 back->node.found_extent_tree = 0;
/* Same rule for tree backrefs: erase only when fully unreferenced. */
6229 if (!back->node.found_extent_tree && !back->node.found_ref) {
6230 rb_erase(&back->node.node, &rec->backref_tree);
6234 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records() - remove every extent-tree item describing
 * @bytenr (extent/metadata items and all backref item types), walking
 * backwards from (bytenr, max type, max offset).  For EXTENT/METADATA
 * items the block-group accounting is also reduced by the freed bytes.
 */
6239 static int delete_extent_records(struct btrfs_trans_handle *trans,
6240 struct btrfs_root *root,
6241 struct btrfs_path *path,
6244 struct btrfs_key key;
6245 struct btrfs_key found_key;
6246 struct extent_buffer *leaf;
6251 key.objectid = bytenr;
6253 key.offset = (u64)-1;
6256 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6263 if (path->slots[0] == 0)
6269 leaf = path->nodes[0];
6270 slot = path->slots[0];
6272 btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* Walked past all items for this bytenr: done. */
6273 if (found_key.objectid != bytenr)
6276 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6277 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6278 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6279 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6280 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6281 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6282 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Uninteresting item: step the search key backwards and retry. */
6283 btrfs_release_path(path);
6284 if (found_key.type == 0) {
6285 if (found_key.offset == 0)
6287 key.offset = found_key.offset - 1;
6288 key.type = found_key.type;
6290 key.type = found_key.type - 1;
6291 key.offset = (u64)-1;
6295 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6296 found_key.objectid, found_key.type, found_key.offset);
6298 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6301 btrfs_release_path(path);
6303 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6304 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM offset is a level, not a byte count. */
6305 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6306 found_key.offset : root->fs_info->nodesize;
6308 ret = btrfs_update_block_group(root, bytenr,
6315 btrfs_release_path(path);
6320 * for a single backref, this will allocate a new extent
6321 * and add the backref to it.
/*
 * record_extent() - materialize one extent item (when !allocated) and
 * its backref (@back) in the extent tree during repair.
 *
 * For a fresh extent item the refs start at 0 and are bumped via
 * btrfs_inc_extent_ref(); tree blocks additionally get a
 * btrfs_tree_block_info with the recorded level/owner key.
 *
 * Fix: the four references to the local disk key were mangled to the
 * HTML-entity remnant "©_key" (originally "&copy_key") and the
 * declaration carried a stray double semicolon; both repaired.
 */
6323 static int record_extent(struct btrfs_trans_handle *trans,
6324 struct btrfs_fs_info *info,
6325 struct btrfs_path *path,
6326 struct extent_record *rec,
6327 struct extent_backref *back,
6328 int allocated, u64 flags)
6331 struct btrfs_root *extent_root = info->extent_root;
6332 struct extent_buffer *leaf;
6333 struct btrfs_key ins_key;
6334 struct btrfs_extent_item *ei;
6335 struct data_backref *dback;
6336 struct btrfs_tree_block_info *bi;
6339 rec->max_size = max_t(u64, rec->max_size,
6343 u32 item_size = sizeof(*ei);
/* Tree blocks carry an extra btrfs_tree_block_info payload. */
6346 item_size += sizeof(*bi);
6348 ins_key.objectid = rec->start;
6349 ins_key.offset = rec->max_size;
6350 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6352 ret = btrfs_insert_empty_item(trans, extent_root, path,
6353 &ins_key, item_size);
6357 leaf = path->nodes[0];
6358 ei = btrfs_item_ptr(leaf, path->slots[0],
6359 struct btrfs_extent_item);
/* Refs start at zero; btrfs_inc_extent_ref() below raises them. */
6361 btrfs_set_extent_refs(leaf, ei, 0);
6362 btrfs_set_extent_generation(leaf, ei, rec->generation);
6364 if (back->is_data) {
6365 btrfs_set_extent_flags(leaf, ei,
6366 BTRFS_EXTENT_FLAG_DATA);
6368 struct btrfs_disk_key copy_key;
6370 bi = (struct btrfs_tree_block_info *)(ei + 1);
6371 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6374 btrfs_set_disk_key_objectid(&copy_key,
6375 rec->info_objectid);
6376 btrfs_set_disk_key_type(&copy_key, 0);
6377 btrfs_set_disk_key_offset(&copy_key, 0);
6379 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6380 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6382 btrfs_set_extent_flags(leaf, ei,
6383 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6386 btrfs_mark_buffer_dirty(leaf);
6387 ret = btrfs_update_block_group(extent_root, rec->start,
6388 rec->max_size, 1, 0);
6391 btrfs_release_path(path);
6394 if (back->is_data) {
6398 dback = to_data_backref(back);
6399 if (back->full_backref)
6400 parent = dback->parent;
/* Add one extent-tree ref per file-extent ref actually found. */
6404 for (i = 0; i < dback->found_ref; i++) {
6405 /* if parent != 0, we're doing a full backref
6406 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6407 * just makes the backref allocator create a data
6410 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6411 rec->start, rec->max_size,
6415 BTRFS_FIRST_FREE_OBJECTID :
6421 fprintf(stderr, "adding new data backref"
6422 " on %llu %s %llu owner %llu"
6423 " offset %llu found %d\n",
6424 (unsigned long long)rec->start,
6425 back->full_backref ?
6427 back->full_backref ?
6428 (unsigned long long)parent :
6429 (unsigned long long)dback->root,
6430 (unsigned long long)dback->owner,
6431 (unsigned long long)dback->offset,
6435 struct tree_backref *tback;
6437 tback = to_tree_backref(back);
6438 if (back->full_backref)
6439 parent = tback->parent;
6443 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6444 rec->start, rec->max_size,
6445 parent, tback->root, 0, 0);
6446 fprintf(stderr, "adding new tree backref on "
6447 "start %llu len %llu parent %llu root %llu\n",
6448 rec->start, rec->max_size, parent, tback->root);
6451 btrfs_release_path(path);
/*
 * find_entry() - linear search of @entries for the extent_entry whose
 * (bytenr, bytes) pair matches exactly; the return statements are on
 * sampled-out lines (entry on match, NULL otherwise — TODO confirm).
 */
6455 static struct extent_entry *find_entry(struct list_head *entries,
6456 u64 bytenr, u64 bytes)
6458 struct extent_entry *entry = NULL;
6460 list_for_each_entry(entry, entries, list) {
6461 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry() - pick the extent_entry with the highest
 * vote count, i.e. the version of the extent most backrefs agree on.
 * Entries whose refs are all broken are distrusted entirely; a tie on
 * the highest count means no winner (NULL is expected on the
 * sampled-out return path).
 */
6468 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6470 struct extent_entry *entry, *best = NULL, *prev = NULL;
6472 list_for_each_entry(entry, entries, list) {
6474 * If there are as many broken entries as entries then we know
6475 * not to trust this particular entry.
6477 if (entry->broken == entry->count)
6481 * Special case, when there are only two entries and 'best' is
6491 * If our current entry == best then we can't be sure our best
6492 * is really the best, so we need to keep searching.
6494 if (best && best->count == entry->count) {
6500 /* Prev == entry, not good enough, have to keep searching */
6501 if (!prev->broken && prev->count == entry->count)
6505 best = (prev->count > entry->count) ? prev : entry;
6506 else if (best->count < entry->count)
/*
 * repair_ref() - rewrite one file extent item so it agrees with the
 * extent @entry that the backrefs voted for.
 *
 * Locates the file extent in @dback's subvolume (walking forward from
 * the recorded offset, since splits move the key), then in a
 * transaction adjusts disk_bytenr/offset/disk_num_bytes to match
 * @entry.  Compressed extents that need a bytenr change are refused —
 * their offset math is not implemented here.  Returns 0 on success or
 * a negative error; the commit error (err) is preserved if ret == 0.
 */
6514 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6515 struct data_backref *dback, struct extent_entry *entry)
6517 struct btrfs_trans_handle *trans;
6518 struct btrfs_root *root;
6519 struct btrfs_file_extent_item *fi;
6520 struct extent_buffer *leaf;
6521 struct btrfs_key key;
6525 key.objectid = dback->root;
6526 key.type = BTRFS_ROOT_ITEM_KEY;
6527 key.offset = (u64)-1;
6528 root = btrfs_read_fs_root(info, &key);
6530 fprintf(stderr, "Couldn't find root for our ref\n");
6535 * The backref points to the original offset of the extent if it was
6536 * split, so we need to search down to the offset we have and then walk
6537 * forward until we find the backref we're looking for.
6539 key.objectid = dback->owner;
6540 key.type = BTRFS_EXTENT_DATA_KEY;
6541 key.offset = dback->offset;
6542 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6544 fprintf(stderr, "Error looking up ref %d\n", ret);
6549 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6550 ret = btrfs_next_leaf(root, path);
6552 fprintf(stderr, "Couldn't find our ref, next\n");
6556 leaf = path->nodes[0];
6557 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6558 if (key.objectid != dback->owner ||
6559 key.type != BTRFS_EXTENT_DATA_KEY) {
6560 fprintf(stderr, "Couldn't find our ref, search\n");
6563 fi = btrfs_item_ptr(leaf, path->slots[0],
6564 struct btrfs_file_extent_item);
6565 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6566 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Found the file extent matching the broken backref. */
6568 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6573 btrfs_release_path(path);
6575 trans = btrfs_start_transaction(root, 1);
6577 return PTR_ERR(trans);
6580 * Ok we have the key of the file extent we want to fix, now we can cow
6581 * down to the thing and fix it.
6583 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6585 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6586 key.objectid, key.type, key.offset, ret);
6590 fprintf(stderr, "Well that's odd, we just found this key "
6591 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6596 leaf = path->nodes[0];
6597 fi = btrfs_item_ptr(leaf, path->slots[0],
6598 struct btrfs_file_extent_item);
6600 if (btrfs_file_extent_compression(leaf, fi) &&
6601 dback->disk_bytenr != entry->bytenr) {
6602 fprintf(stderr, "Ref doesn't match the record start and is "
6603 "compressed, please take a btrfs-image of this file "
6604 "system and send it to a btrfs developer so they can "
6605 "complete this functionality for bytenr %Lu\n",
6606 dback->disk_bytenr);
/* Broken backref: just point it at the agreed bytenr. */
6611 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6612 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* Ref starts inside the real extent: fold the delta into offset. */
6613 } else if (dback->disk_bytenr > entry->bytenr) {
6614 u64 off_diff, offset;
6616 off_diff = dback->disk_bytenr - entry->bytenr;
6617 offset = btrfs_file_extent_offset(leaf, fi);
6618 if (dback->disk_bytenr + offset +
6619 btrfs_file_extent_num_bytes(leaf, fi) >
6620 entry->bytenr + entry->bytes) {
6621 fprintf(stderr, "Ref is past the entry end, please "
6622 "take a btrfs-image of this file system and "
6623 "send it to a btrfs developer, ref %Lu\n",
6624 dback->disk_bytenr);
6629 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6630 btrfs_set_file_extent_offset(leaf, fi, offset);
/* Ref starts before the real extent: shrink the offset instead. */
6631 } else if (dback->disk_bytenr < entry->bytenr) {
6634 offset = btrfs_file_extent_offset(leaf, fi);
6635 if (dback->disk_bytenr + offset < entry->bytenr) {
6636 fprintf(stderr, "Ref is before the entry start, please"
6637 " take a btrfs-image of this file system and "
6638 "send it to a btrfs developer, ref %Lu\n",
6639 dback->disk_bytenr);
6644 offset += dback->disk_bytenr;
6645 offset -= entry->bytenr;
6646 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6647 btrfs_set_file_extent_offset(leaf, fi, offset);
6650 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6653 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6654 * only do this if we aren't using compression, otherwise it's a
6657 if (!btrfs_file_extent_compression(leaf, fi))
6658 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6660 printf("ram bytes may be wrong?\n");
6661 btrfs_mark_buffer_dirty(leaf);
6663 err = btrfs_commit_transaction(trans, root);
6664 btrfs_release_path(path);
6665 return ret ? ret : err;
/*
 * verify_backrefs() - make all data backrefs of @rec agree on one
 * (bytenr, bytes) pair.
 *
 * Builds a vote list (extent_entry per distinct pair, weighted by
 * backref counts), elects a winner with find_most_right_entry() —
 * using the extent record itself as tie-breaker — then calls
 * repair_ref() for every backref that disagrees with the winner.
 * Metadata extents and full backrefs are skipped.  Returns 0 when all
 * refs already agree or were repaired; errors bubble up from the
 * sampled-out paths.
 */
6668 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6669 struct extent_record *rec)
6671 struct extent_backref *back, *tmp;
6672 struct data_backref *dback;
6673 struct extent_entry *entry, *best = NULL;
6676 int broken_entries = 0;
6681 * Metadata is easy and the backrefs should always agree on bytenr and
6682 * size, if not we've got bigger issues.
6687 rbtree_postorder_for_each_entry_safe(back, tmp,
6688 &rec->backref_tree, node) {
6689 if (back->full_backref || !back->is_data)
6692 dback = to_data_backref(back);
6695 * We only pay attention to backrefs that we found a real
6698 if (dback->found_ref == 0)
6702 * For now we only catch when the bytes don't match, not the
6703 * bytenr. We can easily do this at the same time, but I want
6704 * to have a fs image to test on before we just add repair
6705 * functionality willy-nilly so we know we won't screw up the
6709 entry = find_entry(&entries, dback->disk_bytenr,
/* First vote for this (bytenr, bytes) pair: create its entry. */
6712 entry = malloc(sizeof(struct extent_entry));
6717 memset(entry, 0, sizeof(*entry));
6718 entry->bytenr = dback->disk_bytenr;
6719 entry->bytes = dback->bytes;
6720 list_add_tail(&entry->list, &entries);
6725 * If we only have on entry we may think the entries agree when
6726 * in reality they don't so we have to do some extra checking.
6728 if (dback->disk_bytenr != rec->start ||
6729 dback->bytes != rec->nr || back->broken)
6740 /* Yay all the backrefs agree, carry on good sir */
6741 if (nr_entries <= 1 && !mismatch)
6744 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6745 "%Lu\n", rec->start);
6748 * First we want to see if the backrefs can agree amongst themselves who
6749 * is right, so figure out which one of the entries has the highest
6752 best = find_most_right_entry(&entries);
6755 * Ok so we may have an even split between what the backrefs think, so
6756 * this is where we use the extent ref to see what it thinks.
6759 entry = find_entry(&entries, rec->start, rec->nr);
6760 if (!entry && (!broken_entries || !rec->found_rec)) {
6761 fprintf(stderr, "Backrefs don't agree with each other "
6762 "and extent record doesn't agree with anybody,"
6763 " so we can't fix bytenr %Lu bytes %Lu\n",
6764 rec->start, rec->nr);
6767 } else if (!entry) {
6769 * Ok our backrefs were broken, we'll assume this is the
6770 * correct value and add an entry for this range.
6772 entry = malloc(sizeof(struct extent_entry));
6777 memset(entry, 0, sizeof(*entry));
6778 entry->bytenr = rec->start;
6779 entry->bytes = rec->nr;
6780 list_add_tail(&entry->list, &entries);
/* Re-run the election with the extent record's vote included. */
6784 best = find_most_right_entry(&entries);
6786 fprintf(stderr, "Backrefs and extent record evenly "
6787 "split on who is right, this is going to "
6788 "require user input to fix bytenr %Lu bytes "
6789 "%Lu\n", rec->start, rec->nr);
6796 * I don't think this can happen currently as we'll abort() if we catch
6797 * this case higher up, but in case somebody removes that we still can't
6798 * deal with it properly here yet, so just bail out of that's the case.
6800 if (best->bytenr != rec->start) {
6801 fprintf(stderr, "Extent start and backref starts don't match, "
6802 "please use btrfs-image on this file system and send "
6803 "it to a btrfs developer so they can make fsck fix "
6804 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6805 rec->start, rec->nr);
6811 * Ok great we all agreed on an extent record, let's go find the real
6812 * references and fix up the ones that don't match.
6814 rbtree_postorder_for_each_entry_safe(back, tmp,
6815 &rec->backref_tree, node) {
6816 if (back->full_backref || !back->is_data)
6819 dback = to_data_backref(back);
6822 * Still ignoring backrefs that don't have a real ref attached
6825 if (dback->found_ref == 0)
6828 if (dback->bytes == best->bytes &&
6829 dback->disk_bytenr == best->bytenr)
6832 ret = repair_ref(info, path, dback, best);
6838 * Ok we messed with the actual refs, which means we need to drop our
6839 * entire cache and go back and rescan. I know this is a huge pain and
6840 * adds a lot of extra work, but it's the only way to be safe. Once all
6841 * the backrefs agree we may not need to do anything to the extent
/* Free the vote list before returning. */
6846 while (!list_empty(&entries)) {
6847 entry = list_entry(entries.next, struct extent_entry, list);
6848 list_del_init(&entry->list);
/*
 * process_duplicates() - collapse a single-duplicate extent record.
 *
 * When @rec itself was never found as a real extent item but exactly
 * one duplicate was, promote that duplicate ("good") to be the cached
 * record: it inherits rec's refs and backrefs, absorbs any overlapping
 * records found at its range, and is re-inserted into @extent_cache.
 * Returns 1 when the result has no remaining duplicates, 0 when it
 * still needs delete_duplicate_records().
 */
6854 static int process_duplicates(struct cache_tree *extent_cache,
6855 struct extent_record *rec)
6857 struct extent_record *good, *tmp;
6858 struct cache_extent *cache;
6862 * If we found a extent record for this extent then return, or if we
6863 * have more than one duplicate we are likely going to need to delete
6866 if (rec->found_rec || rec->num_duplicates > 1)
6869 /* Shouldn't happen but just in case */
6870 BUG_ON(!rec->num_duplicates);
6873 * So this happens if we end up with a backref that doesn't match the
6874 * actual extent entry. So either the backref is bad or the extent
6875 * entry is bad. Either way we want to have the extent_record actually
6876 * reflect what we found in the extent_tree, so we need to take the
6877 * duplicate out and use that as the extent_record since the only way we
6878 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6880 remove_cache_extent(extent_cache, &rec->cache);
6882 good = to_extent_record(rec->dups.next);
6883 list_del_init(&good->list);
6884 INIT_LIST_HEAD(&good->backrefs);
6885 INIT_LIST_HEAD(&good->dups);
6886 good->cache.start = good->start;
6887 good->cache.size = good->nr;
/* Force re-verification of the promoted record. */
6888 good->content_checked = 0;
6889 good->owner_ref_checked = 0;
6890 good->num_duplicates = 0;
6891 good->refs = rec->refs;
6892 list_splice_init(&rec->backrefs, &good->backrefs);
/* Absorb any records already cached over good's range. */
6894 cache = lookup_cache_extent(extent_cache, good->start,
6898 tmp = container_of(cache, struct extent_record, cache);
6901 * If we find another overlapping extent and it's found_rec is
6902 * set then it's a duplicate and we need to try and delete
6905 if (tmp->found_rec || tmp->num_duplicates > 0) {
6906 if (list_empty(&good->list))
6907 list_add_tail(&good->list,
6908 &duplicate_extents);
6909 good->num_duplicates += tmp->num_duplicates + 1;
6910 list_splice_init(&tmp->dups, &good->dups);
6911 list_del_init(&tmp->list);
6912 list_add_tail(&tmp->list, &good->dups);
6913 remove_cache_extent(extent_cache, &tmp->cache);
6918 * Ok we have another non extent item backed extent rec, so lets
6919 * just add it to this extent and carry on like we did above.
6921 good->refs += tmp->refs;
6922 list_splice_init(&tmp->backrefs, &good->backrefs);
6923 remove_cache_extent(extent_cache, &tmp->cache);
6926 ret = insert_cache_extent(extent_cache, &good->cache);
6929 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records() - drop the redundant extent items among
 * @rec and its dups list.
 *
 * Elects the record that fully covers all the others as the keeper;
 * every other record (the keeper's competitors) is moved onto a delete
 * list, its extent item removed from the extent tree in a transaction,
 * and its in-memory record freed.  Returns the number of deleted items
 * on success or a negative error.
 */
6932 static int delete_duplicate_records(struct btrfs_root *root,
6933 struct extent_record *rec)
6935 struct btrfs_trans_handle *trans;
6936 LIST_HEAD(delete_list);
6937 struct btrfs_path path;
6938 struct extent_record *tmp, *good, *n;
6941 struct btrfs_key key;
6943 btrfs_init_path(&path);
6946 /* Find the record that covers all of the duplicates. */
6947 list_for_each_entry(tmp, &rec->dups, list) {
6948 if (good->start < tmp->start)
6950 if (good->nr > tmp->nr)
/* Partial overlap: no record covers the other, give up. */
6953 if (tmp->start + tmp->nr < good->start + good->nr) {
6954 fprintf(stderr, "Ok we have overlapping extents that "
6955 "aren't completely covered by each other, this "
6956 "is going to require more careful thought. "
6957 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6958 tmp->start, tmp->nr, good->start, good->nr);
6965 list_add_tail(&rec->list, &delete_list);
6967 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6970 list_move_tail(&tmp->list, &delete_list);
6973 root = root->fs_info->extent_root;
6974 trans = btrfs_start_transaction(root, 1);
6975 if (IS_ERR(trans)) {
6976 ret = PTR_ERR(trans);
6980 list_for_each_entry(tmp, &delete_list, list) {
6981 if (tmp->found_rec == 0)
6983 key.objectid = tmp->start;
6984 key.type = BTRFS_EXTENT_ITEM_KEY;
6985 key.offset = tmp->nr;
6987 /* Shouldn't happen but just in case */
6988 if (tmp->metadata) {
6989 fprintf(stderr, "Well this shouldn't happen, extent "
6990 "record overlaps but is metadata? "
6991 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6995 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7001 ret = btrfs_del_item(trans, root, &path);
7004 btrfs_release_path(&path);
7007 err = btrfs_commit_transaction(trans, root);
/* Free the in-memory records of everything we deleted. */
7011 while (!list_empty(&delete_list)) {
7012 tmp = to_extent_record(delete_list.next);
7013 list_del_init(&tmp->list);
7019 while (!list_empty(&rec->dups)) {
7020 tmp = to_extent_record(rec->dups.next);
7021 list_del_init(&tmp->list);
7025 btrfs_release_path(&path);
7027 if (!ret && !nr_del)
7028 rec->num_duplicates = 0;
7030 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - resolve data backrefs that were never matched
 * to an on-disk file extent.
 *
 * For each unmatched (non-full) data backref on @rec, look up the
 * referencing fs root and its EXTENT_DATA item; if the file extent
 * exists and its disk bytenr is not covered by another cached extent
 * record, adopt its disk_bytenr/disk_num_bytes into the backref so
 * verify_backrefs can reconcile it.
 *
 * NOTE(review): interior lines (continues, error labels, variable
 * declarations such as bytenr/bytes/ret) are missing from this view.
 */
7033 static int find_possible_backrefs(struct btrfs_fs_info *info,
7034 struct btrfs_path *path,
7035 struct cache_tree *extent_cache,
7036 struct extent_record *rec)
7038 struct btrfs_root *root;
7039 struct extent_backref *back, *tmp;
7040 struct data_backref *dback;
7041 struct cache_extent *cache;
7042 struct btrfs_file_extent_item *fi;
7043 struct btrfs_key key;
7047 rbtree_postorder_for_each_entry_safe(back, tmp,
7048 &rec->backref_tree, node) {
7049 /* Don't care about full backrefs (poor unloved backrefs) */
7050 if (back->full_backref || !back->is_data)
7053 dback = to_data_backref(back);
7055 /* We found this one, we don't need to do a lookup */
7056 if (dback->found_ref)
/* Resolve the subvolume root that owns this backref */
7059 key.objectid = dback->root;
7060 key.type = BTRFS_ROOT_ITEM_KEY;
7061 key.offset = (u64)-1;
7063 root = btrfs_read_fs_root(info, &key);
7065 /* No root, definitely a bad ref, skip */
7066 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7068 /* Other err, exit */
7070 return PTR_ERR(root);
/* Look up the file extent the backref claims to come from */
7072 key.objectid = dback->owner;
7073 key.type = BTRFS_EXTENT_DATA_KEY;
7074 key.offset = dback->offset;
7075 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7077 btrfs_release_path(path);
7080 /* Didn't find it, we can carry on */
7085 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7086 struct btrfs_file_extent_item);
7087 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7088 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7089 btrfs_release_path(path);
7090 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7092 struct extent_record *tmp;
7093 tmp = container_of(cache, struct extent_record, cache);
7096 * If we found an extent record for the bytenr for this
7097 * particular backref then we can't add it to our
7098 * current extent record. We only want to add backrefs
7099 * that don't have a corresponding extent item in the
7100 * extent tree since they likely belong to this record
7101 * and we need to fix it if it doesn't match bytenrs.
7107 dback->found_ref += 1;
7108 dback->disk_bytenr = bytenr;
7109 dback->bytes = bytes;
7112 * Set this so the verify backref code knows not to trust the
7113 * values in this backref.
/*
 * record_orphan_data_extents - queue orphan data refs for later rebuild.
 *
 * Walks @rec's data backrefs that were seen in the extent tree but never
 * matched to a file extent; for each, allocates an orphan_data_extent and
 * attaches it to the owning fs root's orphan list so inode/file-extent
 * repair can recreate the missing reference.
 */
7122 * Record orphan data ref into corresponding root.
7124 * Return 0 if the extent item contains data ref and recorded.
7125 * Return 1 if the extent item contains no useful data ref
7126 * On that case, it may contains only shared_dataref or metadata backref
7127 * or the file extent exists(this should be handled by the extent bytenr
7129 * Return <0 if something goes wrong.
7131 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7132 struct extent_record *rec)
7134 struct btrfs_key key;
7135 struct btrfs_root *dest_root;
7136 struct extent_backref *back, *tmp;
7137 struct data_backref *dback;
7138 struct orphan_data_extent *orphan;
7139 struct btrfs_path path;
7140 int recorded_data_ref = 0;
7145 btrfs_init_path(&path);
7146 rbtree_postorder_for_each_entry_safe(back, tmp,
7147 &rec->backref_tree, node) {
/* Only data backrefs present in the extent tree but unmatched to a file */
7148 if (back->full_backref || !back->is_data ||
7149 !back->found_extent_tree)
7151 dback = to_data_backref(back);
7152 if (dback->found_ref)
7154 key.objectid = dback->root;
7155 key.type = BTRFS_ROOT_ITEM_KEY;
7156 key.offset = (u64)-1;
7158 dest_root = btrfs_read_fs_root(fs_info, &key);
7160 /* For non-exist root we just skip it */
7161 if (IS_ERR(dest_root) || !dest_root)
7164 key.objectid = dback->owner;
7165 key.type = BTRFS_EXTENT_DATA_KEY;
7166 key.offset = dback->offset;
7168 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7169 btrfs_release_path(&path);
7171 * For ret < 0, it's OK since the fs-tree may be corrupted,
7172 * we need to record it for inode/file extent rebuild.
7173 * For ret > 0, we record it only for file extent rebuild.
7174 * For ret == 0, the file extent exists but only bytenr
7175 * mismatch, let the original bytenr fix routine to handle,
7181 orphan = malloc(sizeof(*orphan));
7186 INIT_LIST_HEAD(&orphan->list);
7187 orphan->root = dback->root;
7188 orphan->objectid = dback->owner;
7189 orphan->offset = dback->offset;
7190 orphan->disk_bytenr = rec->cache.start;
7191 orphan->disk_len = rec->cache.size;
7192 list_add(&dest_root->orphan_data_extents, &orphan->list);
7193 recorded_data_ref = 1;
7196 btrfs_release_path(&path);
/* 0 = something recorded; 1 = nothing useful found */
7198 return !recorded_data_ref;
/*
 * fixup_extent_refs - rebuild a bad extent item from the scanned backrefs.
 *
 * Three-step repair: (1) reconcile backrefs via find_possible_backrefs/
 * verify_backrefs, (2) delete the on-disk extent records, (3) re-insert
 * one record per backref that was actually found during the scan.
 * NOTE(review): error-path gotos and some declarations (ret, allocated,
 * flags) are in lines not visible here.
 */
7204 * when an incorrect extent item is found, this will delete
7205 * all of the existing entries for it and recreate them
7206 * based on what the tree scan found.
7208 static int fixup_extent_refs(struct btrfs_fs_info *info,
7209 struct cache_tree *extent_cache,
7210 struct extent_record *rec)
7212 struct btrfs_trans_handle *trans = NULL;
7214 struct btrfs_path path;
7215 struct cache_extent *cache;
7216 struct extent_backref *back, *tmp;
7220 if (rec->flag_block_full_backref)
7221 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7223 btrfs_init_path(&path);
7224 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7226 * Sometimes the backrefs themselves are so broken they don't
7227 * get attached to any meaningful rec, so first go back and
7228 * check any of our backrefs that we couldn't find and throw
7229 * them into the list if we find the backref so that
7230 * verify_backrefs can figure out what to do.
7232 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7237 /* step one, make sure all of the backrefs agree */
7238 ret = verify_backrefs(info, &path, rec);
7242 trans = btrfs_start_transaction(info->extent_root, 1);
7243 if (IS_ERR(trans)) {
7244 ret = PTR_ERR(trans);
7248 /* step two, delete all the existing records */
7249 ret = delete_extent_records(trans, info->extent_root, &path,
7255 /* was this block corrupt? If so, don't add references to it */
7256 cache = lookup_cache_extent(info->corrupt_blocks,
7257 rec->start, rec->max_size);
7263 /* step three, recreate all the refs we did find */
7264 rbtree_postorder_for_each_entry_safe(back, tmp,
7265 &rec->backref_tree, node) {
7267 * if we didn't find any references, don't create a
7270 if (!back->found_ref)
7273 rec->bad_full_backref = 0;
7274 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
7282 int err = btrfs_commit_transaction(trans, info->extent_root);
7288 fprintf(stderr, "Repaired extent references for %llu\n",
7289 (unsigned long long)rec->start);
7291 btrfs_release_path(&path);
/*
 * fixup_extent_flags - set or clear FULL_BACKREF on a single extent item.
 *
 * Locates @rec's extent item (METADATA_ITEM keyed by level, or
 * EXTENT_ITEM keyed by size), flips BTRFS_BLOCK_FLAG_FULL_BACKREF to
 * match rec->flag_block_full_backref, and commits the transaction.
 */
7295 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7296 struct extent_record *rec)
7298 struct btrfs_trans_handle *trans;
7299 struct btrfs_root *root = fs_info->extent_root;
7300 struct btrfs_path path;
7301 struct btrfs_extent_item *ei;
7302 struct btrfs_key key;
7306 key.objectid = rec->start;
7307 if (rec->metadata) {
/* Skinny metadata item: offset holds the tree level */
7308 key.type = BTRFS_METADATA_ITEM_KEY;
7309 key.offset = rec->info_level;
7311 key.type = BTRFS_EXTENT_ITEM_KEY;
7312 key.offset = rec->max_size;
7315 trans = btrfs_start_transaction(root, 0);
7317 return PTR_ERR(trans);
7319 btrfs_init_path(&path);
7320 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Commit even on failure so the transaction is not leaked */
7322 btrfs_release_path(&path);
7323 btrfs_commit_transaction(trans, root);
7326 fprintf(stderr, "Didn't find extent for %llu\n",
7327 (unsigned long long)rec->start);
7328 btrfs_release_path(&path);
7329 btrfs_commit_transaction(trans, root);
7333 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7334 struct btrfs_extent_item);
7335 flags = btrfs_extent_flags(path.nodes[0], ei);
7336 if (rec->flag_block_full_backref) {
7337 fprintf(stderr, "setting full backref on %llu\n",
7338 (unsigned long long)key.objectid);
7339 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7341 fprintf(stderr, "clearing full backref on %llu\n",
7342 (unsigned long long)key.objectid);
7343 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7345 btrfs_set_extent_flags(path.nodes[0], ei, flags);
7346 btrfs_mark_buffer_dirty(path.nodes[0]);
7347 btrfs_release_path(&path);
7348 ret = btrfs_commit_transaction(trans, root);
7350 fprintf(stderr, "Repaired extent flags for %llu\n",
7351 (unsigned long long)rec->start);
/*
 * prune_one_block - drop the pointer to one corrupt extent-tree block.
 *
 * Searches down to the corrupt block's parent (lowest_level = level+1),
 * finds the child slot whose blockptr matches the corrupt bytenr (first
 * trying the search result, then scanning the node), and deletes that
 * pointer with btrfs_del_ptr().
 */
7356 /* right now we only prune from the extent allocation tree */
7357 static int prune_one_block(struct btrfs_trans_handle *trans,
7358 struct btrfs_fs_info *info,
7359 struct btrfs_corrupt_block *corrupt)
7362 struct btrfs_path path;
7363 struct extent_buffer *eb;
7367 int level = corrupt->level + 1;
7369 btrfs_init_path(&path);
7371 /* we want to stop at the parent to our busted block */
7372 path.lowest_level = level;
7374 ret = btrfs_search_slot(trans, info->extent_root,
7375 &corrupt->key, &path, -1, 1);
7380 eb = path.nodes[level];
7387 * hopefully the search gave us the block we want to prune,
7388 * lets try that first
7390 slot = path.slots[level];
7391 found = btrfs_node_blockptr(eb, slot);
7392 if (found == corrupt->cache.start)
7395 nritems = btrfs_header_nritems(eb);
7397 /* the search failed, lets scan this node and hope we find it */
7398 for (slot = 0; slot < nritems; slot++) {
7399 found = btrfs_node_blockptr(eb, slot);
7400 if (found == corrupt->cache.start)
7404 * we couldn't find the bad block. TODO, search all the nodes for pointers
7407 if (eb == info->extent_root->node) {
7412 btrfs_release_path(&path);
7417 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7418 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
7421 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - prune every block on fs_info->corrupt_blocks.
 *
 * Starts one transaction (lazily, on first corrupt block), calls
 * prune_one_block() for each cached corrupt block, removes it from the
 * cache, and commits at the end. Return value of prune_one_block is
 * intentionally ignored; the commit's result is returned.
 */
7425 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7427 struct btrfs_trans_handle *trans = NULL;
7428 struct cache_extent *cache;
7429 struct btrfs_corrupt_block *corrupt;
7432 cache = search_cache_extent(info->corrupt_blocks, 0);
7436 trans = btrfs_start_transaction(info->extent_root, 1);
7438 return PTR_ERR(trans);
7440 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7441 prune_one_block(trans, info, corrupt);
7442 remove_cache_extent(info->corrupt_blocks, cache);
7445 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * check_extent_refs - main extent-record verification/repair loop.
 *
 * In repair mode: first excludes every cached extent and corrupt block
 * from allocation, prunes corrupt blocks, and resolves duplicate
 * extent items. Then walks the extent cache reporting (and optionally
 * fixing) ref-count mismatches, backpointer mismatches, owner-ref
 * failures, bad full backrefs, stripe-crossing metadata and wrong
 * chunk types. Finishes by rebuilding block accounting.
 *
 * NOTE(review): many branch bodies, gotos and the cur_err/fix/had_dups
 * bookkeeping live on lines missing from this view — treat the flow
 * description above as approximate.
 */
7449 static int check_extent_refs(struct btrfs_root *root,
7450 struct cache_tree *extent_cache)
7452 struct extent_record *rec;
7453 struct cache_extent *cache;
7460 * if we're doing a repair, we have to make sure
7461 * we don't allocate from the problem extents.
7462 * In the worst case, this will be all the
7465 cache = search_cache_extent(extent_cache, 0);
7467 rec = container_of(cache, struct extent_record, cache);
7468 set_extent_dirty(root->fs_info->excluded_extents,
7470 rec->start + rec->max_size - 1);
7471 cache = next_cache_extent(cache);
7474 /* pin down all the corrupted blocks too */
7475 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7477 set_extent_dirty(root->fs_info->excluded_extents,
7479 cache->start + cache->size - 1);
7480 cache = next_cache_extent(cache);
7482 prune_corrupt_blocks(root->fs_info);
7483 reset_cached_block_groups(root->fs_info);
7486 reset_cached_block_groups(root->fs_info);
7489 * We need to delete any duplicate entries we find first otherwise we
7490 * could mess up the extent tree when we have backrefs that actually
7491 * belong to a different extent item and not the weird duplicate one.
7493 while (repair && !list_empty(&duplicate_extents)) {
7494 rec = to_extent_record(duplicate_extents.next);
7495 list_del_init(&rec->list);
7497 /* Sometimes we can find a backref before we find an actual
7498 * extent, so we need to process it a little bit to see if there
7499 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7500 * if this is a backref screwup. If we need to delete stuff
7501 * process_duplicates() will return 0, otherwise it will return
7504 if (process_duplicates(extent_cache, rec))
7506 ret = delete_duplicate_records(root, rec);
7510 * delete_duplicate_records will return the number of entries
7511 * deleted, so if it's greater than 0 then we know we actually
7512 * did something and we need to remove.
/* Main reporting/repair pass over every cached extent record */
7525 cache = search_cache_extent(extent_cache, 0);
7528 rec = container_of(cache, struct extent_record, cache);
7529 if (rec->num_duplicates) {
7530 fprintf(stderr, "extent item %llu has multiple extent "
7531 "items\n", (unsigned long long)rec->start);
7535 if (rec->refs != rec->extent_item_refs) {
7536 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7537 (unsigned long long)rec->start,
7538 (unsigned long long)rec->nr);
7539 fprintf(stderr, "extent item %llu, found %llu\n",
7540 (unsigned long long)rec->extent_item_refs,
7541 (unsigned long long)rec->refs);
7542 ret = record_orphan_data_extents(root->fs_info, rec);
7548 if (all_backpointers_checked(rec, 1)) {
7549 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7550 (unsigned long long)rec->start,
7551 (unsigned long long)rec->nr);
7555 if (!rec->owner_ref_checked) {
7556 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7557 (unsigned long long)rec->start,
7558 (unsigned long long)rec->nr);
7563 if (repair && fix) {
7564 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
7570 if (rec->bad_full_backref) {
7571 fprintf(stderr, "bad full backref, on [%llu]\n",
7572 (unsigned long long)rec->start);
7574 ret = fixup_extent_flags(root->fs_info, rec);
7582 * Although it's not a extent ref's problem, we reuse this
7583 * routine for error reporting.
7584 * No repair function yet.
7586 if (rec->crossing_stripes) {
7588 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7589 rec->start, rec->start + rec->max_size);
7593 if (rec->wrong_chunk_type) {
7595 "bad extent [%llu, %llu), type mismatch with chunk\n",
7596 rec->start, rec->start + rec->max_size);
7601 remove_cache_extent(extent_cache, cache);
7602 free_all_extent_backrefs(rec);
7603 if (!init_extent_tree && repair && (!cur_err || fix))
7604 clear_extent_dirty(root->fs_info->excluded_extents,
7606 rec->start + rec->max_size - 1);
7611 if (ret && ret != -EAGAIN) {
7612 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7615 struct btrfs_trans_handle *trans;
7617 root = root->fs_info->extent_root;
7618 trans = btrfs_start_transaction(root, 1);
7619 if (IS_ERR(trans)) {
7620 ret = PTR_ERR(trans);
/* Recompute bytes_used etc. after repairs */
7624 ret = btrfs_fix_block_accounting(trans, root);
7627 ret = btrfs_commit_transaction(trans, root);
/*
 * calc_stripe_length - per-device stripe size for a chunk.
 *
 * @type:        block group profile flags (BTRFS_BLOCK_GROUP_*)
 * @length:      logical chunk length
 * @num_stripes: number of stripes in the chunk
 *
 * RAID0 divides across all stripes; RAID10 stores two copies so the
 * divided length is doubled first; RAID5/6 exclude one/two parity
 * stripes; every other profile (single/DUP/RAID1) uses the full length.
 */
7639 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7643 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7644 stripe_size = length;
7645 stripe_size /= num_stripes;
7646 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7647 stripe_size = length * 2;
7648 stripe_size /= num_stripes;
7649 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7650 stripe_size = length;
7651 stripe_size /= (num_stripes - 1);
7652 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7653 stripe_size = length;
7654 stripe_size /= (num_stripes - 2);
7656 stripe_size = length;
/*
 * check_chunk_refs - cross-check one chunk against its block group and
 * the per-stripe device extents.
 *
 * A matching block group is detached from the cache and linked via
 * chunk_rec->bg_rec; matching device extents are moved onto
 * chunk_rec->dextents. Mismatches are reported unless @silent.
 */
7662 * Check the chunk with its block group/dev list ref:
7663 * Return 0 if all refs seems valid.
7664 * Return 1 if part of refs seems valid, need later check for rebuild ref
7665 * like missing block group and needs to search extent tree to rebuild them.
7666 * Return -1 if essential refs are missing and unable to rebuild.
7668 static int check_chunk_refs(struct chunk_record *chunk_rec,
7669 struct block_group_tree *block_group_cache,
7670 struct device_extent_tree *dev_extent_cache,
7673 struct cache_extent *block_group_item;
7674 struct block_group_record *block_group_rec;
7675 struct cache_extent *dev_extent_item;
7676 struct device_extent_record *dev_extent_rec;
7680 int metadump_v2 = 0;
7684 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7687 if (block_group_item) {
7688 block_group_rec = container_of(block_group_item,
7689 struct block_group_record,
/* Flags comparison is skipped for metadump_v2 images (see condition) */
7691 if (chunk_rec->length != block_group_rec->offset ||
7692 chunk_rec->offset != block_group_rec->objectid ||
7694 chunk_rec->type_flags != block_group_rec->flags)) {
7697 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7698 chunk_rec->objectid,
7703 chunk_rec->type_flags,
7704 block_group_rec->objectid,
7705 block_group_rec->type,
7706 block_group_rec->offset,
7707 block_group_rec->offset,
7708 block_group_rec->objectid,
7709 block_group_rec->flags);
7712 list_del_init(&block_group_rec->list);
7713 chunk_rec->bg_rec = block_group_rec;
7718 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7719 chunk_rec->objectid,
7724 chunk_rec->type_flags);
/* Now verify one device extent per stripe */
7731 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7732 chunk_rec->num_stripes);
7733 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7734 devid = chunk_rec->stripes[i].devid;
7735 offset = chunk_rec->stripes[i].offset;
7736 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7737 devid, offset, length);
7738 if (dev_extent_item) {
7739 dev_extent_rec = container_of(dev_extent_item,
7740 struct device_extent_record,
7742 if (dev_extent_rec->objectid != devid ||
7743 dev_extent_rec->offset != offset ||
7744 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7745 dev_extent_rec->length != length) {
7748 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7749 chunk_rec->objectid,
7752 chunk_rec->stripes[i].devid,
7753 chunk_rec->stripes[i].offset,
7754 dev_extent_rec->objectid,
7755 dev_extent_rec->offset,
7756 dev_extent_rec->length);
7759 list_move(&dev_extent_rec->chunk_list,
7760 &chunk_rec->dextents);
7765 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7766 chunk_rec->objectid,
7769 chunk_rec->stripes[i].devid,
7770 chunk_rec->stripes[i].offset);
/*
 * check_chunks - classify every chunk by the result of check_chunk_refs.
 *
 * err == 0 -> @good list; err > 0 -> @rebuild list; err < 0 -> @bad list
 * (each list optional). Afterwards, any block group or device extent
 * still left in its cache had no owning chunk and is reported (unless
 * @silent).
 */
7777 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7778 int check_chunks(struct cache_tree *chunk_cache,
7779 struct block_group_tree *block_group_cache,
7780 struct device_extent_tree *dev_extent_cache,
7781 struct list_head *good, struct list_head *bad,
7782 struct list_head *rebuild, int silent)
7784 struct cache_extent *chunk_item;
7785 struct chunk_record *chunk_rec;
7786 struct block_group_record *bg_rec;
7787 struct device_extent_record *dext_rec;
7791 chunk_item = first_cache_extent(chunk_cache);
7792 while (chunk_item) {
7793 chunk_rec = container_of(chunk_item, struct chunk_record,
7795 err = check_chunk_refs(chunk_rec, block_group_cache,
7796 dev_extent_cache, silent);
7799 if (err == 0 && good)
7800 list_add_tail(&chunk_rec->list, good);
7801 if (err > 0 && rebuild)
7802 list_add_tail(&chunk_rec->list, rebuild);
7804 list_add_tail(&chunk_rec->list, bad);
7805 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists is orphaned (no chunk claimed it) */
7808 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7811 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7819 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7823 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used - verify a device's byte_used against its extents.
 *
 * Sums the length of every device extent belonging to dev_rec->devid
 * (removing each from the orphan list as it is claimed) and compares
 * the total with the device item's byte_used, reporting a mismatch.
 */
7834 static int check_device_used(struct device_record *dev_rec,
7835 struct device_extent_tree *dext_cache)
7837 struct cache_extent *cache;
7838 struct device_extent_record *dev_extent_rec;
7841 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7843 dev_extent_rec = container_of(cache,
7844 struct device_extent_record,
/* search_cache_extent2 may return the next devid; stop there */
7846 if (dev_extent_rec->objectid != dev_rec->devid)
7849 list_del_init(&dev_extent_rec->device_list);
7850 total_byte += dev_extent_rec->length;
7851 cache = next_cache_extent(cache);
7854 if (total_byte != dev_rec->byte_used) {
7856 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7857 total_byte, dev_rec->byte_used, dev_rec->objectid,
7858 dev_rec->type, dev_rec->offset);
/*
 * is_super_size_valid - sanity-check super_copy->total_bytes.
 *
 * Fatal case: super total_bytes smaller than the sum of device sizes
 * (can make the fs unmountable on newer kernels) -> returns false.
 * Minor misalignment/mismatch only produces a warning; metadump images
 * skip the optional check entirely.
 */
7866 * Unlike device size alignment check above, some super total_bytes check
7867 * failure can lead to mount failure for newer kernel.
7869 * So this function will return the error for a fatal super total_bytes problem.
7871 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
7873 struct btrfs_device *dev;
7874 struct list_head *dev_list = &fs_info->fs_devices->devices;
7875 u64 total_bytes = 0;
7876 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
7878 list_for_each_entry(dev, dev_list, dev_list)
7879 total_bytes += dev->total_bytes;
7881 /* Important check, which can cause unmountable fs */
7882 if (super_bytes < total_bytes) {
7883 error("super total bytes %llu smaller than real device(s) size %llu",
7884 super_bytes, total_bytes);
7885 error("mounting this fs may fail for newer kernels");
7886 error("this can be fixed by 'btrfs rescue fix-device-size'");
7891 * Optional check, just to make everything aligned and match with each
7894 * For a btrfs-image restored fs, we don't need to check it anyway.
7896 if (btrfs_super_flags(fs_info->super_copy) &
7897 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
7899 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
7900 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
7901 super_bytes != total_bytes) {
7902 warning("minor unaligned/mismatch device size detected");
7904 "recommended to use 'btrfs rescue fix-device-size' to fix it");
/*
 * check_devices - verify every device item against its device extents.
 *
 * Runs check_device_used() and a size-alignment check per device, then
 * reports device extents whose owning device item was never found.
 */
7909 /* check btrfs_dev_item -> btrfs_dev_extent */
7910 static int check_devices(struct rb_root *dev_cache,
7911 struct device_extent_tree *dev_extent_cache)
7913 struct rb_node *dev_node;
7914 struct device_record *dev_rec;
7915 struct device_extent_record *dext_rec;
7919 dev_node = rb_first(dev_cache);
7921 dev_rec = container_of(dev_node, struct device_record, node);
7922 err = check_device_used(dev_rec, dev_extent_cache);
7926 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
7927 global_info->sectorsize);
7928 dev_node = rb_next(dev_node);
7930 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7933 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7934 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - allocate a root_item_record and append it to
 * @head.
 *
 * @drop_key may be NULL (copied only when present — the guarding
 * condition is on a line not visible here). Caller frees via
 * free_root_item_list().
 */
7941 static int add_root_item_to_list(struct list_head *head,
7942 u64 objectid, u64 bytenr, u64 last_snapshot,
7943 u8 level, u8 drop_level,
7944 struct btrfs_key *drop_key)
7947 struct root_item_record *ri_rec;
7948 ri_rec = malloc(sizeof(*ri_rec));
7951 ri_rec->bytenr = bytenr;
7952 ri_rec->objectid = objectid;
7953 ri_rec->level = level;
7954 ri_rec->drop_level = drop_level;
7955 ri_rec->last_snapshot = last_snapshot;
7957 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7958 list_add_tail(&ri_rec->list, head);
/* Free every root_item_record queued on @list. */
7963 static void free_root_item_list(struct list_head *list)
7965 struct root_item_record *ri_rec;
7967 while (!list_empty(list)) {
7968 ri_rec = list_first_entry(list, struct root_item_record,
7970 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - scan every tree root queued on @list.
 *
 * For each root_item_record: read its root node, add it to the pending
 * set, and drain blocks one at a time with run_next_block() — snapshots
 * are processed one root at a time so readahead works well. After the
 * list is empty, a final loop drains any remaining pending blocks.
 * NOTE(review): loop termination conditions and error handling partly
 * live on lines missing from this view.
 */
7975 static int deal_root_from_list(struct list_head *list,
7976 struct btrfs_root *root,
7977 struct block_info *bits,
7979 struct cache_tree *pending,
7980 struct cache_tree *seen,
7981 struct cache_tree *reada,
7982 struct cache_tree *nodes,
7983 struct cache_tree *extent_cache,
7984 struct cache_tree *chunk_cache,
7985 struct rb_root *dev_cache,
7986 struct block_group_tree *block_group_cache,
7987 struct device_extent_tree *dev_extent_cache)
7992 while (!list_empty(list)) {
7993 struct root_item_record *rec;
7994 struct extent_buffer *buf;
7995 rec = list_entry(list->next,
7996 struct root_item_record, list);
7998 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
7999 if (!extent_buffer_uptodate(buf)) {
8000 free_extent_buffer(buf);
8004 ret = add_root_to_pending(buf, extent_cache, pending,
8005 seen, nodes, rec->objectid);
8009 * To rebuild extent tree, we need deal with snapshot
8010 * one by one, otherwise we deal with node firstly which
8011 * can maximize readahead.
8014 ret = run_next_block(root, bits, bits_nr, &last,
8015 pending, seen, reada, nodes,
8016 extent_cache, chunk_cache,
8017 dev_cache, block_group_cache,
8018 dev_extent_cache, rec);
8022 free_extent_buffer(buf);
8023 list_del(&rec->list);
/* Drain whatever blocks remain pending after all roots are queued */
8029 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8030 reada, nodes, extent_cache, chunk_cache,
8031 dev_cache, block_group_cache,
8032 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents - original-mode whole-FS consistency pass.
 *
 * Builds in-memory caches for chunks, block groups, device extents and
 * extents; queues the tree root, chunk root and every ROOT_ITEM from
 * the root tree (normal vs. dropping trees split by drop_progress);
 * walks all trees via deal_root_from_list(); then runs check_chunks(),
 * check_extent_refs() and check_devices(). Installs fsck hooks
 * (excluded_extents, fsck_extent_cache, free_extent_hook,
 * corrupt_blocks) on fs_info for the duration and tears them down at
 * the end. A second cleanup block near the bottom is an error path
 * that additionally frees the extent record cache.
 *
 * NOTE(review): goto labels, the -EAGAIN restart path and several
 * declarations are on lines missing from this view.
 */
8042 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8044 struct rb_root dev_cache;
8045 struct cache_tree chunk_cache;
8046 struct block_group_tree block_group_cache;
8047 struct device_extent_tree dev_extent_cache;
8048 struct cache_tree extent_cache;
8049 struct cache_tree seen;
8050 struct cache_tree pending;
8051 struct cache_tree reada;
8052 struct cache_tree nodes;
8053 struct extent_io_tree excluded_extents;
8054 struct cache_tree corrupt_blocks;
8055 struct btrfs_path path;
8056 struct btrfs_key key;
8057 struct btrfs_key found_key;
8059 struct block_info *bits;
8061 struct extent_buffer *leaf;
8063 struct btrfs_root_item ri;
8064 struct list_head dropping_trees;
8065 struct list_head normal_trees;
8066 struct btrfs_root *root1;
8067 struct btrfs_root *root;
8071 root = fs_info->fs_root;
8072 dev_cache = RB_ROOT;
8073 cache_tree_init(&chunk_cache);
8074 block_group_tree_init(&block_group_cache);
8075 device_extent_tree_init(&dev_extent_cache);
8077 cache_tree_init(&extent_cache);
8078 cache_tree_init(&seen);
8079 cache_tree_init(&pending);
8080 cache_tree_init(&nodes);
8081 cache_tree_init(&reada);
8082 cache_tree_init(&corrupt_blocks);
8083 extent_io_tree_init(&excluded_extents);
8084 INIT_LIST_HEAD(&dropping_trees);
8085 INIT_LIST_HEAD(&normal_trees);
/* Hook the fsck state into fs_info so allocator/free paths see it */
8088 fs_info->excluded_extents = &excluded_extents;
8089 fs_info->fsck_extent_cache = &extent_cache;
8090 fs_info->free_extent_hook = free_extent_hook;
8091 fs_info->corrupt_blocks = &corrupt_blocks;
8095 bits = malloc(bits_nr * sizeof(struct block_info));
8101 if (ctx.progress_enabled) {
8102 ctx.tp = TASK_EXTENTS;
8103 task_start(ctx.info);
/* Queue the tree root and chunk root first */
8107 root1 = fs_info->tree_root;
8108 level = btrfs_header_level(root1->node);
8109 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8110 root1->node->start, 0, level, 0, NULL);
8113 root1 = fs_info->chunk_root;
8114 level = btrfs_header_level(root1->node);
8115 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8116 root1->node->start, 0, level, 0, NULL);
8119 btrfs_init_path(&path);
8122 key.type = BTRFS_ROOT_ITEM_KEY;
8123 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
8127 leaf = path.nodes[0];
8128 slot = path.slots[0];
8129 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8130 ret = btrfs_next_leaf(root, &path);
8133 leaf = path.nodes[0];
8134 slot = path.slots[0];
8136 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8137 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8138 unsigned long offset;
8141 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8142 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8143 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress == 0 means the root is not being deleted */
8144 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8145 level = btrfs_root_level(&ri);
8146 ret = add_root_item_to_list(&normal_trees,
8148 btrfs_root_bytenr(&ri),
8149 last_snapshot, level,
8154 level = btrfs_root_level(&ri);
8155 objectid = found_key.objectid;
8156 btrfs_disk_key_to_cpu(&found_key,
8158 ret = add_root_item_to_list(&dropping_trees,
8160 btrfs_root_bytenr(&ri),
8161 last_snapshot, level,
8162 ri.drop_level, &found_key);
8169 btrfs_release_path(&path);
8172 * check_block can return -EAGAIN if it fixes something, please keep
8173 * this in mind when dealing with return values from these functions, if
8174 * we get -EAGAIN we want to fall through and restart the loop.
8176 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8177 &seen, &reada, &nodes, &extent_cache,
8178 &chunk_cache, &dev_cache, &block_group_cache,
8185 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8186 &pending, &seen, &reada, &nodes,
8187 &extent_cache, &chunk_cache, &dev_cache,
8188 &block_group_cache, &dev_extent_cache);
8195 ret = check_chunks(&chunk_cache, &block_group_cache,
8196 &dev_extent_cache, NULL, NULL, NULL, 0);
8203 ret = check_extent_refs(root, &extent_cache);
8210 ret = check_devices(&dev_cache, &dev_extent_cache);
8215 task_stop(ctx.info);
/* Normal teardown: unhook fsck state and free all caches */
8217 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8218 extent_io_tree_cleanup(&excluded_extents);
8219 fs_info->fsck_extent_cache = NULL;
8220 fs_info->free_extent_hook = NULL;
8221 fs_info->corrupt_blocks = NULL;
8222 fs_info->excluded_extents = NULL;
8225 free_chunk_cache_tree(&chunk_cache);
8226 free_device_cache_tree(&dev_cache);
8227 free_block_group_tree(&block_group_cache);
8228 free_device_extent_tree(&dev_extent_cache);
8229 free_extent_cache_tree(&seen);
8230 free_extent_cache_tree(&pending);
8231 free_extent_cache_tree(&reada);
8232 free_extent_cache_tree(&nodes);
8233 free_root_item_list(&normal_trees);
8234 free_root_item_list(&dropping_trees);
/* Error-path teardown (also frees the extent record cache) */
8237 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8238 free_extent_cache_tree(&seen);
8239 free_extent_cache_tree(&pending);
8240 free_extent_cache_tree(&reada);
8241 free_extent_cache_tree(&nodes);
8242 free_chunk_cache_tree(&chunk_cache);
8243 free_block_group_tree(&block_group_cache);
8244 free_device_cache_tree(&dev_cache);
8245 free_device_extent_tree(&dev_extent_cache);
8246 free_extent_record_cache(&extent_cache);
8247 free_root_item_list(&normal_trees);
8248 free_root_item_list(&dropping_trees);
8249 extent_io_tree_cleanup(&excluded_extents);
/*
 * do_check_chunks_and_extents - dispatch the extents pass by check mode.
 *
 * Lowmem mode uses check_chunks_and_extents_v2(); original mode uses
 * check_chunks_and_extents(). On success in repair mode, also fixes
 * device/super size problems.
 */
8253 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8257 if (!ctx.progress_enabled)
8258 fprintf(stderr, "checking extents\n");
8259 if (check_mode == CHECK_MODE_LOWMEM)
8260 ret = check_chunks_and_extents_v2(fs_info);
8262 ret = check_chunks_and_extents(fs_info);
8264 /* Also repair device size related problems */
8265 if (repair && !ret) {
8266 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * btrfs_fsck_reinit_root - replace @root's node with a fresh empty one.
 *
 * Either reuses the existing node (@overwrite path, via
 * extent_buffer_get) or allocates a new zeroed block, initializes its
 * header (level, bytenr, generation, owner, fsid, chunk-tree uuid),
 * updates the root item, and puts the root on the dirty list.
 * NOTE(review): the branch structure around the two
 * extent_buffer_get() calls is partly hidden by missing lines.
 */
8273 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8274 struct btrfs_root *root, int overwrite)
8276 struct extent_buffer *c;
8277 struct extent_buffer *old = root->node;
8280 struct btrfs_disk_key disk_key = {0,0,0};
8286 extent_buffer_get(c);
8289 c = btrfs_alloc_free_block(trans, root,
8290 root->fs_info->nodesize,
8291 root->root_key.objectid,
8292 &disk_key, level, 0, 0);
8295 extent_buffer_get(c);
/* Build a valid empty header for the new root node */
8299 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8300 btrfs_set_header_level(c, level);
8301 btrfs_set_header_bytenr(c, c->start);
8302 btrfs_set_header_generation(c, trans->transid);
8303 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8304 btrfs_set_header_owner(c, root->root_key.objectid);
8306 write_extent_buffer(c, root->fs_info->fsid,
8307 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8309 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8310 btrfs_header_chunk_tree_uuid(c),
8313 btrfs_mark_buffer_dirty(c);
8315 * this case can happen in the following case:
8317 * 1.overwrite previous root.
8319 * 2.reinit reloc data root, this is because we skip pin
8320 * down reloc data tree before which means we can allocate
8321 * same block bytenr here.
8323 if (old->start == c->start) {
8324 btrfs_set_root_generation(&root->root_item,
8326 root->root_item.level = btrfs_header_level(root->node);
8327 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8328 &root->root_key, &root->root_item);
8330 free_extent_buffer(c);
8334 free_extent_buffer(old);
8336 add_root_to_dirty_list(root);
/*
 * Recursively pin every metadata block reachable from @eb so the
 * allocator cannot hand those ranges out while the extent tree is being
 * rebuilt.
 *
 * @tree_root: non-zero when walking the tree root itself; in that case
 *             ROOT_ITEMs found in leaves are followed into their
 *             subtrees (extent root and reloc roots are skipped).
 * NOTE(review): this listing is elided; intervening lines (continue/
 * return statements, leaf/node branching) are not shown.
 */
8340 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8341 struct extent_buffer *eb, int tree_root)
8343 struct extent_buffer *tmp;
8344 struct btrfs_root_item *ri;
8345 struct btrfs_key key;
8347 int level = btrfs_header_level(eb);
8353 * If we have pinned this block before, don't pin it again.
8354 * This can not only avoid forever loop with broken filesystem
8355 * but also give us some speedups.
8357 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8358 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8361 btrfs_pin_extent(fs_info, eb->start, eb->len);
8363 nritems = btrfs_header_nritems(eb);
8364 for (i = 0; i < nritems; i++) {
8366 btrfs_item_key_to_cpu(eb, &key, i);
8367 if (key.type != BTRFS_ROOT_ITEM_KEY)
8369 /* Skip the extent root and reloc roots */
8370 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8371 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8372 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8374 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8375 bytenr = btrfs_disk_root_bytenr(eb, ri);
8378 * If at any point we start needing the real root we
8379 * will have to build a stump root for the root we are
8380 * in, but for now this doesn't actually use the root so
8381 * just pass in extent_root.
8383 tmp = read_tree_block(fs_info, bytenr, 0);
8384 if (!extent_buffer_uptodate(tmp)) {
8385 fprintf(stderr, "Error reading root block\n");
/* Recurse into the referenced subvolume tree (tree_root=0 there). */
8388 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8389 free_extent_buffer(tmp);
8393 bytenr = btrfs_node_blockptr(eb, i);
8395 /* If we aren't the tree root don't read the block */
8396 if (level == 1 && !tree_root) {
8397 btrfs_pin_extent(fs_info, bytenr,
8402 tmp = read_tree_block(fs_info, bytenr, 0);
8403 if (!extent_buffer_uptodate(tmp)) {
8404 fprintf(stderr, "Error reading tree block\n");
8407 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8408 free_extent_buffer(tmp);
/*
 * Pin all metadata reachable from the chunk root and the tree root
 * (the latter with tree_root=1 so ROOT_ITEMs are followed into their
 * subtrees).  Called before re-initing the extent tree.
 */
8417 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8421 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8425 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group cache from the chunk tree.
 *
 * Walks every CHUNK_ITEM, re-creates the corresponding in-memory block
 * group and marks its range free, then iterates the block groups to
 * re-establish the avail_*_alloc_bits.  Used after btrfs_free_block_groups()
 * during extent tree re-init.
 * NOTE(review): this listing is elided; intervening lines (loop
 * structure, error handling, second loop body) are not shown.
 */
8428 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8430 struct btrfs_block_group_cache *cache;
8431 struct btrfs_path path;
8432 struct extent_buffer *leaf;
8433 struct btrfs_chunk *chunk;
8434 struct btrfs_key key;
8438 btrfs_init_path(&path);
8440 key.type = BTRFS_CHUNK_ITEM_KEY;
8442 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
8444 btrfs_release_path(&path);
8449 * We do this in case the block groups were screwed up and had alloc
8450 * bits that aren't actually set on the chunks. This happens with
8451 * restored images every time and could happen in real life I guess.
8453 fs_info->avail_data_alloc_bits = 0;
8454 fs_info->avail_metadata_alloc_bits = 0;
8455 fs_info->avail_system_alloc_bits = 0;
8457 /* First we need to create the in-memory block groups */
8459 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8460 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
8462 btrfs_release_path(&path);
8470 leaf = path.nodes[0];
8471 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8472 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8477 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
/* key.offset of a CHUNK_ITEM is the chunk's logical start. */
8478 btrfs_add_block_group(fs_info, 0,
8479 btrfs_chunk_type(leaf, chunk), key.offset,
8480 btrfs_chunk_length(leaf, chunk));
8481 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8482 key.offset + btrfs_chunk_length(leaf, chunk));
8487 cache = btrfs_lookup_first_block_group(fs_info, start);
8491 start = cache->key.objectid + cache->key.offset;
8494 btrfs_release_path(&path);
/*
 * Remove all state left over from a pending balance operation.
 *
 * Deletes the BALANCE_ITEM from the tree root, deletes every
 * TREE_RELOC root item, then re-reads the data reloc tree, reinits its
 * root node and re-creates its root dir.  Part of extent tree re-init,
 * since a half-finished balance would otherwise confuse the rebuild.
 * NOTE(review): this listing is elided; intervening lines (loop bodies,
 * error/goto paths, labels such as reinit_data_reloc) are not shown.
 */
8498 static int reset_balance(struct btrfs_trans_handle *trans,
8499 struct btrfs_fs_info *fs_info)
8501 struct btrfs_root *root = fs_info->tree_root;
8502 struct btrfs_path path;
8503 struct extent_buffer *leaf;
8504 struct btrfs_key key;
8505 int del_slot, del_nr = 0;
8509 btrfs_init_path(&path);
/* Step 1: drop the pending balance item, if any. */
8510 key.objectid = BTRFS_BALANCE_OBJECTID;
8511 key.type = BTRFS_BALANCE_ITEM_KEY;
8513 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8518 goto reinit_data_reloc;
8523 ret = btrfs_del_item(trans, root, &path);
8526 btrfs_release_path(&path);
/* Step 2: delete all tree reloc root items. */
8528 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8529 key.type = BTRFS_ROOT_ITEM_KEY;
8531 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8535 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8540 ret = btrfs_del_items(trans, root, &path,
8547 btrfs_release_path(&path);
8550 ret = btrfs_search_slot(trans, root, &key, &path,
8557 leaf = path.nodes[0];
8558 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8559 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8561 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8566 del_slot = path.slots[0];
8575 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
8579 btrfs_release_path(&path);
/* Step 3: reinit the data reloc tree and give it a fresh root dir. */
8582 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8583 key.type = BTRFS_ROOT_ITEM_KEY;
8584 key.offset = (u64)-1;
8585 root = btrfs_read_fs_root(fs_info, &key);
8587 fprintf(stderr, "Error reading data reloc tree\n");
8588 ret = PTR_ERR(root);
8591 record_root_in_trans(trans, root);
8592 ret = btrfs_fsck_reinit_root(trans, root, 0);
8595 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8597 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch (--init-extent-tree).
 *
 * Refuses mixed block groups, pins all in-use metadata so it cannot be
 * overwritten, drops and re-creates the in-memory block groups, reinits
 * the extent root to an empty node, re-inserts a BLOCK_GROUP_ITEM for
 * every block group, and finally clears any pending balance state.
 * NOTE(review): this listing is elided; intervening lines (returns,
 * loop structure) are not shown.
 */
8601 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8602 struct btrfs_fs_info *fs_info)
8608 * The only reason we don't do this is because right now we're just
8609 * walking the trees we find and pinning down their bytes, we don't look
8610 * at any of the leaves. In order to do mixed groups we'd have to check
8611 * the leaves of any fs roots and pin down the bytes for any file
8612 * extents we find. Not hard but why do it if we don't have to?
8614 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8615 fprintf(stderr, "We don't support re-initing the extent tree "
8616 "for mixed block groups yet, please notify a btrfs "
8617 "developer you want to do this so they can add this "
8618 "functionality.\n");
8623 * first we need to walk all of the trees except the extent tree and pin
8624 * down the bytes that are in use so we don't overwrite any existing
8627 ret = pin_metadata_blocks(fs_info);
8629 fprintf(stderr, "error pinning down used bytes\n");
8634 * Need to drop all the block groups since we're going to recreate all
8637 btrfs_free_block_groups(fs_info);
8638 ret = reset_block_groups(fs_info);
8640 fprintf(stderr, "error resetting the block groups\n");
8644 /* Ok we can allocate now, reinit the extent root */
8645 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8647 fprintf(stderr, "extent root initialization failed\n");
8649 * When the transaction code is updated we should end the
8650 * transaction, but for now progs only knows about commit so
8651 * just return an error.
8657 * Now we have all the in-memory block groups setup so we can make
8658 * allocations properly, and the metadata we care about is safe since we
8659 * pinned all of it above.
8662 struct btrfs_block_group_cache *cache;
8664 cache = btrfs_lookup_first_block_group(fs_info, start);
8667 start = cache->key.objectid + cache->key.offset;
/* Re-insert the BLOCK_GROUP_ITEM for this group into the extent tree. */
8668 ret = btrfs_insert_item(trans, fs_info->extent_root,
8669 &cache->key, &cache->item,
8670 sizeof(cache->item));
8672 fprintf(stderr, "Error adding block group\n");
8675 btrfs_extent_post_op(trans, fs_info->extent_root);
8678 ret = reset_balance(trans, fs_info);
8680 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a COW of the tree block @eb by searching down to it with a
 * transaction open (cow=1), which rewrites it with a fresh generation.
 * Used in repair mode to fix blocks with transid mismatches.
 * NOTE(review): this listing is elided; intervening lines are not shown.
 */
8685 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8687 struct btrfs_path path;
8688 struct btrfs_trans_handle *trans;
8689 struct btrfs_key key;
8692 printf("Recowing metadata block %llu\n", eb->start);
/* Resolve the owner root of the block from its header. */
8693 key.objectid = btrfs_header_owner(eb);
8694 key.type = BTRFS_ROOT_ITEM_KEY;
8695 key.offset = (u64)-1;
8697 root = btrfs_read_fs_root(root->fs_info, &key);
8699 fprintf(stderr, "Couldn't find owner root %llu\n",
8701 return PTR_ERR(root);
8704 trans = btrfs_start_transaction(root, 1);
8706 return PTR_ERR(trans);
/* Search to @eb's own level so the walk (and COW) stops at it. */
8708 btrfs_init_path(&path);
8709 path.lowest_level = btrfs_header_level(eb);
8710 if (path.lowest_level)
8711 btrfs_node_key_to_cpu(eb, &key, 0);
8713 btrfs_item_key_to_cpu(eb, &key, 0);
8715 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8716 btrfs_commit_transaction(trans, root);
8717 btrfs_release_path(&path);
/*
 * Delete one item previously recorded as bad (on the global
 * delete_items list): look up its owning root, then search for the
 * exact key with cow=1 and delete it inside a transaction.
 * NOTE(review): this listing is elided; intervening lines are not shown.
 */
8721 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8723 struct btrfs_path path;
8724 struct btrfs_trans_handle *trans;
8725 struct btrfs_key key;
8728 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8729 bad->key.type, bad->key.offset);
8730 key.objectid = bad->root_id;
8731 key.type = BTRFS_ROOT_ITEM_KEY;
8732 key.offset = (u64)-1;
8734 root = btrfs_read_fs_root(root->fs_info, &key);
8736 fprintf(stderr, "Couldn't find owner root %llu\n",
8738 return PTR_ERR(root);
8741 trans = btrfs_start_transaction(root, 1);
8743 return PTR_ERR(trans);
8745 btrfs_init_path(&path);
/* ins_len=-1, cow=1: position for deletion of the exact bad key. */
8746 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
8752 ret = btrfs_del_item(trans, root, &path);
8754 btrfs_commit_transaction(trans, root);
8755 btrfs_release_path(&path);
/*
 * Clear the log tree pointer in the superblock (root and level set to
 * 0) and commit, so a stale/unreplayable log cannot be replayed on the
 * next mount.  Called before repair commits transactions of its own.
 */
8759 static int zero_log_tree(struct btrfs_root *root)
8761 struct btrfs_trans_handle *trans;
8764 trans = btrfs_start_transaction(root, 1);
8765 if (IS_ERR(trans)) {
8766 ret = PTR_ERR(trans);
8769 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8770 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8771 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range [start, start+len).
 *
 * Reads the data one sectorsize at a time into @buf and inserts the
 * csum via btrfs_csum_file_block(); start+len is passed as the upper
 * bound of the run being csummed.
 * NOTE(review): this listing is elided; error-handling lines are not
 * shown.
 */
8775 static int populate_csum(struct btrfs_trans_handle *trans,
8776 struct btrfs_root *csum_root, char *buf, u64 start,
8779 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8784 while (offset < len) {
8785 sectorsize = fs_info->sectorsize;
8786 ret = read_extent_data(fs_info, buf, start + offset,
8790 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8791 start + offset, buf, sectorsize);
8794 offset += sectorsize;
/*
 * Walk one fs/subvolume tree and (re)generate csums for every regular
 * (non-inline, non-prealloc) file extent it references.
 *
 * Iterates all EXTENT_DATA items with btrfs_next_item() and feeds each
 * REG extent's disk bytenr/length to populate_csum().
 * NOTE(review): this listing is elided; intervening lines (key setup,
 * skip/continue paths, cleanup) are not shown.
 */
8799 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8800 struct btrfs_root *csum_root,
8801 struct btrfs_root *cur_root)
8803 struct btrfs_path path;
8804 struct btrfs_key key;
8805 struct extent_buffer *node;
8806 struct btrfs_file_extent_item *fi;
/* One sector worth of scratch space for populate_csum(). */
8813 buf = malloc(cur_root->fs_info->sectorsize);
8817 btrfs_init_path(&path);
8821 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
8824 /* Iterate all regular file extents and fill its csum */
8826 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8828 if (key.type != BTRFS_EXTENT_DATA_KEY)
8830 node = path.nodes[0];
8831 slot = path.slots[0];
8832 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8833 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8835 start = btrfs_file_extent_disk_bytenr(node, fi);
8836 len = btrfs_file_extent_disk_num_bytes(node, fi);
8838 ret = populate_csum(trans, csum_root, buf, start, len);
8845 * TODO: if next leaf is corrupted, jump to nearest next valid
8848 ret = btrfs_next_item(cur_root, &path);
8858 btrfs_release_path(&path);
/*
 * Rebuild the csum tree by walking every fs/subvolume tree.
 *
 * Iterates ROOT_ITEMs in the tree root starting at FS_TREE_OBJECTID,
 * reads each fs tree and delegates to fill_csum_tree_from_one_fs_root().
 * Used when the extent tree cannot be trusted (e.g. after
 * --init-extent-tree).
 * NOTE(review): this listing is elided; intervening lines (loop
 * structure, error paths) are not shown.
 */
8863 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8864 struct btrfs_root *csum_root)
8866 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8867 struct btrfs_path path;
8868 struct btrfs_root *tree_root = fs_info->tree_root;
8869 struct btrfs_root *cur_root;
8870 struct extent_buffer *node;
8871 struct btrfs_key key;
8875 btrfs_init_path(&path);
8876 key.objectid = BTRFS_FS_TREE_OBJECTID;
8878 key.type = BTRFS_ROOT_ITEM_KEY;
8879 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
8888 node = path.nodes[0];
8889 slot = path.slots[0];
8890 btrfs_item_key_to_cpu(node, &key, slot);
8891 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8893 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* Only fs/subvolume trees carry file extents worth csumming. */
8895 if (!is_fstree(key.objectid))
8897 key.offset = (u64)-1;
8899 cur_root = btrfs_read_fs_root(fs_info, &key);
8900 if (IS_ERR(cur_root) || !cur_root) {
8901 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8905 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8910 ret = btrfs_next_item(tree_root, &path);
8920 btrfs_release_path(&path);
/*
 * Rebuild the csum tree by walking the extent tree.
 *
 * Iterates EXTENT_ITEMs and csums every extent flagged
 * BTRFS_EXTENT_FLAG_DATA via populate_csum().  Faster than walking fs
 * trees, but requires a trustworthy extent tree.
 * NOTE(review): this listing is elided; intervening lines (loop
 * structure, skip/error paths) are not shown.
 */
8924 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8925 struct btrfs_root *csum_root)
8927 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8928 struct btrfs_path path;
8929 struct btrfs_extent_item *ei;
8930 struct extent_buffer *leaf;
8932 struct btrfs_key key;
8935 btrfs_init_path(&path);
8937 key.type = BTRFS_EXTENT_ITEM_KEY;
8939 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8941 btrfs_release_path(&path);
8945 buf = malloc(csum_root->fs_info->sectorsize);
8947 btrfs_release_path(&path);
8952 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8953 ret = btrfs_next_leaf(extent_root, &path);
8961 leaf = path.nodes[0];
8963 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8964 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8969 ei = btrfs_item_ptr(leaf, path.slots[0],
8970 struct btrfs_extent_item);
/* Skip tree blocks: only data extents carry csums. */
8971 if (!(btrfs_extent_flags(leaf, ei) &
8972 BTRFS_EXTENT_FLAG_DATA)) {
/* key.objectid/key.offset of an EXTENT_ITEM are bytenr/length. */
8977 ret = populate_csum(trans, csum_root, buf, key.objectid,
8984 btrfs_release_path(&path);
8990 * Recalculate the csum and put it into the csum tree.
8992 * Extent tree init will wipe out all the extent info, so in that case, we
8993 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
8994 * will use fs/subvol trees to init the csum tree.
8996 static int fill_csum_tree(struct btrfs_trans_handle *trans,
8997 struct btrfs_root *csum_root,
/* Dispatch on search_fs_tree: fs-tree walk vs. extent-tree walk. */
9001 return fill_csum_tree_from_fs(trans, csum_root);
9003 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache built by
 * build_roots_info_cache(): free every root_item_info entry, then the
 * cache tree itself, and reset the global pointer to NULL so the cache
 * can be rebuilt later.
 */
9006 static void free_roots_info_cache(void)
9008 if (!roots_info_cache)
9011 while (!cache_tree_empty(roots_info_cache)) {
9012 struct cache_extent *entry;
9013 struct root_item_info *rii;
9015 entry = first_cache_extent(roots_info_cache);
9018 remove_cache_extent(roots_info_cache, entry);
9019 rii = container_of(entry, struct root_item_info, cache_extent);
9023 free(roots_info_cache);
9024 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, for every subvolume root id, the
 * bytenr/generation/level of its highest tree block (the root node
 * candidate), keyed by root id in the global roots_info_cache.
 *
 * Only tree-block extents whose first inline ref is a
 * TREE_BLOCK_REF_KEY are considered; node_count tracks how many blocks
 * were seen at the top level (must end up 1 for a usable root).
 * The cache is later consumed by maybe_repair_root_item().
 * NOTE(review): this listing is elided; intervening lines (loop
 * structure, continue/error paths) are not shown.
 */
9027 static int build_roots_info_cache(struct btrfs_fs_info *info)
9030 struct btrfs_key key;
9031 struct extent_buffer *leaf;
9032 struct btrfs_path path;
9034 if (!roots_info_cache) {
9035 roots_info_cache = malloc(sizeof(*roots_info_cache));
9036 if (!roots_info_cache)
9038 cache_tree_init(roots_info_cache);
9041 btrfs_init_path(&path);
9043 key.type = BTRFS_EXTENT_ITEM_KEY;
9045 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
9048 leaf = path.nodes[0];
9051 struct btrfs_key found_key;
9052 struct btrfs_extent_item *ei;
9053 struct btrfs_extent_inline_ref *iref;
9054 int slot = path.slots[0];
9059 struct cache_extent *entry;
9060 struct root_item_info *rii;
9062 if (slot >= btrfs_header_nritems(leaf)) {
9063 ret = btrfs_next_leaf(info->extent_root, &path);
9070 leaf = path.nodes[0];
9071 slot = path.slots[0];
9074 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9076 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9077 found_key.type != BTRFS_METADATA_ITEM_KEY)
9080 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9081 flags = btrfs_extent_flags(leaf, ei);
/* EXTENT_ITEMs without TREE_BLOCK flag are data extents - skip. */
9083 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9084 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM stores the level in key.offset and has the iref
 * right after the item; EXTENT_ITEM has a tree_block_info first.
 */
9087 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9088 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9089 level = found_key.offset;
9091 struct btrfs_tree_block_info *binfo;
9093 binfo = (struct btrfs_tree_block_info *)(ei + 1);
9094 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9095 level = btrfs_tree_block_level(leaf, binfo);
9099 * For a root extent, it must be of the following type and the
9100 * first (and only one) iref in the item.
9102 type = btrfs_extent_inline_ref_type(leaf, iref);
9103 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9106 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9107 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First sighting of this root id: allocate a fresh info entry. */
9109 rii = malloc(sizeof(struct root_item_info));
9114 rii->cache_extent.start = root_id;
9115 rii->cache_extent.size = 1;
9116 rii->level = (u8)-1;
9117 entry = &rii->cache_extent;
9118 ret = insert_cache_extent(roots_info_cache, entry);
9121 rii = container_of(entry, struct root_item_info,
9125 ASSERT(rii->cache_extent.start == root_id);
9126 ASSERT(rii->cache_extent.size == 1);
/* A higher level supersedes the previous candidate root node. */
9128 if (level > rii->level || rii->level == (u8)-1) {
9130 rii->bytenr = found_key.objectid;
9131 rii->gen = btrfs_extent_generation(leaf, ei);
9132 rii->node_count = 1;
9133 } else if (level == rii->level) {
9141 btrfs_release_path(&path);
/*
 * Compare one on-disk root item against the bytenr/level/generation
 * recorded in roots_info_cache and, when they disagree, report it and
 * (unless @read_only_mode) rewrite the root item in place.
 *
 * Returns an error if the cache has no entry for the root, if the root
 * extent is ambiguous (node_count != 1), or if the root item's
 * generation is newer than the found root node's.
 * NOTE(review): this listing is elided; intervening lines (returns,
 * braces) are not shown.
 */
9146 static int maybe_repair_root_item(struct btrfs_path *path,
9147 const struct btrfs_key *root_key,
9148 const int read_only_mode)
9150 const u64 root_id = root_key->objectid;
9151 struct cache_extent *entry;
9152 struct root_item_info *rii;
9153 struct btrfs_root_item ri;
9154 unsigned long offset;
9156 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9159 "Error: could not find extent items for root %llu\n",
9160 root_key->objectid);
9164 rii = container_of(entry, struct root_item_info, cache_extent);
9165 ASSERT(rii->cache_extent.start == root_id);
9166 ASSERT(rii->cache_extent.size == 1);
9168 if (rii->node_count != 1) {
9170 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item into a CPU-side struct for comparison. */
9175 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9176 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9178 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9179 btrfs_root_level(&ri) != rii->level ||
9180 btrfs_root_generation(&ri) != rii->gen) {
9183 * If we're in repair mode but our caller told us to not update
9184 * the root item, i.e. just check if it needs to be updated, don't
9185 * print this message, since the caller will call us again shortly
9186 * for the same root item without read only mode (the caller will
9187 * open a transaction first).
9189 if (!(read_only_mode && repair))
9191 "%sroot item for root %llu,"
9192 " current bytenr %llu, current gen %llu, current level %u,"
9193 " new bytenr %llu, new gen %llu, new level %u\n",
9194 (read_only_mode ? "" : "fixing "),
9196 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9197 btrfs_root_level(&ri),
9198 rii->bytenr, rii->gen, rii->level);
/* A newer generation in the item than on disk is unrepairable here. */
9200 if (btrfs_root_generation(&ri) > rii->gen) {
9202 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9203 root_id, btrfs_root_generation(&ri), rii->gen);
9207 if (!read_only_mode) {
9208 btrfs_set_root_bytenr(&ri, rii->bytenr);
9209 btrfs_set_root_level(&ri, rii->level);
9210 btrfs_set_root_generation(&ri, rii->gen);
9211 write_extent_buffer(path->nodes[0], &ri,
9212 offset, sizeof(ri));
9222 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9223 * caused read-only snapshots to be corrupted if they were created at a moment
9224 * when the source subvolume/snapshot had orphan items. The issue was that the
9225 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9226 * node instead of the post orphan cleanup root node.
9227 * So this function, and its callees, just detects and fixes those cases. Even
9228 * though the regression was for read-only snapshots, this function applies to
9229 * any snapshot/subvolume root.
9230 * This must be run before any other repair code - not doing it so, makes other
9231 * repair code delete or modify backrefs in the extent tree for example, which
9232 * will result in an inconsistent fs after repairing the root items.
/*
 * NOTE(review): this listing is elided; intervening lines (loop
 * structure, labels, error paths) are not shown.  On success the
 * visible fragments suggest the function returns the number of fixed /
 * outdated roots - confirm against the full source.
 */
9234 static int repair_root_items(struct btrfs_fs_info *info)
9236 struct btrfs_path path;
9237 struct btrfs_key key;
9238 struct extent_buffer *leaf;
9239 struct btrfs_trans_handle *trans = NULL;
9244 btrfs_init_path(&path);
9246 ret = build_roots_info_cache(info);
9250 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9251 key.type = BTRFS_ROOT_ITEM_KEY;
9256 * Avoid opening and committing transactions if a leaf doesn't have
9257 * any root items that need to be fixed, so that we avoid rotating
9258 * backup roots unnecessarily.
9261 trans = btrfs_start_transaction(info->tree_root, 1);
9262 if (IS_ERR(trans)) {
9263 ret = PTR_ERR(trans);
9268 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
9272 leaf = path.nodes[0];
9275 struct btrfs_key found_key;
9277 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
9278 int no_more_keys = find_next_key(&path, &key);
9280 btrfs_release_path(&path);
9282 ret = btrfs_commit_transaction(trans,
9294 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9296 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9298 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* trans == NULL means dry-run (read-only check) for this leaf. */
9301 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
9305 if (!trans && repair) {
9308 btrfs_release_path(&path);
9318 free_roots_info_cache();
9319 btrfs_release_path(&path);
9321 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Remove the v1 free space cache: clear the cache inode/extent data of
 * every block group, then set super cache_generation to (u64)-1 inside
 * a committed transaction so the kernel knows the cache is invalid.
 * NOTE(review): this listing is elided; loop braces and returns are not
 * shown.
 */
9328 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
9330 struct btrfs_trans_handle *trans;
9331 struct btrfs_block_group_cache *bg_cache;
9335 /* Clear all free space cache inodes and its extent data */
9337 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
9340 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
9343 current = bg_cache->key.objectid + bg_cache->key.offset;
9346 /* Don't forget to set cache_generation to -1 */
9347 trans = btrfs_start_transaction(fs_info->tree_root, 0);
9348 if (IS_ERR(trans)) {
9349 error("failed to update super block cache generation");
9350 return PTR_ERR(trans);
9352 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
9353 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Handle --clear-space-cache v1|v2.
 *
 * @clear_version 1: refuse if a v2 free space tree exists, otherwise
 * wipe the v1 cache.  @clear_version 2: no-op if no free space tree is
 * present, otherwise delete it.
 * NOTE(review): this listing is elided; returns/braces are not shown.
 */
9358 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
9363 if (clear_version == 1) {
9364 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9366 "free space cache v2 detected, use --clear-space-cache v2");
9370 printf("Clearing free space cache\n");
9371 ret = clear_free_space_cache(fs_info);
9373 error("failed to clear free space cache");
9376 printf("Free space cache cleared\n");
9378 } else if (clear_version == 2) {
9379 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9380 printf("no free space cache v2 to clear\n");
9384 printf("Clear free space cache v2\n");
9385 ret = btrfs_clear_free_space_tree(fs_info);
9387 error("failed to clear free space cache v2: %d", ret);
9390 printf("free space cache v2 cleared\n");
/*
 * Usage text for "btrfs check", printed by usage()/help machinery.
 * One array entry per output line; keep option descriptions aligned
 * with the long_options table in cmd_check().
 */
9397 const char * const cmd_check_usage[] = {
9398 "btrfs check [options] <device>",
9399 "Check structural integrity of a filesystem (unmounted).",
9400 "Check structural integrity of an unmounted filesystem. Verify internal",
9401 "trees' consistency and item connectivity. In the repair mode try to",
9402 "fix the problems found. ",
9403 "WARNING: the repair mode is considered dangerous",
9405 "-s|--super <superblock> use this superblock copy",
9406 "-b|--backup use the first valid backup root copy",
9407 "--force skip mount checks, repair is not possible",
9408 "--repair try to repair the filesystem",
9409 "--readonly run in read-only mode (default)",
9410 "--init-csum-tree create a new CRC tree",
9411 "--init-extent-tree create a new extent tree",
9412 "--mode <MODE> allows choice of memory/IO trade-offs",
9413 " where MODE is one of:",
9414 " original - read inodes and extents to memory (requires",
9415 " more memory, does less IO)",
9416 " lowmem - try to use less memory but read blocks again",
9418 "--check-data-csum verify checksums of data blocks",
9419 "-Q|--qgroup-report print a report on qgroup consistency",
9420 "-E|--subvol-extents <subvolid>",
9421 " print subvolume extents and sharing state",
9422 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9423 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
9424 "-p|--progress indicate progress",
9425 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
9429 int cmd_check(int argc, char **argv)
9431 struct cache_tree root_cache;
9432 struct btrfs_root *root;
9433 struct btrfs_fs_info *info;
9436 u64 tree_root_bytenr = 0;
9437 u64 chunk_root_bytenr = 0;
9438 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9442 int init_csum_tree = 0;
9444 int clear_space_cache = 0;
9445 int qgroup_report = 0;
9446 int qgroups_repaired = 0;
9447 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
9452 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9453 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9454 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
9455 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
9457 static const struct option long_options[] = {
9458 { "super", required_argument, NULL, 's' },
9459 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9460 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9461 { "init-csum-tree", no_argument, NULL,
9462 GETOPT_VAL_INIT_CSUM },
9463 { "init-extent-tree", no_argument, NULL,
9464 GETOPT_VAL_INIT_EXTENT },
9465 { "check-data-csum", no_argument, NULL,
9466 GETOPT_VAL_CHECK_CSUM },
9467 { "backup", no_argument, NULL, 'b' },
9468 { "subvol-extents", required_argument, NULL, 'E' },
9469 { "qgroup-report", no_argument, NULL, 'Q' },
9470 { "tree-root", required_argument, NULL, 'r' },
9471 { "chunk-root", required_argument, NULL,
9472 GETOPT_VAL_CHUNK_TREE },
9473 { "progress", no_argument, NULL, 'p' },
9474 { "mode", required_argument, NULL,
9476 { "clear-space-cache", required_argument, NULL,
9477 GETOPT_VAL_CLEAR_SPACE_CACHE},
9478 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
9482 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
9486 case 'a': /* ignored */ break;
9488 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9491 num = arg_strtou64(optarg);
9492 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9494 "super mirror should be less than %d",
9495 BTRFS_SUPER_MIRROR_MAX);
9498 bytenr = btrfs_sb_offset(((int)num));
9499 printf("using SB copy %llu, bytenr %llu\n", num,
9500 (unsigned long long)bytenr);
9506 subvolid = arg_strtou64(optarg);
9509 tree_root_bytenr = arg_strtou64(optarg);
9511 case GETOPT_VAL_CHUNK_TREE:
9512 chunk_root_bytenr = arg_strtou64(optarg);
9515 ctx.progress_enabled = true;
9519 usage(cmd_check_usage);
9520 case GETOPT_VAL_REPAIR:
9521 printf("enabling repair mode\n");
9523 ctree_flags |= OPEN_CTREE_WRITES;
9525 case GETOPT_VAL_READONLY:
9528 case GETOPT_VAL_INIT_CSUM:
9529 printf("Creating a new CRC tree\n");
9532 ctree_flags |= OPEN_CTREE_WRITES;
9534 case GETOPT_VAL_INIT_EXTENT:
9535 init_extent_tree = 1;
9536 ctree_flags |= (OPEN_CTREE_WRITES |
9537 OPEN_CTREE_NO_BLOCK_GROUPS);
9540 case GETOPT_VAL_CHECK_CSUM:
9541 check_data_csum = 1;
9543 case GETOPT_VAL_MODE:
9544 check_mode = parse_check_mode(optarg);
9545 if (check_mode == CHECK_MODE_UNKNOWN) {
9546 error("unknown mode: %s", optarg);
9550 case GETOPT_VAL_CLEAR_SPACE_CACHE:
9551 if (strcmp(optarg, "v1") == 0) {
9552 clear_space_cache = 1;
9553 } else if (strcmp(optarg, "v2") == 0) {
9554 clear_space_cache = 2;
9555 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
9558 "invalid argument to --clear-space-cache, must be v1 or v2");
9561 ctree_flags |= OPEN_CTREE_WRITES;
9563 case GETOPT_VAL_FORCE:
9569 if (check_argc_exact(argc - optind, 1))
9570 usage(cmd_check_usage);
9572 if (ctx.progress_enabled) {
9573 ctx.tp = TASK_NOTHING;
9574 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9577 /* This check is the only reason for --readonly to exist */
9578 if (readonly && repair) {
9579 error("repair options are not compatible with --readonly");
9584 * experimental and dangerous
9586 if (repair && check_mode == CHECK_MODE_LOWMEM)
9587 warning("low-memory mode repair support is only partial");
9590 cache_tree_init(&root_cache);
9592 ret = check_mounted(argv[optind]);
9595 error("could not check mount status: %s",
9601 "%s is currently mounted, use --force if you really intend to check the filesystem",
9609 error("repair and --force is not yet supported");
9616 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
9620 "filesystem mounted, continuing because of --force");
9622 /* A block device is mounted in exclusive mode by kernel */
9623 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
9626 /* only allow partial opening under repair mode */
9628 ctree_flags |= OPEN_CTREE_PARTIAL;
9630 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9631 chunk_root_bytenr, ctree_flags);
9633 error("cannot open file system");
9640 root = info->fs_root;
9641 uuid_unparse(info->super_copy->fsid, uuidbuf);
9643 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9646 * Check the bare minimum before starting anything else that could rely
9647 * on it, namely the tree roots, any local consistency checks
9649 if (!extent_buffer_uptodate(info->tree_root->node) ||
9650 !extent_buffer_uptodate(info->dev_root->node) ||
9651 !extent_buffer_uptodate(info->chunk_root->node)) {
9652 error("critical roots corrupted, unable to check the filesystem");
9658 if (clear_space_cache) {
9659 ret = do_clear_free_space_cache(info, clear_space_cache);
9665 * repair mode will force us to commit transaction which
9666 * will make us fail to load log tree when mounting.
9668 if (repair && btrfs_super_log_root(info->super_copy)) {
9669 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
9675 ret = zero_log_tree(root);
9678 error("failed to zero log tree: %d", ret);
9683 if (qgroup_report) {
9684 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9686 ret = qgroup_verify_all(info);
9693 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9694 subvolid, argv[optind], uuidbuf);
9695 ret = print_extent_state(info, subvolid);
9700 if (init_extent_tree || init_csum_tree) {
9701 struct btrfs_trans_handle *trans;
9703 trans = btrfs_start_transaction(info->extent_root, 0);
9704 if (IS_ERR(trans)) {
9705 error("error starting transaction");
9706 ret = PTR_ERR(trans);
9711 if (init_extent_tree) {
9712 printf("Creating a new extent tree\n");
9713 ret = reinit_extent_tree(trans, info);
9719 if (init_csum_tree) {
9720 printf("Reinitialize checksum tree\n");
9721 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9723 error("checksum tree initialization failed: %d",
9730 ret = fill_csum_tree(trans, info->csum_root,
9734 error("checksum tree refilling failed: %d", ret);
9739 * Ok now we commit and run the normal fsck, which will add
9740 * extent entries for all of the items it finds.
9742 ret = btrfs_commit_transaction(trans, info->extent_root);
9747 if (!extent_buffer_uptodate(info->extent_root->node)) {
9748 error("critical: extent_root, unable to check the filesystem");
9753 if (!extent_buffer_uptodate(info->csum_root->node)) {
9754 error("critical: csum_root, unable to check the filesystem");
9760 if (!init_extent_tree) {
9761 ret = repair_root_items(info);
9764 error("failed to repair root items: %s", strerror(-ret));
9768 fprintf(stderr, "Fixed %d roots.\n", ret);
9770 } else if (ret > 0) {
9772 "Found %d roots with an outdated root item.\n",
9775 "Please run a filesystem check with the option --repair to fix them.\n");
9782 ret = do_check_chunks_and_extents(info);
9786 "errors found in extent allocation tree or chunk allocation");
9788 /* Only re-check super size after we checked and repaired the fs */
9789 err |= !is_super_size_valid(info);
9791 if (!ctx.progress_enabled) {
9792 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9793 fprintf(stderr, "checking free space tree\n");
9795 fprintf(stderr, "checking free space cache\n");
9797 ret = check_space_cache(root);
9800 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9801 error("errors found in free space tree");
9803 error("errors found in free space cache");
9808 * We used to have to have these hole extents in between our real
9809 * extents so if we don't have this flag set we need to make sure there
9810 * are no gaps in the file extents for inodes, otherwise we can just
9811 * ignore it when this happens.
9813 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
9814 ret = do_check_fs_roots(info, &root_cache);
9817 error("errors found in fs roots");
9821 fprintf(stderr, "checking csums\n");
9822 ret = check_csums(root);
9825 error("errors found in csum tree");
9829 fprintf(stderr, "checking root refs\n");
9830 /* For low memory mode, check_fs_roots_v2 handles root refs */
9831 if (check_mode != CHECK_MODE_LOWMEM) {
9832 ret = check_root_refs(root, &root_cache);
9835 error("errors found in root refs");
9840 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9841 struct extent_buffer *eb;
9843 eb = list_first_entry(&root->fs_info->recow_ebs,
9844 struct extent_buffer, recow);
9845 list_del_init(&eb->recow);
9846 ret = recow_extent_buffer(root, eb);
9849 error("fails to fix transid errors");
9854 while (!list_empty(&delete_items)) {
9855 struct bad_item *bad;
9857 bad = list_first_entry(&delete_items, struct bad_item, list);
9858 list_del_init(&bad->list);
9860 ret = delete_bad_item(root, bad);
9866 if (info->quota_enabled) {
9867 fprintf(stderr, "checking quota groups\n");
9868 ret = qgroup_verify_all(info);
9871 error("failed to check quota groups");
9875 ret = repair_qgroups(info, &qgroups_repaired);
9878 error("failed to repair quota groups");
9884 if (!list_empty(&root->fs_info->recow_ebs)) {
9885 error("transid errors in file system");
9890 printf("found %llu bytes used, ",
9891 (unsigned long long)bytes_used);
9893 printf("error(s) found\n");
9895 printf("no error found\n");
9896 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9897 printf("total tree bytes: %llu\n",
9898 (unsigned long long)total_btree_bytes);
9899 printf("total fs tree bytes: %llu\n",
9900 (unsigned long long)total_fs_tree_bytes);
9901 printf("total extent tree bytes: %llu\n",
9902 (unsigned long long)total_extent_tree_bytes);
9903 printf("btree space waste bytes: %llu\n",
9904 (unsigned long long)btree_space_waste);
9905 printf("file data blocks allocated: %llu\n referenced %llu\n",
9906 (unsigned long long)data_bytes_allocated,
9907 (unsigned long long)data_bytes_referenced);
9909 free_qgroup_counts();
9910 free_root_recs_tree(&root_cache);
9914 if (ctx.progress_enabled)
9915 task_deinit(ctx.info);