2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
54 TASK_NOTHING, /* have to be the last element */
59 enum task_position tp;
61 struct task_info *info;
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
81 enum btrfs_check_mode {
85 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
88 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
92 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94 struct data_backref *back1 = to_data_backref(ext1);
95 struct data_backref *back2 = to_data_backref(ext2);
97 WARN_ON(!ext1->is_data);
98 WARN_ON(!ext2->is_data);
100 /* parent and root are a union, so this covers both */
101 if (back1->parent > back2->parent)
103 if (back1->parent < back2->parent)
106 /* This is a full backref and the parents match. */
107 if (back1->node.full_backref)
110 if (back1->owner > back2->owner)
112 if (back1->owner < back2->owner)
115 if (back1->offset > back2->offset)
117 if (back1->offset < back2->offset)
120 if (back1->found_ref && back2->found_ref) {
121 if (back1->disk_bytenr > back2->disk_bytenr)
123 if (back1->disk_bytenr < back2->disk_bytenr)
126 if (back1->bytes > back2->bytes)
128 if (back1->bytes < back2->bytes)
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
137 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139 struct tree_backref *back1 = to_tree_backref(ext1);
140 struct tree_backref *back2 = to_tree_backref(ext2);
142 WARN_ON(ext1->is_data);
143 WARN_ON(ext2->is_data);
145 /* parent and root are a union, so this covers both */
146 if (back1->parent > back2->parent)
148 if (back1->parent < back2->parent)
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
156 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
159 if (ext1->is_data > ext2->is_data)
162 if (ext1->is_data < ext2->is_data)
165 if (ext1->full_backref > ext2->full_backref)
167 if (ext1->full_backref < ext2->full_backref)
171 return compare_data_backref(node1, node2);
173 return compare_tree_backref(node1, node2);
177 static void *print_status_check(void *p)
179 struct task_ctx *priv = p;
180 const char work_indicator[] = { '.', 'o', 'O', 'o' };
182 static char *task_position_string[] = {
184 "checking free space cache",
188 task_period_start(priv->info, 1000 /* 1s */);
190 if (priv->tp == TASK_NOTHING)
194 printf("%s [%c]\r", task_position_string[priv->tp],
195 work_indicator[count % 4]);
198 task_period_wait(priv->info);
203 static int print_status_return(void *p)
211 static enum btrfs_check_mode parse_check_mode(const char *str)
213 if (strcmp(str, "lowmem") == 0)
214 return CHECK_MODE_LOWMEM;
215 if (strcmp(str, "orig") == 0)
216 return CHECK_MODE_ORIGINAL;
217 if (strcmp(str, "original") == 0)
218 return CHECK_MODE_ORIGINAL;
220 return CHECK_MODE_UNKNOWN;
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
226 struct file_extent_hole *hole;
228 if (RB_EMPTY_ROOT(holes))
231 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
237 struct file_extent_hole *hole1;
238 struct file_extent_hole *hole2;
240 hole1 = rb_entry(node1, struct file_extent_hole, node);
241 hole2 = rb_entry(node2, struct file_extent_hole, node);
243 if (hole1->start > hole2->start)
245 if (hole1->start < hole2->start)
247 /* Now hole1->start == hole2->start */
248 if (hole1->len >= hole2->len)
250 * Hole 1 will be merge center
251 * Same hole will be merged later
254 /* Hole 2 will be merge center */
259 * Add a hole to the record
261 * This will do hole merge for copy_file_extent_holes(),
262 * which will ensure there won't be continuous holes.
264 static int add_file_extent_hole(struct rb_root *holes,
267 struct file_extent_hole *hole;
268 struct file_extent_hole *prev = NULL;
269 struct file_extent_hole *next = NULL;
271 hole = malloc(sizeof(*hole));
276 /* Since compare will not return 0, no -EEXIST will happen */
277 rb_insert(holes, &hole->node, compare_hole);
279 /* simple merge with previous hole */
280 if (rb_prev(&hole->node))
281 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
283 if (prev && prev->start + prev->len >= hole->start) {
284 hole->len = hole->start + hole->len - prev->start;
285 hole->start = prev->start;
286 rb_erase(&prev->node, holes);
291 /* iterate merge with next holes */
293 if (!rb_next(&hole->node))
295 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
297 if (hole->start + hole->len >= next->start) {
298 if (hole->start + hole->len <= next->start + next->len)
299 hole->len = next->start + next->len -
301 rb_erase(&next->node, holes);
310 static int compare_hole_range(struct rb_node *node, void *data)
312 struct file_extent_hole *hole;
315 hole = (struct file_extent_hole *)data;
318 hole = rb_entry(node, struct file_extent_hole, node);
319 if (start < hole->start)
321 if (start >= hole->start && start < hole->start + hole->len)
327 * Delete a hole in the record
329 * This will do the hole split and is much restrict than add.
331 static int del_file_extent_hole(struct rb_root *holes,
334 struct file_extent_hole *hole;
335 struct file_extent_hole tmp;
340 struct rb_node *node;
347 node = rb_search(holes, &tmp, compare_hole_range, NULL);
350 hole = rb_entry(node, struct file_extent_hole, node);
351 if (start + len > hole->start + hole->len)
355 * Now there will be no overlap, delete the hole and re-add the
356 * split(s) if they exists.
358 if (start > hole->start) {
359 prev_start = hole->start;
360 prev_len = start - hole->start;
363 if (hole->start + hole->len > start + len) {
364 next_start = start + len;
365 next_len = hole->start + hole->len - start - len;
368 rb_erase(node, holes);
371 ret = add_file_extent_hole(holes, prev_start, prev_len);
376 ret = add_file_extent_hole(holes, next_start, next_len);
383 static int copy_file_extent_holes(struct rb_root *dst,
386 struct file_extent_hole *hole;
387 struct rb_node *node;
390 node = rb_first(src);
392 hole = rb_entry(node, struct file_extent_hole, node);
393 ret = add_file_extent_hole(dst, hole->start, hole->len);
396 node = rb_next(node);
401 static void free_file_extent_holes(struct rb_root *holes)
403 struct rb_node *node;
404 struct file_extent_hole *hole;
406 node = rb_first(holes);
408 hole = rb_entry(node, struct file_extent_hole, node);
409 rb_erase(node, holes);
411 node = rb_first(holes);
415 static void record_root_in_trans(struct btrfs_trans_handle *trans,
416 struct btrfs_root *root)
418 if (root->last_trans != trans->transid) {
419 root->track_dirty = 1;
420 root->last_trans = trans->transid;
421 root->commit_root = root->node;
422 extent_buffer_get(root->node);
426 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
428 struct device_record *rec1;
429 struct device_record *rec2;
431 rec1 = rb_entry(node1, struct device_record, node);
432 rec2 = rb_entry(node2, struct device_record, node);
433 if (rec1->devid > rec2->devid)
435 else if (rec1->devid < rec2->devid)
441 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
443 struct inode_record *rec;
444 struct inode_backref *backref;
445 struct inode_backref *orig;
446 struct inode_backref *tmp;
447 struct orphan_data_extent *src_orphan;
448 struct orphan_data_extent *dst_orphan;
453 rec = malloc(sizeof(*rec));
455 return ERR_PTR(-ENOMEM);
456 memcpy(rec, orig_rec, sizeof(*rec));
458 INIT_LIST_HEAD(&rec->backrefs);
459 INIT_LIST_HEAD(&rec->orphan_extents);
460 rec->holes = RB_ROOT;
462 list_for_each_entry(orig, &orig_rec->backrefs, list) {
463 size = sizeof(*orig) + orig->namelen + 1;
464 backref = malloc(size);
469 memcpy(backref, orig, size);
470 list_add_tail(&backref->list, &rec->backrefs);
472 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
473 dst_orphan = malloc(sizeof(*dst_orphan));
478 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
479 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
481 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
488 rb = rb_first(&rec->holes);
490 struct file_extent_hole *hole;
492 hole = rb_entry(rb, struct file_extent_hole, node);
498 if (!list_empty(&rec->backrefs))
499 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
500 list_del(&orig->list);
504 if (!list_empty(&rec->orphan_extents))
505 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
506 list_del(&orig->list);
515 static void print_orphan_data_extents(struct list_head *orphan_extents,
518 struct orphan_data_extent *orphan;
520 if (list_empty(orphan_extents))
522 printf("The following data extent is lost in tree %llu:\n",
524 list_for_each_entry(orphan, orphan_extents, list) {
525 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
526 orphan->objectid, orphan->offset, orphan->disk_bytenr,
531 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
533 u64 root_objectid = root->root_key.objectid;
534 int errors = rec->errors;
538 /* reloc root errors, we print its corresponding fs root objectid*/
539 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
540 root_objectid = root->root_key.offset;
541 fprintf(stderr, "reloc");
543 fprintf(stderr, "root %llu inode %llu errors %x",
544 (unsigned long long) root_objectid,
545 (unsigned long long) rec->ino, rec->errors);
547 if (errors & I_ERR_NO_INODE_ITEM)
548 fprintf(stderr, ", no inode item");
549 if (errors & I_ERR_NO_ORPHAN_ITEM)
550 fprintf(stderr, ", no orphan item");
551 if (errors & I_ERR_DUP_INODE_ITEM)
552 fprintf(stderr, ", dup inode item");
553 if (errors & I_ERR_DUP_DIR_INDEX)
554 fprintf(stderr, ", dup dir index");
555 if (errors & I_ERR_ODD_DIR_ITEM)
556 fprintf(stderr, ", odd dir item");
557 if (errors & I_ERR_ODD_FILE_EXTENT)
558 fprintf(stderr, ", odd file extent");
559 if (errors & I_ERR_BAD_FILE_EXTENT)
560 fprintf(stderr, ", bad file extent");
561 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
562 fprintf(stderr, ", file extent overlap");
563 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
564 fprintf(stderr, ", file extent discount");
565 if (errors & I_ERR_DIR_ISIZE_WRONG)
566 fprintf(stderr, ", dir isize wrong");
567 if (errors & I_ERR_FILE_NBYTES_WRONG)
568 fprintf(stderr, ", nbytes wrong");
569 if (errors & I_ERR_ODD_CSUM_ITEM)
570 fprintf(stderr, ", odd csum item");
571 if (errors & I_ERR_SOME_CSUM_MISSING)
572 fprintf(stderr, ", some csum missing");
573 if (errors & I_ERR_LINK_COUNT_WRONG)
574 fprintf(stderr, ", link count wrong");
575 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
576 fprintf(stderr, ", orphan file extent");
577 fprintf(stderr, "\n");
578 /* Print the orphan extents if needed */
579 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
580 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
582 /* Print the holes if needed */
583 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
584 struct file_extent_hole *hole;
585 struct rb_node *node;
588 node = rb_first(&rec->holes);
589 fprintf(stderr, "Found file extent holes:\n");
592 hole = rb_entry(node, struct file_extent_hole, node);
593 fprintf(stderr, "\tstart: %llu, len: %llu\n",
594 hole->start, hole->len);
595 node = rb_next(node);
598 fprintf(stderr, "\tstart: 0, len: %llu\n",
600 root->fs_info->sectorsize));
604 static void print_ref_error(int errors)
606 if (errors & REF_ERR_NO_DIR_ITEM)
607 fprintf(stderr, ", no dir item");
608 if (errors & REF_ERR_NO_DIR_INDEX)
609 fprintf(stderr, ", no dir index");
610 if (errors & REF_ERR_NO_INODE_REF)
611 fprintf(stderr, ", no inode ref");
612 if (errors & REF_ERR_DUP_DIR_ITEM)
613 fprintf(stderr, ", dup dir item");
614 if (errors & REF_ERR_DUP_DIR_INDEX)
615 fprintf(stderr, ", dup dir index");
616 if (errors & REF_ERR_DUP_INODE_REF)
617 fprintf(stderr, ", dup inode ref");
618 if (errors & REF_ERR_INDEX_UNMATCH)
619 fprintf(stderr, ", index mismatch");
620 if (errors & REF_ERR_FILETYPE_UNMATCH)
621 fprintf(stderr, ", filetype mismatch");
622 if (errors & REF_ERR_NAME_TOO_LONG)
623 fprintf(stderr, ", name too long");
624 if (errors & REF_ERR_NO_ROOT_REF)
625 fprintf(stderr, ", no root ref");
626 if (errors & REF_ERR_NO_ROOT_BACKREF)
627 fprintf(stderr, ", no root backref");
628 if (errors & REF_ERR_DUP_ROOT_REF)
629 fprintf(stderr, ", dup root ref");
630 if (errors & REF_ERR_DUP_ROOT_BACKREF)
631 fprintf(stderr, ", dup root backref");
632 fprintf(stderr, "\n");
635 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638 struct ptr_node *node;
639 struct cache_extent *cache;
640 struct inode_record *rec = NULL;
643 cache = lookup_cache_extent(inode_cache, ino, 1);
645 node = container_of(cache, struct ptr_node, cache);
647 if (mod && rec->refs > 1) {
648 node->data = clone_inode_rec(rec);
649 if (IS_ERR(node->data))
655 rec = calloc(1, sizeof(*rec));
657 return ERR_PTR(-ENOMEM);
659 rec->extent_start = (u64)-1;
661 INIT_LIST_HEAD(&rec->backrefs);
662 INIT_LIST_HEAD(&rec->orphan_extents);
663 rec->holes = RB_ROOT;
665 node = malloc(sizeof(*node));
668 return ERR_PTR(-ENOMEM);
670 node->cache.start = ino;
671 node->cache.size = 1;
674 if (ino == BTRFS_FREE_INO_OBJECTID)
677 ret = insert_cache_extent(inode_cache, &node->cache);
679 return ERR_PTR(-EEXIST);
684 static void free_orphan_data_extents(struct list_head *orphan_extents)
686 struct orphan_data_extent *orphan;
688 while (!list_empty(orphan_extents)) {
689 orphan = list_entry(orphan_extents->next,
690 struct orphan_data_extent, list);
691 list_del(&orphan->list);
696 static void free_inode_rec(struct inode_record *rec)
698 struct inode_backref *backref;
703 while (!list_empty(&rec->backrefs)) {
704 backref = to_inode_backref(rec->backrefs.next);
705 list_del(&backref->list);
708 free_orphan_data_extents(&rec->orphan_extents);
709 free_file_extent_holes(&rec->holes);
713 static int can_free_inode_rec(struct inode_record *rec)
715 if (!rec->errors && rec->checked && rec->found_inode_item &&
716 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
721 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
722 struct inode_record *rec)
724 struct cache_extent *cache;
725 struct inode_backref *tmp, *backref;
726 struct ptr_node *node;
729 if (!rec->found_inode_item)
732 filetype = imode_to_type(rec->imode);
733 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
734 if (backref->found_dir_item && backref->found_dir_index) {
735 if (backref->filetype != filetype)
736 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
737 if (!backref->errors && backref->found_inode_ref &&
738 rec->nlink == rec->found_link) {
739 list_del(&backref->list);
745 if (!rec->checked || rec->merging)
748 if (S_ISDIR(rec->imode)) {
749 if (rec->found_size != rec->isize)
750 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
751 if (rec->found_file_extent)
752 rec->errors |= I_ERR_ODD_FILE_EXTENT;
753 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
754 if (rec->found_dir_item)
755 rec->errors |= I_ERR_ODD_DIR_ITEM;
756 if (rec->found_size != rec->nbytes)
757 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
758 if (rec->nlink > 0 && !no_holes &&
759 (rec->extent_end < rec->isize ||
760 first_extent_gap(&rec->holes) < rec->isize))
761 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
765 if (rec->found_csum_item && rec->nodatasum)
766 rec->errors |= I_ERR_ODD_CSUM_ITEM;
767 if (rec->some_csum_missing && !rec->nodatasum)
768 rec->errors |= I_ERR_SOME_CSUM_MISSING;
771 BUG_ON(rec->refs != 1);
772 if (can_free_inode_rec(rec)) {
773 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
774 node = container_of(cache, struct ptr_node, cache);
775 BUG_ON(node->data != rec);
776 remove_cache_extent(inode_cache, &node->cache);
782 static int check_orphan_item(struct btrfs_root *root, u64 ino)
784 struct btrfs_path path;
785 struct btrfs_key key;
788 key.objectid = BTRFS_ORPHAN_OBJECTID;
789 key.type = BTRFS_ORPHAN_ITEM_KEY;
792 btrfs_init_path(&path);
793 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
794 btrfs_release_path(&path);
800 static int process_inode_item(struct extent_buffer *eb,
801 int slot, struct btrfs_key *key,
802 struct shared_node *active_node)
804 struct inode_record *rec;
805 struct btrfs_inode_item *item;
807 rec = active_node->current;
808 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
809 if (rec->found_inode_item) {
810 rec->errors |= I_ERR_DUP_INODE_ITEM;
813 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
814 rec->nlink = btrfs_inode_nlink(eb, item);
815 rec->isize = btrfs_inode_size(eb, item);
816 rec->nbytes = btrfs_inode_nbytes(eb, item);
817 rec->imode = btrfs_inode_mode(eb, item);
818 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
820 rec->found_inode_item = 1;
822 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
823 maybe_free_inode_rec(&active_node->inode_cache, rec);
827 static struct inode_backref *get_inode_backref(struct inode_record *rec,
829 int namelen, u64 dir)
831 struct inode_backref *backref;
833 list_for_each_entry(backref, &rec->backrefs, list) {
834 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
836 if (backref->dir != dir || backref->namelen != namelen)
838 if (memcmp(name, backref->name, namelen))
843 backref = malloc(sizeof(*backref) + namelen + 1);
846 memset(backref, 0, sizeof(*backref));
848 backref->namelen = namelen;
849 memcpy(backref->name, name, namelen);
850 backref->name[namelen] = '\0';
851 list_add_tail(&backref->list, &rec->backrefs);
855 static int add_inode_backref(struct cache_tree *inode_cache,
856 u64 ino, u64 dir, u64 index,
857 const char *name, int namelen,
858 u8 filetype, u8 itemtype, int errors)
860 struct inode_record *rec;
861 struct inode_backref *backref;
863 rec = get_inode_rec(inode_cache, ino, 1);
865 backref = get_inode_backref(rec, name, namelen, dir);
868 backref->errors |= errors;
869 if (itemtype == BTRFS_DIR_INDEX_KEY) {
870 if (backref->found_dir_index)
871 backref->errors |= REF_ERR_DUP_DIR_INDEX;
872 if (backref->found_inode_ref && backref->index != index)
873 backref->errors |= REF_ERR_INDEX_UNMATCH;
874 if (backref->found_dir_item && backref->filetype != filetype)
875 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
877 backref->index = index;
878 backref->filetype = filetype;
879 backref->found_dir_index = 1;
880 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
882 if (backref->found_dir_item)
883 backref->errors |= REF_ERR_DUP_DIR_ITEM;
884 if (backref->found_dir_index && backref->filetype != filetype)
885 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
887 backref->filetype = filetype;
888 backref->found_dir_item = 1;
889 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
890 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
891 if (backref->found_inode_ref)
892 backref->errors |= REF_ERR_DUP_INODE_REF;
893 if (backref->found_dir_index && backref->index != index)
894 backref->errors |= REF_ERR_INDEX_UNMATCH;
896 backref->index = index;
898 backref->ref_type = itemtype;
899 backref->found_inode_ref = 1;
904 maybe_free_inode_rec(inode_cache, rec);
908 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
909 struct cache_tree *dst_cache)
911 struct inode_backref *backref;
916 list_for_each_entry(backref, &src->backrefs, list) {
917 if (backref->found_dir_index) {
918 add_inode_backref(dst_cache, dst->ino, backref->dir,
919 backref->index, backref->name,
920 backref->namelen, backref->filetype,
921 BTRFS_DIR_INDEX_KEY, backref->errors);
923 if (backref->found_dir_item) {
925 add_inode_backref(dst_cache, dst->ino,
926 backref->dir, 0, backref->name,
927 backref->namelen, backref->filetype,
928 BTRFS_DIR_ITEM_KEY, backref->errors);
930 if (backref->found_inode_ref) {
931 add_inode_backref(dst_cache, dst->ino,
932 backref->dir, backref->index,
933 backref->name, backref->namelen, 0,
934 backref->ref_type, backref->errors);
938 if (src->found_dir_item)
939 dst->found_dir_item = 1;
940 if (src->found_file_extent)
941 dst->found_file_extent = 1;
942 if (src->found_csum_item)
943 dst->found_csum_item = 1;
944 if (src->some_csum_missing)
945 dst->some_csum_missing = 1;
946 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
947 ret = copy_file_extent_holes(&dst->holes, &src->holes);
952 BUG_ON(src->found_link < dir_count);
953 dst->found_link += src->found_link - dir_count;
954 dst->found_size += src->found_size;
955 if (src->extent_start != (u64)-1) {
956 if (dst->extent_start == (u64)-1) {
957 dst->extent_start = src->extent_start;
958 dst->extent_end = src->extent_end;
960 if (dst->extent_end > src->extent_start)
961 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
962 else if (dst->extent_end < src->extent_start) {
963 ret = add_file_extent_hole(&dst->holes,
965 src->extent_start - dst->extent_end);
967 if (dst->extent_end < src->extent_end)
968 dst->extent_end = src->extent_end;
972 dst->errors |= src->errors;
973 if (src->found_inode_item) {
974 if (!dst->found_inode_item) {
975 dst->nlink = src->nlink;
976 dst->isize = src->isize;
977 dst->nbytes = src->nbytes;
978 dst->imode = src->imode;
979 dst->nodatasum = src->nodatasum;
980 dst->found_inode_item = 1;
982 dst->errors |= I_ERR_DUP_INODE_ITEM;
990 static int splice_shared_node(struct shared_node *src_node,
991 struct shared_node *dst_node)
993 struct cache_extent *cache;
994 struct ptr_node *node, *ins;
995 struct cache_tree *src, *dst;
996 struct inode_record *rec, *conflict;
1001 if (--src_node->refs == 0)
1003 if (src_node->current)
1004 current_ino = src_node->current->ino;
1006 src = &src_node->root_cache;
1007 dst = &dst_node->root_cache;
1009 cache = search_cache_extent(src, 0);
1011 node = container_of(cache, struct ptr_node, cache);
1013 cache = next_cache_extent(cache);
1016 remove_cache_extent(src, &node->cache);
1019 ins = malloc(sizeof(*ins));
1021 ins->cache.start = node->cache.start;
1022 ins->cache.size = node->cache.size;
1026 ret = insert_cache_extent(dst, &ins->cache);
1027 if (ret == -EEXIST) {
1028 conflict = get_inode_rec(dst, rec->ino, 1);
1029 BUG_ON(IS_ERR(conflict));
1030 merge_inode_recs(rec, conflict, dst);
1032 conflict->checked = 1;
1033 if (dst_node->current == conflict)
1034 dst_node->current = NULL;
1036 maybe_free_inode_rec(dst, conflict);
1037 free_inode_rec(rec);
1044 if (src == &src_node->root_cache) {
1045 src = &src_node->inode_cache;
1046 dst = &dst_node->inode_cache;
1050 if (current_ino > 0 && (!dst_node->current ||
1051 current_ino > dst_node->current->ino)) {
1052 if (dst_node->current) {
1053 dst_node->current->checked = 1;
1054 maybe_free_inode_rec(dst, dst_node->current);
1056 dst_node->current = get_inode_rec(dst, current_ino, 1);
1057 BUG_ON(IS_ERR(dst_node->current));
1062 static void free_inode_ptr(struct cache_extent *cache)
1064 struct ptr_node *node;
1065 struct inode_record *rec;
1067 node = container_of(cache, struct ptr_node, cache);
1069 free_inode_rec(rec);
1073 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1075 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078 struct cache_extent *cache;
1079 struct shared_node *node;
1081 cache = lookup_cache_extent(shared, bytenr, 1);
1083 node = container_of(cache, struct shared_node, cache);
1089 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 struct shared_node *node;
1094 node = calloc(1, sizeof(*node));
1097 node->cache.start = bytenr;
1098 node->cache.size = 1;
1099 cache_tree_init(&node->root_cache);
1100 cache_tree_init(&node->inode_cache);
1103 ret = insert_cache_extent(shared, &node->cache);
1108 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1109 struct walk_control *wc, int level)
1111 struct shared_node *node;
1112 struct shared_node *dest;
1115 if (level == wc->active_node)
1118 BUG_ON(wc->active_node <= level);
1119 node = find_shared_node(&wc->shared, bytenr);
1121 ret = add_shared_node(&wc->shared, bytenr, refs);
1123 node = find_shared_node(&wc->shared, bytenr);
1124 wc->nodes[level] = node;
1125 wc->active_node = level;
1129 if (wc->root_level == wc->active_node &&
1130 btrfs_root_refs(&root->root_item) == 0) {
1131 if (--node->refs == 0) {
1132 free_inode_recs_tree(&node->root_cache);
1133 free_inode_recs_tree(&node->inode_cache);
1134 remove_cache_extent(&wc->shared, &node->cache);
1140 dest = wc->nodes[wc->active_node];
1141 splice_shared_node(node, dest);
1142 if (node->refs == 0) {
1143 remove_cache_extent(&wc->shared, &node->cache);
1149 static int leave_shared_node(struct btrfs_root *root,
1150 struct walk_control *wc, int level)
1152 struct shared_node *node;
1153 struct shared_node *dest;
1156 if (level == wc->root_level)
1159 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1163 BUG_ON(i >= BTRFS_MAX_LEVEL);
1165 node = wc->nodes[wc->active_node];
1166 wc->nodes[wc->active_node] = NULL;
1167 wc->active_node = i;
1169 dest = wc->nodes[wc->active_node];
1170 if (wc->active_node < wc->root_level ||
1171 btrfs_root_refs(&root->root_item) > 0) {
1172 BUG_ON(node->refs <= 1);
1173 splice_shared_node(node, dest);
1175 BUG_ON(node->refs < 2);
1184 * 1 - if the root with id child_root_id is a child of root parent_root_id
1185 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1186 * has other root(s) as parent(s)
1187 * 2 - if the root child_root_id doesn't have any parent roots
1189 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192 struct btrfs_path path;
1193 struct btrfs_key key;
1194 struct extent_buffer *leaf;
1198 btrfs_init_path(&path);
1200 key.objectid = parent_root_id;
1201 key.type = BTRFS_ROOT_REF_KEY;
1202 key.offset = child_root_id;
1203 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1207 btrfs_release_path(&path);
1211 key.objectid = child_root_id;
1212 key.type = BTRFS_ROOT_BACKREF_KEY;
1214 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1220 leaf = path.nodes[0];
1221 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1222 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225 leaf = path.nodes[0];
1228 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1229 if (key.objectid != child_root_id ||
1230 key.type != BTRFS_ROOT_BACKREF_KEY)
1235 if (key.offset == parent_root_id) {
1236 btrfs_release_path(&path);
1243 btrfs_release_path(&path);
1246 return has_parent ? 0 : 2;
1249 static int process_dir_item(struct extent_buffer *eb,
1250 int slot, struct btrfs_key *key,
1251 struct shared_node *active_node)
1261 struct btrfs_dir_item *di;
1262 struct inode_record *rec;
1263 struct cache_tree *root_cache;
1264 struct cache_tree *inode_cache;
1265 struct btrfs_key location;
1266 char namebuf[BTRFS_NAME_LEN];
1268 root_cache = &active_node->root_cache;
1269 inode_cache = &active_node->inode_cache;
1270 rec = active_node->current;
1271 rec->found_dir_item = 1;
1273 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1274 total = btrfs_item_size_nr(eb, slot);
1275 while (cur < total) {
1277 btrfs_dir_item_key_to_cpu(eb, di, &location);
1278 name_len = btrfs_dir_name_len(eb, di);
1279 data_len = btrfs_dir_data_len(eb, di);
1280 filetype = btrfs_dir_type(eb, di);
1282 rec->found_size += name_len;
1283 if (cur + sizeof(*di) + name_len > total ||
1284 name_len > BTRFS_NAME_LEN) {
1285 error = REF_ERR_NAME_TOO_LONG;
1287 if (cur + sizeof(*di) > total)
1289 len = min_t(u32, total - cur - sizeof(*di),
1296 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1298 if (key->type == BTRFS_DIR_ITEM_KEY &&
1299 key->offset != btrfs_name_hash(namebuf, len)) {
1300 rec->errors |= I_ERR_ODD_DIR_ITEM;
1301 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1302 key->objectid, key->offset, namebuf, len, filetype,
1303 key->offset, btrfs_name_hash(namebuf, len));
1306 if (location.type == BTRFS_INODE_ITEM_KEY) {
1307 add_inode_backref(inode_cache, location.objectid,
1308 key->objectid, key->offset, namebuf,
1309 len, filetype, key->type, error);
1310 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1311 add_inode_backref(root_cache, location.objectid,
1312 key->objectid, key->offset,
1313 namebuf, len, filetype,
1317 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1318 location.type, key->objectid, key->offset);
1319 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1320 key->objectid, key->offset, namebuf,
1321 len, filetype, key->type, error);
1324 len = sizeof(*di) + name_len + data_len;
1325 di = (struct btrfs_dir_item *)((char *)di + len);
1328 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1329 rec->errors |= I_ERR_DUP_DIR_INDEX;
1334 static int process_inode_ref(struct extent_buffer *eb,
1335 int slot, struct btrfs_key *key,
1336 struct shared_node *active_node)
1344 struct cache_tree *inode_cache;
1345 struct btrfs_inode_ref *ref;
1346 char namebuf[BTRFS_NAME_LEN];
1348 inode_cache = &active_node->inode_cache;
1350 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1351 total = btrfs_item_size_nr(eb, slot);
1352 while (cur < total) {
1353 name_len = btrfs_inode_ref_name_len(eb, ref);
1354 index = btrfs_inode_ref_index(eb, ref);
1356 /* inode_ref + namelen should not cross item boundary */
1357 if (cur + sizeof(*ref) + name_len > total ||
1358 name_len > BTRFS_NAME_LEN) {
1359 if (total < cur + sizeof(*ref))
1362 /* Still try to read out the remaining part */
1363 len = min_t(u32, total - cur - sizeof(*ref),
1365 error = REF_ERR_NAME_TOO_LONG;
1371 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1372 add_inode_backref(inode_cache, key->objectid, key->offset,
1373 index, namebuf, len, 0, key->type, error);
1375 len = sizeof(*ref) + name_len;
1376 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * process_inode_extref - record every name packed into one INODE_EXTREF item.
 *
 * Walks the btrfs_inode_extref entries of the item at @slot of @eb. Each
 * entry carries its own parent directory id, which is passed to
 * add_inode_backref() together with key->objectid, the entry index, the
 * copied name and key->type.
 *
 * NOTE(review): unlike process_inode_ref(), no check that the entry stays
 * inside the item is visible here; only name_len is compared against
 * BTRFS_NAME_LEN. Some short lines of the function are absent from this
 * listing, so confirm against the full source.
 */
1382 static int process_inode_extref(struct extent_buffer *eb,
1383 int slot, struct btrfs_key *key,
1384 struct shared_node *active_node)
1393 struct cache_tree *inode_cache;
1394 struct btrfs_inode_extref *extref;
1395 char namebuf[BTRFS_NAME_LEN];
1397 inode_cache = &active_node->inode_cache;
1399 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1400 total = btrfs_item_size_nr(eb, slot);
1401 while (cur < total) {
1402 name_len = btrfs_inode_extref_name_len(eb, extref);
1403 index = btrfs_inode_extref_index(eb, extref);
1404 parent = btrfs_inode_extref_parent(eb, extref);
1405 if (name_len <= BTRFS_NAME_LEN) {
/* presumably the over-long branch: clamp the copy and flag the name */
1409 len = BTRFS_NAME_LEN;
1410 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size extref header. */
1412 read_extent_buffer(eb, namebuf,
1413 (unsigned long)(extref + 1), len);
1414 add_inode_backref(inode_cache, key->objectid, parent,
1415 index, namebuf, len, 0, key->type, error);
/* Advance by the on-disk name length, not by the clamped len. */
1417 len = sizeof(*extref) + name_len;
1418 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * process_file_extent - account one EXTENT_DATA item against the inode record
 * currently being built in @active_node.
 *
 * Visible effects on the record:
 *  - found_file_extent is set, extent_start/extent_end track the covered
 *    file range, and a gap before key->offset is recorded as a hole;
 *  - an item starting before the previous extent_end sets
 *    I_ERR_FILE_EXTENT_OVERLAP;
 *  - malformed inline, regular or preallocated extents set
 *    I_ERR_BAD_FILE_EXTENT;
 *  - found_size grows by the extent length (for regular and preallocated
 *    extents only when disk_bytenr is non-zero);
 *  - for extents with a disk_bytenr, outside the data reloc tree, the csum
 *    range is counted: regular extents set found_csum_item and, when fewer
 *    bytes than num_bytes were found, some_csum_missing; preallocated
 *    extents set I_ERR_ODD_CSUM_ITEM.
 *
 * NOTE(review): several short lines (conditions, else keywords, returns) are
 * absent from this listing, so the exact guards on some of the statements
 * above cannot be confirmed here.
 */
1425 static int process_file_extent(struct btrfs_root *root,
1426 struct extent_buffer *eb,
1427 int slot, struct btrfs_key *key,
1428 struct shared_node *active_node)
1430 struct inode_record *rec;
1431 struct btrfs_file_extent_item *fi;
1433 u64 disk_bytenr = 0;
1434 u64 extent_offset = 0;
/* Low-bit mask used for sector alignment tests and rounding. */
1435 u64 mask = root->fs_info->sectorsize - 1;
1439 rec = active_node->current;
1440 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1441 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any extent yet. */
1443 if (rec->extent_start == (u64)-1) {
1444 rec->extent_start = key->offset;
1445 rec->extent_end = key->offset;
1448 if (rec->extent_end > key->offset)
1449 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1450 else if (rec->extent_end < key->offset) {
1451 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1452 key->offset - rec->extent_end);
1457 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1458 extent_type = btrfs_file_extent_type(eb, fi);
1460 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1461 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1463 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1464 rec->found_size += num_bytes;
/* Round the inline length up to a whole sector. */
1465 num_bytes = (num_bytes + mask) & ~mask;
1466 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1467 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1468 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1469 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1470 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1471 if (num_bytes == 0 || (num_bytes & mask))
1472 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside ram_bytes. */
1473 if (num_bytes + extent_offset >
1474 btrfs_file_extent_ram_bytes(eb, fi))
1475 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1476 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1477 (btrfs_file_extent_compression(eb, fi) ||
1478 btrfs_file_extent_encryption(eb, fi) ||
1479 btrfs_file_extent_other_encoding(eb, fi)))
1480 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1481 if (disk_bytenr > 0)
1482 rec->found_size += num_bytes;
1484 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1486 rec->extent_end = key->offset + num_bytes;
1489 * The data reloc tree will copy full extents into its inode and then
1490 * copy the corresponding csums. Because the extent it copied could be
1491 * a preallocated extent that hasn't been written to yet there may be no
1492 * csums to copy, ergo we won't have csums for our file extent. This is
1493 * ok so just don't bother checking csums if the inode belongs to the
1496 if (disk_bytenr > 0 &&
1497 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1499 if (btrfs_file_extent_compression(eb, fi))
1500 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1502 disk_bytenr += extent_offset;
1504 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1508 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1510 rec->found_csum_item = 1;
1511 if (found < num_bytes)
1512 rec->some_csum_missing = 1;
1513 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1515 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * process_one_leaf - feed every item of leaf @eb into the inode records of
 * the active shared node of @wc.
 *
 * For each item the key is decoded; when the key belongs to a higher inode
 * number than the current record (or there is no current record), the
 * previous record is marked checked and offered to maybe_free_inode_rec(),
 * and a record for the new objectid is fetched with get_inode_rec(). The
 * item is then dispatched by key type to process_dir_item(),
 * process_inode_ref(), process_inode_extref(), process_inode_item() or
 * process_file_extent().
 *
 * NOTE(review): the bodies of the early checks (root with zero refs, free
 * space objectid, orphan items), the switch statement line and the return
 * are absent from this listing.
 */
1521 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1522 struct walk_control *wc)
1524 struct btrfs_key key;
1528 struct cache_tree *inode_cache;
1529 struct shared_node *active_node;
/* Special case: the active node is the root level of a root with 0 refs. */
1531 if (wc->root_level == wc->active_node &&
1532 btrfs_root_refs(&root->root_item) == 0)
1535 active_node = wc->nodes[wc->active_node];
1536 inode_cache = &active_node->inode_cache;
1537 nritems = btrfs_header_nritems(eb);
1538 for (i = 0; i < nritems; i++) {
1539 btrfs_item_key_to_cpu(eb, &key, i);
1541 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1543 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Switch to a new inode record when the objectid moves forward. */
1546 if (active_node->current == NULL ||
1547 active_node->current->ino < key.objectid) {
1548 if (active_node->current) {
1549 active_node->current->checked = 1;
1550 maybe_free_inode_rec(inode_cache,
1551 active_node->current);
1553 active_node->current = get_inode_rec(inode_cache,
/* A failed record lookup is treated as fatal. */
1555 BUG_ON(IS_ERR(active_node->current));
1558 case BTRFS_DIR_ITEM_KEY:
1559 case BTRFS_DIR_INDEX_KEY:
1560 ret = process_dir_item(eb, i, &key, active_node);
1562 case BTRFS_INODE_REF_KEY:
1563 ret = process_inode_ref(eb, i, &key, active_node);
1565 case BTRFS_INODE_EXTREF_KEY:
1566 ret = process_inode_extref(eb, i, &key, active_node);
1568 case BTRFS_INODE_ITEM_KEY:
1569 ret = process_inode_item(eb, i, &key, active_node);
1571 case BTRFS_EXTENT_DATA_KEY:
1572 ret = process_file_extent(root, eb, i, &key,
/*
 * Forward declarations: both functions are called by process_one_leaf_v2()
 * before their definitions appear in this file.
 */
1582 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1583 struct extent_buffer *eb, struct node_refs *nrefs,
1584 u64 level, int check_all);
1585 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1586 unsigned int ext_ref);
/*
 * process_one_leaf_v2 - check the inodes that start in the leaf at
 * path->nodes[0].
 *
 * The leaf is scanned for the first INODE_ITEM key, or the first change of
 * objectid; check_inode_item() is then called on @path and its result is
 * accumulated into a bitmap. check_inode_item() may move @path to another
 * leaf. When that happens, the path is walked from the root level down,
 * update_nodes_refs() is called for nodes whose bytenr differs from the one
 * cached in @nrefs, and nrefs->need_check[] is consulted for that level.
 * Extent buffers below *level are released and their path slots cleared.
 *
 * NOTE(review): loop headers, break/goto lines and the return are absent
 * from this listing; the return contract is the one stated in the fragment
 * that follows.
 */
1589 * Returns >0 Found error, not fatal, should continue
1590 * Returns <0 Fatal error, must exit the whole check
1591 * Returns 0 No errors found
1593 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1594 struct node_refs *nrefs, int *level, int ext_ref)
1596 struct extent_buffer *cur = path->nodes[0];
1597 struct btrfs_key key;
1601 int root_level = btrfs_header_level(root->node);
1603 int ret = 0; /* Final return value */
1604 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started in, to detect a leaf switch later. */
1606 cur_bytenr = cur->start;
1608 /* skip to first inode item or the first inode number change */
1609 nritems = btrfs_header_nritems(cur);
1610 for (i = 0; i < nritems; i++) {
1611 btrfs_item_key_to_cpu(cur, &key, i);
1613 first_ino = key.objectid;
1614 if (key.type == BTRFS_INODE_ITEM_KEY ||
1615 (first_ino && first_ino != key.objectid))
1619 path->slots[0] = nritems;
1625 err |= check_inode_item(root, path, ext_ref);
1627 /* modify cur since check_inode_item may change path */
1628 cur = path->nodes[0];
1630 if (err & LAST_ITEM)
1633 /* still have inode items in this leaf */
1634 if (cur->start == cur_bytenr)
1638 * we have switched to another leaf, above nodes may
1639 * have changed, here walk down the path, if a node
1640 * or leaf is shared, check whether we can skip this
1643 for (i = root_level; i >= 0; i--) {
/* Same block as cached in nrefs for this level. */
1644 if (path->nodes[i]->start == nrefs->bytenr[i])
1647 ret = update_nodes_refs(root, path->nodes[i]->start,
1648 path->nodes[i], nrefs, i, 0);
1652 if (!nrefs->need_check[i]) {
/* Drop the references held on every level below *level. */
1658 for (i = 0; i < *level; i++) {
1659 free_extent_buffer(path->nodes[i]);
1660 path->nodes[i] = NULL;
/*
 * need_check - decide whether @root is the tree that should check a block
 * whose owning root ids are listed in @roots.
 *
 * Visible logic: a list with zero or one entry is handled by an early exit;
 * otherwise the first rb-tree entry of @roots is fetched and its value is
 * compared with root->objectid.
 *
 * NOTE(review): the return statements are absent from this listing;
 * presumably 1 means check and 0 means skip - confirm against the callers.
 */
1670 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1671 * in every fs or file tree check. Here we find its all root ids, and only check
1672 * it in the fs or file tree which has the smallest root id.
1674 static int need_check(struct btrfs_root *root, struct ulist *roots)
1676 struct rb_node *node;
1677 struct ulist_node *u;
1680 * @roots can be empty if it belongs to tree reloc tree
1681 * In that case, we should always check the leaf, as we can't use
1682 * the tree owner to ensure some other root will check it.
1684 if (roots->nnodes == 1 || roots->nnodes == 0)
1687 node = rb_first(&roots->root);
1688 u = rb_entry(node, struct ulist_node, rb_node);
1690 * current root id is not smallest, we skip it and let it be checked
1691 * in the fs or file tree who hash the smallest root id.
1693 if (root->objectid != u->val)
/*
 * calc_extent_flag_v2 - work out whether tree block @eb should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and update the caller flags accordingly.
 *
 * Cheap tests come first: the objectid of @root, whether @eb is the root
 * block, the RELOC header flag and the header owner. If those do not decide,
 * the extent item of the block is looked up in the extent tree; its flags
 * are tested and its inline refs are scanned for a TREE_BLOCK_REF whose
 * offset equals the header owner. The two visible exits clear or set
 * BTRFS_BLOCK_FLAG_FULL_BACKREF in *flags_ret, and the path is freed.
 *
 * NOTE(review): the third parameter line, the goto targets and all return
 * statements are absent from this listing, so which test leads to which
 * exit cannot be confirmed here.
 */
1699 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1702 struct btrfs_root *extent_root = root->fs_info->extent_root;
1703 struct btrfs_root_item *ri = &root->root_item;
1704 struct btrfs_extent_inline_ref *iref;
1705 struct btrfs_extent_item *ei;
1706 struct btrfs_key key;
1707 struct btrfs_path *path = NULL;
1718 * Except file/reloc tree, we can not have FULL BACKREF MODE
1720 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* @eb is the root block of this tree. */
1724 if (eb->start == btrfs_root_bytenr(ri))
1727 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1730 owner = btrfs_header_owner(eb);
1731 if (owner == root->objectid)
1734 path = btrfs_alloc_path();
/* Search with the largest offset, then step back to the extent item. */
1738 key.objectid = btrfs_header_bytenr(eb);
1740 key.offset = (u64)-1;
1742 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1749 ret = btrfs_previous_extent_item(extent_root, path,
1755 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on eb refers to the extent tree leaf, not the input block. */
1757 eb = path->nodes[0];
1758 slot = path->slots[0];
1759 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1761 flags = btrfs_extent_flags(eb, ei);
1762 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs live between the end of the extent item header and end. */
1765 ptr = (unsigned long)(ei + 1);
1766 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keys have a tree_block_info before the inline refs. */
1768 if (key.type == BTRFS_EXTENT_ITEM_KEY)
1769 ptr += sizeof(struct btrfs_tree_block_info);
1772 /* Reached extent item ends normally */
1776 /* Beyond extent item end, wrong item size */
1778 error("extent item at bytenr %llu slot %d has wrong size",
1783 iref = (struct btrfs_extent_inline_ref *)ptr;
1784 offset = btrfs_extent_inline_ref_offset(eb, iref);
1785 type = btrfs_extent_inline_ref_type(eb, iref);
1787 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1789 ptr += btrfs_extent_inline_ref_size(type);
1793 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1797 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1799 btrfs_free_path(path);
/*
 * update_nodes_refs - refresh the per-level cache in @nrefs for one block.
 *
 * If nrefs->bytenr[level] already equals @bytenr, the first test takes an
 * early path. Otherwise, when @bytenr is not (u64)-1, the extent info of
 * the block is looked up and bytenr, refs, full_backref (reset to 0) and
 * checked (reset to 0) are stored for @level. btrfs_find_all_roots() and
 * need_check() feed nrefs->need_check[level]; the root level is always
 * marked as needing a check, and one visible branch copies the value of
 * the level above. When @check_all is set and @eb is given,
 * calc_extent_flag_v2() is called and full_backref[level] is set if the
 * FULL_BACKREF bit is present in flags.
 *
 * NOTE(review): the conditions that select between the need_check
 * assignments, the goto lines and the returns are absent from this
 * listing.
 */
1804 * for a tree node or leaf, we record its reference count, so later if we still
1805 * process this node or leaf, don't need to compute its reference count again.
1807 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1809 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1810 struct extent_buffer *eb, struct node_refs *nrefs,
1811 u64 level, int check_all)
1813 struct ulist *roots;
1816 int root_level = btrfs_header_level(root->node);
1820 if (nrefs->bytenr[level] == bytenr)
1823 if (bytenr != (u64)-1) {
1824 /* the return value of this function seems a mistake */
1825 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1826 level, 1, &refs, &flags);
1828 if (ret < 0 && !check_all)
1831 nrefs->bytenr[level] = bytenr;
1832 nrefs->refs[level] = refs;
1833 nrefs->full_backref[level] = 0;
1834 nrefs->checked[level] = 0;
1837 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1842 check = need_check(root, roots);
1844 nrefs->need_check[level] = check;
1847 nrefs->need_check[level] = 1;
1849 if (level == root_level) {
1850 nrefs->need_check[level] = 1;
1853 * The node refs may have not been
1854 * updated if upper needs checking (the
1855 * lowest root_objectid) the node can
1858 nrefs->need_check[level] =
1859 nrefs->need_check[level + 1];
1865 if (check_all && eb) {
1866 calc_extent_flag_v2(root, eb, &flags);
1867 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1868 nrefs->full_backref[level] = 1;
1875 * @level if @level == -1 means extent data item
1876 * else normal tree block.
/*
 * should_check_extent_strictly - decide, from the cached reference counts of
 * the levels above @level, how strictly an extent at @level is checked.
 *
 * @level outside the range -1 .. root level and @level equal to the root
 * level are handled by early exits. Otherwise every level above @level, up
 * to the root level, is tested for nrefs->refs[] greater than 1.
 *
 * NOTE(review): the return values of all three paths are absent from this
 * listing; presumably a shared upper level means the check is not strict -
 * confirm against the full source.
 */
1878 static int should_check_extent_strictly(struct btrfs_root *root,
1879 struct node_refs *nrefs, int level)
1881 int root_level = btrfs_header_level(root->node);
1883 if (level > root_level || level < -1)
1885 if (level == root_level)
1888 * if the upper node is marked full backref, it should contain shared
1889 * backref of the parent (except owner == root->objectid).
/* Pre-increment: the scan starts one level above the given level. */
1891 while (++level <= root_level)
1892 if (nrefs->refs[level] > 1)
/*
 * walk_down_tree - descend from path->nodes[*level] towards the leaves,
 * processing leaves with process_one_leaf() on the way.
 *
 * Reference counts are taken from @nrefs when the cached bytenr matches and
 * are looked up (and cached) otherwise. enter_shared_node() is called for
 * the starting node and for child blocks. For each child pointer the block
 * is found in cache or read from disk; an unreadable child is recorded with
 * btrfs_add_corrupt_extent_record(). A child must pass check_child_node()
 * and btrfs_check_leaf() or btrfs_check_node() before *level is decremented
 * and the child is installed in @path at slot 0. The last visible statement
 * sets the slot of the current level to its item count.
 *
 * NOTE(review): the conditions guarding enter_shared_node(), the leaf test
 * before process_one_leaf(), error exits and the return are absent from
 * this listing.
 */
1898 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1899 struct walk_control *wc, int *level,
1900 struct node_refs *nrefs)
1902 enum btrfs_tree_block_status status;
1905 struct btrfs_fs_info *fs_info = root->fs_info;
1906 struct extent_buffer *next;
1907 struct extent_buffer *cur;
1911 WARN_ON(*level < 0);
1912 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when nrefs already describes this block. */
1914 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1915 refs = nrefs->refs[*level];
1918 ret = btrfs_lookup_extent_info(NULL, root,
1919 path->nodes[*level]->start,
1920 *level, 1, &refs, NULL);
1925 nrefs->bytenr[*level] = path->nodes[*level]->start;
1926 nrefs->refs[*level] = refs;
1930 ret = enter_shared_node(root, path->nodes[*level]->start,
1938 while (*level >= 0) {
1939 WARN_ON(*level < 0);
1940 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1941 cur = path->nodes[*level];
/* The header level must agree with the level we think we are at. */
1943 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
1946 if (path->slots[*level] >= btrfs_header_nritems(cur))
1949 ret = process_one_leaf(root, cur, wc);
1954 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1955 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme as above, for the child one level down. */
1957 if (bytenr == nrefs->bytenr[*level - 1]) {
1958 refs = nrefs->refs[*level - 1];
1960 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1961 *level - 1, 1, &refs, NULL);
1965 nrefs->bytenr[*level - 1] = bytenr;
1966 nrefs->refs[*level - 1] = refs;
1971 ret = enter_shared_node(root, bytenr, refs,
1974 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read the block. */
1979 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
1980 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1981 free_extent_buffer(next);
1982 reada_walk_down(root, cur, path->slots[*level]);
1983 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
1984 if (!extent_buffer_uptodate(next)) {
1985 struct btrfs_key node_key;
/* Record the unreadable child as a corrupt extent. */
1987 btrfs_node_key_to_cpu(path->nodes[*level],
1989 path->slots[*level]);
1990 btrfs_add_corrupt_extent_record(root->fs_info,
1992 path->nodes[*level]->start,
1993 root->fs_info->nodesize,
2000 ret = check_child_node(cur, path->slots[*level], next);
2002 free_extent_buffer(next);
2007 if (btrfs_is_leaf(next))
2008 status = btrfs_check_leaf(root, NULL, next);
2010 status = btrfs_check_node(root, NULL, next);
2011 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2012 free_extent_buffer(next);
/* Step down: replace the path entry one level below with the child. */
2017 *level = *level - 1;
2018 free_extent_buffer(path->nodes[*level]);
2019 path->nodes[*level] = next;
2020 path->slots[*level] = 0;
2023 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * account_bytes - add the block at path->nodes[level] to the global size
 * counters.
 *
 * total_btree_bytes always grows by the block length; total_fs_tree_bytes
 * grows when fs_root_objectid() accepts root->objectid, and
 * total_extent_tree_bytes when the header owner is the extent tree.
 * btree_space_waste grows by the free space of a leaf, or by the unused
 * key pointer slots of a node.
 *
 * NOTE(review): the third parameter line and the leaf/node test are absent
 * from this listing.
 */
2028 * Update global fs information.
2030 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2034 struct extent_buffer *eb = path->nodes[level];
2036 total_btree_bytes += eb->len;
2037 if (fs_root_objectid(root->objectid))
2038 total_fs_tree_bytes += eb->len;
2039 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2040 total_extent_tree_bytes += eb->len;
2043 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* Unused pointer slots of a node, converted to bytes. */
2045 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2046 btrfs_header_nritems(eb));
2047 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
/*
 * repair_tree_block_ref - add the missing backref for tree block @node.
 *
 * @trans:  transaction used for the extent tree modifications
 * @root:   tree the block was reached from
 * @node:   tree block whose backref is missing
 * @nrefs:  per-level cache; refs[] and full_backref[] of @level are updated
 * @level:  level of @node
 * @err:    error bits from the preceding check
 *
 * Nothing is repaired unless BACKREF_MISSING is set in @err. The extent
 * item of the block is searched in the extent tree. When an extent item has
 * to be inserted, an empty item is created (EXTENT_ITEM plus
 * tree_block_info without skinny metadata, METADATA_ITEM with it), its
 * refs are set to 0 and generation and flags are filled in, and the block
 * group accounting is updated. Afterwards one reference is added with
 * btrfs_inc_extent_ref(), using the block one level up as parent when that
 * level is marked full backref and the header owner differs from
 * root->objectid. On success BACKREF_MISSING is cleared from @err.
 *
 * NOTE(review): this listing omits some short lines of the function
 * (conditions, goto lines, the return). The only code change made here is
 * restoring the address-of expression on copy_key, which had been turned
 * into a copyright sign by an HTML entity decoder.
 * NOTE(review): the WARN_ON below uses > while the walk_down helpers use
 * >= BTRFS_MAX_LEVEL; confirm which bound is intended.
 */
2052 * This function only handles BACKREF_MISSING,
2053 * If corresponding extent item exists, increase the ref, else insert an extent
2056 * Returns error bits after repair.
2058 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2059 struct btrfs_root *root,
2060 struct extent_buffer *node,
2061 struct node_refs *nrefs, int level, int err)
2063 struct btrfs_fs_info *fs_info = root->fs_info;
2064 struct btrfs_root *extent_root = fs_info->extent_root;
2065 struct btrfs_path path;
2066 struct btrfs_extent_item *ei;
2067 struct btrfs_tree_block_info *bi;
2068 struct btrfs_key key;
2069 struct extent_buffer *eb;
2070 u32 size = sizeof(*ei);
2071 u32 node_size = root->fs_info->nodesize;
2072 int insert_extent = 0;
2073 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2074 int root_level = btrfs_header_level(root->node);
2079 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
/* Only a missing backref is handled here. */
2082 if ((err & BACKREF_MISSING) == 0)
2085 WARN_ON(level > BTRFS_MAX_LEVEL);
2088 btrfs_init_path(&path);
2089 bytenr = btrfs_header_bytenr(node);
2090 owner = btrfs_header_owner(node);
2091 generation = btrfs_header_generation(node);
2093 key.objectid = bytenr;
2095 key.offset = (u64)-1;
2097 /* Search for the extent item */
2098 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2104 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2108 /* calculate if the extent item flag is full backref or not */
2109 if (nrefs->full_backref[level] != 0)
2110 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2112 /* insert an extent item */
2113 if (insert_extent) {
2114 struct btrfs_disk_key copy_key;
2116 generation = btrfs_header_generation(node);
2118 if (level < root_level && nrefs->full_backref[level + 1] &&
2119 owner != root->objectid) {
2120 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2123 key.objectid = bytenr;
2124 if (!skinny_metadata) {
2125 key.type = BTRFS_EXTENT_ITEM_KEY;
2126 key.offset = node_size;
/* Non-skinny items carry a tree_block_info after the extent item. */
2127 size += sizeof(*bi);
2129 key.type = BTRFS_METADATA_ITEM_KEY;
2133 btrfs_release_path(&path);
2134 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2140 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* Start at zero references; the increment below adds the real one. */
2142 btrfs_set_extent_refs(eb, ei, 0);
2143 btrfs_set_extent_generation(eb, ei, generation);
2144 btrfs_set_extent_flags(eb, ei, flags);
2146 if (!skinny_metadata) {
2147 bi = (struct btrfs_tree_block_info *)(ei + 1);
2148 memset_extent_buffer(eb, 0, (unsigned long)bi,
/* Key stored in the block info: (root objectid, type 0, offset 0). */
2150 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2151 btrfs_set_disk_key_type(&copy_key, 0);
2152 btrfs_set_disk_key_offset(&copy_key, 0);
2154 btrfs_set_tree_block_level(eb, bi, level);
2155 btrfs_set_tree_block_key(eb, bi, &copy_key);
2157 btrfs_mark_buffer_dirty(eb);
2158 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2159 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
/* Keep the cached state in step with the freshly written item. */
2161 nrefs->refs[level] = 0;
2162 nrefs->full_backref[level] =
2163 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2164 btrfs_release_path(&path);
/* Use a shared (parent based) backref when the level above is full backref. */
2167 if (level < root_level && nrefs->full_backref[level + 1] &&
2168 owner != root->objectid)
2169 parent = nrefs->bytenr[level + 1];
2171 /* increase the ref */
2172 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2173 parent, root->objectid, level, 0);
2175 nrefs->refs[level]++;
2177 btrfs_release_path(&path);
2180 "failed to repair tree block ref start %llu root %llu due to %s",
2181 bytenr, root->objectid, strerror(-ret));
2183 printf("Added one tree block ref start %llu %s %llu\n",
2184 bytenr, parent ? "parent" : "root",
2185 parent ? parent : root->objectid);
2186 err &= ~BACKREF_MISSING;
/*
 * Forward declarations: these three functions are called by
 * walk_down_tree_v2() or by functions it calls (check_inode_item via
 * process_one_leaf_v2) before their definitions appear in this file.
 */
2192 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2193 unsigned int ext_ref);
2194 static int check_tree_block_ref(struct btrfs_root *root,
2195 struct extent_buffer *eb, u64 bytenr,
2196 int level, u64 owner, struct node_refs *nrefs);
2197 static int check_leaf_items(struct btrfs_trans_handle *trans,
2198 struct btrfs_root *root, struct btrfs_path *path,
2199 struct node_refs *nrefs, int account_bytes);
/*
 * walk_down_tree_v2 - descend from path->nodes[*level] towards the leaves.
 *
 * The cache in @nrefs is refreshed for the starting block. At each step the
 * tree block ref of the current block is checked with
 * check_tree_block_ref() (and repair_tree_block_ref() is called on the
 * result) when the block has not been checked yet in check_all mode, or
 * needs checking otherwise; in check_all mode the block is also accounted
 * with account_bytes() if it needs checking and has a non-zero ref count.
 * Leaves are validated with btrfs_check_leaf() and handed to
 * process_one_leaf_v2() or check_leaf_items(); nodes are validated with
 * btrfs_check_node(). For a node, the child pointer is read, the cache for
 * the child level is refreshed, children that need no check are skipped
 * when check_all is 0, and otherwise the child is read, verified and
 * installed in @path one level down.
 *
 * NOTE(review): the last parameter line, the leaf/node test, the conditions
 * selecting process_one_leaf_v2() versus check_leaf_items(), error exits
 * and the return are absent from this listing; the return contract is the
 * one stated in the fragment that follows.
 */
2202 * @trans just for lowmem repair mode
2203 * @check all if not 0 then check all tree block backrefs and items
2204 * 0 then just check relationship of items in fs tree(s)
2206 * Returns >0 Found error, should continue
2207 * Returns <0 Fatal error, must exit the whole check
2208 * Returns 0 No errors found
2210 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2211 struct btrfs_root *root, struct btrfs_path *path,
2212 int *level, struct node_refs *nrefs, int ext_ref,
2216 enum btrfs_tree_block_status status;
2219 struct btrfs_fs_info *fs_info = root->fs_info;
2220 struct extent_buffer *next;
2221 struct extent_buffer *cur;
2225 int account_file_data = 0;
2227 WARN_ON(*level < 0);
2228 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2230 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2231 path->nodes[*level], nrefs, *level, check_all);
2235 while (*level >= 0) {
2236 WARN_ON(*level < 0);
2237 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238 cur = path->nodes[*level];
2239 bytenr = btrfs_header_bytenr(cur);
2240 check = nrefs->need_check[*level];
/* The header level must agree with the level we think we are at. */
2242 if (btrfs_header_level(cur) != *level)
2245 * Update bytes accounting and check tree block ref
2246 * NOTE: Doing accounting and check before checking nritems
2247 * is necessary because of empty node/leaf.
2249 if ((check_all && !nrefs->checked[*level]) ||
2250 (!check_all && nrefs->need_check[*level])) {
2251 ret = check_tree_block_ref(root, cur,
2252 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2253 btrfs_header_owner(cur), nrefs);
2256 ret = repair_tree_block_ref(trans, root,
2257 path->nodes[*level], nrefs, *level, ret);
2260 if (check_all && nrefs->need_check[*level] &&
2261 nrefs->refs[*level]) {
2262 account_bytes(root, path, *level);
2263 account_file_data = 1;
/* Remember that this block has had its ref check done. */
2265 nrefs->checked[*level] = 1;
/* All slots of this block have been consumed. */
2268 if (path->slots[*level] >= btrfs_header_nritems(cur))
2271 /* Do not forget to check leaf/node validation */
2273 /* skip duplicate check */
2274 if (check || !check_all) {
2275 ret = btrfs_check_leaf(root, NULL, cur);
2276 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2284 ret = process_one_leaf_v2(root, path, nrefs,
2287 ret = check_leaf_items(trans, root, path,
2288 nrefs, account_file_data);
2292 if (check || !check_all) {
2293 ret = btrfs_check_node(root, NULL, cur);
2294 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2301 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2302 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Refresh the cache for the child; no buffer is available yet. */
2304 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2309 * check all trees in check_chunks_and_extent_v2
2310 * check shared node once in check_fs_roots
2312 if (!check_all && !nrefs->need_check[*level - 1]) {
2313 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read the block. */
2317 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2318 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2319 free_extent_buffer(next);
2320 reada_walk_down(root, cur, path->slots[*level]);
2321 next = read_tree_block(fs_info, bytenr, ptr_gen);
2322 if (!extent_buffer_uptodate(next)) {
2323 struct btrfs_key node_key;
/* Record the unreadable child as a corrupt extent. */
2325 btrfs_node_key_to_cpu(path->nodes[*level],
2327 path->slots[*level]);
2328 btrfs_add_corrupt_extent_record(fs_info,
2329 &node_key, path->nodes[*level]->start,
2330 fs_info->nodesize, *level);
2336 ret = check_child_node(cur, path->slots[*level], next);
2341 if (btrfs_is_leaf(next))
2342 status = btrfs_check_leaf(root, NULL, next);
2344 status = btrfs_check_node(root, NULL, next);
2345 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2346 free_extent_buffer(next);
/* Step down: replace the path entry one level below with the child. */
2351 *level = *level - 1;
2352 free_extent_buffer(path->nodes[*level]);
2353 path->nodes[*level] = next;
2354 path->slots[*level] = 0;
2355 account_file_data = 0;
/* bytenr (u64)-1: only the full_backref state of the level is updated. */
2357 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * walk_up_tree - climb @path from *level until a block with an unvisited
 * slot is found.
 *
 * For each populated level, starting at *level, the next slot is tested
 * against the item count of the block. In the other visible branch the
 * buffer at *level is released and its path entry cleared, and
 * leave_shared_node() is called when *level equals wc->active_node
 * (a *level above active_node triggers BUG_ON).
 *
 * NOTE(review): the body taken when another slot exists, the update of
 * *level and the returns are absent from this listing.
 */
2362 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2363 struct walk_control *wc, int *level)
2366 struct extent_buffer *leaf;
2368 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Named leaf, but it is the block at level i, whatever that level is. */
2369 leaf = path->nodes[i];
2370 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2375 free_extent_buffer(path->nodes[*level]);
2376 path->nodes[*level] = NULL;
2377 BUG_ON(*level > wc->active_node);
2378 if (*level == wc->active_node)
2379 leave_shared_node(root, wc, *level);
/*
 * walk_up_tree_v2 - climb @path from *level until a block with an unvisited
 * slot is found.
 *
 * Same visible shape as walk_up_tree() but without any walk_control
 * handling: blocks with no further slot are released and their path entry
 * cleared.
 *
 * NOTE(review): the second parameter line, the body taken when another slot
 * exists, the update of *level and the returns are absent from this
 * listing.
 */
2386 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2390 struct extent_buffer *leaf;
2392 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Named leaf, but it is the block at level i, whatever that level is. */
2393 leaf = path->nodes[i];
2394 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2399 free_extent_buffer(path->nodes[*level]);
2400 path->nodes[*level] = NULL;
/*
 * check_root_dir - sanity check the inode record of a root directory.
 *
 * Each visible test rejects the record when it fails: the inode item must
 * have been found with no errors, nlink must be 1 and found_link 0, the
 * backref list must not be empty, and the first backref must be an inode
 * ref with index 0 and the two byte name of two dots, and must not have
 * been seen as a dir index or dir item.
 *
 * NOTE(review): the goto and return lines are absent from this listing, so
 * the return values cannot be confirmed here.
 */
2407 static int check_root_dir(struct inode_record *rec)
2409 struct inode_backref *backref;
2412 if (!rec->found_inode_item || rec->errors)
2414 if (rec->nlink != 1 || rec->found_link != 0)
2416 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2418 backref = to_inode_backref(rec->backrefs.next);
2419 if (!backref->found_inode_ref)
2421 if (backref->index != 0 || backref->namelen != 2 ||
2422 memcmp(backref->name, "..", 2))
2424 if (backref->found_dir_index || backref->found_dir_item)
/*
 * repair_inode_isize - rewrite the size field of the inode item of rec->ino
 * with rec->found_size.
 *
 * The search key uses offset (u64)-1; the key found at the resulting slot
 * is compared with rec->ino before the item is modified. On the visible
 * success path the leaf is marked dirty, I_ERR_DIR_ISIZE_WRONG is cleared
 * from rec->errors, a message is printed and @path is released.
 *
 * NOTE(review): error exits, any slot adjustment after the search and the
 * return are absent from this listing.
 */
2431 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2432 struct btrfs_root *root, struct btrfs_path *path,
2433 struct inode_record *rec)
2435 struct btrfs_inode_item *ei;
2436 struct btrfs_key key;
2439 key.objectid = rec->ino;
2440 key.type = BTRFS_INODE_ITEM_KEY;
2441 key.offset = (u64)-1;
2443 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 is a special case handled by lines not shown here. */
2447 if (!path->slots[0]) {
2454 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired. */
2455 if (key.objectid != rec->ino) {
2460 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2461 struct btrfs_inode_item);
2462 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2463 btrfs_mark_buffer_dirty(path->nodes[0]);
2464 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2465 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2466 root->root_key.objectid);
2468 btrfs_release_path(path);
/*
 * repair_inode_orphan_item - add an orphan item for rec->ino.
 *
 * Calls btrfs_add_orphan_item(), releases @path and clears
 * I_ERR_NO_ORPHAN_ITEM from rec->errors.
 *
 * NOTE(review): the condition guarding the error bit update and the return
 * are absent from this listing.
 */
2472 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2473 struct btrfs_root *root,
2474 struct btrfs_path *path,
2475 struct inode_record *rec)
2479 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2480 btrfs_release_path(path);
2482 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * repair_inode_nbytes - rewrite the nbytes field of the inode item of
 * rec->ino with rec->found_size.
 *
 * On the visible success path the leaf is marked dirty,
 * I_ERR_FILE_NBYTES_WRONG is cleared from rec->errors, a message is printed
 * and @path is released.
 *
 * NOTE(review): the key offset assignment, the handling of the search
 * result and the return are absent from this listing.
 */
2486 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2487 struct btrfs_root *root,
2488 struct btrfs_path *path,
2489 struct inode_record *rec)
2491 struct btrfs_inode_item *ei;
2492 struct btrfs_key key;
2495 key.objectid = rec->ino;
2496 key.type = BTRFS_INODE_ITEM_KEY;
2499 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2506 /* Since ret == 0, no need to check anything */
2507 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2508 struct btrfs_inode_item);
2509 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2510 btrfs_mark_buffer_dirty(path->nodes[0]);
2511 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2512 printf("reset nbytes for ino %llu root %llu\n",
2513 rec->ino, root->root_key.objectid);
2515 btrfs_release_path(path);
/*
 * add_missing_dir_index - insert the DIR_INDEX item described by @backref
 * and update the cached record of the parent directory.
 *
 * A transaction is started, an empty item keyed (backref->dir, DIR_INDEX,
 * backref->index) is inserted, and it is filled with a dir item that points
 * at the inode item of rec->ino, has the type derived from rec->imode, no
 * data and the name stored in @backref. The transaction is committed.
 * Afterwards found_dir_index is set on @backref, and the record of the
 * directory gets found_size increased by the name length; its
 * I_ERR_DIR_ISIZE_WRONG bit is cleared or set depending on whether
 * found_size now equals isize.
 *
 * NOTE(review): the result checks after btrfs_start_transaction() and
 * btrfs_insert_empty_item(), and the return, are absent from this listing;
 * the return value of btrfs_commit_transaction() is not used in the visible
 * code.
 */
2519 static int add_missing_dir_index(struct btrfs_root *root,
2520 struct cache_tree *inode_cache,
2521 struct inode_record *rec,
2522 struct inode_backref *backref)
2524 struct btrfs_path path;
2525 struct btrfs_trans_handle *trans;
2526 struct btrfs_dir_item *dir_item;
2527 struct extent_buffer *leaf;
2528 struct btrfs_key key;
2529 struct btrfs_disk_key disk_key;
2530 struct inode_record *dir_rec;
2531 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name bytes. */
2532 u32 data_size = sizeof(*dir_item) + backref->namelen;
2535 trans = btrfs_start_transaction(root, 1);
2537 return PTR_ERR(trans);
2539 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2540 (unsigned long long)rec->ino);
2542 btrfs_init_path(&path);
2543 key.objectid = backref->dir;
2544 key.type = BTRFS_DIR_INDEX_KEY;
2545 key.offset = backref->index;
2546 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2549 leaf = path.nodes[0];
2550 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the dir item: the inode item of rec->ino. */
2552 disk_key.objectid = cpu_to_le64(rec->ino);
2553 disk_key.type = BTRFS_INODE_ITEM_KEY;
2554 disk_key.offset = 0;
2556 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2557 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2558 btrfs_set_dir_data_len(leaf, dir_item, 0);
2559 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right after the dir item header. */
2560 name_ptr = (unsigned long)(dir_item + 1);
2561 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2562 btrfs_mark_buffer_dirty(leaf);
2563 btrfs_release_path(&path);
2564 btrfs_commit_transaction(trans, root);
2566 backref->found_dir_index = 1;
2567 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2568 BUG_ON(IS_ERR(dir_rec));
/* Re-evaluate the directory size error with the new index counted in. */
2571 dir_rec->found_size += backref->namelen;
2572 if (dir_rec->found_size == dir_rec->isize &&
2573 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2574 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2575 if (dir_rec->found_size != dir_rec->isize)
2576 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * delete_dir_index - remove the DIR_INDEX entry described by @backref.
 *
 * A transaction is started and the entry is looked up with
 * btrfs_lookup_dir_index() by directory, name and index. One visible path
 * releases the path and commits without deleting; the other deletes either
 * the whole item (btrfs_del_item) or just the one name
 * (btrfs_delete_one_dir_name), then releases the path and commits.
 *
 * NOTE(review): the conditions selecting between these paths and the
 * returns are absent from this listing; the return value of
 * btrfs_commit_transaction() is not used in the visible code.
 */
2581 static int delete_dir_index(struct btrfs_root *root,
2582 struct inode_backref *backref)
2584 struct btrfs_trans_handle *trans;
2585 struct btrfs_dir_item *di;
2586 struct btrfs_path path;
2589 trans = btrfs_start_transaction(root, 1);
2591 return PTR_ERR(trans);
2593 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2594 (unsigned long long)backref->dir,
2595 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2596 (unsigned long long)root->objectid);
2598 btrfs_init_path(&path);
2599 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2600 backref->name, backref->namelen,
2601 backref->index, -1);
/* Path taken without deleting anything. */
2604 btrfs_release_path(&path);
2605 btrfs_commit_transaction(trans, root);
/* Either drop the whole item or only the matching name inside it. */
2612 ret = btrfs_del_item(trans, root, &path);
2614 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2616 btrfs_release_path(&path);
2617 btrfs_commit_transaction(trans, root);
/*
 * create_inode_item_lowmem - insert a new inode item for @ino.
 *
 * The mode is a directory when filetype is BTRFS_FT_DIR and a regular file
 * otherwise, with permission bits 0755. The three numeric arguments passed
 * to insert_inode_item() between @ino and the mode are all 0.
 *
 * Returns whatever insert_inode_item() returns.
 *
 * NOTE(review): the line declaring the filetype parameter is absent from
 * this listing.
 */
2621 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2622 struct btrfs_root *root, u64 ino,
2625 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2627 return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * create_inode_item - insert an inode item rebuilt from inode record @rec.
 *
 * nlink is 1 when @root_dir is set and rec->found_link otherwise. A record
 * for which a dir item was found becomes a directory (mode S_IFDIR | 0755,
 * size rec->found_size), with a warning when file extents were found as
 * well; any other record becomes a regular file (mode S_IFREG | 0755, size
 * rec->extent_end). The item is inserted inside its own transaction, which
 * is then committed.
 *
 * NOTE(review): the error exit after btrfs_start_transaction(), the tail of
 * the insert_inode_item() call and the return are absent from this listing;
 * the return value of btrfs_commit_transaction() is not used in the visible
 * code.
 */
2630 static int create_inode_item(struct btrfs_root *root,
2631 struct inode_record *rec, int root_dir)
2633 struct btrfs_trans_handle *trans;
2639 trans = btrfs_start_transaction(root, 1);
2640 if (IS_ERR(trans)) {
2641 ret = PTR_ERR(trans);
2645 nlink = root_dir ? 1 : rec->found_link;
2646 if (rec->found_dir_item) {
2647 if (rec->found_file_extent)
2648 fprintf(stderr, "root %llu inode %llu has both a dir "
2649 "item and extents, unsure if it is a dir or a "
2650 "regular file so setting it as a directory\n",
2651 (unsigned long long)root->objectid,
2652 (unsigned long long)rec->ino);
2653 mode = S_IFDIR | 0755;
2654 size = rec->found_size;
/* This condition is always true here; it acts as a plain else. */
2655 } else if (!rec->found_dir_item) {
2656 size = rec->extent_end;
2657 mode = S_IFREG | 0755;
2660 ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2662 btrfs_commit_transaction(trans, root);
/*
 * repair_inode_backrefs - try to repair the backrefs of inode record @rec.
 *
 * Each backref on rec->backrefs is examined. Visible repairs:
 *  - for the root directory, in non-delete mode, a missing inode item is
 *    created with create_inode_item(root, rec, 1);
 *  - index 0 of the root directory is left alone;
 *  - a dir index without inode ref, or with an unmatched index, is removed
 *    with delete_dir_index() and the backref is unlinked from the list;
 *  - in non-delete mode, a backref with dir item and inode ref but no dir
 *    index gets one from add_missing_dir_index(); a backref that then has
 *    dir item, dir index, inode ref and no errors is unlinked from the
 *    list;
 *  - in non-delete mode, a backref with only an inode ref gets a dir
 *    index/item pair inserted in a new transaction, after
 *    check_dir_conflict();
 *  - in non-delete mode, a fully matched backref whose inode item is
 *    missing gets one from create_inode_item(root, rec, 0).
 *
 * Returns ret if it is non-zero, otherwise the repaired counter.
 *
 * NOTE(review): the last parameter line, the start of the delete condition,
 * result checks, counter updates and continue/break lines are absent from
 * this listing.
 */
2666 static int repair_inode_backrefs(struct btrfs_root *root,
2667 struct inode_record *rec,
2668 struct cache_tree *inode_cache,
2671 struct inode_backref *tmp, *backref;
2672 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The safe iterator is needed because entries are unlinked in the loop. */
2676 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2677 if (!delete && rec->ino == root_dirid) {
2678 if (!rec->found_inode_item) {
2679 ret = create_inode_item(root, rec, 1);
2686 /* Index 0 for root dir's are special, don't mess with it */
2687 if (rec->ino == root_dirid && backref->index == 0)
2691 ((backref->found_dir_index && !backref->found_inode_ref) ||
2692 (backref->found_dir_index && backref->found_inode_ref &&
2693 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2694 ret = delete_dir_index(root, backref);
2698 list_del(&backref->list);
2703 if (!delete && !backref->found_dir_index &&
2704 backref->found_dir_item && backref->found_inode_ref) {
2705 ret = add_missing_dir_index(root, inode_cache, rec,
2710 if (backref->found_dir_item &&
2711 backref->found_dir_index) {
2712 if (!backref->errors &&
2713 backref->found_inode_ref) {
2714 list_del(&backref->list);
/* Only an inode ref exists: recreate the dir index/item pair. */
2721 if (!delete && (!backref->found_dir_index &&
2722 !backref->found_dir_item &&
2723 backref->found_inode_ref)) {
2724 struct btrfs_trans_handle *trans;
2725 struct btrfs_key location;
2727 ret = check_dir_conflict(root, backref->name,
2733 * let nlink fixing routine to handle it,
2734 * which can do it better.
2739 location.objectid = rec->ino;
2740 location.type = BTRFS_INODE_ITEM_KEY;
2741 location.offset = 0;
2743 trans = btrfs_start_transaction(root, 1);
2744 if (IS_ERR(trans)) {
2745 ret = PTR_ERR(trans);
2748 fprintf(stderr, "adding missing dir index/item pair "
2750 (unsigned long long)rec->ino);
2751 ret = btrfs_insert_dir_item(trans, root, backref->name,
2753 backref->dir, &location,
2754 imode_to_type(rec->imode),
2757 btrfs_commit_transaction(trans, root);
/* Everything matches except that the inode item itself is missing. */
2761 if (!delete && (backref->found_inode_ref &&
2762 backref->found_dir_index &&
2763 backref->found_dir_item &&
2764 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2765 !rec->found_inode_item)) {
2766 ret = create_inode_item(root, rec, 0);
2773 return ret ? ret : repaired;
/*
 * find_file_type - derive a BTRFS_FT_* value for inode record @rec.
 *
 * When the inode item was found, the type comes from rec->imode via
 * imode_to_type(). Otherwise the backrefs are scanned and the filetype of
 * the first backref seen as a dir index or dir item is used.
 *
 * NOTE(review): the return statements are absent from this listing; the
 * contract is the one stated in the fragment that follows.
 */
2777 * To determine the file type for nlink/inode_item repair
2779 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2780 * Return -ENOENT if file type is not found.
2782 static int find_file_type(struct inode_record *rec, u8 *type)
2784 struct inode_backref *backref;
2786 /* For inode item recovered case */
2787 if (rec->found_inode_item) {
2788 *type = imode_to_type(rec->imode);
2792 list_for_each_entry(backref, &rec->backrefs, list) {
2793 if (backref->found_dir_index || backref->found_dir_item) {
2794 *type = backref->filetype;
2802 * To determine the file name for nlink repair
2804 * Return 0 if file name is found, set name and namelen.
2805 * Return -ENOENT if file name is not found.
2807 static int find_file_name(struct inode_record *rec,
2808 char *name, int *namelen)
2810 struct inode_backref *backref;
2812 list_for_each_entry(backref, &rec->backrefs, list) {
2813 if (backref->found_dir_index || backref->found_dir_item ||
2814 backref->found_inode_ref) {
2815 memcpy(name, backref->name, backref->namelen);
2816 *namelen = backref->namelen;
2823 /* Reset the nlink of the inode to the correct one */
2824 static int reset_nlink(struct btrfs_trans_handle *trans,
2825 struct btrfs_root *root,
2826 struct btrfs_path *path,
2827 struct inode_record *rec)
2829 struct inode_backref *backref;
2830 struct inode_backref *tmp;
2831 struct btrfs_key key;
2832 struct btrfs_inode_item *inode_item;
2835 /* We don't believe this either, reset it and iterate backref */
2836 rec->found_link = 0;
2838 /* Remove all backref including the valid ones */
2839 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2840 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2841 backref->index, backref->name,
2842 backref->namelen, 0);
2846 /* remove invalid backref, so it won't be added back */
2847 if (!(backref->found_dir_index &&
2848 backref->found_dir_item &&
2849 backref->found_inode_ref)) {
2850 list_del(&backref->list);
2857 /* Set nlink to 0 */
2858 key.objectid = rec->ino;
2859 key.type = BTRFS_INODE_ITEM_KEY;
2861 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2868 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2869 struct btrfs_inode_item);
2870 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2871 btrfs_mark_buffer_dirty(path->nodes[0]);
2872 btrfs_release_path(path);
2875 * Add back valid inode_ref/dir_item/dir_index,
2876 * add_link() will handle the nlink inc, so new nlink must be correct
2878 list_for_each_entry(backref, &rec->backrefs, list) {
2879 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2880 backref->name, backref->namelen,
2881 backref->filetype, &backref->index, 1, 0);
2886 btrfs_release_path(path);
2890 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2891 struct btrfs_root *root,
2892 struct btrfs_path *path,
2893 struct inode_record *rec)
2895 char namebuf[BTRFS_NAME_LEN] = {0};
2898 int name_recovered = 0;
2899 int type_recovered = 0;
2903 * Get file name and type first before these invalid inode ref
2904 * are deleted by remove_all_invalid_backref()
2906 name_recovered = !find_file_name(rec, namebuf, &namelen);
2907 type_recovered = !find_file_type(rec, &type);
2909 if (!name_recovered) {
2910 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2911 rec->ino, rec->ino);
2912 namelen = count_digits(rec->ino);
2913 sprintf(namebuf, "%llu", rec->ino);
2916 if (!type_recovered) {
2917 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2919 type = BTRFS_FT_REG_FILE;
2923 ret = reset_nlink(trans, root, path, rec);
2926 "Failed to reset nlink for inode %llu: %s\n",
2927 rec->ino, strerror(-ret));
2931 if (rec->found_link == 0) {
2932 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2933 namebuf, namelen, type,
2934 (u64 *)&rec->found_link);
2938 printf("Fixed the nlink of inode %llu\n", rec->ino);
2941 * Clear the flag anyway, or we will loop forever for the same inode
2942 * as it will not be removed from the bad inode list and the dead loop
2945 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2946 btrfs_release_path(path);
2951 * Check if there is any normal(reg or prealloc) file extent for given
2953 * This is used to determine the file type when neither its dir_index/item nor
2954 * its inode_item exists.
2956 * This will *NOT* report errors; if any error happens, just consider that it
2957 * does not have any normal file extent.
2959 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2961 struct btrfs_path path;
2962 struct btrfs_key key;
2963 struct btrfs_key found_key;
2964 struct btrfs_file_extent_item *fi;
2968 btrfs_init_path(&path);
2970 key.type = BTRFS_EXTENT_DATA_KEY;
2973 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2978 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2979 ret = btrfs_next_leaf(root, &path);
2986 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2988 if (found_key.objectid != ino ||
2989 found_key.type != BTRFS_EXTENT_DATA_KEY)
2991 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2992 struct btrfs_file_extent_item);
2993 type = btrfs_file_extent_type(path.nodes[0], fi);
2994 if (type != BTRFS_FILE_EXTENT_INLINE) {
3000 btrfs_release_path(&path);
3004 static u32 btrfs_type_to_imode(u8 type)
3006 static u32 imode_by_btrfs_type[] = {
3007 [BTRFS_FT_REG_FILE] = S_IFREG,
3008 [BTRFS_FT_DIR] = S_IFDIR,
3009 [BTRFS_FT_CHRDEV] = S_IFCHR,
3010 [BTRFS_FT_BLKDEV] = S_IFBLK,
3011 [BTRFS_FT_FIFO] = S_IFIFO,
3012 [BTRFS_FT_SOCK] = S_IFSOCK,
3013 [BTRFS_FT_SYMLINK] = S_IFLNK,
3016 return imode_by_btrfs_type[(type)];
3019 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3020 struct btrfs_root *root,
3021 struct btrfs_path *path,
3022 struct inode_record *rec)
3026 int type_recovered = 0;
3029 printf("Trying to rebuild inode:%llu\n", rec->ino);
3031 type_recovered = !find_file_type(rec, &filetype);
3034 * Try to determine inode type if type not found.
3036 * For found regular file extent, it must be FILE.
3037 * For found dir_item/index, it must be DIR.
3039 * For undetermined one, use FILE as fallback.
3042 * 1. If found backref(inode_index/item is already handled) to it,
3044 * Need new inode-inode ref structure to allow search for that.
3046 if (!type_recovered) {
3047 if (rec->found_file_extent &&
3048 find_normal_file_extent(root, rec->ino)) {
3050 filetype = BTRFS_FT_REG_FILE;
3051 } else if (rec->found_dir_item) {
3053 filetype = BTRFS_FT_DIR;
3054 } else if (!list_empty(&rec->orphan_extents)) {
3056 filetype = BTRFS_FT_REG_FILE;
3058 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3061 filetype = BTRFS_FT_REG_FILE;
3065 ret = btrfs_new_inode(trans, root, rec->ino,
3066 mode | btrfs_type_to_imode(filetype));
3071 * Here inode rebuild is done, we only rebuild the inode item,
3072 * don't repair the nlink(like move to lost+found).
3073 * That is the job of nlink repair.
3075 * We just fill the record and return
3077 rec->found_dir_item = 1;
3078 rec->imode = mode | btrfs_type_to_imode(filetype);
3080 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3081 /* Ensure the inode_nlinks repair function will be called */
3082 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3087 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3088 struct btrfs_root *root,
3089 struct btrfs_path *path,
3090 struct inode_record *rec)
3092 struct orphan_data_extent *orphan;
3093 struct orphan_data_extent *tmp;
3096 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3098 * Check for conflicting file extents
3100 * Here we don't know whether the extent is compressed or not,
3101 * so we can only assume it is neither compressed nor has a data offset,
3102 * and use its disk_len as the extent length.
3104 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3105 orphan->offset, orphan->disk_len, 0);
3106 btrfs_release_path(path);
3111 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3112 orphan->disk_bytenr, orphan->disk_len);
3113 ret = btrfs_free_extent(trans,
3114 root->fs_info->extent_root,
3115 orphan->disk_bytenr, orphan->disk_len,
3116 0, root->objectid, orphan->objectid,
3121 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3122 orphan->offset, orphan->disk_bytenr,
3123 orphan->disk_len, orphan->disk_len);
3127 /* Update file size info */
3128 rec->found_size += orphan->disk_len;
3129 if (rec->found_size == rec->nbytes)
3130 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3132 /* Update the file extent hole info too */
3133 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3137 if (RB_EMPTY_ROOT(&rec->holes))
3138 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3140 list_del(&orphan->list);
3143 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3148 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3149 struct btrfs_root *root,
3150 struct btrfs_path *path,
3151 struct inode_record *rec)
3153 struct rb_node *node;
3154 struct file_extent_hole *hole;
3158 node = rb_first(&rec->holes);
3162 hole = rb_entry(node, struct file_extent_hole, node);
3163 ret = btrfs_punch_hole(trans, root, rec->ino,
3164 hole->start, hole->len);
3167 ret = del_file_extent_hole(&rec->holes, hole->start,
3171 if (RB_EMPTY_ROOT(&rec->holes))
3172 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3173 node = rb_first(&rec->holes);
3175 /* special case for a file losing all its file extents */
3177 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3178 round_up(rec->isize,
3179 root->fs_info->sectorsize));
3183 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3184 rec->ino, root->objectid);
3189 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3191 struct btrfs_trans_handle *trans;
3192 struct btrfs_path path;
3195 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3196 I_ERR_NO_ORPHAN_ITEM |
3197 I_ERR_LINK_COUNT_WRONG |
3198 I_ERR_NO_INODE_ITEM |
3199 I_ERR_FILE_EXTENT_ORPHAN |
3200 I_ERR_FILE_EXTENT_DISCOUNT|
3201 I_ERR_FILE_NBYTES_WRONG)))
3205 * For nlink repair, it may create a dir and add link, so
3206 * 2 for parent(256)'s dir_index and dir_item
3207 * 2 for lost+found dir's inode_item and inode_ref
3208 * 1 for the new inode_ref of the file
3209 * 2 for lost+found dir's dir_index and dir_item for the file
3211 trans = btrfs_start_transaction(root, 7);
3213 return PTR_ERR(trans);
3215 btrfs_init_path(&path);
3216 if (rec->errors & I_ERR_NO_INODE_ITEM)
3217 ret = repair_inode_no_item(trans, root, &path, rec);
3218 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3219 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3220 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3221 ret = repair_inode_discount_extent(trans, root, &path, rec);
3222 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3223 ret = repair_inode_isize(trans, root, &path, rec);
3224 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3225 ret = repair_inode_orphan_item(trans, root, &path, rec);
3226 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3227 ret = repair_inode_nlinks(trans, root, &path, rec);
3228 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3229 ret = repair_inode_nbytes(trans, root, &path, rec);
3230 btrfs_commit_transaction(trans, root);
3231 btrfs_release_path(&path);
3235 static int check_inode_recs(struct btrfs_root *root,
3236 struct cache_tree *inode_cache)
3238 struct cache_extent *cache;
3239 struct ptr_node *node;
3240 struct inode_record *rec;
3241 struct inode_backref *backref;
3246 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3248 if (btrfs_root_refs(&root->root_item) == 0) {
3249 if (!cache_tree_empty(inode_cache))
3250 fprintf(stderr, "warning line %d\n", __LINE__);
3255 * We need to repair backrefs first because we could change some of the
3256 * errors in the inode recs.
3258 * We also need to go through and delete invalid backrefs first and then
3259 * add the correct ones second. We do this because we may get EEXIST
3260 * when adding back the correct index because we hadn't yet deleted the
3263 * For example, if we were missing a dir index then the directory's
3264 * isize would be wrong, so if we fixed the isize to what we thought it
3265 * would be and then fixed the backref we'd still have an invalid fs, so
3266 * we need to add back the dir index and then check to see if the isize
3271 if (stage == 3 && !err)
3274 cache = search_cache_extent(inode_cache, 0);
3275 while (repair && cache) {
3276 node = container_of(cache, struct ptr_node, cache);
3278 cache = next_cache_extent(cache);
3280 /* Need to free everything up and rescan */
3282 remove_cache_extent(inode_cache, &node->cache);
3284 free_inode_rec(rec);
3288 if (list_empty(&rec->backrefs))
3291 ret = repair_inode_backrefs(root, rec, inode_cache,
3305 rec = get_inode_rec(inode_cache, root_dirid, 0);
3306 BUG_ON(IS_ERR(rec));
3308 ret = check_root_dir(rec);
3310 fprintf(stderr, "root %llu root dir %llu error\n",
3311 (unsigned long long)root->root_key.objectid,
3312 (unsigned long long)root_dirid);
3313 print_inode_error(root, rec);
3318 struct btrfs_trans_handle *trans;
3320 trans = btrfs_start_transaction(root, 1);
3321 if (IS_ERR(trans)) {
3322 err = PTR_ERR(trans);
3327 "root %llu missing its root dir, recreating\n",
3328 (unsigned long long)root->objectid);
3330 ret = btrfs_make_root_dir(trans, root, root_dirid);
3333 btrfs_commit_transaction(trans, root);
3337 fprintf(stderr, "root %llu root dir %llu not found\n",
3338 (unsigned long long)root->root_key.objectid,
3339 (unsigned long long)root_dirid);
3343 cache = search_cache_extent(inode_cache, 0);
3346 node = container_of(cache, struct ptr_node, cache);
3348 remove_cache_extent(inode_cache, &node->cache);
3350 if (rec->ino == root_dirid ||
3351 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3352 free_inode_rec(rec);
3356 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3357 ret = check_orphan_item(root, rec->ino);
3359 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3360 if (can_free_inode_rec(rec)) {
3361 free_inode_rec(rec);
3366 if (!rec->found_inode_item)
3367 rec->errors |= I_ERR_NO_INODE_ITEM;
3368 if (rec->found_link != rec->nlink)
3369 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3371 ret = try_repair_inode(root, rec);
3372 if (ret == 0 && can_free_inode_rec(rec)) {
3373 free_inode_rec(rec);
3379 if (!(repair && ret == 0))
3381 print_inode_error(root, rec);
3382 list_for_each_entry(backref, &rec->backrefs, list) {
3383 if (!backref->found_dir_item)
3384 backref->errors |= REF_ERR_NO_DIR_ITEM;
3385 if (!backref->found_dir_index)
3386 backref->errors |= REF_ERR_NO_DIR_INDEX;
3387 if (!backref->found_inode_ref)
3388 backref->errors |= REF_ERR_NO_INODE_REF;
3389 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3390 " namelen %u name %s filetype %d errors %x",
3391 (unsigned long long)backref->dir,
3392 (unsigned long long)backref->index,
3393 backref->namelen, backref->name,
3394 backref->filetype, backref->errors);
3395 print_ref_error(backref->errors);
3397 free_inode_rec(rec);
3399 return (error > 0) ? -1 : 0;
3402 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3405 struct cache_extent *cache;
3406 struct root_record *rec = NULL;
3409 cache = lookup_cache_extent(root_cache, objectid, 1);
3411 rec = container_of(cache, struct root_record, cache);
3413 rec = calloc(1, sizeof(*rec));
3415 return ERR_PTR(-ENOMEM);
3416 rec->objectid = objectid;
3417 INIT_LIST_HEAD(&rec->backrefs);
3418 rec->cache.start = objectid;
3419 rec->cache.size = 1;
3421 ret = insert_cache_extent(root_cache, &rec->cache);
3423 return ERR_PTR(-EEXIST);
3428 static struct root_backref *get_root_backref(struct root_record *rec,
3429 u64 ref_root, u64 dir, u64 index,
3430 const char *name, int namelen)
3432 struct root_backref *backref;
3434 list_for_each_entry(backref, &rec->backrefs, list) {
3435 if (backref->ref_root != ref_root || backref->dir != dir ||
3436 backref->namelen != namelen)
3438 if (memcmp(name, backref->name, namelen))
3443 backref = calloc(1, sizeof(*backref) + namelen + 1);
3446 backref->ref_root = ref_root;
3448 backref->index = index;
3449 backref->namelen = namelen;
3450 memcpy(backref->name, name, namelen);
3451 backref->name[namelen] = '\0';
3452 list_add_tail(&backref->list, &rec->backrefs);
3456 static void free_root_record(struct cache_extent *cache)
3458 struct root_record *rec;
3459 struct root_backref *backref;
3461 rec = container_of(cache, struct root_record, cache);
3462 while (!list_empty(&rec->backrefs)) {
3463 backref = to_root_backref(rec->backrefs.next);
3464 list_del(&backref->list);
3471 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3473 static int add_root_backref(struct cache_tree *root_cache,
3474 u64 root_id, u64 ref_root, u64 dir, u64 index,
3475 const char *name, int namelen,
3476 int item_type, int errors)
3478 struct root_record *rec;
3479 struct root_backref *backref;
3481 rec = get_root_rec(root_cache, root_id);
3482 BUG_ON(IS_ERR(rec));
3483 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3486 backref->errors |= errors;
3488 if (item_type != BTRFS_DIR_ITEM_KEY) {
3489 if (backref->found_dir_index || backref->found_back_ref ||
3490 backref->found_forward_ref) {
3491 if (backref->index != index)
3492 backref->errors |= REF_ERR_INDEX_UNMATCH;
3494 backref->index = index;
3498 if (item_type == BTRFS_DIR_ITEM_KEY) {
3499 if (backref->found_forward_ref)
3501 backref->found_dir_item = 1;
3502 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3503 backref->found_dir_index = 1;
3504 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3505 if (backref->found_forward_ref)
3506 backref->errors |= REF_ERR_DUP_ROOT_REF;
3507 else if (backref->found_dir_item)
3509 backref->found_forward_ref = 1;
3510 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3511 if (backref->found_back_ref)
3512 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3513 backref->found_back_ref = 1;
3518 if (backref->found_forward_ref && backref->found_dir_item)
3519 backref->reachable = 1;
3523 static int merge_root_recs(struct btrfs_root *root,
3524 struct cache_tree *src_cache,
3525 struct cache_tree *dst_cache)
3527 struct cache_extent *cache;
3528 struct ptr_node *node;
3529 struct inode_record *rec;
3530 struct inode_backref *backref;
3533 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3534 free_inode_recs_tree(src_cache);
3539 cache = search_cache_extent(src_cache, 0);
3542 node = container_of(cache, struct ptr_node, cache);
3544 remove_cache_extent(src_cache, &node->cache);
3547 ret = is_child_root(root, root->objectid, rec->ino);
3553 list_for_each_entry(backref, &rec->backrefs, list) {
3554 BUG_ON(backref->found_inode_ref);
3555 if (backref->found_dir_item)
3556 add_root_backref(dst_cache, rec->ino,
3557 root->root_key.objectid, backref->dir,
3558 backref->index, backref->name,
3559 backref->namelen, BTRFS_DIR_ITEM_KEY,
3561 if (backref->found_dir_index)
3562 add_root_backref(dst_cache, rec->ino,
3563 root->root_key.objectid, backref->dir,
3564 backref->index, backref->name,
3565 backref->namelen, BTRFS_DIR_INDEX_KEY,
3569 free_inode_rec(rec);
3576 static int check_root_refs(struct btrfs_root *root,
3577 struct cache_tree *root_cache)
3579 struct root_record *rec;
3580 struct root_record *ref_root;
3581 struct root_backref *backref;
3582 struct cache_extent *cache;
3588 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3589 BUG_ON(IS_ERR(rec));
3592 /* fixme: this can not detect circular references */
3595 cache = search_cache_extent(root_cache, 0);
3599 rec = container_of(cache, struct root_record, cache);
3600 cache = next_cache_extent(cache);
3602 if (rec->found_ref == 0)
3605 list_for_each_entry(backref, &rec->backrefs, list) {
3606 if (!backref->reachable)
3609 ref_root = get_root_rec(root_cache,
3611 BUG_ON(IS_ERR(ref_root));
3612 if (ref_root->found_ref > 0)
3615 backref->reachable = 0;
3617 if (rec->found_ref == 0)
3623 cache = search_cache_extent(root_cache, 0);
3627 rec = container_of(cache, struct root_record, cache);
3628 cache = next_cache_extent(cache);
3630 if (rec->found_ref == 0 &&
3631 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3632 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3633 ret = check_orphan_item(root->fs_info->tree_root,
3639 * If we don't have a root item then we likely just have
3640 * a dir item in a snapshot for this root but no actual
3641 * ref key or anything so it's meaningless.
3643 if (!rec->found_root_item)
3646 fprintf(stderr, "fs tree %llu not referenced\n",
3647 (unsigned long long)rec->objectid);
3651 if (rec->found_ref > 0 && !rec->found_root_item)
3653 list_for_each_entry(backref, &rec->backrefs, list) {
3654 if (!backref->found_dir_item)
3655 backref->errors |= REF_ERR_NO_DIR_ITEM;
3656 if (!backref->found_dir_index)
3657 backref->errors |= REF_ERR_NO_DIR_INDEX;
3658 if (!backref->found_back_ref)
3659 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3660 if (!backref->found_forward_ref)
3661 backref->errors |= REF_ERR_NO_ROOT_REF;
3662 if (backref->reachable && backref->errors)
3669 fprintf(stderr, "fs tree %llu refs %u %s\n",
3670 (unsigned long long)rec->objectid, rec->found_ref,
3671 rec->found_root_item ? "" : "not found");
3673 list_for_each_entry(backref, &rec->backrefs, list) {
3674 if (!backref->reachable)
3676 if (!backref->errors && rec->found_root_item)
3678 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3679 " index %llu namelen %u name %s errors %x\n",
3680 (unsigned long long)backref->ref_root,
3681 (unsigned long long)backref->dir,
3682 (unsigned long long)backref->index,
3683 backref->namelen, backref->name,
3685 print_ref_error(backref->errors);
3688 return errors > 0 ? 1 : 0;
3691 static int process_root_ref(struct extent_buffer *eb, int slot,
3692 struct btrfs_key *key,
3693 struct cache_tree *root_cache)
3699 struct btrfs_root_ref *ref;
3700 char namebuf[BTRFS_NAME_LEN];
3703 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3705 dirid = btrfs_root_ref_dirid(eb, ref);
3706 index = btrfs_root_ref_sequence(eb, ref);
3707 name_len = btrfs_root_ref_name_len(eb, ref);
3709 if (name_len <= BTRFS_NAME_LEN) {
3713 len = BTRFS_NAME_LEN;
3714 error = REF_ERR_NAME_TOO_LONG;
3716 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3718 if (key->type == BTRFS_ROOT_REF_KEY) {
3719 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3720 index, namebuf, len, key->type, error);
3722 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3723 index, namebuf, len, key->type, error);
3728 static void free_corrupt_block(struct cache_extent *cache)
3730 struct btrfs_corrupt_block *corrupt;
3732 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3736 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3739 * Repair the btree of the given root.
3741 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3742 * and rebalance the tree.
3743 * After the fix, the btree should be writeable.
3745 static int repair_btree(struct btrfs_root *root,
3746 struct cache_tree *corrupt_blocks)
3748 struct btrfs_trans_handle *trans;
3749 struct btrfs_path path;
3750 struct btrfs_corrupt_block *corrupt;
3751 struct cache_extent *cache;
3752 struct btrfs_key key;
3757 if (cache_tree_empty(corrupt_blocks))
3760 trans = btrfs_start_transaction(root, 1);
3761 if (IS_ERR(trans)) {
3762 ret = PTR_ERR(trans);
3763 fprintf(stderr, "Error starting transaction: %s\n",
3767 btrfs_init_path(&path);
3768 cache = first_cache_extent(corrupt_blocks);
3770 corrupt = container_of(cache, struct btrfs_corrupt_block,
3772 level = corrupt->level;
3773 path.lowest_level = level;
3774 key.objectid = corrupt->key.objectid;
3775 key.type = corrupt->key.type;
3776 key.offset = corrupt->key.offset;
3779 * Here we don't want to do any tree balance, since it may
3780 * cause a balance with a corrupted sibling leaf/node,
3781 * so ins_len is set to 0 here.
3782 * Balance will be done after all corrupt nodes/leaves are deleted.
3784 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3787 offset = btrfs_node_blockptr(path.nodes[level],
3790 /* Remove the ptr */
3791 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3795 * Remove the corresponding extent
3796 * its return value does not matter.
3798 btrfs_release_path(&path);
3799 ret = btrfs_free_extent(trans, root, offset,
3800 root->fs_info->nodesize, 0,
3801 root->root_key.objectid, level - 1, 0);
3802 cache = next_cache_extent(cache);
3805 /* Balance the btree using btrfs_search_slot() */
3806 cache = first_cache_extent(corrupt_blocks);
3808 corrupt = container_of(cache, struct btrfs_corrupt_block,
3810 memcpy(&key, &corrupt->key, sizeof(key));
3811 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3814 /* the return value will always be > 0 since it won't find the item */
3816 btrfs_release_path(&path);
3817 cache = next_cache_extent(cache);
3820 btrfs_commit_transaction(trans, root);
3821 btrfs_release_path(&path);
3825 static int check_fs_root(struct btrfs_root *root,
3826 struct cache_tree *root_cache,
3827 struct walk_control *wc)
3833 struct btrfs_path path;
3834 struct shared_node root_node;
3835 struct root_record *rec;
3836 struct btrfs_root_item *root_item = &root->root_item;
3837 struct cache_tree corrupt_blocks;
3838 struct orphan_data_extent *orphan;
3839 struct orphan_data_extent *tmp;
3840 enum btrfs_tree_block_status status;
3841 struct node_refs nrefs;
3844 * Reuse the corrupt_block cache tree to record corrupted tree block
3846 * Unlike the usage in extent tree check, here we do it on a per
3847 * fs/subvol tree basis.
3849 cache_tree_init(&corrupt_blocks);
3850 root->fs_info->corrupt_blocks = &corrupt_blocks;
3852 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3853 rec = get_root_rec(root_cache, root->root_key.objectid);
3854 BUG_ON(IS_ERR(rec));
3855 if (btrfs_root_refs(root_item) > 0)
3856 rec->found_root_item = 1;
3859 btrfs_init_path(&path);
3860 memset(&root_node, 0, sizeof(root_node));
3861 cache_tree_init(&root_node.root_cache);
3862 cache_tree_init(&root_node.inode_cache);
3863 memset(&nrefs, 0, sizeof(nrefs));
3865 /* Move the orphan extent record to corresponding inode_record */
3866 list_for_each_entry_safe(orphan, tmp,
3867 &root->orphan_data_extents, list) {
3868 struct inode_record *inode;
3870 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3872 BUG_ON(IS_ERR(inode));
3873 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3874 list_move(&orphan->list, &inode->orphan_extents);
3877 level = btrfs_header_level(root->node);
3878 memset(wc->nodes, 0, sizeof(wc->nodes));
3879 wc->nodes[level] = &root_node;
3880 wc->active_node = level;
3881 wc->root_level = level;
3883 /* We may not have checked the root block, let's do that now */
3884 if (btrfs_is_leaf(root->node))
3885 status = btrfs_check_leaf(root, NULL, root->node);
3887 status = btrfs_check_node(root, NULL, root->node);
3888 if (status != BTRFS_TREE_BLOCK_CLEAN)
3891 if (btrfs_root_refs(root_item) > 0 ||
3892 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3893 path.nodes[level] = root->node;
3894 extent_buffer_get(root->node);
3895 path.slots[level] = 0;
3897 struct btrfs_key key;
3898 struct btrfs_disk_key found_key;
3900 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3901 level = root_item->drop_level;
3902 path.lowest_level = level;
3903 if (level > btrfs_header_level(root->node) ||
3904 level >= BTRFS_MAX_LEVEL) {
3905 error("ignoring invalid drop level: %u", level);
3908 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3911 btrfs_node_key(path.nodes[level], &found_key,
3913 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3914 sizeof(found_key)));
3918 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3924 wret = walk_up_tree(root, &path, wc, &level);
3931 btrfs_release_path(&path);
3933 if (!cache_tree_empty(&corrupt_blocks)) {
3934 struct cache_extent *cache;
3935 struct btrfs_corrupt_block *corrupt;
3937 printf("The following tree block(s) is corrupted in tree %llu:\n",
3938 root->root_key.objectid);
3939 cache = first_cache_extent(&corrupt_blocks);
3941 corrupt = container_of(cache,
3942 struct btrfs_corrupt_block,
3944 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3945 cache->start, corrupt->level,
3946 corrupt->key.objectid, corrupt->key.type,
3947 corrupt->key.offset);
3948 cache = next_cache_extent(cache);
3951 printf("Try to repair the btree for root %llu\n",
3952 root->root_key.objectid);
3953 ret = repair_btree(root, &corrupt_blocks);
3955 fprintf(stderr, "Failed to repair btree: %s\n",
3958 printf("Btree for root %llu is fixed\n",
3959 root->root_key.objectid);
3963 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3967 if (root_node.current) {
3968 root_node.current->checked = 1;
3969 maybe_free_inode_rec(&root_node.inode_cache,
3973 err = check_inode_recs(root, &root_node.inode_cache);
3977 free_corrupt_blocks_tree(&corrupt_blocks);
3978 root->fs_info->corrupt_blocks = NULL;
3979 free_orphan_data_extents(&root->orphan_data_extents);
3983 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3984 struct cache_tree *root_cache)
3986 struct btrfs_path path;
3987 struct btrfs_key key;
3988 struct walk_control wc;
3989 struct extent_buffer *leaf, *tree_node;
3990 struct btrfs_root *tmp_root;
3991 struct btrfs_root *tree_root = fs_info->tree_root;
3995 if (ctx.progress_enabled) {
3996 ctx.tp = TASK_FS_ROOTS;
3997 task_start(ctx.info);
4001 * Just in case we made any changes to the extent tree that weren't
4002 * reflected into the free space cache yet.
4005 reset_cached_block_groups(fs_info);
4006 memset(&wc, 0, sizeof(wc));
4007 cache_tree_init(&wc.shared);
4008 btrfs_init_path(&path);
4013 key.type = BTRFS_ROOT_ITEM_KEY;
4014 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4019 tree_node = tree_root->node;
4021 if (tree_node != tree_root->node) {
4022 free_root_recs_tree(root_cache);
4023 btrfs_release_path(&path);
4026 leaf = path.nodes[0];
4027 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4028 ret = btrfs_next_leaf(tree_root, &path);
4034 leaf = path.nodes[0];
4036 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4037 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4038 fs_root_objectid(key.objectid)) {
4039 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4040 tmp_root = btrfs_read_fs_root_no_cache(
4043 key.offset = (u64)-1;
4044 tmp_root = btrfs_read_fs_root(
4047 if (IS_ERR(tmp_root)) {
4051 ret = check_fs_root(tmp_root, root_cache, &wc);
4052 if (ret == -EAGAIN) {
4053 free_root_recs_tree(root_cache);
4054 btrfs_release_path(&path);
4059 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4060 btrfs_free_fs_root(tmp_root);
4061 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4062 key.type == BTRFS_ROOT_BACKREF_KEY) {
4063 process_root_ref(leaf, path.slots[0], &key,
4070 btrfs_release_path(&path);
4072 free_extent_cache_tree(&wc.shared);
4073 if (!cache_tree_empty(&wc.shared))
4074 fprintf(stderr, "warning line %d\n", __LINE__);
4076 task_stop(ctx.info);
4082 * Find the dir index (@index_ret) by matching @location_id, name and file type.
4083 * Note: time efficiency is O(N)
4085 * @root: the root of the fs/file tree
4086 * @index_ret: the index as return value
4087 * @namebuf: the name to match
4088 * @name_len: the length of name to match
4089 * @file_type: the file_type of INODE_ITEM to match
4091 * Returns 0 if found and *@index_ret will be modified with the right value
4092 * Returns <0 if not found and *@index_ret will be (u64)-1
4094 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4095 u64 *index_ret, char *namebuf, u32 name_len,
4098 struct btrfs_path path;
4099 struct extent_buffer *node;
4100 struct btrfs_dir_item *di;
4101 struct btrfs_key key;
4102 struct btrfs_key location;
4103 char name[BTRFS_NAME_LEN] = {0};
4115 /* search from the last index */
4116 key.objectid = dirid;
4117 key.offset = (u64)-1;
4118 key.type = BTRFS_DIR_INDEX_KEY;
4120 btrfs_init_path(&path);
4121 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4126 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4129 *index_ret = (u64)-1;
4132 /* Check whether inode_id/filetype/name match */
4133 node = path.nodes[0];
4134 slot = path.slots[0];
4135 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4136 total = btrfs_item_size_nr(node, slot);
4137 while (cur < total) {
4139 len = btrfs_dir_name_len(node, di);
4140 data_len = btrfs_dir_data_len(node, di);
4142 btrfs_dir_item_key_to_cpu(node, di, &location);
4143 if (location.objectid != location_id ||
4144 location.type != BTRFS_INODE_ITEM_KEY ||
4145 location.offset != 0)
4148 filetype = btrfs_dir_type(node, di);
4149 if (file_type != filetype)
4152 if (len > BTRFS_NAME_LEN)
4153 len = BTRFS_NAME_LEN;
4155 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4156 if (len != name_len || strncmp(namebuf, name, len))
4159 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4160 *index_ret = key.offset;
4164 len += sizeof(*di) + data_len;
4165 di = (struct btrfs_dir_item *)((char *)di + len);
4171 btrfs_release_path(&path);
4176 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4177 * INODE_REF/INODE_EXTREF match.
4179 * @root: the root of the fs/file tree
4180 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4181 * value while find index
4182 * @location_key: location key of the struct btrfs_dir_item to match
4183 * @name: the name to match
4184 * @namelen: the length of name
4185 * @file_type: the type of file to math
4187 * Return 0 if no error occurred.
4188 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4189 * DIR_ITEM/DIR_INDEX
4190 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4191 * and DIR_ITEM/DIR_INDEX mismatch
4193 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4194 struct btrfs_key *location_key, char *name,
4195 u32 namelen, u8 file_type)
4197 struct btrfs_path path;
4198 struct extent_buffer *node;
4199 struct btrfs_dir_item *di;
4200 struct btrfs_key location;
4201 char namebuf[BTRFS_NAME_LEN] = {0};
4210 /* get the index by traversing all index */
4211 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4212 ret = find_dir_index(root, key->objectid,
4213 location_key->objectid, &key->offset,
4214 name, namelen, file_type);
4216 ret = DIR_INDEX_MISSING;
4220 btrfs_init_path(&path);
4221 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4223 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4228 /* Check whether inode_id/filetype/name match */
4229 node = path.nodes[0];
4230 slot = path.slots[0];
4231 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4232 total = btrfs_item_size_nr(node, slot);
4233 while (cur < total) {
4234 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4235 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4237 len = btrfs_dir_name_len(node, di);
4238 data_len = btrfs_dir_data_len(node, di);
4240 btrfs_dir_item_key_to_cpu(node, di, &location);
4241 if (location.objectid != location_key->objectid ||
4242 location.type != location_key->type ||
4243 location.offset != location_key->offset)
4246 filetype = btrfs_dir_type(node, di);
4247 if (file_type != filetype)
4250 if (len > BTRFS_NAME_LEN) {
4251 len = BTRFS_NAME_LEN;
4252 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4254 key->type == BTRFS_DIR_ITEM_KEY ?
4255 "DIR_ITEM" : "DIR_INDEX",
4256 key->objectid, key->offset, len);
4258 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4260 if (len != namelen || strncmp(namebuf, name, len))
4266 len += sizeof(*di) + data_len;
4267 di = (struct btrfs_dir_item *)((char *)di + len);
4272 btrfs_release_path(&path);
4277 * Prints inode ref error message
4279 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4280 u64 index, const char *namebuf, int name_len,
4281 u8 filetype, int err)
4286 /* root dir error */
4287 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4289 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4290 root->objectid, key->objectid, key->offset, namebuf);
4295 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4296 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4297 root->objectid, key->offset,
4298 btrfs_name_hash(namebuf, name_len),
4299 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4301 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4302 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4303 root->objectid, key->offset, index,
4304 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4309 * Insert the missing inode item.
4311 * Returns 0 means success.
4312 * Returns <0 means error.
4314 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4317 struct btrfs_key key;
4318 struct btrfs_trans_handle *trans;
4319 struct btrfs_path path;
4323 key.type = BTRFS_INODE_ITEM_KEY;
4326 btrfs_init_path(&path);
4327 trans = btrfs_start_transaction(root, 1);
4328 if (IS_ERR(trans)) {
4333 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4334 if (ret < 0 || !ret)
4337 /* insert inode item */
4338 create_inode_item_lowmem(trans, root, ino, filetype);
4341 btrfs_commit_transaction(trans, root);
4344 error("failed to repair root %llu INODE ITEM[%llu] missing",
4345 root->objectid, ino);
4346 btrfs_release_path(&path);
4351 * The ternary means dir item, dir index and relative inode ref.
4352 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4353 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4355 * If two of three is missing or mismatched, delete the existing one.
4356 * If one of three is missing or mismatched, add the missing one.
4358 * returns 0 means success.
4359 * returns not 0 means on error;
4361 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4362 u64 index, char *name, int name_len, u8 filetype,
4365 struct btrfs_trans_handle *trans;
4370 * stage shall be one of following valild values:
4371 * 0: Fine, nothing to do.
4372 * 1: One of three is wrong, so add missing one.
4373 * 2: Two of three is wrong, so delete existed one.
4375 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4377 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4379 if (err & (INODE_REF_MISSING))
4382 /* stage must be smllarer than 3 */
4385 trans = btrfs_start_transaction(root, 1);
4387 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4392 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4393 filetype, &index, 1, 1);
4397 btrfs_commit_transaction(trans, root);
4400 error("fail to repair inode %llu name %s filetype %u",
4401 ino, name, filetype);
4403 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4404 stage == 2 ? "Delete" : "Add",
4405 ino, name, filetype);
4411 * Traverse the given INODE_REF and call find_dir_item() to find related
4412 * DIR_ITEM/DIR_INDEX.
4414 * @root: the root of the fs/file tree
4415 * @ref_key: the key of the INODE_REF
4416 * @path the path provides node and slot
4417 * @refs: the count of INODE_REF
4418 * @mode: the st_mode of INODE_ITEM
4419 * @name_ret: returns with the first ref's name
4420 * @name_len_ret: len of the name_ret
4422 * Return 0 if no error occurred.
/*
 * Walk the entries packed into one INODE_REF item. For each entry the
 * matching DIR_INDEX and DIR_ITEM are looked up with find_dir_item(); when
 * errors were collected for the entry and the global 'repair' is set,
 * repair_ternary_lowmem() is called, and print_inode_ref_err() reports the
 * entry's error bits.
 */
4424 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4425 struct btrfs_path *path, char *name_ret,
4426 u32 *namelen_ret, u64 *refs_ret, int mode)
4428 struct btrfs_key key;
4429 struct btrfs_key location;
4430 struct btrfs_inode_ref *ref;
4431 struct extent_buffer *node;
4432 char namebuf[BTRFS_NAME_LEN] = {0};
4442 int need_research = 0;
4450 /* since after repair, path and the dir item may be changed */
4451 if (need_research) {
4453 btrfs_release_path(path);
4454 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4455 /* the item was deleted, let path point to the last checked item */
4457 if (path->slots[0] == 0)
4458 btrfs_prev_leaf(root, path);
/*
 * Every dir entry that belongs to this ref must point at the INODE_ITEM of
 * ref_key->objectid, so that is the location handed to find_dir_item().
 */
4466 location.objectid = ref_key->objectid;
4467 location.type = BTRFS_INODE_ITEM_KEY;
4468 location.offset = 0;
4469 node = path->nodes[0];
4470 slot = path->slots[0];
/* namebuf is a char array, so the division by sizeof(*namebuf) is by 1 */
4472 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4473 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4474 total = btrfs_item_size_nr(node, slot);
4477 /* Update inode ref count */
4480 index = btrfs_inode_ref_index(node, ref);
4481 name_len = btrfs_inode_ref_name_len(node, ref);
/* names longer than BTRFS_NAME_LEN are read trimmed, with a warning */
4483 if (name_len <= BTRFS_NAME_LEN) {
4486 len = BTRFS_NAME_LEN;
4487 warning("root %llu INODE_REF[%llu %llu] name too long",
4488 root->objectid, ref_key->objectid, ref_key->offset);
/* the name bytes are stored right behind the btrfs_inode_ref header */
4491 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4493 /* copy the first name found to name_ret */
4494 if (refs == 1 && name_ret) {
4495 memcpy(name_ret, namebuf, len);
4499 /* Check root dir ref */
/*
 * For the root directory (BTRFS_FIRST_FREE_OBJECTID) the only accepted ref
 * is index 0, name "..", with the directory as its own parent.
 */
4500 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4501 if (index != 0 || len != strlen("..") ||
4502 strncmp("..", namebuf, len) ||
4503 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4504 /* set err bits then repair will delete the ref */
4505 err |= DIR_INDEX_MISSING;
4506 err |= DIR_ITEM_MISSING;
4511 /* Find related DIR_INDEX */
/* ref_key->offset is used as the objectid of the dir entries' keys */
4512 key.objectid = ref_key->offset;
4513 key.type = BTRFS_DIR_INDEX_KEY;
4515 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4516 imode_to_type(mode));
4518 /* Find related dir_item */
4519 key.objectid = ref_key->offset;
4520 key.type = BTRFS_DIR_ITEM_KEY;
4521 key.offset = btrfs_name_hash(namebuf, len);
4522 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4523 imode_to_type(mode));
/* repair is attempted only for the errors collected for this entry */
4525 if (tmp_err && repair) {
4526 ret = repair_ternary_lowmem(root, ref_key->offset,
4527 ref_key->objectid, index, namebuf,
4528 name_len, imode_to_type(mode),
4535 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4536 imode_to_type(mode), tmp_err);
/* step to the next packed entry using the on-disk name length */
4538 len = sizeof(*ref) + name_len;
4539 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4550 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4551 * DIR_ITEM/DIR_INDEX.
4553 * @root: the root of the fs/file tree
4554 * @ref_key: the key of the INODE_EXTREF
4555 * @refs: the count of INODE_EXTREF
4556 * @mode: the st_mode of INODE_ITEM
4558 * Return 0 if no error occurred.
4560 static int check_inode_extref(struct btrfs_root *root,
4561 struct btrfs_key *ref_key,
4562 struct extent_buffer *node, int slot, u64 *refs,
4565 struct btrfs_key key;
4566 struct btrfs_key location;
4567 struct btrfs_inode_extref *extref;
4568 char namebuf[BTRFS_NAME_LEN] = {0};
4578 location.objectid = ref_key->objectid;
4579 location.type = BTRFS_INODE_ITEM_KEY;
4580 location.offset = 0;
4582 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4583 total = btrfs_item_size_nr(node, slot);
4586 /* update inode ref count */
4588 name_len = btrfs_inode_extref_name_len(node, extref);
4589 index = btrfs_inode_extref_index(node, extref);
4590 parent = btrfs_inode_extref_parent(node, extref);
4591 if (name_len <= BTRFS_NAME_LEN) {
4594 len = BTRFS_NAME_LEN;
4595 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4596 root->objectid, ref_key->objectid, ref_key->offset);
4598 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4600 /* Check root dir ref name */
4601 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4602 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4603 root->objectid, ref_key->objectid, ref_key->offset,
4605 err |= ROOT_DIR_ERROR;
4608 /* find related dir_index */
4609 key.objectid = parent;
4610 key.type = BTRFS_DIR_INDEX_KEY;
4612 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4615 /* find related dir_item */
4616 key.objectid = parent;
4617 key.type = BTRFS_DIR_ITEM_KEY;
4618 key.offset = btrfs_name_hash(namebuf, len);
4619 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4622 len = sizeof(*extref) + name_len;
4623 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4633 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4634 * DIR_ITEM/DIR_INDEX match.
4635 * Return with @index_ret.
4637 * @root: the root of the fs/file tree
4638 * @key: the key of the INODE_REF/INODE_EXTREF
4639 * @name: the name in the INODE_REF/INODE_EXTREF
4640 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4641 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4642 * value (u64)-1 means do not check index
4643 * @ext_ref: the EXTENDED_IREF feature
4645 * Return 0 if no error occurred.
4646 * Return >0 for error bitmap
/*
 * Look for an INODE_REF entry under @key whose name equals @name; when
 * *@index_ret is not (u64)-1 the entry's index must equal it as well. The
 * code after the first loop repeats the lookup on the INODE_EXTREF item of
 * the same inode. On a match *@index_ret receives the entry's index.
 * Note that @key is modified in place (type and offset) for the
 * INODE_EXTREF lookup.
 */
4648 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4649 char *name, int namelen, u64 *index_ret,
4650 unsigned int ext_ref)
4652 struct btrfs_path path;
4653 struct btrfs_inode_ref *ref;
4654 struct btrfs_inode_extref *extref;
4655 struct extent_buffer *node;
4656 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4669 btrfs_init_path(&path);
4670 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4672 ret = INODE_REF_MISSING;
4676 node = path.nodes[0];
4677 slot = path.slots[0];
4679 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4680 total = btrfs_item_size_nr(node, slot);
4682 /* Iterate all entry of INODE_REF */
4683 while (cur < total) {
/* stays set unless this entry turns out to be the one we look for */
4684 ret = INODE_REF_MISSING;
4686 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4687 ref_index = btrfs_inode_ref_index(node, ref);
/* (u64)-1 in *index_ret means the index is not compared */
4688 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/*
 * A name that would run past the end of the item, or that is longer than
 * BTRFS_NAME_LEN, is warned about and read trimmed to what fits.
 */
4691 if (cur + sizeof(*ref) + ref_namelen > total ||
4692 ref_namelen > BTRFS_NAME_LEN) {
4693 warning("root %llu INODE %s[%llu %llu] name too long",
4695 key->type == BTRFS_INODE_REF_KEY ?
4697 key->objectid, key->offset);
4699 if (cur + sizeof(*ref) > total)
4701 len = min_t(u32, total - cur - sizeof(*ref),
/* the name bytes are stored right behind the btrfs_inode_ref header */
4707 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4710 if (len != namelen || strncmp(ref_namebuf, name, len))
4713 *index_ret = ref_index;
/* step to the next packed entry using the on-disk name length */
4717 len = sizeof(*ref) + ref_namelen;
4718 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4723 /* Skip if not support EXTENDED_IREF feature */
4727 btrfs_release_path(&path);
4728 btrfs_init_path(&path);
/*
 * The caller's key is rewritten here: key->offset is saved as dir_id and
 * replaced by the extref hash of (dir_id, name).
 */
4730 dir_id = key->offset;
4731 key->type = BTRFS_INODE_EXTREF_KEY;
4732 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4734 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4736 ret = INODE_REF_MISSING;
4740 node = path.nodes[0];
4741 slot = path.slots[0];
4743 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4745 total = btrfs_item_size_nr(node, slot);
4747 /* Iterate all entry of INODE_EXTREF */
4748 while (cur < total) {
4749 ret = INODE_REF_MISSING;
4751 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4752 ref_index = btrfs_inode_extref_index(node, extref);
4753 parent = btrfs_inode_extref_parent(node, extref);
4754 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* the extref's stored parent must be the directory saved in dir_id */
4757 if (parent != dir_id)
4760 if (ref_namelen <= BTRFS_NAME_LEN) {
4763 len = BTRFS_NAME_LEN;
4764 warning("root %llu INODE %s[%llu %llu] name too long",
4766 key->type == BTRFS_INODE_REF_KEY ?
4768 key->objectid, key->offset);
4770 read_extent_buffer(node, ref_namebuf,
4771 (unsigned long)(extref + 1), len);
4773 if (len != namelen || strncmp(ref_namebuf, name, len))
4776 *index_ret = ref_index;
/* step to the next packed entry using the on-disk name length */
4781 len = sizeof(*extref) + ref_namelen;
4782 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4787 btrfs_release_path(&path);
4791 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4792 u64 ino, u64 index, const char *namebuf,
4793 int name_len, u8 filetype, int err)
4795 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4796 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4797 root->objectid, key->objectid, key->offset, namebuf,
4799 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4802 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4803 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4804 root->objectid, key->objectid, index, namebuf, filetype,
4805 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4808 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4810 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4811 root->objectid, ino, index, namebuf, filetype,
4812 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4815 if (err & INODE_REF_MISSING)
4817 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4818 root->objectid, ino, key->objectid, namebuf, filetype);
4823 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
4825 * Returns error after repair
4827 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
4828 u64 index, u8 filetype, char *namebuf, u32 name_len,
4833 if (err & INODE_ITEM_MISSING) {
4834 ret = repair_inode_item_missing(root, ino, filetype);
4836 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
4839 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
4840 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
4841 name_len, filetype, err);
4843 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
4844 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
4845 err &= ~(INODE_REF_MISSING);
4851 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
4854 struct btrfs_key key;
4855 struct btrfs_path path;
4857 struct btrfs_dir_item *di;
4867 key.offset = (u64)-1;
4869 btrfs_init_path(&path);
4870 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4875 /* if found, go to spacial case */
4880 ret = btrfs_previous_item(root, &path, ino, type);
4888 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
4890 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
4892 while (cur < total) {
4893 len = btrfs_dir_name_len(path.nodes[0], di);
4894 if (len > BTRFS_NAME_LEN)
4895 len = BTRFS_NAME_LEN;
4898 len += btrfs_dir_data_len(path.nodes[0], di);
4900 di = (struct btrfs_dir_item *)((char *)di + len);
4906 btrfs_release_path(&path);
4910 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
4917 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
4921 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
4925 *size = item_size + index_size;
4929 error("failed to count root %llu INODE[%llu] root size",
4930 root->objectid, ino);
4935 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4936 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4938 * @root: the root of the fs/file tree
4939 * @di_key: the key of the DIR_ITEM/DIR_INDEX
4941 * @size: the st_size of the INODE_ITEM
4942 * @ext_ref: the EXTENDED_IREF feature
4944 * Return 0 if no error occurred.
4945 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Walk the entries of one DIR_ITEM/DIR_INDEX item. For each entry the code
 * looks up the INODE_ITEM it points to and compares its file type, looks up
 * the INODE_REF/INODE_EXTREF via find_inode_ref() and the paired dir entry
 * via find_dir_item(). With the global 'repair' set, repair_dir_item() is
 * called for entries with errors. *@size is increased by every entry's name
 * length.
 */
4947 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
4948 struct btrfs_path *path, u64 *size,
4949 unsigned int ext_ref)
4951 struct btrfs_dir_item *di;
4952 struct btrfs_inode_item *ii;
4953 struct btrfs_key key;
4954 struct btrfs_key location;
4955 struct extent_buffer *node;
4957 char namebuf[BTRFS_NAME_LEN] = {0};
4969 int need_research = 0;
4972 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4973 * ignore index check.
4975 if (di_key->type == BTRFS_DIR_INDEX_KEY)
4976 index = di_key->offset;
4983 /* since after repair, path and the dir item may be changed */
4984 if (need_research) {
/* tells the caller that the directory size has to be counted again */
4986 err |= DIR_COUNT_AGAIN;
4987 btrfs_release_path(path);
4988 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
4989 /* the item was deleted, let path point the last checked item */
4991 if (path->slots[0] == 0)
4992 btrfs_prev_leaf(root, path);
5000 node = path->nodes[0];
5001 slot = path->slots[0];
5003 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5004 total = btrfs_item_size_nr(node, slot);
/* namebuf is a char array, so the division by sizeof(*namebuf) is by 1 */
5005 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5007 while (cur < total) {
5008 data_len = btrfs_dir_data_len(node, di);
5011 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5013 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5014 di_key->objectid, di_key->offset, data_len);
5016 name_len = btrfs_dir_name_len(node, di);
/* names longer than BTRFS_NAME_LEN are read trimmed, with a warning */
5017 if (name_len <= BTRFS_NAME_LEN) {
5020 len = BTRFS_NAME_LEN;
5021 warning("root %llu %s[%llu %llu] name too long",
5023 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5024 di_key->objectid, di_key->offset);
/* the size is accumulated with the on-disk length, not the trimmed one */
5026 (*size) += name_len;
5027 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5029 filetype = btrfs_dir_type(node, di);
/* a DIR_ITEM key's offset has to equal the hash of the name it stores */
5031 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5032 di_key->offset != btrfs_name_hash(namebuf, len)) {
5034 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5035 root->objectid, di_key->objectid, di_key->offset,
5036 namebuf, len, filetype, di_key->offset,
5037 btrfs_name_hash(namebuf, len));
5040 btrfs_dir_item_key_to_cpu(node, di, &location);
5041 /* Ignore related ROOT_ITEM check */
5042 if (location.type == BTRFS_ROOT_ITEM_KEY)
/* 'path' is reused for the inode lookup, so the dir item is released */
5045 btrfs_release_path(path);
5046 /* Check relative INODE_ITEM(existence/filetype) */
5047 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5049 tmp_err |= INODE_ITEM_MISSING;
5053 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5054 struct btrfs_inode_item);
5055 mode = btrfs_inode_mode(path->nodes[0], ii);
5056 if (imode_to_type(mode) != filetype) {
5057 tmp_err |= INODE_ITEM_MISMATCH;
5061 /* Check relative INODE_REF/INODE_EXTREF */
5062 key.objectid = location.objectid;
5063 key.type = BTRFS_INODE_REF_KEY;
5064 key.offset = di_key->objectid;
5065 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5068 /* check relative INDEX/ITEM */
5069 key.objectid = di_key->objectid;
/*
 * NOTE(review): key.type was set to BTRFS_INODE_REF_KEY just above, and
 * find_inode_ref() may rewrite it to BTRFS_INODE_EXTREF_KEY, so this test
 * looks like it can never be true. di_key->type was presumably intended;
 * please confirm.
 */
5070 if (key.type == BTRFS_DIR_ITEM_KEY) {
5071 key.type = BTRFS_DIR_INDEX_KEY;
5074 key.type = BTRFS_DIR_ITEM_KEY;
5075 key.offset = btrfs_name_hash(namebuf, name_len);
5078 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5079 name_len, filetype);
5080 /* find_dir_item may find index */
5081 if (key.type == BTRFS_DIR_INDEX_KEY)
5085 if (tmp_err && repair) {
5086 ret = repair_dir_item(root, di_key->objectid,
5087 location.objectid, index,
5088 imode_to_type(mode), namebuf,
/* repair_dir_item() returns the remaining errors; a change means it acted */
5090 if (ret != tmp_err) {
5095 btrfs_release_path(path);
5096 print_dir_item_err(root, di_key, location.objectid, index,
5097 namebuf, name_len, filetype, tmp_err);
/* step to the next packed entry using the on-disk lengths */
5099 len = sizeof(*di) + name_len + data_len;
5100 di = (struct btrfs_dir_item *)((char *)di + len);
5103 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5104 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5105 root->objectid, di_key->objectid,
/* point 'path' back at the dir item before returning to the caller */
5112 btrfs_release_path(path);
5113 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5115 err |= ret > 0 ? -ENOENT : ret;
5120 * Wrapper function of btrfs_punch_hole.
5122 * Returns 0 means success.
5123 * Returns not 0 means error.
5125 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5128 struct btrfs_trans_handle *trans;
5131 trans = btrfs_start_transaction(root, 1);
5133 return PTR_ERR(trans);
5135 ret = btrfs_punch_hole(trans, root, ino, start, len);
5137 error("failed to add hole [%llu, %llu] in inode [%llu]",
5140 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5143 btrfs_commit_transaction(trans, root);
5148 * Check file extent datasum/hole, update the size of the file extents,
5149 * check and update the last offset of the file extent.
5151 * @root: the root of fs/file tree.
5152 * @fkey: the key of the file extent.
5153 * @nodatasum: INODE_NODATASUM feature.
5154 * @size: the sum of all EXTENT_DATA items size for this inode.
5155 * @end: the offset of the last extent.
5157 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item. Inline extents are checked for an empty or
 * inconsistent inline size. Regular and preallocated extents are checked
 * for csum coverage and for a gap between *@end and the item's file offset.
 * *@end and *@size are updated with the extent's length along the way.
 */
5159 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5160 struct extent_buffer *node, int slot,
5161 unsigned int nodatasum, u64 *size, u64 *end)
5163 struct btrfs_file_extent_item *fi;
5166 u64 extent_num_bytes;
5168 u64 csum_found; /* In byte size, sectorsize aligned */
5169 u64 search_start; /* Logical range start we search for csum */
5170 u64 search_len; /* Logical range len we search for csum */
5171 unsigned int extent_type;
5172 unsigned int is_hole;
5177 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5179 /* Check inline extent */
5180 extent_type = btrfs_file_extent_type(node, fi);
5181 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5182 struct btrfs_item *e = btrfs_item_nr(slot);
5183 u32 item_inline_len;
5185 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5186 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5187 compressed = btrfs_file_extent_compression(node, fi);
5188 if (extent_num_bytes == 0) {
5190 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5191 root->objectid, fkey->objectid, fkey->offset);
5192 err |= FILE_EXTENT_ERROR;
/* the two lengths are only required to agree for uncompressed inline data */
5194 if (!compressed && extent_num_bytes != item_inline_len) {
5196 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5197 root->objectid, fkey->objectid, fkey->offset,
5198 extent_num_bytes, item_inline_len);
5199 err |= FILE_EXTENT_ERROR;
5201 *end += extent_num_bytes;
5202 *size += extent_num_bytes;
5206 /* Check extent type */
5207 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5208 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5209 err |= FILE_EXTENT_ERROR;
5210 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5211 root->objectid, fkey->objectid, fkey->offset);
5215 /* Check REG_EXTENT/PREALLOC_EXTENT */
5216 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5217 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5218 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5219 extent_offset = btrfs_file_extent_offset(node, fi);
5220 compressed = btrfs_file_extent_compression(node, fi);
/* an extent with neither a disk address nor a disk length is a hole */
5221 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5224 * Check EXTENT_DATA csum
5226 * For plain (uncompressed) extent, we should only check the range
5227 * we're referring to, as it's possible that part of prealloc extent
5228 * has been written, and has csum:
5230 * |<--- Original large preallocated extent A ---->|
5231 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5234 * For compressed extent, we should check the whole range.
5237 search_start = disk_bytenr + extent_offset;
5238 search_len = extent_num_bytes;
5240 search_start = disk_bytenr;
5241 search_len = disk_num_bytes;
/*
 * NOTE(review): csum_found has no initialiser and is read below even when
 * count_csum_range() fails; confirm that the callee always stores a value.
 */
5243 ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
5244 if (csum_found > 0 && nodatasum) {
5245 err |= ODD_CSUM_ITEM;
5246 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5247 root->objectid, fkey->objectid, fkey->offset);
/* a regular, non-hole extent with datasum needs csums for the whole range */
5248 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5249 !is_hole && (ret < 0 || csum_found < search_len)) {
5250 err |= CSUM_ITEM_MISSING;
5251 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5252 root->objectid, fkey->objectid, fkey->offset,
5253 csum_found, search_len);
5254 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5255 err |= ODD_CSUM_ITEM;
5256 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5257 root->objectid, fkey->objectid, fkey->offset, csum_found);
5260 /* Check EXTENT_DATA hole */
/*
 * *end is the offset the caller expects this extent to start at; when it
 * differs from fkey->offset, the range between them is not covered.
 */
5261 if (!no_holes && *end != fkey->offset) {
5263 ret = punch_extent_hole(root, fkey->objectid,
5264 *end, fkey->offset - *end);
5265 if (!repair || ret) {
5266 err |= FILE_EXTENT_ERROR;
5268 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5269 root->objectid, fkey->objectid, fkey->offset,
5270 fkey->objectid, *end);
5274 *end += extent_num_bytes;
5276 *size += extent_num_bytes;
5282 * Set inode item nbytes to @nbytes
5284 * Returns 0 on success
5285 * Returns != 0 on error
5287 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5288 struct btrfs_path *path,
5289 u64 ino, u64 nbytes)
5291 struct btrfs_trans_handle *trans;
5292 struct btrfs_inode_item *ii;
5293 struct btrfs_key key;
5294 struct btrfs_key research_key;
5298 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5301 key.type = BTRFS_INODE_ITEM_KEY;
5304 trans = btrfs_start_transaction(root, 1);
5305 if (IS_ERR(trans)) {
5306 ret = PTR_ERR(trans);
5311 btrfs_release_path(path);
5312 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5320 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5321 struct btrfs_inode_item);
5322 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5323 btrfs_mark_buffer_dirty(path->nodes[0]);
5325 btrfs_commit_transaction(trans, root);
5328 error("failed to set nbytes in inode %llu root %llu",
5329 ino, root->root_key.objectid);
5331 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5332 root->root_key.objectid, nbytes);
5335 btrfs_release_path(path);
5336 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5343 * Set directory inode isize to @isize.
5345 * Returns 0 on success.
5346 * Returns != 0 on error.
5348 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5349 struct btrfs_path *path,
5352 struct btrfs_trans_handle *trans;
5353 struct btrfs_inode_item *ii;
5354 struct btrfs_key key;
5355 struct btrfs_key research_key;
5359 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5362 key.type = BTRFS_INODE_ITEM_KEY;
5365 trans = btrfs_start_transaction(root, 1);
5366 if (IS_ERR(trans)) {
5367 ret = PTR_ERR(trans);
5372 btrfs_release_path(path);
5373 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5381 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5382 struct btrfs_inode_item);
5383 btrfs_set_inode_size(path->nodes[0], ii, isize);
5384 btrfs_mark_buffer_dirty(path->nodes[0]);
5386 btrfs_commit_transaction(trans, root);
5389 error("failed to set isize in inode %llu root %llu",
5390 ino, root->root_key.objectid);
5392 printf("Set isize in inode %llu root %llu to %llu\n",
5393 ino, root->root_key.objectid, isize);
5395 btrfs_release_path(path);
5396 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5403 * Wrapper function for btrfs_add_orphan_item().
5405 * Returns 0 on success.
5406 * Returns != 0 on error.
5408 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5409 struct btrfs_path *path, u64 ino)
5411 struct btrfs_trans_handle *trans;
5412 struct btrfs_key research_key;
5416 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5418 trans = btrfs_start_transaction(root, 1);
5419 if (IS_ERR(trans)) {
5420 ret = PTR_ERR(trans);
5425 btrfs_release_path(path);
5426 ret = btrfs_add_orphan_item(trans, root, path, ino);
5428 btrfs_commit_transaction(trans, root);
5431 error("failed to add inode %llu as orphan item root %llu",
5432 ino, root->root_key.objectid);
5434 printf("Added inode %llu as orphan item root %llu\n",
5435 ino, root->root_key.objectid);
5437 btrfs_release_path(path);
5438 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5444 /* Set inode_item nlink to @ref_count.
5445 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5447 * Returns 0 on success
5449 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5450 struct btrfs_path *path, u64 ino,
5451 const char *name, u32 namelen,
5452 u64 ref_count, u8 filetype, u64 *nlink)
5454 struct btrfs_trans_handle *trans;
5455 struct btrfs_inode_item *ii;
5456 struct btrfs_key key;
5457 struct btrfs_key old_key;
5458 char namebuf[BTRFS_NAME_LEN] = {0};
5464 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5466 if (name && namelen) {
5467 ASSERT(namelen <= BTRFS_NAME_LEN);
5468 memcpy(namebuf, name, namelen);
5471 sprintf(namebuf, "%llu", ino);
5472 name_len = count_digits(ino);
5473 printf("Can't find file name for inode %llu, use %s instead\n",
5477 trans = btrfs_start_transaction(root, 1);
5478 if (IS_ERR(trans)) {
5479 ret = PTR_ERR(trans);
5483 btrfs_release_path(path);
5484 /* if refs is 0, put it into lostfound */
5485 if (ref_count == 0) {
5486 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5487 name_len, filetype, &ref_count);
5492 /* reset inode_item's nlink to ref_count */
5494 key.type = BTRFS_INODE_ITEM_KEY;
5497 btrfs_release_path(path);
5498 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5504 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5505 struct btrfs_inode_item);
5506 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5507 btrfs_mark_buffer_dirty(path->nodes[0]);
5512 btrfs_commit_transaction(trans, root);
5516 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5517 root->objectid, ino, namebuf, filetype);
5519 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5520 root->objectid, ino, namebuf, filetype);
5523 btrfs_release_path(path);
5524 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5531 * Check INODE_ITEM and related ITEMs (the same inode number)
5532 * 1. check link count
5533 * 2. check inode ref/extref
5534 * 3. check dir item/index
5536 * @ext_ref: the EXTENDED_IREF feature
5538 * Return 0 if no error occurred.
5539 * Return >0 if an error occurred or the end of the traversal was hit (reported via the error bitmap)
/*
 * Lowmem-mode check of one INODE_ITEM and of the items that follow it with
 * the same objectid.
 *
 * The inode item at the current @path position supplies isize, nbytes, mode,
 * nlink and the NODATASUM flag.  The path is then advanced with
 * btrfs_next_item() and each following item is dispatched on its key type to
 * check_inode_ref(), check_inode_extref(), check_dir_item() or
 * check_file_extent().  Finally the gathered counters (refs, size,
 * extent_size, extent_end) are compared with the inode item, and when the
 * global 'repair' is set the matching *_lowmem repair helper is called.
 *
 * @root:    fs tree being checked
 * @path:    positioned on the INODE_ITEM on entry
 * @ext_ref: non-zero when the EXTENDED_IREF feature is enabled
 *
 * NOTE(review): this listing has lost its short lines (braces, labels,
 * several declarations and the final return), so the exact branching between
 * the statements below is not visible here -- confirm against the full file.
 */
5541 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5542 unsigned int ext_ref)
5544 struct extent_buffer *node;
5545 struct btrfs_inode_item *ii;
5546 struct btrfs_key key;
5547 struct btrfs_key last_key;
5556 u64 extent_size = 0;
5558 unsigned int nodatasum;
5562 char namebuf[BTRFS_NAME_LEN] = {0};
5565 node = path->nodes[0];
5566 slot = path->slots[0];
5568 btrfs_item_key_to_cpu(node, &key, slot);
5569 inode_id = key.objectid;
/* Items keyed by the orphan objectid are stepped over with btrfs_next_item(). */
5571 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5572 ret = btrfs_next_item(root, path);
/* Cache the inode item fields that the checks below compare against. */
5578 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5579 isize = btrfs_inode_size(node, ii);
5580 nbytes = btrfs_inode_nbytes(node, ii);
5581 mode = btrfs_inode_mode(node, ii);
5582 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5583 nlink = btrfs_inode_nlink(node, ii);
5584 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Remember the key we are leaving; it is searched again when LAST_ITEM is set. */
5587 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5588 ret = btrfs_next_item(root, path);
5590 /* out will fill 'err' using current statistics */
5592 } else if (ret > 0) {
5597 node = path->nodes[0];
5598 slot = path->slots[0];
5599 btrfs_item_key_to_cpu(node, &key, slot);
/* An item with a different objectid belongs to another inode. */
5600 if (key.objectid != inode_id)
5604 case BTRFS_INODE_REF_KEY:
5605 ret = check_inode_ref(root, &key, path, namebuf,
5606 &name_len, &refs, mode);
5609 case BTRFS_INODE_EXTREF_KEY:
5610 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5611 warning("root %llu EXTREF[%llu %llu] isn't supported",
5612 root->objectid, key.objectid,
5614 ret = check_inode_extref(root, &key, node, slot, &refs,
5618 case BTRFS_DIR_ITEM_KEY:
5619 case BTRFS_DIR_INDEX_KEY:
5621 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5622 root->objectid, inode_id,
5623 imode_to_type(mode), key.objectid,
5626 ret = check_dir_item(root, &key, path, &size, ext_ref);
5629 case BTRFS_EXTENT_DATA_KEY:
5631 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5632 root->objectid, inode_id, key.objectid,
5635 ret = check_file_extent(root, &key, node, slot,
5636 nodatasum, &extent_size,
5640 case BTRFS_XATTR_ITEM_KEY:
5643 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5644 key.objectid, key.type, key.offset);
/* When the walk ran off the end, go back to the last key that was visited. */
5649 if (err & LAST_ITEM) {
5650 btrfs_release_path(path);
5651 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5656 /* verify INODE_ITEM nlink/isize/nbytes */
/* A repair may have asked for the directory size to be counted again. */
5658 if (repair && (err & DIR_COUNT_AGAIN)) {
5659 err &= ~DIR_COUNT_AGAIN;
5660 count_dir_isize(root, inode_id, &size);
5663 if ((nlink != 1 || refs != 1) && repair) {
5664 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5665 namebuf, name_len, refs, imode_to_type(mode),
5670 err |= LINK_COUNT_ERROR;
5671 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5672 root->objectid, inode_id, nlink);
5676 * Just a warning, as dir inode nbytes is just an
5677 * instructive value.
5679 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5680 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5681 root->objectid, inode_id,
5682 root->fs_info->nodesize);
5685 if (isize != size) {
5687 ret = repair_dir_isize_lowmem(root, path,
5689 if (!repair || ret) {
5692 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5693 root->objectid, inode_id, isize, size);
5697 if (nlink != refs) {
5699 ret = repair_inode_nlinks_lowmem(root, path,
5700 inode_id, namebuf, name_len, refs,
5701 imode_to_type(mode), &nlink);
5702 if (!repair || ret) {
5703 err |= LINK_COUNT_ERROR;
5705 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5706 root->objectid, inode_id, nlink, refs);
5708 } else if (!nlink) {
5710 ret = repair_inode_orphan_item_lowmem(root,
5712 if (!repair || ret) {
5714 error("root %llu INODE[%llu] is orphan item",
5715 root->objectid, inode_id);
/* The file extents end before isize although holes must be explicit items. */
5719 if (!nbytes && !no_holes && extent_end < isize) {
5721 ret = punch_extent_hole(root, inode_id,
5722 extent_end, isize - extent_end);
5723 if (!repair || ret) {
5724 err |= NBYTES_ERROR;
5726 "root %llu INODE[%llu] size %llu should have a file extent hole",
5727 root->objectid, inode_id, isize);
5731 if (nbytes != extent_size) {
5733 ret = repair_inode_nbytes_lowmem(root, path,
5734 inode_id, extent_size);
5735 if (!repair || ret) {
5736 err |= NBYTES_ERROR;
5738 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5739 root->objectid, inode_id, nbytes,
5745 if (err & LAST_ITEM)
5746 btrfs_next_item(root, path);
5751 * Insert the missing inode item and inode ref.
5753 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5754 * Root dir should be handled specially because root dir is the root of fs.
5756 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (BTRFS_FIRST_FREE_OBJECTID, the root directory) of
 * @root according to the error bits in @err.
 *
 * INODE_REF_MISSING: start a transaction, search for the INODE_REF key
 * (objectid and offset both BTRFS_FIRST_FREE_OBJECTID) and insert a ".."
 * reference that points the inode at itself.  The bit is cleared from @err
 * on the success path.
 *
 * INODE_ITEM_MISSING: call repair_inode_item_missing() with filetype
 * BTRFS_FT_DIR; the bit is cleared on the success path.
 *
 * The comment that precedes this function in the file states that the
 * (possibly reduced) @err is returned.
 *
 * NOTE(review): braces, labels and the return statement are missing from
 * this listing; the branch structure is inferred from statement order only.
 */
5758 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5760 struct btrfs_trans_handle *trans;
5761 struct btrfs_key key;
5762 struct btrfs_path path;
5763 int filetype = BTRFS_FT_DIR;
5766 btrfs_init_path(&path);
5768 if (err & INODE_REF_MISSING) {
5769 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5770 key.type = BTRFS_INODE_REF_KEY;
5771 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5773 trans = btrfs_start_transaction(root, 1);
5774 if (IS_ERR(trans)) {
5775 ret = PTR_ERR(trans);
5779 btrfs_release_path(&path);
/* ins_len = 1, cow = 1: the search prepares the leaf for an insertion. */
5780 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5784 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5785 BTRFS_FIRST_FREE_OBJECTID,
5786 BTRFS_FIRST_FREE_OBJECTID, 0);
5790 printf("Add INODE_REF[%llu %llu] name %s\n",
5791 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5793 err &= ~INODE_REF_MISSING;
5796 error("fail to insert first inode's ref");
5797 btrfs_commit_transaction(trans, root);
5800 if (err & INODE_ITEM_MISSING) {
5801 ret = repair_inode_item_missing(root,
5802 BTRFS_FIRST_FREE_OBJECTID, filetype);
5805 err &= ~INODE_ITEM_MISSING;
5809 error("fail to repair first inode");
5810 btrfs_release_path(&path);
5815 * check first root dir's inode_item and inode_ref
5817 * returns 0 means no error
5818 * returns >0 means error
5819 * returns <0 means fatal error
/*
 * Check that the first inode of @root (objectid BTRFS_FIRST_FREE_OBJECTID)
 * has an INODE_ITEM of directory type and a ".." INODE_REF to itself.
 *
 * Problems are collected as INODE_ITEM_MISSING, INODE_ITEM_MISMATCH and
 * INODE_REF_MISSING bits in 'err'.  repair_fs_first_inode() is called with
 * that bitmap and its result replaces it; whatever remains set is reported
 * with error().
 *
 * @root:    fs tree to check
 * @ext_ref: passed through to find_inode_ref()
 *
 * Returns 'ret' when it is negative, otherwise the remaining error bitmap
 * (0 means no error).
 *
 * NOTE(review): the conditions guarding several statements (for example the
 * call to repair_fs_first_inode()) are on lines missing from this listing.
 */
5821 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5823 struct btrfs_path path;
5824 struct btrfs_key key;
5825 struct btrfs_inode_item *ii;
5831 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5832 key.type = BTRFS_INODE_ITEM_KEY;
5835 /* For root being dropped, we don't need to check first inode */
5836 if (btrfs_root_refs(&root->root_item) == 0 &&
5837 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5838 BTRFS_FIRST_FREE_OBJECTID)
5841 btrfs_init_path(&path);
5842 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5847 err |= INODE_ITEM_MISSING;
/* The inode item exists: the root directory must be of directory type. */
5849 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5850 struct btrfs_inode_item);
5851 mode = btrfs_inode_mode(path.nodes[0], ii);
5852 if (imode_to_type(mode) != BTRFS_FT_DIR)
5853 err |= INODE_ITEM_MISMATCH;
5856 /* lookup first inode ref */
5857 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5858 key.type = BTRFS_INODE_REF_KEY;
5859 /* special index value */
5862 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5868 btrfs_release_path(&path);
5871 err = repair_fs_first_inode(root, err);
5873 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5874 error("root dir INODE_ITEM is %s",
5875 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5876 if (err & INODE_REF_MISSING)
5877 error("root dir INODE_REF is missing");
/* A negative ret (fatal) takes precedence over the error bitmap. */
5879 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in its rb-tree of backrefs.
 *
 * A stack 'match' entry is filled in as the search key (the visible lines set
 * match.parent = @parent together with full_backref = 1) and handed to
 * rb_search() with compare_extent_backref() as the comparator.  A hit is
 * converted back to its enclosing struct tree_backref.
 *
 * 'back' starts out as NULL and is only assigned from a search hit.
 *
 * NOTE(review): the initializer of 'match', the condition selecting the
 * @parent form, the @root form and the return statement are on lines missing
 * from this listing.
 */
5882 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5883 u64 parent, u64 root)
5885 struct rb_node *node;
5886 struct tree_backref *back = NULL;
5887 struct tree_backref match = {
5894 match.parent = parent;
5895 match.node.full_backref = 1;
5900 node = rb_search(&rec->backref_tree, &match.node.node,
5901 (rb_compare_keys)compare_extent_backref, NULL);
5903 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in its rb-tree of backrefs.
 *
 * The search key 'match' carries @found_ref and @disk_bytenr (visible in the
 * initializer) and, in the visible branch, @parent with full_backref = 1.
 * rb_search() is called with compare_extent_backref() and a hit is converted
 * back to its enclosing struct data_backref.
 *
 * 'back' starts out as NULL and is only assigned from a search hit.
 *
 * NOTE(review): part of the parameter list, the rest of the 'match'
 * initializer, the branch conditions and the return statement are on lines
 * missing from this listing.
 */
5908 static struct data_backref *find_data_backref(struct extent_record *rec,
5909 u64 parent, u64 root,
5910 u64 owner, u64 offset,
5912 u64 disk_bytenr, u64 bytes)
5914 struct rb_node *node;
5915 struct data_backref *back = NULL;
5916 struct data_backref match = {
5923 .found_ref = found_ref,
5924 .disk_bytenr = disk_bytenr,
5928 match.parent = parent;
5929 match.node.full_backref = 1;
5934 node = rb_search(&rec->backref_tree, &match.node.node,
5935 (rb_compare_keys)compare_extent_backref, NULL);
5937 back = to_data_backref(rb_node_to_extent_backref(node));
5942 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
5943 * blocks and integrity of fs tree items.
5945 * @root: the root of the tree to be checked.
5946 * @ext_ref: whether the EXTENDED_IREF feature is enabled.
5947 * @account if NOT 0 means check the tree (including tree)'s treeblocks.
5948 * otherwise means check fs tree(s) items relationship and
5949 * @root MUST be a fs tree root.
5950 * Returns 0 represents OK.
5951 * Returns not 0 represents error.
/*
 * Walk one tree with walk_down_tree_v2()/walk_up_tree_v2().
 *
 * check_fs_first_inode() is called first because, as the comment below
 * explains, the traversal would never visit a missing first inode item.
 *
 * The starting path is either the root node at slot 0 (root refs > 0, or no
 * drop progress recorded) or the position found by searching for the
 * root item's drop_progress key at drop_level.
 *
 * @trans:   transaction handle passed on to walk_down_tree_v2(); may be NULL
 *           (check_fs_root_v2() passes NULL)
 * @root:    tree to walk
 * @ext_ref: passed on to the per-item checks
 *
 * NOTE(review): the last parameter line is missing from this listing; the
 * walk is given a 'check_all' value that is presumably that parameter.  The
 * loop around the walk calls and the return statement are missing as well.
 */
5953 static int check_btrfs_root(struct btrfs_trans_handle *trans,
5954 struct btrfs_root *root, unsigned int ext_ref,
5958 struct btrfs_path path;
5959 struct node_refs nrefs;
5960 struct btrfs_root_item *root_item = &root->root_item;
5965 memset(&nrefs, 0, sizeof(nrefs));
5968 * We need to manually check the first inode item (256)
5969 * As the following traversal function will only start from
5970 * the first inode item in the leaf, if inode item (256) is
5971 * missing we will skip it forever.
5973 ret = check_fs_first_inode(root, ext_ref);
5979 level = btrfs_header_level(root->node);
5980 btrfs_init_path(&path);
/* Live root, or a dropped root without recorded progress: start at the top. */
5982 if (btrfs_root_refs(root_item) > 0 ||
5983 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5984 path.nodes[level] = root->node;
5985 path.slots[level] = 0;
/* The path holds its own reference on the root node. */
5986 extent_buffer_get(root->node);
5988 struct btrfs_key key;
/* Otherwise resume from where the interrupted drop had got to. */
5990 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5991 level = root_item->drop_level;
5992 path.lowest_level = level;
5993 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6000 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6001 ext_ref, check_all);
6005 /* if ret is negative, walk shall stop */
6011 ret = walk_up_tree_v2(root, &path, &level);
6013 /* Normal exit, reset ret to err */
6020 btrfs_release_path(&path);
6025 * Iterate all items in the tree and call check_inode_item() to check.
6027 * @root: the root of the tree to be checked.
6028 * @ext_ref: the EXTENDED_IREF feature
6030 * Return 0 if no error found.
6031 * Return <0 for error.
/*
 * Lowmem-mode check of one fs tree: reset the cached block groups of the
 * filesystem, then run check_btrfs_root() on @root without a transaction
 * (NULL) and with 0 as the last argument.
 *
 * Returns the result of check_btrfs_root().
 */
6033 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6035 reset_cached_block_groups(root->fs_info);
6036 return check_btrfs_root(NULL, root, ext_ref, 0);
6040 * Find the relative ref for root_ref and root_backref.
6042 * @root: the root of the root tree.
6043 * @ref_key: the key of the root ref.
6045 * Return 0 if no error occurred.
/*
 * Check that a ROOT_REF or ROOT_BACKREF item has a matching counterpart.
 *
 * The item at @node/@slot is read (dirid, sequence, name), the counterpart
 * key is built by swapping objectid and offset and flipping the key type,
 * and that key is searched in @root.  A missing counterpart sets
 * ROOT_REF_MISSING.  A difference in dirid, sequence, name length or name
 * sets ROOT_REF_MISMATCH.
 *
 * Names longer than BTRFS_NAME_LEN are only read up to BTRFS_NAME_LEN bytes
 * and a warning is printed.
 *
 * @root:    the root tree
 * @ref_key: key of the item at @node/@slot
 *
 * NOTE(review): several declarations, the else branches and the return
 * statement are on lines missing from this listing.
 */
6047 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6048 struct extent_buffer *node, int slot)
6050 struct btrfs_path path;
6051 struct btrfs_key key;
6052 struct btrfs_root_ref *ref;
6053 struct btrfs_root_ref *backref;
6054 char ref_name[BTRFS_NAME_LEN] = {0};
6055 char backref_name[BTRFS_NAME_LEN] = {0};
6061 u32 backref_namelen;
6066 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6067 ref_dirid = btrfs_root_ref_dirid(node, ref);
6068 ref_seq = btrfs_root_ref_sequence(node, ref);
6069 ref_namelen = btrfs_root_ref_name_len(node, ref);
6071 if (ref_namelen <= BTRFS_NAME_LEN) {
6074 len = BTRFS_NAME_LEN;
6075 warning("%s[%llu %llu] ref_name too long",
6076 ref_key->type == BTRFS_ROOT_REF_KEY ?
6077 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are stored directly after the btrfs_root_ref structure. */
6080 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6082 /* Find relative root_ref */
6083 key.objectid = ref_key->offset;
/* REF + BACKREF - type yields the other one of the two key types. */
6084 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6085 key.offset = ref_key->objectid;
6087 btrfs_init_path(&path);
6088 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6090 err |= ROOT_REF_MISSING;
6091 error("%s[%llu %llu] couldn't find relative ref",
6092 ref_key->type == BTRFS_ROOT_REF_KEY ?
6093 "ROOT_REF" : "ROOT_BACKREF",
6094 ref_key->objectid, ref_key->offset);
6098 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6099 struct btrfs_root_ref);
6100 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6101 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6102 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6104 if (backref_namelen <= BTRFS_NAME_LEN) {
6105 len = backref_namelen;
6107 len = BTRFS_NAME_LEN;
6108 warning("%s[%llu %llu] ref_name too long",
6109 key.type == BTRFS_ROOT_REF_KEY ?
6110 "ROOT_REF" : "ROOT_BACKREF",
6111 key.objectid, key.offset);
6113 read_extent_buffer(path.nodes[0], backref_name,
6114 (unsigned long)(backref + 1), len);
/* 'len' is the counterpart's length here; the lengths are compared first. */
6116 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6117 ref_namelen != backref_namelen ||
6118 strncmp(ref_name, backref_name, len)) {
6119 err |= ROOT_REF_MISMATCH;
6120 error("%s[%llu %llu] mismatch relative ref",
6121 ref_key->type == BTRFS_ROOT_REF_KEY ?
6122 "ROOT_REF" : "ROOT_BACKREF",
6123 ref_key->objectid, ref_key->offset);
6126 btrfs_release_path(&path);
6131 * Check all fs/file tree in low_memory mode.
6133 * 1. for fs tree root item, call check_fs_root_v2()
6134 * 2. for fs tree root ref/backref, call check_root_ref()
6136 * Return 0 if no error occurred.
/*
 * Lowmem-mode check of every fs/subvolume tree.
 *
 * Starting at BTRFS_FS_TREE_OBJECTID, the items of the root tree are visited
 * with btrfs_next_item():
 *  - a ROOT_ITEM whose objectid satisfies fs_root_objectid() is read and
 *    passed to check_fs_root_v2();
 *  - ROOT_REF and ROOT_BACKREF items are passed to check_root_ref().
 * Objectids above BTRFS_LAST_FREE_OBJECTID are tested for explicitly.
 *
 * Relocation tree roots are read with btrfs_read_fs_root_no_cache() and
 * freed again with btrfs_free_fs_root() after the check; other roots come
 * from btrfs_read_fs_root().
 *
 * NOTE(review): the loop construct, the error accumulation and the return
 * statement are on lines missing from this listing.
 */
6138 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6140 struct btrfs_root *tree_root = fs_info->tree_root;
6141 struct btrfs_root *cur_root = NULL;
6142 struct btrfs_path path;
6143 struct btrfs_key key;
6144 struct extent_buffer *node;
6145 unsigned int ext_ref;
6150 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6152 btrfs_init_path(&path);
6153 key.objectid = BTRFS_FS_TREE_OBJECTID;
6155 key.type = BTRFS_ROOT_ITEM_KEY;
6157 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6161 } else if (ret > 0) {
6167 node = path.nodes[0];
6168 slot = path.slots[0];
6169 btrfs_item_key_to_cpu(node, &key, slot);
6170 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6172 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6173 fs_root_objectid(key.objectid)) {
6174 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6175 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6178 key.offset = (u64)-1;
6179 cur_root = btrfs_read_fs_root(fs_info, &key);
6182 if (IS_ERR(cur_root)) {
6183 error("Fail to read fs/subvol tree: %lld",
6189 ret = check_fs_root_v2(cur_root, ext_ref);
/* Roots read without the cache are freed here after the check. */
6192 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6193 btrfs_free_fs_root(cur_root);
6194 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6195 key.type == BTRFS_ROOT_BACKREF_KEY) {
6196 ret = check_root_ref(tree_root, &key, node, slot);
6200 ret = btrfs_next_item(tree_root, &path);
6210 btrfs_release_path(&path);
/*
 * Run the fs roots check in the selected mode.
 *
 * Prints "checking fs roots" to stderr unless ctx.progress_enabled is set,
 * then calls check_fs_roots_v2() when check_mode is CHECK_MODE_LOWMEM and
 * check_fs_roots() with @root_cache otherwise.
 *
 * NOTE(review): the 'else' and the return statement are on lines missing
 * from this listing.
 */
6214 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6215 struct cache_tree *root_cache)
6219 if (!ctx.progress_enabled)
6220 fprintf(stderr, "checking fs roots\n");
6221 if (check_mode == CHECK_MODE_LOWMEM)
6222 ret = check_fs_roots_v2(fs_info);
6224 ret = check_fs_roots(fs_info, root_cache);
/*
 * Verify every backref attached to @rec.
 *
 * The backrefs in rec->backref_tree are visited and the following
 * inconsistencies are detected, each with a diagnostic written to stderr:
 *  - the backref was not found in the extent tree;
 *  - a tree backref was never referenced back;
 *  - a data backref whose found_ref differs from num_refs;
 *  - a data backref whose disk_bytenr differs from rec->start;
 *  - a data backref whose bytes differ from rec->nr.
 * Data backrefs add their found_ref to 'found', which is finally compared
 * with rec->refs.
 *
 * NOTE(review): the lines that test @print_errs, set the error flag, count
 * tree backrefs and return are missing from this listing.
 * maybe_free_extent_rec() treats a zero result as "everything consistent" --
 * confirm against the full file.
 */
6229 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6231 struct extent_backref *back, *tmp;
6232 struct tree_backref *tback;
6233 struct data_backref *dback;
6237 rbtree_postorder_for_each_entry_safe(back, tmp,
6238 &rec->backref_tree, node) {
6239 if (!back->found_extent_tree) {
6243 if (back->is_data) {
6244 dback = to_data_backref(back);
6245 fprintf(stderr, "Data backref %llu %s %llu"
6246 " owner %llu offset %llu num_refs %lu"
6247 " not found in extent tree\n",
6248 (unsigned long long)rec->start,
6249 back->full_backref ?
6251 back->full_backref ?
6252 (unsigned long long)dback->parent:
6253 (unsigned long long)dback->root,
6254 (unsigned long long)dback->owner,
6255 (unsigned long long)dback->offset,
6256 (unsigned long)dback->num_refs);
6258 tback = to_tree_backref(back);
6259 fprintf(stderr, "Tree backref %llu parent %llu"
6260 " root %llu not found in extent tree\n",
6261 (unsigned long long)rec->start,
6262 (unsigned long long)tback->parent,
6263 (unsigned long long)tback->root);
/* Tree backref present in the extent tree but never seen from a tree block. */
6266 if (!back->is_data && !back->found_ref) {
6270 tback = to_tree_backref(back);
6271 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6272 (unsigned long long)rec->start,
6273 back->full_backref ? "parent" : "root",
6274 back->full_backref ?
6275 (unsigned long long)tback->parent :
6276 (unsigned long long)tback->root, back);
6278 if (back->is_data) {
6279 dback = to_data_backref(back);
6280 if (dback->found_ref != dback->num_refs) {
6284 fprintf(stderr, "Incorrect local backref count"
6285 " on %llu %s %llu owner %llu"
6286 " offset %llu found %u wanted %u back %p\n",
6287 (unsigned long long)rec->start,
6288 back->full_backref ?
6290 back->full_backref ?
6291 (unsigned long long)dback->parent:
6292 (unsigned long long)dback->root,
6293 (unsigned long long)dback->owner,
6294 (unsigned long long)dback->offset,
6295 dback->found_ref, dback->num_refs, back);
6297 if (dback->disk_bytenr != rec->start) {
6301 fprintf(stderr, "Backref disk bytenr does not"
6302 " match extent record, bytenr=%llu, "
6303 "ref bytenr=%llu\n",
6304 (unsigned long long)rec->start,
6305 (unsigned long long)dback->disk_bytenr);
6308 if (dback->bytes != rec->nr) {
6312 fprintf(stderr, "Backref bytes do not match "
6313 "extent backref, bytenr=%llu, ref "
6314 "bytes=%llu, backref bytes=%llu\n",
6315 (unsigned long long)rec->start,
6316 (unsigned long long)rec->nr,
6317 (unsigned long long)dback->bytes);
/* Accumulate the number of references found for the global comparison. */
6320 if (!back->is_data) {
6323 dback = to_data_backref(back);
6324 found += dback->found_ref;
6327 if (found != rec->refs) {
6331 fprintf(stderr, "Incorrect global backref count "
6332 "on %llu found %llu wanted %llu\n",
6333 (unsigned long long)rec->start,
6334 (unsigned long long)found,
6335 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs().
 * Converts @node back to its enclosing struct extent_backref.
 *
 * NOTE(review): the statement that releases 'back' is on a line missing from
 * this listing.
 */
6341 static void __free_one_backref(struct rb_node *node)
6343 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every backref attached to @rec by running __free_one_backref()
 * over rec->backref_tree through rb_free_nodes().
 */
6348 static void free_all_extent_backrefs(struct extent_record *rec)
6350 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: take the first cache entry, map it to its
 * struct extent_record, remove it from the cache and free its backrefs.
 *
 * NOTE(review): the loop, its termination test and the release of the record
 * itself are on lines missing from this listing.
 */
6353 static void free_extent_record_cache(struct cache_tree *extent_cache)
6355 struct cache_extent *cache;
6356 struct extent_record *rec;
6359 cache = first_cache_extent(extent_cache);
6362 rec = container_of(cache, struct extent_record, cache);
6363 remove_cache_extent(extent_cache, cache);
6364 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is removed only when all of the following hold: content and
 * owner ref were checked, the extent item ref count equals the found ref
 * count and is non-zero, there are no duplicates, all_backpointers_checked()
 * returns 0, and none of bad_full_backref, crossing_stripes or
 * wrong_chunk_type is set.  Removal unlinks the record from the cache and
 * from its list and frees its backrefs.
 *
 * NOTE(review): the release of the record itself and the return statement
 * are on lines missing from this listing.
 */
6369 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6370 struct extent_record *rec)
6372 if (rec->content_checked && rec->owner_ref_checked &&
6373 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6374 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6375 !rec->bad_full_backref && !rec->crossing_stripes &&
6376 !rec->wrong_chunk_type) {
6377 remove_cache_extent(extent_cache, &rec->cache);
6378 free_all_extent_backrefs(rec);
6379 list_del_init(&rec->list);
/*
 * Check that the owner recorded in the header of @buf is backed by a
 * reference.
 *
 * First the backrefs of @rec are scanned for a found, non-full tree backref
 * whose root equals btrfs_header_owner(@buf).  Otherwise the owner's fs tree
 * is read and searched (lowest_level = level + 1) for the first key of @buf;
 * the owner is confirmed when the node found at level + 1 points at
 * buf->start.
 *
 * Returns 0 when the owner reference was found, 1 otherwise (last visible
 * statement).
 *
 * NOTE(review): the early returns, the 'continue' statements and the
 * assignment to 'found' are on lines missing from this listing.
 */
6385 static int check_owner_ref(struct btrfs_root *root,
6386 struct extent_record *rec,
6387 struct extent_buffer *buf)
6389 struct extent_backref *node, *tmp;
6390 struct tree_backref *back;
6391 struct btrfs_root *ref_root;
6392 struct btrfs_key key;
6393 struct btrfs_path path;
6394 struct extent_buffer *parent;
6399 rbtree_postorder_for_each_entry_safe(node, tmp,
6400 &rec->backref_tree, node) {
6403 if (!node->found_ref)
6405 if (node->full_backref)
6407 back = to_tree_backref(node);
6408 if (btrfs_header_owner(buf) == back->root)
6411 BUG_ON(rec->is_root);
6413 /* try to find the block by search corresponding fs tree */
6414 key.objectid = btrfs_header_owner(buf);
6415 key.type = BTRFS_ROOT_ITEM_KEY;
6416 key.offset = (u64)-1;
6418 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6419 if (IS_ERR(ref_root))
6422 level = btrfs_header_level(buf);
/* The first key of the block is the search target (item key or node key). */
6424 btrfs_item_key_to_cpu(buf, &key, 0);
6426 btrfs_node_key_to_cpu(buf, &key, 0);
6428 btrfs_init_path(&path);
/* Stop the search one level above @buf so its parent pointer can be read. */
6429 path.lowest_level = level + 1;
6430 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6434 parent = path.nodes[level + 1];
6435 if (parent && buf->start == btrfs_node_blockptr(parent,
6436 path.slots[level + 1]))
6439 btrfs_release_path(&path);
6440 return found ? 0 : 1;
/*
 * Test whether @rec describes a block of the extent tree: its backrefs are
 * scanned for a tree backref whose root is BTRFS_EXTENT_TREE_OBJECTID.
 * Backrefs with full_backref set are tested for separately.
 *
 * NOTE(review): what is done for data and full backrefs, and the return
 * statements, are on lines missing from this listing.
 */
6443 static int is_extent_tree_record(struct extent_record *rec)
6445 struct extent_backref *node, *tmp;
6446 struct tree_backref *back;
6449 rbtree_postorder_for_each_entry_safe(node, tmp,
6450 &rec->backref_tree, node) {
6453 back = to_tree_backref(node);
6454 if (node->full_backref)
6456 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable block as a corrupt extent record.
 *
 * The extent record covering [start, start + len) is looked up in
 * @extent_cache and tested with is_extent_tree_record().  The record's
 * parent key is converted to CPU order and passed, with the range, to
 * btrfs_add_corrupt_extent_record().
 *
 * NOTE(review): the last parameter line (declaring 'start' and 'len') and
 * the early returns for "not cached" / "not an extent tree block" are on
 * lines missing from this listing.
 */
6463 static int record_bad_block_io(struct btrfs_fs_info *info,
6464 struct cache_tree *extent_cache,
6467 struct extent_record *rec;
6468 struct cache_extent *cache;
6469 struct btrfs_key key;
6471 cache = lookup_cache_extent(extent_cache, start, len);
6475 rec = container_of(cache, struct extent_record, cache);
6476 if (!is_extent_tree_record(rec))
6479 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6480 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of @buf.
 *
 * Node (header level != 0): the two btrfs_key_ptr entries are exchanged in
 * place, and the first key of the node is pushed up with
 * btrfs_fixup_low_keys().
 *
 * Leaf: the data of both items is copied out to temporary buffers, written
 * back to the other item's offset, the offset/size fields of the two item
 * headers are exchanged, and the keys are swapped through
 * btrfs_set_item_key_unsafe().
 *
 * NOTE(review): in the leaf branch item1_data (allocated with item1_size) is
 * written using item2_size, and item2_data using item1_size.  If the two
 * items differ in size this reads past the smaller temporary buffer --
 * confirm and fix against the full file.
 *
 * NOTE(review): allocation failure handling, the freeing of the temporary
 * buffers, the condition around the fixup and the return statement are on
 * lines missing from this listing.
 */
6483 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6484 struct extent_buffer *buf, int slot)
6486 if (btrfs_header_level(buf)) {
6487 struct btrfs_key_ptr ptr1, ptr2;
6489 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6490 sizeof(struct btrfs_key_ptr));
6491 read_extent_buffer(buf, &ptr2,
6492 btrfs_node_key_ptr_offset(slot + 1),
6493 sizeof(struct btrfs_key_ptr));
6494 write_extent_buffer(buf, &ptr1,
6495 btrfs_node_key_ptr_offset(slot + 1),
6496 sizeof(struct btrfs_key_ptr));
6497 write_extent_buffer(buf, &ptr2,
6498 btrfs_node_key_ptr_offset(slot),
6499 sizeof(struct btrfs_key_ptr));
6501 struct btrfs_disk_key key;
6502 btrfs_node_key(buf, &key, 0);
6503 btrfs_fixup_low_keys(root, path, &key,
6504 btrfs_header_level(buf) + 1);
6507 struct btrfs_item *item1, *item2;
6508 struct btrfs_key k1, k2;
6509 char *item1_data, *item2_data;
6510 u32 item1_offset, item2_offset, item1_size, item2_size;
6512 item1 = btrfs_item_nr(slot);
6513 item2 = btrfs_item_nr(slot + 1);
6514 btrfs_item_key_to_cpu(buf, &k1, slot);
6515 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6516 item1_offset = btrfs_item_offset(buf, item1);
6517 item2_offset = btrfs_item_offset(buf, item2);
6518 item1_size = btrfs_item_size(buf, item1);
6519 item2_size = btrfs_item_size(buf, item2);
6521 item1_data = malloc(item1_size);
6524 item2_data = malloc(item2_size);
6530 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6531 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6533 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6534 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6538 btrfs_set_item_offset(buf, item1, item2_offset);
6539 btrfs_set_item_offset(buf, item2, item1_offset);
6540 btrfs_set_item_size(buf, item1, item2_size);
6541 btrfs_set_item_size(buf, item2, item1_size);
/* btrfs_set_item_key_unsafe() acts on path->slots[0], so point it at each slot. */
6543 path->slots[0] = slot;
6544 btrfs_set_item_key_unsafe(root, path, &k2);
6545 path->slots[0] = slot + 1;
6546 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair the key order inside the block at path->lowest_level.
 *
 * Adjacent keys are read (node keys or item keys), compared with
 * btrfs_comp_cpu_keys(), and swap_values() is called for the pair at index
 * i; the buffer is then marked dirty.
 *
 * NOTE(review): the statements following the comparison (presumably skipping
 * pairs already in order), the error handling after swap_values() and the
 * return statement are on lines missing from this listing.
 *
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type, an empty
 * block makes "nritems - 1" wrap around in the loop bound -- confirm.
 */
6551 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6553 struct extent_buffer *buf;
6554 struct btrfs_key k1, k2;
6556 int level = path->lowest_level;
6559 buf = path->nodes[level];
6560 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6562 btrfs_node_key_to_cpu(buf, &k1, i);
6563 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6565 btrfs_item_key_to_cpu(buf, &k1, i);
6566 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6568 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6570 ret = swap_values(root, path, buf, i);
6573 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are accepted (see the comment below).  The deletion
 * is announced on stdout, the following item headers are moved down by one
 * with memmove_extent_buffer(), nritems is decremented and the buffer is
 * marked dirty.  The first key of the leaf is propagated upwards with
 * btrfs_fixup_low_keys().
 *
 * Only the item header array is moved by the visible code; the item's data
 * area is not touched.
 *
 * NOTE(review): the return values and the condition guarding the low-key
 * fixup are on lines missing from this listing.
 */
6579 static int delete_bogus_item(struct btrfs_root *root,
6580 struct btrfs_path *path,
6581 struct extent_buffer *buf, int slot)
6583 struct btrfs_key key;
6584 int nritems = btrfs_header_nritems(buf);
6586 btrfs_item_key_to_cpu(buf, &key, slot);
6588 /* These are all the keys we can deal with missing. */
6589 if (key.type != BTRFS_DIR_INDEX_KEY &&
6590 key.type != BTRFS_EXTENT_ITEM_KEY &&
6591 key.type != BTRFS_METADATA_ITEM_KEY &&
6592 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6593 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6596 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6597 (unsigned long long)key.objectid, key.type,
6598 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array. */
6599 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6600 btrfs_item_nr_offset(slot + 1),
6601 sizeof(struct btrfs_item) *
6602 (nritems - slot - 1));
6603 btrfs_set_header_nritems(buf, nritems - 1);
6605 struct btrfs_disk_key disk_key;
6607 btrfs_item_key(buf, &disk_key, 0);
6608 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6610 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * For item 0 the end of its data is compared with
 * BTRFS_LEAF_DATA_SIZE(); for every later item the end is compared with the
 * offset of the previous item.  If the end lies beyond the expected position,
 * delete_bogus_item() is tried and otherwise a "can't fix" message is written
 * to stderr.  If it lies short of it, the gap is computed as 'shift', the
 * item data is moved up by that many bytes and the item offset is rewritten.
 *
 * BUG_ON() enforces that this is only called for leaves (lowest_level == 0).
 *
 * NOTE(review): the handling after delete_bogus_item(), the test on 'shift',
 * the new offset argument and the return statement are on lines missing from
 * this listing.
 */
6614 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6616 struct extent_buffer *buf;
6620 /* We should only get this for leaves */
6621 BUG_ON(path->lowest_level);
6622 buf = path->nodes[0];
6624 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6625 unsigned int shift = 0, offset;
/* The first item's data must end exactly at the end of the leaf data area. */
6627 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6628 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6629 if (btrfs_item_end_nr(buf, i) >
6630 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6631 ret = delete_bogus_item(root, path, buf, i);
6634 fprintf(stderr, "item is off the end of the "
6635 "leaf, can't fix\n");
6639 shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6640 btrfs_item_end_nr(buf, i);
/* Every later item's data must end where the previous item's data starts. */
6641 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6642 btrfs_item_offset_nr(buf, i - 1)) {
6643 if (btrfs_item_end_nr(buf, i) >
6644 btrfs_item_offset_nr(buf, i - 1)) {
6645 ret = delete_bogus_item(root, path, buf, i);
6648 fprintf(stderr, "items overlap, can't fix\n");
6652 shift = btrfs_item_offset_nr(buf, i - 1) -
6653 btrfs_item_end_nr(buf, i);
6658 printf("Shifting item nr %d by %u bytes in block %llu\n",
6659 i, shift, (unsigned long long)buf->start);
6660 offset = btrfs_item_offset_nr(buf, i);
6661 memmove_extent_buffer(buf,
6662 btrfs_leaf_data(buf) + offset + shift,
6663 btrfs_leaf_data(buf) + offset,
6664 btrfs_item_size_nr(buf, i));
6665 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6667 btrfs_mark_buffer_dirty(buf);
6671 * We may have moved things, in which case we want to exit so we don't
6672 * write those changes out. Once we have proper abort functionality in
6673 * progs this can be changed to something nicer.
6680 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6681 * then just return -EIO.
/*
 * Try to repair a tree block that failed the basic block checks.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled.  Every root that references the block (found with
 * btrfs_find_all_roots()) is read, a transaction is started on it, and the
 * block is reached with a COW search for its first key with
 * skip_check_block set.  fix_key_order() or fix_item_offset() is then run on
 * the resulting path and the transaction is committed.
 *
 * The comment preceding this function in the file says -EIO is returned when
 * the block cannot be fixed.
 *
 * NOTE(review): error branches, the release of 'roots' and the return
 * statements are on lines missing from this listing.
 */
6683 static int try_to_fix_bad_block(struct btrfs_root *root,
6684 struct extent_buffer *buf,
6685 enum btrfs_tree_block_status status)
6687 struct btrfs_trans_handle *trans;
6688 struct ulist *roots;
6689 struct ulist_node *node;
6690 struct btrfs_root *search_root;
6691 struct btrfs_path path;
6692 struct ulist_iterator iter;
6693 struct btrfs_key root_key, key;
6696 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6697 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6700 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6704 btrfs_init_path(&path);
6705 ULIST_ITER_INIT(&iter);
6706 while ((node = ulist_next(roots, &iter))) {
6707 root_key.objectid = node->val;
6708 root_key.type = BTRFS_ROOT_ITEM_KEY;
6709 root_key.offset = (u64)-1;
6711 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6718 trans = btrfs_start_transaction(search_root, 0);
6719 if (IS_ERR(trans)) {
6720 ret = PTR_ERR(trans);
/* Search down to the level of the bad block, skipping the block sanity check. */
6724 path.lowest_level = btrfs_header_level(buf);
6725 path.skip_check_block = 1;
6726 if (path.lowest_level)
6727 btrfs_node_key_to_cpu(buf, &key, 0);
6729 btrfs_item_key_to_cpu(buf, &key, 0);
6730 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6733 btrfs_commit_transaction(trans, search_root);
6736 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6737 ret = fix_key_order(search_root, &path);
6738 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6739 ret = fix_item_offset(search_root, &path);
6741 btrfs_commit_transaction(trans, search_root);
6744 btrfs_release_path(&path);
6745 btrfs_commit_transaction(trans, search_root);
6748 btrfs_release_path(&path);
/*
 * Check one tree block and update its extent record.
 *
 * The record for [buf->start, buf->start + buf->len) is looked up in
 * @extent_cache and updated with the block's generation, the objectid of its
 * first key (when it has items) and its level.  The block is validated with
 * btrfs_check_leaf() or btrfs_check_node(); on failure
 * try_to_fix_bad_block() is attempted and "bad block" is written to stderr
 * if that does not yield a clean status.
 *
 * For a clean block content_checked is set, and owner_ref_checked is set
 * either directly (BTRFS_BLOCK_FLAG_FULL_BACKREF in @flags) or depending on
 * check_owner_ref().  maybe_free_extent_rec() is then given the chance to
 * drop the record.
 *
 * NOTE(review): the conditions on 'repair', the special return value that
 * signals a rescan (see the comment fragment below) and the return
 * statements are on lines missing from this listing.
 */
6752 static int check_block(struct btrfs_root *root,
6753 struct cache_tree *extent_cache,
6754 struct extent_buffer *buf, u64 flags)
6756 struct extent_record *rec;
6757 struct cache_extent *cache;
6758 struct btrfs_key key;
6759 enum btrfs_tree_block_status status;
6763 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6766 rec = container_of(cache, struct extent_record, cache);
6767 rec->generation = btrfs_header_generation(buf);
6769 level = btrfs_header_level(buf);
6770 if (btrfs_header_nritems(buf) > 0) {
6773 btrfs_item_key_to_cpu(buf, &key, 0);
6775 btrfs_node_key_to_cpu(buf, &key, 0);
6777 rec->info_objectid = key.objectid;
6779 rec->info_level = level;
6781 if (btrfs_is_leaf(buf))
6782 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6784 status = btrfs_check_node(root, &rec->parent_key, buf);
6786 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6788 status = try_to_fix_bad_block(root, buf, status);
6789 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6791 fprintf(stderr, "bad block %llu\n",
6792 (unsigned long long)buf->start);
6795 * Signal to callers we need to start the scan over
6796 * again since we'll have cowed blocks.
6801 rec->content_checked = 1;
6802 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6803 rec->owner_ref_checked = 1;
6805 ret = check_owner_ref(root, rec, buf);
6807 rec->owner_ref_checked = 1;
6811 maybe_free_extent_rec(extent_cache, rec);
/*
 * List-based lookup of a tree backref of @rec: walks the rec->backrefs list
 * and compares tree backrefs by @parent (for full backrefs) or by @root.
 *
 * NOTE(review): a static function with the same name and signature, based on
 * the rb-tree, appears earlier in this file.  Presumably this variant is
 * compiled out by preprocessor lines missing from this listing -- confirm.
 * The list advance, the 'continue'/'return' statements and the data-backref
 * skip are on missing lines as well.
 */
6816 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6817 u64 parent, u64 root)
6819 struct list_head *cur = rec->backrefs.next;
6820 struct extent_backref *node;
6821 struct tree_backref *back;
6823 while(cur != &rec->backrefs) {
6824 node = to_extent_backref(cur);
6828 back = to_tree_backref(node);
6830 if (!node->full_backref)
6832 if (parent == back->parent)
6835 if (node->full_backref)
6837 if (back->root == root)
/*
 * Allocate a tree backref for @rec.
 *
 * The embedded extent_backref node is zeroed.  Two forms are visible: one
 * stores @parent and sets full_backref = 1, the other sets full_backref = 0.
 *
 * NOTE(review): the allocation failure check, the condition choosing between
 * the two forms, the store of @root, the insertion into @rec and the return
 * statement are on lines missing from this listing.
 */
6845 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6846 u64 parent, u64 root)
6848 struct tree_backref *ref = malloc(sizeof(*ref));
6852 memset(&ref->node, 0, sizeof(ref->node));
6854 ref->parent = parent;
6855 ref->node.full_backref = 1;
6858 ref->node.full_backref = 0;
/*
 * List-based lookup of a data backref of @rec: walks the rec->backrefs list
 * and compares data backrefs by @parent (for full backrefs) or by
 * @root/@owner/@offset.  For the latter form an entry that already has
 * found_ref set is additionally compared on @bytes and @disk_bytenr when
 * @found_ref is non-zero.
 *
 * NOTE(review): a static function with the same name and signature, based on
 * the rb-tree, appears earlier in this file.  Presumably this variant is
 * compiled out by preprocessor lines missing from this listing -- confirm.
 * One parameter line, the list advance and the 'continue'/'return'
 * statements are on missing lines as well.
 */
6865 static struct data_backref *find_data_backref(struct extent_record *rec,
6866 u64 parent, u64 root,
6867 u64 owner, u64 offset,
6869 u64 disk_bytenr, u64 bytes)
6871 struct list_head *cur = rec->backrefs.next;
6872 struct extent_backref *node;
6873 struct data_backref *back;
6875 while(cur != &rec->backrefs) {
6876 node = to_extent_backref(cur);
6880 back = to_data_backref(node);
6882 if (!node->full_backref)
6884 if (parent == back->parent)
6887 if (node->full_backref)
6889 if (back->root == root && back->owner == owner &&
6890 back->offset == offset) {
6891 if (found_ref && node->found_ref &&
6892 (back->bytes != bytes ||
6893 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref for @rec.
 *
 * The embedded extent_backref node is zeroed and marked is_data.  Two forms
 * are visible: one stores @parent and sets full_backref = 1, the other
 * stores @offset and sets full_backref = 0.  'bytes' is set to max_size, and
 * rec->max_size is raised to max_size when that is larger.
 *
 * NOTE(review): the last parameter line (declaring max_size), the allocation
 * failure check, the remaining field stores, the insertion into @rec and the
 * return statement are on lines missing from this listing.
 */
6903 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6904 u64 parent, u64 root,
6905 u64 owner, u64 offset,
6908 struct data_backref *ref = malloc(sizeof(*ref));
6912 memset(&ref->node, 0, sizeof(ref->node));
6913 ref->node.is_data = 1;
6916 ref->parent = parent;
6919 ref->node.full_backref = 1;
6923 ref->offset = offset;
6924 ref->node.full_backref = 0;
6926 ref->bytes = max_size;
6929 if (max_size > rec->max_size)
6930 rec->max_size = max_size;
6934 /* Check if the type of extent matches with its chunk */
/*
 * Looks up the block group for rec->start through global_info and sets
 * rec->wrong_chunk_type when the block group flags disagree with the extent:
 *  - a data extent (!rec->metadata) needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent needs SYSTEM or METADATA, and the choice between the
 *    two is derived from the first node of rec->backref_tree.
 */
6935 static void check_extent_type(struct extent_record *rec)
6937 struct btrfs_block_group_cache *bg_cache;
6939 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6943 /* data extent, check chunk directly*/
6944 if (!rec->metadata) {
6945 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6946 rec->wrong_chunk_type = 1;
6950 /* metadata extent, check the obvious case first */
6951 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6952 BTRFS_BLOCK_GROUP_METADATA))) {
6953 rec->wrong_chunk_type = 1;
6958 * Check SYSTEM extent, as it's also marked as metadata, we can only
6959 * make sure it's a SYSTEM extent by its backref
6961 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6962 struct extent_backref *node;
6963 struct tree_backref *tback;
/* Only the first backref in the tree is inspected. */
6966 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6967 if (node->is_data) {
6968 /* tree block shouldn't have data backref */
6969 rec->wrong_chunk_type = 1;
6972 tback = container_of(node, struct tree_backref, node);
/* A backref rooted in the chunk tree implies a SYSTEM block group. */
6974 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6975 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6977 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6978 if (!(bg_cache->flags & bg_type))
6979 rec->wrong_chunk_type = 1;
6984 * Allocate a new extent record, fill default values from @tmpl and insert it
6985 * into @extent_cache. The caller is supposed to make sure the range [start,nr)
6986 * is not already in the cache, otherwise the insertion fails.
/*
 * @extent_cache: cache tree the new record is inserted into
 * @tmpl:         template; max_size must be non-zero (BUG_ON otherwise)
 *
 * Fields copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key.  The remaining flags are reset here.
 * Adds rec->nr to the global bytes_used counter and runs the stripe-crossing
 * and chunk-type checks on the new record.
 */
6988 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6989 struct extent_record *tmpl)
6991 struct extent_record *rec;
6994 BUG_ON(tmpl->max_size == 0);
6995 rec = malloc(sizeof(*rec));
6998 rec->start = tmpl->start;
6999 rec->max_size = tmpl->max_size;
/* nr is never smaller than max_size. */
7000 rec->nr = max(tmpl->nr, tmpl->max_size);
7001 rec->found_rec = tmpl->found_rec;
7002 rec->content_checked = tmpl->content_checked;
7003 rec->owner_ref_checked = tmpl->owner_ref_checked;
7004 rec->num_duplicates = 0;
7005 rec->metadata = tmpl->metadata;
7006 rec->flag_block_full_backref = FLAG_UNSET;
7007 rec->bad_full_backref = 0;
7008 rec->crossing_stripes = 0;
7009 rec->wrong_chunk_type = 0;
7010 rec->is_root = tmpl->is_root;
7011 rec->refs = tmpl->refs;
7012 rec->extent_item_refs = tmpl->extent_item_refs;
7013 rec->parent_generation = tmpl->parent_generation;
7014 INIT_LIST_HEAD(&rec->backrefs);
7015 INIT_LIST_HEAD(&rec->dups);
7016 INIT_LIST_HEAD(&rec->list);
7017 rec->backref_tree = RB_ROOT;
7018 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache entry is sized by tmpl->nr, not by the max() stored in rec->nr. */
7019 rec->cache.start = tmpl->start;
7020 rec->cache.size = tmpl->nr;
7021 ret = insert_cache_extent(extent_cache, &rec->cache);
7026 bytes_used += rec->nr;
/* The stripe-crossing check uses nodesize as the extent length. */
7029 rec->crossing_stripes = check_crossing_stripes(global_info,
7030 rec->start, global_info->nodesize);
7031 check_extent_type(rec);
7036 * Look up and modify an extent; some values of @tmpl are interpreted verbatim,
7038 * - refs - if found, increase refs
7039 * - is_root - if found, set
7040 * - content_checked - if found, set
7041 * - owner_ref_checked - if found, set
7043 * If not found, create a new one, initialize and insert.
/*
 * @extent_cache: cache tree holding the extent records
 * @tmpl:         template describing the extent being added
 *
 * Looks up [tmpl->start, tmpl->nr) in @extent_cache.  On the found path the
 * existing record is updated from @tmpl, and a template with found_rec set
 * that has a different start, or hits a record that already has found_rec,
 * is queued as a duplicate on rec->dups.  The function also calls
 * add_extent_rec_nolookup() to create a fresh record.
 */
7045 static int add_extent_rec(struct cache_tree *extent_cache,
7046 struct extent_record *tmpl)
7048 struct extent_record *rec;
7049 struct cache_extent *cache;
7053 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7055 rec = container_of(cache, struct extent_record, cache);
7059 rec->nr = max(tmpl->nr, tmpl->max_size);
7062 * We need to make sure to reset nr to whatever the extent
7063 * record says was the real size, this way we can compare it to
7066 if (tmpl->found_rec) {
7067 if (tmpl->start != rec->start || rec->found_rec) {
7068 struct extent_record *tmp;
/* Put the record on the global duplicate list only once. */
7071 if (list_empty(&rec->list))
7072 list_add_tail(&rec->list,
7073 &duplicate_extents);
7076 * We have to do this song and dance in case we
7077 * find an extent record that falls inside of
7078 * our current extent record but does not have
7079 * the same objectid.
/* The duplicate is malloc'ed; only the fields assigned below are set in view. */
7081 tmp = malloc(sizeof(*tmp));
7084 tmp->start = tmpl->start;
7085 tmp->max_size = tmpl->max_size;
7088 tmp->metadata = tmpl->metadata;
7089 tmp->extent_item_refs = tmpl->extent_item_refs;
7090 INIT_LIST_HEAD(&tmp->list);
7091 list_add_tail(&tmp->list, &rec->dups);
7092 rec->num_duplicates++;
/* A second extent_item_refs value for a non-duplicate is reported on stderr. */
7099 if (tmpl->extent_item_refs && !dup) {
7100 if (rec->extent_item_refs) {
7101 fprintf(stderr, "block %llu rec "
7102 "extent_item_refs %llu, passed %llu\n",
7103 (unsigned long long)tmpl->start,
7104 (unsigned long long)
7105 rec->extent_item_refs,
7106 (unsigned long long)tmpl->extent_item_refs);
7108 rec->extent_item_refs = tmpl->extent_item_refs;
/* The two checked flags are only ever set here, never cleared. */
7112 if (tmpl->content_checked)
7113 rec->content_checked = 1;
7114 if (tmpl->owner_ref_checked)
7115 rec->owner_ref_checked = 1;
7116 memcpy(&rec->parent_key, &tmpl->parent_key,
7117 sizeof(tmpl->parent_key));
7118 if (tmpl->parent_generation)
7119 rec->parent_generation = tmpl->parent_generation;
7120 if (rec->max_size < tmpl->max_size)
7121 rec->max_size = tmpl->max_size;
7124 * A metadata extent can't cross stripe_len boundary, otherwise
7125 * kernel scrub won't be able to handle it.
7126 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7130 rec->crossing_stripes = check_crossing_stripes(
7131 global_info, rec->start,
7132 global_info->nodesize);
7133 check_extent_type(rec);
7134 maybe_free_extent_rec(extent_cache, rec);
/* Creation of a fresh record from the template. */
7138 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * Looks up the extent record covering @bytenr, using
 * add_extent_rec_nolookup() with a zeroed template to create one.  Then
 * finds or allocates the tree_backref and marks it as found_ref or
 * found_extent_tree.  An already-set mark is reported on stderr.  Finally
 * re-runs check_extent_type() and maybe_free_extent_rec().
 */
7143 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7144 u64 parent, u64 root, int found_ref)
7146 struct extent_record *rec;
7147 struct tree_backref *back;
7148 struct cache_extent *cache;
7150 bool insert = false;
7152 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7154 struct extent_record tmpl;
7156 memset(&tmpl, 0, sizeof(tmpl));
7157 tmpl.start = bytenr;
7162 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7166 /* really a bug in cache_extent implement now */
7167 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7172 rec = container_of(cache, struct extent_record, cache);
/* The cached record may start elsewhere than @bytenr. */
7173 if (rec->start != bytenr) {
7175 * Several cause, from unaligned bytenr to over lapping extents
7180 back = find_tree_backref(rec, parent, root);
7182 back = alloc_tree_backref(rec, parent, root);
7189 if (back->node.found_ref) {
7190 fprintf(stderr, "Extent back ref already exists "
7191 "for %llu parent %llu root %llu \n",
7192 (unsigned long long)bytenr,
7193 (unsigned long long)parent,
7194 (unsigned long long)root);
7196 back->node.found_ref = 1;
7198 if (back->node.found_extent_tree) {
7199 fprintf(stderr, "Extent back ref already exists "
7200 "for %llu parent %llu root %llu \n",
7201 (unsigned long long)bytenr,
7202 (unsigned long long)parent,
7203 (unsigned long long)root);
7205 back->node.found_extent_tree = 1;
/* rb_insert() returning non-zero only triggers a warning. */
7208 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7209 compare_extent_backref));
7210 check_extent_type(rec);
7211 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * Looks up the extent record covering @bytenr, using
 * add_extent_rec_nolookup() with a template carrying @max_size to create
 * one, and raises rec->max_size if needed.  Then finds or allocates the
 * data_backref.  One path requires @num_refs == 1 (BUG_ON), bumps
 * back->found_ref and refreshes bytes/disk_bytenr; the other stores
 * @num_refs and sets found_extent_tree, reporting an already-set mark on
 * stderr.
 */
7215 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7216 u64 parent, u64 root, u64 owner, u64 offset,
7217 u32 num_refs, int found_ref, u64 max_size)
7219 struct extent_record *rec;
7220 struct data_backref *back;
7221 struct cache_extent *cache;
7223 bool insert = false;
7225 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7227 struct extent_record tmpl;
7229 memset(&tmpl, 0, sizeof(tmpl));
7230 tmpl.start = bytenr;
7232 tmpl.max_size = max_size;
7234 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7238 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7243 rec = container_of(cache, struct extent_record, cache);
7244 if (rec->max_size < max_size)
7245 rec->max_size = max_size;
7248 * If found_ref is set then max_size is the real size and must match the
7249 * existing refs. So if we have already found a ref then we need to
7250 * make sure that this ref matches the existing one, otherwise we need
7251 * to add a new backref so we can notice that the backrefs don't match
7252 * and we need to figure out who is telling the truth. This is to
7253 * account for that awful fsync bug I introduced where we'd end up with
7254 * a btrfs_file_extent_item that would have its length include multiple
7255 * prealloc extents or point inside of a prealloc extent.
7257 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7260 back = alloc_data_backref(rec, parent, root, owner, offset,
7267 BUG_ON(num_refs != 1);
/* A backref that was already found must agree on the size. */
7268 if (back->node.found_ref)
7269 BUG_ON(back->bytes != max_size);
7270 back->node.found_ref = 1;
7271 back->found_ref += 1;
7272 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7273 back->bytes = max_size;
7274 back->disk_bytenr = bytenr;
7276 /* Need to reinsert if not already in the tree */
7278 rb_erase(&back->node.node, &rec->backref_tree);
7283 rec->content_checked = 1;
7284 rec->owner_ref_checked = 1;
7286 if (back->node.found_extent_tree) {
7287 fprintf(stderr, "Extent back ref already exists "
7288 "for %llu parent %llu root %llu "
7289 "owner %llu offset %llu num_refs %lu\n",
7290 (unsigned long long)bytenr,
7291 (unsigned long long)parent,
7292 (unsigned long long)root,
7293 (unsigned long long)owner,
7294 (unsigned long long)offset,
7295 (unsigned long)num_refs);
7297 back->num_refs = num_refs;
7298 back->node.found_extent_tree = 1;
/* rb_insert() returning non-zero only triggers a warning. */
7301 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7302 compare_extent_backref));
7304 maybe_free_extent_rec(extent_cache, rec);
/*
 * Add the range [@bytenr, @bytenr + @size) to the @seen tree and to the
 * @pending tree.
 *
 * NOTE(review): the return value of the insertion into @pending is not
 * checked here.
 */
7308 static int add_pending(struct cache_tree *pending,
7309 struct cache_tree *seen, u64 bytenr, u32 size)
7312 ret = add_cache_extent(seen, bytenr, size);
7315 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with block ranges to process next.
 *
 * Sources visible here: the first entry of @reada, entries of @nodes
 * searched from a position up to 32768 bytes before @last (or from 0), and
 * entries of @pending.  When more than 8 slots remain free, nearby @pending
 * entries following bits[0] are appended as well.
 *
 * NOTE(review): node_start is an unsigned long initialised from the u64
 * @last; on targets where unsigned long is 32 bits this would truncate.
 * Confirm whether that is intended.
 */
7319 static int pick_next_pending(struct cache_tree *pending,
7320 struct cache_tree *reada,
7321 struct cache_tree *nodes,
7322 u64 last, struct block_info *bits, int bits_nr,
7325 unsigned long node_start = last;
7326 struct cache_extent *cache;
7329 cache = search_cache_extent(reada, 0);
7331 bits[0].start = cache->start;
7332 bits[0].size = cache->size;
/* Step back 32768 bytes from @last before searching @nodes. */
7337 if (node_start > 32768)
7338 node_start -= 32768;
7340 cache = search_cache_extent(nodes, node_start);
7342 cache = search_cache_extent(nodes, 0);
7345 cache = search_cache_extent(pending, 0);
7350 bits[ret].start = cache->start;
7351 bits[ret].size = cache->size;
7352 cache = next_cache_extent(cache);
7354 } while (cache && ret < bits_nr);
7360 bits[ret].start = cache->start;
7361 bits[ret].size = cache->size;
7362 cache = next_cache_extent(cache);
7364 } while (cache && ret < bits_nr);
/* With spare capacity, also pick up pending extents close to bits[0]. */
7366 if (bits_nr - ret > 8) {
7367 u64 lookup = bits[0].start + bits[0].size;
7368 struct cache_extent *next;
7369 next = search_cache_extent(pending, lookup);
/* A gap larger than 32768 bytes is tested for here. */
7371 if (next->start - lookup > 32768)
7373 bits[ret].start = next->start;
7374 bits[ret].size = next->size;
7375 lookup = next->start + next->size;
7379 next = next_cache_extent(next);
/*
 * Per-entry callback for the chunk cache tree: recovers the chunk_record
 * embedding @cache and unlinks it from its list and dextents lists.
 */
7387 static void free_chunk_record(struct cache_extent *cache)
7389 struct chunk_record *rec;
7391 rec = container_of(cache, struct chunk_record, cache);
7392 list_del_init(&rec->list);
7393 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache, using free_chunk_record() as the
 * per-entry callback.
 */
7397 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7399 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for the device rb-tree: recovers the device_record
 * embedding @node.
 */
7402 static void free_device_record(struct rb_node *node)
7404 struct device_record *rec;
7406 rec = container_of(node, struct device_record, node);
/*
 * Instantiate the FREE_RB_BASED_TREE helper for device_cache with
 * free_device_record() as the per-node callback.
 * NOTE(review): the macro is defined outside this section; presumably it
 * expands to a function that frees the whole tree -- confirm.
 */
7410 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 */
7412 int insert_block_group_record(struct block_group_tree *tree,
7413 struct block_group_record *bg_rec)
7417 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7421 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry callback for the block group tree: recovers the
 * block_group_record embedding @cache and unlinks it from its list.
 */
7425 static void free_block_group_record(struct cache_extent *cache)
7427 struct block_group_record *rec;
7429 rec = container_of(cache, struct block_group_record, cache);
7430 list_del_init(&rec->list);
/*
 * Release every entry of @tree, using free_block_group_record() as the
 * per-entry callback.
 */
7434 void free_block_group_tree(struct block_group_tree *tree)
7436 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree and queue it on both orphan
 * lists (no_chunk_orphans and no_device_orphans) of @tree.
 */
7439 int insert_device_extent_record(struct device_extent_tree *tree,
7440 struct device_extent_record *de_rec)
7445 * Device extent is a bit different from the other extents, because
7446 * the extents which belong to the different devices may have the
7447 * same start and size, so we need use the special extent cache
7448 * search/insert functions.
7450 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7454 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7455 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry callback for the device extent tree: recovers the
 * device_extent_record embedding @cache and unlinks it from chunk_list and
 * device_list when it is still linked there.
 */
7459 static void free_device_extent_record(struct cache_extent *cache)
7461 struct device_extent_record *rec;
7463 rec = container_of(cache, struct device_extent_record, cache);
7464 if (!list_empty(&rec->chunk_list))
7465 list_del_init(&rec->chunk_list);
7466 if (!list_empty(&rec->device_list))
7467 list_del_init(&rec->device_list);
/*
 * Release every entry of @tree, using free_device_extent_record() as the
 * per-entry callback.
 */
7471 void free_device_extent_tree(struct device_extent_tree *tree)
7473 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7476 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf (only built with
 * BTRFS_COMPAT_EXTENT_TREE_V0).
 *
 * The ref's objectid is compared with BTRFS_FIRST_FREE_OBJECTID to choose
 * between add_tree_backref() and add_data_backref(); both receive
 * key.objectid as the bytenr and key.offset as the parent.
 */
7477 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7478 struct extent_buffer *leaf, int slot)
7480 struct btrfs_extent_ref_v0 *ref0;
7481 struct btrfs_key key;
7484 btrfs_item_key_to_cpu(leaf, &key, slot);
7485 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7486 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7487 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
/* Data backref: root, owner and offset are passed as 0. */
7490 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7491 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is calloc'ed with room for num_stripes stripes
 * (btrfs_chunk_record_size()).  The cache range is [key->offset,
 * key->offset + chunk length).  The key, the leaf generation and the chunk
 * fields (owner, stripe_len, type, io_width, io_align, sector_size,
 * num_stripes, sub_stripes) are copied in, followed by each stripe's devid,
 * offset and device UUID.
 */
7497 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7498 struct btrfs_key *key,
7501 struct btrfs_chunk *ptr;
7502 struct chunk_record *rec;
7505 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7506 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7508 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7510 fprintf(stderr, "memory allocation failed\n");
7514 INIT_LIST_HEAD(&rec->list);
7515 INIT_LIST_HEAD(&rec->dextents);
7518 rec->cache.start = key->offset;
7519 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7521 rec->generation = btrfs_header_generation(leaf);
7523 rec->objectid = key->objectid;
7524 rec->type = key->type;
7525 rec->offset = key->offset;
7527 rec->length = rec->cache.size;
7528 rec->owner = btrfs_chunk_owner(leaf, ptr);
7529 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7530 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7531 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7532 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7533 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7534 rec->num_stripes = num_stripes;
7535 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device UUID. */
7537 for (i = 0; i < rec->num_stripes; ++i) {
7538 rec->stripes[i].devid =
7539 btrfs_stripe_devid_nr(leaf, ptr, i);
7540 rec->stripes[i].offset =
7541 btrfs_stripe_offset_nr(leaf, ptr, i);
7542 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7543 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add a chunk record for it to
 * @chunk_cache.
 *
 * An item rejected by btrfs_check_chunk_valid() is reported with error().
 * A failed insert_cache_extent() is reported on stderr as "Chunk[...]
 * existed.".
 */
7550 static int process_chunk_item(struct cache_tree *chunk_cache,
7551 struct btrfs_key *key, struct extent_buffer *eb,
7554 struct chunk_record *rec;
7555 struct btrfs_chunk *chunk;
7558 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7560 * Do extra check for this chunk item,
7562 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7563 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7564 * and owner<->key_type check.
7566 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7569 error("chunk(%llu, %llu) is not valid, ignore it",
7570 key->offset, btrfs_chunk_length(eb, chunk));
7573 rec = btrfs_new_chunk_record(eb, key, slot);
7574 ret = insert_cache_extent(chunk_cache, &rec->cache);
7576 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7577 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree, ordered by device_record_compare.
 *
 * A failed insert is reported on stderr as "Device[...] existed.".
 *
 * NOTE(review): rec->devid is first set from key->offset and then
 * overwritten with btrfs_device_id(); the first assignment looks redundant.
 */
7584 static int process_device_item(struct rb_root *dev_cache,
7585 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7587 struct btrfs_dev_item *ptr;
7588 struct device_record *rec;
7591 ptr = btrfs_item_ptr(eb,
7592 slot, struct btrfs_dev_item);
/* malloc() leaves the record uninitialised; fields are filled in below. */
7594 rec = malloc(sizeof(*rec));
7596 fprintf(stderr, "memory allocation failed\n");
7600 rec->devid = key->offset;
7601 rec->generation = btrfs_header_generation(eb);
7603 rec->objectid = key->objectid;
7604 rec->type = key->type;
7605 rec->offset = key->offset;
7607 rec->devid = btrfs_device_id(eb, ptr);
7608 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7609 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7611 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7613 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised (calloc) block_group_record from the block group
 * item at @slot of @leaf.
 *
 * The cache range is [key->objectid, key->objectid + key->offset).  The key,
 * the leaf generation and the on-disk block group flags are copied into the
 * record.
 */
7620 struct block_group_record *
7621 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7624 struct btrfs_block_group_item *ptr;
7625 struct block_group_record *rec;
7627 rec = calloc(1, sizeof(*rec));
7629 fprintf(stderr, "memory allocation failed\n");
7633 rec->cache.start = key->objectid;
7634 rec->cache.size = key->offset;
7636 rec->generation = btrfs_header_generation(leaf);
7638 rec->objectid = key->objectid;
7639 rec->type = key->type;
7640 rec->offset = key->offset;
7642 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7643 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7645 INIT_LIST_HEAD(&rec->list);
/*
 * Create a block group record for the item at @slot of @eb and insert it
 * into @block_group_cache.
 *
 * A failed insert is reported on stderr as "Block Group[...] existed.".
 */
7650 static int process_block_group_item(struct block_group_tree *block_group_cache,
7651 struct btrfs_key *key,
7652 struct extent_buffer *eb, int slot)
7654 struct block_group_record *rec;
7657 rec = btrfs_new_block_group_record(eb, key, slot);
7658 ret = insert_block_group_record(block_group_cache, rec);
7660 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7661 rec->objectid, rec->offset);
/*
 * Build a zero-initialised (calloc) device_extent_record from the dev extent
 * item at @slot of @leaf.
 *
 * The cache entry is keyed by key->objectid and key->offset (start), with
 * the dev extent length as its size.  The key, the leaf generation, the
 * chunk objectid and the length are copied into the record.
 */
7668 struct device_extent_record *
7669 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7670 struct btrfs_key *key, int slot)
7672 struct device_extent_record *rec;
7673 struct btrfs_dev_extent *ptr;
7675 rec = calloc(1, sizeof(*rec));
7677 fprintf(stderr, "memory allocation failed\n");
7681 rec->cache.objectid = key->objectid;
7682 rec->cache.start = key->offset;
7684 rec->generation = btrfs_header_generation(leaf);
7686 rec->objectid = key->objectid;
7687 rec->type = key->type;
7688 rec->offset = key->offset;
7690 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7691 rec->chunk_objecteid =
7692 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7694 btrfs_dev_extent_chunk_offset(leaf, ptr);
7695 rec->length = btrfs_dev_extent_length(leaf, ptr);
7696 rec->cache.size = rec->length;
7698 INIT_LIST_HEAD(&rec->chunk_list);
7699 INIT_LIST_HEAD(&rec->device_list);
/*
 * Create a device extent record for the item at @slot of @eb and insert it
 * into @dev_extent_cache.
 *
 * A failed insert is reported as "Device extent[...] existed.".
 */
7705 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7706 struct btrfs_key *key, struct extent_buffer *eb,
7709 struct device_extent_record *rec;
7712 rec = btrfs_new_device_extent_record(eb, key, slot);
7713 ret = insert_device_extent_record(dev_extent_cache, rec);
7716 "Device extent[%llu, %llu, %llu] existed.\n",
7717 rec->objectid, rec->offset, rec->length);
/*
 * Process an EXTENT_ITEM / METADATA_ITEM at @slot of @eb: register the extent
 * in @extent_cache and add a backref for every inline reference in the item.
 *
 * The extent length is nodesize for METADATA_ITEM keys and key.offset
 * otherwise.  Unaligned bytenrs, metadata extents whose length is not
 * nodesize, and data extents whose length is not sector aligned are reported
 * with error().  Items smaller than struct btrfs_extent_item are handled as
 * v0 items when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * NOTE(review): the return values of the second add_extent_rec() call and of
 * both add_data_backref() calls are not checked here.
 */
7724 static int process_extent_item(struct btrfs_root *root,
7725 struct cache_tree *extent_cache,
7726 struct extent_buffer *eb, int slot)
7728 struct btrfs_extent_item *ei;
7729 struct btrfs_extent_inline_ref *iref;
7730 struct btrfs_extent_data_ref *dref;
7731 struct btrfs_shared_data_ref *sref;
7732 struct btrfs_key key;
7733 struct extent_record tmpl;
7738 u32 item_size = btrfs_item_size_nr(eb, slot);
7744 btrfs_item_key_to_cpu(eb, &key, slot);
7746 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7748 num_bytes = root->fs_info->nodesize;
7750 num_bytes = key.offset;
7753 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7754 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7755 key.objectid, root->fs_info->sectorsize);
/* An item too small for the current format is treated as a v0 item. */
7758 if (item_size < sizeof(*ei)) {
7759 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7760 struct btrfs_extent_item_v0 *ei0;
7761 if (item_size != sizeof(*ei0)) {
7763 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7764 key.objectid, key.type, key.offset,
7765 btrfs_header_bytenr(eb), slot);
7768 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7769 refs = btrfs_extent_refs_v0(eb, ei0);
7773 memset(&tmpl, 0, sizeof(tmpl));
7774 tmpl.start = key.objectid;
7775 tmpl.nr = num_bytes;
7776 tmpl.extent_item_refs = refs;
7777 tmpl.metadata = metadata;
7779 tmpl.max_size = num_bytes;
7781 return add_extent_rec(extent_cache, &tmpl);
7784 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7785 refs = btrfs_extent_refs(eb, ei);
7786 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7790 if (metadata && num_bytes != root->fs_info->nodesize) {
7791 error("ignore invalid metadata extent, length %llu does not equal to %u",
7792 num_bytes, root->fs_info->nodesize);
7795 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7796 error("ignore invalid data extent, length %llu is not aligned to %u",
7797 num_bytes, root->fs_info->sectorsize);
7801 memset(&tmpl, 0, sizeof(tmpl));
7802 tmpl.start = key.objectid;
7803 tmpl.nr = num_bytes;
7804 tmpl.extent_item_refs = refs;
7805 tmpl.metadata = metadata;
7807 tmpl.max_size = num_bytes;
7808 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the extent item ... */
7810 ptr = (unsigned long)(ei + 1);
/* ... and, for tree blocks keyed by EXTENT_ITEM, after the tree block info. */
7811 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7812 key.type == BTRFS_EXTENT_ITEM_KEY)
7813 ptr += sizeof(struct btrfs_tree_block_info);
7815 end = (unsigned long)ei + item_size;
7817 iref = (struct btrfs_extent_inline_ref *)ptr;
7818 type = btrfs_extent_inline_ref_type(eb, iref);
7819 offset = btrfs_extent_inline_ref_offset(eb, iref);
7821 case BTRFS_TREE_BLOCK_REF_KEY:
7822 ret = add_tree_backref(extent_cache, key.objectid,
7826 "add_tree_backref failed (extent items tree block): %s",
7829 case BTRFS_SHARED_BLOCK_REF_KEY:
7830 ret = add_tree_backref(extent_cache, key.objectid,
7834 "add_tree_backref failed (extent items shared block): %s",
7837 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref is read starting at the inline ref's offset field. */
7838 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7839 add_data_backref(extent_cache, key.objectid, 0,
7840 btrfs_extent_data_ref_root(eb, dref),
7841 btrfs_extent_data_ref_objectid(eb,
7843 btrfs_extent_data_ref_offset(eb, dref),
7844 btrfs_extent_data_ref_count(eb, dref),
7847 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref follows the inline ref structure. */
7848 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7849 add_data_backref(extent_cache, key.objectid, offset,
7851 btrfs_shared_data_ref_count(eb, sref),
7855 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7856 key.objectid, key.type, num_bytes);
/* Advance by the size of this inline ref type. */
7859 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) is covered by exactly one
 * matching entry in the free space ctl of @cache, and unlink that entry.
 *
 * First the range is trimmed around the super block mirror stripes that map
 * into this block group (btrfs_rmap_block() for each mirror).  A stripe in
 * the middle of the range causes a recursive check of the left side before
 * continuing with the right side.  A missing entry, or one with a different
 * offset or size, is reported on stderr.
 */
7866 static int check_cache_range(struct btrfs_root *root,
7867 struct btrfs_block_group_cache *cache,
7868 u64 offset, u64 bytes)
7870 struct btrfs_free_space *entry;
7876 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7877 bytenr = btrfs_sb_offset(i);
7878 ret = btrfs_rmap_block(root->fs_info,
7879 cache->key.objectid, bytenr, 0,
7880 &logical, &nr, &stripe_len);
/* Stripe lies entirely before the range. */
7885 if (logical[nr] + stripe_len <= offset)
/* Stripe lies entirely after the range. */
7887 if (offset + bytes <= logical[nr])
7889 if (logical[nr] == offset) {
7890 if (stripe_len >= bytes) {
7894 bytes -= stripe_len;
7895 offset += stripe_len;
7896 } else if (logical[nr] < offset) {
7897 if (logical[nr] + stripe_len >=
7902 bytes = (offset + bytes) -
7903 (logical[nr] + stripe_len);
7904 offset = logical[nr] + stripe_len;
7907 * Could be tricky, the super may land in the
7908 * middle of the area we're checking. First
7909 * check the easiest case, it's at the end.
7911 if (logical[nr] + stripe_len >=
7913 bytes = logical[nr] - offset;
7917 /* Check the left side */
7918 ret = check_cache_range(root, cache,
7920 logical[nr] - offset);
7926 /* Now we continue with the right side */
7927 bytes = (offset + bytes) -
7928 (logical[nr] + stripe_len);
7929 offset = logical[nr] + stripe_len;
7936 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7938 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7939 offset, offset+bytes);
7943 if (entry->offset != offset) {
7944 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7949 if (entry->bytes != bytes) {
7950 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7951 bytes, entry->bytes, offset);
/* Matched entries are removed so that leftovers can be detected later. */
7955 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Verify the free space entries of block group @cache against the extent
 * tree.
 *
 * Walks EXTENT_ITEM / METADATA_ITEM keys inside the block group, starting no
 * lower than BTRFS_SUPER_INFO_OFFSET, and passes every gap between
 * consecutive extents to check_cache_range().  METADATA_ITEM extents are
 * assumed to be nodesize long.  Entries still left in the free space ctl
 * afterwards are reported on stderr.
 */
7960 static int verify_space_cache(struct btrfs_root *root,
7961 struct btrfs_block_group_cache *cache)
7963 struct btrfs_path path;
7964 struct extent_buffer *leaf;
7965 struct btrfs_key key;
/* All searches below run on the extent root, whatever root was passed in. */
7969 root = root->fs_info->extent_root;
7971 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7973 btrfs_init_path(&path);
7974 key.objectid = last;
7976 key.type = BTRFS_EXTENT_ITEM_KEY;
7977 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7982 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7983 ret = btrfs_next_leaf(root, &path);
7991 leaf = path.nodes[0];
7992 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the end of this block group. */
7993 if (key.objectid >= cache->key.offset + cache->key.objectid)
7995 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7996 key.type != BTRFS_METADATA_ITEM_KEY) {
/* No gap before this extent: just advance past it. */
8001 if (last == key.objectid) {
8002 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8003 last = key.objectid + key.offset;
8005 last = key.objectid + root->fs_info->nodesize;
/* Gap [last, key.objectid) is checked against the free space entries. */
8010 ret = check_cache_range(root, cache, last,
8011 key.objectid - last);
8014 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8015 last = key.objectid + key.offset;
8017 last = key.objectid + root->fs_info->nodesize;
/* Trailing gap up to the end of the block group. */
8021 if (last < cache->key.objectid + cache->key.offset)
8022 ret = check_cache_range(root, cache, last,
8023 cache->key.objectid +
8024 cache->key.offset - last);
8027 btrfs_release_path(&path);
8030 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8031 fprintf(stderr, "There are still entries left in the space "
/*
 * Check the free space cache (or free space tree) of every block group.
 *
 * When the super block's cache generation is neither -1 nor equal to the
 * super generation, a message says the cache will be invalidated.  For each
 * block group the free space ctl is initialised or emptied, free space is
 * loaded from the free space tree (FREE_SPACE_TREE compat_ro feature) or
 * from the space cache, and the result is verified with
 * verify_space_cache().
 *
 * Returns -EINVAL when the local error value is non-zero, 0 otherwise.
 */
8039 static int check_space_cache(struct btrfs_root *root)
8041 struct btrfs_block_group_cache *cache;
8042 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8046 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8047 btrfs_super_generation(root->fs_info->super_copy) !=
8048 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8049 printf("cache and super generation don't match, space cache "
8050 "will be invalidated\n");
8054 if (ctx.progress_enabled) {
8055 ctx.tp = TASK_FREE_SPACE;
8056 task_start(ctx.info);
8060 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup starts right after this block group. */
8064 start = cache->key.objectid + cache->key.offset;
8065 if (!cache->free_space_ctl) {
8066 if (btrfs_init_free_space_ctl(cache,
8067 root->fs_info->sectorsize)) {
8072 btrfs_remove_free_space_cache(cache);
8075 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8076 ret = exclude_super_stripes(root, cache);
8078 fprintf(stderr, "could not exclude super stripes: %s\n",
8083 ret = load_free_space_tree(root->fs_info, cache);
8084 free_excluded_extents(root, cache);
8086 fprintf(stderr, "could not load free space tree: %s\n",
8093 ret = load_free_space_cache(root->fs_info, cache);
8098 ret = verify_space_cache(root, cache);
8100 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8101 cache->key.objectid);
8106 task_stop(ctx.info);
8108 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of [@bytenr, @bytenr + @num_bytes).
 *
 * @leaf_offset is the offset inside @eb at which the stored checksums for
 * this range begin.  The data is read into a malloc'ed buffer and checked
 * one sector (fs_info->sectorsize) at a time.  A mismatch is reported on
 * stderr, and the number of copies is queried so that another mirror can be
 * tried.
 */
8111 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8112 u64 num_bytes, unsigned long leaf_offset,
8113 struct extent_buffer *eb) {
8115 struct btrfs_fs_info *fs_info = root->fs_info;
8117 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8119 unsigned long csum_offset;
8123 u64 data_checked = 0;
/* The length is tested for being a whole number of sectors. */
8129 if (num_bytes % fs_info->sectorsize)
8132 data = malloc(num_bytes);
8136 while (offset < num_bytes) {
8139 read_len = num_bytes - offset;
8140 /* read as much space once a time */
8141 ret = read_extent_data(fs_info, data + offset,
8142 bytenr + offset, &read_len, mirror);
8146 /* verify every 4k data's checksum */
8147 while (data_checked < read_len) {
8149 tmp = offset + data_checked;
8151 csum = btrfs_csum_data((char *)data + tmp,
8152 csum, fs_info->sectorsize);
8153 btrfs_csum_final(csum, (u8 *)&csum);
/* Stored checksum for this sector: one csum_size slot per sector. */
8155 csum_offset = leaf_offset +
8156 tmp / fs_info->sectorsize * csum_size;
8157 read_extent_buffer(eb, (char *)&csum_expected,
8158 csum_offset, csum_size);
8159 /* try another mirror */
8160 if (csum != csum_expected) {
8161 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8162 mirror, bytenr + tmp,
8163 csum, csum_expected);
8164 num_copies = btrfs_num_copies(root->fs_info,
8166 if (mirror < num_copies - 1) {
8171 data_checked += fs_info->sectorsize;
/*
 * Check that the range starting at @bytenr (length num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * Searches for (bytenr, EXTENT_ITEM, -1), steps back to the preceding items,
 * then walks forward shrinking the range by every overlapping extent.  An
 * extent in the middle of the range makes the function recurse on the right
 * section and continue with the left one.  Any part still uncovered at the
 * end is reported on stderr.
 */
8180 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8183 struct btrfs_path path;
8184 struct extent_buffer *leaf;
8185 struct btrfs_key key;
8188 btrfs_init_path(&path);
8189 key.objectid = bytenr;
8190 key.type = BTRFS_EXTENT_ITEM_KEY;
8191 key.offset = (u64)-1;
8194 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8197 fprintf(stderr, "Error looking up extent record %d\n", ret);
8198 btrfs_release_path(&path);
/* Step back one item, moving to the previous leaf when at slot 0. */
8201 if (path.slots[0] > 0) {
8204 ret = btrfs_prev_leaf(root, &path);
8207 } else if (ret > 0) {
8214 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8217 * Block group items come before extent items if they have the same
8218 * bytenr, so walk back one more just in case. Dear future traveller,
8219 * first congrats on mastering time travel. Now if it's not too much
8220 * trouble could you go back to 2006 and tell Chris to make the
8221 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8222 * EXTENT_ITEM_KEY please?
8224 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8225 if (path.slots[0] > 0) {
8228 ret = btrfs_prev_leaf(root, &path);
8231 } else if (ret > 0) {
8236 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8240 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8241 ret = btrfs_next_leaf(root, &path);
8243 fprintf(stderr, "Error going to next leaf "
8245 btrfs_release_path(&path);
8251 leaf = path.nodes[0];
8252 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8253 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
8257 if (key.objectid + key.offset < bytenr) {
/* Extent starts after the range ends. */
8261 if (key.objectid > bytenr + num_bytes)
8264 if (key.objectid == bytenr) {
8265 if (key.offset >= num_bytes) {
8269 num_bytes -= key.offset;
8270 bytenr += key.offset;
8271 } else if (key.objectid < bytenr) {
8272 if (key.objectid + key.offset >= bytenr + num_bytes) {
8276 num_bytes = (bytenr + num_bytes) -
8277 (key.objectid + key.offset);
8278 bytenr = key.objectid + key.offset;
8280 if (key.objectid + key.offset < bytenr + num_bytes) {
8281 u64 new_start = key.objectid + key.offset;
8282 u64 new_bytes = bytenr + num_bytes - new_start;
8285 * Weird case, the extent is in the middle of
8286 * our range, we'll have to search one side
8287 * and then the other. Not sure if this happens
8288 * in real life, but no harm in coding it up
8289 * anyway just in case.
/* The path is released before recursing on the right section. */
8291 btrfs_release_path(&path);
8292 ret = check_extent_exists(root, new_start,
8295 fprintf(stderr, "Right section didn't "
8299 num_bytes = key.objectid - bytenr;
8302 num_bytes = key.objectid - bytenr;
/* Anything left uncovered with no earlier error is reported. */
8309 if (num_bytes && !ret) {
8310 fprintf(stderr, "There are no extents for csum range "
8311 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8315 btrfs_release_path(&path);
/*
 * Walk the csum tree and check that every contiguous checksummed range is
 * backed by extent records.
 *
 * Consecutive EXTENT_CSUM items are merged into a running range (offset,
 * num_bytes).  When an item does not continue the current range, the range
 * is checked with check_extent_exists() and a new one starts.  The data
 * itself is only verified with check_extent_csums() when the global
 * check_data_csum flag is set.
 */
8319 static int check_csums(struct btrfs_root *root)
8321 struct btrfs_path path;
8322 struct extent_buffer *leaf;
8323 struct btrfs_key key;
8324 u64 offset = 0, num_bytes = 0;
8325 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8329 unsigned long leaf_offset;
/* Everything below operates on the csum root. */
8331 root = root->fs_info->csum_root;
8332 if (!extent_buffer_uptodate(root->node)) {
8333 fprintf(stderr, "No valid csum tree found\n");
8337 btrfs_init_path(&path);
8338 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8339 key.type = BTRFS_EXTENT_CSUM_KEY;
8341 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8343 fprintf(stderr, "Error searching csum tree %d\n", ret);
8344 btrfs_release_path(&path);
8348 if (ret > 0 && path.slots[0])
8353 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8354 ret = btrfs_next_leaf(root, &path);
8356 fprintf(stderr, "Error going to next leaf "
8363 leaf = path.nodes[0];
8365 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8366 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes covered by this item: one sector per stored checksum. */
8371 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8372 csum_size) * root->fs_info->sectorsize;
8373 if (!check_data_csum)
8374 goto skip_csum_check;
8375 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8376 ret = check_extent_csums(root, key.offset, data_len,
8382 offset = key.offset;
/* This item does not continue the current range: check it, start anew. */
8383 } else if (key.offset != offset + num_bytes) {
8384 ret = check_extent_exists(root, offset, num_bytes);
8386 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8387 "there is no extent record\n",
8388 offset, offset+num_bytes);
8391 offset = key.offset;
8394 num_bytes += data_len;
8398 btrfs_release_path(&path);
/*
 * Compare @key against @drop_key field by field in btrfs key order:
 * objectid first, then type, then offset.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * Presumably the result is non-zero when @key sorts strictly before
 * @drop_key (the caller in run_next_block tests the result as a boolean) -
 * confirm against the full file.
 */
8402 static int is_dropped_key(struct btrfs_key *key,
8403 struct btrfs_key *drop_key) {
8404 if (key->objectid < drop_key->objectid)
8406 else if (key->objectid == drop_key->objectid) {
8407 if (key->type < drop_key->type)
8409 else if (key->type == drop_key->type) {
8410 if (key->offset < drop_key->offset)
8418 * Here are the rules for FULL_BACKREF.
8420 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8421 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8423 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8424 * if it happened after the relocation occurred since we'll have dropped the
8425 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8426 * have no real way to know for sure.
8428 * We process the blocks one root at a time, and we start from the lowest root
8429 * objectid and go to the highest. So we can just lookup the owner backref for
8430 * the record and if we don't find it then we know it doesn't exist and we have
8433 * FIXME: if we ever start reclaiming root objectids then we need to fix this
8434 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8435 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether the tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and report the answer through the flags
 * output argument (it is dereferenced and OR-ed below).
 *
 * @extent_cache: cache tree searched for the extent_record at buf->start
 * @buf:          tree block being examined
 * @ri:           record of the root currently being walked
 *
 * Conditions examined, in order:
 *   - ri->objectid below BTRFS_FIRST_FREE_OBJECTID
 *   - buf is the root block itself (buf->start == ri->bytenr)
 *   - BTRFS_HEADER_FLAG_RELOC set in the block header
 *   - header owner equal to ri->objectid
 *   - the result of find_tree_backref(rec, 0, owner)
 *
 * Both exits compare the outcome with a previously stored
 * rec->flag_block_full_backref (unless it is still FLAG_UNSET) and set
 * rec->bad_full_backref when they disagree.
 *
 * NOTE(review): the branch targets taken after each condition and the return
 * values are on lines missing from this excerpt; confirm which conditions
 * select the normal exit and which the full-backref exit.
 */
8437 static int calc_extent_flag(struct cache_tree *extent_cache,
8438 struct extent_buffer *buf,
8439 struct root_item_record *ri,
8442 struct extent_record *rec;
8443 struct cache_extent *cache;
8444 struct tree_backref *tback;
8447 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8448 /* we have added this extent before */
8452 rec = container_of(cache, struct extent_record, cache);
8455 * Except file/reloc tree, we can not have
8458 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8463 if (buf->start == ri->bytenr)
8466 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8469 owner = btrfs_header_owner(buf);
8470 if (owner == ri->objectid)
8473 tback = find_tree_backref(rec, 0, owner);
8478 if (rec->flag_block_full_backref != FLAG_UNSET &&
8479 rec->flag_block_full_backref != 0)
8480 rec->bad_full_backref = 1;
8483 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8484 if (rec->flag_block_full_backref != FLAG_UNSET &&
8485 rec->flag_block_full_backref != 1)
8486 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 *     Invalid key type(<type>) found in root(<root>)
 * where <type> is rendered by print_key_type() and <root> by
 * print_objectid().
 */
8490 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8492 fprintf(stderr, "Invalid key type(");
8493 print_key_type(stderr, 0, key_type);
8494 fprintf(stderr, ") found in root(");
8495 print_objectid(stderr, rootid, 0);
8496 fprintf(stderr, ")\n");
8500 * Check if the key is valid with its extent buffer.
8502 * This is an early check in case an invalid key exists in an extent buffer
8503 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Early sanity filter: decide whether an item of @key_type may live in the
 * tree identified by @rootid.
 *
 * Rules encoded below:
 *   DEV_ITEM, CHUNK_ITEM                          -> chunk tree only
 *   EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP_ITEM  -> extent tree only
 *   ROOT_ITEM                                     -> root tree only
 *   DEV_EXTENT                                    -> device tree only
 * A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the switch header, the jumps out of each case and the return
 * statements are not visible here.  The caller in run_next_block prints
 * "ignoring invalid key" based on the result of this function.
 */
8506 static int check_type_with_root(u64 rootid, u8 key_type)
8509 /* Only valid in chunk tree */
8510 case BTRFS_DEV_ITEM_KEY:
8511 case BTRFS_CHUNK_ITEM_KEY:
8512 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8515 /* valid in csum and log tree */
/*
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, not a key type,
 * yet it is used as a case label for key_type.  The comment above suggests
 * BTRFS_EXTENT_CSUM_KEY may have been intended, in which case the rootid
 * condition would need to accept the csum tree as well - confirm before
 * changing.
 */
8516 case BTRFS_CSUM_TREE_OBJECTID:
8517 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8521 case BTRFS_EXTENT_ITEM_KEY:
8522 case BTRFS_METADATA_ITEM_KEY:
8523 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8524 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8527 case BTRFS_ROOT_ITEM_KEY:
8528 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8531 case BTRFS_DEV_EXTENT_KEY:
8532 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8538 report_mismatch_key_root(key_type, rootid);
/*
 * Process one tree block taken from the pending work queues and feed
 * everything it references into the check caches.
 *
 * Outline of the visible steps:
 *  1. pick_next_pending() fills @bits with the next batch; each entry is
 *     added to @reada and readahead_tree_block() is issued for it.
 *  2. bits[0] is the block handled by this call.  It is looked up in
 *     @pending, @reada and @nodes and removed from them, and *last is set to
 *     its start.
 *  3. The extent record for the block is looked up in @extent_cache to obtain
 *     parent_generation, then the block is read with read_tree_block().  A
 *     block that is not uptodate is reported via record_bad_block_io().
 *  4. The extent flags come from btrfs_lookup_extent_info() in the
 *     !init_extent_tree branch, with calc_extent_flag() as the other source.
 *     On the calc_extent_flag() error path BTRFS_BLOCK_FLAG_FULL_BACKREF is
 *     OR-ed into flags.
 *  5. FULL_BACKREF is reconciled with the header owner, the RELOC header
 *     flag and ri->last_snapshot; disagreements set rec->bad_full_backref
 *     and the final state is stored in rec->flag_block_full_backref.
 *  6. check_block() validates the block.
 *  7. Leaf: every item key is screened by check_type_with_root() and then
 *     dispatched on key.type to the process_*() helpers,
 *     add_tree_backref() or add_data_backref().  BTRFS_ORPHAN_ITEM_KEY items
 *     may be queued on the global delete_items list as struct bad_item (the
 *     objectid == BTRFS_ORPHAN_OBJECTID case is tested separately).
 *     BTRFS_EXTENT_DATA_KEY items are tested for inline type and for a zero
 *     disk_bytenr, update data_bytes_allocated and data_bytes_referenced and
 *     add a data backref.
 *     Node: for every child pointer the key is tested with is_dropped_key()
 *     at ri->drop_level, an extent_record template is filled and passed to
 *     add_extent_rec(), a tree backref is added and the child is queued with
 *     add_pending() on @nodes or @pending.
 *  8. Global statistics (btree_space_waste, total_btree_bytes,
 *     total_fs_tree_bytes, total_extent_tree_bytes) are updated and the
 *     buffer is released with free_extent_buffer().
 *
 * NOTE(review): many lines of this function (the declarations of bits_nr and
 * last, most branch and return statements) are missing from this excerpt;
 * the return contract cannot be stated from what is visible.
 */
8542 static int run_next_block(struct btrfs_root *root,
8543 struct block_info *bits,
8546 struct cache_tree *pending,
8547 struct cache_tree *seen,
8548 struct cache_tree *reada,
8549 struct cache_tree *nodes,
8550 struct cache_tree *extent_cache,
8551 struct cache_tree *chunk_cache,
8552 struct rb_root *dev_cache,
8553 struct block_group_tree *block_group_cache,
8554 struct device_extent_tree *dev_extent_cache,
8555 struct root_item_record *ri)
8557 struct btrfs_fs_info *fs_info = root->fs_info;
8558 struct extent_buffer *buf;
8559 struct extent_record *rec = NULL;
8570 struct btrfs_key key;
8571 struct cache_extent *cache;
8574 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8575 bits_nr, &reada_bits);
8580 for(i = 0; i < nritems; i++) {
8581 ret = add_cache_extent(reada, bits[i].start,
8586 /* fixme, get the parent transid */
8587 readahead_tree_block(fs_info, bits[i].start, 0);
/* Only the first entry of the batch is processed in this call. */
8590 *last = bits[0].start;
8591 bytenr = bits[0].start;
8592 size = bits[0].size;
/* The block is handled now, so it is taken off the work queues. */
8594 cache = lookup_cache_extent(pending, bytenr, size);
8596 remove_cache_extent(pending, cache);
8599 cache = lookup_cache_extent(reada, bytenr, size);
8601 remove_cache_extent(reada, cache);
8604 cache = lookup_cache_extent(nodes, bytenr, size);
8606 remove_cache_extent(nodes, cache);
/* Fetch the extent record to learn the recorded parent generation. */
8609 cache = lookup_cache_extent(extent_cache, bytenr, size);
8611 rec = container_of(cache, struct extent_record, cache);
8612 gen = rec->parent_generation;
8615 /* fixme, get the real parent transid */
8616 buf = read_tree_block(root->fs_info, bytenr, gen);
8617 if (!extent_buffer_uptodate(buf)) {
8618 record_bad_block_io(root->fs_info,
8619 extent_cache, bytenr, size);
8623 nritems = btrfs_header_nritems(buf);
8626 if (!init_extent_tree) {
8627 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8628 btrfs_header_level(buf), 1, NULL,
8631 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8633 fprintf(stderr, "Couldn't calc extent flags\n");
8634 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8639 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8641 fprintf(stderr, "Couldn't calc extent flags\n");
8642 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8646 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8648 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8649 ri->objectid == btrfs_header_owner(buf)) {
8651 * Ok we got to this block from it's original owner and
8652 * we have FULL_BACKREF set. Relocation can leave
8653 * converted blocks over so this is altogether possible,
8654 * however it's not possible if the generation > the
8655 * last snapshot, so check for this case.
8657 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8658 btrfs_header_generation(buf) > ri->last_snapshot) {
8659 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8660 rec->bad_full_backref = 1;
8665 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8666 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8667 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8668 rec->bad_full_backref = 1;
8672 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8673 rec->flag_block_full_backref = 1;
8677 rec->flag_block_full_backref = 0;
8679 owner = btrfs_header_owner(buf);
8682 ret = check_block(root, extent_cache, buf, flags);
8686 if (btrfs_is_leaf(buf)) {
8687 btree_space_waste += btrfs_leaf_free_space(root, buf);
8688 for (i = 0; i < nritems; i++) {
8689 struct btrfs_file_extent_item *fi;
8690 btrfs_item_key_to_cpu(buf, &key, i);
8692 * Check key type against the leaf owner.
8693 * Could filter quite a lot of early error if
8696 if (check_type_with_root(btrfs_header_owner(buf),
8698 fprintf(stderr, "ignoring invalid key\n");
8701 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8702 process_extent_item(root, extent_cache, buf,
8706 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8707 process_extent_item(root, extent_cache, buf,
8711 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8713 btrfs_item_size_nr(buf, i);
8716 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8717 process_chunk_item(chunk_cache, &key, buf, i);
8720 if (key.type == BTRFS_DEV_ITEM_KEY) {
8721 process_device_item(dev_cache, &key, buf, i);
8724 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8725 process_block_group_item(block_group_cache,
8729 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8730 process_device_extent_item(dev_extent_cache,
8735 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8736 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8737 process_extent_ref_v0(extent_cache, buf, i);
8744 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8745 ret = add_tree_backref(extent_cache,
8746 key.objectid, 0, key.offset, 0);
8749 "add_tree_backref failed (leaf tree block): %s",
8753 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8754 ret = add_tree_backref(extent_cache,
8755 key.objectid, key.offset, 0, 0);
8758 "add_tree_backref failed (leaf shared block): %s",
8762 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8763 struct btrfs_extent_data_ref *ref;
8764 ref = btrfs_item_ptr(buf, i,
8765 struct btrfs_extent_data_ref);
8766 add_data_backref(extent_cache,
8768 btrfs_extent_data_ref_root(buf, ref),
8769 btrfs_extent_data_ref_objectid(buf,
8771 btrfs_extent_data_ref_offset(buf, ref),
8772 btrfs_extent_data_ref_count(buf, ref),
8773 0, root->fs_info->sectorsize);
8776 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8777 struct btrfs_shared_data_ref *ref;
8778 ref = btrfs_item_ptr(buf, i,
8779 struct btrfs_shared_data_ref);
8780 add_data_backref(extent_cache,
8781 key.objectid, key.offset, 0, 0, 0,
8782 btrfs_shared_data_ref_count(buf, ref),
8783 0, root->fs_info->sectorsize);
8786 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8787 struct bad_item *bad;
8789 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8793 bad = malloc(sizeof(struct bad_item));
8796 INIT_LIST_HEAD(&bad->list);
8797 memcpy(&bad->key, &key,
8798 sizeof(struct btrfs_key));
8799 bad->root_id = owner;
8800 list_add_tail(&bad->list, &delete_items);
8803 if (key.type != BTRFS_EXTENT_DATA_KEY)
8805 fi = btrfs_item_ptr(buf, i,
8806 struct btrfs_file_extent_item);
8807 if (btrfs_file_extent_type(buf, fi) ==
8808 BTRFS_FILE_EXTENT_INLINE)
8810 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8813 data_bytes_allocated +=
8814 btrfs_file_extent_disk_num_bytes(buf, fi);
8815 if (data_bytes_allocated < root->fs_info->sectorsize) {
8818 data_bytes_referenced +=
8819 btrfs_file_extent_num_bytes(buf, fi);
8820 add_data_backref(extent_cache,
8821 btrfs_file_extent_disk_bytenr(buf, fi),
8822 parent, owner, key.objectid, key.offset -
8823 btrfs_file_extent_offset(buf, fi), 1, 1,
8824 btrfs_file_extent_disk_num_bytes(buf, fi));
8828 struct btrfs_key first_key;
8830 first_key.objectid = 0;
8833 btrfs_item_key_to_cpu(buf, &first_key, 0);
8834 level = btrfs_header_level(buf);
8835 for (i = 0; i < nritems; i++) {
8836 struct extent_record tmpl;
8838 ptr = btrfs_node_blockptr(buf, i);
8839 size = root->fs_info->nodesize;
8840 btrfs_node_key_to_cpu(buf, &key, i);
8842 if ((level == ri->drop_level)
8843 && is_dropped_key(&key, &ri->drop_key)) {
8848 memset(&tmpl, 0, sizeof(tmpl));
8849 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8850 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8855 tmpl.max_size = size;
8856 ret = add_extent_rec(extent_cache, &tmpl);
8860 ret = add_tree_backref(extent_cache, ptr, parent,
8864 "add_tree_backref failed (non-leaf block): %s",
8870 add_pending(nodes, seen, ptr, size);
8872 add_pending(pending, seen, ptr, size);
8875 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
8876 nritems) * sizeof(struct btrfs_key_ptr);
8878 total_btree_bytes += buf->len;
8879 if (fs_root_objectid(btrfs_header_owner(buf)))
8880 total_fs_tree_bytes += buf->len;
8881 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8882 total_extent_tree_bytes += buf->len;
8884 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for checking and create its extent
 * record.
 *
 * The block is queued with add_pending(); a block with header level > 0 goes
 * onto @nodes, and the other add_pending() call targets @pending.  An
 * extent_record template (start and max_size taken from @buf) is handed to
 * add_extent_rec().  Finally a tree backref is added: for the relocation
 * tree, or for blocks whose backref revision is older than
 * BTRFS_MIXED_BACKREF_REV, the block start itself is passed as parent;
 * otherwise parent 0 and the root objectid are used.
 *
 * NOTE(review): the remaining parameters, the other template fields and the
 * return statement are on lines missing from this excerpt.
 */
8888 static int add_root_to_pending(struct extent_buffer *buf,
8889 struct cache_tree *extent_cache,
8890 struct cache_tree *pending,
8891 struct cache_tree *seen,
8892 struct cache_tree *nodes,
8895 struct extent_record tmpl;
8898 if (btrfs_header_level(buf) > 0)
8899 add_pending(nodes, seen, buf->start, buf->len);
8901 add_pending(pending, seen, buf->start, buf->len);
8903 memset(&tmpl, 0, sizeof(tmpl));
8904 tmpl.start = buf->start;
8909 tmpl.max_size = buf->len;
8910 add_extent_rec(extent_cache, &tmpl);
/* Old-style and relocation roots reference themselves as parent. */
8912 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8913 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8914 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8917 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8922 /* as we fix the tree, we might be deleting blocks that
8923 * we're tracking for repair. This hook makes sure we
8924 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory extent records in sync while extents are freed during
 * repair.
 *
 * The record is looked up with
 * lookup_cache_extent(fs_info->fsck_extent_cache, bytenr, num_bytes) and the
 * matching backref is adjusted:
 *   - data extents (owner >= BTRFS_FIRST_FREE_OBJECTID): found_ref, num_refs,
 *     rec->refs and rec->extent_item_refs are reduced by refs_to_drop, and
 *     the found_ref / found_extent_tree markers are cleared once the
 *     respective counter reaches 0;
 *   - tree blocks: found_ref and found_extent_tree are cleared and
 *     rec->extent_item_refs is decremented.
 * The backref node may be erased from rec->backref_tree, and
 * maybe_free_extent_rec() is then called for the record.
 *
 * NOTE(review): both rb_erase() calls are guarded by
 * "!found_extent_tree && found_ref".  Since the preceding lines clear
 * found_ref when the last reference goes away, "!found_ref" may have been
 * intended - confirm.
 * NOTE(review): the declaration of refs_to_drop, the not-found handling and
 * the return value are on lines missing from this excerpt.
 */
8926 static int free_extent_hook(struct btrfs_trans_handle *trans,
8927 struct btrfs_root *root,
8928 u64 bytenr, u64 num_bytes, u64 parent,
8929 u64 root_objectid, u64 owner, u64 offset,
8932 struct extent_record *rec;
8933 struct cache_extent *cache;
8935 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data. */
8937 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8938 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8942 rec = container_of(cache, struct extent_record, cache);
8944 struct data_backref *back;
8945 back = find_data_backref(rec, parent, root_objectid, owner,
8946 offset, 1, bytenr, num_bytes);
8949 if (back->node.found_ref) {
8950 back->found_ref -= refs_to_drop;
8952 rec->refs -= refs_to_drop;
8954 if (back->node.found_extent_tree) {
8955 back->num_refs -= refs_to_drop;
8956 if (rec->extent_item_refs)
8957 rec->extent_item_refs -= refs_to_drop;
8959 if (back->found_ref == 0)
8960 back->node.found_ref = 0;
8961 if (back->num_refs == 0)
8962 back->node.found_extent_tree = 0;
8964 if (!back->node.found_extent_tree && back->node.found_ref) {
8965 rb_erase(&back->node.node, &rec->backref_tree);
8969 struct tree_backref *back;
8970 back = find_tree_backref(rec, parent, root_objectid);
8973 if (back->node.found_ref) {
8976 back->node.found_ref = 0;
8978 if (back->node.found_extent_tree) {
8979 if (rec->extent_item_refs)
8980 rec->extent_item_refs--;
8981 back->node.found_extent_tree = 0;
8983 if (!back->node.found_extent_tree && back->node.found_ref) {
8984 rb_erase(&back->node.node, &rec->backref_tree);
8988 maybe_free_extent_rec(extent_cache, rec);
/*
 * Remove the extent-tree items that describe the extent starting at bytenr.
 *
 * The search starts at a key with objectid bytenr and offset (u64)-1 and
 * inspects the item found.  Items whose objectid differs from bytenr are
 * tested first.  Items of type EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF,
 * EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or SHARED_DATA_REF are
 * deleted with btrfs_del_item() after a "repair deleting extent record"
 * message.  For any other type the search key is moved just below the found
 * key (type - 1 with the maximum offset, or offset - 1 when the type is
 * already 0).  After deleting an EXTENT_ITEM or METADATA_ITEM,
 * btrfs_update_block_group() is called with found_key.offset bytes or
 * nodesize bytes respectively.
 *
 * NOTE(review): the loop construct, its exit conditions, the bytenr
 * parameter declaration and the return statements are on lines missing from
 * this excerpt.
 */
8993 static int delete_extent_records(struct btrfs_trans_handle *trans,
8994 struct btrfs_root *root,
8995 struct btrfs_path *path,
8998 struct btrfs_key key;
8999 struct btrfs_key found_key;
9000 struct extent_buffer *leaf;
9005 key.objectid = bytenr;
9007 key.offset = (u64)-1;
9010 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9017 if (path->slots[0] == 0)
9023 leaf = path->nodes[0];
9024 slot = path->slots[0];
9026 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9027 if (found_key.objectid != bytenr)
/* Anything that is not an extent item or one of its backrefs is stepped over. */
9030 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9031 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9032 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9033 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9034 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9035 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9036 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9037 btrfs_release_path(path);
9038 if (found_key.type == 0) {
9039 if (found_key.offset == 0)
9041 key.offset = found_key.offset - 1;
9042 key.type = found_key.type;
9044 key.type = found_key.type - 1;
9045 key.offset = (u64)-1;
9049 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9050 found_key.objectid, found_key.type, found_key.offset);
9052 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9055 btrfs_release_path(path);
/* Removing the extent item itself also changes block group accounting. */
9057 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9058 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9059 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9060 found_key.offset : root->fs_info->nodesize;
9062 ret = btrfs_update_block_group(root, bytenr,
9069 btrfs_release_path(path);
9074 * for a single backref, this will allocate a new extent
9075 * and add the backref to it.
/*
 * Add one backref (@back) for @rec to the extent tree, creating the extent
 * item first in the item-creation part of the function.
 *
 * Item creation: an empty item keyed
 * (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) is inserted, its refs
 * are set to 0 and its generation to rec->generation.  Data extents get
 * BTRFS_EXTENT_FLAG_DATA; tree blocks get a btrfs_tree_block_info (level
 * from rec->info_level, key objectid from rec->info_objectid) and
 * BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags.  The leaf is marked dirty and
 * btrfs_update_block_group() is called for the range.
 *
 * Reference creation: btrfs_inc_extent_ref() is called inside a loop bounded
 * by dback->found_ref for a data backref, and once for a tree backref.  The
 * parent is taken from the backref when back->full_backref is set.  A
 * message describing the new backref is printed to stderr.
 *
 * NOTE(review): the condition on @allocated that guards item creation, the
 * non-full-backref parent value and the return statements are not visible in
 * this excerpt.
 * NOTE(review): the token "©_key" used below looks like "&copy_key" damaged
 * by HTML entity decoding, and "copy_key;;" has a stray semicolon - confirm
 * against a pristine copy of the file before building.
 */
9077 static int record_extent(struct btrfs_trans_handle *trans,
9078 struct btrfs_fs_info *info,
9079 struct btrfs_path *path,
9080 struct extent_record *rec,
9081 struct extent_backref *back,
9082 int allocated, u64 flags)
9085 struct btrfs_root *extent_root = info->extent_root;
9086 struct extent_buffer *leaf;
9087 struct btrfs_key ins_key;
9088 struct btrfs_extent_item *ei;
9089 struct data_backref *dback;
9090 struct btrfs_tree_block_info *bi;
9093 rec->max_size = max_t(u64, rec->max_size,
9097 u32 item_size = sizeof(*ei);
9100 item_size += sizeof(*bi);
9102 ins_key.objectid = rec->start;
9103 ins_key.offset = rec->max_size;
9104 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9106 ret = btrfs_insert_empty_item(trans, extent_root, path,
9107 &ins_key, item_size);
9111 leaf = path->nodes[0];
9112 ei = btrfs_item_ptr(leaf, path->slots[0],
9113 struct btrfs_extent_item);
/* The reference count starts at 0; it is raised by the calls further down. */
9115 btrfs_set_extent_refs(leaf, ei, 0);
9116 btrfs_set_extent_generation(leaf, ei, rec->generation);
9118 if (back->is_data) {
9119 btrfs_set_extent_flags(leaf, ei,
9120 BTRFS_EXTENT_FLAG_DATA);
9122 struct btrfs_disk_key copy_key;;
/* The tree block info is placed directly behind the extent item. */
9124 bi = (struct btrfs_tree_block_info *)(ei + 1);
9125 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9128 btrfs_set_disk_key_objectid(©_key,
9129 rec->info_objectid);
9130 btrfs_set_disk_key_type(©_key, 0);
9131 btrfs_set_disk_key_offset(©_key, 0);
9133 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9134 btrfs_set_tree_block_key(leaf, bi, ©_key);
9136 btrfs_set_extent_flags(leaf, ei,
9137 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9140 btrfs_mark_buffer_dirty(leaf);
9141 ret = btrfs_update_block_group(extent_root, rec->start,
9142 rec->max_size, 1, 0);
9145 btrfs_release_path(path);
9148 if (back->is_data) {
9152 dback = to_data_backref(back);
9153 if (back->full_backref)
9154 parent = dback->parent;
9158 for (i = 0; i < dback->found_ref; i++) {
9159 /* if parent != 0, we're doing a full backref
9160 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9161 * just makes the backref allocator create a data
9164 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9165 rec->start, rec->max_size,
9169 BTRFS_FIRST_FREE_OBJECTID :
9175 fprintf(stderr, "adding new data backref"
9176 " on %llu %s %llu owner %llu"
9177 " offset %llu found %d\n",
9178 (unsigned long long)rec->start,
9179 back->full_backref ?
9181 back->full_backref ?
9182 (unsigned long long)parent :
9183 (unsigned long long)dback->root,
9184 (unsigned long long)dback->owner,
9185 (unsigned long long)dback->offset,
9189 struct tree_backref *tback;
9191 tback = to_tree_backref(back);
9192 if (back->full_backref)
9193 parent = tback->parent;
9197 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9198 rec->start, rec->max_size,
9199 parent, tback->root, 0, 0);
9200 fprintf(stderr, "adding new tree backref on "
9201 "start %llu len %llu parent %llu root %llu\n",
9202 rec->start, rec->max_size, parent, tback->root);
9205 btrfs_release_path(path);
/*
 * Linear search of the @entries list for an extent_entry whose bytenr and
 * bytes both equal the given values.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching entry is returned, and NULL when there is no
 * match - confirm against the full file.
 */
9209 static struct extent_entry *find_entry(struct list_head *entries,
9210 u64 bytenr, u64 bytes)
9212 struct extent_entry *entry = NULL;
9214 list_for_each_entry(entry, entries, list) {
9215 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry from @entries that the backrefs agree on most.
 *
 * Entries whose broken count equals their total count are tested first and
 * are not to be trusted.  Counts are then compared between the current
 * entry, the previous entry and the best candidate so far; equal counts mean
 * that no winner can be declared yet and the search continues.
 *
 * NOTE(review): most statements of the loop body and the return statement
 * are missing from this excerpt; presumably NULL is returned when no clear
 * winner exists - confirm against the full file.
 */
9222 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9224 struct extent_entry *entry, *best = NULL, *prev = NULL;
9226 list_for_each_entry(entry, entries, list) {
9228 * If there are as many broken entries as entries then we know
9229 * not to trust this particular entry.
9231 if (entry->broken == entry->count)
9235 * Special case, when there are only two entries and 'best' is
9245 * If our current entry == best then we can't be sure our best
9246 * is really the best, so we need to keep searching.
9248 if (best && best->count == entry->count) {
9254 /* Prev == entry, not good enough, have to keep searching */
9255 if (!prev->broken && prev->count == entry->count)
9259 best = (prev->count > entry->count) ? prev : entry;
9260 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind @dback so that it points at the
 * extent described by @entry.
 *
 * Steps visible here:
 *  - read the fs root dback->root with btrfs_read_fs_root();
 *  - search from (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) and
 *    walk forward until a file extent item with
 *    disk_bytenr == dback->disk_bytenr and disk_num_bytes == dback->bytes
 *    is found;
 *  - start a transaction and search again for that key with cow enabled;
 *  - report compressed extents whose disk_bytenr differs from entry->bytenr,
 *    and refs that fall outside the entry, as cases that cannot be handled;
 *  - update disk_bytenr, the extent offset, disk_num_bytes and, for
 *    uncompressed extents, ram_bytes, then mark the leaf dirty;
 *  - commit the transaction and release the path.
 *
 * The final return yields ret if it is non-zero, otherwise the result of
 * btrfs_commit_transaction().  PTR_ERR(trans) is returned on the transaction
 * start error path.
 *
 * NOTE(review): the guards around the error messages and the remaining
 * return statements are on lines missing from this excerpt.
 */
9268 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9269 struct data_backref *dback, struct extent_entry *entry)
9271 struct btrfs_trans_handle *trans;
9272 struct btrfs_root *root;
9273 struct btrfs_file_extent_item *fi;
9274 struct extent_buffer *leaf;
9275 struct btrfs_key key;
/* Locate the fs tree that owns the reference. */
9279 key.objectid = dback->root;
9280 key.type = BTRFS_ROOT_ITEM_KEY;
9281 key.offset = (u64)-1;
9282 root = btrfs_read_fs_root(info, &key);
9284 fprintf(stderr, "Couldn't find root for our ref\n");
9289 * The backref points to the original offset of the extent if it was
9290 * split, so we need to search down to the offset we have and then walk
9291 * forward until we find the backref we're looking for.
9293 key.objectid = dback->owner;
9294 key.type = BTRFS_EXTENT_DATA_KEY;
9295 key.offset = dback->offset;
9296 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9298 fprintf(stderr, "Error looking up ref %d\n", ret);
9303 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9304 ret = btrfs_next_leaf(root, path);
9306 fprintf(stderr, "Couldn't find our ref, next\n");
9310 leaf = path->nodes[0];
9311 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9312 if (key.objectid != dback->owner ||
9313 key.type != BTRFS_EXTENT_DATA_KEY) {
9314 fprintf(stderr, "Couldn't find our ref, search\n");
9317 fi = btrfs_item_ptr(leaf, path->slots[0],
9318 struct btrfs_file_extent_item);
9319 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9320 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9322 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9327 btrfs_release_path(path);
9329 trans = btrfs_start_transaction(root, 1);
9331 return PTR_ERR(trans);
9334 * Ok we have the key of the file extent we want to fix, now we can cow
9335 * down to the thing and fix it.
9337 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9339 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9340 key.objectid, key.type, key.offset, ret);
9344 fprintf(stderr, "Well that's odd, we just found this key "
9345 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9350 leaf = path->nodes[0];
9351 fi = btrfs_item_ptr(leaf, path->slots[0],
9352 struct btrfs_file_extent_item);
9354 if (btrfs_file_extent_compression(leaf, fi) &&
9355 dback->disk_bytenr != entry->bytenr) {
9356 fprintf(stderr, "Ref doesn't match the record start and is "
9357 "compressed, please take a btrfs-image of this file "
9358 "system and send it to a btrfs developer so they can "
9359 "complete this functionality for bytenr %Lu\n",
9360 dback->disk_bytenr);
9365 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9366 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9367 } else if (dback->disk_bytenr > entry->bytenr) {
9368 u64 off_diff, offset;
9370 off_diff = dback->disk_bytenr - entry->bytenr;
9371 offset = btrfs_file_extent_offset(leaf, fi);
9372 if (dback->disk_bytenr + offset +
9373 btrfs_file_extent_num_bytes(leaf, fi) >
9374 entry->bytenr + entry->bytes) {
9375 fprintf(stderr, "Ref is past the entry end, please "
9376 "take a btrfs-image of this file system and "
9377 "send it to a btrfs developer, ref %Lu\n",
9378 dback->disk_bytenr);
9383 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9384 btrfs_set_file_extent_offset(leaf, fi, offset);
9385 } else if (dback->disk_bytenr < entry->bytenr) {
9388 offset = btrfs_file_extent_offset(leaf, fi);
9389 if (dback->disk_bytenr + offset < entry->bytenr) {
9390 fprintf(stderr, "Ref is before the entry start, please"
9391 " take a btrfs-image of this file system and "
9392 "send it to a btrfs developer, ref %Lu\n",
9393 dback->disk_bytenr);
9398 offset += dback->disk_bytenr;
9399 offset -= entry->bytenr;
9400 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9401 btrfs_set_file_extent_offset(leaf, fi, offset);
9404 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9407 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9408 * only do this if we aren't using compression, otherwise it's a
9411 if (!btrfs_file_extent_compression(leaf, fi))
9412 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9414 printf("ram bytes may be wrong?\n");
9415 btrfs_mark_buffer_dirty(leaf);
9417 err = btrfs_commit_transaction(trans, root);
9418 btrfs_release_path(path);
9419 return ret ? ret : err;
/*
 * Try to make all data backrefs of @rec agree on (disk_bytenr, bytes).
 *
 * Pass 1 walks rec->backref_tree, testing for full backrefs, tree backrefs
 * and data backrefs with found_ref == 0, and groups backrefs into
 * extent_entry items keyed by (disk_bytenr, bytes) on a temporary entries
 * list.  Backrefs that differ from (rec->start, rec->nr) or are marked
 * broken are tested separately.
 * If there is at most one entry and no mismatch was seen, nothing needs
 * fixing.  Otherwise find_most_right_entry() is asked for the entry to
 * trust; an entry for (rec->start, rec->nr) is looked up or added and the
 * selection is repeated.  Several situations are reported on stderr as not
 * repairable.
 * Pass 2 walks the backrefs again and calls repair_ref() for data backrefs
 * that are compared against the chosen entry.  Finally the temporary entries
 * list is emptied.
 *
 * NOTE(review): the declarations of entries, nr_entries and mismatch, the
 * counter updates, the guards around the messages and all return statements
 * are on lines missing from this excerpt.
 */
9422 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9423 struct extent_record *rec)
9425 struct extent_backref *back, *tmp;
9426 struct data_backref *dback;
9427 struct extent_entry *entry, *best = NULL;
9430 int broken_entries = 0;
9435 * Metadata is easy and the backrefs should always agree on bytenr and
9436 * size, if not we've got bigger issues.
9441 rbtree_postorder_for_each_entry_safe(back, tmp,
9442 &rec->backref_tree, node) {
9443 if (back->full_backref || !back->is_data)
9446 dback = to_data_backref(back);
9449 * We only pay attention to backrefs that we found a real
9452 if (dback->found_ref == 0)
9456 * For now we only catch when the bytes don't match, not the
9457 * bytenr. We can easily do this at the same time, but I want
9458 * to have a fs image to test on before we just add repair
9459 * functionality willy-nilly so we know we won't screw up the
9463 entry = find_entry(&entries, dback->disk_bytenr,
9466 entry = malloc(sizeof(struct extent_entry));
9471 memset(entry, 0, sizeof(*entry));
9472 entry->bytenr = dback->disk_bytenr;
9473 entry->bytes = dback->bytes;
9474 list_add_tail(&entry->list, &entries);
9479 * If we only have on entry we may think the entries agree when
9480 * in reality they don't so we have to do some extra checking.
9482 if (dback->disk_bytenr != rec->start ||
9483 dback->bytes != rec->nr || back->broken)
9494 /* Yay all the backrefs agree, carry on good sir */
9495 if (nr_entries <= 1 && !mismatch)
9498 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9499 "%Lu\n", rec->start);
9502 * First we want to see if the backrefs can agree amongst themselves who
9503 * is right, so figure out which one of the entries has the highest
9506 best = find_most_right_entry(&entries);
9509 * Ok so we may have an even split between what the backrefs think, so
9510 * this is where we use the extent ref to see what it thinks.
9513 entry = find_entry(&entries, rec->start, rec->nr);
9514 if (!entry && (!broken_entries || !rec->found_rec)) {
9515 fprintf(stderr, "Backrefs don't agree with each other "
9516 "and extent record doesn't agree with anybody,"
9517 " so we can't fix bytenr %Lu bytes %Lu\n",
9518 rec->start, rec->nr);
9521 } else if (!entry) {
9523 * Ok our backrefs were broken, we'll assume this is the
9524 * correct value and add an entry for this range.
9526 entry = malloc(sizeof(struct extent_entry));
9531 memset(entry, 0, sizeof(*entry));
9532 entry->bytenr = rec->start;
9533 entry->bytes = rec->nr;
9534 list_add_tail(&entry->list, &entries);
9538 best = find_most_right_entry(&entries);
9540 fprintf(stderr, "Backrefs and extent record evenly "
9541 "split on who is right, this is going to "
9542 "require user input to fix bytenr %Lu bytes "
9543 "%Lu\n", rec->start, rec->nr);
9550 * I don't think this can happen currently as we'll abort() if we catch
9551 * this case higher up, but in case somebody removes that we still can't
9552 * deal with it properly here yet, so just bail out of that's the case.
9554 if (best->bytenr != rec->start) {
9555 fprintf(stderr, "Extent start and backref starts don't match, "
9556 "please use btrfs-image on this file system and send "
9557 "it to a btrfs developer so they can make fsck fix "
9558 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9559 rec->start, rec->nr);
9565 * Ok great we all agreed on an extent record, let's go find the real
9566 * references and fix up the ones that don't match.
9568 rbtree_postorder_for_each_entry_safe(back, tmp,
9569 &rec->backref_tree, node) {
9570 if (back->full_backref || !back->is_data)
9573 dback = to_data_backref(back);
9576 * Still ignoring backrefs that don't have a real ref attached
9579 if (dback->found_ref == 0)
9582 if (dback->bytes == best->bytes &&
9583 dback->disk_bytenr == best->bytenr)
9586 ret = repair_ref(info, path, dback, best);
9592 * Ok we messed with the actual refs, which means we need to drop our
9593 * entire cache and go back and rescan. I know this is a huge pain and
9594 * adds a lot of extra work, but it's the only way to be safe. Once all
9595 * the backrefs agree we may not need to do anything to the extent
9600 while (!list_empty(&entries)) {
9601 entry = list_entry(entries.next, struct extent_entry, list);
9602 list_del_init(&entry->list);
/*
 * Replace an extent record that has no extent item of its own (@rec) by its
 * single duplicate.
 *
 * The case where rec->found_rec is set or rec->num_duplicates is greater
 * than 1 is tested up front.  Otherwise @rec is removed from @extent_cache,
 * the first entry of rec->dups (called good below) is re-initialised,
 * takes over rec->refs and the rec->backrefs list, and is inserted into the
 * cache.  Cached records found by the lookup on the range of good are merged
 * first: records with found_rec set or with duplicates of their own are
 * moved onto good->dups (and good is queued on the global duplicate_extents
 * list), other records hand over their refs and backrefs.
 *
 * The final return yields 0 when good has duplicates after merging and 1
 * otherwise.
 *
 * NOTE(review): the early-return value, the loop around the lookup and the
 * handling of the insert_cache_extent() result are on lines missing from
 * this excerpt.
 */
9608 static int process_duplicates(struct cache_tree *extent_cache,
9609 struct extent_record *rec)
9611 struct extent_record *good, *tmp;
9612 struct cache_extent *cache;
9616 * If we found a extent record for this extent then return, or if we
9617 * have more than one duplicate we are likely going to need to delete
9620 if (rec->found_rec || rec->num_duplicates > 1)
9623 /* Shouldn't happen but just in case */
9624 BUG_ON(!rec->num_duplicates);
9627 * So this happens if we end up with a backref that doesn't match the
9628 * actual extent entry. So either the backref is bad or the extent
9629 * entry is bad. Either way we want to have the extent_record actually
9630 * reflect what we found in the extent_tree, so we need to take the
9631 * duplicate out and use that as the extent_record since the only way we
9632 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9634 remove_cache_extent(extent_cache, &rec->cache);
9636 good = to_extent_record(rec->dups.next);
9637 list_del_init(&good->list);
9638 INIT_LIST_HEAD(&good->backrefs);
9639 INIT_LIST_HEAD(&good->dups);
9640 good->cache.start = good->start;
9641 good->cache.size = good->nr;
9642 good->content_checked = 0;
9643 good->owner_ref_checked = 0;
9644 good->num_duplicates = 0;
9645 good->refs = rec->refs;
9646 list_splice_init(&rec->backrefs, &good->backrefs);
9648 cache = lookup_cache_extent(extent_cache, good->start,
9652 tmp = container_of(cache, struct extent_record, cache);
9655 * If we find another overlapping extent and it's found_rec is
9656 * set then it's a duplicate and we need to try and delete
9659 if (tmp->found_rec || tmp->num_duplicates > 0) {
9660 if (list_empty(&good->list))
9661 list_add_tail(&good->list,
9662 &duplicate_extents);
9663 good->num_duplicates += tmp->num_duplicates + 1;
9664 list_splice_init(&tmp->dups, &good->dups);
9665 list_del_init(&tmp->list);
9666 list_add_tail(&tmp->list, &good->dups);
9667 remove_cache_extent(extent_cache, &tmp->cache);
9672 * Ok we have another non extent item backed extent rec, so lets
9673 * just add it to this extent and carry on like we did above.
9675 good->refs += tmp->refs;
9676 list_splice_init(&tmp->backrefs, &good->backrefs);
9677 remove_cache_extent(extent_cache, &tmp->cache);
9680 ret = insert_cache_extent(extent_cache, &good->cache);
9683 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of the duplicate records attached to @rec.
 *
 * First the record with the widest coverage among rec->dups is searched for
 * (tracked in good); extents that overlap without one fully covering the
 * other are reported on stderr as needing more careful thought.  Records are
 * then collected on a local delete_list (presumably all except good -
 * confirm, the guarding lines are not visible).  Inside one transaction on
 * the extent root, each collected record is looked up by
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and removed with btrfs_del_item();
 * found_rec == 0 and metadata records are tested separately, the latter
 * being reported as unexpected.  Afterwards delete_list and rec->dups are
 * emptied.
 *
 * Returns ret when it is non-zero, otherwise nr_del.  rec->num_duplicates is
 * reset to 0 only when ret and nr_del are both 0.
 *
 * NOTE(review): the overlap message prints start and nr under a
 * "[%Lu-%Lu]" format, i.e. a length where the format suggests an end
 * offset - confirm intent.
 */
9686 static int delete_duplicate_records(struct btrfs_root *root,
9687 struct extent_record *rec)
9689 struct btrfs_trans_handle *trans;
9690 LIST_HEAD(delete_list);
9691 struct btrfs_path path;
9692 struct extent_record *tmp, *good, *n;
9695 struct btrfs_key key;
9697 btrfs_init_path(&path);
9700 /* Find the record that covers all of the duplicates. */
9701 list_for_each_entry(tmp, &rec->dups, list) {
9702 if (good->start < tmp->start)
9704 if (good->nr > tmp->nr)
9707 if (tmp->start + tmp->nr < good->start + good->nr) {
9708 fprintf(stderr, "Ok we have overlapping extents that "
9709 "aren't completely covered by each other, this "
9710 "is going to require more careful thought. "
9711 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9712 tmp->start, tmp->nr, good->start, good->nr);
9719 list_add_tail(&rec->list, &delete_list);
9721 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9724 list_move_tail(&tmp->list, &delete_list);
/* All deletions below operate on the extent tree. */
9727 root = root->fs_info->extent_root;
9728 trans = btrfs_start_transaction(root, 1);
9729 if (IS_ERR(trans)) {
9730 ret = PTR_ERR(trans);
9734 list_for_each_entry(tmp, &delete_list, list) {
9735 if (tmp->found_rec == 0)
9737 key.objectid = tmp->start;
9738 key.type = BTRFS_EXTENT_ITEM_KEY;
9739 key.offset = tmp->nr;
9741 /* Shouldn't happen but just in case */
9742 if (tmp->metadata) {
9743 fprintf(stderr, "Well this shouldn't happen, extent "
9744 "record overlaps but is metadata? "
9745 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9749 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9755 ret = btrfs_del_item(trans, root, &path);
9758 btrfs_release_path(&path);
9761 err = btrfs_commit_transaction(trans, root);
9765 while (!list_empty(&delete_list)) {
9766 tmp = to_extent_record(delete_list.next);
9767 list_del_init(&tmp->list);
9773 while (!list_empty(&rec->dups)) {
9774 tmp = to_extent_record(rec->dups.next);
9775 list_del_init(&tmp->list);
9779 btrfs_release_path(&path);
9781 if (!ret && !nr_del)
9782 rec->num_duplicates = 0;
9784 return ret ? ret : nr_del;
/*
 * Look up, in the fs trees, the file extent items behind the data backrefs of
 * @rec that have not been matched yet.
 *
 * For each data backref that is not a full backref and whose found_ref is
 * still zero, the owning root is read and the EXTENT_DATA item
 * (owner, EXTENT_DATA_KEY, offset) is searched. The disk bytenr and disk
 * length stored in that item are copied into the backref and found_ref is
 * incremented.
 *
 * @info:         filesystem being checked
 * @path:         caller provided path, released after each search
 * @extent_cache: extent record cache, consulted for the bytenr that was found
 * @rec:          extent record whose backref_tree is walked
 *
 * A root that cannot be read with -ENOENT is treated as a bad ref; any other
 * root read error is returned as PTR_ERR().
 */
9787 static int find_possible_backrefs(struct btrfs_fs_info *info,
9788 struct btrfs_path *path,
9789 struct cache_tree *extent_cache,
9790 struct extent_record *rec)
9792 struct btrfs_root *root;
9793 struct extent_backref *back, *tmp;
9794 struct data_backref *dback;
9795 struct cache_extent *cache;
9796 struct btrfs_file_extent_item *fi;
9797 struct btrfs_key key;
9801 rbtree_postorder_for_each_entry_safe(back, tmp,
9802 &rec->backref_tree, node) {
9803 /* Don't care about full backrefs (poor unloved backrefs) */
9804 if (back->full_backref || !back->is_data)
9807 dback = to_data_backref(back);
9809 /* We found this one, we don't need to do a lookup */
9810 if (dback->found_ref)
/* Key used to read the root that owns this backref. */
9813 key.objectid = dback->root;
9814 key.type = BTRFS_ROOT_ITEM_KEY;
9815 key.offset = (u64)-1;
9817 root = btrfs_read_fs_root(info, &key);
9819 /* No root, definitely a bad ref, skip */
9820 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9822 /* Other err, exit */
9824 return PTR_ERR(root);
/* Key of the file extent item this backref claims to come from. */
9826 key.objectid = dback->owner;
9827 key.type = BTRFS_EXTENT_DATA_KEY;
9828 key.offset = dback->offset;
9829 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9831 btrfs_release_path(path);
9834 /* Didn't find it, we can carry on */
/* Read where the file extent item really points on disk. */
9839 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9840 struct btrfs_file_extent_item);
9841 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9842 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9843 btrfs_release_path(path);
/* Check whether an extent record already covers that bytenr. */
9844 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* NOTE(review): this local shadows the outer tmp used as the loop cursor. */
9846 struct extent_record *tmp;
9847 tmp = container_of(cache, struct extent_record, cache);
9850 * If we found an extent record for the bytenr for this
9851 * particular backref then we can't add it to our
9852 * current extent record. We only want to add backrefs
9853 * that don't have a corresponding extent item in the
9854 * extent tree since they likely belong to this record
9855 * and we need to fix it if it doesn't match bytenrs.
9861 dback->found_ref += 1;
9862 dback->disk_bytenr = bytenr;
9863 dback->bytes = bytes;
9866 * Set this so the verify backref code knows not to trust the
9867 * values in this backref.
9876 * Record orphan data ref into corresponding root.
9878 * Return 0 if the extent item contains data ref and recorded.
9879 * Return 1 if the extent item contains no useful data ref
9880 * On that case, it may contains only shared_dataref or metadata backref
9881 * or the file extent exists(this should be handled by the extent bytenr
9883 * Return <0 if something goes wrong.
9885 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9886 struct extent_record *rec)
9888 struct btrfs_key key;
9889 struct btrfs_root *dest_root;
9890 struct extent_backref *back, *tmp;
9891 struct data_backref *dback;
9892 struct orphan_data_extent *orphan;
9893 struct btrfs_path path;
9894 int recorded_data_ref = 0;
9899 btrfs_init_path(&path);
9900 rbtree_postorder_for_each_entry_safe(back, tmp,
9901 &rec->backref_tree, node) {
9902 if (back->full_backref || !back->is_data ||
9903 !back->found_extent_tree)
9905 dback = to_data_backref(back);
9906 if (dback->found_ref)
9908 key.objectid = dback->root;
9909 key.type = BTRFS_ROOT_ITEM_KEY;
9910 key.offset = (u64)-1;
9912 dest_root = btrfs_read_fs_root(fs_info, &key);
9914 /* For non-exist root we just skip it */
9915 if (IS_ERR(dest_root) || !dest_root)
9918 key.objectid = dback->owner;
9919 key.type = BTRFS_EXTENT_DATA_KEY;
9920 key.offset = dback->offset;
9922 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9923 btrfs_release_path(&path);
9925 * For ret < 0, it's OK since the fs-tree may be corrupted,
9926 * we need to record it for inode/file extent rebuild.
9927 * For ret > 0, we record it only for file extent rebuild.
9928 * For ret == 0, the file extent exists but only bytenr
9929 * mismatch, let the original bytenr fix routine to handle,
9935 orphan = malloc(sizeof(*orphan));
9940 INIT_LIST_HEAD(&orphan->list);
9941 orphan->root = dback->root;
9942 orphan->objectid = dback->owner;
9943 orphan->offset = dback->offset;
9944 orphan->disk_bytenr = rec->cache.start;
9945 orphan->disk_len = rec->cache.size;
9946 list_add(&dest_root->orphan_data_extents, &orphan->list);
9947 recorded_data_ref = 1;
9950 btrfs_release_path(&path);
9952 return !recorded_data_ref;
9958 * when an incorrect extent item is found, this will delete
9959 * all of the existing entries for it and recreate them
9960 * based on what the tree scan found.
/*
 * @info:         filesystem being repaired
 * @extent_cache: extent record cache, handed to find_possible_backrefs()
 * @rec:          extent record whose on-disk references are rebuilt
 *
 * The steps are labelled in the body: (1) verify_backrefs(), (2) delete the
 * existing records with delete_extent_records(), (3) call record_extent()
 * for the backrefs that were found. For a data extent whose counted refs
 * differ from the refs in the extent item, find_possible_backrefs() runs
 * before step one. Steps two and three use one transaction on the extent
 * root, which is committed at the end.
 */
9962 static int fixup_extent_refs(struct btrfs_fs_info *info,
9963 struct cache_tree *extent_cache,
9964 struct extent_record *rec)
9966 struct btrfs_trans_handle *trans = NULL;
9968 struct btrfs_path path;
9969 struct cache_extent *cache;
9970 struct extent_backref *back, *tmp;
/* Carry the full backref flag over to the records created in step three. */
9974 if (rec->flag_block_full_backref)
9975 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9977 btrfs_init_path(&path);
9978 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9980 * Sometimes the backrefs themselves are so broken they don't
9981 * get attached to any meaningful rec, so first go back and
9982 * check any of our backrefs that we couldn't find and throw
9983 * them into the list if we find the backref so that
9984 * verify_backrefs can figure out what to do.
9986 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9991 /* step one, make sure all of the backrefs agree */
9992 ret = verify_backrefs(info, &path, rec);
9996 trans = btrfs_start_transaction(info->extent_root, 1);
9997 if (IS_ERR(trans)) {
9998 ret = PTR_ERR(trans);
10002 /* step two, delete all the existing records */
10003 ret = delete_extent_records(trans, info->extent_root, &path,
10009 /* was this block corrupt? If so, don't add references to it */
10010 cache = lookup_cache_extent(info->corrupt_blocks,
10011 rec->start, rec->max_size);
10017 /* step three, recreate all the refs we did find */
10018 rbtree_postorder_for_each_entry_safe(back, tmp,
10019 &rec->backref_tree, node) {
10021 * if we didn't find any references, don't create a
10022 * new extent record
10024 if (!back->found_ref)
10027 rec->bad_full_backref = 0;
10028 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10036 int err = btrfs_commit_transaction(trans, info->extent_root);
10042 fprintf(stderr, "Repaired extent references for %llu\n",
10043 (unsigned long long)rec->start);
10045 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF in the on-disk extent item of
 * @rec so that it matches rec->flag_block_full_backref.
 *
 * @fs_info: filesystem being repaired; the item lives in the extent root
 * @rec:     extent record describing the item to fix
 *
 * The item is looked up as (start, METADATA_ITEM_KEY, info_level) for
 * metadata and as (start, EXTENT_ITEM_KEY, max_size) otherwise.
 *
 * Returns PTR_ERR() when the transaction cannot be started. After a
 * successful update ret holds the result of btrfs_commit_transaction().
 */
10049 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10050 struct extent_record *rec)
10052 struct btrfs_trans_handle *trans;
10053 struct btrfs_root *root = fs_info->extent_root;
10054 struct btrfs_path path;
10055 struct btrfs_extent_item *ei;
10056 struct btrfs_key key;
/* Build the key of the extent item to patch. */
10060 key.objectid = rec->start;
10061 if (rec->metadata) {
10062 key.type = BTRFS_METADATA_ITEM_KEY;
10063 key.offset = rec->info_level;
10065 key.type = BTRFS_EXTENT_ITEM_KEY;
10066 key.offset = rec->max_size;
10069 trans = btrfs_start_transaction(root, 0);
10071 return PTR_ERR(trans);
10073 btrfs_init_path(&path);
/* Search with cow set so that the leaf can be modified in place. */
10074 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/*
 * Early exits: release the path and close the transaction.
 * NOTE(review): the result of btrfs_commit_transaction() is not checked on
 * these two paths - confirm that is acceptable.
 */
10076 btrfs_release_path(&path);
10077 btrfs_commit_transaction(trans, root);
10080 fprintf(stderr, "Didn't find extent for %llu\n",
10081 (unsigned long long)rec->start);
10082 btrfs_release_path(&path);
10083 btrfs_commit_transaction(trans, root);
/* Item found: read its current flags. */
10087 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10088 struct btrfs_extent_item);
10089 flags = btrfs_extent_flags(path.nodes[0], ei);
10090 if (rec->flag_block_full_backref) {
10091 fprintf(stderr, "setting full backref on %llu\n",
10092 (unsigned long long)key.objectid);
10093 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10095 fprintf(stderr, "clearing full backref on %llu\n",
10096 (unsigned long long)key.objectid);
10097 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Store the new flags and mark the leaf dirty before committing. */
10099 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10100 btrfs_mark_buffer_dirty(path.nodes[0]);
10101 btrfs_release_path(&path);
10102 ret = btrfs_commit_transaction(trans, root);
10104 fprintf(stderr, "Repaired extent flags for %llu\n",
10105 (unsigned long long)rec->start);
10110 /* right now we only prune from the extent allocation tree */
/*
 * Delete the pointer to the block described by @corrupt from a node of the
 * extent tree.
 *
 * @trans:   running transaction
 * @info:    filesystem being repaired
 * @corrupt: corrupt block; its key, level and cache.start are used
 *
 * The search is stopped one level above the corrupt block (lowest_level is
 * corrupt->level + 1). The slot returned by the search is tried first, then
 * every slot of that node is compared with corrupt->cache.start. The matching
 * pointer is removed with btrfs_del_ptr(). The path is released before
 * returning.
 */
10111 static int prune_one_block(struct btrfs_trans_handle *trans,
10112 struct btrfs_fs_info *info,
10113 struct btrfs_corrupt_block *corrupt)
10116 struct btrfs_path path;
10117 struct extent_buffer *eb;
/* Level of the node expected to hold the pointer to the corrupt block. */
10121 int level = corrupt->level + 1;
10123 btrfs_init_path(&path);
10125 /* we want to stop at the parent to our busted block */
10126 path.lowest_level = level;
10128 ret = btrfs_search_slot(trans, info->extent_root,
10129 &corrupt->key, &path, -1, 1);
10134 eb = path.nodes[level];
10141 * hopefully the search gave us the block we want to prune,
10142 * lets try that first
10144 slot = path.slots[level];
10145 found = btrfs_node_blockptr(eb, slot);
10146 if (found == corrupt->cache.start)
10149 nritems = btrfs_header_nritems(eb);
10151 /* the search failed, lets scan this node and hope we find it */
10152 for (slot = 0; slot < nritems; slot++) {
10153 found = btrfs_node_blockptr(eb, slot);
10154 if (found == corrupt->cache.start)
10158 * we couldn't find the bad block. TODO, search all the nodes for pointers
10161 if (eb == info->extent_root->node) {
10166 btrfs_release_path(&path);
10171 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10172 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10175 btrfs_release_path(&path);
/*
 * Prune the blocks recorded in info->corrupt_blocks from the extent tree.
 *
 * Each cache entry is handed to prune_one_block() and then removed from the
 * cache. The work uses a transaction on the extent root.
 *
 * @info: filesystem being repaired
 *
 * Returns PTR_ERR() when the transaction cannot be started, or the result of
 * btrfs_commit_transaction().
 * NOTE(review): the return value of prune_one_block() is ignored - confirm
 * that pruning is meant to be best effort.
 */
10179 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10181 struct btrfs_trans_handle *trans = NULL;
10182 struct cache_extent *cache;
10183 struct btrfs_corrupt_block *corrupt;
/* Always fetch the lowest remaining entry; it is removed further down. */
10186 cache = search_cache_extent(info->corrupt_blocks, 0);
10190 trans = btrfs_start_transaction(info->extent_root, 1);
10192 return PTR_ERR(trans);
10194 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10195 prune_one_block(trans, info, corrupt);
10196 remove_cache_extent(info->corrupt_blocks, cache);
10199 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Report every inconsistency recorded in @extent_cache and, in repair mode,
 * try to fix it.
 *
 * @root:         any root of the filesystem; root->fs_info is used throughout
 * @extent_cache: extent records collected by the tree scan. Records are
 *                removed from the cache and their backrefs are freed as they
 *                are processed.
 *
 * Phases visible in the body:
 *  - the range of every extent record and of every corrupt block is marked
 *    dirty in fs_info->excluded_extents, the corrupt blocks are pruned and
 *    the cached block groups are reset
 *  - in repair mode, each record queued on duplicate_extents goes through
 *    process_duplicates() and delete_duplicate_records()
 *  - the cache is walked and these problems are reported on stderr: multiple
 *    extent items, ref count mismatch (followed by
 *    record_orphan_data_extents()), backpointer mismatch, failed owner ref
 *    check, bad full backref, metadata crossing a stripe boundary, extent
 *    type not matching its chunk. fixup_extent_refs() and
 *    fixup_extent_flags() perform the repairs.
 *  - excluded ranges are cleared again for records that were clean or fixed
 *    (only when repairing and init_extent_tree is not set)
 *  - a failure other than -EAGAIN is reported as a failed repair; the last
 *    step visible here runs btrfs_fix_block_accounting() in a transaction on
 *    the extent root
 */
10203 static int check_extent_refs(struct btrfs_root *root,
10204 struct cache_tree *extent_cache)
10206 struct extent_record *rec;
10207 struct cache_extent *cache;
10214 * if we're doing a repair, we have to make sure
10215 * we don't allocate from the problem extents.
10216 * In the worst case, this will be all the
10217 * extents in the FS
10219 cache = search_cache_extent(extent_cache, 0);
10221 rec = container_of(cache, struct extent_record, cache);
10222 set_extent_dirty(root->fs_info->excluded_extents,
10224 rec->start + rec->max_size - 1);
10225 cache = next_cache_extent(cache);
10228 /* pin down all the corrupted blocks too */
10229 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10231 set_extent_dirty(root->fs_info->excluded_extents,
10233 cache->start + cache->size - 1);
10234 cache = next_cache_extent(cache);
10236 prune_corrupt_blocks(root->fs_info);
10237 reset_cached_block_groups(root->fs_info);
10240 reset_cached_block_groups(root->fs_info);
10243 * We need to delete any duplicate entries we find first otherwise we
10244 * could mess up the extent tree when we have backrefs that actually
10245 * belong to a different extent item and not the weird duplicate one.
10247 while (repair && !list_empty(&duplicate_extents)) {
10248 rec = to_extent_record(duplicate_extents.next);
10249 list_del_init(&rec->list);
10251 /* Sometimes we can find a backref before we find an actual
10252 * extent, so we need to process it a little bit to see if there
10253 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10254 * if this is a backref screwup. If we need to delete stuff
10255 * process_duplicates() will return 0, otherwise it will return
10258 if (process_duplicates(extent_cache, rec))
10260 ret = delete_duplicate_records(root, rec);
10264 * delete_duplicate_records will return the number of entries
10265 * deleted, so if it's greater than 0 then we know we actually
10266 * did something and we need to remove.
10279 cache = search_cache_extent(extent_cache, 0);
10282 rec = container_of(cache, struct extent_record, cache);
10283 if (rec->num_duplicates) {
10284 fprintf(stderr, "extent item %llu has multiple extent "
10285 "items\n", (unsigned long long)rec->start);
10289 if (rec->refs != rec->extent_item_refs) {
10290 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10291 (unsigned long long)rec->start,
10292 (unsigned long long)rec->nr);
10293 fprintf(stderr, "extent item %llu, found %llu\n",
10294 (unsigned long long)rec->extent_item_refs,
10295 (unsigned long long)rec->refs);
10296 ret = record_orphan_data_extents(root->fs_info, rec);
10302 if (all_backpointers_checked(rec, 1)) {
10303 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10304 (unsigned long long)rec->start,
10305 (unsigned long long)rec->nr);
10309 if (!rec->owner_ref_checked) {
10310 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10311 (unsigned long long)rec->start,
10312 (unsigned long long)rec->nr);
10317 if (repair && fix) {
10318 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10324 if (rec->bad_full_backref) {
10325 fprintf(stderr, "bad full backref, on [%llu]\n",
10326 (unsigned long long)rec->start);
10328 ret = fixup_extent_flags(root->fs_info, rec);
10336 * Although it's not a extent ref's problem, we reuse this
10337 * routine for error reporting.
10338 * No repair function yet.
10340 if (rec->crossing_stripes) {
10342 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10343 rec->start, rec->start + rec->max_size);
10347 if (rec->wrong_chunk_type) {
10349 "bad extent [%llu, %llu), type mismatch with chunk\n",
10350 rec->start, rec->start + rec->max_size);
10355 remove_cache_extent(extent_cache, cache);
10356 free_all_extent_backrefs(rec);
10357 if (!init_extent_tree && repair && (!cur_err || fix))
10358 clear_extent_dirty(root->fs_info->excluded_extents,
10360 rec->start + rec->max_size - 1);
10365 if (ret && ret != -EAGAIN) {
10366 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10369 struct btrfs_trans_handle *trans;
10371 root = root->fs_info->extent_root;
10372 trans = btrfs_start_transaction(root, 1);
10373 if (IS_ERR(trans)) {
10374 ret = PTR_ERR(trans);
10378 ret = btrfs_fix_block_accounting(trans, root);
10381 ret = btrfs_commit_transaction(trans, root);
10393 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10397 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10398 stripe_size = length;
10399 stripe_size /= num_stripes;
10400 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10401 stripe_size = length * 2;
10402 stripe_size /= num_stripes;
10403 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10404 stripe_size = length;
10405 stripe_size /= (num_stripes - 1);
10406 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10407 stripe_size = length;
10408 stripe_size /= (num_stripes - 2);
10410 stripe_size = length;
10412 return stripe_size;
10416 * Check the chunk with its block group/dev list ref:
10417 * Return 0 if all refs seems valid.
10418 * Return 1 if part of refs seems valid, need later check for rebuild ref
10419 * like missing block group and needs to search extent tree to rebuild them.
10420 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * @chunk_rec:         chunk record to verify
 * @block_group_cache: block group records; the matching one is linked to
 *                     the chunk through chunk_rec->bg_rec
 * @dev_extent_cache:  device extent records; the one matching each stripe
 *                     is moved onto chunk_rec->dextents
 *
 * check_chunks() passes its silent flag as the fourth argument.
 */
10422 static int check_chunk_refs(struct chunk_record *chunk_rec,
10423 struct block_group_tree *block_group_cache,
10424 struct device_extent_tree *dev_extent_cache,
10427 struct cache_extent *block_group_item;
10428 struct block_group_record *block_group_rec;
10429 struct cache_extent *dev_extent_item;
10430 struct device_extent_record *dev_extent_rec;
10434 int metadump_v2 = 0;
/* Part one: look for the block group item that belongs to this chunk. */
10438 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10440 chunk_rec->length);
10441 if (block_group_item) {
10442 block_group_rec = container_of(block_group_item,
10443 struct block_group_record,
/*
 * A block group is keyed (objectid = chunk offset, offset = chunk length),
 * hence the crossed comparison below.
 */
10445 if (chunk_rec->length != block_group_rec->offset ||
10446 chunk_rec->offset != block_group_rec->objectid ||
10448 chunk_rec->type_flags != block_group_rec->flags)) {
10451 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10452 chunk_rec->objectid,
10457 chunk_rec->type_flags,
10458 block_group_rec->objectid,
10459 block_group_rec->type,
10460 block_group_rec->offset,
10461 block_group_rec->offset,
10462 block_group_rec->objectid,
10463 block_group_rec->flags);
/* Unlink the block group from the list it is on and attach it to the chunk. */
10466 list_del_init(&block_group_rec->list);
10467 chunk_rec->bg_rec = block_group_rec;
10472 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10473 chunk_rec->objectid,
10478 chunk_rec->type_flags);
/*
 * Part two: every stripe needs a device extent with the same devid, offset,
 * chunk offset and per-stripe length.
 */
10485 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10486 chunk_rec->num_stripes);
10487 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10488 devid = chunk_rec->stripes[i].devid;
10489 offset = chunk_rec->stripes[i].offset;
10490 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10491 devid, offset, length);
10492 if (dev_extent_item) {
10493 dev_extent_rec = container_of(dev_extent_item,
10494 struct device_extent_record,
10496 if (dev_extent_rec->objectid != devid ||
10497 dev_extent_rec->offset != offset ||
10498 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10499 dev_extent_rec->length != length) {
10502 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10503 chunk_rec->objectid,
10506 chunk_rec->stripes[i].devid,
10507 chunk_rec->stripes[i].offset,
10508 dev_extent_rec->objectid,
10509 dev_extent_rec->offset,
10510 dev_extent_rec->length);
/* Move the device extent onto the list owned by this chunk. */
10513 list_move(&dev_extent_rec->chunk_list,
10514 &chunk_rec->dextents);
10519 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10520 chunk_rec->objectid,
10523 chunk_rec->stripes[i].devid,
10524 chunk_rec->stripes[i].offset);
10531 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk in @chunk_cache and sort the chunk
 * records by result.
 *
 * @chunk_cache:       all chunk records
 * @block_group_cache: block group records to match against
 * @dev_extent_cache:  device extent records to match against
 * @good:              optional list (may be NULL) for chunks with result 0
 * @bad:               optional list (may be NULL) for chunks with result < 0
 * @rebuild:           optional list (may be NULL) for chunks with result > 0
 * @silent:            forwarded to check_chunk_refs()
 *
 * After the chunk pass, the block groups still linked on
 * block_group_cache->block_groups and the device extents still linked on
 * dev_extent_cache->no_chunk_orphans are walked; the messages below report
 * them as having no chunk.
 */
10532 int check_chunks(struct cache_tree *chunk_cache,
10533 struct block_group_tree *block_group_cache,
10534 struct device_extent_tree *dev_extent_cache,
10535 struct list_head *good, struct list_head *bad,
10536 struct list_head *rebuild, int silent)
10538 struct cache_extent *chunk_item;
10539 struct chunk_record *chunk_rec;
10540 struct block_group_record *bg_rec;
10541 struct device_extent_record *dext_rec;
10545 chunk_item = first_cache_extent(chunk_cache);
10546 while (chunk_item) {
10547 chunk_rec = container_of(chunk_item, struct chunk_record,
10549 err = check_chunk_refs(chunk_rec, block_group_cache,
10550 dev_extent_cache, silent);
/* File the chunk on the list that matches the result, if one was given. */
10553 if (err == 0 && good)
10554 list_add_tail(&chunk_rec->list, good);
10555 if (err > 0 && rebuild)
10556 list_add_tail(&chunk_rec->list, rebuild);
10557 if (err < 0 && bad)
10558 list_add_tail(&chunk_rec->list, bad);
10559 chunk_item = next_cache_extent(chunk_item);
/* Block groups that no chunk claimed. */
10562 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10565 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Device extents that no chunk claimed. */
10573 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10577 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10578 dext_rec->objectid,
/*
 * Add up the lengths of the device extents that belong to @dev_rec and
 * compare the total with dev_rec->byte_used.
 *
 * @dev_rec:    device item record
 * @dext_cache: device extent records; the walk starts at (devid, 0)
 *
 * Every device extent that is counted is also unlinked from its device_list.
 * A mismatch between the total and byte_used is reported on stderr.
 */
10588 static int check_device_used(struct device_record *dev_rec,
10589 struct device_extent_tree *dext_cache)
10591 struct cache_extent *cache;
10592 struct device_extent_record *dev_extent_rec;
10593 u64 total_byte = 0;
10595 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10597 dev_extent_rec = container_of(cache,
10598 struct device_extent_record,
10600 if (dev_extent_rec->objectid != dev_rec->devid)
/* Account this extent to the device and step to the next cache entry. */
10603 list_del_init(&dev_extent_rec->device_list);
10604 total_byte += dev_extent_rec->length;
10605 cache = next_cache_extent(cache);
10608 if (total_byte != dev_rec->byte_used) {
10610 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10611 total_byte, dev_rec->byte_used, dev_rec->objectid,
10612 dev_rec->type, dev_rec->offset);
10620 * Unlike device size alignment check above, some super total_bytes check
10621 * failure can lead to mount failure for newer kernel.
10623 * So this function will return the error for a fatal super total_bytes problem.
/*
 * @fs_info: filesystem whose super block total_bytes is compared with the sum
 *           of dev->total_bytes over fs_info->fs_devices->devices
 *
 * A super block value smaller than the device sum is reported with error().
 * A value that is unaligned to the sector size, or merely different from the
 * device sum, only produces warnings; that second check is not reached when
 * the super block carries a METADUMP or METADUMP_V2 flag.
 */
10625 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10627 struct btrfs_device *dev;
10628 struct list_head *dev_list = &fs_info->fs_devices->devices;
10629 u64 total_bytes = 0;
10630 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
/* Sum the size of every device of the filesystem. */
10632 list_for_each_entry(dev, dev_list, dev_list)
10633 total_bytes += dev->total_bytes;
10635 /* Important check, which can cause unmountable fs */
10636 if (super_bytes < total_bytes) {
10637 error("super total bytes %llu smaller than real device(s) size %llu",
10638 super_bytes, total_bytes);
10639 error("mounting this fs may fail for newer kernels");
10640 error("this can be fixed by 'btrfs rescue fix-device-size'");
10645 * Optional check, just to make everything aligned and match with each
10648 * For a btrfs-image restored fs, we don't need to check it anyway.
10650 if (btrfs_super_flags(fs_info->super_copy) &
10651 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10653 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10654 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10655 super_bytes != total_bytes) {
10656 warning("minor unaligned/mismatch device size detected");
10658 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10663 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * @dev_cache:        rb-tree of device_record entries
 * @dev_extent_cache: device extent records
 *
 * For every device record, check_device_used() compares the device extents
 * with the device item and check_dev_size_alignment() checks total_byte
 * against the sector size of global_info. The device extents still linked on
 * dev_extent_cache->no_device_orphans are then walked; the message below
 * reports them as having no device.
 */
10664 static int check_devices(struct rb_root *dev_cache,
10665 struct device_extent_tree *dev_extent_cache)
10667 struct rb_node *dev_node;
10668 struct device_record *dev_rec;
10669 struct device_extent_record *dext_rec;
/* Walk the device records in rb-tree order. */
10673 dev_node = rb_first(dev_cache);
10675 dev_rec = container_of(dev_node, struct device_record, node);
10676 err = check_device_used(dev_rec, dev_extent_cache);
10680 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10681 global_info->sectorsize);
10682 dev_node = rb_next(dev_node);
/* Device extents that no device claimed. */
10684 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10687 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10688 dext_rec->objectid, dext_rec->offset, dext_rec->length);
10695 static int add_root_item_to_list(struct list_head *head,
10696 u64 objectid, u64 bytenr, u64 last_snapshot,
10697 u8 level, u8 drop_level,
10698 struct btrfs_key *drop_key)
10701 struct root_item_record *ri_rec;
10702 ri_rec = malloc(sizeof(*ri_rec));
10705 ri_rec->bytenr = bytenr;
10706 ri_rec->objectid = objectid;
10707 ri_rec->level = level;
10708 ri_rec->drop_level = drop_level;
10709 ri_rec->last_snapshot = last_snapshot;
10711 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10712 list_add_tail(&ri_rec->list, head);
10717 static void free_root_item_list(struct list_head *list)
10719 struct root_item_record *ri_rec;
10721 while (!list_empty(list)) {
10722 ri_rec = list_first_entry(list, struct root_item_record,
10724 list_del_init(&ri_rec->list);
/*
 * Scan the trees whose roots are queued on @list.
 *
 * For each root_item_record the root block is read with read_tree_block()
 * and registered through add_root_to_pending(); run_next_block() is called
 * with the record to scan the blocks. Handled records are unlinked from
 * @list. One more run_next_block() call without a record (NULL) follows the
 * per-record work.
 *
 * @list: list of root_item_record entries to process
 * @root: supplies root->fs_info and is passed to run_next_block()
 * @bits: block_info array handed to run_next_block()
 *
 * The cache arguments (@pending, @seen, @reada, @nodes, @extent_cache,
 * @chunk_cache, @dev_cache, @block_group_cache, @dev_extent_cache) are
 * forwarded unchanged to add_root_to_pending() and run_next_block().
 */
10729 static int deal_root_from_list(struct list_head *list,
10730 struct btrfs_root *root,
10731 struct block_info *bits,
10733 struct cache_tree *pending,
10734 struct cache_tree *seen,
10735 struct cache_tree *reada,
10736 struct cache_tree *nodes,
10737 struct cache_tree *extent_cache,
10738 struct cache_tree *chunk_cache,
10739 struct rb_root *dev_cache,
10740 struct block_group_tree *block_group_cache,
10741 struct device_extent_tree *dev_extent_cache)
10746 while (!list_empty(list)) {
10747 struct root_item_record *rec;
10748 struct extent_buffer *buf;
10749 rec = list_entry(list->next,
10750 struct root_item_record, list);
/* Read the root block of this tree; an unreadable block is released again. */
10752 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10753 if (!extent_buffer_uptodate(buf)) {
10754 free_extent_buffer(buf);
10758 ret = add_root_to_pending(buf, extent_cache, pending,
10759 seen, nodes, rec->objectid);
10763 * To rebuild extent tree, we need deal with snapshot
10764 * one by one, otherwise we deal with node firstly which
10765 * can maximize readahead.
10768 ret = run_next_block(root, bits, bits_nr, &last,
10769 pending, seen, reada, nodes,
10770 extent_cache, chunk_cache,
10771 dev_cache, block_group_cache,
10772 dev_extent_cache, rec);
/* Done with this root: drop the buffer reference and unlink the record. */
10776 free_extent_buffer(buf);
10777 list_del(&rec->list);
/* Scan call that is not tied to a root item record (NULL is passed). */
10783 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10784 reada, nodes, extent_cache, chunk_cache,
10785 dev_cache, block_group_cache,
10786 dev_extent_cache, NULL);
/*
 * Scan all trees, build the in-memory caches and cross-check chunks, extent
 * references and devices.
 *
 * @fs_info: filesystem to check. Its excluded_extents, fsck_extent_cache,
 *           free_extent_hook and corrupt_blocks members are pointed at
 *           locals of this function while it runs and are reset to NULL in
 *           the first cleanup sequence.
 *
 * Order of work:
 *  1. the tree root, the chunk root and every ROOT_ITEM found in the tree
 *     root are queued on normal_trees, or on dropping_trees when the root
 *     item has a non-zero drop_progress objectid
 *  2. deal_root_from_list() scans normal_trees, then dropping_trees
 *  3. check_chunks(), check_extent_refs() and check_devices() run on the
 *     caches that the scan filled
 *
 * The result of each scan and check step is tested against -EAGAIN; see the
 * comment about check_block in the body.
 */
10796 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10798 struct rb_root dev_cache;
10799 struct cache_tree chunk_cache;
10800 struct block_group_tree block_group_cache;
10801 struct device_extent_tree dev_extent_cache;
10802 struct cache_tree extent_cache;
10803 struct cache_tree seen;
10804 struct cache_tree pending;
10805 struct cache_tree reada;
10806 struct cache_tree nodes;
10807 struct extent_io_tree excluded_extents;
10808 struct cache_tree corrupt_blocks;
10809 struct btrfs_path path;
10810 struct btrfs_key key;
10811 struct btrfs_key found_key;
10813 struct block_info *bits;
10815 struct extent_buffer *leaf;
10817 struct btrfs_root_item ri;
10818 struct list_head dropping_trees;
10819 struct list_head normal_trees;
10820 struct btrfs_root *root1;
10821 struct btrfs_root *root;
10825 root = fs_info->fs_root;
/* Initialise every cache and list used by the scan. */
10826 dev_cache = RB_ROOT;
10827 cache_tree_init(&chunk_cache);
10828 block_group_tree_init(&block_group_cache);
10829 device_extent_tree_init(&dev_extent_cache);
10831 cache_tree_init(&extent_cache);
10832 cache_tree_init(&seen);
10833 cache_tree_init(&pending);
10834 cache_tree_init(&nodes);
10835 cache_tree_init(&reada);
10836 cache_tree_init(&corrupt_blocks);
10837 extent_io_tree_init(&excluded_extents);
10838 INIT_LIST_HEAD(&dropping_trees);
10839 INIT_LIST_HEAD(&normal_trees);
/* Publish the local caches and the free hook through fs_info. */
10842 fs_info->excluded_extents = &excluded_extents;
10843 fs_info->fsck_extent_cache = &extent_cache;
10844 fs_info->free_extent_hook = free_extent_hook;
10845 fs_info->corrupt_blocks = &corrupt_blocks;
10849 bits = malloc(bits_nr * sizeof(struct block_info));
10855 if (ctx.progress_enabled) {
10856 ctx.tp = TASK_EXTENTS;
10857 task_start(ctx.info);
/* Queue the tree root and the chunk root directly from fs_info. */
10861 root1 = fs_info->tree_root;
10862 level = btrfs_header_level(root1->node);
10863 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10864 root1->node->start, 0, level, 0, NULL);
10867 root1 = fs_info->chunk_root;
10868 level = btrfs_header_level(root1->node);
10869 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10870 root1->node->start, 0, level, 0, NULL);
/* Walk the items of the tree root and queue every ROOT_ITEM. */
10873 btrfs_init_path(&path);
10876 key.type = BTRFS_ROOT_ITEM_KEY;
10877 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10881 leaf = path.nodes[0];
10882 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf. */
10883 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10884 ret = btrfs_next_leaf(root, &path);
10887 leaf = path.nodes[0];
10888 slot = path.slots[0];
10890 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10891 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10892 unsigned long offset;
/* Copy the root item out of the leaf into the local ri. */
10895 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10896 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10897 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid of zero: queue as a normal tree. */
10898 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10899 level = btrfs_root_level(&ri);
10900 ret = add_root_item_to_list(&normal_trees,
10901 found_key.objectid,
10902 btrfs_root_bytenr(&ri),
10903 last_snapshot, level,
/* Otherwise queue on dropping_trees together with drop level and drop key. */
10908 level = btrfs_root_level(&ri);
10909 objectid = found_key.objectid;
10910 btrfs_disk_key_to_cpu(&found_key,
10911 &ri.drop_progress);
10912 ret = add_root_item_to_list(&dropping_trees,
10914 btrfs_root_bytenr(&ri),
10915 last_snapshot, level,
10916 ri.drop_level, &found_key);
10923 btrfs_release_path(&path);
10926 * check_block can return -EAGAIN if it fixes something, please keep
10927 * this in mind when dealing with return values from these functions, if
10928 * we get -EAGAIN we want to fall through and restart the loop.
10930 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10931 &seen, &reada, &nodes, &extent_cache,
10932 &chunk_cache, &dev_cache, &block_group_cache,
10933 &dev_extent_cache);
10935 if (ret == -EAGAIN)
10939 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10940 &pending, &seen, &reada, &nodes,
10941 &extent_cache, &chunk_cache, &dev_cache,
10942 &block_group_cache, &dev_extent_cache);
10944 if (ret == -EAGAIN)
/* Cross-checks on the caches filled by the scan. */
10949 ret = check_chunks(&chunk_cache, &block_group_cache,
10950 &dev_extent_cache, NULL, NULL, NULL, 0);
10952 if (ret == -EAGAIN)
10957 ret = check_extent_refs(root, &extent_cache);
10959 if (ret == -EAGAIN)
10964 ret = check_devices(&dev_cache, &dev_extent_cache);
10969 task_stop(ctx.info);
/*
 * First cleanup sequence: frees the caches and detaches the hooks from
 * fs_info.
 * NOTE(review): a second, similar cleanup sequence follows; presumably each
 * belongs to a different exit path - confirm against the labels.
 */
10971 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10972 extent_io_tree_cleanup(&excluded_extents);
10973 fs_info->fsck_extent_cache = NULL;
10974 fs_info->free_extent_hook = NULL;
10975 fs_info->corrupt_blocks = NULL;
10976 fs_info->excluded_extents = NULL;
10979 free_chunk_cache_tree(&chunk_cache);
10980 free_device_cache_tree(&dev_cache);
10981 free_block_group_tree(&block_group_cache);
10982 free_device_extent_tree(&dev_extent_cache);
10983 free_extent_cache_tree(&seen);
10984 free_extent_cache_tree(&pending);
10985 free_extent_cache_tree(&reada);
10986 free_extent_cache_tree(&nodes);
10987 free_root_item_list(&normal_trees);
10988 free_root_item_list(&dropping_trees);
/* Second cleanup sequence: also frees the extent record cache. */
10991 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10992 free_extent_cache_tree(&seen);
10993 free_extent_cache_tree(&pending);
10994 free_extent_cache_tree(&reada);
10995 free_extent_cache_tree(&nodes);
10996 free_chunk_cache_tree(&chunk_cache);
10997 free_block_group_tree(&block_group_cache);
10998 free_device_cache_tree(&dev_cache);
10999 free_device_extent_tree(&dev_extent_cache);
11000 free_extent_record_cache(&extent_cache);
11001 free_root_item_list(&normal_trees);
11002 free_root_item_list(&dropping_trees);
11003 extent_io_tree_cleanup(&excluded_extents);
11007 static int check_extent_inline_ref(struct extent_buffer *eb,
11008 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11011 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11014 case BTRFS_TREE_BLOCK_REF_KEY:
11015 case BTRFS_EXTENT_DATA_REF_KEY:
11016 case BTRFS_SHARED_BLOCK_REF_KEY:
11017 case BTRFS_SHARED_DATA_REF_KEY:
11021 error("extent[%llu %u %llu] has unknown ref type: %d",
11022 key->objectid, key->type, key->offset, type);
11023 ret = UNKNOWN_TYPE;
11031 * Check backrefs of a tree block given by @bytenr or @eb.
11033 * @root: the root containing the @bytenr or @eb
11034 * @eb: tree block extent buffer, can be NULL
11035 * @bytenr: bytenr of the tree block to search
11036 * @level: tree level of the tree block
11037 * @owner: owner of the tree block
11039 * Return >0 for any error found and output error message
11040 * Return 0 for no error found
/*
 * Look up the extent tree item describing the tree block at @bytenr and
 * search for a backref that accounts for @root: inline refs are scanned
 * first, then a keyed TREE_BLOCK_REF item, and finally SHARED_BLOCK_REF
 * items. Problems are recorded as BACKREF_MISSING / BACKREF_MISMATCH bits.
 */
11042 static int check_tree_block_ref(struct btrfs_root *root,
11043 struct extent_buffer *eb, u64 bytenr,
11044 int level, u64 owner, struct node_refs *nrefs)
11046 struct btrfs_key key;
11047 struct btrfs_root *extent_root = root->fs_info->extent_root;
11048 struct btrfs_path path;
11049 struct btrfs_extent_item *ei;
11050 struct btrfs_extent_inline_ref *iref;
11051 struct extent_buffer *leaf;
11056 int root_level = btrfs_header_level(root->node);
11058 u32 nodesize = root->fs_info->nodesize;
/*
 * Build the search key for the extent item of this block. With the
 * SKINNY_METADATA incompat flag the item type is METADATA_ITEM, otherwise
 * EXTENT_ITEM. The offset is set to the maximum value and the search is
 * followed by btrfs_previous_extent_item() - presumably to step back onto
 * the item that belongs to @bytenr; confirm against that helper.
 */
11067 btrfs_init_path(&path);
11068 key.objectid = bytenr;
11069 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11070 key.type = BTRFS_METADATA_ITEM_KEY;
11072 key.type = BTRFS_EXTENT_ITEM_KEY;
11073 key.offset = (u64)-1;
11075 /* Search for the backref in extent tree */
11076 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
/* BACKREF_MISSING is recorded after each lookup, presumably on failure. */
11078 err |= BACKREF_MISSING;
11081 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11083 err |= BACKREF_MISSING;
/* Re-read the key of the item the path now points at. */
11087 leaf = path.nodes[0];
11088 slot = path.slots[0];
11089 btrfs_item_key_to_cpu(leaf, &key, slot);
11091 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Locate the recorded level and the first inline ref. For a METADATA_ITEM
 * the key offset holds the level and the inline refs start right after the
 * extent item; otherwise a btrfs_tree_block_info sits in between and
 * carries the level.
 */
11093 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11094 skinny_level = (int)key.offset;
11095 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11097 struct btrfs_tree_block_info *info;
11099 info = (struct btrfs_tree_block_info *)(ei + 1);
11100 skinny_level = btrfs_tree_block_level(leaf, info);
11101 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11110 * Due to the feature of shared tree blocks, if the upper node
11111 * is a fs root or shared node, the extent of checked node may
11112 * not be updated until the next CoW.
11115 strict = should_check_extent_strictly(root, nrefs,
/* The extent item must be flagged as a tree block. */
11117 if (!(btrfs_extent_flags(leaf, ei) &
11118 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11120 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11121 key.objectid, nodesize,
11122 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11123 err = BACKREF_MISMATCH;
/* The generation in the block header must equal the one in the extent item. */
11125 header_gen = btrfs_header_generation(eb);
11126 extent_gen = btrfs_extent_generation(leaf, ei);
11127 if (header_gen != extent_gen) {
11129 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11130 key.objectid, nodesize, header_gen,
11132 err = BACKREF_MISMATCH;
/* The caller's @level must equal the level recorded in the extent tree. */
11134 if (level != skinny_level) {
11136 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11137 key.objectid, nodesize, level, skinny_level);
11138 err = BACKREF_MISMATCH;
/* An owner that is not an fs tree (per is_fstree()) must hold the only reference. */
11140 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11142 "extent[%llu %u] is referred by other roots than %llu",
11143 key.objectid, nodesize, root->objectid);
11144 err = BACKREF_MISMATCH;
11149 * Iterate the extent/metadata item to find the exact backref
/* Inline refs occupy the range from the first iref up to the end of the item. */
11151 item_size = btrfs_item_size_nr(leaf, slot);
11152 ptr = (unsigned long)iref;
11153 end = (unsigned long)ei + item_size;
11155 while (ptr < end) {
11156 iref = (struct btrfs_extent_inline_ref *)ptr;
11157 type = btrfs_extent_inline_ref_type(leaf, iref);
11158 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/* Validate the ref type before interpreting the ref. */
11160 ret = check_extent_inline_ref(leaf, &key, iref);
/*
 * For a TREE_BLOCK_REF the ref offset is compared with this root's
 * objectid, or with @owner when strict checking is off.
 */
11165 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11166 if (offset == root->objectid)
11168 if (!strict && owner == offset)
11170 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11172 * Backref of tree reloc root points to itself, no need
11173 * to check backref any more.
11175 * This may be an error of loop backref, but extent tree
11176 * checker should have already handled it.
11177 * Here we only need to avoid infinite iteration.
11179 if (offset == bytenr) {
11183 * Check if the backref points to valid
/*
 * The ref offset is passed as the bytenr of a block one level up, which is
 * checked recursively with a NULL eb; a zero return counts as found.
 */
11186 found_ref = !check_tree_block_ref( root, NULL,
11187 offset, level + 1, owner,
/* Advance by the size of this ref type. */
11194 ptr += btrfs_extent_inline_ref_size(type);
11198 * Inlined extent item doesn't have what we need, check
11199 * TREE_BLOCK_REF_KEY
/* Key of a standalone ref item: (bytenr, TREE_BLOCK_REF_KEY, root objectid). */
11202 btrfs_release_path(&path);
11203 key.objectid = bytenr;
11204 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11205 key.offset = root->objectid;
11207 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11212 * Finally check SHARED BLOCK REF, any found will be good
11213 * Here we're not doing comprehensive extent backref checking,
11214 * only need to ensure there is some extent referring to this
/* Same maximum-offset search pattern as above, now for SHARED_BLOCK_REF items. */
11218 btrfs_release_path(&path);
11219 key.objectid = bytenr;
11220 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11221 key.offset = (u64)-1;
11223 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11225 err |= BACKREF_MISSING;
11228 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11230 err |= BACKREF_MISSING;
11236 err |= BACKREF_MISSING;
11238 btrfs_release_path(&path);
/*
 * For the report, name the parent block recorded in @nrefs when strict
 * checking is on, this block is below the root level and the level above
 * has full_backref set.
 */
11239 if (nrefs && strict &&
11240 level < root_level && nrefs->full_backref[level + 1])
11241 parent = nrefs->bytenr[level + 1];
/* The lost-backref message is only printed when an extent buffer was supplied. */
11242 if (eb && (err & BACKREF_MISSING))
11244 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11245 bytenr, nodesize, owner, level,
11246 parent ? "parent" : "root",
11247 parent ? parent : root->objectid);
11252 * If @err contains BACKREF_MISSING then add extent of the
11253 * file_extent_data_item.
11255 * Returns error bits after repair.
/*
 * Repair helper for the file extent item that @pathp points at. Only the
 * BACKREF_MISSING bit is handled: an extent item for the data extent is
 * looked up (and inserted with btrfs_insert_empty_item() in the insert
 * step below), then one data backref is added with btrfs_inc_extent_ref().
 */
11257 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11258 struct btrfs_root *root,
11259 struct btrfs_path *pathp,
11260 struct node_refs *nrefs,
11263 struct btrfs_file_extent_item *fi;
11264 struct btrfs_key fi_key;
11265 struct btrfs_key key;
11266 struct btrfs_extent_item *ei;
11267 struct btrfs_path path;
11268 struct btrfs_root *extent_root = root->fs_info->extent_root;
11269 struct extent_buffer *eb;
/* Read the file extent item from the leaf/slot the caller's path points at. */
11281 eb = pathp->nodes[0];
11282 slot = pathp->slots[0];
11283 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11284 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
/* Inline extents and extents with disk bytenr 0 are special-cased here. */
11286 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11287 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
/*
 * offset is the file offset minus the offset into the extent; it is
 * computed here for use as the data backref offset.
 */
11290 file_offset = fi_key.offset;
11291 generation = btrfs_file_extent_generation(eb, fi);
11292 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11293 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11294 extent_offset = btrfs_file_extent_offset(eb, fi);
11295 offset = file_offset - extent_offset;
11297 /* now repair only adds backref */
11298 if ((err & BACKREF_MISSING) == 0)
11301 /* search extent item */
/* Data extent items are keyed (disk_bytenr, EXTENT_ITEM, num_bytes). */
11302 key.objectid = disk_bytenr;
11303 key.type = BTRFS_EXTENT_ITEM_KEY;
11304 key.offset = num_bytes;
11306 btrfs_init_path(&path);
11307 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11313 /* insert an extent item */
11315 key.objectid = disk_bytenr;
11316 key.type = BTRFS_EXTENT_ITEM_KEY;
11317 key.offset = num_bytes;
/* The new item holds only the bare extent item, no inline refs. */
11318 size = sizeof(*ei);
11320 btrfs_release_path(&path);
11321 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11325 eb = path.nodes[0];
11326 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/*
 * The new item starts with a ref count of 0, the generation copied from
 * the file extent item, and the DATA flag.
 */
11328 btrfs_set_extent_refs(eb, ei, 0);
11329 btrfs_set_extent_generation(eb, ei, generation);
11330 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11332 btrfs_mark_buffer_dirty(eb);
11333 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11335 btrfs_release_path(&path);
/*
 * NOTE(review): eb was reassigned to the extent tree leaf in the insert
 * step above, so this may not read the bytenr of the fs tree leaf that
 * holds the file extent item - confirm which buffer is intended.
 */
11338 if (nrefs->full_backref[0])
11339 parent = btrfs_header_bytenr(eb);
/*
 * One argument is BTRFS_FIRST_FREE_OBJECTID when a parent is set, and the
 * inode objectid from the file extent key otherwise.
 */
11343 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11345 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11349 "failed to increase extent data backref[%llu %llu] root %llu",
11350 disk_bytenr, num_bytes, root->objectid);
11353 printf("Add one extent data backref [%llu %llu]\n",
11354 disk_bytenr, num_bytes);
/* The missing-backref bit is cleared from the error bits. */
11357 err &= ~BACKREF_MISSING;
11360 error("can't repair root %llu extent data item[%llu %llu]",
11361 root->objectid, disk_bytenr, num_bytes);
11366 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11368 * Return >0 any error found and output error message
11369 * Return 0 for no error found
/*
 * Validate the EXTENT_DATA item that @pathp points at: check the alignment
 * of its byte counts, optionally add them to the global data byte counters,
 * then look for a data backref in the extent tree that matches this file
 * extent (inline refs first, then keyed EXTENT_DATA_REF, then keyed
 * SHARED_DATA_REF).
 */
11371 static int check_extent_data_item(struct btrfs_root *root,
11372 struct btrfs_path *pathp,
11373 struct node_refs *nrefs, int account_bytes)
11375 struct btrfs_file_extent_item *fi;
11376 struct extent_buffer *eb = pathp->nodes[0];
11377 struct btrfs_path path;
11378 struct btrfs_root *extent_root = root->fs_info->extent_root;
11379 struct btrfs_key fi_key;
11380 struct btrfs_key dbref_key;
11381 struct extent_buffer *leaf;
11382 struct btrfs_extent_item *ei;
11383 struct btrfs_extent_inline_ref *iref;
11384 struct btrfs_extent_data_ref *dref;
11387 u64 disk_num_bytes;
11388 u64 extent_num_bytes;
11395 int found_dbackref = 0;
11396 int slot = pathp->slots[0];
11401 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11402 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11404 /* Nothing to check for hole and inline data extents */
11405 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11406 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11409 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11410 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11411 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11412 offset = btrfs_file_extent_offset(eb, fi);
11414 /* Check unaligned disk_num_bytes and num_bytes */
/*
 * Both sizes are checked against the sector size. A size is only added to
 * its global counter when it is aligned and @account_bytes is set.
 */
11415 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11417 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11418 fi_key.objectid, fi_key.offset, disk_num_bytes,
11419 root->fs_info->sectorsize);
11420 err |= BYTES_UNALIGNED;
11421 } else if (account_bytes) {
11422 data_bytes_allocated += disk_num_bytes;
11424 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11426 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11427 fi_key.objectid, fi_key.offset, extent_num_bytes,
11428 root->fs_info->sectorsize);
11429 err |= BYTES_UNALIGNED;
11430 } else if (account_bytes) {
11431 data_bytes_referenced += extent_num_bytes;
/* Owner recorded in the leaf header; used by the non-strict match below. */
11433 owner = btrfs_header_owner(eb);
11435 /* Check the extent item of the file extent in extent tree */
/* Exact key of the data extent item: (disk bytenr, EXTENT_ITEM, disk num bytes). */
11436 btrfs_init_path(&path);
11437 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11438 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11439 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11441 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11445 leaf = path.nodes[0];
11446 slot = path.slots[0];
11447 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11449 extent_flags = btrfs_extent_flags(leaf, ei);
/* The extent item referenced by a file extent must carry the DATA flag. */
11451 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11453 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11454 disk_bytenr, disk_num_bytes,
11455 BTRFS_EXTENT_FLAG_DATA);
11456 err |= BACKREF_MISMATCH;
11459 /* Check data backref inside that extent item */
/* Inline refs start right after the extent item and run to the item end. */
11460 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11461 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11462 ptr = (unsigned long)iref;
11463 end = (unsigned long)ei + item_size;
11464 strict = should_check_extent_strictly(root, nrefs, -1);
11466 while (ptr < end) {
11470 bool match = false;
11472 iref = (struct btrfs_extent_inline_ref *)ptr;
11473 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref payload starts at the offset field of the inline ref. */
11474 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11476 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11481 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11482 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11483 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11484 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
/*
 * The ref must name this inode and the file offset minus the extent
 * offset. The root must then be this root, or the leaf owner when strict
 * checking is off.
 */
11486 if (ref_objectid == fi_key.objectid &&
11487 ref_offset == fi_key.offset - offset)
11489 if (ref_root == root->objectid && match)
11490 found_dbackref = 1;
11491 else if (!strict && owner == ref_root && match)
11492 found_dbackref = 1;
11493 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/*
 * The ref offset is passed as the bytenr of a tree block, which is
 * verified through check_tree_block_ref(); a zero return counts as found.
 */
11494 found_dbackref = !check_tree_block_ref(root, NULL,
11495 btrfs_extent_inline_ref_offset(leaf, iref),
11499 if (found_dbackref)
11501 ptr += btrfs_extent_inline_ref_size(type);
11504 if (!found_dbackref) {
11505 btrfs_release_path(&path);
11507 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
/* The key offset is the hash of (root objectid, inode objectid, backref offset). */
11508 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11509 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11510 dbref_key.offset = hash_extent_data_ref(root->objectid,
11511 fi_key.objectid, fi_key.offset - offset);
11513 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11514 &dbref_key, &path, 0, 0);
11516 found_dbackref = 1;
11520 btrfs_release_path(&path);
11523 * Neither inlined nor EXTENT_DATA_REF found, try
11524 * SHARED_DATA_REF as last chance.
/* The key offset is the start of the leaf holding the file extent item. */
11526 dbref_key.objectid = disk_bytenr;
11527 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11528 dbref_key.offset = eb->start;
11530 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11531 &dbref_key, &path, 0, 0);
11533 found_dbackref = 1;
/* No matching backref from any of the three sources: record and report it. */
11539 if (!found_dbackref)
11540 err |= BACKREF_MISSING;
11541 btrfs_release_path(&path);
11542 if (err & BACKREF_MISSING) {
11543 error("data extent[%llu %llu] backref lost",
11544 disk_bytenr, disk_num_bytes);
11550 * Get real tree block level for the case like shared block
11551 * Return >= 0 as tree level
11552 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources, the
 * extent tree record and the block's own header, and return the header
 * level after comparing the two.
 */
11554 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11556 struct extent_buffer *eb;
11557 struct btrfs_path path;
11558 struct btrfs_key key;
11559 struct btrfs_extent_item *ei;
11566 /* Search extent tree for extent generation and level */
/*
 * The search uses METADATA_ITEM with the maximum offset and is followed by
 * btrfs_previous_extent_item() - presumably to step back onto the item for
 * @bytenr, whichever of the two item types it is; confirm against that
 * helper.
 */
11567 key.objectid = bytenr;
11568 key.type = BTRFS_METADATA_ITEM_KEY;
11569 key.offset = (u64)-1;
11571 btrfs_init_path(&path);
11572 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11575 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11583 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11584 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11585 struct btrfs_extent_item);
/* The TREE_BLOCK flag is tested before the level is trusted. */
11586 flags = btrfs_extent_flags(path.nodes[0], ei);
11587 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11592 /* Get transid for later read_tree_block() check */
11593 transid = btrfs_extent_generation(path.nodes[0], ei);
11595 /* Get backref level as one source */
/*
 * METADATA_ITEM keeps the level in the key offset; otherwise it is read
 * from the btrfs_tree_block_info that follows the extent item.
 */
11596 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11597 backref_level = key.offset;
11599 struct btrfs_tree_block_info *info;
11601 info = (struct btrfs_tree_block_info *)(ei + 1);
11602 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11604 btrfs_release_path(&path);
11606 /* Get level from tree block as an alternative source */
11607 eb = read_tree_block(fs_info, bytenr, transid);
11608 if (!extent_buffer_uptodate(eb)) {
11609 free_extent_buffer(eb);
/* The buffer is released once the level has been copied out of its header. */
11612 header_level = btrfs_header_level(eb);
11613 free_extent_buffer(eb);
/* The two levels are compared before the header level is returned. */
11615 if (header_level != backref_level)
11617 return header_level;
/* Releases the extent tree path; presumably the shared error exit. */
11620 btrfs_release_path(&path);
11625 * Check if a tree block backref is valid (points to a valid tree block)
11626 * if level == -1, level will be resolved
11627 * Return >0 for any error found and print error message
/*
 * Verify a tree block backref that names root @root_id as referencer of the
 * block at @bytenr: read the root, take the first key of the block and
 * search for it in that root down to @level, then compare the block found
 * there with @bytenr and @level. Problems are recorded as
 * REFERENCER_MISSING / REFERENCER_MISMATCH bits.
 */
11629 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11630 u64 bytenr, int level)
11632 struct btrfs_root *root;
11633 struct btrfs_key key;
11634 struct btrfs_path path;
11635 struct extent_buffer *eb;
11636 struct extent_buffer *node;
/* nodesize is only used in the messages below. */
11637 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11641 /* Query level for level == -1 special case */
11643 level = query_tree_block_level(fs_info, bytenr);
11645 err |= REFERENCER_MISSING;
/* Look up the referencing root by (root_id, ROOT_ITEM, maximum offset). */
11649 key.objectid = root_id;
11650 key.type = BTRFS_ROOT_ITEM_KEY;
11651 key.offset = (u64)-1;
11653 root = btrfs_read_fs_root(fs_info, &key);
11654 if (IS_ERR(root)) {
11655 err |= REFERENCER_MISSING;
11659 /* Read out the tree block to get item/node key */
/* 0 is passed as the third argument here, unlike in query_tree_block_level(). */
11660 eb = read_tree_block(fs_info, bytenr, 0);
11661 if (!extent_buffer_uptodate(eb)) {
11662 err |= REFERENCER_MISSING;
11663 free_extent_buffer(eb);
11667 /* Empty tree, no need to check key */
11668 if (!btrfs_header_nritems(eb) && !level) {
11669 free_extent_buffer(eb);
/* The first key is read as a node key or an item key; presumably chosen by level. */
11674 btrfs_node_key_to_cpu(eb, &key, 0);
11676 btrfs_item_key_to_cpu(eb, &key, 0);
11678 free_extent_buffer(eb);
/* path.nodes[level] is inspected after the search to see which block was reached. */
11680 btrfs_init_path(&path);
11681 path.lowest_level = level;
11682 /* Search with the first key, to ensure we can reach it */
11683 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11685 err |= REFERENCER_MISSING;
/* The block reached at @level must be the one under test, at the same level. */
11689 node = path.nodes[level];
11690 if (btrfs_header_bytenr(node) != bytenr) {
11692 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11693 bytenr, nodesize, bytenr,
11694 btrfs_header_bytenr(node));
11695 err |= REFERENCER_MISMATCH;
11697 if (btrfs_header_level(node) != level) {
11699 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11700 bytenr, nodesize, level,
11701 btrfs_header_level(node));
11702 err |= REFERENCER_MISMATCH;
11706 btrfs_release_path(&path);
/* Two message variants follow: one without and one with the level. */
11708 if (err & REFERENCER_MISSING) {
11710 error("extent [%llu %d] lost referencer (owner: %llu)",
11711 bytenr, nodesize, root_id);
11714 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11715 bytenr, nodesize, root_id, level);
11722 * Check if tree block @eb is tree reloc root.
11723 * Return 0 if it's not or any problem happens
11724 * Return 1 if it's a tree reloc root
/*
 * Decide whether @eb is the root node of a tree reloc root: the reloc root
 * for the owner recorded in @eb's header is read and the bytenr of its node
 * is compared with the bytenr of @eb.
 */
11726 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11727 struct extent_buffer *eb)
11729 struct btrfs_root *tree_reloc_root;
11730 struct btrfs_key key;
11731 u64 bytenr = btrfs_header_bytenr(eb);
11732 u64 owner = btrfs_header_owner(eb);
/* The reloc root item is keyed (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner). */
11735 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11736 key.offset = owner;
11737 key.type = BTRFS_ROOT_ITEM_KEY;
/* Read with the no-cache variant; released again below via btrfs_free_fs_root(). */
11739 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11740 if (IS_ERR(tree_reloc_root))
/* A match means @eb is the root node of that reloc tree. */
11743 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11745 btrfs_free_fs_root(tree_reloc_root);
11750 * Check referencer for shared block backref
11751 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: the block at @parent is read and its block
 * pointers are scanned for @bytenr. REFERENCER_MISSING is returned, with a
 * message, when no parent relation was found.
 */
11753 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11754 u64 parent, u64 bytenr, int level)
11756 struct extent_buffer *eb;
11758 int found_parent = 0;
11761 eb = read_tree_block(fs_info, parent, 0);
11762 if (!extent_buffer_uptodate(eb))
/* Resolves the level from the extent tree / block header; presumably only for level == -1. */
11766 level = query_tree_block_level(fs_info, bytenr);
11770 /* It's possible it's a tree reloc root */
11771 if (parent == bytenr) {
11772 if (is_tree_reloc_root(fs_info, eb))
/* The parent's header level is compared with level + 1. */
11777 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer of the parent for @bytenr. */
11780 nr = btrfs_header_nritems(eb);
11781 for (i = 0; i < nr; i++) {
11782 if (bytenr == btrfs_node_blockptr(eb, i)) {
11788 free_extent_buffer(eb);
11789 if (!found_parent) {
11791 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11792 bytenr, fs_info->nodesize, parent, level);
11793 return REFERENCER_MISSING;
11799 * Check referencer for normal (inlined) data ref
11800 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed/inline data backref (@root_id, @objectid, @offset) for the
 * data extent at @bytenr: the file extents of inode @objectid in that root
 * are walked and those referring to the extent with the same backref offset
 * are tested. The tally in found_count must equal @count, otherwise
 * REFERENCER_MISSING is returned with a message.
 */
11802 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11803 u64 root_id, u64 objectid, u64 offset,
11804 u64 bytenr, u64 len, u32 count)
11806 struct btrfs_root *root;
11807 struct btrfs_root *extent_root = fs_info->extent_root;
11808 struct btrfs_key key;
11809 struct btrfs_path path;
11810 struct extent_buffer *leaf;
11811 struct btrfs_file_extent_item *fi;
11812 u32 found_count = 0;
/*
 * Extent tree lookup for @bytenr using the maximum-offset search followed
 * by btrfs_previous_extent_item(); presumably this is the len == 0
 * resolution described in the function header, taking the length from the
 * key offset.
 */
11817 key.objectid = bytenr;
11818 key.type = BTRFS_EXTENT_ITEM_KEY;
11819 key.offset = (u64)-1;
11821 btrfs_init_path(&path);
11822 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11825 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11828 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* The key found is tested for being the EXTENT_ITEM of @bytenr. */
11829 if (key.objectid != bytenr ||
11830 key.type != BTRFS_EXTENT_ITEM_KEY)
11833 btrfs_release_path(&path);
/* Read the referencing root by (root_id, ROOT_ITEM, maximum offset). */
11835 key.objectid = root_id;
11836 key.type = BTRFS_ROOT_ITEM_KEY;
11837 key.offset = (u64)-1;
11838 btrfs_init_path(&path);
11840 root = btrfs_read_fs_root(fs_info, &key);
11844 key.objectid = objectid;
11845 key.type = BTRFS_EXTENT_DATA_KEY;
11847 * It can be nasty as data backref offset is
11848 * file offset - file extent offset, which is smaller or
11849 * equal to original backref offset. The only special case is
11850 * overflow. So we need to special check and do further search.
/* With bit 63 set in @offset the search starts at file offset 0. */
11852 key.offset = offset & (1ULL << 63) ? 0 : offset;
11854 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11859 * Search afterwards to get correct one
11860 * NOTE: As we must do a comprehensive check on the data backref to
11861 * make sure the dref count also matches, we must iterate all file
11862 * extents for that inode.
11865 leaf = path.nodes[0];
11866 slot = path.slots[0];
/* Slots past the end of the leaf and leaves owned by another root are tested first. */
11868 if (slot >= btrfs_header_nritems(leaf) ||
11869 btrfs_header_owner(leaf) != root_id)
11871 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Only EXTENT_DATA items of inode @objectid are of interest. */
11872 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11874 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11876 * Except normal disk bytenr and disk num bytes, we still
11877 * need to do extra check on dbackref offset as
11878 * dbackref offset = file_offset - file_extent_offset
11880 * Also, we must check the leaf owner.
11881 * In case of shared tree blocks (snapshots) we can inherit
11882 * leaves from source snapshot.
11883 * In that case, reference from source snapshot should not
/* All four conditions must hold for a file extent to match this backref. */
11886 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11887 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11888 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11889 offset && btrfs_header_owner(leaf) == root_id)
11893 ret = btrfs_next_item(root, &path);
11898 btrfs_release_path(&path);
/* found_count is compared with the count stored in the backref. */
11899 if (found_count != count) {
11901 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11902 bytenr, len, root_id, objectid, offset, count, found_count);
11903 return REFERENCER_MISSING;
11909 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: the block at @parent is read and its items
 * are scanned for an EXTENT_DATA item whose disk bytenr equals @bytenr.
 * REFERENCER_MISSING is returned, with a message, when no referencer was
 * found.
 */
11911 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11912 u64 parent, u64 bytenr)
11914 struct extent_buffer *eb;
11915 struct btrfs_key key;
11916 struct btrfs_file_extent_item *fi;
11918 int found_parent = 0;
11921 eb = read_tree_block(fs_info, parent, 0);
11922 if (!extent_buffer_uptodate(eb))
/* Walk every item of the parent block. */
11925 nr = btrfs_header_nritems(eb);
11926 for (i = 0; i < nr; i++) {
11927 btrfs_item_key_to_cpu(eb, &key, i);
/* The item type is tested: only EXTENT_DATA items are of interest. */
11928 if (key.type != BTRFS_EXTENT_DATA_KEY)
11931 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline file extents are tested for before the disk bytenr is read. */
11932 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11935 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11942 free_extent_buffer(eb);
11943 if (!found_parent) {
11944 error("shared extent %llu referencer lost (parent: %llu)",
11946 return REFERENCER_MISSING;
11952 * Only delete backref if REFERENCER_MISSING now
11954 * Returns <0 the extent was deleted
11955 * Returns >0 the backref was deleted but extent still exists, returned value
11956 * means error after repair
11957 * Returns 0 nothing happened
/*
 * Repair helper for one backref of the extent item at @path: when @err has
 * a referencer error bit set, the backref described by (@parent,
 * @root_objectid, @owner, @offset) is dropped with btrfs_free_extent(), and
 * the path is then re-searched with the key it pointed at on entry.
 */
11959 static int repair_extent_item(struct btrfs_trans_handle *trans,
11960 struct btrfs_root *root, struct btrfs_path *path,
11961 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
11962 u64 owner, u64 offset, int err)
11964 struct btrfs_key old_key;
/* Remember the current key so the path can be re-established afterwards. */
11968 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
/*
 * NOTE(review): both REFERENCER_MISSING and REFERENCER_MISMATCH trigger the
 * deletion, but only REFERENCER_MISSING is cleared below - confirm whether
 * REFERENCER_MISMATCH is meant to stay set.
 */
11970 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
11971 /* delete the backref */
/* The root passed to btrfs_free_extent() is fs_info->fs_root, not @root. */
11972 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
11973 num_bytes, parent, root_objectid, owner, offset);
11976 err &= ~REFERENCER_MISSING;
11977 printf("Delete backref in extent [%llu %llu]\n",
11978 bytenr, num_bytes);
11980 error("fail to delete backref in extent [%llu %llu]",
11981 bytenr, num_bytes);
11985 /* btrfs_free_extent may delete the extent */
/* Search again with the saved key to learn whether the item still exists. */
11986 btrfs_release_path(path);
11987 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
11997 * This function will check a given extent item, including its backref and
11998 * itself (like crossing stripe boundary and type)
12000 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the EXTENT_ITEM / METADATA_ITEM that @path points at: account its
 * size, check a metadata extent against stripe boundaries, then walk every
 * inline backref and verify its referencer with the matching check_*
 * helper. When the repair flag is set, repair_extent_item() is invoked for
 * a failing backref.
 */
12002 static int check_extent_item(struct btrfs_trans_handle *trans,
12003 struct btrfs_fs_info *fs_info,
12004 struct btrfs_path *path)
12006 struct btrfs_extent_item *ei;
12007 struct btrfs_extent_inline_ref *iref;
12008 struct btrfs_extent_data_ref *dref;
12009 struct extent_buffer *eb = path->nodes[0];
12012 int slot = path->slots[0];
12014 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12015 u32 item_size = btrfs_item_size_nr(eb, slot);
12025 struct btrfs_key key;
/*
 * For an EXTENT_ITEM the key offset is used as the extent length; the
 * other branch uses nodesize. bytes_used is not declared in this function
 * - presumably a file-scope counter; confirm.
 */
12029 btrfs_item_key_to_cpu(eb, &key, slot);
12030 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12031 bytes_used += key.offset;
12032 num_bytes = key.offset;
12034 bytes_used += nodesize;
12035 num_bytes = nodesize;
12038 if (item_size < sizeof(*ei)) {
12040 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12041 * old thing when on disk format is still un-determined.
12042 * No need to care about it anymore
12044 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12048 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12049 flags = btrfs_extent_flags(eb, ei);
/* The TREE_BLOCK flag is tested; the metadata variable is used in the checks below. */
12051 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* The stripe check uses global_info rather than the @fs_info parameter. */
12053 if (metadata && check_crossing_stripes(global_info, key.objectid,
12055 error("bad metadata [%llu, %llu) crossing stripe boundary",
12056 key.objectid, key.objectid + nodesize);
12057 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr starts right after the extent item. */
12060 ptr = (unsigned long)(ei + 1);
12062 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12063 /* Old EXTENT_ITEM metadata */
12064 struct btrfs_tree_block_info *info;
/* The level comes from the tree block info, which is then skipped over. */
12066 info = (struct btrfs_tree_block_info *)ptr;
12067 level = btrfs_tree_block_level(eb, info);
12068 ptr += sizeof(struct btrfs_tree_block_info);
12070 /* New METADATA_ITEM */
12071 level = key.offset;
12073 end = (unsigned long)ei + item_size;
12076 /* Reached extent item end normally */
12080 /* Beyond extent item end, wrong item size */
12082 err |= ITEM_SIZE_MISMATCH;
12083 error("extent item at bytenr %llu slot %d has wrong size",
12092 /* Now check every backref in this extent item */
12093 iref = (struct btrfs_extent_inline_ref *)ptr;
12094 type = btrfs_extent_inline_ref_type(eb, iref);
12095 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch on the ref type; each case delegates to one referencer check. */
12097 case BTRFS_TREE_BLOCK_REF_KEY:
/* For this type the ref offset is used as the root id. */
12098 root_objectid = offset;
12100 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12104 case BTRFS_SHARED_BLOCK_REF_KEY:
/* Here the ref offset is passed as the parent argument. */
12106 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12110 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload starts at the offset field of the inline ref. */
12111 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12112 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12113 owner = btrfs_extent_data_ref_objectid(eb, dref);
12114 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12115 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12116 owner_offset, key.objectid, key.offset,
12117 btrfs_extent_data_ref_count(eb, dref));
12120 case BTRFS_SHARED_DATA_REF_KEY:
12122 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12126 error("extent[%llu %d %llu] has unknown ref type: %d",
12127 key.objectid, key.type, key.offset, type);
12128 ret = UNKNOWN_TYPE;
/*
 * The repair call receives the result of the check just made (ret) as its
 * error bits, and always operates on the extent root.
 */
12133 if (err && repair) {
12134 ret = repair_extent_item(trans, fs_info->extent_root, path,
12135 key.objectid, num_bytes, parent, root_objectid,
12136 owner, owner_offset, ret);
/* Advance to the next inline ref by the size of this ref type. */
12145 ptr += btrfs_extent_inline_ref_size(type);
12153 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check the dev extent item at @slot of @eb against the chunk it names:
 * the chunk item is looked up in the chunk tree, its stripe length is
 * compared with the dev extent length, and its stripes are scanned for one
 * whose (devid, offset) equals the dev extent key. REFERENCER_MISSING is
 * returned, with a message, when no such chunk was found.
 */
12155 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12156 struct extent_buffer *eb, int slot)
12158 struct btrfs_root *chunk_root = fs_info->chunk_root;
12159 struct btrfs_dev_extent *ptr;
12160 struct btrfs_path path;
12161 struct btrfs_key chunk_key;
12162 struct btrfs_key devext_key;
12163 struct btrfs_chunk *chunk;
12164 struct extent_buffer *l;
12168 int found_chunk = 0;
12171 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12172 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12173 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records the objectid and offset of its chunk item. */
12175 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12176 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12177 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12179 btrfs_init_path(&path);
12180 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
/* The chunk item is validated before its fields are used. */
12185 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12186 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The stripe length computed for the chunk is compared with the dev extent length. */
12191 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for the stripe that lives on this device at this device offset. */
12194 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12195 for (i = 0; i < num_stripes; i++) {
12196 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12197 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
/* The dev extent key is (devid, DEV_EXTENT, device offset). */
12199 if (devid == devext_key.objectid &&
12200 offset == devext_key.offset) {
12206 btrfs_release_path(&path);
12207 if (!found_chunk) {
12209 "device extent[%llu, %llu, %llu] did not find the related chunk",
12210 devext_key.objectid, devext_key.offset, length);
12211 return REFERENCER_MISSING;
12217 * Check if the used space is correct with the dev item
/*
 * Check the dev item at @slot of @eb: the lengths of the dev extents found
 * in the device tree for this device id are summed and compared with the
 * bytes_used field of the dev item. A mismatch returns ACCOUNTING_MISMATCH.
 * The device's total size is finally passed to check_dev_size_alignment().
 */
12219 static int check_dev_item(struct btrfs_fs_info *fs_info,
12220 struct extent_buffer *eb, int slot)
12222 struct btrfs_root *dev_root = fs_info->dev_root;
12223 struct btrfs_dev_item *dev_item;
12224 struct btrfs_path path;
12225 struct btrfs_key key;
12226 struct btrfs_dev_extent *ptr;
12233 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12234 dev_id = btrfs_device_id(eb, dev_item);
12235 used = btrfs_device_bytes_used(eb, dev_item);
12236 total_bytes = btrfs_device_total_bytes(eb, dev_item);
/* Dev extents of a device are keyed with the device id as objectid. */
12238 key.objectid = dev_id;
12239 key.type = BTRFS_DEV_EXTENT_KEY;
12242 btrfs_init_path(&path);
12243 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* For the message, the dev item's own key is re-read into key. */
12245 btrfs_item_key_to_cpu(eb, &key, slot);
12246 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12247 key.objectid, key.type, key.offset);
12248 btrfs_release_path(&path);
12249 return REFERENCER_MISSING;
12252 /* Iterate dev_extents to calculate the used space of a device */
/* A slot at or past the end of the leaf is tested for first. */
12254 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12257 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Keys are tested for a larger device id, then for type and device id. */
12258 if (key.objectid > dev_id)
12260 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12263 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12264 struct btrfs_dev_extent);
12265 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12267 ret = btrfs_next_item(dev_root, &path);
12271 btrfs_release_path(&path);
/*
 * NOTE(review): the key is re-read from the dev item here, but the message
 * prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id instead
 * of the key fields - confirm which identifiers are meant to be shown.
 */
12273 if (used != total) {
12274 btrfs_item_key_to_cpu(eb, &key, slot);
12276 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12277 total, used, BTRFS_ROOT_TREE_OBJECTID,
12278 BTRFS_DEV_EXTENT_KEY, dev_id);
12279 return ACCOUNTING_MISMATCH;
/* The result of the alignment check is not used on this line. */
12281 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12287 * Check a block group item with its referencer (chunk) and its used space
12288 * with extent/metadata item
/*
 * Cross-check one block group item against the chunk tree and the extent tree.
 *
 * @fs_info: filesystem whose chunk_root and extent_root are searched
 * @eb:      leaf that holds the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Error bits accumulated into err in the visible code: REFERENCER_MISSING,
 * REFERENCER_MISMATCH, CHUNK_TYPE_MISMATCH and BG_ACCOUNTING_ERROR.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably err is returned - confirm.
 */
12290 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12291 struct extent_buffer *eb, int slot)
12293 struct btrfs_root *extent_root = fs_info->extent_root;
12294 struct btrfs_root *chunk_root = fs_info->chunk_root;
12295 struct btrfs_block_group_item *bi;
12296 struct btrfs_block_group_item bg_item;
12297 struct btrfs_path path;
12298 struct btrfs_key bg_key;
12299 struct btrfs_key chunk_key;
12300 struct btrfs_key extent_key;
12301 struct btrfs_chunk *chunk;
12302 struct extent_buffer *leaf;
12303 struct btrfs_extent_item *ei;
12304 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12312 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12313 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer so its fields can be read locally */
12314 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12315 used = btrfs_block_group_used(&bg_item);
12316 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is looked up with the block group bytenr as key offset */
12318 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12319 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12320 chunk_key.offset = bg_key.objectid;
12322 btrfs_init_path(&path);
12323 /* Search for the referencer chunk */
12324 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12327 "block group[%llu %llu] did not find the related chunk item",
12328 bg_key.objectid, bg_key.offset);
12329 err |= REFERENCER_MISSING;
12331 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12332 struct btrfs_chunk);
12333 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12336 "block group[%llu %llu] related chunk item length does not match",
12337 bg_key.objectid, bg_key.offset);
12338 err |= REFERENCER_MISMATCH;
12341 btrfs_release_path(&path);
12343 /* Search from the block group bytenr */
12344 extent_key.objectid = bg_key.objectid;
12345 extent_key.type = 0;
12346 extent_key.offset = 0;
12348 btrfs_init_path(&path);
12349 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12353 /* Iterate extent tree to account used space */
12355 leaf = path.nodes[0];
12357 /* Search slot can point to the last item beyond leaf nritems */
12358 if (path.slots[0] >= btrfs_header_nritems(leaf))
12361 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Key at or past (start + length) of the block group lies outside it */
12362 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only EXTENT_ITEM and METADATA_ITEM keys take part in the accounting */
12365 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12366 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12368 if (extent_key.objectid < bg_key.objectid)
/*
 * NOTE(review): the statement guarded by the METADATA_ITEM test is not
 * visible here; presumably it adds nodesize instead of key.offset - confirm.
 */
12371 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12374 total += extent_key.offset;
12376 ei = btrfs_item_ptr(leaf, path.slots[0],
12377 struct btrfs_extent_item);
12378 flags = btrfs_extent_flags(leaf, ei);
/* The extent's DATA / TREE_BLOCK flag must agree with the block group flags */
12379 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12380 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12382 "bad extent[%llu, %llu) type mismatch with chunk",
12383 extent_key.objectid,
12384 extent_key.objectid + extent_key.offset);
12385 err |= CHUNK_TYPE_MISMATCH;
12387 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12388 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12389 BTRFS_BLOCK_GROUP_METADATA))) {
12391 "bad extent[%llu, %llu) type mismatch with chunk",
12392 extent_key.objectid,
12393 extent_key.objectid + nodesize);
12394 err |= CHUNK_TYPE_MISMATCH;
12398 ret = btrfs_next_item(extent_root, &path);
12404 btrfs_release_path(&path);
/* Bytes summed from extent items must equal the block group's used field */
12406 if (total != used) {
12408 "block group[%llu %llu] used %llu but extent items used %llu",
12409 bg_key.objectid, bg_key.offset, used, total);
12410 err |= BG_ACCOUNTING_ERROR;
12416 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12417 * FIXME: We still need to repair error of dev_item.
12419 * Returns error after repair.
/*
 * @trans:      transaction handle passed to btrfs_make_block_group()
 * @chunk_root: root whose fs_info is passed to btrfs_make_block_group()
 * @path:       path whose leaf/slot (nodes[0]/slots[0]) is read as the chunk
 * @err:        error bitmask from the check; only REFERENCER_MISSING is
 *              acted on in the visible code
 */
12421 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12422 struct btrfs_root *chunk_root,
12423 struct btrfs_path *path, int err)
12425 struct btrfs_chunk *chunk;
12426 struct btrfs_key chunk_key;
12427 struct extent_buffer *eb = path->nodes[0];
12429 int slot = path->slots[0];
12433 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
/* NOTE(review): the action taken for a non-chunk key is not visible here */
12434 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12436 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12437 type = btrfs_chunk_type(path->nodes[0], chunk);
12438 length = btrfs_chunk_length(eb, chunk);
/* Recreate the block group from the chunk's type, start offset and length */
12440 if (err & REFERENCER_MISSING) {
12441 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12442 type, chunk_key.offset, length);
12444 error("fail to add block group item[%llu %llu]",
12445 chunk_key.offset, length);
/* The repaired bit is cleared from err and the addition is reported */
12448 err &= ~REFERENCER_MISSING;
12449 printf("Added block group item[%llu %llu]\n",
12450 chunk_key.offset, length);
12459 * Check a chunk item.
12460 * Including checking all referred dev_extents and block group
/*
 * @fs_info: filesystem whose extent_root and dev_root are searched
 * @eb:      leaf that holds the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Error bits accumulated into err in the visible code: BYTES_UNALIGNED,
 * UNKNOWN_TYPE, REFERENCER_MISSING and BACKREF_MISSING.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably err is returned - confirm.
 */
12462 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12463 struct extent_buffer *eb, int slot)
12465 struct btrfs_root *extent_root = fs_info->extent_root;
12466 struct btrfs_root *dev_root = fs_info->dev_root;
12467 struct btrfs_path path;
12468 struct btrfs_key chunk_key;
12469 struct btrfs_key bg_key;
12470 struct btrfs_key devext_key;
12471 struct btrfs_chunk *chunk;
12472 struct extent_buffer *leaf;
12473 struct btrfs_block_group_item *bi;
12474 struct btrfs_block_group_item bg_item;
12475 struct btrfs_dev_extent *ptr;
12487 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12488 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12489 length = btrfs_chunk_length(eb, chunk);
12490 chunk_end = chunk_key.offset + length;
12491 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12494 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12496 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12499 type = btrfs_chunk_type(eb, chunk);
/* The matching block group item is keyed (chunk start, BLOCK_GROUP_ITEM, length) */
12501 bg_key.objectid = chunk_key.offset;
12502 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12503 bg_key.offset = length;
12505 btrfs_init_path(&path);
12506 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12509 "chunk[%llu %llu) did not find the related block group item",
12510 chunk_key.offset, chunk_end);
12511 err |= REFERENCER_MISSING;
12513 leaf = path.nodes[0];
12514 bi = btrfs_item_ptr(leaf, path.slots[0],
12515 struct btrfs_block_group_item);
12516 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12518 if (btrfs_block_group_flags(&bg_item) != type) {
12520 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12521 chunk_key.offset, chunk_end, type,
12522 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is recorded as REFERENCER_MISSING, the same
 * bit used when the block group item is absent - confirm this is intended
 * rather than REFERENCER_MISMATCH.
 */
12523 err |= REFERENCER_MISSING;
12527 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12528 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
/* Each stripe is looked up as (devid, DEV_EXTENT, stripe offset) in dev_root */
12529 for (i = 0; i < num_stripes; i++) {
12530 btrfs_release_path(&path);
12531 btrfs_init_path(&path);
12532 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12533 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12534 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12536 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12539 goto not_match_dev;
12541 leaf = path.nodes[0];
12542 ptr = btrfs_item_ptr(leaf, path.slots[0],
12543 struct btrfs_dev_extent);
12544 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12545 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk and have the stripe length */
12546 if (objectid != chunk_key.objectid ||
12547 offset != chunk_key.offset ||
12548 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12549 goto not_match_dev;
/*
 * NOTE(review): the message below prints chunk_key.objectid as the range
 * start, while every other message in this function prints chunk_key.offset
 * - confirm which one is intended.
 */
12552 err |= BACKREF_MISSING;
12554 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12555 chunk_key.objectid, chunk_end, i);
12558 btrfs_release_path(&path);
/*
 * Delete the item that @path currently points at from @root.
 *
 * @trans: transaction handle used for the search and the deletion
 * @root:  tree the item is deleted from; its objectid appears in the messages
 * @path:  on entry points at the item; it is released and searched again here
 */
12563 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12564 struct btrfs_root *root,
12565 struct btrfs_path *path)
12567 struct btrfs_key key;
/* Remember the key, then search again with ins_len = -1 and cow = 1 */
12570 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12571 btrfs_release_path(path);
12572 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12578 ret = btrfs_del_item(trans, root, path);
/* When slot 0 is reached after the deletion, step to the previous leaf */
12582 if (path->slots[0] == 0)
12583 btrfs_prev_leaf(root, path);
12588 error("failed to delete root %llu item[%llu, %u, %llu]",
12589 root->objectid, key.objectid, key.type, key.offset);
12591 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12592 root->objectid, key.objectid, key.type, key.offset);
12597 * Main entry function to check known items and update related accounting info
/*
 * @trans:         transaction handle forwarded to the repair/delete helpers
 * @root:          tree that owns the leaf; root->fs_info is used for checks
 * @path:          path whose nodes[0]/slots[0] select the leaf and first slot
 * @nrefs:         forwarded to the extent data check and repair helpers
 * @account_bytes: forwarded to check_extent_data_item()
 *
 * Dispatches on key.type to the per-item check helpers.
 * NOTE(review): the first half of each repair condition is not visible in
 * this excerpt; presumably the repair/delete helpers only run in repair mode.
 */
12599 static int check_leaf_items(struct btrfs_trans_handle *trans,
12600 struct btrfs_root *root, struct btrfs_path *path,
12601 struct node_refs *nrefs, int account_bytes)
12603 struct btrfs_fs_info *fs_info = root->fs_info;
12604 struct btrfs_key key;
12605 struct extent_buffer *eb;
12608 struct btrfs_extent_data_ref *dref;
12613 eb = path->nodes[0];
12614 slot = path->slots[0];
/* A start slot at or past nritems is reported as an empty leaf */
12615 if (slot >= btrfs_header_nritems(eb)) {
12617 error("empty leaf [%llu %u] root %llu", eb->start,
12618 root->fs_info->nodesize, root->objectid);
12624 btrfs_item_key_to_cpu(eb, &key, slot);
12628 case BTRFS_EXTENT_DATA_KEY:
12629 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12631 ret = repair_extent_data_item(trans, root, path, nrefs,
12635 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12636 ret = check_block_group_item(fs_info, eb, slot);
12638 ret & REFERENCER_MISSING)
12639 ret = delete_extent_tree_item(trans, root, path);
12642 case BTRFS_DEV_ITEM_KEY:
12643 ret = check_dev_item(fs_info, eb, slot);
12646 case BTRFS_CHUNK_ITEM_KEY:
12647 ret = check_chunk_item(fs_info, eb, slot);
12649 ret = repair_chunk_item(trans, root, path, ret);
12652 case BTRFS_DEV_EXTENT_KEY:
12653 ret = check_dev_extent_item(fs_info, eb, slot);
12656 case BTRFS_EXTENT_ITEM_KEY:
12657 case BTRFS_METADATA_ITEM_KEY:
12658 ret = check_extent_item(trans, fs_info, path);
/* Checksum items are only accounted by size; no helper is called for them */
12661 case BTRFS_EXTENT_CSUM_KEY:
12662 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12665 case BTRFS_TREE_BLOCK_REF_KEY:
12666 ret = check_tree_block_backref(fs_info, key.offset,
12669 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12670 ret = delete_extent_tree_item(trans, root, path);
12673 case BTRFS_EXTENT_DATA_REF_KEY:
12674 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12675 ret = check_extent_data_backref(fs_info,
12676 btrfs_extent_data_ref_root(eb, dref),
12677 btrfs_extent_data_ref_objectid(eb, dref),
12678 btrfs_extent_data_ref_offset(eb, dref),
12680 btrfs_extent_data_ref_count(eb, dref));
12682 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12683 ret = delete_extent_tree_item(trans, root, path);
12686 case BTRFS_SHARED_BLOCK_REF_KEY:
12687 ret = check_shared_block_backref(fs_info, key.offset,
12690 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12691 ret = delete_extent_tree_item(trans, root, path);
12694 case BTRFS_SHARED_DATA_REF_KEY:
12695 ret = check_shared_data_backref(fs_info, key.offset,
12698 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12699 ret = delete_extent_tree_item(trans, root, path);
12713 * Low memory usage version check_chunks_and_extents.
/*
 * @fs_info: filesystem to check
 *
 * Runs check_btrfs_root() on the chunk root and the tree root, then on every
 * root read from a ROOT_ITEM found in the tree root, starting the search at
 * BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the conditions guarding the transaction start, the block
 * accounting fix and the commit are not visible in this excerpt; presumably
 * they only run in repair mode - confirm.
 */
12715 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12717 struct btrfs_trans_handle *trans = NULL;
12718 struct btrfs_path path;
12719 struct btrfs_key old_key;
12720 struct btrfs_key key;
12721 struct btrfs_root *root1;
12722 struct btrfs_root *root;
12723 struct btrfs_root *cur_root;
12727 root = fs_info->fs_root;
12730 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12731 if (IS_ERR(trans)) {
12732 error("failed to start transaction before check");
12733 return PTR_ERR(trans);
12737 root1 = root->fs_info->chunk_root;
12738 ret = check_btrfs_root(trans, root1, 0, 1);
12741 root1 = root->fs_info->tree_root;
12742 ret = check_btrfs_root(trans, root1, 0, 1);
12745 btrfs_init_path(&path);
12746 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12748 key.type = BTRFS_ROOT_ITEM_KEY;
12750 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12752 error("cannot find extent tree in tree_root");
12757 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12758 if (key.type != BTRFS_ROOT_ITEM_KEY)
12761 key.offset = (u64)-1;
/* Reloc tree roots are read without the cache and freed again after the check */
12763 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12764 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12767 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12768 if (IS_ERR(cur_root) || !cur_root) {
/*
 * NOTE(review): key.objectid is printed with %lld here, while other messages
 * in this file print objectids with %llu - confirm the intended specifier.
 */
12769 error("failed to read tree: %lld", key.objectid);
12773 ret = check_btrfs_root(trans, cur_root, 0, 1);
12776 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12777 btrfs_free_fs_root(cur_root);
/* The path is dropped and the tree root searched again from old_key */
12779 btrfs_release_path(&path);
12780 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12781 &old_key, &path, 0, 0);
12785 ret = btrfs_next_item(root1, &path);
12791 /* if repair, update block accounting */
12793 ret = btrfs_fix_block_accounting(trans, root);
12797 err &= ~BG_ACCOUNTING_ERROR;
/* NOTE(review): the commit result is not captured on this line - confirm */
12801 btrfs_commit_transaction(trans, root->fs_info->extent_root);
12803 btrfs_release_path(&path);
/*
 * Run the chunk/extent check in the implementation selected by check_mode.
 *
 * @fs_info: filesystem to check
 *
 * Calls check_chunks_and_extents_v2() when check_mode is CHECK_MODE_LOWMEM,
 * check_chunks_and_extents() otherwise.
 */
12808 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
/* The banner is only written to stderr when progress reporting is disabled */
12812 if (!ctx.progress_enabled)
12813 fprintf(stderr, "checking extents\n");
12814 if (check_mode == CHECK_MODE_LOWMEM)
12815 ret = check_chunks_and_extents_v2(fs_info);
12817 ret = check_chunks_and_extents(fs_info);
12819 /* Also repair device size related problems */
12820 if (repair && !ret) {
12821 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * Re-initialise the root node of @root with a freshly written header.
 *
 * @trans:     transaction handle; its transid becomes the header generation
 * @root:      tree whose root node is reinitialised
 * @overwrite: NOTE(review): the branch that tests this flag is not visible
 *             in this excerpt; presumably it selects reusing the existing
 *             root node instead of allocating a new block - confirm.
 */
12828 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12829 struct btrfs_root *root, int overwrite)
12831 struct extent_buffer *c;
12832 struct extent_buffer *old = root->node;
12835 struct btrfs_disk_key disk_key = {0,0,0};
12841 extent_buffer_get(c);
12844 c = btrfs_alloc_free_block(trans, root,
12845 root->fs_info->nodesize,
12846 root->root_key.objectid,
12847 &disk_key, level, 0, 0);
12850 extent_buffer_get(c);
/* Zero the header area, then set each header field */
12854 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12855 btrfs_set_header_level(c, level);
12856 btrfs_set_header_bytenr(c, c->start);
12857 btrfs_set_header_generation(c, trans->transid);
12858 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12859 btrfs_set_header_owner(c, root->root_key.objectid);
12861 write_extent_buffer(c, root->fs_info->fsid,
12862 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12864 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12865 btrfs_header_chunk_tree_uuid(c),
12868 btrfs_mark_buffer_dirty(c);
12870 * this case can happen in the following case:
12872 * 1.overwrite previous root.
12874 * 2.reinit reloc data root, this is because we skip pin
12875 * down reloc data tree before which means we can allocate
12876 * same block bytenr here.
12878 if (old->start == c->start) {
12879 btrfs_set_root_generation(&root->root_item,
12881 root->root_item.level = btrfs_header_level(root->node);
12882 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12883 &root->root_key, &root->root_item);
12885 free_extent_buffer(c);
12889 free_extent_buffer(old);
12891 add_root_to_dirty_list(root);
/*
 * Pin @eb and, recursively, the tree blocks reachable from it.
 *
 * @fs_info:   filesystem; pinned ranges are tracked in pinned_extents
 * @eb:        tree block to pin and walk
 * @tree_root: non-zero when walking the tree root; passed through unchanged
 *             when recursing through node pointers, and passed as 0 when
 *             descending into a root referenced by a ROOT_ITEM
 */
12895 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12896 struct extent_buffer *eb, int tree_root)
12898 struct extent_buffer *tmp;
12899 struct btrfs_root_item *ri;
12900 struct btrfs_key key;
12902 int level = btrfs_header_level(eb);
12908 * If we have pinned this block before, don't pin it again.
12909 * This can not only avoid forever loop with broken filesystem
12910 * but also give us some speedups.
12912 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12913 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12916 btrfs_pin_extent(fs_info, eb->start, eb->len);
12918 nritems = btrfs_header_nritems(eb);
12919 for (i = 0; i < nritems; i++) {
12921 btrfs_item_key_to_cpu(eb, &key, i);
12922 if (key.type != BTRFS_ROOT_ITEM_KEY)
12924 /* Skip the extent root and reloc roots */
12925 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12926 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12927 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12929 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12930 bytenr = btrfs_disk_root_bytenr(eb, ri);
12933 * If at any point we start needing the real root we
12934 * will have to build a stump root for the root we are
12935 * in, but for now this doesn't actually use the root so
12936 * just pass in extent_root.
12938 tmp = read_tree_block(fs_info, bytenr, 0);
12939 if (!extent_buffer_uptodate(tmp)) {
12940 fprintf(stderr, "Error reading root block\n");
12943 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12944 free_extent_buffer(tmp);
12948 bytenr = btrfs_node_blockptr(eb, i);
12950 /* If we aren't the tree root don't read the block */
12951 if (level == 1 && !tree_root) {
12952 btrfs_pin_extent(fs_info, bytenr,
12953 fs_info->nodesize);
12957 tmp = read_tree_block(fs_info, bytenr, 0);
12958 if (!extent_buffer_uptodate(tmp)) {
12959 fprintf(stderr, "Error reading tree block\n");
12962 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12963 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root node (tree_root = 0),
 * then those reachable from the tree root node (tree_root = 1).
 *
 * @fs_info: filesystem whose chunk_root and tree_root are walked
 *
 * The visible return is the result of the tree root walk.
 */
12972 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12976 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12980 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk items in the chunk tree.
 *
 * @fs_info: filesystem whose chunk_root is walked; its avail_*_alloc_bits
 *           are zeroed and its free_space_cache is marked for each chunk
 */
12983 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12985 struct btrfs_block_group_cache *cache;
12986 struct btrfs_path path;
12987 struct extent_buffer *leaf;
12988 struct btrfs_chunk *chunk;
12989 struct btrfs_key key;
12993 btrfs_init_path(&path);
12995 key.type = BTRFS_CHUNK_ITEM_KEY;
12997 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12999 btrfs_release_path(&path);
13004 * We do this in case the block groups were screwed up and had alloc
13005 * bits that aren't actually set on the chunks. This happens with
13006 * restored images every time and could happen in real life I guess.
13008 fs_info->avail_data_alloc_bits = 0;
13009 fs_info->avail_metadata_alloc_bits = 0;
13010 fs_info->avail_system_alloc_bits = 0;
13012 /* First we need to create the in-memory block groups */
13014 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13015 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13017 btrfs_release_path(&path);
13025 leaf = path.nodes[0];
13026 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13027 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13032 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
/* One block group per chunk: same type, start (key.offset) and length */
13033 btrfs_add_block_group(fs_info, 0,
13034 btrfs_chunk_type(leaf, chunk), key.offset,
13035 btrfs_chunk_length(leaf, chunk));
/* The chunk's whole range is set dirty in free_space_cache */
13036 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13037 key.offset + btrfs_chunk_length(leaf, chunk));
/* Walk the block groups by repeatedly looking up the first one at start */
13042 cache = btrfs_lookup_first_block_group(fs_info, start);
13046 start = cache->key.objectid + cache->key.offset;
13049 btrfs_release_path(&path);
/*
 * Remove pending balance state and reinitialise the data reloc tree.
 *
 * @trans:   transaction handle used for the searches and deletions
 * @fs_info: filesystem; its tree_root is searched and modified
 *
 * Visible steps: delete the BTRFS_BALANCE_OBJECTID item from the tree root,
 * delete items with objectid BTRFS_TREE_RELOC_OBJECTID from the tree root,
 * then read the data reloc tree root, reinit it and create its root dir.
 */
13053 static int reset_balance(struct btrfs_trans_handle *trans,
13054 struct btrfs_fs_info *fs_info)
13056 struct btrfs_root *root = fs_info->tree_root;
13057 struct btrfs_path path;
13058 struct extent_buffer *leaf;
13059 struct btrfs_key key;
13060 int del_slot, del_nr = 0;
13064 btrfs_init_path(&path);
13065 key.objectid = BTRFS_BALANCE_OBJECTID;
13066 key.type = BTRFS_BALANCE_ITEM_KEY;
13068 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13073 goto reinit_data_reloc;
13078 ret = btrfs_del_item(trans, root, &path);
13081 btrfs_release_path(&path);
13083 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13084 key.type = BTRFS_ROOT_ITEM_KEY;
13086 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13090 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13095 ret = btrfs_del_items(trans, root, &path,
13102 btrfs_release_path(&path);
13105 ret = btrfs_search_slot(trans, root, &key, &path,
13112 leaf = path.nodes[0];
13113 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13114 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13116 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13121 del_slot = path.slots[0];
/* Delete the del_nr items collected starting at del_slot in one call */
13130 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13134 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, not the tree root */
13137 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13138 key.type = BTRFS_ROOT_ITEM_KEY;
13139 key.offset = (u64)-1;
13140 root = btrfs_read_fs_root(fs_info, &key);
13141 if (IS_ERR(root)) {
13142 fprintf(stderr, "Error reading data reloc tree\n");
13143 ret = PTR_ERR(root);
13146 record_root_in_trans(trans, root);
13147 ret = btrfs_fsck_reinit_root(trans, root, 0);
13150 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13152 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch.
 *
 * @trans:   transaction handle used for the reinit and the item insertions
 * @fs_info: filesystem whose extent_root is reinitialised
 *
 * Visible steps, in order: pin_metadata_blocks(), btrfs_free_block_groups(),
 * reset_block_groups(), btrfs_fsck_reinit_root() on the extent root, insert
 * one item per in-memory block group, then reset_balance().
 */
13156 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13157 struct btrfs_fs_info *fs_info)
13163 * The only reason we don't do this is because right now we're just
13164 * walking the trees we find and pinning down their bytes, we don't look
13165 * at any of the leaves. In order to do mixed groups we'd have to check
13166 * the leaves of any fs roots and pin down the bytes for any file
13167 * extents we find. Not hard but why do it if we don't have to?
13169 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13170 fprintf(stderr, "We don't support re-initing the extent tree "
13171 "for mixed block groups yet, please notify a btrfs "
13172 "developer you want to do this so they can add this "
13173 "functionality.\n");
13178 * first we need to walk all of the trees except the extent tree and pin
13179 * down the bytes that are in use so we don't overwrite any existing
13182 ret = pin_metadata_blocks(fs_info);
13184 fprintf(stderr, "error pinning down used bytes\n");
13189 * Need to drop all the block groups since we're going to recreate all
13192 btrfs_free_block_groups(fs_info);
13193 ret = reset_block_groups(fs_info);
13195 fprintf(stderr, "error resetting the block groups\n");
13199 /* Ok we can allocate now, reinit the extent root */
13200 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13202 fprintf(stderr, "extent root initialization failed\n");
13204 * When the transaction code is updated we should end the
13205 * transaction, but for now progs only knows about commit so
13206 * just return an error.
13212 * Now we have all the in-memory block groups setup so we can make
13213 * allocations properly, and the metadata we care about is safe since we
13214 * pinned all of it above.
13217 struct btrfs_block_group_cache *cache;
13219 cache = btrfs_lookup_first_block_group(fs_info, start);
13222 start = cache->key.objectid + cache->key.offset;
/* Each cached block group's key and item are inserted into the extent root */
13223 ret = btrfs_insert_item(trans, fs_info->extent_root,
13224 &cache->key, &cache->item,
13225 sizeof(cache->item));
13227 fprintf(stderr, "Error adding block group\n");
13230 btrfs_extent_post_op(trans, fs_info->extent_root);
13233 ret = reset_balance(trans, fs_info);
13235 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Search down to @eb inside its owner tree with cow = 1, within a
 * transaction of its own.
 *
 * @root: only root->fs_info is used; the variable is then reassigned to the
 *        owner root read via btrfs_header_owner(@eb)
 * @eb:   the metadata block; its level is used as path.lowest_level and its
 *        first key as the search key
 */
13240 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13242 struct btrfs_path path;
13243 struct btrfs_trans_handle *trans;
13244 struct btrfs_key key;
13247 printf("Recowing metadata block %llu\n", eb->start);
13248 key.objectid = btrfs_header_owner(eb);
13249 key.type = BTRFS_ROOT_ITEM_KEY;
13250 key.offset = (u64)-1;
13252 root = btrfs_read_fs_root(root->fs_info, &key);
13253 if (IS_ERR(root)) {
13254 fprintf(stderr, "Couldn't find owner root %llu\n",
13256 return PTR_ERR(root);
13259 trans = btrfs_start_transaction(root, 1);
13261 return PTR_ERR(trans);
13263 btrfs_init_path(&path);
/* Node keys are used for a non-zero level, item keys for level 0 */
13264 path.lowest_level = btrfs_header_level(eb);
13265 if (path.lowest_level)
13266 btrfs_node_key_to_cpu(eb, &key, 0);
13268 btrfs_item_key_to_cpu(eb, &key, 0);
13270 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the commit result is not captured on this line - confirm */
13271 btrfs_commit_transaction(trans, root);
13272 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree it belongs to, within a
 * transaction of its own.
 *
 * @root: only root->fs_info is used; the variable is then reassigned to the
 *        root read for bad->root_id
 * @bad:  supplies root_id (owning tree) and key (item to delete)
 */
13276 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13278 struct btrfs_path path;
13279 struct btrfs_trans_handle *trans;
13280 struct btrfs_key key;
13283 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13284 bad->key.type, bad->key.offset);
13285 key.objectid = bad->root_id;
13286 key.type = BTRFS_ROOT_ITEM_KEY;
13287 key.offset = (u64)-1;
13289 root = btrfs_read_fs_root(root->fs_info, &key);
13290 if (IS_ERR(root)) {
13291 fprintf(stderr, "Couldn't find owner root %llu\n",
13293 return PTR_ERR(root);
13296 trans = btrfs_start_transaction(root, 1);
13298 return PTR_ERR(trans);
13300 btrfs_init_path(&path);
/* Search with ins_len = -1 and cow = 1 before deleting the item */
13301 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13307 ret = btrfs_del_item(trans, root, &path);
/* NOTE(review): the commit result is not captured on this line - confirm */
13309 btrfs_commit_transaction(trans, root);
13310 btrfs_release_path(&path);
/*
 * Clear the log root recorded in the super block copy and commit.
 *
 * @root: tree used to start and commit the transaction; root->fs_info
 *        supplies the super_copy that is modified
 *
 * Sets both log_root and log_root_level in super_copy to 0; ret takes the
 * result of btrfs_commit_transaction().
 */
13314 static int zero_log_tree(struct btrfs_root *root)
13316 struct btrfs_trans_handle *trans;
13319 trans = btrfs_start_transaction(root, 1);
13320 if (IS_ERR(trans)) {
13321 ret = PTR_ERR(trans);
13324 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13325 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13326 ret = btrfs_commit_transaction(trans, root);
/*
 * Read a data range one sector at a time and record a checksum for each.
 *
 * @trans:     transaction handle passed to btrfs_csum_file_block()
 * @csum_root: tree receiving the checksums; also supplies fs_info
 * @buf:       scratch buffer the data is read into; callers in this file
 *             allocate it with sectorsize bytes
 * @start:     first byte of the range
 *
 * NOTE(review): the last parameter is declared on a line not visible in this
 * excerpt; the body uses a length named len.
 */
13330 static int populate_csum(struct btrfs_trans_handle *trans,
13331 struct btrfs_root *csum_root, char *buf, u64 start,
13334 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13339 while (offset < len) {
/* sectorsize is reloaded from fs_info on every iteration */
13340 sectorsize = fs_info->sectorsize;
13341 ret = read_extent_data(fs_info, buf, start + offset,
13345 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13346 start + offset, buf, sectorsize);
13349 offset += sectorsize;
/*
 * Populate checksums for the regular file extents found in one fs tree.
 *
 * @trans:     transaction handle forwarded to populate_csum()
 * @csum_root: checksum tree forwarded to populate_csum()
 * @cur_root:  fs/subvolume tree that is walked; also supplies sectorsize
 */
13354 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13355 struct btrfs_root *csum_root,
13356 struct btrfs_root *cur_root)
13358 struct btrfs_path path;
13359 struct btrfs_key key;
13360 struct extent_buffer *node;
13361 struct btrfs_file_extent_item *fi;
/* One sector of scratch space, handed to populate_csum() as its buffer */
13368 buf = malloc(cur_root->fs_info->sectorsize);
13372 btrfs_init_path(&path);
13376 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13379 /* Iterate all regular file extents and fill its csum */
13381 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13383 if (key.type != BTRFS_EXTENT_DATA_KEY)
13385 node = path.nodes[0];
13386 slot = path.slots[0];
13387 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13388 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The on-disk bytenr and on-disk length of the extent define the range */
13390 start = btrfs_file_extent_disk_bytenr(node, fi);
13391 len = btrfs_file_extent_disk_num_bytes(node, fi);
13393 ret = populate_csum(trans, csum_root, buf, start, len);
/* NOTE(review): the statement guarded by the -EEXIST test is not visible here */
13394 if (ret == -EEXIST)
13400 * TODO: if next leaf is corrupted, jump to nearest next valid
13403 ret = btrfs_next_item(cur_root, &path);
13413 btrfs_release_path(&path);
/*
 * Populate the checksum tree by walking the fs/subvolume trees listed in
 * the tree root.
 *
 * @trans:     transaction handle forwarded to the per-root helper
 * @csum_root: checksum tree; csum_root->fs_info supplies the tree root
 */
13418 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13419 struct btrfs_root *csum_root)
13421 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13422 struct btrfs_path path;
13423 struct btrfs_root *tree_root = fs_info->tree_root;
13424 struct btrfs_root *cur_root;
13425 struct extent_buffer *node;
13426 struct btrfs_key key;
13430 btrfs_init_path(&path);
/* The walk over root items starts at BTRFS_FS_TREE_OBJECTID */
13431 key.objectid = BTRFS_FS_TREE_OBJECTID;
13433 key.type = BTRFS_ROOT_ITEM_KEY;
13434 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13443 node = path.nodes[0];
13444 slot = path.slots[0];
13445 btrfs_item_key_to_cpu(node, &key, slot);
13446 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13448 if (key.type != BTRFS_ROOT_ITEM_KEY)
13450 if (!is_fstree(key.objectid))
13452 key.offset = (u64)-1;
13454 cur_root = btrfs_read_fs_root(fs_info, &key);
13455 if (IS_ERR(cur_root) || !cur_root) {
13456 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13460 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13465 ret = btrfs_next_item(tree_root, &path);
13475 btrfs_release_path(&path);
/*
 * Populate the checksum tree by walking the data extents recorded in the
 * extent tree.
 *
 * @trans:     transaction handle forwarded to populate_csum()
 * @csum_root: checksum tree; csum_root->fs_info supplies the extent root
 *             and the sectorsize used for the scratch buffer
 */
13479 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13480 struct btrfs_root *csum_root)
13482 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13483 struct btrfs_path path;
13484 struct btrfs_extent_item *ei;
13485 struct extent_buffer *leaf;
13487 struct btrfs_key key;
13490 btrfs_init_path(&path);
13492 key.type = BTRFS_EXTENT_ITEM_KEY;
13494 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13496 btrfs_release_path(&path);
13500 buf = malloc(csum_root->fs_info->sectorsize);
13502 btrfs_release_path(&path);
13507 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13508 ret = btrfs_next_leaf(extent_root, &path);
13516 leaf = path.nodes[0];
13518 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13519 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13524 ei = btrfs_item_ptr(leaf, path.slots[0],
13525 struct btrfs_extent_item);
/* The extent's flags are tested for BTRFS_EXTENT_FLAG_DATA */
13526 if (!(btrfs_extent_flags(leaf, ei) &
13527 BTRFS_EXTENT_FLAG_DATA)) {
13532 ret = populate_csum(trans, csum_root, buf, key.objectid,
13539 btrfs_release_path(&path);
13545 * Recalculate the csum and put it into the csum tree.
13547 * Extent tree init will wipe out all the extent info, so in that case, we
13548 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13549 * will use fs/subvol trees to init the csum tree.
/*
 * @trans:          transaction handle forwarded to the selected helper
 * @csum_root:      checksum tree forwarded to the selected helper
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(); the other
 *                  visible return calls fill_csum_tree_from_extent()
 *
 * Returns whatever the selected helper returns.
 */
13551 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13552 struct btrfs_root *csum_root,
13553 int search_fs_tree)
13555 if (search_fs_tree)
13556 return fill_csum_tree_from_fs(trans, csum_root);
13558 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Empty and free the global roots_info_cache, then reset the pointer to
 * NULL. The NULL test at the top guards the case where the cache was never
 * allocated.
 */
13561 static void free_roots_info_cache(void)
13563 if (!roots_info_cache)
/* Remove entries one at a time, starting from the first, until it is empty */
13566 while (!cache_tree_empty(roots_info_cache)) {
13567 struct cache_extent *entry;
13568 struct root_item_info *rii;
13570 entry = first_cache_extent(roots_info_cache);
13573 remove_cache_extent(roots_info_cache, entry);
/* Each cache_extent is embedded in a struct root_item_info */
13574 rii = container_of(entry, struct root_item_info, cache_extent);
13578 free(roots_info_cache);
13579 roots_info_cache = NULL;
/*
 * Fill roots_info_cache from the extent tree.
 *
 * @info: filesystem whose extent_root is walked
 *
 * For each tree block extent whose first inline ref is a TREE_BLOCK_REF, the
 * ref's offset is used as the root id. The cache keeps one root_item_info
 * per root id, recording the highest level seen with its bytenr, generation
 * and a node_count.
 */
13582 static int build_roots_info_cache(struct btrfs_fs_info *info)
13585 struct btrfs_key key;
13586 struct extent_buffer *leaf;
13587 struct btrfs_path path;
/* The global cache is allocated on first use */
13589 if (!roots_info_cache) {
13590 roots_info_cache = malloc(sizeof(*roots_info_cache));
13591 if (!roots_info_cache)
13593 cache_tree_init(roots_info_cache);
13596 btrfs_init_path(&path);
13598 key.type = BTRFS_EXTENT_ITEM_KEY;
13600 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13603 leaf = path.nodes[0];
13606 struct btrfs_key found_key;
13607 struct btrfs_extent_item *ei;
13608 struct btrfs_extent_inline_ref *iref;
13609 int slot = path.slots[0];
13614 struct cache_extent *entry;
13615 struct root_item_info *rii;
13617 if (slot >= btrfs_header_nritems(leaf)) {
13618 ret = btrfs_next_leaf(info->extent_root, &path);
13625 leaf = path.nodes[0];
13626 slot = path.slots[0];
13629 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13631 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13632 found_key.type != BTRFS_METADATA_ITEM_KEY)
13635 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13636 flags = btrfs_extent_flags(leaf, ei);
13638 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13639 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the inline ref follows the extent item and the level is the
 * key offset. Otherwise a btrfs_tree_block_info sits in between and carries
 * the level.
 */
13642 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13643 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13644 level = found_key.offset;
13646 struct btrfs_tree_block_info *binfo;
13648 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13649 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13650 level = btrfs_tree_block_level(leaf, binfo);
13654 * For a root extent, it must be of the following type and the
13655 * first (and only one) iref in the item.
13657 type = btrfs_extent_inline_ref_type(leaf, iref);
13658 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13661 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13662 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13664 rii = malloc(sizeof(struct root_item_info));
/* New entries cover [root_id, root_id + 1) and start with level (u8)-1 */
13669 rii->cache_extent.start = root_id;
13670 rii->cache_extent.size = 1;
13671 rii->level = (u8)-1;
13672 entry = &rii->cache_extent;
13673 ret = insert_cache_extent(roots_info_cache, entry);
13676 rii = container_of(entry, struct root_item_info,
13680 ASSERT(rii->cache_extent.start == root_id);
13681 ASSERT(rii->cache_extent.size == 1);
/* A higher level, or the first block seen for this root, replaces the record */
13683 if (level > rii->level || rii->level == (u8)-1) {
13684 rii->level = level;
13685 rii->bytenr = found_key.objectid;
13686 rii->gen = btrfs_extent_generation(leaf, ei);
13687 rii->node_count = 1;
/* NOTE(review): the body of the equal-level branch is not visible here */
13688 } else if (level == rii->level) {
13696 btrfs_release_path(&path);
/*
 * Compare one root item with the root node recorded in roots_info_cache and,
 * unless in read only mode, rewrite the item to match.
 *
 * @path:           points at the root item (nodes[0]/slots[0])
 * @root_key:       key of the root item; objectid is the cache lookup key
 * @read_only_mode: when non-zero the item is not written back
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
13701 static int maybe_repair_root_item(struct btrfs_path *path,
13702 const struct btrfs_key *root_key,
13703 const int read_only_mode)
13705 const u64 root_id = root_key->objectid;
13706 struct cache_extent *entry;
13707 struct root_item_info *rii;
13708 struct btrfs_root_item ri;
13709 unsigned long offset;
13711 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13714 "Error: could not find extent items for root %llu\n",
13715 root_key->objectid);
13719 rii = container_of(entry, struct root_item_info, cache_extent);
13720 ASSERT(rii->cache_extent.start == root_id);
13721 ASSERT(rii->cache_extent.size == 1);
/* A node_count other than 1 is reported as the btree root not being found */
13723 if (rii->node_count != 1) {
13725 "Error: could not find btree root extent for root %llu\n",
13730 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
/* Work on a local copy of the root item */
13731 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13733 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13734 btrfs_root_level(&ri) != rii->level ||
13735 btrfs_root_generation(&ri) != rii->gen) {
13738 * If we're in repair mode but our caller told us to not update
13739 * the root item, i.e. just check if it needs to be updated, don't
13740 * print this message, since the caller will call us again shortly
13741 * for the same root item without read only mode (the caller will
13742 * open a transaction first).
13744 if (!(read_only_mode && repair))
13746 "%sroot item for root %llu,"
13747 " current bytenr %llu, current gen %llu, current level %u,"
13748 " new bytenr %llu, new gen %llu, new level %u\n",
13749 (read_only_mode ? "" : "fixing "),
13751 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13752 btrfs_root_level(&ri),
13753 rii->bytenr, rii->gen, rii->level);
/* A root item generation newer than the found node is reported separately */
13755 if (btrfs_root_generation(&ri) > rii->gen) {
13757 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13758 root_id, btrfs_root_generation(&ri), rii->gen);
/* Update the copy, then write it back over the item in the leaf */
13762 if (!read_only_mode) {
13763 btrfs_set_root_bytenr(&ri, rii->bytenr);
13764 btrfs_set_root_level(&ri, rii->level);
13765 btrfs_set_root_generation(&ri, rii->gen);
13766 write_extent_buffer(path->nodes[0], &ri,
13767 offset, sizeof(ri));
13777 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13778 * caused read-only snapshots to be corrupted if they were created at a moment
13779 * when the source subvolume/snapshot had orphan items. The issue was that the
13780 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13781 * node instead of the post orphan cleanup root node.
13782 * So this function and its callees just detect and fix those cases. Even
13783 * though the regression was for read-only snapshots, this function applies to
13784 * any snapshot/subvolume root.
13785 * This must be run before any other repair code; otherwise other repair code
13786 * may delete or modify backrefs in the extent tree, for example, which will
13787 * result in an inconsistent fs after repairing the root items.
/*
 * Build roots_info_cache, then search info->tree_root for ROOT_ITEM keys
 * starting at BTRFS_FIRST_FREE_OBJECTID and hand each one to
 * maybe_repair_root_item(). The cache is freed again near the end of the
 * function.
 *
 * The caller in this file treats a negative return as an error and a
 * positive return as a number of roots (fixed, or needing a fix).
 *
 * NOTE(review): labels, gotos and the loop header are not visible in this
 * excerpt; the control flow described here should be confirmed against the
 * full file.
 */
13789 static int repair_root_items(struct btrfs_fs_info *info)
13791 struct btrfs_path path;
13792 struct btrfs_key key;
13793 struct extent_buffer *leaf;
13794 struct btrfs_trans_handle *trans = NULL;
13797 int need_trans = 0;
13799 btrfs_init_path(&path);
13801 ret = build_roots_info_cache(info);
13805 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13806 key.type = BTRFS_ROOT_ITEM_KEY;
13811 * Avoid opening and committing transactions if a leaf doesn't have
13812 * any root items that need to be fixed, so that we avoid rotating
13813 * backup roots unnecessarily.
13816 trans = btrfs_start_transaction(info->tree_root, 1);
13817 if (IS_ERR(trans)) {
13818 ret = PTR_ERR(trans);
13823 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13827 leaf = path.nodes[0];
13830 struct btrfs_key found_key;
/* Past the last item of this leaf: find the next key to continue from. */
13832 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13833 int no_more_keys = find_next_key(&path, &key);
13835 btrfs_release_path(&path);
13837 ret = btrfs_commit_transaction(trans,
13849 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/*
 * Key type and the tree reloc objectid are tested before the item is passed
 * on. With no transaction open the item is only checked (read_only_mode is
 * 1); with one open it may be rewritten (read_only_mode is 0).
 */
13851 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13853 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13856 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13860 if (!trans && repair) {
13863 btrfs_release_path(&path);
13873 free_roots_info_cache();
13874 btrfs_release_path(&path);
13876 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Call btrfs_clear_free_space_cache() on block groups found through
 * btrfs_lookup_first_block_group(), then set the superblock cache generation
 * to (u64)-1 and commit a transaction on the tree root.
 *
 * Returns PTR_ERR(trans) when the final transaction cannot be started.
 * NOTE(review): the other return paths are not visible in this excerpt.
 */
13883 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13885 struct btrfs_trans_handle *trans;
13886 struct btrfs_block_group_cache *bg_cache;
13890 /* Clear all free space cache inodes and its extent data */
13892 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13895 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup from key.objectid + key.offset of the group just handled. */
13898 current = bg_cache->key.objectid + bg_cache->key.offset;
13901 /* Don't forget to set cache_generation to -1 */
13902 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13903 if (IS_ERR(trans)) {
13904 error("failed to update super block cache generation");
13905 return PTR_ERR(trans);
13907 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of the commit is not stored or checked on this line. */
13908 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache selected by clear_version:
 *   1 - calls clear_free_space_cache(); an error is reported instead when the
 *       FREE_SPACE_TREE compat_ro bit is set.
 *   2 - calls btrfs_clear_free_space_tree(); when the FREE_SPACE_TREE bit is
 *       not set, only a "nothing to clear" message is printed.
 *
 * NOTE(review): the second line of the signature and the return statements
 * are not visible in this excerpt.
 */
13913 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13918 if (clear_version == 1) {
13919 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13921 "free space cache v2 detected, use --clear-space-cache v2");
13925 printf("Clearing free space cache\n");
13926 ret = clear_free_space_cache(fs_info);
13928 error("failed to clear free space cache");
13931 printf("Free space cache cleared\n");
13933 } else if (clear_version == 2) {
13934 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13935 printf("no free space cache v2 to clear\n");
13939 printf("Clear free space cache v2\n");
13940 ret = btrfs_clear_free_space_tree(fs_info);
13942 error("failed to clear free space cache v2: %d", ret);
13945 printf("free space cache v2 cleared\n");
/*
 * Usage and help text for "btrfs check". cmd_check() passes this table to
 * usage() on an unknown option or a wrong number of arguments.
 */
13952 const char * const cmd_check_usage[] = {
13953 "btrfs check [options] <device>",
13954 "Check structural integrity of a filesystem (unmounted).",
13955 "Check structural integrity of an unmounted filesystem. Verify internal",
13956 "trees' consistency and item connectivity. In the repair mode try to",
13957 "fix the problems found. ",
13958 "WARNING: the repair mode is considered dangerous",
13960 "-s|--super <superblock> use this superblock copy",
13961 "-b|--backup use the first valid backup root copy",
13962 "--force skip mount checks, repair is not possible",
13963 "--repair try to repair the filesystem",
13964 "--readonly run in read-only mode (default)",
13965 "--init-csum-tree create a new CRC tree",
13966 "--init-extent-tree create a new extent tree",
13967 "--mode <MODE> allows choice of memory/IO trade-offs",
13968 " where MODE is one of:",
13969 " original - read inodes and extents to memory (requires",
13970 " more memory, does less IO)",
13971 " lowmem - try to use less memory but read blocks again",
13973 "--check-data-csum verify checksums of data blocks",
13974 "-Q|--qgroup-report print a report on qgroup consistency",
13975 "-E|--subvol-extents <subvolid>",
13976 " print subvolume extents and sharing state",
13977 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13978 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13979 "-p|--progress indicate progress",
13980 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, checks the mount status of the device, opens the
 * filesystem and then, depending on the options, clears the free space
 * cache, prints a qgroup report, prints the extent state of a subvolume, or
 * re-initialises the extent/csum trees. The visible check sequence is: root
 * items, chunks and extents, free space cache/tree, fs roots, csums, root
 * refs and quota groups, followed by a summary of the collected counters.
 *
 * NOTE(review): many short lines (returns, gotos, labels, braces) are not
 * visible in this excerpt, so the exact exit paths must be confirmed against
 * the full file.
 */
13984 int cmd_check(int argc, char **argv)
13986 struct cache_tree root_cache;
13987 struct btrfs_root *root;
13988 struct btrfs_fs_info *info;
13991 u64 tree_root_bytenr = 0;
13992 u64 chunk_root_bytenr = 0;
13993 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13997 int init_csum_tree = 0;
13999 int clear_space_cache = 0;
14000 int qgroup_report = 0;
14001 int qgroups_repaired = 0;
14002 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Codes for long-only options; they start at 257, above any single-character option code. */
14007 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14008 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14009 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14010 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14011 GETOPT_VAL_FORCE };
14012 static const struct option long_options[] = {
14013 { "super", required_argument, NULL, 's' },
14014 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14015 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14016 { "init-csum-tree", no_argument, NULL,
14017 GETOPT_VAL_INIT_CSUM },
14018 { "init-extent-tree", no_argument, NULL,
14019 GETOPT_VAL_INIT_EXTENT },
14020 { "check-data-csum", no_argument, NULL,
14021 GETOPT_VAL_CHECK_CSUM },
14022 { "backup", no_argument, NULL, 'b' },
14023 { "subvol-extents", required_argument, NULL, 'E' },
14024 { "qgroup-report", no_argument, NULL, 'Q' },
14025 { "tree-root", required_argument, NULL, 'r' },
14026 { "chunk-root", required_argument, NULL,
14027 GETOPT_VAL_CHUNK_TREE },
14028 { "progress", no_argument, NULL, 'p' },
14029 { "mode", required_argument, NULL,
14031 { "clear-space-cache", required_argument, NULL,
14032 GETOPT_VAL_CLEAR_SPACE_CACHE},
14033 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14034 { NULL, 0, NULL, 0}
14037 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14041 case 'a': /* ignored */ break;
14043 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14046 num = arg_strtou64(optarg);
14047 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14049 "super mirror should be less than %d",
14050 BTRFS_SUPER_MIRROR_MAX);
14053 bytenr = btrfs_sb_offset(((int)num));
14054 printf("using SB copy %llu, bytenr %llu\n", num,
14055 (unsigned long long)bytenr);
14061 subvolid = arg_strtou64(optarg);
14064 tree_root_bytenr = arg_strtou64(optarg);
14066 case GETOPT_VAL_CHUNK_TREE:
14067 chunk_root_bytenr = arg_strtou64(optarg);
14070 ctx.progress_enabled = true;
14074 usage(cmd_check_usage);
14075 case GETOPT_VAL_REPAIR:
14076 printf("enabling repair mode\n");
14078 ctree_flags |= OPEN_CTREE_WRITES;
14080 case GETOPT_VAL_READONLY:
14083 case GETOPT_VAL_INIT_CSUM:
14084 printf("Creating a new CRC tree\n");
14085 init_csum_tree = 1;
14087 ctree_flags |= OPEN_CTREE_WRITES;
14089 case GETOPT_VAL_INIT_EXTENT:
14090 init_extent_tree = 1;
14091 ctree_flags |= (OPEN_CTREE_WRITES |
14092 OPEN_CTREE_NO_BLOCK_GROUPS);
14095 case GETOPT_VAL_CHECK_CSUM:
14096 check_data_csum = 1;
14098 case GETOPT_VAL_MODE:
14099 check_mode = parse_check_mode(optarg);
14100 if (check_mode == CHECK_MODE_UNKNOWN) {
14101 error("unknown mode: %s", optarg);
14105 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14106 if (strcmp(optarg, "v1") == 0) {
14107 clear_space_cache = 1;
14108 } else if (strcmp(optarg, "v2") == 0) {
14109 clear_space_cache = 2;
14110 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14113 "invalid argument to --clear-space-cache, must be v1 or v2");
14116 ctree_flags |= OPEN_CTREE_WRITES;
14118 case GETOPT_VAL_FORCE:
/* Exactly one positional argument (the device) is expected after the options. */
14124 if (check_argc_exact(argc - optind, 1))
14125 usage(cmd_check_usage);
14127 if (ctx.progress_enabled) {
14128 ctx.tp = TASK_NOTHING;
14129 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14132 /* This check is the only reason for --readonly to exist */
14133 if (readonly && repair) {
14134 error("repair options are not compatible with --readonly");
14139 * experimental and dangerous
14141 if (repair && check_mode == CHECK_MODE_LOWMEM)
14142 warning("low-memory mode repair support is only partial");
14145 cache_tree_init(&root_cache);
/* Mount status check; the messages below show that --force allows continuing on a mounted filesystem. */
14147 ret = check_mounted(argv[optind]);
14150 error("could not check mount status: %s",
14156 "%s is currently mounted, use --force if you really intend to check the filesystem",
14164 error("repair and --force is not yet supported");
14171 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14175 "filesystem mounted, continuing because of --force");
14177 /* A block device is mounted in exclusive mode by kernel */
14178 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14181 /* only allow partial opening under repair mode */
14183 ctree_flags |= OPEN_CTREE_PARTIAL;
14185 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14186 chunk_root_bytenr, ctree_flags);
14188 error("cannot open file system");
14194 global_info = info;
14195 root = info->fs_root;
14196 uuid_unparse(info->super_copy->fsid, uuidbuf);
14198 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14201 * Check the bare minimum before starting anything else that could rely
14202 * on it, namely the tree roots, any local consistency checks
14204 if (!extent_buffer_uptodate(info->tree_root->node) ||
14205 !extent_buffer_uptodate(info->dev_root->node) ||
14206 !extent_buffer_uptodate(info->chunk_root->node)) {
14207 error("critical roots corrupted, unable to check the filesystem");
/* --clear-space-cache v1|v2 was given (clear_space_cache holds 1 or 2). */
14213 if (clear_space_cache) {
14214 ret = do_clear_free_space_cache(info, clear_space_cache);
14220 * repair mode will force us to commit transaction which
14221 * will make us fail to load log tree when mounting.
14223 if (repair && btrfs_super_log_root(info->super_copy)) {
14224 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14230 ret = zero_log_tree(root);
14233 error("failed to zero log tree: %d", ret);
/* -Q/--qgroup-report */
14238 if (qgroup_report) {
14239 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14241 ret = qgroup_verify_all(info);
14248 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14249 subvolid, argv[optind], uuidbuf);
14250 ret = print_extent_state(info, subvolid);
/* --init-extent-tree / --init-csum-tree: both run inside one transaction on extent_root. */
14255 if (init_extent_tree || init_csum_tree) {
14256 struct btrfs_trans_handle *trans;
14258 trans = btrfs_start_transaction(info->extent_root, 0);
14259 if (IS_ERR(trans)) {
14260 error("error starting transaction");
14261 ret = PTR_ERR(trans);
14266 if (init_extent_tree) {
14267 printf("Creating a new extent tree\n");
14268 ret = reinit_extent_tree(trans, info);
14274 if (init_csum_tree) {
14275 printf("Reinitialize checksum tree\n");
14276 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14278 error("checksum tree initialization failed: %d",
14285 ret = fill_csum_tree(trans, info->csum_root,
14289 error("checksum tree refilling failed: %d", ret);
14294 * Ok now we commit and run the normal fsck, which will add
14295 * extent entries for all of the items it finds.
14297 ret = btrfs_commit_transaction(trans, info->extent_root);
14302 if (!extent_buffer_uptodate(info->extent_root->node)) {
14303 error("critical: extent_root, unable to check the filesystem");
14308 if (!extent_buffer_uptodate(info->csum_root->node)) {
14309 error("critical: csum_root, unable to check the filesystem");
/* repair_root_items() is not called when the extent tree was re-initialised above. */
14315 if (!init_extent_tree) {
14316 ret = repair_root_items(info);
14319 error("failed to repair root items: %s", strerror(-ret));
14323 fprintf(stderr, "Fixed %d roots.\n", ret);
14325 } else if (ret > 0) {
14327 "Found %d roots with an outdated root item.\n",
14330 "Please run a filesystem check with the option --repair to fix them.\n");
14337 ret = do_check_chunks_and_extents(info);
14341 "errors found in extent allocation tree or chunk allocation");
14343 /* Only re-check super size after we checked and repaired the fs */
14344 err |= !is_super_size_valid(info);
14346 if (!ctx.progress_enabled) {
14347 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14348 fprintf(stderr, "checking free space tree\n");
14350 fprintf(stderr, "checking free space cache\n");
14352 ret = check_space_cache(root);
14355 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14356 error("errors found in free space tree");
14358 error("errors found in free space cache");
14363 * We used to have to have these hole extents in between our real
14364 * extents so if we don't have this flag set we need to make sure there
14365 * are no gaps in the file extents for inodes, otherwise we can just
14366 * ignore it when this happens.
14368 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14369 ret = do_check_fs_roots(info, &root_cache);
14372 error("errors found in fs roots");
14376 fprintf(stderr, "checking csums\n");
14377 ret = check_csums(root);
14380 error("errors found in csum tree");
14384 fprintf(stderr, "checking root refs\n");
14385 /* For low memory mode, check_fs_roots_v2 handles root refs */
14386 if (check_mode != CHECK_MODE_LOWMEM) {
14387 ret = check_root_refs(root, &root_cache);
14390 error("errors found in root refs");
/* In repair mode, call recow_extent_buffer() on every buffer queued on fs_info->recow_ebs. */
14395 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14396 struct extent_buffer *eb;
14398 eb = list_first_entry(&root->fs_info->recow_ebs,
14399 struct extent_buffer, recow);
14400 list_del_init(&eb->recow);
14401 ret = recow_extent_buffer(root, eb);
14404 error("fails to fix transid errors");
/* Drain the delete_items list, calling delete_bad_item() on each entry. */
14409 while (!list_empty(&delete_items)) {
14410 struct bad_item *bad;
14412 bad = list_first_entry(&delete_items, struct bad_item, list);
14413 list_del_init(&bad->list);
14415 ret = delete_bad_item(root, bad);
14421 if (info->quota_enabled) {
14422 fprintf(stderr, "checking quota groups\n");
14423 ret = qgroup_verify_all(info);
14426 error("failed to check quota groups");
14430 ret = repair_qgroups(info, &qgroups_repaired);
14433 error("failed to repair quota groups");
/* Buffers still queued on recow_ebs at this point are reported as transid errors. */
14439 if (!list_empty(&root->fs_info->recow_ebs)) {
14440 error("transid errors in file system");
/* Summary of the counters collected during the check. */
14445 printf("found %llu bytes used, ",
14446 (unsigned long long)bytes_used);
14448 printf("error(s) found\n");
14450 printf("no error found\n");
14451 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14452 printf("total tree bytes: %llu\n",
14453 (unsigned long long)total_btree_bytes);
14454 printf("total fs tree bytes: %llu\n",
14455 (unsigned long long)total_fs_tree_bytes);
14456 printf("total extent tree bytes: %llu\n",
14457 (unsigned long long)total_extent_tree_bytes);
14458 printf("btree space waste bytes: %llu\n",
14459 (unsigned long long)btree_space_waste);
14460 printf("file data blocks allocated: %llu\n referenced %llu\n",
14461 (unsigned long long)data_bytes_allocated,
14462 (unsigned long long)data_bytes_referenced);
/* Free the qgroup counts and the root record tree; the progress task, if enabled, is torn down afterwards. */
14464 free_qgroup_counts();
14465 free_root_recs_tree(&root_cache);
14469 if (ctx.progress_enabled)
14470 task_deinit(ctx.info);