2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
/*
 * File-scope declarations for the checker.
 * NOTE(review): the enum/struct headers and several short lines around here
 * are not visible in this excerpt, only fragments of them are.
 */
54 TASK_NOTHING, /* have to be the last element */
/* tp and info are read through struct task_ctx in print_status_check() */
59 enum task_position tp;
61 struct task_info *info;
/* Global u64 counters, all initialised to zero */
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
/* Global lists, initially empty */
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
/* Global flags, off by default */
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
/* Zero-initialised status-task context */
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
/* Check modes; CHECK_MODE_DEFAULT is an alias for CHECK_MODE_ORIGINAL */
81 enum btrfs_check_mode {
85 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Currently selected check mode, starts as the default */
88 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * rb-tree comparator for two data backrefs.
 *
 * Both nodes are converted to struct data_backref and compared field by
 * field in this order: parent, owner, offset and, only when both entries
 * have found_ref set, disk_bytenr and bytes. A WARN_ON fires if either
 * entry is not flagged is_data. When the parents match and back1 is a full
 * backref, the comparison is decided at that point.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably positive / negative / zero in comparator convention - confirm.
 */
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
92 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94 struct data_backref *back1 = to_data_backref(ext1);
95 struct data_backref *back2 = to_data_backref(ext2);
97 WARN_ON(!ext1->is_data);
98 WARN_ON(!ext2->is_data);
100 /* parent and root are a union, so this covers both */
101 if (back1->parent > back2->parent)
103 if (back1->parent < back2->parent)
106 /* This is a full backref and the parents match. */
107 if (back1->node.full_backref)
110 if (back1->owner > back2->owner)
112 if (back1->owner < back2->owner)
115 if (back1->offset > back2->offset)
117 if (back1->offset < back2->offset)
/* disk_bytenr and bytes only take part when both refs were actually found */
120 if (back1->found_ref && back2->found_ref) {
121 if (back1->disk_bytenr > back2->disk_bytenr)
123 if (back1->disk_bytenr < back2->disk_bytenr)
126 if (back1->bytes > back2->bytes)
128 if (back1->bytes < back2->bytes)
/*
 * rb-tree comparator for two tree backrefs.
 *
 * Both nodes are converted to struct tree_backref and ordered by their
 * parent field (which shares storage with root, per the comment below).
 * A WARN_ON fires if either entry is flagged is_data.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
137 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139 struct tree_backref *back1 = to_tree_backref(ext1);
140 struct tree_backref *back2 = to_tree_backref(ext2);
142 WARN_ON(ext1->is_data);
143 WARN_ON(ext2->is_data);
145 /* parent and root are a union, so this covers both */
146 if (back1->parent > back2->parent)
148 if (back1->parent < back2->parent)
/*
 * rb-tree comparator for generic extent backrefs.
 *
 * Entries are ordered first by is_data, then by full_backref. When both
 * agree, the comparison is delegated to compare_data_backref() or
 * compare_tree_backref().
 * NOTE(review): the condition choosing between the two delegates is not
 * visible in this excerpt; presumably it tests is_data - confirm.
 */
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
156 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
159 if (ext1->is_data > ext2->is_data)
162 if (ext1->is_data < ext2->is_data)
165 if (ext1->full_backref > ext2->full_backref)
167 if (ext1->full_backref < ext2->full_backref)
171 return compare_data_backref(node1, node2);
173 return compare_tree_backref(node1, node2);
/*
 * Progress-printing routine; @p is a struct task_ctx.
 *
 * Starts a periodic timer on priv->info with a period of 1000 (1s), then
 * prints the description string selected by priv->tp followed by one of the
 * four work_indicator characters (chosen by count % 4) and a carriage
 * return, waiting on task_period_wait() between updates. priv->tp equal to
 * TASK_NOTHING is special-cased.
 * NOTE(review): the loop construct, the TASK_NOTHING handling and the
 * return value are not visible in this excerpt.
 */
177 static void *print_status_check(void *p)
179 struct task_ctx *priv = p;
180 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Indexed by priv->tp */
182 static char *task_position_string[] = {
184 "checking free space cache",
188 task_period_start(priv->info, 1000 /* 1s */);
190 if (priv->tp == TASK_NOTHING)
194 printf("%s [%c]\r", task_position_string[priv->tp],
195 work_indicator[count % 4]);
198 task_period_wait(priv->info);
/*
 * Takes an opaque pointer and returns int.
 * NOTE(review): the body is not visible in this excerpt; presumably the
 * completion counterpart of print_status_check() - confirm.
 */
203 static int print_status_return(void *p)
211 static enum btrfs_check_mode parse_check_mode(const char *str)
213 if (strcmp(str, "lowmem") == 0)
214 return CHECK_MODE_LOWMEM;
215 if (strcmp(str, "orig") == 0)
216 return CHECK_MODE_ORIGINAL;
217 if (strcmp(str, "original") == 0)
218 return CHECK_MODE_ORIGINAL;
220 return CHECK_MODE_UNKNOWN;
/*
 * Compatibility helper that lets older code keep working with the hole tree.
 *
 * Looks at the first (leftmost) hole in @holes. An empty tree is handled
 * separately before the lookup.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the start of the first hole, or a sentinel when empty.
 */
224 static u64 first_extent_gap(struct rb_root *holes)
226 struct file_extent_hole *hole;
228 if (RB_EMPTY_ROOT(holes))
231 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for file extent holes.
 *
 * Holes are ordered by start offset. When the starts are equal, the lengths
 * decide which of the two becomes the "merge center" (hole1 when its length
 * is greater or equal, hole2 otherwise).
 * NOTE(review): the return statements are not visible in this excerpt.
 */
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
237 struct file_extent_hole *hole1;
238 struct file_extent_hole *hole2;
240 hole1 = rb_entry(node1, struct file_extent_hole, node);
241 hole2 = rb_entry(node2, struct file_extent_hole, node);
243 if (hole1->start > hole2->start)
245 if (hole1->start < hole2->start)
247 /* Now hole1->start == hole2->start */
248 if (hole1->len >= hole2->len)
250 * Hole 1 will be merge center
251 * Same hole will be merged later
254 /* Hole 2 will be merge center */
259 * Add a hole to the record
261 * This will do hole merge for copy_file_extent_holes(),
262 * which will ensure there won't be contiguous holes.
/*
 * Insert a new hole into @holes and merge it with its neighbours.
 *
 * A hole is allocated with malloc() and inserted using compare_hole(). If
 * the previous hole reaches the new one (prev->start + prev->len >=
 * hole->start), the new hole is extended backwards to prev->start and prev
 * is erased from the tree. Following holes that begin at or before the end
 * of the new hole are then absorbed one by one, extending the new hole when
 * the next one ends at or after it.
 * NOTE(review): the remaining parameters (callers pass a start and a
 * length), the allocation-failure handling, the freeing of erased nodes and
 * the return value are not visible in this excerpt.
 */
264 static int add_file_extent_hole(struct rb_root *holes,
267 struct file_extent_hole *hole;
268 struct file_extent_hole *prev = NULL;
269 struct file_extent_hole *next = NULL;
271 hole = malloc(sizeof(*hole));
276 /* Since compare will not return 0, no -EEXIST will happen */
277 rb_insert(holes, &hole->node, compare_hole);
279 /* simple merge with previous hole */
280 if (rb_prev(&hole->node))
281 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
283 if (prev && prev->start + prev->len >= hole->start) {
/* New length spans from prev->start to the end of the new hole */
284 hole->len = hole->start + hole->len - prev->start;
285 hole->start = prev->start;
286 rb_erase(&prev->node, holes);
291 /* iterate merge with next holes */
293 if (!rb_next(&hole->node))
295 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
297 if (hole->start + hole->len >= next->start) {
/* Only grow when the next hole ends at or beyond the current end */
298 if (hole->start + hole->len <= next->start + next->len)
299 hole->len = next->start + next->len -
301 rb_erase(&next->node, holes);
/*
 * Search callback used with rb_search() to find the hole containing an
 * offset.
 *
 * @data is a struct file_extent_hole acting as the search key; @node is the
 * tree node being tested. The key offset is compared with the node's hole:
 * either it lies before the hole, or inside [hole->start, hole->start +
 * hole->len).
 * NOTE(review): how start is derived from @data and the return values are
 * not visible in this excerpt.
 */
310 static int compare_hole_range(struct rb_node *node, void *data)
312 struct file_extent_hole *hole;
315 hole = (struct file_extent_hole *)data;
318 hole = rb_entry(node, struct file_extent_hole, node);
319 if (start < hole->start)
321 if (start >= hole->start && start < hole->start + hole->len)
327 * Delete a hole in the record
329 * This will split the hole if needed and is much more restrictive than add.
/*
 * Remove the range [start, start + len) from the hole tree @holes.
 *
 * The hole containing start is located with rb_search() and
 * compare_hole_range(). The range must not extend past the end of that hole
 * (checked below). The hole is erased and whatever remains of it before
 * and/or after the removed range is re-added with add_file_extent_hole().
 * NOTE(review): the start/len parameter lines, the error paths, the
 * conditions guarding the two re-add calls and the return value are not
 * visible in this excerpt.
 */
331 static int del_file_extent_hole(struct rb_root *holes,
334 struct file_extent_hole *hole;
335 struct file_extent_hole tmp;
340 struct rb_node *node;
347 node = rb_search(holes, &tmp, compare_hole_range, NULL);
350 hole = rb_entry(node, struct file_extent_hole, node);
/* Range to delete sticks out past the end of the containing hole */
351 if (start + len > hole->start + hole->len)
355 * Now there will be no overlap, delete the hole and re-add the
356 * split(s) if they exists.
/* Leading remainder: [hole->start, start) */
358 if (start > hole->start) {
359 prev_start = hole->start;
360 prev_len = start - hole->start;
/* Trailing remainder: [start + len, end of hole) */
363 if (hole->start + hole->len > start + len) {
364 next_start = start + len;
365 next_len = hole->start + hole->len - start - len;
368 rb_erase(node, holes);
371 ret = add_file_extent_hole(holes, prev_start, prev_len);
376 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole from the tree @src to the tree @dst.
 *
 * Walks @src in order from rb_first() with rb_next() and calls
 * add_file_extent_hole() on @dst for each hole, so holes are merged in @dst
 * as they are added.
 * NOTE(review): the second parameter line, the loop construct, the handling
 * of ret and the return value are not visible in this excerpt.
 */
383 static int copy_file_extent_holes(struct rb_root *dst,
386 struct file_extent_hole *hole;
387 struct rb_node *node;
390 node = rb_first(src);
392 hole = rb_entry(node, struct file_extent_hole, node);
393 ret = add_file_extent_hole(dst, hole->start, hole->len);
396 node = rb_next(node);
/*
 * Empty the hole tree @holes.
 *
 * Repeatedly takes the first node, erases it from the tree and fetches the
 * new first node.
 * NOTE(review): the loop construct and the freeing of each hole are not
 * visible in this excerpt.
 */
401 static void free_file_extent_holes(struct rb_root *holes)
403 struct rb_node *node;
404 struct file_extent_hole *hole;
406 node = rb_first(holes);
408 hole = rb_entry(node, struct file_extent_hole, node);
409 rb_erase(node, holes);
/* Restart from the new leftmost node after the erase */
411 node = rb_first(holes);
415 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root)
420 if (root->last_trans != trans->transid) {
421 root->track_dirty = 1;
422 root->last_trans = trans->transid;
423 root->commit_root = root->node;
424 extent_buffer_get(root->node);
/*
 * rb-tree comparator for device records, ordering them by devid.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
430 struct device_record *rec1;
431 struct device_record *rec2;
433 rec1 = rb_entry(node1, struct device_record, node);
434 rec2 = rb_entry(node2, struct device_record, node);
435 if (rec1->devid > rec2->devid)
437 else if (rec1->devid < rec2->devid)
/*
 * Create an independent copy of @orig_rec.
 *
 * The record itself is copied with memcpy(), then its list heads and hole
 * tree are reinitialised so they do not alias the original's. Every backref
 * (including its trailing name, namelen + 1 bytes) and every orphan data
 * extent is duplicated onto the new record's lists, and the holes are copied
 * with copy_file_extent_holes().
 *
 * Returns ERR_PTR(-ENOMEM) when the record itself cannot be allocated. The
 * trailing section walks the new record's holes, backrefs and orphan
 * extents; it appears to be the failure cleanup.
 * NOTE(review): the error checks after the inner malloc() calls, the
 * success return and the free() calls in the cleanup section are not
 * visible in this excerpt.
 */
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
445 struct inode_record *rec;
446 struct inode_backref *backref;
447 struct inode_backref *orig;
448 struct inode_backref *tmp;
449 struct orphan_data_extent *src_orphan;
450 struct orphan_data_extent *dst_orphan;
455 rec = malloc(sizeof(*rec));
457 return ERR_PTR(-ENOMEM);
458 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy copied the original's list/tree pointers; reset them */
460 INIT_LIST_HEAD(&rec->backrefs);
461 INIT_LIST_HEAD(&rec->orphan_extents);
462 rec->holes = RB_ROOT;
464 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* Backref is followed in memory by its NUL-terminated name */
465 size = sizeof(*orig) + orig->namelen + 1;
466 backref = malloc(size);
471 memcpy(backref, orig, size);
472 list_add_tail(&backref->list, &rec->backrefs);
474 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475 dst_orphan = malloc(sizeof(*dst_orphan));
480 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
483 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
490 rb = rb_first(&rec->holes);
492 struct file_extent_hole *hole;
494 hole = rb_entry(rb, struct file_extent_hole, node);
500 if (!list_empty(&rec->backrefs))
501 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502 list_del(&orig->list);
/*
 * NOTE(review): this loop walks rec->orphan_extents, whose entries are
 * struct orphan_data_extent (see the copy loop above), using the
 * struct inode_backref cursors orig/tmp. Consider dedicated
 * orphan_data_extent cursors.
 */
506 if (!list_empty(&rec->orphan_extents))
507 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508 list_del(&orig->list);
/*
 * Print the orphan data extents on @orphan_extents to stdout.
 *
 * Nothing is listed when the list is empty. Otherwise a header line naming
 * the tree is printed, followed by one line per extent with its inode
 * (objectid), offset, disk_bytenr and disk length.
 * NOTE(review): the second parameter (callers pass a tree objectid) and the
 * trailing printf arguments are not visible in this excerpt.
 */
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
520 struct orphan_data_extent *orphan;
522 if (list_empty(orphan_extents))
524 printf("The following data extent is lost in tree %llu:\n",
526 list_for_each_entry(orphan, orphan_extents, list) {
527 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded on inode record @rec of @root to stderr.
 *
 * Prints "root <id> inode <ino> errors <hex>" followed by a comma-separated
 * description for every I_ERR_* bit set in rec->errors. For a reloc root the
 * line is prefixed with "reloc" and the id printed is root_key.offset.
 * If I_ERR_FILE_EXTENT_ORPHAN is set, the orphan extents are listed as well;
 * if I_ERR_FILE_EXTENT_DISCOUNT is set, the recorded holes are listed.
 *
 * NOTE(review): print_orphan_data_extents() writes to stdout while
 * everything else here goes to stderr, and it is passed root->objectid
 * rather than the (possibly remapped) root_objectid used in the header
 * line - confirm both are intended.
 * NOTE(review): parts of the hole-printing section are not visible in this
 * excerpt.
 */
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
535 u64 root_objectid = root->root_key.objectid;
536 int errors = rec->errors;
540 /* For reloc root errors, print the objectid of the corresponding fs root */
541 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542 root_objectid = root->root_key.offset;
543 fprintf(stderr, "reloc");
545 fprintf(stderr, "root %llu inode %llu errors %x",
546 (unsigned long long) root_objectid,
547 (unsigned long long) rec->ino, rec->errors);
549 if (errors & I_ERR_NO_INODE_ITEM)
550 fprintf(stderr, ", no inode item");
551 if (errors & I_ERR_NO_ORPHAN_ITEM)
552 fprintf(stderr, ", no orphan item");
553 if (errors & I_ERR_DUP_INODE_ITEM)
554 fprintf(stderr, ", dup inode item");
555 if (errors & I_ERR_DUP_DIR_INDEX)
556 fprintf(stderr, ", dup dir index");
557 if (errors & I_ERR_ODD_DIR_ITEM)
558 fprintf(stderr, ", odd dir item");
559 if (errors & I_ERR_ODD_FILE_EXTENT)
560 fprintf(stderr, ", odd file extent");
561 if (errors & I_ERR_BAD_FILE_EXTENT)
562 fprintf(stderr, ", bad file extent");
563 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564 fprintf(stderr, ", file extent overlap");
565 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566 fprintf(stderr, ", file extent discount");
567 if (errors & I_ERR_DIR_ISIZE_WRONG)
568 fprintf(stderr, ", dir isize wrong");
569 if (errors & I_ERR_FILE_NBYTES_WRONG)
570 fprintf(stderr, ", nbytes wrong");
571 if (errors & I_ERR_ODD_CSUM_ITEM)
572 fprintf(stderr, ", odd csum item");
573 if (errors & I_ERR_SOME_CSUM_MISSING)
574 fprintf(stderr, ", some csum missing");
575 if (errors & I_ERR_LINK_COUNT_WRONG)
576 fprintf(stderr, ", link count wrong");
577 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578 fprintf(stderr, ", orphan file extent");
579 fprintf(stderr, "\n");
580 /* Print the orphan extents if needed */
581 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
584 /* Print the holes if needed */
585 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586 struct file_extent_hole *hole;
587 struct rb_node *node;
590 node = rb_first(&rec->holes);
591 fprintf(stderr, "Found file extent holes:\n");
594 hole = rb_entry(node, struct file_extent_hole, node);
595 fprintf(stderr, "\tstart: %llu, len: %llu\n",
596 hole->start, hole->len);
597 node = rb_next(node);
/* Separate message for a hole starting at offset 0 */
600 fprintf(stderr, "\tstart: 0, len: %llu\n",
602 root->fs_info->sectorsize));
606 static void print_ref_error(int errors)
608 if (errors & REF_ERR_NO_DIR_ITEM)
609 fprintf(stderr, ", no dir item");
610 if (errors & REF_ERR_NO_DIR_INDEX)
611 fprintf(stderr, ", no dir index");
612 if (errors & REF_ERR_NO_INODE_REF)
613 fprintf(stderr, ", no inode ref");
614 if (errors & REF_ERR_DUP_DIR_ITEM)
615 fprintf(stderr, ", dup dir item");
616 if (errors & REF_ERR_DUP_DIR_INDEX)
617 fprintf(stderr, ", dup dir index");
618 if (errors & REF_ERR_DUP_INODE_REF)
619 fprintf(stderr, ", dup inode ref");
620 if (errors & REF_ERR_INDEX_UNMATCH)
621 fprintf(stderr, ", index mismatch");
622 if (errors & REF_ERR_FILETYPE_UNMATCH)
623 fprintf(stderr, ", filetype mismatch");
624 if (errors & REF_ERR_NAME_TOO_LONG)
625 fprintf(stderr, ", name too long");
626 if (errors & REF_ERR_NO_ROOT_REF)
627 fprintf(stderr, ", no root ref");
628 if (errors & REF_ERR_NO_ROOT_BACKREF)
629 fprintf(stderr, ", no root backref");
630 if (errors & REF_ERR_DUP_ROOT_REF)
631 fprintf(stderr, ", dup root ref");
632 if (errors & REF_ERR_DUP_ROOT_BACKREF)
633 fprintf(stderr, ", dup root backref");
634 fprintf(stderr, "\n");
/*
 * Look up the inode record for @ino in @inode_cache, creating it if needed.
 *
 * When an entry exists and the caller intends to modify it (mod) while the
 * record is shared (refs > 1), the record is replaced in the cache node by a
 * private copy made with clone_inode_rec().
 * Otherwise a zeroed record is allocated with extent_start set to (u64)-1,
 * empty backref/orphan lists and an empty hole tree, wrapped in a ptr_node
 * covering [ino, ino + 1) and inserted into the cache.
 * BTRFS_FREE_INO_OBJECTID gets special treatment.
 *
 * Returns ERR_PTR(-ENOMEM) on allocation failure; ERR_PTR(-EEXIST) is
 * returned on the insert path.
 * NOTE(review): the remaining parameters (callers pass ino and mod), the
 * branch conditions, the success return and any freeing on the error paths
 * are not visible in this excerpt.
 */
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
640 struct ptr_node *node;
641 struct cache_extent *cache;
642 struct inode_record *rec = NULL;
645 cache = lookup_cache_extent(inode_cache, ino, 1);
647 node = container_of(cache, struct ptr_node, cache);
/* Shared record about to be modified: switch to a private clone */
649 if (mod && rec->refs > 1) {
650 node->data = clone_inode_rec(rec);
651 if (IS_ERR(node->data))
657 rec = calloc(1, sizeof(*rec));
659 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no file extent seen yet" (tested in merge_inode_recs) */
661 rec->extent_start = (u64)-1;
663 INIT_LIST_HEAD(&rec->backrefs);
664 INIT_LIST_HEAD(&rec->orphan_extents);
665 rec->holes = RB_ROOT;
667 node = malloc(sizeof(*node));
670 return ERR_PTR(-ENOMEM);
672 node->cache.start = ino;
673 node->cache.size = 1;
676 if (ino == BTRFS_FREE_INO_OBJECTID)
679 ret = insert_cache_extent(inode_cache, &node->cache);
681 return ERR_PTR(-EEXIST);
/*
 * Drain the list @orphan_extents, unlinking entries from the head until the
 * list is empty.
 * NOTE(review): the freeing of each entry is not visible in this excerpt.
 */
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
688 struct orphan_data_extent *orphan;
690 while (!list_empty(orphan_extents)) {
691 orphan = list_entry(orphan_extents->next,
692 struct orphan_data_extent, list);
693 list_del(&orphan->list);
/*
 * Tear down an inode record: unlink all backrefs, then release the orphan
 * extent list and the hole tree.
 * NOTE(review): any reference-count check at the top and the free() calls
 * are not visible in this excerpt.
 */
698 static void free_inode_rec(struct inode_record *rec)
700 struct inode_backref *backref;
705 while (!list_empty(&rec->backrefs)) {
706 backref = to_inode_backref(rec->backrefs.next);
707 list_del(&backref->list);
710 free_orphan_data_extents(&rec->orphan_extents);
711 free_file_extent_holes(&rec->holes);
/*
 * Test whether @rec is fully verified: no errors, marked checked, inode item
 * found, nlink equal to the number of links found and no pending backrefs.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably non-zero when all conditions hold.
 */
715 static int can_free_inode_rec(struct inode_record *rec)
717 if (!rec->errors && rec->checked && rec->found_inode_item &&
718 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalise checks on @rec and drop it from @inode_cache if nothing is wrong.
 *
 * Steps, in order:
 *  - stop early when no inode item has been found yet;
 *  - for each backref with both dir item and dir index found, flag a
 *    filetype mismatch against the inode mode, and unlink the backref when
 *    it is error-free, has an inode ref and the link counts agree;
 *  - stop when the record is not yet checked or is being merged;
 *  - directories: flag wrong isize and unexpected file extents;
 *    regular files/symlinks: flag unexpected dir items, wrong nbytes and
 *    (unless no_holes, or nlink is 0) missing file extent coverage;
 *  - regular files/symlinks: flag csum items on nodatasum inodes and
 *    missing csums on inodes that do use data checksums;
 *  - if can_free_inode_rec() approves, remove the record's node from the
 *    cache.
 * The record must be unshared at that point (BUG_ON(rec->refs != 1)).
 * NOTE(review): the early returns and the free() calls are not visible in
 * this excerpt.
 */
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724 struct inode_record *rec)
726 struct cache_extent *cache;
727 struct inode_backref *tmp, *backref;
728 struct ptr_node *node;
731 if (!rec->found_inode_item)
734 filetype = imode_to_type(rec->imode);
735 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736 if (backref->found_dir_item && backref->found_dir_index) {
737 if (backref->filetype != filetype)
738 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739 if (!backref->errors && backref->found_inode_ref &&
740 rec->nlink == rec->found_link) {
741 list_del(&backref->list);
747 if (!rec->checked || rec->merging)
750 if (S_ISDIR(rec->imode)) {
751 if (rec->found_size != rec->isize)
752 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753 if (rec->found_file_extent)
754 rec->errors |= I_ERR_ODD_FILE_EXTENT;
755 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756 if (rec->found_dir_item)
757 rec->errors |= I_ERR_ODD_DIR_ITEM;
758 if (rec->found_size != rec->nbytes)
759 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Extents end before isize, or a hole starts before isize */
760 if (rec->nlink > 0 && !no_holes &&
761 (rec->extent_end < rec->isize ||
762 first_extent_gap(&rec->holes) < rec->isize))
763 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
766 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767 if (rec->found_csum_item && rec->nodatasum)
768 rec->errors |= I_ERR_ODD_CSUM_ITEM;
769 if (rec->some_csum_missing && !rec->nodatasum)
770 rec->errors |= I_ERR_SOME_CSUM_MISSING;
773 BUG_ON(rec->refs != 1);
774 if (can_free_inode_rec(rec)) {
775 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776 node = container_of(cache, struct ptr_node, cache);
777 BUG_ON(node->data != rec);
778 remove_cache_extent(inode_cache, &node->cache);
/*
 * Search @root for an orphan item, using a key with objectid
 * BTRFS_ORPHAN_OBJECTID and type BTRFS_ORPHAN_ITEM_KEY.
 *
 * The search is read-only (no transaction, no insert/cow) and the path is
 * released before returning.
 * NOTE(review): the key offset assignment (presumably @ino) and the mapping
 * of the search result to the return value are not visible in this excerpt.
 */
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
786 struct btrfs_path path;
787 struct btrfs_key key;
790 key.objectid = BTRFS_ORPHAN_OBJECTID;
791 key.type = BTRFS_ORPHAN_ITEM_KEY;
794 btrfs_init_path(&path);
795 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM found at @slot of leaf @eb into the
 * currently active inode record.
 *
 * The active record must match key->objectid and must not be shared
 * (BUG_ON otherwise). A second inode item for the same record is flagged as
 * I_ERR_DUP_INODE_ITEM. Otherwise nlink, size, nbytes and mode are read from
 * the item, the NODATASUM inode flag is inspected, found_inode_item is set
 * and maybe_free_inode_rec() is given a chance to retire the record.
 * NOTE(review): the condition under which I_ERR_NO_ORPHAN_ITEM is set, the
 * statement under the NODATASUM test and the return values are not visible
 * in this excerpt.
 */
802 static int process_inode_item(struct extent_buffer *eb,
803 int slot, struct btrfs_key *key,
804 struct shared_node *active_node)
806 struct inode_record *rec;
807 struct btrfs_inode_item *item;
809 rec = active_node->current;
810 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811 if (rec->found_inode_item) {
812 rec->errors |= I_ERR_DUP_INODE_ITEM;
815 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816 rec->nlink = btrfs_inode_nlink(eb, item);
817 rec->isize = btrfs_inode_size(eb, item);
818 rec->nbytes = btrfs_inode_nbytes(eb, item);
819 rec->imode = btrfs_inode_mode(eb, item);
820 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
822 rec->found_inode_item = 1;
824 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec matching (@dir, name, @namelen), or create one.
 *
 * Existing backrefs are matched on dir, namelen and the name bytes; records
 * for BTRFS_MULTIPLE_OBJECTIDS are special-cased in the search loop. When no
 * match is found a new backref is allocated with room for the name plus a
 * terminating NUL, its header is zeroed, the name is copied in and it is
 * appended to rec->backrefs.
 * NOTE(review): the name parameter line, the allocation-failure handling,
 * the dir assignment and the return statements are not visible in this
 * excerpt.
 */
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
831 int namelen, u64 dir)
833 struct inode_backref *backref;
835 list_for_each_entry(backref, &rec->backrefs, list) {
836 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
838 if (backref->dir != dir || backref->namelen != namelen)
840 if (memcmp(name, backref->name, namelen))
/* No match: allocate header + name + NUL terminator */
845 backref = malloc(sizeof(*backref) + namelen + 1);
848 memset(backref, 0, sizeof(*backref));
850 backref->namelen = namelen;
851 memcpy(backref->name, name, namelen);
852 backref->name[namelen] = '\0';
853 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one reference to inode @ino from directory @dir under @name.
 *
 * The inode record is fetched for modification and the matching backref is
 * looked up or created; @errors is OR-ed into it. What is updated depends on
 * @itemtype:
 *  - BTRFS_DIR_INDEX_KEY: flags a duplicate dir index, an index mismatch
 *    against an already seen inode ref and a filetype mismatch against an
 *    already seen dir item; stores index and filetype.
 *  - BTRFS_DIR_ITEM_KEY: flags a duplicate dir item and a filetype mismatch
 *    against an already seen dir index; stores filetype.
 *  - BTRFS_INODE_REF_KEY / BTRFS_INODE_EXTREF_KEY: flags a duplicate inode
 *    ref and an index mismatch against an already seen dir index; stores
 *    index and the ref type.
 * Finally maybe_free_inode_rec() is called on the record.
 * NOTE(review): validation of the get_inode_rec()/get_inode_backref()
 * results, any found_link accounting and the return value are not visible
 * in this excerpt.
 */
857 static int add_inode_backref(struct cache_tree *inode_cache,
858 u64 ino, u64 dir, u64 index,
859 const char *name, int namelen,
860 u8 filetype, u8 itemtype, int errors)
862 struct inode_record *rec;
863 struct inode_backref *backref;
865 rec = get_inode_rec(inode_cache, ino, 1);
867 backref = get_inode_backref(rec, name, namelen, dir);
870 backref->errors |= errors;
871 if (itemtype == BTRFS_DIR_INDEX_KEY) {
872 if (backref->found_dir_index)
873 backref->errors |= REF_ERR_DUP_DIR_INDEX;
874 if (backref->found_inode_ref && backref->index != index)
875 backref->errors |= REF_ERR_INDEX_UNMATCH;
876 if (backref->found_dir_item && backref->filetype != filetype)
877 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
879 backref->index = index;
880 backref->filetype = filetype;
881 backref->found_dir_index = 1;
882 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
884 if (backref->found_dir_item)
885 backref->errors |= REF_ERR_DUP_DIR_ITEM;
886 if (backref->found_dir_index && backref->filetype != filetype)
887 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
889 backref->filetype = filetype;
890 backref->found_dir_item = 1;
891 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893 if (backref->found_inode_ref)
894 backref->errors |= REF_ERR_DUP_INODE_REF;
895 if (backref->found_dir_index && backref->index != index)
896 backref->errors |= REF_ERR_INDEX_UNMATCH;
898 backref->index = index;
900 backref->ref_type = itemtype;
901 backref->found_inode_ref = 1;
906 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold the information gathered in @src into @dst (which lives in
 * @dst_cache).
 *
 * - Every backref of @src is replayed into @dst through add_inode_backref(),
 *   once per kind of item that was seen for it (dir index, dir item, inode
 *   ref).
 * - The found_dir_item / found_file_extent / found_csum_item /
 *   some_csum_missing flags are propagated.
 * - Holes of @src are copied when @dst's first gap lies after @src's.
 * - found_link (minus dir_count) and found_size are accumulated.
 * - Extent ranges are combined: an overlap is flagged as
 *   I_ERR_FILE_EXTENT_OVERLAP, a gap between dst->extent_end and
 *   src->extent_start becomes a hole, and extent_end grows as needed.
 * - Error bits are OR-ed, and inode item fields are taken from @src unless
 *   @dst already has an inode item, in which case I_ERR_DUP_INODE_ITEM is
 *   set.
 * NOTE(review): how dir_count is computed, the handling of ret and the
 * return value are not visible in this excerpt.
 */
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911 struct cache_tree *dst_cache)
913 struct inode_backref *backref;
918 list_for_each_entry(backref, &src->backrefs, list) {
919 if (backref->found_dir_index) {
920 add_inode_backref(dst_cache, dst->ino, backref->dir,
921 backref->index, backref->name,
922 backref->namelen, backref->filetype,
923 BTRFS_DIR_INDEX_KEY, backref->errors);
925 if (backref->found_dir_item) {
927 add_inode_backref(dst_cache, dst->ino,
928 backref->dir, 0, backref->name,
929 backref->namelen, backref->filetype,
930 BTRFS_DIR_ITEM_KEY, backref->errors);
932 if (backref->found_inode_ref) {
933 add_inode_backref(dst_cache, dst->ino,
934 backref->dir, backref->index,
935 backref->name, backref->namelen, 0,
936 backref->ref_type, backref->errors);
940 if (src->found_dir_item)
941 dst->found_dir_item = 1;
942 if (src->found_file_extent)
943 dst->found_file_extent = 1;
944 if (src->found_csum_item)
945 dst->found_csum_item = 1;
946 if (src->some_csum_missing)
947 dst->some_csum_missing = 1;
948 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949 ret = copy_file_extent_holes(&dst->holes, &src->holes);
954 BUG_ON(src->found_link < dir_count);
955 dst->found_link += src->found_link - dir_count;
956 dst->found_size += src->found_size;
/* (u64)-1 means the record has not seen any file extent */
957 if (src->extent_start != (u64)-1) {
958 if (dst->extent_start == (u64)-1) {
959 dst->extent_start = src->extent_start;
960 dst->extent_end = src->extent_end;
962 if (dst->extent_end > src->extent_start)
963 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964 else if (dst->extent_end < src->extent_start) {
965 ret = add_file_extent_hole(&dst->holes,
967 src->extent_start - dst->extent_end);
969 if (dst->extent_end < src->extent_end)
970 dst->extent_end = src->extent_end;
974 dst->errors |= src->errors;
975 if (src->found_inode_item) {
976 if (!dst->found_inode_item) {
977 dst->nlink = src->nlink;
978 dst->isize = src->isize;
979 dst->nbytes = src->nbytes;
980 dst->imode = src->imode;
981 dst->nodatasum = src->nodatasum;
982 dst->found_inode_item = 1;
984 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move or copy the cached records of @src_node into @dst_node.
 *
 * A reference on @src_node is dropped first. Its root_cache is processed
 * into @dst_node's root_cache, then its inode_cache into @dst_node's
 * inode_cache. Each source entry gets a new ptr_node inserted into the
 * destination; on -EEXIST the source record is merged into the conflicting
 * destination record with merge_inode_recs(), which may then be marked
 * checked and retired via maybe_free_inode_rec(), and the source record is
 * released with free_inode_rec().
 * Finally, if @src_node had a current inode with a number greater than
 * @dst_node's current one (or @dst_node has none), @dst_node's current
 * record is marked checked and replaced by the record for that inode.
 * NOTE(review): what happens when the source refcount reaches zero (move
 * vs. share), the loop constructs and the return value are not visible in
 * this excerpt.
 */
992 static int splice_shared_node(struct shared_node *src_node,
993 struct shared_node *dst_node)
995 struct cache_extent *cache;
996 struct ptr_node *node, *ins;
997 struct cache_tree *src, *dst;
998 struct inode_record *rec, *conflict;
1003 if (--src_node->refs == 0)
1005 if (src_node->current)
1006 current_ino = src_node->current->ino;
1008 src = &src_node->root_cache;
1009 dst = &dst_node->root_cache;
1011 cache = search_cache_extent(src, 0);
1013 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry is possibly removed from src */
1015 cache = next_cache_extent(cache);
1018 remove_cache_extent(src, &node->cache);
1021 ins = malloc(sizeof(*ins));
1023 ins->cache.start = node->cache.start;
1024 ins->cache.size = node->cache.size;
1028 ret = insert_cache_extent(dst, &ins->cache);
1029 if (ret == -EEXIST) {
1030 conflict = get_inode_rec(dst, rec->ino, 1);
1031 BUG_ON(IS_ERR(conflict));
1032 merge_inode_recs(rec, conflict, dst);
1034 conflict->checked = 1;
1035 if (dst_node->current == conflict)
1036 dst_node->current = NULL;
1038 maybe_free_inode_rec(dst, conflict);
1039 free_inode_rec(rec);
/* Second pass: repeat the same procedure for the inode caches */
1046 if (src == &src_node->root_cache) {
1047 src = &src_node->inode_cache;
1048 dst = &dst_node->inode_cache;
1052 if (current_ino > 0 && (!dst_node->current ||
1053 current_ino > dst_node->current->ino)) {
1054 if (dst_node->current) {
1055 dst_node->current->checked = 1;
1056 maybe_free_inode_rec(dst, dst_node->current);
1058 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059 BUG_ON(IS_ERR(dst_node->current));
/*
 * Release callback for a cache entry that wraps an inode record in a
 * ptr_node; the record is released with free_inode_rec().
 * NOTE(review): the assignment of rec and the freeing of the ptr_node itself
 * are not visible in this excerpt.
 */
1064 static void free_inode_ptr(struct cache_extent *cache)
1066 struct ptr_node *node;
1067 struct inode_record *rec;
1069 node = container_of(cache, struct ptr_node, cache);
1071 free_inode_rec(rec);
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered at bytenr in the @shared cache tree
 * (lookup range of size 1).
 * NOTE(review): the bytenr parameter line and the return statements
 * (including the not-found case) are not visible in this excerpt.
 */
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1080 struct cache_extent *cache;
1081 struct shared_node *node;
1083 cache = lookup_cache_extent(shared, bytenr, 1);
1085 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node for @bytenr, initialise its root and inode
 * caches and insert it into @shared as a size-1 extent at @bytenr.
 * NOTE(review): the allocation-failure check, the use of @refs and the
 * return value are not visible in this excerpt.
 */
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1094 struct shared_node *node;
1096 node = calloc(1, sizeof(*node));
1099 node->cache.start = bytenr;
1100 node->cache.size = 1;
1101 cache_tree_init(&node->root_cache);
1102 cache_tree_init(&node->inode_cache);
1105 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk reaches the shared block at @bytenr on @level.
 *
 * Nothing special is needed when @level is already the active level;
 * otherwise the active level must be above @level (BUG_ON). If the block has
 * no shared_node yet, one is created, registered in wc->nodes[level] and
 * made the active node. If one exists:
 *  - when the active node is the root level and the root has zero refs, a
 *    reference on the node is dropped and, at zero, its caches are freed and
 *    it is removed from wc->shared;
 *  - otherwise its records are spliced into the active node, and it is
 *    removed from wc->shared once its refcount reaches zero.
 * NOTE(review): the branch conditions, free() calls and return values are
 * not visible in this excerpt.
 */
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111 struct walk_control *wc, int level)
1113 struct shared_node *node;
1114 struct shared_node *dest;
1117 if (level == wc->active_node)
1120 BUG_ON(wc->active_node <= level);
1121 node = find_shared_node(&wc->shared, bytenr);
1123 ret = add_shared_node(&wc->shared, bytenr, refs);
1125 node = find_shared_node(&wc->shared, bytenr);
1126 wc->nodes[level] = node;
1127 wc->active_node = level;
1131 if (wc->root_level == wc->active_node &&
1132 btrfs_root_refs(&root->root_item) == 0) {
1133 if (--node->refs == 0) {
1134 free_inode_recs_tree(&node->root_cache);
1135 free_inode_recs_tree(&node->inode_cache);
1136 remove_cache_extent(&wc->shared, &node->cache);
1142 dest = wc->nodes[wc->active_node];
1143 splice_shared_node(node, dest);
1144 if (node->refs == 0) {
1145 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the tree walk leaves the shared block on @level.
 *
 * Nothing is done at the root level. Otherwise a level index i above @level
 * is determined (it must stay below BTRFS_MAX_LEVEL), the currently active
 * node is detached from wc->nodes and level i becomes active. The detached
 * node's records are spliced into the new active node when that node is
 * below the root level or the root still has references; the node must hold
 * more than one reference in that case.
 * NOTE(review): the body of the level-search loop, the alternative branch
 * and the return value are not visible in this excerpt.
 */
1151 static int leave_shared_node(struct btrfs_root *root,
1152 struct walk_control *wc, int level)
1154 struct shared_node *node;
1155 struct shared_node *dest;
1158 if (level == wc->root_level)
1161 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1165 BUG_ON(i >= BTRFS_MAX_LEVEL);
1167 node = wc->nodes[wc->active_node];
1168 wc->nodes[wc->active_node] = NULL;
1169 wc->active_node = i;
1171 dest = wc->nodes[wc->active_node];
1172 if (wc->active_node < wc->root_level ||
1173 btrfs_root_refs(&root->root_item) > 0) {
1174 BUG_ON(node->refs <= 1);
1175 splice_shared_node(node, dest);
1177 BUG_ON(node->refs < 2);
/*
 * Determine how root child_root_id relates to root parent_root_id by
 * searching the tree root (root->fs_info->tree_root).
 *
 * First a ROOT_REF key (parent_root_id, child_root_id) is searched. Then the
 * ROOT_BACKREF items of child_root_id are walked, moving to the next leaf
 * when the slot runs past the current one, until the key no longer belongs
 * to child_root_id / ROOT_BACKREF; a backref whose offset equals
 * parent_root_id ends the walk early. The path is released on all visible
 * exits. The result codes are listed in the fragment below.
 * NOTE(review): the child_root_id parameter line, error handling and the
 * statements that set has_parent are not visible in this excerpt.
 */
1186 * 1 - if the root with id child_root_id is a child of root parent_root_id
1187 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1188 * has other root(s) as parent(s)
1189 * 2 - if the root child_root_id doesn't have any parent roots
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1194 struct btrfs_path path;
1195 struct btrfs_key key;
1196 struct extent_buffer *leaf;
1200 btrfs_init_path(&path);
1202 key.objectid = parent_root_id;
1203 key.type = BTRFS_ROOT_REF_KEY;
1204 key.offset = child_root_id;
1205 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1209 btrfs_release_path(&path);
1213 key.objectid = child_root_id;
1214 key.type = BTRFS_ROOT_BACKREF_KEY;
1216 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1222 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one */
1223 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1227 leaf = path.nodes[0];
1230 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231 if (key.objectid != child_root_id ||
1232 key.type != BTRFS_ROOT_BACKREF_KEY)
1237 if (key.offset == parent_root_id) {
1238 btrfs_release_path(&path);
1245 btrfs_release_path(&path);
1248 return has_parent ? 0 : 2;
/*
 * Process a DIR_ITEM or DIR_INDEX item at @slot of leaf @eb for the active
 * directory inode.
 *
 * The active record is marked found_dir_item. Each entry packed in the item
 * is decoded (location key, name length, data length, file type); its name
 * length is added to rec->found_size. Entries whose name would cross the
 * item boundary or exceed BTRFS_NAME_LEN get REF_ERR_NAME_TOO_LONG and a
 * clamped length. For DIR_ITEM keys the name hash must equal key->offset,
 * otherwise I_ERR_ODD_DIR_ITEM is set and an error is reported.
 * A backref is then added to the inode cache (location is an INODE_ITEM),
 * to the root cache (location is a ROOT_ITEM), or, for an unknown location
 * type, to the inode cache under BTRFS_MULTIPLE_OBJECTIDS.
 * A DIR_INDEX item holding more than one entry is flagged
 * I_ERR_DUP_DIR_INDEX.
 * NOTE(review): local declarations, the cursor/nritems updates, the
 * non-error length assignment and the return value are not visible in this
 * excerpt.
 */
1251 static int process_dir_item(struct extent_buffer *eb,
1252 int slot, struct btrfs_key *key,
1253 struct shared_node *active_node)
1263 struct btrfs_dir_item *di;
1264 struct inode_record *rec;
1265 struct cache_tree *root_cache;
1266 struct cache_tree *inode_cache;
1267 struct btrfs_key location;
1268 char namebuf[BTRFS_NAME_LEN];
1270 root_cache = &active_node->root_cache;
1271 inode_cache = &active_node->inode_cache;
1272 rec = active_node->current;
1273 rec->found_dir_item = 1;
1275 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276 total = btrfs_item_size_nr(eb, slot);
1277 while (cur < total) {
1279 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280 name_len = btrfs_dir_name_len(eb, di);
1281 data_len = btrfs_dir_data_len(eb, di);
1282 filetype = btrfs_dir_type(eb, di);
1284 rec->found_size += name_len;
/* Name must fit inside the item and within BTRFS_NAME_LEN */
1285 if (cur + sizeof(*di) + name_len > total ||
1286 name_len > BTRFS_NAME_LEN) {
1287 error = REF_ERR_NAME_TOO_LONG;
1289 if (cur + sizeof(*di) > total)
1291 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the dir item header directly */
1298 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1300 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301 key->offset != btrfs_name_hash(namebuf, len)) {
1302 rec->errors |= I_ERR_ODD_DIR_ITEM;
1303 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304 key->objectid, key->offset, namebuf, len, filetype,
1305 key->offset, btrfs_name_hash(namebuf, len));
1308 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309 add_inode_backref(inode_cache, location.objectid,
1310 key->objectid, key->offset, namebuf,
1311 len, filetype, key->type, error);
1312 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313 add_inode_backref(root_cache, location.objectid,
1314 key->objectid, key->offset,
1315 namebuf, len, filetype,
1319 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320 location.type, key->objectid, key->offset);
1321 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322 key->objectid, key->offset, namebuf,
1323 len, filetype, key->type, error);
/* Step to the next packed entry: header + name + data */
1326 len = sizeof(*di) + name_len + data_len;
1327 di = (struct btrfs_dir_item *)((char *)di + len);
1330 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process an INODE_REF item at @slot of leaf @eb.
 *
 * Each reference packed in the item is decoded (name length and index). A
 * reference whose header plus name would cross the item boundary, or whose
 * name exceeds BTRFS_NAME_LEN, is flagged REF_ERR_NAME_TOO_LONG and read
 * with a clamped length; processing cannot continue when even the header
 * does not fit. Each reference is recorded through add_inode_backref() with
 * key->objectid as the inode and key->offset as the parent directory.
 * NOTE(review): local declarations, the non-error length assignment, the
 * cursor update and the return value are not visible in this excerpt.
 */
1336 static int process_inode_ref(struct extent_buffer *eb,
1337 int slot, struct btrfs_key *key,
1338 struct shared_node *active_node)
1346 struct cache_tree *inode_cache;
1347 struct btrfs_inode_ref *ref;
1348 char namebuf[BTRFS_NAME_LEN];
1350 inode_cache = &active_node->inode_cache;
1352 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353 total = btrfs_item_size_nr(eb, slot);
1354 while (cur < total) {
1355 name_len = btrfs_inode_ref_name_len(eb, ref);
1356 index = btrfs_inode_ref_index(eb, ref);
1358 /* inode_ref + namelen should not cross item boundary */
1359 if (cur + sizeof(*ref) + name_len > total ||
1360 name_len > BTRFS_NAME_LEN) {
1361 if (total < cur + sizeof(*ref))
1364 /* Still try to read out the remaining part */
1365 len = min_t(u32, total - cur - sizeof(*ref),
1367 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the ref header directly */
1373 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374 add_inode_backref(inode_cache, key->objectid, key->offset,
1375 index, namebuf, len, 0, key->type, error);
/* Step to the next packed reference: header + name */
1377 len = sizeof(*ref) + name_len;
1378 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process an INODE_EXTREF item at @slot of leaf @eb.
 *
 * Each extended reference packed in the item is decoded (name length, index
 * and parent directory). Names longer than BTRFS_NAME_LEN are truncated to
 * BTRFS_NAME_LEN and flagged REF_ERR_NAME_TOO_LONG. Each reference is
 * recorded through add_inode_backref() with key->objectid as the inode and
 * the decoded parent as the directory.
 * NOTE(review): unlike process_inode_ref(), no check that the reference
 * stays inside the item boundary is visible here - confirm whether one is
 * needed. Local declarations, the cursor update and the return value are
 * not visible in this excerpt.
 */
1384 static int process_inode_extref(struct extent_buffer *eb,
1385 int slot, struct btrfs_key *key,
1386 struct shared_node *active_node)
1395 struct cache_tree *inode_cache;
1396 struct btrfs_inode_extref *extref;
1397 char namebuf[BTRFS_NAME_LEN];
1399 inode_cache = &active_node->inode_cache;
1401 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402 total = btrfs_item_size_nr(eb, slot);
1403 while (cur < total) {
1404 name_len = btrfs_inode_extref_name_len(eb, extref);
1405 index = btrfs_inode_extref_index(eb, extref);
1406 parent = btrfs_inode_extref_parent(eb, extref);
1407 if (name_len <= BTRFS_NAME_LEN) {
1411 len = BTRFS_NAME_LEN;
1412 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the extref header directly */
1414 read_extent_buffer(eb, namebuf,
1415 (unsigned long)(extref + 1), len);
1416 add_inode_backref(inode_cache, key->objectid, parent,
1417 index, namebuf, len, 0, key->type, error);
/* Step to the next packed reference: header + name */
1419 len = sizeof(*extref) + name_len;
1420 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Account one EXTENT_DATA item (@slot of @eb, keyed by @key) against the
 * inode record currently tracked by @active_node.
 *
 * The record's extent_start/extent_end are maintained, an extent starting
 * before the previous extent_end is flagged I_ERR_FILE_EXTENT_OVERLAP, and
 * a gap between the previous extent_end and key->offset is registered with
 * add_file_extent_hole().  Inline and regular/prealloc extents then have
 * their fields validated (I_ERR_BAD_FILE_EXTENT on inconsistencies) and
 * found_size is increased.  For extents with a non-zero disk_bytenr in a
 * block not owned by the data reloc tree, csum coverage is counted with
 * count_csum_range() and reflected in found_csum_item, some_csum_missing
 * or I_ERR_ODD_CSUM_ITEM.
 */
1427 static int process_file_extent(struct btrfs_root *root,
1428 struct extent_buffer *eb,
1429 int slot, struct btrfs_key *key,
1430 struct shared_node *active_node)
1432 struct inode_record *rec;
1433 struct btrfs_file_extent_item *fi;
1435 u64 disk_bytenr = 0;
1436 u64 extent_offset = 0;
/* mask is sectorsize - 1 and is used for the alignment tests below */
1437 u64 mask = root->fs_info->sectorsize - 1;
1441 rec = active_node->current;
/* Sanity: the current record must be for this inode with at most one ref */
1442 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1443 rec->found_file_extent = 1;
1445 if (rec->extent_start == (u64)-1) {
1446 rec->extent_start = key->offset;
1447 rec->extent_end = key->offset;
1450 if (rec->extent_end > key->offset)
1451 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1452 else if (rec->extent_end < key->offset) {
1453 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1454 key->offset - rec->extent_end);
1459 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1460 extent_type = btrfs_file_extent_type(eb, fi);
1462 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1463 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1465 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1466 rec->found_size += num_bytes;
/* found_size got the raw length; extent_end uses the mask-rounded one */
1467 num_bytes = (num_bytes + mask) & ~mask;
1468 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1469 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1470 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1471 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1472 extent_offset = btrfs_file_extent_offset(eb, fi);
1473 if (num_bytes == 0 || (num_bytes & mask))
1474 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1475 if (num_bytes + extent_offset >
1476 btrfs_file_extent_ram_bytes(eb, fi))
1477 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1478 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1479 (btrfs_file_extent_compression(eb, fi) ||
1480 btrfs_file_extent_encryption(eb, fi) ||
1481 btrfs_file_extent_other_encoding(eb, fi)))
1482 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1483 if (disk_bytenr > 0)
1484 rec->found_size += num_bytes;
1486 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1488 rec->extent_end = key->offset + num_bytes;
1491 * The data reloc tree will copy full extents into its inode and then
1492 * copy the corresponding csums. Because the extent it copied could be
1493 * a preallocated extent that hasn't been written to yet there may be no
1494 * csums to copy, ergo we won't have csums for our file extent. This is
1495 * ok so just don't bother checking csums if the inode belongs to the
1498 if (disk_bytenr > 0 &&
1499 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1501 if (btrfs_file_extent_compression(eb, fi))
1502 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1504 disk_bytenr += extent_offset;
1506 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1510 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1512 rec->found_csum_item = 1;
1513 if (found < num_bytes)
1514 rec->some_csum_missing = 1;
1515 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1517 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed the items of leaf @eb to the matching process_*() helper while
 * keeping the active shared node's current inode record in step with the
 * item keys.
 *
 * @wc supplies the active node (wc->nodes[wc->active_node]); its inode cache
 * is where records are looked up and released.  Keys with objectid
 * BTRFS_FREE_SPACE_OBJECTID and keys of type BTRFS_ORPHAN_ITEM_KEY are
 * tested for before the per-type dispatch.
 */
1523 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1524 struct walk_control *wc)
1526 struct btrfs_key key;
1530 struct cache_tree *inode_cache;
1531 struct shared_node *active_node;
/* Special case: the active node is the root level of a root with zero refs */
1533 if (wc->root_level == wc->active_node &&
1534 btrfs_root_refs(&root->root_item) == 0)
1537 active_node = wc->nodes[wc->active_node];
1538 inode_cache = &active_node->inode_cache;
1539 nritems = btrfs_header_nritems(eb);
1540 for (i = 0; i < nritems; i++) {
1541 btrfs_item_key_to_cpu(eb, &key, i);
1543 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1545 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * No current record yet, or the key belongs to a higher inode number:
 * mark the old record checked, offer it to maybe_free_inode_rec() and
 * fetch a new current record.
 */
1548 if (active_node->current == NULL ||
1549 active_node->current->ino < key.objectid) {
1550 if (active_node->current) {
1551 active_node->current->checked = 1;
1552 maybe_free_inode_rec(inode_cache,
1553 active_node->current);
1555 active_node->current = get_inode_rec(inode_cache,
1557 BUG_ON(IS_ERR(active_node->current));
1560 case BTRFS_DIR_ITEM_KEY:
1561 case BTRFS_DIR_INDEX_KEY:
1562 ret = process_dir_item(eb, i, &key, active_node);
1564 case BTRFS_INODE_REF_KEY:
1565 ret = process_inode_ref(eb, i, &key, active_node);
1567 case BTRFS_INODE_EXTREF_KEY:
1568 ret = process_inode_extref(eb, i, &key, active_node);
1570 case BTRFS_INODE_ITEM_KEY:
1571 ret = process_inode_item(eb, i, &key, active_node);
1573 case BTRFS_EXTENT_DATA_KEY:
1574 ret = process_file_extent(root, eb, i, &key,
/* Forward declarations for helpers called by process_one_leaf_v2() below. */
1584 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1585 struct extent_buffer *eb, struct node_refs *nrefs,
1586 u64 level, int check_all);
1587 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1588 unsigned int ext_ref);
1591 * Returns >0 Found error, not fatal, should continue
1592 * Returns <0 Fatal error, must exit the whole check
1593 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0] by calling
 * check_inode_item() and OR-ing its result into the err bitmap.
 *
 * The first loop positions the scan at the first INODE_ITEM key or at the
 * first change of inode number.  check_inode_item() may move @path; when it
 * lands on a different leaf, the nodes from the root level downwards are
 * compared with the bytenrs cached in @nrefs and refreshed through
 * update_nodes_refs() when they differ.  Path entries below *level are
 * released in the final loop.
 */
1595 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1596 struct node_refs *nrefs, int *level, int ext_ref)
1598 struct extent_buffer *cur = path->nodes[0];
1599 struct btrfs_key key;
1603 int root_level = btrfs_header_level(root->node);
1605 int ret = 0; /* Final return value */
1606 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on so a leaf switch can be detected */
1608 cur_bytenr = cur->start;
1610 /* skip to first inode item or the first inode number change */
1611 nritems = btrfs_header_nritems(cur);
1612 for (i = 0; i < nritems; i++) {
1613 btrfs_item_key_to_cpu(cur, &key, i);
1615 first_ino = key.objectid;
1616 if (key.type == BTRFS_INODE_ITEM_KEY ||
1617 (first_ino && first_ino != key.objectid))
1621 path->slots[0] = nritems;
1627 err |= check_inode_item(root, path, ext_ref);
1629 /* modify cur since check_inode_item may change path */
1630 cur = path->nodes[0];
1632 if (err & LAST_ITEM)
1635 /* still have inode items in this leaf */
1636 if (cur->start == cur_bytenr)
1640 * we have switched to another leaf, above nodes may
1641 * have changed, here walk down the path, if a node
1642 * or leaf is shared, check whether we can skip this
1645 for (i = root_level; i >= 0; i--) {
1646 if (path->nodes[i]->start == nrefs->bytenr[i])
1649 ret = update_nodes_refs(root, path->nodes[i]->start,
1650 path->nodes[i], nrefs, i, 0);
1654 if (!nrefs->need_check[i]) {
1660 for (i = 0; i < *level; i++) {
1661 free_extent_buffer(path->nodes[i]);
1662 path->nodes[i] = NULL;
1672 * Check the child node/leaf by the following condition:
1673 * 1. the first item key of the node/leaf should be the same with the one
1675 * 2. block in parent node should match the child node/leaf.
1676 * 3. generation of parent node and child's header should be consistent.
1678 * Or the child node/leaf pointed by the key in parent is not valid.
1680 * We hope to check leaf owner too, but since subvol may share leaves,
1681 * which makes leaf owner check not so strong, key check should be
1682 * sufficient enough for that case.
/*
 * Compare @child with the pointer stored at @slot of @parent.
 *
 * Three things are compared: the parent's key for the slot against the
 * child's first key (item key for a level-0 child, node key otherwise), the
 * parent's block pointer against the child's header bytenr, and the parent's
 * pointer generation against the child's header generation.  Each mismatch
 * produces a diagnostic message.
 */
1684 static int check_child_node(struct extent_buffer *parent, int slot,
1685 struct extent_buffer *child)
1687 struct btrfs_key parent_key;
1688 struct btrfs_key child_key;
1691 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1692 if (btrfs_header_level(child) == 0)
1693 btrfs_item_key_to_cpu(child, &child_key, 0);
1695 btrfs_node_key_to_cpu(child, &child_key, 0);
1697 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1700 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1701 parent_key.objectid, parent_key.type, parent_key.offset,
1702 child_key.objectid, child_key.type, child_key.offset);
1704 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1706 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1707 btrfs_node_blockptr(parent, slot),
1708 btrfs_header_bytenr(child));
1710 if (btrfs_node_ptr_generation(parent, slot) !=
1711 btrfs_header_generation(child)) {
/*
 * NOTE(review): this message prints the child's header generation as
 * "wanted" and the parent's pointer generation as "have", the opposite
 * order from the block-number message above - confirm which is intended.
 */
1713 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1714 btrfs_header_generation(child),
1715 btrfs_node_ptr_generation(parent, slot));
1721 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1722 * in every fs or file tree check. Here we find all its root ids, and only check
1723 * it in the fs or file tree which has the smallest root id.
/*
 * Decide, from the set of root ids in @roots, whether @root is the tree
 * that should check a (possibly shared) tree block.
 *
 * A @roots list holding zero or one node is handled by the first test.
 * Otherwise root->objectid is compared with the value of the first entry
 * returned by rb_first() on the ulist's rb-tree.
 */
1725 static int need_check(struct btrfs_root *root, struct ulist *roots)
1727 struct rb_node *node;
1728 struct ulist_node *u;
1731 * @roots can be empty if it belongs to tree reloc tree
1732 * In that case, we should always check the leaf, as we can't use
1733 * the tree owner to ensure some other root will check it.
1735 if (roots->nnodes == 1 || roots->nnodes == 0)
1738 node = rb_first(&roots->root);
1739 u = rb_entry(node, struct ulist_node, rb_node);
1741 * current root id is not smallest, we skip it and let it be checked
1742 * in the fs or file tree who hash the smallest root id.
1744 if (root->objectid != u->val)
/*
 * Work out whether tree block @eb, as seen from @root, should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, and set or clear that bit in *flags_ret.
 *
 * Checks that need no tree search come first: the root objectid against
 * BTRFS_FIRST_FREE_OBJECTID, eb->start against the root item's bytenr, the
 * RELOC header flag, and the header owner against root->objectid.  If none
 * of those decide the result, the block's extent item is searched for in
 * the extent tree and its flags and inline refs are inspected, looking for
 * a BTRFS_TREE_BLOCK_REF_KEY whose offset equals the header owner.
 */
1750 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1753 struct btrfs_root *extent_root = root->fs_info->extent_root;
1754 struct btrfs_root_item *ri = &root->root_item;
1755 struct btrfs_extent_inline_ref *iref;
1756 struct btrfs_extent_item *ei;
1757 struct btrfs_key key;
1758 struct btrfs_path *path = NULL;
1769 * Except file/reloc tree, we can not have FULL BACKREF MODE
1771 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1775 if (eb->start == btrfs_root_bytenr(ri))
1778 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1781 owner = btrfs_header_owner(eb);
1782 if (owner == root->objectid)
1785 path = btrfs_alloc_path();
1789 key.objectid = btrfs_header_bytenr(eb);
1791 key.offset = (u64)-1;
1793 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1800 ret = btrfs_previous_extent_item(extent_root, path,
1806 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on eb is the extent tree leaf, not the block passed in */
1808 eb = path->nodes[0];
1809 slot = path->slots[0];
1810 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1812 flags = btrfs_extent_flags(eb, ei);
1813 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* ptr/end bound the inline data that follows the extent item header */
1816 ptr = (unsigned long)(ei + 1);
1817 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keys carry a btrfs_tree_block_info before the inline refs */
1819 if (key.type == BTRFS_EXTENT_ITEM_KEY)
1820 ptr += sizeof(struct btrfs_tree_block_info);
1823 /* Reached extent item ends normally */
1827 /* Beyond extent item end, wrong item size */
1829 error("extent item at bytenr %llu slot %d has wrong size",
1834 iref = (struct btrfs_extent_inline_ref *)ptr;
1835 offset = btrfs_extent_inline_ref_offset(eb, iref);
1836 type = btrfs_extent_inline_ref_type(eb, iref);
1838 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1840 ptr += btrfs_extent_inline_ref_size(type);
1844 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1848 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1850 btrfs_free_path(path);
1855 * for a tree node or leaf, we record its reference count, so later if we still
1856 * process this node or leaf, don't need to compute its reference count again.
1858 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the per-level state cached in @nrefs for the tree block at
 * @bytenr: bytenr, refs, full_backref, checked and need_check.
 *
 * For a real @bytenr (anything but (u64)-1) the ref count and flags come
 * from btrfs_lookup_extent_info(), and need_check is derived either from
 * need_check() on the roots found by btrfs_find_all_roots(), from the root
 * level, or from the level above.  With @check_all set and an @eb supplied,
 * calc_extent_flag_v2() is consulted to set full_backref[level].
 */
1860 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1861 struct extent_buffer *eb, struct node_refs *nrefs,
1862 u64 level, int check_all)
1864 struct ulist *roots;
1867 int root_level = btrfs_header_level(root->node);
/* Test whether this level already caches the requested bytenr */
1871 if (nrefs->bytenr[level] == bytenr)
1874 if (bytenr != (u64)-1) {
1875 /* the return value of this function seems a mistake */
1876 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1877 level, 1, &refs, &flags);
1879 if (ret < 0 && !check_all)
/* Start a fresh cache entry for this level */
1882 nrefs->bytenr[level] = bytenr;
1883 nrefs->refs[level] = refs;
1884 nrefs->full_backref[level] = 0;
1885 nrefs->checked[level] = 0;
1888 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1893 check = need_check(root, roots);
1895 nrefs->need_check[level] = check;
1898 nrefs->need_check[level] = 1;
1900 if (level == root_level) {
1901 nrefs->need_check[level] = 1;
1904 * The node refs may have not been
1905 * updated if upper needs checking (the
1906 * lowest root_objectid) the node can
1909 nrefs->need_check[level] =
1910 nrefs->need_check[level + 1];
1916 if (check_all && eb) {
1917 calc_extent_flag_v2(root, eb, &flags);
1918 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1919 nrefs->full_backref[level] = 1;
1926 * @level if @level == -1 means extent data item
1927 * else normal tree block.
/*
 * Use the ref counts cached in @nrefs to inspect the levels above @level,
 * up to and including the root level, looking for any level whose recorded
 * ref count is greater than one.
 *
 * A @level outside [-1, root_level] and a @level equal to the root level
 * are caught by the two tests at the top.
 */
1929 static int should_check_extent_strictly(struct btrfs_root *root,
1930 struct node_refs *nrefs, int level)
1932 int root_level = btrfs_header_level(root->node);
1934 if (level > root_level || level < -1)
1936 if (level == root_level)
1939 * if the upper node is marked full backref, it should contain shared
1940 * backref of the parent (except owner == root->objectid).
1942 while (++level <= root_level)
1943 if (nrefs->refs[level] > 1)
/*
 * Walk down the tree from path->nodes[*level], lowering *level each time a
 * child block is entered and calling process_one_leaf() along the way.
 *
 * Ref counts of visited blocks are taken from @nrefs when the cached bytenr
 * matches; otherwise they are looked up with btrfs_lookup_extent_info() and
 * stored back into @nrefs.  enter_shared_node() is called for both the
 * starting block and child blocks.  A child is fetched with
 * btrfs_find_tree_block()/read_tree_block(), an unreadable one is reported
 * through btrfs_add_corrupt_extent_record(), and a readable one is validated
 * with check_child_node() and btrfs_check_leaf()/btrfs_check_node() before
 * the path is switched to it.
 */
1949 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1950 struct walk_control *wc, int *level,
1951 struct node_refs *nrefs)
1953 enum btrfs_tree_block_status status;
1956 struct btrfs_fs_info *fs_info = root->fs_info;
1957 struct extent_buffer *next;
1958 struct extent_buffer *cur;
1962 WARN_ON(*level < 0);
1963 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when this block is the one already recorded */
1965 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1966 refs = nrefs->refs[*level];
1969 ret = btrfs_lookup_extent_info(NULL, root,
1970 path->nodes[*level]->start,
1971 *level, 1, &refs, NULL);
1976 nrefs->bytenr[*level] = path->nodes[*level]->start;
1977 nrefs->refs[*level] = refs;
1981 ret = enter_shared_node(root, path->nodes[*level]->start,
1989 while (*level >= 0) {
1990 WARN_ON(*level < 0);
1991 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1992 cur = path->nodes[*level];
1994 if (btrfs_header_level(cur) != *level)
1997 if (path->slots[*level] >= btrfs_header_nritems(cur))
2000 ret = process_one_leaf(root, cur, wc);
/* Child block pointer and generation stored in the current slot */
2005 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2006 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2008 if (bytenr == nrefs->bytenr[*level - 1]) {
2009 refs = nrefs->refs[*level - 1];
2011 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2012 *level - 1, 1, &refs, NULL);
2016 nrefs->bytenr[*level - 1] = bytenr;
2017 nrefs->refs[*level - 1] = refs;
2022 ret = enter_shared_node(root, bytenr, refs,
2025 path->slots[*level]++;
/* Try the cached block first; read from disk if absent or stale */
2030 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2031 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2032 free_extent_buffer(next);
2033 reada_walk_down(root, cur, path->slots[*level]);
2034 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2035 if (!extent_buffer_uptodate(next)) {
2036 struct btrfs_key node_key;
2038 btrfs_node_key_to_cpu(path->nodes[*level],
2040 path->slots[*level]);
2041 btrfs_add_corrupt_extent_record(root->fs_info,
2043 path->nodes[*level]->start,
2044 root->fs_info->nodesize,
2051 ret = check_child_node(cur, path->slots[*level], next);
2053 free_extent_buffer(next);
2058 if (btrfs_is_leaf(next))
2059 status = btrfs_check_leaf(root, NULL, next);
2061 status = btrfs_check_node(root, NULL, next);
2062 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2063 free_extent_buffer(next);
/* Descend: replace the path entry one level down with the child */
2068 *level = *level - 1;
2069 free_extent_buffer(path->nodes[*level]);
2070 path->nodes[*level] = next;
2071 path->slots[*level] = 0;
/* Park the slot past the last item of the current block */
2074 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2079 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the global statistics.
 *
 * eb->len always goes into total_btree_bytes, into total_fs_tree_bytes when
 * fs_root_objectid() accepts root->objectid, and into
 * total_extent_tree_bytes when the block's header owner is the extent tree.
 * Unused space is added to btree_space_waste, computed either from
 * btrfs_leaf_free_space() or from the number of unused key pointers.
 */
2081 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2085 struct extent_buffer *eb = path->nodes[level];
2087 total_btree_bytes += eb->len;
2088 if (fs_root_objectid(root->objectid))
2089 total_fs_tree_bytes += eb->len;
2090 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2091 total_extent_tree_bytes += eb->len;
2094 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* Unused pointer slots, each costing sizeof(struct btrfs_key_ptr) bytes */
2096 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2097 btrfs_header_nritems(eb));
2098 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2103 * This function only handles BACKREF_MISSING,
2104 * If corresponding extent item exists, increase the ref, else insert an extent
2107 * Returns error bits after repair.
/*
 * Repair a missing backref for tree block @node in @root.
 *
 * Only acts when @err contains BACKREF_MISSING.  The block's extent item is
 * searched for in the extent tree; when insert_extent is set, a new extent
 * item is inserted (EXTENT_ITEM key plus btrfs_tree_block_info without
 * skinny metadata, METADATA_ITEM key with it), initialised with zero refs
 * and accounted through btrfs_update_block_group().  A reference is then
 * added with btrfs_inc_extent_ref(), using nrefs->bytenr[level + 1] as
 * parent when the level above is full-backref and the header owner differs
 * from root->objectid.  @nrefs is updated to match and BACKREF_MISSING is
 * cleared from @err after the success message.
 */
2109 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2110 struct btrfs_root *root,
2111 struct extent_buffer *node,
2112 struct node_refs *nrefs, int level, int err)
2114 struct btrfs_fs_info *fs_info = root->fs_info;
2115 struct btrfs_root *extent_root = fs_info->extent_root;
2116 struct btrfs_path path;
2117 struct btrfs_extent_item *ei;
2118 struct btrfs_tree_block_info *bi;
2119 struct btrfs_key key;
2120 struct extent_buffer *eb;
2121 u32 size = sizeof(*ei);
2122 u32 node_size = root->fs_info->nodesize;
2123 int insert_extent = 0;
2124 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2125 int root_level = btrfs_header_level(root->node);
2130 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2133 if ((err & BACKREF_MISSING) == 0)
2136 WARN_ON(level > BTRFS_MAX_LEVEL);
2139 btrfs_init_path(&path);
2140 bytenr = btrfs_header_bytenr(node);
2141 owner = btrfs_header_owner(node);
2142 generation = btrfs_header_generation(node);
2144 key.objectid = bytenr;
2146 key.offset = (u64)-1;
2148 /* Search for the extent item */
2149 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2155 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2159 /* calculate if the extent item flag is full backref or not */
2160 if (nrefs->full_backref[level] != 0)
2161 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2163 /* insert an extent item */
2164 if (insert_extent) {
2165 struct btrfs_disk_key copy_key;
2167 generation = btrfs_header_generation(node);
2169 if (level < root_level && nrefs->full_backref[level + 1] &&
2170 owner != root->objectid) {
2171 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2174 key.objectid = bytenr;
2175 if (!skinny_metadata) {
2176 key.type = BTRFS_EXTENT_ITEM_KEY;
2177 key.offset = node_size;
/* Non-skinny items also need room for the tree block info */
2178 size += sizeof(*bi);
2180 key.type = BTRFS_METADATA_ITEM_KEY;
2184 btrfs_release_path(&path);
2185 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2191 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* Start at zero refs; the ref itself is added further down */
2193 btrfs_set_extent_refs(eb, ei, 0);
2194 btrfs_set_extent_generation(eb, ei, generation);
2195 btrfs_set_extent_flags(eb, ei, flags);
2197 if (!skinny_metadata) {
2198 bi = (struct btrfs_tree_block_info *)(ei + 1);
2199 memset_extent_buffer(eb, 0, (unsigned long)bi,
/*
 * NOTE(review): the copy_key argument in the calls below appears
 * mis-encoded in this copy of the file (the address-of operator and the
 * start of the identifier were replaced by a single symbol) - verify
 * against the upstream source.
 */
2201 btrfs_set_disk_key_objectid(©_key, root->objectid);
2202 btrfs_set_disk_key_type(©_key, 0);
2203 btrfs_set_disk_key_offset(©_key, 0);
2205 btrfs_set_tree_block_level(eb, bi, level);
2206 btrfs_set_tree_block_key(eb, bi, ©_key);
2208 btrfs_mark_buffer_dirty(eb);
2209 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2210 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
/* Mirror the freshly inserted item in the cached per-level state */
2212 nrefs->refs[level] = 0;
2213 nrefs->full_backref[level] =
2214 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2215 btrfs_release_path(&path);
2218 if (level < root_level && nrefs->full_backref[level + 1] &&
2219 owner != root->objectid)
2220 parent = nrefs->bytenr[level + 1];
2222 /* increase the ref */
2223 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2224 parent, root->objectid, level, 0);
2226 nrefs->refs[level]++;
2228 btrfs_release_path(&path);
2231 "failed to repair tree block ref start %llu root %llu due to %s",
2232 bytenr, root->objectid, strerror(-ret));
2234 printf("Added one tree block ref start %llu %s %llu\n",
2235 bytenr, parent ? "parent" : "root",
2236 parent ? parent : root->objectid);
2237 err &= ~BACKREF_MISSING;
/*
 * Forward declarations for helpers used by the lowmem walk below;
 * check_tree_block_ref() and check_leaf_items() are called from
 * walk_down_tree_v2().
 */
2243 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2244 unsigned int ext_ref);
2245 static int check_tree_block_ref(struct btrfs_root *root,
2246 struct extent_buffer *eb, u64 bytenr,
2247 int level, u64 owner, struct node_refs *nrefs);
2248 static int check_leaf_items(struct btrfs_trans_handle *trans,
2249 struct btrfs_root *root, struct btrfs_path *path,
2250 struct node_refs *nrefs, int account_bytes);
2253 * @trans just for lowmem repair mode
2254 * @check all if not 0 then check all tree block backrefs and items
2255 * 0 then just check relationship of items in fs tree(s)
2257 * Returns >0 Found error, should continue
2258 * Returns <0 Fatal error, must exit the whole check
2259 * Returns 0 No errors found
/*
 * Walk down the tree from path->nodes[*level], lowering *level each time a
 * child block is entered, and check every block on the way.
 *
 * @nrefs is refreshed through update_nodes_refs() for the starting block
 * and for each child pointer.  For each block, check_tree_block_ref() and
 * repair_tree_block_ref() are invoked when the block has not been handled
 * yet (checked[] with check_all, need_check[] without); account_bytes()
 * runs when check_all is set, need_check[] is set and the cached ref count
 * is non-zero.  The block is validated with btrfs_check_leaf() or
 * btrfs_check_node(), and process_one_leaf_v2() and check_leaf_items() are
 * called for leaf contents.  Child blocks are read, validated with
 * check_child_node() and the leaf/node checkers, and then installed in
 * @path.
 */
2261 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2262 struct btrfs_root *root, struct btrfs_path *path,
2263 int *level, struct node_refs *nrefs, int ext_ref,
2267 enum btrfs_tree_block_status status;
2270 struct btrfs_fs_info *fs_info = root->fs_info;
2271 struct extent_buffer *next;
2272 struct extent_buffer *cur;
2276 int account_file_data = 0;
2278 WARN_ON(*level < 0);
2279 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2281 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2282 path->nodes[*level], nrefs, *level, check_all);
2286 while (*level >= 0) {
2287 WARN_ON(*level < 0);
2288 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2289 cur = path->nodes[*level];
2290 bytenr = btrfs_header_bytenr(cur);
2291 check = nrefs->need_check[*level];
2293 if (btrfs_header_level(cur) != *level)
2296 * Update bytes accounting and check tree block ref
2297 * NOTE: Doing accounting and check before checking nritems
2298 * is necessary because of empty node/leaf.
2300 if ((check_all && !nrefs->checked[*level]) ||
2301 (!check_all && nrefs->need_check[*level])) {
2302 ret = check_tree_block_ref(root, cur,
2303 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2304 btrfs_header_owner(cur), nrefs);
2307 ret = repair_tree_block_ref(trans, root,
2308 path->nodes[*level], nrefs, *level, ret);
2311 if (check_all && nrefs->need_check[*level] &&
2312 nrefs->refs[*level]) {
2313 account_bytes(root, path, *level);
2314 account_file_data = 1;
2316 nrefs->checked[*level] = 1;
2319 if (path->slots[*level] >= btrfs_header_nritems(cur))
2322 /* Don't forget to check leaf/node validation */
2324 /* skip duplicate check */
2325 if (check || !check_all) {
2326 ret = btrfs_check_leaf(root, NULL, cur);
2327 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2335 ret = process_one_leaf_v2(root, path, nrefs,
2338 ret = check_leaf_items(trans, root, path,
2339 nrefs, account_file_data);
2343 if (check || !check_all) {
2344 ret = btrfs_check_node(root, NULL, cur);
2345 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child block pointer and generation stored in the current slot */
2352 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2353 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2355 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2360 * check all trees in check_chunks_and_extent_v2
2361 * check shared node once in check_fs_roots
2363 if (!check_all && !nrefs->need_check[*level - 1]) {
2364 path->slots[*level]++;
/* Try the cached block first; read from disk if absent or stale */
2368 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2369 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2370 free_extent_buffer(next);
2371 reada_walk_down(root, cur, path->slots[*level]);
2372 next = read_tree_block(fs_info, bytenr, ptr_gen);
2373 if (!extent_buffer_uptodate(next)) {
2374 struct btrfs_key node_key;
2376 btrfs_node_key_to_cpu(path->nodes[*level],
2378 path->slots[*level]);
2379 btrfs_add_corrupt_extent_record(fs_info,
2380 &node_key, path->nodes[*level]->start,
2381 fs_info->nodesize, *level);
2387 ret = check_child_node(cur, path->slots[*level], next);
2392 if (btrfs_is_leaf(next))
2393 status = btrfs_check_leaf(root, NULL, next);
2395 status = btrfs_check_node(root, NULL, next);
2396 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2397 free_extent_buffer(next);
/* Descend: replace the path entry one level down with the child */
2402 *level = *level - 1;
2403 free_extent_buffer(path->nodes[*level]);
2404 path->nodes[*level] = next;
2405 path->slots[*level] = 0;
2406 account_file_data = 0;
/* bytenr (u64)-1: only refresh full_backref[] for the new block */
2408 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Climb from *level towards the root, looking for a level whose current
 * slot is not the last item of its block.
 *
 * The loop covers levels below BTRFS_MAX_LEVEL - 1 that have a node in
 * @path.  The extent buffer at *level is freed and its path entry cleared;
 * when that level equals wc->active_node, leave_shared_node() is called for
 * it.  *level must never exceed wc->active_node (BUG_ON).
 */
2413 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2414 struct walk_control *wc, int *level)
2417 struct extent_buffer *leaf;
2419 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, leaf is the block at level i, which may be a node */
2420 leaf = path->nodes[i];
2421 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2426 free_extent_buffer(path->nodes[*level]);
2427 path->nodes[*level] = NULL;
2428 BUG_ON(*level > wc->active_node);
2429 if (*level == wc->active_node)
2430 leave_shared_node(root, wc, *level);
/*
 * Counterpart of walk_up_tree() without walk_control bookkeeping: climb
 * from *level towards the root, looking for a level whose current slot is
 * not the last item of its block.  The extent buffer at *level is freed and
 * its path entry cleared.
 */
2437 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2441 struct extent_buffer *leaf;
2443 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, leaf is the block at level i, which may be a node */
2444 leaf = path->nodes[i];
2445 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2450 free_extent_buffer(path->nodes[*level]);
2451 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record @rec of a root directory.
 *
 * The tests look at: a missing inode item or any recorded error; an nlink
 * other than 1 or a non-zero found_link; an empty backref list; a first
 * backref without an inode ref; a first backref that is not index 0 with
 * the two-byte name ".."; and a first backref that also has a dir index or
 * dir item.  Each test presumably rejects the record - TODO confirm the
 * return convention.
 */
2458 static int check_root_dir(struct inode_record *rec)
2460 struct inode_backref *backref;
2463 if (!rec->found_inode_item || rec->errors)
2465 if (rec->nlink != 1 || rec->found_link != 0)
2467 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined */
2469 backref = to_inode_backref(rec->backrefs.next);
2470 if (!backref->found_inode_ref)
2472 if (backref->index != 0 || backref->namelen != 2 ||
2473 memcmp(backref->name, "..", 2))
2475 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Set the on-disk size of inode @rec->ino in @root to rec->found_size.
 *
 * The inode item is located with a search key of
 * (ino, BTRFS_INODE_ITEM_KEY, (u64)-1), and the key found is verified to
 * belong to rec->ino before the item is modified.  On the write path the
 * buffer is marked dirty, I_ERR_DIR_ISIZE_WRONG is cleared from
 * rec->errors and a message is printed.  @path is released before
 * returning.
 */
2482 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2483 struct btrfs_root *root, struct btrfs_path *path,
2484 struct inode_record *rec)
2486 struct btrfs_inode_item *ei;
2487 struct btrfs_key key;
2490 key.objectid = rec->ino;
2491 key.type = BTRFS_INODE_ITEM_KEY;
2492 key.offset = (u64)-1;
2494 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2498 if (!path->slots[0]) {
2505 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2506 if (key.objectid != rec->ino) {
2511 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2512 struct btrfs_inode_item);
2513 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2514 btrfs_mark_buffer_dirty(path->nodes[0]);
2515 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2516 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2517 root->root_key.objectid);
2519 btrfs_release_path(path);
/*
 * Add an orphan item for inode @rec->ino to @root using
 * btrfs_add_orphan_item(), release @path, and clear I_ERR_NO_ORPHAN_ITEM
 * from rec->errors.
 */
2523 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2524 struct btrfs_root *root,
2525 struct btrfs_path *path,
2526 struct inode_record *rec)
2530 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2531 btrfs_release_path(path);
2533 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Set the on-disk nbytes of inode @rec->ino in @root to rec->found_size.
 *
 * The inode item is looked up with btrfs_search_slot(); on the write path
 * the buffer is marked dirty, I_ERR_FILE_NBYTES_WRONG is cleared from
 * rec->errors and a message is printed.  @path is released before
 * returning.
 */
2537 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2538 struct btrfs_root *root,
2539 struct btrfs_path *path,
2540 struct inode_record *rec)
2542 struct btrfs_inode_item *ei;
2543 struct btrfs_key key;
2546 key.objectid = rec->ino;
2547 key.type = BTRFS_INODE_ITEM_KEY;
2550 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2557 /* Since ret == 0, no need to check anything */
2558 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2559 struct btrfs_inode_item);
2560 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2561 btrfs_mark_buffer_dirty(path->nodes[0]);
2562 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2563 printf("reset nbytes for ino %llu root %llu\n",
2564 rec->ino, root->root_key.objectid);
2566 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref says is missing for inode @rec.
 *
 * Runs in its own transaction: an empty item keyed
 * (backref->dir, BTRFS_DIR_INDEX_KEY, backref->index) is inserted and
 * filled with a dir item that points at rec->ino, has the type derived
 * from rec->imode, a data length of 0 and the backref's name.  Afterwards
 * the backref is marked as having a dir index, and the parent directory's
 * record in @inode_cache has its found_size increased by the name length
 * and its I_ERR_DIR_ISIZE_WRONG bit re-evaluated against isize.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored
 * here - confirm that ignoring it is intended.
 */
2570 static int add_missing_dir_index(struct btrfs_root *root,
2571 struct cache_tree *inode_cache,
2572 struct inode_record *rec,
2573 struct inode_backref *backref)
2575 struct btrfs_path path;
2576 struct btrfs_trans_handle *trans;
2577 struct btrfs_dir_item *dir_item;
2578 struct extent_buffer *leaf;
2579 struct btrfs_key key;
2580 struct btrfs_disk_key disk_key;
2581 struct inode_record *dir_rec;
2582 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name bytes */
2583 u32 data_size = sizeof(*dir_item) + backref->namelen;
2586 trans = btrfs_start_transaction(root, 1);
2588 return PTR_ERR(trans);
2590 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2591 (unsigned long long)rec->ino);
2593 btrfs_init_path(&path);
2594 key.objectid = backref->dir;
2595 key.type = BTRFS_DIR_INDEX_KEY;
2596 key.offset = backref->index;
2597 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2600 leaf = path.nodes[0];
2601 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* disk_key is the location the new dir item points at: the inode item */
2603 disk_key.objectid = cpu_to_le64(rec->ino);
2604 disk_key.type = BTRFS_INODE_ITEM_KEY;
2605 disk_key.offset = 0;
2607 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2608 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2609 btrfs_set_dir_data_len(leaf, dir_item, 0);
2610 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right after the dir item header */
2611 name_ptr = (unsigned long)(dir_item + 1);
2612 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2613 btrfs_mark_buffer_dirty(leaf);
2614 btrfs_release_path(&path);
2615 btrfs_commit_transaction(trans, root);
2617 backref->found_dir_index = 1;
2618 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2619 BUG_ON(IS_ERR(dir_rec));
/* Keep the parent directory's size bookkeeping consistent with the new item */
2622 dir_rec->found_size += backref->namelen;
2623 if (dir_rec->found_size == dir_rec->isize &&
2624 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2625 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2626 if (dir_rec->found_size != dir_rec->isize)
2627 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root.
 *
 * Runs in its own transaction.  The entry is looked up with
 * btrfs_lookup_dir_index() using the backref's dir, name and index; it is
 * then removed either with btrfs_del_item() or with
 * btrfs_delete_one_dir_name().  The path is released and the transaction
 * committed on both visible exit paths.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored
 * here - confirm that ignoring it is intended.
 */
2632 static int delete_dir_index(struct btrfs_root *root,
2633 struct inode_backref *backref)
2635 struct btrfs_trans_handle *trans;
2636 struct btrfs_dir_item *di;
2637 struct btrfs_path path;
2640 trans = btrfs_start_transaction(root, 1);
2642 return PTR_ERR(trans);
2644 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2645 (unsigned long long)backref->dir,
2646 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2647 (unsigned long long)root->objectid);
2649 btrfs_init_path(&path);
2650 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2651 backref->name, backref->namelen,
2652 backref->index, -1);
2655 btrfs_release_path(&path);
2656 btrfs_commit_transaction(trans, root);
2663 ret = btrfs_del_item(trans, root, &path);
2665 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2667 btrfs_release_path(&path);
2668 btrfs_commit_transaction(trans, root);
/*
 * Insert an inode item for @ino into @root inside the caller's transaction.
 *
 * The mode is S_IFDIR when filetype is BTRFS_FT_DIR and S_IFREG otherwise,
 * combined with permission bits 0755.  The three numeric arguments between
 * @ino and the mode are passed to insert_inode_item() as 0.
 */
2672 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2673 struct btrfs_root *root, u64 ino,
2676 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2678 return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the missing inode item for @rec in @root, in its own
 * transaction.
 *
 * nlink is 1 when @root_dir is set and rec->found_link otherwise.  A record
 * with a dir item becomes a directory (S_IFDIR | 0755, size
 * rec->found_size), with a warning when it also has file extents; any
 * other record becomes a regular file (S_IFREG | 0755, size
 * rec->extent_end).
 *
 * NOTE(review): the "else if (!rec->found_dir_item)" condition is always
 * true when reached, so it behaves like a plain else.
 */
2681 static int create_inode_item(struct btrfs_root *root,
2682 struct inode_record *rec, int root_dir)
2684 struct btrfs_trans_handle *trans;
2690 trans = btrfs_start_transaction(root, 1);
2691 if (IS_ERR(trans)) {
2692 ret = PTR_ERR(trans);
2696 nlink = root_dir ? 1 : rec->found_link;
2697 if (rec->found_dir_item) {
2698 if (rec->found_file_extent)
2699 fprintf(stderr, "root %llu inode %llu has both a dir "
2700 "item and extents, unsure if it is a dir or a "
2701 "regular file so setting it as a directory\n",
2702 (unsigned long long)root->objectid,
2703 (unsigned long long)rec->ino);
2704 mode = S_IFDIR | 0755;
2705 size = rec->found_size;
2706 } else if (!rec->found_dir_item) {
2707 size = rec->extent_end;
2708 mode = S_IFREG | 0755;
2711 ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2713 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the inconsistent backrefs of inode record @rec in @root.
 *
 * Each backref on rec->backrefs is examined, and the delete flag selects
 * which repairs are attempted.  The non-delete cases all test !delete; the
 * dir-index deletion case is presumably gated on delete - confirm.
 *  - root directory without an inode item: create_inode_item(root, rec, 1);
 *  - dir index without a matching inode ref, or with REF_ERR_INDEX_UNMATCH:
 *    delete_dir_index() and drop the backref from the list;
 *  - dir item and inode ref but no dir index: add_missing_dir_index();
 *  - inode ref only: after check_dir_conflict(), insert the missing dir
 *    index/item pair with btrfs_insert_dir_item() in a new transaction;
 *  - complete, index-matching backref but no inode item:
 *    create_inode_item(root, rec, 0).
 * Index 0 of the root directory is left untouched.
 *
 * Returns ret when it is non-zero, otherwise the repaired counter.
 */
2717 static int repair_inode_backrefs(struct btrfs_root *root,
2718 struct inode_record *rec,
2719 struct cache_tree *inode_cache,
2722 struct inode_backref *tmp, *backref;
2723 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* _safe iteration: backrefs may be unlinked from the list inside the loop */
2727 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2728 if (!delete && rec->ino == root_dirid) {
2729 if (!rec->found_inode_item) {
2730 ret = create_inode_item(root, rec, 1);
2737 /* Index 0 for root dir's are special, don't mess with it */
2738 if (rec->ino == root_dirid && backref->index == 0)
2742 ((backref->found_dir_index && !backref->found_inode_ref) ||
2743 (backref->found_dir_index && backref->found_inode_ref &&
2744 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2745 ret = delete_dir_index(root, backref);
2749 list_del(&backref->list);
2754 if (!delete && !backref->found_dir_index &&
2755 backref->found_dir_item && backref->found_inode_ref) {
2756 ret = add_missing_dir_index(root, inode_cache, rec,
2761 if (backref->found_dir_item &&
2762 backref->found_dir_index) {
2763 if (!backref->errors &&
2764 backref->found_inode_ref) {
2765 list_del(&backref->list);
2772 if (!delete && (!backref->found_dir_index &&
2773 !backref->found_dir_item &&
2774 backref->found_inode_ref)) {
2775 struct btrfs_trans_handle *trans;
2776 struct btrfs_key location;
2778 ret = check_dir_conflict(root, backref->name,
2784 * let nlink fixing routine to handle it,
2785 * which can do it better.
/* location is the key of the inode item the new dir entries point at */
2790 location.objectid = rec->ino;
2791 location.type = BTRFS_INODE_ITEM_KEY;
2792 location.offset = 0;
2794 trans = btrfs_start_transaction(root, 1);
2795 if (IS_ERR(trans)) {
2796 ret = PTR_ERR(trans);
2799 fprintf(stderr, "adding missing dir index/item pair "
2801 (unsigned long long)rec->ino);
2802 ret = btrfs_insert_dir_item(trans, root, backref->name,
2804 backref->dir, &location,
2805 imode_to_type(rec->imode),
2808 btrfs_commit_transaction(trans, root);
2812 if (!delete && (backref->found_inode_ref &&
2813 backref->found_dir_index &&
2814 backref->found_dir_item &&
2815 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2816 !rec->found_inode_item)) {
2817 ret = create_inode_item(root, rec, 0);
2824 return ret ? ret : repaired;
2828 * To determine the file type for nlink/inode_item repair
2830 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2831 * Return -ENOENT if file type is not found.
/*
 * Two sources are tried in this order: the mode of the inode item when
 * rec->found_inode_item is set (converted with imode_to_type()), then the
 * filetype stored on a backref that has a dir index or a dir item.
 */
2833 static int find_file_type(struct inode_record *rec, u8 *type)
2835 struct inode_backref *backref;
2837 /* For inode item recovered case */
2838 if (rec->found_inode_item) {
2839 *type = imode_to_type(rec->imode);
2843 list_for_each_entry(backref, &rec->backrefs, list) {
2844 if (backref->found_dir_index || backref->found_dir_item) {
2845 *type = backref->filetype;
2853 * To determine the file name for nlink repair
2855 * Return 0 if file name is found, set name and namelen.
2856 * Return -ENOENT if file name is not found.
/*
 * The name is taken from a backref that has a dir index, a dir item or an
 * inode ref.  memcpy() copies exactly backref->namelen bytes into @name, so
 * the caller must supply a buffer at least that large; the lines shown here
 * do not append a terminator.
 */
2858 static int find_file_name(struct inode_record *rec,
2859 char *name, int *namelen)
2861 struct inode_backref *backref;
2863 list_for_each_entry(backref, &rec->backrefs, list) {
2864 if (backref->found_dir_index || backref->found_dir_item ||
2865 backref->found_inode_ref) {
2866 memcpy(name, backref->name, backref->namelen);
2867 *namelen = backref->namelen;
2874 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the links of @rec from its backref list:
 *  1. btrfs_unlink() is called for every backref, and backrefs that lack any
 *     of dir index, dir item or inode ref are removed from the list;
 *  2. nlink in the on-disk inode item is set to 0;
 *  3. btrfs_add_link() is called for each backref still on the list.
 *
 * rec->found_link is reset to 0 on entry.
 */
2875 static int reset_nlink(struct btrfs_trans_handle *trans,
2876 struct btrfs_root *root,
2877 struct btrfs_path *path,
2878 struct inode_record *rec)
2880 struct inode_backref *backref;
2881 struct inode_backref *tmp;
2882 struct btrfs_key key;
2883 struct btrfs_inode_item *inode_item;
2886 /* We don't believe this either, reset it and iterate backref */
2887 rec->found_link = 0;
2889 /* Remove all backref including the valid ones */
2890 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2891 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2892 backref->index, backref->name,
2893 backref->namelen, 0);
2897 /* remove invalid backref, so it won't be added back */
2898 if (!(backref->found_dir_index &&
2899 backref->found_dir_item &&
2900 backref->found_inode_ref)) {
2901 list_del(&backref->list);
2908 /* Set nlink to 0 */
2909 key.objectid = rec->ino;
2910 key.type = BTRFS_INODE_ITEM_KEY;
2912 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Update the leaf in place and mark it dirty so the change is written out. */
2919 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2920 struct btrfs_inode_item);
2921 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2922 btrfs_mark_buffer_dirty(path->nodes[0]);
2923 btrfs_release_path(path);
2926 * Add back valid inode_ref/dir_item/dir_index,
2927 * add_link() will handle the nlink inc, so new nlink must be correct
2929 list_for_each_entry(backref, &rec->backrefs, list) {
2930 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2931 backref->name, backref->namelen,
2932 backref->filetype, &backref->index, 1, 0);
2937 btrfs_release_path(path);
/*
 * Repair a wrong link count on @rec.
 *
 * A name and a file type are looked up from the record before anything is
 * deleted; the fallbacks are the decimal inode number as the name and
 * BTRFS_FT_REG_FILE as the type.  reset_nlink() then rebuilds the links, and
 * when rec->found_link is 0 the inode is handed to
 * link_inode_to_lostfound().  I_ERR_LINK_COUNT_WRONG is cleared from
 * rec->errors and @path is released before returning.
 */
2941 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2942 struct btrfs_root *root,
2943 struct btrfs_path *path,
2944 struct inode_record *rec)
2946 char namebuf[BTRFS_NAME_LEN] = {0};
2949 int name_recovered = 0;
2950 int type_recovered = 0;
2954 * Get file name and type first before these invalid inode ref
2955 * are deleted by remove_all_invalid_backref()
/* Both finders are negated here, so a non-zero flag means "found". */
2957 name_recovered = !find_file_name(rec, namebuf, &namelen);
2958 type_recovered = !find_file_type(rec, &type);
2960 if (!name_recovered) {
2961 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2962 rec->ino, rec->ino);
2963 namelen = count_digits(rec->ino);
2964 sprintf(namebuf, "%llu", rec->ino);
2967 if (!type_recovered) {
2968 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2970 type = BTRFS_FT_REG_FILE;
2974 ret = reset_nlink(trans, root, path, rec);
2977 "Failed to reset nlink for inode %llu: %s\n",
2978 rec->ino, strerror(-ret));
2982 if (rec->found_link == 0) {
/* NOTE(review): the (u64 *) cast below is only safe if found_link is a 64-bit field - confirm against struct inode_record. */
2983 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2984 namebuf, namelen, type,
2985 (u64 *)&rec->found_link);
2989 printf("Fixed the nlink of inode %llu\n", rec->ino);
2992 * Clear the flag anyway, or we will loop forever for the same inode
2993 * as it will not be removed from the bad inode list and the dead loop
2996 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2997 btrfs_release_path(path);
3002 * Check if there is any normal(reg or prealloc) file extent for given
3004 * This is used to determine the file type when neither its dir_index/item or
3005 * inode_item exists.
3007 * This will *NOT* report error, if any error happens, just consider it does
3008 * not have any normal file extent.
/*
 * Search @root for EXTENT_DATA items of @ino and test whether a file extent
 * has a type other than BTRFS_FILE_EXTENT_INLINE.  The search is done with a
 * NULL transaction handle and a path local to this function, which is
 * released before returning.
 */
3010 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3012 struct btrfs_path path;
3013 struct btrfs_key key;
3014 struct btrfs_key found_key;
3015 struct btrfs_file_extent_item *fi;
3019 btrfs_init_path(&path);
3021 key.type = BTRFS_EXTENT_DATA_KEY;
3024 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The slot may point past the last item of the leaf; step to the next leaf. */
3029 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3030 ret = btrfs_next_leaf(root, &path);
3037 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Only items that are still EXTENT_DATA of this inode are of interest. */
3039 if (found_key.objectid != ino ||
3040 found_key.type != BTRFS_EXTENT_DATA_KEY)
3042 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3043 struct btrfs_file_extent_item);
3044 type = btrfs_file_extent_type(path.nodes[0], fi);
3045 if (type != BTRFS_FILE_EXTENT_INLINE) {
3051 btrfs_release_path(&path);
3055 static u32 btrfs_type_to_imode(u8 type)
3057 static u32 imode_by_btrfs_type[] = {
3058 [BTRFS_FT_REG_FILE] = S_IFREG,
3059 [BTRFS_FT_DIR] = S_IFDIR,
3060 [BTRFS_FT_CHRDEV] = S_IFCHR,
3061 [BTRFS_FT_BLKDEV] = S_IFBLK,
3062 [BTRFS_FT_FIFO] = S_IFIFO,
3063 [BTRFS_FT_SOCK] = S_IFSOCK,
3064 [BTRFS_FT_SYMLINK] = S_IFLNK,
3067 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of @rec with btrfs_new_inode().
 *
 * The file type comes from find_file_type() when that succeeds.  Otherwise
 * it is guessed in this order: a normal file extent means regular file, a
 * dir item means directory, a non-empty orphan extent list means regular
 * file, and regular file is also the final fallback (with a message).
 *
 * Afterwards the record is updated in place: found_dir_item and imode are
 * set, I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so
 * that the nlink repair is triggered.
 */
3070 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3071 struct btrfs_root *root,
3072 struct btrfs_path *path,
3073 struct inode_record *rec)
3077 int type_recovered = 0;
3080 printf("Trying to rebuild inode:%llu\n", rec->ino);
3082 type_recovered = !find_file_type(rec, &filetype);
3085 * Try to determine inode type if type not found.
3087 * For found regular file extent, it must be FILE.
3088 * For found dir_item/index, it must be DIR.
3090 * For undetermined one, use FILE as fallback.
3093 * 1. If found backref(inode_index/item is already handled) to it,
3095 * Need new inode-inode ref structure to allow search for that.
3097 if (!type_recovered) {
3098 if (rec->found_file_extent &&
3099 find_normal_file_extent(root, rec->ino)) {
3101 filetype = BTRFS_FT_REG_FILE;
3102 } else if (rec->found_dir_item) {
3104 filetype = BTRFS_FT_DIR;
3105 } else if (!list_empty(&rec->orphan_extents)) {
3107 filetype = BTRFS_FT_REG_FILE;
3109 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3112 filetype = BTRFS_FT_REG_FILE;
3116 ret = btrfs_new_inode(trans, root, rec->ino,
3117 mode | btrfs_type_to_imode(filetype));
3122 * Here inode rebuild is done, we only rebuild the inode item,
3123 * don't repair the nlink(like move to lost+found).
3124 * That is the job of nlink repair.
3126 * We just fill the record and return
3128 rec->found_dir_item = 1;
3129 rec->imode = mode | btrfs_type_to_imode(filetype);
3131 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3132 /* Ensure the inode_nlinks repair function will be called */
3133 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Process the orphan data extents attached to @rec.
 *
 * For each orphan the target range is probed with btrfs_get_extent().  The
 * calls that follow are btrfs_free_extent(), announced by the "conflicts,
 * delete the orphan" message, and btrfs_insert_file_extent(), which uses
 * disk_len for both length arguments.  rec->found_size grows by disk_len,
 * the matching range is removed from rec->holes, and the orphan is unlinked
 * from the list.  I_ERR_FILE_NBYTES_WRONG and I_ERR_FILE_EXTENT_DISCOUNT are
 * cleared once their conditions are met, and I_ERR_FILE_EXTENT_ORPHAN is
 * cleared at the end.
 */
3138 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3139 struct btrfs_root *root,
3140 struct btrfs_path *path,
3141 struct inode_record *rec)
3143 struct orphan_data_extent *orphan;
3144 struct orphan_data_extent *tmp;
3147 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3149 * Check for conflicting file extents
3151 * Here we don't know whether the extents is compressed or not,
3152 * so we can only assume it not compressed nor data offset,
3153 * and use its disk_len as extent length.
3155 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3156 orphan->offset, orphan->disk_len, 0);
3157 btrfs_release_path(path);
3162 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3163 orphan->disk_bytenr, orphan->disk_len);
3164 ret = btrfs_free_extent(trans,
3165 root->fs_info->extent_root,
3166 orphan->disk_bytenr, orphan->disk_len,
3167 0, root->objectid, orphan->objectid,
3172 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3173 orphan->offset, orphan->disk_bytenr,
3174 orphan->disk_len, orphan->disk_len);
3178 /* Update file size info */
3179 rec->found_size += orphan->disk_len;
3180 if (rec->found_size == rec->nbytes)
3181 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3183 /* Update the file extent hole info too */
3184 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3188 if (RB_EMPTY_ROOT(&rec->holes))
3189 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3191 list_del(&orphan->list);
3194 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes.
 *
 * Starting from rb_first(), each hole is punched with btrfs_punch_hole() and
 * then removed from the tree with del_file_extent_hole();
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.  A file that
 * lost all of its file extents is handled separately by punching one hole
 * from offset 0 up to rec->isize rounded up to the sector size.
 */
3199 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3200 struct btrfs_root *root,
3201 struct btrfs_path *path,
3202 struct inode_record *rec)
3204 struct rb_node *node;
3205 struct file_extent_hole *hole;
3209 node = rb_first(&rec->holes);
3213 hole = rb_entry(node, struct file_extent_hole, node);
3214 ret = btrfs_punch_hole(trans, root, rec->ino,
3215 hole->start, hole->len);
3218 ret = del_file_extent_hole(&rec->holes, hole->start,
3222 if (RB_EMPTY_ROOT(&rec->holes))
3223 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The hole just handled was removed, so restart from the new first node. */
3224 node = rb_first(&rec->holes);
3226 /* special case for a file losing all its file extent */
3228 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3229 round_up(rec->isize,
3230 root->fs_info->sectorsize));
3234 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3235 rec->ino, root->objectid);
/*
 * Run the per-inode repair helpers that match the error bits in rec->errors.
 *
 * The first test checks that at least one repairable bit is set.  All
 * helpers then share one transaction (7 items reserved) and one path.  Every
 * helper after the first runs only while ret is still 0, and each helper is
 * selected by the value of rec->errors at that point, so a bit set or
 * cleared by an earlier helper affects the later ones.
 */
3240 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3242 struct btrfs_trans_handle *trans;
3243 struct btrfs_path path;
/* Mask of every error bit this function has a helper for. */
3246 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3247 I_ERR_NO_ORPHAN_ITEM |
3248 I_ERR_LINK_COUNT_WRONG |
3249 I_ERR_NO_INODE_ITEM |
3250 I_ERR_FILE_EXTENT_ORPHAN |
3251 I_ERR_FILE_EXTENT_DISCOUNT|
3252 I_ERR_FILE_NBYTES_WRONG)))
3256 * For nlink repair, it may create a dir and add link, so
3257 * 2 for parent(256)'s dir_index and dir_item
3258 * 2 for lost+found dir's inode_item and inode_ref
3259 * 1 for the new inode_ref of the file
3260 * 2 for lost+found dir's dir_index and dir_item for the file
3262 trans = btrfs_start_transaction(root, 7);
3264 return PTR_ERR(trans);
3266 btrfs_init_path(&path);
3267 if (rec->errors & I_ERR_NO_INODE_ITEM)
3268 ret = repair_inode_no_item(trans, root, &path, rec);
3269 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3270 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3271 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3272 ret = repair_inode_discount_extent(trans, root, &path, rec);
3273 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3274 ret = repair_inode_isize(trans, root, &path, rec);
3275 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3276 ret = repair_inode_orphan_item(trans, root, &path, rec);
3277 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3278 ret = repair_inode_nlinks(trans, root, &path, rec);
3279 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3280 ret = repair_inode_nbytes(trans, root, &path, rec);
3281 btrfs_commit_transaction(trans, root);
3282 btrfs_release_path(&path);
/*
 * Check every inode record collected for @root and, when repair is enabled,
 * try to fix it.
 *
 * Flow as far as it can be read from the code below:
 *  1. a root with zero refs only gets a warning if @inode_cache is not empty;
 *  2. while repair is set, each record with backrefs goes through
 *     repair_inode_backrefs(); the surrounding code is staged (see stage);
 *  3. the root directory record is looked up and verified with
 *     check_root_dir(); a missing one is recreated with
 *     btrfs_make_root_dir() in its own transaction;
 *  4. every remaining record is removed from @inode_cache.  The root
 *     directory and BTRFS_ORPHAN_OBJECTID are freed immediately;
 *     I_ERR_NO_ORPHAN_ITEM is re-checked with check_orphan_item(); missing
 *     inode items and link count mismatches are flagged; try_repair_inode()
 *     is called; anything still unresolved is printed with its backrefs.
 *
 * Returns -1 when error is positive, 0 otherwise.
 */
3286 static int check_inode_recs(struct btrfs_root *root,
3287 struct cache_tree *inode_cache)
3289 struct cache_extent *cache;
3290 struct ptr_node *node;
3291 struct inode_record *rec;
3292 struct inode_backref *backref;
3297 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3299 if (btrfs_root_refs(&root->root_item) == 0) {
3300 if (!cache_tree_empty(inode_cache))
3301 fprintf(stderr, "warning line %d\n", __LINE__);
3306 * We need to repair backrefs first because we could change some of the
3307 * errors in the inode recs.
3309 * We also need to go through and delete invalid backrefs first and then
3310 * add the correct ones second. We do this because we may get EEXIST
3311 * when adding back the correct index because we hadn't yet deleted the
3314 * For example, if we were missing a dir index then the directories
3315 * isize would be wrong, so if we fixed the isize to what we thought it
3316 * would be and then fixed the backref we'd still have a invalid fs, so
3317 * we need to add back the dir index and then check to see if the isize
3322 if (stage == 3 && !err)
3325 cache = search_cache_extent(inode_cache, 0);
3326 while (repair && cache) {
3327 node = container_of(cache, struct ptr_node, cache);
3329 cache = next_cache_extent(cache);
3331 /* Need to free everything up and rescan */
3333 remove_cache_extent(inode_cache, &node->cache);
3335 free_inode_rec(rec);
3339 if (list_empty(&rec->backrefs))
3342 ret = repair_inode_backrefs(root, rec, inode_cache,
3356 rec = get_inode_rec(inode_cache, root_dirid, 0);
3357 BUG_ON(IS_ERR(rec));
3359 ret = check_root_dir(rec);
3361 fprintf(stderr, "root %llu root dir %llu error\n",
3362 (unsigned long long)root->root_key.objectid,
3363 (unsigned long long)root_dirid);
3364 print_inode_error(root, rec);
3369 struct btrfs_trans_handle *trans;
3371 trans = btrfs_start_transaction(root, 1);
3372 if (IS_ERR(trans)) {
3373 err = PTR_ERR(trans);
3378 "root %llu missing its root dir, recreating\n",
3379 (unsigned long long)root->objectid);
3381 ret = btrfs_make_root_dir(trans, root, root_dirid);
3384 btrfs_commit_transaction(trans, root);
3388 fprintf(stderr, "root %llu root dir %llu not found\n",
3389 (unsigned long long)root->root_key.objectid,
3390 (unsigned long long)root_dirid);
3394 cache = search_cache_extent(inode_cache, 0);
3397 node = container_of(cache, struct ptr_node, cache);
3399 remove_cache_extent(inode_cache, &node->cache);
3401 if (rec->ino == root_dirid ||
3402 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3403 free_inode_rec(rec);
3407 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3408 ret = check_orphan_item(root, rec->ino);
3410 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3411 if (can_free_inode_rec(rec)) {
3412 free_inode_rec(rec);
3417 if (!rec->found_inode_item)
3418 rec->errors |= I_ERR_NO_INODE_ITEM;
3419 if (rec->found_link != rec->nlink)
3420 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3422 ret = try_repair_inode(root, rec);
3423 if (ret == 0 && can_free_inode_rec(rec)) {
3424 free_inode_rec(rec);
3430 if (!(repair && ret == 0))
3432 print_inode_error(root, rec);
3433 list_for_each_entry(backref, &rec->backrefs, list) {
3434 if (!backref->found_dir_item)
3435 backref->errors |= REF_ERR_NO_DIR_ITEM;
3436 if (!backref->found_dir_index)
3437 backref->errors |= REF_ERR_NO_DIR_INDEX;
3438 if (!backref->found_inode_ref)
3439 backref->errors |= REF_ERR_NO_INODE_REF;
3440 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3441 " namelen %u name %s filetype %d errors %x",
3442 (unsigned long long)backref->dir,
3443 (unsigned long long)backref->index,
3444 backref->namelen, backref->name,
3445 backref->filetype, backref->errors);
3446 print_ref_error(backref->errors);
3448 free_inode_rec(rec);
3450 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record covering objectid in @root_cache, or create one.
 *
 * A new record is zero-allocated, gets an empty backref list and is inserted
 * into the cache as a range of size 1 starting at objectid.
 *
 * Two error pointers are produced below: ERR_PTR(-ENOMEM) after the
 * calloc() and ERR_PTR(-EEXIST) after insert_cache_extent().
 */
3453 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3456 struct cache_extent *cache;
3457 struct root_record *rec = NULL;
3460 cache = lookup_cache_extent(root_cache, objectid, 1);
3462 rec = container_of(cache, struct root_record, cache);
3464 rec = calloc(1, sizeof(*rec));
3466 return ERR_PTR(-ENOMEM);
3467 rec->objectid = objectid;
3468 INIT_LIST_HEAD(&rec->backrefs);
3469 rec->cache.start = objectid;
3470 rec->cache.size = 1;
3472 ret = insert_cache_extent(root_cache, &rec->cache);
3474 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches @ref_root, @dir and the name
 * (@namelen bytes compared with memcmp()), or append a new one to the list.
 *
 * A new backref is zero-allocated together with room for the name plus one
 * byte; the name is copied in and terminated with '\0'.  @index is only
 * stored when a new backref is created.
 */
3479 static struct root_backref *get_root_backref(struct root_record *rec,
3480 u64 ref_root, u64 dir, u64 index,
3481 const char *name, int namelen)
3483 struct root_backref *backref;
3485 list_for_each_entry(backref, &rec->backrefs, list) {
3486 if (backref->ref_root != ref_root || backref->dir != dir ||
3487 backref->namelen != namelen)
3489 if (memcmp(name, backref->name, namelen))
3494 backref = calloc(1, sizeof(*backref) + namelen + 1);
3497 backref->ref_root = ref_root;
3499 backref->index = index;
3500 backref->namelen = namelen;
3501 memcpy(backref->name, name, namelen);
3502 backref->name[namelen] = '\0';
3503 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down the root_record that embeds @cache: its backref list is emptied
 * one entry at a time, starting from the head.
 */
3507 static void free_root_record(struct cache_extent *cache)
3509 struct root_record *rec;
3510 struct root_backref *backref;
3512 rec = container_of(cache, struct root_record, cache);
3513 while (!list_empty(&rec->backrefs)) {
3514 backref = to_root_backref(rec->backrefs.next);
3515 list_del(&backref->list);
/* Presumably defines the tree-wide free helper for root records (free_root_recs_tree() is called elsewhere in this file) - confirm against the macro definition. */
3522 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one item (dir item, dir index, root ref or root backref, selected
 * by @item_type) as evidence that @ref_root references root @root_id.
 *
 * @errors is OR-ed into the backref.  For every item type except
 * BTRFS_DIR_ITEM_KEY the index takes part: if a dir index, back ref or
 * forward ref was already seen, a different @index sets
 * REF_ERR_INDEX_UNMATCH.  A second root ref sets REF_ERR_DUP_ROOT_REF and a
 * second root backref sets REF_ERR_DUP_ROOT_BACKREF.  Once both a forward
 * ref and a dir item have been seen, the backref is marked reachable.
 */
3524 static int add_root_backref(struct cache_tree *root_cache,
3525 u64 root_id, u64 ref_root, u64 dir, u64 index,
3526 const char *name, int namelen,
3527 int item_type, int errors)
3529 struct root_record *rec;
3530 struct root_backref *backref;
3532 rec = get_root_rec(root_cache, root_id);
3533 BUG_ON(IS_ERR(rec));
3534 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3537 backref->errors |= errors;
3539 if (item_type != BTRFS_DIR_ITEM_KEY) {
3540 if (backref->found_dir_index || backref->found_back_ref ||
3541 backref->found_forward_ref) {
3542 if (backref->index != index)
3543 backref->errors |= REF_ERR_INDEX_UNMATCH;
3545 backref->index = index;
/* Set the flag that corresponds to the kind of item just seen. */
3549 if (item_type == BTRFS_DIR_ITEM_KEY) {
3550 if (backref->found_forward_ref)
3552 backref->found_dir_item = 1;
3553 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3554 backref->found_dir_index = 1;
3555 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3556 if (backref->found_forward_ref)
3557 backref->errors |= REF_ERR_DUP_ROOT_REF;
3558 else if (backref->found_dir_item)
3560 backref->found_forward_ref = 1;
3561 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3562 if (backref->found_back_ref)
3563 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3564 backref->found_back_ref = 1;
3569 if (backref->found_forward_ref && backref->found_dir_item)
3570 backref->reachable = 1;
/*
 * Drain @src_cache into @dst_cache.
 *
 * For the tree reloc root the source cache is freed with
 * free_inode_recs_tree().  Otherwise every record is removed from
 * @src_cache, tested with is_child_root(), and its dir item / dir index
 * backrefs are forwarded to add_root_backref() with the objectid of @root as
 * the referencing root and rec->ino as the root id.  The record is then
 * freed.  A backref with found_inode_ref set triggers BUG_ON().
 */
3574 static int merge_root_recs(struct btrfs_root *root,
3575 struct cache_tree *src_cache,
3576 struct cache_tree *dst_cache)
3578 struct cache_extent *cache;
3579 struct ptr_node *node;
3580 struct inode_record *rec;
3581 struct inode_backref *backref;
3584 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3585 free_inode_recs_tree(src_cache);
3590 cache = search_cache_extent(src_cache, 0);
3593 node = container_of(cache, struct ptr_node, cache);
3595 remove_cache_extent(src_cache, &node->cache);
3598 ret = is_child_root(root, root->objectid, rec->ino);
3604 list_for_each_entry(backref, &rec->backrefs, list) {
3605 BUG_ON(backref->found_inode_ref);
3606 if (backref->found_dir_item)
3607 add_root_backref(dst_cache, rec->ino,
3608 root->root_key.objectid, backref->dir,
3609 backref->index, backref->name,
3610 backref->namelen, BTRFS_DIR_ITEM_KEY,
3612 if (backref->found_dir_index)
3613 add_root_backref(dst_cache, rec->ino,
3614 root->root_key.objectid, backref->dir,
3615 backref->index, backref->name,
3616 backref->namelen, BTRFS_DIR_INDEX_KEY,
3620 free_inode_rec(rec);
/*
 * Verify the references between the roots recorded in @root_cache.
 *
 * The record of BTRFS_FS_TREE_OBJECTID is fetched first.  The first scan
 * walks the backrefs that are still marked reachable and looks at found_ref
 * of the root each one comes from; the reachable flag of a backref can be
 * cleared there.  The second scan reports:
 *  - roots in the free objectid range with found_ref == 0, after consulting
 *    check_orphan_item() on the tree root and found_root_item;
 *  - roots with found_ref > 0 but no root item;
 *  - backrefs that lack a dir item, dir index, root backref or root ref
 *    (the matching REF_ERR_NO_* bits are set here).
 *
 * Returns 1 when errors is positive, 0 otherwise.
 */
3627 static int check_root_refs(struct btrfs_root *root,
3628 struct cache_tree *root_cache)
3630 struct root_record *rec;
3631 struct root_record *ref_root;
3632 struct root_backref *backref;
3633 struct cache_extent *cache;
3639 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3640 BUG_ON(IS_ERR(rec));
3643 /* fixme: this can not detect circular references */
3646 cache = search_cache_extent(root_cache, 0);
3650 rec = container_of(cache, struct root_record, cache);
3651 cache = next_cache_extent(cache);
3653 if (rec->found_ref == 0)
3656 list_for_each_entry(backref, &rec->backrefs, list) {
3657 if (!backref->reachable)
3660 ref_root = get_root_rec(root_cache,
3662 BUG_ON(IS_ERR(ref_root));
3663 if (ref_root->found_ref > 0)
3666 backref->reachable = 0;
3668 if (rec->found_ref == 0)
3674 cache = search_cache_extent(root_cache, 0);
3678 rec = container_of(cache, struct root_record, cache);
3679 cache = next_cache_extent(cache);
3681 if (rec->found_ref == 0 &&
3682 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3683 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3684 ret = check_orphan_item(root->fs_info->tree_root,
3690 * If we don't have a root item then we likely just have
3691 * a dir item in a snapshot for this root but no actual
3692 * ref key or anything so it's meaningless.
3694 if (!rec->found_root_item)
3697 fprintf(stderr, "fs tree %llu not referenced\n",
3698 (unsigned long long)rec->objectid);
3702 if (rec->found_ref > 0 && !rec->found_root_item)
3704 list_for_each_entry(backref, &rec->backrefs, list) {
3705 if (!backref->found_dir_item)
3706 backref->errors |= REF_ERR_NO_DIR_ITEM;
3707 if (!backref->found_dir_index)
3708 backref->errors |= REF_ERR_NO_DIR_INDEX;
3709 if (!backref->found_back_ref)
3710 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3711 if (!backref->found_forward_ref)
3712 backref->errors |= REF_ERR_NO_ROOT_REF;
3713 if (backref->reachable && backref->errors)
3720 fprintf(stderr, "fs tree %llu refs %u %s\n",
3721 (unsigned long long)rec->objectid, rec->found_ref,
3722 rec->found_root_item ? "" : "not found");
3724 list_for_each_entry(backref, &rec->backrefs, list) {
3725 if (!backref->reachable)
3727 if (!backref->errors && rec->found_root_item)
3729 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3730 " index %llu namelen %u name %s errors %x\n",
3731 (unsigned long long)backref->ref_root,
3732 (unsigned long long)backref->dir,
3733 (unsigned long long)backref->index,
3734 backref->namelen, backref->name,
3736 print_ref_error(backref->errors);
3739 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and pass it to
 * add_root_backref().
 *
 * The directory id, sequence (used as the index) and name length are read
 * from the item.  A name longer than BTRFS_NAME_LEN is truncated to
 * BTRFS_NAME_LEN bytes and flagged with REF_ERR_NAME_TOO_LONG.  The name
 * bytes are read from directly behind the fixed part of the item.
 *
 * For a BTRFS_ROOT_REF_KEY, key->offset is passed as the root id and
 * key->objectid as the referencing root; the other call passes them the
 * other way round.
 */
3742 static int process_root_ref(struct extent_buffer *eb, int slot,
3743 struct btrfs_key *key,
3744 struct cache_tree *root_cache)
3750 struct btrfs_root_ref *ref;
3751 char namebuf[BTRFS_NAME_LEN];
3754 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3756 dirid = btrfs_root_ref_dirid(eb, ref);
3757 index = btrfs_root_ref_sequence(eb, ref);
3758 name_len = btrfs_root_ref_name_len(eb, ref);
3760 if (name_len <= BTRFS_NAME_LEN) {
3764 len = BTRFS_NAME_LEN;
3765 error = REF_ERR_NAME_TOO_LONG;
/* namebuf is filled with len bytes and passed on together with len. */
3767 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3769 if (key->type == BTRFS_ROOT_REF_KEY) {
3770 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3771 index, namebuf, len, key->type, error);
3773 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3774 index, namebuf, len, key->type, error);
/*
 * Per-entry release callback for the corrupt block tree: maps @cache back to
 * the btrfs_corrupt_block that embeds it.
 */
3779 static void free_corrupt_block(struct cache_extent *cache)
3781 struct btrfs_corrupt_block *corrupt;
3783 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Presumably defines free_corrupt_blocks_tree(), which is called elsewhere in this file - confirm against the macro definition. */
3787 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3790 * Repair the btree of the given root.
3792 * The fix is to remove the node key in corrupt_blocks cache_tree
3793 * and rebalance the tree.
3794 * After the fix, the btree should be writeable.
/*
 * Runs in a single transaction and makes two passes over @corrupt_blocks.
 *
 * Pass 1: for each recorded block, search down to its level using the
 * recorded key, remove the pointer in that slot with btrfs_del_ptr(), then
 * free the extent the pointer referred to.
 * Pass 2: search every recorded key again, this time with -1 as the ins_len
 * argument, which the comment below describes as balancing the tree.
 *
 * An empty @corrupt_blocks tree is tested for before the transaction starts.
 */
3796 static int repair_btree(struct btrfs_root *root,
3797 struct cache_tree *corrupt_blocks)
3799 struct btrfs_trans_handle *trans;
3800 struct btrfs_path path;
3801 struct btrfs_corrupt_block *corrupt;
3802 struct cache_extent *cache;
3803 struct btrfs_key key;
3808 if (cache_tree_empty(corrupt_blocks))
3811 trans = btrfs_start_transaction(root, 1);
3812 if (IS_ERR(trans)) {
3813 ret = PTR_ERR(trans);
3814 fprintf(stderr, "Error starting transaction: %s\n",
3818 btrfs_init_path(&path);
3819 cache = first_cache_extent(corrupt_blocks);
3821 corrupt = container_of(cache, struct btrfs_corrupt_block,
3823 level = corrupt->level;
3824 path.lowest_level = level;
3825 key.objectid = corrupt->key.objectid;
3826 key.type = corrupt->key.type;
3827 key.offset = corrupt->key.offset;
3830 * Here we don't want to do any tree balance, since it may
3831 * cause a balance with corrupted brother leaf/node,
3832 * so ins_len set to 0 here.
3833 * Balance will be done after all corrupt node/leaf is deleted.
3835 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3838 offset = btrfs_node_blockptr(path.nodes[level],
3841 /* Remove the ptr */
3842 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3846 * Remove the corresponding extent
3847 * return value is not concerned.
3849 btrfs_release_path(&path);
3850 ret = btrfs_free_extent(trans, root, offset,
3851 root->fs_info->nodesize, 0,
3852 root->root_key.objectid, level - 1, 0);
3853 cache = next_cache_extent(cache);
3856 /* Balance the btree using btrfs_search_slot() */
3857 cache = first_cache_extent(corrupt_blocks);
3859 corrupt = container_of(cache, struct btrfs_corrupt_block,
3861 memcpy(&key, &corrupt->key, sizeof(key));
3862 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3865 /* return will always >0 since it won't find the item */
3867 btrfs_release_path(&path);
3868 cache = next_cache_extent(cache);
3871 btrfs_commit_transaction(trans, root);
3872 btrfs_release_path(&path);
/*
 * Check a single fs/subvolume tree.
 *
 * Steps, as far as they can be read from the code below:
 *  1. install a local corrupt_blocks tree in fs_info for the duration of the
 *     call (it is freed and the pointer reset to NULL at the end);
 *  2. unless this is the tree reloc root, get the root record from
 *     @root_cache and mark found_root_item when the root item has refs;
 *  3. move the orphan data extents of the root onto their inode records and
 *     flag those with I_ERR_FILE_EXTENT_ORPHAN;
 *  4. verify the root block with btrfs_check_leaf() / btrfs_check_node();
 *  5. walk the tree with walk_down_tree() / walk_up_tree().  The walk starts
 *     at the root node when the root item has refs or drop_progress has
 *     objectid 0; otherwise it starts from the drop_progress key at
 *     drop_level;
 *  6. print the corrupted tree blocks that were recorded and call
 *     repair_btree();
 *  7. merge the collected root records into @root_cache and run
 *     check_inode_recs() on the collected inode records.
 */
3876 static int check_fs_root(struct btrfs_root *root,
3877 struct cache_tree *root_cache,
3878 struct walk_control *wc)
3884 struct btrfs_path path;
3885 struct shared_node root_node;
3886 struct root_record *rec;
3887 struct btrfs_root_item *root_item = &root->root_item;
3888 struct cache_tree corrupt_blocks;
3889 struct orphan_data_extent *orphan;
3890 struct orphan_data_extent *tmp;
3891 enum btrfs_tree_block_status status;
3892 struct node_refs nrefs;
3895 * Reuse the corrupt_block cache tree to record corrupted tree block
3897 * Unlike the usage in extent tree check, here we do it in a per
3898 * fs/subvol tree base.
3900 cache_tree_init(&corrupt_blocks);
3901 root->fs_info->corrupt_blocks = &corrupt_blocks;
3903 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3904 rec = get_root_rec(root_cache, root->root_key.objectid);
3905 BUG_ON(IS_ERR(rec));
3906 if (btrfs_root_refs(root_item) > 0)
3907 rec->found_root_item = 1;
3910 btrfs_init_path(&path);
3911 memset(&root_node, 0, sizeof(root_node));
3912 cache_tree_init(&root_node.root_cache);
3913 cache_tree_init(&root_node.inode_cache);
3914 memset(&nrefs, 0, sizeof(nrefs));
3916 /* Move the orphan extent record to corresponding inode_record */
3917 list_for_each_entry_safe(orphan, tmp,
3918 &root->orphan_data_extents, list) {
3919 struct inode_record *inode;
3921 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3923 BUG_ON(IS_ERR(inode));
3924 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3925 list_move(&orphan->list, &inode->orphan_extents);
/* Reset the walk state so that it starts at the level of the root node. */
3928 level = btrfs_header_level(root->node);
3929 memset(wc->nodes, 0, sizeof(wc->nodes));
3930 wc->nodes[level] = &root_node;
3931 wc->active_node = level;
3932 wc->root_level = level;
3934 /* We may not have checked the root block, lets do that now */
3935 if (btrfs_is_leaf(root->node))
3936 status = btrfs_check_leaf(root, NULL, root->node);
3938 status = btrfs_check_node(root, NULL, root->node);
3939 if (status != BTRFS_TREE_BLOCK_CLEAN)
3942 if (btrfs_root_refs(root_item) > 0 ||
3943 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* The path takes its own reference on the root node. */
3944 path.nodes[level] = root->node;
3945 extent_buffer_get(root->node);
3946 path.slots[level] = 0;
3948 struct btrfs_key key;
3949 struct btrfs_disk_key found_key;
3951 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3952 level = root_item->drop_level;
3953 path.lowest_level = level;
/* drop_level is read from the root item, so it is range-checked before use. */
3954 if (level > btrfs_header_level(root->node) ||
3955 level >= BTRFS_MAX_LEVEL) {
3956 error("ignoring invalid drop level: %u", level);
3959 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3962 btrfs_node_key(path.nodes[level], &found_key,
3964 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3965 sizeof(found_key)));
3969 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3975 wret = walk_up_tree(root, &path, wc, &level);
3982 btrfs_release_path(&path);
3984 if (!cache_tree_empty(&corrupt_blocks)) {
3985 struct cache_extent *cache;
3986 struct btrfs_corrupt_block *corrupt;
3988 printf("The following tree block(s) is corrupted in tree %llu:\n",
3989 root->root_key.objectid);
3990 cache = first_cache_extent(&corrupt_blocks);
3992 corrupt = container_of(cache,
3993 struct btrfs_corrupt_block,
3995 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3996 cache->start, corrupt->level,
3997 corrupt->key.objectid, corrupt->key.type,
3998 corrupt->key.offset);
3999 cache = next_cache_extent(cache);
4002 printf("Try to repair the btree for root %llu\n",
4003 root->root_key.objectid);
4004 ret = repair_btree(root, &corrupt_blocks);
4006 fprintf(stderr, "Failed to repair btree: %s\n",
4009 printf("Btree for root %llu is fixed\n",
4010 root->root_key.objectid);
4014 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the record still being collected as checked before the final pass. */
4018 if (root_node.current) {
4019 root_node.current->checked = 1;
4020 maybe_free_inode_rec(&root_node.inode_cache,
4024 err = check_inode_recs(root, &root_node.inode_cache);
4028 free_corrupt_blocks_tree(&corrupt_blocks);
4029 root->fs_info->corrupt_blocks = NULL;
4030 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Iterate the tree root and check every fs root found in it.
 *
 * ROOT_ITEM keys whose objectid passes fs_root_objectid() are read (the tree
 * reloc root through btrfs_read_fs_root_no_cache(), all others through
 * btrfs_read_fs_root()) and handed to check_fs_root().  ROOT_REF and
 * ROOT_BACKREF keys are handed to process_root_ref().
 *
 * When the node of the tree root differs from the one remembered in
 * tree_node, or check_fs_root() returns -EAGAIN, the collected root records
 * are freed and the path is released (presumably so that the scan can start
 * over - the jump itself is not among the lines below).
 *
 * NOTE(review): the emptiness check on wc.shared near the end runs right
 * after free_extent_cache_tree() on that same tree - confirm whether the
 * warning can still trigger.
 */
4034 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4035 struct cache_tree *root_cache)
4037 struct btrfs_path path;
4038 struct btrfs_key key;
4039 struct walk_control wc;
4040 struct extent_buffer *leaf, *tree_node;
4041 struct btrfs_root *tmp_root;
4042 struct btrfs_root *tree_root = fs_info->tree_root;
4046 if (ctx.progress_enabled) {
4047 ctx.tp = TASK_FS_ROOTS;
4048 task_start(ctx.info);
4052 * Just in case we made any changes to the extent tree that weren't
4053 * reflected into the free space cache yet.
4056 reset_cached_block_groups(fs_info);
4057 memset(&wc, 0, sizeof(wc));
4058 cache_tree_init(&wc.shared);
4059 btrfs_init_path(&path);
4064 key.type = BTRFS_ROOT_ITEM_KEY;
4065 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4070 tree_node = tree_root->node;
4072 if (tree_node != tree_root->node) {
4073 free_root_recs_tree(root_cache);
4074 btrfs_release_path(&path);
4077 leaf = path.nodes[0];
4078 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4079 ret = btrfs_next_leaf(tree_root, &path);
4085 leaf = path.nodes[0];
4087 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4088 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4089 fs_root_objectid(key.objectid)) {
4090 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4091 tmp_root = btrfs_read_fs_root_no_cache(
4094 key.offset = (u64)-1;
4095 tmp_root = btrfs_read_fs_root(
4098 if (IS_ERR(tmp_root)) {
4102 ret = check_fs_root(tmp_root, root_cache, &wc);
4103 if (ret == -EAGAIN) {
4104 free_root_recs_tree(root_cache);
4105 btrfs_release_path(&path);
4110 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4111 btrfs_free_fs_root(tmp_root);
4112 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4113 key.type == BTRFS_ROOT_BACKREF_KEY) {
4114 process_root_ref(leaf, path.slots[0], &key,
4121 btrfs_release_path(&path);
4123 free_extent_cache_tree(&wc.shared);
4124 if (!cache_tree_empty(&wc.shared))
4125 fprintf(stderr, "warning line %d\n", __LINE__);
4127 task_stop(ctx.info);
4133 * Find the DIR_INDEX @index of the entry in @dirid that matches @location_id, name and file type.
4134 * Notice: time efficiency is O(N)
4136 * @root: the root of the fs/file tree
4137 * @index_ret: the index as return value
4138 * @namebuf: the name to match
4139 * @name_len: the length of name to match
4140 * @file_type: the file_type of INODE_ITEM to match
4142 * Returns 0 if found and *@index_ret will be modified with right value
4143 * Returns <0 if not found and *@index_ret will be (u64)-1
/*
 * Scan the DIR_INDEX items of directory @dirid from the highest index
 * downwards (search at offset (u64)-1, then btrfs_previous_item()) looking
 * for an entry whose location is INODE_ITEM @location_id with offset 0,
 * whose dir type equals @file_type and whose name equals
 * @namebuf/@name_len.
 *
 * On a match *@index_ret receives the offset of the matching item key.
 * When no previous DIR_INDEX item is left, *@index_ret is set to the
 * (u64)-1 sentinel.
 *
 * Names longer than BTRFS_NAME_LEN are truncated to BTRFS_NAME_LEN before
 * the comparison.
 */
4145 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4146 u64 *index_ret, char *namebuf, u32 name_len,
4149 struct btrfs_path path;
4150 struct extent_buffer *node;
4151 struct btrfs_dir_item *di;
4152 struct btrfs_key key;
4153 struct btrfs_key location;
4154 char name[BTRFS_NAME_LEN] = {0};
4166 /* search from the last index */
4167 key.objectid = dirid;
4168 key.offset = (u64)-1;
4169 key.type = BTRFS_DIR_INDEX_KEY;
4171 btrfs_init_path(&path);
4172 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4177 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/* must be the u64 sentinel; the former (64)-1 evaluated to 63 */
4180 *index_ret = (u64)-1;
4183 /* Check whether inode_id/filetype/name match */
4184 node = path.nodes[0];
4185 slot = path.slots[0];
4186 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4187 total = btrfs_item_size_nr(node, slot);
4188 while (cur < total) {
4190 len = btrfs_dir_name_len(node, di);
4191 data_len = btrfs_dir_data_len(node, di);
4193 btrfs_dir_item_key_to_cpu(node, di, &location);
4194 if (location.objectid != location_id ||
4195 location.type != BTRFS_INODE_ITEM_KEY ||
4196 location.offset != 0)
4199 filetype = btrfs_dir_type(node, di);
4200 if (file_type != filetype)
4203 if (len > BTRFS_NAME_LEN)
4204 len = BTRFS_NAME_LEN;
4206 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4207 if (len != name_len || strncmp(namebuf, name, len))
4210 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4211 *index_ret = key.offset;
4215 len += sizeof(*di) + data_len;
4216 di = (struct btrfs_dir_item *)((char *)di + len);
4222 btrfs_release_path(&path);
4227 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4228 * INODE_REF/INODE_EXTREF match.
4230 * @root: the root of the fs/file tree
4231 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4232 * value while find index
4233 * @location_key: location key of the struct btrfs_dir_item to match
4234 * @name: the name to match
4235 * @namelen: the length of name
4236 * @file_type: the type of file to match
4238 * Return 0 if no error occurred.
4239 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4240 * DIR_ITEM/DIR_INDEX
4241 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4242 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Look up the DIR_ITEM or DIR_INDEX addressed by @key and compare the
 * entries stored in it with @location_key, @file_type and @name/@namelen.
 *
 * When @key is a DIR_INDEX key whose offset is (u64)-1, find_dir_index() is
 * called first so that key->offset gets filled in with the index it finds.
 *
 * The result starts as the MISSING code for the key type when the search
 * does not succeed, and is set to the MISMATCH code at the top of each
 * entry iteration; the key type selects the DIR_ITEM or DIR_INDEX flavour.
 *
 * Names longer than BTRFS_NAME_LEN are trimmed to BTRFS_NAME_LEN, with a
 * warning, before they are compared.
 */
4244 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4245 struct btrfs_key *location_key, char *name,
4246 u32 namelen, u8 file_type)
4248 struct btrfs_path path;
4249 struct extent_buffer *node;
4250 struct btrfs_dir_item *di;
4251 struct btrfs_key location;
4252 char namebuf[BTRFS_NAME_LEN] = {0};
4261 /* get the index by traversing all index */
4262 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4263 ret = find_dir_index(root, key->objectid,
4264 location_key->objectid, &key->offset,
4265 name, namelen, file_type);
4267 ret = DIR_INDEX_MISSING;
4271 btrfs_init_path(&path);
4272 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4274 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4279 /* Check whether inode_id/filetype/name match */
4280 node = path.nodes[0];
4281 slot = path.slots[0];
4282 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4283 total = btrfs_item_size_nr(node, slot);
4284 while (cur < total) {
4285 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4286 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4288 len = btrfs_dir_name_len(node, di);
4289 data_len = btrfs_dir_data_len(node, di);
4291 btrfs_dir_item_key_to_cpu(node, di, &location);
4292 if (location.objectid != location_key->objectid ||
4293 location.type != location_key->type ||
4294 location.offset != location_key->offset)
4297 filetype = btrfs_dir_type(node, di);
4298 if (file_type != filetype)
4301 if (len > BTRFS_NAME_LEN) {
4302 len = BTRFS_NAME_LEN;
4303 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4305 key->type == BTRFS_DIR_ITEM_KEY ?
4306 "DIR_ITEM" : "DIR_INDEX",
4307 key->objectid, key->offset, len);
4309 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4311 if (len != namelen || strncmp(namebuf, name, len))
4317 len += sizeof(*di) + data_len;
4318 di = (struct btrfs_dir_item *)((char *)di + len);
4323 btrfs_release_path(&path);
4328 * Prints inode ref error message
/*
 * Report the problems recorded in @err for one INODE_REF entry.
 *
 * @root:     root the INODE_REF lives in (its objectid is printed)
 * @key:      key of the INODE_REF; key->offset is printed as the directory
 * @index:    dir index stored in the ref
 * @namebuf:  name stored in the ref
 * @name_len: length of @namebuf, used to compute the DIR_ITEM hash
 * @filetype: file type printed in the messages
 * @err:      error bitmap (DIR_ITEM and DIR_INDEX MISSING/MISMATCH bits)
 *
 * A ref whose objectid is BTRFS_FIRST_FREE_OBJECTID gets the dedicated
 * root dir message. Otherwise one message is printed for DIR_ITEM problems
 * and one for DIR_INDEX problems, each saying mismatch or missing.
 */
4330 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4331 u64 index, const char *namebuf, int name_len,
4332 u8 filetype, int err)
4337 /* root dir error */
4338 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4340 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4341 root->objectid, key->objectid, key->offset, namebuf);
4346 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4347 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4348 root->objectid, key->offset,
4349 btrfs_name_hash(namebuf, name_len),
4350 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4352 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4353 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4354 root->objectid, key->offset, index,
/* the DIR INDEX message must look at the DIR_INDEX bit, not the DIR_ITEM one */
4355 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4360 * Insert the missing inode item.
4362 * Returns 0 means success.
4363 * Returns <0 means error.
/*
 * Create the INODE_ITEM for @ino inside its own transaction: start a
 * transaction, search for the INODE_ITEM key with insertion and COW
 * enabled, call create_inode_item_lowmem() and commit.
 *
 * A search error, or a search that finds the item already present
 * (ret == 0), takes the branch guarded by the check below instead of
 * inserting (the branch target is not visible in this listing).
 *
 * NOTE(review): the return value of create_inode_item_lowmem() is not
 * captured on the visible call line.
 */
4365 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4368 struct btrfs_key key;
4369 struct btrfs_trans_handle *trans;
4370 struct btrfs_path path;
4374 key.type = BTRFS_INODE_ITEM_KEY;
4377 btrfs_init_path(&path);
4378 trans = btrfs_start_transaction(root, 1);
4379 if (IS_ERR(trans)) {
4384 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4385 if (ret < 0 || !ret)
4388 /* insert inode item */
4389 create_inode_item_lowmem(trans, root, ino, filetype);
4392 btrfs_commit_transaction(trans, root);
4395 error("failed to repair root %llu INODE ITEM[%llu] missing",
4396 root->objectid, ino);
4397 btrfs_release_path(&path);
4402 * The ternary means dir item, dir index and relative inode ref.
4403 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4404 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4406 * If two of three is missing or mismatched, delete the existing one.
4407 * If one of three is missing or mismatched, add the missing one.
4409 * returns 0 means success.
4410 * returns non-zero on error.
/*
 * Repair the (DIR_ITEM, DIR_INDEX, INODE_REF) triple describing the link
 * of @ino named @name inside directory @dir_ino.
 *
 * The three groups of bits in @err (DIR_INDEX, DIR_ITEM, INODE_REF) are
 * inspected to derive a stage value, as described by the in-body comment.
 * Inside one transaction the link is then either removed with
 * btrfs_unlink() or re-created with btrfs_add_link(); the final message
 * prints Delete for stage 2 and Add otherwise.
 */
4412 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4413 u64 index, char *name, int name_len, u8 filetype,
4416 struct btrfs_trans_handle *trans;
4421 * stage shall be one of following valild values:
4422 * 0: Fine, nothing to do.
4423 * 1: One of three is wrong, so add missing one.
4424 * 2: Two of three is wrong, so delete existed one.
4426 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4428 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4430 if (err & (INODE_REF_MISSING))
4433 /* stage must be smaller than 3 */
4436 trans = btrfs_start_transaction(root, 1);
4438 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4443 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4444 filetype, &index, 1, 1);
4448 btrfs_commit_transaction(trans, root);
4451 error("fail to repair inode %llu name %s filetype %u",
4452 ino, name, filetype);
4454 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4455 stage == 2 ? "Delete" : "Add",
4456 ino, name, filetype);
4462 * Traverse the given INODE_REF and call find_dir_item() to find related
4463 * DIR_ITEM/DIR_INDEX.
4465 * @root: the root of the fs/file tree
4466 * @ref_key: the key of the INODE_REF
4467 * @path: the path providing node and slot
4468 * @refs: the count of INODE_REF
4469 * @mode: the st_mode of INODE_ITEM
4470 * @name_ret: returns with the first ref's name
4471 * @name_len_ret: len of the name_ret
4473 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the INODE_REF item that @path points at and
 * verify that the parent directory (ref_key->offset) holds a matching
 * DIR_INDEX and DIR_ITEM for it, using find_dir_item() with the file type
 * derived from @mode.
 *
 * The name of the first entry is copied to @name_ret when that pointer is
 * set. For the root directory inode (BTRFS_FIRST_FREE_OBJECTID) the only
 * accepted ref has index 0, name .. and itself as parent; anything else
 * sets both DIR_INDEX_MISSING and DIR_ITEM_MISSING.
 *
 * When an entry has errors and repair is enabled, repair_ternary_lowmem()
 * is called; the need_research flag then triggers a fresh search of
 * @ref_key because the path and the item may have changed.
 *
 * NOTE(review): namebuf holds at most len (capped at BTRFS_NAME_LEN) bytes,
 * yet name_len is what gets passed to repair_ternary_lowmem() and
 * print_inode_ref_err() - confirm this is intended for over-long names.
 */
4475 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4476 struct btrfs_path *path, char *name_ret,
4477 u32 *namelen_ret, u64 *refs_ret, int mode)
4479 struct btrfs_key key;
4480 struct btrfs_key location;
4481 struct btrfs_inode_ref *ref;
4482 struct extent_buffer *node;
4483 char namebuf[BTRFS_NAME_LEN] = {0};
4493 int need_research = 0;
4501 /* since after repair, path and the dir item may be changed */
4502 if (need_research) {
4504 btrfs_release_path(path);
4505 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4506 /* the item was deleted, let path point to the last checked item */
4508 if (path->slots[0] == 0)
4509 btrfs_prev_leaf(root, path);
4517 location.objectid = ref_key->objectid;
4518 location.type = BTRFS_INODE_ITEM_KEY;
4519 location.offset = 0;
4520 node = path->nodes[0];
4521 slot = path->slots[0];
4523 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4524 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4525 total = btrfs_item_size_nr(node, slot);
4528 /* Update inode ref count */
4531 index = btrfs_inode_ref_index(node, ref);
4532 name_len = btrfs_inode_ref_name_len(node, ref);
4534 if (name_len <= BTRFS_NAME_LEN) {
4537 len = BTRFS_NAME_LEN;
4538 warning("root %llu INODE_REF[%llu %llu] name too long",
4539 root->objectid, ref_key->objectid, ref_key->offset);
4542 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4544 /* copy the first name found to name_ret */
4545 if (refs == 1 && name_ret) {
4546 memcpy(name_ret, namebuf, len);
4550 /* Check root dir ref */
4551 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4552 if (index != 0 || len != strlen("..") ||
4553 strncmp("..", namebuf, len) ||
4554 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4555 /* set err bits then repair will delete the ref */
4556 err |= DIR_INDEX_MISSING;
4557 err |= DIR_ITEM_MISSING;
4562 /* Find related DIR_INDEX */
4563 key.objectid = ref_key->offset;
4564 key.type = BTRFS_DIR_INDEX_KEY;
4566 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4567 imode_to_type(mode));
4569 /* Find related dir_item */
4570 key.objectid = ref_key->offset;
4571 key.type = BTRFS_DIR_ITEM_KEY;
4572 key.offset = btrfs_name_hash(namebuf, len);
4573 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4574 imode_to_type(mode));
4576 if (tmp_err && repair) {
4577 ret = repair_ternary_lowmem(root, ref_key->offset,
4578 ref_key->objectid, index, namebuf,
4579 name_len, imode_to_type(mode),
4586 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4587 imode_to_type(mode), tmp_err);
4589 len = sizeof(*ref) + name_len;
4590 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4601 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4602 * DIR_ITEM/DIR_INDEX.
4604 * @root: the root of the fs/file tree
4605 * @ref_key: the key of the INODE_EXTREF
4606 * @refs: the count of INODE_EXTREF
4607 * @mode: the st_mode of INODE_ITEM
4609 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the INODE_EXTREF item at @node/@slot and
 * verify that the parent directory recorded in each entry holds a matching
 * DIR_INDEX and DIR_ITEM, using find_dir_item().
 *
 * An entry with index 0 whose name is not .. is reported and flagged with
 * ROOT_DIR_ERROR. Names longer than BTRFS_NAME_LEN are cut to
 * BTRFS_NAME_LEN with a warning.
 *
 * find_dir_item() compares its file_type argument with btrfs_dir_type(),
 * so the st_mode carried in @mode is converted with imode_to_type() first,
 * as check_inode_ref() does.
 * NOTE(review): the declaration of @mode is on a line missing from this
 * listing; the conversion relies on the header comment, which documents it
 * as the st_mode of the INODE_ITEM.
 */
4611 static int check_inode_extref(struct btrfs_root *root,
4612 struct btrfs_key *ref_key,
4613 struct extent_buffer *node, int slot, u64 *refs,
4616 struct btrfs_key key;
4617 struct btrfs_key location;
4618 struct btrfs_inode_extref *extref;
4619 char namebuf[BTRFS_NAME_LEN] = {0};
4629 location.objectid = ref_key->objectid;
4630 location.type = BTRFS_INODE_ITEM_KEY;
4631 location.offset = 0;
4633 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4634 total = btrfs_item_size_nr(node, slot);
4637 /* update inode ref count */
4639 name_len = btrfs_inode_extref_name_len(node, extref);
4640 index = btrfs_inode_extref_index(node, extref);
4641 parent = btrfs_inode_extref_parent(node, extref);
4642 if (name_len <= BTRFS_NAME_LEN) {
4645 len = BTRFS_NAME_LEN;
4646 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4647 root->objectid, ref_key->objectid, ref_key->offset);
4649 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4651 /* Check root dir ref name */
4652 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4653 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4654 root->objectid, ref_key->objectid, ref_key->offset,
4656 err |= ROOT_DIR_ERROR;
4659 /* find related dir_index */
4660 key.objectid = parent;
4661 key.type = BTRFS_DIR_INDEX_KEY;
/* pass the dir file type, not the raw st_mode */
4663 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4666 /* find related dir_item */
4667 key.objectid = parent;
4668 key.type = BTRFS_DIR_ITEM_KEY;
4669 key.offset = btrfs_name_hash(namebuf, len);
4670 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4673 len = sizeof(*extref) + name_len;
4674 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4684 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4685 * DIR_ITEM/DIR_INDEX match.
4686 * Return with @index_ret.
4688 * @root: the root of the fs/file tree
4689 * @key: the key of the INODE_REF/INODE_EXTREF
4690 * @name: the name in the INODE_REF/INODE_EXTREF
4691 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4692 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4693 * value (u64)-1 means do not check index
4694 * @ext_ref: the EXTENDED_IREF feature
4696 * Return 0 if no error occurred.
4697 * Return >0 for error bitmap
/*
 * Search for an inode reference that matches @name/@namelen, first in the
 * INODE_REF item addressed by @key and then in the INODE_EXTREF item.
 *
 * INODE_REF pass: every entry of the item is visited; when *@index_ret is
 * not (u64)-1 it is compared with the index stored in the entry. The name
 * is compared after its length is bounded by the item size and
 * BTRFS_NAME_LEN. On a match *@index_ret is set to the entry index.
 *
 * INODE_EXTREF pass: @key is rewritten in place - its type becomes
 * BTRFS_INODE_EXTREF_KEY and its offset the extref hash of the original
 * offset (the directory id) and the name. Entries must also have the
 * directory id as parent to match.
 * NOTE(review): the condition that skips this pass when @ext_ref is not
 * set is on a line missing from this listing.
 *
 * The result is INODE_REF_MISSING when a search fails and at the start of
 * each entry iteration.
 */
4699 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4700 char *name, int namelen, u64 *index_ret,
4701 unsigned int ext_ref)
4703 struct btrfs_path path;
4704 struct btrfs_inode_ref *ref;
4705 struct btrfs_inode_extref *extref;
4706 struct extent_buffer *node;
4707 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4720 btrfs_init_path(&path);
4721 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4723 ret = INODE_REF_MISSING;
4727 node = path.nodes[0];
4728 slot = path.slots[0];
4730 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4731 total = btrfs_item_size_nr(node, slot);
4733 /* Iterate all entry of INODE_REF */
4734 while (cur < total) {
4735 ret = INODE_REF_MISSING;
4737 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4738 ref_index = btrfs_inode_ref_index(node, ref);
4739 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4742 if (cur + sizeof(*ref) + ref_namelen > total ||
4743 ref_namelen > BTRFS_NAME_LEN) {
4744 warning("root %llu INODE %s[%llu %llu] name too long",
4746 key->type == BTRFS_INODE_REF_KEY ?
4748 key->objectid, key->offset);
4750 if (cur + sizeof(*ref) > total)
4752 len = min_t(u32, total - cur - sizeof(*ref),
4758 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4761 if (len != namelen || strncmp(ref_namebuf, name, len))
4764 *index_ret = ref_index;
4768 len = sizeof(*ref) + ref_namelen;
4769 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4774 /* Skip if not support EXTENDED_IREF feature */
4778 btrfs_release_path(&path);
4779 btrfs_init_path(&path);
4781 dir_id = key->offset;
4782 key->type = BTRFS_INODE_EXTREF_KEY;
4783 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4785 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4787 ret = INODE_REF_MISSING;
4791 node = path.nodes[0];
4792 slot = path.slots[0];
4794 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4796 total = btrfs_item_size_nr(node, slot);
4798 /* Iterate all entry of INODE_EXTREF */
4799 while (cur < total) {
4800 ret = INODE_REF_MISSING;
4802 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4803 ref_index = btrfs_inode_extref_index(node, extref);
4804 parent = btrfs_inode_extref_parent(node, extref);
4805 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4808 if (parent != dir_id)
4811 if (ref_namelen <= BTRFS_NAME_LEN) {
4814 len = BTRFS_NAME_LEN;
4815 warning("root %llu INODE %s[%llu %llu] name too long",
4817 key->type == BTRFS_INODE_REF_KEY ?
4819 key->objectid, key->offset);
4821 read_extent_buffer(node, ref_namebuf,
4822 (unsigned long)(extref + 1), len);
4824 if (len != namelen || strncmp(ref_namebuf, name, len))
4827 *index_ret = ref_index;
4832 len = sizeof(*extref) + ref_namelen;
4833 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4838 btrfs_release_path(&path);
/*
 * Report the problems recorded in @err for one DIR_ITEM/DIR_INDEX entry.
 *
 * @root:     root the entry lives in (its objectid is printed)
 * @key:      key of the DIR_ITEM/DIR_INDEX being checked
 * @ino:      inode number the entry points to
 * @index:    dir index of the entry
 * @namebuf:  name stored in the entry
 * @name_len: length of @namebuf
 * @filetype: file type stored in the entry
 * @err:      error bitmap
 *
 * One message is printed per affected object: DIR ITEM, DIR INDEX,
 * INODE_ITEM (each saying mismatch or missing) and INODE REF (missing).
 */
4842 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4843 u64 ino, u64 index, const char *namebuf,
4844 int name_len, u8 filetype, int err)
4846 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4847 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4848 root->objectid, key->objectid, key->offset, namebuf,
4850 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
4853 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4854 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4855 root->objectid, key->objectid, index, namebuf, filetype,
/* the DIR INDEX message must look at the DIR_INDEX bit, not the DIR_ITEM one */
4856 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
4859 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4861 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4862 root->objectid, ino, index, namebuf, filetype,
4863 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
4866 if (err & INODE_REF_MISSING)
4868 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4869 root->objectid, ino, key->objectid, namebuf, filetype);
4874 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
4876 * Returns error after repair
/*
 * Repair the problems in @err for one directory entry and hand back the
 * error bits that remain.
 *
 * - INODE_ITEM_MISSING: repair_inode_item_missing() is called; the
 *   INODE_ITEM bits are cleared afterwards.
 * - any bit other than the INODE_ITEM ones: repair_ternary_lowmem() is
 *   called; the DIR_INDEX, DIR_ITEM and INODE_REF bits are cleared
 *   afterwards.
 *
 * NOTE(review): the checks on ret that guard the clearing are on lines
 * missing from this listing.
 */
4878 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
4879 u64 index, u8 filetype, char *namebuf, u32 name_len,
4884 if (err & INODE_ITEM_MISSING) {
4885 ret = repair_inode_item_missing(root, ino, filetype);
4887 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
4890 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
4891 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
4892 name_len, filetype, err);
4894 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
4895 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
4896 err &= ~(INODE_REF_MISSING);
/*
 * Walk the items of key type @type (DIR_ITEM or DIR_INDEX, see
 * count_dir_isize()) that belong to inode @ino, from the highest offset
 * downwards, visiting every entry packed in each item.
 *
 * For each entry the name length is read and capped at BTRFS_NAME_LEN;
 * the data length is then added to step to the next entry.
 * NOTE(review): presumably the capped name lengths are summed into the
 * output argument; the accumulation line is missing from this listing.
 */
4902 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
4905 struct btrfs_key key;
4906 struct btrfs_path path;
4908 struct btrfs_dir_item *di;
4918 key.offset = (u64)-1;
4920 btrfs_init_path(&path);
4921 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4926 /* if found, go to special case */
4931 ret = btrfs_previous_item(root, &path, ino, type);
4939 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
4941 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
4943 while (cur < total) {
4944 len = btrfs_dir_name_len(path.nodes[0], di);
4945 if (len > BTRFS_NAME_LEN)
4946 len = BTRFS_NAME_LEN;
4949 len += btrfs_dir_data_len(path.nodes[0], di);
4951 di = (struct btrfs_dir_item *)((char *)di + len);
4957 btrfs_release_path(&path);
/*
 * Compute the expected size of directory inode @ino as the sum of the
 * values produced by __count_dir_isize() for its DIR_ITEM entries and for
 * its DIR_INDEX entries, and store it in *@size.
 *
 * An error message naming the root and the inode is printed on the failure
 * path.
 */
4961 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
4968 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
4972 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
4976 *size = item_size + index_size;
4980 error("failed to count root %llu INODE[%llu] root size",
4981 root->objectid, ino);
4986 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4987 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4989 * @root: the root of the fs/file tree
4990 * @di_key: the key of the DIR_ITEM/DIR_INDEX
4992 * @size: the st_size of the INODE_ITEM
4993 * @ext_ref: the EXTENDED_IREF feature
4995 * Return 0 if no error occurred.
4996 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Check every entry packed in the DIR_ITEM/DIR_INDEX item that @path
 * points at (key @di_key).
 *
 * For each entry:
 *  - data_len and an over-long name are reported; the name length is added
 *    to *@size;
 *  - for DIR_ITEM keys the key offset must equal the hash of the name;
 *  - entries whose location is a ROOT_ITEM are not checked further;
 *  - the INODE_ITEM at the entry location must exist and its mode must map
 *    to the stored file type;
 *  - find_inode_ref() must find a matching INODE_REF/INODE_EXTREF;
 *  - find_dir_item() must find the counterpart item (DIR_INDEX for a
 *    DIR_ITEM and the other way round).
 *
 * With repair enabled, repair_dir_item() is called for an entry with
 * errors; need_research then forces a new search of @di_key and sets
 * DIR_COUNT_AGAIN in the result. A DIR_INDEX item with more than one entry
 * is reported.
 *
 * @path is released and searched again for @di_key before the function
 * ends, because the INODE_ITEM lookup reuses it.
 *
 * NOTE(review): the final lookup ORs a negative errno (-ENOENT or ret)
 * into the err bitmap - confirm callers expect that.
 * NOTE(review): the counterpart DIR_ITEM hash and lookup use name_len
 * although namebuf holds at most len bytes - confirm for over-long names.
 */
4998 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
4999 struct btrfs_path *path, u64 *size,
5000 unsigned int ext_ref)
5002 struct btrfs_dir_item *di;
5003 struct btrfs_inode_item *ii;
5004 struct btrfs_key key;
5005 struct btrfs_key location;
5006 struct extent_buffer *node;
5008 char namebuf[BTRFS_NAME_LEN] = {0};
5020 int need_research = 0;
5023 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5024 * ignore index check.
5026 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5027 index = di_key->offset;
5034 /* since after repair, path and the dir item may be changed */
5035 if (need_research) {
5037 err |= DIR_COUNT_AGAIN;
5038 btrfs_release_path(path);
5039 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5040 /* the item was deleted, let path point the last checked item */
5042 if (path->slots[0] == 0)
5043 btrfs_prev_leaf(root, path);
5051 node = path->nodes[0];
5052 slot = path->slots[0];
5054 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5055 total = btrfs_item_size_nr(node, slot);
5056 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5058 while (cur < total) {
5059 data_len = btrfs_dir_data_len(node, di);
5062 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5064 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5065 di_key->objectid, di_key->offset, data_len);
5067 name_len = btrfs_dir_name_len(node, di);
5068 if (name_len <= BTRFS_NAME_LEN) {
5071 len = BTRFS_NAME_LEN;
5072 warning("root %llu %s[%llu %llu] name too long",
5074 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5075 di_key->objectid, di_key->offset);
5077 (*size) += name_len;
5078 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5080 filetype = btrfs_dir_type(node, di);
5082 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5083 di_key->offset != btrfs_name_hash(namebuf, len)) {
5085 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5086 root->objectid, di_key->objectid, di_key->offset,
5087 namebuf, len, filetype, di_key->offset,
5088 btrfs_name_hash(namebuf, len));
5091 btrfs_dir_item_key_to_cpu(node, di, &location);
5092 /* Ignore related ROOT_ITEM check */
5093 if (location.type == BTRFS_ROOT_ITEM_KEY)
5096 btrfs_release_path(path);
5097 /* Check relative INODE_ITEM(existence/filetype) */
5098 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5100 tmp_err |= INODE_ITEM_MISSING;
5104 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5105 struct btrfs_inode_item);
5106 mode = btrfs_inode_mode(path->nodes[0], ii);
5107 if (imode_to_type(mode) != filetype) {
5108 tmp_err |= INODE_ITEM_MISMATCH;
5112 /* Check relative INODE_REF/INODE_EXTREF */
5113 key.objectid = location.objectid;
5114 key.type = BTRFS_INODE_REF_KEY;
5115 key.offset = di_key->objectid;
5116 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5119 /* check relative INDEX/ITEM */
5120 key.objectid = di_key->objectid;
5121 if (key.type == BTRFS_DIR_ITEM_KEY) {
5122 key.type = BTRFS_DIR_INDEX_KEY;
5125 key.type = BTRFS_DIR_ITEM_KEY;
5126 key.offset = btrfs_name_hash(namebuf, name_len);
5129 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5130 name_len, filetype);
5131 /* find_dir_item may find index */
5132 if (key.type == BTRFS_DIR_INDEX_KEY)
5136 if (tmp_err && repair) {
5137 ret = repair_dir_item(root, di_key->objectid,
5138 location.objectid, index,
5139 imode_to_type(mode), namebuf,
5141 if (ret != tmp_err) {
5146 btrfs_release_path(path);
5147 print_dir_item_err(root, di_key, location.objectid, index,
5148 namebuf, name_len, filetype, tmp_err);
5150 len = sizeof(*di) + name_len + data_len;
5151 di = (struct btrfs_dir_item *)((char *)di + len);
5154 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5155 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5156 root->objectid, di_key->objectid,
5163 btrfs_release_path(path);
5164 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5166 err |= ret > 0 ? -ENOENT : ret;
5171 * Wrapper function of btrfs_punch_hole.
5173 * Returns 0 means success.
5174 * Returns not 0 means error.
/*
 * Run btrfs_punch_hole() for inode @ino over the range starting at @start
 * inside its own transaction, print the outcome and commit.
 *
 * If the transaction cannot be started, PTR_ERR(trans) is returned right
 * away.
 */
5176 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5179 struct btrfs_trans_handle *trans;
5182 trans = btrfs_start_transaction(root, 1);
5184 return PTR_ERR(trans);
5186 ret = btrfs_punch_hole(trans, root, ino, start, len);
5188 error("failed to add hole [%llu, %llu] in inode [%llu]",
5191 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5194 btrfs_commit_transaction(trans, root);
5199 * Check file extent datasum/hole, update the size of the file extents,
5200 * check and update the last offset of the file extent.
5202 * @root: the root of fs/file tree.
5203 * @fkey: the key of the file extent.
5204 * @nodatasum: INODE_NODATASUM feature.
5205 * @size: the sum of all EXTENT_DATA items size for this inode.
5206 * @end: the offset of the last extent.
5208 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item (key @fkey, located at @node/@slot).
 *
 * Inline extents: an empty inline extent, or an uncompressed one whose
 * length differs from the inline item length, sets FILE_EXTENT_ERROR; the
 * extent length is added to both *@end and *@size.
 *
 * Other extents must be REG or PREALLOC, otherwise FILE_EXTENT_ERROR is
 * set. Checksums are counted with count_csum_range() over the range chosen
 * by the in-body comment, then:
 *  - csums found on a nodatasum inode              -> ODD_CSUM_ITEM
 *  - REG, datasum, not a hole, lookup failed or
 *    fewer csum bytes than the searched length     -> CSUM_ITEM_MISSING
 *  - PREALLOC with csums                           -> ODD_CSUM_ITEM
 *
 * Hole check: when no_holes is not set and the extent does not start at
 * *@end, the gap is reported as FILE_EXTENT_ERROR unless repair is enabled
 * and punch_extent_hole() succeeds. *@end and *@size are then advanced by
 * the extent length.
 * NOTE(review): the lines deciding when *@size is advanced are partly
 * missing from this listing.
 */
5210 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5211 struct extent_buffer *node, int slot,
5212 unsigned int nodatasum, u64 *size, u64 *end)
5214 struct btrfs_file_extent_item *fi;
5217 u64 extent_num_bytes;
5219 u64 csum_found; /* In byte size, sectorsize aligned */
5220 u64 search_start; /* Logical range start we search for csum */
5221 u64 search_len; /* Logical range len we search for csum */
5222 unsigned int extent_type;
5223 unsigned int is_hole;
5228 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5230 /* Check inline extent */
5231 extent_type = btrfs_file_extent_type(node, fi);
5232 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5233 struct btrfs_item *e = btrfs_item_nr(slot);
5234 u32 item_inline_len;
5236 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5237 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5238 compressed = btrfs_file_extent_compression(node, fi);
5239 if (extent_num_bytes == 0) {
5241 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5242 root->objectid, fkey->objectid, fkey->offset);
5243 err |= FILE_EXTENT_ERROR;
5245 if (!compressed && extent_num_bytes != item_inline_len) {
5247 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5248 root->objectid, fkey->objectid, fkey->offset,
5249 extent_num_bytes, item_inline_len);
5250 err |= FILE_EXTENT_ERROR;
5252 *end += extent_num_bytes;
5253 *size += extent_num_bytes;
5257 /* Check extent type */
5258 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5259 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5260 err |= FILE_EXTENT_ERROR;
5261 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5262 root->objectid, fkey->objectid, fkey->offset);
5266 /* Check REG_EXTENT/PREALLOC_EXTENT */
5267 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5268 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5269 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5270 extent_offset = btrfs_file_extent_offset(node, fi);
5271 compressed = btrfs_file_extent_compression(node, fi);
5272 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5275 * Check EXTENT_DATA csum
5277 * For plain (uncompressed) extent, we should only check the range
5278 * we're referring to, as it's possible that part of prealloc extent
5279 * has been written, and has csum:
5281 * |<--- Original large preallocated extent A ---->|
5282 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5285 * For compressed extent, we should check the whole range.
5288 search_start = disk_bytenr + extent_offset;
5289 search_len = extent_num_bytes;
5291 search_start = disk_bytenr;
5292 search_len = disk_num_bytes;
5294 ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
5295 if (csum_found > 0 && nodatasum) {
5296 err |= ODD_CSUM_ITEM;
5297 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5298 root->objectid, fkey->objectid, fkey->offset);
5299 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5300 !is_hole && (ret < 0 || csum_found < search_len)) {
5301 err |= CSUM_ITEM_MISSING;
5302 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5303 root->objectid, fkey->objectid, fkey->offset,
5304 csum_found, search_len);
5305 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5306 err |= ODD_CSUM_ITEM;
5307 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5308 root->objectid, fkey->objectid, fkey->offset, csum_found);
5311 /* Check EXTENT_DATA hole */
5312 if (!no_holes && *end != fkey->offset) {
5314 ret = punch_extent_hole(root, fkey->objectid,
5315 *end, fkey->offset - *end);
5316 if (!repair || ret) {
5317 err |= FILE_EXTENT_ERROR;
5319 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5320 root->objectid, fkey->objectid, fkey->offset,
5321 fkey->objectid, *end);
5325 *end += extent_num_bytes;
5327 *size += extent_num_bytes;
5333 * Set inode item nbytes to @nbytes
5335 * Returns 0 on success
5336 * Returns != 0 on error
/*
 * Rewrite the nbytes field of the INODE_ITEM of @ino to @nbytes.
 *
 * The key that @path currently points at is remembered first. The path is
 * then released and reused to look up the INODE_ITEM inside a new
 * transaction; the field is set, the buffer is marked dirty and the
 * transaction is committed. Finally @path is released and searched again
 * for the remembered key, so the caller gets it back at its previous item.
 *
 * The outcome is reported with error() on failure and printf() on success.
 */
5338 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5339 struct btrfs_path *path,
5340 u64 ino, u64 nbytes)
5342 struct btrfs_trans_handle *trans;
5343 struct btrfs_inode_item *ii;
5344 struct btrfs_key key;
5345 struct btrfs_key research_key;
5349 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5352 key.type = BTRFS_INODE_ITEM_KEY;
5355 trans = btrfs_start_transaction(root, 1);
5356 if (IS_ERR(trans)) {
5357 ret = PTR_ERR(trans);
5362 btrfs_release_path(path);
5363 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5371 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5372 struct btrfs_inode_item);
5373 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5374 btrfs_mark_buffer_dirty(path->nodes[0]);
5376 btrfs_commit_transaction(trans, root);
5379 error("failed to set nbytes in inode %llu root %llu",
5380 ino, root->root_key.objectid);
/* newline belongs at the end of the message, not in the middle of it */
5382 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5383 root->root_key.objectid, nbytes);
5386 btrfs_release_path(path);
5387 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5394 * Set directory inode isize to @isize.
5396 * Returns 0 on success.
5397 * Returns != 0 on error.
/*
 * Rewrite the size field of the INODE_ITEM of a directory inode to @isize.
 *
 * The key that @path currently points at is remembered first. The path is
 * then released and reused to look up the INODE_ITEM inside a new
 * transaction; the size is set, the buffer is marked dirty and the
 * transaction is committed. Finally @path is released and searched again
 * for the remembered key.
 *
 * The outcome is reported with error() on failure and printf() on success.
 */
5399 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5400 struct btrfs_path *path,
5403 struct btrfs_trans_handle *trans;
5404 struct btrfs_inode_item *ii;
5405 struct btrfs_key key;
5406 struct btrfs_key research_key;
5410 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5413 key.type = BTRFS_INODE_ITEM_KEY;
5416 trans = btrfs_start_transaction(root, 1);
5417 if (IS_ERR(trans)) {
5418 ret = PTR_ERR(trans);
5423 btrfs_release_path(path);
5424 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5432 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5433 struct btrfs_inode_item);
5434 btrfs_set_inode_size(path->nodes[0], ii, isize);
5435 btrfs_mark_buffer_dirty(path->nodes[0]);
5437 btrfs_commit_transaction(trans, root);
5440 error("failed to set isize in inode %llu root %llu",
5441 ino, root->root_key.objectid);
5443 printf("Set isize in inode %llu root %llu to %llu\n",
5444 ino, root->root_key.objectid, isize);
5446 btrfs_release_path(path);
5447 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5454 * Wrapper function for btrfs_add_orphan_item().
5456 * Returns 0 on success.
5457 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino through btrfs_add_orphan_item() inside
 * its own transaction.
 *
 * The key that @path currently points at is remembered first; @path is
 * released, lent to btrfs_add_orphan_item(), and after the commit released
 * and searched again for the remembered key.
 *
 * The outcome is reported with error() on failure and printf() on success.
 */
5459 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5460 struct btrfs_path *path, u64 ino)
5462 struct btrfs_trans_handle *trans;
5463 struct btrfs_key research_key;
5467 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5469 trans = btrfs_start_transaction(root, 1);
5470 if (IS_ERR(trans)) {
5471 ret = PTR_ERR(trans);
5476 btrfs_release_path(path);
5477 ret = btrfs_add_orphan_item(trans, root, path, ino);
5479 btrfs_commit_transaction(trans, root);
5482 error("failed to add inode %llu as orphan item root %llu",
5483 ino, root->root_key.objectid);
5485 printf("Added inode %llu as orphan item root %llu\n",
5486 ino, root->root_key.objectid);
5488 btrfs_release_path(path);
5489 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5495 /* Set inode_item nlink to @ref_count.
5496 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5498 * Returns 0 on success
/*
 * Set the nlink field of inode @ino's INODE_ITEM to @ref_count inside a
 * transaction. When @ref_count is 0 the inode is first linked through
 * link_inode_to_lostfound(), which receives &ref_count and may update it.
 * Afterwards @path is re-searched for the key it pointed at on entry.
 *
 * @root:      tree handed to the transaction, search and commit calls
 * @path:      released and reused internally, re-positioned before returning
 * @ino:       inode number being repaired
 * @name:      file name to use; when NULL or @namelen is 0 the decimal inode
 *             number is used as the name instead
 * @namelen:   length of @name, asserted to be at most BTRFS_NAME_LEN
 * @ref_count: number of references the inode should record
 * @filetype:  passed to link_inode_to_lostfound() and printed in the messages
 * @nlink:     output parameter (its use is not visible in this chunk)
 */
5500 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5501 struct btrfs_path *path, u64 ino,
5502 const char *name, u32 namelen,
5503 u64 ref_count, u8 filetype, u64 *nlink)
5505 struct btrfs_trans_handle *trans;
5506 struct btrfs_inode_item *ii;
5507 struct btrfs_key key;
5508 struct btrfs_key old_key;
/*
 * One byte larger than the longest allowed name so the zero-initialised
 * buffer stays NUL-terminated for the "%s" prints below even when
 * namelen == BTRFS_NAME_LEN.
 */
5509 char namebuf[BTRFS_NAME_LEN + 1] = {0};
/* Remember the caller's position so it can be looked up again at the end. */
5515 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5517 if (name && namelen) {
5518 ASSERT(namelen <= BTRFS_NAME_LEN);
5519 memcpy(namebuf, name, namelen);
/* No usable name was supplied: fall back to the decimal inode number. */
5522 sprintf(namebuf, "%llu", ino);
5523 name_len = count_digits(ino);
5524 printf("Can't find file name for inode %llu, use %s instead\n",
5528 trans = btrfs_start_transaction(root, 1);
5529 if (IS_ERR(trans)) {
5530 ret = PTR_ERR(trans);
5534 btrfs_release_path(path);
5535 /* if refs is 0, put it into lostfound */
5536 if (ref_count == 0) {
5537 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5538 name_len, filetype, &ref_count);
5543 /* reset inode_item's nlink to ref_count */
5545 key.type = BTRFS_INODE_ITEM_KEY;
5548 btrfs_release_path(path);
5549 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5555 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5556 struct btrfs_inode_item);
5557 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5558 btrfs_mark_buffer_dirty(path->nodes[0]);
5563 btrfs_commit_transaction(trans, root);
/* Argument order follows the format string: inode number first, then root id. */
5567 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5568 ino, root->objectid, namebuf, filetype);
5570 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5571 ino, root->objectid, namebuf, filetype);
/* Read-only search to restore the caller's position; result kept apart in ret2. */
5574 btrfs_release_path(path);
5575 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5582 * Check INODE_ITEM and related ITEMs (the same inode number)
5583 * 1. check link count
5584 * 2. check inode ref/extref
5585 * 3. check dir item/index
5587 * @ext_ref: the EXTENDED_IREF feature
5589 * Return 0 if no error occurred.
5590 * Return >0 if an error was found or the traversal is done (reported as an error bitmap)
/*
 * Check one INODE_ITEM and the items that follow it with the same objectid.
 *
 * The function reads the inode fields at the current @path position, walks
 * forward with btrfs_next_item() while the key's objectid matches, dispatches
 * each item to the matching checker by key type, and finally compares the
 * collected totals (refs, size, extent_size) against nlink, isize and nbytes.
 * When the global `repair` flag is set, the matching repair_*_lowmem() helper
 * is tried before an error bit is recorded in `err`.
 *
 * @root:    fs tree being checked
 * @path:    points at the INODE_ITEM on entry and is advanced by this function
 * @ext_ref: non-zero when extended inode refs are supported
 */
5592 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5593 unsigned int ext_ref)
5595 struct extent_buffer *node;
5596 struct btrfs_inode_item *ii;
5597 struct btrfs_key key;
5598 struct btrfs_key last_key;
5607 u64 extent_size = 0;
5609 unsigned int nodatasum;
5613 char namebuf[BTRFS_NAME_LEN] = {0};
5616 node = path->nodes[0];
5617 slot = path->slots[0];
5619 btrfs_item_key_to_cpu(node, &key, slot);
5620 inode_id = key.objectid;
/* The orphan objectid is not checked as an inode here: just step to the next item. */
5622 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5623 ret = btrfs_next_item(root, path);
/* Snapshot the fields of the INODE_ITEM that are verified after the walk. */
5629 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5630 isize = btrfs_inode_size(node, ii);
5631 nbytes = btrfs_inode_nbytes(node, ii);
5632 mode = btrfs_inode_mode(node, ii);
5633 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5634 nlink = btrfs_inode_nlink(node, ii);
5635 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Remember the key of the item we are leaving so the position can be restored on LAST_ITEM. */
5638 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5639 ret = btrfs_next_item(root, path);
5641 /* out will fill 'err' using current statistics */
5643 } else if (ret > 0) {
5648 node = path->nodes[0];
5649 slot = path->slots[0];
5650 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items belonging to this inode are exhausted. */
5651 if (key.objectid != inode_id)
5655 case BTRFS_INODE_REF_KEY:
5656 ret = check_inode_ref(root, &key, path, namebuf,
5657 &name_len, &refs, mode);
5660 case BTRFS_INODE_EXTREF_KEY:
5661 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5662 warning("root %llu EXTREF[%llu %llu] isn't supported",
5663 root->objectid, key.objectid,
5665 ret = check_inode_extref(root, &key, node, slot, &refs,
5669 case BTRFS_DIR_ITEM_KEY:
5670 case BTRFS_DIR_INDEX_KEY:
5672 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5673 root->objectid, inode_id,
5674 imode_to_type(mode), key.objectid,
5677 ret = check_dir_item(root, &key, path, &size, ext_ref);
5680 case BTRFS_EXTENT_DATA_KEY:
5682 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5683 root->objectid, inode_id, key.objectid,
5686 ret = check_file_extent(root, &key, node, slot,
5687 nodatasum, &extent_size,
5691 case BTRFS_XATTR_ITEM_KEY:
5694 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5695 key.objectid, key.type, key.offset);
/* The walk ran off the end: go back to the last item seen with a read-only search. */
5700 if (err & LAST_ITEM) {
5701 btrfs_release_path(path);
5702 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5707 /* verify INODE_ITEM nlink/isize/nbytes */
/* In repair mode a pending DIR_COUNT_AGAIN request is cleared and the directory size recounted. */
5709 if (repair && (err & DIR_COUNT_AGAIN)) {
5710 err &= ~DIR_COUNT_AGAIN;
5711 count_dir_isize(root, inode_id, &size);
5714 if ((nlink != 1 || refs != 1) && repair) {
5715 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5716 namebuf, name_len, refs, imode_to_type(mode),
5721 err |= LINK_COUNT_ERROR;
5722 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5723 root->objectid, inode_id, nlink);
5727 * Just a warning, as dir inode nbytes is just an
5728 * instructive value.
5730 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5731 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5732 root->objectid, inode_id,
5733 root->fs_info->nodesize);
/* Stored directory size must equal the size accumulated from its dir items. */
5736 if (isize != size) {
5738 ret = repair_dir_isize_lowmem(root, path,
5740 if (!repair || ret) {
5743 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5744 root->objectid, inode_id, isize, size);
/* Stored link count must equal the number of inode refs found. */
5748 if (nlink != refs) {
5750 ret = repair_inode_nlinks_lowmem(root, path,
5751 inode_id, namebuf, name_len, refs,
5752 imode_to_type(mode), &nlink);
5753 if (!repair || ret) {
5754 err |= LINK_COUNT_ERROR;
5756 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5757 root->objectid, inode_id, nlink, refs);
5759 } else if (!nlink) {
5761 ret = repair_inode_orphan_item_lowmem(root,
5763 if (!repair || ret) {
5765 error("root %llu INODE[%llu] is orphan item",
5766 root->objectid, inode_id);
/* With no bytes accounted and holes required, the extents must reach isize; otherwise punch a hole. */
5770 if (!nbytes && !no_holes && extent_end < isize) {
5772 ret = punch_extent_hole(root, inode_id,
5773 extent_end, isize - extent_end);
5774 if (!repair || ret) {
5775 err |= NBYTES_ERROR;
5777 "root %llu INODE[%llu] size %llu should have a file extent hole",
5778 root->objectid, inode_id, isize);
/* Stored nbytes must equal the extent size summed by check_file_extent(). */
5782 if (nbytes != extent_size) {
5784 ret = repair_inode_nbytes_lowmem(root, path,
5785 inode_id, extent_size);
5786 if (!repair || ret) {
5787 err |= NBYTES_ERROR;
5789 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5790 root->objectid, inode_id, nbytes,
/* The path was moved back to last_key above, so step forward once more. */
5796 if (err & LAST_ITEM)
5797 btrfs_next_item(root, path);
5802 * Insert the missing inode item and inode ref.
5804 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
5805 * Root dir should be handled specially because root dir is the root of fs.
5807 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (objectid BTRFS_FIRST_FREE_OBJECTID) of @root.
 *
 * @root: tree being repaired
 * @err:  error bitmap; INODE_REF_MISSING and INODE_ITEM_MISSING are the bits
 *        examined here, and each is cleared when its repair goes through
 */
5809 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5811 struct btrfs_trans_handle *trans;
5812 struct btrfs_key key;
5813 struct btrfs_path path;
5814 int filetype = BTRFS_FT_DIR;
5817 btrfs_init_path(&path);
/* Missing INODE_REF: insert a ".." ref whose inode and parent are both the first inode. */
5819 if (err & INODE_REF_MISSING) {
5820 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5821 key.type = BTRFS_INODE_REF_KEY;
5822 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5824 trans = btrfs_start_transaction(root, 1);
5825 if (IS_ERR(trans)) {
5826 ret = PTR_ERR(trans);
5830 btrfs_release_path(&path);
5831 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5835 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5836 BTRFS_FIRST_FREE_OBJECTID,
5837 BTRFS_FIRST_FREE_OBJECTID, 0);
5841 printf("Add INODE_REF[%llu %llu] name %s\n",
5842 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5844 err &= ~INODE_REF_MISSING;
5847 error("fail to insert first inode's ref");
5848 btrfs_commit_transaction(trans, root);
/* Missing INODE_ITEM: delegate to repair_inode_item_missing() with a directory file type. */
5851 if (err & INODE_ITEM_MISSING) {
5852 ret = repair_inode_item_missing(root,
5853 BTRFS_FIRST_FREE_OBJECTID, filetype);
5856 err &= ~INODE_ITEM_MISSING;
5860 error("fail to repair first inode");
5861 btrfs_release_path(&path);
5866 * check first root dir's inode_item and inode_ref
5868 * returns 0 means no error
5869 * returns >0 means error
5870 * returns <0 means fatal error
/*
 * Check that the first inode (objectid BTRFS_FIRST_FREE_OBJECTID) of @root has
 * an INODE_ITEM of directory type and a ".." INODE_REF.
 *
 * @root:    fs tree being checked
 * @ext_ref: forwarded to find_inode_ref()
 *
 * Returns a negative `ret` if one was recorded, otherwise the error bitmap
 * `err` (see the final return statement).
 */
5872 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5874 struct btrfs_path path;
5875 struct btrfs_key key;
5876 struct btrfs_inode_item *ii;
5882 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5883 key.type = BTRFS_INODE_ITEM_KEY;
5886 /* For root being dropped, we don't need to check first inode */
5887 if (btrfs_root_refs(&root->root_item) == 0 &&
5888 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5889 BTRFS_FIRST_FREE_OBJECTID)
5892 btrfs_init_path(&path);
5893 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5898 err |= INODE_ITEM_MISSING;
/* The item exists: its mode must describe a directory. */
5900 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5901 struct btrfs_inode_item);
5902 mode = btrfs_inode_mode(path.nodes[0], ii);
5903 if (imode_to_type(mode) != BTRFS_FT_DIR)
5904 err |= INODE_ITEM_MISMATCH;
5907 /* lookup first inode ref */
5908 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5909 key.type = BTRFS_INODE_REF_KEY;
5910 /* special index value */
5913 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5919 btrfs_release_path(&path);
/* Try to repair (the guarding condition is not visible here), then report what is still wrong. */
5922 err = repair_fs_first_inode(root, err);
5924 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5925 error("root dir INODE_ITEM is %s",
5926 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5927 if (err & INODE_REF_MISSING)
5928 error("root dir INODE_REF is missing");
5930 return ret < 0 ? ret : err;
/*
 * Look up a tree backref in rec->backref_tree.
 *
 * A stack-allocated `match` record is filled in as the search key and handed
 * to rb_search() with compare_extent_backref as the comparator. The visible
 * lines set match.parent and mark it as a full backref; the branch that fills
 * in @root instead is not visible in this chunk.
 */
5933 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5934 u64 parent, u64 root)
5936 struct rb_node *node;
5937 struct tree_backref *back = NULL;
5938 struct tree_backref match = {
5945 match.parent = parent;
5946 match.node.full_backref = 1;
5951 node = rb_search(&rec->backref_tree, &match.node.node,
5952 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb node back to its containing tree_backref. */
5954 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref in rec->backref_tree.
 *
 * A stack-allocated `match` record carries the search key (found_ref and
 * disk_bytenr among the visible initialisers) and is handed to rb_search()
 * with compare_extent_backref as the comparator. The visible lines set
 * match.parent and mark it as a full backref; the alternative branch is not
 * visible in this chunk.
 */
5959 static struct data_backref *find_data_backref(struct extent_record *rec,
5960 u64 parent, u64 root,
5961 u64 owner, u64 offset,
5963 u64 disk_bytenr, u64 bytes)
5965 struct rb_node *node;
5966 struct data_backref *back = NULL;
5967 struct data_backref match = {
5974 .found_ref = found_ref,
5975 .disk_bytenr = disk_bytenr,
5979 match.parent = parent;
5980 match.node.full_backref = 1;
5985 node = rb_search(&rec->backref_tree, &match.node.node,
5986 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb node back to its containing data_backref. */
5988 back = to_data_backref(rb_node_to_extent_backref(node));
5993 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
5994 * blocks and integrity of fs tree items.
5996 * @root: the root of the tree to be checked.
5997 * @ext_ref: whether the EXTENDED_IREF feature is enabled.
5998 * @account if NOT 0 means check the tree (including tree)'s treeblocks.
5999 * otherwise means check fs tree(s) items relationship and
6000 * @root MUST be a fs tree root.
6001 * Returns 0 represents OK.
6002 * Returns not 0 represents error.
/*
 * Check one tree by walking it with walk_down_tree_v2() / walk_up_tree_v2().
 *
 * Before the walk the first inode is checked separately through
 * check_fs_first_inode(). The walk starts either at the root node or, for a
 * root with no refs and a non-zero drop_progress key, at the position
 * recorded in the root item's drop_progress / drop_level.
 *
 * @trans:   forwarded to walk_down_tree_v2()
 * @root:    tree to check
 * @ext_ref: forwarded to the first-inode check and to the walk
 */
6004 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6005 struct btrfs_root *root, unsigned int ext_ref,
6009 struct btrfs_path path;
6010 struct node_refs nrefs;
6011 struct btrfs_root_item *root_item = &root->root_item;
6016 memset(&nrefs, 0, sizeof(nrefs));
6019 * We need to manually check the first inode item (256)
6020 * As the following traversal function will only start from
6021 * the first inode item in the leaf, if inode item (256) is
6022 * missing we will skip it forever.
6024 ret = check_fs_first_inode(root, ext_ref);
6030 level = btrfs_header_level(root->node);
6031 btrfs_init_path(&path);
/* Referenced root, or drop not started: begin at slot 0 of the root node and take a reference on it. */
6033 if (btrfs_root_refs(root_item) > 0 ||
6034 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6035 path.nodes[level] = root->node;
6036 path.slots[level] = 0;
6037 extent_buffer_get(root->node);
6039 struct btrfs_key key;
/* Otherwise start from the drop_progress key at drop_level. */
6041 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6042 level = root_item->drop_level;
6043 path.lowest_level = level;
6044 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6051 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6052 ext_ref, check_all);
6056 /* if ret is negative, walk shall stop */
6062 ret = walk_up_tree_v2(root, &path, &level);
6064 /* Normal exit, reset ret to err */
6071 btrfs_release_path(&path);
6076 * Iterate all items in the tree and call check_inode_item() to check.
6078 * @root: the root of the tree to be checked.
6079 * @ext_ref: the EXTENDED_IREF feature
6081 * Return 0 if no error found.
6082 * Return <0 for error.
/*
 * Reset the cached block groups of the filesystem, then check @root through
 * check_btrfs_root() with a NULL transaction and 0 as the last argument.
 */
6084 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6086 reset_cached_block_groups(root->fs_info);
6087 return check_btrfs_root(NULL, root, ext_ref, 0);
6091 * Find the relative ref for root_ref and root_backref.
6093 * @root: the root of the root tree.
6094 * @ref_key: the key of the root ref.
6096 * Return 0 if no error occurred.
/*
 * Verify that a ROOT_REF / ROOT_BACKREF item has a matching counterpart.
 *
 * The counterpart key is built by swapping objectid and offset and flipping
 * the key type. It is searched in @root, and dirid, sequence, name length
 * and name of both items are compared.
 *
 * @root:    tree searched for the counterpart item
 * @ref_key: key of the item being checked
 * @node:    extent buffer holding the item
 * @slot:    slot of the item inside @node
 *
 * Error bits recorded in `err`: ROOT_REF_MISSING, ROOT_REF_MISMATCH.
 */
6098 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6099 struct extent_buffer *node, int slot)
6101 struct btrfs_path path;
6102 struct btrfs_key key;
6103 struct btrfs_root_ref *ref;
6104 struct btrfs_root_ref *backref;
6105 char ref_name[BTRFS_NAME_LEN] = {0};
6106 char backref_name[BTRFS_NAME_LEN] = {0};
6112 u32 backref_namelen;
6117 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6118 ref_dirid = btrfs_root_ref_dirid(node, ref);
6119 ref_seq = btrfs_root_ref_sequence(node, ref);
6120 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Clamp the number of name bytes read to the buffer size; warn when the stored length is larger. */
6122 if (ref_namelen <= BTRFS_NAME_LEN) {
6125 len = BTRFS_NAME_LEN;
6126 warning("%s[%llu %llu] ref_name too long",
6127 ref_key->type == BTRFS_ROOT_REF_KEY ?
6128 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name is read from the bytes immediately after the btrfs_root_ref structure. */
6131 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6133 /* Find relative root_ref */
6134 key.objectid = ref_key->offset;
/* REF + BACKREF - type turns a ROOT_REF key type into ROOT_BACKREF and vice versa. */
6135 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6136 key.offset = ref_key->objectid;
6138 btrfs_init_path(&path);
6139 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6141 err |= ROOT_REF_MISSING;
6142 error("%s[%llu %llu] couldn't find relative ref",
6143 ref_key->type == BTRFS_ROOT_REF_KEY ?
6144 "ROOT_REF" : "ROOT_BACKREF",
6145 ref_key->objectid, ref_key->offset);
6149 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6150 struct btrfs_root_ref);
6151 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6152 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6153 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6155 if (backref_namelen <= BTRFS_NAME_LEN) {
6156 len = backref_namelen;
6158 len = BTRFS_NAME_LEN;
6159 warning("%s[%llu %llu] ref_name too long",
6160 key.type == BTRFS_ROOT_REF_KEY ?
6161 "ROOT_REF" : "ROOT_BACKREF",
6162 key.objectid, key.offset);
6164 read_extent_buffer(path.nodes[0], backref_name,
6165 (unsigned long)(backref + 1), len);
/* Any difference in dirid, sequence, name length or name bytes is a mismatch. */
6167 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6168 ref_namelen != backref_namelen ||
6169 strncmp(ref_name, backref_name, len)) {
6170 err |= ROOT_REF_MISMATCH;
6171 error("%s[%llu %llu] mismatch relative ref",
6172 ref_key->type == BTRFS_ROOT_REF_KEY ?
6173 "ROOT_REF" : "ROOT_BACKREF",
6174 ref_key->objectid, ref_key->offset);
6177 btrfs_release_path(&path);
6182 * Check all fs/file tree in low_memory mode.
6184 * 1. for fs tree root item, call check_fs_root_v2()
6185 * 2. for fs tree root ref/backref, call check_root_ref()
6187 * Return 0 if no error occurred.
/*
 * Walk the root tree starting at BTRFS_FS_TREE_OBJECTID and check every
 * fs/subvolume tree and every root ref/backref found there.
 *
 * ROOT_ITEM keys accepted by fs_root_objectid() are read and passed to
 * check_fs_root_v2(); ROOT_REF / ROOT_BACKREF keys go to check_root_ref().
 * Keys with an objectid above BTRFS_LAST_FREE_OBJECTID end the walk (the
 * statement taken in that case is not visible in this chunk).
 */
6189 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6191 struct btrfs_root *tree_root = fs_info->tree_root;
6192 struct btrfs_root *cur_root = NULL;
6193 struct btrfs_path path;
6194 struct btrfs_key key;
6195 struct extent_buffer *node;
6196 unsigned int ext_ref;
6201 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6203 btrfs_init_path(&path);
6204 key.objectid = BTRFS_FS_TREE_OBJECTID;
6206 key.type = BTRFS_ROOT_ITEM_KEY;
6208 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6212 } else if (ret > 0) {
6218 node = path.nodes[0];
6219 slot = path.slots[0];
6220 btrfs_item_key_to_cpu(node, &key, slot);
6221 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6223 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6224 fs_root_objectid(key.objectid)) {
/* The reloc tree is read without the root cache and is freed explicitly after the check. */
6225 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6226 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6229 key.offset = (u64)-1;
6230 cur_root = btrfs_read_fs_root(fs_info, &key);
6233 if (IS_ERR(cur_root)) {
6234 error("Fail to read fs/subvol tree: %lld",
6240 ret = check_fs_root_v2(cur_root, ext_ref);
6243 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6244 btrfs_free_fs_root(cur_root);
6245 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6246 key.type == BTRFS_ROOT_BACKREF_KEY) {
6247 ret = check_root_ref(tree_root, &key, node, slot);
6251 ret = btrfs_next_item(tree_root, &path);
6261 btrfs_release_path(&path);
/*
 * Entry point for the fs-roots check: prints a banner to stderr unless
 * progress reporting is enabled, then dispatches on the global check_mode to
 * check_fs_roots_v2() (CHECK_MODE_LOWMEM) or check_fs_roots().
 */
6265 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6266 struct cache_tree *root_cache)
6270 if (!ctx.progress_enabled)
6271 fprintf(stderr, "checking fs roots\n");
6272 if (check_mode == CHECK_MODE_LOWMEM)
6273 ret = check_fs_roots_v2(fs_info);
6275 ret = check_fs_roots(fs_info, root_cache);
/*
 * Validate every backref recorded for extent record @rec.
 *
 * Each entry of rec->backref_tree is visited and the following problems are
 * detected, each with a message on stderr:
 *   - backref not found in the extent tree
 *   - tree backref that was never referenced back
 *   - data backref whose found_ref differs from num_refs
 *   - data backref whose disk_bytenr differs from rec->start
 *   - data backref whose bytes differs from rec->nr
 * The found references are summed in `found` and compared with rec->refs.
 *
 * The statements that consult @print_errs and set the return value are not
 * visible in this chunk; maybe_free_extent_rec() treats a zero return as
 * "nothing wrong".
 */
6280 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6282 struct extent_backref *back, *tmp;
6283 struct tree_backref *tback;
6284 struct data_backref *dback;
6288 rbtree_postorder_for_each_entry_safe(back, tmp,
6289 &rec->backref_tree, node) {
6290 if (!back->found_extent_tree) {
6294 if (back->is_data) {
6295 dback = to_data_backref(back);
6296 fprintf(stderr, "Data backref %llu %s %llu"
6297 " owner %llu offset %llu num_refs %lu"
6298 " not found in extent tree\n",
6299 (unsigned long long)rec->start,
6300 back->full_backref ?
6302 back->full_backref ?
6303 (unsigned long long)dback->parent:
6304 (unsigned long long)dback->root,
6305 (unsigned long long)dback->owner,
6306 (unsigned long long)dback->offset,
6307 (unsigned long)dback->num_refs);
6309 tback = to_tree_backref(back);
6310 fprintf(stderr, "Tree backref %llu parent %llu"
6311 " root %llu not found in extent tree\n",
6312 (unsigned long long)rec->start,
6313 (unsigned long long)tback->parent,
6314 (unsigned long long)tback->root);
/* A tree backref present in the extent tree but never seen while walking the trees. */
6317 if (!back->is_data && !back->found_ref) {
6321 tback = to_tree_backref(back);
6322 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6323 (unsigned long long)rec->start,
6324 back->full_backref ? "parent" : "root",
6325 back->full_backref ?
6326 (unsigned long long)tback->parent :
6327 (unsigned long long)tback->root, back);
6329 if (back->is_data) {
6330 dback = to_data_backref(back);
6331 if (dback->found_ref != dback->num_refs) {
6335 fprintf(stderr, "Incorrect local backref count"
6336 " on %llu %s %llu owner %llu"
6337 " offset %llu found %u wanted %u back %p\n",
6338 (unsigned long long)rec->start,
6339 back->full_backref ?
6341 back->full_backref ?
6342 (unsigned long long)dback->parent:
6343 (unsigned long long)dback->root,
6344 (unsigned long long)dback->owner,
6345 (unsigned long long)dback->offset,
6346 dback->found_ref, dback->num_refs, back);
6348 if (dback->disk_bytenr != rec->start) {
6352 fprintf(stderr, "Backref disk bytenr does not"
6353 " match extent record, bytenr=%llu, "
6354 "ref bytenr=%llu\n",
6355 (unsigned long long)rec->start,
6356 (unsigned long long)dback->disk_bytenr);
6359 if (dback->bytes != rec->nr) {
6363 fprintf(stderr, "Backref bytes do not match "
6364 "extent backref, bytenr=%llu, ref "
6365 "bytes=%llu, backref bytes=%llu\n",
6366 (unsigned long long)rec->start,
6367 (unsigned long long)rec->nr,
6368 (unsigned long long)dback->bytes);
/* Accumulate the global reference count; data backrefs contribute their found_ref. */
6371 if (!back->is_data) {
6374 dback = to_data_backref(back);
6375 found += dback->found_ref;
6378 if (found != rec->refs) {
6382 fprintf(stderr, "Incorrect global backref count "
6383 "on %llu found %llu wanted %llu\n",
6384 (unsigned long long)rec->start,
6385 (unsigned long long)found,
6386 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs():
 * maps the rb node back to its extent_backref. The statement that releases
 * it is not visible in this chunk.
 */
6392 static void __free_one_backref(struct rb_node *node)
6394 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Release every backref of @rec by running __free_one_backref over rec->backref_tree. */
6399 static void free_all_extent_backrefs(struct extent_record *rec)
6401 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: take the first cache extent, detach it from the
 * cache and free the backrefs of the extent_record that embeds it. The
 * surrounding loop and the release of the record itself are not visible in
 * this chunk.
 */
6404 static void free_extent_record_cache(struct cache_tree *extent_cache)
6406 struct cache_extent *cache;
6407 struct extent_record *rec;
6410 cache = first_cache_extent(extent_cache);
6413 rec = container_of(cache, struct extent_record, cache);
6414 remove_cache_extent(extent_cache, cache);
6415 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once it is fully verified.
 *
 * A record qualifies when its content and owner ref were checked, the
 * extent-item ref count equals the non-zero ref count, it has no duplicates,
 * all_backpointers_checked() returns 0 (called with print_errs == 0), and
 * none of the bad_full_backref / crossing_stripes / wrong_chunk_type flags
 * is set.
 */
6420 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6421 struct extent_record *rec)
6423 if (rec->content_checked && rec->owner_ref_checked &&
6424 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6425 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6426 !rec->bad_full_backref && !rec->crossing_stripes &&
6427 !rec->wrong_chunk_type) {
6428 remove_cache_extent(extent_cache, &rec->cache);
6429 free_all_extent_backrefs(rec);
6430 list_del_init(&rec->list);
/*
 * Check that tree block @buf is referenced by the tree named in its header
 * owner field.
 *
 * First the backrefs of @rec are scanned for a tree backref whose root equals
 * btrfs_header_owner(buf). Failing that, the owner tree is read and searched
 * one level above @buf for the block's first key; the block counts as found
 * when the parent's block pointer at that slot equals buf->start.
 *
 * Returns 0 when found and 1 otherwise on the final return path; the
 * earlier exits are not visible in this chunk.
 */
6436 static int check_owner_ref(struct btrfs_root *root,
6437 struct extent_record *rec,
6438 struct extent_buffer *buf)
6440 struct extent_backref *node, *tmp;
6441 struct tree_backref *back;
6442 struct btrfs_root *ref_root;
6443 struct btrfs_key key;
6444 struct btrfs_path path;
6445 struct extent_buffer *parent;
6450 rbtree_postorder_for_each_entry_safe(node, tmp,
6451 &rec->backref_tree, node) {
6454 if (!node->found_ref)
6456 if (node->full_backref)
6458 back = to_tree_backref(node);
6459 if (btrfs_header_owner(buf) == back->root)
6462 BUG_ON(rec->is_root);
6464 /* try to find the block by search corresponding fs tree */
6465 key.objectid = btrfs_header_owner(buf);
6466 key.type = BTRFS_ROOT_ITEM_KEY;
6467 key.offset = (u64)-1;
6469 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6470 if (IS_ERR(ref_root))
6473 level = btrfs_header_level(buf);
/* Use the first key stored in the block (item key or node key) as the search key. */
6475 btrfs_item_key_to_cpu(buf, &key, 0);
6477 btrfs_node_key_to_cpu(buf, &key, 0);
6479 btrfs_init_path(&path);
/* Stop the search one level above buf so the parent's block pointer can be inspected. */
6480 path.lowest_level = level + 1;
6481 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6485 parent = path.nodes[level + 1];
6486 if (parent && buf->start == btrfs_node_blockptr(parent,
6487 path.slots[level + 1]))
6490 btrfs_release_path(&path);
6491 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID. The return statements and the handling of
 * full backrefs are not visible in this chunk; record_bad_block_io() treats
 * a zero return as "not an extent tree record".
 */
6494 static int is_extent_tree_record(struct extent_record *rec)
6496 struct extent_backref *node, *tmp;
6497 struct tree_backref *back;
6500 rbtree_postorder_for_each_entry_safe(node, tmp,
6501 &rec->backref_tree, node) {
6504 back = to_tree_backref(node);
6505 if (node->full_backref)
6507 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable block as a corrupt extent record.
 *
 * The block's extent record is looked up in @extent_cache by start and len,
 * and is_extent_tree_record() is consulted (the early exits taken for a
 * missing record or a zero result are not visible in this chunk). The
 * record's parent key is then passed to btrfs_add_corrupt_extent_record().
 */
6514 static int record_bad_block_io(struct btrfs_fs_info *info,
6515 struct cache_tree *extent_cache,
6518 struct extent_record *rec;
6519 struct cache_extent *cache;
6520 struct btrfs_key key;
6522 cache = lookup_cache_extent(extent_cache, start, len);
6526 rec = container_of(cache, struct extent_record, cache);
6527 if (!is_extent_tree_record(rec))
6530 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6531 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (header level != 0) the two btrfs_key_ptr entries are swapped.
 * For a leaf the item data, the item offsets/sizes and the item keys are
 * swapped.
 *
 * @root: tree the block belongs to, used for the key fix-up helpers
 * @path: path to @buf; path->slots[0] is overwritten in the leaf case
 */
6534 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6535 struct extent_buffer *buf, int slot)
6537 if (btrfs_header_level(buf)) {
6538 struct btrfs_key_ptr ptr1, ptr2;
/* Node: read both key pointers, then write them back crosswise. */
6540 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6541 sizeof(struct btrfs_key_ptr));
6542 read_extent_buffer(buf, &ptr2,
6543 btrfs_node_key_ptr_offset(slot + 1),
6544 sizeof(struct btrfs_key_ptr));
6545 write_extent_buffer(buf, &ptr1,
6546 btrfs_node_key_ptr_offset(slot + 1),
6547 sizeof(struct btrfs_key_ptr));
6548 write_extent_buffer(buf, &ptr2,
6549 btrfs_node_key_ptr_offset(slot),
6550 sizeof(struct btrfs_key_ptr));
/* Propagate the block's first key upwards (the guarding condition is not visible here). */
6552 struct btrfs_disk_key key;
6553 btrfs_node_key(buf, &key, 0);
6554 btrfs_fixup_low_keys(root, path, &key,
6555 btrfs_header_level(buf) + 1);
6558 struct btrfs_item *item1, *item2;
6559 struct btrfs_key k1, k2;
6560 char *item1_data, *item2_data;
6561 u32 item1_offset, item2_offset, item1_size, item2_size;
6563 item1 = btrfs_item_nr(slot);
6564 item2 = btrfs_item_nr(slot + 1);
6565 btrfs_item_key_to_cpu(buf, &k1, slot);
6566 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6567 item1_offset = btrfs_item_offset(buf, item1);
6568 item2_offset = btrfs_item_offset(buf, item2);
6569 item1_size = btrfs_item_size(buf, item1);
6570 item2_size = btrfs_item_size(buf, item2);
/* Temporary heap copies of both item payloads. */
6572 item1_data = malloc(item1_size);
6575 item2_data = malloc(item2_size);
6581 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6582 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * item2_size (and vice versa). This looks correct only when both items have
 * the same size - confirm the behaviour for items of different sizes.
 */
6584 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6585 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6589 btrfs_set_item_offset(buf, item1, item2_offset);
6590 btrfs_set_item_offset(buf, item2, item1_offset);
6591 btrfs_set_item_size(buf, item1, item2_size);
6592 btrfs_set_item_size(buf, item2, item1_size);
/* Finally swap the keys, pointing the path at each slot in turn. */
6594 path->slots[0] = slot;
6595 btrfs_set_item_key_unsafe(root, path, &k2);
6596 path->slots[0] = slot + 1;
6597 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys are compared pairwise (node keys or item keys; the selecting
 * condition is not visible here). A pair that is not in ascending order is
 * exchanged through swap_values() and the buffer is marked dirty.
 */
6602 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6604 struct extent_buffer *buf;
6605 struct btrfs_key k1, k2;
6607 int level = path->lowest_level;
6610 buf = path->nodes[level];
6611 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6613 btrfs_node_key_to_cpu(buf, &k1, i);
6614 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6616 btrfs_item_key_to_cpu(buf, &k1, i);
6617 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* k1 < k2 means this pair is already ordered. */
6619 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6621 ret = swap_values(root, path, buf, i);
6624 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are handled (the statement taken for other key types
 * is not visible in this chunk). The following item headers are moved down
 * by one, the item count is decremented and the buffer is marked dirty.
 */
6630 static int delete_bogus_item(struct btrfs_root *root,
6631 struct btrfs_path *path,
6632 struct extent_buffer *buf, int slot)
6634 struct btrfs_key key;
6635 int nritems = btrfs_header_nritems(buf);
6637 btrfs_item_key_to_cpu(buf, &key, slot);
6639 /* These are all the keys we can deal with missing. */
6640 if (key.type != BTRFS_DIR_INDEX_KEY &&
6641 key.type != BTRFS_EXTENT_ITEM_KEY &&
6642 key.type != BTRFS_METADATA_ITEM_KEY &&
6643 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6644 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6647 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6648 (unsigned long long)key.objectid, key.type,
6649 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array; the item data itself is not moved here. */
6650 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6651 btrfs_item_nr_offset(slot + 1),
6652 sizeof(struct btrfs_item) *
6653 (nritems - slot - 1));
6654 btrfs_set_header_nritems(buf, nritems - 1);
/* Propagate the leaf's new first key upwards (the guarding condition is not visible here). */
6656 struct btrfs_disk_key disk_key;
6658 btrfs_item_key(buf, &disk_key, 0);
6659 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6661 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 must end at BTRFS_LEAF_DATA_SIZE and every later item must end
 * where the previous item starts. An item that ends too far is passed to
 * delete_bogus_item(); an item that ends too early is shifted up by the
 * size of the gap.
 */
6665 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6667 struct extent_buffer *buf;
6671 /* We should only get this for leaves */
6672 BUG_ON(path->lowest_level);
6673 buf = path->nodes[0];
6675 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6676 unsigned int shift = 0, offset;
/* First item: its end must coincide with the end of the leaf data area. */
6678 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6679 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6680 if (btrfs_item_end_nr(buf, i) >
6681 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6682 ret = delete_bogus_item(root, path, buf, i);
6685 fprintf(stderr, "item is off the end of the "
6686 "leaf, can't fix\n");
6690 shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6691 btrfs_item_end_nr(buf, i);
/* Later items: the end must coincide with the start of the previous item. */
6692 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6693 btrfs_item_offset_nr(buf, i - 1)) {
6694 if (btrfs_item_end_nr(buf, i) >
6695 btrfs_item_offset_nr(buf, i - 1)) {
6696 ret = delete_bogus_item(root, path, buf, i);
6699 fprintf(stderr, "items overlap, can't fix\n");
6703 shift = btrfs_item_offset_nr(buf, i - 1) -
6704 btrfs_item_end_nr(buf, i);
/* Move the item data up by `shift` bytes and record the new offset. */
6709 printf("Shifting item nr %d by %u bytes in block %llu\n",
6710 i, shift, (unsigned long long)buf->start);
6711 offset = btrfs_item_offset_nr(buf, i);
6712 memmove_extent_buffer(buf,
6713 btrfs_leaf_data(buf) + offset + shift,
6714 btrfs_leaf_data(buf) + offset,
6715 btrfs_item_size_nr(buf, i));
6716 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6718 btrfs_mark_buffer_dirty(buf);
6722 * We may have moved things, in which case we want to exit so we don't
6723 * write those changes out. Once we have proper abort functionality in
6724 * progs this can be changed to something nicer.
6731 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6732 * then just return -EIO.
/*
 * Try to repair a tree block that failed the basic checks.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled. All roots referencing the block are collected with
 * btrfs_find_all_roots(); for each one the block is searched with cow
 * enabled inside a transaction and then passed to fix_key_order() or
 * fix_item_offset().
 *
 * @root:   any root of the filesystem, used for its fs_info
 * @buf:    the bad tree block
 * @status: result of the failed block check
 */
6734 static int try_to_fix_bad_block(struct btrfs_root *root,
6735 struct extent_buffer *buf,
6736 enum btrfs_tree_block_status status)
6738 struct btrfs_trans_handle *trans;
6739 struct ulist *roots;
6740 struct ulist_node *node;
6741 struct btrfs_root *search_root;
6742 struct btrfs_path path;
6743 struct ulist_iterator iter;
6744 struct btrfs_key root_key, key;
6747 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6748 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6751 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6755 btrfs_init_path(&path);
6756 ULIST_ITER_INIT(&iter);
6757 while ((node = ulist_next(roots, &iter))) {
6758 root_key.objectid = node->val;
6759 root_key.type = BTRFS_ROOT_ITEM_KEY;
6760 root_key.offset = (u64)-1;
6762 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6769 trans = btrfs_start_transaction(search_root, 0);
6770 if (IS_ERR(trans)) {
6771 ret = PTR_ERR(trans);
/* Stop the search at the bad block's level and skip block checking on the way down. */
6775 path.lowest_level = btrfs_header_level(buf);
6776 path.skip_check_block = 1;
6777 if (path.lowest_level)
6778 btrfs_node_key_to_cpu(buf, &key, 0);
6780 btrfs_item_key_to_cpu(buf, &key, 0);
6781 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6784 btrfs_commit_transaction(trans, search_root);
6787 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6788 ret = fix_key_order(search_root, &path);
6789 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6790 ret = fix_item_offset(search_root, &path);
6792 btrfs_commit_transaction(trans, search_root);
6795 btrfs_release_path(&path);
6796 btrfs_commit_transaction(trans, search_root);
6799 btrfs_release_path(&path);
/*
 * Run the basic checks on tree block @buf and update its extent record.
 *
 * The record is looked up in @extent_cache and filled with the block's
 * generation, first-key objectid and level. The block is validated with
 * btrfs_check_leaf() or btrfs_check_node(); on failure a repair through
 * try_to_fix_bad_block() is attempted. On success the record is marked as
 * content-checked and its owner ref is verified, either implicitly for
 * full-backref blocks or through check_owner_ref(). Finally
 * maybe_free_extent_rec() drops the record if it is fully verified.
 *
 * @flags: extent flags; BTRFS_BLOCK_FLAG_FULL_BACKREF is the bit examined
 */
6803 static int check_block(struct btrfs_root *root,
6804 struct cache_tree *extent_cache,
6805 struct extent_buffer *buf, u64 flags)
6807 struct extent_record *rec;
6808 struct cache_extent *cache;
6809 struct btrfs_key key;
6810 enum btrfs_tree_block_status status;
6814 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6817 rec = container_of(cache, struct extent_record, cache);
6818 rec->generation = btrfs_header_generation(buf);
6820 level = btrfs_header_level(buf);
/* For a non-empty block remember the objectid of its first key. */
6821 if (btrfs_header_nritems(buf) > 0) {
6824 btrfs_item_key_to_cpu(buf, &key, 0);
6826 btrfs_node_key_to_cpu(buf, &key, 0);
6828 rec->info_objectid = key.objectid;
6830 rec->info_level = level;
6832 if (btrfs_is_leaf(buf))
6833 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6835 status = btrfs_check_node(root, &rec->parent_key, buf);
6837 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6839 status = try_to_fix_bad_block(root, buf, status);
6840 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6842 fprintf(stderr, "bad block %llu\n",
6843 (unsigned long long)buf->start);
6846 * Signal to callers we need to start the scan over
6847 * again since we'll have cowed blocks.
6852 rec->content_checked = 1;
6853 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6854 rec->owner_ref_checked = 1;
6856 ret = check_owner_ref(root, rec, buf);
6858 rec->owner_ref_checked = 1;
6862 maybe_free_extent_rec(extent_cache, rec);
/*
 * find_tree_backref - search the backrefs list of @rec for a tree backref.
 *
 * Iterates rec->backrefs.  Each entry is converted with to_tree_backref() and
 * compared either by parent (back->parent against @parent) or by root
 * (back->root against @root); node->full_backref is tested to decide whether
 * an entry is eligible for the respective comparison.
 *
 * NOTE(review): the lines selecting between the two comparisons, the filter
 * applied right after to_extent_backref() and the return statements are not
 * visible here; presumably a non-zero @parent selects the parent comparison
 * and the matching entry (or NULL) is returned - confirm.
 */
6867 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6868 u64 parent, u64 root)
6870 struct list_head *cur = rec->backrefs.next;
6871 struct extent_backref *node;
6872 struct tree_backref *back;
/* The list head itself marks the end of the walk. */
6874 while(cur != &rec->backrefs) {
6875 node = to_extent_backref(cur);
6879 back = to_tree_backref(node);
6881 if (!node->full_backref)
6883 if (parent == back->parent)
6886 if (node->full_backref)
6888 if (back->root == root)
/*
 * alloc_tree_backref - allocate a tree backref for @rec.
 *
 * The object is obtained with malloc() and only its embedded node is zeroed.
 * One visible path stores @parent and sets node.full_backref to 1, the other
 * sets node.full_backref to 0.
 *
 * NOTE(review): the allocation check, the condition choosing between the two
 * paths, the store of @root and the return are not visible in this listing.
 */
6896 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6897 u64 parent, u64 root)
6899 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the generic backref header is cleared, not the whole object. */
6903 memset(&ref->node, 0, sizeof(ref->node));
6905 ref->parent = parent;
6906 ref->node.full_backref = 1;
6909 ref->node.full_backref = 0;
/*
 * find_data_backref - search the backrefs list of @rec for a data backref.
 *
 * Iterates rec->backrefs.  Entries are converted with to_data_backref() and
 * matched either by parent (back->parent against @parent) or by the triple
 * root/owner/offset.  For a triple match there is an additional test: when
 * both the found_ref argument and node->found_ref are set and back->bytes or
 * back->disk_bytenr differ from @bytes or @disk_bytenr, the entry is treated
 * differently from a plain match.
 *
 * NOTE(review): the statements executed by the visible conditions (skip or
 * return) and one parameter line of the signature are not visible here;
 * presumably a mismatching entry is skipped so the caller allocates a new
 * backref - confirm.
 */
6916 static struct data_backref *find_data_backref(struct extent_record *rec,
6917 u64 parent, u64 root,
6918 u64 owner, u64 offset,
6920 u64 disk_bytenr, u64 bytes)
6922 struct list_head *cur = rec->backrefs.next;
6923 struct extent_backref *node;
6924 struct data_backref *back;
6926 while(cur != &rec->backrefs) {
6927 node = to_extent_backref(cur);
6931 back = to_data_backref(node);
6933 if (!node->full_backref)
6935 if (parent == back->parent)
6938 if (node->full_backref)
6940 if (back->root == root && back->owner == owner &&
6941 back->offset == offset) {
/* Same key but a different size or bytenr than the ref already found. */
6942 if (found_ref && node->found_ref &&
6943 (back->bytes != bytes ||
6944 back->disk_bytenr != disk_bytenr))
/*
 * alloc_data_backref - allocate a data backref for @rec.
 *
 * The object is obtained with malloc(), its embedded node is zeroed and
 * marked is_data.  One visible path stores @parent and sets full_backref to
 * 1, the other stores @offset and sets full_backref to 0.  ref->bytes is
 * initialised from max_size, and rec->max_size is raised when max_size is
 * larger than the value recorded so far.
 *
 * NOTE(review): the allocation check, the branch condition, the remaining
 * field stores and the return are not visible in this listing.
 */
6954 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6955 u64 parent, u64 root,
6956 u64 owner, u64 offset,
6959 struct data_backref *ref = malloc(sizeof(*ref));
6963 memset(&ref->node, 0, sizeof(ref->node));
6964 ref->node.is_data = 1;
6967 ref->parent = parent;
6970 ref->node.full_backref = 1;
6974 ref->offset = offset;
6975 ref->node.full_backref = 0;
6977 ref->bytes = max_size;
/* Keep the record size at the largest size seen for this extent. */
6980 if (max_size > rec->max_size)
6981 rec->max_size = max_size;
/*
 * check_extent_type - flag extents that live in a block group of the wrong
 * type by setting rec->wrong_chunk_type.
 *
 * The block group is found with btrfs_lookup_first_block_group() using
 * global_info and rec->start.  The visible rules are:
 *  - a data extent (rec->metadata == 0) needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent needs BTRFS_BLOCK_GROUP_SYSTEM or
 *    BTRFS_BLOCK_GROUP_METADATA;
 *  - if the record has backrefs, the first one in backref_tree is inspected:
 *    a data backref on a metadata extent is wrong, otherwise a backref whose
 *    root is BTRFS_CHUNK_TREE_OBJECTID requires a SYSTEM block group and any
 *    other root requires a METADATA block group.
 *
 * NOTE(review): the handling of a failed block group lookup and the early
 * returns are not visible in this listing.
 */
6985 /* Check if the type of extent matches with its chunk */
6986 static void check_extent_type(struct extent_record *rec)
6988 struct btrfs_block_group_cache *bg_cache;
6990 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6994 /* data extent, check chunk directly*/
6995 if (!rec->metadata) {
6996 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6997 rec->wrong_chunk_type = 1;
7001 /* metadata extent, check the obvious case first */
7002 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7003 BTRFS_BLOCK_GROUP_METADATA))) {
7004 rec->wrong_chunk_type = 1;
7009 * Check SYSTEM extent, as it's also marked as metadata, we can only
7010 * make sure it's a SYSTEM extent by its backref
7012 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7013 struct extent_backref *node;
7014 struct tree_backref *tback;
7017 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7018 if (node->is_data) {
7019 /* tree block shouldn't have data backref */
7020 rec->wrong_chunk_type = 1;
7023 tback = container_of(node, struct tree_backref, node);
7025 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7026 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7028 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7029 if (!(bg_cache->flags & bg_type))
7030 rec->wrong_chunk_type = 1;
7035 * Allocate a new extent record, fill default values from @tmpl and insert into
7036 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7037 * the cache, otherwise it fails.
/*
 * add_extent_rec_nolookup - create an extent record from @tmpl and insert it
 * into @extent_cache without looking for an existing entry first.
 *
 * tmpl->max_size must be non-zero (BUG_ON).  Fields are copied from @tmpl or
 * reset to their defaults as listed below.  Note that rec->nr is the larger
 * of tmpl->nr and tmpl->max_size, while the cache node size is tmpl->nr.
 * After insert_cache_extent() the global bytes_used counter grows by rec->nr,
 * crossing_stripes is computed with check_crossing_stripes() for one
 * nodesize starting at rec->start, and check_extent_type() is run.
 *
 * NOTE(review): the allocation and insert error handling, the condition
 * guarding the crossing_stripes check and the return value are not visible
 * in this listing.
 */
7039 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7040 struct extent_record *tmpl)
7042 struct extent_record *rec;
7045 BUG_ON(tmpl->max_size == 0);
7046 rec = malloc(sizeof(*rec));
7049 rec->start = tmpl->start;
7050 rec->max_size = tmpl->max_size;
7051 rec->nr = max(tmpl->nr, tmpl->max_size);
7052 rec->found_rec = tmpl->found_rec;
7053 rec->content_checked = tmpl->content_checked;
7054 rec->owner_ref_checked = tmpl->owner_ref_checked;
7055 rec->num_duplicates = 0;
7056 rec->metadata = tmpl->metadata;
7057 rec->flag_block_full_backref = FLAG_UNSET;
7058 rec->bad_full_backref = 0;
7059 rec->crossing_stripes = 0;
7060 rec->wrong_chunk_type = 0;
7061 rec->is_root = tmpl->is_root;
7062 rec->refs = tmpl->refs;
7063 rec->extent_item_refs = tmpl->extent_item_refs;
7064 rec->parent_generation = tmpl->parent_generation;
/* All list heads start empty, and so does the rb-tree of backrefs. */
7065 INIT_LIST_HEAD(&rec->backrefs);
7066 INIT_LIST_HEAD(&rec->dups);
7067 INIT_LIST_HEAD(&rec->list);
7068 rec->backref_tree = RB_ROOT;
7069 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7070 rec->cache.start = tmpl->start;
7071 rec->cache.size = tmpl->nr;
7072 ret = insert_cache_extent(extent_cache, &rec->cache);
7077 bytes_used += rec->nr;
7080 rec->crossing_stripes = check_crossing_stripes(global_info,
7081 rec->start, global_info->nodesize);
7082 check_extent_type(rec);
7087 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7089 * - refs - if found, increase refs
7090 * - is_root - if found, set
7091 * - content_checked - if found, set
7092 * - owner_ref_checked - if found, set
7094 * If not found, create a new one, initialize and insert.
/*
 * add_extent_rec - merge @tmpl into an existing extent record, or create a
 * new record when none covers the range.
 *
 * The range [tmpl->start, tmpl->nr) is looked up in @extent_cache.  For an
 * existing record the visible updates are:
 *  - rec->nr is reset to max(tmpl->nr, tmpl->max_size);
 *  - when tmpl->found_rec is set and either the start differs or the record
 *    was already found, the record is queued on the global duplicate_extents
 *    list (once) and a freshly allocated duplicate entry carrying start,
 *    max_size, metadata and extent_item_refs is appended to rec->dups;
 *  - a non-duplicate extent_item_refs value is stored, after printing a
 *    message when the record already had one;
 *  - content_checked, owner_ref_checked, parent_key, parent_generation and
 *    max_size are merged from @tmpl;
 *  - crossing_stripes and the chunk type are re-evaluated and the record is
 *    offered to maybe_free_extent_rec().
 * Otherwise add_extent_rec_nolookup() creates the record.
 *
 * NOTE(review): several conditions, error paths and the returns are not
 * visible in this listing; the summary above only covers what is shown.
 */
7096 static int add_extent_rec(struct cache_tree *extent_cache,
7097 struct extent_record *tmpl)
7099 struct extent_record *rec;
7100 struct cache_extent *cache;
7104 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7106 rec = container_of(cache, struct extent_record, cache);
7110 rec->nr = max(tmpl->nr, tmpl->max_size);
7113 * We need to make sure to reset nr to whatever the extent
7114 * record says was the real size, this way we can compare it to
7117 if (tmpl->found_rec) {
7118 if (tmpl->start != rec->start || rec->found_rec) {
7119 struct extent_record *tmp;
/* Queue the record for duplicate handling only once. */
7122 if (list_empty(&rec->list))
7123 list_add_tail(&rec->list,
7124 &duplicate_extents);
7127 * We have to do this song and dance in case we
7128 * find an extent record that falls inside of
7129 * our current extent record but does not have
7130 * the same objectid.
7132 tmp = malloc(sizeof(*tmp));
7135 tmp->start = tmpl->start;
7136 tmp->max_size = tmpl->max_size;
7139 tmp->metadata = tmpl->metadata;
7140 tmp->extent_item_refs = tmpl->extent_item_refs;
7141 INIT_LIST_HEAD(&tmp->list);
7142 list_add_tail(&tmp->list, &rec->dups);
7143 rec->num_duplicates++;
7150 if (tmpl->extent_item_refs && !dup) {
7151 if (rec->extent_item_refs) {
7152 fprintf(stderr, "block %llu rec "
7153 "extent_item_refs %llu, passed %llu\n",
7154 (unsigned long long)tmpl->start,
7155 (unsigned long long)
7156 rec->extent_item_refs,
7157 (unsigned long long)tmpl->extent_item_refs);
7159 rec->extent_item_refs = tmpl->extent_item_refs;
7163 if (tmpl->content_checked)
7164 rec->content_checked = 1;
7165 if (tmpl->owner_ref_checked)
7166 rec->owner_ref_checked = 1;
7167 memcpy(&rec->parent_key, &tmpl->parent_key,
7168 sizeof(tmpl->parent_key));
7169 if (tmpl->parent_generation)
7170 rec->parent_generation = tmpl->parent_generation;
7171 if (rec->max_size < tmpl->max_size)
7172 rec->max_size = tmpl->max_size;
7175 * A metadata extent can't cross stripe_len boundary, otherwise
7176 * kernel scrub won't be able to handle it.
7177 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7181 rec->crossing_stripes = check_crossing_stripes(
7182 global_info, rec->start,
7183 global_info->nodesize);
7184 check_extent_type(rec);
7185 maybe_free_extent_rec(extent_cache, rec);
7189 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * add_tree_backref - record a tree block backref for the extent at @bytenr.
 *
 * @extent_cache: cache tree holding the extent records
 * @bytenr:       start of the extent the backref points to
 * @parent:       parent block, passed to the backref find/alloc helpers
 * @root:         owning root, passed to the backref find/alloc helpers
 * @found_ref:    selects which of the two found flags is updated (see below)
 *
 * The record is looked up at @bytenr; when it is missing a template record
 * starting at @bytenr is inserted with add_extent_rec_nolookup() and the
 * lookup is repeated.  A record whose start differs from @bytenr is treated
 * as a problem.  The backref is located with find_tree_backref() or created
 * with alloc_tree_backref().  Two flags are maintained on it, found_ref and
 * found_extent_tree; finding a flag already set prints a message to stderr.
 * A backref is inserted into rec->backref_tree (WARN_ON on failure), then the
 * chunk type is re-checked and the record is offered to
 * maybe_free_extent_rec().
 *
 * NOTE(review): the remaining template fields, the error returns, the
 * found_ref branch and the insert condition are not visible in this listing;
 * presumably insert is only true for a newly allocated backref - confirm.
 */
7194 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7195 u64 parent, u64 root, int found_ref)
7197 struct extent_record *rec;
7198 struct tree_backref *back;
7199 struct cache_extent *cache;
7201 bool insert = false;
7203 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7205 struct extent_record tmpl;
7207 memset(&tmpl, 0, sizeof(tmpl));
7208 tmpl.start = bytenr;
7213 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7217 /* really a bug in cache_extent implement now */
7218 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7223 rec = container_of(cache, struct extent_record, cache);
7224 if (rec->start != bytenr) {
7226 * Several cause, from unaligned bytenr to over lapping extents
7231 back = find_tree_backref(rec, parent, root);
7233 back = alloc_tree_backref(rec, parent, root);
7240 if (back->node.found_ref) {
7241 fprintf(stderr, "Extent back ref already exists "
7242 "for %llu parent %llu root %llu \n",
7243 (unsigned long long)bytenr,
7244 (unsigned long long)parent,
7245 (unsigned long long)root);
7247 back->node.found_ref = 1;
7249 if (back->node.found_extent_tree) {
7250 fprintf(stderr, "Extent back ref already exists "
7251 "for %llu parent %llu root %llu \n",
7252 (unsigned long long)bytenr,
7253 (unsigned long long)parent,
7254 (unsigned long long)root);
7256 back->node.found_extent_tree = 1;
/* rb_insert() returning non-zero only triggers a warning here. */
7259 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7260 compare_extent_backref));
7261 check_extent_type(rec);
7262 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_data_backref - record a data backref for the extent at @bytenr.
 *
 * @extent_cache: cache tree holding the extent records
 * @bytenr:       start of the data extent
 * @parent, @root, @owner, @offset: identify the backref
 * @num_refs:     reference count; must be 1 on the path with the BUG_ON below
 * @found_ref:    passed to find_data_backref()
 * @max_size:     extent size known to the caller
 *
 * The record is looked up at @bytenr; when it is missing a template with
 * start and max_size set is inserted with add_extent_rec_nolookup() and the
 * lookup is repeated.  rec->max_size is raised to @max_size if smaller.  The
 * backref is located with find_data_backref() or created with
 * alloc_data_backref().
 *
 * On one path num_refs must be 1, a backref that was already found must have
 * the same size (BUG_ON), found_ref is counted, bytes and disk_bytenr are
 * refreshed and the record is marked content and owner-ref checked.  On the
 * other path the extent tree reference is recorded (num_refs,
 * found_extent_tree), with a message when it was already present.  A backref
 * is inserted into rec->backref_tree and the record is offered to
 * maybe_free_extent_rec().
 *
 * NOTE(review): the branch conditions, the error returns and the condition
 * around rb_erase() are not visible in this listing.
 */
7266 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7267 u64 parent, u64 root, u64 owner, u64 offset,
7268 u32 num_refs, int found_ref, u64 max_size)
7270 struct extent_record *rec;
7271 struct data_backref *back;
7272 struct cache_extent *cache;
7274 bool insert = false;
7276 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7278 struct extent_record tmpl;
7280 memset(&tmpl, 0, sizeof(tmpl));
7281 tmpl.start = bytenr;
7283 tmpl.max_size = max_size;
7285 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7289 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7294 rec = container_of(cache, struct extent_record, cache);
/* Track the largest size reported for this extent. */
7295 if (rec->max_size < max_size)
7296 rec->max_size = max_size;
7299 * If found_ref is set then max_size is the real size and must match the
7300 * existing refs. So if we have already found a ref then we need to
7301 * make sure that this ref matches the existing one, otherwise we need
7302 * to add a new backref so we can notice that the backrefs don't match
7303 * and we need to figure out who is telling the truth. This is to
7304 * account for that awful fsync bug I introduced where we'd end up with
7305 * a btrfs_file_extent_item that would have its length include multiple
7306 * prealloc extents or point inside of a prealloc extent.
7308 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7311 back = alloc_data_backref(rec, parent, root, owner, offset,
7318 BUG_ON(num_refs != 1);
7319 if (back->node.found_ref)
7320 BUG_ON(back->bytes != max_size);
7321 back->node.found_ref = 1;
7322 back->found_ref += 1;
7323 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7324 back->bytes = max_size;
7325 back->disk_bytenr = bytenr;
7327 /* Need to reinsert if not already in the tree */
7329 rb_erase(&back->node.node, &rec->backref_tree);
7334 rec->content_checked = 1;
7335 rec->owner_ref_checked = 1;
7337 if (back->node.found_extent_tree) {
7338 fprintf(stderr, "Extent back ref already exists "
7339 "for %llu parent %llu root %llu "
7340 "owner %llu offset %llu num_refs %lu\n",
7341 (unsigned long long)bytenr,
7342 (unsigned long long)parent,
7343 (unsigned long long)root,
7344 (unsigned long long)owner,
7345 (unsigned long long)offset,
7346 (unsigned long)num_refs);
7348 back->num_refs = num_refs;
7349 back->node.found_extent_tree = 1;
7352 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7353 compare_extent_backref));
7355 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_pending - remember the range [@bytenr, @size) in @seen and queue it in
 * @pending.
 *
 * The return value of the second add_cache_extent() call is not stored.
 *
 * NOTE(review): the check of ret between the two calls and the return are not
 * visible here; presumably a range that is already in @seen is not queued
 * again - confirm.
 */
7359 static int add_pending(struct cache_tree *pending,
7360 struct cache_tree *seen, u64 bytenr, u32 size)
7363 ret = add_cache_extent(seen, bytenr, size);
7366 add_cache_extent(pending, bytenr, size);
/*
 * pick_next_pending - choose the next batch of blocks to process and store
 * their start and size in @bits (capacity @bits_nr).
 *
 * The visible sources are, in this order in the text: the first extent of
 * @reada, the extents of @nodes searched from a position up to 32768 bytes
 * before @last (falling back to a search from 0), and the extents of
 * @pending.  Two loops copy consecutive extents into @bits until the tree
 * ends or @bits_nr entries are filled.  When more than 8 slots remain,
 * further extents from @pending are appended, starting at the end of
 * bits[0]; the gap between consecutive picks is compared against 32768.
 *
 * NOTE(review): the conditions linking these steps, the loop around the
 * final lookup, one parameter line of the signature and the return values
 * are not visible in this listing.
 */
7370 static int pick_next_pending(struct cache_tree *pending,
7371 struct cache_tree *reada,
7372 struct cache_tree *nodes,
7373 u64 last, struct block_info *bits, int bits_nr,
7376 unsigned long node_start = last;
7377 struct cache_extent *cache;
7380 cache = search_cache_extent(reada, 0);
7382 bits[0].start = cache->start;
7383 bits[0].size = cache->size;
/* Start the node search slightly before the last processed position. */
7388 if (node_start > 32768)
7389 node_start -= 32768;
7391 cache = search_cache_extent(nodes, node_start);
7393 cache = search_cache_extent(nodes, 0);
7396 cache = search_cache_extent(pending, 0);
7401 bits[ret].start = cache->start;
7402 bits[ret].size = cache->size;
7403 cache = next_cache_extent(cache);
7405 } while (cache && ret < bits_nr);
7411 bits[ret].start = cache->start;
7412 bits[ret].size = cache->size;
7413 cache = next_cache_extent(cache);
7415 } while (cache && ret < bits_nr);
7417 if (bits_nr - ret > 8) {
7418 u64 lookup = bits[0].start + bits[0].size;
7419 struct cache_extent *next;
7420 next = search_cache_extent(pending, lookup);
7422 if (next->start - lookup > 32768)
7424 bits[ret].start = next->start;
7425 bits[ret].size = next->size;
7426 lookup = next->start + next->size;
7430 next = next_cache_extent(next);
/*
 * free_chunk_record - release callback for one entry of the chunk cache.
 *
 * Converts the cache node back to its chunk_record and unlinks the record
 * from both of its lists (list and dextents).
 *
 * NOTE(review): the statement releasing the memory is not visible here.
 */
7438 static void free_chunk_record(struct cache_extent *cache)
7440 struct chunk_record *rec;
7442 rec = container_of(cache, struct chunk_record, cache);
7443 list_del_init(&rec->list);
7444 list_del_init(&rec->dextents);
/*
 * free_chunk_cache_tree - drop every entry of @chunk_cache, using
 * free_chunk_record() as the per-entry release callback.
 */
7448 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7450 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * free_device_record - release callback for one node of the device cache.
 *
 * Converts the rb-tree node back to its device_record.
 *
 * NOTE(review): the statement releasing the memory is not visible here.
 */
7453 static void free_device_record(struct rb_node *node)
7455 struct device_record *rec;
7457 rec = container_of(node, struct device_record, node);
/*
 * Macro instantiation pairing the name device_cache with the callback above;
 * presumably it generates the tree-wide free function - confirm against the
 * macro definition.
 */
7461 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * insert_block_group_record - add @bg_rec to the block group tree.
 *
 * The record is inserted into the cache tree embedded in @tree and appended
 * to the tree->block_groups list.
 *
 * NOTE(review): the check of ret and the return are not visible here;
 * presumably the list is only updated after a successful insert - confirm.
 */
7463 int insert_block_group_record(struct block_group_tree *tree,
7464 struct block_group_record *bg_rec)
7468 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7472 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * free_block_group_record - release callback for one block group record.
 *
 * Converts the cache node back to its block_group_record and unlinks it from
 * the list it is queued on.
 *
 * NOTE(review): the statement releasing the memory is not visible here.
 */
7476 static void free_block_group_record(struct cache_extent *cache)
7478 struct block_group_record *rec;
7480 rec = container_of(cache, struct block_group_record, cache);
7481 list_del_init(&rec->list);
/*
 * free_block_group_tree - drop every entry of the cache tree embedded in
 * @tree, using free_block_group_record() as the release callback.
 */
7485 void free_block_group_tree(struct block_group_tree *tree)
7487 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * insert_device_extent_record - add @de_rec to the device extent tree.
 *
 * The record is inserted with insert_cache_extent2() rather than
 * insert_cache_extent(), for the reason given in the comment below, and is
 * then queued on both orphan lists of @tree: no_chunk_orphans through
 * chunk_list and no_device_orphans through device_list.
 *
 * NOTE(review): the check of ret and the return are not visible here;
 * presumably the lists are only updated after a successful insert - confirm.
 */
7490 int insert_device_extent_record(struct device_extent_tree *tree,
7491 struct device_extent_record *de_rec)
7496 * Device extent is a bit different from the other extents, because
7497 * the extents which belong to the different devices may have the
7498 * same start and size, so we need use the special extent cache
7499 * search/insert functions.
7501 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7505 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7506 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * free_device_extent_record - release callback for one device extent record.
 *
 * Converts the cache node back to its device_extent_record and unlinks it
 * from chunk_list and device_list, each only when that list node is still
 * linked.
 *
 * NOTE(review): the statement releasing the memory is not visible here.
 */
7510 static void free_device_extent_record(struct cache_extent *cache)
7512 struct device_extent_record *rec;
7514 rec = container_of(cache, struct device_extent_record, cache);
7515 if (!list_empty(&rec->chunk_list))
7516 list_del_init(&rec->chunk_list);
7517 if (!list_empty(&rec->device_list))
7518 list_del_init(&rec->device_list);
/*
 * free_device_extent_tree - drop every entry of the cache tree embedded in
 * @tree, using free_device_extent_record() as the release callback.
 */
7522 void free_device_extent_tree(struct device_extent_tree *tree)
7524 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7527 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * process_extent_ref_v0 - convert a v0 extent ref item into a backref.
 *
 * Only compiled when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.  The item key
 * supplies the extent start (key.objectid) and the parent (key.offset).  A
 * ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree backref,
 * anything else as a data backref that carries the v0 ref count and zeroes
 * for root, owner, offset, found_ref and max_size.
 *
 * NOTE(review): the remaining arguments of the add_tree_backref() call and
 * the return are not visible in this listing.
 */
7528 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7529 struct extent_buffer *leaf, int slot)
7531 struct btrfs_extent_ref_v0 *ref0;
7532 struct btrfs_key key;
7535 btrfs_item_key_to_cpu(leaf, &key, slot);
7536 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7537 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7538 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7541 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7542 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * btrfs_new_chunk_record - build a chunk_record from the chunk item stored
 * in @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(), sized by
 * btrfs_chunk_record_size() for the number of stripes of the item; a failed
 * allocation is reported on stderr.  The cache node covers
 * [key->offset, chunk length).  The key, the leaf generation and the chunk
 * attributes are copied, followed by devid, offset and device uuid of every
 * stripe.
 *
 * NOTE(review): what happens after the allocation failure message, one
 * parameter line of the signature and the return are not visible here.
 */
7548 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7549 struct btrfs_key *key,
7552 struct btrfs_chunk *ptr;
7553 struct chunk_record *rec;
7556 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7557 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* The record size depends on the stripe count. */
7559 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7561 fprintf(stderr, "memory allocation failed\n");
7565 INIT_LIST_HEAD(&rec->list);
7566 INIT_LIST_HEAD(&rec->dextents);
7569 rec->cache.start = key->offset;
7570 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7572 rec->generation = btrfs_header_generation(leaf);
7574 rec->objectid = key->objectid;
7575 rec->type = key->type;
7576 rec->offset = key->offset;
7578 rec->length = rec->cache.size;
7579 rec->owner = btrfs_chunk_owner(leaf, ptr);
7580 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7581 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7582 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7583 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7584 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7585 rec->num_stripes = num_stripes;
7586 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe data out of the extent buffer. */
7588 for (i = 0; i < rec->num_stripes; ++i) {
7589 rec->stripes[i].devid =
7590 btrfs_stripe_devid_nr(leaf, ptr, i);
7591 rec->stripes[i].offset =
7592 btrfs_stripe_offset_nr(leaf, ptr, i);
7593 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7594 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * process_chunk_item - validate one chunk item and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid chunk
 * is reported with its offset and length.  A chunk_record is then built with
 * btrfs_new_chunk_record() and inserted into @chunk_cache; when the insert
 * fails a message saying the chunk existed is printed to stderr.
 *
 * NOTE(review): the checks of ret, the cleanup after a failed insert, one
 * parameter line of the signature and the returns are not visible here.
 */
7601 static int process_chunk_item(struct cache_tree *chunk_cache,
7602 struct btrfs_key *key, struct extent_buffer *eb,
7605 struct chunk_record *rec;
7606 struct btrfs_chunk *chunk;
7609 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7611 * Do extra check for this chunk item,
7613 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7614 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7615 * and owner<->key_type check.
7617 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7620 error("chunk(%llu, %llu) is not valid, ignore it",
7621 key->offset, btrfs_chunk_length(eb, chunk));
7624 rec = btrfs_new_chunk_record(eb, key, slot);
7625 ret = insert_cache_extent(chunk_cache, &rec->cache);
7627 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7628 rec->offset, rec->length);
/*
 * process_device_item - build a device_record from the dev item in @slot of
 * @eb and insert it into the rb-tree @dev_cache.
 *
 * The record is allocated with malloc(), so only the fields assigned below
 * are initialised by the visible code.  rec->devid is written twice: first
 * from key->offset and later from the device id stored in the item, so the
 * item value is the one that remains.  When rb_insert() reports a failure a
 * message saying the device existed is printed to stderr.
 *
 * NOTE(review): the handling after the allocation failure message, the
 * cleanup after a failed insert and the returns are not visible here.
 */
7635 static int process_device_item(struct rb_root *dev_cache,
7636 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7638 struct btrfs_dev_item *ptr;
7639 struct device_record *rec;
7642 ptr = btrfs_item_ptr(eb,
7643 slot, struct btrfs_dev_item);
7645 rec = malloc(sizeof(*rec));
7647 fprintf(stderr, "memory allocation failed\n");
7651 rec->devid = key->offset;
7652 rec->generation = btrfs_header_generation(eb);
7654 rec->objectid = key->objectid;
7655 rec->type = key->type;
7656 rec->offset = key->offset;
/* Overwrites the devid taken from the key above. */
7658 rec->devid = btrfs_device_id(eb, ptr);
7659 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7660 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7662 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7664 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * btrfs_new_block_group_record - build a block_group_record from the block
 * group item in @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(); a failed allocation is
 * reported on stderr.  The cache node covers [key->objectid, key->offset),
 * the key and the leaf generation are copied, the flags are read from the
 * item and the list node is initialised.
 *
 * NOTE(review): what happens after the allocation failure message, one
 * parameter line of the signature and the return are not visible here.
 */
7671 struct block_group_record *
7672 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7675 struct btrfs_block_group_item *ptr;
7676 struct block_group_record *rec;
7678 rec = calloc(1, sizeof(*rec));
7680 fprintf(stderr, "memory allocation failed\n");
/* For block group items the key holds start (objectid) and length (offset). */
7684 rec->cache.start = key->objectid;
7685 rec->cache.size = key->offset;
7687 rec->generation = btrfs_header_generation(leaf);
7689 rec->objectid = key->objectid;
7690 rec->type = key->type;
7691 rec->offset = key->offset;
7693 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7694 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7696 INIT_LIST_HEAD(&rec->list);
/*
 * process_block_group_item - build a record for the block group item in
 * @slot of @eb and insert it into @block_group_cache.
 *
 * When insert_block_group_record() reports a failure, a message saying the
 * block group existed is printed to stderr with the record objectid and
 * offset.
 *
 * NOTE(review): the check of ret, the cleanup after a failed insert and the
 * return are not visible in this listing.
 */
7701 static int process_block_group_item(struct block_group_tree *block_group_cache,
7702 struct btrfs_key *key,
7703 struct extent_buffer *eb, int slot)
7705 struct block_group_record *rec;
7708 rec = btrfs_new_block_group_record(eb, key, slot);
7709 ret = insert_block_group_record(block_group_cache, rec);
7711 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7712 rec->objectid, rec->offset);
/*
 * btrfs_new_device_extent_record - build a device_extent_record from the dev
 * extent item in @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(); a failed allocation is
 * reported on stderr.  The cache node is keyed by key->objectid and starts
 * at key->offset; its size is the extent length read from the item.  The
 * key, the leaf generation and the owning chunk objectid are copied (the
 * field is spelled chunk_objecteid in the record), and both list nodes are
 * initialised.
 *
 * NOTE(review): what happens after the allocation failure message, the
 * target of the chunk offset assignment and the return are not visible here.
 */
7719 struct device_extent_record *
7720 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7721 struct btrfs_key *key, int slot)
7723 struct device_extent_record *rec;
7724 struct btrfs_dev_extent *ptr;
7726 rec = calloc(1, sizeof(*rec));
7728 fprintf(stderr, "memory allocation failed\n");
7732 rec->cache.objectid = key->objectid;
7733 rec->cache.start = key->offset;
7735 rec->generation = btrfs_header_generation(leaf);
7737 rec->objectid = key->objectid;
7738 rec->type = key->type;
7739 rec->offset = key->offset;
7741 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7742 rec->chunk_objecteid =
7743 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7745 btrfs_dev_extent_chunk_offset(leaf, ptr);
7746 rec->length = btrfs_dev_extent_length(leaf, ptr);
7747 rec->cache.size = rec->length;
7749 INIT_LIST_HEAD(&rec->chunk_list);
7750 INIT_LIST_HEAD(&rec->device_list);
/*
 * process_device_extent_item - build a record for the dev extent item of @eb
 * and insert it into @dev_extent_cache.
 *
 * When insert_device_extent_record() reports a failure, a message saying the
 * device extent existed is printed with the record objectid, offset and
 * length.
 *
 * NOTE(review): the return type line, one parameter line, the check of ret,
 * the cleanup after a failed insert and the return are not visible here.
 */
7756 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7757 struct btrfs_key *key, struct extent_buffer *eb,
7760 struct device_extent_record *rec;
7763 rec = btrfs_new_device_extent_record(eb, key, slot);
7764 ret = insert_device_extent_record(dev_extent_cache, rec);
7767 "Device extent[%llu, %llu, %llu] existed.\n",
7768 rec->objectid, rec->offset, rec->length);
/*
 * process_extent_item - record one EXTENT_ITEM or METADATA_ITEM and all of
 * its inline backrefs in @extent_cache.
 *
 * Visible steps:
 *  - The extent length is the nodesize for BTRFS_METADATA_ITEM_KEY items and
 *    key.offset otherwise.
 *  - A start that is not sectorsize aligned is reported and ignored.
 *  - Items smaller than struct btrfs_extent_item are handled as v0 items
 *    when BTRFS_COMPAT_EXTENT_TREE_V0 is defined: the ref count is read from
 *    the v0 layout and the record is added without inline refs.
 *  - Otherwise the ref count and flags come from the extent item.  Metadata
 *    extents must be exactly one nodesize long and data extents must be
 *    sectorsize aligned; violations are reported and ignored.
 *  - The record is added with add_extent_rec(); its return value is not
 *    stored on this path.
 *  - Inline refs are walked from the end of the extent item (skipping the
 *    btrfs_tree_block_info of tree blocks stored under EXTENT_ITEM_KEY) to
 *    the end of the item and dispatched by type to add_tree_backref() or
 *    add_data_backref().  An unknown type is reported as a corrupt extent
 *    record.
 *
 * NOTE(review): the loop header, several call arguments, the else branches
 * and the returns are not visible in this listing.
 */
7775 static int process_extent_item(struct btrfs_root *root,
7776 struct cache_tree *extent_cache,
7777 struct extent_buffer *eb, int slot)
7779 struct btrfs_extent_item *ei;
7780 struct btrfs_extent_inline_ref *iref;
7781 struct btrfs_extent_data_ref *dref;
7782 struct btrfs_shared_data_ref *sref;
7783 struct btrfs_key key;
7784 struct extent_record tmpl;
7789 u32 item_size = btrfs_item_size_nr(eb, slot);
7795 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys do not carry the length in key.offset. */
7797 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7799 num_bytes = root->fs_info->nodesize;
7801 num_bytes = key.offset;
7804 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7805 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7806 key.objectid, root->fs_info->sectorsize);
7809 if (item_size < sizeof(*ei)) {
7810 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7811 struct btrfs_extent_item_v0 *ei0;
7812 if (item_size != sizeof(*ei0)) {
7814 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7815 key.objectid, key.type, key.offset,
7816 btrfs_header_bytenr(eb), slot);
7819 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7820 refs = btrfs_extent_refs_v0(eb, ei0);
7824 memset(&tmpl, 0, sizeof(tmpl));
7825 tmpl.start = key.objectid;
7826 tmpl.nr = num_bytes;
7827 tmpl.extent_item_refs = refs;
7828 tmpl.metadata = metadata;
7830 tmpl.max_size = num_bytes;
7832 return add_extent_rec(extent_cache, &tmpl);
7835 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7836 refs = btrfs_extent_refs(eb, ei);
7837 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7841 if (metadata && num_bytes != root->fs_info->nodesize) {
7842 error("ignore invalid metadata extent, length %llu does not equal to %u",
7843 num_bytes, root->fs_info->nodesize);
7846 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7847 error("ignore invalid data extent, length %llu is not aligned to %u",
7848 num_bytes, root->fs_info->sectorsize);
7852 memset(&tmpl, 0, sizeof(tmpl));
7853 tmpl.start = key.objectid;
7854 tmpl.nr = num_bytes;
7855 tmpl.extent_item_refs = refs;
7856 tmpl.metadata = metadata;
7858 tmpl.max_size = num_bytes;
7859 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the extent item. */
7861 ptr = (unsigned long)(ei + 1);
7862 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7863 key.type == BTRFS_EXTENT_ITEM_KEY)
7864 ptr += sizeof(struct btrfs_tree_block_info);
7866 end = (unsigned long)ei + item_size;
7868 iref = (struct btrfs_extent_inline_ref *)ptr;
7869 type = btrfs_extent_inline_ref_type(eb, iref);
7870 offset = btrfs_extent_inline_ref_offset(eb, iref);
7872 case BTRFS_TREE_BLOCK_REF_KEY:
7873 ret = add_tree_backref(extent_cache, key.objectid,
7877 "add_tree_backref failed (extent items tree block): %s",
7880 case BTRFS_SHARED_BLOCK_REF_KEY:
7881 ret = add_tree_backref(extent_cache, key.objectid,
7885 "add_tree_backref failed (extent items shared block): %s",
7888 case BTRFS_EXTENT_DATA_REF_KEY:
7889 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7890 add_data_backref(extent_cache, key.objectid, 0,
7891 btrfs_extent_data_ref_root(eb, dref),
7892 btrfs_extent_data_ref_objectid(eb,
7894 btrfs_extent_data_ref_offset(eb, dref),
7895 btrfs_extent_data_ref_count(eb, dref),
7898 case BTRFS_SHARED_DATA_REF_KEY:
7899 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7900 add_data_backref(extent_cache, key.objectid, offset,
7902 btrfs_shared_data_ref_count(eb, sref),
7906 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7907 key.objectid, key.type, num_bytes);
/* Advance by the size of the ref type just handled. */
7910 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range - verify that the free space cache of @cache holds
 * exactly one entry matching the range [@offset, @offset + @bytes).
 *
 * For every super block mirror the physical super offset is mapped back to
 * logical addresses inside the block group with btrfs_rmap_block().  Each
 * returned stripe is compared with the range: stripes outside are ignored,
 * a stripe at the front or overlapping the front moves @offset past it, a
 * stripe reaching the end shortens @bytes, and a stripe in the middle causes
 * a recursive call for the left part before continuing with the right part.
 *
 * The remaining range is then looked up with btrfs_find_free_space().  A
 * missing entry, a different entry offset or a different entry size is
 * reported on stderr.  A matching entry is removed from the free space ctl
 * with unlink_free_space().
 *
 * NOTE(review): the inner loop header, the early returns, the freeing of the
 * logical array and of the entry, and the return values are not visible in
 * this listing.
 */
7917 static int check_cache_range(struct btrfs_root *root,
7918 struct btrfs_block_group_cache *cache,
7919 u64 offset, u64 bytes)
7921 struct btrfs_free_space *entry;
7927 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7928 bytenr = btrfs_sb_offset(i);
7929 ret = btrfs_rmap_block(root->fs_info,
7930 cache->key.objectid, bytenr, 0,
7931 &logical, &nr, &stripe_len);
/* Stripe entirely before, or entirely after, the range being checked. */
7936 if (logical[nr] + stripe_len <= offset)
7938 if (offset + bytes <= logical[nr])
7940 if (logical[nr] == offset) {
7941 if (stripe_len >= bytes) {
7945 bytes -= stripe_len;
7946 offset += stripe_len;
7947 } else if (logical[nr] < offset) {
7948 if (logical[nr] + stripe_len >=
7953 bytes = (offset + bytes) -
7954 (logical[nr] + stripe_len);
7955 offset = logical[nr] + stripe_len;
7958 * Could be tricky, the super may land in the
7959 * middle of the area we're checking. First
7960 * check the easiest case, it's at the end.
7962 if (logical[nr] + stripe_len >=
7964 bytes = logical[nr] - offset;
7968 /* Check the left side */
7969 ret = check_cache_range(root, cache,
7971 logical[nr] - offset);
7977 /* Now we continue with the right side */
7978 bytes = (offset + bytes) -
7979 (logical[nr] + stripe_len);
7980 offset = logical[nr] + stripe_len;
7987 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7989 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7990 offset, offset+bytes);
7994 if (entry->offset != offset) {
7995 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8000 if (entry->bytes != bytes) {
8001 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8002 bytes, entry->bytes, offset);
8006 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - compare the loaded free space cache of one block
 * group with the gaps between the extents recorded in the extent tree.
 *
 * The extent tree is searched from the later of the block group start and
 * BTRFS_SUPER_INFO_OFFSET.  While walking EXTENT_ITEM and METADATA_ITEM keys
 * inside the block group, last tracks the end of the previous extent
 * (key.offset long for extent items, one nodesize for metadata items).  A
 * gap between last and the next extent start is verified with
 * check_cache_range(), and so is the tail between the final extent and the
 * end of the block group.  Free space entries left in the ctl afterwards
 * are reported on stderr.
 *
 * NOTE(review): the loop structure, the error handling and the return value
 * are not visible in this listing.
 */
8011 static int verify_space_cache(struct btrfs_root *root,
8012 struct btrfs_block_group_cache *cache)
8014 struct btrfs_path path;
8015 struct extent_buffer *leaf;
8016 struct btrfs_key key;
/* All lookups below are done in the extent tree, whatever root was passed. */
8020 root = root->fs_info->extent_root;
8022 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8024 btrfs_init_path(&path);
8025 key.objectid = last;
8027 key.type = BTRFS_EXTENT_ITEM_KEY;
8028 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8033 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8034 ret = btrfs_next_leaf(root, &path);
8042 leaf = path.nodes[0];
8043 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Tested once the walk reaches the end of this block group. */
8044 if (key.objectid >= cache->key.offset + cache->key.objectid)
8046 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8047 key.type != BTRFS_METADATA_ITEM_KEY) {
8052 if (last == key.objectid) {
8053 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8054 last = key.objectid + key.offset;
8056 last = key.objectid + root->fs_info->nodesize;
8061 ret = check_cache_range(root, cache, last,
8062 key.objectid - last);
8065 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8066 last = key.objectid + key.offset;
8068 last = key.objectid + root->fs_info->nodesize;
/* Verify the tail between the last extent and the block group end. */
8072 if (last < cache->key.objectid + cache->key.offset)
8073 ret = check_cache_range(root, cache, last,
8074 cache->key.objectid +
8075 cache->key.offset - last);
8078 btrfs_release_path(&path);
8081 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8082 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - load and verify the free space information of every
 * block group.
 *
 * When the super block has a cache generation other than -1 that differs
 * from the super generation, a notice that the space cache will be
 * invalidated is printed.  Progress reporting is started when enabled.
 * Block groups are then visited in address order, starting after the primary
 * super block.  For each one the free space ctl is initialised if missing
 * and any cached free space is dropped.  With the FREE_SPACE_TREE compat_ro
 * feature the super stripes are excluded and the free space tree is loaded;
 * otherwise the free space cache is loaded.  The result is checked with
 * verify_space_cache() and a failure is reported on stderr.
 *
 * Returns -EINVAL when the local error counter is non-zero, 0 otherwise.
 *
 * NOTE(review): the loop header, the early exits and the places where the
 * error counter is incremented are not visible in this listing.
 */
8090 static int check_space_cache(struct btrfs_root *root)
8092 struct btrfs_block_group_cache *cache;
8093 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8097 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8098 btrfs_super_generation(root->fs_info->super_copy) !=
8099 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8100 printf("cache and super generation don't match, space cache "
8101 "will be invalidated\n");
8105 if (ctx.progress_enabled) {
8106 ctx.tp = TASK_FREE_SPACE;
8107 task_start(ctx.info);
8111 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8115 start = cache->key.objectid + cache->key.offset;
8116 if (!cache->free_space_ctl) {
8117 if (btrfs_init_free_space_ctl(cache,
8118 root->fs_info->sectorsize)) {
8123 btrfs_remove_free_space_cache(cache);
8126 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8127 ret = exclude_super_stripes(root, cache);
8129 fprintf(stderr, "could not exclude super stripes: %s\n",
8134 ret = load_free_space_tree(root->fs_info, cache);
8135 free_excluded_extents(root, cache);
8137 fprintf(stderr, "could not load free space tree: %s\n",
8144 ret = load_free_space_cache(root->fs_info, cache);
8149 ret = verify_space_cache(root, cache);
8151 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8152 cache->key.objectid);
8157 task_stop(ctx.info);
8159 return error ? -EINVAL : 0;
/*
 * check_extent_csums - read a data range and compare the checksum of every
 * sector with the value stored in a csum item.
 *
 * @root:        used to reach fs_info
 * @bytenr:      logical start of the data range
 * @num_bytes:   length of the range; tested against the sectorsize
 * @leaf_offset: offset inside @eb of the first stored checksum
 * @eb:          leaf holding the csum item
 *
 * A buffer of @num_bytes is allocated and filled with read_extent_data().
 * Every sectorsize block of the data read is checksummed with
 * btrfs_csum_data() and btrfs_csum_final() and compared with the csum_size
 * bytes read from @eb at leaf_offset + sector index * csum_size.  A mismatch
 * is reported with mirror, bytenr and both checksums, and the number of
 * copies is queried so that another mirror can be considered.
 *
 * NOTE(review): the seeding of csum, the mirror retry statements, the
 * advance of offset, the freeing of the buffer and the return values are not
 * visible in this listing.
 */
8162 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8163 u64 num_bytes, unsigned long leaf_offset,
8164 struct extent_buffer *eb) {
8166 struct btrfs_fs_info *fs_info = root->fs_info;
8168 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8170 unsigned long csum_offset;
8174 u64 data_checked = 0;
8180 if (num_bytes % fs_info->sectorsize)
8183 data = malloc(num_bytes);
8187 while (offset < num_bytes) {
8190 read_len = num_bytes - offset;
8191 /* read as much space once a time */
8192 ret = read_extent_data(fs_info, data + offset,
8193 bytenr + offset, &read_len, mirror);
8197 /* verify every 4k data's checksum */
8198 while (data_checked < read_len) {
8200 tmp = offset + data_checked;
8202 csum = btrfs_csum_data((char *)data + tmp,
8203 csum, fs_info->sectorsize);
8204 btrfs_csum_final(csum, (u8 *)&csum);
8206 csum_offset = leaf_offset +
8207 tmp / fs_info->sectorsize * csum_size;
8208 read_extent_buffer(eb, (char *)&csum_expected,
8209 csum_offset, csum_size);
8210 /* try another mirror */
8211 if (csum != csum_expected) {
8212 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8213 mirror, bytenr + tmp,
8214 csum, csum_expected);
8215 num_copies = btrfs_num_copies(root->fs_info,
8217 if (mirror < num_copies - 1) {
8222 data_checked += fs_info->sectorsize;
/*
 * check_extent_exists - verify that the byte range starting at @bytenr is
 * covered by EXTENT_ITEMs in the extent tree.
 *
 * The extent tree is searched for (bytenr, EXTENT_ITEM, -1) and the path is
 * stepped back, across leaves if necessary, to an item whose type is not
 * greater than BTRFS_EXTENT_ITEM_KEY.  Walking forward from there, every
 * EXTENT_ITEM overlapping the range trims it: an extent at or before the
 * start advances bytenr and shrinks num_bytes, an extent reaching the end
 * shortens num_bytes, and an extent in the middle causes a recursive call
 * for the part to the right of it before the left part is processed.  If
 * bytes remain uncovered and no error occurred, the uncovered range is
 * reported on stderr.
 *
 * NOTE(review): the second parameter line, the loop headers, the gotos and
 * the return values are not visible in this listing.
 */
8231 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8234 struct btrfs_path path;
8235 struct extent_buffer *leaf;
8236 struct btrfs_key key;
8239 btrfs_init_path(&path);
/* Search key with the largest possible offset for this bytenr. */
8240 key.objectid = bytenr;
8241 key.type = BTRFS_EXTENT_ITEM_KEY;
8242 key.offset = (u64)-1;
8245 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8248 fprintf(stderr, "Error looking up extent record %d\n", ret);
8249 btrfs_release_path(&path);
8252 if (path.slots[0] > 0) {
8255 ret = btrfs_prev_leaf(root, &path);
8258 } else if (ret > 0) {
8265 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8268 * Block group items come before extent items if they have the same
8269 * bytenr, so walk back one more just in case. Dear future traveller,
8270 * first congrats on mastering time travel. Now if it's not too much
8271 * trouble could you go back to 2006 and tell Chris to make the
8272 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8273 * EXTENT_ITEM_KEY please?
8275 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8276 if (path.slots[0] > 0) {
8279 ret = btrfs_prev_leaf(root, &path);
8282 } else if (ret > 0) {
8287 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8291 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8292 ret = btrfs_next_leaf(root, &path);
8294 fprintf(stderr, "Error going to next leaf "
8296 btrfs_release_path(&path);
8302 leaf = path.nodes[0];
8303 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8304 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8308 if (key.objectid + key.offset < bytenr) {
8312 if (key.objectid > bytenr + num_bytes)
8315 if (key.objectid == bytenr) {
8316 if (key.offset >= num_bytes) {
8320 num_bytes -= key.offset;
8321 bytenr += key.offset;
8322 } else if (key.objectid < bytenr) {
8323 if (key.objectid + key.offset >= bytenr + num_bytes) {
8327 num_bytes = (bytenr + num_bytes) -
8328 (key.objectid + key.offset);
8329 bytenr = key.objectid + key.offset;
8331 if (key.objectid + key.offset < bytenr + num_bytes) {
8332 u64 new_start = key.objectid + key.offset;
8333 u64 new_bytes = bytenr + num_bytes - new_start;
8336 * Weird case, the extent is in the middle of
8337 * our range, we'll have to search one side
8338 * and then the other. Not sure if this happens
8339 * in real life, but no harm in coding it up
8340 * anyway just in case.
8342 btrfs_release_path(&path);
8343 ret = check_extent_exists(root, new_start,
8346 fprintf(stderr, "Right section didn't "
8350 num_bytes = key.objectid - bytenr;
8353 num_bytes = key.objectid - bytenr;
8360 if (num_bytes && !ret) {
8361 fprintf(stderr, "There are no extents for csum range "
8362 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8366 btrfs_release_path(&path);
/*
 * Walk the EXTENT_CSUM items of the csum tree and cross-check them.
 *
 * @root is only used to reach fs_info; the walk itself runs on
 * fs_info->csum_root.  For each csum item the covered data length is derived
 * from the item size, the superblock's checksum size and the sector size.
 * When the global check_data_csum is set, check_extent_csums() is also run on
 * the item.  Contiguous csum items are accumulated in offset/num_bytes; when
 * an item does not continue the current run, the run is passed to
 * check_extent_exists(), next to which the "Csum exists for ... but there is
 * no extent record" message is printed.
 */
8370 static int check_csums(struct btrfs_root *root)
8372 struct btrfs_path path;
8373 struct extent_buffer *leaf;
8374 struct btrfs_key key;
8375 u64 offset = 0, num_bytes = 0;
8376 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8380 unsigned long leaf_offset;
/* From here on "root" refers to the csum tree. */
8382 root = root->fs_info->csum_root;
8383 if (!extent_buffer_uptodate(root->node)) {
8384 fprintf(stderr, "No valid csum tree found\n");
8388 btrfs_init_path(&path);
8389 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8390 key.type = BTRFS_EXTENT_CSUM_KEY;
8392 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8394 fprintf(stderr, "Error searching csum tree %d\n", ret);
8395 btrfs_release_path(&path);
8399 if (ret > 0 && path.slots[0])
8404 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8405 ret = btrfs_next_leaf(root, &path);
8407 fprintf(stderr, "Error going to next leaf "
8414 leaf = path.nodes[0];
8416 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8417 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by this item: one sector per stored checksum. */
8422 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8423 csum_size) * root->fs_info->sectorsize;
8424 if (!check_data_csum)
8425 goto skip_csum_check;
8426 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8427 ret = check_extent_csums(root, key.offset, data_len,
8433 offset = key.offset;
/* This item does not continue the current run: verify the run, then restart. */
8434 } else if (key.offset != offset + num_bytes) {
8435 ret = check_extent_exists(root, offset, num_bytes);
8437 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8438 "there is no extent record\n",
8439 offset, offset+num_bytes);
8442 offset = key.offset;
8445 num_bytes += data_len;
8449 btrfs_release_path(&path);
8453 static int is_dropped_key(struct btrfs_key *key,
8454 struct btrfs_key *drop_key) {
8455 if (key->objectid < drop_key->objectid)
8457 else if (key->objectid == drop_key->objectid) {
8458 if (key->type < drop_key->type)
8460 else if (key->type == drop_key->type) {
8461 if (key->offset < drop_key->offset)
8469 * Here are the rules for FULL_BACKREF.
8471 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8472 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8474 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8475 * if it happened after the relocation occurred since we'll have dropped the
8476 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8477 * have no real way to know for sure.
8479 * We process the blocks one root at a time, and we start from the lowest root
8480 * objectid and go to the highest. So we can just lookup the owner backref for
8481 * the record and if we don't find it then we know it doesn't exist and we have
8484 * FIXME: if we ever start reclaiming root objectids then we need to fix this
8485 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8486 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether @buf should carry BTRFS_BLOCK_FLAG_FULL_BACKREF, following
 * the rules in the comment above, and store the result through the flags
 * out-pointer.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The checks
 * use the root item's objectid and bytenr, the header's RELOC flag, the
 * header owner, and finally a tree backref lookup for that owner.  If the
 * record already holds a flag_block_full_backref value other than FLAG_UNSET
 * that disagrees with the outcome, rec->bad_full_backref is set.
 */
8488 static int calc_extent_flag(struct cache_tree *extent_cache,
8489 struct extent_buffer *buf,
8490 struct root_item_record *ri,
8493 struct extent_record *rec;
8494 struct cache_extent *cache;
8495 struct tree_backref *tback;
8498 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8499 /* we have added this extent before */
8503 rec = container_of(cache, struct extent_record, cache);
8506 * Except file/reloc tree, we can not have
8509 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Is buf the root block of the tree described by ri? */
8514 if (buf->start == ri->bytenr)
8517 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8520 owner = btrfs_header_owner(buf);
8521 if (owner == ri->objectid)
/* Look for a non-full (parent == 0) tree backref held by the header owner. */
8524 tback = find_tree_backref(rec, 0, owner);
/* A previously recorded value other than 0 contradicts this outcome. */
8529 if (rec->flag_block_full_backref != FLAG_UNSET &&
8530 rec->flag_block_full_backref != 0)
8531 rec->bad_full_backref = 1;
8534 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* A previously recorded value other than 1 contradicts this outcome. */
8535 if (rec->flag_block_full_backref != FLAG_UNSET &&
8536 rec->flag_block_full_backref != 1)
8537 rec->bad_full_backref = 1;
8541 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8543 fprintf(stderr, "Invalid key type(");
8544 print_key_type(stderr, 0, key_type);
8545 fprintf(stderr, ") found in root(");
8546 print_objectid(stderr, rootid, 0);
8547 fprintf(stderr, ")\n");
8551 * Check if the key is valid with its extent buffer.
8553 * This is an early check in case an invalid key exists in an extent buffer
8554 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Validate @key_type against the tree @rootid it was found in.
 *
 * Each group of case labels names key types and is followed by a test on the
 * tree objectid they are expected in.  report_mismatch_key_root() is called
 * for a rejected pair.  The caller in run_next_block() treats a non-zero
 * return as "invalid key".
 */
8557 static int check_type_with_root(u64 rootid, u8 key_type)
8560 /* Only valid in chunk tree */
8561 case BTRFS_DEV_ITEM_KEY:
8562 case BTRFS_CHUNK_ITEM_KEY:
8563 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8566 /* valid in csum and log tree */
/*
 * NOTE(review): every other label here is a key type, but
 * BTRFS_CSUM_TREE_OBJECTID is a tree objectid.  BTRFS_EXTENT_CSUM_KEY looks
 * like the intended label -- confirm, and re-check the rootid condition that
 * follows before changing it.
 */
8567 case BTRFS_CSUM_TREE_OBJECTID:
8568 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
/* Extent tree only. */
8572 case BTRFS_EXTENT_ITEM_KEY:
8573 case BTRFS_METADATA_ITEM_KEY:
8574 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8575 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
/* Root tree only. */
8578 case BTRFS_ROOT_ITEM_KEY:
8579 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
/* Device tree only. */
8582 case BTRFS_DEV_EXTENT_KEY:
8583 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8589 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next batch of pending tree blocks, read the first one and record
 * everything it references.
 *
 * pick_next_pending() fills @bits; the picked blocks are queued for readahead
 * and bits[0] is the block processed by this call.  That block is removed
 * from the @pending, @reada and @nodes caches and read with
 * read_tree_block().  Its FULL_BACKREF state is taken from
 * btrfs_lookup_extent_info() when the global init_extent_tree is clear, and
 * from calc_extent_flag() otherwise or as a fallback.
 *
 * Leaf: every item is dispatched on key.type.  Extent and metadata items,
 * chunk, device, block group and device extent items and the keyed backref
 * items are fed into the matching cache.  EXTENT_DATA items update the global
 * data byte counters and add a data backref.
 *
 * Node: every child pointer gets an extent record and a tree backref, and is
 * queued in @nodes or @pending.
 *
 * The global btree statistics (btree_space_waste, total_btree_bytes,
 * total_fs_tree_bytes, total_extent_tree_bytes) are updated before the buffer
 * is released.
 */
8593 static int run_next_block(struct btrfs_root *root,
8594 struct block_info *bits,
8597 struct cache_tree *pending,
8598 struct cache_tree *seen,
8599 struct cache_tree *reada,
8600 struct cache_tree *nodes,
8601 struct cache_tree *extent_cache,
8602 struct cache_tree *chunk_cache,
8603 struct rb_root *dev_cache,
8604 struct block_group_tree *block_group_cache,
8605 struct device_extent_tree *dev_extent_cache,
8606 struct root_item_record *ri)
8608 struct btrfs_fs_info *fs_info = root->fs_info;
8609 struct extent_buffer *buf;
8610 struct extent_record *rec = NULL;
8621 struct btrfs_key key;
8622 struct cache_extent *cache;
8625 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8626 bits_nr, &reada_bits);
/* Queue readahead for the blocks picked in this batch. */
8631 for(i = 0; i < nritems; i++) {
8632 ret = add_cache_extent(reada, bits[i].start,
8637 /* fixme, get the parent transid */
8638 readahead_tree_block(fs_info, bits[i].start, 0);
8641 *last = bits[0].start;
8642 bytenr = bits[0].start;
8643 size = bits[0].size;
/* This block is handled now: drop it from the pending, reada and nodes caches. */
8645 cache = lookup_cache_extent(pending, bytenr, size);
8647 remove_cache_extent(pending, cache);
8650 cache = lookup_cache_extent(reada, bytenr, size);
8652 remove_cache_extent(reada, cache);
8655 cache = lookup_cache_extent(nodes, bytenr, size);
8657 remove_cache_extent(nodes, cache);
/* Use the parent generation stored in the extent record for the read below. */
8660 cache = lookup_cache_extent(extent_cache, bytenr, size);
8662 rec = container_of(cache, struct extent_record, cache);
8663 gen = rec->parent_generation;
8666 /* fixme, get the real parent transid */
8667 buf = read_tree_block(root->fs_info, bytenr, gen);
8668 if (!extent_buffer_uptodate(buf)) {
8669 record_bad_block_io(root->fs_info,
8670 extent_cache, bytenr, size);
8674 nritems = btrfs_header_nritems(buf);
8677 if (!init_extent_tree) {
8678 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8679 btrfs_header_level(buf), 1, NULL,
8682 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8684 fprintf(stderr, "Couldn't calc extent flags\n");
8685 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8690 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8692 fprintf(stderr, "Couldn't calc extent flags\n");
8693 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8697 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8699 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8700 ri->objectid == btrfs_header_owner(buf)) {
8702 * Ok we got to this block from it's original owner and
8703 * we have FULL_BACKREF set. Relocation can leave
8704 * converted blocks over so this is altogether possible,
8705 * however it's not possible if the generation > the
8706 * last snapshot, so check for this case.
8708 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8709 btrfs_header_generation(buf) > ri->last_snapshot) {
8710 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8711 rec->bad_full_backref = 1;
8716 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8717 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8718 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8719 rec->bad_full_backref = 1;
/* Store the final decision in the extent record. */
8723 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8724 rec->flag_block_full_backref = 1;
8728 rec->flag_block_full_backref = 0;
8730 owner = btrfs_header_owner(buf);
8733 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item on its key type. */
8737 if (btrfs_is_leaf(buf)) {
8738 btree_space_waste += btrfs_leaf_free_space(root, buf);
8739 for (i = 0; i < nritems; i++) {
8740 struct btrfs_file_extent_item *fi;
8741 btrfs_item_key_to_cpu(buf, &key, i);
8743 * Check key type against the leaf owner.
8744 * Could filter quite a lot of early error if
8747 if (check_type_with_root(btrfs_header_owner(buf),
8749 fprintf(stderr, "ignoring invalid key\n");
8752 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8753 process_extent_item(root, extent_cache, buf,
8757 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8758 process_extent_item(root, extent_cache, buf,
8762 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8764 btrfs_item_size_nr(buf, i);
8767 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8768 process_chunk_item(chunk_cache, &key, buf, i);
8771 if (key.type == BTRFS_DEV_ITEM_KEY) {
8772 process_device_item(dev_cache, &key, buf, i);
8775 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8776 process_block_group_item(block_group_cache,
8780 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8781 process_device_extent_item(dev_extent_cache,
8786 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8787 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8788 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree block ref: key.offset is passed as the root, with no parent. */
8795 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8796 ret = add_tree_backref(extent_cache,
8797 key.objectid, 0, key.offset, 0);
8800 "add_tree_backref failed (leaf tree block): %s",
/* Shared block ref: key.offset is passed as the parent, with no root. */
8804 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8805 ret = add_tree_backref(extent_cache,
8806 key.objectid, key.offset, 0, 0);
8809 "add_tree_backref failed (leaf shared block): %s",
8813 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8814 struct btrfs_extent_data_ref *ref;
8815 ref = btrfs_item_ptr(buf, i,
8816 struct btrfs_extent_data_ref);
8817 add_data_backref(extent_cache,
8819 btrfs_extent_data_ref_root(buf, ref),
8820 btrfs_extent_data_ref_objectid(buf,
8822 btrfs_extent_data_ref_offset(buf, ref),
8823 btrfs_extent_data_ref_count(buf, ref),
8824 0, root->fs_info->sectorsize);
8827 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8828 struct btrfs_shared_data_ref *ref;
8829 ref = btrfs_item_ptr(buf, i,
8830 struct btrfs_shared_data_ref);
8831 add_data_backref(extent_cache,
8832 key.objectid, key.offset, 0, 0, 0,
8833 btrfs_shared_data_ref_count(buf, ref),
8834 0, root->fs_info->sectorsize);
8837 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8838 struct bad_item *bad;
8840 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
/* Remember this key and its owner on the global delete_items list. */
8844 bad = malloc(sizeof(struct bad_item));
8847 INIT_LIST_HEAD(&bad->list);
8848 memcpy(&bad->key, &key,
8849 sizeof(struct btrfs_key));
8850 bad->root_id = owner;
8851 list_add_tail(&bad->list, &delete_items);
/* File extent handling: inline type and zero disk_bytenr are tested first. */
8854 if (key.type != BTRFS_EXTENT_DATA_KEY)
8856 fi = btrfs_item_ptr(buf, i,
8857 struct btrfs_file_extent_item);
8858 if (btrfs_file_extent_type(buf, fi) ==
8859 BTRFS_FILE_EXTENT_INLINE)
8861 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8864 data_bytes_allocated +=
8865 btrfs_file_extent_disk_num_bytes(buf, fi);
8866 if (data_bytes_allocated < root->fs_info->sectorsize) {
8869 data_bytes_referenced +=
8870 btrfs_file_extent_num_bytes(buf, fi);
8871 add_data_backref(extent_cache,
8872 btrfs_file_extent_disk_bytenr(buf, fi),
8873 parent, owner, key.objectid, key.offset -
8874 btrfs_file_extent_offset(buf, fi), 1, 1,
8875 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record every child pointer. */
8879 struct btrfs_key first_key;
8881 first_key.objectid = 0;
8884 btrfs_item_key_to_cpu(buf, &first_key, 0);
8885 level = btrfs_header_level(buf);
8886 for (i = 0; i < nritems; i++) {
8887 struct extent_record tmpl;
8889 ptr = btrfs_node_blockptr(buf, i);
8890 size = root->fs_info->nodesize;
8891 btrfs_node_key_to_cpu(buf, &key, i);
/* Children at the drop level whose key sorts before ri->drop_key. */
8893 if ((level == ri->drop_level)
8894 && is_dropped_key(&key, &ri->drop_key)) {
8899 memset(&tmpl, 0, sizeof(tmpl));
8900 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8901 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8906 tmpl.max_size = size;
8907 ret = add_extent_rec(extent_cache, &tmpl);
8911 ret = add_tree_backref(extent_cache, ptr, parent,
8915 "add_tree_backref failed (non-leaf block): %s",
8921 add_pending(nodes, seen, ptr, size);
8923 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots in this node are counted as wasted space. */
8926 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
8927 nritems) * sizeof(struct btrfs_key_ptr);
8929 total_btree_bytes += buf->len;
8930 if (fs_root_objectid(btrfs_header_owner(buf)))
8931 total_fs_tree_bytes += buf->len;
8932 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8933 total_extent_tree_bytes += buf->len;
8935 free_extent_buffer(buf);
/*
 * Queue a tree root block for processing and seed its extent record.
 *
 * The block is added to @nodes or @pending, with the header level test
 * selecting @nodes for levels above 0.  An extent record for
 * buf->start / buf->len is added to @extent_cache, followed by a tree
 * backref.  The backref uses buf->start as its parent when objectid is
 * BTRFS_TREE_RELOC_OBJECTID or the header's backref revision is older than
 * BTRFS_MIXED_BACKREF_REV; the other call passes parent 0 and objectid as
 * the root.
 */
8939 static int add_root_to_pending(struct extent_buffer *buf,
8940 struct cache_tree *extent_cache,
8941 struct cache_tree *pending,
8942 struct cache_tree *seen,
8943 struct cache_tree *nodes,
8946 struct extent_record tmpl;
8949 if (btrfs_header_level(buf) > 0)
8950 add_pending(nodes, seen, buf->start, buf->len);
8952 add_pending(pending, seen, buf->start, buf->len);
8954 memset(&tmpl, 0, sizeof(tmpl));
8955 tmpl.start = buf->start;
8960 tmpl.max_size = buf->len;
8961 add_extent_rec(extent_cache, &tmpl);
8963 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8964 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8965 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8968 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8973 /* as we fix the tree, we might be deleting blocks that
8974 * we're tracking for repair. This hook makes sure we
8975 * remove any backrefs for blocks as we are fixing them.
/*
 * Update the cached extent record that overlaps @bytenr / @num_bytes when an
 * extent reference is dropped during repair.
 *
 * The cache is taken from root->fs_info->fsck_extent_cache.  @owner at or
 * above BTRFS_FIRST_FREE_OBJECTID is treated as data.  The data path adjusts
 * a data backref found by (@parent, @root_objectid, @owner, @offset); the
 * other path adjusts a tree backref found by (@parent, @root_objectid).  The
 * record is handed to maybe_free_extent_rec() at the end.
 */
8977 static int free_extent_hook(struct btrfs_trans_handle *trans,
8978 struct btrfs_root *root,
8979 u64 bytenr, u64 num_bytes, u64 parent,
8980 u64 root_objectid, u64 owner, u64 offset,
8983 struct extent_record *rec;
8984 struct cache_extent *cache;
8986 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8988 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8989 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8993 rec = container_of(cache, struct extent_record, cache);
8995 struct data_backref *back;
8996 back = find_data_backref(rec, parent, root_objectid, owner,
8997 offset, 1, bytenr, num_bytes);
/* Drop the references seen through file extents. */
9000 if (back->node.found_ref) {
9001 back->found_ref -= refs_to_drop;
9003 rec->refs -= refs_to_drop;
/* Drop the references recorded in the extent tree. */
9005 if (back->node.found_extent_tree) {
9006 back->num_refs -= refs_to_drop;
9007 if (rec->extent_item_refs)
9008 rec->extent_item_refs -= refs_to_drop;
9010 if (back->found_ref == 0)
9011 back->node.found_ref = 0;
9012 if (back->num_refs == 0)
9013 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased while node.found_ref is still set.
 * "!back->node.found_ref" may have been intended -- confirm.
 */
9015 if (!back->node.found_extent_tree && back->node.found_ref) {
9016 rb_erase(&back->node.node, &rec->backref_tree);
9020 struct tree_backref *back;
9021 back = find_tree_backref(rec, parent, root_objectid);
9024 if (back->node.found_ref) {
9027 back->node.found_ref = 0;
9029 if (back->node.found_extent_tree) {
9030 if (rec->extent_item_refs)
9031 rec->extent_item_refs--;
9032 back->node.found_extent_tree = 0;
/*
 * NOTE(review): node.found_ref was cleared above whenever it was set, so this
 * condition cannot hold here and the erase looks unreachable -- confirm
 * whether "!back->node.found_ref" was intended.
 */
9034 if (!back->node.found_extent_tree && back->node.found_ref) {
9035 rb_erase(&back->node.node, &rec->backref_tree);
9039 maybe_free_extent_rec(extent_cache, rec);
/*
 * Remove the extent-tree items keyed on a given bytenr.
 *
 * The extent tree is searched with a key whose objectid is bytenr and whose
 * offset is (u64)-1.  Found keys with objectid bytenr and a type in the
 * extent, metadata or backref list below are deleted with btrfs_del_item(),
 * each deletion being logged to stderr.  For any other type the search key is
 * moved just below the found key.  After an EXTENT_ITEM or METADATA_ITEM is
 * deleted, btrfs_update_block_group() is called with the extent's size: the
 * key offset for extent items, nodesize for metadata items.
 */
9044 static int delete_extent_records(struct btrfs_trans_handle *trans,
9045 struct btrfs_root *root,
9046 struct btrfs_path *path,
9049 struct btrfs_key key;
9050 struct btrfs_key found_key;
9051 struct extent_buffer *leaf;
9056 key.objectid = bytenr;
9058 key.offset = (u64)-1;
9061 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9068 if (path->slots[0] == 0)
9074 leaf = path->nodes[0];
9075 slot = path->slots[0];
9077 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9078 if (found_key.objectid != bytenr)
/* Not an extent or backref item: move the search key just below found_key. */
9081 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9082 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9083 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9084 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9085 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9086 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9087 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9088 btrfs_release_path(path);
9089 if (found_key.type == 0) {
9090 if (found_key.offset == 0)
9092 key.offset = found_key.offset - 1;
9093 key.type = found_key.type;
9095 key.type = found_key.type - 1;
9096 key.offset = (u64)-1;
9100 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9101 found_key.objectid, found_key.type, found_key.offset);
9103 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9106 btrfs_release_path(path);
/* Extent items store their length in the key offset; metadata items span nodesize. */
9108 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9109 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9110 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9111 found_key.offset : root->fs_info->nodesize;
9113 ret = btrfs_update_block_group(root, bytenr,
9120 btrfs_release_path(path);
9125 * for a single backref, this will allocate a new extent
9126 * and add the backref to it.
9128 static int record_extent(struct btrfs_trans_handle *trans,
9129 struct btrfs_fs_info *info,
9130 struct btrfs_path *path,
9131 struct extent_record *rec,
9132 struct extent_backref *back,
9133 int allocated, u64 flags)
9136 struct btrfs_root *extent_root = info->extent_root;
9137 struct extent_buffer *leaf;
9138 struct btrfs_key ins_key;
9139 struct btrfs_extent_item *ei;
9140 struct data_backref *dback;
9141 struct btrfs_tree_block_info *bi;
9144 rec->max_size = max_t(u64, rec->max_size,
9148 u32 item_size = sizeof(*ei);
9151 item_size += sizeof(*bi);
9153 ins_key.objectid = rec->start;
9154 ins_key.offset = rec->max_size;
9155 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9157 ret = btrfs_insert_empty_item(trans, extent_root, path,
9158 &ins_key, item_size);
9162 leaf = path->nodes[0];
9163 ei = btrfs_item_ptr(leaf, path->slots[0],
9164 struct btrfs_extent_item);
9166 btrfs_set_extent_refs(leaf, ei, 0);
9167 btrfs_set_extent_generation(leaf, ei, rec->generation);
9169 if (back->is_data) {
9170 btrfs_set_extent_flags(leaf, ei,
9171 BTRFS_EXTENT_FLAG_DATA);
9173 struct btrfs_disk_key copy_key;;
9175 bi = (struct btrfs_tree_block_info *)(ei + 1);
9176 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9179 btrfs_set_disk_key_objectid(©_key,
9180 rec->info_objectid);
9181 btrfs_set_disk_key_type(©_key, 0);
9182 btrfs_set_disk_key_offset(©_key, 0);
9184 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9185 btrfs_set_tree_block_key(leaf, bi, ©_key);
9187 btrfs_set_extent_flags(leaf, ei,
9188 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9191 btrfs_mark_buffer_dirty(leaf);
9192 ret = btrfs_update_block_group(extent_root, rec->start,
9193 rec->max_size, 1, 0);
9196 btrfs_release_path(path);
9199 if (back->is_data) {
9203 dback = to_data_backref(back);
9204 if (back->full_backref)
9205 parent = dback->parent;
9209 for (i = 0; i < dback->found_ref; i++) {
9210 /* if parent != 0, we're doing a full backref
9211 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9212 * just makes the backref allocator create a data
9215 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9216 rec->start, rec->max_size,
9220 BTRFS_FIRST_FREE_OBJECTID :
9226 fprintf(stderr, "adding new data backref"
9227 " on %llu %s %llu owner %llu"
9228 " offset %llu found %d\n",
9229 (unsigned long long)rec->start,
9230 back->full_backref ?
9232 back->full_backref ?
9233 (unsigned long long)parent :
9234 (unsigned long long)dback->root,
9235 (unsigned long long)dback->owner,
9236 (unsigned long long)dback->offset,
9240 struct tree_backref *tback;
9242 tback = to_tree_backref(back);
9243 if (back->full_backref)
9244 parent = tback->parent;
9248 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9249 rec->start, rec->max_size,
9250 parent, tback->root, 0, 0);
9251 fprintf(stderr, "adding new tree backref on "
9252 "start %llu len %llu parent %llu root %llu\n",
9253 rec->start, rec->max_size, parent, tback->root);
9256 btrfs_release_path(path);
9260 static struct extent_entry *find_entry(struct list_head *entries,
9261 u64 bytenr, u64 bytes)
9263 struct extent_entry *entry = NULL;
9265 list_for_each_entry(entry, entries, list) {
9266 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Scan @entries for the entry that the most backrefs agree on.
 *
 * Entries whose broken count equals their total count are not trusted.  When
 * two candidates have the same count no winner can be declared from them, so
 * the scan continues.
 *
 * NOTE(review): callers appear to treat a NULL result as "no clear winner" --
 * confirm against the return statements.
 */
9273 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9275 struct extent_entry *entry, *best = NULL, *prev = NULL;
9277 list_for_each_entry(entry, entries, list) {
9279 * If there are as many broken entries as entries then we know
9280 * not to trust this particular entry.
9282 if (entry->broken == entry->count)
9286 * Special case, when there are only two entries and 'best' is
9296 * If our current entry == best then we can't be sure our best
9297 * is really the best, so we need to keep searching.
9299 if (best && best->count == entry->count) {
9305 /* Prev == entry, not good enough, have to keep searching */
9306 if (!prev->broken && prev->count == entry->count)
9310 best = (prev->count > entry->count) ? prev : entry;
9311 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with @entry.
 *
 * The fs root named by dback->root is read, and the EXTENT_DATA items of
 * inode dback->owner are scanned from dback->offset until one pointing at
 * (dback->disk_bytenr, dback->bytes) is found.  That item is then looked up
 * again with COW inside a new transaction, and its disk_bytenr, offset and
 * disk_num_bytes are adjusted to match @entry.  ram_bytes is also set, but
 * only for uncompressed extents.
 *
 * Three cases are reported on stderr with a request for a btrfs-image:
 * a compressed extent whose disk_bytenr differs from the entry, a ref that
 * ends past the entry, and a ref that starts before it.
 *
 * The final return yields ret if it is non-zero, otherwise the result of
 * btrfs_commit_transaction().
 */
9319 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9320 struct data_backref *dback, struct extent_entry *entry)
9322 struct btrfs_trans_handle *trans;
9323 struct btrfs_root *root;
9324 struct btrfs_file_extent_item *fi;
9325 struct extent_buffer *leaf;
9326 struct btrfs_key key;
9330 key.objectid = dback->root;
9331 key.type = BTRFS_ROOT_ITEM_KEY;
9332 key.offset = (u64)-1;
9333 root = btrfs_read_fs_root(info, &key);
9335 fprintf(stderr, "Couldn't find root for our ref\n");
9340 * The backref points to the original offset of the extent if it was
9341 * split, so we need to search down to the offset we have and then walk
9342 * forward until we find the backref we're looking for.
9344 key.objectid = dback->owner;
9345 key.type = BTRFS_EXTENT_DATA_KEY;
9346 key.offset = dback->offset;
9347 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9349 fprintf(stderr, "Error looking up ref %d\n", ret);
9354 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9355 ret = btrfs_next_leaf(root, path);
9357 fprintf(stderr, "Couldn't find our ref, next\n");
9361 leaf = path->nodes[0];
9362 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9363 if (key.objectid != dback->owner ||
9364 key.type != BTRFS_EXTENT_DATA_KEY) {
9365 fprintf(stderr, "Couldn't find our ref, search\n");
9368 fi = btrfs_item_ptr(leaf, path->slots[0],
9369 struct btrfs_file_extent_item);
9370 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9371 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Does this file extent point at the extent the backref describes? */
9373 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9378 btrfs_release_path(path);
9380 trans = btrfs_start_transaction(root, 1);
9382 return PTR_ERR(trans);
9385 * Ok we have the key of the file extent we want to fix, now we can cow
9386 * down to the thing and fix it.
9388 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9390 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9391 key.objectid, key.type, key.offset, ret);
9395 fprintf(stderr, "Well that's odd, we just found this key "
9396 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9401 leaf = path->nodes[0];
9402 fi = btrfs_item_ptr(leaf, path->slots[0],
9403 struct btrfs_file_extent_item);
9405 if (btrfs_file_extent_compression(leaf, fi) &&
9406 dback->disk_bytenr != entry->bytenr) {
9407 fprintf(stderr, "Ref doesn't match the record start and is "
9408 "compressed, please take a btrfs-image of this file "
9409 "system and send it to a btrfs developer so they can "
9410 "complete this functionality for bytenr %Lu\n",
9411 dback->disk_bytenr);
/* Broken backref with a different start: only disk_bytenr is replaced. */
9416 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9417 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* Ref starts after the entry start. */
9418 } else if (dback->disk_bytenr > entry->bytenr) {
9419 u64 off_diff, offset;
9421 off_diff = dback->disk_bytenr - entry->bytenr;
9422 offset = btrfs_file_extent_offset(leaf, fi);
9423 if (dback->disk_bytenr + offset +
9424 btrfs_file_extent_num_bytes(leaf, fi) >
9425 entry->bytenr + entry->bytes) {
9426 fprintf(stderr, "Ref is past the entry end, please "
9427 "take a btrfs-image of this file system and "
9428 "send it to a btrfs developer, ref %Lu\n",
9429 dback->disk_bytenr);
9434 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9435 btrfs_set_file_extent_offset(leaf, fi, offset);
/* Ref starts before the entry start. */
9436 } else if (dback->disk_bytenr < entry->bytenr) {
9439 offset = btrfs_file_extent_offset(leaf, fi);
9440 if (dback->disk_bytenr + offset < entry->bytenr) {
9441 fprintf(stderr, "Ref is before the entry start, please"
9442 " take a btrfs-image of this file system and "
9443 "send it to a btrfs developer, ref %Lu\n",
9444 dback->disk_bytenr);
/* Re-base the in-extent offset onto the entry's start. */
9449 offset += dback->disk_bytenr;
9450 offset -= entry->bytenr;
9451 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9452 btrfs_set_file_extent_offset(leaf, fi, offset);
9455 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9458 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9459 * only do this if we aren't using compression, otherwise it's a
9462 if (!btrfs_file_extent_compression(leaf, fi))
9463 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9465 printf("ram bytes may be wrong?\n");
9466 btrfs_mark_buffer_dirty(leaf);
9468 err = btrfs_commit_transaction(trans, root);
9469 btrfs_release_path(path);
9470 return ret ? ret : err;
/*
 * Reconcile the data backrefs of @rec when they disagree about the extent's
 * start or length.
 *
 * Data backrefs that are not full backrefs and have found_ref != 0 are
 * grouped into extent_entry buckets keyed on (disk_bytenr, bytes).  If there
 * is at most one bucket and no mismatch was seen, nothing needs fixing.
 * Otherwise find_most_right_entry() picks the value most backrefs agree on.
 * If that gives no answer, the extent record's own (start, nr) is looked up
 * or added as an entry and the pick is retried.  Backrefs that differ from
 * the chosen entry are rewritten with repair_ref().
 *
 * The temporary entries list is emptied before returning.
 */
9473 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9474 struct extent_record *rec)
9476 struct extent_backref *back, *tmp;
9477 struct data_backref *dback;
9478 struct extent_entry *entry, *best = NULL;
9481 int broken_entries = 0;
9486 * Metadata is easy and the backrefs should always agree on bytenr and
9487 * size, if not we've got bigger issues.
9492 rbtree_postorder_for_each_entry_safe(back, tmp,
9493 &rec->backref_tree, node) {
9494 if (back->full_backref || !back->is_data)
9497 dback = to_data_backref(back);
9500 * We only pay attention to backrefs that we found a real
9503 if (dback->found_ref == 0)
9507 * For now we only catch when the bytes don't match, not the
9508 * bytenr. We can easily do this at the same time, but I want
9509 * to have a fs image to test on before we just add repair
9510 * functionality willy-nilly so we know we won't screw up the
9514 entry = find_entry(&entries, dback->disk_bytenr,
9517 entry = malloc(sizeof(struct extent_entry));
9522 memset(entry, 0, sizeof(*entry));
9523 entry->bytenr = dback->disk_bytenr;
9524 entry->bytes = dback->bytes;
9525 list_add_tail(&entry->list, &entries);
9530 * If we only have on entry we may think the entries agree when
9531 * in reality they don't so we have to do some extra checking.
9533 if (dback->disk_bytenr != rec->start ||
9534 dback->bytes != rec->nr || back->broken)
9545 /* Yay all the backrefs agree, carry on good sir */
9546 if (nr_entries <= 1 && !mismatch)
9549 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9550 "%Lu\n", rec->start);
9553 * First we want to see if the backrefs can agree amongst themselves who
9554 * is right, so figure out which one of the entries has the highest
9557 best = find_most_right_entry(&entries);
9560 * Ok so we may have an even split between what the backrefs think, so
9561 * this is where we use the extent ref to see what it thinks.
9564 entry = find_entry(&entries, rec->start, rec->nr);
9565 if (!entry && (!broken_entries || !rec->found_rec)) {
9566 fprintf(stderr, "Backrefs don't agree with each other "
9567 "and extent record doesn't agree with anybody,"
9568 " so we can't fix bytenr %Lu bytes %Lu\n",
9569 rec->start, rec->nr);
9572 } else if (!entry) {
9574 * Ok our backrefs were broken, we'll assume this is the
9575 * correct value and add an entry for this range.
9577 entry = malloc(sizeof(struct extent_entry));
9582 memset(entry, 0, sizeof(*entry));
9583 entry->bytenr = rec->start;
9584 entry->bytes = rec->nr;
9585 list_add_tail(&entry->list, &entries);
9589 best = find_most_right_entry(&entries);
9591 fprintf(stderr, "Backrefs and extent record evenly "
9592 "split on who is right, this is going to "
9593 "require user input to fix bytenr %Lu bytes "
9594 "%Lu\n", rec->start, rec->nr);
9601 * I don't think this can happen currently as we'll abort() if we catch
9602 * this case higher up, but in case somebody removes that we still can't
9603 * deal with it properly here yet, so just bail out of that's the case.
9605 if (best->bytenr != rec->start) {
9606 fprintf(stderr, "Extent start and backref starts don't match, "
9607 "please use btrfs-image on this file system and send "
9608 "it to a btrfs developer so they can make fsck fix "
9609 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9610 rec->start, rec->nr);
9616 * Ok great we all agreed on an extent record, let's go find the real
9617 * references and fix up the ones that don't match.
9619 rbtree_postorder_for_each_entry_safe(back, tmp,
9620 &rec->backref_tree, node) {
9621 if (back->full_backref || !back->is_data)
9624 dback = to_data_backref(back);
9627 * Still ignoring backrefs that don't have a real ref attached
9630 if (dback->found_ref == 0)
9633 if (dback->bytes == best->bytes &&
9634 dback->disk_bytenr == best->bytenr)
9637 ret = repair_ref(info, path, dback, best);
9643 * Ok we messed with the actual refs, which means we need to drop our
9644 * entire cache and go back and rescan. I know this is a huge pain and
9645 * adds a lot of extra work, but it's the only way to be safe. Once all
9646 * the backrefs agree we may not need to do anything to the extent
9651 while (!list_empty(&entries)) {
9652 entry = list_entry(entries.next, struct extent_entry, list);
9653 list_del_init(&entry->list);
/*
 * Replace @rec in @extent_cache with the duplicate record that came from a
 * real extent item.
 *
 * The early test singles out records that were themselves found in the
 * extent tree or that have more than one duplicate.  Otherwise @rec is
 * removed from the cache and the first entry of rec->dups becomes the "good"
 * record.  It takes over rec's refs and backrefs list, absorbs cached records
 * that overlap its range, and is inserted into the cache.
 *
 * The final return yields 0 when the good record still has duplicates
 * attached and 1 when it has none.
 *
 * NOTE(review): only the "backrefs" list is moved here, while other functions
 * in this file walk rec->backref_tree -- confirm that the rbtree does not
 * also need to be transferred.
 */
9659 static int process_duplicates(struct cache_tree *extent_cache,
9660 struct extent_record *rec)
9662 struct extent_record *good, *tmp;
9663 struct cache_extent *cache;
9667 * If we found a extent record for this extent then return, or if we
9668 * have more than one duplicate we are likely going to need to delete
9671 if (rec->found_rec || rec->num_duplicates > 1)
9674 /* Shouldn't happen but just in case */
9675 BUG_ON(!rec->num_duplicates);
9678 * So this happens if we end up with a backref that doesn't match the
9679 * actual extent entry. So either the backref is bad or the extent
9680 * entry is bad. Either way we want to have the extent_record actually
9681 * reflect what we found in the extent_tree, so we need to take the
9682 * duplicate out and use that as the extent_record since the only way we
9683 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9685 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate and reset its bookkeeping. */
9687 good = to_extent_record(rec->dups.next);
9688 list_del_init(&good->list);
9689 INIT_LIST_HEAD(&good->backrefs);
9690 INIT_LIST_HEAD(&good->dups);
9691 good->cache.start = good->start;
9692 good->cache.size = good->nr;
9693 good->content_checked = 0;
9694 good->owner_ref_checked = 0;
9695 good->num_duplicates = 0;
9696 good->refs = rec->refs;
9697 list_splice_init(&rec->backrefs, &good->backrefs);
/* Look for cached records that overlap the promoted record's range. */
9699 cache = lookup_cache_extent(extent_cache, good->start,
9703 tmp = container_of(cache, struct extent_record, cache);
9706 * If we find another overlapping extent and it's found_rec is
9707 * set then it's a duplicate and we need to try and delete
9710 if (tmp->found_rec || tmp->num_duplicates > 0) {
9711 if (list_empty(&good->list))
9712 list_add_tail(&good->list,
9713 &duplicate_extents);
9714 good->num_duplicates += tmp->num_duplicates + 1;
9715 list_splice_init(&tmp->dups, &good->dups);
9716 list_del_init(&tmp->list);
9717 list_add_tail(&tmp->list, &good->dups);
9718 remove_cache_extent(extent_cache, &tmp->cache);
9723 * Ok we have another non extent item backed extent rec, so lets
9724 * just add it to this extent and carry on like we did above.
9726 good->refs += tmp->refs;
9727 list_splice_init(&tmp->backrefs, &good->backrefs);
9728 remove_cache_extent(extent_cache, &tmp->cache);
9731 ret = insert_cache_extent(extent_cache, &good->cache);
9734 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records() - delete extent items belonging to the
 * duplicates of @rec from the extent tree.
 *
 * @root: any root of the filesystem; it is replaced by
 *        root->fs_info->extent_root before the transaction is started
 * @rec:  record whose dups list holds the overlapping records
 *
 * rec->dups is first walked to find the record that covers all the others;
 * an overlap that is not fully contained is reported on stderr.  @rec and
 * entries of rec->dups are then gathered on a local delete_list.  Inside one
 * transaction the entries of delete_list are visited: found_rec == 0 is
 * tested first (the guarded statement is not visible, presumably a skip),
 * metadata records are reported as unexpected, and the item keyed
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) is searched for deletion and removed
 * with btrfs_del_item().  After the commit both delete_list and rec->dups
 * are emptied.
 *
 * Returns ret when it is non-zero, otherwise nr_del; rec->num_duplicates is
 * cleared when both are zero.
 *
 * NOTE(review): the selection of "good", the nr_del bookkeeping, the
 * condition for moving entries to delete_list and all error-path statements
 * are on lines missing from this extract.
 */
9737 static int delete_duplicate_records(struct btrfs_root *root,
9738 struct extent_record *rec)
9740 struct btrfs_trans_handle *trans;
9741 LIST_HEAD(delete_list);
9742 struct btrfs_path path;
9743 struct extent_record *tmp, *good, *n;
9746 struct btrfs_key key;
9748 btrfs_init_path(&path);
9751 /* Find the record that covers all of the duplicates. */
9752 list_for_each_entry(tmp, &rec->dups, list) {
9753 if (good->start < tmp->start)
9755 if (good->nr > tmp->nr)
9758 if (tmp->start + tmp->nr < good->start + good->nr) {
9759 fprintf(stderr, "Ok we have overlapping extents that "
9760 "aren't completely covered by each other, this "
9761 "is going to require more careful thought. "
9762 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9763 tmp->start, tmp->nr, good->start, good->nr);
9770 list_add_tail(&rec->list, &delete_list);
9772 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9775 list_move_tail(&tmp->list, &delete_list);
9778 root = root->fs_info->extent_root;
9779 trans = btrfs_start_transaction(root, 1);
9780 if (IS_ERR(trans)) {
9781 ret = PTR_ERR(trans);
9785 list_for_each_entry(tmp, &delete_list, list) {
9786 if (tmp->found_rec == 0)
9788 key.objectid = tmp->start;
9789 key.type = BTRFS_EXTENT_ITEM_KEY;
9790 key.offset = tmp->nr;
9792 /* Shouldn't happen but just in case */
9793 if (tmp->metadata) {
9794 fprintf(stderr, "Well this shouldn't happen, extent "
9795 "record overlaps but is metadata? "
9796 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9800 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9806 ret = btrfs_del_item(trans, root, &path);
9809 btrfs_release_path(&path);
9812 err = btrfs_commit_transaction(trans, root);
9816 while (!list_empty(&delete_list)) {
9817 tmp = to_extent_record(delete_list.next);
9818 list_del_init(&tmp->list);
9824 while (!list_empty(&rec->dups)) {
9825 tmp = to_extent_record(rec->dups.next);
9826 list_del_init(&tmp->list);
9830 btrfs_release_path(&path);
9832 if (!ret && !nr_del)
9833 rec->num_duplicates = 0;
9835 return ret ? ret : nr_del;
/*
 * find_possible_backrefs() - look up the file extents behind the data
 * backrefs of @rec that have not been found yet.
 *
 * @info:         filesystem being checked
 * @path:         caller-provided path; released after each search
 * @extent_cache: cache tree of extent records, probed with the bytenr read
 *                from the file extent item
 * @rec:          extent record whose backref_tree is walked
 *
 * Full backrefs, non-data backrefs and backrefs that already have found_ref
 * set are tested for first (see the in-body notes).  For the others the root
 * named by dback->root is read and the key
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched in it.
 * When the item exists its disk bytenr and disk num bytes are read; subject
 * to the extent_cache lookup explained below they are stored in the backref
 * and found_ref is incremented.
 *
 * Returns PTR_ERR(root) when reading a root fails with anything other than
 * -ENOENT.
 *
 * NOTE(review): the remaining return statements, the continue statements and
 * the assignment announced by the last comment are on lines missing from
 * this extract.
 */
9838 static int find_possible_backrefs(struct btrfs_fs_info *info,
9839 struct btrfs_path *path,
9840 struct cache_tree *extent_cache,
9841 struct extent_record *rec)
9843 struct btrfs_root *root;
9844 struct extent_backref *back, *tmp;
9845 struct data_backref *dback;
9846 struct cache_extent *cache;
9847 struct btrfs_file_extent_item *fi;
9848 struct btrfs_key key;
9852 rbtree_postorder_for_each_entry_safe(back, tmp,
9853 &rec->backref_tree, node) {
9854 /* Don't care about full backrefs (poor unloved backrefs) */
9855 if (back->full_backref || !back->is_data)
9858 dback = to_data_backref(back);
9860 /* We found this one, we don't need to do a lookup */
9861 if (dback->found_ref)
9864 key.objectid = dback->root;
9865 key.type = BTRFS_ROOT_ITEM_KEY;
9866 key.offset = (u64)-1;
9868 root = btrfs_read_fs_root(info, &key);
9870 /* No root, definitely a bad ref, skip */
9871 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9873 /* Other err, exit */
9875 return PTR_ERR(root);
9877 key.objectid = dback->owner;
9878 key.type = BTRFS_EXTENT_DATA_KEY;
9879 key.offset = dback->offset;
9880 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9882 btrfs_release_path(path);
9885 /* Didn't find it, we can carry on */
9890 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9891 struct btrfs_file_extent_item);
9892 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9893 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9894 btrfs_release_path(path);
9895 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9897 struct extent_record *tmp;
9898 tmp = container_of(cache, struct extent_record, cache);
9901 * If we found an extent record for the bytenr for this
9902 * particular backref then we can't add it to our
9903 * current extent record. We only want to add backrefs
9904 * that don't have a corresponding extent item in the
9905 * extent tree since they likely belong to this record
9906 * and we need to fix it if it doesn't match bytenrs.
9912 dback->found_ref += 1;
9913 dback->disk_bytenr = bytenr;
9914 dback->bytes = bytes;
9917 * Set this so the verify backref code knows not to trust the
9918 * values in this backref.
9927 * Record orphan data ref into corresponding root.
9929 * Return 0 if the extent item contains data ref and recorded.
9930 * Return 1 if the extent item contains no useful data ref
9931 * On that case, it may contains only shared_dataref or metadata backref
9932 * or the file extent exists(this should be handled by the extent bytenr
9934 * Return <0 if something goes wrong.
9936 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9937 struct extent_record *rec)
9939 struct btrfs_key key;
9940 struct btrfs_root *dest_root;
9941 struct extent_backref *back, *tmp;
9942 struct data_backref *dback;
9943 struct orphan_data_extent *orphan;
9944 struct btrfs_path path;
9945 int recorded_data_ref = 0;
9950 btrfs_init_path(&path);
9951 rbtree_postorder_for_each_entry_safe(back, tmp,
9952 &rec->backref_tree, node) {
9953 if (back->full_backref || !back->is_data ||
9954 !back->found_extent_tree)
9956 dback = to_data_backref(back);
9957 if (dback->found_ref)
9959 key.objectid = dback->root;
9960 key.type = BTRFS_ROOT_ITEM_KEY;
9961 key.offset = (u64)-1;
9963 dest_root = btrfs_read_fs_root(fs_info, &key);
9965 /* For non-exist root we just skip it */
9966 if (IS_ERR(dest_root) || !dest_root)
9969 key.objectid = dback->owner;
9970 key.type = BTRFS_EXTENT_DATA_KEY;
9971 key.offset = dback->offset;
9973 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9974 btrfs_release_path(&path);
9976 * For ret < 0, it's OK since the fs-tree may be corrupted,
9977 * we need to record it for inode/file extent rebuild.
9978 * For ret > 0, we record it only for file extent rebuild.
9979 * For ret == 0, the file extent exists but only bytenr
9980 * mismatch, let the original bytenr fix routine to handle,
9986 orphan = malloc(sizeof(*orphan));
9991 INIT_LIST_HEAD(&orphan->list);
9992 orphan->root = dback->root;
9993 orphan->objectid = dback->owner;
9994 orphan->offset = dback->offset;
9995 orphan->disk_bytenr = rec->cache.start;
9996 orphan->disk_len = rec->cache.size;
9997 list_add(&dest_root->orphan_data_extents, &orphan->list);
9998 recorded_data_ref = 1;
10001 btrfs_release_path(&path);
10003 return !recorded_data_ref;
/*
 * fixup_extent_refs() - rebuild the extent tree entries of @rec from the
 * backrefs collected during the scan.
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache tree of extent records, handed on to
 *                find_possible_backrefs()
 * @rec:          extent record whose extent item is wrong
 *
 * Sequence of the visible statements:
 *   - for non-metadata records whose refs differ from extent_item_refs,
 *     find_possible_backrefs() and then verify_backrefs() are run;
 *   - a transaction is started on info->extent_root;
 *   - delete_extent_records() removes the existing records;
 *   - info->corrupt_blocks is probed for [rec->start, rec->max_size) (per the
 *     in-body note, no references are added for a corrupt block);
 *   - record_extent() is called for backrefs, after a !found_ref test whose
 *     guarded statement is not visible; rec->bad_full_backref is cleared
 *     before each call;
 *   - the transaction is committed and the repair is reported on stderr.
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is OR-ed into the flags passed to
 * record_extent() when rec->flag_block_full_backref is set.
 *
 * NOTE(review): error checks, goto targets, some declarations and the final
 * return are on lines missing from this extract.
 */
10009 * when an incorrect extent item is found, this will delete
10010 * all of the existing entries for it and recreate them
10011 * based on what the tree scan found.
10013 static int fixup_extent_refs(struct btrfs_fs_info *info,
10014 struct cache_tree *extent_cache,
10015 struct extent_record *rec)
10017 struct btrfs_trans_handle *trans = NULL;
10019 struct btrfs_path path;
10020 struct cache_extent *cache;
10021 struct extent_backref *back, *tmp;
10025 if (rec->flag_block_full_backref)
10026 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10028 btrfs_init_path(&path);
10029 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10031 * Sometimes the backrefs themselves are so broken they don't
10032 * get attached to any meaningful rec, so first go back and
10033 * check any of our backrefs that we couldn't find and throw
10034 * them into the list if we find the backref so that
10035 * verify_backrefs can figure out what to do.
10037 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10042 /* step one, make sure all of the backrefs agree */
10043 ret = verify_backrefs(info, &path, rec);
10047 trans = btrfs_start_transaction(info->extent_root, 1);
10048 if (IS_ERR(trans)) {
10049 ret = PTR_ERR(trans);
10053 /* step two, delete all the existing records */
10054 ret = delete_extent_records(trans, info->extent_root, &path,
10060 /* was this block corrupt? If so, don't add references to it */
10061 cache = lookup_cache_extent(info->corrupt_blocks,
10062 rec->start, rec->max_size);
10068 /* step three, recreate all the refs we did find */
10069 rbtree_postorder_for_each_entry_safe(back, tmp,
10070 &rec->backref_tree, node) {
10072 * if we didn't find any references, don't create a
10073 * new extent record
10075 if (!back->found_ref)
10078 rec->bad_full_backref = 0;
10079 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10087 int err = btrfs_commit_transaction(trans, info->extent_root);
10093 fprintf(stderr, "Repaired extent references for %llu\n",
10094 (unsigned long long)rec->start);
10096 btrfs_release_path(&path);
/*
 * fixup_extent_flags() - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * extent item of @rec.
 *
 * @fs_info: filesystem being repaired; its extent_root is modified
 * @rec:     extent record; start, metadata, info_level, max_size and
 *           flag_block_full_backref are read
 *
 * The flag is set when rec->flag_block_full_backref is non-zero and cleared
 * in the other branch; the leaf is then marked dirty and the transaction
 * committed.  Progress is reported on stderr.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started.  On the two
 * visible failure paths after the search the path is released and the
 * transaction is still committed, with the commit result ignored.
 *
 * NOTE(review): the values returned on those paths and at the end are on
 * lines missing from this extract.
 */
10100 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10101 struct extent_record *rec)
10103 struct btrfs_trans_handle *trans;
10104 struct btrfs_root *root = fs_info->extent_root;
10105 struct btrfs_path path;
10106 struct btrfs_extent_item *ei;
10107 struct btrfs_key key;
/* Metadata is looked up by (start, METADATA_ITEM, level), data by (start, EXTENT_ITEM, size) */
10111 key.objectid = rec->start;
10112 if (rec->metadata) {
10113 key.type = BTRFS_METADATA_ITEM_KEY;
10114 key.offset = rec->info_level;
10116 key.type = BTRFS_EXTENT_ITEM_KEY;
10117 key.offset = rec->max_size;
10120 trans = btrfs_start_transaction(root, 0);
10122 return PTR_ERR(trans);
10124 btrfs_init_path(&path);
10125 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10127 btrfs_release_path(&path);
10128 btrfs_commit_transaction(trans, root);
10131 fprintf(stderr, "Didn't find extent for %llu\n",
10132 (unsigned long long)rec->start);
10133 btrfs_release_path(&path);
10134 btrfs_commit_transaction(trans, root);
10138 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10139 struct btrfs_extent_item);
10140 flags = btrfs_extent_flags(path.nodes[0], ei);
10141 if (rec->flag_block_full_backref) {
10142 fprintf(stderr, "setting full backref on %llu\n",
10143 (unsigned long long)key.objectid);
10144 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10146 fprintf(stderr, "clearing full backref on %llu\n",
10147 (unsigned long long)key.objectid);
10148 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10150 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10151 btrfs_mark_buffer_dirty(path.nodes[0]);
10152 btrfs_release_path(&path);
10153 ret = btrfs_commit_transaction(trans, root);
10155 fprintf(stderr, "Repaired extent flags for %llu\n",
10156 (unsigned long long)rec->start);
/*
 * prune_one_block() - remove the pointer to a corrupt block from a node of
 * the extent tree.
 *
 * @trans:   running transaction
 * @info:    filesystem being repaired; only info->extent_root is searched
 * @corrupt: corrupt block; key, level and cache.start are read
 *
 * The search for corrupt->key is stopped one level above the corrupt block
 * (path.lowest_level = corrupt->level + 1).  The slot the search ended on is
 * compared with corrupt->cache.start first; after that every slot of the
 * node is scanned for it.  The pointer is deleted with btrfs_del_ptr() and
 * the deletion is announced with printk().
 *
 * NOTE(review): the control flow connecting these steps (labels, gotos, what
 * happens when eb is the extent root node) and the return statements are on
 * lines missing from this extract.
 */
10161 /* right now we only prune from the extent allocation tree */
10162 static int prune_one_block(struct btrfs_trans_handle *trans,
10163 struct btrfs_fs_info *info,
10164 struct btrfs_corrupt_block *corrupt)
10167 struct btrfs_path path;
10168 struct extent_buffer *eb;
10172 int level = corrupt->level + 1;
10174 btrfs_init_path(&path);
10176 /* we want to stop at the parent to our busted block */
10177 path.lowest_level = level;
10179 ret = btrfs_search_slot(trans, info->extent_root,
10180 &corrupt->key, &path, -1, 1);
10185 eb = path.nodes[level];
10192 * hopefully the search gave us the block we want to prune,
10193 * lets try that first
10195 slot = path.slots[level];
10196 found = btrfs_node_blockptr(eb, slot);
10197 if (found == corrupt->cache.start)
10200 nritems = btrfs_header_nritems(eb);
10202 /* the search failed, lets scan this node and hope we find it */
10203 for (slot = 0; slot < nritems; slot++) {
10204 found = btrfs_node_blockptr(eb, slot);
10205 if (found == corrupt->cache.start)
10209 * we couldn't find the bad block. TODO, search all the nodes for pointers
10212 if (eb == info->extent_root->node) {
10217 btrfs_release_path(&path);
10222 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10223 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10226 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks() - run prune_one_block() on entries of
 * info->corrupt_blocks and drop them from that cache tree.
 *
 * @info: filesystem being repaired
 *
 * An entry is fetched with search_cache_extent(info->corrupt_blocks, 0),
 * pruned inside a transaction on info->extent_root and then removed from the
 * cache tree.  The result of prune_one_block() is not checked.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started; the visible
 * exit returns the result of btrfs_commit_transaction().
 *
 * NOTE(review): the loop header, its termination test and any other return
 * are on lines missing from this extract.
 */
10230 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10232 struct btrfs_trans_handle *trans = NULL;
10233 struct cache_extent *cache;
10234 struct btrfs_corrupt_block *corrupt;
10237 cache = search_cache_extent(info->corrupt_blocks, 0);
10241 trans = btrfs_start_transaction(info->extent_root, 1);
10243 return PTR_ERR(trans);
10245 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10246 prune_one_block(trans, info, corrupt);
10247 remove_cache_extent(info->corrupt_blocks, cache);
10250 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups() - clear EXTENT_DIRTY ranges from
 * fs_info->free_space_cache and walk the block groups.
 *
 * @fs_info: filesystem being checked
 *
 * A range reported by find_first_extent_bit() for EXTENT_DIRTY is cleared
 * with clear_extent_dirty().  Block groups are visited with
 * btrfs_lookup_first_block_group(), and start is advanced to the end of the
 * one found (key.objectid + key.offset).
 *
 * NOTE(review): the loop headers and whatever is done to each block group
 * are on lines missing from this extract.
 */
10254 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10256 struct btrfs_block_group_cache *cache;
10261 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10262 &start, &end, EXTENT_DIRTY);
10265 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10270 cache = btrfs_lookup_first_block_group(fs_info, start);
10275 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs() - report, and in repair mode fix, the inconsistent
 * extent records in @extent_cache.
 *
 * @root:         any root of the filesystem; root->fs_info is used
 * @extent_cache: cache tree of extent records; a record is removed from it
 *                and its backrefs freed once it has been processed
 *
 * Preparation for repair (see the first in-body note): the ranges of the
 * cached extent records and of the corrupt blocks are marked dirty in
 * fs_info->excluded_extents so nothing is allocated from them, corrupt
 * blocks are pruned and the cached block groups reset.  While repair is set,
 * entries of duplicate_extents are run through process_duplicates() and
 * delete_duplicate_records().
 *
 * Conditions reported on stderr for a record: multiple extent items
 * (num_duplicates), refs differing from extent_item_refs, backpointer
 * mismatch, failed owner ref check, bad full backref, metadata crossing a
 * stripe boundary and chunk type mismatch.  fixup_extent_refs() is called
 * when both repair and fix are set, and fixup_extent_flags() in the bad full
 * backref case.  For a ref count mismatch record_orphan_data_extents() is
 * called as well.
 *
 * On the visible exit path a transaction is started on the extent root and
 * btrfs_fix_block_accounting() is run and committed.
 *
 * NOTE(review): loop headers, the bookkeeping of fix/cur_err/err, the
 * conditions guarding several calls and all return statements are on lines
 * missing from this extract.
 */
10279 static int check_extent_refs(struct btrfs_root *root,
10280 struct cache_tree *extent_cache)
10282 struct extent_record *rec;
10283 struct cache_extent *cache;
10290 * if we're doing a repair, we have to make sure
10291 * we don't allocate from the problem extents.
10292 * In the worst case, this will be all the
10293 * extents in the FS
10295 cache = search_cache_extent(extent_cache, 0);
10297 rec = container_of(cache, struct extent_record, cache);
10298 set_extent_dirty(root->fs_info->excluded_extents,
10300 rec->start + rec->max_size - 1);
10301 cache = next_cache_extent(cache);
10304 /* pin down all the corrupted blocks too */
10305 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10307 set_extent_dirty(root->fs_info->excluded_extents,
10309 cache->start + cache->size - 1);
10310 cache = next_cache_extent(cache);
10312 prune_corrupt_blocks(root->fs_info);
10313 reset_cached_block_groups(root->fs_info);
10316 reset_cached_block_groups(root->fs_info);
10319 * We need to delete any duplicate entries we find first otherwise we
10320 * could mess up the extent tree when we have backrefs that actually
10321 * belong to a different extent item and not the weird duplicate one.
10323 while (repair && !list_empty(&duplicate_extents)) {
10324 rec = to_extent_record(duplicate_extents.next);
10325 list_del_init(&rec->list);
10327 /* Sometimes we can find a backref before we find an actual
10328 * extent, so we need to process it a little bit to see if there
10329 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10330 * if this is a backref screwup. If we need to delete stuff
10331 * process_duplicates() will return 0, otherwise it will return
10334 if (process_duplicates(extent_cache, rec))
10336 ret = delete_duplicate_records(root, rec);
10340 * delete_duplicate_records will return the number of entries
10341 * deleted, so if it's greater than 0 then we know we actually
10342 * did something and we need to remove.
10355 cache = search_cache_extent(extent_cache, 0);
10358 rec = container_of(cache, struct extent_record, cache);
10359 if (rec->num_duplicates) {
10360 fprintf(stderr, "extent item %llu has multiple extent "
10361 "items\n", (unsigned long long)rec->start);
10365 if (rec->refs != rec->extent_item_refs) {
10366 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10367 (unsigned long long)rec->start,
10368 (unsigned long long)rec->nr);
10369 fprintf(stderr, "extent item %llu, found %llu\n",
10370 (unsigned long long)rec->extent_item_refs,
10371 (unsigned long long)rec->refs);
10372 ret = record_orphan_data_extents(root->fs_info, rec);
10378 if (all_backpointers_checked(rec, 1)) {
10379 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10380 (unsigned long long)rec->start,
10381 (unsigned long long)rec->nr);
10385 if (!rec->owner_ref_checked) {
10386 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10387 (unsigned long long)rec->start,
10388 (unsigned long long)rec->nr);
10393 if (repair && fix) {
10394 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10400 if (rec->bad_full_backref) {
10401 fprintf(stderr, "bad full backref, on [%llu]\n",
10402 (unsigned long long)rec->start);
10404 ret = fixup_extent_flags(root->fs_info, rec);
10412 * Although it's not a extent ref's problem, we reuse this
10413 * routine for error reporting.
10414 * No repair function yet.
10416 if (rec->crossing_stripes) {
10418 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10419 rec->start, rec->start + rec->max_size);
10423 if (rec->wrong_chunk_type) {
10425 "bad extent [%llu, %llu), type mismatch with chunk\n",
10426 rec->start, rec->start + rec->max_size);
10431 remove_cache_extent(extent_cache, cache);
10432 free_all_extent_backrefs(rec);
10433 if (!init_extent_tree && repair && (!cur_err || fix))
10434 clear_extent_dirty(root->fs_info->excluded_extents,
10436 rec->start + rec->max_size - 1);
10441 if (ret && ret != -EAGAIN) {
10442 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10445 struct btrfs_trans_handle *trans;
10447 root = root->fs_info->extent_root;
10448 trans = btrfs_start_transaction(root, 1);
10449 if (IS_ERR(trans)) {
10450 ret = PTR_ERR(trans);
10454 ret = btrfs_fix_block_accounting(trans, root);
10457 ret = btrfs_commit_transaction(trans, root);
10469 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10473 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10474 stripe_size = length;
10475 stripe_size /= num_stripes;
10476 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10477 stripe_size = length * 2;
10478 stripe_size /= num_stripes;
10479 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10480 stripe_size = length;
10481 stripe_size /= (num_stripes - 1);
10482 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10483 stripe_size = length;
10484 stripe_size /= (num_stripes - 2);
10486 stripe_size = length;
10488 return stripe_size;
/*
 * check_chunk_refs() - cross-check one chunk record against its block group
 * and its device extents.
 *
 * @chunk_rec:         chunk to check
 * @block_group_cache: block group records, looked up by the chunk range
 * @dev_extent_cache:  device extent records, looked up per stripe by
 *                     (devid, offset, stripe length)
 *
 * Block group: length/offset/type of the chunk are compared with
 * offset/objectid/flags of the block group found; a mismatch or a missing
 * block group is described by the messages below.  The block group is
 * unlinked from its list and stored in chunk_rec->bg_rec (presumably only
 * when it matches - the else line is not visible).
 *
 * Device extents: the expected length comes from calc_stripe_length().  For
 * each stripe the device extent found must agree in devid, offset, chunk
 * offset and length; it is then moved onto chunk_rec->dextents (same caveat
 * about the hidden else).
 *
 * NOTE(review): the fourth parameter, the ret bookkeeping and the return
 * statements are on lines missing from this extract; check_chunks() passes
 * its "silent" argument in fourth position.
 */
10492 * Check the chunk with its block group/dev list ref:
10493 * Return 0 if all refs seems valid.
10494 * Return 1 if part of refs seems valid, need later check for rebuild ref
10495 * like missing block group and needs to search extent tree to rebuild them.
10496 * Return -1 if essential refs are missing and unable to rebuild.
10498 static int check_chunk_refs(struct chunk_record *chunk_rec,
10499 struct block_group_tree *block_group_cache,
10500 struct device_extent_tree *dev_extent_cache,
10503 struct cache_extent *block_group_item;
10504 struct block_group_record *block_group_rec;
10505 struct cache_extent *dev_extent_item;
10506 struct device_extent_record *dev_extent_rec;
10510 int metadump_v2 = 0;
10514 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10516 chunk_rec->length);
10517 if (block_group_item) {
10518 block_group_rec = container_of(block_group_item,
10519 struct block_group_record,
10521 if (chunk_rec->length != block_group_rec->offset ||
10522 chunk_rec->offset != block_group_rec->objectid ||
10524 chunk_rec->type_flags != block_group_rec->flags)) {
10527 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10528 chunk_rec->objectid,
10533 chunk_rec->type_flags,
10534 block_group_rec->objectid,
10535 block_group_rec->type,
10536 block_group_rec->offset,
10537 block_group_rec->offset,
10538 block_group_rec->objectid,
10539 block_group_rec->flags);
10542 list_del_init(&block_group_rec->list);
10543 chunk_rec->bg_rec = block_group_rec;
10548 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10549 chunk_rec->objectid,
10554 chunk_rec->type_flags);
10561 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10562 chunk_rec->num_stripes);
10563 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10564 devid = chunk_rec->stripes[i].devid;
10565 offset = chunk_rec->stripes[i].offset;
10566 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10567 devid, offset, length);
10568 if (dev_extent_item) {
10569 dev_extent_rec = container_of(dev_extent_item,
10570 struct device_extent_record,
10572 if (dev_extent_rec->objectid != devid ||
10573 dev_extent_rec->offset != offset ||
10574 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10575 dev_extent_rec->length != length) {
10578 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10579 chunk_rec->objectid,
10582 chunk_rec->stripes[i].devid,
10583 chunk_rec->stripes[i].offset,
10584 dev_extent_rec->objectid,
10585 dev_extent_rec->offset,
10586 dev_extent_rec->length);
10589 list_move(&dev_extent_rec->chunk_list,
10590 &chunk_rec->dextents);
10595 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10596 chunk_rec->objectid,
10599 chunk_rec->stripes[i].devid,
10600 chunk_rec->stripes[i].offset);
/*
 * check_chunks() - run check_chunk_refs() on every chunk record and sort the
 * records by outcome.
 *
 * @chunk_cache:       cache tree of chunk records
 * @block_group_cache: block group records
 * @dev_extent_cache:  device extent records
 * @good:    list receiving chunks whose check returned 0; may be NULL
 * @bad:     list receiving chunks whose check returned < 0; may be NULL
 * @rebuild: list receiving chunks whose check returned > 0; may be NULL
 * @silent:  forwarded to check_chunk_refs()
 *
 * Afterwards the block groups still linked on
 * block_group_cache->block_groups and the device extents on
 * dev_extent_cache->no_chunk_orphans are walked and reported as having no
 * related chunk.
 *
 * NOTE(review): how err is folded into the return value, and the return
 * itself, are on lines missing from this extract.
 */
10607 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10608 int check_chunks(struct cache_tree *chunk_cache,
10609 struct block_group_tree *block_group_cache,
10610 struct device_extent_tree *dev_extent_cache,
10611 struct list_head *good, struct list_head *bad,
10612 struct list_head *rebuild, int silent)
10614 struct cache_extent *chunk_item;
10615 struct chunk_record *chunk_rec;
10616 struct block_group_record *bg_rec;
10617 struct device_extent_record *dext_rec;
10621 chunk_item = first_cache_extent(chunk_cache);
10622 while (chunk_item) {
10623 chunk_rec = container_of(chunk_item, struct chunk_record,
10625 err = check_chunk_refs(chunk_rec, block_group_cache,
10626 dev_extent_cache, silent);
10629 if (err == 0 && good)
10630 list_add_tail(&chunk_rec->list, good);
10631 if (err > 0 && rebuild)
10632 list_add_tail(&chunk_rec->list, rebuild);
10633 if (err < 0 && bad)
10634 list_add_tail(&chunk_rec->list, bad);
10635 chunk_item = next_cache_extent(chunk_item);
10638 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10641 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10649 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10653 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10654 dext_rec->objectid,
/*
 * check_device_used() - compare the summed length of a device's extents with
 * the byte_used value of its device record.
 *
 * @dev_rec:    device record; devid and byte_used are read
 * @dext_cache: device extent records
 *
 * The walk starts at (dev_rec->devid, 0).  Each device extent visited is
 * unlinked from its device_list and its length is added to the total.  A
 * message is printed when the total differs from dev_rec->byte_used.
 *
 * NOTE(review): the loop header, the statement guarded by the devid test and
 * the return statements are on lines missing from this extract.
 */
10664 static int check_device_used(struct device_record *dev_rec,
10665 struct device_extent_tree *dext_cache)
10667 struct cache_extent *cache;
10668 struct device_extent_record *dev_extent_rec;
10669 u64 total_byte = 0;
10671 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10673 dev_extent_rec = container_of(cache,
10674 struct device_extent_record,
10676 if (dev_extent_rec->objectid != dev_rec->devid)
10679 list_del_init(&dev_extent_rec->device_list);
10680 total_byte += dev_extent_rec->length;
10681 cache = next_cache_extent(cache);
10684 if (total_byte != dev_rec->byte_used) {
10686 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10687 total_byte, dev_rec->byte_used, dev_rec->objectid,
10688 dev_rec->type, dev_rec->offset);
/*
 * is_super_size_valid() - check the superblock total_bytes against the sum
 * of the device sizes.
 *
 * @fs_info: filesystem being checked; the devices on
 *           fs_info->fs_devices->devices are summed
 *
 * A superblock total smaller than the summed dev->total_bytes is reported
 * with error().  The second check is not reached for a superblock carrying
 * BTRFS_SUPER_FLAG_METADUMP or BTRFS_SUPER_FLAG_METADUMP_V2 (the guarded
 * statement is not visible); otherwise totals that are not aligned to
 * fs_info->sectorsize, or that differ, are reported with warning().
 *
 * NOTE(review): the bool values returned on each path are on lines missing
 * from this extract.
 */
10696 * Unlike device size alignment check above, some super total_bytes check
10697 * failure can lead to mount failure for newer kernel.
10699 * So this function will return the error for a fatal super total_bytes problem.
10701 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10703 struct btrfs_device *dev;
10704 struct list_head *dev_list = &fs_info->fs_devices->devices;
10705 u64 total_bytes = 0;
10706 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10708 list_for_each_entry(dev, dev_list, dev_list)
10709 total_bytes += dev->total_bytes;
10711 /* Important check, which can cause unmountable fs */
10712 if (super_bytes < total_bytes) {
10713 error("super total bytes %llu smaller than real device(s) size %llu",
10714 super_bytes, total_bytes);
10715 error("mounting this fs may fail for newer kernels");
10716 error("this can be fixed by 'btrfs rescue fix-device-size'");
10721 * Optional check, just to make everything aligned and match with each
10724 * For a btrfs-image restored fs, we don't need to check it anyway.
10726 if (btrfs_super_flags(fs_info->super_copy) &
10727 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10729 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10730 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10731 super_bytes != total_bytes) {
10732 warning("minor unaligned/mismatch device size detected");
10734 "recommended to use 'btrfs rescue fix-device-size' to fix it");
/*
 * check_devices() - check every device record against its device extents.
 *
 * @dev_cache:        rb-tree of device records, walked with
 *                    rb_first()/rb_next()
 * @dev_extent_cache: device extent records
 *
 * For each device record check_device_used() is called and
 * check_dev_size_alignment() is run on its devid and total_byte with
 * global_info->sectorsize.  Device extents left on
 * dev_extent_cache->no_device_orphans are then reported as having no device.
 *
 * NOTE(review): the loop header, the handling of err and the return value
 * are on lines missing from this extract.
 */
10739 /* check btrfs_dev_item -> btrfs_dev_extent */
10740 static int check_devices(struct rb_root *dev_cache,
10741 struct device_extent_tree *dev_extent_cache)
10743 struct rb_node *dev_node;
10744 struct device_record *dev_rec;
10745 struct device_extent_record *dext_rec;
10749 dev_node = rb_first(dev_cache);
10751 dev_rec = container_of(dev_node, struct device_record, node);
10752 err = check_device_used(dev_rec, dev_extent_cache);
10756 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10757 global_info->sectorsize);
10758 dev_node = rb_next(dev_node);
10760 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10763 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10764 dext_rec->objectid, dext_rec->offset, dext_rec->length);
10771 static int add_root_item_to_list(struct list_head *head,
10772 u64 objectid, u64 bytenr, u64 last_snapshot,
10773 u8 level, u8 drop_level,
10774 struct btrfs_key *drop_key)
10777 struct root_item_record *ri_rec;
10778 ri_rec = malloc(sizeof(*ri_rec));
10781 ri_rec->bytenr = bytenr;
10782 ri_rec->objectid = objectid;
10783 ri_rec->level = level;
10784 ri_rec->drop_level = drop_level;
10785 ri_rec->last_snapshot = last_snapshot;
10787 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10788 list_add_tail(&ri_rec->list, head);
/*
 * free_root_item_list() - empty a list of root_item_record entries.
 *
 * @list: list head; entries are unlinked one at a time from the front until
 *        the list is empty
 *
 * NOTE(review): the statement that releases each unlinked entry is
 * presumably on a line missing from this extract.
 */
10793 static void free_root_item_list(struct list_head *list)
10795 struct root_item_record *ri_rec;
10797 while (!list_empty(list)) {
10798 ri_rec = list_first_entry(list, struct root_item_record,
10800 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list() - feed the tree roots queued on @list into the block
 * scanner.
 *
 * @list: root_item_record entries; each one is unlinked once handled
 * @root: any root of the filesystem; used for root->fs_info and passed on to
 *        run_next_block()
 * @bits: block_info array passed on to run_next_block() (its size parameter
 *        is on a line missing from this extract)
 * @pending, @seen, @reada, @nodes, @extent_cache, @chunk_cache, @dev_cache,
 * @block_group_cache, @dev_extent_cache: caches handed unchanged to
 *        add_root_to_pending() and run_next_block()
 *
 * For every entry the tree block at rec->bytenr is read; when it is not up
 * to date the buffer is released (the error handling that follows is not
 * visible).  add_root_to_pending() then queues the root and run_next_block()
 * is invoked with the record, see the in-body note on snapshots.
 *
 * NOTE(review): loop conditions around the run_next_block() calls, the error
 * checks and the return value are on lines missing from this extract.
 */
10805 static int deal_root_from_list(struct list_head *list,
10806 struct btrfs_root *root,
10807 struct block_info *bits,
10809 struct cache_tree *pending,
10810 struct cache_tree *seen,
10811 struct cache_tree *reada,
10812 struct cache_tree *nodes,
10813 struct cache_tree *extent_cache,
10814 struct cache_tree *chunk_cache,
10815 struct rb_root *dev_cache,
10816 struct block_group_tree *block_group_cache,
10817 struct device_extent_tree *dev_extent_cache)
10822 while (!list_empty(list)) {
10823 struct root_item_record *rec;
10824 struct extent_buffer *buf;
10825 rec = list_entry(list->next,
10826 struct root_item_record, list);
10828 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10829 if (!extent_buffer_uptodate(buf)) {
10830 free_extent_buffer(buf);
10834 ret = add_root_to_pending(buf, extent_cache, pending,
10835 seen, nodes, rec->objectid);
10839 * To rebuild extent tree, we need deal with snapshot
10840 * one by one, otherwise we deal with node firstly which
10841 * can maximize readahead.
10844 ret = run_next_block(root, bits, bits_nr, &last,
10845 pending, seen, reada, nodes,
10846 extent_cache, chunk_cache,
10847 dev_cache, block_group_cache,
10848 dev_extent_cache, rec);
10852 free_extent_buffer(buf);
10853 list_del(&rec->list);
/* Once the list is drained, run_next_block() is called without a root record */
10859 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10860 reada, nodes, extent_cache, chunk_cache,
10861 dev_cache, block_group_cache,
10862 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents() - scan all trees and check chunks, extent
 * references and devices.
 *
 * @fs_info: filesystem to check
 *
 * Outline of the visible statements:
 *   - local caches are initialised and hooked into @fs_info
 *     (excluded_extents, fsck_extent_cache, free_extent_hook,
 *     corrupt_blocks);
 *   - the tree root and the chunk root are queued on normal_trees;
 *   - the root tree is walked and every BTRFS_ROOT_ITEM_KEY item is queued:
 *     on normal_trees when the objectid of its drop_progress key is 0,
 *     otherwise on dropping_trees together with drop_level and the drop
 *     progress key;
 *   - deal_root_from_list() is run on normal_trees and then dropping_trees,
 *     followed by check_chunks(), check_extent_refs() and check_devices();
 *     -EAGAIN is tested after each of the first four, see the in-body note;
 *   - two cleanup sequences release the caches; the first one also resets
 *     the @fs_info hooks to NULL, the second one additionally frees the
 *     extent record cache.
 *
 * NOTE(review): labels, gotos, error checks, the bits_nr setup and the
 * return statements are on lines missing from this extract, so which cleanup
 * sequence runs on which path cannot be told from here.
 */
10872 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10874 struct rb_root dev_cache;
10875 struct cache_tree chunk_cache;
10876 struct block_group_tree block_group_cache;
10877 struct device_extent_tree dev_extent_cache;
10878 struct cache_tree extent_cache;
10879 struct cache_tree seen;
10880 struct cache_tree pending;
10881 struct cache_tree reada;
10882 struct cache_tree nodes;
10883 struct extent_io_tree excluded_extents;
10884 struct cache_tree corrupt_blocks;
10885 struct btrfs_path path;
10886 struct btrfs_key key;
10887 struct btrfs_key found_key;
10889 struct block_info *bits;
10891 struct extent_buffer *leaf;
10893 struct btrfs_root_item ri;
10894 struct list_head dropping_trees;
10895 struct list_head normal_trees;
10896 struct btrfs_root *root1;
10897 struct btrfs_root *root;
10901 root = fs_info->fs_root;
10902 dev_cache = RB_ROOT;
10903 cache_tree_init(&chunk_cache);
10904 block_group_tree_init(&block_group_cache);
10905 device_extent_tree_init(&dev_extent_cache);
10907 cache_tree_init(&extent_cache);
10908 cache_tree_init(&seen);
10909 cache_tree_init(&pending);
10910 cache_tree_init(&nodes);
10911 cache_tree_init(&reada);
10912 cache_tree_init(&corrupt_blocks);
10913 extent_io_tree_init(&excluded_extents);
10914 INIT_LIST_HEAD(&dropping_trees);
10915 INIT_LIST_HEAD(&normal_trees);
/* Expose the stack-local caches through fs_info; they are reset to NULL in the cleanup below */
10918 fs_info->excluded_extents = &excluded_extents;
10919 fs_info->fsck_extent_cache = &extent_cache;
10920 fs_info->free_extent_hook = free_extent_hook;
10921 fs_info->corrupt_blocks = &corrupt_blocks;
10925 bits = malloc(bits_nr * sizeof(struct block_info));
10931 if (ctx.progress_enabled) {
10932 ctx.tp = TASK_EXTENTS;
10933 task_start(ctx.info);
/* The tree root and the chunk root are queued before the root items are walked */
10937 root1 = fs_info->tree_root;
10938 level = btrfs_header_level(root1->node);
10939 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10940 root1->node->start, 0, level, 0, NULL);
10943 root1 = fs_info->chunk_root;
10944 level = btrfs_header_level(root1->node);
10945 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10946 root1->node->start, 0, level, 0, NULL);
10949 btrfs_init_path(&path);
10952 key.type = BTRFS_ROOT_ITEM_KEY;
10953 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10957 leaf = path.nodes[0];
10958 slot = path.slots[0];
10959 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10960 ret = btrfs_next_leaf(root, &path);
10963 leaf = path.nodes[0];
10964 slot = path.slots[0];
10966 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10967 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10968 unsigned long offset;
10971 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10972 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10973 last_snapshot = btrfs_root_last_snapshot(&ri);
10974 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10975 level = btrfs_root_level(&ri);
10976 ret = add_root_item_to_list(&normal_trees,
10977 found_key.objectid,
10978 btrfs_root_bytenr(&ri),
10979 last_snapshot, level,
10984 level = btrfs_root_level(&ri);
10985 objectid = found_key.objectid;
10986 btrfs_disk_key_to_cpu(&found_key,
10987 &ri.drop_progress);
10988 ret = add_root_item_to_list(&dropping_trees,
10990 btrfs_root_bytenr(&ri),
10991 last_snapshot, level,
10992 ri.drop_level, &found_key);
10999 btrfs_release_path(&path);
11002 * check_block can return -EAGAIN if it fixes something, please keep
11003 * this in mind when dealing with return values from these functions, if
11004 * we get -EAGAIN we want to fall through and restart the loop.
11006 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11007 &seen, &reada, &nodes, &extent_cache,
11008 &chunk_cache, &dev_cache, &block_group_cache,
11009 &dev_extent_cache);
11011 if (ret == -EAGAIN)
11015 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11016 &pending, &seen, &reada, &nodes,
11017 &extent_cache, &chunk_cache, &dev_cache,
11018 &block_group_cache, &dev_extent_cache);
11020 if (ret == -EAGAIN)
11025 ret = check_chunks(&chunk_cache, &block_group_cache,
11026 &dev_extent_cache, NULL, NULL, NULL, 0);
11028 if (ret == -EAGAIN)
11033 ret = check_extent_refs(root, &extent_cache);
11035 if (ret == -EAGAIN)
11040 ret = check_devices(&dev_cache, &dev_extent_cache);
11045 task_stop(ctx.info);
11047 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11048 extent_io_tree_cleanup(&excluded_extents);
11049 fs_info->fsck_extent_cache = NULL;
11050 fs_info->free_extent_hook = NULL;
11051 fs_info->corrupt_blocks = NULL;
11052 fs_info->excluded_extents = NULL;
11055 free_chunk_cache_tree(&chunk_cache);
11056 free_device_cache_tree(&dev_cache);
11057 free_block_group_tree(&block_group_cache);
11058 free_device_extent_tree(&dev_extent_cache);
11059 free_extent_cache_tree(&seen);
11060 free_extent_cache_tree(&pending);
11061 free_extent_cache_tree(&reada);
11062 free_extent_cache_tree(&nodes);
11063 free_root_item_list(&normal_trees);
11064 free_root_item_list(&dropping_trees);
11067 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11068 free_extent_cache_tree(&seen);
11069 free_extent_cache_tree(&pending);
11070 free_extent_cache_tree(&reada);
11071 free_extent_cache_tree(&nodes);
11072 free_chunk_cache_tree(&chunk_cache);
11073 free_block_group_tree(&block_group_cache);
11074 free_device_cache_tree(&dev_cache);
11075 free_device_extent_tree(&dev_extent_cache);
11076 free_extent_record_cache(&extent_cache);
11077 free_root_item_list(&normal_trees);
11078 free_root_item_list(&dropping_trees);
11079 extent_io_tree_cleanup(&excluded_extents);
/*
 * Validate the type of one inline backref of an extent item.
 *
 * @eb:   leaf holding the extent item
 * @key:  key of the extent item; only used in the error message
 * @iref: inline ref whose type is examined
 *
 * The four case labels below are the recognised ref types. An unrecognised
 * type is reported through error() and UNKNOWN_TYPE is stored in ret.
 * NOTE(review): the value produced for the recognised types is on a line not
 * visible in this excerpt; presumably 0 -- confirm.
 */
11083 static int check_extent_inline_ref(struct extent_buffer *eb,
11084 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11087 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11090 case BTRFS_TREE_BLOCK_REF_KEY:
11091 case BTRFS_EXTENT_DATA_REF_KEY:
11092 case BTRFS_SHARED_BLOCK_REF_KEY:
11093 case BTRFS_SHARED_DATA_REF_KEY:
11097 error("extent[%llu %u %llu] has unknown ref type: %d",
11098 key->objectid, key->type, key->offset, type);
11099 ret = UNKNOWN_TYPE;
11107 * Check backrefs of a tree block given by @bytenr or @eb.
11109 * @root: the root containing the @bytenr or @eb
11110 * @eb: tree block extent buffer, can be NULL
11111 * @bytenr: bytenr of the tree block to search
11112 * @level: tree level of the tree block
11113 * @owner: owner of the tree block
11115 * Return >0 for any error found and output error message
11116 * Return 0 for no error found
11118 static int check_tree_block_ref(struct btrfs_root *root,
11119 struct extent_buffer *eb, u64 bytenr,
11120 int level, u64 owner, struct node_refs *nrefs)
/*
 * @nrefs is handed to should_check_extent_strictly() and, when non-NULL,
 * supplies the parent bytenr printed in the "backref lost" message.
 */
11122 struct btrfs_key key;
11123 struct btrfs_root *extent_root = root->fs_info->extent_root;
11124 struct btrfs_path path;
11125 struct btrfs_extent_item *ei;
11126 struct btrfs_extent_inline_ref *iref;
11127 struct extent_buffer *leaf;
11132 int root_level = btrfs_header_level(root->node);
11134 u32 nodesize = root->fs_info->nodesize;
11143 btrfs_init_path(&path);
/*
 * Build the search key for @bytenr: METADATA_ITEM when the filesystem has the
 * SKINNY_METADATA incompat flag, EXTENT_ITEM otherwise. The offset is set to
 * (u64)-1 and btrfs_previous_extent_item() is used afterwards to step back to
 * the extent item of @bytenr.
 */
11144 key.objectid = bytenr;
11145 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11146 key.type = BTRFS_METADATA_ITEM_KEY;
11148 key.type = BTRFS_EXTENT_ITEM_KEY;
11149 key.offset = (u64)-1;
11151 /* Search for the backref in extent tree */
11152 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11154 err |= BACKREF_MISSING;
11157 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11159 err |= BACKREF_MISSING;
11163 leaf = path.nodes[0];
11164 slot = path.slots[0];
11165 btrfs_item_key_to_cpu(leaf, &key, slot);
11167 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is stored in key.offset and the inline refs start
 * right after the extent item. Otherwise a btrfs_tree_block_info follows the
 * extent item, carries the level, and the inline refs start after it.
 */
11169 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11170 skinny_level = (int)key.offset;
11171 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11173 struct btrfs_tree_block_info *info;
11175 info = (struct btrfs_tree_block_info *)(ei + 1);
11176 skinny_level = btrfs_tree_block_level(leaf, info);
11177 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11186 * Due to the feature of shared tree blocks, if the upper node
11187 * is a fs root or shared node, the extent of checked node may
11188 * not be updated until the next CoW.
11191 strict = should_check_extent_strictly(root, nrefs,
11193 if (!(btrfs_extent_flags(leaf, ei) &
11194 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11196 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11197 key.objectid, nodesize,
11198 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11199 err = BACKREF_MISMATCH;
/*
 * Compare the generation in the tree block header with the generation stored
 * in the extent item.
 * NOTE(review): this reads from @eb, which the function header says can be
 * NULL; the guard for these checks is not visible in this excerpt -- confirm.
 */
11201 header_gen = btrfs_header_generation(eb);
11202 extent_gen = btrfs_extent_generation(leaf, ei);
11203 if (header_gen != extent_gen) {
11205 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11206 key.objectid, nodesize, header_gen,
11208 err = BACKREF_MISMATCH;
11210 if (level != skinny_level) {
11212 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11213 key.objectid, nodesize, level, skinny_level);
11214 err = BACKREF_MISMATCH;
/* A block whose owner is not a fs tree is expected to have exactly one ref. */
11216 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11218 "extent[%llu %u] is referred by other roots than %llu",
11219 key.objectid, nodesize, root->objectid);
11220 err = BACKREF_MISMATCH;
11225 * Iterate the extent/metadata item to find the exact backref
11227 item_size = btrfs_item_size_nr(leaf, slot);
11228 ptr = (unsigned long)iref;
11229 end = (unsigned long)ei + item_size;
/* Walk every inline ref from the first one up to the end of the item. */
11231 while (ptr < end) {
11232 iref = (struct btrfs_extent_inline_ref *)ptr;
11233 type = btrfs_extent_inline_ref_type(leaf, iref);
11234 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11236 ret = check_extent_inline_ref(leaf, &key, iref);
/*
 * TREE_BLOCK_REF: the ref offset is compared with this root's objectid; when
 * the check is not strict, a ref offset equal to @owner is tested as well.
 * SHARED_BLOCK_REF: the ref offset is the bytenr of the parent block, which
 * is verified by recursing one level up; a zero return counts as found.
 * NOTE(review): the statements executed on a match are not visible here.
 */
11241 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11242 if (offset == root->objectid)
11244 if (!strict && owner == offset)
11246 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11248 * Backref of tree reloc root points to itself, no need
11249 * to check backref any more.
11251 * This may be an error of loop backref, but extent tree
11252 * checker should have already handled it.
11253 * Here we only need to avoid infinite iteration.
11255 if (offset == bytenr) {
11259 * Check if the backref points to valid
11262 found_ref = !check_tree_block_ref( root, NULL,
11263 offset, level + 1, owner,
11270 ptr += btrfs_extent_inline_ref_size(type);
11274 * Inlined extent item doesn't have what we need, check
11275 * TREE_BLOCK_REF_KEY
11278 btrfs_release_path(&path);
11279 key.objectid = bytenr;
11280 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11281 key.offset = root->objectid;
11283 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11288 * Finally check SHARED BLOCK REF, any found will be good
11289 * Here we're not doing comprehensive extent backref checking,
11290 * only need to ensure there is some extent referring to this
11294 btrfs_release_path(&path);
11295 key.objectid = bytenr;
11296 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11297 key.offset = (u64)-1;
11299 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11301 err |= BACKREF_MISSING;
11304 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11306 err |= BACKREF_MISSING;
11312 err |= BACKREF_MISSING;
11314 btrfs_release_path(&path);
/*
 * For the message below: when checking strictly and the level above uses a
 * full backref, report the parent bytenr recorded in @nrefs; otherwise the
 * message falls back to the root objectid. The message is only printed when
 * @eb is non-NULL, i.e. not for the recursive calls made with eb == NULL.
 */
11315 if (nrefs && strict &&
11316 level < root_level && nrefs->full_backref[level + 1])
11317 parent = nrefs->bytenr[level + 1];
11318 if (eb && (err & BACKREF_MISSING))
11320 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11321 bytenr, nodesize, owner, level,
11322 parent ? "parent" : "root",
11323 parent ? parent : root->objectid);
11328 * If @err contains BACKREF_MISSING then add extent of the
11329 * file_extent_data_item.
11331 * Returns error bits after repair.
11333 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11334 struct btrfs_root *root,
11335 struct btrfs_path *pathp,
11336 struct node_refs *nrefs,
11339 struct btrfs_file_extent_item *fi;
11340 struct btrfs_key fi_key;
11341 struct btrfs_key key;
11342 struct btrfs_extent_item *ei;
11343 struct btrfs_path path;
11344 struct btrfs_root *extent_root = root->fs_info->extent_root;
11345 struct extent_buffer *eb;
/* @pathp points at the file extent item to repair. */
11357 eb = pathp->nodes[0];
11358 slot = pathp->slots[0];
11359 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11360 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
/*
 * Inline extents and holes (disk_bytenr == 0) are singled out here.
 * NOTE(review): the action taken for them is on a line not visible in this
 * excerpt.
 */
11362 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11363 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11366 file_offset = fi_key.offset;
11367 generation = btrfs_file_extent_generation(eb, fi);
11368 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11369 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11370 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Offset used for the data backref: file offset minus offset into the extent. */
11371 offset = file_offset - extent_offset;
11373 /* now repair only adds backref */
11374 if ((err & BACKREF_MISSING) == 0)
11377 /* search extent item */
11378 key.objectid = disk_bytenr;
11379 key.type = BTRFS_EXTENT_ITEM_KEY;
11380 key.offset = num_bytes;
11382 btrfs_init_path(&path);
11383 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11389 /* insert an extent item */
11391 key.objectid = disk_bytenr;
11392 key.type = BTRFS_EXTENT_ITEM_KEY;
11393 key.offset = num_bytes;
11394 size = sizeof(*ei);
11396 btrfs_release_path(&path);
11397 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11401 eb = path.nodes[0];
11402 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/*
 * The new extent item starts with 0 refs; the reference itself is added by
 * the btrfs_inc_extent_ref() call further down.
 */
11404 btrfs_set_extent_refs(eb, ei, 0);
11405 btrfs_set_extent_generation(eb, ei, generation);
11406 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11408 btrfs_mark_buffer_dirty(eb);
11409 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11411 btrfs_release_path(&path);
/*
 * NOTE(review): eb was set to pathp->nodes[0] at the top but is reassigned to
 * path.nodes[0] (a leaf of the extent tree) in the insertion step above, and
 * that path has been released by now. Verify that the header bytenr read here
 * is taken from the leaf holding the file extent, not from the extent tree
 * leaf.
 */
11414 if (nrefs->full_backref[0])
11415 parent = btrfs_header_bytenr(eb);
11419 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11421 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11425 "failed to increase extent data backref[%llu %llu] root %llu",
11426 disk_bytenr, num_bytes, root->objectid);
11429 printf("Add one extent data backref [%llu %llu]\n",
11430 disk_bytenr, num_bytes);
/*
 * Drop BACKREF_MISSING from the bits that are handed back to the caller.
 * NOTE(review): the condition guarding this line is not visible here.
 */
11433 err &= ~BACKREF_MISSING;
11436 error("can't repair root %llu extent data item[%llu %llu]",
11437 root->objectid, disk_bytenr, num_bytes);
11442 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11444 * Return >0 any error found and output error message
11445 * Return 0 for no error found
11447 static int check_extent_data_item(struct btrfs_root *root,
11448 struct btrfs_path *pathp,
11449 struct node_refs *nrefs, int account_bytes)
/*
 * @pathp points at the file extent item to check.
 * @account_bytes: when non-zero, sizes that pass the alignment checks are
 * added to data_bytes_allocated / data_bytes_referenced.
 */
11451 struct btrfs_file_extent_item *fi;
11452 struct extent_buffer *eb = pathp->nodes[0];
11453 struct btrfs_path path;
11454 struct btrfs_root *extent_root = root->fs_info->extent_root;
11455 struct btrfs_key fi_key;
11456 struct btrfs_key dbref_key;
11457 struct extent_buffer *leaf;
11458 struct btrfs_extent_item *ei;
11459 struct btrfs_extent_inline_ref *iref;
11460 struct btrfs_extent_data_ref *dref;
11463 u64 disk_num_bytes;
11464 u64 extent_num_bytes;
11471 int found_dbackref = 0;
11472 int slot = pathp->slots[0];
11477 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11478 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11480 /* Nothing to check for hole and inline data extents */
11481 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11482 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11485 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11486 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11487 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11488 offset = btrfs_file_extent_offset(eb, fi);
11490 /* Check unaligned disk_num_bytes and num_bytes */
11491 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11493 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11494 fi_key.objectid, fi_key.offset, disk_num_bytes,
11495 root->fs_info->sectorsize);
11496 err |= BYTES_UNALIGNED;
11497 } else if (account_bytes) {
11498 data_bytes_allocated += disk_num_bytes;
11500 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11502 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11503 fi_key.objectid, fi_key.offset, extent_num_bytes,
11504 root->fs_info->sectorsize);
11505 err |= BYTES_UNALIGNED;
11506 } else if (account_bytes) {
11507 data_bytes_referenced += extent_num_bytes;
/* Owner of the leaf holding the file extent; used by the non-strict match. */
11509 owner = btrfs_header_owner(eb);
11511 /* Check the extent item of the file extent in extent tree */
11512 btrfs_init_path(&path);
11513 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11514 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11515 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11517 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11521 leaf = path.nodes[0];
11522 slot = path.slots[0];
11523 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11525 extent_flags = btrfs_extent_flags(leaf, ei);
11527 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11529 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11530 disk_bytenr, disk_num_bytes,
11531 BTRFS_EXTENT_FLAG_DATA);
11532 err |= BACKREF_MISMATCH;
11535 /* Check data backref inside that extent item */
11536 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11537 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11538 ptr = (unsigned long)iref;
11539 end = (unsigned long)ei + item_size;
11540 strict = should_check_extent_strictly(root, nrefs, -1);
11542 while (ptr < end) {
11546 bool match = false;
11548 iref = (struct btrfs_extent_inline_ref *)ptr;
11549 type = btrfs_extent_inline_ref_type(leaf, iref);
11550 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11552 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
/*
 * EXTENT_DATA_REF: the ref's objectid and offset are compared with the inode
 * objectid and (file offset - extent offset) of the file extent; together
 * with @match, the ref root must equal this root, or, when the check is not
 * strict, the owner of the leaf.
 * SHARED_DATA_REF: the ref offset is passed to check_tree_block_ref() as the
 * bytenr of a tree block; a zero return counts as found.
 * NOTE(review): the assignment to match is on a line not visible here.
 */
11557 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11558 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11559 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11560 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11562 if (ref_objectid == fi_key.objectid &&
11563 ref_offset == fi_key.offset - offset)
11565 if (ref_root == root->objectid && match)
11566 found_dbackref = 1;
11567 else if (!strict && owner == ref_root && match)
11568 found_dbackref = 1;
11569 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11570 found_dbackref = !check_tree_block_ref(root, NULL,
11571 btrfs_extent_inline_ref_offset(leaf, iref),
11575 if (found_dbackref)
11577 ptr += btrfs_extent_inline_ref_size(type);
11580 if (!found_dbackref) {
11581 btrfs_release_path(&path);
11583 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11584 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11585 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11586 dbref_key.offset = hash_extent_data_ref(root->objectid,
11587 fi_key.objectid, fi_key.offset - offset);
11589 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11590 &dbref_key, &path, 0, 0);
11592 found_dbackref = 1;
11596 btrfs_release_path(&path);
11599 * Neither inlined nor EXTENT_DATA_REF found, try
11600 * SHARED_DATA_REF as last chance.
11602 dbref_key.objectid = disk_bytenr;
11603 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11604 dbref_key.offset = eb->start;
11606 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11607 &dbref_key, &path, 0, 0);
11609 found_dbackref = 1;
11615 if (!found_dbackref)
11616 err |= BACKREF_MISSING;
11617 btrfs_release_path(&path);
11618 if (err & BACKREF_MISSING) {
11619 error("data extent[%llu %llu] backref lost",
11620 disk_bytenr, disk_num_bytes);
11626 * Get real tree block level for the case like shared block
11627 * Return >= 0 as tree level
11628 * Return <0 for error
11630 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
/*
 * The level is obtained from two sources, the extent tree item for @bytenr
 * and the header of the tree block itself, and the two are compared.
 * NOTE(review): the error return paths are on lines not visible in this
 * excerpt.
 */
11632 struct extent_buffer *eb;
11633 struct btrfs_path path;
11634 struct btrfs_key key;
11635 struct btrfs_extent_item *ei;
11642 /* Search extent tree for extent generation and level */
11643 key.objectid = bytenr;
11644 key.type = BTRFS_METADATA_ITEM_KEY;
11645 key.offset = (u64)-1;
11647 btrfs_init_path(&path);
11648 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11651 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11659 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11660 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11661 struct btrfs_extent_item);
11662 flags = btrfs_extent_flags(path.nodes[0], ei);
11663 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11668 /* Get transid for later read_tree_block() check */
11669 transid = btrfs_extent_generation(path.nodes[0], ei);
11671 /* Get backref level as one source */
11672 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11673 backref_level = key.offset;
11675 struct btrfs_tree_block_info *info;
11677 info = (struct btrfs_tree_block_info *)(ei + 1);
11678 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11680 btrfs_release_path(&path);
11682 /* Get level from tree block as an alternative source */
11683 eb = read_tree_block(fs_info, bytenr, transid);
11684 if (!extent_buffer_uptodate(eb)) {
11685 free_extent_buffer(eb);
11688 header_level = btrfs_header_level(eb);
11689 free_extent_buffer(eb);
/*
 * The header level is what gets returned; it is first compared with the level
 * recorded in the extent tree.
 * NOTE(review): what happens on a mismatch is on a line not visible here.
 */
11691 if (header_level != backref_level)
11693 return header_level;
11696 btrfs_release_path(&path);
11701 * Check if a tree block backref is valid (points to a valid tree block)
11702 * if level == -1, level will be resolved
11703 * Return >0 for any error found and print error message
11705 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11706 u64 bytenr, int level)
/*
 * @root_id: objectid of the root that is read and searched for the block
 * @bytenr:  bytenr of the tree block the backref belongs to
 * @level:   level of that tree block
 */
11708 struct btrfs_root *root;
11709 struct btrfs_key key;
11710 struct btrfs_path path;
11711 struct extent_buffer *eb;
11712 struct extent_buffer *node;
11713 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11717 /* Query level for level == -1 special case */
11719 level = query_tree_block_level(fs_info, bytenr);
11721 err |= REFERENCER_MISSING;
11725 key.objectid = root_id;
11726 key.type = BTRFS_ROOT_ITEM_KEY;
11727 key.offset = (u64)-1;
11729 root = btrfs_read_fs_root(fs_info, &key);
11730 if (IS_ERR(root)) {
11731 err |= REFERENCER_MISSING;
11735 /* Read out the tree block to get item/node key */
11736 eb = read_tree_block(fs_info, bytenr, 0);
11737 if (!extent_buffer_uptodate(eb)) {
11738 err |= REFERENCER_MISSING;
11739 free_extent_buffer(eb);
11743 /* Empty tree, no need to check key */
11744 if (!btrfs_header_nritems(eb) && !level) {
11745 free_extent_buffer(eb);
/*
 * Take the first key of the block, as a node key or as an item key.
 * NOTE(review): the condition choosing between the two calls is not visible
 * in this excerpt; presumably it depends on level -- confirm.
 */
11750 btrfs_node_key_to_cpu(eb, &key, 0);
11752 btrfs_item_key_to_cpu(eb, &key, 0);
11754 free_extent_buffer(eb);
11756 btrfs_init_path(&path);
11757 path.lowest_level = level;
11758 /* Search with the first key, to ensure we can reach it */
11759 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11761 err |= REFERENCER_MISSING;
/*
 * The block the search reached at @level (path.lowest_level was set to @level
 * above) must be the block at @bytenr and must report the same level in its
 * header.
 */
11765 node = path.nodes[level];
11766 if (btrfs_header_bytenr(node) != bytenr) {
11768 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11769 bytenr, nodesize, bytenr,
11770 btrfs_header_bytenr(node));
11771 err |= REFERENCER_MISMATCH;
11773 if (btrfs_header_level(node) != level) {
11775 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11776 bytenr, nodesize, level,
11777 btrfs_header_level(node));
11778 err |= REFERENCER_MISMATCH;
11782 btrfs_release_path(&path);
/*
 * Two variants of the "lost referencer" message follow, one without and one
 * with the level.
 * NOTE(review): the condition selecting the variant is not visible here.
 */
11784 if (err & REFERENCER_MISSING) {
11786 error("extent [%llu %d] lost referencer (owner: %llu)",
11787 bytenr, nodesize, root_id);
11790 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11791 bytenr, nodesize, root_id, level);
11798 * Check if tree block @eb is tree reloc root.
11799 * Return 0 if it's not or any problem happens
11800 * Return 1 if it's a tree reloc root
11802 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11803 struct extent_buffer *eb)
11805 struct btrfs_root *tree_reloc_root;
11806 struct btrfs_key key;
11807 u64 bytenr = btrfs_header_bytenr(eb);
11808 u64 owner = btrfs_header_owner(eb);
/*
 * Read the tree reloc root for the owner of @eb: the key is
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner). The uncached read returns a
 * root that is released with btrfs_free_fs_root() below.
 */
11811 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11812 key.offset = owner;
11813 key.type = BTRFS_ROOT_ITEM_KEY;
11815 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11816 if (IS_ERR(tree_reloc_root))
/* Compare the bytenr of @eb with the bytenr of the reloc root's root node. */
11819 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11821 btrfs_free_fs_root(tree_reloc_root);
11826 * Check referencer for shared block backref
11827 * If level == -1, this function will resolve the level.
11829 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11830 u64 parent, u64 bytenr, int level)
/*
 * @parent: bytenr of the tree block that is read and searched for a pointer
 *          to @bytenr
 * @bytenr: bytenr of the tree block owning the shared backref
 * @level:  level of @bytenr
 *
 * REFERENCER_MISSING is returned when no parent was found.
 */
11832 struct extent_buffer *eb;
11834 int found_parent = 0;
11837 eb = read_tree_block(fs_info, parent, 0);
11838 if (!extent_buffer_uptodate(eb))
11842 level = query_tree_block_level(fs_info, bytenr);
11846 /* It's possible it's a tree reloc root */
11847 if (parent == bytenr) {
11848 if (is_tree_reloc_root(fs_info, eb))
/* The header level of the parent block is compared with level + 1. */
11853 if (level + 1 != btrfs_header_level(eb))
/* Scan the block pointers of the parent for @bytenr. */
11856 nr = btrfs_header_nritems(eb);
11857 for (i = 0; i < nr; i++) {
11858 if (bytenr == btrfs_node_blockptr(eb, i)) {
11864 free_extent_buffer(eb);
11865 if (!found_parent) {
11867 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11868 bytenr, fs_info->nodesize, parent, level);
11869 return REFERENCER_MISSING;
11875 * Check referencer for normal (inlined) data ref
11876 * If len == 0, it will be resolved by searching in extent tree
11878 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11879 u64 root_id, u64 objectid, u64 offset,
11880 u64 bytenr, u64 len, u32 count)
/*
 * @root_id:  objectid of the root that is read and searched
 * @objectid: objectid of the file extent items to look at
 * @offset:   backref offset (file offset - file extent offset)
 * @bytenr:   disk bytenr of the data extent
 * @len:      disk num bytes of the data extent
 * @count:    number of matching file extents the backref claims
 *
 * REFERENCER_MISSING is returned when the number of matching file extents
 * differs from @count.
 */
11882 struct btrfs_root *root;
11883 struct btrfs_root *extent_root = fs_info->extent_root;
11884 struct btrfs_key key;
11885 struct btrfs_path path;
11886 struct extent_buffer *leaf;
11887 struct btrfs_file_extent_item *fi;
11888 u32 found_count = 0;
/*
 * Locate the EXTENT_ITEM of @bytenr in the extent tree.
 * NOTE(review): per the function header this lookup resolves @len when it is
 * 0; the guard and the assignment to len are on lines not visible here.
 */
11893 key.objectid = bytenr;
11894 key.type = BTRFS_EXTENT_ITEM_KEY;
11895 key.offset = (u64)-1;
11897 btrfs_init_path(&path);
11898 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11901 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11904 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11905 if (key.objectid != bytenr ||
11906 key.type != BTRFS_EXTENT_ITEM_KEY)
11909 btrfs_release_path(&path);
11911 key.objectid = root_id;
11912 key.type = BTRFS_ROOT_ITEM_KEY;
11913 key.offset = (u64)-1;
11914 btrfs_init_path(&path);
11916 root = btrfs_read_fs_root(fs_info, &key);
11920 key.objectid = objectid;
11921 key.type = BTRFS_EXTENT_DATA_KEY;
11923 * It can be nasty as data backref offset is
11924 * file offset - file extent offset, which is smaller or
11925 * equal to original backref offset. The only special case is
11926 * overflow. So we need to special check and do further search.
11928 key.offset = offset & (1ULL << 63) ? 0 : offset;
11930 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11935 * Search afterwards to get correct one
11936 * NOTE: As we must do a comprehensive check on the data backref to
11937 * make sure the dref count also matches, we must iterate all file
11938 * extents for that inode.
11941 leaf = path.nodes[0];
11942 slot = path.slots[0];
11944 if (slot >= btrfs_header_nritems(leaf) ||
11945 btrfs_header_owner(leaf) != root_id)
11947 btrfs_item_key_to_cpu(leaf, &key, slot);
11948 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11950 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11952 * Except normal disk bytenr and disk num bytes, we still
11953 * need to do extra check on dbackref offset as
11954 * dbackref offset = file_offset - file_extent_offset
11956 * Also, we must check the leaf owner.
11957 * In case of shared tree blocks (snapshots) we can inherit
11958 * leaves from source snapshot.
11959 * In that case, reference from source snapshot should not
11962 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11963 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11964 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11965 offset && btrfs_header_owner(leaf) == root_id)
11969 ret = btrfs_next_item(root, &path);
11974 btrfs_release_path(&path);
/* The number of file extents found is compared with the claimed @count. */
11975 if (found_count != count) {
11977 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11978 bytenr, len, root_id, objectid, offset, count, found_count);
11979 return REFERENCER_MISSING;
11985 * Check if the referencer of a shared data backref exists
11987 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11988 u64 parent, u64 bytenr)
/*
 * @parent: bytenr of the tree block that is read and scanned for file extents
 * @bytenr: disk bytenr of the data extent owning the shared backref
 *
 * REFERENCER_MISSING is returned when no parent was found.
 */
11990 struct extent_buffer *eb;
11991 struct btrfs_key key;
11992 struct btrfs_file_extent_item *fi;
11994 int found_parent = 0;
11997 eb = read_tree_block(fs_info, parent, 0);
11998 if (!extent_buffer_uptodate(eb))
/*
 * Go through the items of the parent block: the key type is tested against
 * EXTENT_DATA, the file extent type against INLINE, and the disk bytenr of
 * the file extent is compared with @bytenr.
 */
12001 nr = btrfs_header_nritems(eb);
12002 for (i = 0; i < nr; i++) {
12003 btrfs_item_key_to_cpu(eb, &key, i);
12004 if (key.type != BTRFS_EXTENT_DATA_KEY)
12007 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12008 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12011 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12018 free_extent_buffer(eb);
12019 if (!found_parent) {
12020 error("shared extent %llu referencer lost (parent: %llu)",
12022 return REFERENCER_MISSING;
12028 * Only delete backref if REFERENCER_MISSING now
12030 * Returns <0 the extent was deleted
12031 * Returns >0 the backref was deleted but extent still exists, returned value
12032 * means error after repair
12033 * Returns 0 nothing happened
12035 static int repair_extent_item(struct btrfs_trans_handle *trans,
12036 struct btrfs_root *root, struct btrfs_path *path,
12037 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12038 u64 owner, u64 offset, int err)
12040 struct btrfs_key old_key;
/* Remember the key @path points at so the path can be looked up again below. */
12044 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12046 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12047 /* delete the backref */
12048 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12049 num_bytes, parent, root_objectid, owner, offset);
/*
 * NOTE(review): this branch is entered for REFERENCER_MISSING or
 * REFERENCER_MISMATCH, but only REFERENCER_MISSING is cleared from err --
 * confirm that leaving REFERENCER_MISMATCH set is intended.
 */
12052 err &= ~REFERENCER_MISSING;
12053 printf("Delete backref in extent [%llu %llu]\n",
12054 bytenr, num_bytes);
12056 error("fail to delete backref in extent [%llu %llu]",
12057 bytenr, num_bytes);
12061 /* btrfs_free_extent may delete the extent */
12062 btrfs_release_path(path);
12063 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12073 * This function will check a given extent item, including its backref and
12074 * itself (like crossing stripe boundary and type)
12076 * Since we don't use extent_record anymore, introduce new error bit
12078 static int check_extent_item(struct btrfs_trans_handle *trans,
12079 struct btrfs_fs_info *fs_info,
12080 struct btrfs_path *path)
/*
 * @path points at the EXTENT_ITEM / METADATA_ITEM to check; @trans is only
 * passed on to repair_extent_item().
 */
12082 struct btrfs_extent_item *ei;
12083 struct btrfs_extent_inline_ref *iref;
12084 struct btrfs_extent_data_ref *dref;
12085 struct extent_buffer *eb = path->nodes[0];
12088 int slot = path->slots[0];
12090 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12091 u32 item_size = btrfs_item_size_nr(eb, slot);
12101 struct btrfs_key key;
12105 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * An EXTENT_ITEM carries its length in key.offset; for the other key type
 * nodesize is used. The length is added to bytes_used and kept in num_bytes.
 */
12106 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12107 bytes_used += key.offset;
12108 num_bytes = key.offset;
12110 bytes_used += nodesize;
12111 num_bytes = nodesize;
12114 if (item_size < sizeof(*ei)) {
12116 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12117 * old thing when on disk format is still un-determined.
12118 * No need to care about it anymore
12120 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12124 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12125 flags = btrfs_extent_flags(eb, ei);
/*
 * The TREE_BLOCK flag is tested first; extents treated as metadata are then
 * checked for crossing a stripe boundary.
 * NOTE(review): the assignment to metadata is on a line not visible here.
 */
12127 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12129 if (metadata && check_crossing_stripes(global_info, key.objectid,
12131 error("bad metadata [%llu, %llu) crossing stripe boundary",
12132 key.objectid, key.objectid + nodesize);
12133 err |= CROSSING_STRIPE_BOUNDARY;
12136 ptr = (unsigned long)(ei + 1);
12138 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12139 /* Old EXTENT_ITEM metadata */
12140 struct btrfs_tree_block_info *info;
12142 info = (struct btrfs_tree_block_info *)ptr;
12143 level = btrfs_tree_block_level(eb, info);
12144 ptr += sizeof(struct btrfs_tree_block_info);
12146 /* New METADATA_ITEM */
12147 level = key.offset;
12149 end = (unsigned long)ei + item_size;
12152 /* Reached extent item end normally */
12156 /* Beyond extent item end, wrong item size */
12158 err |= ITEM_SIZE_MISMATCH;
12159 error("extent item at bytenr %llu slot %d has wrong size",
12168 /* Now check every backref in this extent item */
12169 iref = (struct btrfs_extent_inline_ref *)ptr;
12170 type = btrfs_extent_inline_ref_type(eb, iref);
12171 offset = btrfs_extent_inline_ref_offset(eb, iref);
12173 case BTRFS_TREE_BLOCK_REF_KEY:
12174 root_objectid = offset;
12176 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12180 case BTRFS_SHARED_BLOCK_REF_KEY:
12182 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12186 case BTRFS_EXTENT_DATA_REF_KEY:
12187 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12188 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12189 owner = btrfs_extent_data_ref_objectid(eb, dref);
12190 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12191 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12192 owner_offset, key.objectid, key.offset,
12193 btrfs_extent_data_ref_count(eb, dref));
12196 case BTRFS_SHARED_DATA_REF_KEY:
12198 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12202 error("extent[%llu %d %llu] has unknown ref type: %d",
12203 key.objectid, key.type, key.offset, type);
12204 ret = UNKNOWN_TYPE;
/*
 * In repair mode the result of the backref check just done (ret) is handed
 * to repair_extent_item() as its err argument, together with the values
 * collected in the switch above.
 */
12209 if (err && repair) {
12210 ret = repair_extent_item(trans, fs_info->extent_root, path,
12211 key.objectid, num_bytes, parent, root_objectid,
12212 owner, owner_offset, ret);
12221 ptr += btrfs_extent_inline_ref_size(type);
12229 * Check if a dev extent item is referred correctly by its chunk
12231 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12232 struct extent_buffer *eb, int slot)
/*
 * @eb / @slot: leaf and slot of the dev extent item to check.
 * REFERENCER_MISSING is returned when no related chunk was found.
 */
12234 struct btrfs_root *chunk_root = fs_info->chunk_root;
12235 struct btrfs_dev_extent *ptr;
12236 struct btrfs_path path;
12237 struct btrfs_key chunk_key;
12238 struct btrfs_key devext_key;
12239 struct btrfs_chunk *chunk;
12240 struct extent_buffer *l;
12244 int found_chunk = 0;
12247 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12248 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12249 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk from the chunk objectid/offset in the dev extent. */
12251 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12252 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12253 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12255 btrfs_init_path(&path);
12256 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12261 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12262 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The stripe length of the chunk is compared with the dev extent length. */
12267 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/*
 * Look through the stripes of the chunk for one whose devid and offset equal
 * the objectid and offset of the dev extent key.
 */
12270 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12271 for (i = 0; i < num_stripes; i++) {
12272 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12273 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12275 if (devid == devext_key.objectid &&
12276 offset == devext_key.offset) {
12282 btrfs_release_path(&path);
12283 if (!found_chunk) {
12285 "device extent[%llu, %llu, %llu] did not find the related chunk",
12286 devext_key.objectid, devext_key.offset, length);
12287 return REFERENCER_MISSING;
12293 * Check if the used space is correct with the dev item
12295 static int check_dev_item(struct btrfs_fs_info *fs_info,
12296 struct extent_buffer *eb, int slot)
/*
 * @eb / @slot: leaf and slot of the dev item to check.
 * Returns REFERENCER_MISSING or ACCOUNTING_MISMATCH on the error paths
 * visible below.
 */
12298 struct btrfs_root *dev_root = fs_info->dev_root;
12299 struct btrfs_dev_item *dev_item;
12300 struct btrfs_path path;
12301 struct btrfs_key key;
12302 struct btrfs_dev_extent *ptr;
12309 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12310 dev_id = btrfs_device_id(eb, dev_item);
12311 used = btrfs_device_bytes_used(eb, dev_item);
12312 total_bytes = btrfs_device_total_bytes(eb, dev_item);
/* Search the device tree for the DEV_EXTENT items of this device id. */
12314 key.objectid = dev_id;
12315 key.type = BTRFS_DEV_EXTENT_KEY;
12318 btrfs_init_path(&path);
12319 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* The key of the dev item itself is re-read for the error message. */
12321 btrfs_item_key_to_cpu(eb, &key, slot);
12322 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12323 key.objectid, key.type, key.offset);
12324 btrfs_release_path(&path);
12325 return REFERENCER_MISSING;
12328 /* Iterate dev_extents to calculate the used space of a device */
12330 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12333 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12334 if (key.objectid > dev_id)
12336 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12339 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12340 struct btrfs_dev_extent);
12341 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12343 ret = btrfs_next_item(dev_root, &path);
12347 btrfs_release_path(&path);
/*
 * The summed dev extent lengths are compared with the bytes_used field of the
 * dev item.
 * NOTE(review): the dev item key is re-read into key just below, but the
 * message prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id
 * instead of the fields of that key -- confirm which triple is intended.
 */
12349 if (used != total) {
12350 btrfs_item_key_to_cpu(eb, &key, slot);
12352 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12353 total, used, BTRFS_ROOT_TREE_OBJECTID,
12354 BTRFS_DEV_EXTENT_KEY, dev_id);
12355 return ACCOUNTING_MISMATCH;
12357 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12363 * Check a block group item with its referencer (chunk) and its used space
12364 * with extent/metadata item
/*
 * check_block_group_item: validate one block group item against its chunk
 * item and against the extent items that fall inside its range.
 *
 * @fs_info: filesystem handle; extent_root, chunk_root and the super block
 *           nodesize are read from it
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Error bits set by the visible code: REFERENCER_MISSING, REFERENCER_MISMATCH,
 * CHUNK_TYPE_MISMATCH and BG_ACCOUNTING_ERROR, all OR-ed into err.
 *
 * NOTE(review): this view is incomplete (braces, some declarations, several
 * branch bodies and the final return are not shown).
 */
12366 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12367 struct extent_buffer *eb, int slot)
12369 struct btrfs_root *extent_root = fs_info->extent_root;
12370 struct btrfs_root *chunk_root = fs_info->chunk_root;
12371 struct btrfs_block_group_item *bi;
12372 struct btrfs_block_group_item bg_item;
12373 struct btrfs_path path;
12374 struct btrfs_key bg_key;
12375 struct btrfs_key chunk_key;
12376 struct btrfs_key extent_key;
12377 struct btrfs_chunk *chunk;
12378 struct extent_buffer *leaf;
12379 struct btrfs_extent_item *ei;
12380 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the item out of the extent buffer before reading used and flags. */
12388 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12389 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12390 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12391 used = btrfs_block_group_used(&bg_item);
12392 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is keyed by the block group start (bg_key.objectid). */
12394 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12395 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12396 chunk_key.offset = bg_key.objectid;
12398 btrfs_init_path(&path);
12399 /* Search for the referencer chunk */
12400 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12403 "block group[%llu %llu] did not find the related chunk item",
12404 bg_key.objectid, bg_key.offset);
12405 err |= REFERENCER_MISSING;
12407 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12408 struct btrfs_chunk);
12409 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12412 "block group[%llu %llu] related chunk item length does not match",
12413 bg_key.objectid, bg_key.offset);
12414 err |= REFERENCER_MISMATCH;
12417 btrfs_release_path(&path);
12419 /* Search from the block group bytenr */
12420 extent_key.objectid = bg_key.objectid;
12421 extent_key.type = 0;
12422 extent_key.offset = 0;
12424 btrfs_init_path(&path);
12425 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12429 /* Iterate extent tree to account used space */
12431 leaf = path.nodes[0];
12433 /* Search slot can point to the last item beyond leaf nritems */
12434 if (path.slots[0] >= btrfs_header_nritems(leaf))
12437 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the end of the block group range. */
12438 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12441 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12442 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12444 if (extent_key.objectid < bg_key.objectid)
/*
 * NOTE(review): the statement taken for METADATA_ITEM keys is not visible;
 * the other visible accounting statement adds extent_key.offset.
 */
12447 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12450 total += extent_key.offset;
/* Extent flags must agree with the type bits of the block group. */
12452 ei = btrfs_item_ptr(leaf, path.slots[0],
12453 struct btrfs_extent_item);
12454 flags = btrfs_extent_flags(leaf, ei);
12455 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12456 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12458 "bad extent[%llu, %llu) type mismatch with chunk",
12459 extent_key.objectid,
12460 extent_key.objectid + extent_key.offset);
12461 err |= CHUNK_TYPE_MISMATCH;
12463 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12464 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12465 BTRFS_BLOCK_GROUP_METADATA))) {
12467 "bad extent[%llu, %llu) type mismatch with chunk",
12468 extent_key.objectid,
12469 extent_key.objectid + nodesize);
12470 err |= CHUNK_TYPE_MISMATCH;
12474 ret = btrfs_next_item(extent_root, &path);
12480 btrfs_release_path(&path);
/* Accounted extent bytes must equal the used field of the item. */
12482 if (total != used) {
12484 "block group[%llu %llu] used %llu but extent items used %llu",
12485 bg_key.objectid, bg_key.offset, used, total);
12486 err |= BG_ACCOUNTING_ERROR;
12492 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12493 * FIXME: We still need to repair error of dev_item.
12495 * Returns error after repair.
/*
 * repair_chunk_item: when @err carries REFERENCER_MISSING, create the block
 * group for the chunk item that @path points at.
 *
 * @trans:      running transaction
 * @chunk_root: root whose fs_info is handed to btrfs_make_block_group()
 * @path:       path positioned on the chunk item (nodes[0] / slots[0])
 * @err:        error bits from the preceding check
 *
 * On the visible success path the REFERENCER_MISSING bit is cleared from
 * err and a message is printed.
 *
 * NOTE(review): this view is incomplete (braces, some declarations, the
 * early-exit bodies and the final return are not shown).
 */
12497 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12498 struct btrfs_root *chunk_root,
12499 struct btrfs_path *path, int err)
12501 struct btrfs_chunk *chunk;
12502 struct btrfs_key chunk_key;
12503 struct extent_buffer *eb = path->nodes[0];
12505 int slot = path->slots[0];
12509 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12510 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12512 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
/* path->nodes[0] is the same buffer as eb (see the initializer above). */
12513 type = btrfs_chunk_type(path->nodes[0], chunk);
12514 length = btrfs_chunk_length(eb, chunk);
12516 if (err & REFERENCER_MISSING) {
/* The new block group starts at chunk_key.offset and spans length bytes. */
12517 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12518 type, chunk_key.offset, length);
12520 error("fail to add block group item[%llu %llu]",
12521 chunk_key.offset, length);
12524 err &= ~REFERENCER_MISSING;
12525 printf("Added block group item[%llu %llu]\n",
12526 chunk_key.offset, length);
12535 * Check a chunk item.
12536 * This includes checking all referenced dev extents and the block group.
/*
 * check_chunk_item: validate one chunk item, its block group item and the
 * dev extent behind each of its stripes.
 *
 * @fs_info: filesystem handle; extent_root and dev_root are read from it
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Error bits set by the visible code: BYTES_UNALIGNED | UNKNOWN_TYPE,
 * REFERENCER_MISSING and BACKREF_MISSING, all OR-ed into err.
 *
 * NOTE(review): this view is incomplete (braces, some declarations, labels,
 * several conditions and the final return are not shown).
 */
12538 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12539 struct extent_buffer *eb, int slot)
12541 struct btrfs_root *extent_root = fs_info->extent_root;
12542 struct btrfs_root *dev_root = fs_info->dev_root;
12543 struct btrfs_path path;
12544 struct btrfs_key chunk_key;
12545 struct btrfs_key bg_key;
12546 struct btrfs_key devext_key;
12547 struct btrfs_chunk *chunk;
12548 struct extent_buffer *leaf;
12549 struct btrfs_block_group_item *bi;
12550 struct btrfs_block_group_item bg_item;
12551 struct btrfs_dev_extent *ptr;
12563 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12564 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12565 length = btrfs_chunk_length(eb, chunk);
/* chunk_end is the exclusive end used in the "[start end)" messages. */
12566 chunk_end = chunk_key.offset + length;
12567 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12570 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12572 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12575 type = btrfs_chunk_type(eb, chunk);
/* Block group key derived from the chunk: (start, BLOCK_GROUP_ITEM, length). */
12577 bg_key.objectid = chunk_key.offset;
12578 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12579 bg_key.offset = length;
12581 btrfs_init_path(&path);
12582 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12585 "chunk[%llu %llu) did not find the related block group item",
12586 chunk_key.offset, chunk_end);
12587 err |= REFERENCER_MISSING;
12589 leaf = path.nodes[0];
12590 bi = btrfs_item_ptr(leaf, path.slots[0],
12591 struct btrfs_block_group_item);
12592 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12594 if (btrfs_block_group_flags(&bg_item) != type) {
12596 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12597 chunk_key.offset, chunk_end, type,
12598 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is reported with REFERENCER_MISSING, the
 * same bit as a missing block group item. Confirm that REFERENCER_MISMATCH
 * is not what was meant.
 */
12599 err |= REFERENCER_MISSING;
12603 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12604 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12605 for (i = 0; i < num_stripes; i++) {
/* The path is reset for every stripe before the dev tree lookup. */
12606 btrfs_release_path(&path);
12607 btrfs_init_path(&path);
12608 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12609 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12610 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12612 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12615 goto not_match_dev;
/* The dev extent must point back at this chunk and match stripe_len. */
12617 leaf = path.nodes[0];
12618 ptr = btrfs_item_ptr(leaf, path.slots[0],
12619 struct btrfs_dev_extent);
12620 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12621 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12622 if (objectid != chunk_key.objectid ||
12623 offset != chunk_key.offset ||
12624 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12625 goto not_match_dev;
12628 err |= BACKREF_MISSING;
/*
 * NOTE(review): the message below prints chunk_key.objectid as the range
 * start, while every other message in this function prints
 * chunk_key.offset. Confirm which one is intended.
 */
12630 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12631 chunk_key.objectid, chunk_end, i);
12634 btrfs_release_path(&path);
/*
 * delete_extent_tree_item: delete the item that @path currently points at.
 *
 * @trans: running transaction
 * @root:  tree to delete from
 * @path:  path positioned on the item; it is released and searched again
 *         inside this function
 *
 * NOTE(review): this view is incomplete (braces, declarations, the error
 * checks between the calls and the return are not shown).
 */
12639 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12640 struct btrfs_root *root,
12641 struct btrfs_path *path)
12643 struct btrfs_key key;
/* Remember the key, then search again with ins_len -1 and cow 1. */
12646 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12647 btrfs_release_path(path);
12648 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12654 ret = btrfs_del_item(trans, root, path);
/* At slot 0 the path is moved to the previous leaf. */
12658 if (path->slots[0] == 0)
12659 btrfs_prev_leaf(root, path);
12664 error("failed to delete root %llu item[%llu, %u, %llu]",
12665 root->objectid, key.objectid, key.type, key.offset);
12667 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12668 root->objectid, key.objectid, key.type, key.offset);
12673 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items: run the type-specific check for the item at
 * path->slots[0] of the leaf path->nodes[0].
 *
 * @trans:         transaction handed to the repair and delete helpers
 * @root:          tree the leaf belongs to
 * @path:          path positioned on the item to check
 * @nrefs:         passed through to the extent data helpers
 * @account_bytes: passed through to check_extent_data_item()
 *
 * The key type selects the helper. For several types a repair or delete
 * helper follows the check; the guarding conditions are only partly
 * visible in this view.
 *
 * NOTE(review): this view is incomplete (braces, the switch header, break
 * statements, loop control and the return are not shown).
 */
12675 static int check_leaf_items(struct btrfs_trans_handle *trans,
12676 struct btrfs_root *root, struct btrfs_path *path,
12677 struct node_refs *nrefs, int account_bytes)
12679 struct btrfs_fs_info *fs_info = root->fs_info;
12680 struct btrfs_key key;
12681 struct extent_buffer *eb;
12684 struct btrfs_extent_data_ref *dref;
12689 eb = path->nodes[0];
12690 slot = path->slots[0];
/* A slot at or past nritems is reported as an empty leaf. */
12691 if (slot >= btrfs_header_nritems(eb)) {
12693 error("empty leaf [%llu %u] root %llu", eb->start,
12694 root->fs_info->nodesize, root->objectid);
12700 btrfs_item_key_to_cpu(eb, &key, slot);
12704 case BTRFS_EXTENT_DATA_KEY:
12705 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12707 ret = repair_extent_data_item(trans, root, path, nrefs,
12711 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12712 ret = check_block_group_item(fs_info, eb, slot);
12714 ret & REFERENCER_MISSING)
12715 ret = delete_extent_tree_item(trans, root, path);
12718 case BTRFS_DEV_ITEM_KEY:
12719 ret = check_dev_item(fs_info, eb, slot);
12722 case BTRFS_CHUNK_ITEM_KEY:
12723 ret = check_chunk_item(fs_info, eb, slot);
12725 ret = repair_chunk_item(trans, root, path, ret);
12728 case BTRFS_DEV_EXTENT_KEY:
12729 ret = check_dev_extent_item(fs_info, eb, slot);
12732 case BTRFS_EXTENT_ITEM_KEY:
12733 case BTRFS_METADATA_ITEM_KEY:
12734 ret = check_extent_item(trans, fs_info, path);
/* Csum items are only counted, no check helper is called for them. */
12737 case BTRFS_EXTENT_CSUM_KEY:
12738 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12741 case BTRFS_TREE_BLOCK_REF_KEY:
12742 ret = check_tree_block_backref(fs_info, key.offset,
12745 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12746 ret = delete_extent_tree_item(trans, root, path);
12749 case BTRFS_EXTENT_DATA_REF_KEY:
12750 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12751 ret = check_extent_data_backref(fs_info,
12752 btrfs_extent_data_ref_root(eb, dref),
12753 btrfs_extent_data_ref_objectid(eb, dref),
12754 btrfs_extent_data_ref_offset(eb, dref),
12756 btrfs_extent_data_ref_count(eb, dref));
12758 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12759 ret = delete_extent_tree_item(trans, root, path);
12762 case BTRFS_SHARED_BLOCK_REF_KEY:
12763 ret = check_shared_block_backref(fs_info, key.offset,
12766 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12767 ret = delete_extent_tree_item(trans, root, path);
12770 case BTRFS_SHARED_DATA_REF_KEY:
12771 ret = check_shared_data_backref(fs_info, key.offset,
12774 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12775 ret = delete_extent_tree_item(trans, root, path);
12789 * Low memory usage version of check_chunks_and_extents().
/*
 * check_chunks_and_extents_v2: run check_btrfs_root() on the chunk root,
 * on the tree root, and on every tree found through a ROOT_ITEM in the
 * tree root, starting the search at BTRFS_EXTENT_TREE_OBJECTID.
 *
 * @fs_info: filesystem handle
 *
 * A transaction is started on extent_root near the top and committed near
 * the end of the visible code; the conditions guarding both are not shown.
 *
 * NOTE(review): this view is incomplete (braces, declarations of ret and
 * err, loop control, the repair guards, labels and the return are not shown).
 */
12791 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12793 struct btrfs_trans_handle *trans = NULL;
12794 struct btrfs_path path;
12795 struct btrfs_key old_key;
12796 struct btrfs_key key;
12797 struct btrfs_root *root1;
12798 struct btrfs_root *root;
12799 struct btrfs_root *cur_root;
12803 root = fs_info->fs_root;
12806 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12807 if (IS_ERR(trans)) {
12808 error("failed to start transaction before check");
12809 return PTR_ERR(trans);
/* The chunk tree and the tree root are checked before the root item walk. */
12813 root1 = root->fs_info->chunk_root;
12814 ret = check_btrfs_root(trans, root1, 0, 1);
12817 root1 = root->fs_info->tree_root;
12818 ret = check_btrfs_root(trans, root1, 0, 1);
12821 btrfs_init_path(&path);
12822 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12824 key.type = BTRFS_ROOT_ITEM_KEY;
12826 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12828 error("cannot find extent tree in tree_root");
12833 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12834 if (key.type != BTRFS_ROOT_ITEM_KEY)
12837 key.offset = (u64)-1;
/* Reloc trees are read with the no_cache variant and freed further down. */
12839 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12840 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12843 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12844 if (IS_ERR(cur_root) || !cur_root) {
/* NOTE(review): "%lld" is a signed conversion; confirm key.objectid is unsigned and "%llu" was meant. */
12845 error("failed to read tree: %lld", key.objectid);
12849 ret = check_btrfs_root(trans, cur_root, 0, 1);
12852 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12853 btrfs_free_fs_root(cur_root);
/*
 * The tree root is searched again at old_key before stepping to the next
 * item. NOTE(review): the assignment to old_key is not visible here;
 * confirm it is set on every iteration before this use.
 */
12855 btrfs_release_path(&path);
12856 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12857 &old_key, &path, 0, 0);
12861 ret = btrfs_next_item(root1, &path);
12867 /* if repair, update block accounting */
12869 ret = btrfs_fix_block_accounting(trans, root);
12873 err &= ~BG_ACCOUNTING_ERROR;
12877 btrfs_commit_transaction(trans, root->fs_info->extent_root);
12879 btrfs_release_path(&path);
/*
 * do_check_chunks_and_extents: run the extent check that matches
 * check_mode.
 *
 * @fs_info: filesystem handle
 *
 * "checking extents" is written to stderr when ctx.progress_enabled is
 * not set. CHECK_MODE_LOWMEM selects check_chunks_and_extents_v2(); the
 * other call shown is check_chunks_and_extents().
 *
 * NOTE(review): this view is incomplete (braces, the declaration of ret,
 * the else keyword and the return are not shown).
 */
12884 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12888 if (!ctx.progress_enabled)
12889 fprintf(stderr, "checking extents\n");
12890 if (check_mode == CHECK_MODE_LOWMEM)
12891 ret = check_chunks_and_extents_v2(fs_info);
12893 ret = check_chunks_and_extents(fs_info);
12895 /* Also repair device size related problems */
/* Device size repair runs only in repair mode and when the check returned 0. */
12896 if (repair && !ret) {
12897 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * btrfs_fsck_reinit_root: give @root a freshly initialised node buffer.
 *
 * @trans:     running transaction; its transid becomes the header generation
 * @root:      root to reinitialise
 * @overwrite: NOTE(review): its use is not visible in this view; presumably
 *             it chooses between reusing the old node and allocating a new
 *             block (both appear below). Confirm.
 *
 * The visible code zeroes the header of the buffer, sets level, bytenr,
 * generation, backref revision and owner, writes the fsid and the chunk
 * tree uuid, and marks the buffer dirty. When the new buffer starts at the
 * same bytenr as the old node, the root item is updated in the tree root.
 *
 * NOTE(review): this view is incomplete (braces, declarations of level and
 * ret, branch conditions, error handling and the return are not shown).
 */
12904 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12905 struct btrfs_root *root, int overwrite)
12907 struct extent_buffer *c;
12908 struct extent_buffer *old = root->node;
12911 struct btrfs_disk_key disk_key = {0,0,0};
12917 extent_buffer_get(c);
12920 c = btrfs_alloc_free_block(trans, root,
12921 root->fs_info->nodesize,
12922 root->root_key.objectid,
12923 &disk_key, level, 0, 0);
12926 extent_buffer_get(c);
/* Only the header area is cleared before the fields are set. */
12930 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12931 btrfs_set_header_level(c, level);
12932 btrfs_set_header_bytenr(c, c->start);
12933 btrfs_set_header_generation(c, trans->transid);
12934 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12935 btrfs_set_header_owner(c, root->root_key.objectid);
12937 write_extent_buffer(c, root->fs_info->fsid,
12938 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12940 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12941 btrfs_header_chunk_tree_uuid(c),
12944 btrfs_mark_buffer_dirty(c);
12946 * this case can happen in the following case:
12948 * 1.overwrite previous root.
12950 * 2.reinit reloc data root, this is because we skip pin
12951 * down reloc data tree before which means we can allocate
12952 * same block bytenr here.
12954 if (old->start == c->start) {
12955 btrfs_set_root_generation(&root->root_item,
12957 root->root_item.level = btrfs_header_level(root->node);
12958 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12959 &root->root_key, &root->root_item);
12961 free_extent_buffer(c);
12965 free_extent_buffer(old);
12967 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks: pin the extent of @eb and recurse into the blocks
 * reachable from it.
 *
 * @fs_info:   filesystem handle; pinned_extents and nodesize are read from it
 * @eb:        tree block to start from
 * @tree_root: non-zero when @eb belongs to the tree root
 *
 * A block whose range already has EXTENT_DIRTY set in pinned_extents is
 * not pinned again. Two item loops are visible:
 *  - ROOT_ITEM entries, except the extent root and the two reloc roots,
 *    have their root block read and walked with tree_root set to 0;
 *  - node pointers are read and walked with the same tree_root value,
 *    except that with level == 1 and tree_root == 0 the child is pinned
 *    by bytenr and nodesize without being read.
 *
 * NOTE(review): a comment fragment in the body still speaks of passing
 * extent_root, while the visible read_tree_block() calls take fs_info.
 * The fragment looks stale; confirm.
 *
 * NOTE(review): this view is incomplete (braces, some declarations, the
 * level test that separates the two loops, continue statements, error
 * returns and the final return are not shown).
 */
12971 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12972 struct extent_buffer *eb, int tree_root)
12974 struct extent_buffer *tmp;
12975 struct btrfs_root_item *ri;
12976 struct btrfs_key key;
12978 int level = btrfs_header_level(eb);
12984 * If we have pinned this block before, don't pin it again.
12985 * This can not only avoid forever loop with broken filesystem
12986 * but also give us some speedups.
12988 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12989 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12992 btrfs_pin_extent(fs_info, eb->start, eb->len);
12994 nritems = btrfs_header_nritems(eb);
12995 for (i = 0; i < nritems; i++) {
12997 btrfs_item_key_to_cpu(eb, &key, i);
12998 if (key.type != BTRFS_ROOT_ITEM_KEY)
13000 /* Skip the extent root and reloc roots */
13001 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13002 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13003 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13005 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13006 bytenr = btrfs_disk_root_bytenr(eb, ri);
13009 * If at any point we start needing the real root we
13010 * will have to build a stump root for the root we are
13011 * in, but for now this doesn't actually use the root so
13012 * just pass in extent_root.
13014 tmp = read_tree_block(fs_info, bytenr, 0);
13015 if (!extent_buffer_uptodate(tmp)) {
13016 fprintf(stderr, "Error reading root block\n");
13019 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13020 free_extent_buffer(tmp);
13024 bytenr = btrfs_node_blockptr(eb, i);
13026 /* If we aren't the tree root don't read the block */
13027 if (level == 1 && !tree_root) {
13028 btrfs_pin_extent(fs_info, bytenr,
13029 fs_info->nodesize);
13033 tmp = read_tree_block(fs_info, bytenr, 0);
13034 if (!extent_buffer_uptodate(tmp)) {
13035 fprintf(stderr, "Error reading tree block\n");
13038 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13039 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks: pin the blocks reachable from the chunk root node
 * (tree_root argument 0) and then from the tree root node (tree_root
 * argument 1) through pin_down_tree_blocks().
 *
 * @fs_info: filesystem handle
 *
 * Returns the result of the second walk on the visible path.
 * NOTE(review): the handling of ret after the first walk is not shown.
 */
13048 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13052 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13056 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups: rebuild the in-memory block groups from the chunk
 * items in the chunk tree.
 *
 * @fs_info: filesystem handle
 *
 * The three avail_*_alloc_bits fields are cleared first. For every
 * CHUNK_ITEM found, btrfs_add_block_group() is called with the chunk type,
 * key.offset and the chunk length, and the range from key.offset to
 * key.offset + length is marked dirty in fs_info->free_space_cache. A
 * later loop looks block groups up in order, advancing start to the end of
 * each one; what it does with each cache entry is not visible.
 *
 * NOTE(review): this view is incomplete (braces, declarations of ret and
 * start, key.objectid and key.offset setup, loop headers, error paths and
 * the return are not shown).
 */
13059 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13061 struct btrfs_block_group_cache *cache;
13062 struct btrfs_path path;
13063 struct extent_buffer *leaf;
13064 struct btrfs_chunk *chunk;
13065 struct btrfs_key key;
13069 btrfs_init_path(&path);
13071 key.type = BTRFS_CHUNK_ITEM_KEY;
13073 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13075 btrfs_release_path(&path);
13080 * We do this in case the block groups were screwed up and had alloc
13081 * bits that aren't actually set on the chunks. This happens with
13082 * restored images every time and could happen in real life I guess.
13084 fs_info->avail_data_alloc_bits = 0;
13085 fs_info->avail_metadata_alloc_bits = 0;
13086 fs_info->avail_system_alloc_bits = 0;
13088 /* First we need to create the in-memory block groups */
13090 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13091 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13093 btrfs_release_path(&path);
13101 leaf = path.nodes[0];
13102 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13103 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13108 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13109 btrfs_add_block_group(fs_info, 0,
13110 btrfs_chunk_type(leaf, chunk), key.offset,
13111 btrfs_chunk_length(leaf, chunk));
13112 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13113 key.offset + btrfs_chunk_length(leaf, chunk));
13118 cache = btrfs_lookup_first_block_group(fs_info, start);
13122 start = cache->key.objectid + cache->key.offset;
13125 btrfs_release_path(&path);
/*
 * reset_balance: remove the balance item and the tree reloc root items
 * from the tree root, then reinitialise the data reloc tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem handle; tree_root is read from it
 *
 * Visible steps, in order:
 *  1. search for (BTRFS_BALANCE_OBJECTID, BTRFS_BALANCE_ITEM_KEY) and
 *     delete the item; one path jumps straight to reinit_data_reloc;
 *  2. search from BTRFS_TREE_RELOC_OBJECTID root items and delete them in
 *     batches using del_slot and del_nr;
 *  3. read the data reloc tree, record it in the transaction, reinit its
 *     root with btrfs_fsck_reinit_root() and call btrfs_make_root_dir().
 *
 * NOTE(review): this view is incomplete (braces, the declaration of ret,
 * key.offset setup, loop control, labels, error paths and the return are
 * not shown).
 */
13129 static int reset_balance(struct btrfs_trans_handle *trans,
13130 struct btrfs_fs_info *fs_info)
13132 struct btrfs_root *root = fs_info->tree_root;
13133 struct btrfs_path path;
13134 struct extent_buffer *leaf;
13135 struct btrfs_key key;
13136 int del_slot, del_nr = 0;
13140 btrfs_init_path(&path);
13141 key.objectid = BTRFS_BALANCE_OBJECTID;
13142 key.type = BTRFS_BALANCE_ITEM_KEY;
13144 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13149 goto reinit_data_reloc;
13154 ret = btrfs_del_item(trans, root, &path);
13157 btrfs_release_path(&path);
/* Second pass: tree reloc root items. */
13159 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13160 key.type = BTRFS_ROOT_ITEM_KEY;
13162 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13166 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13171 ret = btrfs_del_items(trans, root, &path,
13178 btrfs_release_path(&path);
13181 ret = btrfs_search_slot(trans, root, &key, &path,
13188 leaf = path.nodes[0];
13189 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13190 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13192 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13197 del_slot = path.slots[0];
13206 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13210 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, not the tree root. */
13213 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13214 key.type = BTRFS_ROOT_ITEM_KEY;
13215 key.offset = (u64)-1;
13216 root = btrfs_read_fs_root(fs_info, &key);
13217 if (IS_ERR(root)) {
13218 fprintf(stderr, "Error reading data reloc tree\n");
13219 ret = PTR_ERR(root);
13222 record_root_in_trans(trans, root);
13223 ret = btrfs_fsck_reinit_root(trans, root, 0);
13226 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13228 btrfs_release_path(&path);
/*
 * reinit_extent_tree: rebuild the extent tree from scratch.
 *
 * @trans:   running transaction
 * @fs_info: filesystem handle
 *
 * Visible steps, in order:
 *  1. print a message for filesystems with the MIXED_GROUPS incompat flag
 *     (the statement that follows the message is not shown);
 *  2. pin_metadata_blocks();
 *  3. btrfs_free_block_groups() followed by reset_block_groups();
 *  4. btrfs_fsck_reinit_root() on the extent root;
 *  5. insert every in-memory block group item into the extent root and
 *     call btrfs_extent_post_op();
 *  6. reset_balance().
 * Each failure visible below prints a message to stderr.
 *
 * NOTE(review): this view is incomplete (braces, declarations of ret and
 * start, the error returns, the loop header and the final return are not
 * shown).
 */
13232 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13233 struct btrfs_fs_info *fs_info)
13239 * The only reason we don't do this is because right now we're just
13240 * walking the trees we find and pinning down their bytes, we don't look
13241 * at any of the leaves. In order to do mixed groups we'd have to check
13242 * the leaves of any fs roots and pin down the bytes for any file
13243 * extents we find. Not hard but why do it if we don't have to?
13245 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13246 fprintf(stderr, "We don't support re-initing the extent tree "
13247 "for mixed block groups yet, please notify a btrfs "
13248 "developer you want to do this so they can add this "
13249 "functionality.\n");
13254 * first we need to walk all of the trees except the extent tree and pin
13255 * down the bytes that are in use so we don't overwrite any existing
13258 ret = pin_metadata_blocks(fs_info);
13260 fprintf(stderr, "error pinning down used bytes\n");
13265 * Need to drop all the block groups since we're going to recreate all
13268 btrfs_free_block_groups(fs_info);
13269 ret = reset_block_groups(fs_info);
13271 fprintf(stderr, "error resetting the block groups\n");
13275 /* Ok we can allocate now, reinit the extent root */
13276 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13278 fprintf(stderr, "extent root initialization failed\n");
13280 * When the transaction code is updated we should end the
13281 * transaction, but for now progs only knows about commit so
13282 * just return an error.
13288 * Now we have all the in-memory block groups setup so we can make
13289 * allocations properly, and the metadata we care about is safe since we
13290 * pinned all of it above.
13293 struct btrfs_block_group_cache *cache;
13295 cache = btrfs_lookup_first_block_group(fs_info, start);
13298 start = cache->key.objectid + cache->key.offset;
13299 ret = btrfs_insert_item(trans, fs_info->extent_root,
13300 &cache->key, &cache->item,
13301 sizeof(cache->item));
13303 fprintf(stderr, "Error adding block group\n");
13306 btrfs_extent_post_op(trans, fs_info->extent_root);
13309 ret = reset_balance(trans, fs_info);
13311 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer: COW the tree block @eb again by searching for its
 * first key inside a transaction with the cow argument set to 1.
 *
 * @root: any root of the filesystem; it is replaced by the owner root of
 *        @eb read through btrfs_read_fs_root()
 * @eb:   tree block to COW again
 *
 * Returns PTR_ERR() of the failing call when the owner root or the
 * transaction cannot be obtained.
 *
 * NOTE(review): this view is incomplete (braces, the declaration of ret,
 * the else keyword, one condition and the final return are not shown).
 */
13316 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13318 struct btrfs_path path;
13319 struct btrfs_trans_handle *trans;
13320 struct btrfs_key key;
13323 printf("Recowing metadata block %llu\n", eb->start);
/* Locate the owning tree through the header owner field. */
13324 key.objectid = btrfs_header_owner(eb);
13325 key.type = BTRFS_ROOT_ITEM_KEY;
13326 key.offset = (u64)-1;
13328 root = btrfs_read_fs_root(root->fs_info, &key);
13329 if (IS_ERR(root)) {
13330 fprintf(stderr, "Couldn't find owner root %llu\n",
13332 return PTR_ERR(root);
13335 trans = btrfs_start_transaction(root, 1);
13337 return PTR_ERR(trans);
/* lowest_level is set to the level of eb so the search stops at that level. */
13339 btrfs_init_path(&path);
13340 path.lowest_level = btrfs_header_level(eb);
13341 if (path.lowest_level)
13342 btrfs_node_key_to_cpu(eb, &key, 0);
13344 btrfs_item_key_to_cpu(eb, &key, 0);
13346 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the return value of the commit is not captured here. */
13347 btrfs_commit_transaction(trans, root);
13348 btrfs_release_path(&path);
/*
 * delete_bad_item: delete the item described by @bad from the tree whose
 * id is bad->root_id.
 *
 * @root: any root of the filesystem; it is replaced by the root read for
 *        bad->root_id
 * @bad:  key and owning root id of the item to delete
 *
 * Returns PTR_ERR() of the failing call when the root or the transaction
 * cannot be obtained.
 *
 * NOTE(review): this view is incomplete (braces, the declaration of ret,
 * the handling of the search result and the final return are not shown).
 */
13352 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13354 struct btrfs_path path;
13355 struct btrfs_trans_handle *trans;
13356 struct btrfs_key key;
13359 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13360 bad->key.type, bad->key.offset);
13361 key.objectid = bad->root_id;
13362 key.type = BTRFS_ROOT_ITEM_KEY;
13363 key.offset = (u64)-1;
13365 root = btrfs_read_fs_root(root->fs_info, &key);
13366 if (IS_ERR(root)) {
13367 fprintf(stderr, "Couldn't find owner root %llu\n",
13369 return PTR_ERR(root);
13372 trans = btrfs_start_transaction(root, 1);
13374 return PTR_ERR(trans);
/* Search with ins_len -1 and cow 1, then delete the item at the path. */
13376 btrfs_init_path(&path);
13377 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13383 ret = btrfs_del_item(trans, root, &path);
/* NOTE(review): the return value of the commit is not captured here. */
13385 btrfs_commit_transaction(trans, root);
13386 btrfs_release_path(&path);
/*
 * zero_log_tree: set the log root and the log root level in the super
 * block copy to 0 and commit a transaction.
 *
 * @root: root used to start and commit the transaction
 *
 * ret receives PTR_ERR(trans) when the transaction cannot be started,
 * otherwise the result of the commit.
 *
 * NOTE(review): this view is incomplete (braces, the declaration of ret
 * and the return statements are not shown).
 */
13390 static int zero_log_tree(struct btrfs_root *root)
13392 struct btrfs_trans_handle *trans;
13395 trans = btrfs_start_transaction(root, 1);
13396 if (IS_ERR(trans)) {
13397 ret = PTR_ERR(trans);
13400 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13401 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13402 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum: read the range of @len bytes that starts at @start one
 * sector at a time and add a checksum for every sector to @csum_root.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill
 * @buf:       scratch buffer that receives each read; callers in this file
 *             allocate it with fs_info->sectorsize bytes
 * @start:     first byte of the range
 * The remaining parameter is not visible in this view; the loop bound
 * suggests it is the length len.
 *
 * NOTE(review): this view is incomplete (braces, declarations, error
 * checks after each call and the return are not shown).
 */
13406 static int populate_csum(struct btrfs_trans_handle *trans,
13407 struct btrfs_root *csum_root, char *buf, u64 start,
13410 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13415 while (offset < len) {
/* NOTE(review): sectorsize is reloaded on every iteration although fs_info does not change inside the loop. */
13416 sectorsize = fs_info->sectorsize;
13417 ret = read_extent_data(fs_info, buf, start + offset,
/* start + len, the end of the whole range, is passed as the third argument. */
13421 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13422 start + offset, buf, sectorsize);
13425 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root: walk @cur_root and call populate_csum()
 * for the disk range of every regular file extent found.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill
 * @cur_root:  fs or subvolume tree to scan
 *
 * Items whose key type is not BTRFS_EXTENT_DATA_KEY, or whose extent type
 * is not BTRFS_FILE_EXTENT_REG, are tested for below; the statements taken
 * for them are not visible. The scratch buffer is one sector long.
 *
 * NOTE(review): this view is incomplete (braces, declarations, search key
 * setup, loop control, the -EEXIST branch body, free(buf) and the return
 * are not shown).
 */
13430 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13431 struct btrfs_root *csum_root,
13432 struct btrfs_root *cur_root)
13434 struct btrfs_path path;
13435 struct btrfs_key key;
13436 struct extent_buffer *node;
13437 struct btrfs_file_extent_item *fi;
13444 buf = malloc(cur_root->fs_info->sectorsize);
13448 btrfs_init_path(&path);
13452 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13455 /* Iterate all regular file extents and fill its csum */
13457 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13459 if (key.type != BTRFS_EXTENT_DATA_KEY)
13461 node = path.nodes[0];
13462 slot = path.slots[0];
13463 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13464 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The on-disk extent (disk_bytenr, disk_num_bytes) is what gets checksummed. */
13466 start = btrfs_file_extent_disk_bytenr(node, fi);
13467 len = btrfs_file_extent_disk_num_bytes(node, fi);
13469 ret = populate_csum(trans, csum_root, buf, start, len);
13470 if (ret == -EEXIST)
13476 * TODO: if next leaf is corrupted, jump to nearest next valid
13479 ret = btrfs_next_item(cur_root, &path);
13489 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs: walk the ROOT_ITEMs of the tree root starting at
 * BTRFS_FS_TREE_OBJECTID and call fill_csum_tree_from_one_fs_root() for
 * each tree that is read.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill; fs_info is taken from it
 *
 * Three tests on the key are visible (objectid above
 * BTRFS_LAST_FREE_OBJECTID, type other than BTRFS_ROOT_ITEM_KEY,
 * is_fstree() false); the statements taken for them are not shown.
 *
 * NOTE(review): this view is incomplete (braces, declarations of ret and
 * slot, key.offset setup, loop control, labels and the return are not shown).
 */
13494 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13495 struct btrfs_root *csum_root)
13497 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13498 struct btrfs_path path;
13499 struct btrfs_root *tree_root = fs_info->tree_root;
13500 struct btrfs_root *cur_root;
13501 struct extent_buffer *node;
13502 struct btrfs_key key;
13506 btrfs_init_path(&path);
13507 key.objectid = BTRFS_FS_TREE_OBJECTID;
13509 key.type = BTRFS_ROOT_ITEM_KEY;
13510 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13519 node = path.nodes[0];
13520 slot = path.slots[0];
13521 btrfs_item_key_to_cpu(node, &key, slot);
13522 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13524 if (key.type != BTRFS_ROOT_ITEM_KEY)
13526 if (!is_fstree(key.objectid))
/* The offset is set to (u64)-1 before the key is passed to btrfs_read_fs_root(). */
13528 key.offset = (u64)-1;
13530 cur_root = btrfs_read_fs_root(fs_info, &key);
13531 if (IS_ERR(cur_root) || !cur_root) {
/* NOTE(review): "%lld" is a signed conversion; confirm the argument type and whether "%llu" was meant. */
13532 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13536 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13541 ret = btrfs_next_item(tree_root, &path);
13551 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent: walk the extent tree and call
 * populate_csum() for EXTENT_ITEMs that carry BTRFS_EXTENT_FLAG_DATA.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill; the extent root is taken from its
 *             fs_info
 *
 * The range passed to populate_csum() starts at key.objectid; the length
 * argument is not visible in this view. The scratch buffer is one sector
 * long.
 *
 * NOTE(review): this view is incomplete (braces, declarations of buf and
 * ret, key.objectid and key.offset setup, loop control, slot advance,
 * free(buf) and the return are not shown).
 */
13555 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13556 struct btrfs_root *csum_root)
13558 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13559 struct btrfs_path path;
13560 struct btrfs_extent_item *ei;
13561 struct extent_buffer *leaf;
13563 struct btrfs_key key;
13566 btrfs_init_path(&path);
13568 key.type = BTRFS_EXTENT_ITEM_KEY;
13570 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13572 btrfs_release_path(&path);
13576 buf = malloc(csum_root->fs_info->sectorsize);
13578 btrfs_release_path(&path);
/* Step to the next leaf once the slot runs past the last item. */
13583 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13584 ret = btrfs_next_leaf(extent_root, &path);
13592 leaf = path.nodes[0];
13594 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13595 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13600 ei = btrfs_item_ptr(leaf, path.slots[0],
13601 struct btrfs_extent_item);
13602 if (!(btrfs_extent_flags(leaf, ei) &
13603 BTRFS_EXTENT_FLAG_DATA)) {
13608 ret = populate_csum(trans, csum_root, buf, key.objectid,
13615 btrfs_release_path(&path);
13621 * Recalculate the csum and put it into the csum tree.
13623 * Extent tree init will wipe out all the extent info, so in that case, we
13624 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13625 * will use fs/subvol trees to init the csum tree.
/*
 * fill_csum_tree: entry point for rebuilding the checksum tree.
 *
 * @trans:          running transaction
 * @csum_root:      checksum tree to fill
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(); the other
 *                  visible return goes through fill_csum_tree_from_extent()
 *
 * Returns whatever the selected helper returns.
 */
13627 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13628 struct btrfs_root *csum_root,
13629 int search_fs_tree)
13631 if (search_fs_tree)
13632 return fill_csum_tree_from_fs(trans, csum_root);
13634 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * free_roots_info_cache: empty and free the global roots_info_cache.
 *
 * Does nothing further when roots_info_cache is NULL (the statement after
 * the test is not shown). Otherwise entries are removed from the cache
 * tree one at a time until it is empty, the tree itself is freed and the
 * global pointer is reset to NULL.
 *
 * NOTE(review): the statement that releases each root_item_info after
 * container_of() is not visible in this view; confirm rii is freed.
 */
13637 static void free_roots_info_cache(void)
13639 if (!roots_info_cache)
13642 while (!cache_tree_empty(roots_info_cache)) {
13643 struct cache_extent *entry;
13644 struct root_item_info *rii;
13646 entry = first_cache_extent(roots_info_cache);
13649 remove_cache_extent(roots_info_cache, entry);
13650 rii = container_of(entry, struct root_item_info, cache_extent);
13654 free(roots_info_cache);
13655 roots_info_cache = NULL;
/*
 * build_roots_info_cache: scan the extent tree and record, per root id,
 * the highest-level tree block seen for that root in roots_info_cache.
 *
 * @info: filesystem handle; extent_root is read from it
 *
 * The cache tree is allocated on first use. Only EXTENT_ITEM and
 * METADATA_ITEM keys are looked at; a test for EXTENT_ITEMs without
 * BTRFS_EXTENT_FLAG_TREE_BLOCK is also visible. The first inline ref must
 * be a TREE_BLOCK_REF, and its offset is used as the root id. Each cache
 * entry is keyed by root id with size 1 and starts with level (u8)-1,
 * which the update test below treats as unset.
 *
 * NOTE(review): this view is incomplete (braces, declarations, key
 * objectid and offset setup, loop control, the body of the equal-level
 * branch, error paths and the return are not shown).
 */
13658 static int build_roots_info_cache(struct btrfs_fs_info *info)
13661 struct btrfs_key key;
13662 struct extent_buffer *leaf;
13663 struct btrfs_path path;
13665 if (!roots_info_cache) {
13666 roots_info_cache = malloc(sizeof(*roots_info_cache));
13667 if (!roots_info_cache)
13669 cache_tree_init(roots_info_cache);
13672 btrfs_init_path(&path);
13674 key.type = BTRFS_EXTENT_ITEM_KEY;
13676 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13679 leaf = path.nodes[0];
13682 struct btrfs_key found_key;
13683 struct btrfs_extent_item *ei;
13684 struct btrfs_extent_inline_ref *iref;
13685 int slot = path.slots[0];
13690 struct cache_extent *entry;
13691 struct root_item_info *rii;
13693 if (slot >= btrfs_header_nritems(leaf)) {
13694 ret = btrfs_next_leaf(info->extent_root, &path);
13701 leaf = path.nodes[0];
13702 slot = path.slots[0];
13705 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13707 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13708 found_key.type != BTRFS_METADATA_ITEM_KEY)
13711 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13712 flags = btrfs_extent_flags(leaf, ei);
13714 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13715 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline ref follows
 * the extent item directly. Otherwise a btrfs_tree_block_info sits in
 * between and carries the level.
 */
13718 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13719 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13720 level = found_key.offset;
13722 struct btrfs_tree_block_info *binfo;
13724 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13725 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13726 level = btrfs_tree_block_level(leaf, binfo);
13730 * For a root extent, it must be of the following type and the
13731 * first (and only one) iref in the item.
13733 type = btrfs_extent_inline_ref_type(leaf, iref);
13734 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13737 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13738 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13740 rii = malloc(sizeof(struct root_item_info));
13745 rii->cache_extent.start = root_id;
13746 rii->cache_extent.size = 1;
13747 rii->level = (u8)-1;
13748 entry = &rii->cache_extent;
13749 ret = insert_cache_extent(roots_info_cache, entry);
13752 rii = container_of(entry, struct root_item_info,
13756 ASSERT(rii->cache_extent.start == root_id);
13757 ASSERT(rii->cache_extent.size == 1);
/* A higher level, or an entry still at (u8)-1, replaces the recorded block. */
13759 if (level > rii->level || rii->level == (u8)-1) {
13760 rii->level = level;
13761 rii->bytenr = found_key.objectid;
13762 rii->gen = btrfs_extent_generation(leaf, ei);
13763 rii->node_count = 1;
13764 } else if (level == rii->level) {
13772 btrfs_release_path(&path);
/*
 * Compare the on-disk root item at the current position of @path with the
 * root node information recorded in roots_info_cache for the root named by
 * @root_key, and optionally rewrite the item.
 *
 * @path:           positioned at the root item; the item is read from, and
 *                  when updating written back to, path->nodes[0] at
 *                  path->slots[0]
 * @root_key:       key of the root item; its objectid is the cache lookup key
 * @read_only_mode: non-zero means only detect a mismatch, zero means also
 *                  store the cached bytenr, level and generation in the item
 *
 * A mismatch is any difference in bytenr, level or generation between the
 * root item and the cached record.
 *
 * NOTE(review): the return statements are not shown in these lines; confirm
 * the exact return codes against the caller.
 */
13777 static int maybe_repair_root_item(struct btrfs_path *path,
13778 const struct btrfs_key *root_key,
13779 const int read_only_mode)
13781 const u64 root_id = root_key->objectid;
13782 struct cache_extent *entry;
13783 struct root_item_info *rii;
13784 struct btrfs_root_item ri;
13785 unsigned long offset;
/* The cache is keyed by root id with a fixed size of 1. */
13787 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13790 "Error: could not find extent items for root %llu\n",
13791 root_key->objectid);
13795 rii = container_of(entry, struct root_item_info, cache_extent);
13796 ASSERT(rii->cache_extent.start == root_id);
13797 ASSERT(rii->cache_extent.size == 1);
/* Exactly one candidate root node must have been recorded for this root. */
13799 if (rii->node_count != 1) {
13801 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk item into ri so its fields can be compared and edited. */
13806 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13807 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13809 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13810 btrfs_root_level(&ri) != rii->level ||
13811 btrfs_root_generation(&ri) != rii->gen) {
13814 * If we're in repair mode but our caller told us to not update
13815 * the root item, i.e. just check if it needs to be updated, don't
13816 * print this message, since the caller will call us again shortly
13817 * for the same root item without read only mode (the caller will
13818 * open a transaction first).
13820 if (!(read_only_mode && repair))
13822 "%sroot item for root %llu,"
13823 " current bytenr %llu, current gen %llu, current level %u,"
13824 " new bytenr %llu, new gen %llu, new level %u\n",
13825 (read_only_mode ? "" : "fixing "),
13827 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13828 btrfs_root_level(&ri),
13829 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the recorded root node is reported separately. */
13831 if (btrfs_root_generation(&ri) > rii->gen) {
13833 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13834 root_id, btrfs_root_generation(&ri), rii->gen);
/* Outside read only mode the corrected fields are written into the leaf. */
13838 if (!read_only_mode) {
13839 btrfs_set_root_bytenr(&ri, rii->bytenr);
13840 btrfs_set_root_level(&ri, rii->level);
13841 btrfs_set_root_generation(&ri, rii->gen);
13842 write_extent_buffer(path->nodes[0], &ri,
13843 offset, sizeof(ri));
13853 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13854 * caused read-only snapshots to be corrupted if they were created at a moment
13855 * when the source subvolume/snapshot had orphan items. The issue was that the
13856 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13857 * node instead of the post orphan cleanup root node.
13858 * So this function and its callees just detect and fix those cases. Even
13859 * though the regression was for read-only snapshots, this function applies to
13860 * any snapshot/subvolume root.
13861 * This must be run before any other repair code. Otherwise, other repair code
13862 * may delete or modify backrefs in the extent tree, for example, which
13863 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the root items stored in the tree root of @info, starting at
 * objectid BTRFS_FIRST_FREE_OBJECTID, and pass each one to
 * maybe_repair_root_item() together with the data gathered by
 * build_roots_info_cache().
 *
 * Keys whose type is not BTRFS_ROOT_ITEM_KEY and the root with objectid
 * BTRFS_TREE_RELOC_OBJECTID are tested for before that call (the branch
 * bodies are not shown in these lines; presumably such items are skipped -
 * confirm).
 *
 * Items are first examined without a transaction: read only mode is passed
 * to maybe_repair_root_item() whenever trans is NULL. The roots info cache
 * is released before the function finishes.
 */
13865 static int repair_root_items(struct btrfs_fs_info *info)
13867 struct btrfs_path path;
13868 struct btrfs_key key;
13869 struct extent_buffer *leaf;
13870 struct btrfs_trans_handle *trans = NULL;
13873 int need_trans = 0;
13875 btrfs_init_path(&path);
13877 ret = build_roots_info_cache(info);
13881 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13882 key.type = BTRFS_ROOT_ITEM_KEY;
13887 * Avoid opening and committing transactions if a leaf doesn't have
13888 * any root items that need to be fixed, so that we avoid rotating
13889 * backup roots unnecessarily.
13892 trans = btrfs_start_transaction(info->tree_root, 1);
13893 if (IS_ERR(trans)) {
13894 ret = PTR_ERR(trans);
13899 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13903 leaf = path.nodes[0];
13906 struct btrfs_key found_key;
/*
 * End of the current leaf: look up the key to continue from, drop the path
 * and commit the transaction.
 */
13908 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13909 int no_more_keys = find_next_key(&path, &key);
13911 btrfs_release_path(&path);
13913 ret = btrfs_commit_transaction(trans,
13925 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13927 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13929 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without a transaction the callee is asked to check only. */
13932 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/*
 * In repair mode a pass without transaction releases the path here;
 * presumably the walk is restarted with a transaction - confirm.
 */
13936 if (!trans && repair) {
13939 btrfs_release_path(&path);
13949 free_roots_info_cache();
13950 btrfs_release_path(&path);
/*
 * NOTE(review): the result of this final commit is not stored; confirm that
 * a failed commit cannot be mistaken for a successful repair.
 */
13952 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group of @fs_info and store
 * (u64)-1 as cache generation in the super block copy.
 *
 * Block groups are visited one after another: each lookup starts at the end
 * (objectid + offset) of the block group handled before. Once all of them
 * are done, a transaction is started only to update the cache generation
 * and commit it.
 *
 * Returns PTR_ERR() of the handle when the transaction cannot be started,
 * and the negative commit result when the commit fails.
 */
13959 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13961 struct btrfs_trans_handle *trans;
13962 struct btrfs_block_group_cache *bg_cache;
13966 /* Clear all free space cache inodes and its extent data */
13968 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13971 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the walk right behind the block group just handled. */
13974 current = bg_cache->key.objectid + bg_cache->key.offset;
13977 /* Don't forget to set cache_generation to -1 */
13978 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13979 if (IS_ERR(trans)) {
13980 error("failed to update super block cache generation");
13981 return PTR_ERR(trans);
13983 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/*
 * The new cache generation only reaches disk when the commit succeeds, so
 * a failed commit must not be reported as success.
 */
13984 ret = btrfs_commit_transaction(trans, fs_info->tree_root);
if (ret < 0) {
	error("failed to commit transaction: %d", ret);
	return ret;
}
/*
 * Clear the free space cache in the format selected by clear_version.
 *
 * clear_version 1: an error pointing the user at --clear-space-cache v2 is
 * reported when the FREE_SPACE_TREE compat_ro bit is set; the cache itself
 * is cleared by clear_free_space_cache().
 * clear_version 2: when the FREE_SPACE_TREE compat_ro bit is not set there
 * is nothing to clear and a message says so; the tree itself is cleared by
 * btrfs_clear_free_space_tree().
 *
 * Progress and results are printed to stdout, failures go through error().
 *
 * NOTE(review): the return statements are not shown in these lines; confirm
 * the return codes against the caller in cmd_check().
 */
13989 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13994 if (clear_version == 1) {
13995 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13997 "free space cache v2 detected, use --clear-space-cache v2");
14001 printf("Clearing free space cache\n");
14002 ret = clear_free_space_cache(fs_info);
14004 error("failed to clear free space cache");
14007 printf("Free space cache cleared\n");
14009 } else if (clear_version == 2) {
14010 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14011 printf("no free space cache v2 to clear\n");
14015 printf("Clear free space cache v2\n");
14016 ret = btrfs_clear_free_space_tree(fs_info);
14018 error("failed to clear free space cache v2: %d", ret);
14021 printf("free space cache v2 cleared\n");
/*
 * Usage text of the check command; cmd_check() hands it to usage() when the
 * command line is not acceptable. The synopsis comes first, followed by the
 * short and long description and one entry per option.
 */
14028 const char * const cmd_check_usage[] = {
14029 "btrfs check [options] <device>",
14030 "Check structural integrity of a filesystem (unmounted).",
14031 "Check structural integrity of an unmounted filesystem. Verify internal",
14032 "trees' consistency and item connectivity. In the repair mode try to",
14033 "fix the problems found. ",
14034 "WARNING: the repair mode is considered dangerous",
14036 "-s|--super <superblock> use this superblock copy",
14037 "-b|--backup use the first valid backup root copy",
14038 "--force skip mount checks, repair is not possible",
14039 "--repair try to repair the filesystem",
14040 "--readonly run in read-only mode (default)",
14041 "--init-csum-tree create a new CRC tree",
14042 "--init-extent-tree create a new extent tree",
14043 "--mode <MODE> allows choice of memory/IO trade-offs",
14044 " where MODE is one of:",
14045 " original - read inodes and extents to memory (requires",
14046 " more memory, does less IO)",
14047 " lowmem - try to use less memory but read blocks again",
14049 "--check-data-csum verify checksums of data blocks",
14050 "-Q|--qgroup-report print a report on qgroup consistency",
14051 "-E|--subvol-extents <subvolid>",
14052 " print subvolume extents and sharing state",
14053 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14054 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14055 "-p|--progress indicate progress",
14056 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the check command.
 *
 * Outline, as far as it can be read from the statements below:
 *  1. parse the command line; options that modify the filesystem also set
 *     OPEN_CTREE_WRITES in ctree_flags
 *  2. reject --readonly combined with repair, look at the mount status of
 *     the device and open the filesystem
 *  3. run the one-shot actions when requested: clearing the space cache,
 *     the qgroup report, the extent state of a single subvolume
 *  4. with --init-extent-tree or --init-csum-tree rebuild those trees in a
 *     transaction and commit it
 *  5. check root items, chunks and extents, the super block size, the free
 *     space cache or tree, fs roots, csums, root refs and quota groups
 *  6. print the byte counters collected in the global totals
 *
 * @argc, @argv: command line of the command; exactly one non-option
 * argument, the device, is accepted, otherwise the usage text is shown.
 */
14060 int cmd_check(int argc, char **argv)
14062 struct cache_tree root_cache;
14063 struct btrfs_root *root;
14064 struct btrfs_fs_info *info;
14067 u64 tree_root_bytenr = 0;
14068 u64 chunk_root_bytenr = 0;
14069 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14073 int init_csum_tree = 0;
14075 int clear_space_cache = 0;
14076 int qgroup_report = 0;
14077 int qgroups_repaired = 0;
14078 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-only options get values above the range of plain characters. */
14083 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14084 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14085 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14086 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14087 GETOPT_VAL_FORCE };
14088 static const struct option long_options[] = {
14089 { "super", required_argument, NULL, 's' },
14090 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14091 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14092 { "init-csum-tree", no_argument, NULL,
14093 GETOPT_VAL_INIT_CSUM },
14094 { "init-extent-tree", no_argument, NULL,
14095 GETOPT_VAL_INIT_EXTENT },
14096 { "check-data-csum", no_argument, NULL,
14097 GETOPT_VAL_CHECK_CSUM },
14098 { "backup", no_argument, NULL, 'b' },
14099 { "subvol-extents", required_argument, NULL, 'E' },
14100 { "qgroup-report", no_argument, NULL, 'Q' },
14101 { "tree-root", required_argument, NULL, 'r' },
14102 { "chunk-root", required_argument, NULL,
14103 GETOPT_VAL_CHUNK_TREE },
14104 { "progress", no_argument, NULL, 'p' },
14105 { "mode", required_argument, NULL,
14107 { "clear-space-cache", required_argument, NULL,
14108 GETOPT_VAL_CLEAR_SPACE_CACHE},
14109 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14110 { NULL, 0, NULL, 0}
/*
 * Every short option that consumes a value needs a trailing colon in the
 * option string. -E takes a subvolume id (required_argument in the table
 * above and <subvolid> in the usage text), so it is listed as E: here;
 * without the colon getopt leaves optarg NULL for the short form and the
 * id is never parsed.
 */
14113 c = getopt_long(argc, argv, "as:br:pE:Q", long_options, NULL);
14117 case 'a': /* ignored */ break;
14119 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* The super block copy is selected by mirror index, not by offset. */
14122 num = arg_strtou64(optarg);
14123 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14125 "super mirror should be less than %d",
14126 BTRFS_SUPER_MIRROR_MAX);
14129 bytenr = btrfs_sb_offset(((int)num));
14130 printf("using SB copy %llu, bytenr %llu\n", num,
14131 (unsigned long long)bytenr);
14137 subvolid = arg_strtou64(optarg);
14140 tree_root_bytenr = arg_strtou64(optarg);
14142 case GETOPT_VAL_CHUNK_TREE:
14143 chunk_root_bytenr = arg_strtou64(optarg);
14146 ctx.progress_enabled = true;
14150 usage(cmd_check_usage);
14151 case GETOPT_VAL_REPAIR:
14152 printf("enabling repair mode\n");
14154 ctree_flags |= OPEN_CTREE_WRITES;
14156 case GETOPT_VAL_READONLY:
14159 case GETOPT_VAL_INIT_CSUM:
14160 printf("Creating a new CRC tree\n");
14161 init_csum_tree = 1;
14163 ctree_flags |= OPEN_CTREE_WRITES;
14165 case GETOPT_VAL_INIT_EXTENT:
14166 init_extent_tree = 1;
14167 ctree_flags |= (OPEN_CTREE_WRITES |
14168 OPEN_CTREE_NO_BLOCK_GROUPS);
14171 case GETOPT_VAL_CHECK_CSUM:
14172 check_data_csum = 1;
14174 case GETOPT_VAL_MODE:
14175 check_mode = parse_check_mode(optarg);
14176 if (check_mode == CHECK_MODE_UNKNOWN) {
14177 error("unknown mode: %s", optarg);
14181 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14182 if (strcmp(optarg, "v1") == 0) {
14183 clear_space_cache = 1;
14184 } else if (strcmp(optarg, "v2") == 0) {
14185 clear_space_cache = 2;
14186 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14189 "invalid argument to --clear-space-cache, must be v1 or v2");
14192 ctree_flags |= OPEN_CTREE_WRITES;
14194 case GETOPT_VAL_FORCE:
/* Exactly one positional argument, the device, must remain. */
14200 if (check_argc_exact(argc - optind, 1))
14201 usage(cmd_check_usage);
14203 if (ctx.progress_enabled) {
14204 ctx.tp = TASK_NOTHING;
14205 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14208 /* This check is the only reason for --readonly to exist */
14209 if (readonly && repair) {
14210 error("repair options are not compatible with --readonly");
14215 * experimental and dangerous
14217 if (repair && check_mode == CHECK_MODE_LOWMEM)
14218 warning("low-memory mode repair support is only partial");
14221 cache_tree_init(&root_cache);
14223 ret = check_mounted(argv[optind]);
14226 error("could not check mount status: %s",
14232 "%s is currently mounted, use --force if you really intend to check the filesystem",
14240 error("repair and --force is not yet supported");
14247 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14251 "filesystem mounted, continuing because of --force");
14253 /* A block device is mounted in exclusive mode by kernel */
14254 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14257 /* only allow partial opening under repair mode */
14259 ctree_flags |= OPEN_CTREE_PARTIAL;
14261 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14262 chunk_root_bytenr, ctree_flags);
14264 error("cannot open file system");
/* Publish the opened filesystem through the global_info pointer. */
14270 global_info = info;
14271 root = info->fs_root;
14272 uuid_unparse(info->super_copy->fsid, uuidbuf);
14274 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14277 * Check the bare minimum before starting anything else that could rely
14278 * on it, namely the tree roots, any local consistency checks
14280 if (!extent_buffer_uptodate(info->tree_root->node) ||
14281 !extent_buffer_uptodate(info->dev_root->node) ||
14282 !extent_buffer_uptodate(info->chunk_root->node)) {
14283 error("critical roots corrupted, unable to check the filesystem");
/* clear_space_cache holds the requested cache version, 1 or 2. */
14289 if (clear_space_cache) {
14290 ret = do_clear_free_space_cache(info, clear_space_cache);
14296 * repair mode will force us to commit transaction which
14297 * will make us fail to load log tree when mounting.
14299 if (repair && btrfs_super_log_root(info->super_copy)) {
14300 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14306 ret = zero_log_tree(root);
14309 error("failed to zero log tree: %d", ret);
14314 if (qgroup_report) {
14315 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14317 ret = qgroup_verify_all(info);
14324 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14325 subvolid, argv[optind], uuidbuf);
14326 ret = print_extent_state(info, subvolid);
/* Both tree rebuilds share a single transaction on the extent root. */
14331 if (init_extent_tree || init_csum_tree) {
14332 struct btrfs_trans_handle *trans;
14334 trans = btrfs_start_transaction(info->extent_root, 0);
14335 if (IS_ERR(trans)) {
14336 error("error starting transaction");
14337 ret = PTR_ERR(trans);
14342 if (init_extent_tree) {
14343 printf("Creating a new extent tree\n");
14344 ret = reinit_extent_tree(trans, info);
14350 if (init_csum_tree) {
14351 printf("Reinitialize checksum tree\n");
14352 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14354 error("checksum tree initialization failed: %d",
14361 ret = fill_csum_tree(trans, info->csum_root,
14365 error("checksum tree refilling failed: %d", ret);
14370 * Ok now we commit and run the normal fsck, which will add
14371 * extent entries for all of the items it finds.
14373 ret = btrfs_commit_transaction(trans, info->extent_root);
14378 if (!extent_buffer_uptodate(info->extent_root->node)) {
14379 error("critical: extent_root, unable to check the filesystem");
14384 if (!extent_buffer_uptodate(info->csum_root->node)) {
14385 error("critical: csum_root, unable to check the filesystem");
/* Root items are only examined when the extent tree was not rebuilt. */
14391 if (!init_extent_tree) {
14392 ret = repair_root_items(info);
14395 error("failed to repair root items: %s", strerror(-ret));
14399 fprintf(stderr, "Fixed %d roots.\n", ret);
14401 } else if (ret > 0) {
14403 "Found %d roots with an outdated root item.\n",
14406 "Please run a filesystem check with the option --repair to fix them.\n");
14413 ret = do_check_chunks_and_extents(info);
14417 "errors found in extent allocation tree or chunk allocation");
14419 /* Only re-check super size after we checked and repaired the fs */
14420 err |= !is_super_size_valid(info);
14422 if (!ctx.progress_enabled) {
14423 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14424 fprintf(stderr, "checking free space tree\n");
14426 fprintf(stderr, "checking free space cache\n");
14428 ret = check_space_cache(root);
14431 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14432 error("errors found in free space tree");
14434 error("errors found in free space cache");
14439 * We used to have to have these hole extents in between our real
14440 * extents so if we don't have this flag set we need to make sure there
14441 * are no gaps in the file extents for inodes, otherwise we can just
14442 * ignore it when this happens.
14444 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14445 ret = do_check_fs_roots(info, &root_cache);
14448 error("errors found in fs roots");
14452 fprintf(stderr, "checking csums\n");
14453 ret = check_csums(root);
14456 error("errors found in csum tree");
14460 fprintf(stderr, "checking root refs\n");
14461 /* For low memory mode, check_fs_roots_v2 handles root refs */
14462 if (check_mode != CHECK_MODE_LOWMEM) {
14463 ret = check_root_refs(root, &root_cache);
14466 error("errors found in root refs");
/* In repair mode, recow every extent buffer queued on recow_ebs. */
14471 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14472 struct extent_buffer *eb;
14474 eb = list_first_entry(&root->fs_info->recow_ebs,
14475 struct extent_buffer, recow);
14476 list_del_init(&eb->recow);
14477 ret = recow_extent_buffer(root, eb);
14480 error("fails to fix transid errors");
/* Drain the global delete_items list, one bad item at a time. */
14485 while (!list_empty(&delete_items)) {
14486 struct bad_item *bad;
14488 bad = list_first_entry(&delete_items, struct bad_item, list);
14489 list_del_init(&bad->list);
14491 ret = delete_bad_item(root, bad);
14497 if (info->quota_enabled) {
14498 fprintf(stderr, "checking quota groups\n");
14499 ret = qgroup_verify_all(info);
14502 error("failed to check quota groups");
14506 ret = repair_qgroups(info, &qgroups_repaired);
14509 error("failed to repair quota groups");
/* Buffers still queued for recow at this point count as errors. */
14515 if (!list_empty(&root->fs_info->recow_ebs)) {
14516 error("transid errors in file system");
14521 printf("found %llu bytes used, ",
14522 (unsigned long long)bytes_used);
14524 printf("error(s) found\n");
14526 printf("no error found\n");
14527 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14528 printf("total tree bytes: %llu\n",
14529 (unsigned long long)total_btree_bytes);
14530 printf("total fs tree bytes: %llu\n",
14531 (unsigned long long)total_fs_tree_bytes);
14532 printf("total extent tree bytes: %llu\n",
14533 (unsigned long long)total_extent_tree_bytes);
14534 printf("btree space waste bytes: %llu\n",
14535 (unsigned long long)btree_space_waste);
14536 printf("file data blocks allocated: %llu\n referenced %llu\n",
14537 (unsigned long long)data_bytes_allocated,
14538 (unsigned long long)data_bytes_referenced);
14540 free_qgroup_counts();
14541 free_root_recs_tree(&root_cache);
14545 if (ctx.progress_enabled)
14546 task_deinit(ctx.info);