2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
54 TASK_NOTHING, /* have to be the last element */
59 enum task_position tp;
61 struct task_info *info;
/*
 * File-scope state of the checker.  The u64 byte counters and the two int
 * option flags all start at zero; duplicate_extents and delete_items are
 * list heads; global_info, ctx and roots_info_cache are shared handles
 * (their users are outside this group of definitions).
 */
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
81 enum btrfs_check_mode {
85 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
88 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * rb-tree comparator for two data backrefs.
 *
 * Both nodes are expected to be data backrefs (WARN_ON otherwise).  Keys are
 * compared in this order: parent (which shares storage with root), then,
 * unless node1 is a full backref, owner and offset, and finally disk_bytenr
 * and bytes when both entries have found_ref set.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably positive / negative / zero in the usual comparator convention.
 */
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
92 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94 struct data_backref *back1 = to_data_backref(ext1);
95 struct data_backref *back2 = to_data_backref(ext2);
97 WARN_ON(!ext1->is_data);
98 WARN_ON(!ext2->is_data);
100 /* parent and root are a union, so this covers both */
101 if (back1->parent > back2->parent)
103 if (back1->parent < back2->parent)
106 /* This is a full backref and the parents match. */
107 if (back1->node.full_backref)
110 if (back1->owner > back2->owner)
112 if (back1->owner < back2->owner)
115 if (back1->offset > back2->offset)
117 if (back1->offset < back2->offset)
/* disk_bytenr is only compared once both sides have actually found a ref */
120 if (back1->found_ref && back2->found_ref) {
121 if (back1->disk_bytenr > back2->disk_bytenr)
123 if (back1->disk_bytenr < back2->disk_bytenr)
126 if (back1->bytes > back2->bytes)
128 if (back1->bytes < back2->bytes)
/*
 * rb-tree comparator for two tree (metadata) backrefs.
 *
 * Both nodes are expected to be non-data backrefs (WARN_ON otherwise); the
 * only key compared in the visible code is parent, which shares storage
 * with root.
 *
 * NOTE(review): return statements are not visible in this excerpt.
 */
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
137 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139 struct tree_backref *back1 = to_tree_backref(ext1);
140 struct tree_backref *back2 = to_tree_backref(ext2);
142 WARN_ON(ext1->is_data);
143 WARN_ON(ext2->is_data);
145 /* parent and root are a union, so this covers both */
146 if (back1->parent > back2->parent)
148 if (back1->parent < back2->parent)
/*
 * Top-level rb-tree comparator for extent backrefs.
 *
 * Orders first by is_data, then by full_backref, and finally delegates to
 * compare_data_backref() or compare_tree_backref().
 *
 * NOTE(review): the condition selecting between the two delegates is not
 * visible in this excerpt; presumably it tests is_data.
 */
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
156 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
159 if (ext1->is_data > ext2->is_data)
162 if (ext1->is_data < ext2->is_data)
165 if (ext1->full_backref > ext2->full_backref)
167 if (ext1->full_backref < ext2->full_backref)
171 return compare_data_backref(node1, node2);
173 return compare_tree_backref(node1, node2);
/*
 * Progress-reporting callback; @p is a struct task_ctx.
 *
 * Starts a 1000 ms period on priv->info, prints the label for the current
 * position (task_position_string[priv->tp]) followed by a spinner character
 * chosen by count % 4, ends the line with '\r' so the next print overwrites
 * it, and then waits for the period to elapse.
 *
 * NOTE(review): the surrounding loop, the declaration of count and the
 * action taken when tp == TASK_NOTHING are not visible in this excerpt.
 */
177 static void *print_status_check(void *p)
179 struct task_ctx *priv = p;
180 const char work_indicator[] = { '.', 'o', 'O', 'o' };
182 static char *task_position_string[] = {
184 "checking free space cache",
188 task_period_start(priv->info, 1000 /* 1s */);
190 if (priv->tp == TASK_NOTHING)
194 printf("%s [%c]\r", task_position_string[priv->tp],
195 work_indicator[count % 4]);
198 task_period_wait(priv->info);
203 static int print_status_return(void *p)
211 static enum btrfs_check_mode parse_check_mode(const char *str)
213 if (strcmp(str, "lowmem") == 0)
214 return CHECK_MODE_LOWMEM;
215 if (strcmp(str, "orig") == 0)
216 return CHECK_MODE_ORIGINAL;
217 if (strcmp(str, "original") == 0)
218 return CHECK_MODE_ORIGINAL;
220 return CHECK_MODE_UNKNOWN;
223 /* Compatibility helper that allows reuse of the old code */
/*
 * Report the first gap recorded in @holes.
 *
 * The empty-tree case is handled separately; otherwise the leftmost hole of
 * the rb-tree is fetched.
 *
 * NOTE(review): both return statements are missing from this excerpt;
 * presumably the hole's start offset is returned, with a sentinel for an
 * empty tree - confirm.
 */
224 static u64 first_extent_gap(struct rb_root *holes)
226 struct file_extent_hole *hole;
228 if (RB_EMPTY_ROOT(holes))
231 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree insertion comparator for file extent holes.
 *
 * Holes are ordered by start offset; when the starts are equal the lengths
 * decide which of the two becomes the "merge center" for the later merge.
 *
 * NOTE(review): return statements are not visible in this excerpt.
 */
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
237 struct file_extent_hole *hole1;
238 struct file_extent_hole *hole2;
240 hole1 = rb_entry(node1, struct file_extent_hole, node);
241 hole2 = rb_entry(node2, struct file_extent_hole, node);
243 if (hole1->start > hole2->start)
245 if (hole1->start < hole2->start)
247 /* Now hole1->start == hole2->start */
248 if (hole1->len >= hole2->len)
250 * Hole 1 will be merge center
251 * Same hole will be merged later
254 /* Hole 2 will be merge center */
259 * Add a hole to the record
261 * This will do hole merge for copy_file_extent_holes(),
262 * which will ensure there won't be continuous holes.
/*
 * Insert a new hole into @holes and merge it with its neighbours.
 *
 * A hole node is allocated and inserted with compare_hole().  If the
 * previous hole ends at or after the new hole's start, the new hole is
 * extended backwards to cover it and the previous node is erased.  The new
 * hole is then merged forward with every following hole it touches or
 * overlaps, extending its length when the neighbour reaches further.
 *
 * NOTE(review): the remaining parameters (start and length, judging by the
 * callers), the allocation-failure handling, the freeing of erased nodes
 * and the return value are not visible in this excerpt.
 */
264 static int add_file_extent_hole(struct rb_root *holes,
267 struct file_extent_hole *hole;
268 struct file_extent_hole *prev = NULL;
269 struct file_extent_hole *next = NULL;
271 hole = malloc(sizeof(*hole));
276 /* Since compare will not return 0, no -EEXIST will happen */
277 rb_insert(holes, &hole->node, compare_hole);
279 /* simple merge with previous hole */
280 if (rb_prev(&hole->node))
281 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
283 if (prev && prev->start + prev->len >= hole->start) {
/* grow the new hole so that it starts where prev started */
284 hole->len = hole->start + hole->len - prev->start;
285 hole->start = prev->start;
286 rb_erase(&prev->node, holes);
291 /* iterate merge with next holes */
293 if (!rb_next(&hole->node))
295 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
297 if (hole->start + hole->len >= next->start) {
/* only extend when next reaches at least as far as the current hole */
298 if (hole->start + hole->len <= next->start + next->len)
299 hole->len = next->start + next->len -
301 rb_erase(&next->node, holes);
/*
 * rb-tree search comparator locating the hole that contains an offset.
 *
 * @data carries the lookup key as a struct file_extent_hole; @node is the
 * tree entry being tested.  The key's offset is compared against the range
 * [hole->start, hole->start + hole->len) of the tree entry.
 *
 * NOTE(review): the declaration/assignment of start and the return
 * statements are not visible in this excerpt.
 */
310 static int compare_hole_range(struct rb_node *node, void *data)
312 struct file_extent_hole *hole;
315 hole = (struct file_extent_hole *)data;
318 hole = rb_entry(node, struct file_extent_hole, node);
319 if (start < hole->start)
321 if (start >= hole->start && start < hole->start + hole->len)
327 * Delete a hole in the record
329 * This will do the hole split and is much more restrictive than add.
/*
 * Remove the range [start, start + len) from the hole tree @holes.
 *
 * The hole containing start is located with compare_hole_range().  The
 * range must not extend past that hole.  The parts of the hole that lie
 * before and after the removed range are computed, the hole node is erased,
 * and the remaining part(s) are re-added via add_file_extent_hole().
 *
 * NOTE(review): the start/len parameters, the error returns, the setup of
 * tmp, the freeing of the erased node and the conditions guarding the two
 * re-adds are not visible in this excerpt.
 */
331 static int del_file_extent_hole(struct rb_root *holes,
334 struct file_extent_hole *hole;
335 struct file_extent_hole tmp;
340 struct rb_node *node;
347 node = rb_search(holes, &tmp, compare_hole_range, NULL);
350 hole = rb_entry(node, struct file_extent_hole, node);
351 if (start + len > hole->start + hole->len)
355 * Now there will be no overlap, delete the hole and re-add the
356 * split(s) if they exists.
/* leading remainder: part of the hole before the removed range */
358 if (start > hole->start) {
359 prev_start = hole->start;
360 prev_len = start - hole->start;
/* trailing remainder: part of the hole after the removed range */
363 if (hole->start + hole->len > start + len) {
364 next_start = start + len;
365 next_len = hole->start + hole->len - start - len;
368 rb_erase(node, holes);
371 ret = add_file_extent_hole(holes, prev_start, prev_len);
376 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to @dst.
 *
 * Walks the source tree in order with rb_first()/rb_next() and inserts each
 * hole through add_file_extent_hole(), so holes are merged with whatever
 * @dst already contains.
 *
 * NOTE(review): the second parameter (named src in the body), the loop
 * header, the handling of a failing add and the return value are not
 * visible in this excerpt.
 */
383 static int copy_file_extent_holes(struct rb_root *dst,
386 struct file_extent_hole *hole;
387 struct rb_node *node;
390 node = rb_first(src);
392 hole = rb_entry(node, struct file_extent_hole, node);
393 ret = add_file_extent_hole(dst, hole->start, hole->len);
396 node = rb_next(node);
/*
 * Empty the hole tree @holes.
 *
 * Repeatedly takes the first node, erases it from the tree and restarts
 * from rb_first(), so no iterator ever points into an erased node.
 *
 * NOTE(review): the loop header and the release of each erased hole are
 * not visible in this excerpt.
 */
401 static void free_file_extent_holes(struct rb_root *holes)
403 struct rb_node *node;
404 struct file_extent_hole *hole;
406 node = rb_first(holes);
408 hole = rb_entry(node, struct file_extent_hole, node);
409 rb_erase(node, holes);
411 node = rb_first(holes);
415 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root)
420 if (root->last_trans != trans->transid) {
421 root->track_dirty = 1;
422 root->last_trans = trans->transid;
423 root->commit_root = root->node;
424 extent_buffer_get(root->node);
/*
 * rb-tree comparator ordering device records by devid.
 *
 * NOTE(review): return statements are not visible in this excerpt.
 */
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
430 struct device_record *rec1;
431 struct device_record *rec2;
433 rec1 = rb_entry(node1, struct device_record, node);
434 rec2 = rb_entry(node2, struct device_record, node);
435 if (rec1->devid > rec2->devid)
437 else if (rec1->devid < rec2->devid)
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
445 struct inode_record *rec;
446 struct inode_backref *backref;
447 struct inode_backref *orig;
448 struct inode_backref *tmp;
449 struct orphan_data_extent *src_orphan;
450 struct orphan_data_extent *dst_orphan;
455 rec = malloc(sizeof(*rec));
457 return ERR_PTR(-ENOMEM);
458 memcpy(rec, orig_rec, sizeof(*rec));
460 INIT_LIST_HEAD(&rec->backrefs);
461 INIT_LIST_HEAD(&rec->orphan_extents);
462 rec->holes = RB_ROOT;
464 list_for_each_entry(orig, &orig_rec->backrefs, list) {
465 size = sizeof(*orig) + orig->namelen + 1;
466 backref = malloc(size);
471 memcpy(backref, orig, size);
472 list_add_tail(&backref->list, &rec->backrefs);
474 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475 dst_orphan = malloc(sizeof(*dst_orphan));
480 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
483 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
490 rb = rb_first(&rec->holes);
492 struct file_extent_hole *hole;
494 hole = rb_entry(rb, struct file_extent_hole, node);
500 if (!list_empty(&rec->backrefs))
501 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502 list_del(&orig->list);
506 if (!list_empty(&rec->orphan_extents))
507 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508 list_del(&orig->list);
/*
 * Print the orphan data extents of one tree to stdout.
 *
 * Does nothing for an empty list (the early exit itself is not visible in
 * this excerpt).  Otherwise prints a heading naming the tree, followed by
 * one line per extent with inode, offset, disk_bytenr and disk_len.
 *
 * NOTE(review): the second parameter and the trailing printf arguments are
 * not visible in this excerpt.
 */
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
520 struct orphan_data_extent *orphan;
522 if (list_empty(orphan_extents))
524 printf("The following data extent is lost in tree %llu:\n",
526 list_for_each_entry(orphan, orphan_extents, list) {
527 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the error bits of inode record @rec on stderr.
 *
 * Prints one line of the form "root R inode I errors X" followed by a
 * human-readable suffix for every I_ERR_* bit that is set.  For a reloc
 * tree the line is prefixed with "reloc" and R is the objectid of the
 * corresponding fs root (root_key.offset).  When orphan file extents or
 * extent discounts were flagged, the orphan extents and the recorded holes
 * are listed afterwards.
 *
 * NOTE(review): the orphan listing is passed root->objectid, whereas the
 * summary line uses the possibly remapped root_objectid - confirm that the
 * difference is intended.
 */
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
535 u64 root_objectid = root->root_key.objectid;
536 int errors = rec->errors;
540 /* reloc root errors, we print its corresponding fs root objectid*/
541 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542 root_objectid = root->root_key.offset;
543 fprintf(stderr, "reloc");
545 fprintf(stderr, "root %llu inode %llu errors %x",
546 (unsigned long long) root_objectid,
547 (unsigned long long) rec->ino, rec->errors);
549 if (errors & I_ERR_NO_INODE_ITEM)
550 fprintf(stderr, ", no inode item");
551 if (errors & I_ERR_NO_ORPHAN_ITEM)
552 fprintf(stderr, ", no orphan item");
553 if (errors & I_ERR_DUP_INODE_ITEM)
554 fprintf(stderr, ", dup inode item");
555 if (errors & I_ERR_DUP_DIR_INDEX)
556 fprintf(stderr, ", dup dir index");
557 if (errors & I_ERR_ODD_DIR_ITEM)
558 fprintf(stderr, ", odd dir item");
559 if (errors & I_ERR_ODD_FILE_EXTENT)
560 fprintf(stderr, ", odd file extent");
561 if (errors & I_ERR_BAD_FILE_EXTENT)
562 fprintf(stderr, ", bad file extent");
563 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564 fprintf(stderr, ", file extent overlap");
565 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566 fprintf(stderr, ", file extent discount");
567 if (errors & I_ERR_DIR_ISIZE_WRONG)
568 fprintf(stderr, ", dir isize wrong");
569 if (errors & I_ERR_FILE_NBYTES_WRONG)
570 fprintf(stderr, ", nbytes wrong");
571 if (errors & I_ERR_ODD_CSUM_ITEM)
572 fprintf(stderr, ", odd csum item");
573 if (errors & I_ERR_SOME_CSUM_MISSING)
574 fprintf(stderr, ", some csum missing");
575 if (errors & I_ERR_LINK_COUNT_WRONG)
576 fprintf(stderr, ", link count wrong");
577 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578 fprintf(stderr, ", orphan file extent");
579 fprintf(stderr, "\n");
580 /* Print the orphan extents if needed */
581 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
584 /* Print the holes if needed */
585 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586 struct file_extent_hole *hole;
587 struct rb_node *node;
590 node = rb_first(&rec->holes);
591 fprintf(stderr, "Found file extent holes:\n");
594 hole = rb_entry(node, struct file_extent_hole, node);
595 fprintf(stderr, "\tstart: %llu, len: %llu\n",
596 hole->start, hole->len);
597 node = rb_next(node);
/*
 * NOTE(review): fallback line printing a single hole starting at 0; its
 * guard is not visible here (presumably "no hole was recorded").
 */
600 fprintf(stderr, "\tstart: 0, len: %llu\n",
602 root->fs_info->sectorsize));
606 static void print_ref_error(int errors)
608 if (errors & REF_ERR_NO_DIR_ITEM)
609 fprintf(stderr, ", no dir item");
610 if (errors & REF_ERR_NO_DIR_INDEX)
611 fprintf(stderr, ", no dir index");
612 if (errors & REF_ERR_NO_INODE_REF)
613 fprintf(stderr, ", no inode ref");
614 if (errors & REF_ERR_DUP_DIR_ITEM)
615 fprintf(stderr, ", dup dir item");
616 if (errors & REF_ERR_DUP_DIR_INDEX)
617 fprintf(stderr, ", dup dir index");
618 if (errors & REF_ERR_DUP_INODE_REF)
619 fprintf(stderr, ", dup inode ref");
620 if (errors & REF_ERR_INDEX_UNMATCH)
621 fprintf(stderr, ", index mismatch");
622 if (errors & REF_ERR_FILETYPE_UNMATCH)
623 fprintf(stderr, ", filetype mismatch");
624 if (errors & REF_ERR_NAME_TOO_LONG)
625 fprintf(stderr, ", name too long");
626 if (errors & REF_ERR_NO_ROOT_REF)
627 fprintf(stderr, ", no root ref");
628 if (errors & REF_ERR_NO_ROOT_BACKREF)
629 fprintf(stderr, ", no root backref");
630 if (errors & REF_ERR_DUP_ROOT_REF)
631 fprintf(stderr, ", dup root ref");
632 if (errors & REF_ERR_DUP_ROOT_BACKREF)
633 fprintf(stderr, ", dup root backref");
634 fprintf(stderr, "\n");
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
640 struct ptr_node *node;
641 struct cache_extent *cache;
642 struct inode_record *rec = NULL;
645 cache = lookup_cache_extent(inode_cache, ino, 1);
647 node = container_of(cache, struct ptr_node, cache);
649 if (mod && rec->refs > 1) {
650 node->data = clone_inode_rec(rec);
651 if (IS_ERR(node->data))
657 rec = calloc(1, sizeof(*rec));
659 return ERR_PTR(-ENOMEM);
661 rec->extent_start = (u64)-1;
663 INIT_LIST_HEAD(&rec->backrefs);
664 INIT_LIST_HEAD(&rec->orphan_extents);
665 rec->holes = RB_ROOT;
667 node = malloc(sizeof(*node));
670 return ERR_PTR(-ENOMEM);
672 node->cache.start = ino;
673 node->cache.size = 1;
676 if (ino == BTRFS_FREE_INO_OBJECTID)
679 ret = insert_cache_extent(inode_cache, &node->cache);
681 return ERR_PTR(-EEXIST);
/*
 * Empty the list @orphan_extents, unlinking one entry at a time from the
 * head until the list is empty.
 *
 * NOTE(review): the release of each unlinked entry is not visible in this
 * excerpt.
 */
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
688 struct orphan_data_extent *orphan;
690 while (!list_empty(orphan_extents)) {
691 orphan = list_entry(orphan_extents->next,
692 struct orphan_data_extent, list);
693 list_del(&orphan->list);
/*
 * Tear down an inode record: unlink every backref, then release the orphan
 * extent list and the hole tree through their dedicated helpers.
 *
 * NOTE(review): any reference-count handling and the final frees of the
 * backrefs and of the record itself are not visible in this excerpt.
 */
698 static void free_inode_rec(struct inode_record *rec)
700 struct inode_backref *backref;
705 while (!list_empty(&rec->backrefs)) {
706 backref = to_inode_backref(rec->backrefs.next);
707 list_del(&backref->list);
710 free_orphan_data_extents(&rec->orphan_extents);
711 free_file_extent_holes(&rec->holes);
/*
 * Test whether @rec is fully verified: no errors, marked checked, inode
 * item found, link count matching the links found, and no backrefs left.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably non-zero when all conditions hold.
 */
715 static int can_free_inode_rec(struct inode_record *rec)
717 if (!rec->errors && rec->checked && rec->found_inode_item &&
718 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalize the checks on @rec and drop it from @inode_cache when clean.
 *
 * Steps visible in the code:
 *  - nothing is done before the inode item has been seen;
 *  - every backref that has both its dir item and dir index is checked
 *    against the file type derived from the inode mode, and is unlinked
 *    once it has no errors, its inode ref was found and the link counts
 *    agree;
 *  - once the record is checked and not being merged, type-specific
 *    consistency bits are set (directory size / stray file extents, stray
 *    dir items / nbytes / uncovered ranges, csum flags);
 *  - a record satisfying can_free_inode_rec() is removed from the cache.
 *
 * The record must not be shared at that point (BUG_ON(refs != 1)).
 *
 * NOTE(review): the early returns, the declaration of filetype, the origin
 * of no_holes and the frees are not visible in this excerpt.
 */
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724 struct inode_record *rec)
726 struct cache_extent *cache;
727 struct inode_backref *tmp, *backref;
728 struct ptr_node *node;
731 if (!rec->found_inode_item)
734 filetype = imode_to_type(rec->imode);
735 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736 if (backref->found_dir_item && backref->found_dir_index) {
737 if (backref->filetype != filetype)
738 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739 if (!backref->errors && backref->found_inode_ref &&
740 rec->nlink == rec->found_link) {
741 list_del(&backref->list);
747 if (!rec->checked || rec->merging)
750 if (S_ISDIR(rec->imode)) {
751 if (rec->found_size != rec->isize)
752 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753 if (rec->found_file_extent)
754 rec->errors |= I_ERR_ODD_FILE_EXTENT;
755 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756 if (rec->found_dir_item)
757 rec->errors |= I_ERR_ODD_DIR_ITEM;
758 if (rec->found_size != rec->nbytes)
759 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* extents must cover the whole file: no early end and no gap below isize */
760 if (rec->nlink > 0 && !no_holes &&
761 (rec->extent_end < rec->isize ||
762 first_extent_gap(&rec->holes) < rec->isize))
763 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
766 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767 if (rec->found_csum_item && rec->nodatasum)
768 rec->errors |= I_ERR_ODD_CSUM_ITEM;
769 if (rec->some_csum_missing && !rec->nodatasum)
770 rec->errors |= I_ERR_SOME_CSUM_MISSING;
773 BUG_ON(rec->refs != 1);
774 if (can_free_inode_rec(rec)) {
775 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776 node = container_of(cache, struct ptr_node, cache);
777 BUG_ON(node->data != rec);
778 remove_cache_extent(inode_cache, &node->cache);
/*
 * Search @root for the orphan item belonging to inode @ino.
 *
 * Builds a key with objectid BTRFS_ORPHAN_OBJECTID and type
 * BTRFS_ORPHAN_ITEM_KEY and performs a read-only search (no transaction,
 * ins_len and cow both zero); the path is released before returning.
 *
 * NOTE(review): the key offset assignment (presumably ino) and the
 * translation of the search result into the return value are not visible
 * in this excerpt.
 */
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
786 struct btrfs_path path;
787 struct btrfs_key key;
790 key.objectid = BTRFS_ORPHAN_OBJECTID;
791 key.type = BTRFS_ORPHAN_ITEM_KEY;
794 btrfs_init_path(&path);
795 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM in the active inode record.
 *
 * The active record must belong to key->objectid and must not be shared
 * (BUG_ON otherwise).  A second inode item for the same inode sets
 * I_ERR_DUP_INODE_ITEM.  Otherwise nlink, size, nbytes and mode are read
 * from the item at @slot of @eb, the NODATASUM flag is inspected, and the
 * record is marked as having its inode item before being handed to
 * maybe_free_inode_rec().
 *
 * NOTE(review): the return after the duplicate case, the statement under
 * the NODATASUM test, the condition guarding I_ERR_NO_ORPHAN_ITEM and the
 * return value are not visible in this excerpt.
 */
802 static int process_inode_item(struct extent_buffer *eb,
803 int slot, struct btrfs_key *key,
804 struct shared_node *active_node)
806 struct inode_record *rec;
807 struct btrfs_inode_item *item;
809 rec = active_node->current;
810 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811 if (rec->found_inode_item) {
812 rec->errors |= I_ERR_DUP_INODE_ITEM;
815 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816 rec->nlink = btrfs_inode_nlink(eb, item);
817 rec->isize = btrfs_inode_size(eb, item);
818 rec->nbytes = btrfs_inode_nbytes(eb, item);
819 rec->imode = btrfs_inode_mode(eb, item);
820 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
822 rec->found_inode_item = 1;
824 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec matching (@dir, name, @namelen), creating it if
 * it does not exist yet.
 *
 * Existing backrefs match when directory, name length and name bytes are
 * all equal; records for BTRFS_MULTIPLE_OBJECTIDS are special-cased.  A new
 * backref is allocated with room for the name plus a terminating NUL, its
 * header is zeroed, the name is copied and terminated, and it is appended
 * to rec->backrefs.
 *
 * NOTE(review): the name parameter line, the statements under each test,
 * the allocation-failure handling, the assignment of dir and the return
 * value are not visible in this excerpt.
 */
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
831 int namelen, u64 dir)
833 struct inode_backref *backref;
835 list_for_each_entry(backref, &rec->backrefs, list) {
836 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
838 if (backref->dir != dir || backref->namelen != namelen)
840 if (memcmp(name, backref->name, namelen))
845 backref = malloc(sizeof(*backref) + namelen + 1);
/* only the fixed-size header is cleared; the name is filled in below */
848 memset(backref, 0, sizeof(*backref));
850 backref->namelen = namelen;
851 memcpy(backref->name, name, namelen);
852 backref->name[namelen] = '\0';
853 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one reference to inode @ino from directory @dir under @name.
 *
 * The inode record is fetched for modification and the matching backref is
 * looked up or created.  Depending on @itemtype (DIR_INDEX, DIR_ITEM, or
 * INODE_REF / INODE_EXTREF) the corresponding found_* flag is set and the
 * index / filetype / ref_type is stored.  Seeing the same kind twice, or an
 * index or filetype that disagrees with an earlier sighting, sets the
 * matching REF_ERR_* bit.  @errors is OR-ed into the backref's error mask.
 * Finally the record is passed to maybe_free_inode_rec().
 *
 * NOTE(review): the error checks after get_inode_rec() and
 * get_inode_backref(), any other bookkeeping in the branches, the final
 * else branch and the return value are not visible in this excerpt.
 */
857 static int add_inode_backref(struct cache_tree *inode_cache,
858 u64 ino, u64 dir, u64 index,
859 const char *name, int namelen,
860 u8 filetype, u8 itemtype, int errors)
862 struct inode_record *rec;
863 struct inode_backref *backref;
865 rec = get_inode_rec(inode_cache, ino, 1);
867 backref = get_inode_backref(rec, name, namelen, dir);
870 backref->errors |= errors;
871 if (itemtype == BTRFS_DIR_INDEX_KEY) {
872 if (backref->found_dir_index)
873 backref->errors |= REF_ERR_DUP_DIR_INDEX;
874 if (backref->found_inode_ref && backref->index != index)
875 backref->errors |= REF_ERR_INDEX_UNMATCH;
876 if (backref->found_dir_item && backref->filetype != filetype)
877 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
879 backref->index = index;
880 backref->filetype = filetype;
881 backref->found_dir_index = 1;
882 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
884 if (backref->found_dir_item)
885 backref->errors |= REF_ERR_DUP_DIR_ITEM;
886 if (backref->found_dir_index && backref->filetype != filetype)
887 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
889 backref->filetype = filetype;
890 backref->found_dir_item = 1;
891 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893 if (backref->found_inode_ref)
894 backref->errors |= REF_ERR_DUP_INODE_REF;
895 if (backref->found_dir_index && backref->index != index)
896 backref->errors |= REF_ERR_INDEX_UNMATCH;
898 backref->index = index;
900 backref->ref_type = itemtype;
901 backref->found_inode_ref = 1;
906 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold the information gathered in @src into @dst, which lives in
 * @dst_cache.
 *
 * Every backref of @src is replayed through add_inode_backref() once per
 * kind of reference it carries (dir index, dir item, inode ref).  The
 * found_* flags are OR-ed in, holes are copied when @src's first gap lies
 * before @dst's, link and size counters are accumulated, and the extent
 * range is extended; an overlap sets I_ERR_FILE_EXTENT_OVERLAP and a gap
 * between the two ranges is recorded as a hole.  Error bits are merged and
 * the inode item fields are taken from @src when @dst has none; if both
 * have one, I_ERR_DUP_INODE_ITEM is set.
 *
 * NOTE(review): the maintenance of dir_count and dst->merging, the error
 * handling of the hole operations and the return value are not visible in
 * this excerpt.
 */
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911 struct cache_tree *dst_cache)
913 struct inode_backref *backref;
918 list_for_each_entry(backref, &src->backrefs, list) {
919 if (backref->found_dir_index) {
920 add_inode_backref(dst_cache, dst->ino, backref->dir,
921 backref->index, backref->name,
922 backref->namelen, backref->filetype,
923 BTRFS_DIR_INDEX_KEY, backref->errors);
925 if (backref->found_dir_item) {
927 add_inode_backref(dst_cache, dst->ino,
928 backref->dir, 0, backref->name,
929 backref->namelen, backref->filetype,
930 BTRFS_DIR_ITEM_KEY, backref->errors);
932 if (backref->found_inode_ref) {
933 add_inode_backref(dst_cache, dst->ino,
934 backref->dir, backref->index,
935 backref->name, backref->namelen, 0,
936 backref->ref_type, backref->errors);
940 if (src->found_dir_item)
941 dst->found_dir_item = 1;
942 if (src->found_file_extent)
943 dst->found_file_extent = 1;
944 if (src->found_csum_item)
945 dst->found_csum_item = 1;
946 if (src->some_csum_missing)
947 dst->some_csum_missing = 1;
948 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949 ret = copy_file_extent_holes(&dst->holes, &src->holes);
/* dir_count is subtracted so those links are not added to dst twice */
954 BUG_ON(src->found_link < dir_count);
955 dst->found_link += src->found_link - dir_count;
956 dst->found_size += src->found_size;
957 if (src->extent_start != (u64)-1) {
958 if (dst->extent_start == (u64)-1) {
959 dst->extent_start = src->extent_start;
960 dst->extent_end = src->extent_end;
962 if (dst->extent_end > src->extent_start)
963 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964 else if (dst->extent_end < src->extent_start) {
965 ret = add_file_extent_hole(&dst->holes,
967 src->extent_start - dst->extent_end);
969 if (dst->extent_end < src->extent_end)
970 dst->extent_end = src->extent_end;
974 dst->errors |= src->errors;
975 if (src->found_inode_item) {
976 if (!dst->found_inode_item) {
977 dst->nlink = src->nlink;
978 dst->isize = src->isize;
979 dst->nbytes = src->nbytes;
980 dst->imode = src->imode;
981 dst->nodatasum = src->nodatasum;
982 dst->found_inode_item = 1;
984 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move or merge the inode records of @src_node into @dst_node.
 *
 * One reference is dropped from @src_node first.  The root_cache of the
 * source is processed, then its inode_cache: each entry is re-inserted into
 * the matching destination cache through a freshly allocated ptr_node.
 * When the destination already holds a record for the same inode (-EEXIST)
 * the two are merged with merge_inode_recs(), the merged record is marked
 * checked and passed to maybe_free_inode_rec(), and the source record is
 * freed.  Finally, if the source's current inode is beyond the
 * destination's, the destination's current record is closed out and
 * replaced by the record for that inode.
 *
 * NOTE(review): how the outcome of the refcount drop selects between
 * moving and sharing records, the loop scaffolding and the return value
 * are not visible in this excerpt.
 */
992 static int splice_shared_node(struct shared_node *src_node,
993 struct shared_node *dst_node)
995 struct cache_extent *cache;
996 struct ptr_node *node, *ins;
997 struct cache_tree *src, *dst;
998 struct inode_record *rec, *conflict;
1003 if (--src_node->refs == 0)
1005 if (src_node->current)
1006 current_ino = src_node->current->ino;
1008 src = &src_node->root_cache;
1009 dst = &dst_node->root_cache;
1011 cache = search_cache_extent(src, 0);
1013 node = container_of(cache, struct ptr_node, cache);
/* advance before the entry may be removed from the source tree */
1015 cache = next_cache_extent(cache);
1018 remove_cache_extent(src, &node->cache);
1021 ins = malloc(sizeof(*ins));
1023 ins->cache.start = node->cache.start;
1024 ins->cache.size = node->cache.size;
1028 ret = insert_cache_extent(dst, &ins->cache);
1029 if (ret == -EEXIST) {
1030 conflict = get_inode_rec(dst, rec->ino, 1);
1031 BUG_ON(IS_ERR(conflict));
1032 merge_inode_recs(rec, conflict, dst);
1034 conflict->checked = 1;
1035 if (dst_node->current == conflict)
1036 dst_node->current = NULL;
1038 maybe_free_inode_rec(dst, conflict);
1039 free_inode_rec(rec);
/* second pass: repeat the same procedure for the inode caches */
1046 if (src == &src_node->root_cache) {
1047 src = &src_node->inode_cache;
1048 dst = &dst_node->inode_cache;
1052 if (current_ino > 0 && (!dst_node->current ||
1053 current_ino > dst_node->current->ino)) {
1054 if (dst_node->current) {
1055 dst_node->current->checked = 1;
1056 maybe_free_inode_rec(dst, dst_node->current);
1058 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059 BUG_ON(IS_ERR(dst_node->current));
/*
 * Destructor callback for one cache entry of an inode record tree:
 * resolves the enclosing ptr_node and releases its inode record through
 * free_inode_rec().
 *
 * NOTE(review): the assignment of rec and the release of the ptr_node
 * itself are not visible in this excerpt.
 */
1064 static void free_inode_ptr(struct cache_extent *cache)
1066 struct ptr_node *node;
1067 struct inode_record *rec;
1069 node = container_of(cache, struct ptr_node, cache);
1071 free_inode_rec(rec);
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered at bytenr in the @shared cache tree.
 *
 * NOTE(review): the second parameter line, the handling of a failed lookup
 * and the return statements are not visible in this excerpt; presumably
 * NULL is returned when nothing is found.
 */
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1080 struct cache_extent *cache;
1081 struct shared_node *node;
1083 cache = lookup_cache_extent(shared, bytenr, 1);
1085 node = container_of(cache, struct shared_node, cache);
/*
 * Register a new shared_node for the block at @bytenr in @shared.
 *
 * The node is zero-allocated, keyed by [bytenr, bytenr + 1), given empty
 * root and inode caches, and inserted into the tree.
 *
 * NOTE(review): the allocation-failure handling, the use of @refs and the
 * return value are not visible in this excerpt.
 */
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1094 struct shared_node *node;
1096 node = calloc(1, sizeof(*node));
1099 node->cache.start = bytenr;
1100 node->cache.size = 1;
1101 cache_tree_init(&node->root_cache);
1102 cache_tree_init(&node->inode_cache);
1105 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk descends into the shared block at @bytenr on
 * @level.
 *
 * Nothing needs doing when @level already is the active level; otherwise
 * the active level must be above @level (BUG_ON).  The first visit creates
 * a shared_node, installs it in wc->nodes[level] and makes @level the
 * active level.  On a later visit the cached records are either discarded
 * (walk is at the root level of a root with zero refs, and this drops the
 * node's last reference) or spliced into the currently active node; a node
 * whose refcount reached zero is removed from wc->shared.
 *
 * NOTE(review): the branch structure between these cases, the frees and
 * the return values are not visible in this excerpt.
 */
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111 struct walk_control *wc, int level)
1113 struct shared_node *node;
1114 struct shared_node *dest;
1117 if (level == wc->active_node)
1120 BUG_ON(wc->active_node <= level);
1121 node = find_shared_node(&wc->shared, bytenr);
1123 ret = add_shared_node(&wc->shared, bytenr, refs);
1125 node = find_shared_node(&wc->shared, bytenr);
1126 wc->nodes[level] = node;
1127 wc->active_node = level;
1131 if (wc->root_level == wc->active_node &&
1132 btrfs_root_refs(&root->root_item) == 0) {
1133 if (--node->refs == 0) {
1134 free_inode_recs_tree(&node->root_cache);
1135 free_inode_recs_tree(&node->inode_cache);
1136 remove_cache_extent(&wc->shared, &node->cache);
1142 dest = wc->nodes[wc->active_node];
1143 splice_shared_node(node, dest);
1144 if (node->refs == 0) {
1145 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the tree walk leaves the shared block on @level.
 *
 * Nothing needs doing at the root level.  Otherwise a higher level i is
 * selected by scanning upwards from level + 1 (one must exist: BUG_ON),
 * the active node is detached from wc->nodes and level i becomes active.
 * Unless the walk is now at the root level of a root with zero refs, the
 * detached node's records are spliced into the new active node.
 *
 * NOTE(review): the criterion that stops the upward scan, the else branch
 * following the second BUG_ON and the return value are not visible in this
 * excerpt.
 */
1151 static int leave_shared_node(struct btrfs_root *root,
1152 struct walk_control *wc, int level)
1154 struct shared_node *node;
1155 struct shared_node *dest;
1158 if (level == wc->root_level)
1161 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1165 BUG_ON(i >= BTRFS_MAX_LEVEL);
1167 node = wc->nodes[wc->active_node];
1168 wc->nodes[wc->active_node] = NULL;
1169 wc->active_node = i;
1171 dest = wc->nodes[wc->active_node];
1172 if (wc->active_node < wc->root_level ||
1173 btrfs_root_refs(&root->root_item) > 0) {
1174 BUG_ON(node->refs <= 1);
1175 splice_shared_node(node, dest);
1177 BUG_ON(node->refs < 2);
1186 * 1 - if the root with id child_root_id is a child of root parent_root_id
1187 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1188 * has other root(s) as parent(s)
1189 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine how root child_root_id relates to root @parent_root_id, using
 * the tree root of root->fs_info.
 *
 * First a ROOT_REF item (parent_root_id, child_root_id) is searched for.
 * Then the ROOT_BACKREF items of child_root_id are scanned, moving to the
 * next leaf when the current one is exhausted; a backref whose offset
 * equals parent_root_id identifies the parent.  If the scan ends without a
 * match, 0 is returned when some other parent was seen and 2 when the child
 * has no parent at all.
 *
 * NOTE(review): the second parameter line, the handling of the search
 * results, the loop scaffolding and the early returns are not visible in
 * this excerpt.
 */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1194 struct btrfs_path path;
1195 struct btrfs_key key;
1196 struct extent_buffer *leaf;
1200 btrfs_init_path(&path);
1202 key.objectid = parent_root_id;
1203 key.type = BTRFS_ROOT_REF_KEY;
1204 key.offset = child_root_id;
1205 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1209 btrfs_release_path(&path);
1213 key.objectid = child_root_id;
1214 key.type = BTRFS_ROOT_BACKREF_KEY;
1216 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1222 leaf = path.nodes[0];
1223 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1227 leaf = path.nodes[0];
1230 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* stop once the items no longer belong to the child's backrefs */
1231 if (key.objectid != child_root_id ||
1232 key.type != BTRFS_ROOT_BACKREF_KEY)
1237 if (key.offset == parent_root_id) {
1238 btrfs_release_path(&path);
1245 btrfs_release_path(&path);
1248 return has_parent ? 0 : 2;
1251 static int process_dir_item(struct extent_buffer *eb,
1252 int slot, struct btrfs_key *key,
1253 struct shared_node *active_node)
1263 struct btrfs_dir_item *di;
1264 struct inode_record *rec;
1265 struct cache_tree *root_cache;
1266 struct cache_tree *inode_cache;
1267 struct btrfs_key location;
1268 char namebuf[BTRFS_NAME_LEN];
1270 root_cache = &active_node->root_cache;
1271 inode_cache = &active_node->inode_cache;
1272 rec = active_node->current;
1273 rec->found_dir_item = 1;
1275 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276 total = btrfs_item_size_nr(eb, slot);
1277 while (cur < total) {
1279 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280 name_len = btrfs_dir_name_len(eb, di);
1281 data_len = btrfs_dir_data_len(eb, di);
1282 filetype = btrfs_dir_type(eb, di);
1284 rec->found_size += name_len;
1285 if (cur + sizeof(*di) + name_len > total ||
1286 name_len > BTRFS_NAME_LEN) {
1287 error = REF_ERR_NAME_TOO_LONG;
1289 if (cur + sizeof(*di) > total)
1291 len = min_t(u32, total - cur - sizeof(*di),
1298 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1300 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301 key->offset != btrfs_name_hash(namebuf, len)) {
1302 rec->errors |= I_ERR_ODD_DIR_ITEM;
1303 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304 key->objectid, key->offset, namebuf, len, filetype,
1305 key->offset, btrfs_name_hash(namebuf, len));
1308 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309 add_inode_backref(inode_cache, location.objectid,
1310 key->objectid, key->offset, namebuf,
1311 len, filetype, key->type, error);
1312 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313 add_inode_backref(root_cache, location.objectid,
1314 key->objectid, key->offset,
1315 namebuf, len, filetype,
1319 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320 location.type, key->objectid, key->offset);
1321 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322 key->objectid, key->offset, namebuf,
1323 len, filetype, key->type, error);
1326 len = sizeof(*di) + name_len + data_len;
1327 di = (struct btrfs_dir_item *)((char *)di + len);
1330 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item and record a backref for every name packed
 * into it.
 *
 * For each entry the name length and index are read.  An entry whose name
 * would cross the item boundary or exceed BTRFS_NAME_LEN is flagged with
 * REF_ERR_NAME_TOO_LONG and only the part that fits is read.  The name is
 * copied into a local buffer and passed to add_inode_backref() with
 * key->objectid as the inode and key->offset as the directory.
 *
 * NOTE(review): the local declarations, the break taken when not even the
 * ref header fits, the else branch, the advance of cur and the return value
 * are not visible in this excerpt.
 */
1336 static int process_inode_ref(struct extent_buffer *eb,
1337 int slot, struct btrfs_key *key,
1338 struct shared_node *active_node)
1346 struct cache_tree *inode_cache;
1347 struct btrfs_inode_ref *ref;
1348 char namebuf[BTRFS_NAME_LEN];
1350 inode_cache = &active_node->inode_cache;
1352 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353 total = btrfs_item_size_nr(eb, slot);
1354 while (cur < total) {
1355 name_len = btrfs_inode_ref_name_len(eb, ref);
1356 index = btrfs_inode_ref_index(eb, ref);
1358 /* inode_ref + namelen should not cross item boundary */
1359 if (cur + sizeof(*ref) + name_len > total ||
1360 name_len > BTRFS_NAME_LEN) {
1361 if (total < cur + sizeof(*ref))
1364 /* Still try to read out the remaining part */
1365 len = min_t(u32, total - cur - sizeof(*ref),
1367 error = REF_ERR_NAME_TOO_LONG;
1373 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374 add_inode_backref(inode_cache, key->objectid, key->offset,
1375 index, namebuf, len, 0, key->type, error);
/* step over the whole on-disk entry: header plus the full name */
1377 len = sizeof(*ref) + name_len;
1378 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item and record a backref for every name packed
 * into it.
 *
 * For each entry the name length, index and parent directory are read.
 * Names longer than BTRFS_NAME_LEN are truncated to that length and flagged
 * with REF_ERR_NAME_TOO_LONG.  The name is copied into a local buffer and
 * passed to add_inode_backref() with key->objectid as the inode and the
 * extref's parent as the directory.
 *
 * NOTE(review): unlike process_inode_ref(), no check that the entry stays
 * inside the item is visible here - confirm whether one is needed.  The
 * local declarations, the body of the in-range branch, the advance of cur
 * and the return value are not visible in this excerpt.
 */
1384 static int process_inode_extref(struct extent_buffer *eb,
1385 int slot, struct btrfs_key *key,
1386 struct shared_node *active_node)
1395 struct cache_tree *inode_cache;
1396 struct btrfs_inode_extref *extref;
1397 char namebuf[BTRFS_NAME_LEN];
1399 inode_cache = &active_node->inode_cache;
1401 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402 total = btrfs_item_size_nr(eb, slot);
1403 while (cur < total) {
1404 name_len = btrfs_inode_extref_name_len(eb, extref);
1405 index = btrfs_inode_extref_index(eb, extref);
1406 parent = btrfs_inode_extref_parent(eb, extref);
1407 if (name_len <= BTRFS_NAME_LEN) {
1411 len = BTRFS_NAME_LEN;
1412 error = REF_ERR_NAME_TOO_LONG;
1414 read_extent_buffer(eb, namebuf,
1415 (unsigned long)(extref + 1), len);
1416 add_inode_backref(inode_cache, key->objectid, parent,
1417 index, namebuf, len, 0, key->type, error);
/* step over the whole on-disk entry: header plus the full name */
1419 len = sizeof(*extref) + name_len;
1420 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Validate one file extent item and fold it into the inode record that
 * @active_node is currently tracking.
 *
 * The record's extent range is extended (an overlap with the previous
 * extent sets I_ERR_FILE_EXTENT_OVERLAP, a gap is stored through
 * add_file_extent_hole()), found_size is increased, and inconsistent
 * fields in the item set I_ERR_BAD_FILE_EXTENT.  For extents that have a
 * disk_bytenr and do not live in the data reloc tree, the covering csums
 * are counted with count_csum_range() and the result is noted in the
 * record.
 *
 * @root:        tree the leaf belongs to; supplies fs_info and sectorsize
 * @eb, @slot:   leaf and slot of the file extent item
 * @key:         key of the item; key->offset is the file offset
 * @active_node: its ->current must be the record of inode key->objectid
 */
1427 static int process_file_extent(struct btrfs_root *root,
1428 struct extent_buffer *eb,
1429 int slot, struct btrfs_key *key,
1430 struct shared_node *active_node)
1432 struct inode_record *rec;
1433 struct btrfs_file_extent_item *fi;
1435 u64 disk_bytenr = 0;
1436 u64 extent_offset = 0;
/* sectorsize - 1: used below as an alignment mask */
1437 u64 mask = root->fs_info->sectorsize - 1;
1441 rec = active_node->current;
1442 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1443 rec->found_file_extent = 1;
/* extent_start still holds its (u64)-1 marker: this is the first extent */
1445 if (rec->extent_start == (u64)-1) {
1446 rec->extent_start = key->offset;
1447 rec->extent_end = key->offset;
/* Compare the end of the range seen so far with this extent's start */
1450 if (rec->extent_end > key->offset)
1451 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1452 else if (rec->extent_end < key->offset) {
1453 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1454 key->offset - rec->extent_end);
1459 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1460 extent_type = btrfs_file_extent_type(eb, fi);
/* Inline data: counted as is, then rounded up to a sector multiple */
1462 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1463 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1465 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1466 rec->found_size += num_bytes;
1467 num_bytes = (num_bytes + mask) & ~mask;
1468 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1469 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1470 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1471 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1472 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned */
1473 if (num_bytes == 0 || (num_bytes & mask))
1474 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced range must fit inside ram_bytes */
1475 if (num_bytes + extent_offset >
1476 btrfs_file_extent_ram_bytes(eb, fi))
1477 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A preallocated extent must not carry any encoding */
1478 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1479 (btrfs_file_extent_compression(eb, fi) ||
1480 btrfs_file_extent_encryption(eb, fi) ||
1481 btrfs_file_extent_other_encoding(eb, fi)))
1482 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk_bytenr add to found_size */
1483 if (disk_bytenr > 0)
1484 rec->found_size += num_bytes;
1486 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Remember where this extent ends for the next overlap/hole test */
1488 rec->extent_end = key->offset + num_bytes;
1491 * The data reloc tree will copy full extents into its inode and then
1492 * copy the corresponding csums. Because the extent it copied could be
1493 * a preallocated extent that hasn't been written to yet there may be no
1494 * csums to copy, ergo we won't have csums for our file extent. This is
1495 * ok so just don't bother checking csums if the inode belongs to the
1498 if (disk_bytenr > 0 &&
1499 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Select the on-disk range whose csums are counted */
1501 if (btrfs_file_extent_compression(eb, fi))
1502 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1504 disk_bytenr += extent_offset;
1506 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
/* REG: note present/missing csums; PREALLOC: csums are flagged as odd */
1510 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1512 rec->found_csum_item = 1;
1513 if (found < num_bytes)
1514 rec->some_csum_missing = 1;
1515 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1517 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the shared node that @wc marks as
 * active.
 *
 * For each item the function makes sure active_node->current is the inode
 * record matching the item's objectid: when the objectid moves past the
 * current record, that record is marked checked and offered to
 * maybe_free_inode_rec() before the next one is fetched with
 * get_inode_rec().  The item is then handed to the handler for its key
 * type (dir item/index, inode ref, inode extref, inode item, file extent).
 */
1523 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1524 struct walk_control *wc)
1526 struct btrfs_key key;
1530 struct cache_tree *inode_cache;
1531 struct shared_node *active_node;
/* Special case: the active node is the root level and the root has 0 refs */
1533 if (wc->root_level == wc->active_node &&
1534 btrfs_root_refs(&root->root_item) == 0)
1537 active_node = wc->nodes[wc->active_node];
1538 inode_cache = &active_node->inode_cache;
1539 nritems = btrfs_header_nritems(eb);
1540 for (i = 0; i < nritems; i++) {
1541 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space objects and orphan items are tested for before any lookup */
1543 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1545 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* No record yet, or the item belongs to a higher inode number */
1548 if (active_node->current == NULL ||
1549 active_node->current->ino < key.objectid) {
1550 if (active_node->current) {
1551 active_node->current->checked = 1;
1552 maybe_free_inode_rec(inode_cache,
1553 active_node->current);
1555 active_node->current = get_inode_rec(inode_cache,
1557 BUG_ON(IS_ERR(active_node->current));
/* Hand the item to the handler for its key type */
1560 case BTRFS_DIR_ITEM_KEY:
1561 case BTRFS_DIR_INDEX_KEY:
1562 ret = process_dir_item(eb, i, &key, active_node);
1564 case BTRFS_INODE_REF_KEY:
1565 ret = process_inode_ref(eb, i, &key, active_node);
1567 case BTRFS_INODE_EXTREF_KEY:
1568 ret = process_inode_extref(eb, i, &key, active_node);
1570 case BTRFS_INODE_ITEM_KEY:
1571 ret = process_inode_item(eb, i, &key, active_node);
1573 case BTRFS_EXTENT_DATA_KEY:
1574 ret = process_file_extent(root, eb, i, &key,
/* Forward declarations for helpers that process_one_leaf_v2() calls */
1584 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1585 struct extent_buffer *eb, struct node_refs *nrefs,
1586 u64 level, int check_all);
1587 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1588 unsigned int ext_ref);
/*
 * Check the inodes stored in the leaf at path->nodes[0], one inode at a
 * time, with check_inode_item().
 *
 * The scan first positions the path on the first inode item or the first
 * change of inode number.  The result bits of check_inode_item() are
 * collected in err.  check_inode_item() may move the path to another leaf;
 * when that happens the nodes from the root level downwards are compared
 * with the bytenrs cached in @nrefs and refreshed with update_nodes_refs()
 * where they differ.
 *
 * @level: tree nodes stored in @path below *level are released near the end
 */
1591 * Returns >0 Found error, not fatal, should continue
1592 * Returns <0 Fatal error, must exit the whole check
1593 * Returns 0 No errors found
1595 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1596 struct node_refs *nrefs, int *level, int ext_ref)
1598 struct extent_buffer *cur = path->nodes[0];
1599 struct btrfs_key key;
1603 int root_level = btrfs_header_level(root->node);
1605 int ret = 0; /* Final return value */
1606 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a later leaf switch */
1608 cur_bytenr = cur->start;
1610 /* skip to first inode item or the first inode number change */
1611 nritems = btrfs_header_nritems(cur);
1612 for (i = 0; i < nritems; i++) {
1613 btrfs_item_key_to_cpu(cur, &key, i);
1615 first_ino = key.objectid;
1616 if (key.type == BTRFS_INODE_ITEM_KEY ||
1617 (first_ino && first_ino != key.objectid))
1621 path->slots[0] = nritems;
1627 err |= check_inode_item(root, path, ext_ref);
1629 /* modify cur since check_inode_item may change path */
1630 cur = path->nodes[0];
1632 if (err & LAST_ITEM)
1635 /* still have inode items in this leaf */
1636 if (cur->start == cur_bytenr)
1640 * we have switched to another leaf, above nodes may
1641 * have changed, here walk down the path, if a node
1642 * or leaf is shared, check whether we can skip this
1645 for (i = root_level; i >= 0; i--) {
/* Cached entry already describes this block */
1646 if (path->nodes[i]->start == nrefs->bytenr[i])
1649 ret = update_nodes_refs(root, path->nodes[i]->start,
1650 path->nodes[i], nrefs, i, 0);
1654 if (!nrefs->need_check[i]) {
/* Drop the path's buffers below *level */
1660 for (i = 0; i < *level; i++) {
1661 free_extent_buffer(path->nodes[i]);
1662 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks referenced by @node, starting with
 * pointer @slot and continuing up to the node's last pointer.  Each child
 * is requested with the bytenr and generation stored in its pointer.
 */
1671 static void reada_walk_down(struct btrfs_root *root,
1672 struct extent_buffer *node, int slot)
1674 struct btrfs_fs_info *fs_info = root->fs_info;
1681 level = btrfs_header_level(node);
1685 nritems = btrfs_header_nritems(node);
1686 for (i = slot; i < nritems; i++) {
1687 bytenr = btrfs_node_blockptr(node, i);
1688 ptr_gen = btrfs_node_ptr_generation(node, i);
1689 readahead_tree_block(fs_info, bytenr, ptr_gen);
1694 * Check the child node/leaf by the following condition:
1695 * 1. the first item key of the node/leaf should be the same with the one
1697 * 2. block in parent node should match the child node/leaf.
1698 * 3. generation of parent node and child's header should be consistent.
1700 * Or the child node/leaf pointed by the key in parent is not valid.
1702 * We hope to check leaf owner too, but since subvol may share leaves,
1703 * which makes leaf owner check not so strong, key check should be
1704 * sufficient enough for that case.
/*
 * @parent: node holding the pointer that is being verified
 * @slot:   index of that pointer inside @parent
 * @child:  block that was read for the pointer
 *
 * Every mismatch is reported on stderr as "wanted" (the value recorded in
 * @parent) versus "have" (the value found in @child).
 */
1706 static int check_child_node(struct extent_buffer *parent, int slot,
1707 struct extent_buffer *child)
1709 struct btrfs_key parent_key;
1710 struct btrfs_key child_key;
1713 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* The first key of a leaf is an item key, that of a node is a node key */
1714 if (btrfs_header_level(child) == 0)
1715 btrfs_item_key_to_cpu(child, &child_key, 0);
1717 btrfs_node_key_to_cpu(child, &child_key, 0);
1719 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1722 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1723 parent_key.objectid, parent_key.type, parent_key.offset,
1724 child_key.objectid, child_key.type, child_key.offset);
1726 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1728 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1729 btrfs_node_blockptr(parent, slot),
1730 btrfs_header_bytenr(child));
1732 if (btrfs_node_ptr_generation(parent, slot) !=
1733 btrfs_header_generation(child)) {
/* wanted = generation in the parent's pointer, have = child's header */
1735 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1736 btrfs_node_ptr_generation(parent, slot),
1737 btrfs_header_generation(child));
1743 * For a tree node or leaf that is shared, we don't need to iterate it
1744 * in every fs or file tree check. Here we find all its root ids, and only check
1745 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root is the tree that should check a block, given
 * @roots, the list of root ids that reference the block.
 *
 * Lists with zero or one entry are handled up front.  Otherwise the first
 * entry of the list's rb-tree (rb_first()) is compared with root->objectid.
 */
1747 static int need_check(struct btrfs_root *root, struct ulist *roots)
1749 struct rb_node *node;
1750 struct ulist_node *u;
1753 * @roots can be empty if it belongs to tree reloc tree
1754 * In that case, we should always check the leaf, as we can't use
1755 * the tree owner to ensure some other root will check it.
1757 if (roots->nnodes == 1 || roots->nnodes == 0)
/* First node in rb-tree order; presumably the smallest root id */
1760 node = rb_first(&roots->root);
1761 u = rb_entry(node, struct ulist_node, rb_node);
1763 * current root id is not smallest, we skip it and let it be checked
1764 * in the fs or file tree who hash the smallest root id.
1766 if (root->objectid != u->val)
/*
 * Work out whether tree block @eb should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and set or clear that bit in *flags_ret.
 *
 * A few checks on @root and on the header of @eb come first (root objectid
 * below BTRFS_FIRST_FREE_OBJECTID, @eb being the root block, the RELOC
 * header flag, header owner equal to root->objectid).  After those, the
 * block's extent item is looked up in the extent tree; its flags and its
 * inline refs (a BTRFS_TREE_BLOCK_REF_KEY whose offset equals the owner)
 * are examined.
 */
1772 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1775 struct btrfs_root *extent_root = root->fs_info->extent_root;
1776 struct btrfs_root_item *ri = &root->root_item;
1777 struct btrfs_extent_inline_ref *iref;
1778 struct btrfs_extent_item *ei;
1779 struct btrfs_key key;
1780 struct btrfs_path *path = NULL;
1791 * Except file/reloc tree, we can not have FULL BACKREF MODE
1793 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* @eb is the root block of @root */
1797 if (eb->start == btrfs_root_bytenr(ri))
1800 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1803 owner = btrfs_header_owner(eb);
1804 if (owner == root->objectid)
1807 path = btrfs_alloc_path();
/* Search with offset (u64)-1 for the extent item of this block */
1811 key.objectid = btrfs_header_bytenr(eb);
1813 key.offset = (u64)-1;
1815 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1822 ret = btrfs_previous_extent_item(extent_root, path,
1828 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on eb is the extent tree leaf, not the block being classified */
1830 eb = path->nodes[0];
1831 slot = path->slots[0];
1832 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1834 flags = btrfs_extent_flags(eb, ei);
1835 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs follow the extent item and run up to the item's end */
1838 ptr = (unsigned long)(ei + 1);
1839 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keys have a tree block info before the inline refs */
1841 if (key.type == BTRFS_EXTENT_ITEM_KEY)
1842 ptr += sizeof(struct btrfs_tree_block_info);
1845 /* Reached extent item ends normally */
1849 /* Beyond extent item end, wrong item size */
1851 error("extent item at bytenr %llu slot %d has wrong size",
1856 iref = (struct btrfs_extent_inline_ref *)ptr;
1857 offset = btrfs_extent_inline_ref_offset(eb, iref);
1858 type = btrfs_extent_inline_ref_type(eb, iref);
1860 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1862 ptr += btrfs_extent_inline_ref_size(type);
1866 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1870 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1872 btrfs_free_path(path);
/*
 * Refresh the per-level cache in @nrefs for the tree block at @bytenr.
 *
 * @root:      tree being walked; its root level is special-cased below
 * @bytenr:    start of the block, or (u64)-1 (see the note below)
 * @eb:        the block itself when available; only used with @check_all
 * @nrefs:     cache with bytenr, refs, full_backref, checked and need_check
 *             entries per level
 * @level:     level of the block, used as index into the @nrefs arrays
 * @check_all: when set and @eb is given, full_backref[level] is recomputed
 *             with calc_extent_flag_v2()
 */
1877 * for a tree node or leaf, we record its reference count, so later if we still
1878 * process this node or leaf, don't need to compute its reference count again.
1880 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1882 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1883 struct extent_buffer *eb, struct node_refs *nrefs,
1884 u64 level, int check_all)
1886 struct ulist *roots;
1889 int root_level = btrfs_header_level(root->node);
/* The cache entry of this level already describes @bytenr */
1893 if (nrefs->bytenr[level] == bytenr)
1896 if (bytenr != (u64)-1) {
1897 /* the return value of this function seems a mistake */
1898 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1899 level, 1, &refs, &flags);
1901 if (ret < 0 && !check_all)
/* Start a fresh cache entry for this level */
1904 nrefs->bytenr[level] = bytenr;
1905 nrefs->refs[level] = refs;
1906 nrefs->full_backref[level] = 0;
1907 nrefs->checked[level] = 0;
1910 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1915 check = need_check(root, roots);
1917 nrefs->need_check[level] = check;
1920 nrefs->need_check[level] = 1;
1922 if (level == root_level) {
1923 nrefs->need_check[level] = 1;
1926 * The node refs may have not been
1927 * updated if upper needs checking (the
1928 * lowest root_objectid) the node can
1931 nrefs->need_check[level] =
1932 nrefs->need_check[level + 1];
/* With the block at hand, derive the full backref bit from the extent tree */
1938 if (check_all && eb) {
1939 calc_extent_flag_v2(root, eb, &flags);
1940 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1941 nrefs->full_backref[level] = 1;
1948 * @level if @level == -1 it means an extent data item,
1949 * otherwise a normal tree block.
/*
 * Decide how strictly an extent found at @level of @root has to be checked.
 *
 * Levels outside the range -1 .. root level and the root level itself are
 * handled up front.  For the remaining levels, the cached reference counts
 * in @nrefs of every level above @level, up to the root level, are
 * examined for a count greater than one.
 */
1951 static int should_check_extent_strictly(struct btrfs_root *root,
1952 struct node_refs *nrefs, int level)
1954 int root_level = btrfs_header_level(root->node);
1956 if (level > root_level || level < -1)
1958 if (level == root_level)
1961 * if the upper node is marked full backref, it should contain shared
1962 * backref of the parent (except owner == root->objectid).
/* Walk upwards from the parent of @level to the root level */
1964 while (++level <= root_level)
1965 if (nrefs->refs[level] > 1)
/*
 * Descend from path->nodes[*level] towards the leaves, handing every leaf
 * that is reached to process_one_leaf().
 *
 * Reference counts obtained from btrfs_lookup_extent_info() are cached per
 * level in @nrefs so a block seen again is not looked up twice, and
 * enter_shared_node() is consulted for the starting block and for each
 * child.  A child block is taken from the cache or read from disk, then
 * validated with check_child_node() and btrfs_check_leaf() /
 * btrfs_check_node() before it becomes the path's node one level down.
 * An unreadable child is recorded with btrfs_add_corrupt_extent_record().
 *
 * @level: in/out, current level of the walk; decremented on each descent
 */
1971 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1972 struct walk_control *wc, int *level,
1973 struct node_refs *nrefs)
1975 enum btrfs_tree_block_status status;
1978 struct btrfs_fs_info *fs_info = root->fs_info;
1979 struct extent_buffer *next;
1980 struct extent_buffer *cur;
1984 WARN_ON(*level < 0);
1985 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count of the starting block when it matches */
1987 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1988 refs = nrefs->refs[*level];
1991 ret = btrfs_lookup_extent_info(NULL, root,
1992 path->nodes[*level]->start,
1993 *level, 1, &refs, NULL);
1998 nrefs->bytenr[*level] = path->nodes[*level]->start;
1999 nrefs->refs[*level] = refs;
2003 ret = enter_shared_node(root, path->nodes[*level]->start,
2011 while (*level >= 0) {
2012 WARN_ON(*level < 0);
2013 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014 cur = path->nodes[*level];
2016 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been visited */
2019 if (path->slots[*level] >= btrfs_header_nritems(cur))
2022 ret = process_one_leaf(root, cur, wc);
/* Child pointer at the current slot */
2027 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2028 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme for the child's ref count */
2030 if (bytenr == nrefs->bytenr[*level - 1]) {
2031 refs = nrefs->refs[*level - 1];
2033 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2034 *level - 1, 1, &refs, NULL);
2038 nrefs->bytenr[*level - 1] = bytenr;
2039 nrefs->refs[*level - 1] = refs;
2044 ret = enter_shared_node(root, bytenr, refs,
2047 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read the block */
2052 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2053 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2054 free_extent_buffer(next);
2055 reada_walk_down(root, cur, path->slots[*level]);
2056 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2057 if (!extent_buffer_uptodate(next)) {
2058 struct btrfs_key node_key;
2060 btrfs_node_key_to_cpu(path->nodes[*level],
2062 path->slots[*level]);
2063 btrfs_add_corrupt_extent_record(root->fs_info,
2065 path->nodes[*level]->start,
2066 root->fs_info->nodesize,
2073 ret = check_child_node(cur, path->slots[*level], next);
2075 free_extent_buffer(next);
2080 if (btrfs_is_leaf(next))
2081 status = btrfs_check_leaf(root, NULL, next);
2083 status = btrfs_check_node(root, NULL, next);
2084 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2085 free_extent_buffer(next);
/* Descend: the child replaces whatever the path held one level down */
2090 *level = *level - 1;
2091 free_extent_buffer(path->nodes[*level]);
2092 path->nodes[*level] = next;
2093 path->slots[*level] = 0;
/* Mark the block at *level as fully consumed */
2096 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Add the tree block at path->nodes[level] to the global byte counters.
 *
 * total_btree_bytes always grows by the block's length;
 * total_fs_tree_bytes grows when fs_root_objectid(root->objectid) holds
 * and total_extent_tree_bytes when the block's header owner is the extent
 * tree.  btree_space_waste grows by the free space of a leaf, or by the
 * unused pointer slots of a node times sizeof(struct btrfs_key_ptr).
 */
2101 * Update global fs information.
2103 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2107 struct extent_buffer *eb = path->nodes[level];
2109 total_btree_bytes += eb->len;
2110 if (fs_root_objectid(root->objectid))
2111 total_fs_tree_bytes += eb->len;
2112 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2113 total_extent_tree_bytes += eb->len;
2116 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* Pointer slots the node could hold minus the ones in use */
2118 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2119 btrfs_header_nritems(eb));
2120 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2125 * This function only handles BACKREF_MISSING,
2126 * If corresponding extent item exists, increase the ref, else insert an extent
2129 * Returns error bits after repair.
/*
 * @trans: transaction used for the extent tree modifications
 * @root:  tree that references @node
 * @node:  tree block whose backref is missing
 * @nrefs: per-level cache; refs[level] and full_backref[level] are updated
 * @level: level of @node, used as index into @nrefs
 * @err:   error bits found for @node; nothing is done unless
 *         BACKREF_MISSING is set, and that bit is cleared on success
 */
2131 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2132 struct btrfs_root *root,
2133 struct extent_buffer *node,
2134 struct node_refs *nrefs, int level, int err)
2136 struct btrfs_fs_info *fs_info = root->fs_info;
2137 struct btrfs_root *extent_root = fs_info->extent_root;
2138 struct btrfs_path path;
2139 struct btrfs_extent_item *ei;
2140 struct btrfs_tree_block_info *bi;
2141 struct btrfs_key key;
2142 struct extent_buffer *eb;
2143 u32 size = sizeof(*ei);
2144 u32 node_size = root->fs_info->nodesize;
2145 int insert_extent = 0;
2146 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2147 int root_level = btrfs_header_level(root->node);
2152 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2155 if ((err & BACKREF_MISSING) == 0)
2158 WARN_ON(level > BTRFS_MAX_LEVEL);
2161 btrfs_init_path(&path);
2162 bytenr = btrfs_header_bytenr(node);
2163 owner = btrfs_header_owner(node);
2164 generation = btrfs_header_generation(node);
2166 key.objectid = bytenr;
2168 key.offset = (u64)-1;
2170 /* Search for the extent item */
2171 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2177 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2181 /* calculate if the extent item flag is full backref or not */
2182 if (nrefs->full_backref[level] != 0)
2183 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2185 /* insert an extent item */
2186 if (insert_extent) {
2187 struct btrfs_disk_key copy_key;
2189 generation = btrfs_header_generation(node);
/* A full-backref parent and a foreign owner make this block full backref */
2191 if (level < root_level && nrefs->full_backref[level + 1] &&
2192 owner != root->objectid) {
2193 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Without skinny metadata the item also carries a tree block info */
2196 key.objectid = bytenr;
2197 if (!skinny_metadata) {
2198 key.type = BTRFS_EXTENT_ITEM_KEY;
2199 key.offset = node_size;
2200 size += sizeof(*bi);
2202 key.type = BTRFS_METADATA_ITEM_KEY;
2206 btrfs_release_path(&path);
2207 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2213 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The new item starts with zero refs; the ref is added further down */
2215 btrfs_set_extent_refs(eb, ei, 0);
2216 btrfs_set_extent_generation(eb, ei, generation);
2217 btrfs_set_extent_flags(eb, ei, flags);
2219 if (!skinny_metadata) {
2220 bi = (struct btrfs_tree_block_info *)(ei + 1);
2221 memset_extent_buffer(eb, 0, (unsigned long)bi,
2223 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2224 btrfs_set_disk_key_type(&copy_key, 0);
2225 btrfs_set_disk_key_offset(&copy_key, 0);
2227 btrfs_set_tree_block_level(eb, bi, level);
2228 btrfs_set_tree_block_key(eb, bi, &copy_key);
2230 btrfs_mark_buffer_dirty(eb);
2231 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2232 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2234 nrefs->refs[level] = 0;
2235 nrefs->full_backref[level] =
2236 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2237 btrfs_release_path(&path);
/* Under a full-backref parent the new ref is keyed by that parent block */
2240 if (level < root_level && nrefs->full_backref[level + 1] &&
2241 owner != root->objectid)
2242 parent = nrefs->bytenr[level + 1];
2244 /* increase the ref */
2245 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2246 parent, root->objectid, level, 0);
2248 nrefs->refs[level]++;
2250 btrfs_release_path(&path);
2253 "failed to repair tree block ref start %llu root %llu due to %s",
2254 bytenr, root->objectid, strerror(-ret));
2256 printf("Added one tree block ref start %llu %s %llu\n",
2257 bytenr, parent ? "parent" : "root",
2258 parent ? parent : root->objectid);
2259 err &= ~BACKREF_MISSING;
/* Forward declarations for helpers that walk_down_tree_v2() calls */
2265 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2266 unsigned int ext_ref);
2267 static int check_tree_block_ref(struct btrfs_root *root,
2268 struct extent_buffer *eb, u64 bytenr,
2269 int level, u64 owner, struct node_refs *nrefs);
2270 static int check_leaf_items(struct btrfs_trans_handle *trans,
2271 struct btrfs_root *root, struct btrfs_path *path,
2272 struct node_refs *nrefs, int account_bytes);
2275 * @trans just for lowmem repair mode
2276 * @check_all if not 0 then check all tree block backrefs and items
2277 * if 0 then just check relationship of items in fs tree(s)
2279 * Returns >0 Found error, should continue
2280 * Returns <0 Fatal error, must exit the whole check
2281 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves, checking each block
 * on the way.
 *
 * For every block visited: its entry in @nrefs is refreshed with
 * update_nodes_refs(); its backref is verified with check_tree_block_ref()
 * and, where needed, repaired with repair_tree_block_ref(); its bytes are
 * accounted with account_bytes(); and the block is validated with
 * btrfs_check_leaf() or btrfs_check_node().  Leaves are then processed with
 * process_one_leaf_v2() and check_leaf_items().  Child blocks are read,
 * verified with check_child_node() and installed in @path one level down.
 *
 * @level: in/out, current level of the walk; decremented on each descent
 */
2283 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2284 struct btrfs_root *root, struct btrfs_path *path,
2285 int *level, struct node_refs *nrefs, int ext_ref,
2289 enum btrfs_tree_block_status status;
2292 struct btrfs_fs_info *fs_info = root->fs_info;
2293 struct extent_buffer *next;
2294 struct extent_buffer *cur;
2298 int account_file_data = 0;
2300 WARN_ON(*level < 0);
2301 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2303 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2304 path->nodes[*level], nrefs, *level, check_all);
2308 while (*level >= 0) {
2309 WARN_ON(*level < 0);
2310 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2311 cur = path->nodes[*level];
2312 bytenr = btrfs_header_bytenr(cur);
2313 check = nrefs->need_check[*level];
2315 if (btrfs_header_level(cur) != *level)
2318 * Update bytes accounting and check tree block ref
2319 * NOTE: Doing accounting and check before checking nritems
2320 * is necessary because of empty node/leaf.
2322 if ((check_all && !nrefs->checked[*level]) ||
2323 (!check_all && nrefs->need_check[*level])) {
2324 ret = check_tree_block_ref(root, cur,
2325 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2326 btrfs_header_owner(cur), nrefs);
2329 ret = repair_tree_block_ref(trans, root,
2330 path->nodes[*level], nrefs, *level, ret);
/* Account the block only when it needs checking and has references */
2333 if (check_all && nrefs->need_check[*level] &&
2334 nrefs->refs[*level]) {
2335 account_bytes(root, path, *level);
2336 account_file_data = 1;
2338 nrefs->checked[*level] = 1;
/* All slots of this block have been visited */
2341 if (path->slots[*level] >= btrfs_header_nritems(cur))
2344 /* Don't forget to validate the leaf/node */
2346 /* skip duplicate check */
2347 if (check || !check_all) {
2348 ret = btrfs_check_leaf(root, NULL, cur);
2349 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2357 ret = process_one_leaf_v2(root, path, nrefs,
2360 ret = check_leaf_items(trans, root, path,
2361 nrefs, account_file_data);
2365 if (check || !check_all) {
2366 ret = btrfs_check_node(root, NULL, cur);
2367 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child pointer at the current slot */
2374 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2375 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2377 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2382 * check all trees in check_chunks_and_extent_v2
2383 * check shared node once in check_fs_roots
2385 if (!check_all && !nrefs->need_check[*level - 1]) {
2386 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read the block */
2390 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392 free_extent_buffer(next);
2393 reada_walk_down(root, cur, path->slots[*level]);
2394 next = read_tree_block(fs_info, bytenr, ptr_gen);
2395 if (!extent_buffer_uptodate(next)) {
2396 struct btrfs_key node_key;
2398 btrfs_node_key_to_cpu(path->nodes[*level],
2400 path->slots[*level]);
2401 btrfs_add_corrupt_extent_record(fs_info,
2402 &node_key, path->nodes[*level]->start,
2403 fs_info->nodesize, *level);
2409 ret = check_child_node(cur, path->slots[*level], next);
2414 if (btrfs_is_leaf(next))
2415 status = btrfs_check_leaf(root, NULL, next);
2417 status = btrfs_check_node(root, NULL, next);
2418 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2419 free_extent_buffer(next);
/* Descend: the child replaces whatever the path held one level down */
2424 *level = *level - 1;
2425 free_extent_buffer(path->nodes[*level]);
2426 path->nodes[*level] = next;
2427 path->slots[*level] = 0;
2428 account_file_data = 0;
/* bytenr (u64)-1: only the full backref state of the new level is updated */
2430 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Climb @path from *level until a block with an unvisited slot is found.
 *
 * Levels are scanned upwards while the path has a node for them.  A block
 * whose slot is not yet its last one ends the scan.  Otherwise the buffer
 * at *level is released and cleared, and leave_shared_node() is called
 * when *level is the walk control's active node.
 */
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436 struct walk_control *wc, int *level)
2439 struct extent_buffer *leaf;
2441 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442 leaf = path->nodes[i];
/* There is at least one more slot to visit in this block */
2443 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2448 free_extent_buffer(path->nodes[*level]);
2449 path->nodes[*level] = NULL;
2450 BUG_ON(*level > wc->active_node);
2451 if (*level == wc->active_node)
2452 leave_shared_node(root, wc, *level);
/*
 * Climb @path from *level until a block with an unvisited slot is found.
 *
 * Levels are scanned upwards while the path has a node for them.  A block
 * whose slot is not yet its last one ends the scan; otherwise the buffer
 * at *level is released and its path entry cleared.
 */
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2463 struct extent_buffer *leaf;
2465 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466 leaf = path->nodes[i];
/* There is at least one more slot to visit in this block */
2467 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2472 free_extent_buffer(path->nodes[*level]);
2473 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The tests below look, in order, for: a missing inode item or any error
 * bit; nlink other than 1 or a non-zero found_link; an empty backref list;
 * a first backref without inode ref; a first backref whose index is not 0
 * or whose name is not ".."; and a first backref that has a dir index or
 * dir item.  Presumably each hit fails the check.
 */
2480 static int check_root_dir(struct inode_record *rec)
2482 struct inode_backref *backref;
2485 if (!rec->found_inode_item || rec->errors)
2487 if (rec->nlink != 1 || rec->found_link != 0)
2489 if (list_empty(&rec->backrefs))
/* Only the first backref of the list is examined */
2491 backref = to_inode_backref(rec->backrefs.next);
2492 if (!backref->found_inode_ref)
2494 if (backref->index != 0 || backref->namelen != 2 ||
2495 memcmp(backref->name, "..", 2))
2497 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with
 * rec->found_size.
 *
 * The inode item is located with a search for (rec->ino, INODE_ITEM,
 * (u64)-1); the key found is verified to belong to rec->ino before the
 * item is modified.  On success the buffer is marked dirty,
 * I_ERR_DIR_ISIZE_WRONG is cleared in the record and a message is
 * printed.  @path is released before returning.
 */
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505 struct btrfs_root *root, struct btrfs_path *path,
2506 struct inode_record *rec)
2508 struct btrfs_inode_item *ei;
2509 struct btrfs_key key;
2512 key.objectid = rec->ino;
2513 key.type = BTRFS_INODE_ITEM_KEY;
2514 key.offset = (u64)-1;
2516 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0: there is no item in front of the search position in this leaf */
2520 if (!path->slots[0]) {
2527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528 if (key.objectid != rec->ino) {
2533 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534 struct btrfs_inode_item);
2535 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536 btrfs_mark_buffer_dirty(path->nodes[0]);
2537 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539 root->root_key.objectid);
2541 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino with btrfs_add_orphan_item(), release
 * @path, and clear I_ERR_NO_ORPHAN_ITEM in the record.
 */
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546 struct btrfs_root *root,
2547 struct btrfs_path *path,
2548 struct inode_record *rec)
2552 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553 btrfs_release_path(path);
2555 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size.
 *
 * After the inode item has been found, the buffer is marked dirty,
 * I_ERR_FILE_NBYTES_WRONG is cleared in the record and a message is
 * printed.  @path is released before returning.
 */
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560 struct btrfs_root *root,
2561 struct btrfs_path *path,
2562 struct inode_record *rec)
2564 struct btrfs_inode_item *ei;
2565 struct btrfs_key key;
2568 key.objectid = rec->ino;
2569 key.type = BTRFS_INODE_ITEM_KEY;
2572 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2579 /* Since ret == 0, no need to check anything */
2580 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581 struct btrfs_inode_item);
2582 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583 btrfs_mark_buffer_dirty(path->nodes[0]);
2584 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585 printf("reset nbytes for ino %llu root %llu\n",
2586 rec->ino, root->root_key.objectid);
2588 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that is missing for @backref and bring the
 * in-memory records in line with it.
 *
 * A new item keyed (backref->dir, DIR_INDEX, backref->index) is inserted
 * in its own transaction.  It points at (rec->ino, INODE_ITEM, 0), takes
 * its type from rec->imode, has no data and carries the backref's name.
 * Afterwards the backref is marked as having a dir index, the directory's
 * found_size grows by the name length and I_ERR_DIR_ISIZE_WRONG is cleared
 * or set depending on whether found_size now equals isize.
 *
 * @inode_cache: cache in which the directory's inode record is looked up
 * @rec:         record of the inode the backref belongs to
 */
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593 struct cache_tree *inode_cache,
2594 struct inode_record *rec,
2595 struct inode_backref *backref)
2597 struct btrfs_path path;
2598 struct btrfs_trans_handle *trans;
2599 struct btrfs_dir_item *dir_item;
2600 struct extent_buffer *leaf;
2601 struct btrfs_key key;
2602 struct btrfs_disk_key disk_key;
2603 struct inode_record *dir_rec;
2604 unsigned long name_ptr;
/* Item payload: the dir item header followed by the name */
2605 u32 data_size = sizeof(*dir_item) + backref->namelen;
2608 trans = btrfs_start_transaction(root, 1);
2610 return PTR_ERR(trans);
2612 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613 (unsigned long long)rec->ino);
2615 btrfs_init_path(&path);
2616 key.objectid = backref->dir;
2617 key.type = BTRFS_DIR_INDEX_KEY;
2618 key.offset = backref->index;
2619 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2622 leaf = path.nodes[0];
2623 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the dir item: the inode item of rec->ino */
2625 disk_key.objectid = cpu_to_le64(rec->ino);
2626 disk_key.type = BTRFS_INODE_ITEM_KEY;
2627 disk_key.offset = 0;
2629 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631 btrfs_set_dir_data_len(leaf, dir_item, 0);
2632 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written directly behind the dir item header */
2633 name_ptr = (unsigned long)(dir_item + 1);
2634 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635 btrfs_mark_buffer_dirty(leaf);
2636 btrfs_release_path(&path);
2637 btrfs_commit_transaction(trans, root);
/* Mirror the on-disk change in the in-memory records */
2639 backref->found_dir_index = 1;
2640 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641 BUG_ON(IS_ERR(dir_rec));
2644 dir_rec->found_size += backref->namelen;
2645 if (dir_rec->found_size == dir_rec->isize &&
2646 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648 if (dir_rec->found_size != dir_rec->isize)
2649 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the bad DIR_INDEX entry described by @backref from @root.
 *
 * Runs in its own transaction: the entry is looked up with
 * btrfs_lookup_dir_index() by directory, name and index, and removed with
 * btrfs_del_item() or btrfs_delete_one_dir_name().  The path is released
 * and the transaction committed on both exits visible here.
 */
2654 static int delete_dir_index(struct btrfs_root *root,
2655 struct inode_backref *backref)
2657 struct btrfs_trans_handle *trans;
2658 struct btrfs_dir_item *di;
2659 struct btrfs_path path;
2662 trans = btrfs_start_transaction(root, 1);
2664 return PTR_ERR(trans);
2666 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667 (unsigned long long)backref->dir,
2668 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669 (unsigned long long)root->objectid);
2671 btrfs_init_path(&path);
2672 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673 backref->name, backref->namelen,
2674 backref->index, -1);
/* Early exit: clean up without deleting anything */
2677 btrfs_release_path(&path);
2678 btrfs_commit_transaction(trans, root);
/* Remove either the whole item or just this one name from it */
2685 ret = btrfs_del_item(trans, root, &path);
2687 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689 btrfs_release_path(&path);
2690 btrfs_commit_transaction(trans, root);
/*
 * Insert an inode item for @ino through insert_inode_item(), passing 0
 * for the three arguments between @ino and the mode.  The mode is S_IFDIR
 * when filetype is BTRFS_FT_DIR and S_IFREG otherwise, with permission
 * bits 0755.
 */
2694 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2695 struct btrfs_root *root, u64 ino,
2698 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2700 return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Create the missing inode item for @rec in its own transaction.
 *
 * @root_dir: non-zero forces nlink to 1; otherwise rec->found_link is used
 *
 * A record with a dir item becomes a directory (S_IFDIR | 0755) whose size
 * is rec->found_size; a warning is printed when it also has file extents.
 * Any other record becomes a regular file (S_IFREG | 0755) whose size is
 * rec->extent_end.
 */
2703 static int create_inode_item(struct btrfs_root *root,
2704 struct inode_record *rec, int root_dir)
2706 struct btrfs_trans_handle *trans;
2712 trans = btrfs_start_transaction(root, 1);
2713 if (IS_ERR(trans)) {
2714 ret = PTR_ERR(trans);
2718 nlink = root_dir ? 1 : rec->found_link;
2719 if (rec->found_dir_item) {
2720 if (rec->found_file_extent)
2721 fprintf(stderr, "root %llu inode %llu has both a dir "
2722 "item and extents, unsure if it is a dir or a "
2723 "regular file so setting it as a directory\n",
2724 (unsigned long long)root->objectid,
2725 (unsigned long long)rec->ino);
2726 mode = S_IFDIR | 0755;
2727 size = rec->found_size;
2728 } else if (!rec->found_dir_item) {
2729 size = rec->extent_end;
2730 mode = S_IFREG | 0755;
2733 ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2735 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the backrefs of inode record @rec.
 *
 * Every backref on rec->backrefs is examined.  The cases visible below:
 *  - not deleting, @rec is the root dir and has no inode item: create the
 *    inode item with create_inode_item(root, rec, 1);
 *  - index 0 of the root dir is left alone;
 *  - a dir index without inode ref, or with inode ref but
 *    REF_ERR_INDEX_UNMATCH: remove it with delete_dir_index() and drop the
 *    backref from the list;
 *  - not deleting, dir item and inode ref present but no dir index: add
 *    the index with add_missing_dir_index();
 *  - dir item, dir index and inode ref all present without errors: drop
 *    the backref from the list;
 *  - not deleting, only the inode ref present: after check_dir_conflict(),
 *    insert the dir index/item pair with btrfs_insert_dir_item();
 *  - not deleting, all three present without REF_ERR_INDEX_UNMATCH but no
 *    inode item: create it with create_inode_item(root, rec, 0).
 *
 * Returns ret when it is non-zero, otherwise the value of repaired.
 */
2739 static int repair_inode_backrefs(struct btrfs_root *root,
2740 struct inode_record *rec,
2741 struct cache_tree *inode_cache,
2744 struct inode_backref *tmp, *backref;
2745 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2749 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2750 if (!delete && rec->ino == root_dirid) {
2751 if (!rec->found_inode_item) {
2752 ret = create_inode_item(root, rec, 1);
2759 /* Index 0 for root dir's are special, don't mess with it */
2760 if (rec->ino == root_dirid && backref->index == 0)
2764 ((backref->found_dir_index && !backref->found_inode_ref) ||
2765 (backref->found_dir_index && backref->found_inode_ref &&
2766 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2767 ret = delete_dir_index(root, backref);
2771 list_del(&backref->list);
2776 if (!delete && !backref->found_dir_index &&
2777 backref->found_dir_item && backref->found_inode_ref) {
2778 ret = add_missing_dir_index(root, inode_cache, rec,
2783 if (backref->found_dir_item &&
2784 backref->found_dir_index) {
2785 if (!backref->errors &&
2786 backref->found_inode_ref) {
2787 list_del(&backref->list);
2794 if (!delete && (!backref->found_dir_index &&
2795 !backref->found_dir_item &&
2796 backref->found_inode_ref)) {
2797 struct btrfs_trans_handle *trans;
2798 struct btrfs_key location;
2800 ret = check_dir_conflict(root, backref->name,
2806 * let nlink fixing routine to handle it,
2807 * which can do it better.
2812 location.objectid = rec->ino;
2813 location.type = BTRFS_INODE_ITEM_KEY;
2814 location.offset = 0;
2816 trans = btrfs_start_transaction(root, 1);
2817 if (IS_ERR(trans)) {
2818 ret = PTR_ERR(trans);
2821 fprintf(stderr, "adding missing dir index/item pair "
2823 (unsigned long long)rec->ino);
2824 ret = btrfs_insert_dir_item(trans, root, backref->name,
2826 backref->dir, &location,
2827 imode_to_type(rec->imode),
2830 btrfs_commit_transaction(trans, root);
2834 if (!delete && (backref->found_inode_ref &&
2835 backref->found_dir_index &&
2836 backref->found_dir_item &&
2837 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2838 !rec->found_inode_item)) {
2839 ret = create_inode_item(root, rec, 0);
2846 return ret ? ret : repaired;
2850 * To determine the file type for nlink/inode_item repair
2852 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2853 * Return -ENOENT if file type is not found.
2855 static int find_file_type(struct inode_record *rec, u8 *type)
/*
 * @rec:  inode record to inspect
 * @type: out parameter receiving the file type
 *
 * Two sources are consulted, in this order:
 *   1. the mode stored in the record (converted with imode_to_type()),
 *      when an inode item was found;
 *   2. the filetype recorded in a backref that has a dir index or dir item.
 * A backref that only has an inode ref is not used as a type source.
 *
 * NOTE(review): the return statements are on lines not shown in this
 * listing.
 */
2857 struct inode_backref *backref;
2859 /* For inode item recovered case */
2860 if (rec->found_inode_item) {
2861 *type = imode_to_type(rec->imode);
2865 list_for_each_entry(backref, &rec->backrefs, list) {
2866 if (backref->found_dir_index || backref->found_dir_item) {
2867 *type = backref->filetype;
2875 * To determine the file name for nlink repair
2877 * Return 0 if file name is found, set name and namelen.
2878 * Return -ENOENT if file name is not found.
2880 static int find_file_name(struct inode_record *rec,
2881 char *name, int *namelen)
/*
 * @rec:     inode record to inspect
 * @name:    destination buffer for the name bytes
 * @namelen: out parameter receiving the number of bytes copied
 *
 * A backref qualifies when any of dir index, dir item or inode ref was
 * found for it.
 *
 * memcpy() copies exactly backref->namelen bytes and the visible lines do
 * not write a terminating NUL, so @name must hold at least that many bytes
 * and callers should rely on *namelen, not on NUL termination.
 */
2883 struct inode_backref *backref;
2885 list_for_each_entry(backref, &rec->backrefs, list) {
2886 if (backref->found_dir_index || backref->found_dir_item ||
2887 backref->found_inode_ref) {
2888 memcpy(name, backref->name, backref->namelen);
2889 *namelen = backref->namelen;
2896 /* Reset the nlink of the inode to the correct one */
/*
 * Steps visible in this listing:
 *   1. rec->found_link is reset to 0.
 *   2. btrfs_unlink() is called for every backref; backrefs missing any of
 *      dir index, dir item or inode ref are also removed from the list.
 *   3. The inode item of rec->ino is looked up and its nlink is set to 0.
 *   4. btrfs_add_link() is called for every backref still on the list.
 *
 * @trans: running transaction
 * @root:  tree containing the inode
 * @path:  caller-provided path, released before returning
 * @rec:   inode record being repaired
 *
 * NOTE(review): the error checks after each call and the return statement
 * are on lines not shown in this listing.
 */
2897 static int reset_nlink(struct btrfs_trans_handle *trans,
2898 struct btrfs_root *root,
2899 struct btrfs_path *path,
2900 struct inode_record *rec)
2902 struct inode_backref *backref;
2903 struct inode_backref *tmp;
2904 struct btrfs_key key;
2905 struct btrfs_inode_item *inode_item;
2908 /* We don't believe this either, reset it and iterate backref */
2909 rec->found_link = 0;
2911 /* Remove all backref including the valid ones */
/* The _safe iterator is required because list_del() runs inside the loop */
2912 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2913 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2914 backref->index, backref->name,
2915 backref->namelen, 0);
2919 /* remove invalid backref, so it won't be added back */
2920 if (!(backref->found_dir_index &&
2921 backref->found_dir_item &&
2922 backref->found_inode_ref)) {
2923 list_del(&backref->list);
2930 /* Set nlink to 0 */
2931 key.objectid = rec->ino;
2932 key.type = BTRFS_INODE_ITEM_KEY;
/* key.offset is assigned on a line not shown in this listing */
2934 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2941 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2942 struct btrfs_inode_item);
2943 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2944 btrfs_mark_buffer_dirty(path->nodes[0]);
2945 btrfs_release_path(path);
2948 * Add back valid inode_ref/dir_item/dir_index,
2949 * add_link() will handle the nlink inc, so new nlink must be correct
2951 list_for_each_entry(backref, &rec->backrefs, list) {
2952 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2953 backref->name, backref->namelen,
2954 backref->filetype, &backref->index, 1, 0);
2959 btrfs_release_path(path);
/*
 * Find the highest inode number used in @root and store it in
 * *highest_ino (an out parameter whose declaration is on a line not shown
 * in this listing).
 *
 * The search key uses objectid BTRFS_LAST_FREE_OBJECTID and type
 * BTRFS_INODE_ITEM_KEY; the key of the item in the slot just before the
 * search position is then read and its objectid reported.
 *
 * @path is initialised here and released before returning.
 */
2963 static int get_highest_inode(struct btrfs_trans_handle *trans,
2964 struct btrfs_root *root,
2965 struct btrfs_path *path,
2968 struct btrfs_key key, found_key;
2971 btrfs_init_path(path);
2972 key.objectid = BTRFS_LAST_FREE_OBJECTID;
/* key.offset is assigned on a line not shown in this listing */
2974 key.type = BTRFS_INODE_ITEM_KEY;
2975 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/*
 * Reads slot path->slots[0] - 1, i.e. the item preceding the search
 * position.  NOTE(review): the condition guarding this read is elided;
 * confirm path->slots[0] cannot be 0 when it runs.
 */
2977 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2978 path->slots[0] - 1);
2979 *highest_ino = found_key.objectid;
/*
 * The statement controlled by this check is not shown; presumably it turns
 * an exhausted inode number space into an error - TODO confirm.
 */
2982 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2984 btrfs_release_path(path);
2989 * Link inode to dir 'lost+found'. Increase @ref_count.
2991 * Returns 0 means success.
2992 * Returns <0 means failure.
2994 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
2995 struct btrfs_root *root,
2996 struct btrfs_path *path,
2997 u64 ino, char *namebuf, u32 name_len,
2998 u8 filetype, u64 *ref_count)
/*
 * @trans:     running transaction
 * @root:      tree containing the inode
 * @path:      caller-provided path; released on entry and again at the end
 * @ino:       inode to link
 * @namebuf:   name to use; modified in place when a ".INO" suffix has to be
 *             appended, so it must have room for BTRFS_NAME_LEN bytes
 * @name_len:  number of valid bytes in @namebuf
 * @filetype:  BTRFS_FT_* type passed to btrfs_add_link()
 * @ref_count: link counter; the statement updating it is on a line not
 *             shown in this listing
 *
 * The "lost+found" directory is created with btrfs_mkdir() under
 * BTRFS_FIRST_FREE_OBJECTID, using the inode number obtained from
 * get_highest_inode() as input.
 */
3000 char *dir_name = "lost+found";
3005 btrfs_release_path(path);
3006 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3011 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3012 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3015 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3018 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3019 namebuf, name_len, filetype, NULL, 1, 0);
3021 * Add ".INO" suffix several times to handle case where
3022 * "FILENAME.INO" is already taken by another file.
3024 while (ret == -EEXIST) {
3026 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3028 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
/*
 * The suffix is written at namebuf + name_len with a size limit of
 * BTRFS_NAME_LEN - name_len; the format string is on a line not shown.
 * NOTE(review): snprintf() reserves one byte for the terminating NUL, so
 * when name_len + digits + 1 equals BTRFS_NAME_LEN exactly the last digit
 * would presumably be cut off while name_len still advances by the full
 * suffix length - confirm.
 */
3032 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3034 name_len += count_digits(ino) + 1;
3035 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3036 name_len, filetype, NULL, 1, 0);
3039 error("failed to link the inode %llu to %s dir: %s",
3040 ino, dir_name, strerror(-ret));
3045 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3046 name_len, namebuf, dir_name);
3048 btrfs_release_path(path);
3050 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair the link count of the inode described by @rec.
 *
 * Visible steps:
 *   1. Recover a file name and file type from the record before any
 *      backref is dropped; fall back to the decimal inode number as name
 *      and to BTRFS_FT_REG_FILE as type.
 *   2. Call reset_nlink() to rebuild the links from the valid backrefs.
 *   3. If no link was found (rec->found_link == 0), link the inode into
 *      lost+found via link_inode_to_lostfound().
 *   4. Clear I_ERR_LINK_COUNT_WRONG and release @path.
 *
 * The in-body comment names remove_all_invalid_backref(); in the visible
 * code the call that drops invalid backrefs is reset_nlink().
 */
3055 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3056 struct btrfs_root *root,
3057 struct btrfs_path *path,
3058 struct inode_record *rec)
3060 char namebuf[BTRFS_NAME_LEN] = {0};
3063 int name_recovered = 0;
3064 int type_recovered = 0;
3068 * Get file name and type first before these invalid inode ref
3069 * are deleted by remove_all_invalid_backref()
3071 name_recovered = !find_file_name(rec, namebuf, &namelen);
3072 type_recovered = !find_file_type(rec, &type);
/* No name could be recovered: use the decimal inode number as the name */
3074 if (!name_recovered) {
3075 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3076 rec->ino, rec->ino);
3077 namelen = count_digits(rec->ino);
3078 sprintf(namebuf, "%llu", rec->ino);
3081 if (!type_recovered) {
3082 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3084 type = BTRFS_FT_REG_FILE;
3088 ret = reset_nlink(trans, root, path, rec);
3091 "Failed to reset nlink for inode %llu: %s\n",
3092 rec->ino, strerror(-ret));
/*
 * NOTE(review): the (u64 *) cast below suggests rec->found_link is not
 * declared as u64.  If the field is narrower, a write through that pointer
 * in link_inode_to_lostfound() would touch bytes beyond it - confirm
 * against the definition of struct inode_record.
 */
3096 if (rec->found_link == 0) {
3097 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3098 namebuf, namelen, type,
3099 (u64 *)&rec->found_link);
3103 printf("Fixed the nlink of inode %llu\n", rec->ino);
3106 * Clear the flag anyway, or we will loop forever for the same inode
3107 * as it will not be removed from the bad inode list and the dead loop
3110 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3111 btrfs_release_path(path);
3116 * Check if there is any normal(reg or prealloc) file extent for given
3118 * This is used to determine the file type when neither its dir_index/item or
3119 * inode_item exists.
3121 * This will *NOT* report error, if any error happens, just consider it does
3122 * not have any normal file extent.
3124 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
/*
 * @root: tree to search (read-only: the search passes a NULL transaction)
 * @ino:  inode number whose file extents are inspected
 *
 * The item at the search position must belong to @ino and be of type
 * BTRFS_EXTENT_DATA_KEY; its extent type is then compared against
 * BTRFS_FILE_EXTENT_INLINE.
 *
 * NOTE(review): the assignments of key.objectid and key.offset, any loop
 * construct and the return value handling are on lines not shown in this
 * listing.
 */
3126 struct btrfs_path path;
3127 struct btrfs_key key;
3128 struct btrfs_key found_key;
3129 struct btrfs_file_extent_item *fi;
3133 btrfs_init_path(&path);
3135 key.type = BTRFS_EXTENT_DATA_KEY;
3138 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search position is past the last item of this leaf: move to the next leaf */
3143 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3144 ret = btrfs_next_leaf(root, &path);
3151 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3153 if (found_key.objectid != ino ||
3154 found_key.type != BTRFS_EXTENT_DATA_KEY)
3156 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3157 struct btrfs_file_extent_item);
3158 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Any extent type other than inline counts as a "normal" file extent */
3159 if (type != BTRFS_FILE_EXTENT_INLINE) {
3165 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF*
 * file-type bits by direct table lookup.
 *
 * Table slots without an initialiser are zero because the array has static
 * storage duration, so an unlisted type inside the table bounds yields 0.
 *
 * NOTE(review): no range check on @type is visible; a value beyond the
 * largest listed BTRFS_FT_* index would read past the end of the table -
 * confirm callers only pass the listed types.
 */
3169 static u32 btrfs_type_to_imode(u8 type)
3171 static u32 imode_by_btrfs_type[] = {
3172 [BTRFS_FT_REG_FILE] = S_IFREG,
3173 [BTRFS_FT_DIR] = S_IFDIR,
3174 [BTRFS_FT_CHRDEV] = S_IFCHR,
3175 [BTRFS_FT_BLKDEV] = S_IFBLK,
3176 [BTRFS_FT_FIFO] = S_IFIFO,
3177 [BTRFS_FT_SOCK] = S_IFSOCK,
3178 [BTRFS_FT_SYMLINK] = S_IFLNK,
3181 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.
 *
 * The file type comes from find_file_type() when possible.  Otherwise it
 * is chosen in this order: a non-inline file extent gives
 * BTRFS_FT_REG_FILE, a dir item gives BTRFS_FT_DIR, orphan extents give
 * BTRFS_FT_REG_FILE, and the final fallback is BTRFS_FT_REG_FILE.
 *
 * The new inode is created with btrfs_new_inode() using
 * mode | btrfs_type_to_imode(filetype); the initial value of `mode` is set
 * on a line not shown in this listing.
 *
 * On the visible success path the record is updated: I_ERR_NO_INODE_ITEM
 * is cleared and I_ERR_LINK_COUNT_WRONG is set so that the nlink repair
 * runs afterwards.
 *
 * NOTE(review): rec->found_dir_item is set to 1 after the rebuild whatever
 * file type was chosen; confirm this is the intended flag.
 */
3184 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3185 struct btrfs_root *root,
3186 struct btrfs_path *path,
3187 struct inode_record *rec)
3191 int type_recovered = 0;
3194 printf("Trying to rebuild inode:%llu\n", rec->ino);
3196 type_recovered = !find_file_type(rec, &filetype);
3199 * Try to determine inode type if type not found.
3201 * For found regular file extent, it must be FILE.
3202 * For found dir_item/index, it must be DIR.
3204 * For undetermined one, use FILE as fallback.
3207 * 1. If found backref(inode_index/item is already handled) to it,
3209 * Need new inode-inode ref structure to allow search for that.
3211 if (!type_recovered) {
3212 if (rec->found_file_extent &&
3213 find_normal_file_extent(root, rec->ino)) {
3215 filetype = BTRFS_FT_REG_FILE;
3216 } else if (rec->found_dir_item) {
3218 filetype = BTRFS_FT_DIR;
3219 } else if (!list_empty(&rec->orphan_extents)) {
3221 filetype = BTRFS_FT_REG_FILE;
3223 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3226 filetype = BTRFS_FT_REG_FILE;
3230 ret = btrfs_new_inode(trans, root, rec->ino,
3231 mode | btrfs_type_to_imode(filetype));
3236 * Here inode rebuild is done, we only rebuild the inode item,
3237 * don't repair the nlink(like move to lost+found).
3238 * That is the job of nlink repair.
3240 * We just fill the record and return
3242 rec->found_dir_item = 1;
3243 rec->imode = mode | btrfs_type_to_imode(filetype);
3245 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3246 /* Ensure the inode_nlinks repair function will be called */
3247 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Process every orphan data extent attached to @rec.
 *
 * For each entry of rec->orphan_extents the visible code:
 *   - probes the range (orphan->offset, orphan->disk_len) of the owning
 *     inode with btrfs_get_extent() to detect a conflict;
 *   - on a conflict, reports it and releases the extent with
 *     btrfs_free_extent();
 *   - otherwise inserts a file extent item with
 *     btrfs_insert_file_extent(), using disk_len both as on-disk and as
 *     logical length;
 *   - updates rec->found_size and the hole tree, clearing
 *     I_ERR_FILE_NBYTES_WRONG / I_ERR_FILE_EXTENT_DISCOUNT when the record
 *     becomes consistent;
 *   - unlinks the entry from the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared after the loop.
 *
 * NOTE(review): branch structure, error checks, the release of each orphan
 * entry and the return statement are on lines not shown in this listing.
 */
3252 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3253 struct btrfs_root *root,
3254 struct btrfs_path *path,
3255 struct inode_record *rec)
3257 struct orphan_data_extent *orphan;
3258 struct orphan_data_extent *tmp;
/* The _safe iterator is required because list_del() runs inside the loop */
3261 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3263 * Check for conflicting file extents
3265 * Here we don't know whether the extents is compressed or not,
3266 * so we can only assume it not compressed nor data offset,
3267 * and use its disk_len as extent length.
3269 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3270 orphan->offset, orphan->disk_len, 0);
3271 btrfs_release_path(path);
3276 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3277 orphan->disk_bytenr, orphan->disk_len);
3278 ret = btrfs_free_extent(trans,
3279 root->fs_info->extent_root,
3280 orphan->disk_bytenr, orphan->disk_len,
3281 0, root->objectid, orphan->objectid,
3286 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3287 orphan->offset, orphan->disk_bytenr,
3288 orphan->disk_len, orphan->disk_len);
3292 /* Update file size info */
3293 rec->found_size += orphan->disk_len;
3294 if (rec->found_size == rec->nbytes)
3295 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3297 /* Update the file extent hole info too */
3298 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3302 if (RB_EMPTY_ROOT(&rec->holes))
3303 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3305 list_del(&orphan->list);
3308 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of @rec by punching holes.
 *
 * The hole tree rec->holes is consumed from its first node: each hole is
 * passed to btrfs_punch_hole() and then removed with
 * del_file_extent_hole(); I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty.  A separate call punches a single hole covering
 * [0, round_up(rec->isize, sectorsize)) for a file that lost all of its
 * extents.
 *
 * NOTE(review): the loop construct, the condition selecting the special
 * case, the error checks and the return statement are on lines not shown
 * in this listing.
 */
3313 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3314 struct btrfs_root *root,
3315 struct btrfs_path *path,
3316 struct inode_record *rec)
3318 struct rb_node *node;
3319 struct file_extent_hole *hole;
3323 node = rb_first(&rec->holes);
3327 hole = rb_entry(node, struct file_extent_hole, node);
3328 ret = btrfs_punch_hole(trans, root, rec->ino,
3329 hole->start, hole->len);
3332 ret = del_file_extent_hole(&rec->holes, hole->start,
3336 if (RB_EMPTY_ROOT(&rec->holes))
3337 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the first remaining node: the tree was modified above */
3338 node = rb_first(&rec->holes);
3340 /* special case for a file losing all its file extent */
3342 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3343 round_up(rec->isize,
3344 root->fs_info->sectorsize));
3348 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3349 rec->ino, root->objectid);
/*
 * Run every applicable repair routine for one inode record inside a single
 * transaction.
 *
 * Nothing is attempted unless rec->errors contains at least one of the
 * flags listed in the first check.  The transaction is started with 7
 * reserved items, following the breakdown in the comment in the body.
 *
 * NOTE(review): the early-exit statement, the initial value of ret and the
 * final return are on lines not shown in this listing.
 */
3354 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3356 struct btrfs_trans_handle *trans;
3357 struct btrfs_path path;
3360 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3361 I_ERR_NO_ORPHAN_ITEM |
3362 I_ERR_LINK_COUNT_WRONG |
3363 I_ERR_NO_INODE_ITEM |
3364 I_ERR_FILE_EXTENT_ORPHAN |
3365 I_ERR_FILE_EXTENT_DISCOUNT|
3366 I_ERR_FILE_NBYTES_WRONG)))
3370 * For nlink repair, it may create a dir and add link, so
3371 * 2 for parent(256)'s dir_index and dir_item
3372 * 2 for lost+found dir's inode_item and inode_ref
3373 * 1 for the new inode_ref of the file
3374 * 2 for lost+found dir's dir_index and dir_item for the file
3376 trans = btrfs_start_transaction(root, 7);
3378 return PTR_ERR(trans);
3380 btrfs_init_path(&path);
/*
 * The repairs run in this fixed order and each later step is skipped once
 * an earlier one returned non-zero.  rec->errors is re-read at every step,
 * so a flag changed by one repair (repair_inode_no_item() sets
 * I_ERR_LINK_COUNT_WRONG, for example) affects the steps after it.
 */
3381 if (rec->errors & I_ERR_NO_INODE_ITEM)
3382 ret = repair_inode_no_item(trans, root, &path, rec);
3383 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3384 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3385 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3386 ret = repair_inode_discount_extent(trans, root, &path, rec);
3387 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3388 ret = repair_inode_isize(trans, root, &path, rec);
3389 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3390 ret = repair_inode_orphan_item(trans, root, &path, rec);
3391 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3392 ret = repair_inode_nlinks(trans, root, &path, rec);
3393 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3394 ret = repair_inode_nbytes(trans, root, &path, rec);
/*
 * The transaction is committed whether or not a repair failed.
 * NOTE(review): the result of the commit is not captured on this line.
 */
3395 btrfs_commit_transaction(trans, root);
3396 btrfs_release_path(&path);
/*
 * Check (and, when `repair` is set, try to fix) every inode record
 * collected for @root in @inode_cache.
 *
 * Phases visible in this listing:
 *   1. A root with zero refs is only sanity-checked: a non-empty cache
 *      produces a warning.
 *   2. Backref repair loop over the whole cache through
 *      repair_inode_backrefs(), driven by a `stage` counter whose handling
 *      is mostly on lines not shown here.
 *   3. Validation of the root directory record with check_root_dir(); when
 *      it is missing it is recreated with btrfs_make_root_dir().
 *   4. Final loop: each record is removed from the cache, orphan items are
 *      double-checked, missing inode items and link count mismatches are
 *      flagged, try_repair_inode() is called, and whatever remains
 *      inconsistent is printed together with its unresolved backrefs.
 *
 * Returns -1 when the `error` counter is positive, 0 otherwise.
 *
 * NOTE(review): `repair`, `stage`, `err`, `error` and `ret` are declared or
 * updated on lines not shown in this listing, as are most loop headers,
 * `continue`/`goto` statements and closing braces.
 */
3400 static int check_inode_recs(struct btrfs_root *root,
3401 struct cache_tree *inode_cache)
3403 struct cache_extent *cache;
3404 struct ptr_node *node;
3405 struct inode_record *rec;
3406 struct inode_backref *backref;
3411 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3413 if (btrfs_root_refs(&root->root_item) == 0) {
3414 if (!cache_tree_empty(inode_cache))
3415 fprintf(stderr, "warning line %d\n", __LINE__);
3420 * We need to repair backrefs first because we could change some of the
3421 * errors in the inode recs.
3423 * We also need to go through and delete invalid backrefs first and then
3424 * add the correct ones second. We do this because we may get EEXIST
3425 * when adding back the correct index because we hadn't yet deleted the
3428 * For example, if we were missing a dir index then the directories
3429 * isize would be wrong, so if we fixed the isize to what we thought it
3430 * would be and then fixed the backref we'd still have a invalid fs, so
3431 * we need to add back the dir index and then check to see if the isize
3436 if (stage == 3 && !err)
3439 cache = search_cache_extent(inode_cache, 0);
/* Backref repair pass; entered only when `repair` is non-zero */
3440 while (repair && cache) {
3441 node = container_of(cache, struct ptr_node, cache);
3443 cache = next_cache_extent(cache);
3445 /* Need to free everything up and rescan */
3447 remove_cache_extent(inode_cache, &node->cache);
3449 free_inode_rec(rec);
3453 if (list_empty(&rec->backrefs))
3456 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Fetch the record of the root directory; lookup errors are fatal here */
3470 rec = get_inode_rec(inode_cache, root_dirid, 0);
3471 BUG_ON(IS_ERR(rec));
3473 ret = check_root_dir(rec);
3475 fprintf(stderr, "root %llu root dir %llu error\n",
3476 (unsigned long long)root->root_key.objectid,
3477 (unsigned long long)root_dirid);
3478 print_inode_error(root, rec);
3483 struct btrfs_trans_handle *trans;
3485 trans = btrfs_start_transaction(root, 1);
3486 if (IS_ERR(trans)) {
3487 err = PTR_ERR(trans);
3492 "root %llu missing its root dir, recreating\n",
3493 (unsigned long long)root->objectid);
3495 ret = btrfs_make_root_dir(trans, root, root_dirid);
3498 btrfs_commit_transaction(trans, root);
3502 fprintf(stderr, "root %llu root dir %llu not found\n",
3503 (unsigned long long)root->root_key.objectid,
3504 (unsigned long long)root_dirid);
/* Final pass: every record examined below is removed from the cache */
3508 cache = search_cache_extent(inode_cache, 0);
3511 node = container_of(cache, struct ptr_node, cache);
3513 remove_cache_extent(inode_cache, &node->cache);
/* The root directory and the orphan objectid are not checked any further */
3515 if (rec->ino == root_dirid ||
3516 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3517 free_inode_rec(rec);
3521 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3522 ret = check_orphan_item(root, rec->ino);
3524 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3525 if (can_free_inode_rec(rec)) {
3526 free_inode_rec(rec);
3531 if (!rec->found_inode_item)
3532 rec->errors |= I_ERR_NO_INODE_ITEM;
3533 if (rec->found_link != rec->nlink)
3534 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3536 ret = try_repair_inode(root, rec);
3537 if (ret == 0 && can_free_inode_rec(rec)) {
3538 free_inode_rec(rec);
/* True unless repair mode is on and the repair attempt returned 0 */
3544 if (!(repair && ret == 0))
3546 print_inode_error(root, rec);
/* Derive the "missing piece" error bits of each backref before printing it */
3547 list_for_each_entry(backref, &rec->backrefs, list) {
3548 if (!backref->found_dir_item)
3549 backref->errors |= REF_ERR_NO_DIR_ITEM;
3550 if (!backref->found_dir_index)
3551 backref->errors |= REF_ERR_NO_DIR_INDEX;
3552 if (!backref->found_inode_ref)
3553 backref->errors |= REF_ERR_NO_INODE_REF;
3554 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3555 " namelen %u name %s filetype %d errors %x",
3556 (unsigned long long)backref->dir,
3557 (unsigned long long)backref->index,
3558 backref->namelen, backref->name,
3559 backref->filetype, backref->errors);
3560 print_ref_error(backref->errors);
3562 free_inode_rec(rec);
3564 return (error > 0) ? -1 : 0;
/*
 * Look up the root record for `objectid` in @root_cache, creating and
 * inserting a new zero-initialised record when none exists.  (`objectid`
 * is a parameter declared on a line not shown in this listing.)
 *
 * A new record covers the cache range [objectid, objectid + 1) and starts
 * with an empty backref list.
 *
 * Visible error returns: ERR_PTR(-ENOMEM) and ERR_PTR(-EEXIST).
 *
 * NOTE(review): only one line is elided between insert_cache_extent() and
 * the ERR_PTR(-EEXIST) return, so the freshly allocated record does not
 * appear to be freed on that path - confirm.
 */
3567 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3570 struct cache_extent *cache;
3571 struct root_record *rec = NULL;
3574 cache = lookup_cache_extent(root_cache, objectid, 1);
3576 rec = container_of(cache, struct root_record, cache);
3578 rec = calloc(1, sizeof(*rec));
3580 return ERR_PTR(-ENOMEM);
3581 rec->objectid = objectid;
3582 INIT_LIST_HEAD(&rec->backrefs);
3583 rec->cache.start = objectid;
3584 rec->cache.size = 1;
3586 ret = insert_cache_extent(root_cache, &rec->cache);
3588 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (@ref_root, @dir, @name/@namelen), or
 * allocate a new one and append it to rec->backrefs.
 *
 * @index is not part of the match; it is only stored in a newly created
 * backref.
 *
 * A new backref is allocated with room for the name plus a terminating
 * NUL, which is written explicitly after the copy.
 *
 * NOTE(review): the `continue`/`return` statements of the lookup loop, the
 * allocation failure check and the assignment of backref->dir are on lines
 * not shown in this listing.
 */
3593 static struct root_backref *get_root_backref(struct root_record *rec,
3594 u64 ref_root, u64 dir, u64 index,
3595 const char *name, int namelen)
3597 struct root_backref *backref;
3599 list_for_each_entry(backref, &rec->backrefs, list) {
3600 if (backref->ref_root != ref_root || backref->dir != dir ||
3601 backref->namelen != namelen)
3603 if (memcmp(name, backref->name, namelen))
/* The name is stored inline after the struct, hence the extra namelen + 1 bytes */
3608 backref = calloc(1, sizeof(*backref) + namelen + 1);
3611 backref->ref_root = ref_root;
3613 backref->index = index;
3614 backref->namelen = namelen;
3615 memcpy(backref->name, name, namelen);
3616 backref->name[namelen] = '\0';
3617 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down one root record reached through its embedded cache extent:
 * every backref is unlinked from rec->backrefs until the list is empty.
 *
 * NOTE(review): the statements that release the backrefs and the record
 * itself are on lines not shown in this listing.
 */
3621 static void free_root_record(struct cache_extent *cache)
3623 struct root_record *rec;
3624 struct root_backref *backref;
3626 rec = container_of(cache, struct root_record, cache);
3627 while (!list_empty(&rec->backrefs)) {
3628 backref = to_root_backref(rec->backrefs.next);
3629 list_del(&backref->list);
/*
 * Instantiates the tree-freeing helper for root records using
 * free_root_record() as the per-entry callback; presumably this is what
 * defines free_root_recs_tree() - TODO confirm against the macro.
 */
3636 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type was seen that links root @root_id
 * to (@ref_root, @dir, @name).
 *
 * @root_cache: cache of root records, keyed by root id
 * @root_id:    root being referenced
 * @ref_root:   root containing the reference
 * @dir:        directory inode holding the entry
 * @index:      dir index / sequence number carried by the item
 * @name:       entry name (@namelen bytes)
 * @item_type:  BTRFS_DIR_ITEM_KEY, BTRFS_DIR_INDEX_KEY, BTRFS_ROOT_REF_KEY
 *              or BTRFS_ROOT_BACKREF_KEY
 * @errors:     REF_ERR_* bits to OR into the backref
 *
 * For every item type except BTRFS_DIR_ITEM_KEY the index is compared with
 * the one already stored when another indexed item was seen before; a
 * mismatch sets REF_ERR_INDEX_UNMATCH.  The backref becomes `reachable`
 * once both a forward ref and a dir item have been seen.
 *
 * NOTE(review): a few statements (including the ones under the
 * found_forward_ref / found_dir_item checks and the return) are on lines
 * not shown in this listing.
 */
3638 static int add_root_backref(struct cache_tree *root_cache,
3639 u64 root_id, u64 ref_root, u64 dir, u64 index,
3640 const char *name, int namelen,
3641 int item_type, int errors)
3643 struct root_record *rec;
3644 struct root_backref *backref;
3646 rec = get_root_rec(root_cache, root_id);
3647 BUG_ON(IS_ERR(rec));
3648 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3651 backref->errors |= errors;
3653 if (item_type != BTRFS_DIR_ITEM_KEY) {
3654 if (backref->found_dir_index || backref->found_back_ref ||
3655 backref->found_forward_ref) {
3656 if (backref->index != index)
3657 backref->errors |= REF_ERR_INDEX_UNMATCH;
3659 backref->index = index;
/* Mark which kind of item was seen; duplicates of root refs are flagged */
3663 if (item_type == BTRFS_DIR_ITEM_KEY) {
3664 if (backref->found_forward_ref)
3666 backref->found_dir_item = 1;
3667 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3668 backref->found_dir_index = 1;
3669 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3670 if (backref->found_forward_ref)
3671 backref->errors |= REF_ERR_DUP_ROOT_REF;
3672 else if (backref->found_dir_item)
3674 backref->found_forward_ref = 1;
3675 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3676 if (backref->found_back_ref)
3677 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3678 backref->found_back_ref = 1;
3683 if (backref->found_forward_ref && backref->found_dir_item)
3684 backref->reachable = 1;
/*
 * Move the per-tree root references collected in @src_cache into the
 * global root record cache @dst_cache.
 *
 * For the tree with objectid BTRFS_TREE_RELOC_OBJECTID the source cache is
 * simply freed.  Otherwise each inode record is removed from @src_cache,
 * checked with is_child_root(), and its dir item / dir index backrefs are
 * re-registered through add_root_backref() with @root's objectid as the
 * referencing root.  Records in @src_cache must not carry inode refs
 * (enforced with BUG_ON).
 *
 * NOTE(review): the loop header, the handling of the is_child_root()
 * result, the last argument of both add_root_backref() calls and the
 * return statement are on lines not shown in this listing.
 */
3688 static int merge_root_recs(struct btrfs_root *root,
3689 struct cache_tree *src_cache,
3690 struct cache_tree *dst_cache)
3692 struct cache_extent *cache;
3693 struct ptr_node *node;
3694 struct inode_record *rec;
3695 struct inode_backref *backref;
3698 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3699 free_inode_recs_tree(src_cache);
3704 cache = search_cache_extent(src_cache, 0);
3707 node = container_of(cache, struct ptr_node, cache);
3709 remove_cache_extent(src_cache, &node->cache);
3712 ret = is_child_root(root, root->objectid, rec->ino);
3718 list_for_each_entry(backref, &rec->backrefs, list) {
3719 BUG_ON(backref->found_inode_ref);
3720 if (backref->found_dir_item)
3721 add_root_backref(dst_cache, rec->ino,
3722 root->root_key.objectid, backref->dir,
3723 backref->index, backref->name,
3724 backref->namelen, BTRFS_DIR_ITEM_KEY,
3726 if (backref->found_dir_index)
3727 add_root_backref(dst_cache, rec->ino,
3728 root->root_key.objectid, backref->dir,
3729 backref->index, backref->name,
3730 backref->namelen, BTRFS_DIR_INDEX_KEY,
3734 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is properly
 * referenced.
 *
 * Phases visible in this listing:
 *   1. The record of BTRFS_FS_TREE_OBJECTID is fetched first.
 *   2. A pass over all records clears `reachable` on backrefs depending on
 *      the found_ref count of the referencing root (the surrounding
 *      iteration logic is elided).
 *   3. A reporting pass: unreferenced trees in the range
 *      [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID] are checked
 *      for an orphan item in the tree root and reported; for the other
 *      records the missing-piece error bits of each backref are derived
 *      and unresolved references are printed.
 *
 * Returns 1 when the `errors` counter is positive, 0 otherwise.
 *
 * NOTE(review): loop headers, `continue` statements, counter updates and
 * several call arguments are on lines not shown in this listing.
 */
3741 static int check_root_refs(struct btrfs_root *root,
3742 struct cache_tree *root_cache)
3744 struct root_record *rec;
3745 struct root_record *ref_root;
3746 struct root_backref *backref;
3747 struct cache_extent *cache;
3753 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3754 BUG_ON(IS_ERR(rec));
3757 /* fixme: this can not detect circular references */
3760 cache = search_cache_extent(root_cache, 0);
3764 rec = container_of(cache, struct root_record, cache);
3765 cache = next_cache_extent(cache);
3767 if (rec->found_ref == 0)
3770 list_for_each_entry(backref, &rec->backrefs, list) {
3771 if (!backref->reachable)
3774 ref_root = get_root_rec(root_cache,
3776 BUG_ON(IS_ERR(ref_root));
3777 if (ref_root->found_ref > 0)
3780 backref->reachable = 0;
3782 if (rec->found_ref == 0)
/* Reporting pass over all root records */
3788 cache = search_cache_extent(root_cache, 0);
3792 rec = container_of(cache, struct root_record, cache);
3793 cache = next_cache_extent(cache);
3795 if (rec->found_ref == 0 &&
3796 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3797 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3798 ret = check_orphan_item(root->fs_info->tree_root,
3804 * If we don't have a root item then we likely just have
3805 * a dir item in a snapshot for this root but no actual
3806 * ref key or anything so it's meaningless.
3808 if (!rec->found_root_item)
3811 fprintf(stderr, "fs tree %llu not referenced\n",
3812 (unsigned long long)rec->objectid);
3816 if (rec->found_ref > 0 && !rec->found_root_item)
/* Derive the "missing piece" error bits for each backref of this root */
3818 list_for_each_entry(backref, &rec->backrefs, list) {
3819 if (!backref->found_dir_item)
3820 backref->errors |= REF_ERR_NO_DIR_ITEM;
3821 if (!backref->found_dir_index)
3822 backref->errors |= REF_ERR_NO_DIR_INDEX;
3823 if (!backref->found_back_ref)
3824 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3825 if (!backref->found_forward_ref)
3826 backref->errors |= REF_ERR_NO_ROOT_REF;
3827 if (backref->reachable && backref->errors)
3834 fprintf(stderr, "fs tree %llu refs %u %s\n",
3835 (unsigned long long)rec->objectid, rec->found_ref,
3836 rec->found_root_item ? "" : "not found");
3838 list_for_each_entry(backref, &rec->backrefs, list) {
3839 if (!backref->reachable)
3841 if (!backref->errors && rec->found_root_item)
3843 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3844 " index %llu namelen %u name %s errors %x\n",
3845 (unsigned long long)backref->ref_root,
3846 (unsigned long long)backref->dir,
3847 (unsigned long long)backref->index,
3848 backref->namelen, backref->name,
3850 print_ref_error(backref->errors);
3853 return errors > 0 ? 1 : 0;
/*
 * Decode one root ref / root backref item and register it in @root_cache.
 *
 * @eb:         leaf containing the item
 * @slot:       slot of the item inside @eb
 * @key:        key of the item; its type selects the direction
 * @root_cache: cache of root records to update
 *
 * The name stored after the btrfs_root_ref structure is copied into a
 * local buffer.  When the stored length exceeds BTRFS_NAME_LEN only
 * BTRFS_NAME_LEN bytes are read and REF_ERR_NAME_TOO_LONG is reported with
 * the backref.
 *
 * For a BTRFS_ROOT_REF_KEY item key->offset is the referenced root and
 * key->objectid the referencing one; in the other call the two are
 * swapped.
 *
 * NOTE(review): local declarations, the in-range length assignment, the
 * `else` lines and the return statement are on lines not shown in this
 * listing.
 */
3856 static int process_root_ref(struct extent_buffer *eb, int slot,
3857 struct btrfs_key *key,
3858 struct cache_tree *root_cache)
3864 struct btrfs_root_ref *ref;
3865 char namebuf[BTRFS_NAME_LEN];
3868 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3870 dirid = btrfs_root_ref_dirid(eb, ref);
3871 index = btrfs_root_ref_sequence(eb, ref);
3872 name_len = btrfs_root_ref_name_len(eb, ref);
3874 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the read to the buffer size and flag the error */
3878 len = BTRFS_NAME_LEN;
3879 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size btrfs_root_ref structure */
3881 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3883 if (key->type == BTRFS_ROOT_REF_KEY) {
3884 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3885 index, namebuf, len, key->type, error);
3887 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3888 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for the corrupt block cache: recovers the
 * btrfs_corrupt_block that embeds @cache.
 *
 * NOTE(review): the statement that releases the entry is on a line not
 * shown in this listing.
 */
3893 static void free_corrupt_block(struct cache_extent *cache)
3895 struct btrfs_corrupt_block *corrupt;
3897 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiates the tree-freeing helper for corrupt blocks using
 * free_corrupt_block() as the per-entry callback; presumably this is what
 * defines free_corrupt_blocks_tree() - TODO confirm against the macro.
 */
3901 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3904 * Repair the btree of the given root.
3906 * The fix is to remove the node key in corrupt_blocks cache_tree.
3907 * and rebalance the tree.
3908 * After the fix, the btree should be writeable.
3910 static int repair_btree(struct btrfs_root *root,
3911 struct cache_tree *corrupt_blocks)
/*
 * @root:           tree to repair
 * @corrupt_blocks: cache of btrfs_corrupt_block entries recorded for @root
 *
 * Two passes over @corrupt_blocks run inside one transaction:
 *   1. For each entry, search for its key with path.lowest_level set to
 *      the entry's level, read the block pointer found at that level,
 *      delete the pointer with btrfs_del_ptr() and free the extent it
 *      referenced (nodesize bytes).
 *   2. For each entry, search for its key again with ins_len -1, which per
 *      the comment in the body is what rebalances the tree.
 * The transaction is committed at the end.
 *
 * NOTE(review): loop headers, error checks, local declarations and the
 * return statement are on lines not shown in this listing.
 */
3913 struct btrfs_trans_handle *trans;
3914 struct btrfs_path path;
3915 struct btrfs_corrupt_block *corrupt;
3916 struct cache_extent *cache;
3917 struct btrfs_key key;
3922 if (cache_tree_empty(corrupt_blocks))
3925 trans = btrfs_start_transaction(root, 1);
3926 if (IS_ERR(trans)) {
3927 ret = PTR_ERR(trans);
3928 fprintf(stderr, "Error starting transaction: %s\n",
3932 btrfs_init_path(&path);
3933 cache = first_cache_extent(corrupt_blocks);
3935 corrupt = container_of(cache, struct btrfs_corrupt_block,
3937 level = corrupt->level;
3938 path.lowest_level = level;
3939 key.objectid = corrupt->key.objectid;
3940 key.type = corrupt->key.type;
3941 key.offset = corrupt->key.offset;
3944 * Here we don't want to do any tree balance, since it may
3945 * cause a balance with corrupted brother leaf/node,
3946 * so ins_len set to 0 here.
3947 * Balance will be done after all corrupt node/leaf is deleted.
3949 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3952 offset = btrfs_node_blockptr(path.nodes[level],
3955 /* Remove the ptr */
3956 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3960 * Remove the corresponding extent
3961 * return value is not concerned.
3963 btrfs_release_path(&path);
3964 ret = btrfs_free_extent(trans, root, offset,
3965 root->fs_info->nodesize, 0,
3966 root->root_key.objectid, level - 1, 0);
3967 cache = next_cache_extent(cache);
3970 /* Balance the btree using btrfs_search_slot() */
3971 cache = first_cache_extent(corrupt_blocks);
3973 corrupt = container_of(cache, struct btrfs_corrupt_block,
3975 memcpy(&key, &corrupt->key, sizeof(key));
3976 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3979 /* return will always >0 since it won't find the item */
3981 btrfs_release_path(&path);
3982 cache = next_cache_extent(cache);
/* NOTE(review): the result of the commit is not captured on this line */
3985 btrfs_commit_transaction(trans, root);
3986 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * @root:       tree to check
 * @root_cache: global cache of root records, updated with what is found
 * @wc:         walk control state, reset for this tree
 *
 * Outline of the visible code:
 *   1. A function-local corrupt block cache is installed in
 *      root->fs_info->corrupt_blocks for the duration of the check and
 *      reset to NULL at the end.
 *   2. Unless this is the reloc tree, the root record is fetched and
 *      marked as having a root item when the root has refs.
 *   3. Orphan data extents of the root are moved to the inode record they
 *      belong to, which is flagged I_ERR_FILE_EXTENT_ORPHAN.
 *   4. The root block is validated with btrfs_check_leaf() or
 *      btrfs_check_node().
 *   5. The walk starts from the root node, or from the position stored in
 *      root_item->drop_progress / drop_level for a root with zero refs and
 *      a non-zero drop_progress objectid.
 *   6. walk_down_tree() / walk_up_tree() traverse the tree.
 *   7. Corrupted tree blocks are listed and handed to repair_btree().
 *   8. merge_root_recs() and check_inode_recs() process what was
 *      collected.
 *
 * NOTE(review): fs_info->corrupt_blocks points at a stack variable of this
 * function.  The statement following the root block status check is not
 * shown; if it returns early the pointer would be left dangling - confirm.
 * Loop headers, error handling and the return statement are likewise on
 * lines not shown in this listing.
 */
3990 static int check_fs_root(struct btrfs_root *root,
3991 struct cache_tree *root_cache,
3992 struct walk_control *wc)
3998 struct btrfs_path path;
3999 struct shared_node root_node;
4000 struct root_record *rec;
4001 struct btrfs_root_item *root_item = &root->root_item;
4002 struct cache_tree corrupt_blocks;
4003 struct orphan_data_extent *orphan;
4004 struct orphan_data_extent *tmp;
4005 enum btrfs_tree_block_status status;
4006 struct node_refs nrefs;
4009 * Reuse the corrupt_block cache tree to record corrupted tree block
4011 * Unlike the usage in extent tree check, here we do it in a per
4012 * fs/subvol tree base.
4014 cache_tree_init(&corrupt_blocks);
4015 root->fs_info->corrupt_blocks = &corrupt_blocks;
4017 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4018 rec = get_root_rec(root_cache, root->root_key.objectid);
4019 BUG_ON(IS_ERR(rec));
4020 if (btrfs_root_refs(root_item) > 0)
4021 rec->found_root_item = 1;
4024 btrfs_init_path(&path);
4025 memset(&root_node, 0, sizeof(root_node));
4026 cache_tree_init(&root_node.root_cache);
4027 cache_tree_init(&root_node.inode_cache);
4028 memset(&nrefs, 0, sizeof(nrefs));
4030 /* Move the orphan extent record to corresponding inode_record */
4031 list_for_each_entry_safe(orphan, tmp,
4032 &root->orphan_data_extents, list) {
4033 struct inode_record *inode;
4035 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4037 BUG_ON(IS_ERR(inode));
4038 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4039 list_move(&orphan->list, &inode->orphan_extents);
/* Reset the walk state so that the root node of this tree is the active one */
4042 level = btrfs_header_level(root->node);
4043 memset(wc->nodes, 0, sizeof(wc->nodes));
4044 wc->nodes[level] = &root_node;
4045 wc->active_node = level;
4046 wc->root_level = level;
4048 /* We may not have checked the root block, lets do that now */
4049 if (btrfs_is_leaf(root->node))
4050 status = btrfs_check_leaf(root, NULL, root->node);
4052 status = btrfs_check_node(root, NULL, root->node);
4053 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * Start at the root node when the root has refs or has no drop progress
 * recorded; the other branch resumes from drop_progress.
 */
4056 if (btrfs_root_refs(root_item) > 0 ||
4057 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4058 path.nodes[level] = root->node;
4059 extent_buffer_get(root->node);
4060 path.slots[level] = 0;
4062 struct btrfs_key key;
4063 struct btrfs_disk_key found_key;
4065 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4066 level = root_item->drop_level;
4067 path.lowest_level = level;
/* drop_level is read from the root item, so it is range-checked before use */
4068 if (level > btrfs_header_level(root->node) ||
4069 level >= BTRFS_MAX_LEVEL) {
4070 error("ignoring invalid drop level: %u", level);
4073 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4076 btrfs_node_key(path.nodes[level], &found_key,
4078 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4079 sizeof(found_key)));
4083 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4089 wret = walk_up_tree(root, &path, wc, &level);
4096 btrfs_release_path(&path);
4098 if (!cache_tree_empty(&corrupt_blocks)) {
4099 struct cache_extent *cache;
4100 struct btrfs_corrupt_block *corrupt;
4102 printf("The following tree block(s) is corrupted in tree %llu:\n",
4103 root->root_key.objectid);
4104 cache = first_cache_extent(&corrupt_blocks);
4106 corrupt = container_of(cache,
4107 struct btrfs_corrupt_block,
4109 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4110 cache->start, corrupt->level,
4111 corrupt->key.objectid, corrupt->key.type,
4112 corrupt->key.offset);
4113 cache = next_cache_extent(cache);
4116 printf("Try to repair the btree for root %llu\n",
4117 root->root_key.objectid);
4118 ret = repair_btree(root, &corrupt_blocks);
4120 fprintf(stderr, "Failed to repair btree: %s\n",
4123 printf("Btree for root %llu is fixed\n",
4124 root->root_key.objectid);
4128 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4132 if (root_node.current) {
4133 root_node.current->checked = 1;
4134 maybe_free_inode_rec(&root_node.inode_cache,
4138 err = check_inode_recs(root, &root_node.inode_cache);
/* Unpublish the stack-local cache before this function returns */
4142 free_corrupt_blocks_tree(&corrupt_blocks);
4143 root->fs_info->corrupt_blocks = NULL;
4144 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Read items from fs_info->tree_root, starting from a search on a
 * ROOT_ITEM key.  ROOT_ITEM keys accepted by fs_root_objectid() are read
 * into a btrfs_root and handed to check_fs_root(); ROOT_REF and
 * ROOT_BACKREF keys are handed to process_root_ref().
 */
4148 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4149 struct cache_tree *root_cache)
4151 struct btrfs_path path;
4152 struct btrfs_key key;
4153 struct walk_control wc;
4154 struct extent_buffer *leaf, *tree_node;
4155 struct btrfs_root *tmp_root;
4156 struct btrfs_root *tree_root = fs_info->tree_root;
/* Progress reporting is optional; only start the task when it is enabled. */
4160 if (ctx.progress_enabled) {
4161 ctx.tp = TASK_FS_ROOTS;
4162 task_start(ctx.info);
4166 * Just in case we made any changes to the extent tree that weren't
4167 * reflected into the free space cache yet.
4170 reset_cached_block_groups(fs_info);
4171 memset(&wc, 0, sizeof(wc));
4172 cache_tree_init(&wc.shared);
4173 btrfs_init_path(&path);
4178 key.type = BTRFS_ROOT_ITEM_KEY;
4179 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember which node the tree root pointed at when the walk started. */
4184 tree_node = tree_root->node;
/*
 * NOTE(review): a mismatch means tree_root->node changed since the
 * snapshot above; the collected root records are dropped in that case.
 * Presumably this is reached again after a restart - confirm.
 */
4186 if (tree_node != tree_root->node) {
4187 free_root_recs_tree(root_cache);
4188 btrfs_release_path(&path);
4191 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: move on to the next leaf. */
4192 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4193 ret = btrfs_next_leaf(tree_root, &path);
4199 leaf = path.nodes[0];
4201 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4202 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4203 fs_root_objectid(key.objectid)) {
/*
 * Reloc tree roots are read with the no-cache variant and are freed
 * explicitly with btrfs_free_fs_root() once checked (see below).
 */
4204 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4205 tmp_root = btrfs_read_fs_root_no_cache(
4208 key.offset = (u64)-1;
4209 tmp_root = btrfs_read_fs_root(
4212 if (IS_ERR(tmp_root)) {
4216 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN: discard the root records gathered so far and drop the path. */
4217 if (ret == -EAGAIN) {
4218 free_root_recs_tree(root_cache);
4219 btrfs_release_path(&path);
4224 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4225 btrfs_free_fs_root(tmp_root);
4226 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4227 key.type == BTRFS_ROOT_BACKREF_KEY) {
4228 process_root_ref(leaf, path.slots[0], &key,
4235 btrfs_release_path(&path);
/*
 * The shared-node cache is freed first; if it is still non-empty
 * afterwards only a warning with the source line number is printed.
 */
4237 free_extent_cache_tree(&wc.shared);
4238 if (!cache_tree_empty(&wc.shared))
4239 fprintf(stderr, "warning line %d\n", __LINE__);
4241 task_stop(ctx.info);
4247 * Find the @index according to @ino and name.
4248 * Notice: time efficiency is O(N)
4250 * @root: the root of the fs/file tree
4251 * @index_ret: the index as return value
4252 * @namebuf: the name to match
4253 * @name_len: the length of name to match
4254 * @file_type: the file_type of INODE_ITEM to match
4256 * Returns 0 if found and *@index_ret will be modified with right value
4257 * Returns <0 if not found and *@index_ret will be (u64)-1
4259 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4260 u64 *index_ret, char *namebuf, u32 name_len,
4263 struct btrfs_path path;
4264 struct extent_buffer *node;
4265 struct btrfs_dir_item *di;
4266 struct btrfs_key key;
4267 struct btrfs_key location;
/* Scratch buffer for the on-disk name; holds at most BTRFS_NAME_LEN bytes. */
4268 char name[BTRFS_NAME_LEN] = {0};
4280 /* search from the last index */
4281 key.objectid = dirid;
4282 key.offset = (u64)-1;
4283 key.type = BTRFS_DIR_INDEX_KEY;
4285 btrfs_init_path(&path);
4286 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Step backwards to the previous DIR_INDEX item of @dirid. */
4291 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/*
 * NOTE(review): "(64)-1" is the integer expression 64 - 1, so this stores
 * 63.  The rest of this function uses (u64)-1 as the sentinel, so (u64)-1
 * was presumably intended here as well.
 */
4294 *index_ret = (64)-1;
4297 /* Check whether inode_id/filetype/name match */
4298 node = path.nodes[0];
4299 slot = path.slots[0];
4300 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4301 total = btrfs_item_size_nr(node, slot);
4302 while (cur < total) {
4304 len = btrfs_dir_name_len(node, di);
4305 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM of @location_id. */
4307 btrfs_dir_item_key_to_cpu(node, di, &location);
4308 if (location.objectid != location_id ||
4309 location.type != BTRFS_INODE_ITEM_KEY ||
4310 location.offset != 0)
4313 filetype = btrfs_dir_type(node, di);
4314 if (file_type != filetype)
/* Clamp so the copy below cannot overrun name[]. */
4317 if (len > BTRFS_NAME_LEN)
4318 len = BTRFS_NAME_LEN;
4320 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4321 if (len != name_len || strncmp(namebuf, name, len))
/* Match: the DIR_INDEX key offset is the index being looked for. */
4324 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4325 *index_ret = key.offset;
/*
 * NOTE(review): len may have been clamped above, so for an over-long name
 * the advance uses the clamped length rather than the on-disk one -
 * confirm this is intended.
 */
4329 len += sizeof(*di) + data_len;
4330 di = (struct btrfs_dir_item *)((char *)di + len);
4336 btrfs_release_path(&path);
4341 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4342 * INODE_REF/INODE_EXTREF match.
4344 * @root: the root of the fs/file tree
4345 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4346 * value while find index
4347 * @location_key: location key of the struct btrfs_dir_item to match
4348 * @name: the name to match
4349 * @namelen: the length of name
4350 * @file_type: the type of file to match
4352 * Return 0 if no error occurred.
4353 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4354 * DIR_ITEM/DIR_INDEX
4355 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4356 * and DIR_ITEM/DIR_INDEX mismatch
4358 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4359 struct btrfs_key *location_key, char *name,
4360 u32 namelen, u8 file_type)
4362 struct btrfs_path path;
4363 struct extent_buffer *node;
4364 struct btrfs_dir_item *di;
4365 struct btrfs_key location;
4366 char namebuf[BTRFS_NAME_LEN] = {0};
4375 /* get the index by traversing all index */
/*
 * A DIR_INDEX key with offset (u64)-1 means the index is unknown:
 * find_dir_index() looks it up and writes it straight into key->offset.
 */
4376 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4377 ret = find_dir_index(root, key->objectid,
4378 location_key->objectid, &key->offset,
4379 name, namelen, file_type);
4381 ret = DIR_INDEX_MISSING;
4385 btrfs_init_path(&path);
4386 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
/* The *_MISSING code returned depends on which key type was searched. */
4388 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4393 /* Check whether inode_id/filetype/name match */
4394 node = path.nodes[0];
4395 slot = path.slots[0];
4396 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4397 total = btrfs_item_size_nr(node, slot);
4398 while (cur < total) {
/* Assume a mismatch for this entry until every check below passes. */
4399 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4400 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4402 len = btrfs_dir_name_len(node, di);
4403 data_len = btrfs_dir_data_len(node, di);
4405 btrfs_dir_item_key_to_cpu(node, di, &location);
4406 if (location.objectid != location_key->objectid ||
4407 location.type != location_key->type ||
4408 location.offset != location_key->offset)
4411 filetype = btrfs_dir_type(node, di);
4412 if (file_type != filetype)
4415 if (len > BTRFS_NAME_LEN) {
4416 len = BTRFS_NAME_LEN;
/*
 * NOTE(review): len was clamped on the line above, so the "%u" in this
 * warning always prints BTRFS_NAME_LEN, not the over-long on-disk length.
 */
4417 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4419 key->type == BTRFS_DIR_ITEM_KEY ?
4420 "DIR_ITEM" : "DIR_INDEX",
4421 key->objectid, key->offset, len);
4423 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4425 if (len != namelen || strncmp(namebuf, name, len))
/*
 * NOTE(review): the advance uses the possibly clamped len, not the
 * on-disk name length - confirm this is intended for over-long names.
 */
4431 len += sizeof(*di) + data_len;
4432 di = (struct btrfs_dir_item *)((char *)di + len);
4437 btrfs_release_path(&path);
4442 * Prints inode ref error message
4444 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4445 u64 index, const char *namebuf, int name_len,
4446 u8 filetype, int err)
4451 /* root dir error */
4452 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4454 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4455 root->objectid, key->objectid, key->offset, namebuf);
/*
 * DIR_ITEM is reported as [parent dir, name hash]: key->offset of the
 * INODE_REF key is used as the directory id and the hash is recomputed
 * from the name.
 */
4460 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4461 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4462 root->objectid, key->offset,
4463 btrfs_name_hash(namebuf, name_len),
4464 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
/*
 * NOTE(review): this branch is entered for DIR_INDEX_* errors, but the
 * "mismatch"/"missing" wording below is chosen by testing
 * DIR_ITEM_MISMATCH.  DIR_INDEX_MISMATCH was presumably intended.
 */
4466 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4467 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4468 root->objectid, key->offset, index,
4469 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4474 * Insert the missing inode item.
4476 * Returns 0 means success.
4477 * Returns <0 means error.
4479 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4482 struct btrfs_key key;
4483 struct btrfs_trans_handle *trans;
4484 struct btrfs_path path;
4488 key.type = BTRFS_INODE_ITEM_KEY;
4491 btrfs_init_path(&path);
4492 trans = btrfs_start_transaction(root, 1);
4493 if (IS_ERR(trans)) {
/* Search inside the transaction (last two arguments are 1, 1). */
4498 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/*
 * Taken both on a search error (ret < 0) and when ret == 0; presumably
 * 0 means the INODE_ITEM already exists, so nothing is inserted - confirm.
 */
4499 if (ret < 0 || !ret)
4502 /* insert inode item */
/* NOTE(review): the helper's result is not captured on this line. */
4503 create_inode_item_lowmem(trans, root, ino, filetype);
/* NOTE(review): the commit result is not captured either. */
4506 btrfs_commit_transaction(trans, root);
4509 error("failed to repair root %llu INODE ITEM[%llu] missing",
4510 root->objectid, ino);
4511 btrfs_release_path(&path);
4516 * The ternary means dir item, dir index and relative inode ref.
4517 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4518 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4520 * If two of three is missing or mismatched, delete the existing one.
4521 * If one of three is missing or mismatched, add the missing one.
4523 * returns 0 means success.
4524 * returns not 0 means on error;
4526 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4527 u64 index, char *name, int name_len, u8 filetype,
4530 struct btrfs_trans_handle *trans;
4535 * stage shall be one of following valild values:
4536 * 0: Fine, nothing to do.
4537 * 1: One of three is wrong, so add missing one.
4538 * 2: Two of three is wrong, so delete existed one.
/*
 * One test per member of the ternary (dir index, dir item, inode ref);
 * presumably each hit advances the stage described above.
 */
4540 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4542 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4544 if (err & (INODE_REF_MISSING))
4547 /* stage must be smaller than 3 */
4550 trans = btrfs_start_transaction(root, 1);
/* Delete path: remove the link that is still present. */
4552 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
/* Add path: re-create the link; &index is passed by address. */
4557 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4558 filetype, &index, 1, 1);
/* NOTE(review): the commit result is not captured on this line. */
4562 btrfs_commit_transaction(trans, root);
4565 error("fail to repair inode %llu name %s filetype %u",
4566 ino, name, filetype);
/* The success message says "Delete" only for stage 2, otherwise "Add". */
4568 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4569 stage == 2 ? "Delete" : "Add",
4570 ino, name, filetype);
4576 * Traverse the given INODE_REF and call find_dir_item() to find related
4577 * DIR_ITEM/DIR_INDEX.
4579 * @root: the root of the fs/file tree
4580 * @ref_key: the key of the INODE_REF
4581 * @path: the path that provides node and slot
4582 * @refs_ret: the count of INODE_REF
4583 * @mode: the st_mode of INODE_ITEM
4584 * @name_ret: returns with the first ref's name
4585 * @namelen_ret: len of the name_ret
4587 * Return 0 if no error occurred.
4589 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4590 struct btrfs_path *path, char *name_ret,
4591 u32 *namelen_ret, u64 *refs_ret, int mode)
4593 struct btrfs_key key;
4594 struct btrfs_key location;
4595 struct btrfs_inode_ref *ref;
4596 struct extent_buffer *node;
/* Exactly BTRFS_NAME_LEN bytes: no spare byte for a terminating NUL. */
4597 char namebuf[BTRFS_NAME_LEN] = {0};
4607 int need_research = 0;
4615 /* since after repair, path and the dir item may be changed */
4616 if (need_research) {
4618 btrfs_release_path(path);
4619 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4620 /* the item was deleted, let path point to the last checked item */
4622 if (path->slots[0] == 0)
4623 btrfs_prev_leaf(root, path);
/* Every ref of this item must point back at this inode's INODE_ITEM. */
4631 location.objectid = ref_key->objectid;
4632 location.type = BTRFS_INODE_ITEM_KEY;
4633 location.offset = 0;
4634 node = path->nodes[0];
4635 slot = path->slots[0];
/*
 * sizeof(namebuf) / sizeof(*namebuf) is the element count; it equals the
 * byte size only because namebuf is a char array.
 */
4637 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4638 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4639 total = btrfs_item_size_nr(node, slot);
4642 /* Update inode ref count */
4645 index = btrfs_inode_ref_index(node, ref);
4646 name_len = btrfs_inode_ref_name_len(node, ref);
/* len is the number of bytes copied; it is capped at BTRFS_NAME_LEN. */
4648 if (name_len <= BTRFS_NAME_LEN) {
4651 len = BTRFS_NAME_LEN;
4652 warning("root %llu INODE_REF[%llu %llu] name too long",
4653 root->objectid, ref_key->objectid, ref_key->offset);
4656 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4658 /* copy the first name found to name_ret */
4659 if (refs == 1 && name_ret) {
4660 memcpy(name_ret, namebuf, len);
4664 /* Check root dir ref */
4665 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4666 if (index != 0 || len != strlen("..") ||
4667 strncmp("..", namebuf, len) ||
4668 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4669 /* set err bits then repair will delete the ref */
4670 err |= DIR_INDEX_MISSING;
4671 err |= DIR_ITEM_MISSING;
4676 /* Find related DIR_INDEX */
4677 key.objectid = ref_key->offset;
4678 key.type = BTRFS_DIR_INDEX_KEY;
4680 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4681 imode_to_type(mode));
4683 /* Find related dir_item */
4684 key.objectid = ref_key->offset;
4685 key.type = BTRFS_DIR_ITEM_KEY;
4686 key.offset = btrfs_name_hash(namebuf, len);
4687 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4688 imode_to_type(mode));
/*
 * NOTE(review): the repair and print calls below receive the on-disk
 * name_len, while namebuf only holds len (<= BTRFS_NAME_LEN) bytes.  For
 * an over-long name the callee would be told a length larger than the
 * buffer - confirm whether len should be passed instead.
 */
4690 if (tmp_err && repair) {
4691 ret = repair_ternary_lowmem(root, ref_key->offset,
4692 ref_key->objectid, index, namebuf,
4693 name_len, imode_to_type(mode),
4700 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4701 imode_to_type(mode), tmp_err);
/* Advance by the on-disk entry size (header plus unclamped name). */
4703 len = sizeof(*ref) + name_len;
4704 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4715 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4716 * DIR_ITEM/DIR_INDEX.
4718 * @root: the root of the fs/file tree
4719 * @ref_key: the key of the INODE_EXTREF
4720 * @refs: the count of INODE_EXTREF
4721 * @mode: the st_mode of INODE_ITEM
4723 * Return 0 if no error occurred.
4725 static int check_inode_extref(struct btrfs_root *root,
4726 struct btrfs_key *ref_key,
4727 struct extent_buffer *node, int slot, u64 *refs,
4730 struct btrfs_key key;
4731 struct btrfs_key location;
4732 struct btrfs_inode_extref *extref;
4733 char namebuf[BTRFS_NAME_LEN] = {0};
/* Every extref of this item must point back at this inode's INODE_ITEM. */
4743 location.objectid = ref_key->objectid;
4744 location.type = BTRFS_INODE_ITEM_KEY;
4745 location.offset = 0;
4747 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4748 total = btrfs_item_size_nr(node, slot);
4751 /* update inode ref count */
4753 name_len = btrfs_inode_extref_name_len(node, extref);
4754 index = btrfs_inode_extref_index(node, extref);
4755 parent = btrfs_inode_extref_parent(node, extref);
/* len is the number of bytes copied; it is capped at BTRFS_NAME_LEN. */
4756 if (name_len <= BTRFS_NAME_LEN) {
4759 len = BTRFS_NAME_LEN;
4760 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4761 root->objectid, ref_key->objectid, ref_key->offset);
4763 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4765 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length is the on-disk name_len, not the
 * clamped len used for the copy above - confirm.
 */
4766 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4767 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4768 root->objectid, ref_key->objectid, ref_key->offset,
4770 err |= ROOT_DIR_ERROR;
4773 /* find related dir_index */
4774 key.objectid = parent;
4775 key.type = BTRFS_DIR_INDEX_KEY;
/*
 * NOTE(review): find_dir_item() takes a u8 file_type as its last
 * argument, and check_inode_ref() passes imode_to_type(mode) there.  Here
 * mode is passed unconverted; if it is an st_mode value, as this
 * function's header comment states, the filetype comparison inside
 * find_dir_item() would presumably never match - confirm.
 */
4777 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4780 /* find related dir_item */
4781 key.objectid = parent;
4782 key.type = BTRFS_DIR_ITEM_KEY;
4783 key.offset = btrfs_name_hash(namebuf, len);
4784 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
/* Advance by the on-disk entry size (header plus unclamped name). */
4787 len = sizeof(*extref) + name_len;
4788 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4798 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4799 * DIR_ITEM/DIR_INDEX match.
4800 * Return with @index_ret.
4802 * @root: the root of the fs/file tree
4803 * @key: the key of the INODE_REF/INODE_EXTREF
4804 * @name: the name in the INODE_REF/INODE_EXTREF
4805 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4806 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4807 * value (u64)-1 means do not check index
4808 * @ext_ref: the EXTENDED_IREF feature
4810 * Return 0 if no error occurred.
4811 * Return >0 for error bitmap
4813 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4814 char *name, int namelen, u64 *index_ret,
4815 unsigned int ext_ref)
4817 struct btrfs_path path;
4818 struct btrfs_inode_ref *ref;
4819 struct btrfs_inode_extref *extref;
4820 struct extent_buffer *node;
4821 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4834 btrfs_init_path(&path);
4835 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4837 ret = INODE_REF_MISSING;
4841 node = path.nodes[0];
4842 slot = path.slots[0];
4844 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4845 total = btrfs_item_size_nr(node, slot);
4847 /* Iterate all entry of INODE_REF */
4848 while (cur < total) {
/* Assume "missing" for this entry until name (and index) match. */
4849 ret = INODE_REF_MISSING;
4851 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4852 ref_index = btrfs_inode_ref_index(node, ref);
/* (u64)-1 in *index_ret disables the index comparison. */
4853 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Entry would run past the item, or the name exceeds BTRFS_NAME_LEN. */
4856 if (cur + sizeof(*ref) + ref_namelen > total ||
4857 ref_namelen > BTRFS_NAME_LEN) {
4858 warning("root %llu INODE %s[%llu %llu] name too long",
4860 key->type == BTRFS_INODE_REF_KEY ?
4862 key->objectid, key->offset);
/* Not even the fixed-size header fits in what is left of the item. */
4864 if (cur + sizeof(*ref) > total)
4866 len = min_t(u32, total - cur - sizeof(*ref),
4872 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4875 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Match: report the index actually stored in the ref. */
4878 *index_ret = ref_index;
4882 len = sizeof(*ref) + ref_namelen;
4883 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4888 /* Skip if not support EXTENDED_IREF feature */
4892 btrfs_release_path(&path);
4893 btrfs_init_path(&path);
/*
 * The caller's key is rewritten in place: its offset (the parent
 * directory id) is saved in dir_id, then type and offset are replaced to
 * form the INODE_EXTREF key.  The caller sees the modified key afterwards.
 */
4895 dir_id = key->offset;
4896 key->type = BTRFS_INODE_EXTREF_KEY;
4897 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4899 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4901 ret = INODE_REF_MISSING;
4905 node = path.nodes[0];
4906 slot = path.slots[0];
4908 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4910 total = btrfs_item_size_nr(node, slot);
4912 /* Iterate all entry of INODE_EXTREF */
4913 while (cur < total) {
4914 ret = INODE_REF_MISSING;
4916 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4917 ref_index = btrfs_inode_extref_index(node, extref);
4918 parent = btrfs_inode_extref_parent(node, extref);
4919 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Entries under the same hash may belong to a different parent dir. */
4922 if (parent != dir_id)
/*
 * NOTE(review): unlike the INODE_REF loop above, only the name length is
 * bounded here; no check against the remaining item size is visible.
 */
4925 if (ref_namelen <= BTRFS_NAME_LEN) {
4928 len = BTRFS_NAME_LEN;
4929 warning("root %llu INODE %s[%llu %llu] name too long",
4931 key->type == BTRFS_INODE_REF_KEY ?
4933 key->objectid, key->offset);
4935 read_extent_buffer(node, ref_namebuf,
4936 (unsigned long)(extref + 1), len);
4938 if (len != namelen || strncmp(ref_namebuf, name, len))
4941 *index_ret = ref_index;
4946 len = sizeof(*extref) + ref_namelen;
4947 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4952 btrfs_release_path(&path);
/*
 * Print one error line per class of problem set in the @err bitmap:
 * DIR_ITEM, DIR_INDEX, INODE_ITEM and INODE_REF.
 *
 * NOTE(review): the three "mismath" literals below are emitted at runtime
 * and look like a typo for "mismatch".
 */
4956 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4957 u64 ino, u64 index, const char *namebuf,
4958 int name_len, u8 filetype, int err)
4960 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4961 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4962 root->objectid, key->objectid, key->offset, namebuf,
4964 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
/*
 * NOTE(review): this branch handles DIR_INDEX_* errors but selects its
 * wording by testing DIR_ITEM_MISMATCH.  DIR_INDEX_MISMATCH was
 * presumably intended.
 */
4967 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4968 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4969 root->objectid, key->objectid, index, namebuf, filetype,
4970 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4973 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4975 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4976 root->objectid, ino, index, namebuf, filetype,
4977 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
/* INODE_REF is printed as [inode, parent dir]; key->objectid is the dir. */
4980 if (err & INODE_REF_MISSING)
4982 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4983 root->objectid, ino, key->objectid, namebuf, filetype);
4988 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
4990 * Returns error after repair
4992 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
4993 u64 index, u8 filetype, char *namebuf, u32 name_len,
/* Step 1: a missing INODE_ITEM is re-created first. */
4998 if (err & INODE_ITEM_MISSING) {
4999 ret = repair_inode_item_missing(root, ino, filetype);
/* Both INODE_ITEM bits are cleared together. */
5001 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/* Step 2: any remaining non-INODE_ITEM bit goes to the ternary repair. */
5004 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5005 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5006 name_len, filetype, err);
/* Clear every bit that repair_ternary_lowmem() is responsible for. */
5008 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5009 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5010 err &= ~(INODE_REF_MISSING);
/*
 * Walk items of key type @type belonging to @ino, starting from offset
 * (u64)-1 and stepping backwards with btrfs_previous_item(), visiting
 * every btrfs_dir_item entry packed in each item.
 */
5016 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5019 struct btrfs_key key;
5020 struct btrfs_path path;
5022 struct btrfs_dir_item *di;
5032 key.offset = (u64)-1;
5034 btrfs_init_path(&path);
5035 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5040 /* if found, go to special case */
5045 ret = btrfs_previous_item(root, &path, ino, type);
5053 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5055 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5057 while (cur < total) {
5058 len = btrfs_dir_name_len(path.nodes[0], di);
/* Over-long names are counted as BTRFS_NAME_LEN. */
5059 if (len > BTRFS_NAME_LEN)
5060 len = BTRFS_NAME_LEN;
/*
 * NOTE(review): the advance below is built on the possibly clamped len,
 * not the on-disk name length - confirm this is intended.
 */
5063 len += btrfs_dir_data_len(path.nodes[0], di);
5065 di = (struct btrfs_dir_item *)((char *)di + len);
5071 btrfs_release_path(&path);
/*
 * Store in *@size the sum of the DIR_ITEM and DIR_INDEX totals that
 * __count_dir_isize() reports for directory inode @ino.
 */
5075 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5082 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5086 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5090 *size = item_size + index_size;
5094 error("failed to count root %llu INODE[%llu] root size",
5095 root->objectid, ino);
5100 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5101 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5103 * @root: the root of the fs/file tree
5104 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5106 * @size: the st_size of the INODE_ITEM
5107 * @ext_ref: the EXTENDED_IREF feature
5109 * Return 0 if no error occurred.
5110 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5112 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5113 struct btrfs_path *path, u64 *size,
5114 unsigned int ext_ref)
5116 struct btrfs_dir_item *di;
5117 struct btrfs_inode_item *ii;
5118 struct btrfs_key key;
5119 struct btrfs_key location;
5120 struct extent_buffer *node;
/* Exactly BTRFS_NAME_LEN bytes: no spare byte for a terminating NUL. */
5122 char namebuf[BTRFS_NAME_LEN] = {0};
5134 int need_research = 0;
5137 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5138 * ignore index check.
5140 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5141 index = di_key->offset;
5148 /* since after repair, path and the dir item may be changed */
5149 if (need_research) {
/* A repair happened: tell the caller to recount the directory isize. */
5151 err |= DIR_COUNT_AGAIN;
5152 btrfs_release_path(path);
5153 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5154 /* the item was deleted, let path point the last checked item */
5156 if (path->slots[0] == 0)
5157 btrfs_prev_leaf(root, path);
5165 node = path->nodes[0];
5166 slot = path->slots[0];
5168 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5169 total = btrfs_item_size_nr(node, slot);
/*
 * sizeof(namebuf) / sizeof(*namebuf) is the element count; it equals the
 * byte size only because namebuf is a char array.
 */
5170 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5172 while (cur < total) {
5173 data_len = btrfs_dir_data_len(node, di);
5176 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5178 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5179 di_key->objectid, di_key->offset, data_len);
5181 name_len = btrfs_dir_name_len(node, di);
/* len is the number of bytes copied; it is capped at BTRFS_NAME_LEN. */
5182 if (name_len <= BTRFS_NAME_LEN) {
5185 len = BTRFS_NAME_LEN;
5186 warning("root %llu %s[%llu %llu] name too long",
5188 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5189 di_key->objectid, di_key->offset);
/* The running size uses the on-disk name_len, not the clamped len. */
5191 (*size) += name_len;
5192 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5194 filetype = btrfs_dir_type(node, di);
/* For DIR_ITEM the key offset must equal the hash of the stored name. */
5196 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5197 di_key->offset != btrfs_name_hash(namebuf, len)) {
5199 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5200 root->objectid, di_key->objectid, di_key->offset,
5201 namebuf, len, filetype, di_key->offset,
5202 btrfs_name_hash(namebuf, len));
5205 btrfs_dir_item_key_to_cpu(node, di, &location);
5206 /* Ignore related ROOT_ITEM check */
5207 if (location.type == BTRFS_ROOT_ITEM_KEY)
5210 btrfs_release_path(path);
5211 /* Check relative INODE_ITEM(existence/filetype) */
5212 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5214 tmp_err |= INODE_ITEM_MISSING;
5218 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5219 struct btrfs_inode_item);
5220 mode = btrfs_inode_mode(path->nodes[0], ii);
5221 if (imode_to_type(mode) != filetype) {
5222 tmp_err |= INODE_ITEM_MISMATCH;
5226 /* Check relative INODE_REF/INODE_EXTREF */
5227 key.objectid = location.objectid;
5228 key.type = BTRFS_INODE_REF_KEY;
5229 key.offset = di_key->objectid;
5230 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5233 /* check relative INDEX/ITEM */
5234 key.objectid = di_key->objectid;
/*
 * NOTE(review): key.type was last assigned BTRFS_INODE_REF_KEY above
 * (find_inode_ref() may change it to BTRFS_INODE_EXTREF_KEY), so from the
 * visible assignments this comparison can never be true.  di_key->type
 * was presumably intended - confirm.
 */
5235 if (key.type == BTRFS_DIR_ITEM_KEY) {
5236 key.type = BTRFS_DIR_INDEX_KEY;
5239 key.type = BTRFS_DIR_ITEM_KEY;
/*
 * NOTE(review): the hash here and the find_dir_item() call below use the
 * on-disk name_len, while namebuf only holds len (<= BTRFS_NAME_LEN)
 * bytes - confirm whether len should be used for over-long names.
 */
5240 key.offset = btrfs_name_hash(namebuf, name_len);
5243 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5244 name_len, filetype);
5245 /* find_dir_item may find index */
5246 if (key.type == BTRFS_DIR_INDEX_KEY)
5250 if (tmp_err && repair) {
5251 ret = repair_dir_item(root, di_key->objectid,
5252 location.objectid, index,
5253 imode_to_type(mode), namebuf,
/* A changed bitmap means the repair altered something on disk. */
5255 if (ret != tmp_err) {
5260 btrfs_release_path(path);
5261 print_dir_item_err(root, di_key, location.objectid, index,
5262 namebuf, name_len, filetype, tmp_err);
/* Advance by the on-disk entry size (header, unclamped name, data). */
5264 len = sizeof(*di) + name_len + data_len;
5265 di = (struct btrfs_dir_item *)((char *)di + len);
5268 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5269 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5270 root->objectid, di_key->objectid,
/* Put the path back on the item the caller passed in. */
5277 btrfs_release_path(path);
5278 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
/*
 * NOTE(review): err is used as a bitmap elsewhere in this function (see
 * DIR_COUNT_AGAIN above); OR-ing a negative errno into it sets many
 * unrelated bits - confirm callers can cope with that.
 */
5280 err |= ret > 0 ? -ENOENT : ret;
5285 * Wrapper function of btrfs_punch_hole.
5287 * Returns 0 means success.
5288 * Returns not 0 means error.
5290 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5293 struct btrfs_trans_handle *trans;
5296 trans = btrfs_start_transaction(root, 1);
/* Failure to start the transaction is returned as the pointer's errno. */
5298 return PTR_ERR(trans);
5300 ret = btrfs_punch_hole(trans, root, ino, start, len);
5302 error("failed to add hole [%llu, %llu] in inode [%llu]",
/* The second number printed in "[%llu, %llu]" is a length, not an end. */
5305 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
/* NOTE(review): the commit result is not captured on this line. */
5308 btrfs_commit_transaction(trans, root);
5313 * Check file extent datasum/hole, update the size of the file extents,
5314 * check and update the last offset of the file extent.
5316 * @root: the root of fs/file tree.
5317 * @fkey: the key of the file extent.
5318 * @nodatasum: INODE_NODATASUM feature.
5319 * @size: the sum of all EXTENT_DATA items size for this inode.
5320 * @end: the offset of the last extent.
5322 * Return 0 if no error occurred.
5324 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5325 struct extent_buffer *node, int slot,
5326 unsigned int nodatasum, u64 *size, u64 *end)
5328 struct btrfs_file_extent_item *fi;
5331 u64 extent_num_bytes;
5333 u64 csum_found; /* In byte size, sectorsize aligned */
5334 u64 search_start; /* Logical range start we search for csum */
5335 u64 search_len; /* Logical range len we search for csum */
5336 unsigned int extent_type;
5337 unsigned int is_hole;
5342 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5344 /* Check inline extent */
5345 extent_type = btrfs_file_extent_type(node, fi);
5346 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5347 struct btrfs_item *e = btrfs_item_nr(slot);
5348 u32 item_inline_len;
/*
 * item_inline_len comes from the item, extent_num_bytes from the extent
 * header; the two are compared below for uncompressed extents.
 */
5350 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5351 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5352 compressed = btrfs_file_extent_compression(node, fi);
5353 if (extent_num_bytes == 0) {
5355 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5356 root->objectid, fkey->objectid, fkey->offset);
5357 err |= FILE_EXTENT_ERROR;
/* The size comparison is skipped for compressed inline extents. */
5359 if (!compressed && extent_num_bytes != item_inline_len) {
5361 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5362 root->objectid, fkey->objectid, fkey->offset,
5363 extent_num_bytes, item_inline_len);
5364 err |= FILE_EXTENT_ERROR;
5366 *end += extent_num_bytes;
5367 *size += extent_num_bytes;
5371 /* Check extent type */
5372 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5373 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5374 err |= FILE_EXTENT_ERROR;
5375 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5376 root->objectid, fkey->objectid, fkey->offset);
5380 /* Check REG_EXTENT/PREALLOC_EXTENT */
5381 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5382 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5383 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5384 extent_offset = btrfs_file_extent_offset(node, fi);
5385 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent whose disk bytenr and disk length are both zero. */
5386 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5389 * Check EXTENT_DATA csum
5391 * For plain (uncompressed) extent, we should only check the range
5392 * we're referring to, as it's possible that part of prealloc extent
5393 * has been written, and has csum:
5395 * |<--- Original large preallocated extent A ---->|
5396 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5399 * For compressed extent, we should check the whole range.
5402 search_start = disk_bytenr + extent_offset;
5403 search_len = extent_num_bytes;
5405 search_start = disk_bytenr;
5406 search_len = disk_num_bytes;
/*
 * NOTE(review): csum_found is declared without an initializer and is
 * read on the next line before ret is examined.  If count_csum_range()
 * can fail without storing to it, this reads an indeterminate value -
 * confirm against the helper.
 */
5408 ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
5409 if (csum_found > 0 && nodatasum) {
5410 err |= ODD_CSUM_ITEM;
5411 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5412 root->objectid, fkey->objectid, fkey->offset);
5413 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5414 !is_hole && (ret < 0 || csum_found < search_len)) {
5415 err |= CSUM_ITEM_MISSING;
5416 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5417 root->objectid, fkey->objectid, fkey->offset,
5418 csum_found, search_len);
5419 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5420 err |= ODD_CSUM_ITEM;
5421 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5422 root->objectid, fkey->objectid, fkey->offset, csum_found);
5425 /* Check EXTENT_DATA hole */
/*
 * NOTE(review): the test is "!=", so it also fires when *end is beyond
 * fkey->offset; the u64 subtraction passed as the hole length below
 * would then wrap - confirm that case cannot occur.
 */
5426 if (!no_holes && *end != fkey->offset) {
5428 ret = punch_extent_hole(root, fkey->objectid,
5429 *end, fkey->offset - *end);
/* The gap is reported unless repair is enabled and the punch succeeded. */
5430 if (!repair || ret) {
5431 err |= FILE_EXTENT_ERROR;
5433 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5434 root->objectid, fkey->objectid, fkey->offset,
5435 fkey->objectid, *end);
5439 *end += extent_num_bytes;
5441 *size += extent_num_bytes;
5447 * Set inode item nbytes to @nbytes
5449 * Returns 0 on success
5450 * Returns != 0 on error
5452 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5453 struct btrfs_path *path,
5454 u64 ino, u64 nbytes)
5456 struct btrfs_trans_handle *trans;
5457 struct btrfs_inode_item *ii;
5458 struct btrfs_key key;
5459 struct btrfs_key research_key;
/* Remember the item @path points at so it can be re-searched at the end. */
5463 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5466 key.type = BTRFS_INODE_ITEM_KEY;
5469 trans = btrfs_start_transaction(root, 1);
5470 if (IS_ERR(trans)) {
5471 ret = PTR_ERR(trans);
/* The caller's path is released and reused for the search below. */
5476 btrfs_release_path(path);
5477 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5485 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5486 struct btrfs_inode_item);
5487 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5488 btrfs_mark_buffer_dirty(path->nodes[0]);
/* NOTE(review): the commit result is not captured on this line. */
5490 btrfs_commit_transaction(trans, root);
5493 error("failed to set nbytes in inode %llu root %llu",
5494 ino, root->root_key.objectid);
/*
 * NOTE(review): the "\n" sits in the middle of this message, so " to
 * <nbytes>" lands on a new line with no trailing newline.  The sibling
 * message in repair_dir_isize_lowmem() ends with "\n" instead.
 */
5496 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5497 root->root_key.objectid, nbytes);
/* Point the caller's path back at the item it held on entry. */
5500 btrfs_release_path(path);
5501 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5508 * Set directory inode isize to @isize.
5510 * Returns 0 on success.
5511 * Returns != 0 on error.
/*
 * Rewrite the size field of the INODE_ITEM of an inode in @root to a new
 * isize, using a transaction started and committed here.
 *
 * The key @path points at on entry is saved first and searched for again
 * once the repair is done.
 * NOTE(review): the declarations of ino/isize/ret are not visible in this
 * listing.
 */
5513 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5514 struct btrfs_path *path,
5517 struct btrfs_trans_handle *trans;
5518 struct btrfs_inode_item *ii;
5519 struct btrfs_key key;
5520 struct btrfs_key research_key;
/* Remember the current position; the path is released and reused below. */
5524 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5527 key.type = BTRFS_INODE_ITEM_KEY;
5530 trans = btrfs_start_transaction(root, 1);
5531 if (IS_ERR(trans)) {
5532 ret = PTR_ERR(trans);
5537 btrfs_release_path(path);
/* Search with the transaction handle because the item is modified below. */
5538 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5546 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5547 struct btrfs_inode_item);
5548 btrfs_set_inode_size(path->nodes[0], ii, isize);
5549 btrfs_mark_buffer_dirty(path->nodes[0]);
5551 btrfs_commit_transaction(trans, root);
5554 error("failed to set isize in inode %llu root %llu",
5555 ino, root->root_key.objectid);
5557 printf("Set isize in inode %llu root %llu to %llu\n",
5558 ino, root->root_key.objectid, isize);
/* Search again for the key the caller was positioned on. */
5560 btrfs_release_path(path);
5561 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5568 * Wrapper function for btrfs_add_orphan_item().
5570 * Returns 0 on success.
5571 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino in @root by calling
 * btrfs_add_orphan_item() inside a transaction started and committed here.
 *
 * The key @path points at on entry is saved first and searched for again
 * once the repair is done.
 */
5573 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5574 struct btrfs_path *path, u64 ino)
5576 struct btrfs_trans_handle *trans;
5577 struct btrfs_key research_key;
/* Remember the current position; the path is released and reused below. */
5581 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5583 trans = btrfs_start_transaction(root, 1);
5584 if (IS_ERR(trans)) {
5585 ret = PTR_ERR(trans);
5590 btrfs_release_path(path);
5591 ret = btrfs_add_orphan_item(trans, root, path, ino);
5593 btrfs_commit_transaction(trans, root);
5596 error("failed to add inode %llu as orphan item root %llu",
5597 ino, root->root_key.objectid);
5599 printf("Added inode %llu as orphan item root %llu\n",
5600 ino, root->root_key.objectid);
/* Search again for the key the caller was positioned on. */
5602 btrfs_release_path(path);
5603 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5609 /* Set inode_item nlink to @ref_count.
5610 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5612 * Returns 0 on success
5614 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5615 struct btrfs_path *path, u64 ino,
5616 const char *name, u32 namelen,
5617 u64 ref_count, u8 filetype, u64 *nlink)
5619 struct btrfs_trans_handle *trans;
5620 struct btrfs_inode_item *ii;
5621 struct btrfs_key key;
5622 struct btrfs_key old_key;
5623 char namebuf[BTRFS_NAME_LEN] = {0};
5629 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5631 if (name && namelen) {
5632 ASSERT(namelen <= BTRFS_NAME_LEN);
5633 memcpy(namebuf, name, namelen);
5636 sprintf(namebuf, "%llu", ino);
5637 name_len = count_digits(ino);
5638 printf("Can't find file name for inode %llu, use %s instead\n",
5642 trans = btrfs_start_transaction(root, 1);
5643 if (IS_ERR(trans)) {
5644 ret = PTR_ERR(trans);
5648 btrfs_release_path(path);
5649 /* if refs is 0, put it into lostfound */
5650 if (ref_count == 0) {
5651 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5652 name_len, filetype, &ref_count);
5657 /* reset inode_item's nlink to ref_count */
5659 key.type = BTRFS_INODE_ITEM_KEY;
5662 btrfs_release_path(path);
5663 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5669 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5670 struct btrfs_inode_item);
5671 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5672 btrfs_mark_buffer_dirty(path->nodes[0]);
5677 btrfs_commit_transaction(trans, root);
/*
 * Both messages name the inode first and the root second, so the
 * arguments are passed in that order.
 */
5681 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5682 ino, root->objectid, namebuf, filetype);
5684 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5685 ino, root->objectid, namebuf, filetype);
/* Search again for the key the caller was positioned on at entry. */
5688 btrfs_release_path(path);
5689 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5696 * Check INODE_ITEM and related ITEMs (the same inode number)
5697 * 1. check link count
5698 * 2. check inode ref/extref
5699 * 3. check dir item/index
5701 * @ext_ref: the EXTENDED_IREF feature
5703 * Return 0 if no error occurred.
5704 * Return >0 (an error bitmap) on error or when the traversal is done
/*
 * Check one INODE_ITEM and the items that follow it with the same objectid.
 *
 * @path must point at the INODE_ITEM on entry. The function reads the inode
 * fields, walks the following items with btrfs_next_item() until the
 * objectid changes, dispatches on the key type to the per-item checkers,
 * and finally compares nlink/isize/nbytes against what the walk collected,
 * calling the repair helpers when repair is enabled.
 * NOTE(review): several lines (braces, else branches, some declarations) are
 * not visible in this listing.
 */
5706 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5707 unsigned int ext_ref)
5709 struct extent_buffer *node;
5710 struct btrfs_inode_item *ii;
5711 struct btrfs_key key;
5712 struct btrfs_key last_key;
5721 u64 extent_size = 0;
5723 unsigned int nodatasum;
5727 char namebuf[BTRFS_NAME_LEN] = {0};
5730 node = path->nodes[0];
5731 slot = path->slots[0];
5733 btrfs_item_key_to_cpu(node, &key, slot);
5734 inode_id = key.objectid;
/* Items of the orphan objectid are not inodes; just step past them. */
5736 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5737 ret = btrfs_next_item(root, path);
/* Cache the INODE_ITEM fields that are verified after the walk. */
5743 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5744 isize = btrfs_inode_size(node, ii);
5745 nbytes = btrfs_inode_nbytes(node, ii);
5746 mode = btrfs_inode_mode(node, ii);
5747 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5748 nlink = btrfs_inode_nlink(node, ii);
5749 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Remember the key of the item being left before advancing. */
5752 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5753 ret = btrfs_next_item(root, path);
5755 /* out will fill 'err' using current statistics */
5757 } else if (ret > 0) {
5762 node = path->nodes[0];
5763 slot = path->slots[0];
5764 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
5765 if (key.objectid != inode_id)
5769 case BTRFS_INODE_REF_KEY:
5770 ret = check_inode_ref(root, &key, path, namebuf,
5771 &name_len, &refs, mode);
5774 case BTRFS_INODE_EXTREF_KEY:
5775 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5776 warning("root %llu EXTREF[%llu %llu] isn't supported",
5777 root->objectid, key.objectid,
5779 ret = check_inode_extref(root, &key, node, slot, &refs,
5783 case BTRFS_DIR_ITEM_KEY:
5784 case BTRFS_DIR_INDEX_KEY:
5786 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5787 root->objectid, inode_id,
5788 imode_to_type(mode), key.objectid,
5791 ret = check_dir_item(root, &key, path, &size, ext_ref);
5794 case BTRFS_EXTENT_DATA_KEY:
5796 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5797 root->objectid, inode_id, key.objectid,
5800 ret = check_file_extent(root, &key, node, slot,
5801 nodatasum, &extent_size,
5805 case BTRFS_XATTR_ITEM_KEY:
5808 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5809 key.objectid, key.type, key.offset);
/* When LAST_ITEM is set, go back to the last key that was recorded. */
5814 if (err & LAST_ITEM) {
5815 btrfs_release_path(path);
5816 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5821 /* verify INODE_ITEM nlink/isize/nbytes */
/* In repair mode a pending DIR_COUNT_AGAIN triggers a fresh size count. */
5823 if (repair && (err & DIR_COUNT_AGAIN)) {
5824 err &= ~DIR_COUNT_AGAIN;
5825 count_dir_isize(root, inode_id, &size);
5828 if ((nlink != 1 || refs != 1) && repair) {
5829 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5830 namebuf, name_len, refs, imode_to_type(mode),
5835 err |= LINK_COUNT_ERROR;
5836 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5837 root->objectid, inode_id, nlink);
5841 * Just a warning, as dir inode nbytes is just an
5842 * instructive value.
5844 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5845 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5846 root->objectid, inode_id,
5847 root->fs_info->nodesize);
5850 if (isize != size) {
5852 ret = repair_dir_isize_lowmem(root, path,
5854 if (!repair || ret) {
5857 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5858 root->objectid, inode_id, isize, size);
5862 if (nlink != refs) {
5864 ret = repair_inode_nlinks_lowmem(root, path,
5865 inode_id, namebuf, name_len, refs,
5866 imode_to_type(mode), &nlink);
5867 if (!repair || ret) {
5868 err |= LINK_COUNT_ERROR;
5870 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5871 root->objectid, inode_id, nlink, refs);
5873 } else if (!nlink) {
5875 ret = repair_inode_orphan_item_lowmem(root,
5877 if (!repair || ret) {
5879 error("root %llu INODE[%llu] is orphan item",
5880 root->objectid, inode_id);
5884 if (!nbytes && !no_holes && extent_end < isize) {
5886 ret = punch_extent_hole(root, inode_id,
5887 extent_end, isize - extent_end);
5888 if (!repair || ret) {
5889 err |= NBYTES_ERROR;
5891 "root %llu INODE[%llu] size %llu should have a file extent hole",
5892 root->objectid, inode_id, isize);
5896 if (nbytes != extent_size) {
5898 ret = repair_inode_nbytes_lowmem(root, path,
5899 inode_id, extent_size);
5900 if (!repair || ret) {
5901 err |= NBYTES_ERROR;
5903 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5904 root->objectid, inode_id, nbytes,
/* The path was moved back to last_key above, so step forward once more. */
5910 if (err & LAST_ITEM)
5911 btrfs_next_item(root, path);
5916 * Insert the missing inode item and inode ref.
5918 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
5919 * Root dir should be handled specially because root dir is the root of fs.
5921 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (objectid BTRFS_FIRST_FREE_OBJECTID) of @root
 * according to the bits set in @err.
 *
 * INODE_REF_MISSING: insert an INODE_REF named ".." that points from the
 * first inode to itself, inside a transaction started and committed here.
 * INODE_ITEM_MISSING: delegate to repair_inode_item_missing() with the
 * directory file type.
 * A bit is cleared from @err once its repair has been carried out.
 */
5923 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5925 struct btrfs_trans_handle *trans;
5926 struct btrfs_key key;
5927 struct btrfs_path path;
5928 int filetype = BTRFS_FT_DIR;
5931 btrfs_init_path(&path);
5933 if (err & INODE_REF_MISSING) {
5934 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5935 key.type = BTRFS_INODE_REF_KEY;
5936 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5938 trans = btrfs_start_transaction(root, 1);
5939 if (IS_ERR(trans)) {
5940 ret = PTR_ERR(trans);
5944 btrfs_release_path(&path);
5945 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5949 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5950 BTRFS_FIRST_FREE_OBJECTID,
5951 BTRFS_FIRST_FREE_OBJECTID, 0);
5955 printf("Add INODE_REF[%llu %llu] name %s\n",
5956 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5958 err &= ~INODE_REF_MISSING;
5961 error("fail to insert first inode's ref");
5962 btrfs_commit_transaction(trans, root);
5965 if (err & INODE_ITEM_MISSING) {
5966 ret = repair_inode_item_missing(root,
5967 BTRFS_FIRST_FREE_OBJECTID, filetype);
5970 err &= ~INODE_ITEM_MISSING;
5974 error("fail to repair first inode");
5975 btrfs_release_path(&path);
5980 * check first root dir's inode_item and inode_ref
5982 * returns 0 means no error
5983 * returns >0 means error
5984 * returns <0 means fatal error
/*
 * Check that the first inode (objectid BTRFS_FIRST_FREE_OBJECTID) of @root
 * has an INODE_ITEM of directory type and an INODE_REF named "..".
 *
 * Problems are collected as bits in err; repair_fs_first_inode() is given
 * the chance to clear them before the remaining ones are reported.
 * The return value is ret when ret is negative, otherwise err.
 */
5986 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5988 struct btrfs_path path;
5989 struct btrfs_key key;
5990 struct btrfs_inode_item *ii;
5996 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5997 key.type = BTRFS_INODE_ITEM_KEY;
6000 /* For root being dropped, we don't need to check first inode */
6001 if (btrfs_root_refs(&root->root_item) == 0 &&
6002 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6003 BTRFS_FIRST_FREE_OBJECTID)
6006 btrfs_init_path(&path);
6007 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6012 err |= INODE_ITEM_MISSING;
/* The item exists: it must describe a directory. */
6014 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6015 struct btrfs_inode_item);
6016 mode = btrfs_inode_mode(path.nodes[0], ii);
6017 if (imode_to_type(mode) != BTRFS_FT_DIR)
6018 err |= INODE_ITEM_MISMATCH;
6021 /* lookup first inode ref */
6022 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6023 key.type = BTRFS_INODE_REF_KEY;
6024 /* special index value */
6027 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6033 btrfs_release_path(&path);
6036 err = repair_fs_first_inode(root, err);
/* Report whatever is still wrong. */
6038 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6039 error("root dir INODE_ITEM is %s",
6040 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6041 if (err & INODE_REF_MISSING)
6042 error("root dir INODE_REF is missing");
6044 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec.
 *
 * A stack-allocated tree_backref is filled in as the search key and
 * rec->backref_tree is searched with compare_extent_backref(). On a hit the
 * rb node is converted back to its tree_backref; back starts out as NULL.
 * NOTE(review): the condition that chooses between the parent (full
 * backref) form and the root form of the key is not visible in this listing.
 */
6047 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6048 u64 parent, u64 root)
6050 struct rb_node *node;
6051 struct tree_backref *back = NULL;
6052 struct tree_backref match = {
6059 match.parent = parent;
6060 match.node.full_backref = 1;
6065 node = rb_search(&rec->backref_tree, &match.node.node,
6066 (rb_compare_keys)compare_extent_backref, NULL);
6068 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec.
 *
 * A stack-allocated data_backref is filled in as the search key (including
 * found_ref and disk_bytenr) and rec->backref_tree is searched with
 * compare_extent_backref(). On a hit the rb node is converted back to its
 * data_backref; back starts out as NULL.
 * NOTE(review): the remaining key fields and the condition that selects the
 * parent (full backref) form are not visible in this listing.
 */
6073 static struct data_backref *find_data_backref(struct extent_record *rec,
6074 u64 parent, u64 root,
6075 u64 owner, u64 offset,
6077 u64 disk_bytenr, u64 bytes)
6079 struct rb_node *node;
6080 struct data_backref *back = NULL;
6081 struct data_backref match = {
6088 .found_ref = found_ref,
6089 .disk_bytenr = disk_bytenr,
6093 match.parent = parent;
6094 match.node.full_backref = 1;
6099 node = rb_search(&rec->backref_tree, &match.node.node,
6100 (rb_compare_keys)compare_extent_backref, NULL);
6102 back = to_data_backref(rb_node_to_extent_backref(node));
6107 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6108 * blocks and integrity of fs tree items.
6110 * @root: the root of the tree to be checked.
6111 * @ext_ref: whether the EXTENDED_IREF feature is enabled.
6112 * @check_all: if not 0, check the tree blocks of the tree as well;
6113 * otherwise only check the relationship of fs tree(s) items, and
6114 * @root MUST be a fs tree root.
6115 * Returns 0 represents OK.
6116 * Returns not 0 represents error.
/*
 * Check @root by first calling check_fs_first_inode() and then alternating
 * walk_down_tree_v2() and walk_up_tree_v2() over the tree.
 *
 * The walk starts at the root node, unless the root has no refs and a
 * non-zero drop_progress; in that case it starts from the drop_progress key
 * at drop_level.
 */
6118 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6119 struct btrfs_root *root, unsigned int ext_ref,
6123 struct btrfs_path path;
6124 struct node_refs nrefs;
6125 struct btrfs_root_item *root_item = &root->root_item;
6130 memset(&nrefs, 0, sizeof(nrefs));
6133 * We need to manually check the first inode item (256)
6134 * As the following traversal function will only start from
6135 * the first inode item in the leaf, if inode item (256) is
6136 * missing we will skip it forever.
6138 ret = check_fs_first_inode(root, ext_ref);
6144 level = btrfs_header_level(root->node);
6145 btrfs_init_path(&path);
6147 if (btrfs_root_refs(root_item) > 0 ||
6148 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Start from slot 0 of the root node and take a reference on it. */
6149 path.nodes[level] = root->node;
6150 path.slots[level] = 0;
6151 extent_buffer_get(root->node);
6153 struct btrfs_key key;
/* Start from the recorded drop_progress key at drop_level. */
6155 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6156 level = root_item->drop_level;
6157 path.lowest_level = level;
6158 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6165 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6166 ext_ref, check_all);
6170 /* if ret is negative, walk shall stop */
6176 ret = walk_up_tree_v2(root, &path, &level);
6178 /* Normal exit, reset ret to err */
6185 btrfs_release_path(&path);
6190 * Iterate all items in the tree and call check_inode_item() to check.
6192 * @root: the root of the tree to be checked.
6193 * @ext_ref: the EXTENDED_IREF feature
6195 * Return 0 if no error found.
6196 * Return <0 for error.
/*
 * Check one fs tree: reset the cached block groups of the filesystem, then
 * run check_btrfs_root() without a transaction and with 0 as last argument.
 */
6198 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6200 reset_cached_block_groups(root->fs_info);
6201 return check_btrfs_root(NULL, root, ext_ref, 0);
6205 * Find the relative ref for root_ref and root_backref.
6207 * @root: the root of the root tree.
6208 * @ref_key: the key of the root ref.
6210 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF or ROOT_BACKREF item at @node/@slot has a matching
 * counterpart item in @root.
 *
 * The counterpart is searched with objectid and offset swapped and the
 * opposite key type. A missing counterpart sets ROOT_REF_MISSING; a
 * counterpart whose dirid, sequence, name length or name differs sets
 * ROOT_REF_MISMATCH. Names longer than BTRFS_NAME_LEN are truncated to that
 * length with a warning before they are read.
 */
6212 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6213 struct extent_buffer *node, int slot)
6215 struct btrfs_path path;
6216 struct btrfs_key key;
6217 struct btrfs_root_ref *ref;
6218 struct btrfs_root_ref *backref;
6219 char ref_name[BTRFS_NAME_LEN] = {0};
6220 char backref_name[BTRFS_NAME_LEN] = {0};
6226 u32 backref_namelen;
6231 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6232 ref_dirid = btrfs_root_ref_dirid(node, ref);
6233 ref_seq = btrfs_root_ref_sequence(node, ref);
6234 ref_namelen = btrfs_root_ref_name_len(node, ref);
6236 if (ref_namelen <= BTRFS_NAME_LEN) {
6239 len = BTRFS_NAME_LEN;
6240 warning("%s[%llu %llu] ref_name too long",
6241 ref_key->type == BTRFS_ROOT_REF_KEY ?
6242 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name is stored right after the btrfs_root_ref structure. */
6245 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6247 /* Find relative root_ref */
6248 key.objectid = ref_key->offset;
/* Sum of both types minus the current one yields the opposite type. */
6249 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6250 key.offset = ref_key->objectid;
6252 btrfs_init_path(&path);
6253 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6255 err |= ROOT_REF_MISSING;
6256 error("%s[%llu %llu] couldn't find relative ref",
6257 ref_key->type == BTRFS_ROOT_REF_KEY ?
6258 "ROOT_REF" : "ROOT_BACKREF",
6259 ref_key->objectid, ref_key->offset);
6263 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6264 struct btrfs_root_ref);
6265 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6266 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6267 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6269 if (backref_namelen <= BTRFS_NAME_LEN) {
6270 len = backref_namelen;
6272 len = BTRFS_NAME_LEN;
6273 warning("%s[%llu %llu] ref_name too long",
6274 key.type == BTRFS_ROOT_REF_KEY ?
6275 "ROOT_REF" : "ROOT_BACKREF",
6276 key.objectid, key.offset);
6278 read_extent_buffer(path.nodes[0], backref_name,
6279 (unsigned long)(backref + 1), len);
/* Every field of the pair has to agree, including the name bytes. */
6281 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6282 ref_namelen != backref_namelen ||
6283 strncmp(ref_name, backref_name, len)) {
6284 err |= ROOT_REF_MISMATCH;
6285 error("%s[%llu %llu] mismatch relative ref",
6286 ref_key->type == BTRFS_ROOT_REF_KEY ?
6287 "ROOT_REF" : "ROOT_BACKREF",
6288 ref_key->objectid, ref_key->offset);
6291 btrfs_release_path(&path);
6296 * Check all fs/file tree in low_memory mode.
6298 * 1. for fs tree root item, call check_fs_root_v2()
6299 * 2. for fs tree root ref/backref, call check_root_ref()
6301 * Return 0 if no error occurred.
/*
 * Walk the tree root starting at BTRFS_FS_TREE_OBJECTID and check every fs
 * tree and every root ref found there.
 *
 * ROOT_ITEM keys whose objectid passes fs_root_objectid() are read and
 * checked with check_fs_root_v2(); ROOT_REF and ROOT_BACKREF keys are
 * checked with check_root_ref(). Keys whose objectid is above
 * BTRFS_LAST_FREE_OBJECTID are tested for explicitly.
 * NOTE(review): the statement taken for those keys is not visible in this
 * listing.
 */
6303 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6305 struct btrfs_root *tree_root = fs_info->tree_root;
6306 struct btrfs_root *cur_root = NULL;
6307 struct btrfs_path path;
6308 struct btrfs_key key;
6309 struct extent_buffer *node;
6310 unsigned int ext_ref;
6315 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6317 btrfs_init_path(&path);
6318 key.objectid = BTRFS_FS_TREE_OBJECTID;
6320 key.type = BTRFS_ROOT_ITEM_KEY;
6322 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6326 } else if (ret > 0) {
6332 node = path.nodes[0];
6333 slot = path.slots[0];
6334 btrfs_item_key_to_cpu(node, &key, slot);
6335 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6337 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6338 fs_root_objectid(key.objectid)) {
/* The reloc tree is read without the root cache and freed after use. */
6339 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6340 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6343 key.offset = (u64)-1;
6344 cur_root = btrfs_read_fs_root(fs_info, &key);
6347 if (IS_ERR(cur_root)) {
6348 error("Fail to read fs/subvol tree: %lld",
6354 ret = check_fs_root_v2(cur_root, ext_ref);
6357 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6358 btrfs_free_fs_root(cur_root);
6359 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6360 key.type == BTRFS_ROOT_BACKREF_KEY) {
6361 ret = check_root_ref(tree_root, &key, node, slot);
6365 ret = btrfs_next_item(tree_root, &path);
6375 btrfs_release_path(&path);
/*
 * Entry point of the fs roots check: print a progress line to stderr when
 * the progress indicator is disabled, then run the lowmem implementation
 * (check_fs_roots_v2) or the original one (check_fs_roots) depending on
 * check_mode.
 */
6379 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6380 struct cache_tree *root_cache)
6384 if (!ctx.progress_enabled)
6385 fprintf(stderr, "checking fs roots\n");
6386 if (check_mode == CHECK_MODE_LOWMEM)
6387 ret = check_fs_roots_v2(fs_info);
6389 ret = check_fs_roots(fs_info, root_cache);
/*
 * Verify every backref attached to @rec.
 *
 * The visible checks are: each backref was found in the extent tree, each
 * tree backref was referenced back, each data backref has matching
 * found_ref/num_refs, disk_bytenr and bytes, and the accumulated reference
 * count equals rec->refs. Every failed check has a diagnostic that is
 * written to stderr.
 * NOTE(review): how @print_errs gates the diagnostics and how the result is
 * returned are not visible in this listing.
 */
6394 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6396 struct extent_backref *back, *tmp;
6397 struct tree_backref *tback;
6398 struct data_backref *dback;
6402 rbtree_postorder_for_each_entry_safe(back, tmp,
6403 &rec->backref_tree, node) {
/* Backref seen while scanning trees but absent from the extent tree. */
6404 if (!back->found_extent_tree) {
6408 if (back->is_data) {
6409 dback = to_data_backref(back);
6410 fprintf(stderr, "Data backref %llu %s %llu"
6411 " owner %llu offset %llu num_refs %lu"
6412 " not found in extent tree\n",
6413 (unsigned long long)rec->start,
6414 back->full_backref ?
6416 back->full_backref ?
6417 (unsigned long long)dback->parent:
6418 (unsigned long long)dback->root,
6419 (unsigned long long)dback->owner,
6420 (unsigned long long)dback->offset,
6421 (unsigned long)dback->num_refs);
6423 tback = to_tree_backref(back);
6424 fprintf(stderr, "Tree backref %llu parent %llu"
6425 " root %llu not found in extent tree\n",
6426 (unsigned long long)rec->start,
6427 (unsigned long long)tback->parent,
6428 (unsigned long long)tback->root);
/* Tree backref that no tree block referenced back. */
6431 if (!back->is_data && !back->found_ref) {
6435 tback = to_tree_backref(back);
6436 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6437 (unsigned long long)rec->start,
6438 back->full_backref ? "parent" : "root",
6439 back->full_backref ?
6440 (unsigned long long)tback->parent :
6441 (unsigned long long)tback->root, back);
6443 if (back->is_data) {
6444 dback = to_data_backref(back);
6445 if (dback->found_ref != dback->num_refs) {
6449 fprintf(stderr, "Incorrect local backref count"
6450 " on %llu %s %llu owner %llu"
6451 " offset %llu found %u wanted %u back %p\n",
6452 (unsigned long long)rec->start,
6453 back->full_backref ?
6455 back->full_backref ?
6456 (unsigned long long)dback->parent:
6457 (unsigned long long)dback->root,
6458 (unsigned long long)dback->owner,
6459 (unsigned long long)dback->offset,
6460 dback->found_ref, dback->num_refs, back);
6462 if (dback->disk_bytenr != rec->start) {
6466 fprintf(stderr, "Backref disk bytenr does not"
6467 " match extent record, bytenr=%llu, "
6468 "ref bytenr=%llu\n",
6469 (unsigned long long)rec->start,
6470 (unsigned long long)dback->disk_bytenr);
6473 if (dback->bytes != rec->nr) {
6477 fprintf(stderr, "Backref bytes do not match "
6478 "extent backref, bytenr=%llu, ref "
6479 "bytes=%llu, backref bytes=%llu\n",
6480 (unsigned long long)rec->start,
6481 (unsigned long long)rec->nr,
6482 (unsigned long long)dback->bytes);
/* Accumulate the number of references found for the global check. */
6485 if (!back->is_data) {
6488 dback = to_data_backref(back);
6489 found += dback->found_ref;
6492 if (found != rec->refs) {
6496 fprintf(stderr, "Incorrect global backref count "
6497 "on %llu found %llu wanted %llu\n",
6498 (unsigned long long)rec->start,
6499 (unsigned long long)found,
6500 (unsigned long long)rec->refs);
/*
 * Callback handed to rb_free_nodes() by free_all_extent_backrefs(): converts
 * the rb node back to its extent_backref.
 * NOTE(review): the statement releasing the backref is not visible in this
 * listing.
 */
6506 static void __free_one_backref(struct rb_node *node)
6508 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every backref of @rec by running __free_one_backref() over the
 * nodes of rec->backref_tree.
 */
6513 static void free_all_extent_backrefs(struct extent_record *rec)
6515 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: take the first cache extent, remove it from the
 * tree and free the backrefs of the extent_record that embeds it.
 * NOTE(review): the loop construct and the release of the record itself are
 * not visible in this listing.
 */
6518 static void free_extent_record_cache(struct cache_tree *extent_cache)
6520 struct cache_extent *cache;
6521 struct extent_record *rec;
6524 cache = first_cache_extent(extent_cache);
6527 rec = container_of(cache, struct extent_record, cache);
6528 remove_cache_extent(extent_cache, cache);
6529 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is removed only when its content and owner ref were checked,
 * the extent item ref count equals the non-zero refs count, it has no
 * duplicates, all_backpointers_checked() returns 0, and none of the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags is set.
 */
6534 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6535 struct extent_record *rec)
6537 if (rec->content_checked && rec->owner_ref_checked &&
6538 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6539 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6540 !rec->bad_full_backref && !rec->crossing_stripes &&
6541 !rec->wrong_chunk_type) {
6542 remove_cache_extent(extent_cache, &rec->cache);
6543 free_all_extent_backrefs(rec);
6544 list_del_init(&rec->list);
/*
 * Check that the owner recorded in the header of @buf really references
 * the block. Returns 0 when a reference was found and 1 otherwise.
 *
 * First the backrefs of @rec are scanned for one whose root equals the
 * header owner. Otherwise the owner fs tree is read and searched down to
 * the level above @buf, and the parent slot found there is compared with
 * buf->start.
 * NOTE(review): the statements taken after each test in the backref loop
 * are not visible in this listing.
 */
6550 static int check_owner_ref(struct btrfs_root *root,
6551 struct extent_record *rec,
6552 struct extent_buffer *buf)
6554 struct extent_backref *node, *tmp;
6555 struct tree_backref *back;
6556 struct btrfs_root *ref_root;
6557 struct btrfs_key key;
6558 struct btrfs_path path;
6559 struct extent_buffer *parent;
6564 rbtree_postorder_for_each_entry_safe(node, tmp,
6565 &rec->backref_tree, node) {
6568 if (!node->found_ref)
6570 if (node->full_backref)
6572 back = to_tree_backref(node);
6573 if (btrfs_header_owner(buf) == back->root)
6576 BUG_ON(rec->is_root);
6578 /* try to find the block by search corresponding fs tree */
6579 key.objectid = btrfs_header_owner(buf);
6580 key.type = BTRFS_ROOT_ITEM_KEY;
6581 key.offset = (u64)-1;
6583 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6584 if (IS_ERR(ref_root))
6587 level = btrfs_header_level(buf);
/* Use the first key of the block as the search key. */
6589 btrfs_item_key_to_cpu(buf, &key, 0);
6591 btrfs_node_key_to_cpu(buf, &key, 0);
6593 btrfs_init_path(&path);
/* Stop one level above the block so its parent pointer can be inspected. */
6594 path.lowest_level = level + 1;
6595 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6599 parent = path.nodes[level + 1];
6600 if (parent && buf->start == btrfs_node_blockptr(parent,
6601 path.slots[level + 1]))
6604 btrfs_release_path(&path);
6605 return found ? 0 : 1;
/*
 * Walk the backrefs of @rec and test, for each one, whether it is a full
 * backref and whether its root is BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the statements taken on each test and the return value are
 * not visible in this listing.
 */
6608 static int is_extent_tree_record(struct extent_record *rec)
6610 struct extent_backref *node, *tmp;
6611 struct tree_backref *back;
6614 rbtree_postorder_for_each_entry_safe(node, tmp,
6615 &rec->backref_tree, node) {
6618 back = to_tree_backref(node);
6619 if (node->full_backref)
6621 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, start + len) in @extent_cache,
 * test it with is_extent_tree_record(), and register a corrupt extent
 * record under the parent key of that record.
 * NOTE(review): the early exits for a missing cache entry and for a failed
 * is_extent_tree_record() test are not visible in this listing.
 */
6628 static int record_bad_block_io(struct btrfs_fs_info *info,
6629 struct cache_tree *extent_cache,
6632 struct extent_record *rec;
6633 struct cache_extent *cache;
6634 struct btrfs_key key;
6636 cache = lookup_cache_extent(extent_cache, start, len);
6640 rec = container_of(cache, struct extent_record, cache);
6641 if (!is_extent_tree_record(rec))
6644 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6645 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (non-zero header level) the two key pointers are exchanged.
 * For a leaf the item data is copied out through two temporary buffers and
 * written back crosswise, then the item offsets, sizes and keys are
 * exchanged.
 */
6648 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6649 struct extent_buffer *buf, int slot)
6651 if (btrfs_header_level(buf)) {
6652 struct btrfs_key_ptr ptr1, ptr2;
6654 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6655 sizeof(struct btrfs_key_ptr));
6656 read_extent_buffer(buf, &ptr2,
6657 btrfs_node_key_ptr_offset(slot + 1),
6658 sizeof(struct btrfs_key_ptr));
6659 write_extent_buffer(buf, &ptr1,
6660 btrfs_node_key_ptr_offset(slot + 1),
6661 sizeof(struct btrfs_key_ptr));
6662 write_extent_buffer(buf, &ptr2,
6663 btrfs_node_key_ptr_offset(slot),
6664 sizeof(struct btrfs_key_ptr));
6666 struct btrfs_disk_key key;
/* Propagate the first key of the node to the levels above it. */
6667 btrfs_node_key(buf, &key, 0);
6668 btrfs_fixup_low_keys(root, path, &key,
6669 btrfs_header_level(buf) + 1);
6672 struct btrfs_item *item1, *item2;
6673 struct btrfs_key k1, k2;
6674 char *item1_data, *item2_data;
6675 u32 item1_offset, item2_offset, item1_size, item2_size;
6677 item1 = btrfs_item_nr(slot);
6678 item2 = btrfs_item_nr(slot + 1);
6679 btrfs_item_key_to_cpu(buf, &k1, slot);
6680 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6681 item1_offset = btrfs_item_offset(buf, item1);
6682 item2_offset = btrfs_item_offset(buf, item2);
6683 item1_size = btrfs_item_size(buf, item1);
6684 item2_size = btrfs_item_size(buf, item2);
6686 item1_data = malloc(item1_size);
6689 item2_data = malloc(item2_size);
6695 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6696 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but item2_size bytes are
 * written from it, and the other way round for item2_data. This looks
 * correct only when both items have the same size - confirm.
 */
6698 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6699 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6703 btrfs_set_item_offset(buf, item1, item2_offset);
6704 btrfs_set_item_offset(buf, item2, item1_offset);
6705 btrfs_set_item_size(buf, item1, item2_size);
6706 btrfs_set_item_size(buf, item2, item1_size);
/* Point the path at each slot in turn to rewrite its key. */
6708 path->slots[0] = slot;
6709 btrfs_set_item_key_unsafe(root, path, &k2);
6710 path->slots[0] = slot + 1;
6711 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair the key order of the block at path->lowest_level by comparing each
 * pair of neighbouring keys and calling swap_values() on pairs that are not
 * in strictly ascending order. The buffer is marked dirty.
 * NOTE(review): the handling of the swap_values() return value is not
 * visible in this listing.
 */
6716 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6718 struct extent_buffer *buf;
6719 struct btrfs_key k1, k2;
6721 int level = path->lowest_level;
6724 buf = path->nodes[level];
6725 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Nodes and leaves store their keys differently. */
6727 btrfs_node_key_to_cpu(buf, &k1, i);
6728 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6730 btrfs_item_key_to_cpu(buf, &k1, i);
6731 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6733 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6735 ret = swap_values(root, path, buf, i);
6738 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys pass the type test. The following item headers are
 * moved down by one position, nritems is decremented and the buffer is
 * marked dirty. The visible code moves item headers only.
 * NOTE(review): the condition guarding the btrfs_fixup_low_keys() call is
 * not visible in this listing.
 */
6744 static int delete_bogus_item(struct btrfs_root *root,
6745 struct btrfs_path *path,
6746 struct extent_buffer *buf, int slot)
6748 struct btrfs_key key;
6749 int nritems = btrfs_header_nritems(buf);
6751 btrfs_item_key_to_cpu(buf, &key, slot);
6753 /* These are all the keys we can deal with missing. */
6754 if (key.type != BTRFS_DIR_INDEX_KEY &&
6755 key.type != BTRFS_EXTENT_ITEM_KEY &&
6756 key.type != BTRFS_METADATA_ITEM_KEY &&
6757 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6758 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6761 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6762 (unsigned long long)key.objectid, key.type,
6763 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array. */
6764 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6765 btrfs_item_nr_offset(slot + 1),
6766 sizeof(struct btrfs_item) *
6767 (nritems - slot - 1));
6768 btrfs_set_header_nritems(buf, nritems - 1);
6770 struct btrfs_disk_key disk_key;
6772 btrfs_item_key(buf, &disk_key, 0);
6773 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6775 btrfs_mark_buffer_dirty(buf);
/*
 * Repair the data offsets of the items in the leaf at path->nodes[0].
 *
 * The end of item 0 is compared with BTRFS_LEAF_DATA_SIZE and the end of
 * every later item with the offset of the item before it. An item that ends
 * too far is handed to delete_bogus_item(); an item that ends too early is
 * shifted by the size of the gap.
 * NOTE(review): the handling of the delete_bogus_item() result is not
 * visible in this listing.
 */
6779 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6781 struct extent_buffer *buf;
6785 /* We should only get this for leaves */
6786 BUG_ON(path->lowest_level);
6787 buf = path->nodes[0];
6789 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6790 unsigned int shift = 0, offset;
6792 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6793 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6794 if (btrfs_item_end_nr(buf, i) >
6795 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6796 ret = delete_bogus_item(root, path, buf, i);
6799 fprintf(stderr, "item is off the end of the "
6800 "leaf, can't fix\n");
6804 shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6805 btrfs_item_end_nr(buf, i);
6806 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6807 btrfs_item_offset_nr(buf, i - 1)) {
6808 if (btrfs_item_end_nr(buf, i) >
6809 btrfs_item_offset_nr(buf, i - 1)) {
6810 ret = delete_bogus_item(root, path, buf, i);
6813 fprintf(stderr, "items overlap, can't fix\n");
6817 shift = btrfs_item_offset_nr(buf, i - 1) -
6818 btrfs_item_end_nr(buf, i);
6823 printf("Shifting item nr %d by %u bytes in block %llu\n",
6824 i, shift, (unsigned long long)buf->start);
/* Move the item data up by shift bytes and record the new offset. */
6825 offset = btrfs_item_offset_nr(buf, i);
6826 memmove_extent_buffer(buf,
6827 btrfs_leaf_data(buf) + offset + shift,
6828 btrfs_leaf_data(buf) + offset,
6829 btrfs_item_size_nr(buf, i));
6830 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6832 btrfs_mark_buffer_dirty(buf);
6836 * We may have moved things, in which case we want to exit so we don't
6837 * write those changes out. Once we have proper abort functionality in
6838 * progs this can be changed to something nicer.
6845 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6846 * then just return -EIO.
/*
 * Try to repair @buf when @status is BTRFS_TREE_BLOCK_BAD_KEY_ORDER or
 * BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 *
 * All roots referencing the block are collected with
 * btrfs_find_all_roots(). For each of them the root is read, a transaction
 * is started, the block is searched for by its first key with block checks
 * disabled, and fix_key_order() or fix_item_offset() is run depending on
 * @status before the transaction is committed.
 * NOTE(review): the error branches between the visible commit calls are not
 * visible in this listing.
 */
6848 static int try_to_fix_bad_block(struct btrfs_root *root,
6849 struct extent_buffer *buf,
6850 enum btrfs_tree_block_status status)
6852 struct btrfs_trans_handle *trans;
6853 struct ulist *roots;
6854 struct ulist_node *node;
6855 struct btrfs_root *search_root;
6856 struct btrfs_path path;
6857 struct ulist_iterator iter;
6858 struct btrfs_key root_key, key;
6861 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6862 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6865 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6869 btrfs_init_path(&path);
6870 ULIST_ITER_INIT(&iter);
6871 while ((node = ulist_next(roots, &iter))) {
6872 root_key.objectid = node->val;
6873 root_key.type = BTRFS_ROOT_ITEM_KEY;
6874 root_key.offset = (u64)-1;
6876 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6883 trans = btrfs_start_transaction(search_root, 0);
6884 if (IS_ERR(trans)) {
6885 ret = PTR_ERR(trans);
/* Stop the search at the level of the bad block and skip block checks. */
6889 path.lowest_level = btrfs_header_level(buf);
6890 path.skip_check_block = 1;
6891 if (path.lowest_level)
6892 btrfs_node_key_to_cpu(buf, &key, 0);
6894 btrfs_item_key_to_cpu(buf, &key, 0);
6895 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6898 btrfs_commit_transaction(trans, search_root);
6901 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6902 ret = fix_key_order(search_root, &path);
6903 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6904 ret = fix_item_offset(search_root, &path);
6906 btrfs_commit_transaction(trans, search_root);
6909 btrfs_release_path(&path);
6910 btrfs_commit_transaction(trans, search_root);
6913 btrfs_release_path(&path);
/*
 * Check the structure of tree block @buf and update its extent record.
 *
 * The record covering [buf->start, buf->len) is looked up in @extent_cache
 * and receives the block's generation, its level and the objectid of its
 * first key. The block is validated with btrfs_check_leaf() or
 * btrfs_check_node() against the record's parent key; on failure a repair
 * may be attempted through try_to_fix_bad_block(). Finally the content is
 * marked as checked and the owner reference is either taken as checked
 * (BTRFS_BLOCK_FLAG_FULL_BACKREF set in @flags) or verified with
 * check_owner_ref().
 */
6917 static int check_block(struct btrfs_root *root,
6918 struct cache_tree *extent_cache,
6919 struct extent_buffer *buf, u64 flags)
6921 struct extent_record *rec;
6922 struct cache_extent *cache;
6923 struct btrfs_key key;
6924 enum btrfs_tree_block_status status;
6928 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6931 rec = container_of(cache, struct extent_record, cache);
6932 rec->generation = btrfs_header_generation(buf);
6934 level = btrfs_header_level(buf);
/* Remember the first key's objectid, if the block has any items */
6935 if (btrfs_header_nritems(buf) > 0) {
6938 btrfs_item_key_to_cpu(buf, &key, 0);
6940 btrfs_node_key_to_cpu(buf, &key, 0);
6942 rec->info_objectid = key.objectid;
6944 rec->info_level = level;
6946 if (btrfs_is_leaf(buf))
6947 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6949 status = btrfs_check_node(root, &rec->parent_key, buf);
6951 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6953 status = try_to_fix_bad_block(root, buf, status);
6954 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6956 fprintf(stderr, "bad block %llu\n",
6957 (unsigned long long)buf->start);
6960 * Signal to callers we need to start the scan over
6961 * again since we'll have cowed blocks.
6966 rec->content_checked = 1;
6967 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6968 rec->owner_ref_checked = 1;
6970 ret = check_owner_ref(root, rec, buf);
6972 rec->owner_ref_checked = 1;
6976 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look for a tree backref of @rec that matches @parent or @root.
 *
 * Walks the rec->backrefs list. Full backrefs are compared on back->parent,
 * the remaining ones on back->root.
 */
6981 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6982 u64 parent, u64 root)
6984 struct list_head *cur = rec->backrefs.next;
6985 struct extent_backref *node;
6986 struct tree_backref *back;
6988 while(cur != &rec->backrefs) {
6989 node = to_extent_backref(cur);
6993 back = to_tree_backref(node);
6995 if (!node->full_backref)
6997 if (parent == back->parent)
7000 if (node->full_backref)
7002 if (back->root == root)
/*
 * Allocate a tree backref for @rec and initialise its embedded node.
 *
 * The node is zeroed first. A backref that records @parent is flagged as a
 * full backref; the other variant clears the flag.
 */
7010 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7011 u64 parent, u64 root)
7013 struct tree_backref *ref = malloc(sizeof(*ref));
7017 memset(&ref->node, 0, sizeof(ref->node));
7019 ref->parent = parent;
7020 ref->node.full_backref = 1;
7023 ref->node.full_backref = 0;
/*
 * Look for a data backref of @rec.
 *
 * Walks the rec->backrefs list. Full backrefs are compared on back->parent;
 * the others have to match @root, @owner and @offset. Among the latter, an
 * entry that already has found_ref set while the caller also passes
 * found_ref, but whose bytes or disk_bytenr differ from @bytes or
 * @disk_bytenr, is singled out (presumably skipped, so that a mismatching
 * reference ends up with its own backref).
 */
7030 static struct data_backref *find_data_backref(struct extent_record *rec,
7031 u64 parent, u64 root,
7032 u64 owner, u64 offset,
7034 u64 disk_bytenr, u64 bytes)
7036 struct list_head *cur = rec->backrefs.next;
7037 struct extent_backref *node;
7038 struct data_backref *back;
7040 while(cur != &rec->backrefs) {
7041 node = to_extent_backref(cur);
7045 back = to_data_backref(node);
7047 if (!node->full_backref)
7049 if (parent == back->parent)
7052 if (node->full_backref)
7054 if (back->root == root && back->owner == owner &&
7055 back->offset == offset) {
7056 if (found_ref && node->found_ref &&
7057 (back->bytes != bytes ||
7058 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref for @rec and initialise it.
 *
 * The embedded node is zeroed and flagged as a data backref. A backref that
 * records @parent is a full backref; the other variant stores @offset and
 * clears the flag. ref->bytes is set to max_size, and rec->max_size is
 * raised to max_size when it is smaller.
 */
7068 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7069 u64 parent, u64 root,
7070 u64 owner, u64 offset,
7073 struct data_backref *ref = malloc(sizeof(*ref));
7077 memset(&ref->node, 0, sizeof(ref->node));
7078 ref->node.is_data = 1;
7081 ref->parent = parent;
7084 ref->node.full_backref = 1;
7088 ref->offset = offset;
7089 ref->node.full_backref = 0;
7091 ref->bytes = max_size;
/* Keep the record's max_size as the largest size seen so far */
7094 if (max_size > rec->max_size)
7095 rec->max_size = max_size;
7099 /* Check if the type of extent matches with its chunk */
/*
 * The block group is looked up by rec->start in global_info. A mismatch is
 * reported by setting rec->wrong_chunk_type:
 *  - data extents need a block group with BTRFS_BLOCK_GROUP_DATA
 *  - metadata extents need BTRFS_BLOCK_GROUP_SYSTEM or
 *    BTRFS_BLOCK_GROUP_METADATA; once a backref is known, the first entry
 *    of rec->backref_tree decides which of the two is expected
 */
7100 static void check_extent_type(struct extent_record *rec)
7102 struct btrfs_block_group_cache *bg_cache;
7104 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7108 /* data extent, check chunk directly*/
7109 if (!rec->metadata) {
7110 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7111 rec->wrong_chunk_type = 1;
7115 /* metadata extent, check the obvious case first */
7116 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7117 BTRFS_BLOCK_GROUP_METADATA))) {
7118 rec->wrong_chunk_type = 1;
7123 * Check SYSTEM extent, as it's also marked as metadata, we can only
7124 * make sure it's a SYSTEM extent by its backref
7126 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7127 struct extent_backref *node;
7128 struct tree_backref *tback;
7131 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7132 if (node->is_data) {
7133 /* tree block shouldn't have data backref */
7134 rec->wrong_chunk_type = 1;
7137 tback = container_of(node, struct tree_backref, node);
/* Blocks owned by the chunk tree belong in a SYSTEM block group */
7139 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7140 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7142 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7143 if (!(bg_cache->flags & bg_type))
7144 rec->wrong_chunk_type = 1;
7149 * Allocate a new extent record, fill default values from @tmpl and insert into
7150 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7151 * the cache, otherwise it fails.
/*
 * Create an extent record from @tmpl and insert it into @extent_cache.
 *
 * tmpl->max_size must be non-zero (enforced with BUG_ON). Fields that are
 * not taken from @tmpl start out cleared: num_duplicates, bad_full_backref,
 * crossing_stripes and wrong_chunk_type are 0 and flag_block_full_backref
 * is FLAG_UNSET. After a successful insert the global bytes_used counter
 * grows by rec->nr and check_extent_type() is run on the new record.
 */
7153 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7154 struct extent_record *tmpl)
7156 struct extent_record *rec;
7159 BUG_ON(tmpl->max_size == 0);
7160 rec = malloc(sizeof(*rec));
7163 rec->start = tmpl->start;
7164 rec->max_size = tmpl->max_size;
/*
 * NOTE(review): rec->nr is the larger of nr and max_size, while the cache
 * entry below is sized with tmpl->nr only - confirm this is intended.
 */
7165 rec->nr = max(tmpl->nr, tmpl->max_size);
7166 rec->found_rec = tmpl->found_rec;
7167 rec->content_checked = tmpl->content_checked;
7168 rec->owner_ref_checked = tmpl->owner_ref_checked;
7169 rec->num_duplicates = 0;
7170 rec->metadata = tmpl->metadata;
7171 rec->flag_block_full_backref = FLAG_UNSET;
7172 rec->bad_full_backref = 0;
7173 rec->crossing_stripes = 0;
7174 rec->wrong_chunk_type = 0;
7175 rec->is_root = tmpl->is_root;
7176 rec->refs = tmpl->refs;
7177 rec->extent_item_refs = tmpl->extent_item_refs;
7178 rec->parent_generation = tmpl->parent_generation;
7179 INIT_LIST_HEAD(&rec->backrefs);
7180 INIT_LIST_HEAD(&rec->dups);
7181 INIT_LIST_HEAD(&rec->list);
7182 rec->backref_tree = RB_ROOT;
7183 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7184 rec->cache.start = tmpl->start;
7185 rec->cache.size = tmpl->nr;
7186 ret = insert_cache_extent(extent_cache, &rec->cache);
7191 bytes_used += rec->nr;
/* The stripe crossing test uses nodesize as the extent length */
7194 rec->crossing_stripes = check_crossing_stripes(global_info,
7195 rec->start, global_info->nodesize);
7196 check_extent_type(rec);
7201 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7203 * - refs - if found, increase refs
7204 * - is_root - if found, set
7205 * - content_checked - if found, set
7206 * - owner_ref_checked - if found, set
7208 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record overlapping [tmpl->start, tmpl->nr),
 * or create a new record through add_extent_rec_nolookup().
 *
 * On an existing record:
 *  - when @tmpl comes from an extent item (found_rec) and either starts at a
 *    different bytenr or the record already had one, the record is put on
 *    the global duplicate_extents list and a copy of the template is queued
 *    on rec->dups
 *  - extent_item_refs is taken from @tmpl unless this was a duplicate; a
 *    message is printed when the record already had a value
 *  - content_checked and owner_ref_checked are only ever set, parent_key is
 *    overwritten, parent_generation is taken when non-zero and max_size
 *    only grows
 * The record is then re-evaluated with check_extent_type() and handed to
 * maybe_free_extent_rec().
 */
7210 static int add_extent_rec(struct cache_tree *extent_cache,
7211 struct extent_record *tmpl)
7213 struct extent_record *rec;
7214 struct cache_extent *cache;
7218 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7220 rec = container_of(cache, struct extent_record, cache);
7224 rec->nr = max(tmpl->nr, tmpl->max_size);
7227 * We need to make sure to reset nr to whatever the extent
7228 * record says was the real size, this way we can compare it to
7231 if (tmpl->found_rec) {
7232 if (tmpl->start != rec->start || rec->found_rec) {
7233 struct extent_record *tmp;
7236 if (list_empty(&rec->list))
7237 list_add_tail(&rec->list,
7238 &duplicate_extents);
7241 * We have to do this song and dance in case we
7242 * find an extent record that falls inside of
7243 * our current extent record but does not have
7244 * the same objectid.
7246 tmp = malloc(sizeof(*tmp));
7249 tmp->start = tmpl->start;
7250 tmp->max_size = tmpl->max_size;
7253 tmp->metadata = tmpl->metadata;
7254 tmp->extent_item_refs = tmpl->extent_item_refs;
7255 INIT_LIST_HEAD(&tmp->list);
7256 list_add_tail(&tmp->list, &rec->dups);
7257 rec->num_duplicates++;
7264 if (tmpl->extent_item_refs && !dup) {
7265 if (rec->extent_item_refs) {
7266 fprintf(stderr, "block %llu rec "
7267 "extent_item_refs %llu, passed %llu\n",
7268 (unsigned long long)tmpl->start,
7269 (unsigned long long)
7270 rec->extent_item_refs,
7271 (unsigned long long)tmpl->extent_item_refs);
7273 rec->extent_item_refs = tmpl->extent_item_refs;
7277 if (tmpl->content_checked)
7278 rec->content_checked = 1;
7279 if (tmpl->owner_ref_checked)
7280 rec->owner_ref_checked = 1;
7281 memcpy(&rec->parent_key, &tmpl->parent_key,
7282 sizeof(tmpl->parent_key));
7283 if (tmpl->parent_generation)
7284 rec->parent_generation = tmpl->parent_generation;
7285 if (rec->max_size < tmpl->max_size)
7286 rec->max_size = tmpl->max_size;
7289 * A metadata extent can't cross stripe_len boundary, otherwise
7290 * kernel scrub won't be able to handle it.
7291 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7295 rec->crossing_stripes = check_crossing_stripes(
7296 global_info, rec->start,
7297 global_info->nodesize);
7298 check_extent_type(rec);
7299 maybe_free_extent_rec(extent_cache, rec);
7303 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr yet, one is created from a zeroed
 * template through add_extent_rec_nolookup() and looked up again. The
 * matching backref is searched with find_tree_backref() and allocated with
 * alloc_tree_backref() when needed.
 *
 * @found_ref selects which side is marked: node.found_ref for a reference
 * found in a tree, node.found_extent_tree otherwise. Seeing the same side
 * twice prints "Extent back ref already exists". The backref is inserted
 * into rec->backref_tree (a failed insert only triggers WARN_ON), after
 * which the record is re-evaluated with check_extent_type() and handed to
 * maybe_free_extent_rec().
 */
7308 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7309 u64 parent, u64 root, int found_ref)
7311 struct extent_record *rec;
7312 struct tree_backref *back;
7313 struct cache_extent *cache;
7315 bool insert = false;
7317 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7319 struct extent_record tmpl;
7321 memset(&tmpl, 0, sizeof(tmpl));
7322 tmpl.start = bytenr;
7327 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7331 /* really a bug in cache_extent implement now */
7332 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7337 rec = container_of(cache, struct extent_record, cache);
7338 if (rec->start != bytenr) {
7340 * Several cause, from unaligned bytenr to over lapping extents
7345 back = find_tree_backref(rec, parent, root);
7347 back = alloc_tree_backref(rec, parent, root);
7354 if (back->node.found_ref) {
7355 fprintf(stderr, "Extent back ref already exists "
7356 "for %llu parent %llu root %llu \n",
7357 (unsigned long long)bytenr,
7358 (unsigned long long)parent,
7359 (unsigned long long)root);
7361 back->node.found_ref = 1;
7363 if (back->node.found_extent_tree) {
7364 fprintf(stderr, "Extent back ref already exists "
7365 "for %llu parent %llu root %llu \n",
7366 (unsigned long long)bytenr,
7367 (unsigned long long)parent,
7368 (unsigned long long)root);
7370 back->node.found_extent_tree = 1;
7373 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7374 compare_extent_backref));
7375 check_extent_type(rec);
7376 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr yet, one is created from a template
 * carrying @max_size and looked up again; rec->max_size only grows. The
 * backref is searched with find_data_backref() and allocated with
 * alloc_data_backref() when needed.
 *
 * With @found_ref set, @num_refs must be 1 and a backref that was already
 * found must have the same size (both enforced with BUG_ON). The backref's
 * found_ref counter is bumped, bytes and disk_bytenr are refreshed from the
 * arguments, and the record is marked content and owner-ref checked.
 * Without @found_ref the reference comes from the extent tree: @num_refs is
 * stored and found_extent_tree is set, with a message printed when it was
 * set before.
 */
7380 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7381 u64 parent, u64 root, u64 owner, u64 offset,
7382 u32 num_refs, int found_ref, u64 max_size)
7384 struct extent_record *rec;
7385 struct data_backref *back;
7386 struct cache_extent *cache;
7388 bool insert = false;
7390 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7392 struct extent_record tmpl;
7394 memset(&tmpl, 0, sizeof(tmpl));
7395 tmpl.start = bytenr;
7397 tmpl.max_size = max_size;
7399 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7403 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7408 rec = container_of(cache, struct extent_record, cache);
7409 if (rec->max_size < max_size)
7410 rec->max_size = max_size;
7413 * If found_ref is set then max_size is the real size and must match the
7414 * existing refs. So if we have already found a ref then we need to
7415 * make sure that this ref matches the existing one, otherwise we need
7416 * to add a new backref so we can notice that the backrefs don't match
7417 * and we need to figure out who is telling the truth. This is to
7418 * account for that awful fsync bug I introduced where we'd end up with
7419 * a btrfs_file_extent_item that would have its length include multiple
7420 * prealloc extents or point inside of a prealloc extent.
7422 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7425 back = alloc_data_backref(rec, parent, root, owner, offset,
7432 BUG_ON(num_refs != 1);
7433 if (back->node.found_ref)
7434 BUG_ON(back->bytes != max_size);
7435 back->node.found_ref = 1;
7436 back->found_ref += 1;
7437 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7438 back->bytes = max_size;
7439 back->disk_bytenr = bytenr;
7441 /* Need to reinsert if not already in the tree */
7443 rb_erase(&back->node.node, &rec->backref_tree);
7448 rec->content_checked = 1;
7449 rec->owner_ref_checked = 1;
7451 if (back->node.found_extent_tree) {
7452 fprintf(stderr, "Extent back ref already exists "
7453 "for %llu parent %llu root %llu "
7454 "owner %llu offset %llu num_refs %lu\n",
7455 (unsigned long long)bytenr,
7456 (unsigned long long)parent,
7457 (unsigned long long)root,
7458 (unsigned long long)owner,
7459 (unsigned long long)offset,
7460 (unsigned long)num_refs);
7462 back->num_refs = num_refs;
7463 back->node.found_extent_tree = 1;
7466 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7467 compare_extent_backref));
7469 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [@bytenr, @size) for later processing.
 *
 * The range is first recorded in @seen and then added to @pending. Only the
 * result of the insert into @seen is kept in ret; the result of the insert
 * into @pending is not checked.
 */
7473 static int add_pending(struct cache_tree *pending,
7474 struct cache_tree *seen, u64 bytenr, u32 size)
7477 ret = add_cache_extent(seen, bytenr, size);
7480 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next blocks to process and store them in @bits.
 *
 * Sources are consulted in this order: the first entry of @reada, then
 * @nodes starting a little before @last (falling back to the start of
 * @nodes), then @pending. Entries are copied into @bits as start/size
 * pairs, never more than @bits_nr of them. When more than 8 slots are
 * still free, further entries of @pending that lie within 32768 bytes of
 * the previously picked range are appended as well.
 */
7484 static int pick_next_pending(struct cache_tree *pending,
7485 struct cache_tree *reada,
7486 struct cache_tree *nodes,
7487 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): @last is a u64 but is stored in an unsigned long here, so
 * the value is narrowed where unsigned long is 32 bits wide.
 */
7490 unsigned long node_start = last;
7491 struct cache_extent *cache;
7494 cache = search_cache_extent(reada, 0);
7496 bits[0].start = cache->start;
7497 bits[0].size = cache->size;
/* Start the node search up to 32768 bytes before @last */
7502 if (node_start > 32768)
7503 node_start -= 32768;
7505 cache = search_cache_extent(nodes, node_start);
7507 cache = search_cache_extent(nodes, 0);
7510 cache = search_cache_extent(pending, 0);
7515 bits[ret].start = cache->start;
7516 bits[ret].size = cache->size;
7517 cache = next_cache_extent(cache);
7519 } while (cache && ret < bits_nr);
7525 bits[ret].start = cache->start;
7526 bits[ret].size = cache->size;
7527 cache = next_cache_extent(cache);
7529 } while (cache && ret < bits_nr);
7531 if (bits_nr - ret > 8) {
7532 u64 lookup = bits[0].start + bits[0].size;
7533 struct cache_extent *next;
7534 next = search_cache_extent(pending, lookup);
7536 if (next->start - lookup > 32768)
7538 bits[ret].start = next->start;
7539 bits[ret].size = next->size;
7540 lookup = next->start + next->size;
7544 next = next_cache_extent(next);
/*
 * Destructor callback for chunk cache entries.
 *
 * Resolves the chunk_record embedding @cache and unlinks it from both of
 * its lists (list and dextents).
 */
7552 static void free_chunk_record(struct cache_extent *cache)
7554 struct chunk_record *rec;
7556 rec = container_of(cache, struct chunk_record, cache);
7557 list_del_init(&rec->list);
7558 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache, calling free_chunk_record() on each
 * one through cache_tree_free_extents().
 */
7562 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7564 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Destructor callback for device cache nodes: resolves the device_record
 * that embeds @node.
 */
7567 static void free_device_record(struct rb_node *node)
7569 struct device_record *rec;
7571 rec = container_of(node, struct device_record, node);
/*
 * Macro instantiation for the device cache, using free_device_record() as
 * the per-node callback. Presumably this expands to the function that frees
 * the whole rb tree - confirm against the FREE_RB_BASED_TREE definition.
 */
7575 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to @tree: the record's cache entry goes into tree->tree and
 * the record itself is appended to the tree->block_groups list.
 */
7577 int insert_block_group_record(struct block_group_tree *tree,
7578 struct block_group_record *bg_rec)
7582 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7586 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Destructor callback for block group cache entries: resolves the
 * block_group_record embedding @cache and unlinks it from its list.
 */
7590 static void free_block_group_record(struct cache_extent *cache)
7592 struct block_group_record *rec;
7594 rec = container_of(cache, struct block_group_record, cache);
7595 list_del_init(&rec->list);
/*
 * Release every entry of tree->tree, calling free_block_group_record() on
 * each one through cache_tree_free_extents().
 */
7599 void free_block_group_tree(struct block_group_tree *tree)
7601 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to @tree.
 *
 * The cache entry is inserted with insert_cache_extent2() and the record is
 * appended to both orphan lists of the tree (no_chunk_orphans and
 * no_device_orphans).
 */
7604 int insert_device_extent_record(struct device_extent_tree *tree,
7605 struct device_extent_record *de_rec)
7610 * Device extent is a bit different from the other extents, because
7611 * the extents which belong to the different devices may have the
7612 * same start and size, so we need use the special extent cache
7613 * search/insert functions.
7615 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7619 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7620 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Destructor callback for device extent cache entries: resolves the
 * device_extent_record embedding @cache and unlinks it from chunk_list and
 * device_list when it is still on them.
 */
7624 static void free_device_extent_record(struct cache_extent *cache)
7626 struct device_extent_record *rec;
7628 rec = container_of(cache, struct device_extent_record, cache);
7629 if (!list_empty(&rec->chunk_list))
7630 list_del_init(&rec->chunk_list);
7631 if (!list_empty(&rec->device_list))
7632 list_del_init(&rec->device_list);
/*
 * Release every entry of tree->tree, calling free_device_extent_record() on
 * each one through cache_tree_free_extents().
 */
7636 void free_device_extent_tree(struct device_extent_tree *tree)
7638 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7641 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * Only built with BTRFS_COMPAT_EXTENT_TREE_V0. The objectid stored in the
 * ref decides the kind: values below BTRFS_FIRST_FREE_OBJECTID produce a
 * tree backref, everything else a data backref carrying the ref count from
 * the item. In both cases key.objectid is the extent bytenr and key.offset
 * is passed as the parent.
 */
7642 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7643 struct extent_buffer *leaf, int slot)
7645 struct btrfs_extent_ref_v0 *ref0;
7646 struct btrfs_key key;
7649 btrfs_item_key_to_cpu(leaf, &key, slot);
7650 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7651 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7652 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7655 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7656 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for all stripes of the chunk. Its
 * cache entry covers [key->offset, chunk length); the key fields, the
 * leaf's generation and the chunk attributes (owner, stripe_len, type,
 * io_width, io_align, sector_size, num_stripes, sub_stripes) are copied in,
 * followed by devid, offset and device uuid of every stripe.
 */
7662 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7663 struct btrfs_key *key,
7666 struct btrfs_chunk *ptr;
7667 struct chunk_record *rec;
7670 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7671 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7673 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7675 fprintf(stderr, "memory allocation failed\n");
7679 INIT_LIST_HEAD(&rec->list);
7680 INIT_LIST_HEAD(&rec->dextents);
7683 rec->cache.start = key->offset;
7684 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7686 rec->generation = btrfs_header_generation(leaf);
7688 rec->objectid = key->objectid;
7689 rec->type = key->type;
7690 rec->offset = key->offset;
7692 rec->length = rec->cache.size;
7693 rec->owner = btrfs_chunk_owner(leaf, ptr);
7694 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7695 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7696 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7697 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7698 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7699 rec->num_stripes = num_stripes;
7700 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe data that follows the chunk item */
7702 for (i = 0; i < rec->num_stripes; ++i) {
7703 rec->stripes[i].devid =
7704 btrfs_stripe_devid_nr(leaf, ptr, i);
7705 rec->stripes[i].offset =
7706 btrfs_stripe_offset_nr(leaf, ptr, i);
7707 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7708 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is checked with btrfs_check_chunk_valid() first; a failure is
 * reported as "chunk(...) is not valid, ignore it". A valid item becomes a
 * chunk_record whose cache entry is inserted into @chunk_cache; a failed
 * insert is reported as "Chunk[...] existed.".
 */
7715 static int process_chunk_item(struct cache_tree *chunk_cache,
7716 struct btrfs_key *key, struct extent_buffer *eb,
7719 struct chunk_record *rec;
7720 struct btrfs_chunk *chunk;
7723 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7725 * Do extra check for this chunk item,
7727 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7728 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7729 * and owner<->key_type check.
7731 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7734 error("chunk(%llu, %llu) is not valid, ignore it",
7735 key->offset, btrfs_chunk_length(eb, chunk));
7738 rec = btrfs_new_chunk_record(eb, key, slot);
7739 ret = insert_cache_extent(chunk_cache, &rec->cache);
7741 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7742 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into @dev_cache.
 *
 * The record takes the key fields, the leaf's generation and the device id,
 * total bytes and bytes used from the item. It is inserted with rb_insert()
 * ordered by device_record_compare; a failed insert is reported as
 * "Device[...] existed.".
 */
7749 static int process_device_item(struct rb_root *dev_cache,
7750 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7752 struct btrfs_dev_item *ptr;
7753 struct device_record *rec;
7756 ptr = btrfs_item_ptr(eb,
7757 slot, struct btrfs_dev_item);
7759 rec = malloc(sizeof(*rec));
7761 fprintf(stderr, "memory allocation failed\n");
/*
 * NOTE(review): devid is set from key->offset here and overwritten with
 * the value stored in the item further down.
 */
7765 rec->devid = key->offset;
7766 rec->generation = btrfs_header_generation(eb);
7768 rec->objectid = key->objectid;
7769 rec->type = key->type;
7770 rec->offset = key->offset;
7772 rec->devid = btrfs_device_id(eb, ptr);
7773 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7774 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7776 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7778 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is zero-allocated. Its cache entry covers
 * [key->objectid, key->offset); the key fields, the leaf's generation and
 * the flags stored in the item are copied in and the list head is
 * initialised.
 */
7785 struct block_group_record *
7786 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7789 struct btrfs_block_group_item *ptr;
7790 struct block_group_record *rec;
7792 rec = calloc(1, sizeof(*rec));
7794 fprintf(stderr, "memory allocation failed\n");
7798 rec->cache.start = key->objectid;
7799 rec->cache.size = key->offset;
7801 rec->generation = btrfs_header_generation(leaf);
7803 rec->objectid = key->objectid;
7804 rec->type = key->type;
7805 rec->offset = key->offset;
7807 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7808 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7810 INIT_LIST_HEAD(&rec->list);
/*
 * Create a block_group_record for the item at @slot of @eb and insert it
 * into @block_group_cache. A failed insert is reported as
 * "Block Group[...] existed.".
 */
7815 static int process_block_group_item(struct block_group_tree *block_group_cache,
7816 struct btrfs_key *key,
7817 struct extent_buffer *eb, int slot)
7819 struct block_group_record *rec;
7822 rec = btrfs_new_block_group_record(eb, key, slot);
7823 ret = insert_block_group_record(block_group_cache, rec);
7825 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7826 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is zero-allocated. Its cache entry is keyed by key->objectid
 * and starts at key->offset, with the extent length from the item as its
 * size. The key fields, the leaf's generation and the chunk objectid and
 * chunk offset stored in the item are copied in, and both list heads are
 * initialised.
 */
7833 struct device_extent_record *
7834 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7835 struct btrfs_key *key, int slot)
7837 struct device_extent_record *rec;
7838 struct btrfs_dev_extent *ptr;
7840 rec = calloc(1, sizeof(*rec));
7842 fprintf(stderr, "memory allocation failed\n");
7846 rec->cache.objectid = key->objectid;
7847 rec->cache.start = key->offset;
7849 rec->generation = btrfs_header_generation(leaf);
7851 rec->objectid = key->objectid;
7852 rec->type = key->type;
7853 rec->offset = key->offset;
7855 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7856 rec->chunk_objecteid =
7857 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7859 btrfs_dev_extent_chunk_offset(leaf, ptr);
7860 rec->length = btrfs_dev_extent_length(leaf, ptr);
7861 rec->cache.size = rec->length;
7863 INIT_LIST_HEAD(&rec->chunk_list);
7864 INIT_LIST_HEAD(&rec->device_list);
/*
 * Create a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache. A failed insert is reported as
 * "Device extent[...] existed.".
 */
7870 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7871 struct btrfs_key *key, struct extent_buffer *eb,
7874 struct device_extent_record *rec;
7877 rec = btrfs_new_device_extent_record(eb, key, slot);
7878 ret = insert_device_extent_record(dev_extent_cache, rec);
7881 "Device extent[%llu, %llu, %llu] existed.\n",
7882 rec->objectid, rec->offset, rec->length);
/*
 * Feed the extent item at @slot of @eb into @extent_cache.
 *
 * The extent length is the nodesize for METADATA_ITEM keys and key.offset
 * otherwise. Items are rejected when the bytenr is not sector aligned, when
 * a metadata extent is not exactly nodesize long, or when a data extent
 * length is not sector aligned. Items smaller than a btrfs_extent_item are
 * only understood as v0 items when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * For a regular item an extent record is added from the item's ref count
 * and flags, then the inline refs between the end of the item header and
 * the end of the item are walked and turned into tree or data backrefs.
 */
7889 static int process_extent_item(struct btrfs_root *root,
7890 struct cache_tree *extent_cache,
7891 struct extent_buffer *eb, int slot)
7893 struct btrfs_extent_item *ei;
7894 struct btrfs_extent_inline_ref *iref;
7895 struct btrfs_extent_data_ref *dref;
7896 struct btrfs_shared_data_ref *sref;
7897 struct btrfs_key key;
7898 struct extent_record tmpl;
7903 u32 item_size = btrfs_item_size_nr(eb, slot);
7909 btrfs_item_key_to_cpu(eb, &key, slot);
7911 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7913 num_bytes = root->fs_info->nodesize;
7915 num_bytes = key.offset;
7918 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7919 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7920 key.objectid, root->fs_info->sectorsize);
7923 if (item_size < sizeof(*ei)) {
7924 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7925 struct btrfs_extent_item_v0 *ei0;
7926 if (item_size != sizeof(*ei0)) {
7928 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7929 key.objectid, key.type, key.offset,
7930 btrfs_header_bytenr(eb), slot);
7933 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7934 refs = btrfs_extent_refs_v0(eb, ei0);
7938 memset(&tmpl, 0, sizeof(tmpl));
7939 tmpl.start = key.objectid;
7940 tmpl.nr = num_bytes;
7941 tmpl.extent_item_refs = refs;
7942 tmpl.metadata = metadata;
7944 tmpl.max_size = num_bytes;
7946 return add_extent_rec(extent_cache, &tmpl);
7949 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7950 refs = btrfs_extent_refs(eb, ei);
7951 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7955 if (metadata && num_bytes != root->fs_info->nodesize) {
7956 error("ignore invalid metadata extent, length %llu does not equal to %u",
7957 num_bytes, root->fs_info->nodesize);
7960 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7961 error("ignore invalid data extent, length %llu is not aligned to %u",
7962 num_bytes, root->fs_info->sectorsize);
7966 memset(&tmpl, 0, sizeof(tmpl));
7967 tmpl.start = key.objectid;
7968 tmpl.nr = num_bytes;
7969 tmpl.extent_item_refs = refs;
7970 tmpl.metadata = metadata;
7972 tmpl.max_size = num_bytes;
/* NOTE(review): the result of add_extent_rec() is not checked here */
7973 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; tree blocks keyed as
 * EXTENT_ITEM carry a btrfs_tree_block_info in between.
 */
7975 ptr = (unsigned long)(ei + 1);
7976 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7977 key.type == BTRFS_EXTENT_ITEM_KEY)
7978 ptr += sizeof(struct btrfs_tree_block_info);
7980 end = (unsigned long)ei + item_size;
7982 iref = (struct btrfs_extent_inline_ref *)ptr;
7983 type = btrfs_extent_inline_ref_type(eb, iref);
7984 offset = btrfs_extent_inline_ref_offset(eb, iref);
7986 case BTRFS_TREE_BLOCK_REF_KEY:
7987 ret = add_tree_backref(extent_cache, key.objectid,
7991 "add_tree_backref failed (extent items tree block): %s",
7994 case BTRFS_SHARED_BLOCK_REF_KEY:
7995 ret = add_tree_backref(extent_cache, key.objectid,
7999 "add_tree_backref failed (extent items shared block): %s",
8002 case BTRFS_EXTENT_DATA_REF_KEY:
/* NOTE(review): add_data_backref() results are not checked below */
8003 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8004 add_data_backref(extent_cache, key.objectid, 0,
8005 btrfs_extent_data_ref_root(eb, dref),
8006 btrfs_extent_data_ref_objectid(eb,
8008 btrfs_extent_data_ref_offset(eb, dref),
8009 btrfs_extent_data_ref_count(eb, dref),
8012 case BTRFS_SHARED_DATA_REF_KEY:
8013 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8014 add_data_backref(extent_cache, key.objectid, offset,
8016 btrfs_shared_data_ref_count(eb, sref),
8020 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8021 key.objectid, key.type, num_bytes);
/* Step over this inline ref; its size depends on the ref type */
8024 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the free space cache of @cache has an entry matching the
 * range [@offset, @offset + @bytes) exactly, and consume that entry.
 *
 * Before the lookup the range is adjusted around super block copies: for
 * every super mirror the logical addresses mapping to it inside this block
 * group are obtained with btrfs_rmap_block(), and stripes overlapping the
 * range are cut out of it. A stripe in the middle of the range splits it;
 * the left part is checked by a recursive call and the right part is
 * carried on.
 *
 * The remaining range must be found by btrfs_find_free_space() with the
 * same offset and the same length; mismatches are reported on stderr. A
 * matching entry is removed with unlink_free_space().
 */
8031 static int check_cache_range(struct btrfs_root *root,
8032 struct btrfs_block_group_cache *cache,
8033 u64 offset, u64 bytes)
8035 struct btrfs_free_space *entry;
8041 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8042 bytenr = btrfs_sb_offset(i);
8043 ret = btrfs_rmap_block(root->fs_info,
8044 cache->key.objectid, bytenr, 0,
8045 &logical, &nr, &stripe_len);
8050 if (logical[nr] + stripe_len <= offset)
8052 if (offset + bytes <= logical[nr])
8054 if (logical[nr] == offset) {
8055 if (stripe_len >= bytes) {
8059 bytes -= stripe_len;
8060 offset += stripe_len;
8061 } else if (logical[nr] < offset) {
8062 if (logical[nr] + stripe_len >=
8067 bytes = (offset + bytes) -
8068 (logical[nr] + stripe_len);
8069 offset = logical[nr] + stripe_len;
8072 * Could be tricky, the super may land in the
8073 * middle of the area we're checking. First
8074 * check the easiest case, it's at the end.
8076 if (logical[nr] + stripe_len >=
8078 bytes = logical[nr] - offset;
8082 /* Check the left side */
8083 ret = check_cache_range(root, cache,
8085 logical[nr] - offset);
8091 /* Now we continue with the right side */
8092 bytes = (offset + bytes) -
8093 (logical[nr] + stripe_len);
8094 offset = logical[nr] + stripe_len;
8101 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8103 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8104 offset, offset+bytes);
8108 if (entry->offset != offset) {
8109 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8114 if (entry->bytes != bytes) {
8115 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8116 bytes, entry->bytes, offset);
8120 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Compare the loaded free space cache of block group @cache with the extent
 * tree.
 *
 * The extent tree is walked from the start of the block group (but not
 * below BTRFS_SUPER_INFO_OFFSET) to its end. 'last' tracks the end of the
 * previous extent: EXTENT_ITEM keys advance it by key.offset, METADATA_ITEM
 * keys by the nodesize. Every gap between 'last' and the next extent, and
 * the tail up to the end of the block group, is checked with
 * check_cache_range(), which removes the matching entries. Entries still
 * left in free_space_offset afterwards are reported on stderr.
 */
8125 static int verify_space_cache(struct btrfs_root *root,
8126 struct btrfs_block_group_cache *cache)
8128 struct btrfs_path path;
8129 struct extent_buffer *leaf;
8130 struct btrfs_key key;
/* All lookups below are done in the extent tree */
8134 root = root->fs_info->extent_root;
8136 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8138 btrfs_init_path(&path);
8139 key.objectid = last;
8141 key.type = BTRFS_EXTENT_ITEM_KEY;
8142 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8147 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8148 ret = btrfs_next_leaf(root, &path);
8156 leaf = path.nodes[0];
8157 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8158 if (key.objectid >= cache->key.offset + cache->key.objectid)
8160 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8161 key.type != BTRFS_METADATA_ITEM_KEY) {
8166 if (last == key.objectid) {
8167 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8168 last = key.objectid + key.offset;
8170 last = key.objectid + root->fs_info->nodesize;
8175 ret = check_cache_range(root, cache, last,
8176 key.objectid - last);
8179 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8180 last = key.objectid + key.offset;
8182 last = key.objectid + root->fs_info->nodesize;
8186 if (last < cache->key.objectid + cache->key.offset)
8187 ret = check_cache_range(root, cache, last,
8188 cache->key.objectid +
8189 cache->key.offset - last);
8192 btrfs_release_path(&path);
8195 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8196 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * When the super block's cache generation is set (not -1ULL) and differs
 * from the super generation, a message says that the space cache will be
 * invalidated. Otherwise the block groups are walked by start offset; for
 * each one the free space ctl is initialised or emptied, the free space is
 * loaded either from the free space tree (FREE_SPACE_TREE compat_ro bit,
 * with the super stripes excluded during the load) or from the v1 space
 * cache, and the result is checked with verify_space_cache().
 *
 * Returns -EINVAL when any error was counted, 0 otherwise.
 */
8204 static int check_space_cache(struct btrfs_root *root)
8206 struct btrfs_block_group_cache *cache;
8207 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8211 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8212 btrfs_super_generation(root->fs_info->super_copy) !=
8213 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8214 printf("cache and super generation don't match, space cache "
8215 "will be invalidated\n");
8219 if (ctx.progress_enabled) {
8220 ctx.tp = TASK_FREE_SPACE;
8221 task_start(ctx.info);
8225 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup continues right after this block group */
8229 start = cache->key.objectid + cache->key.offset;
8230 if (!cache->free_space_ctl) {
8231 if (btrfs_init_free_space_ctl(cache,
8232 root->fs_info->sectorsize)) {
8237 btrfs_remove_free_space_cache(cache);
8240 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8241 ret = exclude_super_stripes(root, cache);
8243 fprintf(stderr, "could not exclude super stripes: %s\n",
8248 ret = load_free_space_tree(root->fs_info, cache);
8249 free_excluded_extents(root, cache);
8251 fprintf(stderr, "could not load free space tree: %s\n",
8258 ret = load_free_space_cache(root->fs_info, cache);
8263 ret = verify_space_cache(root, cache);
8265 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8266 cache->key.objectid);
8271 task_stop(ctx.info);
8273 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of the extent range [@bytenr, @num_bytes).
 *
 * @num_bytes is tested against the sector size before anything is read. The
 * data is read with read_extent_data() into a buffer of @num_bytes and
 * checksummed one sector at a time. The expected checksum of sector i is
 * read from @eb at @leaf_offset + i * csum_size. A mismatch is reported on
 * stderr together with the mirror number, and the number of copies is
 * queried to decide whether another mirror can be tried.
 */
8276 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8277 u64 num_bytes, unsigned long leaf_offset,
8278 struct extent_buffer *eb) {
8280 struct btrfs_fs_info *fs_info = root->fs_info;
8282 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8284 unsigned long csum_offset;
8288 u64 data_checked = 0;
8294 if (num_bytes % fs_info->sectorsize)
8297 data = malloc(num_bytes);
8301 while (offset < num_bytes) {
8304 read_len = num_bytes - offset;
8305 /* read as much space once a time */
8306 ret = read_extent_data(fs_info, data + offset,
8307 bytenr + offset, &read_len, mirror);
8311 /* verify every 4k data's checksum */
8312 while (data_checked < read_len) {
8314 tmp = offset + data_checked;
8316 csum = btrfs_csum_data((char *)data + tmp,
8317 csum, fs_info->sectorsize);
8318 btrfs_csum_final(csum, (u8 *)&csum);
8320 csum_offset = leaf_offset +
8321 tmp / fs_info->sectorsize * csum_size;
/*
 * NOTE(review): csum_size bytes are copied into csum_expected, which is
 * printed with %u below - confirm csum_size can never exceed its size.
 */
8322 read_extent_buffer(eb, (char *)&csum_expected,
8323 csum_offset, csum_size);
8324 /* try another mirror */
8325 if (csum != csum_expected) {
8326 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8327 mirror, bytenr + tmp,
8328 csum, csum_expected);
8329 num_copies = btrfs_num_copies(root->fs_info,
8331 if (mirror < num_copies - 1) {
8336 data_checked += fs_info->sectorsize;
/*
 * Check that the range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search starts from the key (bytenr, EXTENT_ITEM, -1) and steps back,
 * then leaf items are walked and the part of the range covered by each
 * EXTENT_ITEM found is trimmed off bytenr/num_bytes.  An extent lying
 * inside the range splits it: the right part is checked by a recursive
 * call and the left part by carrying on here.
 *
 * If bytes are left over and ret is zero, the uncovered range is reported
 * on stderr.
 *
 * @root: used for root->fs_info->extent_root and passed to the leaf
 *        stepping helpers
 */
8345 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8348 struct btrfs_path path;
8349 struct extent_buffer *leaf;
8350 struct btrfs_key key;
8353 btrfs_init_path(&path);
8354 key.objectid = bytenr;
8355 key.type = BTRFS_EXTENT_ITEM_KEY;
8356 key.offset = (u64)-1;
8359 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8362 fprintf(stderr, "Error looking up extent record %d\n", ret);
8363 btrfs_release_path(&path);
8366 if (path.slots[0] > 0) {
8369 ret = btrfs_prev_leaf(root, &path);
8372 } else if (ret > 0) {
8379 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8382 * Block group items come before extent items if they have the same
8383 * bytenr, so walk back one more just in case. Dear future traveller,
8384 * first congrats on mastering time travel. Now if it's not too much
8385 * trouble could you go back to 2006 and tell Chris to make the
8386 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8387 * EXTENT_ITEM_KEY please?
8389 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8390 if (path.slots[0] > 0) {
8393 ret = btrfs_prev_leaf(root, &path);
8396 } else if (ret > 0) {
8401 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8405 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8406 ret = btrfs_next_leaf(root, &path);
8408 fprintf(stderr, "Error going to next leaf "
8410 btrfs_release_path(&path);
8416 leaf = path.nodes[0];
8417 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8418 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8422 if (key.objectid + key.offset < bytenr) {
8426 if (key.objectid > bytenr + num_bytes)
8429 if (key.objectid == bytenr) {
8430 if (key.offset >= num_bytes) {
8434 num_bytes -= key.offset;
8435 bytenr += key.offset;
8436 } else if (key.objectid < bytenr) {
8437 if (key.objectid + key.offset >= bytenr + num_bytes) {
8441 num_bytes = (bytenr + num_bytes) -
8442 (key.objectid + key.offset);
8443 bytenr = key.objectid + key.offset;
8445 if (key.objectid + key.offset < bytenr + num_bytes) {
8446 u64 new_start = key.objectid + key.offset;
8447 u64 new_bytes = bytenr + num_bytes - new_start;
8450 * Weird case, the extent is in the middle of
8451 * our range, we'll have to search one side
8452 * and then the other. Not sure if this happens
8453 * in real life, but no harm in coding it up
8454 * anyway just in case.
8456 btrfs_release_path(&path);
8457 ret = check_extent_exists(root, new_start,
8460 fprintf(stderr, "Right section didn't "
8464 num_bytes = key.objectid - bytenr;
8467 num_bytes = key.objectid - bytenr;
8474 if (num_bytes && !ret) {
8475 fprintf(stderr, "There are no extents for csum range "
8476 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8480 btrfs_release_path(&path);
/*
 * Walk the EXTENT_CSUM items of the csum tree.
 *
 * Csum items are merged into one running range (offset, num_bytes); when an
 * item does not start where the running range ends, the range collected so
 * far is passed to check_extent_exists().  When check_data_csum is set, the
 * data covered by each item is also verified with check_extent_csums().
 *
 * @root: a root of the filesystem; it is replaced by fs_info->csum_root at
 *        the start of the function.
 */
8484 static int check_csums(struct btrfs_root *root)
8486 struct btrfs_path path;
8487 struct extent_buffer *leaf;
8488 struct btrfs_key key;
8489 u64 offset = 0, num_bytes = 0;
8490 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8494 unsigned long leaf_offset;
8496 root = root->fs_info->csum_root;
8497 if (!extent_buffer_uptodate(root->node)) {
8498 fprintf(stderr, "No valid csum tree found\n");
8502 btrfs_init_path(&path);
8503 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8504 key.type = BTRFS_EXTENT_CSUM_KEY;
8506 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8508 fprintf(stderr, "Error searching csum tree %d\n", ret);
8509 btrfs_release_path(&path);
8513 if (ret > 0 && path.slots[0])
8518 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8519 ret = btrfs_next_leaf(root, &path);
8521 fprintf(stderr, "Error going to next leaf "
8528 leaf = path.nodes[0];
8530 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8531 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Every csum_size bytes of the item stand for one sector of data. */
8536 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8537 csum_size) * root->fs_info->sectorsize;
8538 if (!check_data_csum)
8539 goto skip_csum_check;
8540 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8541 ret = check_extent_csums(root, key.offset, data_len,
8547 offset = key.offset;
8548 } else if (key.offset != offset + num_bytes) {
8549 ret = check_extent_exists(root, offset, num_bytes);
8551 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8552 "there is no extent record\n",
8553 offset, offset+num_bytes);
8556 offset = key.offset;
8559 num_bytes += data_len;
8563 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in (objectid, type, offset)
 * order.
 *
 * Every test below is a strict less-than, and a later field is only looked
 * at when the earlier ones are equal.
 * NOTE(review): presumably the result is non-zero exactly when @key sorts
 * before @drop_key -- confirm against the return statements.
 */
8567 static int is_dropped_key(struct btrfs_key *key,
8568 struct btrfs_key *drop_key) {
8569 if (key->objectid < drop_key->objectid)
8571 else if (key->objectid == drop_key->objectid) {
8572 if (key->type < drop_key->type)
8574 else if (key->type == drop_key->type) {
8575 if (key->offset < drop_key->offset)
8583 * Here are the rules for FULL_BACKREF.
8585 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8586 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8588 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8589 * if it happened after the relocation occurred since we'll have dropped the
8590 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8591 * have no real way to know for sure.
8593 * We process the blocks one root at a time, and we start from the lowest root
8594 * objectid and go to the highest. So we can just lookup the owner backref for
8595 * the record and if we don't find it then we know it doesn't exist and we have
8598 * FIXME: if we ever start reclaiming root objectids then we need to fix this
8599 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8600 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to tree block @buf
 * when it is reached through the root described by @ri.
 *
 * @extent_cache: cache of extent records, looked up by buf->start
 * @buf:          the tree block being examined
 * @ri:           root item record of the root being walked
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is OR-ed into *flags when it applies.
 * Whenever the outcome contradicts an already recorded
 * rec->flag_block_full_backref (any value other than FLAG_UNSET),
 * rec->bad_full_backref is set.
 */
8602 static int calc_extent_flag(struct cache_tree *extent_cache,
8603 struct extent_buffer *buf,
8604 struct root_item_record *ri,
8607 struct extent_record *rec;
8608 struct cache_extent *cache;
8609 struct tree_backref *tback;
8612 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8613 /* we have added this extent before */
8617 rec = container_of(cache, struct extent_record, cache);
8620 * Except file/reloc tree, we can not have
8623 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8628 if (buf->start == ri->bytenr)
8631 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8634 owner = btrfs_header_owner(buf);
8635 if (owner == ri->objectid)
/* Look for a backref keyed by the owner root (parent argument is 0). */
8638 tback = find_tree_backref(rec, 0, owner);
8643 if (rec->flag_block_full_backref != FLAG_UNSET &&
8644 rec->flag_block_full_backref != 0)
8645 rec->bad_full_backref = 1;
8648 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8649 if (rec->flag_block_full_backref != FLAG_UNSET &&
8650 rec->flag_block_full_backref != 1)
8651 rec->bad_full_backref = 1;
/*
 * Report on stderr that a key of type @key_type was found in the tree with
 * objectid @rootid.  The key type is rendered by print_key_type() and the
 * root by print_objectid(); the message ends with a newline.
 */
8655 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8657 fprintf(stderr, "Invalid key type(");
8658 print_key_type(stderr, 0, key_type);
8659 fprintf(stderr, ") found in root(");
8660 print_objectid(stderr, rootid, 0);
8661 fprintf(stderr, ")\n");
8665 * Check if the key is valid with its extent buffer.
8667 * This is an early check in case an invalid key exists in an extent buffer
8668 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may live in the tree with objectid
 * @rootid.
 *
 * Only the key types listed below are restricted to particular trees.  A
 * mismatch is reported through report_mismatch_key_root().
 */
8671 static int check_type_with_root(u64 rootid, u8 key_type)
8674 /* Only valid in chunk tree */
8675 case BTRFS_DEV_ITEM_KEY:
8676 case BTRFS_CHUNK_ITEM_KEY:
8677 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8680 /* valid in csum and log tree */
/*
 * NOTE(review): this label is a tree objectid while every other label here
 * is a key type -- confirm whether BTRFS_EXTENT_CSUM_KEY was intended.
 */
8681 case BTRFS_CSUM_TREE_OBJECTID:
8682 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8686 case BTRFS_EXTENT_ITEM_KEY:
8687 case BTRFS_METADATA_ITEM_KEY:
8688 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8689 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8692 case BTRFS_ROOT_ITEM_KEY:
8693 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8696 case BTRFS_DEV_EXTENT_KEY:
8697 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8703 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next tree block to scan, read it and record what it contains.
 *
 * pick_next_pending() fills @bits with candidate blocks; readahead is issued
 * for them, and the first one is removed from the pending/reada/nodes caches
 * and read.  The FULL_BACKREF state of the block is then determined (extent
 * tree lookup when init_extent_tree is not set, calc_extent_flag() as the
 * other source) and cross-checked against the owner, the RELOC header flag
 * and the last snapshot generation of @ri.
 *
 * For a leaf, every item is dispatched on its key type to the matching
 * process_*() or add_*_backref() helper.  For a node, every child pointer
 * gets an extent record and a tree backref and is queued with add_pending().
 * The global byte counters (btree_space_waste, total_btree_bytes, ...) are
 * updated on the way.
 *
 * @root:  used for root->fs_info and passed on to the helpers
 * @bits:  scratch array receiving the picked blocks
 * @ri:    record of the root currently being walked
 * The cache/tree arguments are the collections that the findings are added
 * to or removed from.
 */
8707 static int run_next_block(struct btrfs_root *root,
8708 struct block_info *bits,
8711 struct cache_tree *pending,
8712 struct cache_tree *seen,
8713 struct cache_tree *reada,
8714 struct cache_tree *nodes,
8715 struct cache_tree *extent_cache,
8716 struct cache_tree *chunk_cache,
8717 struct rb_root *dev_cache,
8718 struct block_group_tree *block_group_cache,
8719 struct device_extent_tree *dev_extent_cache,
8720 struct root_item_record *ri)
8722 struct btrfs_fs_info *fs_info = root->fs_info;
8723 struct extent_buffer *buf;
8724 struct extent_record *rec = NULL;
8735 struct btrfs_key key;
8736 struct cache_extent *cache;
8739 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8740 bits_nr, &reada_bits);
8745 for(i = 0; i < nritems; i++) {
8746 ret = add_cache_extent(reada, bits[i].start,
8751 /* fixme, get the parent transid */
8752 readahead_tree_block(fs_info, bits[i].start, 0);
/* The first picked block is the one handled by this call. */
8755 *last = bits[0].start;
8756 bytenr = bits[0].start;
8757 size = bits[0].size;
8759 cache = lookup_cache_extent(pending, bytenr, size);
8761 remove_cache_extent(pending, cache);
8764 cache = lookup_cache_extent(reada, bytenr, size);
8766 remove_cache_extent(reada, cache);
8769 cache = lookup_cache_extent(nodes, bytenr, size);
8771 remove_cache_extent(nodes, cache);
8774 cache = lookup_cache_extent(extent_cache, bytenr, size);
8776 rec = container_of(cache, struct extent_record, cache);
8777 gen = rec->parent_generation;
8780 /* fixme, get the real parent transid */
8781 buf = read_tree_block(root->fs_info, bytenr, gen);
8782 if (!extent_buffer_uptodate(buf)) {
8783 record_bad_block_io(root->fs_info,
8784 extent_cache, bytenr, size);
8788 nritems = btrfs_header_nritems(buf);
8791 if (!init_extent_tree) {
8792 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8793 btrfs_header_level(buf), 1, NULL,
8796 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8798 fprintf(stderr, "Couldn't calc extent flags\n");
8799 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8804 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8806 fprintf(stderr, "Couldn't calc extent flags\n");
8807 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8811 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8813 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8814 ri->objectid == btrfs_header_owner(buf)) {
8816 * Ok we got to this block from it's original owner and
8817 * we have FULL_BACKREF set. Relocation can leave
8818 * converted blocks over so this is altogether possible,
8819 * however it's not possible if the generation > the
8820 * last snapshot, so check for this case.
8822 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8823 btrfs_header_generation(buf) > ri->last_snapshot) {
8824 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8825 rec->bad_full_backref = 1;
8830 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8831 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8832 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8833 rec->bad_full_backref = 1;
8837 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8838 rec->flag_block_full_backref = 1;
8842 rec->flag_block_full_backref = 0;
8844 owner = btrfs_header_owner(buf);
8847 ret = check_block(root, extent_cache, buf, flags);
8851 if (btrfs_is_leaf(buf)) {
8852 btree_space_waste += btrfs_leaf_free_space(root, buf);
8853 for (i = 0; i < nritems; i++) {
8854 struct btrfs_file_extent_item *fi;
8855 btrfs_item_key_to_cpu(buf, &key, i);
8857 * Check key type against the leaf owner.
8858 * Could filter quite a lot of early error if
8861 if (check_type_with_root(btrfs_header_owner(buf),
8863 fprintf(stderr, "ignoring invalid key\n");
8866 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8867 process_extent_item(root, extent_cache, buf,
8871 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8872 process_extent_item(root, extent_cache, buf,
8876 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8878 btrfs_item_size_nr(buf, i);
8881 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8882 process_chunk_item(chunk_cache, &key, buf, i);
8885 if (key.type == BTRFS_DEV_ITEM_KEY) {
8886 process_device_item(dev_cache, &key, buf, i);
8889 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8890 process_block_group_item(block_group_cache,
8894 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8895 process_device_extent_item(dev_extent_cache,
8900 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8901 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8902 process_extent_ref_v0(extent_cache, buf, i);
/* TREE_BLOCK_REF: key.offset is passed as the root, parent is 0. */
8909 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8910 ret = add_tree_backref(extent_cache,
8911 key.objectid, 0, key.offset, 0);
8914 "add_tree_backref failed (leaf tree block): %s",
/* SHARED_BLOCK_REF: key.offset is passed as the parent, root is 0. */
8918 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8919 ret = add_tree_backref(extent_cache,
8920 key.objectid, key.offset, 0, 0);
8923 "add_tree_backref failed (leaf shared block): %s",
8927 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8928 struct btrfs_extent_data_ref *ref;
8929 ref = btrfs_item_ptr(buf, i,
8930 struct btrfs_extent_data_ref);
8931 add_data_backref(extent_cache,
8933 btrfs_extent_data_ref_root(buf, ref),
8934 btrfs_extent_data_ref_objectid(buf,
8936 btrfs_extent_data_ref_offset(buf, ref),
8937 btrfs_extent_data_ref_count(buf, ref),
8938 0, root->fs_info->sectorsize);
8941 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8942 struct btrfs_shared_data_ref *ref;
8943 ref = btrfs_item_ptr(buf, i,
8944 struct btrfs_shared_data_ref);
8945 add_data_backref(extent_cache,
8946 key.objectid, key.offset, 0, 0, 0,
8947 btrfs_shared_data_ref_count(buf, ref),
8948 0, root->fs_info->sectorsize);
8951 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8952 struct bad_item *bad;
8954 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8958 bad = malloc(sizeof(struct bad_item));
8961 INIT_LIST_HEAD(&bad->list);
8962 memcpy(&bad->key, &key,
8963 sizeof(struct btrfs_key));
8964 bad->root_id = owner;
8965 list_add_tail(&bad->list, &delete_items);
8968 if (key.type != BTRFS_EXTENT_DATA_KEY)
8970 fi = btrfs_item_ptr(buf, i,
8971 struct btrfs_file_extent_item);
8972 if (btrfs_file_extent_type(buf, fi) ==
8973 BTRFS_FILE_EXTENT_INLINE)
8975 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8978 data_bytes_allocated +=
8979 btrfs_file_extent_disk_num_bytes(buf, fi);
/*
 * NOTE(review): this tests the running global total, not the size of the
 * current extent, against sectorsize -- confirm that is intended.
 */
8980 if (data_bytes_allocated < root->fs_info->sectorsize) {
8983 data_bytes_referenced +=
8984 btrfs_file_extent_num_bytes(buf, fi);
8985 add_data_backref(extent_cache,
8986 btrfs_file_extent_disk_bytenr(buf, fi),
8987 parent, owner, key.objectid, key.offset -
8988 btrfs_file_extent_offset(buf, fi), 1, 1,
8989 btrfs_file_extent_disk_num_bytes(buf, fi));
8993 struct btrfs_key first_key;
8995 first_key.objectid = 0;
8998 btrfs_item_key_to_cpu(buf, &first_key, 0);
8999 level = btrfs_header_level(buf);
9000 for (i = 0; i < nritems; i++) {
9001 struct extent_record tmpl;
9003 ptr = btrfs_node_blockptr(buf, i);
9004 size = root->fs_info->nodesize;
9005 btrfs_node_key_to_cpu(buf, &key, i);
9007 if ((level == ri->drop_level)
9008 && is_dropped_key(&key, &ri->drop_key)) {
9013 memset(&tmpl, 0, sizeof(tmpl));
9014 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9015 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9020 tmpl.max_size = size;
9021 ret = add_extent_rec(extent_cache, &tmpl);
9025 ret = add_tree_backref(extent_cache, ptr, parent,
9029 "add_tree_backref failed (non-leaf block): %s",
9035 add_pending(nodes, seen, ptr, size);
9037 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of the node count as wasted btree space. */
9040 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9041 nritems) * sizeof(struct btrfs_key_ptr);
9043 total_btree_bytes += buf->len;
9044 if (fs_root_objectid(btrfs_header_owner(buf)))
9045 total_fs_tree_bytes += buf->len;
9046 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9047 total_extent_tree_bytes += buf->len;
9049 free_extent_buffer(buf);
/*
 * Queue the root node @buf of a tree for scanning and seed its extent
 * record.
 *
 * @buf is handed to add_pending() with @nodes when its level is above 0;
 * the other add_pending() call uses @pending.  An extent record covering
 * buf->start / buf->len is added to @extent_cache, followed by a tree
 * backref.  For the reloc tree, or for blocks whose backref revision is
 * below BTRFS_MIXED_BACKREF_REV, the backref is keyed by parent
 * (buf->start); the other call keys it by the root objectid.
 */
9053 static int add_root_to_pending(struct extent_buffer *buf,
9054 struct cache_tree *extent_cache,
9055 struct cache_tree *pending,
9056 struct cache_tree *seen,
9057 struct cache_tree *nodes,
9060 struct extent_record tmpl;
9063 if (btrfs_header_level(buf) > 0)
9064 add_pending(nodes, seen, buf->start, buf->len);
9066 add_pending(pending, seen, buf->start, buf->len);
9068 memset(&tmpl, 0, sizeof(tmpl));
9069 tmpl.start = buf->start;
9074 tmpl.max_size = buf->len;
9075 add_extent_rec(extent_cache, &tmpl);
9077 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9078 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9079 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9082 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9087 /* as we fix the tree, we might be deleting blocks that
9088 * we're tracking for repair. This hook makes sure we
9089 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache (fs_info->fsck_extent_cache) in step when an
 * extent reference is dropped.
 *
 * The reference is treated as a data reference when @owner is at least
 * BTRFS_FIRST_FREE_OBJECTID.  The matching backref of the cached extent
 * record has its counters and its found_ref / found_extent_tree marks
 * reduced, and maybe_free_extent_rec() is then called for the record.
 *
 * @bytenr, @num_bytes: the extent being freed, used for the cache lookup
 * @parent, @root_objectid, @owner, @offset: identify the backref
 */
9091 static int free_extent_hook(struct btrfs_trans_handle *trans,
9092 struct btrfs_root *root,
9093 u64 bytenr, u64 num_bytes, u64 parent,
9094 u64 root_objectid, u64 owner, u64 offset,
9097 struct extent_record *rec;
9098 struct cache_extent *cache;
9100 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9102 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9103 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9107 rec = container_of(cache, struct extent_record, cache);
9109 struct data_backref *back;
9110 back = find_data_backref(rec, parent, root_objectid, owner,
9111 offset, 1, bytenr, num_bytes);
9114 if (back->node.found_ref) {
9115 back->found_ref -= refs_to_drop;
9117 rec->refs -= refs_to_drop;
9119 if (back->node.found_extent_tree) {
9120 back->num_refs -= refs_to_drop;
9121 if (rec->extent_item_refs)
9122 rec->extent_item_refs -= refs_to_drop;
/* Clear the marks once the matching counters have reached zero. */
9124 if (back->found_ref == 0)
9125 back->node.found_ref = 0;
9126 if (back->num_refs == 0)
9127 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased while found_ref is still set --
 * confirm that a test for a cleared found_ref was not intended.
 */
9129 if (!back->node.found_extent_tree && back->node.found_ref) {
9130 rb_erase(&back->node.node, &rec->backref_tree);
9134 struct tree_backref *back;
9135 back = find_tree_backref(rec, parent, root_objectid);
9138 if (back->node.found_ref) {
9141 back->node.found_ref = 0;
9143 if (back->node.found_extent_tree) {
9144 if (rec->extent_item_refs)
9145 rec->extent_item_refs--;
9146 back->node.found_extent_tree = 0;
/*
 * NOTE(review): found_ref is cleared above whenever it was set, so this
 * condition looks like it can never hold -- confirm the intended test.
 */
9148 if (!back->node.found_extent_tree && back->node.found_ref) {
9149 rb_erase(&back->node.node, &rec->backref_tree);
9153 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that belong to the extent starting at
 * bytenr.
 *
 * The search key starts with offset -1 and is moved towards smaller keys.
 * Items whose type is one of the extent / backref key types listed below
 * are deleted with btrfs_del_item(); for any other type the search key is
 * stepped to just before the found key.  When an EXTENT_ITEM or
 * METADATA_ITEM is deleted, btrfs_update_block_group() is called, with
 * found_key.offset as the size for EXTENT_ITEM and nodesize for
 * METADATA_ITEM.
 *
 * @trans: transaction the deletions run in
 * @root:  used for root->fs_info
 * @path:  caller-provided path, released along the way
 */
9158 static int delete_extent_records(struct btrfs_trans_handle *trans,
9159 struct btrfs_root *root,
9160 struct btrfs_path *path,
9163 struct btrfs_key key;
9164 struct btrfs_key found_key;
9165 struct extent_buffer *leaf;
9170 key.objectid = bytenr;
9172 key.offset = (u64)-1;
9175 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9182 if (path->slots[0] == 0)
9188 leaf = path->nodes[0];
9189 slot = path->slots[0];
9191 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9192 if (found_key.objectid != bytenr)
9195 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9196 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9197 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9198 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9199 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9200 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9201 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9202 btrfs_release_path(path);
/* Step the search key to just before the key that was found. */
9203 if (found_key.type == 0) {
9204 if (found_key.offset == 0)
9206 key.offset = found_key.offset - 1;
9207 key.type = found_key.type;
9209 key.type = found_key.type - 1;
9210 key.offset = (u64)-1;
9214 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9215 found_key.objectid, found_key.type, found_key.offset);
9217 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9220 btrfs_release_path(path);
9222 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9223 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9224 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9225 found_key.offset : root->fs_info->nodesize;
9227 ret = btrfs_update_block_group(root, bytenr,
9234 btrfs_release_path(path);
9239 * for a single backref, this will allocate a new extent
9240 * and add the backref to it.
/*
 * Insert an extent item for @rec into the extent tree and add the single
 * backref @back to it.
 *
 * @trans:     transaction the changes run in
 * @info:      filesystem info; info->extent_root is the tree written to
 * @path:      caller-provided path, released before returning
 * @rec:       extent record supplying start, max_size, generation and, for
 *             tree blocks, info_objectid / info_level
 * @back:      the backref to add; back->is_data selects data or tree block
 *             handling and back->full_backref selects a parent-keyed ref
 * @allocated: NOTE(review): presumably skips the extent item insertion when
 *             non-zero -- confirm against the guarding condition
 * @flags:     extra flags OR-ed into the extent flags of a tree block
 *
 * The new extent item starts with a ref count of 0; the refs are then added
 * through btrfs_inc_extent_ref(), once per found ref for data backrefs and
 * once for a tree backref.  Each addition is logged on stderr.
 */
9242 static int record_extent(struct btrfs_trans_handle *trans,
9243 struct btrfs_fs_info *info,
9244 struct btrfs_path *path,
9245 struct extent_record *rec,
9246 struct extent_backref *back,
9247 int allocated, u64 flags)
9250 struct btrfs_root *extent_root = info->extent_root;
9251 struct extent_buffer *leaf;
9252 struct btrfs_key ins_key;
9253 struct btrfs_extent_item *ei;
9254 struct data_backref *dback;
9255 struct btrfs_tree_block_info *bi;
9258 rec->max_size = max_t(u64, rec->max_size,
9262 u32 item_size = sizeof(*ei);
9265 item_size += sizeof(*bi);
9267 ins_key.objectid = rec->start;
9268 ins_key.offset = rec->max_size;
9269 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9271 ret = btrfs_insert_empty_item(trans, extent_root, path,
9272 &ins_key, item_size);
9276 leaf = path->nodes[0];
9277 ei = btrfs_item_ptr(leaf, path->slots[0],
9278 struct btrfs_extent_item);
9280 btrfs_set_extent_refs(leaf, ei, 0);
9281 btrfs_set_extent_generation(leaf, ei, rec->generation);
9283 if (back->is_data) {
9284 btrfs_set_extent_flags(leaf, ei,
9285 BTRFS_EXTENT_FLAG_DATA);
9287 struct btrfs_disk_key copy_key;
/* The tree block info sits directly after the extent item. */
9289 bi = (struct btrfs_tree_block_info *)(ei + 1);
9290 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9293 btrfs_set_disk_key_objectid(&copy_key,
9294 rec->info_objectid);
9295 btrfs_set_disk_key_type(&copy_key, 0);
9296 btrfs_set_disk_key_offset(&copy_key, 0);
9298 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9299 btrfs_set_tree_block_key(leaf, bi, &copy_key);
9301 btrfs_set_extent_flags(leaf, ei,
9302 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9305 btrfs_mark_buffer_dirty(leaf);
9306 ret = btrfs_update_block_group(extent_root, rec->start,
9307 rec->max_size, 1, 0);
9310 btrfs_release_path(path);
9313 if (back->is_data) {
9317 dback = to_data_backref(back);
9318 if (back->full_backref)
9319 parent = dback->parent;
9323 for (i = 0; i < dback->found_ref; i++) {
9324 /* if parent != 0, we're doing a full backref
9325 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9326 * just makes the backref allocator create a data
9329 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9330 rec->start, rec->max_size,
9334 BTRFS_FIRST_FREE_OBJECTID :
9340 fprintf(stderr, "adding new data backref"
9341 " on %llu %s %llu owner %llu"
9342 " offset %llu found %d\n",
9343 (unsigned long long)rec->start,
9344 back->full_backref ?
9346 back->full_backref ?
9347 (unsigned long long)parent :
9348 (unsigned long long)dback->root,
9349 (unsigned long long)dback->owner,
9350 (unsigned long long)dback->offset,
9354 struct tree_backref *tback;
9356 tback = to_tree_backref(back);
9357 if (back->full_backref)
9358 parent = tback->parent;
9362 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9363 rec->start, rec->max_size,
9364 parent, tback->root, 0, 0);
9365 fprintf(stderr, "adding new tree backref on "
9366 "start %llu len %llu parent %llu root %llu\n",
9367 rec->start, rec->max_size, parent, tback->root);
9370 btrfs_release_path(path);
/*
 * Look through @entries for the extent_entry whose bytenr and bytes both
 * equal the given @bytenr and @bytes.
 */
9374 static struct extent_entry *find_entry(struct list_head *entries,
9375 u64 bytenr, u64 bytes)
9377 struct extent_entry *entry = NULL;
9379 list_for_each_entry(entry, entries, list) {
9380 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose the entry of @entries that most backrefs agree on, judged by
 * entry->count.
 *
 * An entry whose broken count equals its count is not trusted.  When the
 * current entry has the same count as the best candidate so far, the
 * candidate cannot be relied on and the search has to keep going; the same
 * holds when the previous and current entries have equal counts.
 */
9387 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9389 struct extent_entry *entry, *best = NULL, *prev = NULL;
9391 list_for_each_entry(entry, entries, list) {
9393 * If there are as many broken entries as entries then we know
9394 * not to trust this particular entry.
9396 if (entry->broken == entry->count)
9400 * Special case, when there are only two entries and 'best' is
9410 * If our current entry == best then we can't be sure our best
9411 * is really the best, so we need to keep searching.
9413 if (best && best->count == entry->count) {
9419 /* Prev == entry, not good enough, have to keep searching */
9420 if (!prev->broken && prev->count == entry->count)
9424 best = (prev->count > entry->count) ? prev : entry;
9425 else if (best->count < entry->count)
/*
 * Rewrite the file extent item referenced by @dback so that it matches
 * @entry.
 *
 * The fs root dback->root is read, and the file extent of inode
 * dback->owner whose disk_bytenr / disk_num_bytes equal
 * dback->disk_bytenr / dback->bytes is located by searching from
 * dback->offset and walking the leaf items.  Inside a transaction the item
 * is then COWed and its disk_bytenr, offset and disk_num_bytes are updated;
 * ram_bytes is updated only for uncompressed extents.
 *
 * A compressed extent whose start differs from the entry, and a ref that
 * lies outside the entry, are reported on stderr.
 *
 * @info:  filesystem info used to read the fs root
 * @path:  caller-provided path, released along the way
 * @dback: the data backref whose file extent is to be fixed
 * @entry: the (bytenr, bytes) pair to make the file extent agree with
 *
 * Returns PTR_ERR(trans) on the transaction error path shown below;
 * at the end returns ret if it is non-zero, otherwise the result of
 * btrfs_commit_transaction().
 */
9433 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9434 struct data_backref *dback, struct extent_entry *entry)
9436 struct btrfs_trans_handle *trans;
9437 struct btrfs_root *root;
9438 struct btrfs_file_extent_item *fi;
9439 struct extent_buffer *leaf;
9440 struct btrfs_key key;
9444 key.objectid = dback->root;
9445 key.type = BTRFS_ROOT_ITEM_KEY;
9446 key.offset = (u64)-1;
9447 root = btrfs_read_fs_root(info, &key);
9449 fprintf(stderr, "Couldn't find root for our ref\n");
9454 * The backref points to the original offset of the extent if it was
9455 * split, so we need to search down to the offset we have and then walk
9456 * forward until we find the backref we're looking for.
9458 key.objectid = dback->owner;
9459 key.type = BTRFS_EXTENT_DATA_KEY;
9460 key.offset = dback->offset;
9461 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9463 fprintf(stderr, "Error looking up ref %d\n", ret);
9468 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9469 ret = btrfs_next_leaf(root, path);
9471 fprintf(stderr, "Couldn't find our ref, next\n");
9475 leaf = path->nodes[0];
9476 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9477 if (key.objectid != dback->owner ||
9478 key.type != BTRFS_EXTENT_DATA_KEY) {
9479 fprintf(stderr, "Couldn't find our ref, search\n");
9482 fi = btrfs_item_ptr(leaf, path->slots[0],
9483 struct btrfs_file_extent_item);
9484 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9485 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9487 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9492 btrfs_release_path(path);
9494 trans = btrfs_start_transaction(root, 1);
9496 return PTR_ERR(trans);
9499 * Ok we have the key of the file extent we want to fix, now we can cow
9500 * down to the thing and fix it.
9502 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9504 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9505 key.objectid, key.type, key.offset, ret);
9509 fprintf(stderr, "Well that's odd, we just found this key "
9510 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9515 leaf = path->nodes[0];
9516 fi = btrfs_item_ptr(leaf, path->slots[0],
9517 struct btrfs_file_extent_item);
9519 if (btrfs_file_extent_compression(leaf, fi) &&
9520 dback->disk_bytenr != entry->bytenr) {
9521 fprintf(stderr, "Ref doesn't match the record start and is "
9522 "compressed, please take a btrfs-image of this file "
9523 "system and send it to a btrfs developer so they can "
9524 "complete this functionality for bytenr %Lu\n",
9525 dback->disk_bytenr);
9530 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9531 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9532 } else if (dback->disk_bytenr > entry->bytenr) {
9533 u64 off_diff, offset;
9535 off_diff = dback->disk_bytenr - entry->bytenr;
9536 offset = btrfs_file_extent_offset(leaf, fi);
9537 if (dback->disk_bytenr + offset +
9538 btrfs_file_extent_num_bytes(leaf, fi) >
9539 entry->bytenr + entry->bytes) {
9540 fprintf(stderr, "Ref is past the entry end, please "
9541 "take a btrfs-image of this file system and "
9542 "send it to a btrfs developer, ref %Lu\n",
9543 dback->disk_bytenr);
9548 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9549 btrfs_set_file_extent_offset(leaf, fi, offset);
9550 } else if (dback->disk_bytenr < entry->bytenr) {
9553 offset = btrfs_file_extent_offset(leaf, fi);
9554 if (dback->disk_bytenr + offset < entry->bytenr) {
9555 fprintf(stderr, "Ref is before the entry start, please"
9556 " take a btrfs-image of this file system and "
9557 "send it to a btrfs developer, ref %Lu\n",
9558 dback->disk_bytenr);
/* Re-express the offset relative to the start of the entry. */
9563 offset += dback->disk_bytenr;
9564 offset -= entry->bytenr;
9565 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9566 btrfs_set_file_extent_offset(leaf, fi, offset);
9569 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9572 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9573 * only do this if we aren't using compression, otherwise it's a
9576 if (!btrfs_file_extent_compression(leaf, fi))
9577 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9579 printf("ram bytes may be wrong?\n");
9580 btrfs_mark_buffer_dirty(leaf);
9582 err = btrfs_commit_transaction(trans, root);
9583 btrfs_release_path(path);
9584 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on one (bytenr, bytes) pair.
 *
 * Every data backref that is not a full backref and has a real ref is
 * grouped into an extent_entry keyed by its disk_bytenr / bytes.  With at
 * most one entry and no mismatch against the record there is nothing to
 * repair.  Otherwise find_most_right_entry() picks the value to trust; when
 * the backrefs alone cannot decide, an entry for the extent record itself
 * (rec->start, rec->nr) is looked up or added and the choice is made again.
 * Each backref that disagrees with the chosen entry is then fixed with
 * repair_ref().
 *
 * Cases that cannot be resolved are reported on stderr.  The temporary
 * entry list is emptied at the end.
 *
 * @info: filesystem info, passed to repair_ref()
 * @path: caller-provided path, passed to repair_ref()
 * @rec:  the extent record whose backrefs are checked
 */
9587 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9588 struct extent_record *rec)
9590 struct extent_backref *back, *tmp;
9591 struct data_backref *dback;
9592 struct extent_entry *entry, *best = NULL;
9595 int broken_entries = 0;
9600 * Metadata is easy and the backrefs should always agree on bytenr and
9601 * size, if not we've got bigger issues.
9606 rbtree_postorder_for_each_entry_safe(back, tmp,
9607 &rec->backref_tree, node) {
9608 if (back->full_backref || !back->is_data)
9611 dback = to_data_backref(back);
9614 * We only pay attention to backrefs that we found a real
9617 if (dback->found_ref == 0)
9621 * For now we only catch when the bytes don't match, not the
9622 * bytenr. We can easily do this at the same time, but I want
9623 * to have a fs image to test on before we just add repair
9624 * functionality willy-nilly so we know we won't screw up the
9628 entry = find_entry(&entries, dback->disk_bytenr,
9631 entry = malloc(sizeof(struct extent_entry));
9636 memset(entry, 0, sizeof(*entry));
9637 entry->bytenr = dback->disk_bytenr;
9638 entry->bytes = dback->bytes;
9639 list_add_tail(&entry->list, &entries);
9644 * If we only have on entry we may think the entries agree when
9645 * in reality they don't so we have to do some extra checking.
9647 if (dback->disk_bytenr != rec->start ||
9648 dback->bytes != rec->nr || back->broken)
9659 /* Yay all the backrefs agree, carry on good sir */
9660 if (nr_entries <= 1 && !mismatch)
9663 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9664 "%Lu\n", rec->start);
9667 * First we want to see if the backrefs can agree amongst themselves who
9668 * is right, so figure out which one of the entries has the highest
9671 best = find_most_right_entry(&entries);
9674 * Ok so we may have an even split between what the backrefs think, so
9675 * this is where we use the extent ref to see what it thinks.
9678 entry = find_entry(&entries, rec->start, rec->nr);
9679 if (!entry && (!broken_entries || !rec->found_rec)) {
9680 fprintf(stderr, "Backrefs don't agree with each other "
9681 "and extent record doesn't agree with anybody,"
9682 " so we can't fix bytenr %Lu bytes %Lu\n",
9683 rec->start, rec->nr);
9686 } else if (!entry) {
9688 * Ok our backrefs were broken, we'll assume this is the
9689 * correct value and add an entry for this range.
9691 entry = malloc(sizeof(struct extent_entry));
9696 memset(entry, 0, sizeof(*entry));
9697 entry->bytenr = rec->start;
9698 entry->bytes = rec->nr;
9699 list_add_tail(&entry->list, &entries);
9703 best = find_most_right_entry(&entries);
9705 fprintf(stderr, "Backrefs and extent record evenly "
9706 "split on who is right, this is going to "
9707 "require user input to fix bytenr %Lu bytes "
9708 "%Lu\n", rec->start, rec->nr);
9715 * I don't think this can happen currently as we'll abort() if we catch
9716 * this case higher up, but in case somebody removes that we still can't
9717 * deal with it properly here yet, so just bail out of that's the case.
9719 if (best->bytenr != rec->start) {
9720 fprintf(stderr, "Extent start and backref starts don't match, "
9721 "please use btrfs-image on this file system and send "
9722 "it to a btrfs developer so they can make fsck fix "
9723 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9724 rec->start, rec->nr);
9730 * Ok great we all agreed on an extent record, let's go find the real
9731 * references and fix up the ones that don't match.
9733 rbtree_postorder_for_each_entry_safe(back, tmp,
9734 &rec->backref_tree, node) {
9735 if (back->full_backref || !back->is_data)
9738 dback = to_data_backref(back);
9741 * Still ignoring backrefs that don't have a real ref attached
9744 if (dback->found_ref == 0)
9747 if (dback->bytes == best->bytes &&
9748 dback->disk_bytenr == best->bytenr)
9751 ret = repair_ref(info, path, dback, best);
9757 * Ok we messed with the actual refs, which means we need to drop our
9758 * entire cache and go back and rescan. I know this is a huge pain and
9759 * adds a lot of extra work, but it's the only way to be safe. Once all
9760 * the backrefs agree we may not need to do anything to the extent
9765 while (!list_empty(&entries)) {
9766 entry = list_entry(entries.next, struct extent_entry, list);
9767 list_del_init(&entry->list);
/*
 * process_duplicates - replace @rec in @extent_cache with the first record
 * on its dups list, folding in overlapping records found in the cache.
 *
 * @extent_cache: tree @rec is removed from and the promoted record is
 *                inserted into
 * @rec:          record whose dups list supplies the replacement
 *
 * The final statement returns 0 when the promoted record still carries
 * duplicates and 1 when it has none.
 *
 * NOTE(review): this listing has lost lines (braces and short statements),
 * so the exact control flow must be confirmed against the complete file.
 */
9773 static int process_duplicates(struct cache_tree *extent_cache,
9774 struct extent_record *rec)
9776 struct extent_record *good, *tmp;
9777 struct cache_extent *cache;
9781 * If we found a extent record for this extent then return, or if we
9782 * have more than one duplicate we are likely going to need to delete
9785 if (rec->found_rec || rec->num_duplicates > 1)
9788 /* Shouldn't happen but just in case */
9789 BUG_ON(!rec->num_duplicates);
9792 * So this happens if we end up with a backref that doesn't match the
9793 * actual extent entry. So either the backref is bad or the extent
9794 * entry is bad. Either way we want to have the extent_record actually
9795 * reflect what we found in the extent_tree, so we need to take the
9796 * duplicate out and use that as the extent_record since the only way we
9797 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9799 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate and reset its per-record state. */
9801 good = to_extent_record(rec->dups.next);
9802 list_del_init(&good->list);
9803 INIT_LIST_HEAD(&good->backrefs);
9804 INIT_LIST_HEAD(&good->dups);
9805 good->cache.start = good->start;
9806 good->cache.size = good->nr;
9807 good->content_checked = 0;
9808 good->owner_ref_checked = 0;
9809 good->num_duplicates = 0;
/* Carry over the ref count and the backref list gathered on rec. */
9810 good->refs = rec->refs;
9811 list_splice_init(&rec->backrefs, &good->backrefs);
9813 cache = lookup_cache_extent(extent_cache, good->start,
9817 tmp = container_of(cache, struct extent_record, cache);
9820 * If we find another overlapping extent and it's found_rec is
9821 * set then it's a duplicate and we need to try and delete
9824 if (tmp->found_rec || tmp->num_duplicates > 0) {
9825 if (list_empty(&good->list))
9826 list_add_tail(&good->list,
9827 &duplicate_extents);
9828 good->num_duplicates += tmp->num_duplicates + 1;
9829 list_splice_init(&tmp->dups, &good->dups);
9830 list_del_init(&tmp->list);
9831 list_add_tail(&tmp->list, &good->dups);
9832 remove_cache_extent(extent_cache, &tmp->cache);
9837 * Ok we have another non extent item backed extent rec, so lets
9838 * just add it to this extent and carry on like we did above.
9840 good->refs += tmp->refs;
9841 list_splice_init(&tmp->backrefs, &good->backrefs);
9842 remove_cache_extent(extent_cache, &tmp->cache);
/* Insert the promoted record under the start/size assigned above. */
9845 ret = insert_cache_extent(extent_cache, &good->cache);
9848 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - delete the extent items of the records that
 * end up on a local delete list built from @rec and rec->dups.
 *
 * @root: only root->fs_info->extent_root is used; the transaction and all
 *        tree operations run on the extent root
 * @rec:  record whose dups list is examined
 *
 * Returns ret when it is non-zero, otherwise nr_del.
 * NOTE(review): nr_del is presumably the number of items deleted; its
 * declaration and updates are not visible in this listing - confirm.
 * NOTE(review): other lines (braces, jumps, labels, the frees in the two
 * drain loops) are also missing here; confirm against the complete file.
 */
9851 static int delete_duplicate_records(struct btrfs_root *root,
9852 struct extent_record *rec)
9854 struct btrfs_trans_handle *trans;
9855 LIST_HEAD(delete_list);
9856 struct btrfs_path path;
9857 struct extent_record *tmp, *good, *n;
9860 struct btrfs_key key;
9862 btrfs_init_path(&path);
9865 /* Find the record that covers all of the duplicates. */
9866 list_for_each_entry(tmp, &rec->dups, list) {
9867 if (good->start < tmp->start)
9869 if (good->nr > tmp->nr)
/* The message below prints start and length (nr) of both extents. */
9872 if (tmp->start + tmp->nr < good->start + good->nr) {
9873 fprintf(stderr, "Ok we have overlapping extents that "
9874 "aren't completely covered by each other, this "
9875 "is going to require more careful thought. "
9876 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9877 tmp->start, tmp->nr, good->start, good->nr);
9884 list_add_tail(&rec->list, &delete_list);
9886 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9889 list_move_tail(&tmp->list, &delete_list);
/* From here on all tree work is done on the extent root. */
9892 root = root->fs_info->extent_root;
9893 trans = btrfs_start_transaction(root, 1);
9894 if (IS_ERR(trans)) {
9895 ret = PTR_ERR(trans);
9899 list_for_each_entry(tmp, &delete_list, list) {
/* presumably skips records that never had an extent item - confirm */
9900 if (tmp->found_rec == 0)
9902 key.objectid = tmp->start;
9903 key.type = BTRFS_EXTENT_ITEM_KEY;
9904 key.offset = tmp->nr;
9906 /* Shouldn't happen but just in case */
9907 if (tmp->metadata) {
9908 fprintf(stderr, "Well this shouldn't happen, extent "
9909 "record overlaps but is metadata? "
9910 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
/* Search with -1 and 1 as the last two arguments, then delete the item. */
9914 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9920 ret = btrfs_del_item(trans, root, &path);
9923 btrfs_release_path(&path);
9926 err = btrfs_commit_transaction(trans, root);
9930 while (!list_empty(&delete_list)) {
9931 tmp = to_extent_record(delete_list.next);
9932 list_del_init(&tmp->list);
9938 while (!list_empty(&rec->dups)) {
9939 tmp = to_extent_record(rec->dups.next);
9940 list_del_init(&tmp->list);
9944 btrfs_release_path(&path);
/* Nothing failed and nothing was deleted: clear the duplicate count. */
9946 if (!ret && !nr_del)
9947 rec->num_duplicates = 0;
9949 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - for data backrefs of @rec that have no found
 * ref yet, look up the file extent they point at and copy its disk bytenr
 * and disk length into the backref.
 *
 * @info:         filesystem, used to read the backref's root
 * @path:         caller-provided path, released after each lookup
 * @extent_cache: consulted with the file extent's disk bytenr to see
 *                whether another extent record already covers it
 * @rec:          record whose backref_tree is walked
 *
 * Returns PTR_ERR(root) when reading a root fails with an error other
 * than -ENOENT.
 * NOTE(review): this listing has lost lines (braces, continue/return
 * statements, some declarations); confirm the remaining control flow and
 * return values against the complete file.
 */
9952 static int find_possible_backrefs(struct btrfs_fs_info *info,
9953 struct btrfs_path *path,
9954 struct cache_tree *extent_cache,
9955 struct extent_record *rec)
9957 struct btrfs_root *root;
9958 struct extent_backref *back, *tmp;
9959 struct data_backref *dback;
9960 struct cache_extent *cache;
9961 struct btrfs_file_extent_item *fi;
9962 struct btrfs_key key;
9966 rbtree_postorder_for_each_entry_safe(back, tmp,
9967 &rec->backref_tree, node) {
9968 /* Don't care about full backrefs (poor unloved backrefs) */
9969 if (back->full_backref || !back->is_data)
9972 dback = to_data_backref(back);
9974 /* We found this one, we don't need to do a lookup */
9975 if (dback->found_ref)
9978 key.objectid = dback->root;
9979 key.type = BTRFS_ROOT_ITEM_KEY;
9980 key.offset = (u64)-1;
9982 root = btrfs_read_fs_root(info, &key);
9984 /* No root, definitely a bad ref, skip */
9985 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9987 /* Other err, exit */
9989 return PTR_ERR(root);
9991 key.objectid = dback->owner;
9992 key.type = BTRFS_EXTENT_DATA_KEY;
9993 key.offset = dback->offset;
9994 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9996 btrfs_release_path(path);
9999 /* Didn't find it, we can carry on */
10004 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10005 struct btrfs_file_extent_item);
10006 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10007 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10008 btrfs_release_path(path);
10009 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* Named distinctly from the iterator cursor 'tmp' of the walk above. */
10011 struct extent_record *overlap;
10012 overlap = container_of(cache, struct extent_record, cache);
10015 * If we found an extent record for the bytenr for this
10016 * particular backref then we can't add it to our
10017 * current extent record. We only want to add backrefs
10018 * that don't have a corresponding extent item in the
10019 * extent tree since they likely belong to this record
10020 * and we need to fix it if it doesn't match bytenrs.
10022 if (overlap->found_rec)
10026 dback->found_ref += 1;
10027 dback->disk_bytenr = bytenr;
10028 dback->bytes = bytes;
10031 * Set this so the verify backref code knows not to trust the
10032 * values in this backref.
10041 * Record orphan data ref into corresponding root.
10043 * Return 0 if the extent item contains data ref and recorded.
10044 * Return 1 if the extent item contains no useful data ref
10045 * In that case, it may contain only shared_dataref or metadata backref
10046 * or the file extent exists (this should be handled by the extent bytenr
10047 * recovery routine)
10048 * Return <0 if something goes wrong.
/*
 * @fs_info: filesystem, used to read the root named by each backref
 * @rec:     extent record whose data backrefs are examined; its
 *           cache.start / cache.size become the orphan's disk_bytenr /
 *           disk_len
 *
 * Only non-full data backrefs that were found in the extent tree and have
 * no found ref are considered.  Each recorded orphan is linked onto
 * dest_root->orphan_data_extents.
 *
 * NOTE(review): this listing has lost lines (braces, continue statements,
 * the allocation failure check, the test on the search result); confirm
 * against the complete file.
 */
10050 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10051 struct extent_record *rec)
10053 struct btrfs_key key;
10054 struct btrfs_root *dest_root;
10055 struct extent_backref *back, *tmp;
10056 struct data_backref *dback;
10057 struct orphan_data_extent *orphan;
10058 struct btrfs_path path;
10059 int recorded_data_ref = 0;
10064 btrfs_init_path(&path);
10065 rbtree_postorder_for_each_entry_safe(back, tmp,
10066 &rec->backref_tree, node) {
10067 if (back->full_backref || !back->is_data ||
10068 !back->found_extent_tree)
10070 dback = to_data_backref(back);
10071 if (dback->found_ref)
10073 key.objectid = dback->root;
10074 key.type = BTRFS_ROOT_ITEM_KEY;
10075 key.offset = (u64)-1;
10077 dest_root = btrfs_read_fs_root(fs_info, &key);
10079 /* For non-exist root we just skip it */
10080 if (IS_ERR(dest_root) || !dest_root)
10083 key.objectid = dback->owner;
10084 key.type = BTRFS_EXTENT_DATA_KEY;
10085 key.offset = dback->offset;
10087 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10088 btrfs_release_path(&path);
10090 * For ret < 0, it's OK since the fs-tree may be corrupted,
10091 * we need to record it for inode/file extent rebuild.
10092 * For ret > 0, we record it only for file extent rebuild.
10093 * For ret == 0, the file extent exists but only bytenr
10094 * mismatch, let the original bytenr fix routine to handle,
10100 orphan = malloc(sizeof(*orphan));
10105 INIT_LIST_HEAD(&orphan->list);
10106 orphan->root = dback->root;
10107 orphan->objectid = dback->owner;
10108 orphan->offset = dback->offset;
10109 orphan->disk_bytenr = rec->cache.start;
10110 orphan->disk_len = rec->cache.size;
/*
 * list_add(new, head): the orphan is the new entry and the root's
 * orphan_data_extents is the list head, so earlier orphans stay linked.
 */
10111 list_add(&orphan->list, &dest_root->orphan_data_extents);
10112 recorded_data_ref = 1;
10115 btrfs_release_path(&path);
10117 return !recorded_data_ref;
10123 * when an incorrect extent item is found, this will delete
10124 * all of the existing entries for it and recreate them
10125 * based on what the tree scan found.
/*
 * @info:         filesystem being repaired; transactions run on
 *                info->extent_root
 * @extent_cache: passed through to find_possible_backrefs()
 * @rec:          extent record whose extent tree entries are rebuilt
 *
 * Visible sequence: for a non-metadata record whose ref count differs from
 * the extent item's, find_possible_backrefs() and verify_backrefs() are
 * called; a transaction is started; delete_extent_records() removes the
 * existing records; info->corrupt_blocks is consulted for the range; then
 * record_extent() is called for backrefs with found_ref set, and the
 * transaction is committed.
 *
 * NOTE(review): this listing has lost lines (braces, error jumps, labels,
 * the final return); confirm error handling against the complete file.
 */
10127 static int fixup_extent_refs(struct btrfs_fs_info *info,
10128 struct cache_tree *extent_cache,
10129 struct extent_record *rec)
10131 struct btrfs_trans_handle *trans = NULL;
10133 struct btrfs_path path;
10134 struct cache_extent *cache;
10135 struct extent_backref *back, *tmp;
/* The same flags value is handed to every record_extent() call below. */
10139 if (rec->flag_block_full_backref)
10140 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10142 btrfs_init_path(&path);
10143 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10145 * Sometimes the backrefs themselves are so broken they don't
10146 * get attached to any meaningful rec, so first go back and
10147 * check any of our backrefs that we couldn't find and throw
10148 * them into the list if we find the backref so that
10149 * verify_backrefs can figure out what to do.
10151 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10156 /* step one, make sure all of the backrefs agree */
10157 ret = verify_backrefs(info, &path, rec);
10161 trans = btrfs_start_transaction(info->extent_root, 1);
10162 if (IS_ERR(trans)) {
10163 ret = PTR_ERR(trans);
10167 /* step two, delete all the existing records */
10168 ret = delete_extent_records(trans, info->extent_root, &path,
10174 /* was this block corrupt? If so, don't add references to it */
10175 cache = lookup_cache_extent(info->corrupt_blocks,
10176 rec->start, rec->max_size);
10182 /* step three, recreate all the refs we did find */
10183 rbtree_postorder_for_each_entry_safe(back, tmp,
10184 &rec->backref_tree, node) {
10186 * if we didn't find any references, don't create a
10187 * new extent record
10189 if (!back->found_ref)
10192 rec->bad_full_backref = 0;
10193 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10201 int err = btrfs_commit_transaction(trans, info->extent_root);
10207 fprintf(stderr, "Repaired extent references for %llu\n",
10208 (unsigned long long)rec->start);
10210 btrfs_release_path(&path);
/*
 * fixup_extent_flags - make BTRFS_BLOCK_FLAG_FULL_BACKREF in the extent
 * item of @rec agree with rec->flag_block_full_backref.
 *
 * @fs_info: filesystem; all work is done on fs_info->extent_root
 * @rec:     record naming the extent item to patch
 *
 * The item is looked up at objectid rec->start, using type
 * BTRFS_METADATA_ITEM_KEY with offset rec->info_level for metadata records
 * and BTRFS_EXTENT_ITEM_KEY with offset rec->max_size for the other case.
 * The flag is set or cleared, the leaf is marked dirty and the transaction
 * is committed.
 *
 * Returns PTR_ERR(trans) on one visible path.
 * NOTE(review): this listing has lost lines (braces, else branches, the
 * conditions guarding the two early exits, return statements); confirm
 * against the complete file.
 */
10214 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10215 struct extent_record *rec)
10217 struct btrfs_trans_handle *trans;
10218 struct btrfs_root *root = fs_info->extent_root;
10219 struct btrfs_path path;
10220 struct btrfs_extent_item *ei;
10221 struct btrfs_key key;
10225 key.objectid = rec->start;
10226 if (rec->metadata) {
10227 key.type = BTRFS_METADATA_ITEM_KEY;
10228 key.offset = rec->info_level;
10230 key.type = BTRFS_EXTENT_ITEM_KEY;
10231 key.offset = rec->max_size;
10234 trans = btrfs_start_transaction(root, 0);
10236 return PTR_ERR(trans);
10238 btrfs_init_path(&path);
10239 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/*
 * The two early exits below release the path and still commit the
 * transaction; the commit result is not checked on those paths.
 */
10241 btrfs_release_path(&path);
10242 btrfs_commit_transaction(trans, root);
10245 fprintf(stderr, "Didn't find extent for %llu\n",
10246 (unsigned long long)rec->start);
10247 btrfs_release_path(&path);
10248 btrfs_commit_transaction(trans, root);
10252 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10253 struct btrfs_extent_item);
10254 flags = btrfs_extent_flags(path.nodes[0], ei);
10255 if (rec->flag_block_full_backref) {
10256 fprintf(stderr, "setting full backref on %llu\n",
10257 (unsigned long long)key.objectid);
10258 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10260 fprintf(stderr, "clearing full backref on %llu\n",
10261 (unsigned long long)key.objectid);
10262 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Write the updated flags back into the leaf and mark it dirty. */
10264 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10265 btrfs_mark_buffer_dirty(path.nodes[0]);
10266 btrfs_release_path(&path);
10267 ret = btrfs_commit_transaction(trans, root);
10269 fprintf(stderr, "Repaired extent flags for %llu\n",
10270 (unsigned long long)rec->start);
10275 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - delete the node pointer in the extent root that refers
 * to a corrupt block.
 *
 * @trans:   running transaction, passed to btrfs_search_slot()
 * @info:    filesystem; only info->extent_root is searched
 * @corrupt: describes the bad block: corrupt->key is the search key,
 *           corrupt->level its level and corrupt->cache.start the block
 *           pointer value that is looked for
 *
 * The search stops one level above the corrupt block.  The slot returned
 * by the search is tried first, then every slot of that node is scanned.
 *
 * NOTE(review): this listing has lost lines (braces, jumps, labels, return
 * statements); confirm the return values against the complete file.
 */
10276 static int prune_one_block(struct btrfs_trans_handle *trans,
10277 struct btrfs_fs_info *info,
10278 struct btrfs_corrupt_block *corrupt)
10281 struct btrfs_path path;
10282 struct extent_buffer *eb;
10286 int level = corrupt->level + 1;
10288 btrfs_init_path(&path);
10290 /* we want to stop at the parent to our busted block */
10291 path.lowest_level = level;
10293 ret = btrfs_search_slot(trans, info->extent_root,
10294 &corrupt->key, &path, -1, 1);
10299 eb = path.nodes[level];
10306 * hopefully the search gave us the block we want to prune,
10307 * lets try that first
10309 slot = path.slots[level];
10310 found = btrfs_node_blockptr(eb, slot);
10311 if (found == corrupt->cache.start)
10314 nritems = btrfs_header_nritems(eb);
10316 /* the search failed, lets scan this node and hope we find it */
10317 for (slot = 0; slot < nritems; slot++) {
10318 found = btrfs_node_blockptr(eb, slot);
10319 if (found == corrupt->cache.start)
10323 * we couldn't find the bad block. TODO, search all the nodes for pointers
10326 if (eb == info->extent_root->node) {
10331 btrfs_release_path(&path);
10336 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10337 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10340 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - run prune_one_block() on entries of
 * info->corrupt_blocks and remove them from that cache.
 *
 * @info: filesystem; the transaction is started and committed on
 *        info->extent_root
 *
 * Returns the result of btrfs_commit_transaction() on the last visible
 * path, and PTR_ERR(trans) on an earlier one.
 * The return value of prune_one_block() is not checked.
 *
 * NOTE(review): the loop and the conditions around the transaction start
 * are not visible in this listing; confirm against the complete file.
 */
10344 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10346 struct btrfs_trans_handle *trans = NULL;
10347 struct cache_extent *cache;
10348 struct btrfs_corrupt_block *corrupt;
10351 cache = search_cache_extent(info->corrupt_blocks, 0);
10355 trans = btrfs_start_transaction(info->extent_root, 1);
10357 return PTR_ERR(trans);
10359 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10360 prune_one_block(trans, info, corrupt);
10361 remove_cache_extent(info->corrupt_blocks, cache);
10364 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear EXTENT_DIRTY ranges from
 * fs_info->free_space_cache, then step through block groups by advancing
 * start to the end (key.objectid + key.offset) of each one found.
 *
 * NOTE(review): the loops, their exit conditions and whatever is done to
 * each block group are not visible in this listing; confirm against the
 * complete file.
 */
10368 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10370 struct btrfs_block_group_cache *cache;
10375 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10376 &start, &end, EXTENT_DIRTY);
10379 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10384 cache = btrfs_lookup_first_block_group(fs_info, start);
10389 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - walk the records in @extent_cache, report the
 * inconsistencies recorded on each one and, when 'repair' is set, attempt
 * the matching fix-ups.
 *
 * @root:         used for root->fs_info; reassigned to the extent root for
 *                the block accounting transaction near the end
 * @extent_cache: tree of extent_record entries; each processed record is
 *                removed from it and its backrefs are freed
 *
 * Visible phases:
 *  - mark the range of every record and of every corrupt block dirty in
 *    fs_info->excluded_extents, prune corrupt blocks and reset the cached
 *    block groups;
 *  - while 'repair' is set, drain the duplicate_extents list through
 *    process_duplicates() and delete_duplicate_records();
 *  - per record: leftover duplicates, ref count against the extent item
 *    ref count, backpointer mismatch, owner ref check, bad full backref,
 *    metadata crossing a stripe boundary, chunk type mismatch;
 *  - btrfs_fix_block_accounting() inside a transaction on the extent root.
 *
 * NOTE(review): this listing has lost many lines (braces, loop headers,
 * error-state updates, jumps, the final return); confirm control flow and
 * return values against the complete file.
 */
10393 static int check_extent_refs(struct btrfs_root *root,
10394 struct cache_tree *extent_cache)
10396 struct extent_record *rec;
10397 struct cache_extent *cache;
10404 * if we're doing a repair, we have to make sure
10405 * we don't allocate from the problem extents.
10406 * In the worst case, this will be all the
10407 * extents in the FS
10409 cache = search_cache_extent(extent_cache, 0);
10411 rec = container_of(cache, struct extent_record, cache);
10412 set_extent_dirty(root->fs_info->excluded_extents,
10414 rec->start + rec->max_size - 1);
10415 cache = next_cache_extent(cache);
10418 /* pin down all the corrupted blocks too */
10419 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10421 set_extent_dirty(root->fs_info->excluded_extents,
10423 cache->start + cache->size - 1);
10424 cache = next_cache_extent(cache);
10426 prune_corrupt_blocks(root->fs_info);
10427 reset_cached_block_groups(root->fs_info);
10430 reset_cached_block_groups(root->fs_info);
10433 * We need to delete any duplicate entries we find first otherwise we
10434 * could mess up the extent tree when we have backrefs that actually
10435 * belong to a different extent item and not the weird duplicate one.
10437 while (repair && !list_empty(&duplicate_extents)) {
10438 rec = to_extent_record(duplicate_extents.next);
10439 list_del_init(&rec->list);
10441 /* Sometimes we can find a backref before we find an actual
10442 * extent, so we need to process it a little bit to see if there
10443 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10444 * if this is a backref screwup. If we need to delete stuff
10445 * process_duplicates() will return 0, otherwise it will return
10448 if (process_duplicates(extent_cache, rec))
10450 ret = delete_duplicate_records(root, rec);
10454 * delete_duplicate_records will return the number of entries
10455 * deleted, so if it's greater than 0 then we know we actually
10456 * did something and we need to remove.
10469 cache = search_cache_extent(extent_cache, 0);
10472 rec = container_of(cache, struct extent_record, cache);
10473 if (rec->num_duplicates) {
10474 fprintf(stderr, "extent item %llu has multiple extent "
10475 "items\n", (unsigned long long)rec->start);
10479 if (rec->refs != rec->extent_item_refs) {
10480 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10481 (unsigned long long)rec->start,
10482 (unsigned long long)rec->nr);
10483 fprintf(stderr, "extent item %llu, found %llu\n",
10484 (unsigned long long)rec->extent_item_refs,
10485 (unsigned long long)rec->refs);
10486 ret = record_orphan_data_extents(root->fs_info, rec);
10492 if (all_backpointers_checked(rec, 1)) {
10493 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10494 (unsigned long long)rec->start,
10495 (unsigned long long)rec->nr);
10499 if (!rec->owner_ref_checked) {
10500 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10501 (unsigned long long)rec->start,
10502 (unsigned long long)rec->nr);
10507 if (repair && fix) {
10508 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10514 if (rec->bad_full_backref) {
10515 fprintf(stderr, "bad full backref, on [%llu]\n",
10516 (unsigned long long)rec->start);
10518 ret = fixup_extent_flags(root->fs_info, rec);
10526 * Although it's not a extent ref's problem, we reuse this
10527 * routine for error reporting.
10528 * No repair function yet.
10530 if (rec->crossing_stripes) {
10532 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10533 rec->start, rec->start + rec->max_size);
10537 if (rec->wrong_chunk_type) {
10539 "bad extent [%llu, %llu), type mismatch with chunk\n",
10540 rec->start, rec->start + rec->max_size);
10545 remove_cache_extent(extent_cache, cache);
10546 free_all_extent_backrefs(rec);
10547 if (!init_extent_tree && repair && (!cur_err || fix))
10548 clear_extent_dirty(root->fs_info->excluded_extents,
10550 rec->start + rec->max_size - 1);
10555 if (ret && ret != -EAGAIN) {
10556 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10559 struct btrfs_trans_handle *trans;
10561 root = root->fs_info->extent_root;
10562 trans = btrfs_start_transaction(root, 1);
10563 if (IS_ERR(trans)) {
10564 ret = PTR_ERR(trans);
10568 ret = btrfs_fix_block_accounting(trans, root);
10571 ret = btrfs_commit_transaction(trans, root);
10583 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10587 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10588 stripe_size = length;
10589 stripe_size /= num_stripes;
10590 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10591 stripe_size = length * 2;
10592 stripe_size /= num_stripes;
10593 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10594 stripe_size = length;
10595 stripe_size /= (num_stripes - 1);
10596 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10597 stripe_size = length;
10598 stripe_size /= (num_stripes - 2);
10600 stripe_size = length;
10602 return stripe_size;
10606 * Check the chunk with its block group/dev list ref:
10607 * Return 0 if all refs seems valid.
10608 * Return 1 if part of refs seems valid, need later check for rebuild ref
10609 * like missing block group and needs to search extent tree to rebuild them.
10610 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * @chunk_rec:         chunk being cross-checked
 * @block_group_cache: its tree is searched for the chunk's block group
 * @dev_extent_cache:  its tree is searched once per stripe with the
 *                     stripe's devid and offset and the length computed by
 *                     calc_stripe_length()
 *
 * A block group is compared on length/offset/flags against the chunk; a
 * device extent is compared on objectid, offset, chunk_offset and length.
 * The code also unlinks the block group record and stores it in
 * chunk_rec->bg_rec, and moves device extent records onto
 * chunk_rec->dextents.
 *
 * NOTE(review): this listing has lost lines (the last parameter, braces,
 * else branches, the calls that print the messages, return value updates);
 * which branch performs the list updates must be confirmed against the
 * complete file.
 */
10612 static int check_chunk_refs(struct chunk_record *chunk_rec,
10613 struct block_group_tree *block_group_cache,
10614 struct device_extent_tree *dev_extent_cache,
10617 struct cache_extent *block_group_item;
10618 struct block_group_record *block_group_rec;
10619 struct cache_extent *dev_extent_item;
10620 struct device_extent_record *dev_extent_rec;
10624 int metadump_v2 = 0;
10628 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10630 chunk_rec->length);
10631 if (block_group_item) {
10632 block_group_rec = container_of(block_group_item,
10633 struct block_group_record,
10635 if (chunk_rec->length != block_group_rec->offset ||
10636 chunk_rec->offset != block_group_rec->objectid ||
10638 chunk_rec->type_flags != block_group_rec->flags)) {
10641 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10642 chunk_rec->objectid,
10647 chunk_rec->type_flags,
10648 block_group_rec->objectid,
10649 block_group_rec->type,
10650 block_group_rec->offset,
10651 block_group_rec->offset,
10652 block_group_rec->objectid,
10653 block_group_rec->flags);
10656 list_del_init(&block_group_rec->list);
10657 chunk_rec->bg_rec = block_group_rec;
10662 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10663 chunk_rec->objectid,
10668 chunk_rec->type_flags);
/* Every stripe is expected to map to a device extent of this length. */
10675 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10676 chunk_rec->num_stripes);
10677 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10678 devid = chunk_rec->stripes[i].devid;
10679 offset = chunk_rec->stripes[i].offset;
10680 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10681 devid, offset, length);
10682 if (dev_extent_item) {
10683 dev_extent_rec = container_of(dev_extent_item,
10684 struct device_extent_record,
10686 if (dev_extent_rec->objectid != devid ||
10687 dev_extent_rec->offset != offset ||
10688 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10689 dev_extent_rec->length != length) {
10692 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10693 chunk_rec->objectid,
10696 chunk_rec->stripes[i].devid,
10697 chunk_rec->stripes[i].offset,
10698 dev_extent_rec->objectid,
10699 dev_extent_rec->offset,
10700 dev_extent_rec->length);
10703 list_move(&dev_extent_rec->chunk_list,
10704 &chunk_rec->dextents);
10709 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10710 chunk_rec->objectid,
10713 chunk_rec->stripes[i].devid,
10714 chunk_rec->stripes[i].offset);
10721 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * @chunk_cache:       tree of chunk records to verify
 * @block_group_cache: block group records; those still on its block_groups
 *                     list after the chunk walk are reported
 * @dev_extent_cache:  device extent records; those on its no_chunk_orphans
 *                     list after the chunk walk are reported
 * @good:              optional list; receives chunks for which
 *                     check_chunk_refs() returned 0
 * @bad:               optional list; receives chunks with a negative result
 * @rebuild:           optional list; receives chunks with a positive result
 * @silent:            passed through to check_chunk_refs()
 *
 * Each of @good, @bad and @rebuild may be NULL, in which case chunks of
 * that class are not collected.
 *
 * NOTE(review): this listing has lost lines (braces, the calls that print
 * the messages, return value handling); confirm against the complete file.
 */
10722 int check_chunks(struct cache_tree *chunk_cache,
10723 struct block_group_tree *block_group_cache,
10724 struct device_extent_tree *dev_extent_cache,
10725 struct list_head *good, struct list_head *bad,
10726 struct list_head *rebuild, int silent)
10728 struct cache_extent *chunk_item;
10729 struct chunk_record *chunk_rec;
10730 struct block_group_record *bg_rec;
10731 struct device_extent_record *dext_rec;
10735 chunk_item = first_cache_extent(chunk_cache);
10736 while (chunk_item) {
10737 chunk_rec = container_of(chunk_item, struct chunk_record,
10739 err = check_chunk_refs(chunk_rec, block_group_cache,
10740 dev_extent_cache, silent);
/* Sort the chunk onto the caller's list that matches the result. */
10743 if (err == 0 && good)
10744 list_add_tail(&chunk_rec->list, good);
10745 if (err > 0 && rebuild)
10746 list_add_tail(&chunk_rec->list, rebuild);
10747 if (err < 0 && bad)
10748 list_add_tail(&chunk_rec->list, bad);
10749 chunk_item = next_cache_extent(chunk_item);
10752 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10755 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10763 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10767 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10768 dext_rec->objectid,
/*
 * check_device_used - compare a device item's byte_used with the summed
 * length of its device extents.
 *
 * @dev_rec:    device record; devid selects the extents and byte_used is
 *              the value checked
 * @dext_cache: device extent tree, searched starting at (devid, 0)
 *
 * Each extent that is counted is also unlinked from its device_list.
 *
 * NOTE(review): this listing has lost lines (the loop header, the exit
 * taken when the objectid stops matching, the call that prints the
 * message, return statements); confirm against the complete file.
 */
10778 static int check_device_used(struct device_record *dev_rec,
10779 struct device_extent_tree *dext_cache)
10781 struct cache_extent *cache;
10782 struct device_extent_record *dev_extent_rec;
10783 u64 total_byte = 0;
10785 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10787 dev_extent_rec = container_of(cache,
10788 struct device_extent_record,
10790 if (dev_extent_rec->objectid != dev_rec->devid)
10793 list_del_init(&dev_extent_rec->device_list);
10794 total_byte += dev_extent_rec->length;
10795 cache = next_cache_extent(cache);
10798 if (total_byte != dev_rec->byte_used) {
10800 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10801 total_byte, dev_rec->byte_used, dev_rec->objectid,
10802 dev_rec->type, dev_rec->offset);
10810 * Extra (optional) check for dev_item size to report possible problem on a new
/*
 * @devid:       device id, used only in the message
 * @total_bytes: device size to test
 * @sectorsize:  required alignment of @total_bytes
 *
 * Only warns; nothing is returned.
 * NOTE(review): the lines that open the first two warning calls are
 * missing from this listing.
 */
10813 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
10815 if (!IS_ALIGNED(total_bytes, sectorsize)) {
10817 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
10818 devid, total_bytes, sectorsize);
10820 "this is OK for older kernel, but may cause kernel warning for newer kernels");
10821 warning("this can be fixed by 'btrfs rescue fix-device-size'");
10826 * Unlike device size alignment check above, some super total_bytes check
10827 * failure can lead to mount failure for newer kernel.
10829 * So this function will return the error for a fatal super total_bytes problem.
/*
 * @fs_info: filesystem whose super block total_bytes is compared with the
 *           sum of dev->total_bytes over fs_info->fs_devices->devices
 *
 * A super block value smaller than the device sum is reported with
 * error(); misalignment to fs_info->sectorsize or a plain mismatch is
 * reported with warning().  The second check is preceded by a test for the
 * METADUMP / METADUMP_V2 super flags.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably only the first check makes the function return false -
 * confirm against the complete file.
 */
10831 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10833 struct btrfs_device *dev;
10834 struct list_head *dev_list = &fs_info->fs_devices->devices;
10835 u64 total_bytes = 0;
10836 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10838 list_for_each_entry(dev, dev_list, dev_list)
10839 total_bytes += dev->total_bytes;
10841 /* Important check, which can cause unmountable fs */
10842 if (super_bytes < total_bytes) {
10843 error("super total bytes %llu smaller than real device(s) size %llu",
10844 super_bytes, total_bytes);
10845 error("mounting this fs may fail for newer kernels");
10846 error("this can be fixed by 'btrfs rescue fix-device-size'");
10851 * Optional check, just to make everything aligned and match with each
10854 * For a btrfs-image restored fs, we don't need to check it anyway.
10856 if (btrfs_super_flags(fs_info->super_copy) &
10857 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10859 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10860 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10861 super_bytes != total_bytes) {
10862 warning("minor unaligned/mismatch device size detected");
10864 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10869 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * @dev_cache:        rb-tree of device_record entries, walked from
 *                    rb_first() with rb_next()
 * @dev_extent_cache: passed to check_device_used(); its no_device_orphans
 *                    list is reported afterwards
 *
 * For every device record, check_device_used() is called and the device
 * size alignment is checked against global_info->sectorsize.
 *
 * NOTE(review): this listing has lost lines (the loop header, handling of
 * err, the call that prints the message, the return); confirm against the
 * complete file.
 */
10870 static int check_devices(struct rb_root *dev_cache,
10871 struct device_extent_tree *dev_extent_cache)
10873 struct rb_node *dev_node;
10874 struct device_record *dev_rec;
10875 struct device_extent_record *dext_rec;
10879 dev_node = rb_first(dev_cache);
10881 dev_rec = container_of(dev_node, struct device_record, node);
10882 err = check_device_used(dev_rec, dev_extent_cache);
10886 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10887 global_info->sectorsize);
10888 dev_node = rb_next(dev_node);
10890 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10893 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10894 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - allocate a root_item_record, fill it from the
 * arguments and append it to @head.
 *
 * @head:          list the new record is added to (at the tail)
 * @objectid:      stored in the record's objectid
 * @bytenr:        stored in the record's bytenr
 * @last_snapshot: stored in the record's last_snapshot
 * @level:         stored in the record's level
 * @drop_level:    stored in the record's drop_level
 * @drop_key:      copied into the record's drop_key
 *
 * NOTE(review): the allocation failure check, any guard around the
 * memcpy() for a NULL @drop_key, and the return statements are not visible
 * in this listing; confirm against the complete file.
 */
10901 static int add_root_item_to_list(struct list_head *head,
10902 u64 objectid, u64 bytenr, u64 last_snapshot,
10903 u8 level, u8 drop_level,
10904 struct btrfs_key *drop_key)
10907 struct root_item_record *ri_rec;
10908 ri_rec = malloc(sizeof(*ri_rec));
10911 ri_rec->bytenr = bytenr;
10912 ri_rec->objectid = objectid;
10913 ri_rec->level = level;
10914 ri_rec->drop_level = drop_level;
10915 ri_rec->last_snapshot = last_snapshot;
10917 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10918 list_add_tail(&ri_rec->list, head);
/*
 * free_root_item_list - empty @list by unlinking its first
 * root_item_record until the list is empty.
 *
 * NOTE(review): the statement that releases each record is not visible in
 * this listing (presumably a free() after the unlink) - confirm.
 */
10923 static void free_root_item_list(struct list_head *list)
10925 struct root_item_record *ri_rec;
10927 while (!list_empty(list)) {
10928 ri_rec = list_first_entry(list, struct root_item_record,
10930 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - process the root_item_record entries queued on
 * @list.
 *
 * @list: records to process; each one is unlinked once handled
 * @root: used for root->fs_info when reading tree blocks and passed to
 *        run_next_block()
 * @bits: passed to run_next_block()
 * The remaining cache/tree parameters are passed through unchanged to
 * add_root_to_pending() and run_next_block().
 *
 * For each record the tree block at rec->bytenr is read, handed to
 * add_root_to_pending() together with rec->objectid, and run_next_block()
 * is called with the record; afterwards the buffer reference is dropped.
 * A further run_next_block() call with a NULL record follows the list
 * loop.
 *
 * NOTE(review): this listing has lost lines (one parameter, local
 * declarations, braces, inner loops/conditions, error exits, the return);
 * confirm against the complete file.
 */
10935 static int deal_root_from_list(struct list_head *list,
10936 struct btrfs_root *root,
10937 struct block_info *bits,
10939 struct cache_tree *pending,
10940 struct cache_tree *seen,
10941 struct cache_tree *reada,
10942 struct cache_tree *nodes,
10943 struct cache_tree *extent_cache,
10944 struct cache_tree *chunk_cache,
10945 struct rb_root *dev_cache,
10946 struct block_group_tree *block_group_cache,
10947 struct device_extent_tree *dev_extent_cache)
10952 while (!list_empty(list)) {
10953 struct root_item_record *rec;
10954 struct extent_buffer *buf;
10955 rec = list_entry(list->next,
10956 struct root_item_record, list);
10958 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
/* An unreadable block still has its buffer reference dropped here. */
10959 if (!extent_buffer_uptodate(buf)) {
10960 free_extent_buffer(buf);
10964 ret = add_root_to_pending(buf, extent_cache, pending,
10965 seen, nodes, rec->objectid);
10969 * To rebuild extent tree, we need deal with snapshot
10970 * one by one, otherwise we deal with node firstly which
10971 * can maximize readahead.
10974 ret = run_next_block(root, bits, bits_nr, &last,
10975 pending, seen, reada, nodes,
10976 extent_cache, chunk_cache,
10977 dev_cache, block_group_cache,
10978 dev_extent_cache, rec);
10982 free_extent_buffer(buf);
10983 list_del(&rec->list);
10989 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10990 reada, nodes, extent_cache, chunk_cache,
10991 dev_cache, block_group_cache,
10992 dev_extent_cache, NULL);
/*
 * Walk the tree root, the chunk root and every ROOT_ITEM found in the tree
 * root, then cross-check chunks, extent references and devices using the
 * caches filled in along the way.
 *
 * @fs_info: filesystem being checked. Its excluded_extents,
 *           fsck_extent_cache, free_extent_hook and corrupt_blocks fields
 *           are pointed at locals of this function while the check runs and
 *           are reset to NULL in the teardown code.
 *
 * NOTE(review): the return statements, labels and the handling of most
 * `ret` values are not visible in this view; confirm the exact return
 * contract against the complete function.
 */
11002 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11004 struct rb_root dev_cache;
11005 struct cache_tree chunk_cache;
11006 struct block_group_tree block_group_cache;
11007 struct device_extent_tree dev_extent_cache;
11008 struct cache_tree extent_cache;
11009 struct cache_tree seen;
11010 struct cache_tree pending;
11011 struct cache_tree reada;
11012 struct cache_tree nodes;
11013 struct extent_io_tree excluded_extents;
11014 struct cache_tree corrupt_blocks;
11015 struct btrfs_path path;
11016 struct btrfs_key key;
11017 struct btrfs_key found_key;
11019 struct block_info *bits;
11021 struct extent_buffer *leaf;
11023 struct btrfs_root_item ri;
11024 struct list_head dropping_trees;
11025 struct list_head normal_trees;
11026 struct btrfs_root *root1;
11027 struct btrfs_root *root;
/* `root` is the fs root; it is the root handle passed to the helpers below. */
11031 root = fs_info->fs_root;
/* Start with every cache, tree and list empty. */
11032 dev_cache = RB_ROOT;
11033 cache_tree_init(&chunk_cache);
11034 block_group_tree_init(&block_group_cache);
11035 device_extent_tree_init(&dev_extent_cache);
11037 cache_tree_init(&extent_cache);
11038 cache_tree_init(&seen);
11039 cache_tree_init(&pending);
11040 cache_tree_init(&nodes);
11041 cache_tree_init(&reada);
11042 cache_tree_init(&corrupt_blocks);
11043 extent_io_tree_init(&excluded_extents);
11044 INIT_LIST_HEAD(&dropping_trees);
11045 INIT_LIST_HEAD(&normal_trees);
/* Publish the local caches and the free-extent hook through fs_info. */
11048 fs_info->excluded_extents = &excluded_extents;
11049 fs_info->fsck_extent_cache = &extent_cache;
11050 fs_info->free_extent_hook = free_extent_hook;
11051 fs_info->corrupt_blocks = &corrupt_blocks;
/*
 * Scratch array of bits_nr block_info entries handed to deal_root_from_list().
 * NOTE(review): the assignment of bits_nr and the allocation-failure check
 * are not visible here.
 */
11055 bits = malloc(bits_nr * sizeof(struct block_info));
/* Optional progress reporting for the extent phase. */
11061 if (ctx.progress_enabled) {
11062 ctx.tp = TASK_EXTENTS;
11063 task_start(ctx.info);
/* Queue the tree root first: last_snapshot 0, drop level 0, no drop key. */
11067 root1 = fs_info->tree_root;
11068 level = btrfs_header_level(root1->node);
11069 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11070 root1->node->start, 0, level, 0, NULL);
/* Then the chunk root, with the same defaults. */
11073 root1 = fs_info->chunk_root;
11074 level = btrfs_header_level(root1->node);
11075 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11076 root1->node->start, 0, level, 0, NULL);
/*
 * Scan the tree root for ROOT_ITEM keys.
 * NOTE(review): the setup of key.objectid / key.offset is not visible here.
 */
11079 btrfs_init_path(&path);
11082 key.type = BTRFS_ROOT_ITEM_KEY;
11083 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11087 leaf = path.nodes[0];
11088 slot = path.slots[0];
/*
 * Slot is past the last item of this leaf: move on to the next leaf.
 * NOTE(review): the path was searched in fs_info->tree_root but `root`
 * (fs_info->fs_root) is passed here; confirm this is intended.
 */
11089 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11090 ret = btrfs_next_leaf(root, &path);
11093 leaf = path.nodes[0];
11094 slot = path.slots[0];
11096 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11097 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11098 unsigned long offset;
/* Copy the on-disk root item out of the leaf into `ri`. */
11101 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11102 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11103 last_snapshot = btrfs_root_last_snapshot(&ri);
/* Roots whose drop_progress objectid is zero are queued on normal_trees. */
11104 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11105 level = btrfs_root_level(&ri);
11106 ret = add_root_item_to_list(&normal_trees,
11107 found_key.objectid,
11108 btrfs_root_bytenr(&ri),
11109 last_snapshot, level,
/*
 * Other roots are queued on dropping_trees together with their drop_level
 * and drop_progress key; found_key is overwritten with that key, so the
 * root's objectid is saved in `objectid` first.
 */
11114 level = btrfs_root_level(&ri);
11115 objectid = found_key.objectid;
11116 btrfs_disk_key_to_cpu(&found_key,
11117 &ri.drop_progress);
11118 ret = add_root_item_to_list(&dropping_trees,
11120 btrfs_root_bytenr(&ri),
11121 last_snapshot, level,
11122 ri.drop_level, &found_key);
11129 btrfs_release_path(&path);
11132 * check_block can return -EAGAIN if it fixes something, please keep
11133 * this in mind when dealing with return values from these functions, if
11134 * we get -EAGAIN we want to fall through and restart the loop.
/* Process the queued normal roots first ... */
11136 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11137 &seen, &reada, &nodes, &extent_cache,
11138 &chunk_cache, &dev_cache, &block_group_cache,
11139 &dev_extent_cache);
11141 if (ret == -EAGAIN)
/* ... then the roots queued on dropping_trees. */
11145 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11146 &pending, &seen, &reada, &nodes,
11147 &extent_cache, &chunk_cache, &dev_cache,
11148 &block_group_cache, &dev_extent_cache);
11150 if (ret == -EAGAIN)
/* Cross-check chunks against block groups and device extents. */
11155 ret = check_chunks(&chunk_cache, &block_group_cache,
11156 &dev_extent_cache, NULL, NULL, NULL, 0);
11158 if (ret == -EAGAIN)
/* Verify the references recorded in extent_cache. */
11163 ret = check_extent_refs(root, &extent_cache);
11165 if (ret == -EAGAIN)
/* Finally check devices against the device extents collected above. */
11170 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Teardown: stop progress reporting and detach the local caches from fs_info. */
11175 task_stop(ctx.info);
11177 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11178 extent_io_tree_cleanup(&excluded_extents);
11179 fs_info->fsck_extent_cache = NULL;
11180 fs_info->free_extent_hook = NULL;
11181 fs_info->corrupt_blocks = NULL;
11182 fs_info->excluded_extents = NULL;
11185 free_chunk_cache_tree(&chunk_cache);
11186 free_device_cache_tree(&dev_cache);
11187 free_block_group_tree(&block_group_cache);
11188 free_device_extent_tree(&dev_extent_cache);
11189 free_extent_cache_tree(&seen);
11190 free_extent_cache_tree(&pending);
11191 free_extent_cache_tree(&reada);
11192 free_extent_cache_tree(&nodes);
11193 free_root_item_list(&normal_trees);
11194 free_root_item_list(&dropping_trees);
/*
 * Second teardown sequence; in the visible lines it additionally frees
 * extent_cache via free_extent_record_cache().
 * NOTE(review): the label or branch that reaches this sequence, and what
 * follows it, are not visible here (see the -EAGAIN comment above).
 */
11197 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11198 free_extent_cache_tree(&seen);
11199 free_extent_cache_tree(&pending);
11200 free_extent_cache_tree(&reada);
11201 free_extent_cache_tree(&nodes);
11202 free_chunk_cache_tree(&chunk_cache);
11203 free_block_group_tree(&block_group_cache);
11204 free_device_cache_tree(&dev_cache);
11205 free_device_extent_tree(&dev_extent_cache);
11206 free_extent_record_cache(&extent_cache);
11207 free_root_item_list(&normal_trees);
11208 free_root_item_list(&dropping_trees);
11209 extent_io_tree_cleanup(&excluded_extents);
/*
 * Validate the type of one inline reference inside an extent item.
 *
 * @eb:   leaf that holds the extent item
 * @key:  key of the extent item, used for the error message
 * @iref: inline reference whose type is inspected
 *
 * The four reference key types listed as cases are the recognised ones; the
 * remaining path reports the type with error() and sets ret to UNKNOWN_TYPE.
 * NOTE(review): the switch header, default label and return statement are
 * not visible here; presumably `ret` is what the function returns.
 */
11213 static int check_extent_inline_ref(struct extent_buffer *eb,
11214 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11217 u8 type = btrfs_extent_inline_ref_type(eb, iref);
/* Recognised inline reference types. */
11220 case BTRFS_TREE_BLOCK_REF_KEY:
11221 case BTRFS_EXTENT_DATA_REF_KEY:
11222 case BTRFS_SHARED_BLOCK_REF_KEY:
11223 case BTRFS_SHARED_DATA_REF_KEY:
/* Any other type is reported and flagged. */
11227 error("extent[%llu %u %llu] has unknown ref type: %d",
11228 key->objectid, key->type, key->offset, type);
11229 ret = UNKNOWN_TYPE;
11237 * Check backrefs of a tree block given by @bytenr or @eb.
11239 * @root: the root containing the @bytenr or @eb
11240 * @eb: tree block extent buffer, can be NULL
11241 * @bytenr: bytenr of the tree block to search
11242 * @level: tree level of the tree block
11243 * @owner: owner of the tree block
11245 * Return >0 for any error found and output error message
11246 * Return 0 for no error found
/*
 * Look up the extent/metadata item of tree block @bytenr in the extent tree
 * and check that it carries a backref matching @root (or @owner when the
 * check is not strict), first among the inline refs, then as a keyed
 * TREE_BLOCK_REF item, and finally as a SHARED_BLOCK_REF item.
 *
 * @nrefs: passed to should_check_extent_strictly(); when non-NULL and the
 *         check is strict, nrefs->full_backref[level + 1] selects
 *         nrefs->bytenr[level + 1] as the parent named in the error message.
 *
 * NOTE(review): the branch conditions, gotos and the return statement are
 * not visible in this view.
 */
11248 static int check_tree_block_ref(struct btrfs_root *root,
11249 struct extent_buffer *eb, u64 bytenr,
11250 int level, u64 owner, struct node_refs *nrefs)
11252 struct btrfs_key key;
11253 struct btrfs_root *extent_root = root->fs_info->extent_root;
11254 struct btrfs_path path;
11255 struct btrfs_extent_item *ei;
11256 struct btrfs_extent_inline_ref *iref;
11257 struct extent_buffer *leaf;
11262 int root_level = btrfs_header_level(root->node);
11264 u32 nodesize = root->fs_info->nodesize;
/*
 * Build a key that sorts after any item of @bytenr with the chosen type
 * (offset is all ones); btrfs_previous_extent_item() is then called to land
 * on the extent/metadata item for @bytenr.
 */
11273 btrfs_init_path(&path);
11274 key.objectid = bytenr;
11275 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11276 key.type = BTRFS_METADATA_ITEM_KEY;
11278 key.type = BTRFS_EXTENT_ITEM_KEY;
11279 key.offset = (u64)-1;
11281 /* Search for the backref in extent tree */
11282 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11284 err |= BACKREF_MISSING;
11287 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11289 err |= BACKREF_MISSING;
11293 leaf = path.nodes[0];
11294 slot = path.slots[0];
11295 btrfs_item_key_to_cpu(leaf, &key, slot);
11297 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * A METADATA_ITEM stores the level in key.offset and the inline refs follow
 * the extent item directly; otherwise a btrfs_tree_block_info holding the
 * level sits between the extent item and the inline refs.
 */
11299 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11300 skinny_level = (int)key.offset;
11301 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11303 struct btrfs_tree_block_info *info;
11305 info = (struct btrfs_tree_block_info *)(ei + 1);
11306 skinny_level = btrfs_tree_block_level(leaf, info);
11307 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11316 * Due to the feature of shared tree blocks, if the upper node
11317 * is a fs root or shared node, the extent of checked node may
11318 * not be updated until the next CoW.
11321 strict = should_check_extent_strictly(root, nrefs,
/* The extent item must be flagged as a tree block. */
11323 if (!(btrfs_extent_flags(leaf, ei) &
11324 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11326 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11327 key.objectid, nodesize,
11328 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11329 err = BACKREF_MISMATCH;
/*
 * Generation recorded in the block header must match the extent item.
 * NOTE(review): @eb may be NULL per the header comment; the guard around
 * this dereference is not visible here.
 */
11331 header_gen = btrfs_header_generation(eb);
11332 extent_gen = btrfs_extent_generation(leaf, ei);
11333 if (header_gen != extent_gen) {
11335 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11336 key.objectid, nodesize, header_gen,
11338 err = BACKREF_MISMATCH;
/* Level given by the caller must match the level stored in the extent tree. */
11340 if (level != skinny_level) {
11342 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11343 key.objectid, nodesize, level, skinny_level);
11344 err = BACKREF_MISMATCH;
/* Blocks not owned by a fs tree are expected to have exactly one reference. */
11346 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11348 "extent[%llu %u] is referred by other roots than %llu",
11349 key.objectid, nodesize, root->objectid);
11350 err = BACKREF_MISMATCH;
11355 * Iterate the extent/metadata item to find the exact backref
/* [ptr, end) spans the inline references stored after the extent item. */
11357 item_size = btrfs_item_size_nr(leaf, slot);
11358 ptr = (unsigned long)iref;
11359 end = (unsigned long)ei + item_size;
11361 while (ptr < end) {
11362 iref = (struct btrfs_extent_inline_ref *)ptr;
11363 type = btrfs_extent_inline_ref_type(leaf, iref);
11364 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11366 ret = check_extent_inline_ref(leaf, &key, iref);
/*
 * A TREE_BLOCK_REF is compared against root->objectid, and against @owner
 * when the check is not strict.
 */
11371 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11372 if (offset == root->objectid)
11374 if (!strict && owner == offset)
11376 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11378 * Backref of tree reloc root points to itself, no need
11379 * to check backref any more.
11381 * This may be an error of loop backref, but extent tree
11382 * checker should have already handled it.
11383 * Here we only need to avoid infinite iteration.
11385 if (offset == bytenr) {
11389 * Check if the backref points to valid
/* Recurse on the parent block (the ref's offset) one level up. */
11392 found_ref = !check_tree_block_ref( root, NULL,
11393 offset, level + 1, owner,
/* Advance by the size of this reference type. */
11400 ptr += btrfs_extent_inline_ref_size(type);
11404 * Inlined extent item doesn't have what we need, check
11405 * TREE_BLOCK_REF_KEY
11408 btrfs_release_path(&path);
11409 key.objectid = bytenr;
11410 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11411 key.offset = root->objectid;
11413 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11418 * Finally check SHARED BLOCK REF, any found will be good
11419 * Here we're not doing comprehensive extent backref checking,
11420 * only need to ensure there is some extent referring to this
11424 btrfs_release_path(&path);
11425 key.objectid = bytenr;
11426 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11427 key.offset = (u64)-1;
11429 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11431 err |= BACKREF_MISSING;
11434 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11436 err |= BACKREF_MISSING;
11442 err |= BACKREF_MISSING;
11444 btrfs_release_path(&path);
/* Pick the parent bytenr for the message when the level above uses full backrefs. */
11445 if (nrefs && strict &&
11446 level < root_level && nrefs->full_backref[level + 1])
11447 parent = nrefs->bytenr[level + 1];
/* The "backref lost" message is only printed when a real @eb was supplied. */
11448 if (eb && (err & BACKREF_MISSING))
11450 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11451 bytenr, nodesize, owner, level,
11452 parent ? "parent" : "root",
11453 parent ? parent : root->objectid);
11458 * If @err contains BACKREF_MISSING then add extent of the
11459 * file_extent_data_item.
11461 * Returns error bits after repair.
/*
 * Repair a file extent item whose data backref is missing: look up (and, in
 * the visible insert path, create) the EXTENT_ITEM for the data extent, then
 * add the backref with btrfs_inc_extent_ref().
 *
 * @trans: transaction handle used for the insert and the ref increment
 * @root:  root that owns the file extent item
 * @pathp: path whose nodes[0]/slots[0] point at the file extent item
 * @nrefs: nrefs->full_backref[0] decides whether a parent bytenr is used
 *
 * NOTE(review): the last parameter, branch conditions and return statement
 * are not visible in this view; the body reads an `err` bitmask.
 */
11463 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11464 struct btrfs_root *root,
11465 struct btrfs_path *pathp,
11466 struct node_refs *nrefs,
11469 struct btrfs_file_extent_item *fi;
11470 struct btrfs_key fi_key;
11471 struct btrfs_key key;
11472 struct btrfs_extent_item *ei;
11473 struct btrfs_path path;
11474 struct btrfs_root *extent_root = root->fs_info->extent_root;
11475 struct extent_buffer *eb;
11487 eb = pathp->nodes[0];
11488 slot = pathp->slots[0];
11489 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11490 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
/* Inline extents and extents with disk bytenr 0 are tested for here. */
11492 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11493 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11496 file_offset = fi_key.offset;
11497 generation = btrfs_file_extent_generation(eb, fi);
11498 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11499 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11500 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Backref offset = file offset minus the offset into the extent. */
11501 offset = file_offset - extent_offset;
11503 /* now repair only adds backref */
11504 if ((err & BACKREF_MISSING) == 0)
11507 /* search extent item */
11508 key.objectid = disk_bytenr;
11509 key.type = BTRFS_EXTENT_ITEM_KEY;
11510 key.offset = num_bytes;
11512 btrfs_init_path(&path);
11513 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11519 /* insert an extent item */
11521 key.objectid = disk_bytenr;
11522 key.type = BTRFS_EXTENT_ITEM_KEY;
11523 key.offset = num_bytes;
11524 size = sizeof(*ei);
11526 btrfs_release_path(&path);
11527 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11531 eb = path.nodes[0];
11532 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/*
 * The new extent item starts with zero references; the reference itself is
 * added by btrfs_inc_extent_ref() further down.
 */
11534 btrfs_set_extent_refs(eb, ei, 0);
11535 btrfs_set_extent_generation(eb, ei, generation);
11536 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11538 btrfs_mark_buffer_dirty(eb);
11539 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11541 btrfs_release_path(&path);
/*
 * NOTE(review): `eb` was reassigned to path.nodes[0] of the extent tree in
 * the insert code above and that path has been released; confirm which
 * leaf's bytenr is meant to be used as the parent here.
 */
11544 if (nrefs->full_backref[0])
11545 parent = btrfs_header_bytenr(eb);
11549 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11551 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11555 "failed to increase extent data backref[%llu %llu] root %llu",
11556 disk_bytenr, num_bytes, root->objectid);
11559 printf("Add one extent data backref [%llu %llu]\n",
11560 disk_bytenr, num_bytes);
/* The missing-backref bit is cleared once the reference has been added. */
11563 err &= ~BACKREF_MISSING;
11566 error("can't repair root %llu extent data item[%llu %llu]",
11567 root->objectid, disk_bytenr, num_bytes);
11572 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11574 * Return >0 for any error found and output error message
11575 * Return 0 for no error found
/*
 * Check one EXTENT_DATA item: alignment of its byte counts, the flags of its
 * extent item in the extent tree, and the presence of a matching data
 * backref (inline, keyed EXTENT_DATA_REF, or keyed SHARED_DATA_REF).
 *
 * @root:          root that owns the file extent item
 * @pathp:         path whose nodes[0]/slots[0] point at the file extent item
 * @nrefs:         passed to should_check_extent_strictly() and to
 *                 check_tree_block_ref()
 * @account_bytes: when non-zero, aligned sizes are added to the global
 *                 data_bytes_allocated / data_bytes_referenced counters
 *
 * NOTE(review): early returns, gotos and the final return are not visible
 * in this view.
 */
11577 static int check_extent_data_item(struct btrfs_root *root,
11578 struct btrfs_path *pathp,
11579 struct node_refs *nrefs, int account_bytes)
11581 struct btrfs_file_extent_item *fi;
11582 struct extent_buffer *eb = pathp->nodes[0];
11583 struct btrfs_path path;
11584 struct btrfs_root *extent_root = root->fs_info->extent_root;
11585 struct btrfs_key fi_key;
11586 struct btrfs_key dbref_key;
11587 struct extent_buffer *leaf;
11588 struct btrfs_extent_item *ei;
11589 struct btrfs_extent_inline_ref *iref;
11590 struct btrfs_extent_data_ref *dref;
11593 u64 disk_num_bytes;
11594 u64 extent_num_bytes;
11601 int found_dbackref = 0;
11602 int slot = pathp->slots[0];
11607 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11608 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11610 /* Nothing to check for hole and inline data extents */
11611 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11612 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11615 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11616 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11617 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11618 offset = btrfs_file_extent_offset(eb, fi);
11620 /* Check unaligned disk_num_bytes and num_bytes */
11621 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11623 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11624 fi_key.objectid, fi_key.offset, disk_num_bytes,
11625 root->fs_info->sectorsize);
11626 err |= BYTES_UNALIGNED;
/* Only aligned sizes are accounted, and only when the caller asked for it. */
11627 } else if (account_bytes) {
11628 data_bytes_allocated += disk_num_bytes;
11630 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11632 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11633 fi_key.objectid, fi_key.offset, extent_num_bytes,
11634 root->fs_info->sectorsize);
11635 err |= BYTES_UNALIGNED;
11636 } else if (account_bytes) {
11637 data_bytes_referenced += extent_num_bytes;
/* Owner recorded in the leaf header; used for the non-strict match below. */
11639 owner = btrfs_header_owner(eb);
11641 /* Check the extent item of the file extent in extent tree */
11642 btrfs_init_path(&path);
11643 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11644 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11645 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11647 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11651 leaf = path.nodes[0];
11652 slot = path.slots[0];
11653 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11655 extent_flags = btrfs_extent_flags(leaf, ei);
/* The extent item must be flagged as data. */
11657 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11659 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11660 disk_bytenr, disk_num_bytes,
11661 BTRFS_EXTENT_FLAG_DATA);
11662 err |= BACKREF_MISMATCH;
11665 /* Check data backref inside that extent item */
/* [ptr, end) spans the inline references stored after the extent item. */
11666 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11667 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11668 ptr = (unsigned long)iref;
11669 end = (unsigned long)ei + item_size;
11670 strict = should_check_extent_strictly(root, nrefs, -1);
11672 while (ptr < end) {
11676 bool match = false;
11678 iref = (struct btrfs_extent_inline_ref *)ptr;
11679 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For EXTENT_DATA_REF the data ref payload starts at the iref's offset field. */
11680 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11682 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11687 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11688 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11689 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11690 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
/* Inode and (file offset - extent offset) are compared with the ref. */
11692 if (ref_objectid == fi_key.objectid &&
11693 ref_offset == fi_key.offset - offset)
/*
 * The ref's root must be this root, or, when the check is not strict, the
 * owner recorded in the leaf header.
 */
11695 if (ref_root == root->objectid && match)
11696 found_dbackref = 1;
11697 else if (!strict && owner == ref_root && match)
11698 found_dbackref = 1;
11699 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared data ref: accepted when the parent block it names checks out. */
11700 found_dbackref = !check_tree_block_ref(root, NULL,
11701 btrfs_extent_inline_ref_offset(leaf, iref),
11705 if (found_dbackref)
11707 ptr += btrfs_extent_inline_ref_size(type);
11710 if (!found_dbackref) {
11711 btrfs_release_path(&path);
11713 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11714 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11715 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs use a hash of (root, inode, offset) as the key offset. */
11716 dbref_key.offset = hash_extent_data_ref(root->objectid,
11717 fi_key.objectid, fi_key.offset - offset);
11719 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11720 &dbref_key, &path, 0, 0);
11722 found_dbackref = 1;
11726 btrfs_release_path(&path);
11729 * Neither inlined nor EXTENT_DATA_REF found, try
11730 * SHARED_DATA_REF as last chance.
11732 dbref_key.objectid = disk_bytenr;
11733 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
/* The shared ref is keyed by the bytenr of the leaf holding the file extent. */
11734 dbref_key.offset = eb->start;
11736 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11737 &dbref_key, &path, 0, 0);
11739 found_dbackref = 1;
11745 if (!found_dbackref)
11746 err |= BACKREF_MISSING;
11747 btrfs_release_path(&path);
11748 if (err & BACKREF_MISSING) {
11749 error("data extent[%llu %llu] backref lost",
11750 disk_bytenr, disk_num_bytes);
11756 * Get the real tree block level, for cases like shared blocks
11757 * Return >= 0 as tree level
11758 * Return <0 for error
/*
 * Determine the level of tree block @bytenr from two sources: the level
 * recorded with its extent/metadata item in the extent tree, and the level
 * stored in the block's own header. header_level is returned on the visible
 * success path.
 *
 * @fs_info: filesystem whose extent tree is searched
 * @bytenr:  start of the tree block
 *
 * NOTE(review): the error returns (including what happens when the two
 * levels differ) are not visible in this view.
 */
11760 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11762 struct extent_buffer *eb;
11763 struct btrfs_path path;
11764 struct btrfs_key key;
11765 struct btrfs_extent_item *ei;
11772 /* Search extent tree for extent generation and level */
/* Offset of all ones, so btrfs_previous_extent_item() can step back to the item. */
11773 key.objectid = bytenr;
11774 key.type = BTRFS_METADATA_ITEM_KEY;
11775 key.offset = (u64)-1;
11777 btrfs_init_path(&path);
11778 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11781 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11789 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11790 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11791 struct btrfs_extent_item);
11792 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Extents not flagged as tree blocks are handled separately. */
11793 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11798 /* Get transid for later read_tree_block() check */
11799 transid = btrfs_extent_generation(path.nodes[0], ei);
11801 /* Get backref level as one source */
/* METADATA_ITEM keeps the level in key.offset; otherwise read tree_block_info. */
11802 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11803 backref_level = key.offset;
11805 struct btrfs_tree_block_info *info;
11807 info = (struct btrfs_tree_block_info *)(ei + 1);
11808 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11810 btrfs_release_path(&path);
11812 /* Get level from tree block as an alternative source */
11813 eb = read_tree_block(fs_info, bytenr, transid);
11814 if (!extent_buffer_uptodate(eb)) {
11815 free_extent_buffer(eb);
11818 header_level = btrfs_header_level(eb);
11819 free_extent_buffer(eb);
/* The two sources are compared before header_level is returned. */
11821 if (header_level != backref_level)
11823 return header_level;
11826 btrfs_release_path(&path);
11831 * Check if a tree block backref is valid (points to a valid tree block)
11832 * if level == -1, level will be resolved
11833 * Return >0 for any error found and print error message
/*
 * Verify that root @root_id really references tree block @bytenr at @level:
 * read the block, take its first key, search that key in the root with
 * path.lowest_level set to @level, and compare the node found there with
 * @bytenr and @level.
 *
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  start of the tree block
 * @level:   expected level; resolved via query_tree_block_level() for the
 *           -1 special case described in the header comment
 *
 * NOTE(review): gotos, branch conditions and the return statement are not
 * visible in this view.
 */
11835 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11836 u64 bytenr, int level)
11838 struct btrfs_root *root;
11839 struct btrfs_key key;
11840 struct btrfs_path path;
11841 struct extent_buffer *eb;
11842 struct extent_buffer *node;
11843 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11847 /* Query level for level == -1 special case */
11849 level = query_tree_block_level(fs_info, bytenr);
11851 err |= REFERENCER_MISSING;
/* Look up the referencing root by id. */
11855 key.objectid = root_id;
11856 key.type = BTRFS_ROOT_ITEM_KEY;
11857 key.offset = (u64)-1;
11859 root = btrfs_read_fs_root(fs_info, &key);
11860 if (IS_ERR(root)) {
11861 err |= REFERENCER_MISSING;
11865 /* Read out the tree block to get item/node key */
11866 eb = read_tree_block(fs_info, bytenr, 0);
11867 if (!extent_buffer_uptodate(eb)) {
11868 err |= REFERENCER_MISSING;
11869 free_extent_buffer(eb);
11873 /* Empty tree, no need to check key */
11874 if (!btrfs_header_nritems(eb) && !level) {
11875 free_extent_buffer(eb);
/* First key of the block: a node key or an item key (selector not visible). */
11880 btrfs_node_key_to_cpu(eb, &key, 0);
11882 btrfs_item_key_to_cpu(eb, &key, 0);
11884 free_extent_buffer(eb);
11886 btrfs_init_path(&path);
/* Stop the search at @level so path.nodes[level] is the block to compare. */
11887 path.lowest_level = level;
11888 /* Search with the first key, to ensure we can reach it */
11889 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11891 err |= REFERENCER_MISSING;
11895 node = path.nodes[level];
11896 if (btrfs_header_bytenr(node) != bytenr) {
11898 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11899 bytenr, nodesize, bytenr,
11900 btrfs_header_bytenr(node));
11901 err |= REFERENCER_MISMATCH;
11903 if (btrfs_header_level(node) != level) {
11905 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11906 bytenr, nodesize, level,
11907 btrfs_header_level(node));
11908 err |= REFERENCER_MISMATCH;
11912 btrfs_release_path(&path);
/* Two message variants: without and with the level (selector not visible). */
11914 if (err & REFERENCER_MISSING) {
11916 error("extent [%llu %d] lost referencer (owner: %llu)",
11917 bytenr, nodesize, root_id);
11920 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11921 bytenr, nodesize, root_id, level);
11928 * Check if tree block @eb is tree reloc root.
11929 * Return 0 if it's not or any problem happens
11930 * Return 1 if it's a tree reloc root
/*
 * Decide whether tree block @eb is the root node of the tree reloc root
 * that belongs to @eb's owner.
 *
 * @fs_info: filesystem the block belongs to
 * @eb:      tree block to test; its header bytenr and owner are used
 *
 * The reloc root is read uncached with key (BTRFS_TREE_RELOC_OBJECTID,
 * ROOT_ITEM, owner), its node bytenr is compared with @eb's bytenr, and it
 * is freed again afterwards.
 * NOTE(review): the return statements are not visible in this view; see the
 * header comment for the documented 0/1 contract.
 */
11932 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11933 struct extent_buffer *eb)
11935 struct btrfs_root *tree_reloc_root;
11936 struct btrfs_key key;
11937 u64 bytenr = btrfs_header_bytenr(eb);
11938 u64 owner = btrfs_header_owner(eb);
11941 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11942 key.offset = owner;
11943 key.type = BTRFS_ROOT_ITEM_KEY;
/* Uncached read: the root is released with btrfs_free_fs_root() below. */
11945 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11946 if (IS_ERR(tree_reloc_root))
11949 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11951 btrfs_free_fs_root(tree_reloc_root);
11956 * Check referencer for shared block backref
11957 * If level == -1, this function will resolve the level.
/*
 * Check that tree block @parent really points at tree block @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the block named by the shared block backref
 * @bytenr:  start of the referenced tree block
 * @level:   level of @bytenr; resolved through query_tree_block_level() for
 *           the -1 case described in the header comment
 *
 * Returns REFERENCER_MISSING (after printing an error) when no matching
 * block pointer was found in @parent.
 * NOTE(review): gotos and the success return are not visible in this view.
 */
11959 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11960 u64 parent, u64 bytenr, int level)
11962 struct extent_buffer *eb;
11964 int found_parent = 0;
11967 eb = read_tree_block(fs_info, parent, 0);
11968 if (!extent_buffer_uptodate(eb))
11972 level = query_tree_block_level(fs_info, bytenr);
11976 /* It's possible it's a tree reloc root */
11977 if (parent == bytenr) {
11978 if (is_tree_reloc_root(fs_info, eb))
/* A real parent sits exactly one level above the child. */
11983 if (level + 1 != btrfs_header_level(eb))
/* Scan the parent's block pointers for @bytenr. */
11986 nr = btrfs_header_nritems(eb);
11987 for (i = 0; i < nr; i++) {
11988 if (bytenr == btrfs_node_blockptr(eb, i)) {
11994 free_extent_buffer(eb);
11995 if (!found_parent) {
11997 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11998 bytenr, fs_info->nodesize, parent, level);
11999 return REFERENCER_MISSING;
12005 * Check referencer for normal (inlined) data ref
12006 * If len == 0, it will be resolved by searching in extent tree
/*
 * Check a keyed/inline EXTENT_DATA_REF: iterate the file extent items of
 * inode @objectid in root @root_id and compare how many of them reference
 * the data extent with the expected @count.
 *
 * @fs_info:  filesystem being checked
 * @root_id:  root named by the data ref
 * @objectid: inode number named by the data ref
 * @offset:   data ref offset (file offset minus file extent offset)
 * @bytenr:   start of the data extent
 * @len:      length of the data extent (see the header comment for len == 0)
 * @count:    reference count stored in the data ref
 *
 * Returns REFERENCER_MISSING (after printing an error) when the number of
 * matching file extents differs from @count.
 * NOTE(review): branch conditions, loop control and the other returns are
 * not visible in this view.
 */
12008 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12009 u64 root_id, u64 objectid, u64 offset,
12010 u64 bytenr, u64 len, u32 count)
12012 struct btrfs_root *root;
12013 struct btrfs_root *extent_root = fs_info->extent_root;
12014 struct btrfs_key key;
12015 struct btrfs_path path;
12016 struct extent_buffer *leaf;
12017 struct btrfs_file_extent_item *fi;
12018 u32 found_count = 0;
/* Locate the EXTENT_ITEM for @bytenr in the extent tree. */
12023 key.objectid = bytenr;
12024 key.type = BTRFS_EXTENT_ITEM_KEY;
12025 key.offset = (u64)-1;
12027 btrfs_init_path(&path);
12028 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12031 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12034 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12035 if (key.objectid != bytenr ||
12036 key.type != BTRFS_EXTENT_ITEM_KEY)
12039 btrfs_release_path(&path);
/* Read the root that is supposed to hold the referencing file extents. */
12041 key.objectid = root_id;
12042 key.type = BTRFS_ROOT_ITEM_KEY;
12043 key.offset = (u64)-1;
12044 btrfs_init_path(&path);
12046 root = btrfs_read_fs_root(fs_info, &key);
12050 key.objectid = objectid;
12051 key.type = BTRFS_EXTENT_DATA_KEY;
12053 * It can be nasty as data backref offset is
12054 * file offset - file extent offset, which is smaller or
12055 * equal to original backref offset. The only special case is
12056 * overflow. So we need to special check and do further search.
/* With the top bit of @offset set, the search starts from file offset 0. */
12058 key.offset = offset & (1ULL << 63) ? 0 : offset;
12060 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12065 * Search afterwards to get correct one
12066 * NOTE: As we must do a comprehensive check on the data backref to
12067 * make sure the dref count also matches, we must iterate all file
12068 * extents for that inode.
12071 leaf = path.nodes[0];
12072 slot = path.slots[0];
/* Slots past the leaf end and leaves owned by another root are tested for here. */
12074 if (slot >= btrfs_header_nritems(leaf) ||
12075 btrfs_header_owner(leaf) != root_id)
12077 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Items of another inode or of another key type are tested for here. */
12078 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12080 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12082 * Except normal disk bytenr and disk num bytes, we still
12083 * need to do extra check on dbackref offset as
12084 * dbackref offset = file_offset - file_extent_offset
12086 * Also, we must check the leaf owner.
12087 * In case of shared tree blocks (snapshots) we can inherit
12088 * leaves from source snapshot.
12089 * In that case, reference from source snapshot should not
12092 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12093 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12094 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12095 offset && btrfs_header_owner(leaf) == root_id)
12099 ret = btrfs_next_item(root, &path);
12104 btrfs_release_path(&path);
12105 if (found_count != count) {
12107 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12108 bytenr, len, root_id, objectid, offset, count, found_count);
12109 return REFERENCER_MISSING;
12115 * Check if the referencer of a shared data backref exists
/*
 * Check a SHARED_DATA_REF: read leaf @parent and look for a non-inline file
 * extent item whose disk bytenr equals @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the leaf named by the shared data backref
 * @bytenr:  start of the referenced data extent
 *
 * Returns REFERENCER_MISSING (after printing an error) when no such file
 * extent was found.
 * NOTE(review): loop control, gotos and the success return are not visible
 * in this view.
 */
12117 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12118 u64 parent, u64 bytenr)
12120 struct extent_buffer *eb;
12121 struct btrfs_key key;
12122 struct btrfs_file_extent_item *fi;
12124 int found_parent = 0;
12127 eb = read_tree_block(fs_info, parent, 0);
12128 if (!extent_buffer_uptodate(eb))
/* Walk every item of the leaf. */
12131 nr = btrfs_header_nritems(eb);
12132 for (i = 0; i < nr; i++) {
12133 btrfs_item_key_to_cpu(eb, &key, i);
/* Key type and inline-extent tests precede the disk bytenr comparison. */
12134 if (key.type != BTRFS_EXTENT_DATA_KEY)
12137 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12138 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12141 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12148 free_extent_buffer(eb);
12149 if (!found_parent) {
12150 error("shared extent %llu referencer lost (parent: %llu)",
12152 return REFERENCER_MISSING;
12158 * Only delete backref if REFERENCER_MISSING now
12160 * Returns <0 if the extent was deleted
12161 * Returns >0 if the backref was deleted but the extent still exists; the
12162 * returned value is the error bits remaining after repair
12163 * Returns 0 if nothing happened
/*
 * Delete one backref of extent [@bytenr, @num_bytes) when @err reports a
 * missing or mismatching referencer, then re-search the key that @path
 * pointed at on entry.
 *
 * @trans:         transaction handle passed to btrfs_free_extent()
 * @root:          root used to re-search the saved key in @path
 * @path:          path pointing at the extent item on entry
 * @parent, @root_objectid, @owner, @offset:
 *                 identify the backref, forwarded to btrfs_free_extent()
 * @err:           error bits found by the caller
 *
 * NOTE(review): branch conditions and return statements are not visible in
 * this view; see the header comment for the documented return contract.
 */
12165 static int repair_extent_item(struct btrfs_trans_handle *trans,
12166 struct btrfs_root *root, struct btrfs_path *path,
12167 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12168 u64 owner, u64 offset, int err)
12170 struct btrfs_key old_key;
/* Remember the current key so the path can be re-established afterwards. */
12174 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12176 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12177 /* delete the backref */
12178 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12179 num_bytes, parent, root_objectid, owner, offset);
/*
 * NOTE(review): only REFERENCER_MISSING is cleared here although the branch
 * is also entered for REFERENCER_MISMATCH; confirm this is intended.
 */
12182 err &= ~REFERENCER_MISSING;
12183 printf("Delete backref in extent [%llu %llu]\n",
12184 bytenr, num_bytes);
12186 error("fail to delete backref in extent [%llu %llu]",
12187 bytenr, num_bytes);
12191 /* btrfs_free_extent may delete the extent */
12192 btrfs_release_path(path);
12193 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12203 * This function will check a given extent item, including its backref and
12204 * itself (like crossing stripe boundary and type)
12206 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the extent item that @path points at: item size, metadata stripe
 * crossing, and every inline backref, dispatching each backref to the
 * matching referencer check. When errors were found and `repair` is set,
 * repair_extent_item() is invoked for the current backref.
 *
 * @trans:   transaction handle forwarded to repair_extent_item()
 * @fs_info: filesystem being checked
 * @path:    path whose nodes[0]/slots[0] point at the EXTENT_ITEM or
 *           METADATA_ITEM
 *
 * NOTE(review): loop/label structure, several call arguments and the return
 * statement are not visible in this view.
 */
12208 static int check_extent_item(struct btrfs_trans_handle *trans,
12209 struct btrfs_fs_info *fs_info,
12210 struct btrfs_path *path)
12212 struct btrfs_extent_item *ei;
12213 struct btrfs_extent_inline_ref *iref;
12214 struct btrfs_extent_data_ref *dref;
12215 struct extent_buffer *eb = path->nodes[0];
12218 int slot = path->slots[0];
12220 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12221 u32 item_size = btrfs_item_size_nr(eb, slot);
12231 struct btrfs_key key;
12235 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM keys carry the length in key.offset; otherwise nodesize is used. */
12236 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12237 bytes_used += key.offset;
12238 num_bytes = key.offset;
12240 bytes_used += nodesize;
12241 num_bytes = nodesize;
12244 if (item_size < sizeof(*ei)) {
12246 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12247 * old thing when on disk format is still un-determined.
12248 * No need to care about it anymore
12250 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12254 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12255 flags = btrfs_extent_flags(eb, ei);
12257 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Metadata extents are additionally checked for crossing a stripe boundary. */
12259 if (metadata && check_crossing_stripes(global_info, key.objectid,
12261 error("bad metadata [%llu, %llu) crossing stripe boundary",
12262 key.objectid, key.objectid + nodesize);
12263 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr walks the payload that follows the fixed extent item. */
12266 ptr = (unsigned long)(ei + 1);
12268 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12269 /* Old EXTENT_ITEM metadata */
12270 struct btrfs_tree_block_info *info;
12272 info = (struct btrfs_tree_block_info *)ptr;
12273 level = btrfs_tree_block_level(eb, info);
12274 ptr += sizeof(struct btrfs_tree_block_info);
12276 /* New METADATA_ITEM */
12277 level = key.offset;
12279 end = (unsigned long)ei + item_size;
12282 /* Reached extent item end normally */
12286 /* Beyond extent item end, wrong item size */
12288 err |= ITEM_SIZE_MISMATCH;
12289 error("extent item at bytenr %llu slot %d has wrong size",
12298 /* Now check every backref in this extent item */
12299 iref = (struct btrfs_extent_inline_ref *)ptr;
12300 type = btrfs_extent_inline_ref_type(eb, iref);
12301 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* For tree block refs the inline offset is the referencing root's objectid. */
12303 case BTRFS_TREE_BLOCK_REF_KEY:
12304 root_objectid = offset;
12306 ret = check_tree_block_backref(fs_info, offset, key.objectid,
/* For shared refs the inline offset is passed as the parent block bytenr. */
12310 case BTRFS_SHARED_BLOCK_REF_KEY:
12312 ret = check_shared_block_backref(fs_info, offset, key.objectid,
/* Data refs carry root, inode, offset and count in a btrfs_extent_data_ref. */
12316 case BTRFS_EXTENT_DATA_REF_KEY:
12317 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12318 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12319 owner = btrfs_extent_data_ref_objectid(eb, dref);
12320 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12321 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12322 owner_offset, key.objectid, key.offset,
12323 btrfs_extent_data_ref_count(eb, dref));
12326 case BTRFS_SHARED_DATA_REF_KEY:
12328 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12332 error("extent[%llu %d %llu] has unknown ref type: %d",
12333 key.objectid, key.type, key.offset, type);
12334 ret = UNKNOWN_TYPE;
/*
 * Repair is attempted whenever accumulated `err` is non-zero, but the bits
 * handed to repair_extent_item() are those of the current backref (`ret`).
 */
12339 if (err && repair) {
12340 ret = repair_extent_item(trans, fs_info->extent_root, path,
12341 key.objectid, num_bytes, parent, root_objectid,
12342 owner, owner_offset, ret);
/* Advance to the next inline reference. */
12351 ptr += btrfs_extent_inline_ref_size(type);
12359 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item - verify that a dev extent is referenced by a chunk.
 *
 * @fs_info: filesystem info; its chunk_root is the tree that gets searched
 * @eb:      leaf that holds the dev extent item
 * @slot:    slot of the dev extent item inside @eb
 *
 * The dev extent key and length are read from @eb. A CHUNK_ITEM key is built
 * from the chunk objectid and chunk offset stored in the dev extent and the
 * chunk tree is searched for it. The chunk item found is handed to
 * btrfs_check_chunk_valid(), its stripe length is compared with the dev
 * extent length, and its stripes are scanned for one whose devid and offset
 * equal the objectid and offset of the dev extent key.
 *
 * Returns REFERENCER_MISSING, after logging, when found_chunk is still zero
 * once the path has been released. The success return value is not visible.
 *
 * NOTE(review): short lines (braces, brief declarations, bare goto/return,
 * the assignment of found_chunk) appear to be absent from this listing, so
 * only the visible statements are described.
 */
12361 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12362 struct extent_buffer *eb, int slot)
12364 struct btrfs_root *chunk_root = fs_info->chunk_root;
12365 struct btrfs_dev_extent *ptr;
12366 struct btrfs_path path;
12367 struct btrfs_key chunk_key;
12368 struct btrfs_key devext_key;
12369 struct btrfs_chunk *chunk;
12370 struct extent_buffer *l;
12374 int found_chunk = 0;
12377 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12378 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12379 length = btrfs_dev_extent_length(eb, ptr);
/* Key of the chunk item this dev extent claims to belong to */
12381 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12382 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12383 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12385 btrfs_init_path(&path);
12386 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12391 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12392 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12397 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for a stripe that points back at this device and offset */
12400 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12401 for (i = 0; i < num_stripes; i++) {
12402 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12403 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12405 if (devid == devext_key.objectid &&
12406 offset == devext_key.offset) {
12412 btrfs_release_path(&path);
12413 if (!found_chunk) {
12415 "device extent[%llu, %llu, %llu] did not find the related chunk",
12416 devext_key.objectid, devext_key.offset, length);
12417 return REFERENCER_MISSING;
12423 * Check if the used space is correct with the dev item
/*
 * check_dev_item - compare bytes_used of a dev item with its dev extents.
 *
 * @fs_info: filesystem info; its dev_root is the tree that gets searched
 * @eb:      leaf that holds the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * Reads the device id, bytes_used and total_bytes from the dev item, then
 * searches dev_root for DEV_EXTENT keys whose objectid is the device id.
 * One branch after the search (its condition is not visible) logs that no
 * related dev extent exists and returns REFERENCER_MISSING. Otherwise the
 * lengths of the dev extents of this device are summed into total while
 * stepping with btrfs_next_item(). If used differs from total the mismatch
 * is logged and ACCOUNTING_MISMATCH is returned. Finally
 * check_dev_size_alignment() is called with total_bytes and the sector size.
 *
 * NOTE(review): the mismatch message re-reads the item key into key but then
 * prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id instead
 * of the key fields - confirm whether that is intended.
 * NOTE(review): short lines (braces, brief declarations, loop header, bare
 * break/return) appear to be absent from this listing.
 */
12425 static int check_dev_item(struct btrfs_fs_info *fs_info,
12426 struct extent_buffer *eb, int slot)
12428 struct btrfs_root *dev_root = fs_info->dev_root;
12429 struct btrfs_dev_item *dev_item;
12430 struct btrfs_path path;
12431 struct btrfs_key key;
12432 struct btrfs_dev_extent *ptr;
12439 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12440 dev_id = btrfs_device_id(eb, dev_item);
12441 used = btrfs_device_bytes_used(eb, dev_item);
12442 total_bytes = btrfs_device_total_bytes(eb, dev_item);
/* Dev extents are keyed by device id; the key offset line is not visible */
12444 key.objectid = dev_id;
12445 key.type = BTRFS_DEV_EXTENT_KEY;
12448 btrfs_init_path(&path);
12449 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12451 btrfs_item_key_to_cpu(eb, &key, slot);
12452 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12453 key.objectid, key.type, key.offset);
12454 btrfs_release_path(&path);
12455 return REFERENCER_MISSING;
12458 /* Iterate dev_extents to calculate the used space of a device */
12460 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12463 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12464 if (key.objectid > dev_id)
12466 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12469 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12470 struct btrfs_dev_extent);
12471 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12473 ret = btrfs_next_item(dev_root, &path);
12477 btrfs_release_path(&path);
12479 if (used != total) {
12480 btrfs_item_key_to_cpu(eb, &key, slot);
12482 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12483 total, used, BTRFS_ROOT_TREE_OBJECTID,
12484 BTRFS_DEV_EXTENT_KEY, dev_id);
12485 return ACCOUNTING_MISMATCH;
12487 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12493 * Check a block group item with its referencer (chunk) and its used space
12494 * with extent/metadata item
/*
 * check_block_group_item - cross-check one block group item.
 *
 * @fs_info: filesystem info; extent_root and chunk_root are both searched
 * @eb:      leaf that holds the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Step 1: the block group item is copied out of @eb and its used bytes and
 * flags are read. The chunk tree is searched for the key
 * (BTRFS_FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM, block group start). A
 * missing chunk sets REFERENCER_MISSING in err; a chunk length mismatch
 * sets REFERENCER_MISMATCH.
 *
 * Step 2: the extent tree is walked from the block group start. Keys that
 * are neither EXTENT_ITEM nor METADATA_ITEM are not accounted, and the walk
 * ends once a key objectid reaches the end of the block group. For
 * EXTENT_ITEM keys the key offset is added to total; the amount added for
 * METADATA_ITEM keys is on a line that is not visible (presumably nodesize,
 * TODO confirm). A data extent in a block group without the DATA flag, or a
 * tree block in one without SYSTEM or METADATA, sets CHUNK_TYPE_MISMATCH.
 *
 * Step 3: if total differs from used, BG_ACCOUNTING_ERROR is set.
 *
 * The return statement is not visible; presumably err is returned.
 *
 * NOTE(review): short lines (braces, brief declarations, bare goto/break,
 * else lines) appear to be absent from this listing.
 */
12496 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12497 struct extent_buffer *eb, int slot)
12499 struct btrfs_root *extent_root = fs_info->extent_root;
12500 struct btrfs_root *chunk_root = fs_info->chunk_root;
12501 struct btrfs_block_group_item *bi;
12502 struct btrfs_block_group_item bg_item;
12503 struct btrfs_path path;
12504 struct btrfs_key bg_key;
12505 struct btrfs_key chunk_key;
12506 struct btrfs_key extent_key;
12507 struct btrfs_chunk *chunk;
12508 struct extent_buffer *leaf;
12509 struct btrfs_extent_item *ei;
12510 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12518 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12519 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Work on a local copy of the item rather than on the extent buffer */
12520 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12521 used = btrfs_block_group_used(&bg_item);
12522 bg_flags = btrfs_block_group_flags(&bg_item);
12524 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12525 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12526 chunk_key.offset = bg_key.objectid;
12528 btrfs_init_path(&path);
12529 /* Search for the referencer chunk */
12530 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12533 "block group[%llu %llu] did not find the related chunk item",
12534 bg_key.objectid, bg_key.offset);
12535 err |= REFERENCER_MISSING;
12537 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12538 struct btrfs_chunk);
12539 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12542 "block group[%llu %llu] related chunk item length does not match",
12543 bg_key.objectid, bg_key.offset);
12544 err |= REFERENCER_MISMATCH;
12547 btrfs_release_path(&path);
12549 /* Search from the block group bytenr */
12550 extent_key.objectid = bg_key.objectid;
12551 extent_key.type = 0;
12552 extent_key.offset = 0;
12554 btrfs_init_path(&path);
12555 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12559 /* Iterate extent tree to account used space */
12561 leaf = path.nodes[0];
12563 /* Search slot can point to the last item beyond leaf nritems */
12564 if (path.slots[0] >= btrfs_header_nritems(leaf))
12567 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12568 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12571 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12572 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12574 if (extent_key.objectid < bg_key.objectid)
12577 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12580 total += extent_key.offset;
12582 ei = btrfs_item_ptr(leaf, path.slots[0],
12583 struct btrfs_extent_item);
12584 flags = btrfs_extent_flags(leaf, ei);
/* The extent type has to agree with the type bits of the block group */
12585 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12586 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12588 "bad extent[%llu, %llu) type mismatch with chunk",
12589 extent_key.objectid,
12590 extent_key.objectid + extent_key.offset);
12591 err |= CHUNK_TYPE_MISMATCH;
12593 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12594 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12595 BTRFS_BLOCK_GROUP_METADATA))) {
12597 "bad extent[%llu, %llu) type mismatch with chunk",
12598 extent_key.objectid,
12599 extent_key.objectid + nodesize);
12600 err |= CHUNK_TYPE_MISMATCH;
12604 ret = btrfs_next_item(extent_root, &path);
12610 btrfs_release_path(&path);
12612 if (total != used) {
12614 "block group[%llu %llu] used %llu but extent items used %llu",
12615 bg_key.objectid, bg_key.offset, used, total);
12616 err |= BG_ACCOUNTING_ERROR;
12622 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12623 * FIXME: We still need to repair error of dev_item.
12625 * Returns error after repair.
/*
 * repair_chunk_item - recreate the block group item for a chunk.
 *
 * @trans:      running transaction handed to btrfs_make_block_group()
 * @chunk_root: root whose fs_info is used for the block group creation
 * @path:       path positioned at the chunk item (nodes[0] / slots[0])
 * @err:        error bits produced by the preceding chunk check
 *
 * The key at the path position is read; for a key that is not a CHUNK_ITEM
 * the action taken is on a line that is not visible. The chunk type and
 * length are read, and when @err carries REFERENCER_MISSING a block group
 * is created via btrfs_make_block_group() for (chunk offset, length). A
 * failure is logged; on success the REFERENCER_MISSING bit is cleared from
 * @err and a confirmation is printed.
 *
 * The return statement is not visible; presumably the updated err.
 *
 * NOTE(review): short lines (braces, brief declarations, bare goto/return)
 * appear to be absent from this listing.
 */
12627 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12628 struct btrfs_root *chunk_root,
12629 struct btrfs_path *path, int err)
12631 struct btrfs_chunk *chunk;
12632 struct btrfs_key chunk_key;
12633 struct extent_buffer *eb = path->nodes[0];
12635 int slot = path->slots[0];
12639 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12640 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12642 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12643 type = btrfs_chunk_type(path->nodes[0], chunk);
12644 length = btrfs_chunk_length(eb, chunk);
/* Only the missing block group case is repaired here */
12646 if (err & REFERENCER_MISSING) {
12647 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12648 type, chunk_key.offset, length);
12650 error("fail to add block group item[%llu %llu]",
12651 chunk_key.offset, length);
12654 err &= ~REFERENCER_MISSING;
12655 printf("Added block group item[%llu %llu]\n",
12656 chunk_key.offset, length);
12665 * Check a chunk item.
12666 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item - cross-check one chunk item.
 *
 * @fs_info: filesystem info; extent_root and dev_root are both searched
 * @eb:      leaf that holds the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Step 1: the chunk is validated with btrfs_check_chunk_valid(); on the
 * failure branch the chunk is reported invalid and BYTES_UNALIGNED and
 * UNKNOWN_TYPE are set in err.
 *
 * Step 2: the extent tree is searched for the block group item keyed
 * (chunk offset, BLOCK_GROUP_ITEM, chunk length). A missing item sets
 * REFERENCER_MISSING. When it exists, its flags are compared with the
 * chunk type; a mismatch is logged and also recorded as REFERENCER_MISSING.
 *
 * Step 3: for every stripe the dev tree is searched for the key
 * (stripe devid, DEV_EXTENT, stripe offset). The dev extent must point back
 * to this chunk (chunk objectid and offset) and have a length equal to the
 * stripe length; otherwise control jumps to not_match_dev, where
 * BACKREF_MISSING is set and the stripe is reported.
 *
 * The return statement is not visible; presumably err is returned.
 *
 * NOTE(review): the stripe message prints chunk_key.objectid while the
 * other messages in this function print chunk_key.offset as the chunk
 * start - confirm which one is intended.
 * NOTE(review): short lines (braces, brief declarations, labels, bare
 * goto/continue) appear to be absent from this listing.
 */
12668 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12669 struct extent_buffer *eb, int slot)
12671 struct btrfs_root *extent_root = fs_info->extent_root;
12672 struct btrfs_root *dev_root = fs_info->dev_root;
12673 struct btrfs_path path;
12674 struct btrfs_key chunk_key;
12675 struct btrfs_key bg_key;
12676 struct btrfs_key devext_key;
12677 struct btrfs_chunk *chunk;
12678 struct extent_buffer *leaf;
12679 struct btrfs_block_group_item *bi;
12680 struct btrfs_block_group_item bg_item;
12681 struct btrfs_dev_extent *ptr;
12693 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12694 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12695 length = btrfs_chunk_length(eb, chunk);
12696 chunk_end = chunk_key.offset + length;
12697 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12700 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12702 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12705 type = btrfs_chunk_type(eb, chunk);
/* The block group item shares start and length with the chunk */
12707 bg_key.objectid = chunk_key.offset;
12708 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12709 bg_key.offset = length;
12711 btrfs_init_path(&path);
12712 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12715 "chunk[%llu %llu) did not find the related block group item",
12716 chunk_key.offset, chunk_end);
12717 err |= REFERENCER_MISSING;
12719 leaf = path.nodes[0];
12720 bi = btrfs_item_ptr(leaf, path.slots[0],
12721 struct btrfs_block_group_item);
12722 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12724 if (btrfs_block_group_flags(&bg_item) != type) {
12726 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12727 chunk_key.offset, chunk_end, type,
12728 btrfs_block_group_flags(&bg_item));
12729 err |= REFERENCER_MISSING;
12733 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12734 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
/* Every stripe needs a dev extent that points back at this chunk */
12735 for (i = 0; i < num_stripes; i++) {
12736 btrfs_release_path(&path);
12737 btrfs_init_path(&path);
12738 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12739 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12740 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12742 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12745 goto not_match_dev;
12747 leaf = path.nodes[0];
12748 ptr = btrfs_item_ptr(leaf, path.slots[0],
12749 struct btrfs_dev_extent);
12750 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12751 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12752 if (objectid != chunk_key.objectid ||
12753 offset != chunk_key.offset ||
12754 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12755 goto not_match_dev;
12758 err |= BACKREF_MISSING;
12760 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12761 chunk_key.objectid, chunk_end, i);
12764 btrfs_release_path(&path);
/*
 * delete_extent_tree_item - delete the item the path currently points at.
 *
 * @trans: running transaction used for the modifying search and deletion
 * @root:  tree that contains the item
 * @path:  path positioned at the item; released and re-searched here
 *
 * The key at the path position is saved, the path is released, and the key
 * is searched again with ins_len -1 and cow 1 so the leaf can be modified.
 * The item is removed with btrfs_del_item(). When the resulting slot is 0
 * the path is moved with btrfs_prev_leaf(). A failure or success message
 * naming the root objectid and the key is printed; the conditions selecting
 * between them are on lines that are not visible.
 *
 * NOTE(review): short lines (braces, result checks, bare goto/return)
 * appear to be absent from this listing.
 */
12769 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12770 struct btrfs_root *root,
12771 struct btrfs_path *path)
12773 struct btrfs_key key;
/* Remember the key, because the path is dropped before the COW search */
12776 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12777 btrfs_release_path(path);
12778 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12784 ret = btrfs_del_item(trans, root, path);
12788 if (path->slots[0] == 0)
12789 btrfs_prev_leaf(root, path);
12794 error("failed to delete root %llu item[%llu, %u, %llu]",
12795 root->objectid, key.objectid, key.type, key.offset);
12797 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12798 root->objectid, key.objectid, key.type, key.offset);
12803 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items - dispatch the item at the path position to its checker.
 *
 * @trans:         transaction handed to the repair and delete helpers
 * @root:          tree that owns the leaf
 * @path:          path positioned at the item to check
 * @nrefs:         node reference info forwarded to the extent data helpers
 * @account_bytes: forwarded to check_extent_data_item()
 *
 * If the slot is past the last item of the leaf, an "empty leaf" error is
 * logged. Otherwise the key is read and handled per key type:
 *   EXTENT_DATA       check_extent_data_item, then repair_extent_data_item
 *   BLOCK_GROUP_ITEM  check_block_group_item; delete on REFERENCER_MISSING
 *   DEV_ITEM          check_dev_item
 *   CHUNK_ITEM        check_chunk_item, then repair_chunk_item
 *   DEV_EXTENT        check_dev_extent_item
 *   EXTENT_ITEM and METADATA_ITEM  check_extent_item
 *   EXTENT_CSUM       adds the item size to total_csum_bytes
 *   TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF, SHARED_DATA_REF
 *                     the matching backref check; the item is deleted when
 *                     the result has REFERENCER_MISMATCH or
 *                     REFERENCER_MISSING set
 *
 * Only the tail of each repair/delete condition is visible; the leading
 * part (presumably a test of the repair flag, TODO confirm) is not.
 *
 * NOTE(review): short lines (braces, switch header, break statements,
 * accumulation into err, loop control) appear to be absent from this
 * listing.
 */
12805 static int check_leaf_items(struct btrfs_trans_handle *trans,
12806 struct btrfs_root *root, struct btrfs_path *path,
12807 struct node_refs *nrefs, int account_bytes)
12809 struct btrfs_fs_info *fs_info = root->fs_info;
12810 struct btrfs_key key;
12811 struct extent_buffer *eb;
12814 struct btrfs_extent_data_ref *dref;
12819 eb = path->nodes[0];
12820 slot = path->slots[0];
12821 if (slot >= btrfs_header_nritems(eb)) {
12823 error("empty leaf [%llu %u] root %llu", eb->start,
12824 root->fs_info->nodesize, root->objectid);
12830 btrfs_item_key_to_cpu(eb, &key, slot);
12834 case BTRFS_EXTENT_DATA_KEY:
12835 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12837 ret = repair_extent_data_item(trans, root, path, nrefs,
12841 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12842 ret = check_block_group_item(fs_info, eb, slot);
12844 ret & REFERENCER_MISSING)
12845 ret = delete_extent_tree_item(trans, root, path);
12848 case BTRFS_DEV_ITEM_KEY:
12849 ret = check_dev_item(fs_info, eb, slot);
12852 case BTRFS_CHUNK_ITEM_KEY:
12853 ret = check_chunk_item(fs_info, eb, slot);
12855 ret = repair_chunk_item(trans, root, path, ret);
12858 case BTRFS_DEV_EXTENT_KEY:
12859 ret = check_dev_extent_item(fs_info, eb, slot);
12862 case BTRFS_EXTENT_ITEM_KEY:
12863 case BTRFS_METADATA_ITEM_KEY:
12864 ret = check_extent_item(trans, fs_info, path);
12867 case BTRFS_EXTENT_CSUM_KEY:
12868 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12871 case BTRFS_TREE_BLOCK_REF_KEY:
12872 ret = check_tree_block_backref(fs_info, key.offset,
12875 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12876 ret = delete_extent_tree_item(trans, root, path);
12879 case BTRFS_EXTENT_DATA_REF_KEY:
12880 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12881 ret = check_extent_data_backref(fs_info,
12882 btrfs_extent_data_ref_root(eb, dref),
12883 btrfs_extent_data_ref_objectid(eb, dref),
12884 btrfs_extent_data_ref_offset(eb, dref),
12886 btrfs_extent_data_ref_count(eb, dref));
12888 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12889 ret = delete_extent_tree_item(trans, root, path);
12892 case BTRFS_SHARED_BLOCK_REF_KEY:
12893 ret = check_shared_block_backref(fs_info, key.offset,
12896 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12897 ret = delete_extent_tree_item(trans, root, path);
12900 case BTRFS_SHARED_DATA_REF_KEY:
12901 ret = check_shared_data_backref(fs_info, key.offset,
12904 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12905 ret = delete_extent_tree_item(trans, root, path);
12919 * Low memory usage version check_chunks_and_extents.
/*
 * check_chunks_and_extents_v2 - low memory walk over all trees.
 *
 * @fs_info: filesystem to check
 *
 * A transaction is started on the extent root (the guarding condition is
 * not visible); failure to start it is logged and PTR_ERR is returned.
 * check_btrfs_root() is run on the chunk root and on the tree root. The
 * tree root is then searched from the extent tree ROOT_ITEM key onward.
 * For each ROOT_ITEM found the tree is read with key.offset set to -1
 * (btrfs_read_fs_root_no_cache() for BTRFS_TREE_RELOC_OBJECTID,
 * btrfs_read_fs_root() otherwise) and checked with check_btrfs_root(). A
 * reloc root is freed afterwards. The path is then released, the tree root
 * is searched again with old_key, and btrfs_next_item() advances.
 *
 * After the walk btrfs_fix_block_accounting() is called, the
 * BG_ACCOUNTING_ERROR bit is cleared from err, the transaction is committed
 * on the extent root and the path is released. The conditions around these
 * steps are on lines that are not visible.
 *
 * NOTE(review): the "failed to read tree" message formats key.objectid
 * with %lld; the other messages in this file format objectids with %llu.
 * NOTE(review): the result of btrfs_commit_transaction() is not captured
 * on the visible line.
 * NOTE(review): short lines (braces, brief declarations, the assignment of
 * old_key, labels, bare goto/break) appear to be absent from this listing.
 */
12921 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12923 struct btrfs_trans_handle *trans = NULL;
12924 struct btrfs_path path;
12925 struct btrfs_key old_key;
12926 struct btrfs_key key;
12927 struct btrfs_root *root1;
12928 struct btrfs_root *root;
12929 struct btrfs_root *cur_root;
12933 root = fs_info->fs_root;
12936 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12937 if (IS_ERR(trans)) {
12938 error("failed to start transaction before check");
12939 return PTR_ERR(trans);
12943 root1 = root->fs_info->chunk_root;
12944 ret = check_btrfs_root(trans, root1, 0, 1);
12947 root1 = root->fs_info->tree_root;
12948 ret = check_btrfs_root(trans, root1, 0, 1);
12951 btrfs_init_path(&path);
12952 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12954 key.type = BTRFS_ROOT_ITEM_KEY;
12956 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12958 error("cannot find extent tree in tree_root");
12963 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12964 if (key.type != BTRFS_ROOT_ITEM_KEY)
12967 key.offset = (u64)-1;
12969 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12970 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12973 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12974 if (IS_ERR(cur_root) || !cur_root) {
12975 error("failed to read tree: %lld", key.objectid);
12979 ret = check_btrfs_root(trans, cur_root, 0, 1);
12982 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12983 btrfs_free_fs_root(cur_root);
/* Re-position in the tree root before stepping to the next root item */
12985 btrfs_release_path(&path);
12986 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12987 &old_key, &path, 0, 0);
12991 ret = btrfs_next_item(root1, &path);
12997 /* if repair, update block accounting */
12999 ret = btrfs_fix_block_accounting(trans, root);
13003 err &= ~BG_ACCOUNTING_ERROR;
13007 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13009 btrfs_release_path(&path);
/*
 * do_check_chunks_and_extents - run the extent check for the active mode.
 *
 * @fs_info: filesystem to check
 *
 * Prints "checking extents" to stderr when progress reporting is disabled,
 * then calls check_chunks_and_extents_v2() when check_mode is
 * CHECK_MODE_LOWMEM and check_chunks_and_extents() otherwise. When repair
 * is set and the check returned 0, btrfs_fix_device_and_super_size() is
 * called as well.
 *
 * The return statement is not visible; presumably ret is returned.
 *
 * NOTE(review): short lines (braces, the declaration of ret, the else
 * line, the final return) appear to be absent from this listing.
 */
13014 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13018 if (!ctx.progress_enabled)
13019 fprintf(stderr, "checking extents\n");
13020 if (check_mode == CHECK_MODE_LOWMEM)
13021 ret = check_chunks_and_extents_v2(fs_info);
13023 ret = check_chunks_and_extents(fs_info);
13025 /* Also repair device size related problems */
13026 if (repair && !ret) {
13027 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * btrfs_fsck_reinit_root - give a root a fresh, zeroed root block.
 *
 * @trans:     running transaction; its transid becomes the block generation
 * @root:      root to re-initialise
 * @overwrite: selects between the two ways of obtaining the block; the
 *             test itself is on a line that is not visible
 *
 * Two paths take a reference on block c with extent_buffer_get(): one of
 * them allocates c through btrfs_alloc_free_block() with an all-zero disk
 * key, the other (presumably reusing the old root node, TODO confirm) has
 * its assignment on a line that is not visible. The header area of c is
 * zeroed and level, bytenr, generation, backref revision and owner are set,
 * followed by the fsid and the chunk tree uuid. The buffer is marked dirty.
 *
 * When the old node and c start at the same bytenr, the root item
 * generation and level are refreshed and written with btrfs_update_root();
 * free_extent_buffer(c) follows on a path whose condition is not visible.
 * The old node is released and the root is added to the dirty list.
 *
 * NOTE(review): short lines (braces, brief declarations, the overwrite
 * test, bare goto/return) appear to be absent from this listing.
 */
13034 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13035 struct btrfs_root *root, int overwrite)
13037 struct extent_buffer *c;
13038 struct extent_buffer *old = root->node;
13041 struct btrfs_disk_key disk_key = {0,0,0};
13047 extent_buffer_get(c);
13050 c = btrfs_alloc_free_block(trans, root,
13051 root->fs_info->nodesize,
13052 root->root_key.objectid,
13053 &disk_key, level, 0, 0);
13056 extent_buffer_get(c);
/* Wipe the old header, then rebuild every header field from scratch */
13060 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13061 btrfs_set_header_level(c, level);
13062 btrfs_set_header_bytenr(c, c->start);
13063 btrfs_set_header_generation(c, trans->transid);
13064 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13065 btrfs_set_header_owner(c, root->root_key.objectid);
13067 write_extent_buffer(c, root->fs_info->fsid,
13068 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13070 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13071 btrfs_header_chunk_tree_uuid(c),
13074 btrfs_mark_buffer_dirty(c);
13076 * this case can happen in the following case:
13078 * 1.overwrite previous root.
13080 * 2.reinit reloc data root, this is because we skip pin
13081 * down reloc data tree before which means we can allocate
13082 * same block bytenr here.
13084 if (old->start == c->start) {
13085 btrfs_set_root_generation(&root->root_item,
13087 root->root_item.level = btrfs_header_level(root->node);
13088 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13089 &root->root_key, &root->root_item);
13091 free_extent_buffer(c);
13095 free_extent_buffer(old);
13097 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks - recursively pin a tree block and its children.
 *
 * @fs_info:   filesystem info; pinned_extents records what is pinned
 * @eb:        tree block to pin
 * @tree_root: non-zero while walking the tree root, so that ROOT_ITEMs in
 *             its leaves are followed into the trees they describe
 *
 * A block whose range is already marked EXTENT_DIRTY in pinned_extents is
 * not pinned a second time. Otherwise the block is pinned with
 * btrfs_pin_extent() and every item or pointer in it is visited.
 *
 * In the part that handles ROOT_ITEM keys, items of the extent tree, the
 * tree reloc tree and the data reloc tree are skipped; for the others the
 * root block is read and pinned recursively with @tree_root 0.
 *
 * In the part that handles node pointers, a level 1 node outside the tree
 * root only pins the child bytenr for nodesize bytes without reading it;
 * otherwise the child is read and pinned recursively with the same
 * @tree_root.
 *
 * A block that cannot be read is reported on stderr; the value returned in
 * that case is on a line that is not visible.
 *
 * NOTE(review): short lines (braces, brief declarations, the level test,
 * bare continue/return) appear to be absent from this listing.
 */
13101 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13102 struct extent_buffer *eb, int tree_root)
13104 struct extent_buffer *tmp;
13105 struct btrfs_root_item *ri;
13106 struct btrfs_key key;
13108 int level = btrfs_header_level(eb);
13114 * If we have pinned this block before, don't pin it again.
13115 * This can not only avoid forever loop with broken filesystem
13116 * but also give us some speedups.
13118 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13119 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13122 btrfs_pin_extent(fs_info, eb->start, eb->len);
13124 nritems = btrfs_header_nritems(eb);
13125 for (i = 0; i < nritems; i++) {
13127 btrfs_item_key_to_cpu(eb, &key, i);
13128 if (key.type != BTRFS_ROOT_ITEM_KEY)
13130 /* Skip the extent root and reloc roots */
13131 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13132 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13133 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13135 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13136 bytenr = btrfs_disk_root_bytenr(eb, ri);
13139 * If at any point we start needing the real root we
13140 * will have to build a stump root for the root we are
13141 * in, but for now this doesn't actually use the root so
13142 * just pass in extent_root.
13144 tmp = read_tree_block(fs_info, bytenr, 0);
13145 if (!extent_buffer_uptodate(tmp)) {
13146 fprintf(stderr, "Error reading root block\n");
13149 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13150 free_extent_buffer(tmp);
13154 bytenr = btrfs_node_blockptr(eb, i);
13156 /* If we aren't the tree root don't read the block */
13157 if (level == 1 && !tree_root) {
13158 btrfs_pin_extent(fs_info, bytenr,
13159 fs_info->nodesize);
13163 tmp = read_tree_block(fs_info, bytenr, 0);
13164 if (!extent_buffer_uptodate(tmp)) {
13165 fprintf(stderr, "Error reading tree block\n");
13168 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13169 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin the blocks of the chunk tree and the tree root.
 *
 * @fs_info: filesystem whose metadata is pinned
 *
 * Calls pin_down_tree_blocks() on the chunk root node with tree_root 0,
 * then returns the result of pin_down_tree_blocks() on the tree root node
 * with tree_root 1. The check of the first result is on lines that are not
 * visible.
 */
13178 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13182 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13186 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - rebuild the in-memory block groups from the chunks.
 *
 * @fs_info: filesystem whose block group state is rebuilt
 *
 * The chunk tree is searched for CHUNK_ITEM keys; on one branch after the
 * search the path is released (the condition and the value returned are not
 * visible). The three avail_*_alloc_bits fields are reset to 0. The chunk
 * tree is then walked leaf by leaf: for every CHUNK_ITEM a block group is
 * added with btrfs_add_block_group() using the chunk type, the key offset
 * and the chunk length, and the same range is marked dirty in
 * free_space_cache. A second pass steps through the block groups with
 * btrfs_lookup_first_block_group(), advancing start to the end of each
 * group; what is done per group is on lines that are not visible.
 *
 * NOTE(review): short lines (braces, brief declarations, loop headers,
 * slot increments, bare break/return) appear to be absent from this
 * listing.
 */
13189 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13191 struct btrfs_block_group_cache *cache;
13192 struct btrfs_path path;
13193 struct extent_buffer *leaf;
13194 struct btrfs_chunk *chunk;
13195 struct btrfs_key key;
13199 btrfs_init_path(&path);
13201 key.type = BTRFS_CHUNK_ITEM_KEY;
13203 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13205 btrfs_release_path(&path);
13210 * We do this in case the block groups were screwed up and had alloc
13211 * bits that aren't actually set on the chunks. This happens with
13212 * restored images every time and could happen in real life I guess.
13214 fs_info->avail_data_alloc_bits = 0;
13215 fs_info->avail_metadata_alloc_bits = 0;
13216 fs_info->avail_system_alloc_bits = 0;
13218 /* First we need to create the in-memory block groups */
13220 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13221 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13223 btrfs_release_path(&path);
13231 leaf = path.nodes[0];
13232 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13233 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13238 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13239 btrfs_add_block_group(fs_info, 0,
13240 btrfs_chunk_type(leaf, chunk), key.offset,
13241 btrfs_chunk_length(leaf, chunk));
13242 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13243 key.offset + btrfs_chunk_length(leaf, chunk));
13248 cache = btrfs_lookup_first_block_group(fs_info, start);
13252 start = cache->key.objectid + cache->key.offset;
13255 btrfs_release_path(&path);
/*
 * reset_balance - drop pending balance state and reinit the data reloc tree.
 *
 * @trans:   running transaction used for all modifying searches
 * @fs_info: filesystem whose tree root is cleaned up
 *
 * Step 1: the tree root is searched (ins_len -1, cow 1) for the balance
 * item keyed (BTRFS_BALANCE_OBJECTID, BALANCE_ITEM). One branch jumps to
 * reinit_data_reloc; otherwise the item is deleted with btrfs_del_item().
 *
 * Step 2: items with objectid BTRFS_TREE_RELOC_OBJECTID are collected as
 * (del_slot, del_nr) runs and removed with btrfs_del_items(), re-searching
 * the tree when the end of a leaf is reached. The walk stops once a key
 * objectid is greater than BTRFS_TREE_RELOC_OBJECTID.
 *
 * Step 3: the data reloc tree is read with btrfs_read_fs_root(), recorded
 * in the transaction, reinitialised through btrfs_fsck_reinit_root() with
 * overwrite 0, and given a root directory by btrfs_make_root_dir().
 *
 * The return statement is not visible; presumably ret is returned after
 * the path is released.
 *
 * NOTE(review): short lines (braces, key offsets, loop headers, counters,
 * labels, bare goto/break) appear to be absent from this listing.
 */
13259 static int reset_balance(struct btrfs_trans_handle *trans,
13260 struct btrfs_fs_info *fs_info)
13262 struct btrfs_root *root = fs_info->tree_root;
13263 struct btrfs_path path;
13264 struct extent_buffer *leaf;
13265 struct btrfs_key key;
13266 int del_slot, del_nr = 0;
13270 btrfs_init_path(&path);
13271 key.objectid = BTRFS_BALANCE_OBJECTID;
13272 key.type = BTRFS_BALANCE_ITEM_KEY;
13274 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13279 goto reinit_data_reloc;
13284 ret = btrfs_del_item(trans, root, &path);
13287 btrfs_release_path(&path);
/* Next, remove every root item that belongs to a tree reloc root */
13289 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13290 key.type = BTRFS_ROOT_ITEM_KEY;
13292 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13296 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13301 ret = btrfs_del_items(trans, root, &path,
13308 btrfs_release_path(&path);
13311 ret = btrfs_search_slot(trans, root, &key, &path,
13318 leaf = path.nodes[0];
13319 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13320 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13322 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13327 del_slot = path.slots[0];
13336 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13340 btrfs_release_path(&path);
13343 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13344 key.type = BTRFS_ROOT_ITEM_KEY;
13345 key.offset = (u64)-1;
13346 root = btrfs_read_fs_root(fs_info, &key);
13347 if (IS_ERR(root)) {
13348 fprintf(stderr, "Error reading data reloc tree\n");
13349 ret = PTR_ERR(root);
13352 record_root_in_trans(trans, root);
13353 ret = btrfs_fsck_reinit_root(trans, root, 0);
13356 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13358 btrfs_release_path(&path);
/*
 * reinit_extent_tree - rebuild the extent tree from scratch.
 *
 * @trans:   running transaction used for allocation and insertion
 * @fs_info: filesystem whose extent tree is recreated
 *
 * Filesystems with the MIXED_GROUPS incompat flag are rejected with a
 * message on stderr. Otherwise the steps are:
 *   1. pin_metadata_blocks() pins the blocks that are in use,
 *   2. btrfs_free_block_groups() and reset_block_groups() rebuild the
 *      in-memory block groups,
 *   3. btrfs_fsck_reinit_root() reinitialises the extent root,
 *   4. every block group found by btrfs_lookup_first_block_group() has its
 *      item inserted into the extent root, followed by
 *      btrfs_extent_post_op(),
 *   5. reset_balance() clears the pending balance state.
 * Each failure prints its own message on stderr; the values returned are on
 * lines that are not visible.
 *
 * NOTE(review): short lines (braces, brief declarations, result checks,
 * loop header, bare break/return) appear to be absent from this listing.
 */
13362 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13363 struct btrfs_fs_info *fs_info)
13369 * The only reason we don't do this is because right now we're just
13370 * walking the trees we find and pinning down their bytes, we don't look
13371 * at any of the leaves. In order to do mixed groups we'd have to check
13372 * the leaves of any fs roots and pin down the bytes for any file
13373 * extents we find. Not hard but why do it if we don't have to?
13375 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13376 fprintf(stderr, "We don't support re-initing the extent tree "
13377 "for mixed block groups yet, please notify a btrfs "
13378 "developer you want to do this so they can add this "
13379 "functionality.\n");
13384 * first we need to walk all of the trees except the extent tree and pin
13385 * down the bytes that are in use so we don't overwrite any existing
13388 ret = pin_metadata_blocks(fs_info);
13390 fprintf(stderr, "error pinning down used bytes\n");
13395 * Need to drop all the block groups since we're going to recreate all
13398 btrfs_free_block_groups(fs_info);
13399 ret = reset_block_groups(fs_info);
13401 fprintf(stderr, "error resetting the block groups\n");
13405 /* Ok we can allocate now, reinit the extent root */
13406 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13408 fprintf(stderr, "extent root initialization failed\n");
13410 * When the transaction code is updated we should end the
13411 * transaction, but for now progs only knows about commit so
13412 * just return an error.
13418 * Now we have all the in-memory block groups setup so we can make
13419 * allocations properly, and the metadata we care about is safe since we
13420 * pinned all of it above.
13423 struct btrfs_block_group_cache *cache;
13425 cache = btrfs_lookup_first_block_group(fs_info, start);
13428 start = cache->key.objectid + cache->key.offset;
13429 ret = btrfs_insert_item(trans, fs_info->extent_root,
13430 &cache->key, &cache->item,
13431 sizeof(cache->item));
13433 fprintf(stderr, "Error adding block group\n");
13436 btrfs_extent_post_op(trans, fs_info->extent_root);
13439 ret = reset_balance(trans, fs_info);
13441 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - force a copy-on-write of one metadata block.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is replaced by the owner root of @eb
 * @eb:   metadata block to re-COW
 *
 * The owner root is read using the header owner of @eb as objectid; when
 * that fails a message is printed and PTR_ERR is returned. A transaction is
 * started on the owner root. The path lowest_level is set to the level of
 * @eb and the first key of @eb (node key for a non-zero level, item key
 * otherwise) is searched with cow 1. The transaction is then committed and
 * the path released.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not captured
 * on the visible line.
 * NOTE(review): short lines (braces, the declaration of ret, the IS_ERR
 * test on trans, the else line, the final return) appear to be absent from
 * this listing.
 */
13446 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13448 struct btrfs_path path;
13449 struct btrfs_trans_handle *trans;
13450 struct btrfs_key key;
13453 printf("Recowing metadata block %llu\n", eb->start);
13454 key.objectid = btrfs_header_owner(eb);
13455 key.type = BTRFS_ROOT_ITEM_KEY;
13456 key.offset = (u64)-1;
13458 root = btrfs_read_fs_root(root->fs_info, &key);
13459 if (IS_ERR(root)) {
13460 fprintf(stderr, "Couldn't find owner root %llu\n",
13462 return PTR_ERR(root);
13465 trans = btrfs_start_transaction(root, 1);
13467 return PTR_ERR(trans);
13469 btrfs_init_path(&path);
/* Stop the search at the level of eb so that eb itself gets COWed */
13470 path.lowest_level = btrfs_header_level(eb);
13471 if (path.lowest_level)
13472 btrfs_node_key_to_cpu(eb, &key, 0);
13474 btrfs_item_key_to_cpu(eb, &key, 0);
13476 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13477 btrfs_commit_transaction(trans, root);
13478 btrfs_release_path(&path);
/*
 * delete_bad_item - remove one recorded bad item from its tree.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is replaced by the root named in @bad
 * @bad:  record holding the owning root id and the key of the item
 *
 * Prints the key being deleted, reads the owning root using bad->root_id as
 * objectid (a failure is reported and PTR_ERR returned), starts a
 * transaction on it, searches for bad->key with ins_len -1 and cow 1, and
 * deletes the item with btrfs_del_item(). The transaction is committed and
 * the path released.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not captured
 * on the visible line.
 * NOTE(review): short lines (braces, the declaration of ret, result
 * checks, labels, the final return) appear to be absent from this listing.
 */
13482 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13484 struct btrfs_path path;
13485 struct btrfs_trans_handle *trans;
13486 struct btrfs_key key;
13489 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13490 bad->key.type, bad->key.offset);
13491 key.objectid = bad->root_id;
13492 key.type = BTRFS_ROOT_ITEM_KEY;
13493 key.offset = (u64)-1;
13495 root = btrfs_read_fs_root(root->fs_info, &key);
13496 if (IS_ERR(root)) {
13497 fprintf(stderr, "Couldn't find owner root %llu\n",
13499 return PTR_ERR(root);
13502 trans = btrfs_start_transaction(root, 1);
13504 return PTR_ERR(trans);
13506 btrfs_init_path(&path);
13507 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13513 ret = btrfs_del_item(trans, root, &path);
13515 btrfs_commit_transaction(trans, root);
13516 btrfs_release_path(&path);
/*
 * zero_log_tree - clear the log root recorded in the super block.
 *
 * @root: root used to start and commit the transaction
 *
 * Starts a transaction; on failure ret takes PTR_ERR(trans). Otherwise the
 * log root and log root level in super_copy are both set to 0 and the
 * transaction is committed, with the commit result stored in ret.
 *
 * The return statement is not visible; presumably ret is returned.
 */
13520 static int zero_log_tree(struct btrfs_root *root)
13522 struct btrfs_trans_handle *trans;
13525 trans = btrfs_start_transaction(root, 1);
13526 if (IS_ERR(trans)) {
13527 ret = PTR_ERR(trans);
13530 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13531 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13532 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - checksum a data range one sector at a time.
 *
 * @trans:     running transaction handed to btrfs_csum_file_block()
 * @csum_root: checksum tree that receives the checksums
 * @buf:       scratch buffer the data is read into; callers in this file
 *             allocate it with sectorsize bytes
 * @start:     logical start of the range
 * The last parameter is on a line that is not visible; the body uses a
 * variable named len as the range length.
 *
 * For each sector in the range the data is read with read_extent_data() at
 * start + offset and passed to btrfs_csum_file_block() together with the
 * range end (start + len). offset advances by the sector size each round.
 * The error checks and the return are on lines that are not visible.
 */
13536 static int populate_csum(struct btrfs_trans_handle *trans,
13537 struct btrfs_root *csum_root, char *buf, u64 start,
13540 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13545 while (offset < len) {
13546 sectorsize = fs_info->sectorsize;
13547 ret = read_extent_data(fs_info, buf, start + offset,
13551 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13552 start + offset, buf, sectorsize);
13555 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - checksum the file extents of one tree.
 *
 * @trans:     running transaction handed to populate_csum()
 * @csum_root: checksum tree that receives the checksums
 * @cur_root:  fs or subvolume tree whose file extents are walked
 *
 * A sectorsize buffer is allocated and @cur_root is searched from a key
 * whose fields are set on lines that are not visible. Each item is
 * examined: items that are not EXTENT_DATA, and file extents whose type is
 * not BTRFS_FILE_EXTENT_REG, are not checksummed. For the rest the on-disk
 * bytenr and on-disk byte count are passed to populate_csum(). A result of
 * -EEXIST gets special handling on a line that is not visible. The walk
 * advances with btrfs_next_item() and the path is released at the end.
 *
 * NOTE(review): short lines (braces, brief declarations, key setup, loop
 * header, labels, freeing of buf, bare goto/return) appear to be absent
 * from this listing.
 */
13560 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13561 struct btrfs_root *csum_root,
13562 struct btrfs_root *cur_root)
13564 struct btrfs_path path;
13565 struct btrfs_key key;
13566 struct extent_buffer *node;
13567 struct btrfs_file_extent_item *fi;
13574 buf = malloc(cur_root->fs_info->sectorsize);
13578 btrfs_init_path(&path);
13582 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13585 /* Iterate all regular file extents and fill its csum */
13587 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13589 if (key.type != BTRFS_EXTENT_DATA_KEY)
13591 node = path.nodes[0];
13592 slot = path.slots[0];
13593 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13594 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13596 start = btrfs_file_extent_disk_bytenr(node, fi);
13597 len = btrfs_file_extent_disk_num_bytes(node, fi);
13599 ret = populate_csum(trans, csum_root, buf, start, len);
13600 if (ret == -EEXIST)
13606 * TODO: if next leaf is corrupted, jump to nearest next valid
13609 ret = btrfs_next_item(cur_root, &path);
13619 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs - checksum the data of every fs/subvolume tree.
 *
 * @trans:     running transaction forwarded to the per-root helper
 * @csum_root: checksum tree that receives the checksums
 *
 * The tree root is searched from (BTRFS_FS_TREE_OBJECTID, ROOT_ITEM). The
 * walk ends at the first key whose objectid is above
 * BTRFS_LAST_FREE_OBJECTID. Keys that are not ROOT_ITEMs, or whose objectid
 * fails is_fstree(), are not processed. Every other root is read with
 * key.offset set to -1 and handed to fill_csum_tree_from_one_fs_root(); a
 * root that cannot be read is reported on stderr. The walk advances with
 * btrfs_next_item() and the path is released at the end.
 *
 * NOTE(review): short lines (braces, brief declarations, loop header,
 * labels, result checks, bare goto/break/return) appear to be absent from
 * this listing.
 */
13624 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13625 struct btrfs_root *csum_root)
13627 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13628 struct btrfs_path path;
13629 struct btrfs_root *tree_root = fs_info->tree_root;
13630 struct btrfs_root *cur_root;
13631 struct extent_buffer *node;
13632 struct btrfs_key key;
13636 btrfs_init_path(&path);
13637 key.objectid = BTRFS_FS_TREE_OBJECTID;
13639 key.type = BTRFS_ROOT_ITEM_KEY;
13640 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13649 node = path.nodes[0];
13650 slot = path.slots[0];
13651 btrfs_item_key_to_cpu(node, &key, slot);
13652 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13654 if (key.type != BTRFS_ROOT_ITEM_KEY)
13656 if (!is_fstree(key.objectid))
13658 key.offset = (u64)-1;
13660 cur_root = btrfs_read_fs_root(fs_info, &key);
13661 if (IS_ERR(cur_root) || !cur_root) {
13662 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13666 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13671 ret = btrfs_next_item(tree_root, &path);
13681 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent - checksum every data extent in the extent tree.
 *
 * @trans:     running transaction handed to populate_csum()
 * @csum_root: checksum tree that receives the checksums
 *
 * The extent tree is searched from an EXTENT_ITEM key whose other fields
 * are set on lines that are not visible. A sectorsize buffer is allocated;
 * the path is released on the branches that follow the search and the
 * allocation (their conditions are not visible). The tree is walked leaf by
 * leaf with btrfs_next_leaf(). Items that are not EXTENT_ITEMs, and extent
 * items without BTRFS_EXTENT_FLAG_DATA, are not checksummed. For data
 * extents populate_csum() is called with the key objectid as start; the
 * length argument is on a line that is not visible. The path is released at
 * the end.
 *
 * NOTE(review): short lines (braces, brief declarations, loop header, slot
 * increments, freeing of buf, bare break/return) appear to be absent from
 * this listing.
 */
13685 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13686 struct btrfs_root *csum_root)
13688 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13689 struct btrfs_path path;
13690 struct btrfs_extent_item *ei;
13691 struct extent_buffer *leaf;
13693 struct btrfs_key key;
13696 btrfs_init_path(&path);
13698 key.type = BTRFS_EXTENT_ITEM_KEY;
13700 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13702 btrfs_release_path(&path);
13706 buf = malloc(csum_root->fs_info->sectorsize);
13708 btrfs_release_path(&path);
13713 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13714 ret = btrfs_next_leaf(extent_root, &path);
13722 leaf = path.nodes[0];
13724 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13725 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13730 ei = btrfs_item_ptr(leaf, path.slots[0],
13731 struct btrfs_extent_item);
13732 if (!(btrfs_extent_flags(leaf, ei) &
13733 BTRFS_EXTENT_FLAG_DATA)) {
13738 ret = populate_csum(trans, csum_root, buf, key.objectid,
13745 btrfs_release_path(&path);
13751 * Recalculate the csum and put it into the csum tree.
13753 * Extent tree init will wipe out all the extent info, so in that case we
13754 * can't depend on the extent tree and must use the fs trees instead. If
13755 * search_fs_tree is set, we will use fs/subvol trees to init the csum tree.
/*
 * @trans:          transaction handle, passed unchanged to the chosen helper
 * @csum_root:      csum tree root, passed unchanged to the chosen helper
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(), zero selects
 *                  fill_csum_tree_from_extent()
 *
 * Returns whatever the selected helper returns.
 */
13757 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13758 struct btrfs_root *csum_root,
13759 int search_fs_tree)
13761 if (search_fs_tree)
13762 return fill_csum_tree_from_fs(trans, csum_root);
13764 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: remove every entry from the cache
 * tree, free the tree itself and reset the global pointer to NULL so that a
 * later build_roots_info_cache() allocates a fresh one.
 *
 * Checks for a NULL roots_info_cache before touching it.
 */
13767 static void free_roots_info_cache(void)
13769 if (!roots_info_cache)
/* Drain the tree one entry at a time, always taking the first one. */
13772 while (!cache_tree_empty(roots_info_cache)) {
13773 struct cache_extent *entry;
13774 struct root_item_info *rii;
13776 entry = first_cache_extent(roots_info_cache);
13779 remove_cache_extent(roots_info_cache, entry);
/* The cache_extent is embedded in a root_item_info; recover the container. */
13780 rii = container_of(entry, struct root_item_info, cache_extent);
13784 free(roots_info_cache);
13785 roots_info_cache = NULL;
/*
 * Populate the global roots_info_cache from the extent tree.
 *
 * For every tree block extent whose first inline ref is a
 * BTRFS_TREE_BLOCK_REF_KEY, the owning root id is looked up in the cache
 * (entries are keyed by root id with size 1). Each entry remembers the
 * highest tree level seen for that root together with the bytenr and
 * generation of the block at that level, plus a node_count for that level.
 *
 * @info: fs_info whose extent root is scanned
 *
 * NOTE(review): presumably returns 0 on success and a negative value on
 * allocation, search or insertion failure - confirm.
 */
13788 static int build_roots_info_cache(struct btrfs_fs_info *info)
13791 struct btrfs_key key;
13792 struct extent_buffer *leaf;
13793 struct btrfs_path path;
/* Lazily allocate the global cache tree on first use. */
13795 if (!roots_info_cache) {
13796 roots_info_cache = malloc(sizeof(*roots_info_cache));
13797 if (!roots_info_cache)
13799 cache_tree_init(roots_info_cache);
/* Position the path in the extent tree; the lookup uses no transaction. */
13802 btrfs_init_path(&path);
13804 key.type = BTRFS_EXTENT_ITEM_KEY;
13806 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13809 leaf = path.nodes[0];
13812 struct btrfs_key found_key;
13813 struct btrfs_extent_item *ei;
13814 struct btrfs_extent_inline_ref *iref;
13815 int slot = path.slots[0];
13820 struct cache_extent *entry;
13821 struct root_item_info *rii;
/* Step to the next leaf when the current one is exhausted. */
13823 if (slot >= btrfs_header_nritems(leaf)) {
13824 ret = btrfs_next_leaf(info->extent_root, &path);
13831 leaf = path.nodes[0];
13832 slot = path.slots[0];
13835 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only extent items and metadata items can describe tree blocks. */
13837 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13838 found_key.type != BTRFS_METADATA_ITEM_KEY)
13841 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13842 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is not a tree block. */
13844 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13845 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Locate the first inline ref and the tree level. For METADATA_ITEM the
 * inline ref directly follows the extent item and the level is the key
 * offset; otherwise a btrfs_tree_block_info sits in between and carries the
 * level.
 */
13848 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13849 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13850 level = found_key.offset;
13852 struct btrfs_tree_block_info *binfo;
13854 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13855 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13856 level = btrfs_tree_block_level(leaf, binfo);
13860 * For a root extent, it must be of the following type and the
13861 * first (and only one) iref in the item.
13863 type = btrfs_extent_inline_ref_type(leaf, iref);
13864 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The inline ref offset is used as the id of the owning root. */
13867 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13868 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/*
 * New entries are keyed by root id with size 1 and start with level (u8)-1,
 * which the level comparison below treats as "nothing recorded yet".
 */
13870 rii = malloc(sizeof(struct root_item_info));
13875 rii->cache_extent.start = root_id;
13876 rii->cache_extent.size = 1;
13877 rii->level = (u8)-1;
13878 entry = &rii->cache_extent;
13879 ret = insert_cache_extent(roots_info_cache, entry);
13882 rii = container_of(entry, struct root_item_info,
13886 ASSERT(rii->cache_extent.start == root_id);
13887 ASSERT(rii->cache_extent.size == 1);
/*
 * A higher level (or the first block seen for this root) replaces what was
 * recorded and resets node_count to 1. A block at the same level is handled
 * by the else-if branch.
 * NOTE(review): that branch presumably bumps node_count - confirm.
 */
13889 if (level > rii->level || rii->level == (u8)-1) {
13890 rii->level = level;
13891 rii->bytenr = found_key.objectid;
13892 rii->gen = btrfs_extent_generation(leaf, ei);
13893 rii->node_count = 1;
13894 } else if (level == rii->level) {
13902 btrfs_release_path(&path);
/*
 * Compare the root item that @path points at with the information collected
 * in roots_info_cache for the same root, and optionally rewrite it.
 *
 * @path:           path positioned on the root item; the item is read from
 *                  and, when updating, written back to path->nodes[0]
 * @root_key:       key of the root item; its objectid is the root id used
 *                  for the cache lookup
 * @read_only_mode: when non-zero only report the mismatch; when zero, store
 *                  the cached bytenr, level and generation into the item
 *
 * NOTE(review): presumably returns a negative value on error, 0 when the item
 * is consistent and a positive value when it is outdated - confirm against
 * the full function body.
 */
13907 static int maybe_repair_root_item(struct btrfs_path *path,
13908 const struct btrfs_key *root_key,
13909 const int read_only_mode)
13911 const u64 root_id = root_key->objectid;
13912 struct cache_extent *entry;
13913 struct root_item_info *rii;
13914 struct btrfs_root_item ri;
13915 unsigned long offset;
/* The cache is keyed by root id with size 1. */
13917 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13920 "Error: could not find extent items for root %llu\n",
13921 root_key->objectid);
13925 rii = container_of(entry, struct root_item_info, cache_extent);
13926 ASSERT(rii->cache_extent.start == root_id);
13927 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one node at the recorded level is an error. */
13929 if (rii->node_count != 1) {
13931 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item. */
13936 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13937 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* The item is outdated if bytenr, level or generation disagree with the cache. */
13939 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13940 btrfs_root_level(&ri) != rii->level ||
13941 btrfs_root_generation(&ri) != rii->gen) {
13944 * If we're in repair mode but our caller told us to not update
13945 * the root item, i.e. just check if it needs to be updated, don't
13946 * print this message, since the caller will call us again shortly
13947 * for the same root item without read only mode (the caller will
13948 * open a transaction first).
13950 if (!(read_only_mode && repair))
13952 "%sroot item for root %llu,"
13953 " current bytenr %llu, current gen %llu, current level %u,"
13954 " new bytenr %llu, new gen %llu, new level %u\n",
13955 (read_only_mode ? "" : "fixing "),
13957 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13958 btrfs_root_level(&ri),
13959 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the node found in the extent tree is reported. */
13961 if (btrfs_root_generation(&ri) > rii->gen) {
13963 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13964 root_id, btrfs_root_generation(&ri), rii->gen);
/* Update the local copy and write it back over the on-disk item. */
13968 if (!read_only_mode) {
13969 btrfs_set_root_bytenr(&ri, rii->bytenr);
13970 btrfs_set_root_level(&ri, rii->level);
13971 btrfs_set_root_generation(&ri, rii->gen);
13972 write_extent_buffer(path->nodes[0], &ri,
13973 offset, sizeof(ri));
13983 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13984 * caused read-only snapshots to be corrupted if they were created at a moment
13985 * when the source subvolume/snapshot had orphan items. The issue was that the
13986 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13987 * node instead of the post orphan cleanup root node.
13988 * So this function, and its callees, just detect and fix those cases. Even
13989 * though the regression was for read-only snapshots, this function applies to
13990 * any snapshot/subvolume root.
13991 * This must be run before any other repair code - not doing so makes other
13992 * repair code delete or modify backrefs in the extent tree for example, which
13993 * will result in an inconsistent fs after repairing the root items.
/*
 * @info: fs_info whose tree root is scanned for root items
 *
 * Builds roots_info_cache, then walks the ROOT_ITEM keys of the tree root
 * starting at BTRFS_FIRST_FREE_OBJECTID and runs maybe_repair_root_item() on
 * each one. The helper is called in read-only mode while no transaction is
 * open (trans ? 0 : 1).
 *
 * The caller in cmd_check() passes a negative return to strerror(-ret) and
 * prints a positive return as a number of roots.
 */
13995 static int repair_root_items(struct btrfs_fs_info *info)
13997 struct btrfs_path path;
13998 struct btrfs_key key;
13999 struct extent_buffer *leaf;
14000 struct btrfs_trans_handle *trans = NULL;
14003 int need_trans = 0;
14005 btrfs_init_path(&path);
14007 ret = build_roots_info_cache(info);
14011 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14012 key.type = BTRFS_ROOT_ITEM_KEY;
14017 * Avoid opening and committing transactions if a leaf doesn't have
14018 * any root items that need to be fixed, so that we avoid rotating
14019 * backup roots unnecessarily.
14022 trans = btrfs_start_transaction(info->tree_root, 1);
14023 if (IS_ERR(trans)) {
14024 ret = PTR_ERR(trans);
/* The search runs inside the transaction when one has been started. */
14029 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14033 leaf = path.nodes[0];
14036 struct btrfs_key found_key;
/*
 * End of the current leaf: find_next_key() fills 'key' for the follow-up
 * search, the path is released and an open transaction is committed.
 */
14038 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14039 int no_more_keys = find_next_key(&path, &key);
14041 btrfs_release_path(&path);
14043 ret = btrfs_commit_transaction(trans,
14055 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only root items are inspected; the tree reloc root is special-cased. */
14057 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14059 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Read-only check while no transaction is open, real update otherwise. */
14062 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/*
 * NOTE(review): presumably reached when a root needs fixing in repair mode
 * but no transaction exists yet, so the leaf is redone with one - confirm.
 */
14066 if (!trans && repair) {
14069 btrfs_release_path(&path);
/* Common cleanup: drop the cache and the path. */
14079 free_roots_info_cache();
14080 btrfs_release_path(&path);
/* NOTE(review): the return value of this commit is discarded. */
14082 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and then set the super
 * block cache generation to (u64)-1 inside a transaction on the tree root.
 *
 * @fs_info: filesystem to operate on
 *
 * Returns PTR_ERR(trans) when the final transaction cannot be started.
 * NOTE(review): other return paths presumably propagate the result of
 * btrfs_clear_free_space_cache() - confirm.
 */
14089 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14091 struct btrfs_trans_handle *trans;
14092 struct btrfs_block_group_cache *bg_cache;
14096 /* Clear all free space cache inodes and its extent data */
/* 'current' is the lookup cursor; each pass asks for the first block group from there. */
14098 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14101 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Move the cursor just past the block group that was processed. */
14104 current = bg_cache->key.objectid + bg_cache->key.offset;
14107 /* Don't forget to set cache_generation to -1 */
14108 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14109 if (IS_ERR(trans)) {
14110 error("failed to update super block cache generation");
14111 return PTR_ERR(trans);
14113 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the return value of this commit is discarded. */
14114 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache in the requested format and report progress on
 * stdout.
 *
 * @fs_info: filesystem to operate on
 * The second parameter (its declaration is on the following line) is compared
 * against 1 and 2 below as 'clear_version'.
 *
 * NOTE(review): presumably returns 0 on success or when there is nothing to
 * clear, and a non-zero value on failure - confirm.
 */
14119 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
/* Version 1 is refused when the FREE_SPACE_TREE compat_ro bit is set. */
14124 if (clear_version == 1) {
14125 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14127 "free space cache v2 detected, use --clear-space-cache v2");
14131 printf("Clearing free space cache\n");
14132 ret = clear_free_space_cache(fs_info);
14134 error("failed to clear free space cache");
14137 printf("Free space cache cleared\n");
/* Version 2 has nothing to do when the FREE_SPACE_TREE compat_ro bit is absent. */
14139 } else if (clear_version == 2) {
14140 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14141 printf("no free space cache v2 to clear\n");
14145 printf("Clear free space cache v2\n");
14146 ret = btrfs_clear_free_space_tree(fs_info);
14148 error("failed to clear free space cache v2: %d", ret);
14151 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check", passed to usage() by cmd_check() when the
 * command line is invalid. One string per output line: synopsis first, then
 * a short and a longer description, then the option list.
 */
14158 const char * const cmd_check_usage[] = {
14159 "btrfs check [options] <device>",
14160 "Check structural integrity of a filesystem (unmounted).",
14161 "Check structural integrity of an unmounted filesystem. Verify internal",
14162 "trees' consistency and item connectivity. In the repair mode try to",
14163 "fix the problems found. ",
14164 "WARNING: the repair mode is considered dangerous",
14166 "-s|--super <superblock> use this superblock copy",
14167 "-b|--backup use the first valid backup root copy",
14168 "--force skip mount checks, repair is not possible",
14169 "--repair try to repair the filesystem",
14170 "--readonly run in read-only mode (default)",
14171 "--init-csum-tree create a new CRC tree",
14172 "--init-extent-tree create a new extent tree",
14173 "--mode <MODE> allows choice of memory/IO trade-offs",
14174 " where MODE is one of:",
14175 " original - read inodes and extents to memory (requires",
14176 " more memory, does less IO)",
14177 " lowmem - try to use less memory but read blocks again",
14179 "--check-data-csum verify checksums of data blocks",
14180 "-Q|--qgroup-report print a report on qgroup consistency",
14181 "-E|--subvol-extents <subvolid>",
14182 " print subvolume extents and sharing state",
14183 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14184 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14185 "-p|--progress indicate progress",
14186 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, opens the filesystem named by the single
 * positional argument and then, in order: optionally clears the free space
 * cache, zeroes the log tree in repair mode, prints the qgroup or subvolume
 * extent reports, re-initializes the extent and/or csum trees, repairs root
 * items, and runs the extent/chunk, free space, fs root, csum, root ref and
 * quota group checks before printing the summary counters.
 *
 * @argc, @argv: command line as handed to getopt_long()
 *
 * NOTE(review): presumably returns 0 when nothing was found and non-zero
 * otherwise ('err' accumulates results below) - confirm.
 */
14190 int cmd_check(int argc, char **argv)
14192 struct cache_tree root_cache;
14193 struct btrfs_root *root;
14194 struct btrfs_fs_info *info;
14197 u64 tree_root_bytenr = 0;
14198 u64 chunk_root_bytenr = 0;
14199 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14203 int init_csum_tree = 0;
14205 int clear_space_cache = 0;
14206 int qgroup_report = 0;
14207 int qgroups_repaired = 0;
/* The filesystem is opened exclusively unless --force clears the flag below. */
14208 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Values for long-only options start at 257, above any single-char option. */
14213 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14214 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14215 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14216 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14217 GETOPT_VAL_FORCE };
14218 static const struct option long_options[] = {
14219 { "super", required_argument, NULL, 's' },
14220 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14221 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14222 { "init-csum-tree", no_argument, NULL,
14223 GETOPT_VAL_INIT_CSUM },
14224 { "init-extent-tree", no_argument, NULL,
14225 GETOPT_VAL_INIT_EXTENT },
14226 { "check-data-csum", no_argument, NULL,
14227 GETOPT_VAL_CHECK_CSUM },
14228 { "backup", no_argument, NULL, 'b' },
14229 { "subvol-extents", required_argument, NULL, 'E' },
14230 { "qgroup-report", no_argument, NULL, 'Q' },
14231 { "tree-root", required_argument, NULL, 'r' },
14232 { "chunk-root", required_argument, NULL,
14233 GETOPT_VAL_CHUNK_TREE },
14234 { "progress", no_argument, NULL, 'p' },
14235 { "mode", required_argument, NULL,
14237 { "clear-space-cache", required_argument, NULL,
14238 GETOPT_VAL_CLEAR_SPACE_CACHE},
14239 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14240 { NULL, 0, NULL, 0}
/*
 * NOTE(review): 'E' has no ':' in the short option string, although the long
 * option mapped to 'E' is required_argument and optarg is parsed for the
 * subvolume id below. With the short form "-E <id>" getopt would not supply
 * optarg - confirm and consider "E:".
 */
14243 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14247 case 'a': /* ignored */ break;
14249 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Superblock copy selection: the index must be below BTRFS_SUPER_MIRROR_MAX. */
14252 num = arg_strtou64(optarg);
14253 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14255 "super mirror should be less than %d",
14256 BTRFS_SUPER_MIRROR_MAX);
14259 bytenr = btrfs_sb_offset(((int)num));
14260 printf("using SB copy %llu, bytenr %llu\n", num,
14261 (unsigned long long)bytenr);
14267 subvolid = arg_strtou64(optarg);
14270 tree_root_bytenr = arg_strtou64(optarg);
14272 case GETOPT_VAL_CHUNK_TREE:
14273 chunk_root_bytenr = arg_strtou64(optarg);
14276 ctx.progress_enabled = true;
14280 usage(cmd_check_usage);
/* The options below that modify the filesystem also request a writable open. */
14281 case GETOPT_VAL_REPAIR:
14282 printf("enabling repair mode\n");
14284 ctree_flags |= OPEN_CTREE_WRITES;
14286 case GETOPT_VAL_READONLY:
14289 case GETOPT_VAL_INIT_CSUM:
14290 printf("Creating a new CRC tree\n");
14291 init_csum_tree = 1;
14293 ctree_flags |= OPEN_CTREE_WRITES;
14295 case GETOPT_VAL_INIT_EXTENT:
14296 init_extent_tree = 1;
14297 ctree_flags |= (OPEN_CTREE_WRITES |
14298 OPEN_CTREE_NO_BLOCK_GROUPS);
14301 case GETOPT_VAL_CHECK_CSUM:
14302 check_data_csum = 1;
14304 case GETOPT_VAL_MODE:
14305 check_mode = parse_check_mode(optarg);
14306 if (check_mode == CHECK_MODE_UNKNOWN) {
14307 error("unknown mode: %s", optarg);
/* "v1" and "v2" map to 1 and 2; v2 additionally sets OPEN_CTREE_INVALIDATE_FST. */
14311 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14312 if (strcmp(optarg, "v1") == 0) {
14313 clear_space_cache = 1;
14314 } else if (strcmp(optarg, "v2") == 0) {
14315 clear_space_cache = 2;
14316 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14319 "invalid argument to --clear-space-cache, must be v1 or v2");
14322 ctree_flags |= OPEN_CTREE_WRITES;
14324 case GETOPT_VAL_FORCE:
/* Exactly one positional argument (the device) must remain. */
14330 if (check_argc_exact(argc - optind, 1))
14331 usage(cmd_check_usage);
/* Set up the progress reporting task when -p/--progress was given. */
14333 if (ctx.progress_enabled) {
14334 ctx.tp = TASK_NOTHING;
14335 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14338 /* This check is the only reason for --readonly to exist */
14339 if (readonly && repair) {
14340 error("repair options are not compatible with --readonly");
14345 * experimental and dangerous
14347 if (repair && check_mode == CHECK_MODE_LOWMEM)
14348 warning("low-memory mode repair support is only partial");
14351 cache_tree_init(&root_cache);
/* Mount status check; the messages below cover the error, mounted and --force cases. */
14353 ret = check_mounted(argv[optind]);
14356 error("could not check mount status: %s",
14362 "%s is currently mounted, use --force if you really intend to check the filesystem",
14370 error("repair and --force is not yet supported");
14377 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14381 "filesystem mounted, continuing because of --force");
14383 /* A block device is mounted in exclusive mode by kernel */
14384 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14387 /* only allow partial opening under repair mode */
14389 ctree_flags |= OPEN_CTREE_PARTIAL;
/* Open the filesystem with the superblock/root overrides collected above. */
14391 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14392 chunk_root_bytenr, ctree_flags);
14394 error("cannot open file system");
/* Publish the fs_info through the file-scope global_info pointer. */
14400 global_info = info;
14401 root = info->fs_root;
14402 uuid_unparse(info->super_copy->fsid, uuidbuf);
14404 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14407 * Check the bare minimum before starting anything else that could rely
14408 * on it, namely the tree roots, any local consistency checks
14410 if (!extent_buffer_uptodate(info->tree_root->node) ||
14411 !extent_buffer_uptodate(info->dev_root->node) ||
14412 !extent_buffer_uptodate(info->chunk_root->node)) {
14413 error("critical roots corrupted, unable to check the filesystem");
/* --clear-space-cache: the stored 1 or 2 is passed on as the version to clear. */
14419 if (clear_space_cache) {
14420 ret = do_clear_free_space_cache(info, clear_space_cache);
14426 * repair mode will force us to commit transaction which
14427 * will make us fail to load log tree when mounting.
14429 if (repair && btrfs_super_log_root(info->super_copy)) {
14430 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14436 ret = zero_log_tree(root);
14439 error("failed to zero log tree: %d", ret);
/* -Q/--qgroup-report: print the quota group report. */
14444 if (qgroup_report) {
14445 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14447 ret = qgroup_verify_all(info);
/* -E/--subvol-extents: print the extent state of the requested subvolume. */
14454 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14455 subvolid, argv[optind], uuidbuf);
14456 ret = print_extent_state(info, subvolid);
/* Tree re-initialization runs in one transaction on the extent root. */
14461 if (init_extent_tree || init_csum_tree) {
14462 struct btrfs_trans_handle *trans;
14464 trans = btrfs_start_transaction(info->extent_root, 0);
14465 if (IS_ERR(trans)) {
14466 error("error starting transaction");
14467 ret = PTR_ERR(trans);
14472 if (init_extent_tree) {
14473 printf("Creating a new extent tree\n");
14474 ret = reinit_extent_tree(trans, info);
/* Empty the csum tree, then refill it through fill_csum_tree(). */
14480 if (init_csum_tree) {
14481 printf("Reinitialize checksum tree\n");
14482 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14484 error("checksum tree initialization failed: %d",
14491 ret = fill_csum_tree(trans, info->csum_root,
14495 error("checksum tree refilling failed: %d", ret);
14500 * Ok now we commit and run the normal fsck, which will add
14501 * extent entries for all of the items it finds.
14503 ret = btrfs_commit_transaction(trans, info->extent_root);
/* The extent and csum roots must be readable for the checks that follow. */
14508 if (!extent_buffer_uptodate(info->extent_root->node)) {
14509 error("critical: extent_root, unable to check the filesystem");
14514 if (!extent_buffer_uptodate(info->csum_root->node)) {
14515 error("critical: csum_root, unable to check the filesystem");
/*
 * Root item repair is skipped when the extent tree was re-initialized.
 * A negative result is an errno-style error; positive values are root counts.
 */
14521 if (!init_extent_tree) {
14522 ret = repair_root_items(info);
14525 error("failed to repair root items: %s", strerror(-ret));
14529 fprintf(stderr, "Fixed %d roots.\n", ret);
14531 } else if (ret > 0) {
14533 "Found %d roots with an outdated root item.\n",
14536 "Please run a filesystem check with the option --repair to fix them.\n");
/* Extent and chunk allocation checks. */
14543 ret = do_check_chunks_and_extents(info);
14547 "errors found in extent allocation tree or chunk allocation");
14549 /* Only re-check super size after we checked and repaired the fs */
14550 err |= !is_super_size_valid(info);
/* Free space check; the stage banner is only printed without progress reporting. */
14552 if (!ctx.progress_enabled) {
14553 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14554 fprintf(stderr, "checking free space tree\n");
14556 fprintf(stderr, "checking free space cache\n");
14558 ret = check_space_cache(root);
14561 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14562 error("errors found in free space tree");
14564 error("errors found in free space cache");
14569 * We used to have to have these hole extents in between our real
14570 * extents so if we don't have this flag set we need to make sure there
14571 * are no gaps in the file extents for inodes, otherwise we can just
14572 * ignore it when this happens.
14574 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14575 ret = do_check_fs_roots(info, &root_cache);
14578 error("errors found in fs roots");
14582 fprintf(stderr, "checking csums\n");
14583 ret = check_csums(root);
14586 error("errors found in csum tree");
14590 fprintf(stderr, "checking root refs\n");
14591 /* For low memory mode, check_fs_roots_v2 handles root refs */
14592 if (check_mode != CHECK_MODE_LOWMEM) {
14593 ret = check_root_refs(root, &root_cache);
14596 error("errors found in root refs");
/* In repair mode, drain fs_info->recow_ebs through recow_extent_buffer(). */
14601 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14602 struct extent_buffer *eb;
14604 eb = list_first_entry(&root->fs_info->recow_ebs,
14605 struct extent_buffer, recow);
14606 list_del_init(&eb->recow);
14607 ret = recow_extent_buffer(root, eb);
14610 error("fails to fix transid errors");
/* Drain the global delete_items list through delete_bad_item(). */
14615 while (!list_empty(&delete_items)) {
14616 struct bad_item *bad;
14618 bad = list_first_entry(&delete_items, struct bad_item, list);
14619 list_del_init(&bad->list);
14621 ret = delete_bad_item(root, bad);
/* Quota groups are only verified (and repaired) when quota is enabled. */
14627 if (info->quota_enabled) {
14628 fprintf(stderr, "checking quota groups\n");
14629 ret = qgroup_verify_all(info);
14632 error("failed to check quota groups");
14636 ret = repair_qgroups(info, &qgroups_repaired);
14639 error("failed to repair quota groups");
/* Buffers still queued on recow_ebs at this point are reported as transid errors. */
14645 if (!list_empty(&root->fs_info->recow_ebs)) {
14646 error("transid errors in file system");
/* Summary: print the file-scope counters. */
14651 printf("found %llu bytes used, ",
14652 (unsigned long long)bytes_used);
14654 printf("error(s) found\n");
14656 printf("no error found\n");
14657 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14658 printf("total tree bytes: %llu\n",
14659 (unsigned long long)total_btree_bytes);
14660 printf("total fs tree bytes: %llu\n",
14661 (unsigned long long)total_fs_tree_bytes);
14662 printf("total extent tree bytes: %llu\n",
14663 (unsigned long long)total_extent_tree_bytes);
14664 printf("btree space waste bytes: %llu\n",
14665 (unsigned long long)btree_space_waste);
14666 printf("file data blocks allocated: %llu\n referenced %llu\n",
14667 (unsigned long long)data_bytes_allocated,
14668 (unsigned long long)data_bytes_referenced);
/* Cleanup: qgroup counters, the root record cache and the progress task. */
14670 free_qgroup_counts();
14671 free_root_recs_tree(&root_cache);
14675 if (ctx.progress_enabled)
14676 task_deinit(ctx.info);