2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
/* Fragments of the progress-reporting declarations: the enclosing enum and struct headers are not visible in this listing. */
54 TASK_NOTHING, /* have to be the last element */
/* Read by print_status_check() to pick the phase name to display. */
59 enum task_position tp;
/* Handed to task_period_start()/task_period_wait() by print_status_check(). */
61 struct task_info *info;
/* File-scope byte counters, all starting at zero. NOTE(review): the code that updates them is outside this view. */
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
/* Global list heads; their users are outside this view. */
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
/* Integer flags, both initialised to 0. */
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
/* Zero-initialised progress-reporting context. */
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
/* Check-mode selector. Only the default enumerator, an alias of CHECK_MODE_ORIGINAL, is visible in this listing. */
81 enum btrfs_check_mode {
85 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode in effect for this file, initialised to the default. */
88 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Ordering callback for two data backrefs held as rb-tree nodes.
 *
 * Fields are compared in this order: parent (which shares storage with
 * root, see below), then owner, then offset, and, when both entries have
 * found_ref set, disk_bytenr. bytes is compared last.
 * A full backref stops after the parent comparison.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably 1 / -1 / 0 in the usual comparator convention - confirm.
 */
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
92 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94 struct data_backref *back1 = to_data_backref(ext1);
95 struct data_backref *back2 = to_data_backref(ext2);
/* Both inputs are expected to be data backrefs; warn otherwise. */
97 WARN_ON(!ext1->is_data);
98 WARN_ON(!ext2->is_data);
100 /* parent and root are a union, so this covers both */
101 if (back1->parent > back2->parent)
103 if (back1->parent < back2->parent)
106 /* This is a full backref and the parents match. */
107 if (back1->node.full_backref)
110 if (back1->owner > back2->owner)
112 if (back1->owner < back2->owner)
115 if (back1->offset > back2->offset)
117 if (back1->offset < back2->offset)
/* The on-disk location is only compared when both sides have found_ref set. */
120 if (back1->found_ref && back2->found_ref) {
121 if (back1->disk_bytenr > back2->disk_bytenr)
123 if (back1->disk_bytenr < back2->disk_bytenr)
126 if (back1->bytes > back2->bytes)
128 if (back1->bytes < back2->bytes)
/*
 * Ordering callback for two tree backrefs held as rb-tree nodes.
 * Only the parent/root field is compared in the visible lines.
 * NOTE(review): return statements are not visible in this listing.
 */
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
137 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139 struct tree_backref *back1 = to_tree_backref(ext1);
140 struct tree_backref *back2 = to_tree_backref(ext2);
/* Both inputs are expected to be tree (non-data) backrefs; warn otherwise. */
142 WARN_ON(ext1->is_data);
143 WARN_ON(ext2->is_data);
145 /* parent and root are a union, so this covers both */
146 if (back1->parent > back2->parent)
148 if (back1->parent < back2->parent)
/*
 * Generic backref ordering callback: compares is_data first, then
 * full_backref, and finally delegates to compare_data_backref() or
 * compare_tree_backref().
 * NOTE(review): the condition selecting between the two delegates and the
 * early return values are not visible in this listing.
 */
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
156 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
159 if (ext1->is_data > ext2->is_data)
162 if (ext1->is_data < ext2->is_data)
165 if (ext1->full_backref > ext2->full_backref)
167 if (ext1->full_backref < ext2->full_backref)
171 return compare_data_backref(node1, node2);
173 return compare_tree_backref(node1, node2);
/*
 * Progress-reporting routine. p is a struct task_ctx pointer.
 * It prints the name of the current phase (priv->tp) followed by one of
 * four indicator characters chosen by count % 4, then waits for the next
 * period.
 * NOTE(review): the loop construct, the declaration/update of count and
 * the return value are not visible in this listing.
 */
177 static void *print_status_check(void *p)
179 struct task_ctx *priv = p;
180 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Phase names, indexed by priv->tp; only one entry is visible in this listing. */
182 static char *task_position_string[] = {
184 "checking free space cache",
188 task_period_start(priv->info, 1000 /* 1s */);
/* TASK_NOTHING gets special handling; the action taken is on a hidden line. */
190 if (priv->tp == TASK_NOTHING)
/* The line ends with a carriage return rather than a newline. */
194 printf("%s [%c]\r", task_position_string[priv->tp],
195 work_indicator[count % 4]);
198 task_period_wait(priv->info);
/* Body not visible in this listing. NOTE(review): presumably the completion hook paired with print_status_check() - confirm. */
203 static int print_status_return(void *p)
/*
 * Translate a mode name into its enum value.
 *
 * "lowmem" selects CHECK_MODE_LOWMEM; "orig" and "original" both select
 * CHECK_MODE_ORIGINAL. Matching is exact (strcmp), so it is case-sensitive.
 * Any other string yields CHECK_MODE_UNKNOWN.
 * str is passed straight to strcmp() and therefore must not be NULL.
 */
211 static enum btrfs_check_mode parse_check_mode(const char *str)
213 if (strcmp(str, "lowmem") == 0)
214 return CHECK_MODE_LOWMEM;
215 if (strcmp(str, "orig") == 0)
216 return CHECK_MODE_ORIGINAL;
217 if (strcmp(str, "original") == 0)
218 return CHECK_MODE_ORIGINAL;
220 return CHECK_MODE_UNKNOWN;
/*
 * Inspect the first node of the hole tree.
 * NOTE(review): both return statements are hidden. Callers compare the
 * result against an inode size, so this presumably returns the start of
 * the first hole and a sentinel for an empty tree - confirm.
 */
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
226 struct file_extent_hole *hole;
228 if (RB_EMPTY_ROOT(holes))
231 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Insertion comparator for the hole tree: orders by start, and for equal
 * starts by len. add_file_extent_hole() relies on it never reporting
 * equality, so identical holes are inserted side by side and merged
 * afterwards.
 * NOTE(review): the return statements are not visible in this listing.
 */
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
237 struct file_extent_hole *hole1;
238 struct file_extent_hole *hole2;
240 hole1 = rb_entry(node1, struct file_extent_hole, node);
241 hole2 = rb_entry(node2, struct file_extent_hole, node);
243 if (hole1->start > hole2->start)
245 if (hole1->start < hole2->start)
247 /* Now hole1->start == hole2->start */
248 if (hole1->len >= hole2->len)
250 * Hole 1 will be merge center
251 * Same hole will be merged later
254 /* Hole 2 will be merge center */
/*
 * Insert a new hole into the tree and coalesce it with its neighbours.
 *
 * A node is allocated and inserted, then:
 *  - if the previous node reaches or overlaps the new one, the new node is
 *    extended backwards to cover it and the previous node is erased;
 *  - following nodes are absorbed while they touch or overlap the new one.
 *
 * NOTE(review): the remaining parameters (callers pass a start and a
 * length), the allocation-failure path, the initialisation of the new
 * node, the loop construct and the release of erased nodes are on lines
 * not visible in this listing.
 */
259 * Add a hole to the record
261 * This will do hole merge for copy_file_extent_holes(),
262 * which will ensure there won't be continuous holes.
264 static int add_file_extent_hole(struct rb_root *holes,
267 struct file_extent_hole *hole;
268 struct file_extent_hole *prev = NULL;
269 struct file_extent_hole *next = NULL;
271 hole = malloc(sizeof(*hole));
276 /* Since compare will not return 0, no -EEXIST will happen */
277 rb_insert(holes, &hole->node, compare_hole);
279 /* simple merge with previous hole */
280 if (rb_prev(&hole->node))
281 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* Touching (==) counts as mergeable, not only overlapping. */
283 if (prev && prev->start + prev->len >= hole->start) {
/* Compute the new length from the old start before overwriting start. */
284 hole->len = hole->start + hole->len - prev->start;
285 hole->start = prev->start;
286 rb_erase(&prev->node, holes);
291 /* iterate merge with next holes */
293 if (!rb_next(&hole->node))
295 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
297 if (hole->start + hole->len >= next->start) {
/* Only grow when next extends past the current end; a fully covered next is simply dropped. */
298 if (hole->start + hole->len <= next->start + next->len)
299 hole->len = next->start + next->len -
301 rb_erase(&next->node, holes);
/*
 * Search comparator passed to rb_search() by del_file_extent_hole().
 * data points to a struct file_extent_hole used as the search key; node is
 * the tree node being tested. A node matches when the key offset lies in
 * [hole->start, hole->start + hole->len).
 * NOTE(review): the assignment of the local start (presumably from the key
 * hole) and the return values are not visible in this listing.
 */
310 static int compare_hole_range(struct rb_node *node, void *data)
312 struct file_extent_hole *hole;
/* First interpret data as the key ... */
315 hole = (struct file_extent_hole *)data;
/* ... then reuse the same variable for the tree node under test. */
318 hole = rb_entry(node, struct file_extent_hole, node);
319 if (start < hole->start)
321 if (start >= hole->start && start < hole->start + hole->len)
/*
 * Remove the range [start, start + len) from the hole that contains it.
 *
 * The hole is located with compare_hole_range(). The range must not extend
 * past the end of that hole. The hole is erased and whatever remains in
 * front of and behind the removed range is re-added as new holes.
 *
 * NOTE(review): the remaining parameters, the not-found and out-of-range
 * return values, the guards around the two re-add calls and the release of
 * the erased node are on lines not visible in this listing.
 */
327 * Delete a hole in the record
329 * This will do the hole split and is much restrict than add.
331 static int del_file_extent_hole(struct rb_root *holes,
334 struct file_extent_hole *hole;
335 struct file_extent_hole tmp;
340 struct rb_node *node;
/* tmp is the stack-allocated search key handed to the comparator. */
347 node = rb_search(holes, &tmp, compare_hole_range, NULL);
350 hole = rb_entry(node, struct file_extent_hole, node);
/* The range to delete must end inside the hole that was found. */
351 if (start + len > hole->start + hole->len)
355 * Now there will be no overlap, delete the hole and re-add the
356 * split(s) if they exists.
/* Remainder in front of the deleted range. */
358 if (start > hole->start) {
359 prev_start = hole->start;
360 prev_len = start - hole->start;
/* Remainder behind the deleted range. */
363 if (hole->start + hole->len > start + len) {
364 next_start = start + len;
365 next_len = hole->start + hole->len - start - len;
368 rb_erase(node, holes);
371 ret = add_file_extent_hole(holes, prev_start, prev_len);
376 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole recorded in src to dst through add_file_extent_hole(), so
 * the usual merging applies in dst. The visible lines only read from src.
 * NOTE(review): the second parameter line, the loop construct, the error
 * check after each add and the return are not visible in this listing.
 */
383 static int copy_file_extent_holes(struct rb_root *dst,
386 struct file_extent_hole *hole;
387 struct rb_node *node;
390 node = rb_first(src);
392 hole = rb_entry(node, struct file_extent_hole, node);
393 ret = add_file_extent_hole(dst, hole->start, hole->len);
396 node = rb_next(node);
/*
 * Empty the hole tree by repeatedly taking the first node and erasing it.
 * NOTE(review): the loop construct and the release of each hole are
 * presumably on lines not visible in this listing.
 */
401 static void free_file_extent_holes(struct rb_root *holes)
403 struct rb_node *node;
404 struct file_extent_hole *hole;
406 node = rb_first(holes);
408 hole = rb_entry(node, struct file_extent_hole, node);
409 rb_erase(node, holes);
/* Restart from the new first node instead of following the erased one. */
411 node = rb_first(holes);
/* Forward declaration; the definition is outside this view. */
415 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark root as touched by the transaction trans.
 * Nothing happens when root->last_trans already equals trans->transid.
 * Otherwise track_dirty is set, last_trans is updated, and the current
 * root node is recorded as commit_root.
 */
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root)
420 if (root->last_trans != trans->transid) {
421 root->track_dirty = 1;
422 root->last_trans = trans->transid;
423 root->commit_root = root->node;
/* commit_root now aliases root->node; extent_buffer_get() presumably takes the matching extra reference - confirm. */
424 extent_buffer_get(root->node);
/*
 * Ordering callback for device records held as rb-tree nodes; the key is
 * devid. NOTE(review): return values are not visible in this listing.
 */
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
430 struct device_record *rec1;
431 struct device_record *rec2;
433 rec1 = rb_entry(node1, struct device_record, node);
434 rec2 = rb_entry(node2, struct device_record, node);
435 if (rec1->devid > rec2->devid)
437 else if (rec1->devid < rec2->devid)
/*
 * Create an independent copy of an inode record.
 *
 * The struct is copied with memcpy(), after which the copy gets its own
 * empty backref list, orphan-extent list and hole tree. Each backref
 * (including its trailing name) and each orphan extent is then duplicated,
 * and the holes are copied with copy_file_extent_holes().
 *
 * Returns the new record, or ERR_PTR(-ENOMEM) when the record itself
 * cannot be allocated.
 * NOTE(review): the other failure checks, the jumps to the cleanup code,
 * the cleanup labels, the free() calls and the final returns are on lines
 * not visible in this listing.
 */
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
445 struct inode_record *rec;
446 struct inode_backref *backref;
447 struct inode_backref *orig;
448 struct inode_backref *tmp;
449 struct orphan_data_extent *src_orphan;
450 struct orphan_data_extent *dst_orphan;
455 rec = malloc(sizeof(*rec));
457 return ERR_PTR(-ENOMEM);
458 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy duplicated the original's list heads and tree root; reset them so the copy owns nothing of the original. */
460 INIT_LIST_HEAD(&rec->backrefs);
461 INIT_LIST_HEAD(&rec->orphan_extents);
462 rec->holes = RB_ROOT;
464 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* Header plus name plus one extra byte, matching the allocation in get_inode_backref(). */
465 size = sizeof(*orig) + orig->namelen + 1;
466 backref = malloc(size);
471 memcpy(backref, orig, size);
472 list_add_tail(&backref->list, &rec->backrefs);
474 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475 dst_orphan = malloc(sizeof(*dst_orphan));
480 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
483 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* What follows appears to be the error-path teardown of the partial copy. */
490 rb = rb_first(&rec->holes);
492 struct file_extent_hole *hole;
494 hole = rb_entry(rb, struct file_extent_hole, node);
500 if (!list_empty(&rec->backrefs))
501 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502 list_del(&orig->list);
506 if (!list_empty(&rec->orphan_extents))
/* NOTE(review): orphan_extents holds struct orphan_data_extent entries (see the copy loop above), but this walk uses the struct inode_backref cursors orig/tmp. Confirm the list member sits at the same offset in both structs, or switch to orphan-typed cursors. */
507 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508 list_del(&orig->list);
/*
 * Print, on stdout, one line per orphan data extent in the list, preceded
 * by a heading naming the tree. Does nothing visible for an empty list.
 * NOTE(review): the second parameter (used as the tree id in the heading)
 * and the last printf argument are on lines not visible in this listing.
 */
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
520 struct orphan_data_extent *orphan;
522 if (list_empty(orphan_extents))
524 printf("The following data extent is lost in tree %llu:\n",
526 list_for_each_entry(orphan, orphan_extents, list) {
527 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the error bits of an inode record on stderr.
 *
 * One summary line is written: root id, inode number, the raw error mask
 * in hex, then a ", <description>" fragment for every I_ERR_* bit that is
 * set. For a relocation tree the line is prefixed with "reloc" and the
 * root id is taken from root_key.offset instead of root_key.objectid.
 * Orphan extents and file extent holes are listed afterwards when the
 * corresponding bits are set.
 *
 * NOTE(review): some lines (for example any early exit and the loop
 * around the hole printing) are not visible in this listing.
 */
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
535 u64 root_objectid = root->root_key.objectid;
536 int errors = rec->errors;
540 /* reloc root errors, we print its corresponding fs root objectid*/
541 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542 root_objectid = root->root_key.offset;
543 fprintf(stderr, "reloc");
545 fprintf(stderr, "root %llu inode %llu errors %x",
546 (unsigned long long) root_objectid,
547 (unsigned long long) rec->ino, rec->errors);
549 if (errors & I_ERR_NO_INODE_ITEM)
550 fprintf(stderr, ", no inode item");
551 if (errors & I_ERR_NO_ORPHAN_ITEM)
552 fprintf(stderr, ", no orphan item");
553 if (errors & I_ERR_DUP_INODE_ITEM)
554 fprintf(stderr, ", dup inode item");
555 if (errors & I_ERR_DUP_DIR_INDEX)
556 fprintf(stderr, ", dup dir index");
557 if (errors & I_ERR_ODD_DIR_ITEM)
558 fprintf(stderr, ", odd dir item");
559 if (errors & I_ERR_ODD_FILE_EXTENT)
560 fprintf(stderr, ", odd file extent");
561 if (errors & I_ERR_BAD_FILE_EXTENT)
562 fprintf(stderr, ", bad file extent");
563 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564 fprintf(stderr, ", file extent overlap");
565 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566 fprintf(stderr, ", file extent discount");
567 if (errors & I_ERR_DIR_ISIZE_WRONG)
568 fprintf(stderr, ", dir isize wrong");
569 if (errors & I_ERR_FILE_NBYTES_WRONG)
570 fprintf(stderr, ", nbytes wrong");
571 if (errors & I_ERR_ODD_CSUM_ITEM)
572 fprintf(stderr, ", odd csum item");
573 if (errors & I_ERR_SOME_CSUM_MISSING)
574 fprintf(stderr, ", some csum missing");
575 if (errors & I_ERR_LINK_COUNT_WRONG)
576 fprintf(stderr, ", link count wrong");
577 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578 fprintf(stderr, ", orphan file extent");
579 fprintf(stderr, "\n");
580 /* Print the orphan extents if needed */
581 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
/* NOTE(review): this passes root->objectid while the summary line above uses root->root_key.objectid - confirm both name the same id. Also note the helper prints to stdout, not stderr. */
582 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
584 /* Print the holes if needed */
585 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586 struct file_extent_hole *hole;
587 struct rb_node *node;
590 node = rb_first(&rec->holes);
591 fprintf(stderr, "Found file extent holes:\n");
594 hole = rb_entry(node, struct file_extent_hole, node);
595 fprintf(stderr, "\tstart: %llu, len: %llu\n",
596 hole->start, hole->len);
597 node = rb_next(node);
/* Alternative line describing a hole that starts at offset 0; its condition and length expression are only partly visible. */
600 fprintf(stderr, "\tstart: 0, len: %llu\n",
602 root->fs_info->sectorsize));
/*
 * Append to stderr a ", <description>" fragment for every REF_ERR_* bit
 * set in errors, then terminate the line with a newline. With no bits set
 * only the newline is written. The caller is expected to have printed the
 * beginning of the line already.
 */
606 static void print_ref_error(int errors)
608 if (errors & REF_ERR_NO_DIR_ITEM)
609 fprintf(stderr, ", no dir item");
610 if (errors & REF_ERR_NO_DIR_INDEX)
611 fprintf(stderr, ", no dir index");
612 if (errors & REF_ERR_NO_INODE_REF)
613 fprintf(stderr, ", no inode ref");
614 if (errors & REF_ERR_DUP_DIR_ITEM)
615 fprintf(stderr, ", dup dir item");
616 if (errors & REF_ERR_DUP_DIR_INDEX)
617 fprintf(stderr, ", dup dir index");
618 if (errors & REF_ERR_DUP_INODE_REF)
619 fprintf(stderr, ", dup inode ref");
620 if (errors & REF_ERR_INDEX_UNMATCH)
621 fprintf(stderr, ", index mismatch");
622 if (errors & REF_ERR_FILETYPE_UNMATCH)
623 fprintf(stderr, ", filetype mismatch");
624 if (errors & REF_ERR_NAME_TOO_LONG)
625 fprintf(stderr, ", name too long");
626 if (errors & REF_ERR_NO_ROOT_REF)
627 fprintf(stderr, ", no root ref");
628 if (errors & REF_ERR_NO_ROOT_BACKREF)
629 fprintf(stderr, ", no root backref");
630 if (errors & REF_ERR_DUP_ROOT_REF)
631 fprintf(stderr, ", dup root ref");
632 if (errors & REF_ERR_DUP_ROOT_BACKREF)
633 fprintf(stderr, ", dup root backref");
634 fprintf(stderr, "\n");
/*
 * Fetch the inode record for ino from inode_cache, creating it if needed.
 *
 * When a record exists, is shared (refs > 1) and the caller asked for a
 * modifiable one (mod), the cache entry is replaced by a private clone.
 * When no record exists, a zeroed record and a ptr_node are allocated and
 * inserted into the cache.
 *
 * Returns ERR_PTR(-ENOMEM) on allocation failure and ERR_PTR(-EEXIST) when
 * the insertion fails.
 * NOTE(review): the remaining parameters (callers pass an inode number and
 * a modify flag), the branch structure, reference-count updates and the
 * success returns are on lines not visible in this listing.
 */
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
640 struct ptr_node *node;
641 struct cache_extent *cache;
642 struct inode_record *rec = NULL;
645 cache = lookup_cache_extent(inode_cache, ino, 1);
647 node = container_of(cache, struct ptr_node, cache);
/* A shared record must not be modified in place; give this cache entry its own copy. */
649 if (mod && rec->refs > 1) {
650 node->data = clone_inode_rec(rec);
651 if (IS_ERR(node->data))
657 rec = calloc(1, sizeof(*rec));
659 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks that no file extent has been seen yet; process_file_extent() and merge_inode_recs() test for it. */
661 rec->extent_start = (u64)-1;
663 INIT_LIST_HEAD(&rec->backrefs);
664 INIT_LIST_HEAD(&rec->orphan_extents);
665 rec->holes = RB_ROOT;
667 node = malloc(sizeof(*node));
670 return ERR_PTR(-ENOMEM);
/* Each inode occupies a one-unit range keyed by its number. */
672 node->cache.start = ino;
673 node->cache.size = 1;
/* The free-inode object gets special treatment; the action is on a hidden line. */
676 if (ino == BTRFS_FREE_INO_OBJECTID)
679 ret = insert_cache_extent(inode_cache, &node->cache);
681 return ERR_PTR(-EEXIST);
/*
 * Drain the orphan-extent list, unlinking entries from the front until it
 * is empty. NOTE(review): the release of each entry is presumably on a
 * line not visible in this listing.
 */
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
688 struct orphan_data_extent *orphan;
690 while (!list_empty(orphan_extents)) {
691 orphan = list_entry(orphan_extents->next,
692 struct orphan_data_extent, list);
693 list_del(&orphan->list);
/*
 * Tear down an inode record: unlink all backrefs, then release the orphan
 * extents and the hole tree through their helpers.
 * NOTE(review): any reference-count check, the free() of each backref and
 * the free() of the record itself are on lines not visible in this listing.
 */
698 static void free_inode_rec(struct inode_record *rec)
700 struct inode_backref *backref;
705 while (!list_empty(&rec->backrefs)) {
706 backref = to_inode_backref(rec->backrefs.next);
707 list_del(&backref->list);
710 free_orphan_data_extents(&rec->orphan_extents);
711 free_file_extent_holes(&rec->holes);
/*
 * Test whether a record is fully resolved: no errors, checked, inode item
 * seen, link count matching the links found, and no outstanding backrefs.
 * NOTE(review): the return statements are not visible; presumably non-zero
 * when all conditions hold - confirm.
 */
715 static int can_free_inode_rec(struct inode_record *rec)
717 if (!rec->errors && rec->checked && rec->found_inode_item &&
718 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Re-evaluate an inode record and drop it from the cache once it is fully
 * resolved.
 *
 * Steps visible here:
 *  1. Backrefs that have both a dir item and a dir index are checked
 *     against the inode's file type. Clean ones that also have an inode
 *     ref are unlinked when the link count matches.
 *  2. When the record is checked and not being merged, type-specific
 *     consistency errors are accumulated in rec->errors.
 *  3. When can_free_inode_rec() agrees, the record is removed from
 *     inode_cache.
 *
 * NOTE(review): early returns, the origin of no_holes and the final
 * free() calls are on lines not visible in this listing.
 */
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724 struct inode_record *rec)
726 struct cache_extent *cache;
727 struct inode_backref *tmp, *backref;
728 struct ptr_node *node;
/* Until the inode item has been seen, the checks below are not applied. */
731 if (!rec->found_inode_item)
734 filetype = imode_to_type(rec->imode);
735 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736 if (backref->found_dir_item && backref->found_dir_index) {
737 if (backref->filetype != filetype)
738 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739 if (!backref->errors && backref->found_inode_ref &&
740 rec->nlink == rec->found_link) {
741 list_del(&backref->list);
747 if (!rec->checked || rec->merging)
/* Directories: accumulated entry size must equal isize, and no file extents are expected. */
750 if (S_ISDIR(rec->imode)) {
751 if (rec->found_size != rec->isize)
752 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753 if (rec->found_file_extent)
754 rec->errors |= I_ERR_ODD_FILE_EXTENT;
/* Regular files and symlinks: no dir items, nbytes must match, and extents must cover isize unless holes are permitted. */
755 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756 if (rec->found_dir_item)
757 rec->errors |= I_ERR_ODD_DIR_ITEM;
758 if (rec->found_size != rec->nbytes)
759 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760 if (rec->nlink > 0 && !no_holes &&
761 (rec->extent_end < rec->isize ||
762 first_extent_gap(&rec->holes) < rec->isize))
763 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
/* Checksum presence must agree with the nodatasum flag. */
766 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767 if (rec->found_csum_item && rec->nodatasum)
768 rec->errors |= I_ERR_ODD_CSUM_ITEM;
769 if (rec->some_csum_missing && !rec->nodatasum)
770 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* Only an unshared record may reach this point. */
773 BUG_ON(rec->refs != 1);
774 if (can_free_inode_rec(rec)) {
775 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776 node = container_of(cache, struct ptr_node, cache);
777 BUG_ON(node->data != rec);
778 remove_cache_extent(inode_cache, &node->cache);
/*
 * Look up an orphan item in root with a read-only search (no transaction,
 * no insert/COW) and release the path afterwards.
 * NOTE(review): the assignment of key.offset (presumably ino) and the
 * translation of the search result into the return value are on lines not
 * visible in this listing.
 */
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
786 struct btrfs_path path;
787 struct btrfs_key key;
790 key.objectid = BTRFS_ORPHAN_OBJECTID;
791 key.type = BTRFS_ORPHAN_ITEM_KEY;
794 btrfs_init_path(&path);
795 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM in the currently active inode
 * record: link count, size, byte count and mode are copied from the item.
 * A second inode item for the same record is flagged as
 * I_ERR_DUP_INODE_ITEM.
 * NOTE(review): the handling after the duplicate check, the nodatasum
 * assignment, the condition guarding I_ERR_NO_ORPHAN_ITEM and the return
 * value are on lines not visible in this listing.
 */
802 static int process_inode_item(struct extent_buffer *eb,
803 int slot, struct btrfs_key *key,
804 struct shared_node *active_node)
806 struct inode_record *rec;
807 struct btrfs_inode_item *item;
809 rec = active_node->current;
/* The active record must belong to this key and must not be shared. */
810 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811 if (rec->found_inode_item) {
812 rec->errors |= I_ERR_DUP_INODE_ITEM;
815 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816 rec->nlink = btrfs_inode_nlink(eb, item);
817 rec->isize = btrfs_inode_size(eb, item);
818 rec->nbytes = btrfs_inode_nbytes(eb, item);
819 rec->imode = btrfs_inode_mode(eb, item);
820 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
822 rec->found_inode_item = 1;
824 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
/* The record may now be complete enough to be dropped from the cache. */
825 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of rec identified by (dir, name, namelen), or create it.
 * A new backref is allocated with room for the name plus a terminating
 * NUL, has its fixed-size part zeroed, and is appended to rec->backrefs.
 * NOTE(review): the name parameter line, the action taken for records
 * keyed by BTRFS_MULTIPLE_OBJECTIDS, the allocation-failure path, the dir
 * assignment and the return statements are not visible in this listing.
 */
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
831 int namelen, u64 dir)
833 struct inode_backref *backref;
835 list_for_each_entry(backref, &rec->backrefs, list) {
836 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
838 if (backref->dir != dir || backref->namelen != namelen)
840 if (memcmp(name, backref->name, namelen))
845 backref = malloc(sizeof(*backref) + namelen + 1);
/* Only the header is cleared; the name bytes are filled in just below. */
848 memset(backref, 0, sizeof(*backref));
850 backref->namelen = namelen;
851 memcpy(backref->name, name, namelen);
852 backref->name[namelen] = '\0';
853 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one reference to inode ino (a DIR_INDEX, DIR_ITEM, INODE_REF or
 * INODE_EXTREF, selected by itemtype) in the inode cache.
 *
 * The record and the matching backref are looked up or created, the
 * caller-supplied errors are merged in, and the backref is updated
 * according to itemtype. Duplicates and index/filetype disagreements with
 * previously seen items are flagged as REF_ERR_* bits.
 *
 * NOTE(review): error checks on the two lookups, link-count bookkeeping,
 * the handling of unknown item types and the return value are on lines
 * not visible in this listing.
 */
857 static int add_inode_backref(struct cache_tree *inode_cache,
858 u64 ino, u64 dir, u64 index,
859 const char *name, int namelen,
860 u8 filetype, u8 itemtype, int errors)
862 struct inode_record *rec;
863 struct inode_backref *backref;
/* mod = 1: this record is about to be modified. */
865 rec = get_inode_rec(inode_cache, ino, 1);
867 backref = get_inode_backref(rec, name, namelen, dir);
870 backref->errors |= errors;
871 if (itemtype == BTRFS_DIR_INDEX_KEY) {
872 if (backref->found_dir_index)
873 backref->errors |= REF_ERR_DUP_DIR_INDEX;
874 if (backref->found_inode_ref && backref->index != index)
875 backref->errors |= REF_ERR_INDEX_UNMATCH;
876 if (backref->found_dir_item && backref->filetype != filetype)
877 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
879 backref->index = index;
880 backref->filetype = filetype;
881 backref->found_dir_index = 1;
882 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
884 if (backref->found_dir_item)
885 backref->errors |= REF_ERR_DUP_DIR_ITEM;
886 if (backref->found_dir_index && backref->filetype != filetype)
887 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
/* A dir item carries no index, so only the file type is recorded here. */
889 backref->filetype = filetype;
890 backref->found_dir_item = 1;
891 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893 if (backref->found_inode_ref)
894 backref->errors |= REF_ERR_DUP_INODE_REF;
895 if (backref->found_dir_index && backref->index != index)
896 backref->errors |= REF_ERR_INDEX_UNMATCH;
898 backref->index = index;
/* Remember which kind of ref this was; merge_inode_recs() replays it as the item type. */
900 backref->ref_type = itemtype;
901 backref->found_inode_ref = 1;
906 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold the information collected in src into dst, which lives in
 * dst_cache.
 *
 * Every backref of src is replayed against dst through
 * add_inode_backref(), once per kind of item that was seen for it. The
 * found_* flags, the hole tree, link and size counters, the extent range,
 * the error mask and the inode item fields are then merged.
 *
 * NOTE(review): the declaration and updates of dir_count, the error checks
 * after the hole operations, the start argument of the hole added between
 * the two extent ranges, and the return value are on lines not visible in
 * this listing.
 */
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911 struct cache_tree *dst_cache)
913 struct inode_backref *backref;
918 list_for_each_entry(backref, &src->backrefs, list) {
919 if (backref->found_dir_index) {
920 add_inode_backref(dst_cache, dst->ino, backref->dir,
921 backref->index, backref->name,
922 backref->namelen, backref->filetype,
923 BTRFS_DIR_INDEX_KEY, backref->errors);
925 if (backref->found_dir_item) {
927 add_inode_backref(dst_cache, dst->ino,
928 backref->dir, 0, backref->name,
929 backref->namelen, backref->filetype,
930 BTRFS_DIR_ITEM_KEY, backref->errors);
932 if (backref->found_inode_ref) {
933 add_inode_backref(dst_cache, dst->ino,
934 backref->dir, backref->index,
935 backref->name, backref->namelen, 0,
936 backref->ref_type, backref->errors);
/* Sticky flags: set in dst if set in src, never cleared. */
940 if (src->found_dir_item)
941 dst->found_dir_item = 1;
942 if (src->found_file_extent)
943 dst->found_file_extent = 1;
944 if (src->found_csum_item)
945 dst->found_csum_item = 1;
946 if (src->some_csum_missing)
947 dst->some_csum_missing = 1;
/* Holes are copied only when src's first gap lies before dst's. */
948 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949 ret = copy_file_extent_holes(&dst->holes, &src->holes);
/* dir_count is subtracted so that those links are not counted twice; its bookkeeping is on hidden lines. */
954 BUG_ON(src->found_link < dir_count);
955 dst->found_link += src->found_link - dir_count;
956 dst->found_size += src->found_size;
/* (u64)-1 means no file extent was recorded for that side. */
957 if (src->extent_start != (u64)-1) {
958 if (dst->extent_start == (u64)-1) {
959 dst->extent_start = src->extent_start;
960 dst->extent_end = src->extent_end;
962 if (dst->extent_end > src->extent_start)
963 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964 else if (dst->extent_end < src->extent_start) {
/* A gap between the two ranges is recorded as a hole. */
965 ret = add_file_extent_hole(&dst->holes,
967 src->extent_start - dst->extent_end);
969 if (dst->extent_end < src->extent_end)
970 dst->extent_end = src->extent_end;
974 dst->errors |= src->errors;
975 if (src->found_inode_item) {
976 if (!dst->found_inode_item) {
977 dst->nlink = src->nlink;
978 dst->isize = src->isize;
979 dst->nbytes = src->nbytes;
980 dst->imode = src->imode;
981 dst->nodatasum = src->nodatasum;
982 dst->found_inode_item = 1;
/* Both sides had an inode item: report the duplicate. */
984 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Transfer the inode records cached in src_node into dst_node.
 *
 * One reference on src_node is dropped first. The root_cache is processed
 * before the inode_cache. For each entry a new ptr_node is inserted into
 * the destination. When the destination already has a record for the same
 * inode (-EEXIST), the two are merged with merge_inode_recs() and the
 * source record is freed.
 * Finally, when the source's current inode number is set and dst_node has
 * no current record or one with a lower inode number, the destination's
 * current record is marked checked and replaced by the record for that
 * inode number.
 *
 * NOTE(review): the loop constructs, the meaning of a zero refcount for
 * the transfer (move versus share), allocation checks, reference-count
 * updates and the return value are on lines not visible in this listing.
 */
992 static int splice_shared_node(struct shared_node *src_node,
993 struct shared_node *dst_node)
995 struct cache_extent *cache;
996 struct ptr_node *node, *ins;
997 struct cache_tree *src, *dst;
998 struct inode_record *rec, *conflict;
1003 if (--src_node->refs == 0)
1005 if (src_node->current)
1006 current_ino = src_node->current->ino;
1008 src = &src_node->root_cache;
1009 dst = &dst_node->root_cache;
1011 cache = search_cache_extent(src, 0);
1013 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry may be removed from the tree. */
1015 cache = next_cache_extent(cache);
1018 remove_cache_extent(src, &node->cache);
1021 ins = malloc(sizeof(*ins));
1023 ins->cache.start = node->cache.start;
1024 ins->cache.size = node->cache.size;
1028 ret = insert_cache_extent(dst, &ins->cache);
1029 if (ret == -EEXIST) {
1030 conflict = get_inode_rec(dst, rec->ino, 1);
1031 BUG_ON(IS_ERR(conflict));
1032 merge_inode_recs(rec, conflict, dst);
1034 conflict->checked = 1;
/* maybe_free_inode_rec() below may release conflict, so drop the current pointer first. */
1035 if (dst_node->current == conflict)
1036 dst_node->current = NULL;
1038 maybe_free_inode_rec(dst, conflict);
1039 free_inode_rec(rec);
/* Second pass: switch from the root caches to the inode caches. */
1046 if (src == &src_node->root_cache) {
1047 src = &src_node->inode_cache;
1048 dst = &dst_node->inode_cache;
1052 if (current_ino > 0 && (!dst_node->current ||
1053 current_ino > dst_node->current->ino)) {
1054 if (dst_node->current) {
1055 dst_node->current->checked = 1;
1056 maybe_free_inode_rec(dst, dst_node->current);
1058 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059 BUG_ON(IS_ERR(dst_node->current));
/*
 * Destructor for one cache entry holding an inode record: recovers the
 * ptr_node from the cache extent and frees the record it points to.
 * NOTE(review): the assignment of rec (presumably node->data) and the
 * release of the ptr_node itself are on lines not visible in this listing.
 */
1064 static void free_inode_ptr(struct cache_extent *cache)
1066 struct ptr_node *node;
1067 struct inode_record *rec;
1069 node = container_of(cache, struct ptr_node, cache);
1071 free_inode_rec(rec);
/* Macro invocation using free_inode_ptr as the per-entry destructor. NOTE(review): presumably it defines free_inode_recs_tree(), which enter_shared_node() calls - confirm against the macro definition. */
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared node registered for a tree block, using a one-unit
 * range lookup in the shared cache tree.
 * NOTE(review): the second parameter line (callers pass bytenr) and the
 * return statements for the found / not-found cases are not visible in
 * this listing.
 */
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1080 struct cache_extent *cache;
1081 struct shared_node *node;
1083 cache = lookup_cache_extent(shared, bytenr, 1);
1085 node = container_of(cache, struct shared_node, cache);
/*
 * Register a new, zero-initialised shared node for the tree block at
 * bytenr, with empty root and inode caches, and insert it into shared.
 * NOTE(review): the allocation-failure path, the storing of refs in the
 * node and the return value are on lines not visible in this listing.
 */
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1094 struct shared_node *node;
1096 node = calloc(1, sizeof(*node));
/* Keyed by block address as a one-unit range. */
1099 node->cache.start = bytenr;
1100 node->cache.size = 1;
1101 cache_tree_init(&node->root_cache);
1102 cache_tree_init(&node->inode_cache);
1105 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk reaches a shared block at the given level.
 *
 * On the first visit a shared node is created for bytenr and becomes the
 * active node for this level. On a later visit, the already collected
 * records are spliced into the currently active node. When the walk's
 * active node is the root level and the root has zero refs, one reference
 * is dropped instead and the caches are freed once it reaches zero.
 *
 * NOTE(review): the branch conditions distinguishing first from later
 * visits, the frees of removed nodes and all return values are on lines
 * not visible in this listing.
 */
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111 struct walk_control *wc, int level)
1113 struct shared_node *node;
1114 struct shared_node *dest;
1117 if (level == wc->active_node)
/* A shared block must be below the currently active level. */
1120 BUG_ON(wc->active_node <= level);
1121 node = find_shared_node(&wc->shared, bytenr);
1123 ret = add_shared_node(&wc->shared, bytenr, refs);
1125 node = find_shared_node(&wc->shared, bytenr);
1126 wc->nodes[level] = node;
1127 wc->active_node = level;
1131 if (wc->root_level == wc->active_node &&
1132 btrfs_root_refs(&root->root_item) == 0) {
1133 if (--node->refs == 0) {
1134 free_inode_recs_tree(&node->root_cache);
1135 free_inode_recs_tree(&node->inode_cache);
1136 remove_cache_extent(&wc->shared, &node->cache);
1142 dest = wc->nodes[wc->active_node];
1143 splice_shared_node(node, dest);
/* splice_shared_node() dropped a reference; unregister the node once nobody needs it. */
1144 if (node->refs == 0) {
1145 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the tree walk leaves the shared block at the given level.
 *
 * The next active node is searched for among the levels above, the slot
 * of the node being left is cleared, and its records are spliced into the
 * new active node, unless the new active node is at root level and the
 * root has zero refs.
 *
 * NOTE(review): the body of the level search loop, the early return for
 * the root level, the else branch contents and the return value are on
 * lines not visible in this listing.
 */
1151 static int leave_shared_node(struct btrfs_root *root,
1152 struct walk_control *wc, int level)
1154 struct shared_node *node;
1155 struct shared_node *dest;
1158 if (level == wc->root_level)
1161 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The search above must have stopped at a valid level. */
1165 BUG_ON(i >= BTRFS_MAX_LEVEL);
1167 node = wc->nodes[wc->active_node];
1168 wc->nodes[wc->active_node] = NULL;
1169 wc->active_node = i;
1171 dest = wc->nodes[wc->active_node];
1172 if (wc->active_node < wc->root_level ||
1173 btrfs_root_refs(&root->root_item) > 0) {
1174 BUG_ON(node->refs <= 1);
1175 splice_shared_node(node, dest);
1177 BUG_ON(node->refs < 2);
/*
 * Determine the relationship between two roots by consulting the tree
 * root: first a direct ROOT_REF lookup keyed (parent_root_id, ROOT_REF,
 * child_root_id), then a scan over the child's ROOT_BACKREF items. The
 * scan stops at the first key that no longer belongs to the child.
 * The return values are documented by the original comment lines below.
 *
 * NOTE(review): the second parameter line, error handling of the
 * searches, the loop construct, the update of has_parent and the early
 * return values are on lines not visible in this listing.
 */
1186 * 1 - if the root with id child_root_id is a child of root parent_root_id
1187 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1188 * has other root(s) as parent(s)
1189 * 2 - if the root child_root_id doesn't have any parent roots
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1194 struct btrfs_path path;
1195 struct btrfs_key key;
1196 struct extent_buffer *leaf;
1200 btrfs_init_path(&path);
1202 key.objectid = parent_root_id;
1203 key.type = BTRFS_ROOT_REF_KEY;
1204 key.offset = child_root_id;
1205 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1209 btrfs_release_path(&path);
/* Second phase: walk the backrefs stored under the child's id. */
1213 key.objectid = child_root_id;
1214 key.type = BTRFS_ROOT_BACKREF_KEY;
1216 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1222 leaf = path.nodes[0];
/* Slot past the end of this leaf: continue in the next one. */
1223 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1227 leaf = path.nodes[0];
1230 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231 if (key.objectid != child_root_id ||
1232 key.type != BTRFS_ROOT_BACKREF_KEY)
1237 if (key.offset == parent_root_id) {
1238 btrfs_release_path(&path);
1245 btrfs_release_path(&path);
1248 return has_parent ? 0 : 2;
/*
 * Walk all entries packed into one DIR_ITEM / DIR_INDEX item and register
 * each as a backref on the inode or root it points to.
 *
 * For every entry the name is read into namebuf. Over-long or truncated
 * names are flagged with REF_ERR_NAME_TOO_LONG. For DIR_ITEM keys the
 * key offset is verified against the name hash. The backref goes into the
 * inode cache for INODE_ITEM locations and into the root cache for
 * ROOT_ITEM locations. Any other location type is reported and recorded
 * under BTRFS_MULTIPLE_OBJECTIDS. A DIR_INDEX item holding more than one
 * entry is flagged as I_ERR_DUP_DIR_INDEX.
 *
 * NOTE(review): local declarations, the updates of cur and nritems, the
 * exact length clamps, the last argument of the root-cache call and the
 * return value are on lines not visible in this listing.
 */
1251 static int process_dir_item(struct extent_buffer *eb,
1252 int slot, struct btrfs_key *key,
1253 struct shared_node *active_node)
1263 struct btrfs_dir_item *di;
1264 struct inode_record *rec;
1265 struct cache_tree *root_cache;
1266 struct cache_tree *inode_cache;
1267 struct btrfs_key location;
1268 char namebuf[BTRFS_NAME_LEN];
1270 root_cache = &active_node->root_cache;
1271 inode_cache = &active_node->inode_cache;
1272 rec = active_node->current;
1273 rec->found_dir_item = 1;
1275 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276 total = btrfs_item_size_nr(eb, slot);
1277 while (cur < total) {
1279 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280 name_len = btrfs_dir_name_len(eb, di);
1281 data_len = btrfs_dir_data_len(eb, di);
1282 filetype = btrfs_dir_type(eb, di);
/* The directory's expected size is accumulated from the name lengths; it is compared with isize in maybe_free_inode_rec(). */
1284 rec->found_size += name_len;
/* Entry header plus name must fit in the item and the name must not exceed the maximum. */
1285 if (cur + sizeof(*di) + name_len > total ||
1286 name_len > BTRFS_NAME_LEN) {
1287 error = REF_ERR_NAME_TOO_LONG;
1289 if (cur + sizeof(*di) > total)
1291 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the fixed-size entry header directly. */
1298 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1300 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301 key->offset != btrfs_name_hash(namebuf, len)) {
1302 rec->errors |= I_ERR_ODD_DIR_ITEM;
/* NOTE(review): namebuf holds len raw bytes and no terminator is written in the visible lines, yet it is printed with a plain string conversion below - confirm it cannot be read past its end. */
1303 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304 key->objectid, key->offset, namebuf, len, filetype,
1305 key->offset, btrfs_name_hash(namebuf, len));
1308 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309 add_inode_backref(inode_cache, location.objectid,
1310 key->objectid, key->offset, namebuf,
1311 len, filetype, key->type, error);
1312 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313 add_inode_backref(root_cache, location.objectid,
1314 key->objectid, key->offset,
1315 namebuf, len, filetype,
1319 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320 location.type, key->objectid, key->offset);
1321 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322 key->objectid, key->offset, namebuf,
1323 len, filetype, key->type, error);
/* Step to the next packed entry: header, name and data. */
1326 len = sizeof(*di) + name_len + data_len;
1327 di = (struct btrfs_dir_item *)((char *)di + len);
1330 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk all references packed into one INODE_REF item and register each as
 * a backref of inode key->objectid, with key->offset passed as the parent
 * directory.
 * A name that crosses the item boundary or exceeds BTRFS_NAME_LEN is
 * truncated and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): local declarations, the update of cur, the normal-case
 * length assignment, the action when not even the header fits and the
 * return value are on lines not visible in this listing.
 */
1336 static int process_inode_ref(struct extent_buffer *eb,
1337 int slot, struct btrfs_key *key,
1338 struct shared_node *active_node)
1346 struct cache_tree *inode_cache;
1347 struct btrfs_inode_ref *ref;
1348 char namebuf[BTRFS_NAME_LEN];
1350 inode_cache = &active_node->inode_cache;
1352 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353 total = btrfs_item_size_nr(eb, slot);
1354 while (cur < total) {
1355 name_len = btrfs_inode_ref_name_len(eb, ref);
1356 index = btrfs_inode_ref_index(eb, ref);
1358 /* inode_ref + namelen should not cross item boundary */
1359 if (cur + sizeof(*ref) + name_len > total ||
1360 name_len > BTRFS_NAME_LEN) {
1361 if (total < cur + sizeof(*ref))
1364 /* Still try to read out the remaining part */
1365 len = min_t(u32, total - cur - sizeof(*ref),
1367 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size ref header directly. */
1373 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
/* File type is passed as 0 here. */
1374 add_inode_backref(inode_cache, key->objectid, key->offset,
1375 index, namebuf, len, 0, key->type, error);
1377 len = sizeof(*ref) + name_len;
1378 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Walk all references packed into one INODE_EXTREF item and register each
 * as a backref of inode key->objectid. The parent directory is read from
 * each entry.
 * Names longer than BTRFS_NAME_LEN are truncated to that length and
 * flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): local declarations, the update of cur, the normal-case
 * length assignment and the return value are on lines not visible in this
 * listing. No check of the name against the item boundary, as done in
 * process_inode_ref(), is visible here - confirm whether one is needed.
 */
1384 static int process_inode_extref(struct extent_buffer *eb,
1385 int slot, struct btrfs_key *key,
1386 struct shared_node *active_node)
1395 struct cache_tree *inode_cache;
1396 struct btrfs_inode_extref *extref;
1397 char namebuf[BTRFS_NAME_LEN];
1399 inode_cache = &active_node->inode_cache;
1401 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402 total = btrfs_item_size_nr(eb, slot);
1403 while (cur < total) {
1404 name_len = btrfs_inode_extref_name_len(eb, extref);
1405 index = btrfs_inode_extref_index(eb, extref);
1406 parent = btrfs_inode_extref_parent(eb, extref);
1407 if (name_len <= BTRFS_NAME_LEN) {
1411 len = BTRFS_NAME_LEN;
1412 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size extref header directly. */
1414 read_extent_buffer(eb, namebuf,
1415 (unsigned long)(extref + 1), len);
1416 add_inode_backref(inode_cache, key->objectid, parent,
1417 index, namebuf, len, 0, key->type, error);
1419 len = sizeof(*extref) + name_len;
1420 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Account one file extent item at @slot of @eb against the inode record that
 * @active_node currently tracks.
 *
 * Effects on the record that can be seen below: found_file_extent is set,
 * the extent_start/extent_end range is advanced, overlapping or malformed
 * extents are flagged in rec->errors, gaps are passed to
 * add_file_extent_hole(), found_size is accumulated, and the csum flags are
 * updated from the result of count_csum_range().
 */
1427 static int process_file_extent(struct btrfs_root *root,
1428 struct extent_buffer *eb,
1429 int slot, struct btrfs_key *key,
1430 struct shared_node *active_node)
1432 struct inode_record *rec;
1433 struct btrfs_file_extent_item *fi;
1435 u64 disk_bytenr = 0;
1436 u64 extent_offset = 0;
/* sectorsize - 1, used below both to test and to round up alignment */
1437 u64 mask = root->fs_info->sectorsize - 1;
1441 rec = active_node->current;
/* The tracked record must match the key inode and hold at most one ref */
1442 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1443 rec->found_file_extent = 1;
/* extent_start still at (u64)-1: begin the tracked range at this offset */
1445 if (rec->extent_start == (u64)-1) {
1446 rec->extent_start = key->offset;
1447 rec->extent_end = key->offset;
/* Previous end beyond this offset is an overlap; before it leaves a hole */
1450 if (rec->extent_end > key->offset)
1451 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1452 else if (rec->extent_end < key->offset) {
1453 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1454 key->offset - rec->extent_end);
1459 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1460 extent_type = btrfs_file_extent_type(eb, fi);
1462 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1463 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1465 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1466 rec->found_size += num_bytes;
/* Round the inline length up to a multiple of the sector size */
1467 num_bytes = (num_bytes + mask) & ~mask;
1468 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1469 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1470 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1471 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1472 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned */
1473 if (num_bytes == 0 || (num_bytes & mask))
1474 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced range must fit inside ram_bytes */
1475 if (num_bytes + extent_offset >
1476 btrfs_file_extent_ram_bytes(eb, fi))
1477 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A preallocated extent must not carry any encoding */
1478 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1479 (btrfs_file_extent_compression(eb, fi) ||
1480 btrfs_file_extent_encryption(eb, fi) ||
1481 btrfs_file_extent_other_encoding(eb, fi)))
1482 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk_bytenr add to found_size */
1483 if (disk_bytenr > 0)
1484 rec->found_size += num_bytes;
1486 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Remember where this extent ends for the overlap and hole test above */
1488 rec->extent_end = key->offset + num_bytes;
1491 * The data reloc tree will copy full extents into its inode and then
1492 * copy the corresponding csums. Because the extent it copied could be
1493 * a preallocated extent that hasn't been written to yet there may be no
1494 * csums to copy, ergo we won't have csums for our file extent. This is
1495 * ok so just don't bother checking csums if the inode belongs to the
1498 if (disk_bytenr > 0 &&
1499 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1501 if (btrfs_file_extent_compression(eb, fi))
1502 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1504 disk_bytenr += extent_offset;
1506 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1510 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1512 rec->found_csum_item = 1;
1513 if (found < num_bytes)
1514 rec->some_csum_missing = 1;
1515 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1517 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the active shared node of @wc.
 *
 * For each key the tracked inode record is switched when the objectid grows,
 * and the item is then dispatched by key type to process_dir_item(),
 * process_inode_ref(), process_inode_extref(), process_inode_item() or
 * process_file_extent().
 */
1523 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1524 struct walk_control *wc)
1526 struct btrfs_key key;
1530 struct cache_tree *inode_cache;
1531 struct shared_node *active_node;
1533 if (wc->root_level == wc->active_node &&
1534 btrfs_root_refs(&root->root_item) == 0)
/* All records are kept in the inode cache of the currently active node */
1537 active_node = wc->nodes[wc->active_node];
1538 inode_cache = &active_node->inode_cache;
1539 nritems = btrfs_header_nritems(eb);
1540 for (i = 0; i < nritems; i++) {
1541 btrfs_item_key_to_cpu(eb, &key, i);
1543 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1545 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * No record yet, or the key belongs to a higher inode number: mark the
 * previous record as checked, let maybe_free_inode_rec() decide about it,
 * and fetch the record for the new objectid.
 */
1548 if (active_node->current == NULL ||
1549 active_node->current->ino < key.objectid) {
1550 if (active_node->current) {
1551 active_node->current->checked = 1;
1552 maybe_free_inode_rec(inode_cache,
1553 active_node->current);
1555 active_node->current = get_inode_rec(inode_cache,
1557 BUG_ON(IS_ERR(active_node->current));
/* Both directory key types share one handler */
1560 case BTRFS_DIR_ITEM_KEY:
1561 case BTRFS_DIR_INDEX_KEY:
1562 ret = process_dir_item(eb, i, &key, active_node);
1564 case BTRFS_INODE_REF_KEY:
1565 ret = process_inode_ref(eb, i, &key, active_node);
1567 case BTRFS_INODE_EXTREF_KEY:
1568 ret = process_inode_extref(eb, i, &key, active_node);
1570 case BTRFS_INODE_ITEM_KEY:
1571 ret = process_inode_item(eb, i, &key, active_node);
1573 case BTRFS_EXTENT_DATA_KEY:
1574 ret = process_file_extent(root, eb, i, &key,
1584 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1585 struct extent_buffer *eb, struct node_refs *nrefs,
1586 u64 level, int check_all);
1587 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1588 unsigned int ext_ref);
1591 * Returns >0 Found error, not fatal, should continue
1592 * Returns <0 Fatal error, must exit the whole check
1593 * Returns 0 No errors found
/*
 * Run check_inode_item() on the inodes found in the leaf at path->nodes[0].
 *
 * When the check has moved @path to a different leaf, the node references
 * cached in @nrefs are refreshed with update_nodes_refs() for the levels of
 * the new path, walking from the root level down.
 */
1595 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1596 struct node_refs *nrefs, int *level, int ext_ref)
1598 struct extent_buffer *cur = path->nodes[0];
1599 struct btrfs_key key;
1603 int root_level = btrfs_header_level(root->node);
1605 int ret = 0; /* Final return value */
1606 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on so a leaf switch can be detected */
1608 cur_bytenr = cur->start;
1610 /* skip to first inode item or the first inode number change */
1611 nritems = btrfs_header_nritems(cur);
1612 for (i = 0; i < nritems; i++) {
1613 btrfs_item_key_to_cpu(cur, &key, i);
1615 first_ino = key.objectid;
1616 if (key.type == BTRFS_INODE_ITEM_KEY ||
1617 (first_ino && first_ino != key.objectid))
1621 path->slots[0] = nritems;
/* Error bits from each inode check are accumulated in err */
1627 err |= check_inode_item(root, path, ext_ref);
1629 /* modify cur since check_inode_item may change path */
1630 cur = path->nodes[0];
1632 if (err & LAST_ITEM)
1635 /* still have inode items in this leaf */
1636 if (cur->start == cur_bytenr)
1640 * we have switched to another leaf, above nodes may
1641 * have changed, here walk down the path, if a node
1642 * or leaf is shared, check whether we can skip this
1645 for (i = root_level; i >= 0; i--) {
1646 if (path->nodes[i]->start == nrefs->bytenr[i])
1649 ret = update_nodes_refs(root, path->nodes[i]->start,
1650 path->nodes[i], nrefs, i, 0);
1654 if (!nrefs->need_check[i]) {
/* Release the buffers of the levels below *level and clear their path slots */
1660 for (i = 0; i < *level; i++) {
1661 free_extent_buffer(path->nodes[i]);
1662 path->nodes[i] = NULL;
1672 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1673 * in every fs or file tree check. Here we find all of its root ids, and only check
1674 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root is the tree that should check a block, given the
 * ulist @roots of root ids that reference it.
 *
 * The decision uses the number of entries in @roots and, when there are
 * several, compares root->objectid with the value of the first entry
 * returned by rb_first().
 */
1676 static int need_check(struct btrfs_root *root, struct ulist *roots)
1678 struct rb_node *node;
1679 struct ulist_node *u;
1682 * @roots can be empty if it belongs to tree reloc tree
1683 * In that case, we should always check the leaf, as we can't use
1684 * the tree owner to ensure some other root will check it.
1686 if (roots->nnodes == 1 || roots->nnodes == 0)
1689 node = rb_first(&roots->root);
1690 u = rb_entry(node, struct ulist_node, rb_node);
1692 * current root id is not smallest, we skip it and let it be checked
1693 * in the fs or file tree who hash the smallest root id.
1695 if (root->objectid != u->val)
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to tree block @eb
 * as seen from @root, and set or clear that bit in *flags_ret accordingly.
 *
 * Several quick tests on the root objectid, the root bytenr, the RELOC
 * header flag and the header owner come first. After them the extent item of
 * the block is looked up in the extent tree, and its flags and inline refs
 * are inspected.
 */
1701 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1704 struct btrfs_root *extent_root = root->fs_info->extent_root;
1705 struct btrfs_root_item *ri = &root->root_item;
1706 struct btrfs_extent_inline_ref *iref;
1707 struct btrfs_extent_item *ei;
1708 struct btrfs_key key;
1709 struct btrfs_path *path = NULL;
1720 * Except file/reloc tree, we can not have FULL BACKREF MODE
1722 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1726 if (eb->start == btrfs_root_bytenr(ri))
1729 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1732 owner = btrfs_header_owner(eb);
1733 if (owner == root->objectid)
1736 path = btrfs_alloc_path();
/*
 * Search the extent tree with the block bytenr and the largest offset, then
 * step back to the extent item with btrfs_previous_extent_item().
 */
1740 key.objectid = btrfs_header_bytenr(eb);
1742 key.offset = (u64)-1;
1744 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1751 ret = btrfs_previous_extent_item(extent_root, path,
1757 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on eb is the extent tree leaf, not the block passed in */
1759 eb = path->nodes[0];
1760 slot = path->slots[0];
1761 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1763 flags = btrfs_extent_flags(eb, ei);
1764 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/*
 * Inline refs are scanned from just after the extent item up to the end of
 * the item; EXTENT_ITEM_KEY items carry a btrfs_tree_block_info that is
 * skipped first.
 */
1767 ptr = (unsigned long)(ei + 1);
1768 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1770 if (key.type == BTRFS_EXTENT_ITEM_KEY)
1771 ptr += sizeof(struct btrfs_tree_block_info);
1774 /* Reached extent item ends normally */
1778 /* Beyond extent item end, wrong item size */
1780 error("extent item at bytenr %llu slot %d has wrong size",
1785 iref = (struct btrfs_extent_inline_ref *)ptr;
1786 offset = btrfs_extent_inline_ref_offset(eb, iref);
1787 type = btrfs_extent_inline_ref_type(eb, iref);
1789 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
/* Advance by the size that belongs to this inline ref type */
1791 ptr += btrfs_extent_inline_ref_size(type);
1795 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1799 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1801 btrfs_free_path(path);
1806 * for a tree node or leaf, we record its reference count, so later if we still
1807 * process this node or leaf, don't need to compute its reference count again.
1809 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the per-level cache in @nrefs for the tree block at @bytenr.
 *
 * For a real @bytenr the reference count is looked up with
 * btrfs_lookup_extent_info(), the cached bytenr/refs/full_backref/checked
 * entries of @level are reset, and need_check[level] is derived from the
 * roots that reference the block. When @check_all is set and @eb is given,
 * full_backref[level] is recomputed through calc_extent_flag_v2().
 */
1811 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1812 struct extent_buffer *eb, struct node_refs *nrefs,
1813 u64 level, int check_all)
1815 struct ulist *roots;
1818 int root_level = btrfs_header_level(root->node);
1822 if (nrefs->bytenr[level] == bytenr)
1825 if (bytenr != (u64)-1) {
1826 /* the return value of this function seems a mistake */
1827 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1828 level, 1, &refs, &flags);
1830 if (ret < 0 && !check_all)
/* A different block now sits at this level: reset its cached state */
1833 nrefs->bytenr[level] = bytenr;
1834 nrefs->refs[level] = refs;
1835 nrefs->full_backref[level] = 0;
1836 nrefs->checked[level] = 0;
1839 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1844 check = need_check(root, roots);
1846 nrefs->need_check[level] = check;
1849 nrefs->need_check[level] = 1;
/* The block at the root level is always marked as needing a check */
1851 if (level == root_level) {
1852 nrefs->need_check[level] = 1;
1855 * The node refs may have not been
1856 * updated if upper needs checking (the
1857 * lowest root_objectid) the node can
1860 nrefs->need_check[level] =
1861 nrefs->need_check[level + 1];
/* flags is passed by address; only its FULL_BACKREF bit is read afterwards */
1867 if (check_all && eb) {
1868 calc_extent_flag_v2(root, eb, &flags);
1869 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1870 nrefs->full_backref[level] = 1;
1877 * @level if @level == -1 means extent data item
1878 * else normal tree block.
/*
 * Decide how strictly an extent at @level should be checked, based on the
 * reference counts cached in @nrefs.
 *
 * @level is validated against the root level first; the levels above @level,
 * up to and including the root level, are then scanned for a block whose
 * cached ref count is larger than one.
 */
1880 static int should_check_extent_strictly(struct btrfs_root *root,
1881 struct node_refs *nrefs, int level)
1883 int root_level = btrfs_header_level(root->node);
1885 if (level > root_level || level < -1)
1887 if (level == root_level)
1890 * if the upper node is marked full backref, it should contain shared
1891 * backref of the parent (except owner == root->objectid).
/* Pre-increment: the scan starts at the level directly above @level */
1893 while (++level <= root_level)
1894 if (nrefs->refs[level] > 1)
/*
 * Descend from path->nodes[*level] towards the leaves.
 *
 * Reference counts are taken from @nrefs when the cached bytenr matches and
 * are looked up with btrfs_lookup_extent_info() otherwise. Leaves are handed
 * to process_one_leaf(); child blocks are fetched, validated with
 * check_child_node() and btrfs_check_leaf()/btrfs_check_node(), and then
 * installed in @path one level lower.
 */
1900 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1901 struct walk_control *wc, int *level,
1902 struct node_refs *nrefs)
1904 enum btrfs_tree_block_status status;
1907 struct btrfs_fs_info *fs_info = root->fs_info;
1908 struct extent_buffer *next;
1909 struct extent_buffer *cur;
1913 WARN_ON(*level < 0);
1914 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when @nrefs already describes this block */
1916 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1917 refs = nrefs->refs[*level];
1920 ret = btrfs_lookup_extent_info(NULL, root,
1921 path->nodes[*level]->start,
1922 *level, 1, &refs, NULL);
1927 nrefs->bytenr[*level] = path->nodes[*level]->start;
1928 nrefs->refs[*level] = refs;
1932 ret = enter_shared_node(root, path->nodes[*level]->start,
1940 while (*level >= 0) {
1941 WARN_ON(*level < 0);
1942 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1943 cur = path->nodes[*level];
1945 if (btrfs_header_level(cur) != *level)
1948 if (path->slots[*level] >= btrfs_header_nritems(cur))
1951 ret = process_one_leaf(root, cur, wc);
/* Child pointer and its expected generation for the current slot */
1956 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1957 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme as above, applied to the child level */
1959 if (bytenr == nrefs->bytenr[*level - 1]) {
1960 refs = nrefs->refs[*level - 1];
1962 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1963 *level - 1, 1, &refs, NULL);
1967 nrefs->bytenr[*level - 1] = bytenr;
1968 nrefs->refs[*level - 1] = refs;
1973 ret = enter_shared_node(root, bytenr, refs,
1976 path->slots[*level]++;
/* Prefer a cached buffer that is up to date; otherwise read the child block */
1981 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
1982 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1983 free_extent_buffer(next);
1984 reada_walk_down(root, cur, path->slots[*level]);
1985 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
1986 if (!extent_buffer_uptodate(next)) {
1987 struct btrfs_key node_key;
1989 btrfs_node_key_to_cpu(path->nodes[*level],
1991 path->slots[*level]);
1992 btrfs_add_corrupt_extent_record(root->fs_info,
1994 path->nodes[*level]->start,
1995 root->fs_info->nodesize,
2002 ret = check_child_node(cur, path->slots[*level], next);
2004 free_extent_buffer(next);
2009 if (btrfs_is_leaf(next))
2010 status = btrfs_check_leaf(root, NULL, next);
2012 status = btrfs_check_node(root, NULL, next);
2013 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2014 free_extent_buffer(next);
/* Descend: replace the buffer one level down with the child, at slot 0 */
2019 *level = *level - 1;
2020 free_extent_buffer(path->nodes[*level]);
2021 path->nodes[*level] = next;
2022 path->slots[*level] = 0;
2025 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2030 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the global byte counters:
 * total_btree_bytes always, total_fs_tree_bytes when fs_root_objectid()
 * accepts the root objectid, total_extent_tree_bytes when the block header
 * is owned by the extent tree, and btree_space_waste for unused space.
 */
2032 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2036 struct extent_buffer *eb = path->nodes[level];
2038 total_btree_bytes += eb->len;
2039 if (fs_root_objectid(root->objectid))
2040 total_fs_tree_bytes += eb->len;
2041 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2042 total_extent_tree_bytes += eb->len;
/* Waste is counted from the free space reported for the block */
2045 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* Waste is counted as the unused key pointer slots of the block */
2047 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2048 btrfs_header_nritems(eb));
2049 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2054 * This function only handles BACKREF_MISSING,
2055 * If corresponding extent item exists, increase the ref, else insert an extent
2058 * Returns error bits after repair.
/*
 * Repair a missing backref for tree block @node.
 *
 * @trans: transaction used for the item insertion and the ref increase
 * @root:  tree the block is being checked for
 * @node:  the tree block itself; bytenr, owner and generation come from its
 *         header
 * @nrefs: per-level cache; refs[level] and full_backref[level] are updated
 * @level: level of @node
 * @err:   error bits from the check; nothing is done unless BACKREF_MISSING
 *         is set, and that bit is cleared once the repair has succeeded
 *
 * NOTE(review): the level sanity check below uses '>' while the tree walkers
 * in this file warn on level >= BTRFS_MAX_LEVEL, and @nrefs is indexed with
 * level + 1 further down; confirm that '>' is intended.
 */
2060 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2061 struct btrfs_root *root,
2062 struct extent_buffer *node,
2063 struct node_refs *nrefs, int level, int err)
2065 struct btrfs_fs_info *fs_info = root->fs_info;
2066 struct btrfs_root *extent_root = fs_info->extent_root;
2067 struct btrfs_path path;
2068 struct btrfs_extent_item *ei;
2069 struct btrfs_tree_block_info *bi;
2070 struct btrfs_key key;
2071 struct extent_buffer *eb;
2072 u32 size = sizeof(*ei);
2073 u32 node_size = root->fs_info->nodesize;
2074 int insert_extent = 0;
2075 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2076 int root_level = btrfs_header_level(root->node);
2081 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2084 if ((err & BACKREF_MISSING) == 0)
2087 WARN_ON(level > BTRFS_MAX_LEVEL);
2090 btrfs_init_path(&path);
2091 bytenr = btrfs_header_bytenr(node);
2092 owner = btrfs_header_owner(node);
2093 generation = btrfs_header_generation(node);
2095 key.objectid = bytenr;
2097 key.offset = (u64)-1;
2099 /* Search for the extent item */
2100 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2106 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2110 /* calculate if the extent item flag is full backref or not */
2111 if (nrefs->full_backref[level] != 0)
2112 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2114 /* insert an extent item */
2115 if (insert_extent) {
2116 struct btrfs_disk_key copy_key;
2118 generation = btrfs_header_generation(node);
2120 if (level < root_level && nrefs->full_backref[level + 1] &&
2121 owner != root->objectid) {
2122 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Key type and item size depend on the SKINNY_METADATA incompat bit */
2125 key.objectid = bytenr;
2126 if (!skinny_metadata) {
2127 key.type = BTRFS_EXTENT_ITEM_KEY;
2128 key.offset = node_size;
2129 size += sizeof(*bi);
2131 key.type = BTRFS_METADATA_ITEM_KEY;
2135 btrfs_release_path(&path);
2136 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2142 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The item starts with zero refs; btrfs_inc_extent_ref() below adds the ref */
2144 btrfs_set_extent_refs(eb, ei, 0);
2145 btrfs_set_extent_generation(eb, ei, generation);
2146 btrfs_set_extent_flags(eb, ei, flags);
/* Without skinny metadata a btrfs_tree_block_info follows the extent item */
2148 if (!skinny_metadata) {
2149 bi = (struct btrfs_tree_block_info *)(ei + 1);
2150 memset_extent_buffer(eb, 0, (unsigned long)bi,
2152 btrfs_set_disk_key_objectid(©_key, root->objectid);
2153 btrfs_set_disk_key_type(©_key, 0);
2154 btrfs_set_disk_key_offset(©_key, 0);
2156 btrfs_set_tree_block_level(eb, bi, level);
2157 btrfs_set_tree_block_key(eb, bi, ©_key);
2159 btrfs_mark_buffer_dirty(eb);
2160 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2161 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
/* Keep the cache in step with the freshly written item */
2163 nrefs->refs[level] = 0;
2164 nrefs->full_backref[level] =
2165 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2166 btrfs_release_path(&path);
/*
 * Below a full backref parent, and with a header owner other than this
 * root, the ref is keyed by the parent bytenr.
 */
2169 if (level < root_level && nrefs->full_backref[level + 1] &&
2170 owner != root->objectid)
2171 parent = nrefs->bytenr[level + 1];
2173 /* increase the ref */
2174 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2175 parent, root->objectid, level, 0);
2177 nrefs->refs[level]++;
2179 btrfs_release_path(&path);
2182 "failed to repair tree block ref start %llu root %llu due to %s",
2183 bytenr, root->objectid, strerror(-ret));
2185 printf("Added one tree block ref start %llu %s %llu\n",
2186 bytenr, parent ? "parent" : "root",
2187 parent ? parent : root->objectid);
2188 err &= ~BACKREF_MISSING;
2194 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2195 unsigned int ext_ref);
2196 static int check_tree_block_ref(struct btrfs_root *root,
2197 struct extent_buffer *eb, u64 bytenr,
2198 int level, u64 owner, struct node_refs *nrefs);
2199 static int check_leaf_items(struct btrfs_trans_handle *trans,
2200 struct btrfs_root *root, struct btrfs_path *path,
2201 struct node_refs *nrefs, int account_bytes);
2204 * @trans just for lowmem repair mode
2205 * @check all if not 0 then check all tree block backrefs and items
2206 * 0 then just check relationship of items in fs tree(s)
2208 * Returns >0 Found error, should continue
2209 * Returns <0 Fatal error, must exit the whole check
2210 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves, keeping @nrefs up to
 * date through update_nodes_refs().
 *
 * For every block visited its tree block ref is checked with
 * check_tree_block_ref(); repair_tree_block_ref() is called on the result.
 * Leaves are processed with process_one_leaf_v2() or check_leaf_items();
 * child blocks are fetched, validated and installed in @path one level
 * lower.
 */
2212 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2213 struct btrfs_root *root, struct btrfs_path *path,
2214 int *level, struct node_refs *nrefs, int ext_ref,
2218 enum btrfs_tree_block_status status;
2221 struct btrfs_fs_info *fs_info = root->fs_info;
2222 struct extent_buffer *next;
2223 struct extent_buffer *cur;
2227 int account_file_data = 0;
2229 WARN_ON(*level < 0);
2230 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2232 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2233 path->nodes[*level], nrefs, *level, check_all);
2237 while (*level >= 0) {
2238 WARN_ON(*level < 0);
2239 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240 cur = path->nodes[*level];
2241 bytenr = btrfs_header_bytenr(cur);
2242 check = nrefs->need_check[*level];
2244 if (btrfs_header_level(cur) != *level)
2247 * Update bytes accounting and check tree block ref
2248 * NOTE: Doing accounting and check before checking nritems
2249 * is necessary because of empty node/leaf.
2251 if ((check_all && !nrefs->checked[*level]) ||
2252 (!check_all && nrefs->need_check[*level])) {
2253 ret = check_tree_block_ref(root, cur,
2254 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2255 btrfs_header_owner(cur), nrefs);
2258 ret = repair_tree_block_ref(trans, root,
2259 path->nodes[*level], nrefs, *level, ret);
/* Bytes are accounted only for blocks that need a check and have refs */
2262 if (check_all && nrefs->need_check[*level] &&
2263 nrefs->refs[*level]) {
2264 account_bytes(root, path, *level);
2265 account_file_data = 1;
2267 nrefs->checked[*level] = 1;
2270 if (path->slots[*level] >= btrfs_header_nritems(cur))
2273 /* Don't forget to check leaf/node validation */
2275 /* skip duplicate check */
2276 if (check || !check_all) {
2277 ret = btrfs_check_leaf(root, NULL, cur);
2278 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2286 ret = process_one_leaf_v2(root, path, nrefs,
2289 ret = check_leaf_items(trans, root, path,
2290 nrefs, account_file_data);
2294 if (check || !check_all) {
2295 ret = btrfs_check_node(root, NULL, cur);
2296 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child pointer and its expected generation for the current slot */
2303 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2304 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2306 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2311 * check all trees in check_chunks_and_extent_v2
2312 * check shared node once in check_fs_roots
2314 if (!check_all && !nrefs->need_check[*level - 1]) {
2315 path->slots[*level]++;
/* Prefer a cached buffer that is up to date; otherwise read the child block */
2319 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2320 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2321 free_extent_buffer(next);
2322 reada_walk_down(root, cur, path->slots[*level]);
2323 next = read_tree_block(fs_info, bytenr, ptr_gen);
2324 if (!extent_buffer_uptodate(next)) {
2325 struct btrfs_key node_key;
2327 btrfs_node_key_to_cpu(path->nodes[*level],
2329 path->slots[*level]);
2330 btrfs_add_corrupt_extent_record(fs_info,
2331 &node_key, path->nodes[*level]->start,
2332 fs_info->nodesize, *level);
2338 ret = check_child_node(cur, path->slots[*level], next);
2343 if (btrfs_is_leaf(next))
2344 status = btrfs_check_leaf(root, NULL, next);
2346 status = btrfs_check_node(root, NULL, next);
2347 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2348 free_extent_buffer(next);
/* Descend: replace the buffer one level down with the child, at slot 0 */
2353 *level = *level - 1;
2354 free_extent_buffer(path->nodes[*level]);
2355 path->nodes[*level] = next;
2356 path->slots[*level] = 0;
2357 account_file_data = 0;
/* Called with bytenr (u64)-1 and the new block; the result is not used here */
2359 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Climb @path starting at *level, looking for a node that still has a slot
 * after the current one.
 *
 * Buffers of levels that are left behind are released with
 * free_extent_buffer() and their path entry is cleared; when the level left
 * behind is the active node of @wc, leave_shared_node() is called for it.
 */
2364 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2365 struct walk_control *wc, int *level)
2368 struct extent_buffer *leaf;
2370 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name this is the block at level i, not necessarily a leaf */
2371 leaf = path->nodes[i];
2372 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2377 free_extent_buffer(path->nodes[*level]);
2378 path->nodes[*level] = NULL;
2379 BUG_ON(*level > wc->active_node);
2380 if (*level == wc->active_node)
2381 leave_shared_node(root, wc, *level);
/*
 * Climb @path starting at *level, looking for a node that still has a slot
 * after the current one. Buffers of levels that are left behind are released
 * with free_extent_buffer() and their path entry is cleared.
 */
2388 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2392 struct extent_buffer *leaf;
2394 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name this is the block at level i, not necessarily a leaf */
2395 leaf = path->nodes[i];
2396 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2401 free_extent_buffer(path->nodes[*level]);
2402 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The conditions tested, in order: an inode item was found and no errors are
 * recorded; nlink is 1 and found_link is 0; the backref list is not empty;
 * the first backref has an inode ref, index 0 and the two-byte name "..";
 * and that backref has neither a dir index nor a dir item.
 */
2409 static int check_root_dir(struct inode_record *rec)
2411 struct inode_backref *backref;
2414 if (!rec->found_inode_item || rec->errors)
2416 if (rec->nlink != 1 || rec->found_link != 0)
2418 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined */
2420 backref = to_inode_backref(rec->backrefs.next);
2421 if (!backref->found_inode_ref)
2423 if (backref->index != 0 || backref->namelen != 2 ||
2424 memcmp(backref->name, "..", 2))
2426 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size stored in the inode item of rec->ino with
 * rec->found_size and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * @path is used for the lookup and is released before returning.
 */
2433 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2434 struct btrfs_root *root, struct btrfs_path *path,
2435 struct inode_record *rec)
2437 struct btrfs_inode_item *ei;
2438 struct btrfs_key key;
/* The search key uses the largest offset, so the landing slot is verified below */
2441 key.objectid = rec->ino;
2442 key.type = BTRFS_INODE_ITEM_KEY;
2443 key.offset = (u64)-1;
2445 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2449 if (!path->slots[0]) {
2456 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2457 if (key.objectid != rec->ino) {
2462 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2463 struct btrfs_inode_item);
2464 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2465 btrfs_mark_buffer_dirty(path->nodes[0]);
2466 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2467 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2468 root->root_key.objectid);
2470 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino with btrfs_add_orphan_item() and clear
 * I_ERR_NO_ORPHAN_ITEM on the record. @path is released after the call.
 */
2474 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2475 struct btrfs_root *root,
2476 struct btrfs_path *path,
2477 struct inode_record *rec)
2481 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2482 btrfs_release_path(path);
2484 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field in the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 *
 * @path is used for the lookup and is released before returning.
 */
2488 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2489 struct btrfs_root *root,
2490 struct btrfs_path *path,
2491 struct inode_record *rec)
2493 struct btrfs_inode_item *ei;
2494 struct btrfs_key key;
2497 key.objectid = rec->ino;
2498 key.type = BTRFS_INODE_ITEM_KEY;
2501 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2508 /* Since ret == 0, no need to check anything */
2509 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2510 struct btrfs_inode_item);
2511 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2512 btrfs_mark_buffer_dirty(path->nodes[0]);
2513 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2514 printf("reset nbytes for ino %llu root %llu\n",
2515 rec->ino, root->root_key.objectid);
2517 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref describes but that was not found
 * on disk, using a transaction of its own.
 *
 * The new item points at the inode item of rec->ino, takes its type from
 * rec->imode and its name from @backref. Afterwards the backref is marked as
 * having a dir index, and the record of the parent directory in
 * @inode_cache gets its found_size and I_ERR_DIR_ISIZE_WRONG state updated.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not checked
 * here; confirm whether a failed commit should be reported to the caller.
 */
2521 static int add_missing_dir_index(struct btrfs_root *root,
2522 struct cache_tree *inode_cache,
2523 struct inode_record *rec,
2524 struct inode_backref *backref)
2526 struct btrfs_path path;
2527 struct btrfs_trans_handle *trans;
2528 struct btrfs_dir_item *dir_item;
2529 struct extent_buffer *leaf;
2530 struct btrfs_key key;
2531 struct btrfs_disk_key disk_key;
2532 struct inode_record *dir_rec;
2533 unsigned long name_ptr;
/* Item payload: the dir item header followed by the name, with no data */
2534 u32 data_size = sizeof(*dir_item) + backref->namelen;
2537 trans = btrfs_start_transaction(root, 1);
2539 return PTR_ERR(trans);
2541 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2542 (unsigned long long)rec->ino);
2544 btrfs_init_path(&path);
/* The index item is keyed by the parent directory and the index number */
2545 key.objectid = backref->dir;
2546 key.type = BTRFS_DIR_INDEX_KEY;
2547 key.offset = backref->index;
2548 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2551 leaf = path.nodes[0];
2552 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location stored in the dir item: the inode item of rec->ino */
2554 disk_key.objectid = cpu_to_le64(rec->ino);
2555 disk_key.type = BTRFS_INODE_ITEM_KEY;
2556 disk_key.offset = 0;
2558 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2559 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2560 btrfs_set_dir_data_len(leaf, dir_item, 0);
2561 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written directly after the dir item header */
2562 name_ptr = (unsigned long)(dir_item + 1);
2563 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2564 btrfs_mark_buffer_dirty(leaf);
2565 btrfs_release_path(&path);
2566 btrfs_commit_transaction(trans, root);
2568 backref->found_dir_index = 1;
2569 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2570 BUG_ON(IS_ERR(dir_rec));
/* Count the new name in the directory size and re-evaluate the isize error */
2573 dir_rec->found_size += backref->namelen;
2574 if (dir_rec->found_size == dir_rec->isize &&
2575 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2576 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2577 if (dir_rec->found_size != dir_rec->isize)
2578 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root, using a
 * transaction of its own.
 *
 * The entry is located with btrfs_lookup_dir_index(); it is then removed
 * either with btrfs_del_item() or with btrfs_delete_one_dir_name().
 * The path is released and the transaction committed on the paths shown.
 */
2583 static int delete_dir_index(struct btrfs_root *root,
2584 struct inode_backref *backref)
2586 struct btrfs_trans_handle *trans;
2587 struct btrfs_dir_item *di;
2588 struct btrfs_path path;
2591 trans = btrfs_start_transaction(root, 1);
2593 return PTR_ERR(trans);
2595 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2596 (unsigned long long)backref->dir,
2597 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2598 (unsigned long long)root->objectid);
2600 btrfs_init_path(&path);
/* Look the entry up by directory, name and index */
2601 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2602 backref->name, backref->namelen,
2603 backref->index, -1);
2606 btrfs_release_path(&path);
2607 btrfs_commit_transaction(trans, root);
2614 ret = btrfs_del_item(trans, root, &path);
2616 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2618 btrfs_release_path(&path);
2619 btrfs_commit_transaction(trans, root);
/*
 * Insert an inode item for @ino through insert_inode_item(), passing zero
 * for the three numeric arguments that follow the inode number.
 *
 * The mode is S_IFDIR when filetype is BTRFS_FT_DIR and S_IFREG otherwise,
 * combined with permission bits 0755.
 */
2623 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2624 struct btrfs_root *root, u64 ino,
2627 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2629 return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Create the missing inode item for @rec inside a transaction of its own.
 *
 * @root_dir: when non-zero the link count is forced to 1, otherwise
 *            rec->found_link is used.
 *
 * A record that has a dir item becomes a directory sized by
 * rec->found_size; any other record becomes a regular file sized by
 * rec->extent_end. Both get permission bits 0755.
 */
2632 static int create_inode_item(struct btrfs_root *root,
2633 struct inode_record *rec, int root_dir)
2635 struct btrfs_trans_handle *trans;
2641 trans = btrfs_start_transaction(root, 1);
2642 if (IS_ERR(trans)) {
2643 ret = PTR_ERR(trans);
2647 nlink = root_dir ? 1 : rec->found_link;
2648 if (rec->found_dir_item) {
/* A dir item together with file extents is ambiguous; warn and pick dir */
2649 if (rec->found_file_extent)
2650 fprintf(stderr, "root %llu inode %llu has both a dir "
2651 "item and extents, unsure if it is a dir or a "
2652 "regular file so setting it as a directory\n",
2653 (unsigned long long)root->objectid,
2654 (unsigned long long)rec->ino);
2655 mode = S_IFDIR | 0755;
2656 size = rec->found_size;
2657 } else if (!rec->found_dir_item) {
/* NOTE(review): the condition above repeats the negation of the first test */
2658 size = rec->extent_end;
2659 mode = S_IFREG | 0755;
2662 ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2664 btrfs_commit_transaction(trans, root);
/*
 * Go through the backrefs of @rec and repair what can be repaired in @root.
 *
 * Depending on the delete flag and on which of dir index, dir item and inode
 * ref were found for a backref, the visible actions are:
 *  - create a missing inode item for the root directory;
 *  - delete a dir index that has no inode ref, or whose index does not match;
 *  - add a missing dir index when dir item and inode ref both exist;
 *  - insert a dir index/item pair when only the inode ref exists;
 *  - create a missing inode item when all three backref parts exist.
 *
 * Returns ret when it is non-zero, otherwise the repaired counter.
 */
2668 static int repair_inode_backrefs(struct btrfs_root *root,
2669 struct inode_record *rec,
2670 struct cache_tree *inode_cache,
2673 struct inode_backref *tmp, *backref;
2674 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The safe iterator is needed because entries are removed with list_del() */
2678 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2679 if (!delete && rec->ino == root_dirid) {
2680 if (!rec->found_inode_item) {
2681 ret = create_inode_item(root, rec, 1);
2688 /* Index 0 for root dir's are special, don't mess with it */
2689 if (rec->ino == root_dirid && backref->index == 0)
2693 ((backref->found_dir_index && !backref->found_inode_ref) ||
2694 (backref->found_dir_index && backref->found_inode_ref &&
2695 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2696 ret = delete_dir_index(root, backref);
2700 list_del(&backref->list);
/* Dir item and inode ref agree, only the dir index is missing */
2705 if (!delete && !backref->found_dir_index &&
2706 backref->found_dir_item && backref->found_inode_ref) {
2707 ret = add_missing_dir_index(root, inode_cache, rec,
2712 if (backref->found_dir_item &&
2713 backref->found_dir_index) {
2714 if (!backref->errors &&
2715 backref->found_inode_ref) {
2716 list_del(&backref->list);
/* Only the inode ref exists: rebuild the dir index/item pair from it */
2723 if (!delete && (!backref->found_dir_index &&
2724 !backref->found_dir_item &&
2725 backref->found_inode_ref)) {
2726 struct btrfs_trans_handle *trans;
2727 struct btrfs_key location;
2729 ret = check_dir_conflict(root, backref->name,
2735 * let nlink fixing routine to handle it,
2736 * which can do it better.
2741 location.objectid = rec->ino;
2742 location.type = BTRFS_INODE_ITEM_KEY;
2743 location.offset = 0;
2745 trans = btrfs_start_transaction(root, 1);
2746 if (IS_ERR(trans)) {
2747 ret = PTR_ERR(trans);
2750 fprintf(stderr, "adding missing dir index/item pair "
2752 (unsigned long long)rec->ino);
2753 ret = btrfs_insert_dir_item(trans, root, backref->name,
2755 backref->dir, &location,
2756 imode_to_type(rec->imode),
2759 btrfs_commit_transaction(trans, root);
/* All backref parts are present and consistent, only the inode item is missing */
2763 if (!delete && (backref->found_inode_ref &&
2764 backref->found_dir_index &&
2765 backref->found_dir_item &&
2766 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2767 !rec->found_inode_item)) {
2768 ret = create_inode_item(root, rec, 0);
2775 return ret ? ret : repaired;
2779 * To determine the file type for nlink/inode_item repair
2781 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2782 * Return -ENOENT if file type is not found.
/*
 * Determine the file type of @rec and store it in *type.
 *
 * When the inode item was found the type is derived from rec->imode.
 * Otherwise the filetype of the first backref that has a dir index or a dir
 * item is used.
 */
2784 static int find_file_type(struct inode_record *rec, u8 *type)
2786 struct inode_backref *backref;
2788 /* For inode item recovered case */
2789 if (rec->found_inode_item) {
2790 *type = imode_to_type(rec->imode);
/* Fall back to the filetype recorded with a directory entry */
2794 list_for_each_entry(backref, &rec->backrefs, list) {
2795 if (backref->found_dir_index || backref->found_dir_item) {
2796 *type = backref->filetype;
2804 * To determine the file name for nlink repair
2806 * Return 0 if file name is found, set name and namelen.
2807 * Return -ENOENT if file name is not found.
2809 static int find_file_name(struct inode_record *rec,
2810 char *name, int *namelen)
2812 struct inode_backref *backref;
2814 list_for_each_entry(backref, &rec->backrefs, list) {
2815 if (backref->found_dir_index || backref->found_dir_item ||
2816 backref->found_inode_ref) {
2817 memcpy(name, backref->name, backref->namelen);
2818 *namelen = backref->namelen;
2825 /* Reset the nlink of the inode to the correct one */
2826 static int reset_nlink(struct btrfs_trans_handle *trans,
2827 struct btrfs_root *root,
2828 struct btrfs_path *path,
2829 struct inode_record *rec)
2831 struct inode_backref *backref;
2832 struct inode_backref *tmp;
2833 struct btrfs_key key;
2834 struct btrfs_inode_item *inode_item;
2837 /* We don't believe this either, reset it and iterate backref */
2838 rec->found_link = 0;
2840 /* Remove all backref including the valid ones */
2841 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2842 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2843 backref->index, backref->name,
2844 backref->namelen, 0);
2848 /* remove invalid backref, so it won't be added back */
2849 if (!(backref->found_dir_index &&
2850 backref->found_dir_item &&
2851 backref->found_inode_ref)) {
2852 list_del(&backref->list);
2859 /* Set nlink to 0 */
2860 key.objectid = rec->ino;
2861 key.type = BTRFS_INODE_ITEM_KEY;
2863 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2870 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2871 struct btrfs_inode_item);
2872 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2873 btrfs_mark_buffer_dirty(path->nodes[0]);
2874 btrfs_release_path(path);
2877 * Add back valid inode_ref/dir_item/dir_index,
2878 * add_link() will handle the nlink inc, so new nlink must be correct
2880 list_for_each_entry(backref, &rec->backrefs, list) {
2881 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2882 backref->name, backref->namelen,
2883 backref->filetype, &backref->index, 1, 0);
2888 btrfs_release_path(path);
2892 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2893 struct btrfs_root *root,
2894 struct btrfs_path *path,
2895 struct inode_record *rec)
2897 char namebuf[BTRFS_NAME_LEN] = {0};
2900 int name_recovered = 0;
2901 int type_recovered = 0;
2905 * Get file name and type first before these invalid inode ref
2906 * are deleted by remove_all_invalid_backref()
2908 name_recovered = !find_file_name(rec, namebuf, &namelen);
2909 type_recovered = !find_file_type(rec, &type);
2911 if (!name_recovered) {
2912 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2913 rec->ino, rec->ino);
2914 namelen = count_digits(rec->ino);
2915 sprintf(namebuf, "%llu", rec->ino);
2918 if (!type_recovered) {
2919 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2921 type = BTRFS_FT_REG_FILE;
2925 ret = reset_nlink(trans, root, path, rec);
2928 "Failed to reset nlink for inode %llu: %s\n",
2929 rec->ino, strerror(-ret));
2933 if (rec->found_link == 0) {
2934 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2935 namebuf, namelen, type,
2936 (u64 *)&rec->found_link);
2940 printf("Fixed the nlink of inode %llu\n", rec->ino);
2943 * Clear the flag anyway, or we will loop forever for the same inode
2944 * as it will not be removed from the bad inode list and the dead loop
2947 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2948 btrfs_release_path(path);
2953 * Check if there is any normal(reg or prealloc) file extent for given
2955 * This is used to determine the file type when neither its dir_index/item or
2956 * inode_item exists.
2958 * This will *NOT* report error, if any error happens, just consider it does
2959 * not have any normal file extent.
2961 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2963 struct btrfs_path path;
2964 struct btrfs_key key;
2965 struct btrfs_key found_key;
2966 struct btrfs_file_extent_item *fi;
2970 btrfs_init_path(&path);
2972 key.type = BTRFS_EXTENT_DATA_KEY;
2975 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2980 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2981 ret = btrfs_next_leaf(root, &path);
2988 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2990 if (found_key.objectid != ino ||
2991 found_key.type != BTRFS_EXTENT_DATA_KEY)
2993 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2994 struct btrfs_file_extent_item);
2995 type = btrfs_file_extent_type(path.nodes[0], fi);
2996 if (type != BTRFS_FILE_EXTENT_INLINE) {
3002 btrfs_release_path(&path);
3006 static u32 btrfs_type_to_imode(u8 type)
3008 static u32 imode_by_btrfs_type[] = {
3009 [BTRFS_FT_REG_FILE] = S_IFREG,
3010 [BTRFS_FT_DIR] = S_IFDIR,
3011 [BTRFS_FT_CHRDEV] = S_IFCHR,
3012 [BTRFS_FT_BLKDEV] = S_IFBLK,
3013 [BTRFS_FT_FIFO] = S_IFIFO,
3014 [BTRFS_FT_SOCK] = S_IFSOCK,
3015 [BTRFS_FT_SYMLINK] = S_IFLNK,
3018 return imode_by_btrfs_type[(type)];
3021 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3022 struct btrfs_root *root,
3023 struct btrfs_path *path,
3024 struct inode_record *rec)
3028 int type_recovered = 0;
3031 printf("Trying to rebuild inode:%llu\n", rec->ino);
3033 type_recovered = !find_file_type(rec, &filetype);
3036 * Try to determine inode type if type not found.
3038 * For found regular file extent, it must be FILE.
3039 * For found dir_item/index, it must be DIR.
3041 * For undetermined one, use FILE as fallback.
3044 * 1. If found backref(inode_index/item is already handled) to it,
3046 * Need new inode-inode ref structure to allow search for that.
3048 if (!type_recovered) {
3049 if (rec->found_file_extent &&
3050 find_normal_file_extent(root, rec->ino)) {
3052 filetype = BTRFS_FT_REG_FILE;
3053 } else if (rec->found_dir_item) {
3055 filetype = BTRFS_FT_DIR;
3056 } else if (!list_empty(&rec->orphan_extents)) {
3058 filetype = BTRFS_FT_REG_FILE;
3060 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3063 filetype = BTRFS_FT_REG_FILE;
3067 ret = btrfs_new_inode(trans, root, rec->ino,
3068 mode | btrfs_type_to_imode(filetype));
3073 * Here inode rebuild is done, we only rebuild the inode item,
3074 * don't repair the nlink(like move to lost+found).
3075 * That is the job of nlink repair.
3077 * We just fill the record and return
3079 rec->found_dir_item = 1;
3080 rec->imode = mode | btrfs_type_to_imode(filetype);
3082 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3083 /* Ensure the inode_nlinks repair function will be called */
3084 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3089 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3090 struct btrfs_root *root,
3091 struct btrfs_path *path,
3092 struct inode_record *rec)
3094 struct orphan_data_extent *orphan;
3095 struct orphan_data_extent *tmp;
3098 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3100 * Check for conflicting file extents
3102 * Here we don't know whether the extents is compressed or not,
3103 * so we can only assume it not compressed nor data offset,
3104 * and use its disk_len as extent length.
3106 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3107 orphan->offset, orphan->disk_len, 0);
3108 btrfs_release_path(path);
3113 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3114 orphan->disk_bytenr, orphan->disk_len);
3115 ret = btrfs_free_extent(trans,
3116 root->fs_info->extent_root,
3117 orphan->disk_bytenr, orphan->disk_len,
3118 0, root->objectid, orphan->objectid,
3123 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3124 orphan->offset, orphan->disk_bytenr,
3125 orphan->disk_len, orphan->disk_len);
3129 /* Update file size info */
3130 rec->found_size += orphan->disk_len;
3131 if (rec->found_size == rec->nbytes)
3132 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3134 /* Update the file extent hole info too */
3135 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3139 if (RB_EMPTY_ROOT(&rec->holes))
3140 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3142 list_del(&orphan->list);
3145 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3150 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3151 struct btrfs_root *root,
3152 struct btrfs_path *path,
3153 struct inode_record *rec)
3155 struct rb_node *node;
3156 struct file_extent_hole *hole;
3160 node = rb_first(&rec->holes);
3164 hole = rb_entry(node, struct file_extent_hole, node);
3165 ret = btrfs_punch_hole(trans, root, rec->ino,
3166 hole->start, hole->len);
3169 ret = del_file_extent_hole(&rec->holes, hole->start,
3173 if (RB_EMPTY_ROOT(&rec->holes))
3174 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3175 node = rb_first(&rec->holes);
3177 /* special case for a file losing all its file extent */
3179 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3180 round_up(rec->isize,
3181 root->fs_info->sectorsize));
3185 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3186 rec->ino, root->objectid);
3191 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3193 struct btrfs_trans_handle *trans;
3194 struct btrfs_path path;
3197 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3198 I_ERR_NO_ORPHAN_ITEM |
3199 I_ERR_LINK_COUNT_WRONG |
3200 I_ERR_NO_INODE_ITEM |
3201 I_ERR_FILE_EXTENT_ORPHAN |
3202 I_ERR_FILE_EXTENT_DISCOUNT|
3203 I_ERR_FILE_NBYTES_WRONG)))
3207 * For nlink repair, it may create a dir and add link, so
3208 * 2 for parent(256)'s dir_index and dir_item
3209 * 2 for lost+found dir's inode_item and inode_ref
3210 * 1 for the new inode_ref of the file
3211 * 2 for lost+found dir's dir_index and dir_item for the file
3213 trans = btrfs_start_transaction(root, 7);
3215 return PTR_ERR(trans);
3217 btrfs_init_path(&path);
3218 if (rec->errors & I_ERR_NO_INODE_ITEM)
3219 ret = repair_inode_no_item(trans, root, &path, rec);
3220 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3221 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3222 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3223 ret = repair_inode_discount_extent(trans, root, &path, rec);
3224 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3225 ret = repair_inode_isize(trans, root, &path, rec);
3226 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3227 ret = repair_inode_orphan_item(trans, root, &path, rec);
3228 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3229 ret = repair_inode_nlinks(trans, root, &path, rec);
3230 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3231 ret = repair_inode_nbytes(trans, root, &path, rec);
3232 btrfs_commit_transaction(trans, root);
3233 btrfs_release_path(&path);
3237 static int check_inode_recs(struct btrfs_root *root,
3238 struct cache_tree *inode_cache)
3240 struct cache_extent *cache;
3241 struct ptr_node *node;
3242 struct inode_record *rec;
3243 struct inode_backref *backref;
3248 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3250 if (btrfs_root_refs(&root->root_item) == 0) {
3251 if (!cache_tree_empty(inode_cache))
3252 fprintf(stderr, "warning line %d\n", __LINE__);
3257 * We need to repair backrefs first because we could change some of the
3258 * errors in the inode recs.
3260 * We also need to go through and delete invalid backrefs first and then
3261 * add the correct ones second. We do this because we may get EEXIST
3262 * when adding back the correct index because we hadn't yet deleted the
3265 * For example, if we were missing a dir index then the directories
3266 * isize would be wrong, so if we fixed the isize to what we thought it
3267 * would be and then fixed the backref we'd still have a invalid fs, so
3268 * we need to add back the dir index and then check to see if the isize
3273 if (stage == 3 && !err)
3276 cache = search_cache_extent(inode_cache, 0);
3277 while (repair && cache) {
3278 node = container_of(cache, struct ptr_node, cache);
3280 cache = next_cache_extent(cache);
3282 /* Need to free everything up and rescan */
3284 remove_cache_extent(inode_cache, &node->cache);
3286 free_inode_rec(rec);
3290 if (list_empty(&rec->backrefs))
3293 ret = repair_inode_backrefs(root, rec, inode_cache,
3307 rec = get_inode_rec(inode_cache, root_dirid, 0);
3308 BUG_ON(IS_ERR(rec));
3310 ret = check_root_dir(rec);
3312 fprintf(stderr, "root %llu root dir %llu error\n",
3313 (unsigned long long)root->root_key.objectid,
3314 (unsigned long long)root_dirid);
3315 print_inode_error(root, rec);
3320 struct btrfs_trans_handle *trans;
3322 trans = btrfs_start_transaction(root, 1);
3323 if (IS_ERR(trans)) {
3324 err = PTR_ERR(trans);
3329 "root %llu missing its root dir, recreating\n",
3330 (unsigned long long)root->objectid);
3332 ret = btrfs_make_root_dir(trans, root, root_dirid);
3335 btrfs_commit_transaction(trans, root);
3339 fprintf(stderr, "root %llu root dir %llu not found\n",
3340 (unsigned long long)root->root_key.objectid,
3341 (unsigned long long)root_dirid);
3345 cache = search_cache_extent(inode_cache, 0);
3348 node = container_of(cache, struct ptr_node, cache);
3350 remove_cache_extent(inode_cache, &node->cache);
3352 if (rec->ino == root_dirid ||
3353 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3354 free_inode_rec(rec);
3358 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3359 ret = check_orphan_item(root, rec->ino);
3361 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3362 if (can_free_inode_rec(rec)) {
3363 free_inode_rec(rec);
3368 if (!rec->found_inode_item)
3369 rec->errors |= I_ERR_NO_INODE_ITEM;
3370 if (rec->found_link != rec->nlink)
3371 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3373 ret = try_repair_inode(root, rec);
3374 if (ret == 0 && can_free_inode_rec(rec)) {
3375 free_inode_rec(rec);
3381 if (!(repair && ret == 0))
3383 print_inode_error(root, rec);
3384 list_for_each_entry(backref, &rec->backrefs, list) {
3385 if (!backref->found_dir_item)
3386 backref->errors |= REF_ERR_NO_DIR_ITEM;
3387 if (!backref->found_dir_index)
3388 backref->errors |= REF_ERR_NO_DIR_INDEX;
3389 if (!backref->found_inode_ref)
3390 backref->errors |= REF_ERR_NO_INODE_REF;
3391 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3392 " namelen %u name %s filetype %d errors %x",
3393 (unsigned long long)backref->dir,
3394 (unsigned long long)backref->index,
3395 backref->namelen, backref->name,
3396 backref->filetype, backref->errors);
3397 print_ref_error(backref->errors);
3399 free_inode_rec(rec);
3401 return (error > 0) ? -1 : 0;
3404 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3407 struct cache_extent *cache;
3408 struct root_record *rec = NULL;
3411 cache = lookup_cache_extent(root_cache, objectid, 1);
3413 rec = container_of(cache, struct root_record, cache);
3415 rec = calloc(1, sizeof(*rec));
3417 return ERR_PTR(-ENOMEM);
3418 rec->objectid = objectid;
3419 INIT_LIST_HEAD(&rec->backrefs);
3420 rec->cache.start = objectid;
3421 rec->cache.size = 1;
3423 ret = insert_cache_extent(root_cache, &rec->cache);
3425 return ERR_PTR(-EEXIST);
3430 static struct root_backref *get_root_backref(struct root_record *rec,
3431 u64 ref_root, u64 dir, u64 index,
3432 const char *name, int namelen)
3434 struct root_backref *backref;
3436 list_for_each_entry(backref, &rec->backrefs, list) {
3437 if (backref->ref_root != ref_root || backref->dir != dir ||
3438 backref->namelen != namelen)
3440 if (memcmp(name, backref->name, namelen))
3445 backref = calloc(1, sizeof(*backref) + namelen + 1);
3448 backref->ref_root = ref_root;
3450 backref->index = index;
3451 backref->namelen = namelen;
3452 memcpy(backref->name, name, namelen);
3453 backref->name[namelen] = '\0';
3454 list_add_tail(&backref->list, &rec->backrefs);
3458 static void free_root_record(struct cache_extent *cache)
3460 struct root_record *rec;
3461 struct root_backref *backref;
3463 rec = container_of(cache, struct root_record, cache);
3464 while (!list_empty(&rec->backrefs)) {
3465 backref = to_root_backref(rec->backrefs.next);
3466 list_del(&backref->list);
3473 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3475 static int add_root_backref(struct cache_tree *root_cache,
3476 u64 root_id, u64 ref_root, u64 dir, u64 index,
3477 const char *name, int namelen,
3478 int item_type, int errors)
3480 struct root_record *rec;
3481 struct root_backref *backref;
3483 rec = get_root_rec(root_cache, root_id);
3484 BUG_ON(IS_ERR(rec));
3485 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3488 backref->errors |= errors;
3490 if (item_type != BTRFS_DIR_ITEM_KEY) {
3491 if (backref->found_dir_index || backref->found_back_ref ||
3492 backref->found_forward_ref) {
3493 if (backref->index != index)
3494 backref->errors |= REF_ERR_INDEX_UNMATCH;
3496 backref->index = index;
3500 if (item_type == BTRFS_DIR_ITEM_KEY) {
3501 if (backref->found_forward_ref)
3503 backref->found_dir_item = 1;
3504 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3505 backref->found_dir_index = 1;
3506 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3507 if (backref->found_forward_ref)
3508 backref->errors |= REF_ERR_DUP_ROOT_REF;
3509 else if (backref->found_dir_item)
3511 backref->found_forward_ref = 1;
3512 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3513 if (backref->found_back_ref)
3514 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3515 backref->found_back_ref = 1;
3520 if (backref->found_forward_ref && backref->found_dir_item)
3521 backref->reachable = 1;
3525 static int merge_root_recs(struct btrfs_root *root,
3526 struct cache_tree *src_cache,
3527 struct cache_tree *dst_cache)
3529 struct cache_extent *cache;
3530 struct ptr_node *node;
3531 struct inode_record *rec;
3532 struct inode_backref *backref;
3535 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3536 free_inode_recs_tree(src_cache);
3541 cache = search_cache_extent(src_cache, 0);
3544 node = container_of(cache, struct ptr_node, cache);
3546 remove_cache_extent(src_cache, &node->cache);
3549 ret = is_child_root(root, root->objectid, rec->ino);
3555 list_for_each_entry(backref, &rec->backrefs, list) {
3556 BUG_ON(backref->found_inode_ref);
3557 if (backref->found_dir_item)
3558 add_root_backref(dst_cache, rec->ino,
3559 root->root_key.objectid, backref->dir,
3560 backref->index, backref->name,
3561 backref->namelen, BTRFS_DIR_ITEM_KEY,
3563 if (backref->found_dir_index)
3564 add_root_backref(dst_cache, rec->ino,
3565 root->root_key.objectid, backref->dir,
3566 backref->index, backref->name,
3567 backref->namelen, BTRFS_DIR_INDEX_KEY,
3571 free_inode_rec(rec);
3578 static int check_root_refs(struct btrfs_root *root,
3579 struct cache_tree *root_cache)
3581 struct root_record *rec;
3582 struct root_record *ref_root;
3583 struct root_backref *backref;
3584 struct cache_extent *cache;
3590 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3591 BUG_ON(IS_ERR(rec));
3594 /* fixme: this can not detect circular references */
3597 cache = search_cache_extent(root_cache, 0);
3601 rec = container_of(cache, struct root_record, cache);
3602 cache = next_cache_extent(cache);
3604 if (rec->found_ref == 0)
3607 list_for_each_entry(backref, &rec->backrefs, list) {
3608 if (!backref->reachable)
3611 ref_root = get_root_rec(root_cache,
3613 BUG_ON(IS_ERR(ref_root));
3614 if (ref_root->found_ref > 0)
3617 backref->reachable = 0;
3619 if (rec->found_ref == 0)
3625 cache = search_cache_extent(root_cache, 0);
3629 rec = container_of(cache, struct root_record, cache);
3630 cache = next_cache_extent(cache);
3632 if (rec->found_ref == 0 &&
3633 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3634 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3635 ret = check_orphan_item(root->fs_info->tree_root,
3641 * If we don't have a root item then we likely just have
3642 * a dir item in a snapshot for this root but no actual
3643 * ref key or anything so it's meaningless.
3645 if (!rec->found_root_item)
3648 fprintf(stderr, "fs tree %llu not referenced\n",
3649 (unsigned long long)rec->objectid);
3653 if (rec->found_ref > 0 && !rec->found_root_item)
3655 list_for_each_entry(backref, &rec->backrefs, list) {
3656 if (!backref->found_dir_item)
3657 backref->errors |= REF_ERR_NO_DIR_ITEM;
3658 if (!backref->found_dir_index)
3659 backref->errors |= REF_ERR_NO_DIR_INDEX;
3660 if (!backref->found_back_ref)
3661 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3662 if (!backref->found_forward_ref)
3663 backref->errors |= REF_ERR_NO_ROOT_REF;
3664 if (backref->reachable && backref->errors)
3671 fprintf(stderr, "fs tree %llu refs %u %s\n",
3672 (unsigned long long)rec->objectid, rec->found_ref,
3673 rec->found_root_item ? "" : "not found");
3675 list_for_each_entry(backref, &rec->backrefs, list) {
3676 if (!backref->reachable)
3678 if (!backref->errors && rec->found_root_item)
3680 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3681 " index %llu namelen %u name %s errors %x\n",
3682 (unsigned long long)backref->ref_root,
3683 (unsigned long long)backref->dir,
3684 (unsigned long long)backref->index,
3685 backref->namelen, backref->name,
3687 print_ref_error(backref->errors);
3690 return errors > 0 ? 1 : 0;
3693 static int process_root_ref(struct extent_buffer *eb, int slot,
3694 struct btrfs_key *key,
3695 struct cache_tree *root_cache)
3701 struct btrfs_root_ref *ref;
3702 char namebuf[BTRFS_NAME_LEN];
3705 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3707 dirid = btrfs_root_ref_dirid(eb, ref);
3708 index = btrfs_root_ref_sequence(eb, ref);
3709 name_len = btrfs_root_ref_name_len(eb, ref);
3711 if (name_len <= BTRFS_NAME_LEN) {
3715 len = BTRFS_NAME_LEN;
3716 error = REF_ERR_NAME_TOO_LONG;
3718 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3720 if (key->type == BTRFS_ROOT_REF_KEY) {
3721 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3722 index, namebuf, len, key->type, error);
3724 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3725 index, namebuf, len, key->type, error);
3730 static void free_corrupt_block(struct cache_extent *cache)
3732 struct btrfs_corrupt_block *corrupt;
3734 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3738 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3741 * Repair the btree of the given root.
3743 * The fix is to remove the node key in corrupt_blocks cache_tree.
3744 * and rebalance the tree.
3745 * After the fix, the btree should be writeable.
3747 static int repair_btree(struct btrfs_root *root,
3748 struct cache_tree *corrupt_blocks)
3750 struct btrfs_trans_handle *trans;
3751 struct btrfs_path path;
3752 struct btrfs_corrupt_block *corrupt;
3753 struct cache_extent *cache;
3754 struct btrfs_key key;
3759 if (cache_tree_empty(corrupt_blocks))
3762 trans = btrfs_start_transaction(root, 1);
3763 if (IS_ERR(trans)) {
3764 ret = PTR_ERR(trans);
3765 fprintf(stderr, "Error starting transaction: %s\n",
3769 btrfs_init_path(&path);
3770 cache = first_cache_extent(corrupt_blocks);
3772 corrupt = container_of(cache, struct btrfs_corrupt_block,
3774 level = corrupt->level;
3775 path.lowest_level = level;
3776 key.objectid = corrupt->key.objectid;
3777 key.type = corrupt->key.type;
3778 key.offset = corrupt->key.offset;
3781 * Here we don't want to do any tree balance, since it may
3782 * cause a balance with corrupted brother leaf/node,
3783 * so ins_len set to 0 here.
3784 * Balance will be done after all corrupt node/leaf is deleted.
3786 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3789 offset = btrfs_node_blockptr(path.nodes[level],
3792 /* Remove the ptr */
3793 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3797 * Remove the corresponding extent
3798 * return value is not concerned.
3800 btrfs_release_path(&path);
3801 ret = btrfs_free_extent(trans, root, offset,
3802 root->fs_info->nodesize, 0,
3803 root->root_key.objectid, level - 1, 0);
3804 cache = next_cache_extent(cache);
3807 /* Balance the btree using btrfs_search_slot() */
3808 cache = first_cache_extent(corrupt_blocks);
3810 corrupt = container_of(cache, struct btrfs_corrupt_block,
3812 memcpy(&key, &corrupt->key, sizeof(key));
3813 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3816 /* return will always >0 since it won't find the item */
3818 btrfs_release_path(&path);
3819 cache = next_cache_extent(cache);
3822 btrfs_commit_transaction(trans, root);
3823 btrfs_release_path(&path);
3827 static int check_fs_root(struct btrfs_root *root,
3828 struct cache_tree *root_cache,
3829 struct walk_control *wc)
3835 struct btrfs_path path;
3836 struct shared_node root_node;
3837 struct root_record *rec;
3838 struct btrfs_root_item *root_item = &root->root_item;
3839 struct cache_tree corrupt_blocks;
3840 struct orphan_data_extent *orphan;
3841 struct orphan_data_extent *tmp;
3842 enum btrfs_tree_block_status status;
3843 struct node_refs nrefs;
3846 * Reuse the corrupt_block cache tree to record corrupted tree block
3848 * Unlike the usage in extent tree check, here we do it in a per
3849 * fs/subvol tree base.
3851 cache_tree_init(&corrupt_blocks);
3852 root->fs_info->corrupt_blocks = &corrupt_blocks;
3854 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3855 rec = get_root_rec(root_cache, root->root_key.objectid);
3856 BUG_ON(IS_ERR(rec));
3857 if (btrfs_root_refs(root_item) > 0)
3858 rec->found_root_item = 1;
3861 btrfs_init_path(&path);
3862 memset(&root_node, 0, sizeof(root_node));
3863 cache_tree_init(&root_node.root_cache);
3864 cache_tree_init(&root_node.inode_cache);
3865 memset(&nrefs, 0, sizeof(nrefs));
3867 /* Move the orphan extent record to corresponding inode_record */
3868 list_for_each_entry_safe(orphan, tmp,
3869 &root->orphan_data_extents, list) {
3870 struct inode_record *inode;
3872 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3874 BUG_ON(IS_ERR(inode));
3875 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3876 list_move(&orphan->list, &inode->orphan_extents);
3879 level = btrfs_header_level(root->node);
3880 memset(wc->nodes, 0, sizeof(wc->nodes));
3881 wc->nodes[level] = &root_node;
3882 wc->active_node = level;
3883 wc->root_level = level;
3885 /* We may not have checked the root block, lets do that now */
3886 if (btrfs_is_leaf(root->node))
3887 status = btrfs_check_leaf(root, NULL, root->node);
3889 status = btrfs_check_node(root, NULL, root->node);
3890 if (status != BTRFS_TREE_BLOCK_CLEAN)
3893 if (btrfs_root_refs(root_item) > 0 ||
3894 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3895 path.nodes[level] = root->node;
3896 extent_buffer_get(root->node);
3897 path.slots[level] = 0;
3899 struct btrfs_key key;
3900 struct btrfs_disk_key found_key;
3902 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3903 level = root_item->drop_level;
3904 path.lowest_level = level;
3905 if (level > btrfs_header_level(root->node) ||
3906 level >= BTRFS_MAX_LEVEL) {
3907 error("ignoring invalid drop level: %u", level);
3910 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3913 btrfs_node_key(path.nodes[level], &found_key,
3915 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3916 sizeof(found_key)));
3920 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3926 wret = walk_up_tree(root, &path, wc, &level);
3933 btrfs_release_path(&path);
3935 if (!cache_tree_empty(&corrupt_blocks)) {
3936 struct cache_extent *cache;
3937 struct btrfs_corrupt_block *corrupt;
3939 printf("The following tree block(s) is corrupted in tree %llu:\n",
3940 root->root_key.objectid);
3941 cache = first_cache_extent(&corrupt_blocks);
3943 corrupt = container_of(cache,
3944 struct btrfs_corrupt_block,
3946 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3947 cache->start, corrupt->level,
3948 corrupt->key.objectid, corrupt->key.type,
3949 corrupt->key.offset);
3950 cache = next_cache_extent(cache);
3953 printf("Try to repair the btree for root %llu\n",
3954 root->root_key.objectid);
3955 ret = repair_btree(root, &corrupt_blocks);
3957 fprintf(stderr, "Failed to repair btree: %s\n",
3960 printf("Btree for root %llu is fixed\n",
3961 root->root_key.objectid);
3965 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3969 if (root_node.current) {
3970 root_node.current->checked = 1;
3971 maybe_free_inode_rec(&root_node.inode_cache,
3975 err = check_inode_recs(root, &root_node.inode_cache);
3979 free_corrupt_blocks_tree(&corrupt_blocks);
3980 root->fs_info->corrupt_blocks = NULL;
3981 free_orphan_data_extents(&root->orphan_data_extents);
3985 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3986 struct cache_tree *root_cache)
3988 struct btrfs_path path;
3989 struct btrfs_key key;
3990 struct walk_control wc;
3991 struct extent_buffer *leaf, *tree_node;
3992 struct btrfs_root *tmp_root;
3993 struct btrfs_root *tree_root = fs_info->tree_root;
3997 if (ctx.progress_enabled) {
3998 ctx.tp = TASK_FS_ROOTS;
3999 task_start(ctx.info);
4003 * Just in case we made any changes to the extent tree that weren't
4004 * reflected into the free space cache yet.
4007 reset_cached_block_groups(fs_info);
4008 memset(&wc, 0, sizeof(wc));
4009 cache_tree_init(&wc.shared);
4010 btrfs_init_path(&path);
4015 key.type = BTRFS_ROOT_ITEM_KEY;
4016 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4021 tree_node = tree_root->node;
4023 if (tree_node != tree_root->node) {
4024 free_root_recs_tree(root_cache);
4025 btrfs_release_path(&path);
4028 leaf = path.nodes[0];
4029 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4030 ret = btrfs_next_leaf(tree_root, &path);
4036 leaf = path.nodes[0];
4038 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4039 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4040 fs_root_objectid(key.objectid)) {
4041 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4042 tmp_root = btrfs_read_fs_root_no_cache(
4045 key.offset = (u64)-1;
4046 tmp_root = btrfs_read_fs_root(
4049 if (IS_ERR(tmp_root)) {
4053 ret = check_fs_root(tmp_root, root_cache, &wc);
4054 if (ret == -EAGAIN) {
4055 free_root_recs_tree(root_cache);
4056 btrfs_release_path(&path);
4061 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4062 btrfs_free_fs_root(tmp_root);
4063 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4064 key.type == BTRFS_ROOT_BACKREF_KEY) {
4065 process_root_ref(leaf, path.slots[0], &key,
4072 btrfs_release_path(&path);
4074 free_extent_cache_tree(&wc.shared);
4075 if (!cache_tree_empty(&wc.shared))
4076 fprintf(stderr, "warning line %d\n", __LINE__);
4078 task_stop(ctx.info);
4084 * Find the @index according by @ino and name.
4085 * Notice:time efficiency is O(N)
4087 * @root: the root of the fs/file tree
4088 * @index_ret: the index as return value
4089 * @namebuf: the name to match
4090 * @name_len: the length of name to match
4091 * @file_type: the file_type of INODE_ITEM to match
4093 * Returns 0 if found and *@index_ret will be modified with right value
4094 * Returns< 0 not found and *@index_ret will be (u64)-1
4096 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4097 u64 *index_ret, char *namebuf, u32 name_len,
4100 struct btrfs_path path;
4101 struct extent_buffer *node;
4102 struct btrfs_dir_item *di;
4103 struct btrfs_key key;
4104 struct btrfs_key location;
4105 char name[BTRFS_NAME_LEN] = {0};
4117 /* search from the last index */
4118 key.objectid = dirid;
4119 key.offset = (u64)-1;
4120 key.type = BTRFS_DIR_INDEX_KEY;
4122 btrfs_init_path(&path);
4123 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4128 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4131 *index_ret = (64)-1;
4134 /* Check whether inode_id/filetype/name match */
4135 node = path.nodes[0];
4136 slot = path.slots[0];
4137 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4138 total = btrfs_item_size_nr(node, slot);
4139 while (cur < total) {
4141 len = btrfs_dir_name_len(node, di);
4142 data_len = btrfs_dir_data_len(node, di);
4144 btrfs_dir_item_key_to_cpu(node, di, &location);
4145 if (location.objectid != location_id ||
4146 location.type != BTRFS_INODE_ITEM_KEY ||
4147 location.offset != 0)
4150 filetype = btrfs_dir_type(node, di);
4151 if (file_type != filetype)
4154 if (len > BTRFS_NAME_LEN)
4155 len = BTRFS_NAME_LEN;
4157 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4158 if (len != name_len || strncmp(namebuf, name, len))
4161 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4162 *index_ret = key.offset;
4166 len += sizeof(*di) + data_len;
4167 di = (struct btrfs_dir_item *)((char *)di + len);
4173 btrfs_release_path(&path);
4178 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4179 * INODE_REF/INODE_EXTREF match.
4181 * @root: the root of the fs/file tree
4182 * @key: the key of the DIR_ITEM/DIR_INDEX; for a DIR_INDEX key whose offset
4183 * is (u64)-1, key->offset is set to the index that was looked up
4184 * @location_key: location key of the struct btrfs_dir_item to match
4185 * @name: the name to match
4186 * @namelen: the length of name
4187 * @file_type: the type of file to match
4189 * Return 0 if no error occurred.
4190 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4191 * DIR_ITEM/DIR_INDEX
4192 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4193 * and DIR_ITEM/DIR_INDEX mismatch
4195 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4196 struct btrfs_key *location_key, char *name,
4197 u32 namelen, u8 file_type)
4199 struct btrfs_path path;
4200 struct extent_buffer *node;
4201 struct btrfs_dir_item *di;
4202 struct btrfs_key location;
4203 char namebuf[BTRFS_NAME_LEN] = {0};
4212 /* get the index by traversing all index */
4213 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4214 ret = find_dir_index(root, key->objectid,
4215 location_key->objectid, &key->offset,
4216 name, namelen, file_type);
4218 ret = DIR_INDEX_MISSING;
4222 btrfs_init_path(&path);
4223 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4225 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4230 /* Check whether inode_id/filetype/name match */
4231 node = path.nodes[0];
4232 slot = path.slots[0];
4233 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4234 total = btrfs_item_size_nr(node, slot);
4235 while (cur < total) {
4236 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4237 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4239 len = btrfs_dir_name_len(node, di);
4240 data_len = btrfs_dir_data_len(node, di);
4242 btrfs_dir_item_key_to_cpu(node, di, &location);
4243 if (location.objectid != location_key->objectid ||
4244 location.type != location_key->type ||
4245 location.offset != location_key->offset)
4248 filetype = btrfs_dir_type(node, di);
4249 if (file_type != filetype)
4252 if (len > BTRFS_NAME_LEN) {
4253 len = BTRFS_NAME_LEN;
4254 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4256 key->type == BTRFS_DIR_ITEM_KEY ?
4257 "DIR_ITEM" : "DIR_INDEX",
4258 key->objectid, key->offset, len);
4260 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4262 if (len != namelen || strncmp(namebuf, name, len))
4268 len += sizeof(*di) + data_len;
4269 di = (struct btrfs_dir_item *)((char *)di + len);
4274 btrfs_release_path(&path);
4279 * Prints inode ref error message
4281 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4282 u64 index, const char *namebuf, int name_len,
4283 u8 filetype, int err)
4288 /* root dir error */
4289 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4291 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4292 root->objectid, key->objectid, key->offset, namebuf);
4297 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4298 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4299 root->objectid, key->offset,
4300 btrfs_name_hash(namebuf, name_len),
4301 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4303 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4304 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4305 root->objectid, key->offset, index,
4306 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4311 * Insert the missing inode item.
4313 * Returns 0 means success.
4314 * Returns <0 means error.
4316 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4319 struct btrfs_key key;
4320 struct btrfs_trans_handle *trans;
4321 struct btrfs_path path;
4325 key.type = BTRFS_INODE_ITEM_KEY;
4328 btrfs_init_path(&path);
4329 trans = btrfs_start_transaction(root, 1);
4330 if (IS_ERR(trans)) {
4335 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4336 if (ret < 0 || !ret)
4339 /* insert inode item */
4340 create_inode_item_lowmem(trans, root, ino, filetype);
4343 btrfs_commit_transaction(trans, root);
4346 error("failed to repair root %llu INODE ITEM[%llu] missing",
4347 root->objectid, ino);
4348 btrfs_release_path(&path);
4353 * The ternary is the dir item, the dir index and the related inode ref.
4354 * The function handles errors INODE_REF_MISSING, DIR_INDEX_MISSING,
4355 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING and DIR_ITEM_MISMATCH as follows:
4357 * If two of the three are missing or mismatched, delete the existing one.
4358 * If one of the three is missing or mismatched, add the missing one.
4360 * Returns 0 on success.
4361 * Returns non-zero on error.
4363 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4364 u64 index, char *name, int name_len, u8 filetype,
4367 struct btrfs_trans_handle *trans;
4372 * stage shall be one of the following valid values:
4373 * 0: Fine, nothing to do.
4374 * 1: One of three is wrong, so add the missing one.
4375 * 2: Two of three are wrong, so delete the existing one.
4377 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4379 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4381 if (err & (INODE_REF_MISSING))
4384 /* stage must be smaller than 3 */
4387 trans = btrfs_start_transaction(root, 1);
4389 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4394 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4395 filetype, &index, 1, 1);
4399 btrfs_commit_transaction(trans, root);
4402 error("fail to repair inode %llu name %s filetype %u",
4403 ino, name, filetype);
4405 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4406 stage == 2 ? "Delete" : "Add",
4407 ino, name, filetype);
4413 * Traverse the given INODE_REF and call find_dir_item() to find related
4414 * DIR_ITEM/DIR_INDEX.
4416 * @root: the root of the fs/file tree
4417 * @ref_key: the key of the INODE_REF
4418 * @path: the path that provides the node and slot
4419 * @refs_ret: the count of INODE_REF
4420 * @mode: the st_mode of INODE_ITEM
4421 * @name_ret: returns with the first ref's name
4422 * @namelen_ret: length of the name in @name_ret
4424 * Return 0 if no error occurred.
4426 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4427 struct btrfs_path *path, char *name_ret,
4428 u32 *namelen_ret, u64 *refs_ret, int mode)
4430 struct btrfs_key key;
4431 struct btrfs_key location;
4432 struct btrfs_inode_ref *ref;
4433 struct extent_buffer *node;
4434 char namebuf[BTRFS_NAME_LEN] = {0};
4444 int need_research = 0;
4452 /* since after repair, path and the dir item may be changed */
4453 if (need_research) {
4455 btrfs_release_path(path);
4456 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4457 /* the item was deleted, let path point to the last checked item */
4459 if (path->slots[0] == 0)
4460 btrfs_prev_leaf(root, path);
4468 location.objectid = ref_key->objectid;
4469 location.type = BTRFS_INODE_ITEM_KEY;
4470 location.offset = 0;
4471 node = path->nodes[0];
4472 slot = path->slots[0];
4474 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4475 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4476 total = btrfs_item_size_nr(node, slot);
4479 /* Update inode ref count */
4482 index = btrfs_inode_ref_index(node, ref);
4483 name_len = btrfs_inode_ref_name_len(node, ref);
4485 if (name_len <= BTRFS_NAME_LEN) {
4488 len = BTRFS_NAME_LEN;
4489 warning("root %llu INODE_REF[%llu %llu] name too long",
4490 root->objectid, ref_key->objectid, ref_key->offset);
4493 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4495 /* copy the first name found to name_ret */
4496 if (refs == 1 && name_ret) {
4497 memcpy(name_ret, namebuf, len);
4501 /* Check root dir ref */
4502 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4503 if (index != 0 || len != strlen("..") ||
4504 strncmp("..", namebuf, len) ||
4505 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4506 /* set err bits then repair will delete the ref */
4507 err |= DIR_INDEX_MISSING;
4508 err |= DIR_ITEM_MISSING;
4513 /* Find related DIR_INDEX */
4514 key.objectid = ref_key->offset;
4515 key.type = BTRFS_DIR_INDEX_KEY;
4517 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4518 imode_to_type(mode));
4520 /* Find related dir_item */
4521 key.objectid = ref_key->offset;
4522 key.type = BTRFS_DIR_ITEM_KEY;
4523 key.offset = btrfs_name_hash(namebuf, len);
4524 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4525 imode_to_type(mode));
4527 if (tmp_err && repair) {
4528 ret = repair_ternary_lowmem(root, ref_key->offset,
4529 ref_key->objectid, index, namebuf,
4530 name_len, imode_to_type(mode),
4537 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4538 imode_to_type(mode), tmp_err);
4540 len = sizeof(*ref) + name_len;
4541 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4552 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4553 * DIR_ITEM/DIR_INDEX.
4555 * @root: the root of the fs/file tree
4556 * @ref_key: the key of the INODE_EXTREF
4557 * @refs: the count of INODE_EXTREF
4558 * @mode: the st_mode of INODE_ITEM
4560 * Return 0 if no error occurred.
4562 static int check_inode_extref(struct btrfs_root *root,
4563 struct btrfs_key *ref_key,
4564 struct extent_buffer *node, int slot, u64 *refs,
4567 struct btrfs_key key;
4568 struct btrfs_key location;
4569 struct btrfs_inode_extref *extref;
4570 char namebuf[BTRFS_NAME_LEN] = {0};
4580 location.objectid = ref_key->objectid;
4581 location.type = BTRFS_INODE_ITEM_KEY;
4582 location.offset = 0;
4584 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4585 total = btrfs_item_size_nr(node, slot);
4588 /* update inode ref count */
4590 name_len = btrfs_inode_extref_name_len(node, extref);
4591 index = btrfs_inode_extref_index(node, extref);
4592 parent = btrfs_inode_extref_parent(node, extref);
4593 if (name_len <= BTRFS_NAME_LEN) {
4596 len = BTRFS_NAME_LEN;
4597 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4598 root->objectid, ref_key->objectid, ref_key->offset);
4600 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4602 /* Check root dir ref name */
4603 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4604 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4605 root->objectid, ref_key->objectid, ref_key->offset,
4607 err |= ROOT_DIR_ERROR;
4610 /* find related dir_index */
4611 key.objectid = parent;
4612 key.type = BTRFS_DIR_INDEX_KEY;
4614 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4617 /* find related dir_item */
4618 key.objectid = parent;
4619 key.type = BTRFS_DIR_ITEM_KEY;
4620 key.offset = btrfs_name_hash(namebuf, len);
4621 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4624 len = sizeof(*extref) + name_len;
4625 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4635 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4636 * DIR_ITEM/DIR_INDEX match.
4637 * Return with @index_ret.
4639 * @root: the root of the fs/file tree
4640 * @key: the key of the INODE_REF/INODE_EXTREF
4641 * @name: the name in the INODE_REF/INODE_EXTREF
4642 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4643 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4644 * value (u64)-1 means do not check index
4645 * @ext_ref: the EXTENDED_IREF feature
4647 * Return 0 if no error occurred.
4648 * Return >0 for error bitmap
4650 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4651 char *name, int namelen, u64 *index_ret,
4652 unsigned int ext_ref)
4654 struct btrfs_path path;
4655 struct btrfs_inode_ref *ref;
4656 struct btrfs_inode_extref *extref;
4657 struct extent_buffer *node;
4658 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4671 btrfs_init_path(&path);
4672 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4674 ret = INODE_REF_MISSING;
4678 node = path.nodes[0];
4679 slot = path.slots[0];
4681 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4682 total = btrfs_item_size_nr(node, slot);
4684 /* Iterate all entry of INODE_REF */
4685 while (cur < total) {
4686 ret = INODE_REF_MISSING;
4688 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4689 ref_index = btrfs_inode_ref_index(node, ref);
4690 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4693 if (cur + sizeof(*ref) + ref_namelen > total ||
4694 ref_namelen > BTRFS_NAME_LEN) {
4695 warning("root %llu INODE %s[%llu %llu] name too long",
4697 key->type == BTRFS_INODE_REF_KEY ?
4699 key->objectid, key->offset);
4701 if (cur + sizeof(*ref) > total)
4703 len = min_t(u32, total - cur - sizeof(*ref),
4709 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4712 if (len != namelen || strncmp(ref_namebuf, name, len))
4715 *index_ret = ref_index;
4719 len = sizeof(*ref) + ref_namelen;
4720 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4725 /* Skip if not support EXTENDED_IREF feature */
4729 btrfs_release_path(&path);
4730 btrfs_init_path(&path);
4732 dir_id = key->offset;
4733 key->type = BTRFS_INODE_EXTREF_KEY;
4734 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4736 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4738 ret = INODE_REF_MISSING;
4742 node = path.nodes[0];
4743 slot = path.slots[0];
4745 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4747 total = btrfs_item_size_nr(node, slot);
4749 /* Iterate all entry of INODE_EXTREF */
4750 while (cur < total) {
4751 ret = INODE_REF_MISSING;
4753 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4754 ref_index = btrfs_inode_extref_index(node, extref);
4755 parent = btrfs_inode_extref_parent(node, extref);
4756 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4759 if (parent != dir_id)
4762 if (ref_namelen <= BTRFS_NAME_LEN) {
4765 len = BTRFS_NAME_LEN;
4766 warning("root %llu INODE %s[%llu %llu] name too long",
4768 key->type == BTRFS_INODE_REF_KEY ?
4770 key->objectid, key->offset);
4772 read_extent_buffer(node, ref_namebuf,
4773 (unsigned long)(extref + 1), len);
4775 if (len != namelen || strncmp(ref_namebuf, name, len))
4778 *index_ret = ref_index;
4783 len = sizeof(*extref) + ref_namelen;
4784 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4789 btrfs_release_path(&path);
4793 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4794 u64 ino, u64 index, const char *namebuf,
4795 int name_len, u8 filetype, int err)
4797 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4798 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4799 root->objectid, key->objectid, key->offset, namebuf,
4801 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4804 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4805 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4806 root->objectid, key->objectid, index, namebuf, filetype,
4807 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4810 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4812 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4813 root->objectid, ino, index, namebuf, filetype,
4814 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4817 if (err & INODE_REF_MISSING)
4819 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4820 root->objectid, ino, key->objectid, namebuf, filetype);
4825 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
4827 * Returns error after repair
4829 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
4830 u64 index, u8 filetype, char *namebuf, u32 name_len,
4835 if (err & INODE_ITEM_MISSING) {
4836 ret = repair_inode_item_missing(root, ino, filetype);
4838 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
4841 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
4842 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
4843 name_len, filetype, err);
4845 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
4846 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
4847 err &= ~(INODE_REF_MISSING);
4853 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
4856 struct btrfs_key key;
4857 struct btrfs_path path;
4859 struct btrfs_dir_item *di;
4869 key.offset = (u64)-1;
4871 btrfs_init_path(&path);
4872 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4877 /* if found, go to special case */
4882 ret = btrfs_previous_item(root, &path, ino, type);
4890 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
4892 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
4894 while (cur < total) {
4895 len = btrfs_dir_name_len(path.nodes[0], di);
4896 if (len > BTRFS_NAME_LEN)
4897 len = BTRFS_NAME_LEN;
4900 len += btrfs_dir_data_len(path.nodes[0], di);
4902 di = (struct btrfs_dir_item *)((char *)di + len);
4908 btrfs_release_path(&path);
4912 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
4919 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
4923 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
4927 *size = item_size + index_size;
4931 error("failed to count root %llu INODE[%llu] root size",
4932 root->objectid, ino);
4937 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4938 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4940 * @root: the root of the fs/file tree
4941 * @di_key: the key of the DIR_ITEM/DIR_INDEX
4943 * @size: the st_size of the INODE_ITEM
4944 * @ext_ref: the EXTENDED_IREF feature
4946 * Return 0 if no error occurred.
4947 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
4949 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
4950 struct btrfs_path *path, u64 *size,
4951 unsigned int ext_ref)
4953 struct btrfs_dir_item *di;
4954 struct btrfs_inode_item *ii;
4955 struct btrfs_key key;
4956 struct btrfs_key location;
4957 struct extent_buffer *node;
4959 char namebuf[BTRFS_NAME_LEN] = {0};
4971 int need_research = 0;
4974 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4975 * ignore index check.
4977 if (di_key->type == BTRFS_DIR_INDEX_KEY)
4978 index = di_key->offset;
4985 /* since after repair, path and the dir item may be changed */
4986 if (need_research) {
4988 err |= DIR_COUNT_AGAIN;
4989 btrfs_release_path(path);
4990 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
4991 /* the item was deleted, let path point to the last checked item */
4993 if (path->slots[0] == 0)
4994 btrfs_prev_leaf(root, path);
5002 node = path->nodes[0];
5003 slot = path->slots[0];
5005 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5006 total = btrfs_item_size_nr(node, slot);
5007 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5009 while (cur < total) {
5010 data_len = btrfs_dir_data_len(node, di);
5013 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5015 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5016 di_key->objectid, di_key->offset, data_len);
5018 name_len = btrfs_dir_name_len(node, di);
5019 if (name_len <= BTRFS_NAME_LEN) {
5022 len = BTRFS_NAME_LEN;
5023 warning("root %llu %s[%llu %llu] name too long",
5025 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5026 di_key->objectid, di_key->offset);
5028 (*size) += name_len;
5029 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5031 filetype = btrfs_dir_type(node, di);
5033 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5034 di_key->offset != btrfs_name_hash(namebuf, len)) {
5036 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5037 root->objectid, di_key->objectid, di_key->offset,
5038 namebuf, len, filetype, di_key->offset,
5039 btrfs_name_hash(namebuf, len));
5042 btrfs_dir_item_key_to_cpu(node, di, &location);
5043 /* Ignore related ROOT_ITEM check */
5044 if (location.type == BTRFS_ROOT_ITEM_KEY)
5047 btrfs_release_path(path);
5048 /* Check relative INODE_ITEM(existence/filetype) */
5049 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5051 tmp_err |= INODE_ITEM_MISSING;
5055 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5056 struct btrfs_inode_item);
5057 mode = btrfs_inode_mode(path->nodes[0], ii);
5058 if (imode_to_type(mode) != filetype) {
5059 tmp_err |= INODE_ITEM_MISMATCH;
5063 /* Check relative INODE_REF/INODE_EXTREF */
5064 key.objectid = location.objectid;
5065 key.type = BTRFS_INODE_REF_KEY;
5066 key.offset = di_key->objectid;
5067 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5070 /* check relative INDEX/ITEM */
5071 key.objectid = di_key->objectid;
5072 if (key.type == BTRFS_DIR_ITEM_KEY) {
5073 key.type = BTRFS_DIR_INDEX_KEY;
5076 key.type = BTRFS_DIR_ITEM_KEY;
5077 key.offset = btrfs_name_hash(namebuf, name_len);
5080 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5081 name_len, filetype);
5082 /* find_dir_item may find index */
5083 if (key.type == BTRFS_DIR_INDEX_KEY)
5087 if (tmp_err && repair) {
5088 ret = repair_dir_item(root, di_key->objectid,
5089 location.objectid, index,
5090 imode_to_type(mode), namebuf,
5092 if (ret != tmp_err) {
5097 btrfs_release_path(path);
5098 print_dir_item_err(root, di_key, location.objectid, index,
5099 namebuf, name_len, filetype, tmp_err);
5101 len = sizeof(*di) + name_len + data_len;
5102 di = (struct btrfs_dir_item *)((char *)di + len);
5105 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5106 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5107 root->objectid, di_key->objectid,
5114 btrfs_release_path(path);
5115 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5117 err |= ret > 0 ? -ENOENT : ret;
5122 * Wrapper function of btrfs_punch_hole.
5124 * Returns 0 means success.
5125 * Returns not 0 means error.
5127 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5130 struct btrfs_trans_handle *trans;
5133 trans = btrfs_start_transaction(root, 1);
5135 return PTR_ERR(trans);
5137 ret = btrfs_punch_hole(trans, root, ino, start, len);
5139 error("failed to add hole [%llu, %llu] in inode [%llu]",
5142 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5145 btrfs_commit_transaction(trans, root);
5150 * Check file extent datasum/hole, update the size of the file extents,
5151 * check and update the last offset of the file extent.
5153 * @root: the root of fs/file tree.
5154 * @fkey: the key of the file extent.
5155 * @nodatasum: INODE_NODATASUM feature.
5156 * @size: the sum of all EXTENT_DATA items size for this inode.
5157 * @end: the offset of the last extent.
5159 * Return 0 if no error occurred.
5161 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5162 struct extent_buffer *node, int slot,
5163 unsigned int nodatasum, u64 *size, u64 *end)
5165 struct btrfs_file_extent_item *fi;
5168 u64 extent_num_bytes;
5170 u64 csum_found; /* In byte size, sectorsize aligned */
5171 u64 search_start; /* Logical range start we search for csum */
5172 u64 search_len; /* Logical range len we search for csum */
5173 unsigned int extent_type;
5174 unsigned int is_hole;
5179 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5181 /* Check inline extent */
5182 extent_type = btrfs_file_extent_type(node, fi);
5183 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5184 struct btrfs_item *e = btrfs_item_nr(slot);
5185 u32 item_inline_len;
5187 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5188 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5189 compressed = btrfs_file_extent_compression(node, fi);
5190 if (extent_num_bytes == 0) {
5192 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5193 root->objectid, fkey->objectid, fkey->offset);
5194 err |= FILE_EXTENT_ERROR;
5196 if (!compressed && extent_num_bytes != item_inline_len) {
5198 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5199 root->objectid, fkey->objectid, fkey->offset,
5200 extent_num_bytes, item_inline_len);
5201 err |= FILE_EXTENT_ERROR;
5203 *end += extent_num_bytes;
5204 *size += extent_num_bytes;
5208 /* Check extent type */
5209 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5210 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5211 err |= FILE_EXTENT_ERROR;
5212 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5213 root->objectid, fkey->objectid, fkey->offset);
5217 /* Check REG_EXTENT/PREALLOC_EXTENT */
5218 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5219 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5220 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5221 extent_offset = btrfs_file_extent_offset(node, fi);
5222 compressed = btrfs_file_extent_compression(node, fi);
5223 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5226 * Check EXTENT_DATA csum
5228 * For plain (uncompressed) extent, we should only check the range
5229 * we're referring to, as it's possible that part of prealloc extent
5230 * has been written, and has csum:
5232 * |<--- Original large preallocated extent A ---->|
5233 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5236 * For compressed extent, we should check the whole range.
5239 search_start = disk_bytenr + extent_offset;
5240 search_len = extent_num_bytes;
5242 search_start = disk_bytenr;
5243 search_len = disk_num_bytes;
5245 ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
5246 if (csum_found > 0 && nodatasum) {
5247 err |= ODD_CSUM_ITEM;
5248 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5249 root->objectid, fkey->objectid, fkey->offset);
5250 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5251 !is_hole && (ret < 0 || csum_found < search_len)) {
5252 err |= CSUM_ITEM_MISSING;
5253 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5254 root->objectid, fkey->objectid, fkey->offset,
5255 csum_found, search_len);
5256 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5257 err |= ODD_CSUM_ITEM;
5258 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5259 root->objectid, fkey->objectid, fkey->offset, csum_found);
5262 /* Check EXTENT_DATA hole */
5263 if (!no_holes && *end != fkey->offset) {
5265 ret = punch_extent_hole(root, fkey->objectid,
5266 *end, fkey->offset - *end);
5267 if (!repair || ret) {
5268 err |= FILE_EXTENT_ERROR;
5270 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5271 root->objectid, fkey->objectid, fkey->offset,
5272 fkey->objectid, *end);
5276 *end += extent_num_bytes;
5278 *size += extent_num_bytes;
5284 * Set inode item nbytes to @nbytes
5286 * Returns 0 on success
5287 * Returns != 0 on error
5289 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5290 struct btrfs_path *path,
5291 u64 ino, u64 nbytes)
5293 struct btrfs_trans_handle *trans;
5294 struct btrfs_inode_item *ii;
5295 struct btrfs_key key;
5296 struct btrfs_key research_key;
5300 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5303 key.type = BTRFS_INODE_ITEM_KEY;
5306 trans = btrfs_start_transaction(root, 1);
5307 if (IS_ERR(trans)) {
5308 ret = PTR_ERR(trans);
5313 btrfs_release_path(path);
5314 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5322 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5323 struct btrfs_inode_item);
5324 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5325 btrfs_mark_buffer_dirty(path->nodes[0]);
5327 btrfs_commit_transaction(trans, root);
5330 error("failed to set nbytes in inode %llu root %llu",
5331 ino, root->root_key.objectid);
5333 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5334 root->root_key.objectid, nbytes);
5337 btrfs_release_path(path);
5338 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5345 * Set directory inode isize to @isize.
5347 * Returns 0 on success.
5348 * Returns != 0 on error.
5350 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5351 struct btrfs_path *path,
5354 struct btrfs_trans_handle *trans;
5355 struct btrfs_inode_item *ii;
5356 struct btrfs_key key;
5357 struct btrfs_key research_key;
5361 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5364 key.type = BTRFS_INODE_ITEM_KEY;
5367 trans = btrfs_start_transaction(root, 1);
5368 if (IS_ERR(trans)) {
5369 ret = PTR_ERR(trans);
5374 btrfs_release_path(path);
5375 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5383 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5384 struct btrfs_inode_item);
5385 btrfs_set_inode_size(path->nodes[0], ii, isize);
5386 btrfs_mark_buffer_dirty(path->nodes[0]);
5388 btrfs_commit_transaction(trans, root);
5391 error("failed to set isize in inode %llu root %llu",
5392 ino, root->root_key.objectid);
5394 printf("Set isize in inode %llu root %llu to %llu\n",
5395 ino, root->root_key.objectid, isize);
5397 btrfs_release_path(path);
5398 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5405 * Wrapper function for btrfs_add_orphan_item().
5407 * Returns 0 on success.
5408 * Returns != 0 on error.
5410 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5411 struct btrfs_path *path, u64 ino)
5413 struct btrfs_trans_handle *trans;
5414 struct btrfs_key research_key;
5418 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5420 trans = btrfs_start_transaction(root, 1);
5421 if (IS_ERR(trans)) {
5422 ret = PTR_ERR(trans);
5427 btrfs_release_path(path);
5428 ret = btrfs_add_orphan_item(trans, root, path, ino);
5430 btrfs_commit_transaction(trans, root);
5433 error("failed to add inode %llu as orphan item root %llu",
5434 ino, root->root_key.objectid);
5436 printf("Added inode %llu as orphan item root %llu\n",
5437 ino, root->root_key.objectid);
5439 btrfs_release_path(path);
5440 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5446 /* Set inode_item nlink to @ref_count.
5447 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5449 * Returns 0 on success
/*
 * Store @ref_count as the nlink of inode @ino's INODE_ITEM in @root.
 *
 * The key @path points at on entry is saved in old_key and searched for
 * again (no transaction, read-only) at the end, so @path is re-positioned
 * after the repair transaction has been committed.
 *
 * @name/@namelen:	name copied into namebuf; when either is missing the
 *			decimal inode number is used as the name instead.
 * @ref_count:		link count to write.  When it is 0 the inode is first
 *			passed to link_inode_to_lostfound(), which is given
 *			&ref_count.
 * @filetype:		forwarded to link_inode_to_lostfound() and printed.
 * @nlink:		NOTE(review): not referenced in the lines visible here;
 *			presumably receives the new link count - confirm.
 */
5451 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5452 struct btrfs_path *path, u64 ino,
5453 const char *name, u32 namelen,
5454 u64 ref_count, u8 filetype, u64 *nlink)
5456 struct btrfs_trans_handle *trans;
5457 struct btrfs_inode_item *ii;
5458 struct btrfs_key key;
5459 struct btrfs_key old_key;
5460 char namebuf[BTRFS_NAME_LEN] = {0};
5466 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5468 if (name && namelen) {
5469 ASSERT(namelen <= BTRFS_NAME_LEN);
/*
 * NOTE(review): namebuf is exactly BTRFS_NAME_LEN bytes, so a name of
 * namelen == BTRFS_NAME_LEN leaves no terminating NUL, yet namebuf is
 * printed with %s further down - confirm.
 */
5470 memcpy(namebuf, name, namelen);
5473 sprintf(namebuf, "%llu", ino);
5474 name_len = count_digits(ino);
5475 printf("Can't find file name for inode %llu, use %s instead\n",
5479 trans = btrfs_start_transaction(root, 1);
5480 if (IS_ERR(trans)) {
5481 ret = PTR_ERR(trans);
5485 btrfs_release_path(path);
5486 /* if refs is 0, put it into lostfound */
5487 if (ref_count == 0) {
5488 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5489 name_len, filetype, &ref_count);
5494 /* reset inode_item's nlink to ref_count */
5496 key.type = BTRFS_INODE_ITEM_KEY;
5499 btrfs_release_path(path);
5500 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5506 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5507 struct btrfs_inode_item);
5508 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5509 btrfs_mark_buffer_dirty(path->nodes[0]);
5514 btrfs_commit_transaction(trans, root);
/*
 * NOTE(review): both messages below read "inode %llu root %llu" but pass
 * root->objectid first and ino second, so the two numbers look swapped in
 * the output - confirm and fix the argument order.
 */
5518 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5519 root->objectid, ino, namebuf, filetype);
5521 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5522 root->objectid, ino, namebuf, filetype);
/* Re-position @path on the key it pointed at when we were called. */
5525 btrfs_release_path(path);
5526 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5533 * Check INODE_ITEM and related ITEMs (the same inode number)
5534 * 1. check link count
5535 * 2. check inode ref/extref
5536 * 3. check dir item/index
5538 * @ext_ref: the EXTENDED_IREF feature
5540 * Return 0 if no error occurred.
5541 * Return >0 on error or when the traversal is done (see the error bitmap)
/*
 * Check the INODE_ITEM that @path points at together with the following
 * items that carry the same objectid.
 *
 * The inode's size, nbytes, mode, nlink and NODATASUM flag are read first.
 * The items after it are then visited with btrfs_next_item() and dispatched
 * by key type: INODE_REF -> check_inode_ref(), INODE_EXTREF ->
 * check_inode_extref(), DIR_ITEM/DIR_INDEX -> check_dir_item(),
 * EXTENT_DATA -> check_file_extent().  There is also a case label for
 * XATTR_ITEM; the "UNKNOWN TYPE" error presumably belongs to the default
 * branch, whose label is not visible in this excerpt.
 *
 * After the walk the values read from the INODE_ITEM are compared with what
 * was counted (refs, size, extent_size, extent_end) and, when the global
 * `repair` is set, the matching *_lowmem repair helper is called.
 *
 * An item whose objectid is BTRFS_ORPHAN_OBJECTID is stepped over with
 * btrfs_next_item() instead.
 */
5543 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5544 unsigned int ext_ref)
5546 struct extent_buffer *node;
5547 struct btrfs_inode_item *ii;
5548 struct btrfs_key key;
5549 struct btrfs_key last_key;
5558 u64 extent_size = 0;
5560 unsigned int nodatasum;
5564 char namebuf[BTRFS_NAME_LEN] = {0};
5567 node = path->nodes[0];
5568 slot = path->slots[0];
5570 btrfs_item_key_to_cpu(node, &key, slot);
5571 inode_id = key.objectid;
5573 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5574 ret = btrfs_next_item(root, path);
5580 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5581 isize = btrfs_inode_size(node, ii);
5582 nbytes = btrfs_inode_nbytes(node, ii);
5583 mode = btrfs_inode_mode(node, ii);
5584 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5585 nlink = btrfs_inode_nlink(node, ii);
5586 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Remember the key we are leaving so the path can be restored later. */
5589 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5590 ret = btrfs_next_item(root, path);
5592 /* out will fill 'err' using current statistics */
5594 } else if (ret > 0) {
5599 node = path->nodes[0];
5600 slot = path->slots[0];
5601 btrfs_item_key_to_cpu(node, &key, slot);
5602 if (key.objectid != inode_id)
5606 case BTRFS_INODE_REF_KEY:
5607 ret = check_inode_ref(root, &key, path, namebuf,
5608 &name_len, &refs, mode);
5611 case BTRFS_INODE_EXTREF_KEY:
5612 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5613 warning("root %llu EXTREF[%llu %llu] isn't supported",
5614 root->objectid, key.objectid,
5616 ret = check_inode_extref(root, &key, node, slot, &refs,
5620 case BTRFS_DIR_ITEM_KEY:
5621 case BTRFS_DIR_INDEX_KEY:
5623 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5624 root->objectid, inode_id,
5625 imode_to_type(mode), key.objectid,
5628 ret = check_dir_item(root, &key, path, &size, ext_ref);
5631 case BTRFS_EXTENT_DATA_KEY:
5633 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5634 root->objectid, inode_id, key.objectid,
5637 ret = check_file_extent(root, &key, node, slot,
5638 nodatasum, &extent_size,
5642 case BTRFS_XATTR_ITEM_KEY:
5645 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5646 key.objectid, key.type, key.offset);
/* LAST_ITEM in err: go back to the last key recorded before stepping. */
5651 if (err & LAST_ITEM) {
5652 btrfs_release_path(path);
5653 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5658 /* verify INODE_ITEM nlink/isize/nbytes */
5660 if (repair && (err & DIR_COUNT_AGAIN)) {
5661 err &= ~DIR_COUNT_AGAIN;
5662 count_dir_isize(root, inode_id, &size);
5665 if ((nlink != 1 || refs != 1) && repair) {
5666 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5667 namebuf, name_len, refs, imode_to_type(mode),
5672 err |= LINK_COUNT_ERROR;
5673 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5674 root->objectid, inode_id, nlink);
5678 * Just a warning, as dir inode nbytes is just an
5679 * instructive value.
5681 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5682 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5683 root->objectid, inode_id,
5684 root->fs_info->nodesize);
5687 if (isize != size) {
5689 ret = repair_dir_isize_lowmem(root, path,
5691 if (!repair || ret) {
5694 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5695 root->objectid, inode_id, isize, size);
5699 if (nlink != refs) {
5701 ret = repair_inode_nlinks_lowmem(root, path,
5702 inode_id, namebuf, name_len, refs,
5703 imode_to_type(mode), &nlink);
5704 if (!repair || ret) {
5705 err |= LINK_COUNT_ERROR;
5707 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5708 root->objectid, inode_id, nlink, refs);
5710 } else if (!nlink) {
5712 ret = repair_inode_orphan_item_lowmem(root,
5714 if (!repair || ret) {
5716 error("root %llu INODE[%llu] is orphan item",
5717 root->objectid, inode_id);
5721 if (!nbytes && !no_holes && extent_end < isize) {
5723 ret = punch_extent_hole(root, inode_id,
5724 extent_end, isize - extent_end);
5725 if (!repair || ret) {
5726 err |= NBYTES_ERROR;
5728 "root %llu INODE[%llu] size %llu should have a file extent hole",
5729 root->objectid, inode_id, isize);
5733 if (nbytes != extent_size) {
5735 ret = repair_inode_nbytes_lowmem(root, path,
5736 inode_id, extent_size);
5737 if (!repair || ret) {
5738 err |= NBYTES_ERROR;
5740 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5741 root->objectid, inode_id, nbytes,
5747 if (err & LAST_ITEM)
5748 btrfs_next_item(root, path);
5753 * Insert the missing inode item and inode ref.
5755 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5756 * Root dir should be handled specially because root dir is the root of fs.
5758 * returns err (>0 or 0) after repair
/*
 * Try to repair the problems flagged in @err for the root directory inode
 * (BTRFS_FIRST_FREE_OBJECTID) of @root.
 *
 * INODE_REF_MISSING:	inside its own transaction, search for the
 *			(FIRST_FREE_OBJECTID, INODE_REF, FIRST_FREE_OBJECTID)
 *			key and insert an inode ref named ".." with
 *			BTRFS_FIRST_FREE_OBJECTID passed for both object ids.
 * INODE_ITEM_MISSING:	delegate to repair_inode_item_missing() with
 *			BTRFS_FT_DIR as the file type.
 *
 * Each branch clears its bit from @err; the condition guarding the clear is
 * not visible in this excerpt.
 */
5760 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5762 struct btrfs_trans_handle *trans;
5763 struct btrfs_key key;
5764 struct btrfs_path path;
5765 int filetype = BTRFS_FT_DIR;
5768 btrfs_init_path(&path);
5770 if (err & INODE_REF_MISSING) {
5771 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5772 key.type = BTRFS_INODE_REF_KEY;
5773 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5775 trans = btrfs_start_transaction(root, 1);
5776 if (IS_ERR(trans)) {
5777 ret = PTR_ERR(trans);
5781 btrfs_release_path(&path);
5782 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5786 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5787 BTRFS_FIRST_FREE_OBJECTID,
5788 BTRFS_FIRST_FREE_OBJECTID, 0);
5792 printf("Add INODE_REF[%llu %llu] name %s\n",
5793 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5795 err &= ~INODE_REF_MISSING;
5798 error("fail to insert first inode's ref");
5799 btrfs_commit_transaction(trans, root);
5802 if (err & INODE_ITEM_MISSING) {
5803 ret = repair_inode_item_missing(root,
5804 BTRFS_FIRST_FREE_OBJECTID, filetype);
5807 err &= ~INODE_ITEM_MISSING;
5811 error("fail to repair first inode");
5812 btrfs_release_path(&path);
5817 * check first root dir's inode_item and inode_ref
5819 * returns 0 means no error
5820 * returns >0 means error
5821 * returns <0 means fatal error
/*
 * Verify the root directory inode (BTRFS_FIRST_FREE_OBJECTID) of @root.
 *
 * The INODE_ITEM is looked up and its mode must be a directory, otherwise
 * INODE_ITEM_MISMATCH is set; INODE_ITEM_MISSING is set on the lookup path.
 * The ".." INODE_REF is then looked up with find_inode_ref().  The collected
 * err bits are passed through repair_fs_first_inode() (presumably only in
 * repair mode - the guard is not visible here) and whatever remains is
 * reported with error().
 *
 * A root with zero refs whose drop_progress objectid is at or beyond
 * BTRFS_FIRST_FREE_OBJECTID is being dropped and gets special treatment
 * before any lookup is done.
 *
 * Returns ret when it is negative, otherwise the err bitmap.
 */
5823 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5825 struct btrfs_path path;
5826 struct btrfs_key key;
5827 struct btrfs_inode_item *ii;
5833 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5834 key.type = BTRFS_INODE_ITEM_KEY;
5837 /* For root being dropped, we don't need to check first inode */
5838 if (btrfs_root_refs(&root->root_item) == 0 &&
5839 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5840 BTRFS_FIRST_FREE_OBJECTID)
5843 btrfs_init_path(&path);
5844 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5849 err |= INODE_ITEM_MISSING;
5851 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5852 struct btrfs_inode_item);
5853 mode = btrfs_inode_mode(path.nodes[0], ii);
5854 if (imode_to_type(mode) != BTRFS_FT_DIR)
5855 err |= INODE_ITEM_MISMATCH;
5858 /* lookup first inode ref */
5859 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5860 key.type = BTRFS_INODE_REF_KEY;
5861 /* special index value */
5864 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5870 btrfs_release_path(&path);
5873 err = repair_fs_first_inode(root, err);
5875 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5876 error("root dir INODE_ITEM is %s",
5877 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5878 if (err & INODE_REF_MISSING)
5879 error("root dir INODE_REF is missing");
5881 return ret < 0 ? ret : err;
/*
 * Look up the tree backref of @rec for @parent / @root.
 *
 * A template tree_backref is used as the search key for rb_search() on
 * rec->backref_tree with compare_extent_backref() as comparator.  The
 * visible lines set the template's parent and mark it as a full backref;
 * the branch that handles the @root case is not visible in this excerpt.
 *
 * NOTE(review): a second find_tree_backref() that walks rec->backrefs as a
 * list appears later in this file - confirm only one of them is compiled.
 */
5884 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5885 u64 parent, u64 root)
5887 struct rb_node *node;
5888 struct tree_backref *back = NULL;
5889 struct tree_backref match = {
5896 match.parent = parent;
5897 match.node.full_backref = 1;
5902 node = rb_search(&rec->backref_tree, &match.node.node,
5903 (rb_compare_keys)compare_extent_backref, NULL);
5905 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up the data backref of @rec that matches the given identity.
 *
 * A template data_backref (initialised from @found_ref and @disk_bytenr,
 * among fields not all visible here) is used as the key for rb_search() on
 * rec->backref_tree with compare_extent_backref().  The visible lines set
 * the template's parent and mark it as a full backref.
 *
 * NOTE(review): a list-based find_data_backref() with the same name appears
 * later in this file - confirm only one of them is compiled.
 */
5910 static struct data_backref *find_data_backref(struct extent_record *rec,
5911 u64 parent, u64 root,
5912 u64 owner, u64 offset,
5914 u64 disk_bytenr, u64 bytes)
5916 struct rb_node *node;
5917 struct data_backref *back = NULL;
5918 struct data_backref match = {
5925 .found_ref = found_ref,
5926 .disk_bytenr = disk_bytenr,
5930 match.parent = parent;
5931 match.node.full_backref = 1;
5936 node = rb_search(&rec->backref_tree, &match.node.node,
5937 (rb_compare_keys)compare_extent_backref, NULL);
5939 back = to_data_backref(rb_node_to_extent_backref(node));
5944 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
5945 * blocks and integrity of fs tree items.
5947 * @root: the root of the tree to be checked.
5948 * @ext_ref feature EXTENDED_IREF is enabled or not.
5949 * @account if NOT 0 means check the tree (including tree)'s treeblocks.
5950 * otherwise means check fs tree(s) items relationship and
5951 * @root MUST be a fs tree root.
5952 * Returns 0 represents OK.
5953 * Returns not 0 represents error.
/*
 * Check one tree: validate its first inode with check_fs_first_inode(), then
 * traverse it with walk_down_tree_v2() and walk_up_tree_v2().
 *
 * The traversal path starts at root->node (slot 0) when the root still has
 * refs or its drop_progress objectid is 0.  Otherwise the path is positioned
 * by searching for the drop_progress key with lowest_level set to
 * root_item->drop_level.
 *
 * NOTE(review): the last parameter is used below as `check_all`; its
 * declaration line is not visible in this excerpt.
 */
5955 static int check_btrfs_root(struct btrfs_trans_handle *trans,
5956 struct btrfs_root *root, unsigned int ext_ref,
5960 struct btrfs_path path;
5961 struct node_refs nrefs;
5962 struct btrfs_root_item *root_item = &root->root_item;
5967 memset(&nrefs, 0, sizeof(nrefs));
5970 * We need to manually check the first inode item (256)
5971 * As the following traversal function will only start from
5972 * the first inode item in the leaf, if inode item (256) is
5973 * missing we will skip it forever.
5975 ret = check_fs_first_inode(root, ext_ref);
5981 level = btrfs_header_level(root->node);
5982 btrfs_init_path(&path);
5984 if (btrfs_root_refs(root_item) > 0 ||
5985 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5986 path.nodes[level] = root->node;
5987 path.slots[level] = 0;
5988 extent_buffer_get(root->node);
5990 struct btrfs_key key;
5992 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5993 level = root_item->drop_level;
5994 path.lowest_level = level;
5995 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6002 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6003 ext_ref, check_all);
6007 /* if ret is negative, walk shall stop */
6013 ret = walk_up_tree_v2(root, &path, &level);
6015 /* Normal exit, reset ret to err */
6022 btrfs_release_path(&path);
6027 * Iterate all items in the tree and call check_inode_item() to check.
6029 * @root: the root of the tree to be checked.
6030 * @ext_ref: the EXTENDED_IREF feature
6032 * Return 0 if no error found.
6033 * Return <0 for error.
/*
 * Low-memory check of a single fs tree: reset the cached block groups of the
 * filesystem, then run check_btrfs_root() without a transaction and with 0
 * as its last argument.
 */
6035 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6037 reset_cached_block_groups(root->fs_info);
6038 return check_btrfs_root(NULL, root, ext_ref, 0);
6042 * Find the relative ref for root_ref and root_backref.
6044 * @root: the root of the root tree.
6045 * @ref_key: the key of the root ref.
6047 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF or ROOT_BACKREF item at @node/@slot has a matching
 * counterpart item in @root.
 *
 * The counterpart key swaps objectid and offset of @ref_key and flips the
 * type (ROOT_REF <-> ROOT_BACKREF).  Both items must agree on dirid,
 * sequence, name length and name.  A name length above BTRFS_NAME_LEN is
 * clamped to BTRFS_NAME_LEN, with a warning, before the name is read.
 *
 * err gets ROOT_REF_MISSING when the counterpart cannot be found and
 * ROOT_REF_MISMATCH when the two items differ.
 */
6049 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6050 struct extent_buffer *node, int slot)
6052 struct btrfs_path path;
6053 struct btrfs_key key;
6054 struct btrfs_root_ref *ref;
6055 struct btrfs_root_ref *backref;
6056 char ref_name[BTRFS_NAME_LEN] = {0};
6057 char backref_name[BTRFS_NAME_LEN] = {0};
6063 u32 backref_namelen;
6068 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6069 ref_dirid = btrfs_root_ref_dirid(node, ref);
6070 ref_seq = btrfs_root_ref_sequence(node, ref);
6071 ref_namelen = btrfs_root_ref_name_len(node, ref);
6073 if (ref_namelen <= BTRFS_NAME_LEN) {
6076 len = BTRFS_NAME_LEN;
6077 warning("%s[%llu %llu] ref_name too long",
6078 ref_key->type == BTRFS_ROOT_REF_KEY ?
6079 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are read from directly behind the btrfs_root_ref struct. */
6082 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6084 /* Find relative root_ref */
6085 key.objectid = ref_key->offset;
/* Sum minus the current type yields the opposite of REF / BACKREF. */
6086 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6087 key.offset = ref_key->objectid;
6089 btrfs_init_path(&path);
6090 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6092 err |= ROOT_REF_MISSING;
6093 error("%s[%llu %llu] couldn't find relative ref",
6094 ref_key->type == BTRFS_ROOT_REF_KEY ?
6095 "ROOT_REF" : "ROOT_BACKREF",
6096 ref_key->objectid, ref_key->offset);
6100 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6101 struct btrfs_root_ref);
6102 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6103 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6104 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6106 if (backref_namelen <= BTRFS_NAME_LEN) {
6107 len = backref_namelen;
6109 len = BTRFS_NAME_LEN;
6110 warning("%s[%llu %llu] ref_name too long",
6111 key.type == BTRFS_ROOT_REF_KEY ?
6112 "ROOT_REF" : "ROOT_BACKREF",
6113 key.objectid, key.offset);
6115 read_extent_buffer(path.nodes[0], backref_name,
6116 (unsigned long)(backref + 1), len);
6118 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6119 ref_namelen != backref_namelen ||
6120 strncmp(ref_name, backref_name, len)) {
6121 err |= ROOT_REF_MISMATCH;
6122 error("%s[%llu %llu] mismatch relative ref",
6123 ref_key->type == BTRFS_ROOT_REF_KEY ?
6124 "ROOT_REF" : "ROOT_BACKREF",
6125 ref_key->objectid, ref_key->offset);
6128 btrfs_release_path(&path);
6133 * Check all fs/file tree in low_memory mode.
6135 * 1. for fs tree root item, call check_fs_root_v2()
6136 * 2. for fs tree root ref/backref, call check_root_ref()
6138 * Return 0 if no error occurred.
/*
 * Low-memory check of every fs/subvolume tree referenced from the root tree.
 *
 * The root tree is searched from the (BTRFS_FS_TREE_OBJECTID, ROOT_ITEM)
 * key and advanced with btrfs_next_item().  For each item:
 *  - a ROOT_ITEM whose objectid passes fs_root_objectid() is read and given
 *    to check_fs_root_v2(); the tree-reloc root is read with the no-cache
 *    reader and released with btrfs_free_fs_root() afterwards;
 *  - a ROOT_REF or ROOT_BACKREF is given to check_root_ref().
 * Keys with an objectid above BTRFS_LAST_FREE_OBJECTID are tested for
 * explicitly, presumably to end the walk (the action is not visible here).
 */
6140 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6142 struct btrfs_root *tree_root = fs_info->tree_root;
6143 struct btrfs_root *cur_root = NULL;
6144 struct btrfs_path path;
6145 struct btrfs_key key;
6146 struct extent_buffer *node;
6147 unsigned int ext_ref;
6152 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6154 btrfs_init_path(&path);
6155 key.objectid = BTRFS_FS_TREE_OBJECTID;
6157 key.type = BTRFS_ROOT_ITEM_KEY;
6159 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6163 } else if (ret > 0) {
6169 node = path.nodes[0];
6170 slot = path.slots[0];
6171 btrfs_item_key_to_cpu(node, &key, slot);
6172 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6174 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6175 fs_root_objectid(key.objectid)) {
6176 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6177 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6180 key.offset = (u64)-1;
6181 cur_root = btrfs_read_fs_root(fs_info, &key);
6184 if (IS_ERR(cur_root)) {
6185 error("Fail to read fs/subvol tree: %lld",
6191 ret = check_fs_root_v2(cur_root, ext_ref);
/* Only the uncached tree-reloc root is freed here. */
6194 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6195 btrfs_free_fs_root(cur_root);
6196 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6197 key.type == BTRFS_ROOT_BACKREF_KEY) {
6198 ret = check_root_ref(tree_root, &key, node, slot);
6202 ret = btrfs_next_item(tree_root, &path);
6212 btrfs_release_path(&path);
/*
 * Entry point for the fs-roots pass.  Prints "checking fs roots" to stderr
 * unless progress reporting is enabled, then runs check_fs_roots_v2() when
 * check_mode is CHECK_MODE_LOWMEM; check_fs_roots() with @root_cache is the
 * other implementation called here.
 */
6216 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6217 struct cache_tree *root_cache)
6221 if (!ctx.progress_enabled)
6222 fprintf(stderr, "checking fs roots\n");
6223 if (check_mode == CHECK_MODE_LOWMEM)
6224 ret = check_fs_roots_v2(fs_info);
6226 ret = check_fs_roots(fs_info, root_cache);
/*
 * Validate every backref attached to @rec and the total reference count.
 *
 * Conditions tested for each backref in rec->backref_tree:
 *  - it was not found in the extent tree;
 *  - it is a tree backref that was never referenced back;
 *  - it is a data backref whose found_ref differs from num_refs, whose
 *    disk_bytenr differs from rec->start, or whose bytes differ from
 *    rec->nr.
 * The references accumulated in `found` are finally compared with
 * rec->refs.
 *
 * @print_errs:	NOTE(review): not referenced in the visible lines; presumably
 *		gates the fprintf() diagnostics - confirm.
 *
 * maybe_free_extent_rec() treats a zero return as "nothing wrong".
 */
6231 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6233 struct extent_backref *back, *tmp;
6234 struct tree_backref *tback;
6235 struct data_backref *dback;
6239 rbtree_postorder_for_each_entry_safe(back, tmp,
6240 &rec->backref_tree, node) {
6241 if (!back->found_extent_tree) {
6245 if (back->is_data) {
6246 dback = to_data_backref(back);
6247 fprintf(stderr, "Data backref %llu %s %llu"
6248 " owner %llu offset %llu num_refs %lu"
6249 " not found in extent tree\n",
6250 (unsigned long long)rec->start,
6251 back->full_backref ?
6253 back->full_backref ?
6254 (unsigned long long)dback->parent:
6255 (unsigned long long)dback->root,
6256 (unsigned long long)dback->owner,
6257 (unsigned long long)dback->offset,
6258 (unsigned long)dback->num_refs);
6260 tback = to_tree_backref(back);
6261 fprintf(stderr, "Tree backref %llu parent %llu"
6262 " root %llu not found in extent tree\n",
6263 (unsigned long long)rec->start,
6264 (unsigned long long)tback->parent,
6265 (unsigned long long)tback->root);
6268 if (!back->is_data && !back->found_ref) {
6272 tback = to_tree_backref(back);
6273 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6274 (unsigned long long)rec->start,
6275 back->full_backref ? "parent" : "root",
6276 back->full_backref ?
6277 (unsigned long long)tback->parent :
6278 (unsigned long long)tback->root, back);
6280 if (back->is_data) {
6281 dback = to_data_backref(back);
6282 if (dback->found_ref != dback->num_refs) {
6286 fprintf(stderr, "Incorrect local backref count"
6287 " on %llu %s %llu owner %llu"
6288 " offset %llu found %u wanted %u back %p\n",
6289 (unsigned long long)rec->start,
6290 back->full_backref ?
6292 back->full_backref ?
6293 (unsigned long long)dback->parent:
6294 (unsigned long long)dback->root,
6295 (unsigned long long)dback->owner,
6296 (unsigned long long)dback->offset,
6297 dback->found_ref, dback->num_refs, back);
6299 if (dback->disk_bytenr != rec->start) {
6303 fprintf(stderr, "Backref disk bytenr does not"
6304 " match extent record, bytenr=%llu, "
6305 "ref bytenr=%llu\n",
6306 (unsigned long long)rec->start,
6307 (unsigned long long)dback->disk_bytenr);
6310 if (dback->bytes != rec->nr) {
6314 fprintf(stderr, "Backref bytes do not match "
6315 "extent backref, bytenr=%llu, ref "
6316 "bytes=%llu, backref bytes=%llu\n",
6317 (unsigned long long)rec->start,
6318 (unsigned long long)rec->nr,
6319 (unsigned long long)dback->bytes);
6322 if (!back->is_data) {
6325 dback = to_data_backref(back);
6326 found += dback->found_ref;
6329 if (found != rec->refs) {
6333 fprintf(stderr, "Incorrect global backref count "
6334 "on %llu found %llu wanted %llu\n",
6335 (unsigned long long)rec->start,
6336 (unsigned long long)found,
6337 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs().
 * Converts the rb_node to its containing extent_backref; the statement that
 * releases it is not visible in this excerpt.
 */
6343 static void __free_one_backref(struct rb_node *node)
6345 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every backref hanging off @rec by running __free_one_backref()
 * over rec->backref_tree via rb_free_nodes().
 */
6350 static void free_all_extent_backrefs(struct extent_record *rec)
6352 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: take the first cache extent, map it back to its
 * extent_record, unlink it from the cache and free the record's backrefs.
 * The loop construct and the release of the record itself are not visible
 * in this excerpt.
 */
6355 static void free_extent_record_cache(struct cache_tree *extent_cache)
6357 struct cache_extent *cache;
6358 struct extent_record *rec;
6361 cache = first_cache_extent(extent_cache);
6364 rec = container_of(cache, struct extent_record, cache);
6365 remove_cache_extent(extent_cache, cache);
6366 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is removed only when its content and owner ref were checked,
 * the extent item ref count equals the non-zero counted refs, it has no
 * duplicates, all_backpointers_checked() returns 0, and none of the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags is set.
 * Removal unlinks it from the cache, frees its backrefs and takes it off
 * its list.
 */
6371 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6372 struct extent_record *rec)
6374 if (rec->content_checked && rec->owner_ref_checked &&
6375 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6376 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6377 !rec->bad_full_backref && !rec->crossing_stripes &&
6378 !rec->wrong_chunk_type) {
6379 remove_cache_extent(extent_cache, &rec->cache);
6380 free_all_extent_backrefs(rec);
6381 list_del_init(&rec->list);
/*
 * Decide whether tree block @buf really belongs to the root named as owner
 * in its header.
 *
 * First the backrefs of @rec are scanned, testing found_ref, full_backref
 * and whether a tree backref's root equals btrfs_header_owner(buf).
 * Otherwise the owner root is read and searched for the first key of @buf
 * with lowest_level = level + 1; the block counts as found when the parent
 * node's block pointer at the resulting slot equals buf->start.
 *
 * The final return yields 0 when found and 1 otherwise.
 */
6387 static int check_owner_ref(struct btrfs_root *root,
6388 struct extent_record *rec,
6389 struct extent_buffer *buf)
6391 struct extent_backref *node, *tmp;
6392 struct tree_backref *back;
6393 struct btrfs_root *ref_root;
6394 struct btrfs_key key;
6395 struct btrfs_path path;
6396 struct extent_buffer *parent;
6401 rbtree_postorder_for_each_entry_safe(node, tmp,
6402 &rec->backref_tree, node) {
6405 if (!node->found_ref)
6407 if (node->full_backref)
6409 back = to_tree_backref(node);
6410 if (btrfs_header_owner(buf) == back->root)
6413 BUG_ON(rec->is_root);
6415 /* try to find the block by search corresponding fs tree */
6416 key.objectid = btrfs_header_owner(buf);
6417 key.type = BTRFS_ROOT_ITEM_KEY;
6418 key.offset = (u64)-1;
6420 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6421 if (IS_ERR(ref_root))
6424 level = btrfs_header_level(buf);
6426 btrfs_item_key_to_cpu(buf, &key, 0);
6428 btrfs_node_key_to_cpu(buf, &key, 0);
6430 btrfs_init_path(&path);
/* Stop the search one level above @buf so the parent node is in the path. */
6431 path.lowest_level = level + 1;
6432 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6436 parent = path.nodes[level + 1];
6437 if (parent && buf->start == btrfs_node_blockptr(parent,
6438 path.slots[level + 1]))
6441 btrfs_release_path(&path);
6442 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec, testing each one's full_backref flag and
 * whether its tree backref root is BTRFS_EXTENT_TREE_OBJECTID.
 * record_bad_block_io() uses the result to decide whether a bad block is
 * recorded as a corrupt extent.  The return statements are not visible in
 * this excerpt.
 */
6445 static int is_extent_tree_record(struct extent_record *rec)
6447 struct extent_backref *node, *tmp;
6448 struct tree_backref *back;
6451 rbtree_postorder_for_each_entry_safe(node, tmp,
6452 &rec->backref_tree, node) {
6455 back = to_tree_backref(node);
6456 if (node->full_backref)
6458 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable block range as a corrupt extent.
 *
 * The range [start, start + len) is looked up in @extent_cache and the
 * matching record is tested with is_extent_tree_record().  The final
 * statement registers the range with btrfs_add_corrupt_extent_record()
 * under the record's parent key.  The early-exit statements after the two
 * checks are not visible in this excerpt.
 */
6465 static int record_bad_block_io(struct btrfs_fs_info *info,
6466 struct cache_tree *extent_cache,
6469 struct extent_record *rec;
6470 struct cache_extent *cache;
6471 struct btrfs_key key;
6473 cache = lookup_cache_extent(extent_cache, start, len);
6477 rec = container_of(cache, struct extent_record, cache);
6478 if (!is_extent_tree_record(rec))
6481 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6482 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level != 0): the two btrfs_key_ptr entries are read out and
 * written back crosswise; btrfs_fixup_low_keys() is then called with the
 * node's first key (the condition guarding that call is not visible here).
 *
 * Leaf: the payloads of both items are copied into temporary buffers and
 * written back crosswise, the items' offset and size fields are exchanged,
 * and both keys are rewritten with btrfs_set_item_key_unsafe().
 */
6485 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6486 struct extent_buffer *buf, int slot)
6488 if (btrfs_header_level(buf)) {
6489 struct btrfs_key_ptr ptr1, ptr2;
6491 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6492 sizeof(struct btrfs_key_ptr));
6493 read_extent_buffer(buf, &ptr2,
6494 btrfs_node_key_ptr_offset(slot + 1),
6495 sizeof(struct btrfs_key_ptr));
6496 write_extent_buffer(buf, &ptr1,
6497 btrfs_node_key_ptr_offset(slot + 1),
6498 sizeof(struct btrfs_key_ptr));
6499 write_extent_buffer(buf, &ptr2,
6500 btrfs_node_key_ptr_offset(slot),
6501 sizeof(struct btrfs_key_ptr));
6503 struct btrfs_disk_key key;
6504 btrfs_node_key(buf, &key, 0);
6505 btrfs_fixup_low_keys(root, path, &key,
6506 btrfs_header_level(buf) + 1);
6509 struct btrfs_item *item1, *item2;
6510 struct btrfs_key k1, k2;
6511 char *item1_data, *item2_data;
6512 u32 item1_offset, item2_offset, item1_size, item2_size;
6514 item1 = btrfs_item_nr(slot);
6515 item2 = btrfs_item_nr(slot + 1);
6516 btrfs_item_key_to_cpu(buf, &k1, slot);
6517 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6518 item1_offset = btrfs_item_offset(buf, item1);
6519 item2_offset = btrfs_item_offset(buf, item2);
6520 item1_size = btrfs_item_size(buf, item1);
6521 item2_size = btrfs_item_size(buf, item2);
6523 item1_data = malloc(item1_size);
6526 item2_data = malloc(item2_size);
6532 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6533 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * item2_size (and vice versa); when the two sizes differ this reads past
 * the smaller buffer - confirm whether equal sizes are guaranteed.
 */
6535 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6536 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6540 btrfs_set_item_offset(buf, item1, item2_offset);
6541 btrfs_set_item_offset(buf, item2, item1_offset);
6542 btrfs_set_item_size(buf, item1, item2_size);
6543 btrfs_set_item_size(buf, item2, item1_size);
6545 path->slots[0] = slot;
6546 btrfs_set_item_key_unsafe(root, path, &k2);
6547 path->slots[0] = slot + 1;
6548 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys are read (node keys or item keys, both variants appear
 * below) and compared with btrfs_comp_cpu_keys(); a pair that is not already
 * in ascending order is handed to swap_values().  The buffer is marked dirty
 * afterwards.
 */
6553 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6555 struct extent_buffer *buf;
6556 struct btrfs_key k1, k2;
6558 int level = path->lowest_level;
6561 buf = path->nodes[level];
6562 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6564 btrfs_node_key_to_cpu(buf, &k1, i);
6565 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6567 btrfs_item_key_to_cpu(buf, &k1, i);
6568 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6570 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6572 ret = swap_values(root, path, buf, i);
6575 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type is first tested against DIR_INDEX, EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF and EXTENT_DATA_REF, the types this repair
 * can cope with losing.  The deletion shifts the following item headers
 * down by one with memmove_extent_buffer() and decrements nritems; only the
 * header array is moved in the visible lines.  btrfs_fixup_low_keys() is
 * called with the leaf's first key (its guarding condition is not visible
 * here) and the buffer is marked dirty.
 */
6581 static int delete_bogus_item(struct btrfs_root *root,
6582 struct btrfs_path *path,
6583 struct extent_buffer *buf, int slot)
6585 struct btrfs_key key;
6586 int nritems = btrfs_header_nritems(buf);
6588 btrfs_item_key_to_cpu(buf, &key, slot);
6590 /* These are all the keys we can deal with missing. */
6591 if (key.type != BTRFS_DIR_INDEX_KEY &&
6592 key.type != BTRFS_EXTENT_ITEM_KEY &&
6593 key.type != BTRFS_METADATA_ITEM_KEY &&
6594 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6595 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6598 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6599 (unsigned long long)key.objectid, key.type,
6600 (unsigned long long)key.offset, slot, buf->start);
6601 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6602 btrfs_item_nr_offset(slot + 1),
6603 sizeof(struct btrfs_item) *
6604 (nritems - slot - 1));
6605 btrfs_set_header_nritems(buf, nritems - 1);
6607 struct btrfs_disk_key disk_key;
6609 btrfs_item_key(buf, &disk_key, 0);
6610 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6612 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE and every later item at
 * the offset of its predecessor.  An item that ends beyond that point is
 * passed to delete_bogus_item(); the "can't fix" messages cover the case
 * where that does not resolve it.  An item that ends before the expected
 * point has a shift computed and its data moved up by that many bytes with
 * memmove_extent_buffer(), after which its offset is updated.
 */
6616 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6618 struct extent_buffer *buf;
6622 /* We should only get this for leaves */
6623 BUG_ON(path->lowest_level);
6624 buf = path->nodes[0];
6626 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6627 unsigned int shift = 0, offset;
6629 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6630 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6631 if (btrfs_item_end_nr(buf, i) >
6632 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6633 ret = delete_bogus_item(root, path, buf, i);
6636 fprintf(stderr, "item is off the end of the "
6637 "leaf, can't fix\n");
6641 shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6642 btrfs_item_end_nr(buf, i);
6643 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6644 btrfs_item_offset_nr(buf, i - 1)) {
6645 if (btrfs_item_end_nr(buf, i) >
6646 btrfs_item_offset_nr(buf, i - 1)) {
6647 ret = delete_bogus_item(root, path, buf, i);
6650 fprintf(stderr, "items overlap, can't fix\n");
6654 shift = btrfs_item_offset_nr(buf, i - 1) -
6655 btrfs_item_end_nr(buf, i);
6660 printf("Shifting item nr %d by %u bytes in block %llu\n",
6661 i, shift, (unsigned long long)buf->start);
6662 offset = btrfs_item_offset_nr(buf, i);
6663 memmove_extent_buffer(buf,
6664 btrfs_leaf_data(buf) + offset + shift,
6665 btrfs_leaf_data(buf) + offset,
6666 btrfs_item_size_nr(buf, i));
6667 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6669 btrfs_mark_buffer_dirty(buf);
6673 * We may have moved things, in which case we want to exit so we don't
6674 * write those changes out. Once we have proper abort functionality in
6675 * progs this can be changed to something nicer.
6682 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6683 * then just return -EIO.
/*
 * Attempt an in-place repair of tree block @buf.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled.  btrfs_find_all_roots() collects the roots that reference
 * the block; for each one the root is read, a transaction is started, and
 * the block is located with a COW search (cow = 1) on its first key, with
 * path.skip_check_block set and lowest_level at the block's level.
 * fix_key_order() or fix_item_offset() is then applied according to
 * @status.  The transaction is committed on each of the visible exit paths.
 */
6685 static int try_to_fix_bad_block(struct btrfs_root *root,
6686 struct extent_buffer *buf,
6687 enum btrfs_tree_block_status status)
6689 struct btrfs_trans_handle *trans;
6690 struct ulist *roots;
6691 struct ulist_node *node;
6692 struct btrfs_root *search_root;
6693 struct btrfs_path path;
6694 struct ulist_iterator iter;
6695 struct btrfs_key root_key, key;
6698 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6699 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6702 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6706 btrfs_init_path(&path);
6707 ULIST_ITER_INIT(&iter);
6708 while ((node = ulist_next(roots, &iter))) {
6709 root_key.objectid = node->val;
6710 root_key.type = BTRFS_ROOT_ITEM_KEY;
6711 root_key.offset = (u64)-1;
6713 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6720 trans = btrfs_start_transaction(search_root, 0);
6721 if (IS_ERR(trans)) {
6722 ret = PTR_ERR(trans);
6726 path.lowest_level = btrfs_header_level(buf);
6727 path.skip_check_block = 1;
6728 if (path.lowest_level)
6729 btrfs_node_key_to_cpu(buf, &key, 0);
6731 btrfs_item_key_to_cpu(buf, &key, 0);
6732 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6735 btrfs_commit_transaction(trans, search_root);
6738 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6739 ret = fix_key_order(search_root, &path);
6740 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6741 ret = fix_item_offset(search_root, &path);
6743 btrfs_commit_transaction(trans, search_root);
6746 btrfs_release_path(&path);
6747 btrfs_commit_transaction(trans, search_root);
6750 btrfs_release_path(&path);
/*
 * Run the structural checks on tree block @buf and update its extent record.
 *
 * The record for [buf->start, buf->len) is looked up in @extent_cache and
 * stamped with the block's generation, level and (when the block has items)
 * the objectid of its first key.  btrfs_check_leaf() or btrfs_check_node()
 * validates the block against the record's parent key.  A block that is not
 * clean is passed to try_to_fix_bad_block(), and "bad block" is printed if
 * it is still not clean afterwards.  A clean block is marked
 * content_checked; owner_ref_checked is set directly for FULL_BACKREF
 * blocks, and check_owner_ref() is consulted in the other branch.  Finally
 * maybe_free_extent_rec() may drop the record.
 */
6754 static int check_block(struct btrfs_root *root,
6755 struct cache_tree *extent_cache,
6756 struct extent_buffer *buf, u64 flags)
6758 struct extent_record *rec;
6759 struct cache_extent *cache;
6760 struct btrfs_key key;
6761 enum btrfs_tree_block_status status;
6765 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6768 rec = container_of(cache, struct extent_record, cache);
6769 rec->generation = btrfs_header_generation(buf);
6771 level = btrfs_header_level(buf);
6772 if (btrfs_header_nritems(buf) > 0) {
6775 btrfs_item_key_to_cpu(buf, &key, 0);
6777 btrfs_node_key_to_cpu(buf, &key, 0);
6779 rec->info_objectid = key.objectid;
6781 rec->info_level = level;
6783 if (btrfs_is_leaf(buf))
6784 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6786 status = btrfs_check_node(root, &rec->parent_key, buf);
6788 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6790 status = try_to_fix_bad_block(root, buf, status);
6791 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6793 fprintf(stderr, "bad block %llu\n",
6794 (unsigned long long)buf->start);
6797 * Signal to callers we need to start the scan over
6798 * again since we'll have cowed blocks.
6803 rec->content_checked = 1;
6804 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6805 rec->owner_ref_checked = 1;
6807 ret = check_owner_ref(root, rec, buf);
6809 rec->owner_ref_checked = 1;
6813 maybe_free_extent_rec(extent_cache, rec);
/*
 * List-based lookup of a tree backref on @rec: walk rec->backrefs and
 * compare each tree backref with @parent or with @root, depending on the
 * entry's full_backref flag.
 *
 * NOTE(review): an rb-tree based function with the same name and signature
 * appears earlier in this file; this copy is presumably compiled out -
 * confirm.
 */
6818 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6819 u64 parent, u64 root)
6821 struct list_head *cur = rec->backrefs.next;
6822 struct extent_backref *node;
6823 struct tree_backref *back;
6825 while(cur != &rec->backrefs) {
6826 node = to_extent_backref(cur);
6830 back = to_tree_backref(node);
6832 if (!node->full_backref)
6834 if (parent == back->parent)
6837 if (node->full_backref)
6839 if (back->root == root)
/*
 * Allocate a tree_backref for @rec with malloc() and zero its embedded
 * extent_backref node.  One branch stores @parent and marks the node as a
 * full backref; the other clears full_backref.  The branch condition, the
 * @root assignment and the step that attaches the ref to @rec are not
 * visible in this excerpt.
 */
6847 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6848 u64 parent, u64 root)
6850 struct tree_backref *ref = malloc(sizeof(*ref));
6854 memset(&ref->node, 0, sizeof(ref->node));
6856 ref->parent = parent;
6857 ref->node.full_backref = 1;
6860 ref->node.full_backref = 0;
/*
 * List-based lookup of a data backref on @rec: walk rec->backrefs and
 * compare each data backref with @parent, or with the @root/@owner/@offset
 * triple, depending on the entry's full_backref flag.  For a triple match
 * where both @found_ref and the entry's found_ref are set, @bytes and
 * @disk_bytenr are compared as well.
 *
 * NOTE(review): an rb-tree based function with the same name and signature
 * appears earlier in this file; this copy is presumably compiled out -
 * confirm.
 */
6867 static struct data_backref *find_data_backref(struct extent_record *rec,
6868 u64 parent, u64 root,
6869 u64 owner, u64 offset,
6871 u64 disk_bytenr, u64 bytes)
6873 struct list_head *cur = rec->backrefs.next;
6874 struct extent_backref *node;
6875 struct data_backref *back;
6877 while(cur != &rec->backrefs) {
6878 node = to_extent_backref(cur);
6882 back = to_data_backref(node);
6884 if (!node->full_backref)
6886 if (parent == back->parent)
6889 if (node->full_backref)
6891 if (back->root == root && back->owner == owner &&
6892 back->offset == offset) {
6893 if (found_ref && node->found_ref &&
6894 (back->bytes != bytes ||
6895 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref for @rec with malloc(), zero its embedded node and
 * flag it as a data backref.  One branch stores @parent and sets
 * full_backref; the other stores @offset and clears full_backref.
 * ref->bytes is set to max_size, and rec->max_size is raised when max_size
 * exceeds it.
 *
 * NOTE(review): max_size is used below but its parameter declaration line
 * is not visible in this excerpt.
 */
6905 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6906 u64 parent, u64 root,
6907 u64 owner, u64 offset,
6910 struct data_backref *ref = malloc(sizeof(*ref));
6914 memset(&ref->node, 0, sizeof(ref->node));
6915 ref->node.is_data = 1;
6918 ref->parent = parent;
6921 ref->node.full_backref = 1;
6925 ref->offset = offset;
6926 ref->node.full_backref = 0;
6928 ref->bytes = max_size;
6931 if (max_size > rec->max_size)
6932 rec->max_size = max_size;
6936 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent described by @rec lives in a
 * block group whose flags do not fit the extent.
 *
 * The block group is looked up through global_info using rec->start.
 *  - Data extent (rec->metadata == 0): the block group must carry
 *    BTRFS_BLOCK_GROUP_DATA.
 *  - Metadata extent: the block group must carry SYSTEM or METADATA. If the
 *    record has backrefs in rec->backref_tree, the first one is inspected:
 *    a data backref is flagged as wrong, otherwise a backref whose root is
 *    BTRFS_CHUNK_TREE_OBJECTID selects BTRFS_BLOCK_GROUP_SYSTEM and a later
 *    statement selects BTRFS_BLOCK_GROUP_METADATA as the required type.
 * NOTE(review): what happens when no block group is found is not visible in
 * this listing - confirm.
 */
6937 static void check_extent_type(struct extent_record *rec)
6939 struct btrfs_block_group_cache *bg_cache;
6941 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6945 /* data extent, check chunk directly*/
6946 if (!rec->metadata) {
6947 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6948 rec->wrong_chunk_type = 1;
6952 /* metadata extent, check the obvious case first */
6953 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6954 BTRFS_BLOCK_GROUP_METADATA))) {
6955 rec->wrong_chunk_type = 1;
6960 * Check SYSTEM extent, as it's also marked as metadata, we can only
6961 * make sure it's a SYSTEM extent by its backref
6963 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6964 struct extent_backref *node;
6965 struct tree_backref *tback;
/* Only the first backref in the rb-tree is consulted. */
6968 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6969 if (node->is_data) {
6970 /* tree block shouldn't have data backref */
6971 rec->wrong_chunk_type = 1;
6974 tback = container_of(node, struct tree_backref, node);
6976 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6977 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6979 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6980 if (!(bg_cache->flags & bg_type))
6981 rec->wrong_chunk_type = 1;
6986 * Allocate a new extent record, fill default values from @tmpl and insert it into
6987 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6988 * the cache, otherwise it fails.
/*
 * Build a fresh extent_record from @tmpl and insert it into @extent_cache.
 *
 * Copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key. Reset to fixed defaults: num_duplicates,
 * flag_block_full_backref (FLAG_UNSET), bad_full_backref, crossing_stripes,
 * wrong_chunk_type, the three list heads and the backref rb-tree.
 *
 * tmpl->max_size == 0 trips BUG_ON. On the visible path after the insert the
 * global bytes_used counter grows by rec->nr, crossing_stripes is computed
 * with global_info->nodesize and check_extent_type() is run.
 * NOTE(review): malloc()/insert failure handling and the condition guarding
 * the crossing-stripes check are not visible in this listing - confirm.
 */
6990 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6991 struct extent_record *tmpl)
6993 struct extent_record *rec;
6996 BUG_ON(tmpl->max_size == 0);
6997 rec = malloc(sizeof(*rec));
7000 rec->start = tmpl->start;
7001 rec->max_size = tmpl->max_size;
/* The record length is the larger of the template's nr and max_size. */
7002 rec->nr = max(tmpl->nr, tmpl->max_size);
7003 rec->found_rec = tmpl->found_rec;
7004 rec->content_checked = tmpl->content_checked;
7005 rec->owner_ref_checked = tmpl->owner_ref_checked;
7006 rec->num_duplicates = 0;
7007 rec->metadata = tmpl->metadata;
7008 rec->flag_block_full_backref = FLAG_UNSET;
7009 rec->bad_full_backref = 0;
7010 rec->crossing_stripes = 0;
7011 rec->wrong_chunk_type = 0;
7012 rec->is_root = tmpl->is_root;
7013 rec->refs = tmpl->refs;
7014 rec->extent_item_refs = tmpl->extent_item_refs;
7015 rec->parent_generation = tmpl->parent_generation;
7016 INIT_LIST_HEAD(&rec->backrefs);
7017 INIT_LIST_HEAD(&rec->dups);
7018 INIT_LIST_HEAD(&rec->list);
7019 rec->backref_tree = RB_ROOT;
7020 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7021 rec->cache.start = tmpl->start;
/* NOTE(review): the cache range uses tmpl->nr while rec->nr may be larger. */
7022 rec->cache.size = tmpl->nr;
7023 ret = insert_cache_extent(extent_cache, &rec->cache);
7028 bytes_used += rec->nr;
7031 rec->crossing_stripes = check_crossing_stripes(global_info,
7032 rec->start, global_info->nodesize);
7033 check_extent_type(rec);
7038 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7040 * - refs - if found, increase refs
7041 * - is_root - if found, set
7042 * - content_checked - if found, set
7043 * - owner_ref_checked - if found, set
7045 * If not found, create a new one, initialize and insert.
/*
 * Look up the range [tmpl->start, tmpl->start + tmpl->nr) in @extent_cache
 * and merge @tmpl into the record found there; a call to
 * add_extent_rec_nolookup() at the end of the function creates a new record.
 *
 * Visible merge steps on an existing record:
 *  - when tmpl->found_rec is set and either the start differs or the record
 *    was already found, the record is queued on the global duplicate_extents
 *    list (if not queued yet) and a new extent_record describing @tmpl is
 *    appended to rec->dups, bumping rec->num_duplicates;
 *  - tmpl->extent_item_refs is copied unless this was a duplicate; a message
 *    is printed to stderr if the record already had a value;
 *  - content_checked / owner_ref_checked are only ever set, never cleared;
 *  - parent_key is always overwritten, parent_generation only when non-zero;
 *  - max_size only grows;
 *  - crossing_stripes and the chunk type are re-evaluated, then
 *    maybe_free_extent_rec() is called.
 * NOTE(review): the conditions around the lookup result, around the
 * rec->nr update and around the crossing-stripes check are not visible in
 * this listing - confirm.
 */
7047 static int add_extent_rec(struct cache_tree *extent_cache,
7048 struct extent_record *tmpl)
7050 struct extent_record *rec;
7051 struct cache_extent *cache;
7055 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7057 rec = container_of(cache, struct extent_record, cache);
7061 rec->nr = max(tmpl->nr, tmpl->max_size);
7064 * We need to make sure to reset nr to whatever the extent
7065 * record says was the real size, this way we can compare it to
7068 if (tmpl->found_rec) {
7069 if (tmpl->start != rec->start || rec->found_rec) {
7070 struct extent_record *tmp;
/* Queue the record for duplicate handling only once. */
7073 if (list_empty(&rec->list))
7074 list_add_tail(&rec->list,
7075 &duplicate_extents);
7078 * We have to do this song and dance in case we
7079 * find an extent record that falls inside of
7080 * our current extent record but does not have
7081 * the same objectid.
/*
 * NOTE(review): tmp comes from malloc(); members not assigned below stay
 * uninitialized unless they are set on lines not shown here - confirm.
 */
7083 tmp = malloc(sizeof(*tmp));
7086 tmp->start = tmpl->start;
7087 tmp->max_size = tmpl->max_size;
7090 tmp->metadata = tmpl->metadata;
7091 tmp->extent_item_refs = tmpl->extent_item_refs;
7092 INIT_LIST_HEAD(&tmp->list);
7093 list_add_tail(&tmp->list, &rec->dups);
7094 rec->num_duplicates++;
7101 if (tmpl->extent_item_refs && !dup) {
/* Report a previously recorded extent item ref count before replacing it. */
7102 if (rec->extent_item_refs) {
7103 fprintf(stderr, "block %llu rec "
7104 "extent_item_refs %llu, passed %llu\n",
7105 (unsigned long long)tmpl->start,
7106 (unsigned long long)
7107 rec->extent_item_refs,
7108 (unsigned long long)tmpl->extent_item_refs);
7110 rec->extent_item_refs = tmpl->extent_item_refs;
7114 if (tmpl->content_checked)
7115 rec->content_checked = 1;
7116 if (tmpl->owner_ref_checked)
7117 rec->owner_ref_checked = 1;
7118 memcpy(&rec->parent_key, &tmpl->parent_key,
7119 sizeof(tmpl->parent_key));
7120 if (tmpl->parent_generation)
7121 rec->parent_generation = tmpl->parent_generation;
7122 if (rec->max_size < tmpl->max_size)
7123 rec->max_size = tmpl->max_size;
7126 * A metadata extent can't cross stripe_len boundary, otherwise
7127 * kernel scrub won't be able to handle it.
7128 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7132 rec->crossing_stripes = check_crossing_stripes(
7133 global_info, rec->start,
7134 global_info->nodesize);
7135 check_extent_type(rec);
7136 maybe_free_extent_rec(extent_cache, rec);
/* Creation of a brand new record is delegated to the nolookup helper. */
7140 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache with a length of 1; a
 * template record starting at @bytenr is inserted through
 * add_extent_rec_nolookup() and the lookup is repeated. The backref is then
 * searched with find_tree_backref() and allocated with alloc_tree_backref().
 *
 * Either node.found_ref or node.found_extent_tree is set on the backref; if
 * the flag being set was already set, "Extent back ref already exists" is
 * printed to stderr. The backref is inserted into rec->backref_tree
 * (WARN_ON if rb_insert() returns non-zero), after which the chunk type is
 * re-checked and maybe_free_extent_rec() is called.
 * NOTE(review): the conditions guarding each step (missing cache entry,
 * missing backref, found_ref, insert) and the error returns are not visible
 * in this listing - confirm.
 */
7145 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7146 u64 parent, u64 root, int found_ref)
7148 struct extent_record *rec;
7149 struct tree_backref *back;
7150 struct cache_extent *cache;
7152 bool insert = false;
7154 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7156 struct extent_record tmpl;
/* Start from an all-zero template and fill in only what is known. */
7158 memset(&tmpl, 0, sizeof(tmpl));
7159 tmpl.start = bytenr;
7164 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7168 /* really a bug in cache_extent implement now */
7169 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7174 rec = container_of(cache, struct extent_record, cache);
/* The record found may cover @bytenr without starting at it. */
7175 if (rec->start != bytenr) {
7177 * Several cause, from unaligned bytenr to over lapping extents
7182 back = find_tree_backref(rec, parent, root);
7184 back = alloc_tree_backref(rec, parent, root);
7191 if (back->node.found_ref) {
7192 fprintf(stderr, "Extent back ref already exists "
7193 "for %llu parent %llu root %llu \n",
7194 (unsigned long long)bytenr,
7195 (unsigned long long)parent,
7196 (unsigned long long)root);
7198 back->node.found_ref = 1;
7200 if (back->node.found_extent_tree) {
7201 fprintf(stderr, "Extent back ref already exists "
7202 "for %llu parent %llu root %llu \n",
7203 (unsigned long long)bytenr,
7204 (unsigned long long)parent,
7205 (unsigned long long)root);
7207 back->node.found_extent_tree = 1;
/* A non-zero rb_insert() result only triggers a warning. */
7210 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7211 compare_extent_backref));
7212 check_extent_type(rec);
7213 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache with a length of 1; a
 * template record (start = @bytenr, max_size = @max_size) is inserted through
 * add_extent_rec_nolookup() and the lookup repeated. rec->max_size is raised
 * to @max_size when smaller. The backref is searched with
 * find_data_backref() and allocated with alloc_data_backref().
 *
 * Reference-found path (visible statements): num_refs must be 1 (BUG_ON);
 * if the backref was already found its bytes must equal @max_size (BUG_ON);
 * node.found_ref is set, back->found_ref incremented, bytes/disk_bytenr
 * refreshed when they differ (with an rb_erase() so the node can be
 * re-inserted), and the record is marked content_checked and
 * owner_ref_checked.
 *
 * Extent-tree path (visible statements): a message is printed to stderr if
 * node.found_extent_tree was already set; back->num_refs takes @num_refs and
 * node.found_extent_tree is set.
 *
 * Finally the backref is inserted into rec->backref_tree (WARN_ON if
 * rb_insert() returns non-zero) and maybe_free_extent_rec() is called.
 * NOTE(review): the conditions selecting between these paths and the error
 * returns are not visible in this listing - confirm.
 */
7217 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7218 u64 parent, u64 root, u64 owner, u64 offset,
7219 u32 num_refs, int found_ref, u64 max_size)
7221 struct extent_record *rec;
7222 struct data_backref *back;
7223 struct cache_extent *cache;
7225 bool insert = false;
7227 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7229 struct extent_record tmpl;
/* Start from an all-zero template and fill in only what is known. */
7231 memset(&tmpl, 0, sizeof(tmpl));
7232 tmpl.start = bytenr;
7234 tmpl.max_size = max_size;
7236 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7240 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7245 rec = container_of(cache, struct extent_record, cache);
/* Keep the record's max_size as the largest size seen so far. */
7246 if (rec->max_size < max_size)
7247 rec->max_size = max_size;
7250 * If found_ref is set then max_size is the real size and must match the
7251 * existing refs. So if we have already found a ref then we need to
7252 * make sure that this ref matches the existing one, otherwise we need
7253 * to add a new backref so we can notice that the backrefs don't match
7254 * and we need to figure out who is telling the truth. This is to
7255 * account for that awful fsync bug I introduced where we'd end up with
7256 * a btrfs_file_extent_item that would have its length include multiple
7257 * prealloc extents or point inside of a prealloc extent.
7259 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7262 back = alloc_data_backref(rec, parent, root, owner, offset,
7269 BUG_ON(num_refs != 1);
7270 if (back->node.found_ref)
7271 BUG_ON(back->bytes != max_size);
7272 back->node.found_ref = 1;
7273 back->found_ref += 1;
/* Refresh the size and disk location stored in the backref. */
7274 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7275 back->bytes = max_size;
7276 back->disk_bytenr = bytenr;
7278 /* Need to reinsert if not already in the tree */
7280 rb_erase(&back->node.node, &rec->backref_tree);
7285 rec->content_checked = 1;
7286 rec->owner_ref_checked = 1;
7288 if (back->node.found_extent_tree) {
7289 fprintf(stderr, "Extent back ref already exists "
7290 "for %llu parent %llu root %llu "
7291 "owner %llu offset %llu num_refs %lu\n",
7292 (unsigned long long)bytenr,
7293 (unsigned long long)parent,
7294 (unsigned long long)root,
7295 (unsigned long long)owner,
7296 (unsigned long long)offset,
7297 (unsigned long)num_refs);
7299 back->num_refs = num_refs;
7300 back->node.found_extent_tree = 1;
/* A non-zero rb_insert() result only triggers a warning. */
7303 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7304 compare_extent_backref));
7306 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register the range [@bytenr, @bytenr + @size) in the @seen tree and add it
 * to the @pending tree.
 *
 * NOTE(review): the check of ret between the two calls is not visible in this
 * listing; presumably a range that is already in @seen is not queued again -
 * confirm. The return value of the second add_cache_extent() is not used.
 */
7310 static int add_pending(struct cache_tree *pending,
7311 struct cache_tree *seen, u64 bytenr, u32 size)
7314 ret = add_cache_extent(seen, bytenr, size);
7317 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the next block ranges to process.
 *
 * Visible selection order:
 *  1. the first entry of @reada, stored in bits[0];
 *  2. entries of @nodes, searched from roughly 32768 bytes before @last and,
 *     as a fallback, from offset 0;
 *  3. entries of @pending from offset 0.
 * Consecutive cache entries are copied while entries remain and fewer than
 * @bits_nr slots are used. If more than 8 slots are still free afterwards,
 * entries of @pending that start after bits[0] are appended, stopping at the
 * first gap larger than 32768 bytes.
 * NOTE(review): the conditions between these steps, the updates of ret and
 * the return statements are not visible in this listing - confirm.
 */
7321 static int pick_next_pending(struct cache_tree *pending,
7322 struct cache_tree *reada,
7323 struct cache_tree *nodes,
7324 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): @last is u64 but node_start is unsigned long, so the value
 * may be truncated where unsigned long is 32 bits wide - confirm and
 * consider u64.
 */
7327 unsigned long node_start = last;
7328 struct cache_extent *cache;
7331 cache = search_cache_extent(reada, 0);
7333 bits[0].start = cache->start;
7334 bits[0].size = cache->size;
/* Back up by 32768 bytes, but never below zero. */
7339 if (node_start > 32768)
7340 node_start -= 32768;
7342 cache = search_cache_extent(nodes, node_start);
7344 cache = search_cache_extent(nodes, 0);
7347 cache = search_cache_extent(pending, 0);
7352 bits[ret].start = cache->start;
7353 bits[ret].size = cache->size;
7354 cache = next_cache_extent(cache);
7356 } while (cache && ret < bits_nr);
7362 bits[ret].start = cache->start;
7363 bits[ret].size = cache->size;
7364 cache = next_cache_extent(cache);
7366 } while (cache && ret < bits_nr);
/* Top up from @pending only when plenty of slots are still unused. */
7368 if (bits_nr - ret > 8) {
7369 u64 lookup = bits[0].start + bits[0].size;
7370 struct cache_extent *next;
7371 next = search_cache_extent(pending, lookup);
/* Gap between the previous end and this entry's start. */
7373 if (next->start - lookup > 32768)
7375 bits[ret].start = next->start;
7376 bits[ret].size = next->size;
7377 lookup = next->start + next->size;
7381 next = next_cache_extent(next);
/*
 * Per-entry teardown callback for the chunk cache: recover the chunk_record
 * that embeds @cache and unlink it from both of its lists (rec->list and
 * rec->dextents).
 * NOTE(review): the release of the record's memory is not visible in this
 * listing - confirm.
 */
7389 static void free_chunk_record(struct cache_extent *cache)
7391 struct chunk_record *rec;
7393 rec = container_of(cache, struct chunk_record, cache);
7394 list_del_init(&rec->list);
7395 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache, using free_chunk_record() as the
 * per-entry callback passed to cache_tree_free_extents().
 */
7399 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7401 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node teardown callback for the device cache: recover the device_record
 * that embeds the rb-tree @node.
 * NOTE(review): the release of the record's memory is not visible in this
 * listing - confirm.
 */
7404 static void free_device_record(struct rb_node *node)
7406 struct device_record *rec;
7408 rec = container_of(node, struct device_record, node);
/*
 * Instantiate the FREE_RB_BASED_TREE macro for "device_cache" with
 * free_device_record() as the per-node callback.
 * NOTE(review): presumably this expands to the function that frees the whole
 * device cache rb-tree - confirm against the macro definition.
 */
7412 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 * NOTE(review): the check of ret between the two steps is not visible in this
 * listing; presumably the list is only touched after a successful insert -
 * confirm.
 */
7414 int insert_block_group_record(struct block_group_tree *tree,
7415 struct block_group_record *bg_rec)
7419 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7423 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry teardown callback for the block group tree: recover the
 * block_group_record that embeds @cache and unlink it from rec->list.
 * NOTE(review): the release of the record's memory is not visible in this
 * listing - confirm.
 */
7427 static void free_block_group_record(struct cache_extent *cache)
7429 struct block_group_record *rec;
7431 rec = container_of(cache, struct block_group_record, cache);
7432 list_del_init(&rec->list);
/*
 * Release every entry of the block group tree, using
 * free_block_group_record() as the per-entry callback passed to
 * cache_tree_free_extents().
 */
7436 void free_block_group_tree(struct block_group_tree *tree)
7438 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree with insert_cache_extent2() and
 * append it to both orphan lists of @tree (no_chunk_orphans through
 * chunk_list, no_device_orphans through device_list).
 * NOTE(review): the check of ret between the insert and the list updates is
 * not visible in this listing - confirm.
 */
7441 int insert_device_extent_record(struct device_extent_tree *tree,
7442 struct device_extent_record *de_rec)
7447 * Device extent is a bit different from the other extents, because
7448 * the extents which belong to the different devices may have the
7449 * same start and size, so we need use the special extent cache
7450 * search/insert functions.
7452 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7456 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7457 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry teardown callback for the device extent tree: recover the
 * device_extent_record that embeds @cache and unlink it from chunk_list and
 * device_list, each only when that list node is currently linked.
 * NOTE(review): the release of the record's memory is not visible in this
 * listing - confirm.
 */
7461 static void free_device_extent_record(struct cache_extent *cache)
7463 struct device_extent_record *rec;
7465 rec = container_of(cache, struct device_extent_record, cache);
7466 if (!list_empty(&rec->chunk_list))
7467 list_del_init(&rec->chunk_list);
7468 if (!list_empty(&rec->device_list))
7469 list_del_init(&rec->device_list);
/*
 * Release every entry of the device extent tree, using
 * free_device_extent_record() as the per-entry callback passed to
 * cache_tree_free_extents().
 */
7473 void free_device_extent_tree(struct device_extent_tree *tree)
7475 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7478 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache. Only compiled when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is handled by an
 * add_tree_backref() call; the listing also contains an add_data_backref()
 * call that passes the v0 ref count as num_refs. In both calls key.objectid
 * is the extent bytenr and key.offset is passed as the parent.
 * NOTE(review): the else keyword, error reporting and return are not visible
 * in this listing - confirm.
 */
7479 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7480 struct extent_buffer *leaf, int slot)
7482 struct btrfs_extent_ref_v0 *ref0;
7483 struct btrfs_key key;
7486 btrfs_item_key_to_cpu(leaf, &key, slot);
7487 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7488 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7489 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7492 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7493 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(), sized by
 * btrfs_chunk_record_size() for the number of stripes in the item. The cache
 * range is [key->offset, key->offset + chunk length). The key triple, the
 * leaf's header generation and the chunk's owner, stripe_len, type, io_width,
 * io_align, sector_size, num_stripes and sub_stripes are copied, followed by
 * devid, offset and dev_uuid of every stripe.
 * NOTE(review): "memory allocation failed" is printed on stderr in the
 * allocation-failure branch; what follows the message and the return
 * statement are not visible in this listing - confirm.
 */
7499 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7500 struct btrfs_key *key,
7503 struct btrfs_chunk *ptr;
7504 struct chunk_record *rec;
7507 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7508 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* The allocation size depends on the stripe count read from the item. */
7510 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7512 fprintf(stderr, "memory allocation failed\n");
7516 INIT_LIST_HEAD(&rec->list);
7517 INIT_LIST_HEAD(&rec->dextents);
7520 rec->cache.start = key->offset;
7521 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7523 rec->generation = btrfs_header_generation(leaf);
7525 rec->objectid = key->objectid;
7526 rec->type = key->type;
7527 rec->offset = key->offset;
7529 rec->length = rec->cache.size;
7530 rec->owner = btrfs_chunk_owner(leaf, ptr);
7531 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7532 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7533 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7534 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7535 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7536 rec->num_stripes = num_stripes;
7537 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device UUID. */
7539 for (i = 0; i < rec->num_stripes; ++i) {
7540 rec->stripes[i].devid =
7541 btrfs_stripe_devid_nr(leaf, ptr, i);
7542 rec->stripes[i].offset =
7543 btrfs_stripe_offset_nr(leaf, ptr, i);
7544 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7545 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add a chunk_record for it to
 * @chunk_cache.
 *
 * The item is first passed to btrfs_check_chunk_valid(); the listing contains
 * an error message "chunk(...) is not valid, ignore it" for the failure case.
 * The record is built by btrfs_new_chunk_record() and inserted with
 * insert_cache_extent(); the listing contains a "Chunk[...] existed."
 * message for the insert-failure case.
 * NOTE(review): the conditions around both messages, any cleanup of rec and
 * the return value are not visible in this listing - confirm.
 */
7552 static int process_chunk_item(struct cache_tree *chunk_cache,
7553 struct btrfs_key *key, struct extent_buffer *eb,
7556 struct chunk_record *rec;
7557 struct btrfs_chunk *chunk;
7560 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7562 * Do extra check for this chunk item,
7564 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7565 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7566 * and owner<->key_type check.
7568 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7571 error("chunk(%llu, %llu) is not valid, ignore it",
7572 key->offset, btrfs_chunk_length(eb, chunk));
7575 rec = btrfs_new_chunk_record(eb, key, slot);
7576 ret = insert_cache_extent(chunk_cache, &rec->cache);
7578 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7579 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree, ordered by device_record_compare.
 *
 * The record stores the key triple, the header generation of @eb and the
 * device id, total bytes and bytes used read from the item. The listing
 * contains a "Device[...] existed." message for the insert-failure case.
 * NOTE(review): the record comes from malloc(); the failure branch prints
 * "memory allocation failed" but what follows it, any cleanup after a failed
 * insert and the return value are not visible in this listing - confirm.
 */
7586 static int process_device_item(struct rb_root *dev_cache,
7587 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7589 struct btrfs_dev_item *ptr;
7590 struct device_record *rec;
7593 ptr = btrfs_item_ptr(eb,
7594 slot, struct btrfs_dev_item);
7596 rec = malloc(sizeof(*rec));
7598 fprintf(stderr, "memory allocation failed\n");
/* NOTE(review): this devid value is overwritten further down. */
7602 rec->devid = key->offset;
7603 rec->generation = btrfs_header_generation(eb);
7605 rec->objectid = key->objectid;
7606 rec->type = key->type;
7607 rec->offset = key->offset;
/* Final devid comes from the item body rather than from the key. */
7609 rec->devid = btrfs_device_id(eb, ptr);
7610 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7611 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7613 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7615 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(). Its cache range is
 * [key->objectid, key->objectid + key->offset). The key triple, the leaf's
 * header generation and the block group flags read from the item are stored,
 * and rec->list is initialised as an empty list node.
 * NOTE(review): "memory allocation failed" is printed on stderr in the
 * allocation-failure branch; what follows the message and the return
 * statement are not visible in this listing - confirm.
 */
7622 struct block_group_record *
7623 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7626 struct btrfs_block_group_item *ptr;
7627 struct block_group_record *rec;
7629 rec = calloc(1, sizeof(*rec));
7631 fprintf(stderr, "memory allocation failed\n");
7635 rec->cache.start = key->objectid;
7636 rec->cache.size = key->offset;
7638 rec->generation = btrfs_header_generation(leaf);
7640 rec->objectid = key->objectid;
7641 rec->type = key->type;
7642 rec->offset = key->offset;
7644 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7645 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7647 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it into
 * @block_group_cache through insert_block_group_record().
 *
 * The listing contains a "Block Group[...] existed." message for the
 * insert-failure case.
 * NOTE(review): the condition around the message, any cleanup of rec and the
 * return value are not visible in this listing - confirm.
 */
7652 static int process_block_group_item(struct block_group_tree *block_group_cache,
7653 struct btrfs_key *key,
7654 struct extent_buffer *eb, int slot)
7656 struct block_group_record *rec;
7659 rec = btrfs_new_block_group_record(eb, key, slot);
7660 ret = insert_block_group_record(block_group_cache, rec);
7662 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7663 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(). The cache entry is keyed by
 * objectid = key->objectid and start = key->offset, with size equal to the
 * dev extent length read from the item. The key triple, the leaf's header
 * generation, the owning chunk's objectid and the chunk offset read from the
 * item are stored, and both list nodes (chunk_list, device_list) are
 * initialised as empty.
 * NOTE(review): "memory allocation failed" is printed on stderr in the
 * allocation-failure branch; what follows the message, the left-hand side of
 * the chunk offset assignment and the return statement are not visible in
 * this listing - confirm.
 */
7670 struct device_extent_record *
7671 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7672 struct btrfs_key *key, int slot)
7674 struct device_extent_record *rec;
7675 struct btrfs_dev_extent *ptr;
7677 rec = calloc(1, sizeof(*rec));
7679 fprintf(stderr, "memory allocation failed\n");
7683 rec->cache.objectid = key->objectid;
7684 rec->cache.start = key->offset;
7686 rec->generation = btrfs_header_generation(leaf);
7688 rec->objectid = key->objectid;
7689 rec->type = key->type;
7690 rec->offset = key->offset;
7692 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7693 rec->chunk_objecteid =
7694 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7696 btrfs_dev_extent_chunk_offset(leaf, ptr);
7697 rec->length = btrfs_dev_extent_length(leaf, ptr);
/* The cache range length mirrors the on-disk dev extent length. */
7698 rec->cache.size = rec->length;
7700 INIT_LIST_HEAD(&rec->chunk_list);
7701 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache through insert_device_extent_record().
 *
 * The listing contains a "Device extent[...] existed." message for the
 * insert-failure case.
 * NOTE(review): the condition around the message, any cleanup of rec and the
 * return value are not visible in this listing - confirm.
 */
7707 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7708 struct btrfs_key *key, struct extent_buffer *eb,
7711 struct device_extent_record *rec;
7714 rec = btrfs_new_device_extent_record(eb, key, slot);
7715 ret = insert_device_extent_record(dev_extent_cache, rec);
7718 "Device extent[%llu, %llu, %llu] existed.\n",
7719 rec->objectid, rec->offset, rec->length);
/*
 * Parse the EXTENT_ITEM / METADATA_ITEM at @slot of @eb, record the extent in
 * @extent_cache and add one backref per inline reference.
 *
 * Length: for a METADATA_ITEM key the length is the filesystem nodesize; the
 * listing also assigns key.offset as the length (for the other key type).
 *
 * Validation visible in the listing:
 *  - key.objectid must be aligned to sectorsize;
 *  - an item smaller than struct btrfs_extent_item is only accepted as a v0
 *    item of exactly that size when BTRFS_COMPAT_EXTENT_TREE_V0 is defined;
 *  - a metadata extent must be exactly nodesize long;
 *  - a data extent length must be aligned to sectorsize.
 *
 * The extent record is added through add_extent_rec() with a zeroed template
 * carrying start, nr, extent_item_refs, metadata and max_size. Inline refs
 * are then walked from just after the extent item (skipping a
 * btrfs_tree_block_info for a tree block stored under an EXTENT_ITEM key) up
 * to the end of the item, advancing by btrfs_extent_inline_ref_size(type).
 *
 * NOTE(review): the result of the second add_extent_rec() call and of both
 * add_data_backref() calls is not used, while add_tree_backref() results are
 * assigned to ret. The return statements and the loop construct are not
 * visible in this listing - confirm.
 */
7726 static int process_extent_item(struct btrfs_root *root,
7727 struct cache_tree *extent_cache,
7728 struct extent_buffer *eb, int slot)
7730 struct btrfs_extent_item *ei;
7731 struct btrfs_extent_inline_ref *iref;
7732 struct btrfs_extent_data_ref *dref;
7733 struct btrfs_shared_data_ref *sref;
7734 struct btrfs_key key;
7735 struct extent_record tmpl;
7740 u32 item_size = btrfs_item_size_nr(eb, slot);
7746 btrfs_item_key_to_cpu(eb, &key, slot);
7748 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7750 num_bytes = root->fs_info->nodesize;
7752 num_bytes = key.offset;
7755 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7756 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7757 key.objectid, root->fs_info->sectorsize);
/* Items shorter than the current extent item layout: v0 compatibility. */
7760 if (item_size < sizeof(*ei)) {
7761 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7762 struct btrfs_extent_item_v0 *ei0;
7763 if (item_size != sizeof(*ei0)) {
7765 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7766 key.objectid, key.type, key.offset,
7767 btrfs_header_bytenr(eb), slot);
7770 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7771 refs = btrfs_extent_refs_v0(eb, ei0);
7775 memset(&tmpl, 0, sizeof(tmpl));
7776 tmpl.start = key.objectid;
7777 tmpl.nr = num_bytes;
7778 tmpl.extent_item_refs = refs;
7779 tmpl.metadata = metadata;
7781 tmpl.max_size = num_bytes;
7783 return add_extent_rec(extent_cache, &tmpl);
7786 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7787 refs = btrfs_extent_refs(eb, ei);
7788 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7792 if (metadata && num_bytes != root->fs_info->nodesize) {
7793 error("ignore invalid metadata extent, length %llu does not equal to %u",
7794 num_bytes, root->fs_info->nodesize);
7797 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7798 error("ignore invalid data extent, length %llu is not aligned to %u",
7799 num_bytes, root->fs_info->sectorsize);
7803 memset(&tmpl, 0, sizeof(tmpl));
7804 tmpl.start = key.objectid;
7805 tmpl.nr = num_bytes;
7806 tmpl.extent_item_refs = refs;
7807 tmpl.metadata = metadata;
7809 tmpl.max_size = num_bytes;
7810 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the fixed-size extent item. */
7812 ptr = (unsigned long)(ei + 1);
7813 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7814 key.type == BTRFS_EXTENT_ITEM_KEY)
7815 ptr += sizeof(struct btrfs_tree_block_info);
7817 end = (unsigned long)ei + item_size;
7819 iref = (struct btrfs_extent_inline_ref *)ptr;
7820 type = btrfs_extent_inline_ref_type(eb, iref);
7821 offset = btrfs_extent_inline_ref_offset(eb, iref);
7823 case BTRFS_TREE_BLOCK_REF_KEY:
7824 ret = add_tree_backref(extent_cache, key.objectid,
7828 "add_tree_backref failed (extent items tree block): %s",
7831 case BTRFS_SHARED_BLOCK_REF_KEY:
7832 ret = add_tree_backref(extent_cache, key.objectid,
7836 "add_tree_backref failed (extent items shared block): %s",
7839 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref's offset field. */
7840 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7841 add_data_backref(extent_cache, key.objectid, 0,
7842 btrfs_extent_data_ref_root(eb, dref),
7843 btrfs_extent_data_ref_objectid(eb,
7845 btrfs_extent_data_ref_offset(eb, dref),
7846 btrfs_extent_data_ref_count(eb, dref),
7849 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref payload follows the inline ref header. */
7850 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7851 add_data_backref(extent_cache, key.objectid, offset,
7853 btrfs_shared_data_ref_count(eb, sref),
7857 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7858 key.objectid, key.type, num_bytes);
7861 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the free space cache of @cache holds an entry covering exactly
 * [@offset, @offset + @bytes), after carving out any superblock mirror
 * stripes that overlap the range.
 *
 * For every superblock mirror, btrfs_rmap_block() maps the mirror's physical
 * offset back to logical addresses in this block group. Each mapped stripe
 * is compared with the range:
 *  - no overlap: the stripe is tested against both ends of the range;
 *  - stripe starts at @offset: the front of the range is trimmed;
 *  - stripe starts before @offset: the range is moved past the stripe end;
 *  - stripe starts inside the range: either the tail is trimmed, or the
 *    left part is checked by a recursive call and the loop continues with
 *    the right part.
 * The remaining range is looked up with btrfs_find_free_space(); the entry's
 * offset and size are compared with the range (messages on stderr for each
 * mismatch) and the entry is removed with unlink_free_space().
 * NOTE(review): the loop over the mapped stripes, the release of the logical
 * array and the return values are not visible in this listing - confirm.
 */
7868 static int check_cache_range(struct btrfs_root *root,
7869 struct btrfs_block_group_cache *cache,
7870 u64 offset, u64 bytes)
7872 struct btrfs_free_space *entry;
7878 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7879 bytenr = btrfs_sb_offset(i);
7880 ret = btrfs_rmap_block(root->fs_info,
7881 cache->key.objectid, bytenr, 0,
7882 &logical, &nr, &stripe_len);
/* Stripe ends at or before the start of the range. */
7887 if (logical[nr] + stripe_len <= offset)
/* Stripe starts at or after the end of the range. */
7889 if (offset + bytes <= logical[nr])
7891 if (logical[nr] == offset) {
7892 if (stripe_len >= bytes) {
7896 bytes -= stripe_len;
7897 offset += stripe_len;
7898 } else if (logical[nr] < offset) {
7899 if (logical[nr] + stripe_len >=
7904 bytes = (offset + bytes) -
7905 (logical[nr] + stripe_len);
7906 offset = logical[nr] + stripe_len;
7909 * Could be tricky, the super may land in the
7910 * middle of the area we're checking. First
7911 * check the easiest case, it's at the end.
7913 if (logical[nr] + stripe_len >=
7915 bytes = logical[nr] - offset;
7919 /* Check the left side */
7920 ret = check_cache_range(root, cache,
7922 logical[nr] - offset);
7928 /* Now we continue with the right side */
7929 bytes = (offset + bytes) -
7930 (logical[nr] + stripe_len);
7931 offset = logical[nr] + stripe_len;
7938 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7940 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7941 offset, offset+bytes);
7945 if (entry->offset != offset) {
7946 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7951 if (entry->bytes != bytes) {
7952 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7953 bytes, entry->bytes, offset);
/* Consume the matched entry from the free space ctl. */
7957 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space cache of block group @cache against the
 * extent tree.
 *
 * The extent tree is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET) up to the end of the block group. Only
 * EXTENT_ITEM and METADATA_ITEM keys are considered. "last" tracks the end
 * of the previous extent (key.offset for EXTENT_ITEM, nodesize for
 * METADATA_ITEM); every gap between "last" and the next extent, and the gap
 * up to the end of the block group, is verified with check_cache_range().
 * Afterwards a message is printed if the free space rb-tree of the block
 * group is still non-empty.
 * NOTE(review): the loop construct, slot advancing, error paths and the
 * return value are not visible in this listing - confirm.
 */
7962 static int verify_space_cache(struct btrfs_root *root,
7963 struct btrfs_block_group_cache *cache)
7965 struct btrfs_path path;
7966 struct extent_buffer *leaf;
7967 struct btrfs_key key;
/* From here on, @root refers to the extent tree. */
7971 root = root->fs_info->extent_root;
7973 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7975 btrfs_init_path(&path);
7976 key.objectid = last;
7978 key.type = BTRFS_EXTENT_ITEM_KEY;
7979 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Move to the next leaf once the slot runs past the current one. */
7984 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7985 ret = btrfs_next_leaf(root, &path);
7993 leaf = path.nodes[0];
7994 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Key is at or beyond the end of this block group. */
7995 if (key.objectid >= cache->key.offset + cache->key.objectid)
7997 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7998 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly where the previous one ended: no gap to check. */
8003 if (last == key.objectid) {
8004 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8005 last = key.objectid + key.offset;
8007 last = key.objectid + root->fs_info->nodesize;
/* Gap between the previous extent end and this extent. */
8012 ret = check_cache_range(root, cache, last,
8013 key.objectid - last);
8016 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8017 last = key.objectid + key.offset;
8019 last = key.objectid + root->fs_info->nodesize;
/* Trailing gap up to the end of the block group. */
8023 if (last < cache->key.objectid + cache->key.offset)
8024 ret = check_cache_range(root, cache, last,
8025 cache->key.objectid +
8026 cache->key.offset - last);
8029 btrfs_release_path(&path);
8032 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8033 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space cache (or free space tree) of every block group.
 *
 * If the superblock's cache generation is neither -1ULL nor equal to the
 * superblock generation, a message saying the space cache will be
 * invalidated is printed. When progress reporting is enabled the
 * TASK_FREE_SPACE task is started, and task_stop() is called before
 * returning.
 *
 * Block groups are visited in address order starting after the primary
 * superblock. For each one the free_space_ctl is initialised when missing
 * (the listing also contains a btrfs_remove_free_space_cache() call); the
 * free space is then loaded from the free space tree when the
 * FREE_SPACE_TREE compat_ro bit is set (with super stripes excluded around
 * the load), and the listing also contains a load_free_space_cache() call.
 * The loaded data is checked by verify_space_cache().
 *
 * Returns -EINVAL if the local error counter is non-zero, 0 otherwise.
 * NOTE(review): the loop construct, the conditions between the steps and
 * the places where the error counter is incremented are not visible in this
 * listing - confirm.
 */
8041 static int check_space_cache(struct btrfs_root *root)
8043 struct btrfs_block_group_cache *cache;
8044 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8048 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8049 btrfs_super_generation(root->fs_info->super_copy) !=
8050 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8051 printf("cache and super generation don't match, space cache "
8052 "will be invalidated\n");
8056 if (ctx.progress_enabled) {
8057 ctx.tp = TASK_FREE_SPACE;
8058 task_start(ctx.info);
8062 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup starts right after the end of this block group. */
8066 start = cache->key.objectid + cache->key.offset;
8067 if (!cache->free_space_ctl) {
8068 if (btrfs_init_free_space_ctl(cache,
8069 root->fs_info->sectorsize)) {
8074 btrfs_remove_free_space_cache(cache);
8077 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8078 ret = exclude_super_stripes(root, cache);
8080 fprintf(stderr, "could not exclude super stripes: %s\n",
8085 ret = load_free_space_tree(root->fs_info, cache);
8086 free_excluded_extents(root, cache);
8088 fprintf(stderr, "could not load free space tree: %s\n",
8095 ret = load_free_space_cache(root->fs_info, cache);
8100 ret = verify_space_cache(root, cache);
8102 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8103 cache->key.objectid);
8108 task_stop(ctx.info);
8110 return error ? -EINVAL : 0;
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare the
 * checksum of every sector with the value stored in the csum item that
 * starts at @leaf_offset inside @eb.
 *
 * @num_bytes is tested for being a multiple of the sector size. A buffer of
 * @num_bytes is allocated with malloc(), data is read with
 * read_extent_data() from the current mirror, and each sector is checksummed
 * with btrfs_csum_data() / btrfs_csum_final(). The expected value is read
 * from @eb at leaf_offset + (sector index * csum_size). On a mismatch a
 * message with the mirror number, address and both checksums is printed on
 * stderr and the number of copies is queried to decide whether another
 * mirror can be tried.
 * NOTE(review): the retry jump, the reset of data_checked, the advancing of
 * offset, the release of the buffer and the return value are not visible in
 * this listing - confirm.
 */
8113 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8114 u64 num_bytes, unsigned long leaf_offset,
8115 struct extent_buffer *eb) {
8117 struct btrfs_fs_info *fs_info = root->fs_info;
8119 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8121 unsigned long csum_offset;
8125 u64 data_checked = 0;
8131 if (num_bytes % fs_info->sectorsize)
8134 data = malloc(num_bytes);
8138 while (offset < num_bytes) {
/* Ask for everything that is left; read_len is passed by address. */
8141 read_len = num_bytes - offset;
8142 /* read as much space once a time */
8143 ret = read_extent_data(fs_info, data + offset,
8144 bytenr + offset, &read_len, mirror);
8148 /* verify every 4k data's checksum */
8149 while (data_checked < read_len) {
/* tmp is the byte position of this sector within the whole range. */
8151 tmp = offset + data_checked;
8153 csum = btrfs_csum_data((char *)data + tmp,
8154 csum, fs_info->sectorsize);
8155 btrfs_csum_final(csum, (u8 *)&csum);
/* One csum_size slot per sector in the csum item. */
8157 csum_offset = leaf_offset +
8158 tmp / fs_info->sectorsize * csum_size;
8159 read_extent_buffer(eb, (char *)&csum_expected,
8160 csum_offset, csum_size);
8161 /* try another mirror */
8162 if (csum != csum_expected) {
8163 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8164 mirror, bytenr + tmp,
8165 csum, csum_expected);
8166 num_copies = btrfs_num_copies(root->fs_info,
8168 if (mirror < num_copies - 1) {
8173 data_checked += fs_info->sectorsize;
/*
 * Check that the range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search key is (bytenr, EXTENT_ITEM, -1). The cursor is then moved
 * backwards (previous slot or previous leaf) while the key type is greater
 * than EXTENT_ITEM, because block group items sort before extent items with
 * the same bytenr. Walking forward, each EXTENT_ITEM is compared with the
 * remaining range:
 *  - extent starts at bytenr: the front of the range is consumed;
 *  - extent starts before bytenr: the range is moved past the extent end;
 *  - extent starts inside the range: if it also ends inside, the part to
 *    the right of the extent is checked by a recursive call (after
 *    releasing the path) and the range is cut to the part left of the
 *    extent; otherwise only the left part remains.
 * If bytes remain uncovered and no error occurred, "There are no extents for
 * csum range" is printed on stderr. The path is released before returning.
 * NOTE(review): the loop construct, slot advancing, the exact break/continue
 * points and the return values are not visible in this listing - confirm.
 */
8182 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8185 struct btrfs_path path;
8186 struct extent_buffer *leaf;
8187 struct btrfs_key key;
8190 btrfs_init_path(&path);
8191 key.objectid = bytenr;
8192 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Highest possible offset for this objectid/type. */
8193 key.offset = (u64)-1;
8196 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8199 fprintf(stderr, "Error looking up extent record %d\n", ret);
8200 btrfs_release_path(&path);
8203 if (path.slots[0] > 0) {
8206 ret = btrfs_prev_leaf(root, &path);
8209 } else if (ret > 0) {
8216 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8219 * Block group items come before extent items if they have the same
8220 * bytenr, so walk back one more just in case. Dear future traveller,
8221 * first congrats on mastering time travel. Now if it's not too much
8222 * trouble could you go back to 2006 and tell Chris to make the
8223 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8224 * EXTENT_ITEM_KEY please?
8226 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8227 if (path.slots[0] > 0) {
8230 ret = btrfs_prev_leaf(root, &path);
8233 } else if (ret > 0) {
8238 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Move to the next leaf once the slot runs past the current one. */
8242 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8243 ret = btrfs_next_leaf(root, &path);
8245 fprintf(stderr, "Error going to next leaf "
8247 btrfs_release_path(&path);
8253 leaf = path.nodes[0];
8254 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8255 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
8259 if (key.objectid + key.offset < bytenr) {
/* Extent starts after the range ends. */
8263 if (key.objectid > bytenr + num_bytes)
8266 if (key.objectid == bytenr) {
8267 if (key.offset >= num_bytes) {
8271 num_bytes -= key.offset;
8272 bytenr += key.offset;
8273 } else if (key.objectid < bytenr) {
8274 if (key.objectid + key.offset >= bytenr + num_bytes) {
8278 num_bytes = (bytenr + num_bytes) -
8279 (key.objectid + key.offset);
8280 bytenr = key.objectid + key.offset;
8282 if (key.objectid + key.offset < bytenr + num_bytes) {
8283 u64 new_start = key.objectid + key.offset;
8284 u64 new_bytes = bytenr + num_bytes - new_start;
8287 * Weird case, the extent is in the middle of
8288 * our range, we'll have to search one side
8289 * and then the other. Not sure if this happens
8290 * in real life, but no harm in coding it up
8291 * anyway just in case.
8293 btrfs_release_path(&path);
8294 ret = check_extent_exists(root, new_start,
8297 fprintf(stderr, "Right section didn't "
8301 num_bytes = key.objectid - bytenr;
8304 num_bytes = key.objectid - bytenr;
8311 if (num_bytes && !ret) {
8312 fprintf(stderr, "There are no extents for csum range "
8313 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8317 btrfs_release_path(&path);
/*
 * Walk the checksum tree and verify that every contiguous run of checksummed
 * bytes is backed by extent records; optionally verify the data checksums.
 *
 * The function switches @root to fs_info->csum_root and prints
 * "No valid csum tree found" when its root node is not up to date. The walk
 * starts at (BTRFS_EXTENT_CSUM_OBJECTID, BTRFS_EXTENT_CSUM_KEY). For each
 * EXTENT_CSUM item, the number of data bytes it covers is
 * (item size / csum_size) * sectorsize. Unless the global check_data_csum is
 * zero, the data itself is verified with check_extent_csums().
 *
 * Runs are tracked in (offset, num_bytes): when an item does not start at
 * offset + num_bytes, the finished run is checked with
 * check_extent_exists() and a new run starts at key.offset; every item adds
 * its data length to num_bytes.
 * NOTE(review): the loop construct, the reset of num_bytes, the handling of
 * the final run, error accounting and the return value are not visible in
 * this listing - confirm.
 */
8321 static int check_csums(struct btrfs_root *root)
8323 struct btrfs_path path;
8324 struct extent_buffer *leaf;
8325 struct btrfs_key key;
8326 u64 offset = 0, num_bytes = 0;
8327 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8331 unsigned long leaf_offset;
/* From here on, @root refers to the checksum tree. */
8333 root = root->fs_info->csum_root;
8334 if (!extent_buffer_uptodate(root->node)) {
8335 fprintf(stderr, "No valid csum tree found\n");
8339 btrfs_init_path(&path);
8340 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8341 key.type = BTRFS_EXTENT_CSUM_KEY;
8343 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8345 fprintf(stderr, "Error searching csum tree %d\n", ret);
8346 btrfs_release_path(&path);
8350 if (ret > 0 && path.slots[0])
/* Move to the next leaf once the slot runs past the current one. */
8355 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8356 ret = btrfs_next_leaf(root, &path);
8358 fprintf(stderr, "Error going to next leaf "
8365 leaf = path.nodes[0];
8367 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8368 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size slot in the item stands for one sector of data. */
8373 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8374 csum_size) * root->fs_info->sectorsize;
8375 if (!check_data_csum)
8376 goto skip_csum_check;
8377 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8378 ret = check_extent_csums(root, key.offset, data_len,
8384 offset = key.offset;
/* Item is not contiguous with the current run: close the run. */
8385 } else if (key.offset != offset + num_bytes) {
8386 ret = check_extent_exists(root, offset, num_bytes);
8388 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8389 "there is no extent record\n",
8390 offset, offset+num_bytes);
8393 offset = key.offset;
8396 num_bytes += data_len;
8400 btrfs_release_path(&path);
/*
 * Compare @key against @drop_key field by field, in the order objectid,
 * then type, then offset.  A later field is only consulted when all earlier
 * fields are equal.
 *
 * NOTE(review): presumably returns non-zero when @key sorts strictly before
 * @drop_key and 0 otherwise -- confirm against the return statements.
 */
8404 static int is_dropped_key(struct btrfs_key *key,
8405 struct btrfs_key *drop_key) {
8406 if (key->objectid < drop_key->objectid)
8408 else if (key->objectid == drop_key->objectid) {
8409 if (key->type < drop_key->type)
8411 else if (key->type == drop_key->type) {
8412 if (key->offset < drop_key->offset)
8420 * Here are the rules for FULL_BACKREF.
8422 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8423 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8425 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8426 * if it happened after the relocation occurred since we'll have dropped the
8427 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8428 * have no real way to know for sure.
8430 * We process the blocks one root at a time, and we start from the lowest root
8431 * objectid and go to the highest. So we can just lookup the owner backref for
8432 * the record and if we don't find it then we know it doesn't exist and we have
8435 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8436 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8437 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf is expected to carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and reconcile that with its extent record.
 *
 * @extent_cache: cache searched for the extent record at buf->start
 * @buf:          tree block being classified
 * @ri:           root item record of the root being walked; its objectid
 *                and bytenr are consulted
 * @flags:        output; BTRFS_BLOCK_FLAG_FULL_BACKREF is OR-ed in on the
 *                full-backref exit
 *
 * The tests run in this order: ri->objectid below
 * BTRFS_FIRST_FREE_OBJECTID, buf->start equal to ri->bytenr,
 * BTRFS_HEADER_FLAG_RELOC set in the header, header owner equal to
 * ri->objectid, and finally a find_tree_backref(rec, 0, owner) lookup.
 *
 * Two exits are visible.  The first does not OR in the bit and sets
 * rec->bad_full_backref when rec->flag_block_full_backref is neither
 * FLAG_UNSET nor 0.  The second ORs in the bit and sets
 * rec->bad_full_backref when the recorded value is neither FLAG_UNSET
 * nor 1.
 *
 * NOTE(review): which test leads to which exit, and the return values,
 * should be confirmed against the branch targets.
 */
8439 static int calc_extent_flag(struct cache_tree *extent_cache,
8440 struct extent_buffer *buf,
8441 struct root_item_record *ri,
8444 struct extent_record *rec;
8445 struct cache_extent *cache;
8446 struct tree_backref *tback;
8449 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8450 /* we have added this extent before */
8454 rec = container_of(cache, struct extent_record, cache);
8457 * Except file/reloc tree, we can not have
8460 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8465 if (buf->start == ri->bytenr)
8468 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8471 owner = btrfs_header_owner(buf);
8472 if (owner == ri->objectid)
8475 tback = find_tree_backref(rec, 0, owner);
8480 if (rec->flag_block_full_backref != FLAG_UNSET &&
8481 rec->flag_block_full_backref != 0)
8482 rec->bad_full_backref = 1;
8485 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8486 if (rec->flag_block_full_backref != FLAG_UNSET &&
8487 rec->flag_block_full_backref != 1)
8488 rec->bad_full_backref = 1;
/*
 * Print a diagnostic to stderr of the form
 * "Invalid key type(<type>) found in root(<objectid>)", where the two
 * names are rendered by print_key_type() and print_objectid().
 */
8492 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8494 fprintf(stderr, "Invalid key type(");
8495 print_key_type(stderr, 0, key_type);
8496 fprintf(stderr, ") found in root(");
8497 print_objectid(stderr, rootid, 0);
8498 fprintf(stderr, ")\n");
8502 * Check if the key is valid with its extent buffer.
8504 * This is an early check in case an invalid key exists in an extent buffer
8505 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Validate that an item of @key_type may appear in the tree @rootid.
 *
 * Rules visible here:
 *  - DEV_ITEM, CHUNK_ITEM:                      chunk tree only
 *  - EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP_ITEM: extent tree only
 *  - ROOT_ITEM:                                 root tree only
 *  - DEV_EXTENT:                                dev tree only
 * A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the "csum and log tree" case is labelled with
 * BTRFS_CSUM_TREE_OBJECTID, which is named like a tree objectid, while every
 * other label is a *_KEY constant.  This looks like it was meant to be a
 * key type -- confirm before relying on this case.
 * NOTE(review): presumably returns 0 for an acceptable pairing and non-zero
 * otherwise; run_next_block() prints "ignoring invalid key" on a true
 * result -- confirm.
 */
8508 static int check_type_with_root(u64 rootid, u8 key_type)
8511 /* Only valid in chunk tree */
8512 case BTRFS_DEV_ITEM_KEY:
8513 case BTRFS_CHUNK_ITEM_KEY:
8514 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8517 /* valid in csum and log tree */
8518 case BTRFS_CSUM_TREE_OBJECTID:
8519 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8523 case BTRFS_EXTENT_ITEM_KEY:
8524 case BTRFS_METADATA_ITEM_KEY:
8525 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8526 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8529 case BTRFS_ROOT_ITEM_KEY:
8530 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8533 case BTRFS_DEV_EXTENT_KEY:
8534 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8540 report_mismatch_key_root(key_type, rootid);
/*
 * Process one queued tree block and feed what it contains into the fsck
 * caches.
 *
 * @root:              any root; used for fs_info and passed to helpers
 * @bits:              scratch array filled by pick_next_pending()
 * @pending, @reada, @nodes: queues of blocks still to be read; the block
 *                     handled here is removed from each of them
 * @seen:              passed to add_pending() when queueing child blocks
 * @extent_cache:      extent records and backrefs collected so far
 * @chunk_cache, @dev_cache, @block_group_cache, @dev_extent_cache:
 *                     filled from chunk, device, block group and device
 *                     extent items found in leaves
 * @ri:                root item record of the root being walked
 *
 * Outline:
 *  1. pick_next_pending() selects candidates; those flagged through
 *     reada_bits are added to @reada and passed to readahead_tree_block().
 *  2. bits[0] is the block processed in this call.  Its parent generation
 *     comes from the extent record, if one exists, and the block is read
 *     with read_tree_block(); a block that is not uptodate is recorded
 *     with record_bad_block_io().
 *  3. The extent flags come from btrfs_lookup_extent_info() unless
 *     init_extent_tree is set, with calc_extent_flag() as the other source.
 *     If calc_extent_flag() fails, a message is printed and
 *     BTRFS_BLOCK_FLAG_FULL_BACKREF is assumed.
 *  4. FULL_BACKREF is sanity-checked against the header owner, the RELOC
 *     header flag and ri->last_snapshot.  A contradiction flips the flag
 *     and sets rec->bad_full_backref.  The final value is stored in
 *     rec->flag_block_full_backref.
 *  5. check_block() validates the block.
 *  6. Leaves: each item key is checked with check_type_with_root() and then
 *     dispatched by key type -- extent/metadata items, csum items, chunk,
 *     device, block group and device extent items, v0 refs (only when
 *     BTRFS_COMPAT_EXTENT_TREE_V0 is defined), tree/shared block refs,
 *     data/shared data refs, orphan items (queued on delete_items) and
 *     file extent items (byte accounting plus add_data_backref()).
 *     Nodes: for each block pointer an extent record template and a tree
 *     backref are added and the child is queued with add_pending(); keys
 *     matched by is_dropped_key() at ri->drop_level get special handling.
 *  7. The global byte counters are updated and the buffer is released.
 *
 * NOTE(review): several branch bodies and all return statements of this
 * function should be read alongside this summary; the summary names only
 * the calls and assignments that are visible.
 */
8544 static int run_next_block(struct btrfs_root *root,
8545 struct block_info *bits,
8548 struct cache_tree *pending,
8549 struct cache_tree *seen,
8550 struct cache_tree *reada,
8551 struct cache_tree *nodes,
8552 struct cache_tree *extent_cache,
8553 struct cache_tree *chunk_cache,
8554 struct rb_root *dev_cache,
8555 struct block_group_tree *block_group_cache,
8556 struct device_extent_tree *dev_extent_cache,
8557 struct root_item_record *ri)
8559 struct btrfs_fs_info *fs_info = root->fs_info;
8560 struct extent_buffer *buf;
8561 struct extent_record *rec = NULL;
8572 struct btrfs_key key;
8573 struct cache_extent *cache;
8576 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8577 bits_nr, &reada_bits);
8582 for(i = 0; i < nritems; i++) {
8583 ret = add_cache_extent(reada, bits[i].start,
8588 /* fixme, get the parent transid */
8589 readahead_tree_block(fs_info, bits[i].start, 0);
/* Only the first picked block is processed in this call. */
8592 *last = bits[0].start;
8593 bytenr = bits[0].start;
8594 size = bits[0].size;
/* Drop the block from every queue it may still be sitting in. */
8596 cache = lookup_cache_extent(pending, bytenr, size);
8598 remove_cache_extent(pending, cache);
8601 cache = lookup_cache_extent(reada, bytenr, size);
8603 remove_cache_extent(reada, cache);
8606 cache = lookup_cache_extent(nodes, bytenr, size);
8608 remove_cache_extent(nodes, cache);
/* An existing extent record supplies the expected parent generation. */
8611 cache = lookup_cache_extent(extent_cache, bytenr, size);
8613 rec = container_of(cache, struct extent_record, cache);
8614 gen = rec->parent_generation;
8617 /* fixme, get the real parent transid */
8618 buf = read_tree_block(root->fs_info, bytenr, gen);
8619 if (!extent_buffer_uptodate(buf)) {
8620 record_bad_block_io(root->fs_info,
8621 extent_cache, bytenr, size);
8625 nritems = btrfs_header_nritems(buf);
8628 if (!init_extent_tree) {
8629 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8630 btrfs_header_level(buf), 1, NULL,
8633 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8635 fprintf(stderr, "Couldn't calc extent flags\n");
8636 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8641 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8643 fprintf(stderr, "Couldn't calc extent flags\n");
8644 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8648 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8650 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8651 ri->objectid == btrfs_header_owner(buf)) {
8653 * Ok we got to this block from it's original owner and
8654 * we have FULL_BACKREF set. Relocation can leave
8655 * converted blocks over so this is altogether possible,
8656 * however it's not possible if the generation > the
8657 * last snapshot, so check for this case.
8659 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8660 btrfs_header_generation(buf) > ri->last_snapshot) {
8661 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8662 rec->bad_full_backref = 1;
8667 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8668 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8669 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8670 rec->bad_full_backref = 1;
8674 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8675 rec->flag_block_full_backref = 1;
8679 rec->flag_block_full_backref = 0;
8681 owner = btrfs_header_owner(buf);
8684 ret = check_block(root, extent_cache, buf, flags);
8688 if (btrfs_is_leaf(buf)) {
8689 btree_space_waste += btrfs_leaf_free_space(root, buf);
8690 for (i = 0; i < nritems; i++) {
8691 struct btrfs_file_extent_item *fi;
8692 btrfs_item_key_to_cpu(buf, &key, i);
8694 * Check key type against the leaf owner.
8695 * Could filter quite a lot of early error if
8698 if (check_type_with_root(btrfs_header_owner(buf),
8700 fprintf(stderr, "ignoring invalid key\n");
8703 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8704 process_extent_item(root, extent_cache, buf,
8708 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8709 process_extent_item(root, extent_cache, buf,
8713 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8715 btrfs_item_size_nr(buf, i);
8718 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8719 process_chunk_item(chunk_cache, &key, buf, i);
8722 if (key.type == BTRFS_DEV_ITEM_KEY) {
8723 process_device_item(dev_cache, &key, buf, i);
8726 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8727 process_block_group_item(block_group_cache,
8731 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8732 process_device_extent_item(dev_extent_cache,
8737 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8738 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8739 process_extent_ref_v0(extent_cache, buf, i);
8746 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8747 ret = add_tree_backref(extent_cache,
8748 key.objectid, 0, key.offset, 0);
8751 "add_tree_backref failed (leaf tree block): %s",
8755 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8756 ret = add_tree_backref(extent_cache,
8757 key.objectid, key.offset, 0, 0);
8760 "add_tree_backref failed (leaf shared block): %s",
8764 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8765 struct btrfs_extent_data_ref *ref;
8766 ref = btrfs_item_ptr(buf, i,
8767 struct btrfs_extent_data_ref);
8768 add_data_backref(extent_cache,
8770 btrfs_extent_data_ref_root(buf, ref),
8771 btrfs_extent_data_ref_objectid(buf,
8773 btrfs_extent_data_ref_offset(buf, ref),
8774 btrfs_extent_data_ref_count(buf, ref),
8775 0, root->fs_info->sectorsize);
8778 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8779 struct btrfs_shared_data_ref *ref;
8780 ref = btrfs_item_ptr(buf, i,
8781 struct btrfs_shared_data_ref);
8782 add_data_backref(extent_cache,
8783 key.objectid, key.offset, 0, 0, 0,
8784 btrfs_shared_data_ref_count(buf, ref),
8785 0, root->fs_info->sectorsize);
8788 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8789 struct bad_item *bad;
8791 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8795 bad = malloc(sizeof(struct bad_item));
8798 INIT_LIST_HEAD(&bad->list);
8799 memcpy(&bad->key, &key,
8800 sizeof(struct btrfs_key));
8801 bad->root_id = owner;
8802 list_add_tail(&bad->list, &delete_items);
8805 if (key.type != BTRFS_EXTENT_DATA_KEY)
8807 fi = btrfs_item_ptr(buf, i,
8808 struct btrfs_file_extent_item);
8809 if (btrfs_file_extent_type(buf, fi) ==
8810 BTRFS_FILE_EXTENT_INLINE)
8812 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8815 data_bytes_allocated +=
8816 btrfs_file_extent_disk_num_bytes(buf, fi);
8817 if (data_bytes_allocated < root->fs_info->sectorsize) {
8820 data_bytes_referenced +=
8821 btrfs_file_extent_num_bytes(buf, fi);
8822 add_data_backref(extent_cache,
8823 btrfs_file_extent_disk_bytenr(buf, fi),
8824 parent, owner, key.objectid, key.offset -
8825 btrfs_file_extent_offset(buf, fi), 1, 1,
8826 btrfs_file_extent_disk_num_bytes(buf, fi));
8830 struct btrfs_key first_key;
8832 first_key.objectid = 0;
8835 btrfs_item_key_to_cpu(buf, &first_key, 0);
8836 level = btrfs_header_level(buf);
8837 for (i = 0; i < nritems; i++) {
8838 struct extent_record tmpl;
8840 ptr = btrfs_node_blockptr(buf, i);
8841 size = root->fs_info->nodesize;
8842 btrfs_node_key_to_cpu(buf, &key, i);
8844 if ((level == ri->drop_level)
8845 && is_dropped_key(&key, &ri->drop_key)) {
8850 memset(&tmpl, 0, sizeof(tmpl));
8851 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8852 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8857 tmpl.max_size = size;
8858 ret = add_extent_rec(extent_cache, &tmpl);
8862 ret = add_tree_backref(extent_cache, ptr, parent,
8866 "add_tree_backref failed (non-leaf block): %s",
8872 add_pending(nodes, seen, ptr, size);
8874 add_pending(pending, seen, ptr, size);
8877 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
8878 nritems) * sizeof(struct btrfs_key_ptr);
8880 total_btree_bytes += buf->len;
8881 if (fs_root_objectid(btrfs_header_owner(buf)))
8882 total_fs_tree_bytes += buf->len;
8883 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8884 total_extent_tree_bytes += buf->len;
8886 free_extent_buffer(buf);
/*
 * Seed the block walk with the root node @buf of a tree.
 *
 * @buf:          root node of the tree
 * @extent_cache: receives the extent record and tree backref for @buf
 * @pending, @nodes: one of these queues receives @buf, chosen by whether
 *                the header level is above 0
 * @seen:         passed through to add_pending()
 *
 * Steps:
 *  - @buf is queued with add_pending(), using buf->start and buf->len.
 *  - An extent_record template is zeroed, filled with buf->start and
 *    buf->len (as max_size) and handed to add_extent_rec().
 *  - A tree backref is added.  For the relocation tree
 *    (BTRFS_TREE_RELOC_OBJECTID), or for a block whose backref revision is
 *    older than BTRFS_MIXED_BACKREF_REV, the call passes buf->start as its
 *    third argument; otherwise it passes 0 there followed by objectid.
 *
 * NOTE(review): the result of add_extent_rec() is not captured in the
 * statement shown; only the add_tree_backref() result is stored in ret.
 */
8890 static int add_root_to_pending(struct extent_buffer *buf,
8891 struct cache_tree *extent_cache,
8892 struct cache_tree *pending,
8893 struct cache_tree *seen,
8894 struct cache_tree *nodes,
8897 struct extent_record tmpl;
8900 if (btrfs_header_level(buf) > 0)
8901 add_pending(nodes, seen, buf->start, buf->len);
8903 add_pending(pending, seen, buf->start, buf->len);
8905 memset(&tmpl, 0, sizeof(tmpl));
8906 tmpl.start = buf->start;
8911 tmpl.max_size = buf->len;
8912 add_extent_rec(extent_cache, &tmpl);
8914 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8915 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8916 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8919 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8924 /* as we fix the tree, we might be deleting blocks that
8925 * we're tracking for repair. This hook makes sure we
8926 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory fsck extent cache in step with an extent being freed.
 *
 * The extent record covering [@bytenr, @bytenr + @num_bytes) is looked up
 * in root->fs_info->fsck_extent_cache.  @owner at or above
 * BTRFS_FIRST_FREE_OBJECTID selects the data-backref path; anything lower
 * selects the tree-backref path.
 *
 * Data path: the backref is found with find_data_backref().  Its found_ref
 * and num_refs counters, together with rec->refs and rec->extent_item_refs,
 * are reduced by refs_to_drop, and the node's found_ref and
 * found_extent_tree flags are cleared once the matching counter reaches
 * zero.
 *
 * Tree path: the backref is found with find_tree_backref(); found_ref and
 * found_extent_tree are cleared and rec->extent_item_refs is decremented.
 *
 * Finally maybe_free_extent_rec() is called on the record.
 *
 * NOTE(review): both paths erase the backref from rec->backref_tree when
 * found_extent_tree is clear and found_ref is still set.  Confirm this is
 * intended rather than "both clear".  In the tree path found_ref has just
 * been cleared whenever it was set, so with the statements shown that
 * condition cannot hold there.
 */
8928 static int free_extent_hook(struct btrfs_trans_handle *trans,
8929 struct btrfs_root *root,
8930 u64 bytenr, u64 num_bytes, u64 parent,
8931 u64 root_objectid, u64 owner, u64 offset,
8934 struct extent_record *rec;
8935 struct cache_extent *cache;
8937 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8939 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8940 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8944 rec = container_of(cache, struct extent_record, cache);
8946 struct data_backref *back;
8947 back = find_data_backref(rec, parent, root_objectid, owner,
8948 offset, 1, bytenr, num_bytes);
/* References that were found while walking the trees. */
8951 if (back->node.found_ref) {
8952 back->found_ref -= refs_to_drop;
8954 rec->refs -= refs_to_drop;
/* References that were recorded in the extent tree. */
8956 if (back->node.found_extent_tree) {
8957 back->num_refs -= refs_to_drop;
8958 if (rec->extent_item_refs)
8959 rec->extent_item_refs -= refs_to_drop;
8961 if (back->found_ref == 0)
8962 back->node.found_ref = 0;
8963 if (back->num_refs == 0)
8964 back->node.found_extent_tree = 0;
8966 if (!back->node.found_extent_tree && back->node.found_ref) {
8967 rb_erase(&back->node.node, &rec->backref_tree);
8971 struct tree_backref *back;
8972 back = find_tree_backref(rec, parent, root_objectid);
8975 if (back->node.found_ref) {
8978 back->node.found_ref = 0;
8980 if (back->node.found_extent_tree) {
8981 if (rec->extent_item_refs)
8982 rec->extent_item_refs--;
8983 back->node.found_extent_tree = 0;
8985 if (!back->node.found_extent_tree && back->node.found_ref) {
8986 rb_erase(&back->node.node, &rec->backref_tree);
8990 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items that belong to the extent at bytenr.
 *
 * The search starts from a key with objectid bytenr and offset (u64)-1 and
 * runs against fs_info->extent_root under @trans.  Each item found is
 * examined:
 *  - an objectid other than bytenr ends the walk;
 *  - a type that is not an extent item, metadata item or one of the
 *    tree/data/shared/v0 backref types is skipped: the path is released and
 *    the search key is moved just below the found key (offset - 1 when the
 *    type is 0, otherwise type - 1 with offset (u64)-1);
 *  - anything else is logged ("repair deleting extent record") and removed
 *    with btrfs_del_item().  For EXTENT_ITEM and METADATA_ITEM keys
 *    btrfs_update_block_group() is called as well, with a byte count of
 *    found_key.offset for an extent item and fs_info->nodesize for a
 *    metadata item.
 *
 * @path is released before returning.
 * NOTE(review): return values are not restated here; confirm against the
 * return statements.
 */
8995 static int delete_extent_records(struct btrfs_trans_handle *trans,
8996 struct btrfs_root *root,
8997 struct btrfs_path *path,
9000 struct btrfs_key key;
9001 struct btrfs_key found_key;
9002 struct extent_buffer *leaf;
9007 key.objectid = bytenr;
9009 key.offset = (u64)-1;
9012 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9019 if (path->slots[0] == 0)
9025 leaf = path->nodes[0];
9026 slot = path->slots[0];
9028 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9029 if (found_key.objectid != bytenr)
/* Not an extent or backref item: step the search key below it. */
9032 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9033 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9034 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9035 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9036 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9037 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9038 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9039 btrfs_release_path(path);
9040 if (found_key.type == 0) {
9041 if (found_key.offset == 0)
9043 key.offset = found_key.offset - 1;
9044 key.type = found_key.type;
9046 key.type = found_key.type - 1;
9047 key.offset = (u64)-1;
9051 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9052 found_key.objectid, found_key.type, found_key.offset);
9054 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9057 btrfs_release_path(path);
/* Extent and metadata items also go through block group accounting. */
9059 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9060 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9061 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9062 found_key.offset : root->fs_info->nodesize;
9064 ret = btrfs_update_block_group(root, bytenr,
9071 btrfs_release_path(path);
9076 * for a single backref, this will allocate a new extent
9077 * and add the backref to it.
/*
 * Write one backref of @rec into the extent tree, first creating the extent
 * item when needed.
 *
 * @trans, @info: transaction and filesystem; info->extent_root is the tree
 *                that is modified
 * @path:         scratch path, released before returning
 * @rec:          extent record being repaired
 * @back:         backref to add (data or tree, see back->is_data)
 * @allocated:    NOTE(review): presumably tells whether the extent item
 *                already exists, so that the insertion below is skipped --
 *                confirm
 * @flags:        extra extent flags OR-ed with
 *                BTRFS_EXTENT_FLAG_TREE_BLOCK for tree blocks
 *
 * Extent item creation: an item keyed (rec->start, BTRFS_EXTENT_ITEM_KEY,
 * rec->max_size) is inserted with a ref count of 0 and rec->generation.
 * Data extents get BTRFS_EXTENT_FLAG_DATA.  Tree blocks additionally get a
 * zeroed btrfs_tree_block_info carrying rec->info_level and a key built
 * from rec->info_objectid.  The buffer is marked dirty and
 * btrfs_update_block_group() is called for the range.
 *
 * Backref creation: for a data backref btrfs_inc_extent_ref() is called
 * once per dback->found_ref, and for a tree backref it is called once.  The
 * parent is taken from the backref when back->full_backref is set.  Each
 * addition is logged to stderr.
 *
 * NOTE(review): the `©_key` tokens below look like a mis-encoded
 * address-of copy_key; confirm against the canonical source.
 */
9079 static int record_extent(struct btrfs_trans_handle *trans,
9080 struct btrfs_fs_info *info,
9081 struct btrfs_path *path,
9082 struct extent_record *rec,
9083 struct extent_backref *back,
9084 int allocated, u64 flags)
9087 struct btrfs_root *extent_root = info->extent_root;
9088 struct extent_buffer *leaf;
9089 struct btrfs_key ins_key;
9090 struct btrfs_extent_item *ei;
9091 struct data_backref *dback;
9092 struct btrfs_tree_block_info *bi;
9095 rec->max_size = max_t(u64, rec->max_size,
9099 u32 item_size = sizeof(*ei);
9102 item_size += sizeof(*bi);
9104 ins_key.objectid = rec->start;
9105 ins_key.offset = rec->max_size;
9106 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9108 ret = btrfs_insert_empty_item(trans, extent_root, path,
9109 &ins_key, item_size);
9113 leaf = path->nodes[0];
9114 ei = btrfs_item_ptr(leaf, path->slots[0],
9115 struct btrfs_extent_item);
/* The item starts with zero refs; references are added further down. */
9117 btrfs_set_extent_refs(leaf, ei, 0);
9118 btrfs_set_extent_generation(leaf, ei, rec->generation);
9120 if (back->is_data) {
9121 btrfs_set_extent_flags(leaf, ei,
9122 BTRFS_EXTENT_FLAG_DATA);
9124 struct btrfs_disk_key copy_key;;
9126 bi = (struct btrfs_tree_block_info *)(ei + 1);
9127 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9130 btrfs_set_disk_key_objectid(©_key,
9131 rec->info_objectid);
9132 btrfs_set_disk_key_type(©_key, 0);
9133 btrfs_set_disk_key_offset(©_key, 0);
9135 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9136 btrfs_set_tree_block_key(leaf, bi, ©_key);
9138 btrfs_set_extent_flags(leaf, ei,
9139 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9142 btrfs_mark_buffer_dirty(leaf);
9143 ret = btrfs_update_block_group(extent_root, rec->start,
9144 rec->max_size, 1, 0);
9147 btrfs_release_path(path);
9150 if (back->is_data) {
9154 dback = to_data_backref(back);
9155 if (back->full_backref)
9156 parent = dback->parent;
9160 for (i = 0; i < dback->found_ref; i++) {
9161 /* if parent != 0, we're doing a full backref
9162 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9163 * just makes the backref allocator create a data
9166 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9167 rec->start, rec->max_size,
9171 BTRFS_FIRST_FREE_OBJECTID :
9177 fprintf(stderr, "adding new data backref"
9178 " on %llu %s %llu owner %llu"
9179 " offset %llu found %d\n",
9180 (unsigned long long)rec->start,
9181 back->full_backref ?
9183 back->full_backref ?
9184 (unsigned long long)parent :
9185 (unsigned long long)dback->root,
9186 (unsigned long long)dback->owner,
9187 (unsigned long long)dback->offset,
9191 struct tree_backref *tback;
9193 tback = to_tree_backref(back);
9194 if (back->full_backref)
9195 parent = tback->parent;
9199 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9200 rec->start, rec->max_size,
9201 parent, tback->root, 0, 0);
9202 fprintf(stderr, "adding new tree backref on "
9203 "start %llu len %llu parent %llu root %llu\n",
9204 rec->start, rec->max_size, parent, tback->root);
9207 btrfs_release_path(path);
/*
 * Linear search of the @entries list for an extent_entry whose bytenr and
 * bytes both equal the given values.
 *
 * NOTE(review): presumably returns the matching entry, or NULL when none
 * matches -- confirm against the return statements.
 */
9211 static struct extent_entry *find_entry(struct list_head *entries,
9212 u64 bytenr, u64 bytes)
9214 struct extent_entry *entry = NULL;
9216 list_for_each_entry(entry, entries, list) {
9217 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry from @entries that the most backrefs agree on.
 *
 * Entries are compared by their count field.  An entry whose broken count
 * equals its count is treated as untrustworthy.  While scanning, best holds
 * the current leader and prev the previously visited entry; a tie in count
 * between the leader (or prev) and the current entry means no winner can be
 * declared yet and the scan has to continue.
 *
 * NOTE(review): presumably returns the winning entry, or NULL when the
 * entries are tied or all untrustworthy -- confirm against the return
 * statement and the callers.
 */
9224 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9226 struct extent_entry *entry, *best = NULL, *prev = NULL;
9228 list_for_each_entry(entry, entries, list) {
9230 * If there are as many broken entries as entries then we know
9231 * not to trust this particular entry.
9233 if (entry->broken == entry->count)
9237 * Special case, when there are only two entries and 'best' is
9247 * If our current entry == best then we can't be sure our best
9248 * is really the best, so we need to keep searching.
9250 if (best && best->count == entry->count) {
9256 /* Prev == entry, not good enough, have to keep searching */
9257 if (!prev->broken && prev->count == entry->count)
9261 best = (prev->count > entry->count) ? prev : entry;
9262 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with the chosen extent
 * @entry.
 *
 * @info:  filesystem
 * @path:  scratch path, released before returning
 * @dback: data backref describing the file extent to fix (root, owner,
 *         offset, disk_bytenr, bytes)
 * @entry: extent range (bytenr, bytes) the reference should point at
 *
 * Phase 1 (read-only): the fs root dback->root is read, the tree is
 * searched from (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) and
 * walked forward until a file extent whose disk bytenr and disk num bytes
 * equal those in @dback is found.  Leaving the owner or key type first is
 * reported as "Couldn't find our ref".
 *
 * Phase 2 (transaction): the key found in phase 1 is searched again with
 * COW enabled and the item is patched:
 *  - a compressed extent whose start differs from entry->bytenr is not
 *    handled and only produces a message;
 *  - a backref marked broken with a different start gets
 *    entry->bytenr as its disk bytenr;
 *  - a start after entry->bytenr is moved back to entry->bytenr and the
 *    file extent offset is rewritten, after checking that the reference
 *    does not run past the end of the entry;
 *  - a start before entry->bytenr is moved forward likewise, after checking
 *    that disk_bytenr + offset is not before the entry start;
 *  - disk_num_bytes is set to entry->bytes.  ram_bytes is set as well when
 *    the extent is not compressed; otherwise "ram bytes may be wrong?" is
 *    printed.
 * The buffer is marked dirty and the transaction is committed.
 *
 * Returns ret if it is non-zero, otherwise the commit result.
 */
9270 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9271 struct data_backref *dback, struct extent_entry *entry)
9273 struct btrfs_trans_handle *trans;
9274 struct btrfs_root *root;
9275 struct btrfs_file_extent_item *fi;
9276 struct extent_buffer *leaf;
9277 struct btrfs_key key;
9281 key.objectid = dback->root;
9282 key.type = BTRFS_ROOT_ITEM_KEY;
9283 key.offset = (u64)-1;
9284 root = btrfs_read_fs_root(info, &key);
9286 fprintf(stderr, "Couldn't find root for our ref\n");
9291 * The backref points to the original offset of the extent if it was
9292 * split, so we need to search down to the offset we have and then walk
9293 * forward until we find the backref we're looking for.
9295 key.objectid = dback->owner;
9296 key.type = BTRFS_EXTENT_DATA_KEY;
9297 key.offset = dback->offset;
9298 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9300 fprintf(stderr, "Error looking up ref %d\n", ret);
9305 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9306 ret = btrfs_next_leaf(root, path);
9308 fprintf(stderr, "Couldn't find our ref, next\n");
9312 leaf = path->nodes[0];
9313 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9314 if (key.objectid != dback->owner ||
9315 key.type != BTRFS_EXTENT_DATA_KEY) {
9316 fprintf(stderr, "Couldn't find our ref, search\n");
9319 fi = btrfs_item_ptr(leaf, path->slots[0],
9320 struct btrfs_file_extent_item);
9321 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9322 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9324 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9329 btrfs_release_path(path);
9331 trans = btrfs_start_transaction(root, 1);
9333 return PTR_ERR(trans);
9336 * Ok we have the key of the file extent we want to fix, now we can cow
9337 * down to the thing and fix it.
9339 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9341 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9342 key.objectid, key.type, key.offset, ret);
9346 fprintf(stderr, "Well that's odd, we just found this key "
9347 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9352 leaf = path->nodes[0];
9353 fi = btrfs_item_ptr(leaf, path->slots[0],
9354 struct btrfs_file_extent_item);
9356 if (btrfs_file_extent_compression(leaf, fi) &&
9357 dback->disk_bytenr != entry->bytenr) {
9358 fprintf(stderr, "Ref doesn't match the record start and is "
9359 "compressed, please take a btrfs-image of this file "
9360 "system and send it to a btrfs developer so they can "
9361 "complete this functionality for bytenr %Lu\n",
9362 dback->disk_bytenr);
9367 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9368 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9369 } else if (dback->disk_bytenr > entry->bytenr) {
9370 u64 off_diff, offset;
9372 off_diff = dback->disk_bytenr - entry->bytenr;
9373 offset = btrfs_file_extent_offset(leaf, fi);
9374 if (dback->disk_bytenr + offset +
9375 btrfs_file_extent_num_bytes(leaf, fi) >
9376 entry->bytenr + entry->bytes) {
9377 fprintf(stderr, "Ref is past the entry end, please "
9378 "take a btrfs-image of this file system and "
9379 "send it to a btrfs developer, ref %Lu\n",
9380 dback->disk_bytenr);
9385 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9386 btrfs_set_file_extent_offset(leaf, fi, offset);
9387 } else if (dback->disk_bytenr < entry->bytenr) {
9390 offset = btrfs_file_extent_offset(leaf, fi);
9391 if (dback->disk_bytenr + offset < entry->bytenr) {
9392 fprintf(stderr, "Ref is before the entry start, please"
9393 " take a btrfs-image of this file system and "
9394 "send it to a btrfs developer, ref %Lu\n",
9395 dback->disk_bytenr);
9400 offset += dback->disk_bytenr;
9401 offset -= entry->bytenr;
9402 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9403 btrfs_set_file_extent_offset(leaf, fi, offset);
9406 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9409 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9410 * only do this if we aren't using compression, otherwise it's a
9413 if (!btrfs_file_extent_compression(leaf, fi))
9414 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9416 printf("ram bytes may be wrong?\n");
9417 btrfs_mark_buffer_dirty(leaf);
9419 err = btrfs_commit_transaction(trans, root);
9420 btrfs_release_path(path);
9421 return ret ? ret : err;
/*
 * Check that the data backrefs of @rec agree on the extent range and try to
 * repair the file extents that do not.
 *
 * @info: filesystem, passed on to repair_ref()
 * @path: scratch path, passed on to repair_ref()
 * @rec:  extent record whose backref_tree is examined
 *
 * Pass 1: every data backref that is not a full backref and has a non-zero
 * found_ref is looked up in a local list of extent_entry items by
 * (disk_bytenr, bytes), and a new entry is allocated when none exists.  A
 * backref that differs from (rec->start, rec->nr), or that is marked
 * broken, is noted as a mismatch.
 *
 * If there is at most one entry and no mismatch, nothing needs fixing.
 *
 * Otherwise find_most_right_entry() is asked for a winner.  The extent
 * record's own range (rec->start, rec->nr) is then looked up and, if
 * absent, may be added as a further entry before asking again.  When
 * neither the backrefs nor the record can settle it, a message is printed
 * and no repair is attempted.  A winner whose bytenr differs from
 * rec->start is also refused.
 *
 * Pass 2: each qualifying data backref that does not already match the
 * winner is handed to repair_ref().
 *
 * The local entry list is emptied before returning.
 * NOTE(review): metadata records appear to be excluded up front, and the
 * return value appears to tell the caller that references were rewritten
 * and a rescan is needed -- confirm both against the full function body.
 */
9424 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9425 struct extent_record *rec)
9427 struct extent_backref *back, *tmp;
9428 struct data_backref *dback;
9429 struct extent_entry *entry, *best = NULL;
9432 int broken_entries = 0;
9437 * Metadata is easy and the backrefs should always agree on bytenr and
9438 * size, if not we've got bigger issues.
9443 rbtree_postorder_for_each_entry_safe(back, tmp,
9444 &rec->backref_tree, node) {
9445 if (back->full_backref || !back->is_data)
9448 dback = to_data_backref(back);
9451 * We only pay attention to backrefs that we found a real
9454 if (dback->found_ref == 0)
9458 * For now we only catch when the bytes don't match, not the
9459 * bytenr. We can easily do this at the same time, but I want
9460 * to have a fs image to test on before we just add repair
9461 * functionality willy-nilly so we know we won't screw up the
9465 entry = find_entry(&entries, dback->disk_bytenr,
9468 entry = malloc(sizeof(struct extent_entry));
9473 memset(entry, 0, sizeof(*entry));
9474 entry->bytenr = dback->disk_bytenr;
9475 entry->bytes = dback->bytes;
9476 list_add_tail(&entry->list, &entries);
9481 * If we only have on entry we may think the entries agree when
9482 * in reality they don't so we have to do some extra checking.
9484 if (dback->disk_bytenr != rec->start ||
9485 dback->bytes != rec->nr || back->broken)
9496 /* Yay all the backrefs agree, carry on good sir */
9497 if (nr_entries <= 1 && !mismatch)
9500 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9501 "%Lu\n", rec->start);
9504 * First we want to see if the backrefs can agree amongst themselves who
9505 * is right, so figure out which one of the entries has the highest
9508 best = find_most_right_entry(&entries);
9511 * Ok so we may have an even split between what the backrefs think, so
9512 * this is where we use the extent ref to see what it thinks.
9515 entry = find_entry(&entries, rec->start, rec->nr);
9516 if (!entry && (!broken_entries || !rec->found_rec)) {
9517 fprintf(stderr, "Backrefs don't agree with each other "
9518 "and extent record doesn't agree with anybody,"
9519 " so we can't fix bytenr %Lu bytes %Lu\n",
9520 rec->start, rec->nr);
9523 } else if (!entry) {
9525 * Ok our backrefs were broken, we'll assume this is the
9526 * correct value and add an entry for this range.
9528 entry = malloc(sizeof(struct extent_entry));
9533 memset(entry, 0, sizeof(*entry));
9534 entry->bytenr = rec->start;
9535 entry->bytes = rec->nr;
9536 list_add_tail(&entry->list, &entries);
9540 best = find_most_right_entry(&entries);
9542 fprintf(stderr, "Backrefs and extent record evenly "
9543 "split on who is right, this is going to "
9544 "require user input to fix bytenr %Lu bytes "
9545 "%Lu\n", rec->start, rec->nr);
9552 * I don't think this can happen currently as we'll abort() if we catch
9553 * this case higher up, but in case somebody removes that we still can't
9554 * deal with it properly here yet, so just bail out of that's the case.
9556 if (best->bytenr != rec->start) {
9557 fprintf(stderr, "Extent start and backref starts don't match, "
9558 "please use btrfs-image on this file system and send "
9559 "it to a btrfs developer so they can make fsck fix "
9560 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9561 rec->start, rec->nr);
9567 * Ok great we all agreed on an extent record, let's go find the real
9568 * references and fix up the ones that don't match.
9570 rbtree_postorder_for_each_entry_safe(back, tmp,
9571 &rec->backref_tree, node) {
9572 if (back->full_backref || !back->is_data)
9575 dback = to_data_backref(back);
9578 * Still ignoring backrefs that don't have a real ref attached
9581 if (dback->found_ref == 0)
9584 if (dback->bytes == best->bytes &&
9585 dback->disk_bytenr == best->bytenr)
9588 ret = repair_ref(info, path, dback, best);
9594 * Ok we messed with the actual refs, which means we need to drop our
9595 * entire cache and go back and rescan. I know this is a huge pain and
9596 * adds a lot of extra work, but it's the only way to be safe. Once all
9597 * the backrefs agree we may not need to do anything to the extent
9602 while (!list_empty(&entries)) {
9603 entry = list_entry(entries.next, struct extent_entry, list);
9604 list_del_init(&entry->list);
/*
 * Replace an extent record that has no backing extent item with its single
 * recorded duplicate, then merge any other records that overlap it.
 *
 * @extent_cache: cache holding the extent records
 * @rec:          record that has duplicates attached on rec->dups
 *
 * Nothing is done when @rec itself was found in the extent tree
 * (found_rec) or has more than one duplicate.  Otherwise:
 *  - @rec is removed from the cache and the first record on rec->dups
 *    ("good") takes over: its lists and cache range are re-initialised,
 *    its checked flags and duplicate count are reset, and it inherits
 *    rec->refs and the backrefs list of @rec;
 *  - every cached record overlapping good's range is folded in.  One that
 *    has found_rec set or carries duplicates becomes a duplicate of good
 *    (and good is queued on the global duplicate_extents list); any other
 *    only contributes its refs and backrefs.  Either way it is removed
 *    from the cache;
 *  - good is inserted into the cache.
 *
 * Returns 0 when good ended up with duplicates that still need deleting
 * and 1 otherwise.
 * NOTE(review): insertion failure handling is not restated here.
 * NOTE(review): backrefs are moved here through the backrefs list, whereas
 * other functions in this file iterate rec->backref_tree; confirm both
 * containers are meant to be in use.
 */
9610 static int process_duplicates(struct cache_tree *extent_cache,
9611 struct extent_record *rec)
9613 struct extent_record *good, *tmp;
9614 struct cache_extent *cache;
9618 * If we found a extent record for this extent then return, or if we
9619 * have more than one duplicate we are likely going to need to delete
9622 if (rec->found_rec || rec->num_duplicates > 1)
9625 /* Shouldn't happen but just in case */
9626 BUG_ON(!rec->num_duplicates);
9629 * So this happens if we end up with a backref that doesn't match the
9630 * actual extent entry. So either the backref is bad or the extent
9631 * entry is bad. Either way we want to have the extent_record actually
9632 * reflect what we found in the extent_tree, so we need to take the
9633 * duplicate out and use that as the extent_record since the only way we
9634 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9636 remove_cache_extent(extent_cache, &rec->cache);
9638 good = to_extent_record(rec->dups.next);
9639 list_del_init(&good->list);
9640 INIT_LIST_HEAD(&good->backrefs);
9641 INIT_LIST_HEAD(&good->dups);
9642 good->cache.start = good->start;
9643 good->cache.size = good->nr;
9644 good->content_checked = 0;
9645 good->owner_ref_checked = 0;
9646 good->num_duplicates = 0;
9647 good->refs = rec->refs;
9648 list_splice_init(&rec->backrefs, &good->backrefs);
9650 cache = lookup_cache_extent(extent_cache, good->start,
9654 tmp = container_of(cache, struct extent_record, cache);
9657 * If we find another overlapping extent and it's found_rec is
9658 * set then it's a duplicate and we need to try and delete
9661 if (tmp->found_rec || tmp->num_duplicates > 0) {
9662 if (list_empty(&good->list))
9663 list_add_tail(&good->list,
9664 &duplicate_extents);
9665 good->num_duplicates += tmp->num_duplicates + 1;
9666 list_splice_init(&tmp->dups, &good->dups);
9667 list_del_init(&tmp->list);
9668 list_add_tail(&tmp->list, &good->dups);
9669 remove_cache_extent(extent_cache, &tmp->cache);
9674 * Ok we have another non extent item backed extent rec, so lets
9675 * just add it to this extent and carry on like we did above.
9677 good->refs += tmp->refs;
9678 list_splice_init(&tmp->backrefs, &good->backrefs);
9679 remove_cache_extent(extent_cache, &tmp->cache);
9682 ret = insert_cache_extent(extent_cache, &good->cache);
9685 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of all but one of a set of overlapping extent
 * records.
 *
 * @root: any root; replaced by fs_info->extent_root before the transaction
 * @rec:  record whose rec->dups list holds the overlapping records
 *
 * Steps:
 *  - The duplicates are scanned for a record that covers the others,
 *    comparing start and nr against the current candidate ("good").  A
 *    pair that overlaps without one covering the other is reported and not
 *    handled.
 *  - Records to be removed are collected on a local delete_list.
 *  - In one transaction on the extent root, each listed record with
 *    found_rec set has its (start, BTRFS_EXTENT_ITEM_KEY, nr) item looked
 *    up and removed with btrfs_del_item().  A metadata record is reported
 *    as unexpected.
 *  - The transaction is committed, then delete_list and rec->dups are
 *    emptied and the path is released.
 *
 * rec->num_duplicates is reset to 0 when nothing failed and nothing was
 * deleted.
 * Returns ret if it is non-zero, otherwise nr_del.
 * NOTE(review): presumably nr_del counts the items deleted and entries
 * taken off the lists are freed -- confirm.
 */
9688 static int delete_duplicate_records(struct btrfs_root *root,
9689 struct extent_record *rec)
9691 struct btrfs_trans_handle *trans;
9692 LIST_HEAD(delete_list);
9693 struct btrfs_path path;
9694 struct extent_record *tmp, *good, *n;
9697 struct btrfs_key key;
9699 btrfs_init_path(&path);
9702 /* Find the record that covers all of the duplicates. */
9703 list_for_each_entry(tmp, &rec->dups, list) {
9704 if (good->start < tmp->start)
9706 if (good->nr > tmp->nr)
9709 if (tmp->start + tmp->nr < good->start + good->nr) {
9710 fprintf(stderr, "Ok we have overlapping extents that "
9711 "aren't completely covered by each other, this "
9712 "is going to require more careful thought. "
9713 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9714 tmp->start, tmp->nr, good->start, good->nr);
9721 list_add_tail(&rec->list, &delete_list);
9723 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9726 list_move_tail(&tmp->list, &delete_list);
/* All deletions go through the extent tree. */
9729 root = root->fs_info->extent_root;
9730 trans = btrfs_start_transaction(root, 1);
9731 if (IS_ERR(trans)) {
9732 ret = PTR_ERR(trans);
9736 list_for_each_entry(tmp, &delete_list, list) {
9737 if (tmp->found_rec == 0)
9739 key.objectid = tmp->start;
9740 key.type = BTRFS_EXTENT_ITEM_KEY;
9741 key.offset = tmp->nr;
9743 /* Shouldn't happen but just in case */
9744 if (tmp->metadata) {
9745 fprintf(stderr, "Well this shouldn't happen, extent "
9746 "record overlaps but is metadata? "
9747 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9751 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9757 ret = btrfs_del_item(trans, root, &path);
9760 btrfs_release_path(&path);
9763 err = btrfs_commit_transaction(trans, root);
9767 while (!list_empty(&delete_list)) {
9768 tmp = to_extent_record(delete_list.next);
9769 list_del_init(&tmp->list);
9775 while (!list_empty(&rec->dups)) {
9776 tmp = to_extent_record(rec->dups.next);
9777 list_del_init(&tmp->list);
9781 btrfs_release_path(&path);
9783 if (!ret && !nr_del)
9784 rec->num_duplicates = 0;
9786 return ret ? ret : nr_del;
/*
 * Try to match the not-yet-found data backrefs of @rec with file extents.
 *
 * For every backref in rec->backref_tree that is a non-full data backref
 * without found_ref, the owning fs root (dback->root) is read and the
 * EXTENT_DATA item keyed by (dback->owner, dback->offset) is searched for.
 * The disk bytenr and disk num bytes of the file extent are read, the bytenr
 * is looked up in @extent_cache, and the backref is then updated
 * (found_ref, disk_bytenr, bytes).
 *
 * A root that fails to read with -ENOENT is treated as a bad ref; any other
 * root read error is returned via PTR_ERR().
 *
 * NOTE(review): the short lines of this function (continue/return statements,
 * the exact conditions around the search result and the cache lookup) are not
 * visible in this view - confirm against the complete source.
 */
9789 static int find_possible_backrefs(struct btrfs_fs_info *info,
9790 struct btrfs_path *path,
9791 struct cache_tree *extent_cache,
9792 struct extent_record *rec)
9794 struct btrfs_root *root;
9795 struct extent_backref *back, *tmp;
9796 struct data_backref *dback;
9797 struct cache_extent *cache;
9798 struct btrfs_file_extent_item *fi;
9799 struct btrfs_key key;
9803 rbtree_postorder_for_each_entry_safe(back, tmp,
9804 &rec->backref_tree, node) {
9805 /* Don't care about full backrefs (poor unloved backrefs) */
9806 if (back->full_backref || !back->is_data)
9809 dback = to_data_backref(back);
9811 /* We found this one, we don't need to do a lookup */
9812 if (dback->found_ref)
9815 key.objectid = dback->root;
9816 key.type = BTRFS_ROOT_ITEM_KEY;
9817 key.offset = (u64)-1;
9819 root = btrfs_read_fs_root(info, &key);
9821 /* No root, definitely a bad ref, skip */
9822 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9824 /* Other err, exit */
9826 return PTR_ERR(root);
9828 key.objectid = dback->owner;
9829 key.type = BTRFS_EXTENT_DATA_KEY;
9830 key.offset = dback->offset;
9831 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9833 btrfs_release_path(path);
9836 /* Didn't find it, we can carry on */
9841 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9842 struct btrfs_file_extent_item);
9843 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9844 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9845 btrfs_release_path(path);
9846 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9848 struct extent_record *tmp;
9849 tmp = container_of(cache, struct extent_record, cache);
9852 * If we found an extent record for the bytenr for this
9853 * particular backref then we can't add it to our
9854 * current extent record. We only want to add backrefs
9855 * that don't have a corresponding extent item in the
9856 * extent tree since they likely belong to this record
9857 * and we need to fix it if it doesn't match bytenrs.
9863 dback->found_ref += 1;
9864 dback->disk_bytenr = bytenr;
9865 dback->bytes = bytes;
9868 * Set this so the verify backref code knows not to trust the
9869 * values in this backref.
9878 * Record orphan data ref into corresponding root.
9880 * Return 0 if the extent item contains data ref and recorded.
9881 * Return 1 if the extent item contains no useful data ref
9882 * On that case, it may contains only shared_dataref or metadata backref
9883 * or the file extent exists(this should be handled by the extent bytenr
9885 * Return <0 if something goes wrong.
9887 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9888 struct extent_record *rec)
9890 struct btrfs_key key;
9891 struct btrfs_root *dest_root;
9892 struct extent_backref *back, *tmp;
9893 struct data_backref *dback;
9894 struct orphan_data_extent *orphan;
9895 struct btrfs_path path;
9896 int recorded_data_ref = 0;
9901 btrfs_init_path(&path);
9902 rbtree_postorder_for_each_entry_safe(back, tmp,
9903 &rec->backref_tree, node) {
9904 if (back->full_backref || !back->is_data ||
9905 !back->found_extent_tree)
9907 dback = to_data_backref(back);
9908 if (dback->found_ref)
9910 key.objectid = dback->root;
9911 key.type = BTRFS_ROOT_ITEM_KEY;
9912 key.offset = (u64)-1;
9914 dest_root = btrfs_read_fs_root(fs_info, &key);
9916 /* For non-exist root we just skip it */
9917 if (IS_ERR(dest_root) || !dest_root)
9920 key.objectid = dback->owner;
9921 key.type = BTRFS_EXTENT_DATA_KEY;
9922 key.offset = dback->offset;
9924 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9925 btrfs_release_path(&path);
9927 * For ret < 0, it's OK since the fs-tree may be corrupted,
9928 * we need to record it for inode/file extent rebuild.
9929 * For ret > 0, we record it only for file extent rebuild.
9930 * For ret == 0, the file extent exists but only bytenr
9931 * mismatch, let the original bytenr fix routine to handle,
9937 orphan = malloc(sizeof(*orphan));
9942 INIT_LIST_HEAD(&orphan->list);
9943 orphan->root = dback->root;
9944 orphan->objectid = dback->owner;
9945 orphan->offset = dback->offset;
9946 orphan->disk_bytenr = rec->cache.start;
9947 orphan->disk_len = rec->cache.size;
9948 list_add(&dest_root->orphan_data_extents, &orphan->list);
9949 recorded_data_ref = 1;
9952 btrfs_release_path(&path);
9954 return !recorded_data_ref;
9960 * when an incorrect extent item is found, this will delete
9961 * all of the existing entries for it and recreate them
9962 * based on what the tree scan found.
9964 static int fixup_extent_refs(struct btrfs_fs_info *info,
9965 struct cache_tree *extent_cache,
9966 struct extent_record *rec)
9968 struct btrfs_trans_handle *trans = NULL;
9970 struct btrfs_path path;
9971 struct cache_extent *cache;
9972 struct extent_backref *back, *tmp;
9976 if (rec->flag_block_full_backref)
9977 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9979 btrfs_init_path(&path);
9980 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9982 * Sometimes the backrefs themselves are so broken they don't
9983 * get attached to any meaningful rec, so first go back and
9984 * check any of our backrefs that we couldn't find and throw
9985 * them into the list if we find the backref so that
9986 * verify_backrefs can figure out what to do.
9988 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9993 /* step one, make sure all of the backrefs agree */
9994 ret = verify_backrefs(info, &path, rec);
9998 trans = btrfs_start_transaction(info->extent_root, 1);
9999 if (IS_ERR(trans)) {
10000 ret = PTR_ERR(trans);
10004 /* step two, delete all the existing records */
10005 ret = delete_extent_records(trans, info->extent_root, &path,
10011 /* was this block corrupt? If so, don't add references to it */
10012 cache = lookup_cache_extent(info->corrupt_blocks,
10013 rec->start, rec->max_size);
10019 /* step three, recreate all the refs we did find */
10020 rbtree_postorder_for_each_entry_safe(back, tmp,
10021 &rec->backref_tree, node) {
10023 * if we didn't find any references, don't create a
10024 * new extent record
10026 if (!back->found_ref)
10029 rec->bad_full_backref = 0;
10030 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10038 int err = btrfs_commit_transaction(trans, info->extent_root);
10044 fprintf(stderr, "Repaired extent references for %llu\n",
10045 (unsigned long long)rec->start);
10047 btrfs_release_path(&path);
10051 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10052 struct extent_record *rec)
10054 struct btrfs_trans_handle *trans;
10055 struct btrfs_root *root = fs_info->extent_root;
10056 struct btrfs_path path;
10057 struct btrfs_extent_item *ei;
10058 struct btrfs_key key;
10062 key.objectid = rec->start;
10063 if (rec->metadata) {
10064 key.type = BTRFS_METADATA_ITEM_KEY;
10065 key.offset = rec->info_level;
10067 key.type = BTRFS_EXTENT_ITEM_KEY;
10068 key.offset = rec->max_size;
10071 trans = btrfs_start_transaction(root, 0);
10073 return PTR_ERR(trans);
10075 btrfs_init_path(&path);
10076 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10078 btrfs_release_path(&path);
10079 btrfs_commit_transaction(trans, root);
10082 fprintf(stderr, "Didn't find extent for %llu\n",
10083 (unsigned long long)rec->start);
10084 btrfs_release_path(&path);
10085 btrfs_commit_transaction(trans, root);
10089 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10090 struct btrfs_extent_item);
10091 flags = btrfs_extent_flags(path.nodes[0], ei);
10092 if (rec->flag_block_full_backref) {
10093 fprintf(stderr, "setting full backref on %llu\n",
10094 (unsigned long long)key.objectid);
10095 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10097 fprintf(stderr, "clearing full backref on %llu\n",
10098 (unsigned long long)key.objectid);
10099 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10101 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10102 btrfs_mark_buffer_dirty(path.nodes[0]);
10103 btrfs_release_path(&path);
10104 ret = btrfs_commit_transaction(trans, root);
10106 fprintf(stderr, "Repaired extent flags for %llu\n",
10107 (unsigned long long)rec->start);
10112 /* right now we only prune from the extent allocation tree */
10113 static int prune_one_block(struct btrfs_trans_handle *trans,
10114 struct btrfs_fs_info *info,
10115 struct btrfs_corrupt_block *corrupt)
10118 struct btrfs_path path;
10119 struct extent_buffer *eb;
10123 int level = corrupt->level + 1;
10125 btrfs_init_path(&path);
10127 /* we want to stop at the parent to our busted block */
10128 path.lowest_level = level;
10130 ret = btrfs_search_slot(trans, info->extent_root,
10131 &corrupt->key, &path, -1, 1);
10136 eb = path.nodes[level];
10143 * hopefully the search gave us the block we want to prune,
10144 * lets try that first
10146 slot = path.slots[level];
10147 found = btrfs_node_blockptr(eb, slot);
10148 if (found == corrupt->cache.start)
10151 nritems = btrfs_header_nritems(eb);
10153 /* the search failed, lets scan this node and hope we find it */
10154 for (slot = 0; slot < nritems; slot++) {
10155 found = btrfs_node_blockptr(eb, slot);
10156 if (found == corrupt->cache.start)
10160 * we couldn't find the bad block. TODO, search all the nodes for pointers
10163 if (eb == info->extent_root->node) {
10168 btrfs_release_path(&path);
10173 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10174 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10177 btrfs_release_path(&path);
10181 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10183 struct btrfs_trans_handle *trans = NULL;
10184 struct cache_extent *cache;
10185 struct btrfs_corrupt_block *corrupt;
10188 cache = search_cache_extent(info->corrupt_blocks, 0);
10192 trans = btrfs_start_transaction(info->extent_root, 1);
10194 return PTR_ERR(trans);
10196 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10197 prune_one_block(trans, info, corrupt);
10198 remove_cache_extent(info->corrupt_blocks, cache);
10201 return btrfs_commit_transaction(trans, info->extent_root);
10205 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10207 struct btrfs_block_group_cache *cache;
10212 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10213 &start, &end, EXTENT_DIRTY);
10216 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10221 cache = btrfs_lookup_first_block_group(fs_info, start);
10226 start = cache->key.objectid + cache->key.offset;
/*
 * Verify (and optionally repair) every extent record in @extent_cache.
 *
 * Visible steps:
 *  - the ranges of all extent records and of all corrupt blocks are marked
 *    dirty in fs_info->excluded_extents, corrupt blocks are pruned and the
 *    cached block group state is reset;
 *  - while repairing, entries on the global duplicate_extents list are run
 *    through process_duplicates() and delete_duplicate_records();
 *  - each record is then checked for duplicates, ref count mismatch
 *    (refs vs extent_item_refs), backpointer mismatch, a failed owner ref
 *    check, a bad full backref, crossing stripes and a wrong chunk type, with
 *    fixup_extent_refs()/fixup_extent_flags() used for repair;
 *  - the record is removed from the cache and its backrefs are freed;
 *  - finally block accounting is fixed in a transaction on the extent root.
 *
 * NOTE(review): the short lines of this function (the repair guards, the
 * cur_err/fix bookkeeping, labels, gotos and return statements) are not
 * visible in this view - confirm control flow and return values against the
 * complete source.
 */
10230 static int check_extent_refs(struct btrfs_root *root,
10231 struct cache_tree *extent_cache)
10233 struct extent_record *rec;
10234 struct cache_extent *cache;
10241 * if we're doing a repair, we have to make sure
10242 * we don't allocate from the problem extents.
10243 * In the worst case, this will be all the
10244 * extents in the FS
10246 cache = search_cache_extent(extent_cache, 0);
10248 rec = container_of(cache, struct extent_record, cache);
10249 set_extent_dirty(root->fs_info->excluded_extents,
10251 rec->start + rec->max_size - 1);
10252 cache = next_cache_extent(cache);
10255 /* pin down all the corrupted blocks too */
10256 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10258 set_extent_dirty(root->fs_info->excluded_extents,
10260 cache->start + cache->size - 1);
10261 cache = next_cache_extent(cache);
10263 prune_corrupt_blocks(root->fs_info);
10264 reset_cached_block_groups(root->fs_info);
10267 reset_cached_block_groups(root->fs_info);
10270 * We need to delete any duplicate entries we find first otherwise we
10271 * could mess up the extent tree when we have backrefs that actually
10272 * belong to a different extent item and not the weird duplicate one.
10274 while (repair && !list_empty(&duplicate_extents)) {
10275 rec = to_extent_record(duplicate_extents.next);
10276 list_del_init(&rec->list);
10278 /* Sometimes we can find a backref before we find an actual
10279 * extent, so we need to process it a little bit to see if there
10280 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10281 * if this is a backref screwup. If we need to delete stuff
10282 * process_duplicates() will return 0, otherwise it will return
10285 if (process_duplicates(extent_cache, rec))
10287 ret = delete_duplicate_records(root, rec);
10291 * delete_duplicate_records will return the number of entries
10292 * deleted, so if it's greater than 0 then we know we actually
10293 * did something and we need to remove.
10306 cache = search_cache_extent(extent_cache, 0);
10309 rec = container_of(cache, struct extent_record, cache);
10310 if (rec->num_duplicates) {
10311 fprintf(stderr, "extent item %llu has multiple extent "
10312 "items\n", (unsigned long long)rec->start);
10316 if (rec->refs != rec->extent_item_refs) {
10317 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10318 (unsigned long long)rec->start,
10319 (unsigned long long)rec->nr);
10320 fprintf(stderr, "extent item %llu, found %llu\n",
10321 (unsigned long long)rec->extent_item_refs,
10322 (unsigned long long)rec->refs);
10323 ret = record_orphan_data_extents(root->fs_info, rec);
10329 if (all_backpointers_checked(rec, 1)) {
10330 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10331 (unsigned long long)rec->start,
10332 (unsigned long long)rec->nr);
10336 if (!rec->owner_ref_checked) {
10337 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10338 (unsigned long long)rec->start,
10339 (unsigned long long)rec->nr);
10344 if (repair && fix) {
10345 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10351 if (rec->bad_full_backref) {
10352 fprintf(stderr, "bad full backref, on [%llu]\n",
10353 (unsigned long long)rec->start);
10355 ret = fixup_extent_flags(root->fs_info, rec);
10363 * Although it's not a extent ref's problem, we reuse this
10364 * routine for error reporting.
10365 * No repair function yet.
10367 if (rec->crossing_stripes) {
10369 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10370 rec->start, rec->start + rec->max_size);
10374 if (rec->wrong_chunk_type) {
10376 "bad extent [%llu, %llu), type mismatch with chunk\n",
10377 rec->start, rec->start + rec->max_size);
10382 remove_cache_extent(extent_cache, cache);
10383 free_all_extent_backrefs(rec);
10384 if (!init_extent_tree && repair && (!cur_err || fix))
10385 clear_extent_dirty(root->fs_info->excluded_extents,
10387 rec->start + rec->max_size - 1);
10392 if (ret && ret != -EAGAIN) {
10393 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10396 struct btrfs_trans_handle *trans;
10398 root = root->fs_info->extent_root;
10399 trans = btrfs_start_transaction(root, 1);
10400 if (IS_ERR(trans)) {
10401 ret = PTR_ERR(trans);
10405 ret = btrfs_fix_block_accounting(trans, root);
10408 ret = btrfs_commit_transaction(trans, root);
10420 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10424 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10425 stripe_size = length;
10426 stripe_size /= num_stripes;
10427 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10428 stripe_size = length * 2;
10429 stripe_size /= num_stripes;
10430 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10431 stripe_size = length;
10432 stripe_size /= (num_stripes - 1);
10433 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10434 stripe_size = length;
10435 stripe_size /= (num_stripes - 2);
10437 stripe_size = length;
10439 return stripe_size;
10443 * Check the chunk with its block group/dev list ref:
10444 * Return 0 if all refs seems valid.
10445 * Return 1 if part of refs seems valid, need later check for rebuild ref
10446 * like missing block group and needs to search extent tree to rebuild them.
10447 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against its block group and device extents.
 *
 * The block group is looked up in @block_group_cache and its offset, objectid
 * and flags are compared with the chunk's length, offset and type_flags; on a
 * match the block group is unlinked from its list and stored in
 * chunk_rec->bg_rec.  Then, for every stripe, a device extent of the length
 * given by calc_stripe_length() is looked up by (devid, offset) in
 * @dev_extent_cache and compared; matching device extents are moved onto
 * chunk_rec->dextents.
 *
 * NOTE(review): the short lines of this function (the fourth parameter, the
 * guards around the messages, the metadump_v2 handling and the ret
 * assignments/return) are not visible in this view - confirm against the
 * complete source.
 */
10449 static int check_chunk_refs(struct chunk_record *chunk_rec,
10450 struct block_group_tree *block_group_cache,
10451 struct device_extent_tree *dev_extent_cache,
10454 struct cache_extent *block_group_item;
10455 struct block_group_record *block_group_rec;
10456 struct cache_extent *dev_extent_item;
10457 struct device_extent_record *dev_extent_rec;
10461 int metadump_v2 = 0;
10465 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10467 chunk_rec->length);
10468 if (block_group_item) {
10469 block_group_rec = container_of(block_group_item,
10470 struct block_group_record,
10472 if (chunk_rec->length != block_group_rec->offset ||
10473 chunk_rec->offset != block_group_rec->objectid ||
10475 chunk_rec->type_flags != block_group_rec->flags)) {
10478 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10479 chunk_rec->objectid,
10484 chunk_rec->type_flags,
10485 block_group_rec->objectid,
10486 block_group_rec->type,
10487 block_group_rec->offset,
10488 block_group_rec->offset,
10489 block_group_rec->objectid,
10490 block_group_rec->flags);
10493 list_del_init(&block_group_rec->list);
10494 chunk_rec->bg_rec = block_group_rec;
10499 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10500 chunk_rec->objectid,
10505 chunk_rec->type_flags);
10512 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10513 chunk_rec->num_stripes);
10514 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10515 devid = chunk_rec->stripes[i].devid;
10516 offset = chunk_rec->stripes[i].offset;
10517 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10518 devid, offset, length);
10519 if (dev_extent_item) {
10520 dev_extent_rec = container_of(dev_extent_item,
10521 struct device_extent_record,
10523 if (dev_extent_rec->objectid != devid ||
10524 dev_extent_rec->offset != offset ||
10525 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10526 dev_extent_rec->length != length) {
10529 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10530 chunk_rec->objectid,
10533 chunk_rec->stripes[i].devid,
10534 chunk_rec->stripes[i].offset,
10535 dev_extent_rec->objectid,
10536 dev_extent_rec->offset,
10537 dev_extent_rec->length);
10540 list_move(&dev_extent_rec->chunk_list,
10541 &chunk_rec->dextents);
10546 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10547 chunk_rec->objectid,
10550 chunk_rec->stripes[i].devid,
10551 chunk_rec->stripes[i].offset);
10558 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10559 int check_chunks(struct cache_tree *chunk_cache,
10560 struct block_group_tree *block_group_cache,
10561 struct device_extent_tree *dev_extent_cache,
10562 struct list_head *good, struct list_head *bad,
10563 struct list_head *rebuild, int silent)
10565 struct cache_extent *chunk_item;
10566 struct chunk_record *chunk_rec;
10567 struct block_group_record *bg_rec;
10568 struct device_extent_record *dext_rec;
10572 chunk_item = first_cache_extent(chunk_cache);
10573 while (chunk_item) {
10574 chunk_rec = container_of(chunk_item, struct chunk_record,
10576 err = check_chunk_refs(chunk_rec, block_group_cache,
10577 dev_extent_cache, silent);
10580 if (err == 0 && good)
10581 list_add_tail(&chunk_rec->list, good);
10582 if (err > 0 && rebuild)
10583 list_add_tail(&chunk_rec->list, rebuild);
10584 if (err < 0 && bad)
10585 list_add_tail(&chunk_rec->list, bad);
10586 chunk_item = next_cache_extent(chunk_item);
10589 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10592 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10600 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10604 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10605 dext_rec->objectid,
10615 static int check_device_used(struct device_record *dev_rec,
10616 struct device_extent_tree *dext_cache)
10618 struct cache_extent *cache;
10619 struct device_extent_record *dev_extent_rec;
10620 u64 total_byte = 0;
10622 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10624 dev_extent_rec = container_of(cache,
10625 struct device_extent_record,
10627 if (dev_extent_rec->objectid != dev_rec->devid)
10630 list_del_init(&dev_extent_rec->device_list);
10631 total_byte += dev_extent_rec->length;
10632 cache = next_cache_extent(cache);
10635 if (total_byte != dev_rec->byte_used) {
10637 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10638 total_byte, dev_rec->byte_used, dev_rec->objectid,
10639 dev_rec->type, dev_rec->offset);
10647 * Unlike device size alignment check above, some super total_bytes check
10648 * failure can lead to mount failure for newer kernel.
10650 * So this function will return the error for a fatal super total_bytes problem.
10652 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10654 struct btrfs_device *dev;
10655 struct list_head *dev_list = &fs_info->fs_devices->devices;
10656 u64 total_bytes = 0;
10657 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10659 list_for_each_entry(dev, dev_list, dev_list)
10660 total_bytes += dev->total_bytes;
10662 /* Important check, which can cause unmountable fs */
10663 if (super_bytes < total_bytes) {
10664 error("super total bytes %llu smaller than real device(s) size %llu",
10665 super_bytes, total_bytes);
10666 error("mounting this fs may fail for newer kernels");
10667 error("this can be fixed by 'btrfs rescue fix-device-size'");
10672 * Optional check, just to make everything aligned and match with each
10675 * For a btrfs-image restored fs, we don't need to check it anyway.
10677 if (btrfs_super_flags(fs_info->super_copy) &
10678 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10680 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10681 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10682 super_bytes != total_bytes) {
10683 warning("minor unaligned/mismatch device size detected");
10685 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10690 /* check btrfs_dev_item -> btrfs_dev_extent */
10691 static int check_devices(struct rb_root *dev_cache,
10692 struct device_extent_tree *dev_extent_cache)
10694 struct rb_node *dev_node;
10695 struct device_record *dev_rec;
10696 struct device_extent_record *dext_rec;
10700 dev_node = rb_first(dev_cache);
10702 dev_rec = container_of(dev_node, struct device_record, node);
10703 err = check_device_used(dev_rec, dev_extent_cache);
10707 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10708 global_info->sectorsize);
10709 dev_node = rb_next(dev_node);
10711 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10714 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10715 dext_rec->objectid, dext_rec->offset, dext_rec->length);
10722 static int add_root_item_to_list(struct list_head *head,
10723 u64 objectid, u64 bytenr, u64 last_snapshot,
10724 u8 level, u8 drop_level,
10725 struct btrfs_key *drop_key)
10728 struct root_item_record *ri_rec;
10729 ri_rec = malloc(sizeof(*ri_rec));
10732 ri_rec->bytenr = bytenr;
10733 ri_rec->objectid = objectid;
10734 ri_rec->level = level;
10735 ri_rec->drop_level = drop_level;
10736 ri_rec->last_snapshot = last_snapshot;
10738 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10739 list_add_tail(&ri_rec->list, head);
10744 static void free_root_item_list(struct list_head *list)
10746 struct root_item_record *ri_rec;
10748 while (!list_empty(list)) {
10749 ri_rec = list_first_entry(list, struct root_item_record,
10751 list_del_init(&ri_rec->list);
/*
 * Process every root queued on @list, then drain the remaining blocks.
 *
 * For each root_item_record the root node is read with read_tree_block(),
 * handed to add_root_to_pending() and processed with run_next_block() using
 * that record; the buffer is then released and the record unlinked from the
 * list.  Afterwards run_next_block() is called with a NULL record.
 *
 * NOTE(review): the short lines of this function (one parameter, the local
 * declarations, loop headers, break conditions, the free of the record and
 * the return statement) are not visible in this view - confirm loop exits
 * and return values against the complete source.
 */
10756 static int deal_root_from_list(struct list_head *list,
10757 struct btrfs_root *root,
10758 struct block_info *bits,
10760 struct cache_tree *pending,
10761 struct cache_tree *seen,
10762 struct cache_tree *reada,
10763 struct cache_tree *nodes,
10764 struct cache_tree *extent_cache,
10765 struct cache_tree *chunk_cache,
10766 struct rb_root *dev_cache,
10767 struct block_group_tree *block_group_cache,
10768 struct device_extent_tree *dev_extent_cache)
10773 while (!list_empty(list)) {
10774 struct root_item_record *rec;
10775 struct extent_buffer *buf;
10776 rec = list_entry(list->next,
10777 struct root_item_record, list);
10779 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10780 if (!extent_buffer_uptodate(buf)) {
10781 free_extent_buffer(buf);
10785 ret = add_root_to_pending(buf, extent_cache, pending,
10786 seen, nodes, rec->objectid);
10790 * To rebuild extent tree, we need deal with snapshot
10791 * one by one, otherwise we deal with node firstly which
10792 * can maximize readahead.
10795 ret = run_next_block(root, bits, bits_nr, &last,
10796 pending, seen, reada, nodes,
10797 extent_cache, chunk_cache,
10798 dev_cache, block_group_cache,
10799 dev_extent_cache, rec);
10803 free_extent_buffer(buf);
10804 list_del(&rec->list);
10810 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10811 reada, nodes, extent_cache, chunk_cache,
10812 dev_cache, block_group_cache,
10813 dev_extent_cache, NULL);
/*
 * Top-level check of chunks, block groups, device extents and extent refs.
 *
 * Visible steps:
 *  - initialise the local caches and publish excluded_extents,
 *    fsck_extent_cache, free_extent_hook and corrupt_blocks in @fs_info;
 *  - queue the tree root and the chunk root on normal_trees, then walk the
 *    ROOT_ITEMs of the tree root: roots whose drop_progress objectid is 0 go
 *    on normal_trees, the others on dropping_trees with their drop key;
 *  - process both lists with deal_root_from_list();
 *  - run check_chunks(), check_extent_refs() and check_devices();
 *  - stop the progress task, clear the @fs_info hooks and free all caches.
 *
 * The calls can yield -EAGAIN (check_block can return it after fixing
 * something), which the visible code tests for after each stage.
 *
 * NOTE(review): the short lines of this function (labels, gotos, the error
 * bookkeeping and the return statement) are not visible in this view -
 * confirm the restart and cleanup paths against the complete source.
 */
10823 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10825 struct rb_root dev_cache;
10826 struct cache_tree chunk_cache;
10827 struct block_group_tree block_group_cache;
10828 struct device_extent_tree dev_extent_cache;
10829 struct cache_tree extent_cache;
10830 struct cache_tree seen;
10831 struct cache_tree pending;
10832 struct cache_tree reada;
10833 struct cache_tree nodes;
10834 struct extent_io_tree excluded_extents;
10835 struct cache_tree corrupt_blocks;
10836 struct btrfs_path path;
10837 struct btrfs_key key;
10838 struct btrfs_key found_key;
10840 struct block_info *bits;
10842 struct extent_buffer *leaf;
10844 struct btrfs_root_item ri;
10845 struct list_head dropping_trees;
10846 struct list_head normal_trees;
10847 struct btrfs_root *root1;
10848 struct btrfs_root *root;
10852 root = fs_info->fs_root;
10853 dev_cache = RB_ROOT;
10854 cache_tree_init(&chunk_cache);
10855 block_group_tree_init(&block_group_cache);
10856 device_extent_tree_init(&dev_extent_cache);
10858 cache_tree_init(&extent_cache);
10859 cache_tree_init(&seen);
10860 cache_tree_init(&pending);
10861 cache_tree_init(&nodes);
10862 cache_tree_init(&reada);
10863 cache_tree_init(&corrupt_blocks);
10864 extent_io_tree_init(&excluded_extents);
10865 INIT_LIST_HEAD(&dropping_trees);
10866 INIT_LIST_HEAD(&normal_trees);
10869 fs_info->excluded_extents = &excluded_extents;
10870 fs_info->fsck_extent_cache = &extent_cache;
10871 fs_info->free_extent_hook = free_extent_hook;
10872 fs_info->corrupt_blocks = &corrupt_blocks;
10876 bits = malloc(bits_nr * sizeof(struct block_info));
10882 if (ctx.progress_enabled) {
10883 ctx.tp = TASK_EXTENTS;
10884 task_start(ctx.info);
10888 root1 = fs_info->tree_root;
10889 level = btrfs_header_level(root1->node);
10890 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10891 root1->node->start, 0, level, 0, NULL);
10894 root1 = fs_info->chunk_root;
10895 level = btrfs_header_level(root1->node);
10896 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10897 root1->node->start, 0, level, 0, NULL);
10900 btrfs_init_path(&path);
10903 key.type = BTRFS_ROOT_ITEM_KEY;
10904 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10908 leaf = path.nodes[0];
10909 slot = path.slots[0];
10910 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10911 ret = btrfs_next_leaf(root, &path);
10914 leaf = path.nodes[0];
10915 slot = path.slots[0];
10917 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10918 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10919 unsigned long offset;
10922 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10923 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10924 last_snapshot = btrfs_root_last_snapshot(&ri);
10925 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10926 level = btrfs_root_level(&ri);
10927 ret = add_root_item_to_list(&normal_trees,
10928 found_key.objectid,
10929 btrfs_root_bytenr(&ri),
10930 last_snapshot, level,
10935 level = btrfs_root_level(&ri);
10936 objectid = found_key.objectid;
10937 btrfs_disk_key_to_cpu(&found_key,
10938 &ri.drop_progress);
10939 ret = add_root_item_to_list(&dropping_trees,
10941 btrfs_root_bytenr(&ri),
10942 last_snapshot, level,
10943 ri.drop_level, &found_key);
10950 btrfs_release_path(&path);
10953 * check_block can return -EAGAIN if it fixes something, please keep
10954 * this in mind when dealing with return values from these functions, if
10955 * we get -EAGAIN we want to fall through and restart the loop.
10957 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10958 &seen, &reada, &nodes, &extent_cache,
10959 &chunk_cache, &dev_cache, &block_group_cache,
10960 &dev_extent_cache);
10962 if (ret == -EAGAIN)
10966 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10967 &pending, &seen, &reada, &nodes,
10968 &extent_cache, &chunk_cache, &dev_cache,
10969 &block_group_cache, &dev_extent_cache);
10971 if (ret == -EAGAIN)
10976 ret = check_chunks(&chunk_cache, &block_group_cache,
10977 &dev_extent_cache, NULL, NULL, NULL, 0);
10979 if (ret == -EAGAIN)
10984 ret = check_extent_refs(root, &extent_cache);
10986 if (ret == -EAGAIN)
10991 ret = check_devices(&dev_cache, &dev_extent_cache);
10996 task_stop(ctx.info);
10998 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10999 extent_io_tree_cleanup(&excluded_extents);
11000 fs_info->fsck_extent_cache = NULL;
11001 fs_info->free_extent_hook = NULL;
11002 fs_info->corrupt_blocks = NULL;
11003 fs_info->excluded_extents = NULL;
11006 free_chunk_cache_tree(&chunk_cache);
11007 free_device_cache_tree(&dev_cache);
11008 free_block_group_tree(&block_group_cache);
11009 free_device_extent_tree(&dev_extent_cache);
11010 free_extent_cache_tree(&seen);
11011 free_extent_cache_tree(&pending);
11012 free_extent_cache_tree(&reada);
11013 free_extent_cache_tree(&nodes);
11014 free_root_item_list(&normal_trees);
11015 free_root_item_list(&dropping_trees);
11018 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11019 free_extent_cache_tree(&seen);
11020 free_extent_cache_tree(&pending);
11021 free_extent_cache_tree(&reada);
11022 free_extent_cache_tree(&nodes);
11023 free_chunk_cache_tree(&chunk_cache);
11024 free_block_group_tree(&block_group_cache);
11025 free_device_cache_tree(&dev_cache);
11026 free_device_extent_tree(&dev_extent_cache);
11027 free_extent_record_cache(&extent_cache);
11028 free_root_item_list(&normal_trees);
11029 free_root_item_list(&dropping_trees);
11030 extent_io_tree_cleanup(&excluded_extents);
/*
 * Validate the type of one inline extent backref.
 *
 * @eb:   extent buffer that holds the extent item
 * @key:  key of the extent item, used here only for the error message
 * @iref: inline ref whose type is read via btrfs_extent_inline_ref_type()
 *
 * TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF and SHARED_DATA_REF are
 * the recognised types. For any other type an error is printed and ret is
 * set to UNKNOWN_TYPE.
 * NOTE(review): the value produced for recognised types and the return
 * statement are not visible in this listing - presumably 0; confirm.
 */
11034 static int check_extent_inline_ref(struct extent_buffer *eb,
11035 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11038 u8 type = btrfs_extent_inline_ref_type(eb, iref);
/* The four known backref types share a single case group */
11041 case BTRFS_TREE_BLOCK_REF_KEY:
11042 case BTRFS_EXTENT_DATA_REF_KEY:
11043 case BTRFS_SHARED_BLOCK_REF_KEY:
11044 case BTRFS_SHARED_DATA_REF_KEY:
11048 error("extent[%llu %u %llu] has unknown ref type: %d",
11049 key->objectid, key->type, key->offset, type);
11050 ret = UNKNOWN_TYPE;
11058 * Check backrefs of a tree block given by @bytenr or @eb.
11060 * @root: the root containing the @bytenr or @eb
11061 * @eb: tree block extent buffer, can be NULL
11062 * @bytenr: bytenr of the tree block to search
11063 * @level: tree level of the tree block
11064 * @owner: owner of the tree block
11066 * Return >0 for any error found and output error message
11067 * Return 0 for no error found
/*
 * Lookup order used by this function:
 *  1) Find the extent/metadata item for @bytenr in the extent tree and
 *     compare its flags, generation, level and ref count with the tree
 *     block (BACKREF_MISMATCH on inconsistency).
 *  2) Walk the inline refs of that item looking for a TREE_BLOCK_REF that
 *     names this root (or @owner when the check is not strict), or a
 *     SHARED_BLOCK_REF whose parent block passes a recursive check.
 *  3) Search for a keyed TREE_BLOCK_REF item (bytenr, root->objectid).
 *  4) Search for a keyed SHARED_BLOCK_REF item.
 * BACKREF_MISSING is OR-ed into err when the lookups fail, and a
 * "backref lost" message is printed when @eb is set.
 * NOTE(review): @eb may be NULL (the recursive call below passes NULL);
 * the guard around the btrfs_header_generation(eb) use and the jumps
 * between the steps are not visible in this listing - confirm.
 */
11069 static int check_tree_block_ref(struct btrfs_root *root,
11070 struct extent_buffer *eb, u64 bytenr,
11071 int level, u64 owner, struct node_refs *nrefs)
11073 struct btrfs_key key;
11074 struct btrfs_root *extent_root = root->fs_info->extent_root;
11075 struct btrfs_path path;
11076 struct btrfs_extent_item *ei;
11077 struct btrfs_extent_inline_ref *iref;
11078 struct extent_buffer *leaf;
11083 int root_level = btrfs_header_level(root->node);
11085 u32 nodesize = root->fs_info->nodesize;
11094 btrfs_init_path(&path);
/* The key type depends on whether the SKINNY_METADATA incompat flag is set */
11095 key.objectid = bytenr;
11096 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11097 key.type = BTRFS_METADATA_ITEM_KEY;
11099 key.type = BTRFS_EXTENT_ITEM_KEY;
/*
 * Search with the largest offset, then step back with
 * btrfs_previous_extent_item() to reach the item for @bytenr.
 */
11100 key.offset = (u64)-1;
11102 /* Search for the backref in extent tree */
11103 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11105 err |= BACKREF_MISSING;
11108 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11110 err |= BACKREF_MISSING;
11114 leaf = path.nodes[0];
11115 slot = path.slots[0];
11116 btrfs_item_key_to_cpu(leaf, &key, slot);
11118 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is taken from key.offset and the inline refs
 * start right after the extent item.
 */
11120 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11121 skinny_level = (int)key.offset;
11122 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
/*
 * Otherwise a btrfs_tree_block_info follows the extent item; it carries
 * the level, and the inline refs start after it.
 */
11124 struct btrfs_tree_block_info *info;
11126 info = (struct btrfs_tree_block_info *)(ei + 1);
11127 skinny_level = btrfs_tree_block_level(leaf, info);
11128 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11137 * Due to the feature of shared tree blocks, if the upper node
11138 * is a fs root or shared node, the extent of checked node may
11139 * not be updated until the next CoW.
11142 strict = should_check_extent_strictly(root, nrefs,
11144 if (!(btrfs_extent_flags(leaf, ei) &
11145 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11147 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11148 key.objectid, nodesize,
11149 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11150 err = BACKREF_MISMATCH;
11152 header_gen = btrfs_header_generation(eb);
11153 extent_gen = btrfs_extent_generation(leaf, ei);
11154 if (header_gen != extent_gen) {
11156 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11157 key.objectid, nodesize, header_gen,
11159 err = BACKREF_MISMATCH;
11161 if (level != skinny_level) {
11163 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11164 key.objectid, nodesize, level, skinny_level);
11165 err = BACKREF_MISMATCH;
/* A block whose owner is not a fs tree is expected to have exactly one ref */
11167 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11169 "extent[%llu %u] is referred by other roots than %llu",
11170 key.objectid, nodesize, root->objectid);
11171 err = BACKREF_MISMATCH;
11176 * Iterate the extent/metadata item to find the exact backref
11178 item_size = btrfs_item_size_nr(leaf, slot);
11179 ptr = (unsigned long)iref;
11180 end = (unsigned long)ei + item_size;
11182 while (ptr < end) {
11183 iref = (struct btrfs_extent_inline_ref *)ptr;
11184 type = btrfs_extent_inline_ref_type(leaf, iref);
11185 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11187 ret = check_extent_inline_ref(leaf, &key, iref);
/*
 * For a TREE_BLOCK_REF the inline offset is compared with this root's
 * objectid, and with @owner when the check is not strict.
 * NOTE(review): the statements taken on a match are not visible here -
 * presumably they set found_ref; confirm.
 */
11192 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11193 if (offset == root->objectid)
11195 if (!strict && owner == offset)
11197 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11199 * Backref of tree reloc root points to itself, no need
11200 * to check backref any more.
11202 * This may be an error of loop backref, but extent tree
11203 * checker should have already handled it.
11204 * Here we only need to avoid infinite iteration.
11206 if (offset == bytenr) {
11210 * Check if the backref points to valid
/* Recurse on the parent block one level up; found_ref is set when it reports no error */
11213 found_ref = !check_tree_block_ref( root, NULL,
11214 offset, level + 1, owner,
11221 ptr += btrfs_extent_inline_ref_size(type);
11225 * Inlined extent item doesn't have what we need, check
11226 * TREE_BLOCK_REF_KEY
11229 btrfs_release_path(&path);
11230 key.objectid = bytenr;
11231 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11232 key.offset = root->objectid;
11234 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11239 * Finally check SHARED BLOCK REF, any found will be good
11240 * Here we're not doing comprehensive extent backref checking,
11241 * only need to ensure there is some extent referring to this
11245 btrfs_release_path(&path);
11246 key.objectid = bytenr;
11247 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11248 key.offset = (u64)-1;
11250 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11252 err |= BACKREF_MISSING;
11255 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11257 err |= BACKREF_MISSING;
11263 err |= BACKREF_MISSING;
11265 btrfs_release_path(&path);
/*
 * For the report: use the recorded bytenr of the upper level as "parent"
 * when that level is flagged full_backref, otherwise the root objectid
 * is printed.
 */
11266 if (nrefs && strict &&
11267 level < root_level && nrefs->full_backref[level + 1])
11268 parent = nrefs->bytenr[level + 1];
11269 if (eb && (err & BACKREF_MISSING))
11271 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11272 bytenr, nodesize, owner, level,
11273 parent ? "parent" : "root",
11274 parent ? parent : root->objectid);
11279 * If @err contains BACKREF_MISSING then add extent of the
11280 * file_extent_data_item.
11282 * Returns error bits after repair.
/*
 * Steps visible below:
 *  - read the file extent item at @pathp; inline extents and extents with
 *    disk_bytenr == 0 are tested first
 *  - continue only when the error bits contain BACKREF_MISSING
 *  - look up the EXTENT_ITEM (disk_bytenr, num_bytes) in the extent tree;
 *    the insert step creates an item with refs 0, the file extent's
 *    generation and BTRFS_EXTENT_FLAG_DATA, then updates the block group
 *  - add the data backref through btrfs_inc_extent_ref() and clear
 *    BACKREF_MISSING from the error bits
 * NOTE(review): the conditions guarding the insert step, the error paths
 * and the return statement are not visible in this listing; confirm.
 */
11284 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11285 struct btrfs_root *root,
11286 struct btrfs_path *pathp,
11287 struct node_refs *nrefs,
11290 struct btrfs_file_extent_item *fi;
11291 struct btrfs_key fi_key;
11292 struct btrfs_key key;
11293 struct btrfs_extent_item *ei;
11294 struct btrfs_path path;
11295 struct btrfs_root *extent_root = root->fs_info->extent_root;
11296 struct extent_buffer *eb;
11308 eb = pathp->nodes[0];
11309 slot = pathp->slots[0];
11310 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11311 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11313 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11314 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11317 file_offset = fi_key.offset;
11318 generation = btrfs_file_extent_generation(eb, fi);
11319 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11320 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11321 extent_offset = btrfs_file_extent_offset(eb, fi);
/* offset is the file offset minus the offset into the on-disk extent */
11322 offset = file_offset - extent_offset;
11324 /* now repair only adds backref */
11325 if ((err & BACKREF_MISSING) == 0)
11328 /* search extent item */
11329 key.objectid = disk_bytenr;
11330 key.type = BTRFS_EXTENT_ITEM_KEY;
11331 key.offset = num_bytes;
11333 btrfs_init_path(&path);
11334 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11340 /* insert an extent item */
11342 key.objectid = disk_bytenr;
11343 key.type = BTRFS_EXTENT_ITEM_KEY;
11344 key.offset = num_bytes;
11345 size = sizeof(*ei);
11347 btrfs_release_path(&path);
11348 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11352 eb = path.nodes[0];
11353 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The new item starts with zero refs; btrfs_inc_extent_ref() is called further down */
11355 btrfs_set_extent_refs(eb, ei, 0);
11356 btrfs_set_extent_generation(eb, ei, generation);
11357 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11359 btrfs_mark_buffer_dirty(eb);
11360 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11362 btrfs_release_path(&path);
/*
 * NOTE(review): eb was re-pointed at path.nodes[0] (the extent tree leaf)
 * in the insert step above and that path has been released, so this may
 * not read the bytenr of the leaf from @pathp - confirm which buffer is
 * intended here.
 */
11365 if (nrefs->full_backref[0])
11366 parent = btrfs_header_bytenr(eb);
11370 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11372 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11376 "failed to increase extent data backref[%llu %llu] root %llu",
11377 disk_bytenr, num_bytes, root->objectid);
11380 printf("Add one extent data backref [%llu %llu]\n",
11381 disk_bytenr, num_bytes);
11384 err &= ~BACKREF_MISSING;
11387 error("can't repair root %llu extent data item[%llu %llu]",
11388 root->objectid, disk_bytenr, num_bytes);
11393 * Check EXTENT_DATA item, mainly for its data backref in extent tree
11395 * Return >0 for any error found and output error message
11396 * Return 0 for no error found
/*
 * @root:          root that owns the leaf in @pathp
 * @pathp:         path whose nodes[0]/slots[0] point at the file extent item
 * @nrefs:         passed on to should_check_extent_strictly() and the
 *                 shared-ref check
 * @account_bytes: when non-zero, aligned byte counts are added to
 *                 data_bytes_allocated / data_bytes_referenced
 *
 * Order of work: alignment checks (BYTES_UNALIGNED), lookup of the
 * EXTENT_ITEM (disk_bytenr, disk_num_bytes) and its DATA flag
 * (BACKREF_MISMATCH), a scan of the inline refs, then keyed
 * EXTENT_DATA_REF and SHARED_DATA_REF lookups. BACKREF_MISSING is set
 * when no data backref is found.
 * NOTE(review): the error-path jumps and the return statement are not
 * visible in this listing; confirm.
 */
11398 static int check_extent_data_item(struct btrfs_root *root,
11399 struct btrfs_path *pathp,
11400 struct node_refs *nrefs, int account_bytes)
11402 struct btrfs_file_extent_item *fi;
11403 struct extent_buffer *eb = pathp->nodes[0];
11404 struct btrfs_path path;
11405 struct btrfs_root *extent_root = root->fs_info->extent_root;
11406 struct btrfs_key fi_key;
11407 struct btrfs_key dbref_key;
11408 struct extent_buffer *leaf;
11409 struct btrfs_extent_item *ei;
11410 struct btrfs_extent_inline_ref *iref;
11411 struct btrfs_extent_data_ref *dref;
11414 u64 disk_num_bytes;
11415 u64 extent_num_bytes;
11422 int found_dbackref = 0;
11423 int slot = pathp->slots[0];
11428 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11429 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11431 /* Nothing to check for hole and inline data extents */
11432 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11433 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11436 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11437 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11438 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11439 offset = btrfs_file_extent_offset(eb, fi);
11441 /* Check unaligned disk_num_bytes and num_bytes */
11442 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11444 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11445 fi_key.objectid, fi_key.offset, disk_num_bytes,
11446 root->fs_info->sectorsize);
11447 err |= BYTES_UNALIGNED;
11448 } else if (account_bytes) {
11449 data_bytes_allocated += disk_num_bytes;
11451 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11453 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11454 fi_key.objectid, fi_key.offset, extent_num_bytes,
11455 root->fs_info->sectorsize);
11456 err |= BYTES_UNALIGNED;
11457 } else if (account_bytes) {
11458 data_bytes_referenced += extent_num_bytes;
/* Owner from the leaf header; accepted as ref root below when the check is not strict */
11460 owner = btrfs_header_owner(eb);
11462 /* Check the extent item of the file extent in extent tree */
11463 btrfs_init_path(&path);
11464 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11465 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11466 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11468 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11472 leaf = path.nodes[0];
11473 slot = path.slots[0];
11474 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11476 extent_flags = btrfs_extent_flags(leaf, ei);
11478 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11480 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11481 disk_bytenr, disk_num_bytes,
11482 BTRFS_EXTENT_FLAG_DATA);
11483 err |= BACKREF_MISMATCH;
11486 /* Check data backref inside that extent item */
11487 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11488 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11489 ptr = (unsigned long)iref;
11490 end = (unsigned long)ei + item_size;
11491 strict = should_check_extent_strictly(root, nrefs, -1);
11493 while (ptr < end) {
11497 bool match = false;
11499 iref = (struct btrfs_extent_inline_ref *)ptr;
11500 type = btrfs_extent_inline_ref_type(leaf, iref);
11501 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11503 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11508 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11509 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11510 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11511 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
/*
 * The ref is compared on inode objectid and on file offset minus the
 * offset into the extent; the root must be this root, or the leaf owner
 * when the check is not strict.
 */
11513 if (ref_objectid == fi_key.objectid &&
11514 ref_offset == fi_key.offset - offset)
11516 if (ref_root == root->objectid && match)
11517 found_dbackref = 1;
11518 else if (!strict && owner == ref_root && match)
11519 found_dbackref = 1;
11520 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared data ref: validate the tree block named by the inline offset */
11521 found_dbackref = !check_tree_block_ref(root, NULL,
11522 btrfs_extent_inline_ref_offset(leaf, iref),
11526 if (found_dbackref)
11528 ptr += btrfs_extent_inline_ref_size(type);
11531 if (!found_dbackref) {
11532 btrfs_release_path(&path);
11534 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11535 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11536 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11537 dbref_key.offset = hash_extent_data_ref(root->objectid,
11538 fi_key.objectid, fi_key.offset - offset);
11540 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11541 &dbref_key, &path, 0, 0);
11543 found_dbackref = 1;
11547 btrfs_release_path(&path);
11550 * Neither inlined nor EXTENT_DATA_REF found, try
11551 * SHARED_DATA_REF as last chance.
/* The shared data ref key uses the start of the leaf holding the file extent as offset */
11553 dbref_key.objectid = disk_bytenr;
11554 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11555 dbref_key.offset = eb->start;
11557 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11558 &dbref_key, &path, 0, 0);
11560 found_dbackref = 1;
11566 if (!found_dbackref)
11567 err |= BACKREF_MISSING;
11568 btrfs_release_path(&path);
11569 if (err & BACKREF_MISSING) {
11570 error("data extent[%llu %llu] backref lost",
11571 disk_bytenr, disk_num_bytes);
11577 * Get real tree block level for the case like shared block
11578 * Return >= 0 as tree level
11579 * Return <0 for error
/*
 * Resolve the level of the tree block at @bytenr from two sources and
 * return it only when they can be compared:
 *  - the extent tree item for @bytenr (key.offset for a METADATA_ITEM,
 *    btrfs_tree_block_info otherwise), which must carry
 *    BTRFS_EXTENT_FLAG_TREE_BLOCK
 *  - the header of the tree block itself, read with the generation taken
 *    from the extent item
 * NOTE(review): the statements executed on lookup failure or when the two
 * levels differ are not visible in this listing - presumably a negative
 * value is returned; confirm.
 */
11581 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11583 struct extent_buffer *eb;
11584 struct btrfs_path path;
11585 struct btrfs_key key;
11586 struct btrfs_extent_item *ei;
11593 /* Search extent tree for extent generation and level */
11594 key.objectid = bytenr;
11595 key.type = BTRFS_METADATA_ITEM_KEY;
11596 key.offset = (u64)-1;
11598 btrfs_init_path(&path);
11599 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11602 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11610 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11611 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11612 struct btrfs_extent_item);
11613 flags = btrfs_extent_flags(path.nodes[0], ei);
11614 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11619 /* Get transid for later read_tree_block() check */
11620 transid = btrfs_extent_generation(path.nodes[0], ei);
11622 /* Get backref level as one source */
11623 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11624 backref_level = key.offset;
11626 struct btrfs_tree_block_info *info;
11628 info = (struct btrfs_tree_block_info *)(ei + 1);
11629 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11631 btrfs_release_path(&path);
11633 /* Get level from tree block as an alternative source */
11634 eb = read_tree_block(fs_info, bytenr, transid);
11635 if (!extent_buffer_uptodate(eb)) {
11636 free_extent_buffer(eb);
/* The header level is copied out before the buffer reference is dropped */
11639 header_level = btrfs_header_level(eb);
11640 free_extent_buffer(eb);
11642 if (header_level != backref_level)
11644 return header_level;
11647 btrfs_release_path(&path);
11652 * Check if a tree block backref is valid (points to a valid tree block)
11653 * if level == -1, level will be resolved
11654 * Return >0 for any error found and print error message
/*
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  start of the tree block the backref belongs to
 * @level:   level of that block; resolved via query_tree_block_level()
 *           for the -1 special case
 *
 * The root is opened with btrfs_read_fs_root(), the first key of the block
 * at @bytenr is read, and that key is searched in the root with
 * path.lowest_level = @level. The node found at that level must have the
 * same bytenr and level, otherwise REFERENCER_MISMATCH is set. Failures to
 * resolve the level, read the root, read the block or search set
 * REFERENCER_MISSING.
 * NOTE(review): the return statement is not visible in this listing -
 * presumably the accumulated err; confirm.
 */
11656 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11657 u64 bytenr, int level)
11659 struct btrfs_root *root;
11660 struct btrfs_key key;
11661 struct btrfs_path path;
11662 struct extent_buffer *eb;
11663 struct extent_buffer *node;
11664 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11668 /* Query level for level == -1 special case */
11670 level = query_tree_block_level(fs_info, bytenr);
11672 err |= REFERENCER_MISSING;
11676 key.objectid = root_id;
11677 key.type = BTRFS_ROOT_ITEM_KEY;
11678 key.offset = (u64)-1;
11680 root = btrfs_read_fs_root(fs_info, &key);
11681 if (IS_ERR(root)) {
11682 err |= REFERENCER_MISSING;
11686 /* Read out the tree block to get item/node key */
11687 eb = read_tree_block(fs_info, bytenr, 0);
11688 if (!extent_buffer_uptodate(eb)) {
11689 err |= REFERENCER_MISSING;
11690 free_extent_buffer(eb);
11694 /* Empty tree, no need to check key */
11695 if (!btrfs_header_nritems(eb) && !level) {
11696 free_extent_buffer(eb);
/*
 * The first key is read as a node key or as an item key.
 * NOTE(review): the condition selecting between the two is not visible
 * here - presumably it depends on @level; confirm.
 */
11701 btrfs_node_key_to_cpu(eb, &key, 0);
11703 btrfs_item_key_to_cpu(eb, &key, 0);
11705 free_extent_buffer(eb);
11707 btrfs_init_path(&path);
/* lowest_level is set to @level; path.nodes[level] is examined after the search */
11708 path.lowest_level = level;
11709 /* Search with the first key, to ensure we can reach it */
11710 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11712 err |= REFERENCER_MISSING;
11716 node = path.nodes[level];
11717 if (btrfs_header_bytenr(node) != bytenr) {
11719 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11720 bytenr, nodesize, bytenr,
11721 btrfs_header_bytenr(node));
11722 err |= REFERENCER_MISMATCH;
11724 if (btrfs_header_level(node) != level) {
11726 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11727 bytenr, nodesize, level,
11728 btrfs_header_level(node));
11729 err |= REFERENCER_MISMATCH;
11733 btrfs_release_path(&path);
/* Two message variants exist: one without and one with the level */
11735 if (err & REFERENCER_MISSING) {
11737 error("extent [%llu %d] lost referencer (owner: %llu)",
11738 bytenr, nodesize, root_id);
11741 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11742 bytenr, nodesize, root_id, level);
11749 * Check if tree block @eb is tree reloc root.
11750 * Return 0 if it's not or any problem happens
11751 * Return 1 if it's a tree reloc root
/*
 * Looks up the root item keyed (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM,
 * owner of @eb) with btrfs_read_fs_root_no_cache() and compares the
 * bytenr of that root's node with the bytenr of @eb. The root is released
 * with btrfs_free_fs_root() afterwards.
 * NOTE(review): the return statements are not visible in this listing;
 * confirm they match the 0/1 contract described for this function.
 */
11753 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11754 struct extent_buffer *eb)
11756 struct btrfs_root *tree_reloc_root;
11757 struct btrfs_key key;
11758 u64 bytenr = btrfs_header_bytenr(eb);
11759 u64 owner = btrfs_header_owner(eb);
/* The key offset is the owner of @eb */
11762 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11763 key.offset = owner;
11764 key.type = BTRFS_ROOT_ITEM_KEY;
11766 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11767 if (IS_ERR(tree_reloc_root))
11770 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11772 btrfs_free_fs_root(tree_reloc_root);
11777 * Check referencer for shared block backref
11778 * If level == -1, this function will resolve the level.
/*
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block named by the shared backref
 * @bytenr:  bytenr of the tree block that owns the backref
 * @level:   level of @bytenr, resolved with query_tree_block_level() when
 *           needed
 *
 * Reads the block at @parent and scans its block pointers for @bytenr.
 * A backref pointing at itself (@parent == @bytenr) is tested with
 * is_tree_reloc_root(). Returns REFERENCER_MISSING, after printing an
 * error, when no parent is found.
 * NOTE(review): the jumps taken on read failure or level mismatch and the
 * success return are not visible in this listing; confirm.
 */
11780 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11781 u64 parent, u64 bytenr, int level)
11783 struct extent_buffer *eb;
11785 int found_parent = 0;
11788 eb = read_tree_block(fs_info, parent, 0);
11789 if (!extent_buffer_uptodate(eb))
11793 level = query_tree_block_level(fs_info, bytenr);
11797 /* It's possible it's a tree reloc root */
11798 if (parent == bytenr) {
11799 if (is_tree_reloc_root(fs_info, eb))
/* The parent block's level is compared with the child's level plus one */
11804 if (level + 1 != btrfs_header_level(eb))
11807 nr = btrfs_header_nritems(eb);
11808 for (i = 0; i < nr; i++) {
11809 if (bytenr == btrfs_node_blockptr(eb, i)) {
11815 free_extent_buffer(eb);
11816 if (!found_parent) {
11818 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11819 bytenr, fs_info->nodesize, parent, level);
11820 return REFERENCER_MISSING;
11826 * Check referencer for normal (inlined) data ref
11827 * If len == 0, it will be resolved by searching in extent tree
/*
 * @fs_info:  filesystem being checked
 * @root_id:  root named by the data backref
 * @objectid: inode objectid named by the data backref
 * @offset:   backref offset (file offset minus file extent offset)
 * @bytenr:   start of the data extent
 * @len:      length of the data extent; looked up in the extent tree when
 *            it has to be resolved
 * @count:    reference count stored in the backref
 *
 * Walks the EXTENT_DATA items of inode @objectid in root @root_id and
 * compares the number of matching file extents with @count. On a
 * difference an error is printed and REFERENCER_MISSING is returned.
 * NOTE(review): the loop construct, the found_count increment and the
 * success return are not visible in this listing; confirm.
 */
11829 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11830 u64 root_id, u64 objectid, u64 offset,
11831 u64 bytenr, u64 len, u32 count)
11833 struct btrfs_root *root;
11834 struct btrfs_root *extent_root = fs_info->extent_root;
11835 struct btrfs_key key;
11836 struct btrfs_path path;
11837 struct extent_buffer *leaf;
11838 struct btrfs_file_extent_item *fi;
11839 u32 found_count = 0;
11844 key.objectid = bytenr;
11845 key.type = BTRFS_EXTENT_ITEM_KEY;
11846 key.offset = (u64)-1;
11848 btrfs_init_path(&path);
11849 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11852 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11855 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11856 if (key.objectid != bytenr ||
11857 key.type != BTRFS_EXTENT_ITEM_KEY)
11860 btrfs_release_path(&path);
11862 key.objectid = root_id;
11863 key.type = BTRFS_ROOT_ITEM_KEY;
11864 key.offset = (u64)-1;
11865 btrfs_init_path(&path);
11867 root = btrfs_read_fs_root(fs_info, &key);
11871 key.objectid = objectid;
11872 key.type = BTRFS_EXTENT_DATA_KEY;
11874 * It can be nasty as data backref offset is
11875 * file offset - file extent offset, which is smaller or
11876 * equal to original backref offset. The only special case is
11877 * overflow. So we need to special check and do further search.
/* When bit 63 of @offset is set the search starts from file offset 0 */
11879 key.offset = offset & (1ULL << 63) ? 0 : offset;
11881 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11886 * Search afterwards to get correct one
11887 * NOTE: As we must do a comprehensive check on the data backref to
11888 * make sure the dref count also matches, we must iterate all file
11889 * extents for that inode.
11892 leaf = path.nodes[0];
11893 slot = path.slots[0];
11895 if (slot >= btrfs_header_nritems(leaf) ||
11896 btrfs_header_owner(leaf) != root_id)
11898 btrfs_item_key_to_cpu(leaf, &key, slot);
11899 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11901 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11903 * Except normal disk bytenr and disk num bytes, we still
11904 * need to do extra check on dbackref offset as
11905 * dbackref offset = file_offset - file_extent_offset
11907 * Also, we must check the leaf owner.
11908 * In case of shared tree blocks (snapshots) we can inherit
11909 * leaves from source snapshot.
11910 * In that case, reference from source snapshot should not
11913 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11914 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11915 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11916 offset && btrfs_header_owner(leaf) == root_id)
11920 ret = btrfs_next_item(root, &path);
11925 btrfs_release_path(&path);
/* The number of matching file extents must equal the count stored in the backref */
11926 if (found_count != count) {
11928 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11929 bytenr, len, root_id, objectid, offset, count, found_count);
11930 return REFERENCER_MISSING;
11936 * Check if the referencer of a shared data backref exists
/*
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the leaf named by the shared data backref
 * @bytenr:  start of the data extent
 *
 * Reads the block at @parent and scans its EXTENT_DATA items for a
 * non-inline file extent whose disk bytenr equals @bytenr. Returns
 * REFERENCER_MISSING, after printing an error, when none is found.
 * NOTE(review): the statements taken on read failure, on a match and the
 * success return are not visible in this listing; confirm.
 */
11938 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11939 u64 parent, u64 bytenr)
11941 struct extent_buffer *eb;
11942 struct btrfs_key key;
11943 struct btrfs_file_extent_item *fi;
11945 int found_parent = 0;
11948 eb = read_tree_block(fs_info, parent, 0);
11949 if (!extent_buffer_uptodate(eb))
11952 nr = btrfs_header_nritems(eb);
11953 for (i = 0; i < nr; i++) {
11954 btrfs_item_key_to_cpu(eb, &key, i);
11955 if (key.type != BTRFS_EXTENT_DATA_KEY)
11958 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline file extents are tested for separately, before the disk bytenr comparison */
11959 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11962 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11969 free_extent_buffer(eb);
11970 if (!found_parent) {
11971 error("shared extent %llu referencer lost (parent: %llu)",
11973 return REFERENCER_MISSING;
11979 * Only delete backref if REFERENCER_MISSING now
11981 * Returns <0 if the extent was deleted
11982 * Returns >0 if the backref was deleted but the extent still exists; the
11983 * returned value means the error bits left after repair
11984 * Returns 0 if nothing happened
/*
 * @trans:         transaction handle passed to btrfs_free_extent()
 * @root:          root searched again after the backref is deleted
 * @path:          path positioned at the extent item; its key is saved
 *                 first and the path is re-searched with that key at the end
 * @bytenr, @num_bytes, @parent, @root_objectid, @owner, @offset:
 *                 identify the backref handed to btrfs_free_extent()
 * @err:           error bits found for this backref
 *
 * NOTE(review): the condition tests REFERENCER_MISSING and
 * REFERENCER_MISMATCH, but only REFERENCER_MISSING is visibly cleared
 * from err afterwards; confirm this is intended.
 * NOTE(review): the checks on the btrfs_free_extent()/btrfs_search_slot()
 * results and the return statement are not visible in this listing.
 */
11986 static int repair_extent_item(struct btrfs_trans_handle *trans,
11987 struct btrfs_root *root, struct btrfs_path *path,
11988 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
11989 u64 owner, u64 offset, int err)
11991 struct btrfs_key old_key;
/* Remember the current key so the path can be re-positioned after the deletion */
11995 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
11997 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
11998 /* delete the backref */
/* Note: the call is made with root->fs_info->fs_root, not with @root */
11999 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12000 num_bytes, parent, root_objectid, owner, offset);
12003 err &= ~REFERENCER_MISSING;
12004 printf("Delete backref in extent [%llu %llu]\n",
12005 bytenr, num_bytes);
12007 error("fail to delete backref in extent [%llu %llu]",
12008 bytenr, num_bytes);
12012 /* btrfs_free_extent may delete the extent */
12013 btrfs_release_path(path);
12014 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12024 * This function will check a given extent item, including its backref and
12025 * itself (like crossing stripe boundary and type)
12027 * Since we don't use extent_record anymore, introduce new error bit
/*
 * @trans:   transaction handle, forwarded to repair_extent_item()
 * @fs_info: filesystem being checked
 * @path:    path whose nodes[0]/slots[0] point at the EXTENT_ITEM or
 *           METADATA_ITEM to check
 *
 * Work done on the item:
 *  - add its size to bytes_used (key.offset for EXTENT_ITEM, nodesize
 *    otherwise)
 *  - report items smaller than struct btrfs_extent_item as unsupported
 *  - for metadata, test check_crossing_stripes() and set
 *    CROSSING_STRIPE_BOUNDARY
 *  - walk every inline backref and dispatch on its type to the matching
 *    referencer check; unknown types yield UNKNOWN_TYPE
 *  - when errors exist and repair is enabled, call repair_extent_item()
 * NOTE(review): the loop/jump structure, the accumulation of ret into err
 * and the return statement are not visible in this listing; confirm.
 */
12029 static int check_extent_item(struct btrfs_trans_handle *trans,
12030 struct btrfs_fs_info *fs_info,
12031 struct btrfs_path *path)
12033 struct btrfs_extent_item *ei;
12034 struct btrfs_extent_inline_ref *iref;
12035 struct btrfs_extent_data_ref *dref;
12036 struct extent_buffer *eb = path->nodes[0];
12039 int slot = path->slots[0];
12041 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12042 u32 item_size = btrfs_item_size_nr(eb, slot);
12052 struct btrfs_key key;
12056 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM keys carry the length in key.offset; otherwise nodesize is used */
12057 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12058 bytes_used += key.offset;
12059 num_bytes = key.offset;
12061 bytes_used += nodesize;
12062 num_bytes = nodesize;
12065 if (item_size < sizeof(*ei)) {
12067 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12068 * old thing when on disk format is still un-determined.
12069 * No need to care about it anymore
12071 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12075 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12076 flags = btrfs_extent_flags(eb, ei);
12078 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* NOTE(review): this call uses global_info rather than the @fs_info argument; confirm they are the same */
12080 if (metadata && check_crossing_stripes(global_info, key.objectid,
12082 error("bad metadata [%llu, %llu) crossing stripe boundary",
12083 key.objectid, key.objectid + nodesize);
12084 err |= CROSSING_STRIPE_BOUNDARY;
12087 ptr = (unsigned long)(ei + 1);
12089 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12090 /* Old EXTENT_ITEM metadata */
12091 struct btrfs_tree_block_info *info;
12093 info = (struct btrfs_tree_block_info *)ptr;
12094 level = btrfs_tree_block_level(eb, info);
12095 ptr += sizeof(struct btrfs_tree_block_info);
12097 /* New METADATA_ITEM */
12098 level = key.offset;
12100 end = (unsigned long)ei + item_size;
12103 /* Reached extent item end normally */
12107 /* Beyond extent item end, wrong item size */
12109 err |= ITEM_SIZE_MISMATCH;
12110 error("extent item at bytenr %llu slot %d has wrong size",
12119 /* Now check every backref in this extent item */
12120 iref = (struct btrfs_extent_inline_ref *)ptr;
12121 type = btrfs_extent_inline_ref_type(eb, iref);
12122 offset = btrfs_extent_inline_ref_offset(eb, iref);
12124 case BTRFS_TREE_BLOCK_REF_KEY:
12125 root_objectid = offset;
12127 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12131 case BTRFS_SHARED_BLOCK_REF_KEY:
12133 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12137 case BTRFS_EXTENT_DATA_REF_KEY:
12138 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12139 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12140 owner = btrfs_extent_data_ref_objectid(eb, dref);
12141 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12142 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12143 owner_offset, key.objectid, key.offset,
12144 btrfs_extent_data_ref_count(eb, dref));
12147 case BTRFS_SHARED_DATA_REF_KEY:
12149 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12153 error("extent[%llu %d %llu] has unknown ref type: %d",
12154 key.objectid, key.type, key.offset, type);
12155 ret = UNKNOWN_TYPE;
/*
 * NOTE(review): the repair is gated on the accumulated err, but the value
 * handed to repair_extent_item() is ret (the result for this backref);
 * confirm this is intended.
 */
12160 if (err && repair) {
12161 ret = repair_extent_item(trans, fs_info->extent_root, path,
12162 key.objectid, num_bytes, parent, root_objectid,
12163 owner, owner_offset, ret);
12172 ptr += btrfs_extent_inline_ref_size(type);
12180 * Check if a dev extent item is referred correctly by its chunk
/*
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the dev extent item in @eb
 *
 * Builds the chunk key from the chunk objectid/offset stored in the dev
 * extent, searches it in the chunk tree, validates the chunk with
 * btrfs_check_chunk_valid(), compares btrfs_stripe_length() with the dev
 * extent length, and scans the chunk's stripes for one whose devid and
 * offset equal the dev extent key's objectid and offset. Returns
 * REFERENCER_MISSING, after printing an error, when no such chunk stripe
 * is found.
 * NOTE(review): the assignment of l, the jumps on failure and the success
 * return are not visible in this listing; confirm.
 */
12182 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12183 struct extent_buffer *eb, int slot)
12185 struct btrfs_root *chunk_root = fs_info->chunk_root;
12186 struct btrfs_dev_extent *ptr;
12187 struct btrfs_path path;
12188 struct btrfs_key chunk_key;
12189 struct btrfs_key devext_key;
12190 struct btrfs_chunk *chunk;
12191 struct extent_buffer *l;
12195 int found_chunk = 0;
12198 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12199 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12200 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it belongs to */
12202 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12203 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12204 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12206 btrfs_init_path(&path);
12207 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12212 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12213 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12218 if (btrfs_stripe_length(fs_info, l, chunk) != length)
12221 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12222 for (i = 0; i < num_stripes; i++) {
12223 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12224 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
/* In the dev extent key, objectid is compared with the stripe devid and offset with the stripe offset */
12226 if (devid == devext_key.objectid &&
12227 offset == devext_key.offset) {
12233 btrfs_release_path(&path);
12234 if (!found_chunk) {
12236 "device extent[%llu, %llu, %llu] did not find the related chunk",
12237 devext_key.objectid, devext_key.offset, length);
12238 return REFERENCER_MISSING;
12244 * Check if the used space is correct with the dev item
/*
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item in @eb
 *
 * Sums the lengths of all DEV_EXTENT items of this device id in the
 * device tree and compares the sum with the bytes_used field of the dev
 * item. Returns REFERENCER_MISSING when the initial search fails and
 * ACCOUNTING_MISMATCH when the totals differ. The device's total_bytes is
 * finally passed to check_dev_size_alignment().
 * NOTE(review): the loop construct, the key.offset initialisation and the
 * success return are not visible in this listing; confirm.
 */
12246 static int check_dev_item(struct btrfs_fs_info *fs_info,
12247 struct extent_buffer *eb, int slot)
12249 struct btrfs_root *dev_root = fs_info->dev_root;
12250 struct btrfs_dev_item *dev_item;
12251 struct btrfs_path path;
12252 struct btrfs_key key;
12253 struct btrfs_dev_extent *ptr;
12260 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12261 dev_id = btrfs_device_id(eb, dev_item);
12262 used = btrfs_device_bytes_used(eb, dev_item);
12263 total_bytes = btrfs_device_total_bytes(eb, dev_item);
12265 key.objectid = dev_id;
12266 key.type = BTRFS_DEV_EXTENT_KEY;
12269 btrfs_init_path(&path);
12270 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12272 btrfs_item_key_to_cpu(eb, &key, slot);
12273 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12274 key.objectid, key.type, key.offset);
12275 btrfs_release_path(&path);
12276 return REFERENCER_MISSING;
12279 /* Iterate dev_extents to calculate the used space of a device */
12281 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12284 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12285 if (key.objectid > dev_id)
12287 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12290 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12291 struct btrfs_dev_extent);
12292 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12294 ret = btrfs_next_item(dev_root, &path);
12298 btrfs_release_path(&path);
/*
 * NOTE(review): the key of the dev item is read back here, but the
 * message arguments visible below are BTRFS_ROOT_TREE_OBJECTID,
 * BTRFS_DEV_EXTENT_KEY and dev_id rather than the fields of that key;
 * confirm which values are meant to be printed.
 */
12300 if (used != total) {
12301 btrfs_item_key_to_cpu(eb, &key, slot);
12303 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12304 total, used, BTRFS_ROOT_TREE_OBJECTID,
12305 BTRFS_DEV_EXTENT_KEY, dev_id);
12306 return ACCOUNTING_MISMATCH;
12308 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12314 * Check a block group item with its referencer (chunk) and its used space
12315 * with extent/metadata item
/*
 * Cross-check the block group item stored in slot @slot of leaf @eb against
 * the chunk tree and the extent tree of @fs_info.
 *
 * Visible steps:
 *  - decode the block group key and copy the item out of @eb to read its
 *    used bytes and flags;
 *  - search the chunk root for (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 *    BTRFS_CHUNK_ITEM_KEY, bg_key.objectid); REFERENCER_MISSING is recorded
 *    next to the "did not find" message and REFERENCER_MISMATCH next to the
 *    chunk length comparison;
 *  - search the extent root from (bg_key.objectid, 0, 0) and iterate with
 *    btrfs_next_item(), accounting EXTENT_ITEM and METADATA_ITEM keys into
 *    total and recording CHUNK_TYPE_MISMATCH when the extent flags and the
 *    block group flags disagree;
 *  - record BG_ACCOUNTING_ERROR when total differs from used.
 *
 * Error bits are collected in err.
 * NOTE(review): err is presumably the return value - confirm.
 */
12317 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12318 struct extent_buffer *eb, int slot)
12320 struct btrfs_root *extent_root = fs_info->extent_root;
12321 struct btrfs_root *chunk_root = fs_info->chunk_root;
12322 struct btrfs_block_group_item *bi;
12323 struct btrfs_block_group_item bg_item;
12324 struct btrfs_path path;
12325 struct btrfs_key bg_key;
12326 struct btrfs_key chunk_key;
12327 struct btrfs_key extent_key;
12328 struct btrfs_chunk *chunk;
12329 struct extent_buffer *leaf;
12330 struct btrfs_extent_item *ei;
12331 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk item into bg_item so the used/flags accessors can read it */
12339 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12340 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12341 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12342 used = btrfs_block_group_used(&bg_item);
12343 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is looked up with the block group start as its key offset */
12345 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12346 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12347 chunk_key.offset = bg_key.objectid;
12349 btrfs_init_path(&path);
12350 /* Search for the referencer chunk */
12351 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12354 "block group[%llu %llu] did not find the related chunk item",
12355 bg_key.objectid, bg_key.offset);
12356 err |= REFERENCER_MISSING;
12358 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12359 struct btrfs_chunk);
/* NOTE(review): the right-hand side is presumably bg_key.offset - confirm */
12360 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12363 "block group[%llu %llu] related chunk item length does not match",
12364 bg_key.objectid, bg_key.offset);
12365 err |= REFERENCER_MISMATCH;
12368 btrfs_release_path(&path);
12370 /* Search from the block group bytenr */
12371 extent_key.objectid = bg_key.objectid;
12372 extent_key.type = 0;
12373 extent_key.offset = 0;
12375 btrfs_init_path(&path);
12376 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12380 /* Iterate extent tree to account used space */
12382 leaf = path.nodes[0];
12384 /* Search slot can point to the last item beyond leaf nritems */
12385 if (path.slots[0] >= btrfs_header_nritems(leaf))
12388 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Extents starting at or past the end of the block group are out of range */
12389 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12392 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12393 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12395 if (extent_key.objectid < bg_key.objectid)
/*
 * NOTE(review): metadata items are special-cased here; presumably they add
 * nodesize while other extents add key.offset - confirm.
 */
12398 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12401 total += extent_key.offset;
12403 ei = btrfs_item_ptr(leaf, path.slots[0],
12404 struct btrfs_extent_item);
12405 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group, tree blocks a SYSTEM or METADATA one */
12406 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12407 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12409 "bad extent[%llu, %llu) type mismatch with chunk",
12410 extent_key.objectid,
12411 extent_key.objectid + extent_key.offset);
12412 err |= CHUNK_TYPE_MISMATCH;
12414 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12415 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12416 BTRFS_BLOCK_GROUP_METADATA))) {
12418 "bad extent[%llu, %llu) type mismatch with chunk",
12419 extent_key.objectid,
12420 extent_key.objectid + nodesize);
12421 err |= CHUNK_TYPE_MISMATCH;
12425 ret = btrfs_next_item(extent_root, &path);
12431 btrfs_release_path(&path);
/* The summed extent sizes must equal the used bytes recorded in the item */
12433 if (total != used) {
12435 "block group[%llu %llu] used %llu but extent items used %llu",
12436 bg_key.objectid, bg_key.offset, used, total);
12437 err |= BG_ACCOUNTING_ERROR;
12443 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12444 * FIXME: We still need to repair error of dev_item.
12446 * Returns error after repair.
/*
 * Repair helper for the chunk item that @path currently points at.
 *
 * @trans:      transaction handle passed to btrfs_make_block_group()
 * @chunk_root: root whose fs_info is used for the block group creation
 * @path:       path positioned on the chunk item (nodes[0] / slots[0])
 * @err:        error bits produced by the preceding check
 *
 * When @err carries REFERENCER_MISSING, a block group is created from the
 * chunk type, chunk_key.offset and the chunk length. The bit is cleared from
 * @err next to the "Added block group item" message.
 * NOTE(review): the handling of a non-chunk key and of a failed
 * btrfs_make_block_group() call, as well as the return statement, should be
 * confirmed against the full function body.
 */
12448 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12449 struct btrfs_root *chunk_root,
12450 struct btrfs_path *path, int err)
12452 struct btrfs_chunk *chunk;
12453 struct btrfs_key chunk_key;
12454 struct extent_buffer *eb = path->nodes[0];
12456 int slot = path->slots[0];
12460 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
/* Only chunk items are handled by this helper */
12461 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12463 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12464 type = btrfs_chunk_type(path->nodes[0], chunk);
12465 length = btrfs_chunk_length(eb, chunk);
/* A missing block group item is recreated from the chunk description */
12467 if (err & REFERENCER_MISSING) {
12468 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12469 type, chunk_key.offset, length);
12471 error("fail to add block group item[%llu %llu]",
12472 chunk_key.offset, length);
12475 err &= ~REFERENCER_MISSING;
12476 printf("Added block group item[%llu %llu]\n",
12477 chunk_key.offset, length);
12486 * Check a chunk item.
12487 * This includes checking all referred dev_extents and the block group item.
/*
 * Validate the chunk item in slot @slot of leaf @eb and its references.
 *
 * Visible steps:
 *  - read the chunk key and length, and compute chunk_end as
 *    chunk_key.offset + length;
 *  - call btrfs_check_chunk_valid(); BYTES_UNALIGNED | UNKNOWN_TYPE is
 *    recorded next to the "is invalid" message;
 *  - search the extent root for the block group item keyed
 *    (chunk_key.offset, BTRFS_BLOCK_GROUP_ITEM_KEY, length);
 *    REFERENCER_MISSING is recorded both next to the "did not find" message
 *    and next to the flags mismatch message;
 *  - for every stripe, search the dev root for (stripe devid,
 *    BTRFS_DEV_EXTENT_KEY, stripe offset) and compare the dev extent chunk
 *    objectid, chunk offset and length with the chunk key and stripe_len;
 *    BACKREF_MISSING is recorded at the not_match_dev handling.
 *
 * NOTE(review): the flags mismatch uses the REFERENCER_MISSING bit rather
 * than REFERENCER_MISMATCH - confirm this is intended.
 * NOTE(review): err is presumably the return value - confirm.
 */
12489 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12490 struct extent_buffer *eb, int slot)
12492 struct btrfs_root *extent_root = fs_info->extent_root;
12493 struct btrfs_root *dev_root = fs_info->dev_root;
12494 struct btrfs_path path;
12495 struct btrfs_key chunk_key;
12496 struct btrfs_key bg_key;
12497 struct btrfs_key devext_key;
12498 struct btrfs_chunk *chunk;
12499 struct extent_buffer *leaf;
12500 struct btrfs_block_group_item *bi;
12501 struct btrfs_block_group_item bg_item;
12502 struct btrfs_dev_extent *ptr;
12514 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12515 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12516 length = btrfs_chunk_length(eb, chunk);
12517 chunk_end = chunk_key.offset + length;
12518 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12521 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12523 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12526 type = btrfs_chunk_type(eb, chunk);
/* The block group item is keyed by the chunk start and the chunk length */
12528 bg_key.objectid = chunk_key.offset;
12529 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12530 bg_key.offset = length;
12532 btrfs_init_path(&path);
12533 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12536 "chunk[%llu %llu) did not find the related block group item",
12537 chunk_key.offset, chunk_end);
12538 err |= REFERENCER_MISSING;
12540 leaf = path.nodes[0];
12541 bi = btrfs_item_ptr(leaf, path.slots[0],
12542 struct btrfs_block_group_item);
12543 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/* The block group flags are compared for exact equality with the chunk type */
12545 if (btrfs_block_group_flags(&bg_item) != type) {
12547 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12548 chunk_key.offset, chunk_end, type,
12549 btrfs_block_group_flags(&bg_item));
12550 err |= REFERENCER_MISSING;
12554 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12555 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
/* Each stripe is looked up as a dev extent keyed by its devid and offset */
12556 for (i = 0; i < num_stripes; i++) {
12557 btrfs_release_path(&path);
12558 btrfs_init_path(&path);
12559 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12560 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12561 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12563 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12566 goto not_match_dev;
12568 leaf = path.nodes[0];
12569 ptr = btrfs_item_ptr(leaf, path.slots[0],
12570 struct btrfs_dev_extent);
12571 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12572 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk and span one stripe length */
12573 if (objectid != chunk_key.objectid ||
12574 offset != chunk_key.offset ||
12575 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12576 goto not_match_dev;
12579 err |= BACKREF_MISSING;
/*
 * NOTE(review): this message prints chunk_key.objectid as the range start,
 * while the other messages in this function print chunk_key.offset - confirm.
 */
12581 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12582 chunk_key.objectid, chunk_end, i);
12585 btrfs_release_path(&path);
/*
 * Delete the item that @path currently points at from @root.
 *
 * The key at path->nodes[0] / path->slots[0] is saved, the path is released
 * and the key is searched again with @trans, ins_len -1 and cow 1 before
 * btrfs_del_item() is called. When the resulting slot is 0 the path is moved
 * with btrfs_prev_leaf(). The outcome is reported with either the
 * "failed to delete" error or the "Deleted root" message.
 * NOTE(review): the conditions guarding each step and the return value
 * should be confirmed against the full function body.
 */
12590 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12591 struct btrfs_root *root,
12592 struct btrfs_path *path)
12594 struct btrfs_key key;
/* Remember the key first: releasing the path drops the leaf reference */
12597 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12598 btrfs_release_path(path);
12599 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12605 ret = btrfs_del_item(trans, root, path);
12609 if (path->slots[0] == 0)
12610 btrfs_prev_leaf(root, path);
12615 error("failed to delete root %llu item[%llu, %u, %llu]",
12616 root->objectid, key.objectid, key.type, key.offset);
12618 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12619 root->objectid, key.objectid, key.type, key.offset);
12624 * Main entry function to check known items and update related accounting info
/*
 * Check the item at path->nodes[0] / path->slots[0] by dispatching on its
 * key type:
 *
 *   EXTENT_DATA                -> check_extent_data_item(), with
 *                                 repair_extent_data_item() as repair step
 *   BLOCK_GROUP_ITEM           -> check_block_group_item(), the item may be
 *                                 deleted when REFERENCER_MISSING is set
 *   DEV_ITEM                   -> check_dev_item()
 *   CHUNK_ITEM                 -> check_chunk_item(), with
 *                                 repair_chunk_item() as repair step
 *   DEV_EXTENT                 -> check_dev_extent_item()
 *   EXTENT_ITEM, METADATA_ITEM -> check_extent_item()
 *   EXTENT_CSUM                -> adds the item size to total_csum_bytes
 *   TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF, SHARED_DATA_REF
 *                              -> the matching backref check; the item may be
 *                                 deleted when the result carries
 *                                 REFERENCER_MISMATCH or REFERENCER_MISSING
 *
 * A slot at or beyond the number of items in the leaf is reported as an
 * empty leaf.
 * NOTE(review): the repair and delete steps are presumably gated on the
 * repair mode, and the function presumably advances through the leaf and
 * returns the accumulated error bits - confirm against the full body.
 */
12626 static int check_leaf_items(struct btrfs_trans_handle *trans,
12627 struct btrfs_root *root, struct btrfs_path *path,
12628 struct node_refs *nrefs, int account_bytes)
12630 struct btrfs_fs_info *fs_info = root->fs_info;
12631 struct btrfs_key key;
12632 struct extent_buffer *eb;
12635 struct btrfs_extent_data_ref *dref;
12640 eb = path->nodes[0];
12641 slot = path->slots[0];
12642 if (slot >= btrfs_header_nritems(eb)) {
12644 error("empty leaf [%llu %u] root %llu", eb->start,
12645 root->fs_info->nodesize, root->objectid);
12651 btrfs_item_key_to_cpu(eb, &key, slot);
12655 case BTRFS_EXTENT_DATA_KEY:
12656 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12658 ret = repair_extent_data_item(trans, root, path, nrefs,
12662 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12663 ret = check_block_group_item(fs_info, eb, slot);
12665 ret & REFERENCER_MISSING)
12666 ret = delete_extent_tree_item(trans, root, path);
12669 case BTRFS_DEV_ITEM_KEY:
12670 ret = check_dev_item(fs_info, eb, slot);
12673 case BTRFS_CHUNK_ITEM_KEY:
12674 ret = check_chunk_item(fs_info, eb, slot);
12676 ret = repair_chunk_item(trans, root, path, ret);
12679 case BTRFS_DEV_EXTENT_KEY:
12680 ret = check_dev_extent_item(fs_info, eb, slot);
12683 case BTRFS_EXTENT_ITEM_KEY:
12684 case BTRFS_METADATA_ITEM_KEY:
12685 ret = check_extent_item(trans, fs_info, path);
/* Csum items are only accounted, there is no check call in this case */
12688 case BTRFS_EXTENT_CSUM_KEY:
12689 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12692 case BTRFS_TREE_BLOCK_REF_KEY:
12693 ret = check_tree_block_backref(fs_info, key.offset,
12696 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12697 ret = delete_extent_tree_item(trans, root, path);
12700 case BTRFS_EXTENT_DATA_REF_KEY:
12701 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12702 ret = check_extent_data_backref(fs_info,
12703 btrfs_extent_data_ref_root(eb, dref),
12704 btrfs_extent_data_ref_objectid(eb, dref),
12705 btrfs_extent_data_ref_offset(eb, dref),
12707 btrfs_extent_data_ref_count(eb, dref));
12709 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12710 ret = delete_extent_tree_item(trans, root, path);
12713 case BTRFS_SHARED_BLOCK_REF_KEY:
12714 ret = check_shared_block_backref(fs_info, key.offset,
12717 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12718 ret = delete_extent_tree_item(trans, root, path);
12721 case BTRFS_SHARED_DATA_REF_KEY:
12722 ret = check_shared_data_backref(fs_info, key.offset,
12725 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12726 ret = delete_extent_tree_item(trans, root, path);
12740 * Low memory usage version of check_chunks_and_extents.
/*
 * Walk the chunk root, the tree root and every root item found in the tree
 * root, checking each with check_btrfs_root().
 *
 * Visible steps:
 *  - start a transaction on the extent root (NOTE(review): presumably only
 *    in repair mode - confirm);
 *  - check the chunk root and then the tree root;
 *  - search the tree root starting at objectid BTRFS_EXTENT_TREE_OBJECTID
 *    with type BTRFS_ROOT_ITEM_KEY and iterate with btrfs_next_item();
 *  - for each ROOT_ITEM key read the root with key.offset set to (u64)-1;
 *    BTRFS_TREE_RELOC_OBJECTID roots are read with
 *    btrfs_read_fs_root_no_cache() and freed with btrfs_free_fs_root()
 *    after the check;
 *  - release the path and search the tree root again with old_key before
 *    moving to the next item;
 *  - call btrfs_fix_block_accounting() and clear BG_ACCOUNTING_ERROR from
 *    err, then commit the transaction on the extent root.
 *
 * NOTE(review): the conditions around the repair steps and the returned
 * value should be confirmed against the full function body.
 */
12742 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12744 struct btrfs_trans_handle *trans = NULL;
12745 struct btrfs_path path;
12746 struct btrfs_key old_key;
12747 struct btrfs_key key;
12748 struct btrfs_root *root1;
12749 struct btrfs_root *root;
12750 struct btrfs_root *cur_root;
12754 root = fs_info->fs_root;
12757 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12758 if (IS_ERR(trans)) {
12759 error("failed to start transaction before check");
12760 return PTR_ERR(trans);
/* The chunk root and the tree root are checked before the per-root walk */
12764 root1 = root->fs_info->chunk_root;
12765 ret = check_btrfs_root(trans, root1, 0, 1);
12768 root1 = root->fs_info->tree_root;
12769 ret = check_btrfs_root(trans, root1, 0, 1);
12772 btrfs_init_path(&path);
12773 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12775 key.type = BTRFS_ROOT_ITEM_KEY;
12777 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12779 error("cannot find extent tree in tree_root");
12784 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12785 if (key.type != BTRFS_ROOT_ITEM_KEY)
12788 key.offset = (u64)-1;
/* Reloc tree roots are read without the cache and freed after the check */
12790 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12791 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12794 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12795 if (IS_ERR(cur_root) || !cur_root) {
12796 error("failed to read tree: %lld", key.objectid);
12800 ret = check_btrfs_root(trans, cur_root, 0, 1);
12803 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12804 btrfs_free_fs_root(cur_root);
/* Re-position in the tree root using old_key before stepping forward */
12806 btrfs_release_path(&path);
12807 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12808 &old_key, &path, 0, 0);
12812 ret = btrfs_next_item(root1, &path);
12818 /* if repair, update block accounting */
12820 ret = btrfs_fix_block_accounting(trans, root);
12824 err &= ~BG_ACCOUNTING_ERROR;
12828 btrfs_commit_transaction(trans, root->fs_info->extent_root);
12830 btrfs_release_path(&path);
/*
 * Entry point for the chunk and extent check.
 *
 * Prints "checking extents" to stderr when progress reporting is disabled,
 * then runs check_chunks_and_extents_v2() when check_mode is
 * CHECK_MODE_LOWMEM and check_chunks_and_extents() otherwise. When repair is
 * set and the check returned 0, btrfs_fix_device_and_super_size() is called
 * as well.
 * NOTE(review): the handling of that last return value and the final return
 * statement should be confirmed against the full function body.
 */
12835 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12839 if (!ctx.progress_enabled)
12840 fprintf(stderr, "checking extents\n");
12841 if (check_mode == CHECK_MODE_LOWMEM)
12842 ret = check_chunks_and_extents_v2(fs_info);
12844 ret = check_chunks_and_extents(fs_info);
12846 /* Also repair device size related problems */
12847 if (repair && !ret) {
12848 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * Replace the root node of @root with an empty, freshly initialised block.
 *
 * @trans:     transaction the new block belongs to (its transid becomes the
 *             header generation)
 * @root:      root to reinitialise
 * @overwrite: NOTE(review): presumably selects reuse of the existing root
 *             node instead of allocating a new block - confirm
 *
 * Visible steps:
 *  - obtain the block c, either through extent_buffer_get() on an existing
 *    buffer or through btrfs_alloc_free_block();
 *  - zero the header of c and set level, bytenr, generation, backref
 *    revision and owner, then write the fsid and the chunk tree uuid;
 *  - mark c dirty;
 *  - when c has the same start as the old node, update the generation and
 *    level in root->root_item, call btrfs_update_root() and drop c;
 *  - drop the old node and add @root to the dirty list.
 *
 * NOTE(review): the assignment of the new root node and the return value
 * should be confirmed against the full function body.
 */
12855 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12856 struct btrfs_root *root, int overwrite)
12858 struct extent_buffer *c;
12859 struct extent_buffer *old = root->node;
12862 struct btrfs_disk_key disk_key = {0,0,0};
12868 extent_buffer_get(c);
12871 c = btrfs_alloc_free_block(trans, root,
12872 root->fs_info->nodesize,
12873 root->root_key.objectid,
12874 &disk_key, level, 0, 0);
12877 extent_buffer_get(c);
/* Build a clean header for the new root block */
12881 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12882 btrfs_set_header_level(c, level);
12883 btrfs_set_header_bytenr(c, c->start);
12884 btrfs_set_header_generation(c, trans->transid);
12885 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12886 btrfs_set_header_owner(c, root->root_key.objectid);
12888 write_extent_buffer(c, root->fs_info->fsid,
12889 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12891 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12892 btrfs_header_chunk_tree_uuid(c),
12895 btrfs_mark_buffer_dirty(c);
12897 * this case can happen in the following case:
12899 * 1.overwrite previous root.
12901 * 2.reinit reloc data root, this is because we skip pin
12902 * down reloc data tree before which means we can allocate
12903 * same block bytenr here.
12905 if (old->start == c->start) {
12906 btrfs_set_root_generation(&root->root_item,
12908 root->root_item.level = btrfs_header_level(root->node);
12909 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12910 &root->root_key, &root->root_item);
12912 free_extent_buffer(c);
12916 free_extent_buffer(old);
12918 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree block @eb and the blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents tree records the pinned ranges
 * @eb:        tree block to start from
 * @tree_root: non-zero when @eb belongs to the tree root; it is passed as 1
 *             for the tree root and 0 for the chunk root by
 *             pin_metadata_blocks()
 *
 * Visible behaviour:
 *  - a block whose range is already marked EXTENT_DIRTY in pinned_extents is
 *    not processed again;
 *  - the block itself is pinned with btrfs_pin_extent();
 *  - for BTRFS_ROOT_ITEM_KEY items, except those of the extent tree, the
 *    tree reloc tree and the data reloc tree, the root block referenced by
 *    the item is read and pinned recursively with tree_root = 0;
 *  - for node pointers, a block at level 1 that is not part of the tree root
 *    has its children pinned by bytenr and nodesize without reading them;
 *    otherwise the child is read and pinned recursively with the same
 *    tree_root value.
 *
 * Read failures print "Error reading root block" or
 * "Error reading tree block".
 * NOTE(review): the leaf / node split is presumably keyed on the block
 * level, and the error codes returned on failure are not visible here -
 * confirm against the full function body.
 */
12922 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12923 struct extent_buffer *eb, int tree_root)
12925 struct extent_buffer *tmp;
12926 struct btrfs_root_item *ri;
12927 struct btrfs_key key;
12929 int level = btrfs_header_level(eb);
12935 * If we have pinned this block before, don't pin it again.
12936 * This can not only avoid forever loop with broken filesystem
12937 * but also give us some speedups.
12939 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12940 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12943 btrfs_pin_extent(fs_info, eb->start, eb->len);
12945 nritems = btrfs_header_nritems(eb);
12946 for (i = 0; i < nritems; i++) {
12948 btrfs_item_key_to_cpu(eb, &key, i);
12949 if (key.type != BTRFS_ROOT_ITEM_KEY)
12951 /* Skip the extent root and reloc roots */
12952 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12953 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12954 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12956 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12957 bytenr = btrfs_disk_root_bytenr(eb, ri);
12960 * If at any point we start needing the real root we
12961 * will have to build a stump root for the root we are
12962 * in, but for now this doesn't actually use the root so
12963 * just pass in extent_root.
12965 tmp = read_tree_block(fs_info, bytenr, 0);
12966 if (!extent_buffer_uptodate(tmp)) {
12967 fprintf(stderr, "Error reading root block\n");
12970 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12971 free_extent_buffer(tmp);
12975 bytenr = btrfs_node_blockptr(eb, i);
12977 /* If we aren't the tree root don't read the block */
12978 if (level == 1 && !tree_root) {
12979 btrfs_pin_extent(fs_info, bytenr,
12980 fs_info->nodesize);
12984 tmp = read_tree_block(fs_info, bytenr, 0);
12985 if (!extent_buffer_uptodate(tmp)) {
12986 fprintf(stderr, "Error reading tree block\n");
12989 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12990 free_extent_buffer(tmp);
/*
 * Pin the metadata reachable from the chunk root (tree_root = 0) and then
 * from the tree root (tree_root = 1) using pin_down_tree_blocks().
 *
 * Returns the result of the tree root walk.
 * NOTE(review): a failure of the chunk root walk is presumably returned
 * early - confirm.
 */
12999 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13003 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13007 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups of @fs_info from the chunk tree.
 *
 * Visible steps:
 *  - search the chunk root for the first BTRFS_CHUNK_ITEM_KEY item;
 *  - reset avail_data_alloc_bits, avail_metadata_alloc_bits and
 *    avail_system_alloc_bits to 0;
 *  - walk the chunk tree leaf by leaf; for every chunk item call
 *    btrfs_add_block_group() with the chunk type, key.offset and the chunk
 *    length, and mark the range from key.offset to key.offset + length dirty
 *    in fs_info->free_space_cache;
 *  - afterwards iterate the block groups with
 *    btrfs_lookup_first_block_group(), advancing start past each one.
 *
 * NOTE(review): the work done per block group in the final loop and the
 * return value are not visible here - confirm against the full body.
 */
13010 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13012 struct btrfs_block_group_cache *cache;
13013 struct btrfs_path path;
13014 struct extent_buffer *leaf;
13015 struct btrfs_chunk *chunk;
13016 struct btrfs_key key;
13020 btrfs_init_path(&path);
13022 key.type = BTRFS_CHUNK_ITEM_KEY;
13024 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13026 btrfs_release_path(&path);
13031 * We do this in case the block groups were screwed up and had alloc
13032 * bits that aren't actually set on the chunks. This happens with
13033 * restored images every time and could happen in real life I guess.
13035 fs_info->avail_data_alloc_bits = 0;
13036 fs_info->avail_metadata_alloc_bits = 0;
13037 fs_info->avail_system_alloc_bits = 0;
13039 /* First we need to create the in-memory block groups */
13041 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13042 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13044 btrfs_release_path(&path);
13052 leaf = path.nodes[0];
13053 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13054 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13059 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13060 btrfs_add_block_group(fs_info, 0,
13061 btrfs_chunk_type(leaf, chunk), key.offset,
13062 btrfs_chunk_length(leaf, chunk));
13063 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13064 key.offset + btrfs_chunk_length(leaf, chunk));
13069 cache = btrfs_lookup_first_block_group(fs_info, start);
13073 start = cache->key.objectid + cache->key.offset;
13076 btrfs_release_path(&path);
/*
 * Remove pending balance state from the tree root and reinitialise the data
 * reloc tree.
 *
 * Visible steps:
 *  - search the tree root for (BTRFS_BALANCE_OBJECTID,
 *    BTRFS_BALANCE_ITEM_KEY) and delete the item; one path jumps directly
 *    to reinit_data_reloc (NOTE(review): presumably when no balance item
 *    exists - confirm);
 *  - search for BTRFS_TREE_RELOC_OBJECTID root items and delete them in
 *    batches with btrfs_del_items(), tracking del_slot and del_nr;
 *  - read the BTRFS_DATA_RELOC_TREE_OBJECTID root, record it in the
 *    transaction, reinitialise it with btrfs_fsck_reinit_root(trans, root, 0)
 *    and recreate its root directory with btrfs_make_root_dir().
 *
 * A failure to read the data reloc tree prints a message and yields
 * PTR_ERR(root).
 * NOTE(review): other error paths and the return value should be confirmed
 * against the full function body.
 */
13080 static int reset_balance(struct btrfs_trans_handle *trans,
13081 struct btrfs_fs_info *fs_info)
13083 struct btrfs_root *root = fs_info->tree_root;
13084 struct btrfs_path path;
13085 struct extent_buffer *leaf;
13086 struct btrfs_key key;
13087 int del_slot, del_nr = 0;
13091 btrfs_init_path(&path);
13092 key.objectid = BTRFS_BALANCE_OBJECTID;
13093 key.type = BTRFS_BALANCE_ITEM_KEY;
13095 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13100 goto reinit_data_reloc;
13105 ret = btrfs_del_item(trans, root, &path);
13108 btrfs_release_path(&path);
/* Next, remove the root items of the tree reloc trees */
13110 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13111 key.type = BTRFS_ROOT_ITEM_KEY;
13113 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13117 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13122 ret = btrfs_del_items(trans, root, &path,
13129 btrfs_release_path(&path);
13132 ret = btrfs_search_slot(trans, root, &key, &path,
13139 leaf = path.nodes[0];
13140 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13141 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13143 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13148 del_slot = path.slots[0];
13157 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13161 btrfs_release_path(&path);
/* Finally reinitialise the data reloc tree and its root directory */
13164 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13165 key.type = BTRFS_ROOT_ITEM_KEY;
13166 key.offset = (u64)-1;
13167 root = btrfs_read_fs_root(fs_info, &key);
13168 if (IS_ERR(root)) {
13169 fprintf(stderr, "Error reading data reloc tree\n");
13170 ret = PTR_ERR(root);
13173 record_root_in_trans(trans, root);
13174 ret = btrfs_fsck_reinit_root(trans, root, 0);
13177 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13179 btrfs_release_path(&path);
/*
 * Recreate the extent tree of @fs_info inside transaction @trans.
 *
 * Visible sequence:
 *  1. refuse filesystems with the MIXED_GROUPS incompat flag (a message is
 *     printed to stderr);
 *  2. pin all in-use metadata with pin_metadata_blocks();
 *  3. drop the in-memory block groups with btrfs_free_block_groups() and
 *     rebuild them with reset_block_groups();
 *  4. reinitialise the extent root with btrfs_fsck_reinit_root();
 *  5. insert one block group item per in-memory block group into the extent
 *     root, calling btrfs_extent_post_op() in that loop;
 *  6. clear pending balance state with reset_balance().
 *
 * Each failing step prints its own message to stderr.
 * NOTE(review): the error codes returned for each failure and the final
 * return value are not visible here - confirm against the full body.
 */
13183 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13184 struct btrfs_fs_info *fs_info)
13190 * The only reason we don't do this is because right now we're just
13191 * walking the trees we find and pinning down their bytes, we don't look
13192 * at any of the leaves. In order to do mixed groups we'd have to check
13193 * the leaves of any fs roots and pin down the bytes for any file
13194 * extents we find. Not hard but why do it if we don't have to?
13196 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13197 fprintf(stderr, "We don't support re-initing the extent tree "
13198 "for mixed block groups yet, please notify a btrfs "
13199 "developer you want to do this so they can add this "
13200 "functionality.\n");
13205 * first we need to walk all of the trees except the extent tree and pin
13206 * down the bytes that are in use so we don't overwrite any existing
13209 ret = pin_metadata_blocks(fs_info);
13211 fprintf(stderr, "error pinning down used bytes\n");
13216 * Need to drop all the block groups since we're going to recreate all
13219 btrfs_free_block_groups(fs_info);
13220 ret = reset_block_groups(fs_info);
13222 fprintf(stderr, "error resetting the block groups\n");
13226 /* Ok we can allocate now, reinit the extent root */
13227 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13229 fprintf(stderr, "extent root initialization failed\n");
13231 * When the transaction code is updated we should end the
13232 * transaction, but for now progs only knows about commit so
13233 * just return an error.
13239 * Now we have all the in-memory block groups setup so we can make
13240 * allocations properly, and the metadata we care about is safe since we
13241 * pinned all of it above.
13244 struct btrfs_block_group_cache *cache;
13246 cache = btrfs_lookup_first_block_group(fs_info, start);
13249 start = cache->key.objectid + cache->key.offset;
13250 ret = btrfs_insert_item(trans, fs_info->extent_root,
13251 &cache->key, &cache->item,
13252 sizeof(cache->item));
13254 fprintf(stderr, "Error adding block group\n");
13257 btrfs_extent_post_op(trans, fs_info->extent_root);
13260 ret = reset_balance(trans, fs_info);
13262 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of the metadata block @eb.
 *
 * The owner root is read using the header owner of @eb (the @root argument
 * only supplies fs_info and is then replaced by that owner root). A
 * transaction is started on the owner root, path.lowest_level is set to the
 * level of @eb, and the first key of @eb is searched with cow = 1. The
 * transaction is then committed and the path released.
 *
 * Returns PTR_ERR() when the owner root cannot be read or the transaction
 * cannot be started.
 * NOTE(review): the result of btrfs_commit_transaction() is not stored on
 * the visible line; the final return value should be confirmed.
 */
13267 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13269 struct btrfs_path path;
13270 struct btrfs_trans_handle *trans;
13271 struct btrfs_key key;
13274 printf("Recowing metadata block %llu\n", eb->start);
13275 key.objectid = btrfs_header_owner(eb);
13276 key.type = BTRFS_ROOT_ITEM_KEY;
13277 key.offset = (u64)-1;
13279 root = btrfs_read_fs_root(root->fs_info, &key);
13280 if (IS_ERR(root)) {
13281 fprintf(stderr, "Couldn't find owner root %llu\n",
13283 return PTR_ERR(root);
13286 trans = btrfs_start_transaction(root, 1);
13288 return PTR_ERR(trans);
13290 btrfs_init_path(&path);
/* Nodes and leaves store their keys differently, so pick the right accessor */
13291 path.lowest_level = btrfs_header_level(eb);
13292 if (path.lowest_level)
13293 btrfs_node_key_to_cpu(eb, &key, 0);
13295 btrfs_item_key_to_cpu(eb, &key, 0);
/* Searching with cow = 1 is what triggers the copy-on-write */
13297 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13298 btrfs_commit_transaction(trans, root);
13299 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root it belongs to.
 *
 * The owning root is read using bad->root_id (the @root argument only
 * supplies fs_info and is then replaced). A transaction is started on that
 * root, bad->key is searched with ins_len -1 and cow 1, the item is removed
 * with btrfs_del_item(), and the transaction is committed.
 *
 * Returns PTR_ERR() when the owning root cannot be read or the transaction
 * cannot be started.
 * NOTE(review): the handling of the search result and the final return
 * value should be confirmed against the full function body.
 */
13303 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13305 struct btrfs_path path;
13306 struct btrfs_trans_handle *trans;
13307 struct btrfs_key key;
13310 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13311 bad->key.type, bad->key.offset);
13312 key.objectid = bad->root_id;
13313 key.type = BTRFS_ROOT_ITEM_KEY;
13314 key.offset = (u64)-1;
13316 root = btrfs_read_fs_root(root->fs_info, &key);
13317 if (IS_ERR(root)) {
13318 fprintf(stderr, "Couldn't find owner root %llu\n",
13320 return PTR_ERR(root);
13323 trans = btrfs_start_transaction(root, 1);
13325 return PTR_ERR(trans);
13327 btrfs_init_path(&path);
13328 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13334 ret = btrfs_del_item(trans, root, &path);
13336 btrfs_commit_transaction(trans, root);
13337 btrfs_release_path(&path);
/*
 * Clear the log tree pointer in the super block copy of @root.
 *
 * Starts a transaction on @root, sets the super block log root and log root
 * level to 0, and commits the transaction; ret holds the commit result.
 * When the transaction cannot be started ret is set to PTR_ERR(trans).
 * NOTE(review): ret is presumably the return value - confirm.
 */
13341 static int zero_log_tree(struct btrfs_root *root)
13343 struct btrfs_trans_handle *trans;
13346 trans = btrfs_start_transaction(root, 1);
13347 if (IS_ERR(trans)) {
13348 ret = PTR_ERR(trans);
13351 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13352 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13353 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for a byte range, one sector at a time.
 *
 * @trans:     transaction handle passed to btrfs_csum_file_block()
 * @csum_root: root passed to btrfs_csum_file_block(); its fs_info supplies
 *             the sector size
 * @buf:       scratch buffer that each sector is read into
 * @start:     first byte of the range
 *
 * For every offset below len, stepping by fs_info->sectorsize, the data at
 * start + offset is read with read_extent_data() and then handed to
 * btrfs_csum_file_block() together with start + len and start + offset.
 * NOTE(review): buf is presumably at least one sector large (callers in this
 * file allocate sectorsize bytes); failures of either call presumably stop
 * the loop and are returned - confirm.
 */
13357 static int populate_csum(struct btrfs_trans_handle *trans,
13358 struct btrfs_root *csum_root, char *buf, u64 start,
13361 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13366 while (offset < len) {
13367 sectorsize = fs_info->sectorsize;
13368 ret = read_extent_data(fs_info, buf, start + offset,
13372 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13373 start + offset, buf, sectorsize);
13376 offset += sectorsize;
/*
 * Populate @csum_root with checksums for the regular file extents found in
 * @cur_root.
 *
 * A scratch buffer of one sector is allocated, @cur_root is searched and its
 * items are iterated with btrfs_next_item(). Only BTRFS_EXTENT_DATA_KEY
 * items whose file extent type is BTRFS_FILE_EXTENT_REG are processed: the
 * on-disk bytenr and on-disk byte count of the extent are passed to
 * populate_csum(). A result of -EEXIST from populate_csum() is handled
 * separately from other errors.
 * NOTE(review): -EEXIST is presumably treated as success; the freeing of the
 * buffer and the return value are not visible here - confirm.
 */
13381 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13382 struct btrfs_root *csum_root,
13383 struct btrfs_root *cur_root)
13385 struct btrfs_path path;
13386 struct btrfs_key key;
13387 struct extent_buffer *node;
13388 struct btrfs_file_extent_item *fi;
13395 buf = malloc(cur_root->fs_info->sectorsize);
13399 btrfs_init_path(&path);
13403 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13406 /* Iterate all regular file extents and fill its csum */
13408 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13410 if (key.type != BTRFS_EXTENT_DATA_KEY)
13412 node = path.nodes[0];
13413 slot = path.slots[0];
13414 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13415 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file range */
13417 start = btrfs_file_extent_disk_bytenr(node, fi);
13418 len = btrfs_file_extent_disk_num_bytes(node, fi);
13420 ret = populate_csum(trans, csum_root, buf, start, len);
13421 if (ret == -EEXIST)
13427 * TODO: if next leaf is corrupted, jump to nearest next valid
13430 ret = btrfs_next_item(cur_root, &path);
13440 btrfs_release_path(&path);
/*
 * Populate @csum_root by walking every fs/subvolume tree listed in the tree
 * root.
 *
 * The tree root is searched from objectid BTRFS_FS_TREE_OBJECTID with type
 * BTRFS_ROOT_ITEM_KEY and iterated with btrfs_next_item(). Keys are tested
 * against BTRFS_LAST_FREE_OBJECTID, for the ROOT_ITEM type and with
 * is_fstree(); matching roots are read with key.offset set to (u64)-1 and
 * passed to fill_csum_tree_from_one_fs_root(). A root that cannot be read
 * prints "Fail to read fs/subvol tree".
 * NOTE(review): keys above BTRFS_LAST_FREE_OBJECTID presumably end the walk
 * and the other tests presumably skip the item; the return value is not
 * visible here - confirm.
 */
13445 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13446 struct btrfs_root *csum_root)
13448 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13449 struct btrfs_path path;
13450 struct btrfs_root *tree_root = fs_info->tree_root;
13451 struct btrfs_root *cur_root;
13452 struct extent_buffer *node;
13453 struct btrfs_key key;
13457 btrfs_init_path(&path);
13458 key.objectid = BTRFS_FS_TREE_OBJECTID;
13460 key.type = BTRFS_ROOT_ITEM_KEY;
13461 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13470 node = path.nodes[0];
13471 slot = path.slots[0];
13472 btrfs_item_key_to_cpu(node, &key, slot);
13473 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13475 if (key.type != BTRFS_ROOT_ITEM_KEY)
13477 if (!is_fstree(key.objectid))
13479 key.offset = (u64)-1;
13481 cur_root = btrfs_read_fs_root(fs_info, &key);
13482 if (IS_ERR(cur_root) || !cur_root) {
13483 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13487 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13492 ret = btrfs_next_item(tree_root, &path);
13502 btrfs_release_path(&path);
/*
 * Populate @csum_root by walking the extent tree.
 *
 * The extent root is searched for BTRFS_EXTENT_ITEM_KEY items and walked
 * leaf by leaf with btrfs_next_leaf(). Items of another key type, and extent
 * items without BTRFS_EXTENT_FLAG_DATA, are not checksummed. For each data
 * extent populate_csum() is called with key.objectid as the start of the
 * range, using a scratch buffer of one sector.
 * NOTE(review): the length passed to populate_csum() is presumably
 * key.offset; the freeing of the buffer and the return value are not visible
 * here - confirm.
 */
13506 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13507 struct btrfs_root *csum_root)
13509 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13510 struct btrfs_path path;
13511 struct btrfs_extent_item *ei;
13512 struct extent_buffer *leaf;
13514 struct btrfs_key key;
13517 btrfs_init_path(&path);
13519 key.type = BTRFS_EXTENT_ITEM_KEY;
13521 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13523 btrfs_release_path(&path);
13527 buf = malloc(csum_root->fs_info->sectorsize);
13529 btrfs_release_path(&path);
13534 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13535 ret = btrfs_next_leaf(extent_root, &path);
13543 leaf = path.nodes[0];
13545 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13546 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13551 ei = btrfs_item_ptr(leaf, path.slots[0],
13552 struct btrfs_extent_item);
/* Only data extents carry checksums */
13553 if (!(btrfs_extent_flags(leaf, ei) &
13554 BTRFS_EXTENT_FLAG_DATA)) {
13559 ret = populate_csum(trans, csum_root, buf, key.objectid,
13566 btrfs_release_path(&path);
13572 * Recalculate the csum and put it into the csum tree.
13574 * Extent tree init will wipe out all the extent info, so in that case, we
13575 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13576 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatch the csum tree rebuild: a non-zero @search_fs_tree selects
 * fill_csum_tree_from_fs(), otherwise fill_csum_tree_from_extent() is used.
 * The result of the selected helper is returned.
 */
13578 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13579 struct btrfs_root *csum_root,
13580 int search_fs_tree)
13582 if (search_fs_tree)
13583 return fill_csum_tree_from_fs(trans, csum_root);
13585 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Every cache extent is removed from the tree and mapped back to its
 * enclosing struct root_item_info; afterwards the cache tree itself is freed
 * and roots_info_cache is reset to NULL.
 * NOTE(review): nothing is presumably done when roots_info_cache is NULL,
 * and each root_item_info is presumably freed after removal - confirm.
 */
13588 static void free_roots_info_cache(void)
13590 if (!roots_info_cache)
13593 while (!cache_tree_empty(roots_info_cache)) {
13594 struct cache_extent *entry;
13595 struct root_item_info *rii;
13597 entry = first_cache_extent(roots_info_cache);
13600 remove_cache_extent(roots_info_cache, entry);
13601 rii = container_of(entry, struct root_item_info, cache_extent);
13605 free(roots_info_cache);
13606 roots_info_cache = NULL;
/*
 * Build roots_info_cache from the extent tree of @info.
 *
 * The cache tree is allocated on first use. The extent root is then walked
 * leaf by leaf. Only EXTENT_ITEM and METADATA_ITEM keys are considered, and
 * EXTENT_ITEM keys additionally need BTRFS_EXTENT_FLAG_TREE_BLOCK. The level
 * comes from key.offset for METADATA_ITEM keys and from the
 * btrfs_tree_block_info that follows the extent item otherwise. The first
 * inline ref must be of type BTRFS_TREE_BLOCK_REF_KEY; its offset is used as
 * the root id.
 *
 * One root_item_info entry is kept per root id (cache extent start = root
 * id, size = 1, level initialised to (u8)-1). When the current block has a
 * higher level than the recorded one, or nothing has been recorded yet, the
 * entry takes the level, bytenr and generation of this block and node_count
 * is set to 1.
 * NOTE(review): a block at the same level as the recorded one presumably
 * increments node_count; allocation failure handling and the return value
 * are not visible here - confirm.
 */
13609 static int build_roots_info_cache(struct btrfs_fs_info *info)
13612 struct btrfs_key key;
13613 struct extent_buffer *leaf;
13614 struct btrfs_path path;
13616 if (!roots_info_cache) {
13617 roots_info_cache = malloc(sizeof(*roots_info_cache));
13618 if (!roots_info_cache)
13620 cache_tree_init(roots_info_cache);
13623 btrfs_init_path(&path);
13625 key.type = BTRFS_EXTENT_ITEM_KEY;
13627 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13630 leaf = path.nodes[0];
13633 struct btrfs_key found_key;
13634 struct btrfs_extent_item *ei;
13635 struct btrfs_extent_inline_ref *iref;
13636 int slot = path.slots[0];
13641 struct cache_extent *entry;
13642 struct root_item_info *rii;
13644 if (slot >= btrfs_header_nritems(leaf)) {
13645 ret = btrfs_next_leaf(info->extent_root, &path);
13652 leaf = path.nodes[0];
13653 slot = path.slots[0];
13656 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13658 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13659 found_key.type != BTRFS_METADATA_ITEM_KEY)
13662 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13663 flags = btrfs_extent_flags(leaf, ei);
13665 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13666 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* METADATA_ITEM keys carry the level in key.offset, EXTENT_ITEM keys do not */
13669 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13670 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13671 level = found_key.offset;
13673 struct btrfs_tree_block_info *binfo;
13675 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13676 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13677 level = btrfs_tree_block_level(leaf, binfo);
13681 * For a root extent, it must be of the following type and the
13682 * first (and only one) iref in the item.
13684 type = btrfs_extent_inline_ref_type(leaf, iref);
13685 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13688 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13689 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13691 rii = malloc(sizeof(struct root_item_info));
13696 rii->cache_extent.start = root_id;
13697 rii->cache_extent.size = 1;
13698 rii->level = (u8)-1;
13699 entry = &rii->cache_extent;
13700 ret = insert_cache_extent(roots_info_cache, entry);
13703 rii = container_of(entry, struct root_item_info,
13707 ASSERT(rii->cache_extent.start == root_id);
13708 ASSERT(rii->cache_extent.size == 1);
/* (u8)-1 marks an entry that has not recorded any block yet */
13710 if (level > rii->level || rii->level == (u8)-1) {
13711 rii->level = level;
13712 rii->bytenr = found_key.objectid;
13713 rii->gen = btrfs_extent_generation(leaf, ei);
13714 rii->node_count = 1;
13715 } else if (level == rii->level) {
13723 btrfs_release_path(&path);
/*
 * Compare the root item that @path points at with the information collected
 * in roots_info_cache for the same root, and optionally rewrite it.
 *
 * @path:           path positioned on the root item (nodes[0] / slots[0])
 * @root_key:       key of the root item; its objectid is the root id
 * @read_only_mode: when non-zero the item is only checked, not rewritten
 *
 * Visible behaviour:
 *  - a root without a cache entry is reported with
 *    "could not find extent items";
 *  - an entry whose node_count is not 1 is reported with
 *    "could not find btree root extent";
 *  - the on-disk root item is copied out and its bytenr, level and
 *    generation are compared with the cached values; on a difference a
 *    message with the current and new values is printed unless both
 *    @read_only_mode and repair are set;
 *  - a root item whose generation is newer than the cached one is reported
 *    separately;
 *  - when @read_only_mode is zero, the bytenr, level and generation of the
 *    item are overwritten with the cached values and written back into the
 *    leaf.
 *
 * NOTE(review): the return values for each of these outcomes are not visible
 * here - confirm against the full function body.
 */
13728 static int maybe_repair_root_item(struct btrfs_path *path,
13729 const struct btrfs_key *root_key,
13730 const int read_only_mode)
13732 const u64 root_id = root_key->objectid;
13733 struct cache_extent *entry;
13734 struct root_item_info *rii;
13735 struct btrfs_root_item ri;
13736 unsigned long offset;
13738 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13741 "Error: could not find extent items for root %llu\n",
13742 root_key->objectid);
13746 rii = container_of(entry, struct root_item_info, cache_extent);
13747 ASSERT(rii->cache_extent.start == root_id);
13748 ASSERT(rii->cache_extent.size == 1);
13750 if (rii->node_count != 1) {
13752 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item; it is written back only on repair */
13757 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13758 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13760 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13761 btrfs_root_level(&ri) != rii->level ||
13762 btrfs_root_generation(&ri) != rii->gen) {
13765 * If we're in repair mode but our caller told us to not update
13766 * the root item, i.e. just check if it needs to be updated, don't
13767 * print this message, since the caller will call us again shortly
13768 * for the same root item without read only mode (the caller will
13769 * open a transaction first).
13771 if (!(read_only_mode && repair))
13773 "%sroot item for root %llu,"
13774 " current bytenr %llu, current gen %llu, current level %u,"
13775 " new bytenr %llu, new gen %llu, new level %u\n",
13776 (read_only_mode ? "" : "fixing "),
13778 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13779 btrfs_root_level(&ri),
13780 rii->bytenr, rii->gen, rii->level);
13782 if (btrfs_root_generation(&ri) > rii->gen) {
13784 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13785 root_id, btrfs_root_generation(&ri), rii->gen);
13789 if (!read_only_mode) {
13790 btrfs_set_root_bytenr(&ri, rii->bytenr);
13791 btrfs_set_root_level(&ri, rii->level);
13792 btrfs_set_root_generation(&ri, rii->gen);
13793 write_extent_buffer(path->nodes[0], &ri,
13794 offset, sizeof(ri));
13804 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13805 * caused read-only snapshots to be corrupted if they were created at a moment
13806 * when the source subvolume/snapshot had orphan items. The issue was that the
13807 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13808 * node instead of the post orphan cleanup root node.
13809 * So this function, and its callees, just detects and fixes those cases. Even
13810 * though the regression was for read-only snapshots, this function applies to
13811 * any snapshot/subvolume root.
13812 * This must be run before any other repair code - not doing it so, makes other
13813 * repair code delete or modify backrefs in the extent tree for example, which
13814 * will result in an inconsistent fs after repairing the root items.
13816 static int repair_root_items(struct btrfs_fs_info *info)
13818 struct btrfs_path path;
13819 struct btrfs_key key;
13820 struct extent_buffer *leaf;
13821 struct btrfs_trans_handle *trans = NULL;
13824 int need_trans = 0;
13826 btrfs_init_path(&path);
13828 ret = build_roots_info_cache(info);
13832 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13833 key.type = BTRFS_ROOT_ITEM_KEY;
13838 * Avoid opening and committing transactions if a leaf doesn't have
13839 * any root items that need to be fixed, so that we avoid rotating
13840 * backup roots unnecessarily.
13843 trans = btrfs_start_transaction(info->tree_root, 1);
13844 if (IS_ERR(trans)) {
13845 ret = PTR_ERR(trans);
13850 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13854 leaf = path.nodes[0];
13857 struct btrfs_key found_key;
13859 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13860 int no_more_keys = find_next_key(&path, &key);
13862 btrfs_release_path(&path);
13864 ret = btrfs_commit_transaction(trans,
13876 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13878 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13880 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13883 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13887 if (!trans && repair) {
13890 btrfs_release_path(&path);
13900 free_roots_info_cache();
13901 btrfs_release_path(&path);
13903 btrfs_commit_transaction(trans, info->tree_root);
13910 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13912 struct btrfs_trans_handle *trans;
13913 struct btrfs_block_group_cache *bg_cache;
13917 /* Clear all free space cache inodes and its extent data */
13919 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13922 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13925 current = bg_cache->key.objectid + bg_cache->key.offset;
13928 /* Don't forget to set cache_generation to -1 */
13929 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13930 if (IS_ERR(trans)) {
13931 error("failed to update super block cache generation");
13932 return PTR_ERR(trans);
13934 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13935 btrfs_commit_transaction(trans, fs_info->tree_root);
13940 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13945 if (clear_version == 1) {
13946 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13948 "free space cache v2 detected, use --clear-space-cache v2");
13952 printf("Clearing free space cache\n");
13953 ret = clear_free_space_cache(fs_info);
13955 error("failed to clear free space cache");
13958 printf("Free space cache cleared\n");
13960 } else if (clear_version == 2) {
13961 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13962 printf("no free space cache v2 to clear\n");
13966 printf("Clear free space cache v2\n");
13967 ret = btrfs_clear_free_space_tree(fs_info);
13969 error("failed to clear free space cache v2: %d", ret);
13972 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check", passed to usage() by cmd_check() when option
 * parsing fails or the positional argument count is wrong. The first string
 * is the synopsis, followed by the description and one entry per option.
 */
13979 const char * const cmd_check_usage[] = {
13980 "btrfs check [options] <device>",
13981 "Check structural integrity of a filesystem (unmounted).",
13982 "Check structural integrity of an unmounted filesystem. Verify internal",
13983 "trees' consistency and item connectivity. In the repair mode try to",
13984 "fix the problems found. ",
13985 "WARNING: the repair mode is considered dangerous",
13987 "-s|--super <superblock> use this superblock copy",
13988 "-b|--backup use the first valid backup root copy",
13989 "--force skip mount checks, repair is not possible",
13990 "--repair try to repair the filesystem",
13991 "--readonly run in read-only mode (default)",
13992 "--init-csum-tree create a new CRC tree",
13993 "--init-extent-tree create a new extent tree",
13994 "--mode <MODE> allows choice of memory/IO trade-offs",
13995 " where MODE is one of:",
13996 " original - read inodes and extents to memory (requires",
13997 " more memory, does less IO)",
13998 " lowmem - try to use less memory but read blocks again",
14000 "--check-data-csum verify checksums of data blocks",
14001 "-Q|--qgroup-report print a report on qgroup consistency",
14002 "-E|--subvol-extents <subvolid>",
14003 " print subvolume extents and sharing state",
14004 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14005 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14006 "-p|--progress indicate progress",
14007 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, checks the mount status of the device given as
 * the single positional argument, opens the filesystem and then either
 * performs one of the special actions (clear space cache, qgroup report,
 * subvolume extent state) or runs the checks in order: root items, chunks
 * and extents, free space cache/tree, fs roots, csums, root refs and quota
 * groups. A summary of the collected byte counters is printed at the end.
 */
14011 int cmd_check(int argc, char **argv)
14013 struct cache_tree root_cache;
14014 struct btrfs_root *root;
14015 struct btrfs_fs_info *info;
14018 u64 tree_root_bytenr = 0;
14019 u64 chunk_root_bytenr = 0;
14020 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14024 int init_csum_tree = 0;
14026 int clear_space_cache = 0;
14027 int qgroup_report = 0;
14028 int qgroups_repaired = 0;
14029 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14034 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14035 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14036 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14037 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14038 GETOPT_VAL_FORCE };
14039 static const struct option long_options[] = {
14040 { "super", required_argument, NULL, 's' },
14041 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14042 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14043 { "init-csum-tree", no_argument, NULL,
14044 GETOPT_VAL_INIT_CSUM },
14045 { "init-extent-tree", no_argument, NULL,
14046 GETOPT_VAL_INIT_EXTENT },
14047 { "check-data-csum", no_argument, NULL,
14048 GETOPT_VAL_CHECK_CSUM },
14049 { "backup", no_argument, NULL, 'b' },
14050 { "subvol-extents", required_argument, NULL, 'E' },
14051 { "qgroup-report", no_argument, NULL, 'Q' },
14052 { "tree-root", required_argument, NULL, 'r' },
14053 { "chunk-root", required_argument, NULL,
14054 GETOPT_VAL_CHUNK_TREE },
14055 { "progress", no_argument, NULL, 'p' },
14056 { "mode", required_argument, NULL,
14058 { "clear-space-cache", required_argument, NULL,
14059 GETOPT_VAL_CLEAR_SPACE_CACHE},
14060 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14061 { NULL, 0, NULL, 0}
/*
 * 'E' takes an argument: the long option is declared required_argument and
 * its handler parses optarg. The short form must therefore be "E:", otherwise
 * "-E <subvolid>" reaches arg_strtou64() with a NULL optarg.
 */
14064 c = getopt_long(argc, argv, "as:br:pE:Q", long_options, NULL);
14068 case 'a': /* ignored */ break;
14070 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14073 num = arg_strtou64(optarg);
14074 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14076 "super mirror should be less than %d",
14077 BTRFS_SUPER_MIRROR_MAX);
14080 bytenr = btrfs_sb_offset(((int)num));
14081 printf("using SB copy %llu, bytenr %llu\n", num,
14082 (unsigned long long)bytenr);
14088 subvolid = arg_strtou64(optarg);
14091 tree_root_bytenr = arg_strtou64(optarg);
14093 case GETOPT_VAL_CHUNK_TREE:
14094 chunk_root_bytenr = arg_strtou64(optarg);
14097 ctx.progress_enabled = true;
14101 usage(cmd_check_usage);
14102 case GETOPT_VAL_REPAIR:
14103 printf("enabling repair mode\n");
14105 ctree_flags |= OPEN_CTREE_WRITES;
14107 case GETOPT_VAL_READONLY:
14110 case GETOPT_VAL_INIT_CSUM:
14111 printf("Creating a new CRC tree\n");
14112 init_csum_tree = 1;
14114 ctree_flags |= OPEN_CTREE_WRITES;
14116 case GETOPT_VAL_INIT_EXTENT:
14117 init_extent_tree = 1;
14118 ctree_flags |= (OPEN_CTREE_WRITES |
14119 OPEN_CTREE_NO_BLOCK_GROUPS);
14122 case GETOPT_VAL_CHECK_CSUM:
14123 check_data_csum = 1;
14125 case GETOPT_VAL_MODE:
14126 check_mode = parse_check_mode(optarg);
14127 if (check_mode == CHECK_MODE_UNKNOWN) {
14128 error("unknown mode: %s", optarg);
14132 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14133 if (strcmp(optarg, "v1") == 0) {
14134 clear_space_cache = 1;
14135 } else if (strcmp(optarg, "v2") == 0) {
14136 clear_space_cache = 2;
14137 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14140 "invalid argument to --clear-space-cache, must be v1 or v2");
14143 ctree_flags |= OPEN_CTREE_WRITES;
14145 case GETOPT_VAL_FORCE:
/* Exactly one positional argument (the device) must remain. */
14151 if (check_argc_exact(argc - optind, 1))
14152 usage(cmd_check_usage);
14154 if (ctx.progress_enabled) {
14155 ctx.tp = TASK_NOTHING;
14156 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14159 /* This check is the only reason for --readonly to exist */
14160 if (readonly && repair) {
14161 error("repair options are not compatible with --readonly");
14166 * experimental and dangerous
14168 if (repair && check_mode == CHECK_MODE_LOWMEM)
14169 warning("low-memory mode repair support is only partial");
14172 cache_tree_init(&root_cache);
14174 ret = check_mounted(argv[optind]);
14177 error("could not check mount status: %s",
14183 "%s is currently mounted, use --force if you really intend to check the filesystem",
14191 error("repair and --force is not yet supported");
14198 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14202 "filesystem mounted, continuing because of --force");
14204 /* A block device is mounted in exclusive mode by kernel */
14205 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14208 /* only allow partial opening under repair mode */
14210 ctree_flags |= OPEN_CTREE_PARTIAL;
14212 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14213 chunk_root_bytenr, ctree_flags);
14215 error("cannot open file system");
14221 global_info = info;
14222 root = info->fs_root;
14223 uuid_unparse(info->super_copy->fsid, uuidbuf);
14225 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14228 * Check the bare minimum before starting anything else that could rely
14229 * on it, namely the tree roots, any local consistency checks
14231 if (!extent_buffer_uptodate(info->tree_root->node) ||
14232 !extent_buffer_uptodate(info->dev_root->node) ||
14233 !extent_buffer_uptodate(info->chunk_root->node)) {
14234 error("critical roots corrupted, unable to check the filesystem");
14240 if (clear_space_cache) {
14241 ret = do_clear_free_space_cache(info, clear_space_cache);
14247 * repair mode will force us to commit transaction which
14248 * will make us fail to load log tree when mounting.
14250 if (repair && btrfs_super_log_root(info->super_copy)) {
14251 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14257 ret = zero_log_tree(root);
14260 error("failed to zero log tree: %d", ret);
14265 if (qgroup_report) {
14266 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14268 ret = qgroup_verify_all(info);
14275 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14276 subvolid, argv[optind], uuidbuf);
14277 ret = print_extent_state(info, subvolid);
14282 if (init_extent_tree || init_csum_tree) {
14283 struct btrfs_trans_handle *trans;
14285 trans = btrfs_start_transaction(info->extent_root, 0);
14286 if (IS_ERR(trans)) {
14287 error("error starting transaction");
14288 ret = PTR_ERR(trans);
14293 if (init_extent_tree) {
14294 printf("Creating a new extent tree\n");
14295 ret = reinit_extent_tree(trans, info);
14301 if (init_csum_tree) {
14302 printf("Reinitialize checksum tree\n");
14303 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14305 error("checksum tree initialization failed: %d",
14312 ret = fill_csum_tree(trans, info->csum_root,
14316 error("checksum tree refilling failed: %d", ret);
14321 * Ok now we commit and run the normal fsck, which will add
14322 * extent entries for all of the items it finds.
14324 ret = btrfs_commit_transaction(trans, info->extent_root);
14329 if (!extent_buffer_uptodate(info->extent_root->node)) {
14330 error("critical: extent_root, unable to check the filesystem");
14335 if (!extent_buffer_uptodate(info->csum_root->node)) {
14336 error("critical: csum_root, unable to check the filesystem");
14342 if (!init_extent_tree) {
14343 ret = repair_root_items(info);
14346 error("failed to repair root items: %s", strerror(-ret));
14350 fprintf(stderr, "Fixed %d roots.\n", ret);
14352 } else if (ret > 0) {
14354 "Found %d roots with an outdated root item.\n",
14357 "Please run a filesystem check with the option --repair to fix them.\n");
14364 ret = do_check_chunks_and_extents(info);
14368 "errors found in extent allocation tree or chunk allocation");
14370 /* Only re-check super size after we checked and repaired the fs */
14371 err |= !is_super_size_valid(info);
14373 if (!ctx.progress_enabled) {
14374 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14375 fprintf(stderr, "checking free space tree\n");
14377 fprintf(stderr, "checking free space cache\n");
14379 ret = check_space_cache(root);
14382 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14383 error("errors found in free space tree");
14385 error("errors found in free space cache");
14390 * We used to have to have these hole extents in between our real
14391 * extents so if we don't have this flag set we need to make sure there
14392 * are no gaps in the file extents for inodes, otherwise we can just
14393 * ignore it when this happens.
14395 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14396 ret = do_check_fs_roots(info, &root_cache);
14399 error("errors found in fs roots");
14403 fprintf(stderr, "checking csums\n");
14404 ret = check_csums(root);
14407 error("errors found in csum tree");
14411 fprintf(stderr, "checking root refs\n");
14412 /* For low memory mode, check_fs_roots_v2 handles root refs */
14413 if (check_mode != CHECK_MODE_LOWMEM) {
14414 ret = check_root_refs(root, &root_cache);
14417 error("errors found in root refs");
14422 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14423 struct extent_buffer *eb;
14425 eb = list_first_entry(&root->fs_info->recow_ebs,
14426 struct extent_buffer, recow);
14427 list_del_init(&eb->recow);
14428 ret = recow_extent_buffer(root, eb);
14431 error("fails to fix transid errors");
14436 while (!list_empty(&delete_items)) {
14437 struct bad_item *bad;
14439 bad = list_first_entry(&delete_items, struct bad_item, list);
14440 list_del_init(&bad->list);
14442 ret = delete_bad_item(root, bad);
14448 if (info->quota_enabled) {
14449 fprintf(stderr, "checking quota groups\n");
14450 ret = qgroup_verify_all(info);
14453 error("failed to check quota groups");
14457 ret = repair_qgroups(info, &qgroups_repaired);
14460 error("failed to repair quota groups");
14466 if (!list_empty(&root->fs_info->recow_ebs)) {
14467 error("transid errors in file system");
14472 printf("found %llu bytes used, ",
14473 (unsigned long long)bytes_used);
14475 printf("error(s) found\n");
14477 printf("no error found\n");
14478 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14479 printf("total tree bytes: %llu\n",
14480 (unsigned long long)total_btree_bytes);
14481 printf("total fs tree bytes: %llu\n",
14482 (unsigned long long)total_fs_tree_bytes);
14483 printf("total extent tree bytes: %llu\n",
14484 (unsigned long long)total_extent_tree_bytes);
14485 printf("btree space waste bytes: %llu\n",
14486 (unsigned long long)btree_space_waste);
14487 printf("file data blocks allocated: %llu\n referenced %llu\n",
14488 (unsigned long long)data_bytes_allocated,
14489 (unsigned long long)data_bytes_referenced);
14491 free_qgroup_counts();
14492 free_root_recs_tree(&root_cache);
14496 if (ctx.progress_enabled)
14497 task_deinit(ctx.info);