2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
54 TASK_NOTHING, /* have to be the last element */
59 enum task_position tp;
61 struct task_info *info;
/*
 * File-scope state of the checker.
 *
 * The u64 counters all start at zero and the two list heads start empty;
 * the code that updates them lies outside this listing (presumably they are
 * running totals gathered during the check -- TODO confirm at the users).
 */
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
/* Integer switches, both off by default. */
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
/*
 * Zero-initialised task context.  print_status_check() reads the ->tp and
 * ->info members of a struct task_ctx; presumably it is handed this
 * instance -- confirm at the call site.
 */
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
/*
 * Check modes understood by this file.  Only the default alias is visible
 * in this listing; parse_check_mode() additionally refers to
 * CHECK_MODE_LOWMEM, CHECK_MODE_ORIGINAL and CHECK_MODE_UNKNOWN.
 */
81 enum btrfs_check_mode {
/* The default is an alias for the original mode. */
85 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode in effect for this run; starts out as the default. */
88 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Comparator for two rb-tree nodes that both hold data backrefs (a WARN_ON
 * fires for either node whose is_data flag is clear).
 *
 * Keys are examined in this order: parent (a union with root, so one test
 * covers both), then owner, then offset, then disk_bytenr and bytes.  The
 * owner/offset stage is preceded by a check of back1->node.full_backref,
 * and the disk_bytenr stage is entered only when both entries have
 * found_ref set.
 *
 * NOTE(review): the return statements and closing braces are missing from
 * this listing; presumably each '>' test returns 1, each '<' test returns
 * -1 and equality falls through to 0 -- confirm against the full file.
 */
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
92 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94 struct data_backref *back1 = to_data_backref(ext1);
95 struct data_backref *back2 = to_data_backref(ext2);
97 WARN_ON(!ext1->is_data);
98 WARN_ON(!ext2->is_data);
100 /* parent and root are a union, so this covers both */
101 if (back1->parent > back2->parent)
103 if (back1->parent < back2->parent)
106 /* This is a full backref and the parents match. */
107 if (back1->node.full_backref)
110 if (back1->owner > back2->owner)
112 if (back1->owner < back2->owner)
115 if (back1->offset > back2->offset)
117 if (back1->offset < back2->offset)
/* Physical location and size are only compared once both refs were found. */
120 if (back1->found_ref && back2->found_ref) {
121 if (back1->disk_bytenr > back2->disk_bytenr)
123 if (back1->disk_bytenr < back2->disk_bytenr)
126 if (back1->bytes > back2->bytes)
128 if (back1->bytes < back2->bytes)
/*
 * Comparator for two rb-tree nodes that both hold tree backrefs (a WARN_ON
 * fires for either node whose is_data flag is set).  The only key visible
 * here is parent, which shares a union with root.
 *
 * NOTE(review): the return statements are missing from this listing;
 * presumably 1 / -1 / 0 in the usual comparator convention -- confirm.
 */
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
137 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139 struct tree_backref *back1 = to_tree_backref(ext1);
140 struct tree_backref *back2 = to_tree_backref(ext2);
142 WARN_ON(ext1->is_data);
143 WARN_ON(ext2->is_data);
145 /* parent and root are a union, so this covers both */
146 if (back1->parent > back2->parent)
148 if (back1->parent < back2->parent)
/*
 * Top-level comparator for extent backrefs of either kind.
 *
 * Nodes are first separated by their is_data flag, then by their
 * full_backref flag; remaining ties are handed to compare_data_backref()
 * or compare_tree_backref().
 *
 * NOTE(review): the condition choosing between the two helpers and the
 * return values of the flag tests are not visible in this listing;
 * presumably the data helper is used when is_data is set -- confirm.
 */
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
156 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
159 if (ext1->is_data > ext2->is_data)
162 if (ext1->is_data < ext2->is_data)
165 if (ext1->full_backref > ext2->full_backref)
167 if (ext1->full_backref < ext2->full_backref)
171 return compare_data_backref(node1, node2);
173 return compare_tree_backref(node1, node2);
/*
 * Progress reporter.  @p is interpreted as a struct task_ctx.
 *
 * After task_period_start() is called on priv->info with a period argument
 * of 1000 (annotated as 1s), the routine prints the label selected by
 * priv->tp from task_position_string[], followed by one of the four
 * work_indicator characters chosen by count % 4.  The line ends in '\r',
 * so each print overwrites the previous one on a terminal.  priv->tp ==
 * TASK_NOTHING is tested before printing, and task_period_wait() is called
 * on priv->info.
 *
 * NOTE(review): the loop structure, the declaration/update of count, most of
 * the label table and the return value are missing from this listing;
 * presumably this is a thread body that loops until cancelled -- confirm.
 */
177 static void *print_status_check(void *p)
179 struct task_ctx *priv = p;
180 const char work_indicator[] = { '.', 'o', 'O', 'o' };
182 static char *task_position_string[] = {
184 "checking free space cache",
188 task_period_start(priv->info, 1000 /* 1s */);
190 if (priv->tp == TASK_NOTHING)
194 printf("%s [%c]\r", task_position_string[priv->tp],
195 work_indicator[count % 4]);
198 task_period_wait(priv->info);
/*
 * Counterpart of print_status_check() taking the same opaque argument.
 * NOTE(review): the body is not visible in this listing; presumably it is
 * the hook run when the progress task finishes -- confirm.
 */
203 static int print_status_return(void *p)
211 static enum btrfs_check_mode parse_check_mode(const char *str)
213 if (strcmp(str, "lowmem") == 0)
214 return CHECK_MODE_LOWMEM;
215 if (strcmp(str, "orig") == 0)
216 return CHECK_MODE_ORIGINAL;
217 if (strcmp(str, "original") == 0)
218 return CHECK_MODE_ORIGINAL;
220 return CHECK_MODE_UNKNOWN;
223 /* Compatibility helper that allows older code to be reused */
/*
 * Report on the first hole recorded in @holes.
 *
 * An empty tree is handled as a special case; otherwise the lowest-ordered
 * entry is fetched with rb_first().
 *
 * NOTE(review): both return statements are missing from this listing.
 * Callers compare the result against an inode's isize, so presumably it is
 * the start offset of the first hole, with a sentinel value for "no holes"
 * -- confirm against the full file.
 */
224 static u64 first_extent_gap(struct rb_root *holes)
226 struct file_extent_hole *hole;
228 if (RB_EMPTY_ROOT(holes))
231 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Ordering function handed to rb_insert() for file_extent_hole nodes.
 *
 * Holes are ordered by start.  When two holes begin at the same offset the
 * lengths are compared (hole1->len >= hole2->len) so that, as the in-body
 * comments put it, one of them becomes the "merge center".
 *
 * NOTE(review): the return values are missing from this listing;
 * add_file_extent_hole() relies on this comparator never returning 0 --
 * confirm against the full file.
 */
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
237 struct file_extent_hole *hole1;
238 struct file_extent_hole *hole2;
240 hole1 = rb_entry(node1, struct file_extent_hole, node);
241 hole2 = rb_entry(node2, struct file_extent_hole, node);
243 if (hole1->start > hole2->start)
245 if (hole1->start < hole2->start)
247 /* Now hole1->start == hole2->start */
248 if (hole1->len >= hole2->len)
250 * Hole 1 will be merge center
251 * Same hole will be merged later
254 /* Hole 2 will be merge center */
259 * Add a hole to the record
261 * This will do hole merge for copy_file_extent_holes(),
262 * which will ensure there won't be continuous holes.
/*
 * Insert a hole into @holes and coalesce it with its neighbours.
 *
 * A new file_extent_hole is heap-allocated and inserted with compare_hole
 * as the ordering function.  If the preceding hole reaches or overlaps the
 * new one (prev->start + prev->len >= hole->start), the new hole is widened
 * to begin at prev->start and prev is erased from the tree.  Following
 * holes are then visited one at a time: each one that the current hole
 * reaches is erased, after the current hole's length has been extended
 * when the neighbour ends later.
 *
 * NOTE(review): the remaining parameters (callers pass a start and a length),
 * the malloc failure handling, the freeing of erased nodes, the loop
 * construct and the return value are missing from this listing -- confirm
 * against the full file.
 */
264 static int add_file_extent_hole(struct rb_root *holes,
267 struct file_extent_hole *hole;
268 struct file_extent_hole *prev = NULL;
269 struct file_extent_hole *next = NULL;
271 hole = malloc(sizeof(*hole));
276 /* Since compare will not return 0, no -EEXIST will happen */
277 rb_insert(holes, &hole->node, compare_hole);
279 /* simple merge with previous hole */
280 if (rb_prev(&hole->node))
281 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
283 if (prev && prev->start + prev->len >= hole->start) {
284 hole->len = hole->start + hole->len - prev->start;
285 hole->start = prev->start;
286 rb_erase(&prev->node, holes);
291 /* iterate merge with next holes */
293 if (!rb_next(&hole->node))
295 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
297 if (hole->start + hole->len >= next->start) {
298 if (hole->start + hole->len <= next->start + next->len)
299 hole->len = next->start + next->len -
301 rb_erase(&next->node, holes);
/*
 * Search callback used with rb_search() to locate the hole covering an
 * offset.
 *
 * @data is cast to a struct file_extent_hole that carries the search key.
 * The tree node's hole is then tested against the key's start: below
 * hole->start, or inside [hole->start, hole->start + hole->len).
 *
 * NOTE(review): the assignment of start and the return values are missing
 * from this listing; presumably start is taken from the key hole and the
 * "inside" case returns 0 -- confirm against the full file.
 */
310 static int compare_hole_range(struct rb_node *node, void *data)
312 struct file_extent_hole *hole;
315 hole = (struct file_extent_hole *)data;
318 hole = rb_entry(node, struct file_extent_hole, node);
319 if (start < hole->start)
321 if (start >= hole->start && start < hole->start + hole->len)
327 * Delete a hole in the record
329 * This will do the hole split and is much stricter than add.
/*
 * Remove a range from the holes recorded in @holes.
 *
 * The hole containing the range start is located with rb_search() and
 * compare_hole_range().  A range that extends past the end of that hole
 * is tested for (start + len > hole->start + hole->len).  The part of the
 * hole in front of the range and the part behind it are computed, the
 * hole is erased from the tree, and the surviving parts are re-added
 * through add_file_extent_hole().
 *
 * NOTE(review): the remaining parameters (start and len are used in the
 * body), the initialisation of tmp, the not-found and out-of-range
 * results, the freeing of the erased hole and the return value are missing
 * from this listing -- confirm against the full file.
 */
331 static int del_file_extent_hole(struct rb_root *holes,
334 struct file_extent_hole *hole;
335 struct file_extent_hole tmp;
340 struct rb_node *node;
347 node = rb_search(holes, &tmp, compare_hole_range, NULL);
350 hole = rb_entry(node, struct file_extent_hole, node);
351 if (start + len > hole->start + hole->len)
355 * Now there will be no overlap, delete the hole and re-add the
356 * split(s) if they exists.
/* Leading remainder: from the hole's start up to the deleted range. */
358 if (start > hole->start) {
359 prev_start = hole->start;
360 prev_len = start - hole->start;
/* Trailing remainder: from the end of the deleted range to the hole's end. */
363 if (hole->start + hole->len > start + len) {
364 next_start = start + len;
365 next_len = hole->start + hole->len - start - len;
368 rb_erase(node, holes);
371 ret = add_file_extent_hole(holes, prev_start, prev_len);
376 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of one tree to @dst.
 *
 * The source tree is walked in order with rb_first()/rb_next() and each
 * hole is passed to add_file_extent_hole(), so merging with holes already
 * present in @dst is done by that function.
 *
 * NOTE(review): the second parameter (used as src), the loop construct, the
 * handling of ret and the return value are missing from this listing --
 * confirm against the full file.
 */
383 static int copy_file_extent_holes(struct rb_root *dst,
386 struct file_extent_hole *hole;
387 struct rb_node *node;
390 node = rb_first(src);
392 hole = rb_entry(node, struct file_extent_hole, node);
393 ret = add_file_extent_hole(dst, hole->start, hole->len);
396 node = rb_next(node);
/*
 * Empty the hole tree @holes.
 *
 * The first node is fetched, erased from the tree, and the first node is
 * fetched again; re-reading rb_first() after each erase avoids walking
 * from a node that is no longer linked.
 *
 * NOTE(review): the loop construct and the release of each hole are missing
 * from this listing; presumably every erased hole is passed to free() --
 * confirm against the full file.
 */
401 static void free_file_extent_holes(struct rb_root *holes)
403 struct rb_node *node;
404 struct file_extent_hole *hole;
406 node = rb_first(holes);
408 hole = rb_entry(node, struct file_extent_hole, node);
409 rb_erase(node, holes);
411 node = rb_first(holes);
/* Forward declaration; the definition is outside this listing. */
415 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root)
420 if (root->last_trans != trans->transid) {
421 root->track_dirty = 1;
422 root->last_trans = trans->transid;
423 root->commit_root = root->node;
424 extent_buffer_get(root->node);
/*
 * Comparator for device_record rb-tree nodes; records are ordered by devid.
 *
 * NOTE(review): the return values are missing from this listing; presumably
 * 1 / -1 / 0 in the usual comparator convention -- confirm.
 */
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
430 struct device_record *rec1;
431 struct device_record *rec2;
433 rec1 = rb_entry(node1, struct device_record, node);
434 rec2 = rb_entry(node2, struct device_record, node);
435 if (rec1->devid > rec2->devid)
437 else if (rec1->devid < rec2->devid)
/*
 * Create a private deep copy of @orig_rec.
 *
 * The record itself is duplicated with memcpy(), after which the embedded
 * containers are re-initialised and refilled with copies: each backref
 * (including its trailing name, namelen + 1 bytes), each orphan data
 * extent, and the hole tree via copy_file_extent_holes().
 *
 * Returns the new record, or an ERR_PTR value; ERR_PTR(-ENOMEM) is returned
 * when the record itself cannot be allocated.  The tail of the function
 * unwinds partially built state: it walks the hole tree and unlinks
 * whatever was added to the two lists.
 *
 * NOTE(review): failure checks after the inner malloc() calls, the labels,
 * the free() calls and the final return are missing from this listing --
 * confirm against the full file.
 */
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
445 struct inode_record *rec;
446 struct inode_backref *backref;
447 struct inode_backref *orig;
448 struct inode_backref *tmp;
449 struct orphan_data_extent *src_orphan;
450 struct orphan_data_extent *dst_orphan;
455 rec = malloc(sizeof(*rec));
457 return ERR_PTR(-ENOMEM);
458 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy() copied the source's list/tree heads; reset them before refilling. */
460 INIT_LIST_HEAD(&rec->backrefs);
461 INIT_LIST_HEAD(&rec->orphan_extents);
462 rec->holes = RB_ROOT;
464 list_for_each_entry(orig, &orig_rec->backrefs, list) {
465 size = sizeof(*orig) + orig->namelen + 1;
466 backref = malloc(size);
471 memcpy(backref, orig, size);
472 list_add_tail(&backref->list, &rec->backrefs);
474 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475 dst_orphan = malloc(sizeof(*dst_orphan));
480 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
483 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
490 rb = rb_first(&rec->holes);
492 struct file_extent_hole *hole;
494 hole = rb_entry(rb, struct file_extent_hole, node);
500 if (!list_empty(&rec->backrefs))
501 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502 list_del(&orig->list);
/*
 * NOTE(review): the orphan_extents list holds struct orphan_data_extent
 * entries, yet it is walked with the struct inode_backref cursors orig/tmp.
 * This only works if 'list' sits at the same offset in both structures;
 * dedicated orphan_data_extent cursors would be type-correct -- confirm.
 */
506 if (!list_empty(&rec->orphan_extents))
507 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508 list_del(&orig->list);
/*
 * Print the entries of @orphan_extents to stdout.
 *
 * An empty list is tested for first.  Otherwise a heading naming the tree
 * is printed, followed by one line per entry with its inode, offset,
 * disk_bytenr and disk_len.
 *
 * NOTE(review): the second parameter (the tree id shown in the heading) and
 * the tail of both printf() argument lists are missing from this listing --
 * confirm against the full file.
 */
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
520 struct orphan_data_extent *orphan;
522 if (list_empty(orphan_extents))
524 printf("The following data extent is lost in tree %llu:\n",
526 list_for_each_entry(orphan, orphan_extents, list) {
527 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Write a one-line, human-readable summary of the error bits set in
 * @rec->errors to stderr, followed by optional detail.
 *
 * The line starts with the root id and inode number and the raw error mask
 * in hex, then appends a ", <description>" fragment for every I_ERR_* bit
 * that is set.  When I_ERR_FILE_EXTENT_ORPHAN is set the orphan extents are
 * listed via print_orphan_data_extents(); when I_ERR_FILE_EXTENT_DISCOUNT
 * is set the recorded holes are listed, with a separate "start: 0" line
 * printed on a path whose condition is not visible here.
 *
 * For a relocation root the id printed is root_key.offset (the id of the
 * corresponding fs root, per the in-body comment).
 *
 * NOTE(review): some declarations, the hole loop construct and part of the
 * final fprintf() are missing from this listing -- confirm against the full
 * file.
 */
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
535 u64 root_objectid = root->root_key.objectid;
536 int errors = rec->errors;
540 /* reloc root errors, we print its corresponding fs root objectid*/
541 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542 root_objectid = root->root_key.offset;
/*
 * NOTE(review): no separator follows "reloc", so together with the next
 * fprintf() the output reads "relocroot ..." -- confirm this is intended.
 */
543 fprintf(stderr, "reloc");
545 fprintf(stderr, "root %llu inode %llu errors %x",
546 (unsigned long long) root_objectid,
547 (unsigned long long) rec->ino, rec->errors);
549 if (errors & I_ERR_NO_INODE_ITEM)
550 fprintf(stderr, ", no inode item");
551 if (errors & I_ERR_NO_ORPHAN_ITEM)
552 fprintf(stderr, ", no orphan item");
553 if (errors & I_ERR_DUP_INODE_ITEM)
554 fprintf(stderr, ", dup inode item");
555 if (errors & I_ERR_DUP_DIR_INDEX)
556 fprintf(stderr, ", dup dir index");
557 if (errors & I_ERR_ODD_DIR_ITEM)
558 fprintf(stderr, ", odd dir item");
559 if (errors & I_ERR_ODD_FILE_EXTENT)
560 fprintf(stderr, ", odd file extent");
561 if (errors & I_ERR_BAD_FILE_EXTENT)
562 fprintf(stderr, ", bad file extent");
563 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564 fprintf(stderr, ", file extent overlap");
565 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566 fprintf(stderr, ", file extent discount");
567 if (errors & I_ERR_DIR_ISIZE_WRONG)
568 fprintf(stderr, ", dir isize wrong");
569 if (errors & I_ERR_FILE_NBYTES_WRONG)
570 fprintf(stderr, ", nbytes wrong");
571 if (errors & I_ERR_ODD_CSUM_ITEM)
572 fprintf(stderr, ", odd csum item");
573 if (errors & I_ERR_SOME_CSUM_MISSING)
574 fprintf(stderr, ", some csum missing");
575 if (errors & I_ERR_LINK_COUNT_WRONG)
576 fprintf(stderr, ", link count wrong");
577 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578 fprintf(stderr, ", orphan file extent");
579 fprintf(stderr, "\n");
580 /* Print the orphan extents if needed */
/*
 * NOTE(review): root->objectid is passed here, not the root_objectid local
 * that was adjusted for relocation roots above; also the callee prints to
 * stdout while everything else in this function goes to stderr -- confirm
 * both are intended.
 */
581 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
584 /* Print the holes if needed */
585 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586 struct file_extent_hole *hole;
587 struct rb_node *node;
590 node = rb_first(&rec->holes);
591 fprintf(stderr, "Found file extent holes:\n");
594 hole = rb_entry(node, struct file_extent_hole, node);
595 fprintf(stderr, "\tstart: %llu, len: %llu\n",
596 hole->start, hole->len);
597 node = rb_next(node);
600 fprintf(stderr, "\tstart: 0, len: %llu\n",
602 root->fs_info->sectorsize));
606 static void print_ref_error(int errors)
608 if (errors & REF_ERR_NO_DIR_ITEM)
609 fprintf(stderr, ", no dir item");
610 if (errors & REF_ERR_NO_DIR_INDEX)
611 fprintf(stderr, ", no dir index");
612 if (errors & REF_ERR_NO_INODE_REF)
613 fprintf(stderr, ", no inode ref");
614 if (errors & REF_ERR_DUP_DIR_ITEM)
615 fprintf(stderr, ", dup dir item");
616 if (errors & REF_ERR_DUP_DIR_INDEX)
617 fprintf(stderr, ", dup dir index");
618 if (errors & REF_ERR_DUP_INODE_REF)
619 fprintf(stderr, ", dup inode ref");
620 if (errors & REF_ERR_INDEX_UNMATCH)
621 fprintf(stderr, ", index mismatch");
622 if (errors & REF_ERR_FILETYPE_UNMATCH)
623 fprintf(stderr, ", filetype mismatch");
624 if (errors & REF_ERR_NAME_TOO_LONG)
625 fprintf(stderr, ", name too long");
626 if (errors & REF_ERR_NO_ROOT_REF)
627 fprintf(stderr, ", no root ref");
628 if (errors & REF_ERR_NO_ROOT_BACKREF)
629 fprintf(stderr, ", no root backref");
630 if (errors & REF_ERR_DUP_ROOT_REF)
631 fprintf(stderr, ", dup root ref");
632 if (errors & REF_ERR_DUP_ROOT_BACKREF)
633 fprintf(stderr, ", dup root backref");
634 fprintf(stderr, "\n");
/*
 * Find, and optionally create or privatise, the inode record for an inode
 * number in @inode_cache.
 *
 * The cache is searched with lookup_cache_extent() for the range
 * [ino, ino + 1).  On a hit, if mod is set and the record has more than one
 * reference, the node's data pointer is replaced by the result of
 * clone_inode_rec().  On the creation path a zeroed record is allocated with
 * extent_start set to (u64)-1 (the "no extent seen yet" marker tested
 * elsewhere in this file), empty lists and an empty hole tree; it is then
 * wrapped in a new ptr_node keyed by ino with size 1 and inserted into the
 * cache.  BTRFS_FREE_INO_OBJECTID gets special treatment before insertion.
 *
 * Errors are reported as ERR_PTR values: -ENOMEM for allocation failures
 * and -EEXIST when the cache insertion fails.
 *
 * NOTE(review): the remaining parameters (ino and mod are used in the body),
 * the branch conditions, several field assignments and the final return are
 * missing from this listing -- confirm against the full file.
 */
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
640 struct ptr_node *node;
641 struct cache_extent *cache;
642 struct inode_record *rec = NULL;
645 cache = lookup_cache_extent(inode_cache, ino, 1);
647 node = container_of(cache, struct ptr_node, cache);
649 if (mod && rec->refs > 1) {
/*
 * NOTE(review): node->data is overwritten before the result is checked, so
 * on failure the cache node is left holding an ERR_PTR value instead of the
 * original record -- confirm whether callers tolerate that.
 */
650 node->data = clone_inode_rec(rec);
651 if (IS_ERR(node->data))
657 rec = calloc(1, sizeof(*rec));
659 return ERR_PTR(-ENOMEM);
661 rec->extent_start = (u64)-1;
663 INIT_LIST_HEAD(&rec->backrefs);
664 INIT_LIST_HEAD(&rec->orphan_extents);
665 rec->holes = RB_ROOT;
667 node = malloc(sizeof(*node));
670 return ERR_PTR(-ENOMEM);
672 node->cache.start = ino;
673 node->cache.size = 1;
676 if (ino == BTRFS_FREE_INO_OBJECTID)
/*
 * NOTE(review): no release of rec/node is visible before the -EEXIST return
 * below; if none exists in the full file this path leaks both -- confirm.
 */
679 ret = insert_cache_extent(inode_cache, &node->cache);
681 return ERR_PTR(-EEXIST);
/*
 * Drain @orphan_extents: while the list is non-empty, take its first entry
 * and unlink it.
 *
 * NOTE(review): the release of each unlinked entry is missing from this
 * listing; presumably it is passed to free() -- confirm against the full
 * file.
 */
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
688 struct orphan_data_extent *orphan;
690 while (!list_empty(orphan_extents)) {
691 orphan = list_entry(orphan_extents->next,
692 struct orphan_data_extent, list);
693 list_del(&orphan->list);
/*
 * Tear down @rec: unlink every backref from rec->backrefs, then release the
 * orphan extent list and the hole tree through their helpers.
 *
 * NOTE(review): lines are missing between the declarations and the first
 * loop, and the free() calls for the backrefs and for the record itself are
 * not visible; presumably a reference count is dropped first and the
 * teardown only runs for the last reference -- confirm against the full
 * file.
 */
698 static void free_inode_rec(struct inode_record *rec)
700 struct inode_backref *backref;
705 while (!list_empty(&rec->backrefs)) {
706 backref = to_inode_backref(rec->backrefs.next);
707 list_del(&backref->list);
710 free_orphan_data_extents(&rec->orphan_extents);
711 free_file_extent_holes(&rec->holes);
/*
 * Test whether @rec is fully verified and clean: no error bits, marked
 * checked, inode item seen, the link count from the inode item equal to the
 * number of links found, and no backrefs left pending.
 *
 * NOTE(review): the return statements are missing from this listing;
 * presumably non-zero when all conditions hold and 0 otherwise -- confirm.
 */
715 static int can_free_inode_rec(struct inode_record *rec)
717 if (!rec->errors && rec->checked && rec->found_inode_item &&
718 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Re-evaluate @rec after new information arrived and drop it from
 * @inode_cache once nothing is left to verify.
 *
 * Steps, in order:
 *  - nothing can be judged before the inode item was seen;
 *  - every backref that has both a dir item and a dir index gets its
 *    filetype compared with the type derived from the inode mode, and is
 *    unlinked when it is error-free, has an inode ref and the record's
 *    link counts agree;
 *  - the remaining checks wait until the record is marked checked and is
 *    not being merged;
 *  - directories: found_size must equal isize and no file extent may have
 *    been seen;
 *  - regular files and symlinks: no dir item may have been seen,
 *    found_size must equal nbytes, and (for linked inodes, unless no_holes
 *    is set) the extents must cover the file up to isize without a gap;
 *  - checksum bookkeeping must be consistent with the nodatasum flag;
 *  - finally, if can_free_inode_rec() agrees, the record's node is removed
 *    from the cache.  The record must hold exactly one reference here.
 *
 * NOTE(review): the early returns, the frees and the declarations of
 * filetype/no_holes are missing from this listing -- confirm against the
 * full file.
 */
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724 struct inode_record *rec)
726 struct cache_extent *cache;
727 struct inode_backref *tmp, *backref;
728 struct ptr_node *node;
731 if (!rec->found_inode_item)
734 filetype = imode_to_type(rec->imode);
735 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736 if (backref->found_dir_item && backref->found_dir_index) {
737 if (backref->filetype != filetype)
738 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739 if (!backref->errors && backref->found_inode_ref &&
740 rec->nlink == rec->found_link) {
741 list_del(&backref->list);
747 if (!rec->checked || rec->merging)
750 if (S_ISDIR(rec->imode)) {
751 if (rec->found_size != rec->isize)
752 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753 if (rec->found_file_extent)
754 rec->errors |= I_ERR_ODD_FILE_EXTENT;
755 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756 if (rec->found_dir_item)
757 rec->errors |= I_ERR_ODD_DIR_ITEM;
758 if (rec->found_size != rec->nbytes)
759 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760 if (rec->nlink > 0 && !no_holes &&
761 (rec->extent_end < rec->isize ||
762 first_extent_gap(&rec->holes) < rec->isize))
763 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
766 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767 if (rec->found_csum_item && rec->nodatasum)
768 rec->errors |= I_ERR_ODD_CSUM_ITEM;
769 if (rec->some_csum_missing && !rec->nodatasum)
770 rec->errors |= I_ERR_SOME_CSUM_MISSING;
773 BUG_ON(rec->refs != 1);
774 if (can_free_inode_rec(rec)) {
775 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776 node = container_of(cache, struct ptr_node, cache);
777 BUG_ON(node->data != rec);
778 remove_cache_extent(inode_cache, &node->cache);
/*
 * Look for the orphan item of inode @ino in @root.
 *
 * A key with objectid BTRFS_ORPHAN_OBJECTID and type BTRFS_ORPHAN_ITEM_KEY
 * is searched read-only (no transaction, no insert/COW arguments) and the
 * path is released straight afterwards.
 *
 * NOTE(review): the key offset assignment and the return statements are
 * missing from this listing; presumably the offset is @ino and the search
 * result is translated into the return value -- confirm against the full
 * file.
 */
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
786 struct btrfs_path path;
787 struct btrfs_key key;
790 key.objectid = BTRFS_ORPHAN_OBJECTID;
791 key.type = BTRFS_ORPHAN_ITEM_KEY;
794 btrfs_init_path(&path);
795 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796 btrfs_release_path(&path);
/*
 * Record the contents of the INODE_ITEM at @slot of @eb in the active
 * node's current inode record.
 *
 * The current record must belong to key->objectid and must not be shared
 * (BUG_ON otherwise).  A second inode item for the same record is flagged
 * with I_ERR_DUP_INODE_ITEM.  Otherwise nlink, isize, nbytes and imode are
 * copied from the item, the NODATASUM inode flag is examined, and
 * found_inode_item is set.  I_ERR_NO_ORPHAN_ITEM is raised under a
 * condition not visible here, after which maybe_free_inode_rec() is given
 * the chance to retire the record.
 *
 * NOTE(review): the early return for duplicates, the nodatasum assignment,
 * the condition guarding I_ERR_NO_ORPHAN_ITEM and the return value are
 * missing from this listing -- confirm against the full file.
 */
802 static int process_inode_item(struct extent_buffer *eb,
803 int slot, struct btrfs_key *key,
804 struct shared_node *active_node)
806 struct inode_record *rec;
807 struct btrfs_inode_item *item;
809 rec = active_node->current;
810 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811 if (rec->found_inode_item) {
812 rec->errors |= I_ERR_DUP_INODE_ITEM;
815 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816 rec->nlink = btrfs_inode_nlink(eb, item);
817 rec->isize = btrfs_inode_size(eb, item);
818 rec->nbytes = btrfs_inode_nbytes(eb, item);
819 rec->imode = btrfs_inode_mode(eb, item);
820 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
822 rec->found_inode_item = 1;
824 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec that matches (@dir, name, @namelen), creating it
 * when none exists.
 *
 * Existing backrefs are compared by directory, name length and name bytes;
 * records for BTRFS_MULTIPLE_OBJECTIDS are special-cased during the scan.
 * A new backref is allocated with room for the name plus a terminating
 * NUL, zeroed (header only), filled with the name and appended to
 * rec->backrefs.
 *
 * NOTE(review): the name parameter line, the loop's continue/return
 * statements, the malloc failure handling, the dir assignment and the final
 * return are missing from this listing -- confirm against the full file.
 */
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
831 int namelen, u64 dir)
833 struct inode_backref *backref;
835 list_for_each_entry(backref, &rec->backrefs, list) {
836 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
838 if (backref->dir != dir || backref->namelen != namelen)
840 if (memcmp(name, backref->name, namelen))
845 backref = malloc(sizeof(*backref) + namelen + 1);
848 memset(backref, 0, sizeof(*backref));
850 backref->namelen = namelen;
851 memcpy(backref->name, name, namelen);
852 backref->name[namelen] = '\0';
853 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item of type @itemtype references inode @ino under
 * (@dir, @name).
 *
 * The inode record is obtained (created if needed) from @inode_cache and
 * the matching backref from get_inode_backref(); @errors is OR-ed into the
 * backref.  Depending on @itemtype:
 *  - BTRFS_DIR_INDEX_KEY: flags a duplicate index, an index that disagrees
 *    with an already seen inode ref, or a filetype that disagrees with an
 *    already seen dir item; then stores index and filetype;
 *  - BTRFS_DIR_ITEM_KEY: flags a duplicate dir item or a filetype that
 *    disagrees with an already seen dir index; then stores the filetype;
 *  - BTRFS_INODE_REF_KEY / BTRFS_INODE_EXTREF_KEY: flags a duplicate ref or
 *    an index that disagrees with an already seen dir index; then stores
 *    index and the ref type.
 * Finally maybe_free_inode_rec() is given the chance to retire the record.
 *
 * NOTE(review): the checks on the results of get_inode_rec() /
 * get_inode_backref(), some statements inside the branches and the return
 * value are missing from this listing -- confirm against the full file.
 */
857 static int add_inode_backref(struct cache_tree *inode_cache,
858 u64 ino, u64 dir, u64 index,
859 const char *name, int namelen,
860 u8 filetype, u8 itemtype, int errors)
862 struct inode_record *rec;
863 struct inode_backref *backref;
865 rec = get_inode_rec(inode_cache, ino, 1);
867 backref = get_inode_backref(rec, name, namelen, dir);
870 backref->errors |= errors;
871 if (itemtype == BTRFS_DIR_INDEX_KEY) {
872 if (backref->found_dir_index)
873 backref->errors |= REF_ERR_DUP_DIR_INDEX;
874 if (backref->found_inode_ref && backref->index != index)
875 backref->errors |= REF_ERR_INDEX_UNMATCH;
876 if (backref->found_dir_item && backref->filetype != filetype)
877 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
879 backref->index = index;
880 backref->filetype = filetype;
881 backref->found_dir_index = 1;
882 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
884 if (backref->found_dir_item)
885 backref->errors |= REF_ERR_DUP_DIR_ITEM;
886 if (backref->found_dir_index && backref->filetype != filetype)
887 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
889 backref->filetype = filetype;
890 backref->found_dir_item = 1;
891 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893 if (backref->found_inode_ref)
894 backref->errors |= REF_ERR_DUP_INODE_REF;
895 if (backref->found_dir_index && backref->index != index)
896 backref->errors |= REF_ERR_INDEX_UNMATCH;
898 backref->index = index;
900 backref->ref_type = itemtype;
901 backref->found_inode_ref = 1;
906 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about @src into @dst, which lives in @dst_cache.
 *
 * - Every backref of @src is replayed into @dst through add_inode_backref(),
 *   once for each kind of item it was seen in (dir index, dir item, inode
 *   ref), carrying its error bits along.
 * - The found_* / some_csum_missing flags are OR-ed into @dst.
 * - The holes of @src are copied when @dst's first gap lies behind @src's.
 * - found_link and found_size are summed (found_link minus dir_count).
 * - The extent ranges are merged: an overlap raises
 *   I_ERR_FILE_EXTENT_OVERLAP, a gap between the two ranges is recorded as
 *   a hole, and extent_end is advanced when @src ends later.
 * - Error bits are OR-ed; inode item fields are copied when @dst has not
 *   seen an inode item yet, otherwise I_ERR_DUP_INODE_ITEM is raised.
 *
 * NOTE(review): the declaration and maintenance of dir_count, the merging
 * flag handling, the checks of ret and the return value are missing from
 * this listing -- confirm against the full file.
 */
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911 struct cache_tree *dst_cache)
913 struct inode_backref *backref;
918 list_for_each_entry(backref, &src->backrefs, list) {
919 if (backref->found_dir_index) {
920 add_inode_backref(dst_cache, dst->ino, backref->dir,
921 backref->index, backref->name,
922 backref->namelen, backref->filetype,
923 BTRFS_DIR_INDEX_KEY, backref->errors);
925 if (backref->found_dir_item) {
927 add_inode_backref(dst_cache, dst->ino,
928 backref->dir, 0, backref->name,
929 backref->namelen, backref->filetype,
930 BTRFS_DIR_ITEM_KEY, backref->errors);
932 if (backref->found_inode_ref) {
933 add_inode_backref(dst_cache, dst->ino,
934 backref->dir, backref->index,
935 backref->name, backref->namelen, 0,
936 backref->ref_type, backref->errors);
940 if (src->found_dir_item)
941 dst->found_dir_item = 1;
942 if (src->found_file_extent)
943 dst->found_file_extent = 1;
944 if (src->found_csum_item)
945 dst->found_csum_item = 1;
946 if (src->some_csum_missing)
947 dst->some_csum_missing = 1;
948 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949 ret = copy_file_extent_holes(&dst->holes, &src->holes);
954 BUG_ON(src->found_link < dir_count);
955 dst->found_link += src->found_link - dir_count;
956 dst->found_size += src->found_size;
/* (u64)-1 in extent_start means "no file extent seen yet". */
957 if (src->extent_start != (u64)-1) {
958 if (dst->extent_start == (u64)-1) {
959 dst->extent_start = src->extent_start;
960 dst->extent_end = src->extent_end;
962 if (dst->extent_end > src->extent_start)
963 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964 else if (dst->extent_end < src->extent_start) {
965 ret = add_file_extent_hole(&dst->holes,
967 src->extent_start - dst->extent_end);
969 if (dst->extent_end < src->extent_end)
970 dst->extent_end = src->extent_end;
974 dst->errors |= src->errors;
975 if (src->found_inode_item) {
976 if (!dst->found_inode_item) {
977 dst->nlink = src->nlink;
978 dst->isize = src->isize;
979 dst->nbytes = src->nbytes;
980 dst->imode = src->imode;
981 dst->nodatasum = src->nodatasum;
982 dst->found_inode_item = 1;
984 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move or share the inode records of @src_node into @dst_node.
 *
 * One reference on @src_node is dropped first.  The root_cache of the
 * source is then walked entry by entry and each record is inserted into the
 * destination's root_cache, either by moving the node (it is removed from
 * the source) or by inserting a freshly allocated ptr_node.  When the
 * destination already holds a record for the same inode (-EEXIST), the two
 * are merged with merge_inode_recs(), the merged record is marked checked,
 * and the source record is released.  The same pass is then repeated for
 * the inode_cache pair.
 *
 * Afterwards, if the source had a current inode that lies beyond the
 * destination's current inode (or the destination has none), the
 * destination's old current record is marked checked and retired, and the
 * record for the source's inode number becomes the destination's current.
 *
 * NOTE(review): the splice flag set when the last reference is dropped, the
 * loop constructs, the handling of rec's reference count and the return
 * value are missing from this listing -- confirm against the full file.
 */
992 static int splice_shared_node(struct shared_node *src_node,
993 struct shared_node *dst_node)
995 struct cache_extent *cache;
996 struct ptr_node *node, *ins;
997 struct cache_tree *src, *dst;
998 struct inode_record *rec, *conflict;
1003 if (--src_node->refs == 0)
1005 if (src_node->current)
1006 current_ino = src_node->current->ino;
1008 src = &src_node->root_cache;
1009 dst = &dst_node->root_cache;
1011 cache = search_cache_extent(src, 0);
1013 node = container_of(cache, struct ptr_node, cache);
/* Advance before the node may be removed from the source tree. */
1015 cache = next_cache_extent(cache);
1018 remove_cache_extent(src, &node->cache);
1021 ins = malloc(sizeof(*ins));
1023 ins->cache.start = node->cache.start;
1024 ins->cache.size = node->cache.size;
1028 ret = insert_cache_extent(dst, &ins->cache);
1029 if (ret == -EEXIST) {
1030 conflict = get_inode_rec(dst, rec->ino, 1);
1031 BUG_ON(IS_ERR(conflict));
1032 merge_inode_recs(rec, conflict, dst);
1034 conflict->checked = 1;
1035 if (dst_node->current == conflict)
1036 dst_node->current = NULL;
1038 maybe_free_inode_rec(dst, conflict);
1039 free_inode_rec(rec);
/* Second pass: after the root caches, process the inode caches. */
1046 if (src == &src_node->root_cache) {
1047 src = &src_node->inode_cache;
1048 dst = &dst_node->inode_cache;
1052 if (current_ino > 0 && (!dst_node->current ||
1053 current_ino > dst_node->current->ino)) {
1054 if (dst_node->current) {
1055 dst_node->current->checked = 1;
1056 maybe_free_inode_rec(dst, dst_node->current);
1058 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059 BUG_ON(IS_ERR(dst_node->current));
/*
 * Release callback for one cache entry: @cache is embedded in a ptr_node
 * whose inode record is handed to free_inode_rec().
 *
 * NOTE(review): the assignment of rec (presumably from node->data) and the
 * release of the ptr_node itself are missing from this listing -- confirm
 * against the full file.
 */
1064 static void free_inode_ptr(struct cache_extent *cache)
1066 struct ptr_node *node;
1067 struct inode_record *rec;
1069 node = container_of(cache, struct ptr_node, cache);
1071 free_inode_rec(rec);
/*
 * Macro instantiation using free_inode_ptr as the per-entry release
 * callback.  NOTE(review): presumably this is what defines
 * free_inode_recs_tree(), which enter_shared_node() calls -- confirm
 * against the macro definition.
 */
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered in @shared for a block address.
 *
 * The cache is queried for the range [bytenr, bytenr + 1) and a hit is
 * converted back to its enclosing shared_node.
 *
 * NOTE(review): the bytenr parameter line, the hit test and the return
 * statements are missing from this listing; presumably NULL is returned on
 * a miss -- confirm against the full file.
 */
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1080 struct cache_extent *cache;
1081 struct shared_node *node;
1083 cache = lookup_cache_extent(shared, bytenr, 1);
1085 node = container_of(cache, struct shared_node, cache);
/*
 * Register a new shared_node for @bytenr in @shared.
 *
 * The node is zero-allocated, keyed by [bytenr, bytenr + 1), given two
 * empty caches (root_cache and inode_cache) and inserted into @shared.
 *
 * NOTE(review): the calloc failure handling, the use of @refs and the
 * return value are missing from this listing; presumably @refs initialises
 * node->refs and the insert result is returned -- confirm.
 */
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1094 struct shared_node *node;
1096 node = calloc(1, sizeof(*node));
1099 node->cache.start = bytenr;
1100 node->cache.size = 1;
1101 cache_tree_init(&node->root_cache);
1102 cache_tree_init(&node->inode_cache);
1105 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk reaches a shared block at @bytenr on @level.
 *
 * The level that is already active is special-cased; otherwise the active
 * level must be above @level.  If no shared_node exists for the block yet,
 * one is added, installed in wc->nodes[level] and @level becomes the active
 * level.  If one exists, its cached results are reused: for a root with no
 * references whose root level is the active one, a reference is dropped and
 * the node is torn down when it was the last; in the other visible path the
 * node's records are spliced into the active node and the node is removed
 * from wc->shared once its reference count reaches zero.
 *
 * NOTE(review): the branch structure, the frees and the return values are
 * missing from this listing -- confirm against the full file.
 */
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111 struct walk_control *wc, int level)
1113 struct shared_node *node;
1114 struct shared_node *dest;
1117 if (level == wc->active_node)
1120 BUG_ON(wc->active_node <= level);
1121 node = find_shared_node(&wc->shared, bytenr);
1123 ret = add_shared_node(&wc->shared, bytenr, refs);
1125 node = find_shared_node(&wc->shared, bytenr);
1126 wc->nodes[level] = node;
1127 wc->active_node = level;
1131 if (wc->root_level == wc->active_node &&
1132 btrfs_root_refs(&root->root_item) == 0) {
1133 if (--node->refs == 0) {
1134 free_inode_recs_tree(&node->root_cache);
1135 free_inode_recs_tree(&node->inode_cache);
1136 remove_cache_extent(&wc->shared, &node->cache);
1142 dest = wc->nodes[wc->active_node];
1143 splice_shared_node(node, dest);
1144 if (node->refs == 0) {
1145 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the tree walk leaves the shared block that is active on
 * @level.
 *
 * The root level is special-cased.  Otherwise the levels above @level are
 * scanned (the loop body is not visible; the BUG_ON shows that a level
 * below BTRFS_MAX_LEVEL must be found), the currently active node is
 * detached from wc->nodes[] and the found level becomes active.  Unless the
 * walk is at the root level of a root with no references, the detached
 * node's records are spliced into the newly active node; in both paths the
 * detached node is required to hold at least two references.
 *
 * NOTE(review): the loop body, the else branch and the return values are
 * missing from this listing -- confirm against the full file.
 */
1151 static int leave_shared_node(struct btrfs_root *root,
1152 struct walk_control *wc, int level)
1154 struct shared_node *node;
1155 struct shared_node *dest;
1158 if (level == wc->root_level)
1161 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1165 BUG_ON(i >= BTRFS_MAX_LEVEL);
1167 node = wc->nodes[wc->active_node];
1168 wc->nodes[wc->active_node] = NULL;
1169 wc->active_node = i;
1171 dest = wc->nodes[wc->active_node];
1172 if (wc->active_node < wc->root_level ||
1173 btrfs_root_refs(&root->root_item) > 0) {
1174 BUG_ON(node->refs <= 1);
1175 splice_shared_node(node, dest);
1177 BUG_ON(node->refs < 2);
1186 * 1 - if the root with id child_root_id is a child of root parent_root_id
1187 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1188 * has other root(s) as parent(s)
1189 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine the parent/child relation between two roots by consulting the
 * tree root.
 *
 * First an exact ROOT_REF key (parent_root_id, child_root_id) is searched.
 * Then the ROOT_BACKREF items of child_root_id are examined, stepping to
 * the next leaf when the slot runs past the current one: an item whose key
 * offset equals parent_root_id identifies the parent, and iteration stops at
 * the first key that is not a ROOT_BACKREF of the child.  When no matching
 * parent is found the result is 0 if the child has some parent and 2 if it
 * has none.
 *
 * NOTE(review): the child_root_id parameter line, the result handling of
 * the first search, the loop construct, the maintenance of has_parent and
 * the remaining return statements are missing from this listing -- confirm
 * against the full file.
 */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1194 struct btrfs_path path;
1195 struct btrfs_key key;
1196 struct extent_buffer *leaf;
1200 btrfs_init_path(&path);
1202 key.objectid = parent_root_id;
1203 key.type = BTRFS_ROOT_REF_KEY;
1204 key.offset = child_root_id;
1205 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1209 btrfs_release_path(&path);
1213 key.objectid = child_root_id;
1214 key.type = BTRFS_ROOT_BACKREF_KEY;
1216 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1222 leaf = path.nodes[0];
1223 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1227 leaf = path.nodes[0];
1230 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231 if (key.objectid != child_root_id ||
1232 key.type != BTRFS_ROOT_BACKREF_KEY)
1237 if (key.offset == parent_root_id) {
1238 btrfs_release_path(&path);
1245 btrfs_release_path(&path);
1248 return has_parent ? 0 : 2;
/*
 * Process the DIR_ITEM / DIR_INDEX item at @slot of @eb, which may pack
 * several directory entries back to back.
 *
 * For every entry the target location, name length, data length and file
 * type are decoded.  A name that would cross the item boundary or exceed
 * BTRFS_NAME_LEN is flagged with REF_ERR_NAME_TOO_LONG and truncated.  For
 * DIR_ITEM keys the key offset must equal the hash of the name, otherwise
 * I_ERR_ODD_DIR_ITEM is raised and an error is printed.  The entry is then
 * recorded as a backref: in the inode cache for INODE_ITEM targets, in the
 * root cache for ROOT_ITEM targets, and under BTRFS_MULTIPLE_OBJECTIDS for
 * any other location type.  A DIR_INDEX item holding more than one entry is
 * flagged with I_ERR_DUP_DIR_INDEX.
 *
 * NOTE(review): the local declarations, some branch bodies, the cur/nritems
 * bookkeeping and the return value are missing from this listing -- confirm
 * against the full file.
 */
1251 static int process_dir_item(struct extent_buffer *eb,
1252 int slot, struct btrfs_key *key,
1253 struct shared_node *active_node)
1263 struct btrfs_dir_item *di;
1264 struct inode_record *rec;
1265 struct cache_tree *root_cache;
1266 struct cache_tree *inode_cache;
1267 struct btrfs_key location;
1268 char namebuf[BTRFS_NAME_LEN];
1270 root_cache = &active_node->root_cache;
1271 inode_cache = &active_node->inode_cache;
1272 rec = active_node->current;
1273 rec->found_dir_item = 1;
1275 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276 total = btrfs_item_size_nr(eb, slot);
1277 while (cur < total) {
1279 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280 name_len = btrfs_dir_name_len(eb, di);
1281 data_len = btrfs_dir_data_len(eb, di);
1282 filetype = btrfs_dir_type(eb, di);
/*
 * NOTE(review): found_size is increased by the on-disk name_len before that
 * length has been validated below -- confirm this is intended.
 */
1284 rec->found_size += name_len;
1285 if (cur + sizeof(*di) + name_len > total ||
1286 name_len > BTRFS_NAME_LEN) {
1287 error = REF_ERR_NAME_TOO_LONG;
1289 if (cur + sizeof(*di) > total)
1291 len = min_t(u32, total - cur - sizeof(*di),
1298 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/*
 * NOTE(review): namebuf is BTRFS_NAME_LEN bytes and no NUL termination is
 * visible in this listing, yet it is printed with %s below; if len can
 * reach BTRFS_NAME_LEN this reads past the buffer -- confirm.
 */
1300 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301 key->offset != btrfs_name_hash(namebuf, len)) {
1302 rec->errors |= I_ERR_ODD_DIR_ITEM;
1303 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304 key->objectid, key->offset, namebuf, len, filetype,
1305 key->offset, btrfs_name_hash(namebuf, len));
1308 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309 add_inode_backref(inode_cache, location.objectid,
1310 key->objectid, key->offset, namebuf,
1311 len, filetype, key->type, error);
1312 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313 add_inode_backref(root_cache, location.objectid,
1314 key->objectid, key->offset,
1315 namebuf, len, filetype,
1319 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320 location.type, key->objectid, key->offset);
1321 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322 key->objectid, key->offset, namebuf,
1323 len, filetype, key->type, error);
/* Step over this entry: header, name and trailing data. */
1326 len = sizeof(*di) + name_len + data_len;
1327 di = (struct btrfs_dir_item *)((char *)di + len);
1330 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process the INODE_REF item at @slot of @eb, which may pack several
 * references back to back.
 *
 * For every reference the name length and directory index are decoded.  A
 * name that would cross the item boundary or exceed BTRFS_NAME_LEN is
 * flagged with REF_ERR_NAME_TOO_LONG and as much of it as fits is still
 * read.  Each reference is recorded with add_inode_backref() for inode
 * key->objectid in directory key->offset.
 *
 * NOTE(review): the local declarations, the non-error branch, the cur
 * bookkeeping and the return value are missing from this listing -- confirm
 * against the full file.
 */
1336 static int process_inode_ref(struct extent_buffer *eb,
1337 int slot, struct btrfs_key *key,
1338 struct shared_node *active_node)
1346 struct cache_tree *inode_cache;
1347 struct btrfs_inode_ref *ref;
1348 char namebuf[BTRFS_NAME_LEN];
1350 inode_cache = &active_node->inode_cache;
1352 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353 total = btrfs_item_size_nr(eb, slot);
1354 while (cur < total) {
1355 name_len = btrfs_inode_ref_name_len(eb, ref);
1356 index = btrfs_inode_ref_index(eb, ref);
1358 /* inode_ref + namelen should not cross item boundary */
1359 if (cur + sizeof(*ref) + name_len > total ||
1360 name_len > BTRFS_NAME_LEN) {
1361 if (total < cur + sizeof(*ref))
1364 /* Still try to read out the remaining part */
1365 len = min_t(u32, total - cur - sizeof(*ref),
1367 error = REF_ERR_NAME_TOO_LONG;
1373 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374 add_inode_backref(inode_cache, key->objectid, key->offset,
1375 index, namebuf, len, 0, key->type, error);
/* Step over this reference: header plus name. */
1377 len = sizeof(*ref) + name_len;
1378 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process the INODE_EXTREF item at @slot of @eb, which may pack several
 * extended references back to back.
 *
 * For every reference the name length, directory index and parent
 * directory are decoded.  A name longer than BTRFS_NAME_LEN is truncated to
 * that length and flagged with REF_ERR_NAME_TOO_LONG.  Each reference is
 * recorded with add_inode_backref() for inode key->objectid in directory
 * parent.
 *
 * NOTE(review): unlike process_inode_ref(), no check that the reference
 * header plus name stays inside the item is visible here, so a corrupted
 * name_len could make read_extent_buffer() read beyond the item -- confirm
 * against the full file.  The local declarations, the non-error branch, the
 * cur bookkeeping and the return value are also missing from this listing.
 */
1384 static int process_inode_extref(struct extent_buffer *eb,
1385 int slot, struct btrfs_key *key,
1386 struct shared_node *active_node)
1395 struct cache_tree *inode_cache;
1396 struct btrfs_inode_extref *extref;
1397 char namebuf[BTRFS_NAME_LEN];
1399 inode_cache = &active_node->inode_cache;
1401 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402 total = btrfs_item_size_nr(eb, slot);
1403 while (cur < total) {
1404 name_len = btrfs_inode_extref_name_len(eb, extref);
1405 index = btrfs_inode_extref_index(eb, extref);
1406 parent = btrfs_inode_extref_parent(eb, extref);
1407 if (name_len <= BTRFS_NAME_LEN) {
1411 len = BTRFS_NAME_LEN;
1412 error = REF_ERR_NAME_TOO_LONG;
1414 read_extent_buffer(eb, namebuf,
1415 (unsigned long)(extref + 1), len);
1416 add_inode_backref(inode_cache, key->objectid, parent,
1417 index, namebuf, len, 0, key->type, error);
/* Step over this reference: header plus name. */
1419 len = sizeof(*extref) + name_len;
1420 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Account one EXTENT_DATA item (at @slot of @eb) against the inode record
 * currently active in @active_node.
 *
 * Visible effects on the record: found_file_extent is set, extent_start and
 * extent_end are tracked (overlaps set I_ERR_FILE_EXTENT_OVERLAP, gaps are
 * handed to add_file_extent_hole()), found_size grows by the extent length,
 * malformed items set I_ERR_BAD_FILE_EXTENT, and the csum lookup result is
 * stored in found_csum_item / some_csum_missing or reported as
 * I_ERR_ODD_CSUM_ITEM.
 *
 * NOTE(review): this listing omits some lines (local declarations, a few
 * conditions and the return), so branches whose condition is not visible
 * are described only loosely.
 */
1427 static int process_file_extent(struct btrfs_root *root,
1428 struct extent_buffer *eb,
1429 int slot, struct btrfs_key *key,
1430 struct shared_node *active_node)
1432 struct inode_record *rec;
1433 struct btrfs_file_extent_item *fi;
1435 u64 disk_bytenr = 0;
1436 u64 extent_offset = 0;
/* sectorsize - 1, used below for alignment tests and rounding up */
1437 u64 mask = root->fs_info->sectorsize - 1;
1441 rec = active_node->current;
1442 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1443 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any file extent yet */
1445 if (rec->extent_start == (u64)-1) {
1446 rec->extent_start = key->offset;
1447 rec->extent_end = key->offset;
/* previous extent ends after this one starts: overlap; before: hole */
1450 if (rec->extent_end > key->offset)
1451 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1452 else if (rec->extent_end < key->offset) {
1453 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1454 key->offset - rec->extent_end);
1459 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1460 extent_type = btrfs_file_extent_type(eb, fi);
1462 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1463 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1465 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1466 rec->found_size += num_bytes;
/* round the inline length up to a multiple of the sector size */
1467 num_bytes = (num_bytes + mask) & ~mask;
1468 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1469 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1470 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1471 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1472 extent_offset = btrfs_file_extent_offset(eb, fi);
/* length must be non-zero and sector aligned */
1473 if (num_bytes == 0 || (num_bytes & mask))
1474 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* the referenced range must fit inside ram_bytes */
1475 if (num_bytes + extent_offset >
1476 btrfs_file_extent_ram_bytes(eb, fi))
1477 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* a preallocated extent must not be compressed, encrypted or encoded */
1478 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1479 (btrfs_file_extent_compression(eb, fi) ||
1480 btrfs_file_extent_encryption(eb, fi) ||
1481 btrfs_file_extent_other_encoding(eb, fi)))
1482 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* only extents with a non-zero disk_bytenr count towards found_size */
1483 if (disk_bytenr > 0)
1484 rec->found_size += num_bytes;
1486 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1488 rec->extent_end = key->offset + num_bytes;
1491 * The data reloc tree will copy full extents into its inode and then
1492 * copy the corresponding csums. Because the extent it copied could be
1493 * a preallocated extent that hasn't been written to yet there may be no
1494 * csums to copy, ergo we won't have csums for our file extent. This is
1495 * ok so just don't bother checking csums if the inode belongs to the
1498 if (disk_bytenr > 0 &&
1499 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* compressed: look up csums over the whole on-disk extent */
1501 if (btrfs_file_extent_compression(eb, fi))
1502 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1504 disk_bytenr += extent_offset;
1506 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1510 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1512 rec->found_csum_item = 1;
1513 if (found < num_bytes)
1514 rec->some_csum_missing = 1;
1515 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1517 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the inode cache of the active node in
 * @wc.
 *
 * Items are dispatched on key.type: DIR_ITEM and DIR_INDEX go to
 * process_dir_item(), INODE_REF to process_inode_ref(), INODE_EXTREF to
 * process_inode_extref(), INODE_ITEM to process_inode_item() and EXTENT_DATA
 * to process_file_extent(). Whenever the key's objectid moves past the
 * current inode record, that record is marked checked, offered to
 * maybe_free_inode_rec(), and a record for the new objectid is fetched with
 * get_inode_rec().
 *
 * NOTE(review): the statements guarded by the early tests (root refs == 0,
 * free-space objectid, orphan item) and the handling of @ret are not visible
 * in this listing.
 */
1523 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1524 struct walk_control *wc)
1526 struct btrfs_key key;
1530 struct cache_tree *inode_cache;
1531 struct shared_node *active_node;
1533 if (wc->root_level == wc->active_node &&
1534 btrfs_root_refs(&root->root_item) == 0)
1537 active_node = wc->nodes[wc->active_node];
1538 inode_cache = &active_node->inode_cache;
1539 nritems = btrfs_header_nritems(eb);
1540 for (i = 0; i < nritems; i++) {
1541 btrfs_item_key_to_cpu(eb, &key, i);
1543 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1545 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* first item, or the key belongs to a later inode than the current record */
1548 if (active_node->current == NULL ||
1549 active_node->current->ino < key.objectid) {
1550 if (active_node->current) {
1551 active_node->current->checked = 1;
1552 maybe_free_inode_rec(inode_cache,
1553 active_node->current);
1555 active_node->current = get_inode_rec(inode_cache,
1557 BUG_ON(IS_ERR(active_node->current));
1560 case BTRFS_DIR_ITEM_KEY:
1561 case BTRFS_DIR_INDEX_KEY:
1562 ret = process_dir_item(eb, i, &key, active_node);
1564 case BTRFS_INODE_REF_KEY:
1565 ret = process_inode_ref(eb, i, &key, active_node);
1567 case BTRFS_INODE_EXTREF_KEY:
1568 ret = process_inode_extref(eb, i, &key, active_node);
1570 case BTRFS_INODE_ITEM_KEY:
1571 ret = process_inode_item(eb, i, &key, active_node);
1573 case BTRFS_EXTENT_DATA_KEY:
1574 ret = process_file_extent(root, eb, i, &key,
/*
 * Forward declarations needed by process_one_leaf_v2(), which calls both
 * functions before update_nodes_refs() is defined further down.
 */
1584 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1585 struct extent_buffer *eb, struct node_refs *nrefs,
1586 u64 level, int check_all);
1587 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1588 unsigned int ext_ref);
1591 * Returns >0 Found error, not fatal, should continue
1592 * Returns <0 Fatal error, must exit the whole check
1593 * Returns 0 No errors found
/*
 * Check the inodes found in the leaf at path->nodes[0] with
 * check_inode_item().
 *
 * The leaf is first scanned for the first INODE_ITEM key or the first change
 * of inode number. check_inode_item() may move @path to another leaf; when
 * that happens the nodes from the root level down are compared against
 * @nrefs and refreshed through update_nodes_refs(), and the extent buffers
 * below *@level are released from @path.
 *
 * NOTE(review): loop exits, labels and the final return are not visible in
 * this listing.
 */
1595 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1596 struct node_refs *nrefs, int *level, int ext_ref)
1598 struct extent_buffer *cur = path->nodes[0];
1599 struct btrfs_key key;
1603 int root_level = btrfs_header_level(root->node);
1605 int ret = 0; /* Final return value */
1606 int err = 0; /* Positive error bitmap */
/* remember which leaf we started on to detect a later leaf switch */
1608 cur_bytenr = cur->start;
1610 /* skip to first inode item or the first inode number change */
1611 nritems = btrfs_header_nritems(cur);
1612 for (i = 0; i < nritems; i++) {
1613 btrfs_item_key_to_cpu(cur, &key, i);
1615 first_ino = key.objectid;
1616 if (key.type == BTRFS_INODE_ITEM_KEY ||
1617 (first_ino && first_ino != key.objectid))
1621 path->slots[0] = nritems;
1627 err |= check_inode_item(root, path, ext_ref);
1629 /* modify cur since check_inode_item may change path */
1630 cur = path->nodes[0];
1632 if (err & LAST_ITEM)
1635 /* still have inode items in this leaf */
1636 if (cur->start == cur_bytenr)
1640 * we have switched to another leaf, above nodes may
1641 * have changed, here walk down the path, if a node
1642 * or leaf is shared, check whether we can skip this
1645 for (i = root_level; i >= 0; i--) {
/* cached entry in @nrefs already describes this block */
1646 if (path->nodes[i]->start == nrefs->bytenr[i])
1649 ret = update_nodes_refs(root, path->nodes[i]->start,
1650 path->nodes[i], nrefs, i, 0);
1654 if (!nrefs->need_check[i]) {
/* drop the buffers below *level from the path */
1660 for (i = 0; i < *level; i++) {
1661 free_extent_buffer(path->nodes[i]);
1662 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at @slot.
 *
 * For every pointer from @slot to the last item, the child's block number
 * and generation are read from @node and passed to readahead_tree_block().
 * NOTE(review): the header level is read into @level, but the test that
 * uses it is not visible in this listing.
 */
1671 static void reada_walk_down(struct btrfs_root *root,
1672 struct extent_buffer *node, int slot)
1674 struct btrfs_fs_info *fs_info = root->fs_info;
1681 level = btrfs_header_level(node);
1685 nritems = btrfs_header_nritems(node);
1686 for (i = slot; i < nritems; i++) {
1687 bytenr = btrfs_node_blockptr(node, i);
1688 ptr_gen = btrfs_node_ptr_generation(node, i);
1689 readahead_tree_block(fs_info, bytenr, ptr_gen);
1694 * Check the child node/leaf by the following condition:
1695 * 1. the first item key of the node/leaf should be the same with the one
1697 * 2. block in parent node should match the child node/leaf.
1698 * 3. generation of parent node and child's header should be consistent.
1700 * Or the child node/leaf pointed by the key in parent is not valid.
1702 * We hope to check leaf owner too, but since subvol may share leaves,
1703 * which makes leaf owner check not so strong, key check should be
1704 * sufficient enough for that case.
/*
 * Compare @child against the pointer stored at @slot of @parent.
 *
 * Three properties are compared and each mismatch is reported on stderr:
 * the first key of @child against the key at @slot, the block number in the
 * child's header against the parent's block pointer, and the child's header
 * generation against the parent's pointer generation. In every message
 * "wanted" is the value recorded in @parent and "have" is the value found
 * in @child.
 *
 * NOTE(review): the lines that set and return the result are not visible in
 * this listing and are left untouched.
 */
1706 static int check_child_node(struct extent_buffer *parent, int slot,
1707 struct extent_buffer *child)
1709 struct btrfs_key parent_key;
1710 struct btrfs_key child_key;
1713 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* a leaf stores item keys, an internal node stores node keys */
1714 if (btrfs_header_level(child) == 0)
1715 btrfs_item_key_to_cpu(child, &child_key, 0);
1717 btrfs_node_key_to_cpu(child, &child_key, 0);
1719 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1722 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1723 parent_key.objectid, parent_key.type, parent_key.offset,
1724 child_key.objectid, child_key.type, child_key.offset);
1726 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1728 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1729 btrfs_node_blockptr(parent, slot),
1730 btrfs_header_bytenr(child));
1732 if (btrfs_node_ptr_generation(parent, slot) !=
1733 btrfs_header_generation(child)) {
/* wanted = generation recorded in the parent, have = child's header */
1735 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1736 btrfs_node_ptr_generation(parent, slot),
1737 btrfs_header_generation(child));
1743 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1744 * in every fs or file tree check. Here we find its all root ids, and only check
1745 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root is the tree that should check a possibly shared
 * block, given @roots, the list of root ids that reference it.
 *
 * Lists with zero or one entry take the first branch; otherwise
 * root->objectid is compared with the value of the first entry returned by
 * rb_first().
 * NOTE(review): the return statements are not visible in this listing;
 * going by the existing comments, the block is presumably checked only by
 * the root with the smallest id. Confirm against the full source.
 */
1747 static int need_check(struct btrfs_root *root, struct ulist *roots)
1749 struct rb_node *node;
1750 struct ulist_node *u;
1753 * @roots can be empty if it belongs to tree reloc tree
1754 * In that case, we should always check the leaf, as we can't use
1755 * the tree owner to ensure some other root will check it.
1757 if (roots->nnodes == 1 || roots->nnodes == 0)
/* first entry of the rb-tree backing @roots */
1760 node = rb_first(&roots->root);
1761 u = rb_entry(node, struct ulist_node, rb_node);
1763 * current root id is not smallest, we skip it and let it be checked
1764 * in the fs or file tree who hash the smallest root id.
1766 if (root->objectid != u->val)
/*
 * Work out whether tree block @eb should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and set or clear that bit in *@flags_ret.
 *
 * Visible decision points, in order: a root id below
 * BTRFS_FIRST_FREE_OBJECTID, @eb being the root node of @root, the RELOC
 * header flag, and the header owner matching root->objectid. If none of
 * those settles it, the extent item for @eb is looked up in the extent
 * tree, its flags are tested, and its inline refs are scanned for a
 * TREE_BLOCK_REF whose offset equals the owner.
 *
 * NOTE(review): the jump targets of the individual tests are not visible in
 * this listing, so which outcome each test selects must be confirmed
 * against the full source.
 */
1772 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1775 struct btrfs_root *extent_root = root->fs_info->extent_root;
1776 struct btrfs_root_item *ri = &root->root_item;
1777 struct btrfs_extent_inline_ref *iref;
1778 struct btrfs_extent_item *ei;
1779 struct btrfs_key key;
1780 struct btrfs_path *path = NULL;
1791 * Except file/reloc tree, we can not have FULL BACKREF MODE
1793 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* @eb is the root node of this tree */
1797 if (eb->start == btrfs_root_bytenr(ri))
1800 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1803 owner = btrfs_header_owner(eb);
1804 if (owner == root->objectid)
1807 path = btrfs_alloc_path();
/* search with the largest offset for this bytenr, then step back to the item */
1811 key.objectid = btrfs_header_bytenr(eb);
1813 key.offset = (u64)-1;
1815 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1822 ret = btrfs_previous_extent_item(extent_root, path,
1828 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* from here on @eb is the extent-tree leaf, not the block being examined */
1830 eb = path->nodes[0];
1831 slot = path->slots[0];
1832 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1834 flags = btrfs_extent_flags(eb, ei);
1835 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* inline refs follow the extent item and run to the end of the item */
1838 ptr = (unsigned long)(ei + 1);
1839 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keys carry a btrfs_tree_block_info before the inline refs */
1841 if (key.type == BTRFS_EXTENT_ITEM_KEY)
1842 ptr += sizeof(struct btrfs_tree_block_info);
1845 /* Reached extent item ends normally */
1849 /* Beyond extent item end, wrong item size */
1851 error("extent item at bytenr %llu slot %d has wrong size",
1856 iref = (struct btrfs_extent_inline_ref *)ptr;
1857 offset = btrfs_extent_inline_ref_offset(eb, iref);
1858 type = btrfs_extent_inline_ref_type(eb, iref);
1860 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1862 ptr += btrfs_extent_inline_ref_size(type);
1866 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1870 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1872 btrfs_free_path(path);
1877 * for a tree node or leaf, we record its reference count, so later if we still
1878 * process this node or leaf, don't need to compute its reference count again.
1880 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the cached state in @nrefs for the tree block at @level.
 *
 * If @nrefs already describes @bytenr at this level the first test matches
 * and nothing below is needed. Otherwise, for a real @bytenr (anything but
 * (u64)-1), the reference count is looked up with
 * btrfs_lookup_extent_info(), the bytenr/refs/full_backref/checked slots are
 * reset, and need_check[] is filled in from need_check() on the roots
 * returned by btrfs_find_all_roots(). When @check_all is set and @eb is
 * given, full_backref[] is set from calc_extent_flag_v2().
 *
 * NOTE(review): several conditions and the return statements are not
 * visible in this listing.
 */
1882 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1883 struct extent_buffer *eb, struct node_refs *nrefs,
1884 u64 level, int check_all)
1886 struct ulist *roots;
1889 int root_level = btrfs_header_level(root->node);
/* cache hit: @nrefs already holds this block at this level */
1893 if (nrefs->bytenr[level] == bytenr)
1896 if (bytenr != (u64)-1) {
1897 /* the return value of this function seems a mistake */
1898 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1899 level, 1, &refs, &flags);
/* a lookup failure only stops the update when not in check_all mode */
1901 if (ret < 0 && !check_all)
1904 nrefs->bytenr[level] = bytenr;
1905 nrefs->refs[level] = refs;
1906 nrefs->full_backref[level] = 0;
1907 nrefs->checked[level] = 0;
1910 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1915 check = need_check(root, roots);
1917 nrefs->need_check[level] = check;
1920 nrefs->need_check[level] = 1;
/* the root node itself is always checked */
1922 if (level == root_level) {
1923 nrefs->need_check[level] = 1;
1926 * The node refs may have not been
1927 * updated if upper needs checking (the
1928 * lowest root_objectid) the node can
1931 nrefs->need_check[level] =
1932 nrefs->need_check[level + 1];
1938 if (check_all && eb) {
1939 calc_extent_flag_v2(root, eb, &flags);
1940 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1941 nrefs->full_backref[level] = 1;
1948 * @level if @level == -1 means extent data item
1949 * else normal tree block.
/*
 * Decide how strictly an extent at @level of @root should be checked, based
 * on the reference counts cached in @nrefs for the levels above it.
 *
 * @level is range-checked against [-1, root_level], the root level is
 * handled on its own, and otherwise every level above @level is tested for
 * a reference count greater than one.
 * NOTE(review): the return values are not visible in this listing.
 */
1951 static int should_check_extent_strictly(struct btrfs_root *root,
1952 struct node_refs *nrefs, int level)
1954 int root_level = btrfs_header_level(root->node);
1956 if (level > root_level || level < -1)
1958 if (level == root_level)
1961 * if the upper node is marked full backref, it should contain shared
1962 * backref of the parent (except owner == root->objectid).
/* walk upwards: a shared block is one with more than one reference */
1964 while (++level <= root_level)
1965 if (nrefs->refs[level] > 1)
/*
 * Descend from path->nodes[*level] towards the leaves, processing each leaf
 * with process_one_leaf().
 *
 * Reference counts come from @nrefs when the cached bytenr matches and from
 * btrfs_lookup_extent_info() otherwise; the result is written back to
 * @nrefs. Blocks are passed to enter_shared_node() in places whose
 * conditions are not visible here. Each child is taken from the cache with
 * btrfs_find_tree_block() or read with read_tree_block() after
 * reada_walk_down(); unreadable children are recorded with
 * btrfs_add_corrupt_extent_record(). A child must pass check_child_node()
 * and btrfs_check_leaf()/btrfs_check_node() before the path moves down.
 *
 * NOTE(review): this listing omits many lines (conditions, gotos, error
 * handling, returns); only the visible calls are described.
 */
1971 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1972 struct walk_control *wc, int *level,
1973 struct node_refs *nrefs)
1975 enum btrfs_tree_block_status status;
1978 struct btrfs_fs_info *fs_info = root->fs_info;
1979 struct extent_buffer *next;
1980 struct extent_buffer *cur;
1984 WARN_ON(*level < 0);
1985 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* reuse the cached reference count when @nrefs describes this block */
1987 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1988 refs = nrefs->refs[*level];
1991 ret = btrfs_lookup_extent_info(NULL, root,
1992 path->nodes[*level]->start,
1993 *level, 1, &refs, NULL);
1998 nrefs->bytenr[*level] = path->nodes[*level]->start;
1999 nrefs->refs[*level] = refs;
2003 ret = enter_shared_node(root, path->nodes[*level]->start,
2011 while (*level >= 0) {
2012 WARN_ON(*level < 0);
2013 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014 cur = path->nodes[*level];
2016 if (btrfs_header_level(cur) != *level)
/* all slots of this block have been visited */
2019 if (path->slots[*level] >= btrfs_header_nritems(cur))
2022 ret = process_one_leaf(root, cur, wc);
2027 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2028 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* same cache-or-lookup step as above, now for the child block */
2030 if (bytenr == nrefs->bytenr[*level - 1]) {
2031 refs = nrefs->refs[*level - 1];
2033 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2034 *level - 1, 1, &refs, NULL);
2038 nrefs->bytenr[*level - 1] = bytenr;
2039 nrefs->refs[*level - 1] = refs;
2044 ret = enter_shared_node(root, bytenr, refs,
2047 path->slots[*level]++;
2052 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2053 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2054 free_extent_buffer(next);
2055 reada_walk_down(root, cur, path->slots[*level]);
2056 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2057 if (!extent_buffer_uptodate(next)) {
2058 struct btrfs_key node_key;
2060 btrfs_node_key_to_cpu(path->nodes[*level],
2062 path->slots[*level]);
2063 btrfs_add_corrupt_extent_record(root->fs_info,
2065 path->nodes[*level]->start,
2066 root->fs_info->nodesize,
2073 ret = check_child_node(cur, path->slots[*level], next);
2075 free_extent_buffer(next);
2080 if (btrfs_is_leaf(next))
2081 status = btrfs_check_leaf(root, NULL, next);
2083 status = btrfs_check_node(root, NULL, next);
2084 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2085 free_extent_buffer(next);
/* move one level down: the child replaces whatever the path held there */
2090 *level = *level - 1;
2091 free_extent_buffer(path->nodes[*level]);
2092 path->nodes[*level] = next;
2093 path->slots[*level] = 0;
/* mark the current block as fully consumed */
2096 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2101 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the global byte counters.
 *
 * total_btree_bytes always grows by the block length; total_fs_tree_bytes
 * grows when fs_root_objectid() accepts root->objectid, and
 * total_extent_tree_bytes when the block's header owner is the extent tree.
 * btree_space_waste grows by the leaf's free space, or by the unused key
 * pointer slots of a node.
 * NOTE(review): the leaf/node test selecting between the two waste
 * computations is not visible in this listing.
 */
2103 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2107 struct extent_buffer *eb = path->nodes[level];
2109 total_btree_bytes += eb->len;
2110 if (fs_root_objectid(root->objectid))
2111 total_fs_tree_bytes += eb->len;
2112 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2113 total_extent_tree_bytes += eb->len;
2116 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* unused pointer slots in a node, each sizeof(struct btrfs_key_ptr) bytes */
2118 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2119 btrfs_header_nritems(eb));
2120 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2125 * This function only handles BACKREF_MISSING,
2126 * If corresponding extent item exists, increase the ref, else insert an extent
2129 * Returns error bits after repair.
/*
 * Repair a missing backref (BACKREF_MISSING in @err) for tree block @node.
 *
 * The extent item for the block is searched in the extent tree. When
 * @insert_extent is set, a new extent item is inserted with zero refs, the
 * block's generation and the computed flags; without skinny metadata it is
 * followed by a btrfs_tree_block_info holding @level and a key built from
 * root->objectid. A reference is then added with btrfs_inc_extent_ref(),
 * using nrefs->bytenr[level + 1] as parent when the upper level is in
 * full-backref mode and the block is owned by another root. On success
 * BACKREF_MISSING is cleared from @err.
 *
 * The four disk-key calls take the address of the local copy_key. This
 * listing had the address-of operator mangled into a copyright sign, which
 * is restored here.
 *
 * NOTE(review): this listing omits several lines (conditions, error paths,
 * the return), which are left untouched.
 */
2131 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2132 struct btrfs_root *root,
2133 struct extent_buffer *node,
2134 struct node_refs *nrefs, int level, int err)
2136 struct btrfs_fs_info *fs_info = root->fs_info;
2137 struct btrfs_root *extent_root = fs_info->extent_root;
2138 struct btrfs_path path;
2139 struct btrfs_extent_item *ei;
2140 struct btrfs_tree_block_info *bi;
2141 struct btrfs_key key;
2142 struct extent_buffer *eb;
2143 u32 size = sizeof(*ei);
2144 u32 node_size = root->fs_info->nodesize;
2145 int insert_extent = 0;
2146 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2147 int root_level = btrfs_header_level(root->node);
2152 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
/* nothing to do unless the caller reported a missing backref */
2155 if ((err & BACKREF_MISSING) == 0)
2158 WARN_ON(level > BTRFS_MAX_LEVEL);
2161 btrfs_init_path(&path);
2162 bytenr = btrfs_header_bytenr(node);
2163 owner = btrfs_header_owner(node);
2164 generation = btrfs_header_generation(node);
2166 key.objectid = bytenr;
2168 key.offset = (u64)-1;
2170 /* Search for the extent item */
2171 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2177 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2181 /* calculate if the extent item flag is full backref or not */
2182 if (nrefs->full_backref[level] != 0)
2183 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2185 /* insert an extent item */
2186 if (insert_extent) {
2187 struct btrfs_disk_key copy_key;
2189 generation = btrfs_header_generation(node);
2191 if (level < root_level && nrefs->full_backref[level + 1] &&
2192 owner != root->objectid) {
2193 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2196 key.objectid = bytenr;
/* without skinny metadata the item also carries a tree_block_info */
2197 if (!skinny_metadata) {
2198 key.type = BTRFS_EXTENT_ITEM_KEY;
2199 key.offset = node_size;
2200 size += sizeof(*bi);
2202 key.type = BTRFS_METADATA_ITEM_KEY;
2206 btrfs_release_path(&path);
2207 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2213 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* start with zero refs; the reference itself is added further down */
2215 btrfs_set_extent_refs(eb, ei, 0);
2216 btrfs_set_extent_generation(eb, ei, generation);
2217 btrfs_set_extent_flags(eb, ei, flags);
2219 if (!skinny_metadata) {
2220 bi = (struct btrfs_tree_block_info *)(ei + 1);
2221 memset_extent_buffer(eb, 0, (unsigned long)bi,
2223 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2224 btrfs_set_disk_key_type(&copy_key, 0);
2225 btrfs_set_disk_key_offset(&copy_key, 0);
2227 btrfs_set_tree_block_level(eb, bi, level);
2228 btrfs_set_tree_block_key(eb, bi, &copy_key);
2230 btrfs_mark_buffer_dirty(eb);
2231 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2232 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2234 nrefs->refs[level] = 0;
2235 nrefs->full_backref[level] =
2236 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2237 btrfs_release_path(&path);
/* shared backref: point at the parent block instead of the owning root */
2240 if (level < root_level && nrefs->full_backref[level + 1] &&
2241 owner != root->objectid)
2242 parent = nrefs->bytenr[level + 1];
2244 /* increase the ref */
2245 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2246 parent, root->objectid, level, 0);
2248 nrefs->refs[level]++;
2250 btrfs_release_path(&path);
2253 "failed to repair tree block ref start %llu root %llu due to %s",
2254 bytenr, root->objectid, strerror(-ret));
2256 printf("Added one tree block ref start %llu %s %llu\n",
2257 bytenr, parent ? "parent" : "root",
2258 parent ? parent : root->objectid);
2259 err &= ~BACKREF_MISSING;
/*
 * Forward declarations for helpers called by walk_down_tree_v2() below.
 * NOTE(review): check_inode_item() is already declared with the same
 * signature earlier in this file, so this declaration is redundant.
 */
2265 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2266 unsigned int ext_ref);
2267 static int check_tree_block_ref(struct btrfs_root *root,
2268 struct extent_buffer *eb, u64 bytenr,
2269 int level, u64 owner, struct node_refs *nrefs);
2270 static int check_leaf_items(struct btrfs_trans_handle *trans,
2271 struct btrfs_root *root, struct btrfs_path *path,
2272 struct node_refs *nrefs, int account_bytes);
2275 * @trans just for lowmem repair mode
2276 * @check_all if not 0 then check all tree block backrefs and items
2277 * 0 then just check relationship of items in fs tree(s)
2279 * Returns >0 Found error, should continue
2280 * Returns <0 Fatal error, must exit the whole check
2281 * Returns 0 No errors found
/*
 * Low-memory variant of the downward tree walk.
 *
 * Starting at path->nodes[*level], each block's cached state in @nrefs is
 * refreshed with update_nodes_refs(). Blocks that still need it go through
 * check_tree_block_ref(), optionally repair_tree_block_ref(), and
 * account_bytes(). Leaves are validated with btrfs_check_leaf() and handed
 * to process_one_leaf_v2() or check_leaf_items(); nodes are validated with
 * btrfs_check_node() before the walk descends into the child at the current
 * slot. Children are read, verified with check_child_node() and
 * btrfs_check_leaf()/btrfs_check_node(), and installed into @path.
 *
 * NOTE(review): this listing omits many lines (the rest of the parameter
 * list, conditions, gotos, error handling, returns); only the visible calls
 * are described.
 */
2283 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2284 struct btrfs_root *root, struct btrfs_path *path,
2285 int *level, struct node_refs *nrefs, int ext_ref,
2289 enum btrfs_tree_block_status status;
2292 struct btrfs_fs_info *fs_info = root->fs_info;
2293 struct extent_buffer *next;
2294 struct extent_buffer *cur;
2298 int account_file_data = 0;
2300 WARN_ON(*level < 0);
2301 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2303 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2304 path->nodes[*level], nrefs, *level, check_all);
2308 while (*level >= 0) {
2309 WARN_ON(*level < 0);
2310 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2311 cur = path->nodes[*level];
2312 bytenr = btrfs_header_bytenr(cur);
2313 check = nrefs->need_check[*level];
2315 if (btrfs_header_level(cur) != *level)
2318 * Update bytes accounting and check tree block ref
2319 * NOTE: Doing accounting and check before checking nritems
2320 * is necessary because of empty node/leaf.
2322 if ((check_all && !nrefs->checked[*level]) ||
2323 (!check_all && nrefs->need_check[*level])) {
2324 ret = check_tree_block_ref(root, cur,
2325 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2326 btrfs_header_owner(cur), nrefs);
2329 ret = repair_tree_block_ref(trans, root,
2330 path->nodes[*level], nrefs, *level, ret);
/* account a block only once, and only if it is referenced at all */
2333 if (check_all && nrefs->need_check[*level] &&
2334 nrefs->refs[*level]) {
2335 account_bytes(root, path, *level);
2336 account_file_data = 1;
2338 nrefs->checked[*level] = 1;
/* all slots of this block have been visited */
2341 if (path->slots[*level] >= btrfs_header_nritems(cur))
2344 /* Don't forget to check leaf/node validation */
2346 /* skip duplicate check */
2347 if (check || !check_all) {
2348 ret = btrfs_check_leaf(root, NULL, cur);
2349 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2357 ret = process_one_leaf_v2(root, path, nrefs,
2360 ret = check_leaf_items(trans, root, path,
2361 nrefs, account_file_data);
2365 if (check || !check_all) {
2366 ret = btrfs_check_node(root, NULL, cur);
2367 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2374 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2375 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2377 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2382 * check all trees in check_chunks_and_extent_v2
2383 * check shared node once in check_fs_roots
2385 if (!check_all && !nrefs->need_check[*level - 1]) {
2386 path->slots[*level]++;
2390 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392 free_extent_buffer(next);
2393 reada_walk_down(root, cur, path->slots[*level]);
2394 next = read_tree_block(fs_info, bytenr, ptr_gen);
2395 if (!extent_buffer_uptodate(next)) {
2396 struct btrfs_key node_key;
2398 btrfs_node_key_to_cpu(path->nodes[*level],
2400 path->slots[*level]);
2401 btrfs_add_corrupt_extent_record(fs_info,
2402 &node_key, path->nodes[*level]->start,
2403 fs_info->nodesize, *level);
2409 ret = check_child_node(cur, path->slots[*level], next);
2414 if (btrfs_is_leaf(next))
2415 status = btrfs_check_leaf(root, NULL, next);
2417 status = btrfs_check_node(root, NULL, next);
2418 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2419 free_extent_buffer(next);
/* move one level down: the child replaces whatever the path held there */
2424 *level = *level - 1;
2425 free_extent_buffer(path->nodes[*level]);
2426 path->nodes[*level] = next;
2427 path->slots[*level] = 0;
2428 account_file_data = 0;
/* bytenr (u64)-1: only refresh full_backref[] for the new block */
2430 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Climb @path from *@level looking for a block that still has an unvisited
 * slot.
 *
 * A level with another slot left takes the first branch. Otherwise the
 * buffer at *@level is released and cleared from @path, and
 * leave_shared_node() is called when that level is the walk control's
 * active node.
 * NOTE(review): the body of the first branch, the level update and the
 * return values are not visible in this listing.
 */
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436 struct walk_control *wc, int *level)
2439 struct extent_buffer *leaf;
2441 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* despite the name, @leaf is the block at level i, leaf or node */
2442 leaf = path->nodes[i];
2443 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2448 free_extent_buffer(path->nodes[*level]);
2449 path->nodes[*level] = NULL;
2450 BUG_ON(*level > wc->active_node);
2451 if (*level == wc->active_node)
2452 leave_shared_node(root, wc, *level);
/*
 * Low-memory counterpart of walk_up_tree(): climb @path from *@level looking
 * for a block that still has an unvisited slot, releasing and clearing the
 * buffer at *@level otherwise.
 * NOTE(review): the rest of the parameter list, the body of the first
 * branch and the return values are not visible in this listing.
 */
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2463 struct extent_buffer *leaf;
2465 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* despite the name, @leaf is the block at level i, leaf or node */
2466 leaf = path->nodes[i];
2467 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2472 free_extent_buffer(path->nodes[*level]);
2473 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record @rec of a root directory.
 *
 * The visible tests look at, in order: a missing inode item or any recorded
 * error; nlink other than 1 or a non-zero found_link; an empty backref
 * list; a first backref without an inode ref; a first backref whose index
 * is not 0 or whose name is not ".."; and a first backref that was also
 * found as a dir index or dir item.
 * NOTE(review): the statements guarded by these tests and the return value
 * are not visible in this listing; each test presumably rejects the record.
 */
2480 static int check_root_dir(struct inode_record *rec)
2482 struct inode_backref *backref;
2485 if (!rec->found_inode_item || rec->errors)
2487 if (rec->nlink != 1 || rec->found_link != 0)
2489 if (list_empty(&rec->backrefs))
/* only the first backref on the list is examined */
2491 backref = to_inode_backref(rec->backrefs.next);
2492 if (!backref->found_inode_ref)
2494 if (backref->index != 0 || backref->namelen != 2 ||
2495 memcmp(backref->name, "..", 2))
2497 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Reset the on-disk size of directory inode @rec to rec->found_size.
 *
 * The inode item is located by searching for (rec->ino, INODE_ITEM,
 * (u64)-1) and validating the key found at the resulting slot. On success
 * the size field is rewritten, the leaf is marked dirty and
 * I_ERR_DIR_ISIZE_WRONG is cleared from rec->errors. @path is released
 * before returning.
 *
 * The message uses %llu, like repair_inode_nbytes(), because the L length
 * modifier is only defined for floating-point conversions.
 *
 * NOTE(review): the error branches and the return are not visible in this
 * listing and are left untouched.
 */
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505 struct btrfs_root *root, struct btrfs_path *path,
2506 struct inode_record *rec)
2508 struct btrfs_inode_item *ei;
2509 struct btrfs_key key;
2512 key.objectid = rec->ino;
2513 key.type = BTRFS_INODE_ITEM_KEY;
2514 key.offset = (u64)-1;
2516 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* slot 0 means there is no earlier item in this leaf to step back to */
2520 if (!path->slots[0]) {
2527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528 if (key.objectid != rec->ino) {
2533 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534 struct btrfs_inode_item);
2535 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536 btrfs_mark_buffer_dirty(path->nodes[0]);
2537 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538 printf("reset isize for dir %llu root %llu\n", rec->ino,
2539 root->root_key.objectid);
2541 btrfs_release_path(path);
/*
 * Add the missing orphan item for inode @rec via btrfs_add_orphan_item(),
 * release @path, and clear I_ERR_NO_ORPHAN_ITEM from rec->errors.
 * NOTE(review): the condition guarding the error-bit update and the return
 * are not visible in this listing.
 */
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546 struct btrfs_root *root,
2547 struct btrfs_path *path,
2548 struct inode_record *rec)
2552 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553 btrfs_release_path(path);
2555 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Reset the on-disk nbytes of inode @rec to rec->found_size.
 *
 * The inode item is looked up by key (rec->ino, INODE_ITEM, ...); the nbytes
 * field is rewritten, the leaf is marked dirty, I_ERR_FILE_NBYTES_WRONG is
 * cleared from rec->errors and @path is released.
 * NOTE(review): the key offset assignment, the handling of a failed search
 * and the return are not visible in this listing.
 */
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560 struct btrfs_root *root,
2561 struct btrfs_path *path,
2562 struct inode_record *rec)
2564 struct btrfs_inode_item *ei;
2565 struct btrfs_key key;
2568 key.objectid = rec->ino;
2569 key.type = BTRFS_INODE_ITEM_KEY;
2572 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2579 /* Since ret == 0, no need to check anything */
2580 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581 struct btrfs_inode_item);
2582 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583 btrfs_mark_buffer_dirty(path->nodes[0]);
2584 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585 printf("reset nbytes for ino %llu root %llu\n",
2586 rec->ino, root->root_key.objectid);
2588 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref says is missing for inode @rec.
 *
 * Runs in its own transaction. The new item is keyed (backref->dir,
 * DIR_INDEX, backref->index), points at (rec->ino, INODE_ITEM, 0), takes
 * its type from rec->imode, and carries the backref's name with no extra
 * data. Afterwards the backref is marked as having a dir index and the
 * parent directory's record gets its found_size and I_ERR_DIR_ISIZE_WRONG
 * state updated.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored.
 * The check on the insert result and the return are not visible in this
 * listing.
 */
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593 struct cache_tree *inode_cache,
2594 struct inode_record *rec,
2595 struct inode_backref *backref)
2597 struct btrfs_path path;
2598 struct btrfs_trans_handle *trans;
2599 struct btrfs_dir_item *dir_item;
2600 struct extent_buffer *leaf;
2601 struct btrfs_key key;
2602 struct btrfs_disk_key disk_key;
2603 struct inode_record *dir_rec;
2604 unsigned long name_ptr;
/* item payload: the fixed dir_item header followed by the name bytes */
2605 u32 data_size = sizeof(*dir_item) + backref->namelen;
2608 trans = btrfs_start_transaction(root, 1);
2610 return PTR_ERR(trans);
2612 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613 (unsigned long long)rec->ino);
2615 btrfs_init_path(&path);
2616 key.objectid = backref->dir;
2617 key.type = BTRFS_DIR_INDEX_KEY;
2618 key.offset = backref->index;
2619 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2622 leaf = path.nodes[0];
2623 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* location key of the inode this directory entry points at */
2625 disk_key.objectid = cpu_to_le64(rec->ino);
2626 disk_key.type = BTRFS_INODE_ITEM_KEY;
2627 disk_key.offset = 0;
2629 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631 btrfs_set_dir_data_len(leaf, dir_item, 0);
2632 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* the name is stored directly after the dir_item header */
2633 name_ptr = (unsigned long)(dir_item + 1);
2634 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635 btrfs_mark_buffer_dirty(leaf);
2636 btrfs_release_path(&path);
2637 btrfs_commit_transaction(trans, root);
2639 backref->found_dir_index = 1;
2640 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641 BUG_ON(IS_ERR(dir_rec));
/* the new entry adds its name length to the directory's computed size */
2644 dir_rec->found_size += backref->namelen;
2645 if (dir_rec->found_size == dir_rec->isize &&
2646 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648 if (dir_rec->found_size != dir_rec->isize)
2649 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root.
 *
 * Runs in its own transaction. The entry is looked up with
 * btrfs_lookup_dir_index() using the backref's directory, name and index,
 * then removed with btrfs_del_item() or btrfs_delete_one_dir_name().
 * NOTE(review): the conditions selecting between the early-exit path and
 * the two removal calls, and the return, are not visible in this listing.
 * Neither btrfs_commit_transaction() result is stored.
 */
2654 static int delete_dir_index(struct btrfs_root *root,
2655 struct inode_backref *backref)
2657 struct btrfs_trans_handle *trans;
2658 struct btrfs_dir_item *di;
2659 struct btrfs_path path;
2662 trans = btrfs_start_transaction(root, 1);
2664 return PTR_ERR(trans);
2666 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667 (unsigned long long)backref->dir,
2668 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669 (unsigned long long)root->objectid);
2671 btrfs_init_path(&path);
2672 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673 backref->name, backref->namelen,
2674 backref->index, -1);
2677 btrfs_release_path(&path);
2678 btrfs_commit_transaction(trans, root);
2685 ret = btrfs_del_item(trans, root, &path);
2687 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689 btrfs_release_path(&path);
2690 btrfs_commit_transaction(trans, root);
/*
 * Insert a minimal inode item for @ino in @root within @trans.
 *
 * The mode is a directory for BTRFS_FT_DIR and a regular file for every
 * other file type, always with permission bits 0755. The three arguments
 * between @ino and the mode are passed to insert_inode_item() as 0.
 */
2694 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2695 struct btrfs_root *root, u64 ino,
2698 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2700 return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Create the missing inode item for @rec in @root, in its own transaction.
 *
 * The link count is 1 for a root directory (@root_dir set) and
 * rec->found_link otherwise. A record with a dir item becomes a directory
 * sized rec->found_size; any other record becomes a regular file sized
 * rec->extent_end. Both use permission bits 0755. A record that has both a
 * dir item and file extents is reported on stderr and treated as a
 * directory.
 * NOTE(review): the tail of the insert_inode_item() call and the return are
 * not visible in this listing. The btrfs_commit_transaction() result is not
 * stored.
 */
2703 static int create_inode_item(struct btrfs_root *root,
2704 struct inode_record *rec, int root_dir)
2706 struct btrfs_trans_handle *trans;
2712 trans = btrfs_start_transaction(root, 1);
2713 if (IS_ERR(trans)) {
2714 ret = PTR_ERR(trans);
2718 nlink = root_dir ? 1 : rec->found_link;
2719 if (rec->found_dir_item) {
2720 if (rec->found_file_extent)
2721 fprintf(stderr, "root %llu inode %llu has both a dir "
2722 "item and extents, unsure if it is a dir or a "
2723 "regular file so setting it as a directory\n",
2724 (unsigned long long)root->objectid,
2725 (unsigned long long)rec->ino);
2726 mode = S_IFDIR | 0755;
2727 size = rec->found_size;
/* always true when reached: the branch above handled found_dir_item */
2728 } else if (!rec->found_dir_item) {
2729 size = rec->extent_end;
2730 mode = S_IFREG | 0755;
2733 ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2735 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the inconsistent backrefs of inode record @rec.
 *
 * Each backref on rec->backrefs is examined. The visible repairs are:
 *  - a root directory without an inode item gets one (create_inode_item);
 *  - a dir index with no matching inode ref, or with REF_ERR_INDEX_UNMATCH,
 *    is deleted (delete_dir_index) and its backref unlinked from the list;
 *  - a backref with a dir item and inode ref but no dir index gets the
 *    index added (add_missing_dir_index);
 *  - a backref with only an inode ref gets a dir index/item pair inserted
 *    with btrfs_insert_dir_item(), after check_dir_conflict();
 *  - a fully consistent backref whose inode item is missing gets an inode
 *    item created.
 * Index 0 of the root directory is treated as special and not touched.
 * Returns @ret when it is non-zero, otherwise @repaired.
 *
 * NOTE(review): the last parameter (used here as @delete), several
 * conditions, and the updates of @repaired are not visible in this listing.
 */
2739 static int repair_inode_backrefs(struct btrfs_root *root,
2740 struct inode_record *rec,
2741 struct cache_tree *inode_cache,
2744 struct inode_backref *tmp, *backref;
2745 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* the _safe iterator is needed because entries are unlinked in the loop */
2749 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2750 if (!delete && rec->ino == root_dirid) {
2751 if (!rec->found_inode_item) {
2752 ret = create_inode_item(root, rec, 1);
2759 /* Index 0 for root dir's are special, don't mess with it */
2760 if (rec->ino == root_dirid && backref->index == 0)
2764 ((backref->found_dir_index && !backref->found_inode_ref) ||
2765 (backref->found_dir_index && backref->found_inode_ref &&
2766 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2767 ret = delete_dir_index(root, backref);
2771 list_del(&backref->list);
2776 if (!delete && !backref->found_dir_index &&
2777 backref->found_dir_item && backref->found_inode_ref) {
2778 ret = add_missing_dir_index(root, inode_cache, rec,
/* dir item, dir index and inode ref all present without errors: drop it */
2783 if (backref->found_dir_item &&
2784 backref->found_dir_index) {
2785 if (!backref->errors &&
2786 backref->found_inode_ref) {
2787 list_del(&backref->list);
2794 if (!delete && (!backref->found_dir_index &&
2795 !backref->found_dir_item &&
2796 backref->found_inode_ref)) {
2797 struct btrfs_trans_handle *trans;
2798 struct btrfs_key location;
2800 ret = check_dir_conflict(root, backref->name,
2806 * let nlink fixing routine to handle it,
2807 * which can do it better.
2812 location.objectid = rec->ino;
2813 location.type = BTRFS_INODE_ITEM_KEY;
2814 location.offset = 0;
2816 trans = btrfs_start_transaction(root, 1);
2817 if (IS_ERR(trans)) {
2818 ret = PTR_ERR(trans);
2821 fprintf(stderr, "adding missing dir index/item pair "
2823 (unsigned long long)rec->ino);
2824 ret = btrfs_insert_dir_item(trans, root, backref->name,
2826 backref->dir, &location,
2827 imode_to_type(rec->imode),
2830 btrfs_commit_transaction(trans, root);
2834 if (!delete && (backref->found_inode_ref &&
2835 backref->found_dir_index &&
2836 backref->found_dir_item &&
2837 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2838 !rec->found_inode_item)) {
2839 ret = create_inode_item(root, rec, 0);
2846 return ret ? ret : repaired;
2850 * To determine the file type for nlink/inode_item repair
2852 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2853 * Return -ENOENT if file type is not found.
/*
 * Work out the BTRFS_FT_* type of @rec and store it through @type.
 *
 * Two sources are consulted in order: the recorded imode when the inode item
 * was found, then the filetype of the first backref that has a dir index or
 * a dir item.
 */
2855 static int find_file_type(struct inode_record *rec, u8 *type)
2857 struct inode_backref *backref;
2859 /* For inode item recovered case */
2860 if (rec->found_inode_item) {
2861 *type = imode_to_type(rec->imode);
/* Otherwise trust the filetype recorded by a dir index/item backref */
2865 list_for_each_entry(backref, &rec->backrefs, list) {
2866 if (backref->found_dir_index || backref->found_dir_item) {
2867 *type = backref->filetype;
2875 * To determine the file name for nlink repair
2877 * Return 0 if file name is found, set name and namelen.
2878 * Return -ENOENT if file name is not found.
/*
 * Copy a name for @rec into @name and its length into @namelen.
 *
 * The name comes from the first backref that has any of dir index, dir item
 * or inode ref.  Exactly backref->namelen bytes are copied; no terminating
 * NUL is written by the memcpy() here.
 * NOTE(review): the size of @name is not passed in, so the caller must
 * provide a buffer large enough for any backref name - confirm.
 */
2880 static int find_file_name(struct inode_record *rec,
2881 char *name, int *namelen)
2883 struct inode_backref *backref;
2885 list_for_each_entry(backref, &rec->backrefs, list) {
2886 if (backref->found_dir_index || backref->found_dir_item ||
2887 backref->found_inode_ref) {
2888 memcpy(name, backref->name, backref->namelen);
2889 *namelen = backref->namelen;
2896 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from scratch inside @trans.
 *
 * Steps visible below:
 *   1. zero rec->found_link;
 *   2. btrfs_unlink() every backref, dropping from the list those that do
 *      not have all of dir index, dir item and inode ref;
 *   3. look up the inode item and set its on-disk nlink to 0;
 *   4. btrfs_add_link() each backref still on the list.
 *
 * @path is released after the inode item update and again at the end.
 */
2897 static int reset_nlink(struct btrfs_trans_handle *trans,
2898 struct btrfs_root *root,
2899 struct btrfs_path *path,
2900 struct inode_record *rec)
2902 struct inode_backref *backref;
2903 struct inode_backref *tmp;
2904 struct btrfs_key key;
2905 struct btrfs_inode_item *inode_item;
2908 /* We don't believe this either, reset it and iterate backref */
2909 rec->found_link = 0;
2911 /* Remove all backref including the valid ones */
2912 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2913 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2914 backref->index, backref->name,
2915 backref->namelen, 0);
2919 /* remove invalid backref, so it won't be added back */
2920 if (!(backref->found_dir_index &&
2921 backref->found_dir_item &&
2922 backref->found_inode_ref)) {
2923 list_del(&backref->list);
2930 /* Set nlink to 0 */
2931 key.objectid = rec->ino;
2932 key.type = BTRFS_INODE_ITEM_KEY;
/* Last argument 1 asks btrfs_search_slot() for a writable (COW) path */
2934 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2941 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2942 struct btrfs_inode_item);
2943 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2944 btrfs_mark_buffer_dirty(path->nodes[0]);
2945 btrfs_release_path(path);
2948 * Add back valid inode_ref/dir_item/dir_index,
2949 * add_link() will handle the nlink inc, so new nlink must be correct
2951 list_for_each_entry(backref, &rec->backrefs, list) {
2952 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2953 backref->name, backref->namelen,
2954 backref->filetype, &backref->index, 1, 0);
2959 btrfs_release_path(path);
/*
 * Fix a wrong link count on @rec.
 *
 * The name and type of the inode are captured first, with fallbacks (the
 * decimal inode number as name, BTRFS_FT_REG_FILE as type).  reset_nlink()
 * then rebuilds the links; if no link was found the inode is linked into
 * lost+found via link_inode_to_lostfound().  I_ERR_LINK_COUNT_WRONG is
 * cleared and @path released at the end.
 */
2963 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2964 struct btrfs_root *root,
2965 struct btrfs_path *path,
2966 struct inode_record *rec)
2968 char namebuf[BTRFS_NAME_LEN] = {0};
2971 int name_recovered = 0;
2972 int type_recovered = 0;
/*
 * NOTE(review): the text below names remove_all_invalid_backref(), but the
 * call in this function that drops backrefs is reset_nlink().
 */
2976 * Get file name and type first before these invalid inode ref
2977 * are deleted by remove_all_invalid_backref()
/* Both finders return 0 on success, hence the negation */
2979 name_recovered = !find_file_name(rec, namebuf, &namelen);
2980 type_recovered = !find_file_type(rec, &type);
2982 if (!name_recovered) {
2983 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2984 rec->ino, rec->ino);
2985 namelen = count_digits(rec->ino);
2986 sprintf(namebuf, "%llu", rec->ino);
2989 if (!type_recovered) {
2990 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2992 type = BTRFS_FT_REG_FILE;
2996 ret = reset_nlink(trans, root, path, rec);
2999 "Failed to reset nlink for inode %llu: %s\n",
3000 rec->ino, strerror(-ret));
/* Nothing links to the inode any more: give it an entry in lost+found */
3004 if (rec->found_link == 0) {
3005 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3006 namebuf, namelen, type,
/*
 * NOTE(review): the cast implies found_link is not declared as u64.  If the
 * field is narrower, a 64-bit store by the callee would write past it -
 * confirm the field width in struct inode_record.
 */
3007 (u64 *)&rec->found_link);
3011 printf("Fixed the nlink of inode %llu\n", rec->ino);
3014 * Clear the flag anyway, or we will loop forever for the same inode
3015 * as it will not be removed from the bad inode list and the dead loop
3018 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3019 btrfs_release_path(path);
3024 * Check if there is any normal(reg or prealloc) file extent for given
3026 * This is used to determine the file type when neither its dir_index/item nor
3027 * inode_item exists.
3029 * This will *NOT* report error, if any error happens, just consider it does
3030 * not have any normal file extent.
/*
 * Look in @root for a file extent item of inode @ino that is not inline.
 *
 * The search is read-only (NULL transaction, no insert, no COW).  Each
 * candidate item must carry objectid @ino and type BTRFS_EXTENT_DATA_KEY;
 * its extent type is then compared against BTRFS_FILE_EXTENT_INLINE.
 * The path is released before returning.
 */
3032 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3034 struct btrfs_path path;
3035 struct btrfs_key key;
3036 struct btrfs_key found_key;
3037 struct btrfs_file_extent_item *fi;
3041 btrfs_init_path(&path);
3043 key.type = BTRFS_EXTENT_DATA_KEY;
3046 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the last item: move to the next leaf */
3051 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3052 ret = btrfs_next_leaf(root, &path);
3059 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Items of another inode or another key type end the scan */
3061 if (found_key.objectid != ino ||
3062 found_key.type != BTRFS_EXTENT_DATA_KEY)
3064 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3065 struct btrfs_file_extent_item);
3066 type = btrfs_file_extent_type(path.nodes[0], fi);
3067 if (type != BTRFS_FILE_EXTENT_INLINE) {
3073 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF* file
 * type bits using a static lookup table.
 *
 * NOTE(review): no range check on @type is visible before the table is
 * indexed.  A value above the largest designated index would read outside
 * the array - confirm that callers only pass the types listed here.
 */
3077 static u32 btrfs_type_to_imode(u8 type)
3079 static u32 imode_by_btrfs_type[] = {
3080 [BTRFS_FT_REG_FILE] = S_IFREG,
3081 [BTRFS_FT_DIR] = S_IFDIR,
3082 [BTRFS_FT_CHRDEV] = S_IFCHR,
3083 [BTRFS_FT_BLKDEV] = S_IFBLK,
3084 [BTRFS_FT_FIFO] = S_IFIFO,
3085 [BTRFS_FT_SOCK] = S_IFSOCK,
3086 [BTRFS_FT_SYMLINK] = S_IFLNK,
3089 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of @rec inside @trans.
 *
 * The file type comes from find_file_type(); if that fails it is guessed:
 * a non-inline file extent or pending orphan extents mean a regular file, a
 * found dir item means a directory, anything else falls back to a regular
 * file.  btrfs_new_inode() then creates the item with the chosen type bits
 * ORed into mode.  On the way out the record is updated: the imode is
 * stored, I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set
 * so that the nlink repair runs afterwards.
 */
3092 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3093 struct btrfs_root *root,
3094 struct btrfs_path *path,
3095 struct inode_record *rec)
3099 int type_recovered = 0;
3102 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation */
3104 type_recovered = !find_file_type(rec, &filetype);
3107 * Try to determine inode type if type not found.
3109 * For found regular file extent, it must be FILE.
3110 * For found dir_item/index, it must be DIR.
3112 * For undetermined one, use FILE as fallback.
3115 * 1. If found backref(inode_index/item is already handled) to it,
3117 * Need new inode-inode ref structure to allow search for that.
3119 if (!type_recovered) {
3120 if (rec->found_file_extent &&
3121 find_normal_file_extent(root, rec->ino)) {
3123 filetype = BTRFS_FT_REG_FILE;
3124 } else if (rec->found_dir_item) {
3126 filetype = BTRFS_FT_DIR;
3127 } else if (!list_empty(&rec->orphan_extents)) {
3129 filetype = BTRFS_FT_REG_FILE;
3131 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3134 filetype = BTRFS_FT_REG_FILE;
3138 ret = btrfs_new_inode(trans, root, rec->ino,
3139 mode | btrfs_type_to_imode(filetype));
3144 * Here inode rebuild is done, we only rebuild the inode item,
3145 * don't repair the nlink(like move to lost+found).
3146 * That is the job of nlink repair.
3148 * We just fill the record and return
/*
 * NOTE(review): found_dir_item is set here regardless of the chosen
 * filetype; confirm this is intended and not meant to be found_inode_item.
 */
3150 rec->found_dir_item = 1;
3151 rec->imode = mode | btrfs_type_to_imode(filetype);
3153 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3154 /* Ensure the inode_nlinks repair function will be called */
3155 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded on @rec to their inode.
 *
 * For each orphan: probe the target range with btrfs_get_extent(); the
 * btrfs_free_extent() branch handles the conflicting case (see the message
 * printed there), otherwise a file extent item is inserted using disk_len as
 * both lengths.  Bookkeeping then follows: found_size grows by disk_len, the
 * covered range is removed from rec->holes, error bits are cleared when
 * their condition no longer holds, and the orphan leaves the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3160 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3161 struct btrfs_root *root,
3162 struct btrfs_path *path,
3163 struct inode_record *rec)
3165 struct orphan_data_extent *orphan;
3166 struct orphan_data_extent *tmp;
3169 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3171 * Check for conflicting file extents
3173 * Here we don't know whether the extents is compressed or not,
3174 * so we can only assume it not compressed nor data offset,
3175 * and use its disk_len as extent length.
3177 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3178 orphan->offset, orphan->disk_len, 0);
3179 btrfs_release_path(path);
3184 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3185 orphan->disk_bytenr, orphan->disk_len);
3186 ret = btrfs_free_extent(trans,
3187 root->fs_info->extent_root,
3188 orphan->disk_bytenr, orphan->disk_len,
3189 0, root->objectid, orphan->objectid,
3194 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3195 orphan->offset, orphan->disk_bytenr,
3196 orphan->disk_len, orphan->disk_len);
3200 /* Update file size info */
3201 rec->found_size += orphan->disk_len;
3202 if (rec->found_size == rec->nbytes)
3203 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3205 /* Update the file extent hole info too */
3206 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3210 if (RB_EMPTY_ROOT(&rec->holes))
3211 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3213 list_del(&orphan->list);
3216 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes by punching explicit holes.
 *
 * The first tree entry is taken repeatedly: btrfs_punch_hole() covers it,
 * del_file_extent_hole() removes it from the tree, and
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.  A separate
 * btrfs_punch_hole() call covers offset 0 up to isize rounded up to the
 * sector size for a file that lost all of its extents.
 */
3221 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3222 struct btrfs_root *root,
3223 struct btrfs_path *path,
3224 struct inode_record *rec)
3226 struct rb_node *node;
3227 struct file_extent_hole *hole;
3231 node = rb_first(&rec->holes);
3235 hole = rb_entry(node, struct file_extent_hole, node);
3236 ret = btrfs_punch_hole(trans, root, rec->ino,
3237 hole->start, hole->len);
3240 ret = del_file_extent_hole(&rec->holes, hole->start,
3244 if (RB_EMPTY_ROOT(&rec->holes))
3245 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the first entry: the tree was just modified */
3246 node = rb_first(&rec->holes);
3248 /* special case for a file losing all its file extent */
3250 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3251 round_up(rec->isize,
3252 root->fs_info->sectorsize));
3256 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3257 rec->ino, root->objectid);
/*
 * Run every applicable repair for @rec inside a single transaction.
 *
 * Only the error bits listed in the first test are handled here.  The
 * repairs run in a fixed order and each later step is skipped once an
 * earlier one has set ret to non-zero:
 *   no inode item -> orphan extents -> discount extents -> dir isize ->
 *   orphan item -> nlinks -> nbytes
 * rec->errors is re-read before each step, so bits set or cleared by an
 * earlier repair affect which later repairs run.
 */
3262 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3264 struct btrfs_trans_handle *trans;
3265 struct btrfs_path path;
3268 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3269 I_ERR_NO_ORPHAN_ITEM |
3270 I_ERR_LINK_COUNT_WRONG |
3271 I_ERR_NO_INODE_ITEM |
3272 I_ERR_FILE_EXTENT_ORPHAN |
3273 I_ERR_FILE_EXTENT_DISCOUNT|
3274 I_ERR_FILE_NBYTES_WRONG)))
3278 * For nlink repair, it may create a dir and add link, so
3279 * 2 for parent(256)'s dir_index and dir_item
3280 * 2 for lost+found dir's inode_item and inode_ref
3281 * 1 for the new inode_ref of the file
3282 * 2 for lost+found dir's dir_index and dir_item for the file
/* 2 + 2 + 1 + 2 from the list above gives the 7 items reserved here */
3284 trans = btrfs_start_transaction(root, 7);
3286 return PTR_ERR(trans);
3288 btrfs_init_path(&path);
3289 if (rec->errors & I_ERR_NO_INODE_ITEM)
3290 ret = repair_inode_no_item(trans, root, &path, rec);
3291 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3292 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3293 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3294 ret = repair_inode_discount_extent(trans, root, &path, rec);
3295 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3296 ret = repair_inode_isize(trans, root, &path, rec);
3297 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3298 ret = repair_inode_orphan_item(trans, root, &path, rec);
3299 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3300 ret = repair_inode_nlinks(trans, root, &path, rec);
3301 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3302 ret = repair_inode_nbytes(trans, root, &path, rec);
/* The transaction is committed even when a repair step failed */
3303 btrfs_commit_transaction(trans, root);
3304 btrfs_release_path(&path);
/*
 * Check, and when the global repair flag is set try to fix, every inode
 * record collected for @root in @inode_cache.
 *
 * Phases visible below:
 *   1. a root with zero refs only gets a warning if the cache is not empty;
 *   2. while repairing, backrefs are fixed first via repair_inode_backrefs()
 *      in several stages;
 *   3. the root directory record is checked with check_root_dir() and, in
 *      one branch, recreated with btrfs_make_root_dir();
 *   4. every remaining record is removed from the cache, has its error bits
 *      completed, is handed to try_repair_inode(), and is reported together
 *      with its unresolved backrefs if still bad.
 *
 * Returns -1 when error is greater than 0, otherwise 0.
 */
3308 static int check_inode_recs(struct btrfs_root *root,
3309 struct cache_tree *inode_cache)
3311 struct cache_extent *cache;
3312 struct ptr_node *node;
3313 struct inode_record *rec;
3314 struct inode_backref *backref;
3319 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3321 if (btrfs_root_refs(&root->root_item) == 0) {
3322 if (!cache_tree_empty(inode_cache))
3323 fprintf(stderr, "warning line %d\n", __LINE__);
3328 * We need to repair backrefs first because we could change some of the
3329 * errors in the inode recs.
3331 * We also need to go through and delete invalid backrefs first and then
3332 * add the correct ones second. We do this because we may get EEXIST
3333 * when adding back the correct index because we hadn't yet deleted the
3336 * For example, if we were missing a dir index then the directories
3337 * isize would be wrong, so if we fixed the isize to what we thought it
3338 * would be and then fixed the backref we'd still have a invalid fs, so
3339 * we need to add back the dir index and then check to see if the isize
3344 if (stage == 3 && !err)
3347 cache = search_cache_extent(inode_cache, 0);
3348 while (repair && cache) {
3349 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry can be removed from the cache */
3351 cache = next_cache_extent(cache);
3353 /* Need to free everything up and rescan */
3355 remove_cache_extent(inode_cache, &node->cache);
3357 free_inode_rec(rec);
3361 if (list_empty(&rec->backrefs))
3364 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Third argument 0: look the root dir record up without creating it (presumably - TODO confirm) */
3378 rec = get_inode_rec(inode_cache, root_dirid, 0);
3379 BUG_ON(IS_ERR(rec));
3381 ret = check_root_dir(rec);
3383 fprintf(stderr, "root %llu root dir %llu error\n",
3384 (unsigned long long)root->root_key.objectid,
3385 (unsigned long long)root_dirid);
3386 print_inode_error(root, rec);
3391 struct btrfs_trans_handle *trans;
3393 trans = btrfs_start_transaction(root, 1);
3394 if (IS_ERR(trans)) {
3395 err = PTR_ERR(trans);
3400 "root %llu missing its root dir, recreating\n",
3401 (unsigned long long)root->objectid);
3403 ret = btrfs_make_root_dir(trans, root, root_dirid);
3406 btrfs_commit_transaction(trans, root);
3410 fprintf(stderr, "root %llu root dir %llu not found\n",
3411 (unsigned long long)root->root_key.objectid,
3412 (unsigned long long)root_dirid);
/* Final pass: drain the cache one record at a time */
3416 cache = search_cache_extent(inode_cache, 0);
3419 node = container_of(cache, struct ptr_node, cache);
3421 remove_cache_extent(inode_cache, &node->cache);
/* The root dir was handled above; the orphan objectid is not a real inode */
3423 if (rec->ino == root_dirid ||
3424 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3425 free_inode_rec(rec);
3429 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3430 ret = check_orphan_item(root, rec->ino);
3432 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3433 if (can_free_inode_rec(rec)) {
3434 free_inode_rec(rec);
/* Derive the remaining error bits from what was found during the walk */
3439 if (!rec->found_inode_item)
3440 rec->errors |= I_ERR_NO_INODE_ITEM;
3441 if (rec->found_link != rec->nlink)
3442 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3444 ret = try_repair_inode(root, rec);
3445 if (ret == 0 && can_free_inode_rec(rec)) {
3446 free_inode_rec(rec);
3452 if (!(repair && ret == 0))
3454 print_inode_error(root, rec);
/* Complete each backref error mask before printing it */
3455 list_for_each_entry(backref, &rec->backrefs, list) {
3456 if (!backref->found_dir_item)
3457 backref->errors |= REF_ERR_NO_DIR_ITEM;
3458 if (!backref->found_dir_index)
3459 backref->errors |= REF_ERR_NO_DIR_INDEX;
3460 if (!backref->found_inode_ref)
3461 backref->errors |= REF_ERR_NO_INODE_REF;
3462 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3463 " namelen %u name %s filetype %d errors %x",
3464 (unsigned long long)backref->dir,
3465 (unsigned long long)backref->index,
3466 backref->namelen, backref->name,
3467 backref->filetype, backref->errors);
3468 print_ref_error(backref->errors);
3470 free_inode_rec(rec);
3472 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for @objectid from @root_cache, creating and
 * inserting a zeroed one (cache range [objectid, objectid + 1)) when the
 * lookup finds nothing.
 *
 * Returns ERR_PTR(-ENOMEM) when the allocation fails and ERR_PTR(-EEXIST)
 * when the insertion fails.
 */
3475 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3478 struct cache_extent *cache;
3479 struct root_record *rec = NULL;
3482 cache = lookup_cache_extent(root_cache, objectid, 1);
3484 rec = container_of(cache, struct root_record, cache);
3486 rec = calloc(1, sizeof(*rec));
3488 return ERR_PTR(-ENOMEM);
3489 rec->objectid = objectid;
3490 INIT_LIST_HEAD(&rec->backrefs);
3491 rec->cache.start = objectid;
3492 rec->cache.size = 1;
3494 ret = insert_cache_extent(root_cache, &rec->cache);
/*
 * NOTE(review): no free(rec) is visible before this return, so the freshly
 * allocated record appears to leak on this path - confirm.
 */
3496 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (@ref_root, @dir, @name), or allocate a
 * new one and append it to rec->backrefs.
 *
 * Matching compares ref_root, dir, namelen and the name bytes; @index is
 * not part of the match and is only stored on a newly created entry.  The
 * new entry is allocated with room for the name plus a terminating NUL.
 */
3501 static struct root_backref *get_root_backref(struct root_record *rec,
3502 u64 ref_root, u64 dir, u64 index,
3503 const char *name, int namelen)
3505 struct root_backref *backref;
3507 list_for_each_entry(backref, &rec->backrefs, list) {
3508 if (backref->ref_root != ref_root || backref->dir != dir ||
3509 backref->namelen != namelen)
3511 if (memcmp(name, backref->name, namelen))
/* Nothing matched: build a fresh entry with the name stored inline */
3516 backref = calloc(1, sizeof(*backref) + namelen + 1);
3519 backref->ref_root = ref_root;
3521 backref->index = index;
3522 backref->namelen = namelen;
3523 memcpy(backref->name, name, namelen);
3524 backref->name[namelen] = '\0';
3525 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down the root_record embedding @cache: every entry on its backrefs
 * list is unlinked one by one until the list is empty.
 * This function is passed to FREE_EXTENT_CACHE_BASED_TREE() right after its
 * definition.
 */
3529 static void free_root_record(struct cache_extent *cache)
3531 struct root_record *rec;
3532 struct root_backref *backref;
3534 rec = container_of(cache, struct root_record, cache);
3535 while (!list_empty(&rec->backrefs)) {
3536 backref = to_root_backref(rec->backrefs.next);
3537 list_del(&backref->list);
/*
 * Generate the tree-wide free routine for root records from
 * free_root_record().  Presumably this expands to free_root_recs_tree(),
 * which is called elsewhere in this file - confirm against the macro
 * definition.
 */
3544 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing root @root_id was seen.
 *
 * The matching root_record and root_backref are looked up or created, then
 * @errors is merged into the backref error mask.  For every item type except
 * BTRFS_DIR_ITEM_KEY the index is compared with an index recorded earlier
 * (REF_ERR_INDEX_UNMATCH on mismatch).  The matching found_* flag is set,
 * with REF_ERR_DUP_ROOT_REF / REF_ERR_DUP_ROOT_BACKREF flagged when a root
 * ref or root backref is seen twice.  A backref that has both a forward ref
 * and a dir item is marked reachable.
 */
3546 static int add_root_backref(struct cache_tree *root_cache,
3547 u64 root_id, u64 ref_root, u64 dir, u64 index,
3548 const char *name, int namelen,
3549 int item_type, int errors)
3551 struct root_record *rec;
3552 struct root_backref *backref;
3554 rec = get_root_rec(root_cache, root_id);
3555 BUG_ON(IS_ERR(rec));
3556 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3559 backref->errors |= errors;
/* The DIR_ITEM case is excluded from the index comparison below */
3561 if (item_type != BTRFS_DIR_ITEM_KEY) {
3562 if (backref->found_dir_index || backref->found_back_ref ||
3563 backref->found_forward_ref) {
3564 if (backref->index != index)
3565 backref->errors |= REF_ERR_INDEX_UNMATCH;
3567 backref->index = index;
3571 if (item_type == BTRFS_DIR_ITEM_KEY) {
3572 if (backref->found_forward_ref)
3574 backref->found_dir_item = 1;
3575 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3576 backref->found_dir_index = 1;
3577 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3578 if (backref->found_forward_ref)
3579 backref->errors |= REF_ERR_DUP_ROOT_REF;
3580 else if (backref->found_dir_item)
3582 backref->found_forward_ref = 1;
3583 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3584 if (backref->found_back_ref)
3585 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3586 backref->found_back_ref = 1;
/* Reachable means: a forward root ref exists and a dir item names it */
3591 if (backref->found_forward_ref && backref->found_dir_item)
3592 backref->reachable = 1;
/*
 * Drain the inode records in @src_cache and convert their dir item / dir
 * index backrefs into root backrefs in @dst_cache.
 *
 * For the tree-reloc root the source cache is simply freed.  Otherwise each
 * record is removed from @src_cache, tested with is_child_root(), has its
 * backrefs forwarded through add_root_backref() keyed by rec->ino, and is
 * then freed.  A backref carrying an inode ref is treated as a fatal
 * inconsistency (BUG_ON).
 */
3596 static int merge_root_recs(struct btrfs_root *root,
3597 struct cache_tree *src_cache,
3598 struct cache_tree *dst_cache)
3600 struct cache_extent *cache;
3601 struct ptr_node *node;
3602 struct inode_record *rec;
3603 struct inode_backref *backref;
3606 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3607 free_inode_recs_tree(src_cache);
3612 cache = search_cache_extent(src_cache, 0);
3615 node = container_of(cache, struct ptr_node, cache);
3617 remove_cache_extent(src_cache, &node->cache);
3620 ret = is_child_root(root, root->objectid, rec->ino);
3626 list_for_each_entry(backref, &rec->backrefs, list) {
3627 BUG_ON(backref->found_inode_ref);
3628 if (backref->found_dir_item)
3629 add_root_backref(dst_cache, rec->ino,
3630 root->root_key.objectid, backref->dir,
3631 backref->index, backref->name,
3632 backref->namelen, BTRFS_DIR_ITEM_KEY,
3634 if (backref->found_dir_index)
3635 add_root_backref(dst_cache, rec->ino,
3636 root->root_key.objectid, backref->dir,
3637 backref->index, backref->name,
3638 backref->namelen, BTRFS_DIR_INDEX_KEY,
3642 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is referenced
 * consistently.
 *
 * Pass 1 walks the cache and clears the reachable flag of backrefs (the
 * exact conditions depend on found_ref of the referencing root).
 * Pass 2 walks the cache again: unreferenced roots in the
 * BTRFS_FIRST_FREE_OBJECTID..BTRFS_LAST_FREE_OBJECTID range are checked for
 * an orphan item in the tree root and reported as not referenced; for the
 * others each backref has its missing-item error bits filled in, and roots
 * with problems are printed with their unresolved refs.
 *
 * Returns 1 when errors is greater than 0, otherwise 0.
 */
3649 static int check_root_refs(struct btrfs_root *root,
3650 struct cache_tree *root_cache)
3652 struct root_record *rec;
3653 struct root_record *ref_root;
3654 struct root_backref *backref;
3655 struct cache_extent *cache;
/* Make sure a record for the top-level fs tree exists */
3661 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3662 BUG_ON(IS_ERR(rec));
3665 /* fixme: this can not detect circular references */
3668 cache = search_cache_extent(root_cache, 0);
3672 rec = container_of(cache, struct root_record, cache);
3673 cache = next_cache_extent(cache);
3675 if (rec->found_ref == 0)
3678 list_for_each_entry(backref, &rec->backrefs, list) {
3679 if (!backref->reachable)
3682 ref_root = get_root_rec(root_cache,
3684 BUG_ON(IS_ERR(ref_root));
3685 if (ref_root->found_ref > 0)
3688 backref->reachable = 0;
3690 if (rec->found_ref == 0)
/* Second pass: report */
3696 cache = search_cache_extent(root_cache, 0);
3700 rec = container_of(cache, struct root_record, cache);
3701 cache = next_cache_extent(cache);
3703 if (rec->found_ref == 0 &&
3704 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3705 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3706 ret = check_orphan_item(root->fs_info->tree_root,
3712 * If we don't have a root item then we likely just have
3713 * a dir item in a snapshot for this root but no actual
3714 * ref key or anything so it's meaningless.
3716 if (!rec->found_root_item)
3719 fprintf(stderr, "fs tree %llu not referenced\n",
3720 (unsigned long long)rec->objectid);
3724 if (rec->found_ref > 0 && !rec->found_root_item)
/* Complete each backref error mask from its found_* flags */
3726 list_for_each_entry(backref, &rec->backrefs, list) {
3727 if (!backref->found_dir_item)
3728 backref->errors |= REF_ERR_NO_DIR_ITEM;
3729 if (!backref->found_dir_index)
3730 backref->errors |= REF_ERR_NO_DIR_INDEX;
3731 if (!backref->found_back_ref)
3732 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3733 if (!backref->found_forward_ref)
3734 backref->errors |= REF_ERR_NO_ROOT_REF;
3735 if (backref->reachable && backref->errors)
3742 fprintf(stderr, "fs tree %llu refs %u %s\n",
3743 (unsigned long long)rec->objectid, rec->found_ref,
3744 rec->found_root_item ? "" : "not found");
3746 list_for_each_entry(backref, &rec->backrefs, list) {
3747 if (!backref->reachable)
3749 if (!backref->errors && rec->found_root_item)
3751 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3752 " index %llu namelen %u name %s errors %x\n",
3753 (unsigned long long)backref->ref_root,
3754 (unsigned long long)backref->dir,
3755 (unsigned long long)backref->index,
3756 backref->namelen, backref->name,
3758 print_ref_error(backref->errors);
3761 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and feed it to
 * add_root_backref().
 *
 * The name stored after the item is copied into a BTRFS_NAME_LEN buffer; an
 * over-long name is cut to BTRFS_NAME_LEN and flagged with
 * REF_ERR_NAME_TOO_LONG.  For a ROOT_REF key the referenced root is
 * key->offset and the referencing root key->objectid; in the other call the
 * two are swapped.
 */
3764 static int process_root_ref(struct extent_buffer *eb, int slot,
3765 struct btrfs_key *key,
3766 struct cache_tree *root_cache)
3772 struct btrfs_root_ref *ref;
3773 char namebuf[BTRFS_NAME_LEN];
3776 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3778 dirid = btrfs_root_ref_dirid(eb, ref);
3779 index = btrfs_root_ref_sequence(eb, ref);
3780 name_len = btrfs_root_ref_name_len(eb, ref);
3782 if (name_len <= BTRFS_NAME_LEN) {
3786 len = BTRFS_NAME_LEN;
3787 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start directly after the fixed-size root_ref item */
3789 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3791 if (key->type == BTRFS_ROOT_REF_KEY) {
3792 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3793 index, namebuf, len, key->type, error);
3795 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3796 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for the corrupt block cache: recovers the
 * btrfs_corrupt_block that embeds @cache.  It is passed to
 * FREE_EXTENT_CACHE_BASED_TREE() right after its definition.
 */
3801 static void free_corrupt_block(struct cache_extent *cache)
3803 struct btrfs_corrupt_block *corrupt;
3805 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Generate the tree-wide free routine for corrupt block records from
 * free_corrupt_block().  Presumably this expands to
 * free_corrupt_blocks_tree(), which is called elsewhere in this file -
 * confirm against the macro definition.
 */
3809 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3812 * Repair the btree of the given root.
3814 * The fix is to remove the node key in corrupt_blocks cache_tree.
3815 * and rebalance the tree.
3816 * After the fix, the btree should be writeable.
/*
 * Drop every tree block recorded in @corrupt_blocks from the btree of @root.
 *
 * Pass 1: for each record, search down to the recorded level with a
 * COW-enabled, non-balancing search (ins_len 0), remember the block pointer
 * found there, delete the pointer with btrfs_del_ptr() and free the extent
 * it pointed at.
 * Pass 2: search again for each recorded key with ins_len -1 so that
 * btrfs_search_slot() rebalances the tree.
 * The transaction is committed and the path released at the end.
 */
3818 static int repair_btree(struct btrfs_root *root,
3819 struct cache_tree *corrupt_blocks)
3821 struct btrfs_trans_handle *trans;
3822 struct btrfs_path path;
3823 struct btrfs_corrupt_block *corrupt;
3824 struct cache_extent *cache;
3825 struct btrfs_key key;
3830 if (cache_tree_empty(corrupt_blocks))
3833 trans = btrfs_start_transaction(root, 1);
3834 if (IS_ERR(trans)) {
3835 ret = PTR_ERR(trans);
3836 fprintf(stderr, "Error starting transaction: %s\n",
3840 btrfs_init_path(&path);
3841 cache = first_cache_extent(corrupt_blocks);
3843 corrupt = container_of(cache, struct btrfs_corrupt_block,
/* Stop the search at the level recorded for the corrupt block */
3845 level = corrupt->level;
3846 path.lowest_level = level;
3847 key.objectid = corrupt->key.objectid;
3848 key.type = corrupt->key.type;
3849 key.offset = corrupt->key.offset;
3852 * Here we don't want to do any tree balance, since it may
3853 * cause a balance with corrupted brother leaf/node,
3854 * so ins_len set to 0 here.
3855 * Balance will be done after all corrupt node/leaf is deleted.
3857 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Remember the child block address before its pointer is removed */
3860 offset = btrfs_node_blockptr(path.nodes[level],
3863 /* Remove the ptr */
3864 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3868 * Remove the corresponding extent
3869 * return value is not concerned.
3871 btrfs_release_path(&path);
3872 ret = btrfs_free_extent(trans, root, offset,
3873 root->fs_info->nodesize, 0,
3874 root->root_key.objectid, level - 1, 0);
3875 cache = next_cache_extent(cache);
3878 /* Balance the btree using btrfs_search_slot() */
3879 cache = first_cache_extent(corrupt_blocks);
3881 corrupt = container_of(cache, struct btrfs_corrupt_block,
3883 memcpy(&key, &corrupt->key, sizeof(key));
3884 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3887 /* return will always >0 since it won't find the item */
3889 btrfs_release_path(&path);
3890 cache = next_cache_extent(cache);
3893 btrfs_commit_transaction(trans, root);
3894 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * @root:       tree to check
 * @root_cache: cache of root records shared across all roots
 * @wc:         walk control state, reset here for this root
 *
 * Outline of the visible steps:
 *   - install a local corrupt-block cache on fs_info for this tree only;
 *   - register the root in @root_cache (except for the tree-reloc root);
 *   - move pending orphan data extents onto their inode records;
 *   - validate the root block with btrfs_check_leaf()/btrfs_check_node();
 *   - start the walk from the root node, or from drop_progress when the
 *     root is partially dropped;
 *   - walk the tree with walk_down_tree()/walk_up_tree();
 *   - list corrupt tree blocks and call repair_btree() for them;
 *   - merge the per-root root records and run check_inode_recs();
 *   - free the corrupt-block cache and the orphan extent list.
 */
3898 static int check_fs_root(struct btrfs_root *root,
3899 struct cache_tree *root_cache,
3900 struct walk_control *wc)
3906 struct btrfs_path path;
3907 struct shared_node root_node;
3908 struct root_record *rec;
3909 struct btrfs_root_item *root_item = &root->root_item;
3910 struct cache_tree corrupt_blocks;
3911 struct orphan_data_extent *orphan;
3912 struct orphan_data_extent *tmp;
3913 enum btrfs_tree_block_status status;
3914 struct node_refs nrefs;
3917 * Reuse the corrupt_block cache tree to record corrupted tree block
3919 * Unlike the usage in extent tree check, here we do it in a per
3920 * fs/subvol tree base.
3922 cache_tree_init(&corrupt_blocks);
3923 root->fs_info->corrupt_blocks = &corrupt_blocks;
3925 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3926 rec = get_root_rec(root_cache, root->root_key.objectid);
3927 BUG_ON(IS_ERR(rec));
3928 if (btrfs_root_refs(root_item) > 0)
3929 rec->found_root_item = 1;
3932 btrfs_init_path(&path);
3933 memset(&root_node, 0, sizeof(root_node));
3934 cache_tree_init(&root_node.root_cache);
3935 cache_tree_init(&root_node.inode_cache);
3936 memset(&nrefs, 0, sizeof(nrefs));
3938 /* Move the orphan extent record to corresponding inode_record */
3939 list_for_each_entry_safe(orphan, tmp,
3940 &root->orphan_data_extents, list) {
3941 struct inode_record *inode;
3943 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3945 BUG_ON(IS_ERR(inode));
3946 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3947 list_move(&orphan->list, &inode->orphan_extents);
/* Prime the walk control: the root level is the only active node so far */
3950 level = btrfs_header_level(root->node);
3951 memset(wc->nodes, 0, sizeof(wc->nodes));
3952 wc->nodes[level] = &root_node;
3953 wc->active_node = level;
3954 wc->root_level = level;
3956 /* We may not have checked the root block, lets do that now */
3957 if (btrfs_is_leaf(root->node))
3958 status = btrfs_check_leaf(root, NULL, root->node);
3960 status = btrfs_check_node(root, NULL, root->node);
3961 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * A live root, or one whose drop has not progressed, is walked from slot 0
 * of its root node; the path takes its own reference on that node.
 */
3964 if (btrfs_root_refs(root_item) > 0 ||
3965 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3966 path.nodes[level] = root->node;
3967 extent_buffer_get(root->node);
3968 path.slots[level] = 0;
3970 struct btrfs_key key;
3971 struct btrfs_disk_key found_key;
/* Partially dropped root: resume at the recorded drop_progress key/level */
3973 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3974 level = root_item->drop_level;
3975 path.lowest_level = level;
3976 if (level > btrfs_header_level(root->node) ||
3977 level >= BTRFS_MAX_LEVEL) {
3978 error("ignoring invalid drop level: %u", level);
3981 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3984 btrfs_node_key(path.nodes[level], &found_key,
3986 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3987 sizeof(found_key)));
3991 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3997 wret = walk_up_tree(root, &path, wc, &level);
4004 btrfs_release_path(&path);
4006 if (!cache_tree_empty(&corrupt_blocks)) {
4007 struct cache_extent *cache;
4008 struct btrfs_corrupt_block *corrupt;
4010 printf("The following tree block(s) is corrupted in tree %llu:\n",
4011 root->root_key.objectid);
4012 cache = first_cache_extent(&corrupt_blocks);
4014 corrupt = container_of(cache,
4015 struct btrfs_corrupt_block,
4017 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4018 cache->start, corrupt->level,
4019 corrupt->key.objectid, corrupt->key.type,
4020 corrupt->key.offset);
4021 cache = next_cache_extent(cache);
4024 printf("Try to repair the btree for root %llu\n",
4025 root->root_key.objectid);
4026 ret = repair_btree(root, &corrupt_blocks);
4028 fprintf(stderr, "Failed to repair btree: %s\n",
4031 printf("Btree for root %llu is fixed\n",
4032 root->root_key.objectid);
4036 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Flush the inode record still being accumulated by the walk */
4040 if (root_node.current) {
4041 root_node.current->checked = 1;
4042 maybe_free_inode_rec(&root_node.inode_cache,
4046 err = check_inode_recs(root, &root_node.inode_cache);
/* The local cache goes out of scope: detach it from fs_info */
4050 free_corrupt_blocks_tree(&corrupt_blocks);
4051 root->fs_info->corrupt_blocks = NULL;
4052 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Iterate over the tree root and check every fs/subvolume tree found there.
 *
 * ROOT_ITEM keys whose objectid passes fs_root_objectid() are read (the
 * tree-reloc root without caching, all others through btrfs_read_fs_root()
 * with key.offset set to (u64)-1) and handed to check_fs_root().
 * ROOT_REF and ROOT_BACKREF keys are passed to process_root_ref().
 * When check_fs_root() returns -EAGAIN, or the tree root node has changed,
 * @root_cache is freed and the path released - presumably so the scan can
 * start over (TODO confirm, the jump itself is not part of this text).
 */
4056 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4057 struct cache_tree *root_cache)
4059 struct btrfs_path path;
4060 struct btrfs_key key;
4061 struct walk_control wc;
4062 struct extent_buffer *leaf, *tree_node;
4063 struct btrfs_root *tmp_root;
4064 struct btrfs_root *tree_root = fs_info->tree_root;
/* Optional progress reporting */
4068 if (ctx.progress_enabled) {
4069 ctx.tp = TASK_FS_ROOTS;
4070 task_start(ctx.info);
4074 * Just in case we made any changes to the extent tree that weren't
4075 * reflected into the free space cache yet.
4078 reset_cached_block_groups(fs_info);
4079 memset(&wc, 0, sizeof(wc));
4080 cache_tree_init(&wc.shared);
4081 btrfs_init_path(&path);
4086 key.type = BTRFS_ROOT_ITEM_KEY;
4087 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree root node so a later change can be detected */
4092 tree_node = tree_root->node;
4094 if (tree_node != tree_root->node) {
4095 free_root_recs_tree(root_cache);
4096 btrfs_release_path(&path);
4099 leaf = path.nodes[0];
4100 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4101 ret = btrfs_next_leaf(tree_root, &path);
4107 leaf = path.nodes[0];
4109 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4110 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4111 fs_root_objectid(key.objectid)) {
4112 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4113 tmp_root = btrfs_read_fs_root_no_cache(
4116 key.offset = (u64)-1;
4117 tmp_root = btrfs_read_fs_root(
4120 if (IS_ERR(tmp_root)) {
4124 ret = check_fs_root(tmp_root, root_cache, &wc);
4125 if (ret == -EAGAIN) {
4126 free_root_recs_tree(root_cache);
4127 btrfs_release_path(&path);
/* The uncached reloc root was read privately above, so free it here */
4132 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4133 btrfs_free_fs_root(tmp_root);
4134 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4135 key.type == BTRFS_ROOT_BACKREF_KEY) {
4136 process_root_ref(leaf, path.slots[0], &key,
4143 btrfs_release_path(&path);
4145 free_extent_cache_tree(&wc.shared);
4146 if (!cache_tree_empty(&wc.shared))
4147 fprintf(stderr, "warning line %d\n", __LINE__);
4149 task_stop(ctx.info);
4155 * Find the @index according to @ino and name.
4156 * Notice: time efficiency is O(N)
4158 * @root: the root of the fs/file tree
4159 * @index_ret: the index as return value
4160 * @namebuf: the name to match
4161 * @name_len: the length of name to match
4162 * @file_type: the file_type of INODE_ITEM to match
4164 * Returns 0 if found and *@index_ret will be set to the matching index
4165 * Returns <0 if not found and *@index_ret will be (u64)-1
/*
 * Look through the DIR_INDEX items of directory @dirid, starting the search
 * at the highest possible index (key.offset = (u64)-1) and stepping to the
 * previous DIR_INDEX item, for an entry that points at INODE_ITEM
 * @location_id and whose file type and name match @file_type and @namebuf.
 * On a match the key offset of that item is stored in *@index_ret.
 */
4167 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4168 u64 *index_ret, char *namebuf, u32 name_len,
4171 struct btrfs_path path;
4172 struct extent_buffer *node;
4173 struct btrfs_dir_item *di;
4174 struct btrfs_key key;
4175 struct btrfs_key location;
4176 char name[BTRFS_NAME_LEN] = {0};
4188 /* search from the last index */
4189 key.objectid = dirid;
4190 key.offset = (u64)-1;
4191 key.type = BTRFS_DIR_INDEX_KEY;
4193 btrfs_init_path(&path);
4194 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4199 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/*
 * (u64)-1 is the "no index" value other code in this file compares against;
 * the former "(64)-1" was the integer expression 64 - 1, i.e. 63.
 */
4202 *index_ret = (u64)-1;
4205 /* Check whether inode_id/filetype/name match */
4206 node = path.nodes[0];
4207 slot = path.slots[0];
4208 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4209 total = btrfs_item_size_nr(node, slot);
4210 while (cur < total) {
4212 len = btrfs_dir_name_len(node, di);
4213 data_len = btrfs_dir_data_len(node, di);
4215 btrfs_dir_item_key_to_cpu(node, di, &location);
4216 if (location.objectid != location_id ||
4217 location.type != BTRFS_INODE_ITEM_KEY ||
4218 location.offset != 0)
4221 filetype = btrfs_dir_type(node, di);
4222 if (file_type != filetype)
/* Never copy more than the local name buffer can hold. */
4225 if (len > BTRFS_NAME_LEN)
4226 len = BTRFS_NAME_LEN;
4228 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4229 if (len != name_len || strncmp(namebuf, name, len))
/* Match: the index is the offset of the DIR_INDEX key itself. */
4232 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4233 *index_ret = key.offset;
/* Advance to the next entry packed in the same item. */
4237 len += sizeof(*di) + data_len;
4238 di = (struct btrfs_dir_item *)((char *)di + len);
4244 btrfs_release_path(&path);
4249 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4250 * INODE_REF/INODE_EXTREF match.
4252 * @root: the root of the fs/file tree
4253 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4254 * value while find index
4255 * @location_key: location key of the struct btrfs_dir_item to match
4256 * @name: the name to match
4257 * @namelen: the length of name
4258 * @file_type: the type of file to match
4260 * Return 0 if no error occurred.
4261 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4262 * DIR_ITEM/DIR_INDEX
4263 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4264 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Look up the DIR_ITEM/DIR_INDEX item at @key and walk the entries packed in
 * it, comparing each entry's location key, file type and name with
 * @location_key, @file_type and @name.
 */
4266 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4267 struct btrfs_key *location_key, char *name,
4268 u32 namelen, u8 file_type)
4270 struct btrfs_path path;
4271 struct extent_buffer *node;
4272 struct btrfs_dir_item *di;
4273 struct btrfs_key location;
4274 char namebuf[BTRFS_NAME_LEN] = {0};
4283 /* get the index by traversing all index */
/* A DIR_INDEX key with offset (u64)-1 means "index unknown": resolve it first. */
4284 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4285 ret = find_dir_index(root, key->objectid,
4286 location_key->objectid, &key->offset,
4287 name, namelen, file_type);
4289 ret = DIR_INDEX_MISSING;
4293 btrfs_init_path(&path);
4294 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4296 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4301 /* Check whether inode_id/filetype/name match */
4302 node = path.nodes[0];
4303 slot = path.slots[0];
4304 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4305 total = btrfs_item_size_nr(node, slot);
4306 while (cur < total) {
/* Assume a mismatch for this entry until every comparison below has passed. */
4307 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4308 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4310 len = btrfs_dir_name_len(node, di);
4311 data_len = btrfs_dir_data_len(node, di);
4313 btrfs_dir_item_key_to_cpu(node, di, &location);
4314 if (location.objectid != location_key->objectid ||
4315 location.type != location_key->type ||
4316 location.offset != location_key->offset)
4319 filetype = btrfs_dir_type(node, di);
4320 if (file_type != filetype)
/*
 * NOTE(review): len is clamped before the warning is emitted, so the
 * length printed by the warning is the trimmed one, not the on-disk one.
 */
4323 if (len > BTRFS_NAME_LEN) {
4324 len = BTRFS_NAME_LEN;
4325 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4327 key->type == BTRFS_DIR_ITEM_KEY ?
4328 "DIR_ITEM" : "DIR_INDEX",
4329 key->objectid, key->offset, len);
4331 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4333 if (len != namelen || strncmp(namebuf, name, len))
/* Advance to the next entry packed in the same item. */
4339 len += sizeof(*di) + data_len;
4340 di = (struct btrfs_dir_item *)((char *)di + len);
4345 btrfs_release_path(&path);
4350 * Prints inode ref error message
/*
 * Report the DIR_ITEM/DIR_INDEX problems recorded in the @err bitmap for the
 * INODE_REF described by @key, @index and @namebuf.  An INODE_REF found on
 * the root directory (objectid BTRFS_FIRST_FREE_OBJECTID) gets its own
 * message.
 */
4352 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4353 u64 index, const char *namebuf, int name_len,
4354 u8 filetype, int err)
4359 /* root dir error */
4360 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4362 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4363 root->objectid, key->objectid, key->offset, namebuf);
/* The DIR_ITEM key is (parent dir, name hash); key->offset is the parent dir. */
4368 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4369 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4370 root->objectid, key->offset,
4371 btrfs_name_hash(namebuf, name_len),
4372 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4374 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4375 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4376 root->objectid, key->offset, index,
/* Test the DIR_INDEX bit here; the DIR_ITEM bit belongs to the branch above. */
4377 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4382 * Insert the missing inode item.
4384 * Returns 0 means success.
4385 * Returns <0 means error.
/*
 * Create the INODE_ITEM for @ino inside a transaction after searching for
 * its key in @root.
 */
4387 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4390 struct btrfs_key key;
4391 struct btrfs_trans_handle *trans;
4392 struct btrfs_path path;
4396 key.type = BTRFS_INODE_ITEM_KEY;
4399 btrfs_init_path(&path);
4400 trans = btrfs_start_transaction(root, 1);
4401 if (IS_ERR(trans)) {
4406 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/* Leave the insert path on a search error (ret < 0) or when the item already exists (ret == 0). */
4407 if (ret < 0 || !ret)
4410 /* insert inode item */
/* NOTE(review): the return value of create_inode_item_lowmem() is not captured on this line. */
4411 create_inode_item_lowmem(trans, root, ino, filetype);
/* NOTE(review): the return value of btrfs_commit_transaction() is not captured on this line. */
4414 btrfs_commit_transaction(trans, root);
4417 error("failed to repair root %llu INODE ITEM[%llu] missing",
4418 root->objectid, ino);
4419 btrfs_release_path(&path);
4424 * The ternary means dir item, dir index and relative inode ref.
4425 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4426 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4428 * If two of the three are missing or mismatched, delete the existing one.
4429 * If one of the three is missing or mismatched, add the missing one.
4431 * Returns 0 on success.
4432 * Returns non-zero on error.
/*
 * Repair the (DIR_ITEM, DIR_INDEX, INODE_REF) triple of one name: depending
 * on how many of the three are flagged in @err, the name is either unlinked
 * (btrfs_unlink) or linked again (btrfs_add_link) inside one transaction.
 */
4434 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4435 u64 index, char *name, int name_len, u8 filetype,
4438 struct btrfs_trans_handle *trans;
4443 * stage shall be one of following valild values:
4444 * 0: Fine, nothing to do.
4445 * 1: One of three is wrong, so add missing one.
4446 * 2: Two of three is wrong, so delete existed one.
/* Each of the three tests below corresponds to one member of the triple. */
4448 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4450 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4452 if (err & (INODE_REF_MISSING))
4455 /* stage must be smaller than 3 */
4458 trans = btrfs_start_transaction(root, 1);
4460 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4465 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4466 filetype, &index, 1, 1);
4470 btrfs_commit_transaction(trans, root);
4473 error("fail to repair inode %llu name %s filetype %u",
4474 ino, name, filetype);
4476 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4477 stage == 2 ? "Delete" : "Add",
4478 ino, name, filetype);
4484 * Traverse the given INODE_REF and call find_dir_item() to find related
4485 * DIR_ITEM/DIR_INDEX.
4487 * @root: the root of the fs/file tree
4488 * @ref_key: the key of the INODE_REF
4489 * @path the path provides node and slot
4490 * @refs: the count of INODE_REF
4491 * @mode: the st_mode of INODE_ITEM
4492 * @name_ret: returns with the first ref's name
4493 * @name_len_ret: len of the name_ret
4495 * Return 0 if no error occurred.
/*
 * Walk every name packed in the INODE_REF item that @path points at and look
 * up the matching DIR_INDEX and DIR_ITEM in the parent directory
 * (@ref_key->offset) with find_dir_item().  Problems are optionally repaired
 * with repair_ternary_lowmem() and reported with print_inode_ref_err().
 */
4497 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4498 struct btrfs_path *path, char *name_ret,
4499 u32 *namelen_ret, u64 *refs_ret, int mode)
4501 struct btrfs_key key;
4502 struct btrfs_key location;
4503 struct btrfs_inode_ref *ref;
4504 struct extent_buffer *node;
4505 char namebuf[BTRFS_NAME_LEN] = {0};
4515 int need_research = 0;
4523 /* since after repair, path and the dir item may be changed */
4524 if (need_research) {
4526 btrfs_release_path(path);
4527 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4528 /* the item was deleted, let path point to the last checked item */
4530 if (path->slots[0] == 0)
4531 btrfs_prev_leaf(root, path);
/* Every DIR_ITEM/DIR_INDEX entry for this inode must point at its INODE_ITEM. */
4539 location.objectid = ref_key->objectid;
4540 location.type = BTRFS_INODE_ITEM_KEY;
4541 location.offset = 0;
4542 node = path->nodes[0];
4543 slot = path->slots[0];
/* memset() takes a byte count, so pass the size of the whole buffer. */
4545 memset(namebuf, 0, sizeof(namebuf));
4546 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4547 total = btrfs_item_size_nr(node, slot);
4550 /* Update inode ref count */
4553 index = btrfs_inode_ref_index(node, ref);
4554 name_len = btrfs_inode_ref_name_len(node, ref);
/* len is the number of name bytes copied; it is capped at BTRFS_NAME_LEN. */
4556 if (name_len <= BTRFS_NAME_LEN) {
4559 len = BTRFS_NAME_LEN;
4560 warning("root %llu INODE_REF[%llu %llu] name too long",
4561 root->objectid, ref_key->objectid, ref_key->offset);
4564 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4566 /* copy the first name found to name_ret */
4567 if (refs == 1 && name_ret) {
4568 memcpy(name_ret, namebuf, len);
4572 /* Check root dir ref */
4573 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4574 if (index != 0 || len != strlen("..") ||
4575 strncmp("..", namebuf, len) ||
4576 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4577 /* set err bits then repair will delete the ref */
4578 err |= DIR_INDEX_MISSING;
4579 err |= DIR_ITEM_MISSING;
4584 /* Find related DIR_INDEX */
4585 key.objectid = ref_key->offset;
4586 key.type = BTRFS_DIR_INDEX_KEY;
4588 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4589 imode_to_type(mode));
4591 /* Find related dir_item */
4592 key.objectid = ref_key->offset;
4593 key.type = BTRFS_DIR_ITEM_KEY;
4594 key.offset = btrfs_name_hash(namebuf, len);
4595 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4596 imode_to_type(mode));
/*
 * NOTE(review): the repair and the report below are given name_len (the
 * on-disk length), not len (the capped length actually held in namebuf).
 */
4598 if (tmp_err && repair) {
4599 ret = repair_ternary_lowmem(root, ref_key->offset,
4600 ref_key->objectid, index, namebuf,
4601 name_len, imode_to_type(mode),
4608 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4609 imode_to_type(mode), tmp_err);
/* Step to the next name packed in the same INODE_REF item. */
4611 len = sizeof(*ref) + name_len;
4612 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4623 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4624 * DIR_ITEM/DIR_INDEX.
4626 * @root: the root of the fs/file tree
4627 * @ref_key: the key of the INODE_EXTREF
4628 * @refs: the count of INODE_EXTREF
4629 * @mode: the st_mode of INODE_ITEM
4631 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the INODE_EXTREF item at @node/@slot and look up
 * the matching DIR_INDEX and DIR_ITEM in the entry's parent directory with
 * find_dir_item().
 */
4633 static int check_inode_extref(struct btrfs_root *root,
4634 struct btrfs_key *ref_key,
4635 struct extent_buffer *node, int slot, u64 *refs,
4638 struct btrfs_key key;
4639 struct btrfs_key location;
4640 struct btrfs_inode_extref *extref;
4641 char namebuf[BTRFS_NAME_LEN] = {0};
/* Every DIR_ITEM/DIR_INDEX entry for this inode must point at its INODE_ITEM. */
4651 location.objectid = ref_key->objectid;
4652 location.type = BTRFS_INODE_ITEM_KEY;
4653 location.offset = 0;
4655 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4656 total = btrfs_item_size_nr(node, slot);
4659 /* update inode ref count */
4661 name_len = btrfs_inode_extref_name_len(node, extref);
4662 index = btrfs_inode_extref_index(node, extref);
4663 parent = btrfs_inode_extref_parent(node, extref);
/* len is the number of name bytes copied; it is capped at BTRFS_NAME_LEN. */
4664 if (name_len <= BTRFS_NAME_LEN) {
4667 len = BTRFS_NAME_LEN;
4668 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4669 root->objectid, ref_key->objectid, ref_key->offset);
4671 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4673 /* Check root dir ref name */
4674 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4675 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4676 root->objectid, ref_key->objectid, ref_key->offset,
4678 err |= ROOT_DIR_ERROR;
/*
 * find_dir_item() compares its u8 file_type argument with the type stored in
 * the dir entry, so the st_mode has to be converted with imode_to_type()
 * first, as check_inode_ref() does.
 */
4681 /* find related dir_index */
4682 key.objectid = parent;
4683 key.type = BTRFS_DIR_INDEX_KEY;
4685 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4688 /* find related dir_item */
4689 key.objectid = parent;
4690 key.type = BTRFS_DIR_ITEM_KEY;
4691 key.offset = btrfs_name_hash(namebuf, len);
4692 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
/* Step to the next entry packed in the same INODE_EXTREF item. */
4695 len = sizeof(*extref) + name_len;
4696 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4706 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4707 * DIR_ITEM/DIR_INDEX match.
4708 * Return with @index_ret.
4710 * @root: the root of the fs/file tree
4711 * @key: the key of the INODE_REF/INODE_EXTREF
4712 * @name: the name in the INODE_REF/INODE_EXTREF
4713 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4714 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4715 * value (u64)-1 means do not check index
4716 * @ext_ref: the EXTENDED_IREF feature
4718 * Return 0 if no error occurred.
4719 * Return >0 for error bitmap
/*
 * Search the INODE_REF item at @key for an entry whose name (and, unless
 * *@index_ret is (u64)-1, whose index) matches; afterwards the same search is
 * done in the INODE_EXTREF item derived from @key.
 *
 * NOTE(review): the second half rewrites the caller's @key (type and offset).
 */
4721 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4722 char *name, int namelen, u64 *index_ret,
4723 unsigned int ext_ref)
4725 struct btrfs_path path;
4726 struct btrfs_inode_ref *ref;
4727 struct btrfs_inode_extref *extref;
4728 struct extent_buffer *node;
4729 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4742 btrfs_init_path(&path);
4743 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4745 ret = INODE_REF_MISSING;
4749 node = path.nodes[0];
4750 slot = path.slots[0];
4752 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4753 total = btrfs_item_size_nr(node, slot);
4755 /* Iterate all entry of INODE_REF */
4756 while (cur < total) {
/* Assume "missing" for this entry until all comparisons below have passed. */
4757 ret = INODE_REF_MISSING;
4759 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4760 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 from the caller disables the index comparison. */
4761 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* The name must fit both inside the item and inside ref_namebuf. */
4764 if (cur + sizeof(*ref) + ref_namelen > total ||
4765 ref_namelen > BTRFS_NAME_LEN) {
4766 warning("root %llu INODE %s[%llu %llu] name too long",
4768 key->type == BTRFS_INODE_REF_KEY ?
4770 key->objectid, key->offset);
4772 if (cur + sizeof(*ref) > total)
4774 len = min_t(u32, total - cur - sizeof(*ref),
4780 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4783 if (len != namelen || strncmp(ref_namebuf, name, len))
4786 *index_ret = ref_index;
/* Step to the next name packed in the same INODE_REF item. */
4790 len = sizeof(*ref) + ref_namelen;
4791 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4796 /* Skip if not support EXTENDED_IREF feature */
4800 btrfs_release_path(&path);
4801 btrfs_init_path(&path);
/* Turn @key into the INODE_EXTREF key for (parent dir, name). */
4803 dir_id = key->offset;
4804 key->type = BTRFS_INODE_EXTREF_KEY;
4805 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4807 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4809 ret = INODE_REF_MISSING;
4813 node = path.nodes[0];
4814 slot = path.slots[0];
4816 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4818 total = btrfs_item_size_nr(node, slot);
4820 /* Iterate all entry of INODE_EXTREF */
4821 while (cur < total) {
4822 ret = INODE_REF_MISSING;
4824 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4825 ref_index = btrfs_inode_extref_index(node, extref);
4826 parent = btrfs_inode_extref_parent(node, extref);
4827 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Entries with the same hash may belong to a different parent directory. */
4830 if (parent != dir_id)
4833 if (ref_namelen <= BTRFS_NAME_LEN) {
4836 len = BTRFS_NAME_LEN;
4837 warning("root %llu INODE %s[%llu %llu] name too long",
4839 key->type == BTRFS_INODE_REF_KEY ?
4841 key->objectid, key->offset);
4843 read_extent_buffer(node, ref_namebuf,
4844 (unsigned long)(extref + 1), len);
4846 if (len != namelen || strncmp(ref_namebuf, name, len))
4849 *index_ret = ref_index;
/* Step to the next entry packed in the same INODE_EXTREF item. */
4854 len = sizeof(*extref) + ref_namelen;
4855 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4860 btrfs_release_path(&path);
/*
 * Report the problems recorded in the @err bitmap for one directory entry:
 * DIR_ITEM, DIR_INDEX, INODE_ITEM and INODE_REF errors each get their own
 * message.
 */
4864 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4865 u64 ino, u64 index, const char *namebuf,
4866 int name_len, u8 filetype, int err)
4868 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4869 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4870 root->objectid, key->objectid, key->offset, namebuf,
4872 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
4875 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4876 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4877 root->objectid, key->objectid, index, namebuf, filetype,
/* Test the DIR_INDEX bit here; the DIR_ITEM bit belongs to the branch above. */
4878 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
4881 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4883 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4884 root->objectid, ino, index, namebuf, filetype,
4885 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
4888 if (err & INODE_REF_MISSING)
4890 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4891 root->objectid, ino, key->objectid, namebuf, filetype);
4896 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
4898 * Returns error after repair
/*
 * Repair one directory entry in two steps: a missing INODE_ITEM is recreated
 * with repair_inode_item_missing(), any remaining DIR_ITEM/DIR_INDEX/INODE_REF
 * problem is passed to repair_ternary_lowmem().  Bits of @err are cleared for
 * the problems that were handled.
 */
4900 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
4901 u64 index, u8 filetype, char *namebuf, u32 name_len,
4906 if (err & INODE_ITEM_MISSING) {
4907 ret = repair_inode_item_missing(root, ino, filetype);
4909 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/* Anything left besides the INODE_ITEM bits concerns the dir item/index/ref triple. */
4912 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
4913 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
4914 name_len, filetype, err);
4916 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
4917 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
4918 err &= ~(INODE_REF_MISSING);
/*
 * Walk the items of key type @type (DIR_ITEM or DIR_INDEX, see
 * count_dir_isize()) that belong to directory @ino, starting from the highest
 * offset and moving backwards, and walk the entries packed in each item.
 */
4924 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
4927 struct btrfs_key key;
4928 struct btrfs_path path;
4930 struct btrfs_dir_item *di;
4940 key.offset = (u64)-1;
4942 btrfs_init_path(&path);
4943 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4948 /* if found, go to special case */
4953 ret = btrfs_previous_item(root, &path, ino, type);
4961 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
4963 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
4965 while (cur < total) {
4966 len = btrfs_dir_name_len(path.nodes[0], di);
/* Names longer than BTRFS_NAME_LEN are counted as BTRFS_NAME_LEN. */
4967 if (len > BTRFS_NAME_LEN)
4968 len = BTRFS_NAME_LEN;
4971 len += btrfs_dir_data_len(path.nodes[0], di);
4973 di = (struct btrfs_dir_item *)((char *)di + len);
4979 btrfs_release_path(&path);
/*
 * Compute the size of directory @ino as the sum of the values returned by
 * __count_dir_isize() for its DIR_ITEM and DIR_INDEX items, and store it in
 * *@size.
 */
4983 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
4990 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
4994 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
4998 *size = item_size + index_size;
5002 error("failed to count root %llu INODE[%llu] root size",
5003 root->objectid, ino);
5008 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5009 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5011 * @root: the root of the fs/file tree
5012 * @key: the key of the INODE_REF/INODE_EXTREF
5014 * @size: the st_size of the INODE_ITEM
5015 * @ext_ref: the EXTENDED_IREF feature
5017 * Return 0 if no error occurred.
5018 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Walk every entry packed in the DIR_ITEM/DIR_INDEX item at @di_key and
 * cross-check it: name hash (DIR_ITEM only), the INODE_ITEM it points at,
 * the matching INODE_REF/INODE_EXTREF and the sibling DIR_INDEX/DIR_ITEM.
 * Problems are optionally repaired with repair_dir_item() and reported with
 * print_dir_item_err().  The name lengths seen are added to *@size.
 */
5020 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5021 struct btrfs_path *path, u64 *size,
5022 unsigned int ext_ref)
5024 struct btrfs_dir_item *di;
5025 struct btrfs_inode_item *ii;
5026 struct btrfs_key key;
5027 struct btrfs_key location;
5028 struct extent_buffer *node;
5030 char namebuf[BTRFS_NAME_LEN] = {0};
5042 int need_research = 0;
5045 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5046 * ignore index check.
5048 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5049 index = di_key->offset;
5056 /* since after repair, path and the dir item may be changed */
5057 if (need_research) {
5059 err |= DIR_COUNT_AGAIN;
5060 btrfs_release_path(path);
5061 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5062 /* the item was deleted, let path point the last checked item */
5064 if (path->slots[0] == 0)
5065 btrfs_prev_leaf(root, path);
5073 node = path->nodes[0];
5074 slot = path->slots[0];
5076 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5077 total = btrfs_item_size_nr(node, slot);
/* memset() takes a byte count, so pass the size of the whole buffer. */
5078 memset(namebuf, 0, sizeof(namebuf));
5080 while (cur < total) {
5081 data_len = btrfs_dir_data_len(node, di);
5084 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5086 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5087 di_key->objectid, di_key->offset, data_len);
5089 name_len = btrfs_dir_name_len(node, di);
/* len is the number of name bytes copied; it is capped at BTRFS_NAME_LEN. */
5090 if (name_len <= BTRFS_NAME_LEN) {
5093 len = BTRFS_NAME_LEN;
5094 warning("root %llu %s[%llu %llu] name too long",
5096 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5097 di_key->objectid, di_key->offset);
5099 (*size) += name_len;
5100 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5102 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key's offset must be the hash of the name it stores. */
5104 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5105 di_key->offset != btrfs_name_hash(namebuf, len)) {
5107 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5108 root->objectid, di_key->objectid, di_key->offset,
5109 namebuf, len, filetype, di_key->offset,
5110 btrfs_name_hash(namebuf, len));
5113 btrfs_dir_item_key_to_cpu(node, di, &location);
5114 /* Ignore related ROOT_ITEM check */
5115 if (location.type == BTRFS_ROOT_ITEM_KEY)
5118 btrfs_release_path(path);
5119 /* Check relative INODE_ITEM(existence/filetype) */
5120 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5122 tmp_err |= INODE_ITEM_MISSING;
5126 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5127 struct btrfs_inode_item);
5128 mode = btrfs_inode_mode(path->nodes[0], ii);
5129 if (imode_to_type(mode) != filetype) {
5130 tmp_err |= INODE_ITEM_MISMATCH;
5134 /* Check relative INODE_REF/INODE_EXTREF */
5135 key.objectid = location.objectid;
5136 key.type = BTRFS_INODE_REF_KEY;
5137 key.offset = di_key->objectid;
5138 tmp_err |= find_inode_ref(root, &key, namebuf, len,
/*
 * NOTE(review): key.type was set to BTRFS_INODE_REF_KEY just above (and
 * find_inode_ref() may change it to BTRFS_INODE_EXTREF_KEY), so the test
 * against BTRFS_DIR_ITEM_KEY below looks like it was meant to be
 * di_key->type - confirm.  The hash and the lookup below also use name_len,
 * not the capped len.
 */
5141 /* check relative INDEX/ITEM */
5142 key.objectid = di_key->objectid;
5143 if (key.type == BTRFS_DIR_ITEM_KEY) {
5144 key.type = BTRFS_DIR_INDEX_KEY;
5147 key.type = BTRFS_DIR_ITEM_KEY;
5148 key.offset = btrfs_name_hash(namebuf, name_len);
5151 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5152 name_len, filetype);
5153 /* find_dir_item may find index */
5154 if (key.type == BTRFS_DIR_INDEX_KEY)
5158 if (tmp_err && repair) {
5159 ret = repair_dir_item(root, di_key->objectid,
5160 location.objectid, index,
5161 imode_to_type(mode), namebuf,
/* repair_dir_item() returns the errors that remain; a change means something was repaired. */
5163 if (ret != tmp_err) {
5168 btrfs_release_path(path);
5169 print_dir_item_err(root, di_key, location.objectid, index,
5170 namebuf, name_len, filetype, tmp_err);
/* Step to the next entry packed in the same item. */
5172 len = sizeof(*di) + name_len + data_len;
5173 di = (struct btrfs_dir_item *)((char *)di + len);
5176 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5177 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5178 root->objectid, di_key->objectid,
/* Put @path back on the item the caller handed in. */
5185 btrfs_release_path(path);
5186 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
/* NOTE(review): this ORs a negative errno value into the err bitmap - confirm intent. */
5188 err |= ret > 0 ? -ENOENT : ret;
5193 * Wrapper function of btrfs_punch_hole.
5195 * Returns 0 means success.
5196 * Returns not 0 means error.
/*
 * Run btrfs_punch_hole() for [@start, @start + len) of inode @ino inside its
 * own transaction and print the outcome.
 */
5198 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5201 struct btrfs_trans_handle *trans;
5204 trans = btrfs_start_transaction(root, 1);
5206 return PTR_ERR(trans);
5208 ret = btrfs_punch_hole(trans, root, ino, start, len);
5210 error("failed to add hole [%llu, %llu] in inode [%llu]",
5213 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
/* NOTE(review): the return value of btrfs_commit_transaction() is not captured on this line. */
5216 btrfs_commit_transaction(trans, root);
5221 * Check file extent datasum/hole, update the size of the file extents,
5222 * check and update the last offset of the file extent.
5224 * @root: the root of fs/file tree.
5225 * @fkey: the key of the file extent.
5226 * @nodatasum: INODE_NODATASUM feature.
5227 * @size: the sum of all EXTENT_DATA items size for this inode.
5228 * @end: the offset of the last extent.
5230 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item: inline extents are checked for size
 * consistency, regular/prealloc extents for their type, their checksum
 * coverage and for a gap in front of them.  *@end and *@size are advanced by
 * the extent's length.
 */
5232 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5233 struct extent_buffer *node, int slot,
5234 unsigned int nodatasum, u64 *size, u64 *end)
5236 struct btrfs_file_extent_item *fi;
5239 u64 extent_num_bytes;
5241 u64 csum_found; /* In byte size, sectorsize aligned */
5242 u64 search_start; /* Logical range start we search for csum */
5243 u64 search_len; /* Logical range len we search for csum */
5244 unsigned int extent_type;
5245 unsigned int is_hole;
5250 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5252 /* Check inline extent */
5253 extent_type = btrfs_file_extent_type(node, fi);
5254 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5255 struct btrfs_item *e = btrfs_item_nr(slot);
5256 u32 item_inline_len;
5258 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5259 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5260 compressed = btrfs_file_extent_compression(node, fi);
5261 if (extent_num_bytes == 0) {
5263 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5264 root->objectid, fkey->objectid, fkey->offset);
5265 err |= FILE_EXTENT_ERROR;
/* For uncompressed inline data the recorded length must equal the item's data length. */
5267 if (!compressed && extent_num_bytes != item_inline_len) {
5269 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5270 root->objectid, fkey->objectid, fkey->offset,
5271 extent_num_bytes, item_inline_len);
5272 err |= FILE_EXTENT_ERROR;
5274 *end += extent_num_bytes;
5275 *size += extent_num_bytes;
5279 /* Check extent type */
5280 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5281 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5282 err |= FILE_EXTENT_ERROR;
5283 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5284 root->objectid, fkey->objectid, fkey->offset);
5288 /* Check REG_EXTENT/PREALLOC_EXTENT */
5289 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5290 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5291 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5292 extent_offset = btrfs_file_extent_offset(node, fi);
5293 compressed = btrfs_file_extent_compression(node, fi);
/* An extent with neither a disk address nor a disk length is a hole. */
5294 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5297 * Check EXTENT_DATA csum
5299 * For plain (uncompressed) extent, we should only check the range
5300 * we're referring to, as it's possible that part of prealloc extent
5301 * has been written, and has csum:
5303 * |<--- Original large preallocated extent A ---->|
5304 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5307 * For compressed extent, we should check the whole range.
5310 search_start = disk_bytenr + extent_offset;
5311 search_len = extent_num_bytes;
5313 search_start = disk_bytenr;
5314 search_len = disk_num_bytes;
/*
 * NOTE(review): csum_found has no initializer on its declaration line and is
 * read in the first test below before ret is looked at - confirm that
 * count_csum_range() always writes it.
 */
5316 ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
5317 if (csum_found > 0 && nodatasum) {
5318 err |= ODD_CSUM_ITEM;
5319 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5320 root->objectid, fkey->objectid, fkey->offset);
5321 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5322 !is_hole && (ret < 0 || csum_found < search_len)) {
5323 err |= CSUM_ITEM_MISSING;
5324 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5325 root->objectid, fkey->objectid, fkey->offset,
5326 csum_found, search_len);
5327 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5328 err |= ODD_CSUM_ITEM;
5329 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5330 root->objectid, fkey->objectid, fkey->offset, csum_found);
5333 /* Check EXTENT_DATA hole */
/* *end is where the previous extent stopped; a different offset here means a gap. */
5334 if (!no_holes && *end != fkey->offset) {
5336 ret = punch_extent_hole(root, fkey->objectid,
5337 *end, fkey->offset - *end);
5338 if (!repair || ret) {
5339 err |= FILE_EXTENT_ERROR;
5341 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5342 root->objectid, fkey->objectid, fkey->offset,
5343 fkey->objectid, *end);
5347 *end += extent_num_bytes;
5349 *size += extent_num_bytes;
5355 * Set inode item nbytes to @nbytes
5357 * Returns 0 on success
5358 * Returns != 0 on error
/*
 * Rewrite the nbytes field of the INODE_ITEM of @ino to @nbytes inside a
 * transaction, then search again for the key @path pointed at on entry so
 * the caller's position is restored.
 */
5360 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5361 struct btrfs_path *path,
5362 u64 ino, u64 nbytes)
5364 struct btrfs_trans_handle *trans;
5365 struct btrfs_inode_item *ii;
5366 struct btrfs_key key;
5367 struct btrfs_key research_key;
/* Remember the caller's position; the path is released and reused below. */
5371 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5374 key.type = BTRFS_INODE_ITEM_KEY;
5377 trans = btrfs_start_transaction(root, 1);
5378 if (IS_ERR(trans)) {
5379 ret = PTR_ERR(trans);
5384 btrfs_release_path(path);
5385 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5393 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5394 struct btrfs_inode_item);
5395 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5396 btrfs_mark_buffer_dirty(path->nodes[0]);
5398 btrfs_commit_transaction(trans, root);
5401 error("failed to set nbytes in inode %llu root %llu",
5402 ino, root->root_key.objectid);
/* The newline belongs at the end of the message, as in the sibling isize message. */
5404 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5405 root->root_key.objectid, nbytes);
5408 btrfs_release_path(path);
5409 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5416 * Set directory inode isize to @isize.
5418 * Returns 0 on success.
5419 * Returns != 0 on error.
/*
 * Rewrite the size field of an INODE_ITEM to isize inside a transaction,
 * then search again for the key @path pointed at on entry so the caller's
 * position is restored.
 */
5421 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5422 struct btrfs_path *path,
5425 struct btrfs_trans_handle *trans;
5426 struct btrfs_inode_item *ii;
5427 struct btrfs_key key;
5428 struct btrfs_key research_key;
/* Remember the caller's position; the path is released and reused below. */
5432 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5435 key.type = BTRFS_INODE_ITEM_KEY;
5438 trans = btrfs_start_transaction(root, 1);
5439 if (IS_ERR(trans)) {
5440 ret = PTR_ERR(trans);
5445 btrfs_release_path(path);
5446 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5454 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5455 struct btrfs_inode_item);
5456 btrfs_set_inode_size(path->nodes[0], ii, isize);
5457 btrfs_mark_buffer_dirty(path->nodes[0]);
5459 btrfs_commit_transaction(trans, root);
5462 error("failed to set isize in inode %llu root %llu",
5463 ino, root->root_key.objectid);
5465 printf("Set isize in inode %llu root %llu to %llu\n",
5466 ino, root->root_key.objectid, isize);
5468 btrfs_release_path(path);
5469 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5476 * Wrapper function for btrfs_add_orphan_item().
5478 * Returns 0 on success.
5479 * Returns != 0 on error.
/*
 * Add an orphan item for @ino with btrfs_add_orphan_item() inside a
 * transaction, then search again for the key @path pointed at on entry so
 * the caller's position is restored.
 */
5481 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5482 struct btrfs_path *path, u64 ino)
5484 struct btrfs_trans_handle *trans;
5485 struct btrfs_key research_key;
/* Remember the caller's position; the path is released and reused below. */
5489 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5491 trans = btrfs_start_transaction(root, 1);
5492 if (IS_ERR(trans)) {
5493 ret = PTR_ERR(trans);
5498 btrfs_release_path(path);
5499 ret = btrfs_add_orphan_item(trans, root, path, ino);
5501 btrfs_commit_transaction(trans, root);
5504 error("failed to add inode %llu as orphan item root %llu",
5505 ino, root->root_key.objectid);
5507 printf("Added inode %llu as orphan item root %llu\n",
5508 ino, root->root_key.objectid);
5510 btrfs_release_path(path);
5511 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5517 /* Set inode_item nlink to @ref_count.
5518 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5520 * Returns 0 on success
/*
 * Repair the link count of inode @ino in @root.
 *
 * When @ref_count is 0 the inode is first handed to
 * link_inode_to_lostfound(), which is given &ref_count and may update it.
 * The nlink field of the INODE_ITEM is then set to @ref_count.  Before
 * returning, @path is re-searched for the key it pointed at on entry.
 *
 * @name/@namelen: name handed to link_inode_to_lostfound(); if either is
 *                 missing, the decimal inode number is used as the name.
 * @filetype:      passed through to link_inode_to_lostfound() and printed
 *                 in the result message.
 * @nlink:         not referenced in the visible lines - TODO confirm how
 *                 it is filled in.
 */
5522 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5523 struct btrfs_path *path, u64 ino,
5524 const char *name, u32 namelen,
5525 u64 ref_count, u8 filetype, u64 *nlink)
5527 struct btrfs_trans_handle *trans;
5528 struct btrfs_inode_item *ii;
5529 struct btrfs_key key;
5530 struct btrfs_key old_key;
5531 char namebuf[BTRFS_NAME_LEN] = {0};
/* Remember the item @path points at; it is looked up again at the end. */
5537 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5539 if (name && namelen) {
5540 ASSERT(namelen <= BTRFS_NAME_LEN);
5541 memcpy(namebuf, name, namelen);
/* No usable name: fall back to the inode number printed in decimal. */
5544 sprintf(namebuf, "%llu", ino);
5545 name_len = count_digits(ino);
5546 printf("Can't find file name for inode %llu, use %s instead\n",
5550 trans = btrfs_start_transaction(root, 1);
5551 if (IS_ERR(trans)) {
5552 ret = PTR_ERR(trans);
5556 btrfs_release_path(path);
5557 /* if refs is 0, put it into lostfound */
5558 if (ref_count == 0) {
5559 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5560 name_len, filetype, &ref_count);
5565 /* reset inode_item's nlink to ref_count */
5567 key.type = BTRFS_INODE_ITEM_KEY;
5570 btrfs_release_path(path);
5571 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5577 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5578 struct btrfs_inode_item);
5579 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5580 btrfs_mark_buffer_dirty(path->nodes[0]);
5585 btrfs_commit_transaction(trans, root);
/*
 * The arguments follow the order named in the format strings: the inode
 * number first, then the root objectid.
 */
5589 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5590 ino, root->objectid, namebuf, filetype);
5592 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5593 ino, root->objectid, namebuf, filetype);
/* Put @path back on the key saved in old_key. */
5596 btrfs_release_path(path);
5597 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5604 * Check INODE_ITEM and related ITEMs (the same inode number)
5605 * 1. check link count
5606 * 2. check inode ref/extref
5607 * 3. check dir item/index
5609 * @ext_ref: the EXTENDED_IREF feature
5611 * Return 0 if no error occurred.
5612 * Return >0 if an error was found or the traversal is done (error bitmap)
/*
 * Check the INODE_ITEM @path points at together with the following items
 * that carry the same objectid.  Each following item is dispatched by key
 * type to its checker; afterwards nlink, isize and nbytes read from the
 * inode item are compared with the values gathered during the walk
 * (refs, size, extent_size).  When 'repair' is set the matching
 * repair_*_lowmem() helper is tried before an error is recorded in 'err'.
 */
5614 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5615 unsigned int ext_ref)
5617 struct extent_buffer *node;
5618 struct btrfs_inode_item *ii;
5619 struct btrfs_key key;
5620 struct btrfs_key last_key;
5629 u64 extent_size = 0;
5631 unsigned int nodatasum;
5635 char namebuf[BTRFS_NAME_LEN] = {0};
5638 node = path->nodes[0];
5639 slot = path->slots[0];
5641 btrfs_item_key_to_cpu(node, &key, slot);
5642 inode_id = key.objectid;
/* For the orphan objectid the path is only advanced to the next item. */
5644 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5645 ret = btrfs_next_item(root, path);
/* Snapshot the inode item fields that are verified after the walk. */
5651 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5652 isize = btrfs_inode_size(node, ii);
5653 nbytes = btrfs_inode_nbytes(node, ii);
5654 mode = btrfs_inode_mode(node, ii);
5655 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5656 nlink = btrfs_inode_nlink(node, ii);
5657 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/*
 * Record the key of the current item before stepping forward; it is used
 * further down to re-search the path when LAST_ITEM is set in 'err'.
 */
5660 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5661 ret = btrfs_next_item(root, path);
5663 /* out will fill 'err' using current statistics */
5665 } else if (ret > 0) {
5670 node = path->nodes[0];
5671 slot = path->slots[0];
5672 btrfs_item_key_to_cpu(node, &key, slot);
/* An item with a different objectid no longer belongs to this inode. */
5673 if (key.objectid != inode_id)
5677 case BTRFS_INODE_REF_KEY:
5678 ret = check_inode_ref(root, &key, path, namebuf,
5679 &name_len, &refs, mode);
5682 case BTRFS_INODE_EXTREF_KEY:
/* Warn about an EXTREF item when @ext_ref says the feature is off. */
5683 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5684 warning("root %llu EXTREF[%llu %llu] isn't supported",
5685 root->objectid, key.objectid,
5687 ret = check_inode_extref(root, &key, node, slot, &refs,
5691 case BTRFS_DIR_ITEM_KEY:
5692 case BTRFS_DIR_INDEX_KEY:
5694 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5695 root->objectid, inode_id,
5696 imode_to_type(mode), key.objectid,
/* check_dir_item() is given &size, which is compared with isize later. */
5699 ret = check_dir_item(root, &key, path, &size, ext_ref);
5702 case BTRFS_EXTENT_DATA_KEY:
5704 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5705 root->objectid, inode_id, key.objectid,
/* check_file_extent() is given &extent_size, compared with nbytes later. */
5708 ret = check_file_extent(root, &key, node, slot,
5709 nodatasum, &extent_size,
5713 case BTRFS_XATTR_ITEM_KEY:
5716 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5717 key.objectid, key.type, key.offset);
/* With LAST_ITEM set, point the path back at the last key that was seen. */
5722 if (err & LAST_ITEM) {
5723 btrfs_release_path(path);
5724 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5729 /* verify INODE_ITEM nlink/isize/nbytes */
/* In repair mode, DIR_COUNT_AGAIN triggers a recount of the dir size. */
5731 if (repair && (err & DIR_COUNT_AGAIN)) {
5732 err &= ~DIR_COUNT_AGAIN;
5733 count_dir_isize(root, inode_id, &size);
5736 if ((nlink != 1 || refs != 1) && repair) {
5737 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5738 namebuf, name_len, refs, imode_to_type(mode),
5743 err |= LINK_COUNT_ERROR;
5744 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5745 root->objectid, inode_id, nlink);
5749 * Just a warning, as dir inode nbytes is just an
5750 * instructive value.
5752 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5753 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5754 root->objectid, inode_id,
5755 root->fs_info->nodesize);
5758 if (isize != size) {
5760 ret = repair_dir_isize_lowmem(root, path,
5762 if (!repair || ret) {
5765 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5766 root->objectid, inode_id, isize, size);
/* The link count stored in the inode item must match the refs found. */
5770 if (nlink != refs) {
5772 ret = repair_inode_nlinks_lowmem(root, path,
5773 inode_id, namebuf, name_len, refs,
5774 imode_to_type(mode), &nlink);
5775 if (!repair || ret) {
5776 err |= LINK_COUNT_ERROR;
5778 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5779 root->objectid, inode_id, nlink, refs);
5781 } else if (!nlink) {
5783 ret = repair_inode_orphan_item_lowmem(root,
5785 if (!repair || ret) {
5787 error("root %llu INODE[%llu] is orphan item",
5788 root->objectid, inode_id);
5792 if (!nbytes && !no_holes && extent_end < isize) {
5794 ret = punch_extent_hole(root, inode_id,
5795 extent_end, isize - extent_end);
5796 if (!repair || ret) {
5797 err |= NBYTES_ERROR;
5799 "root %llu INODE[%llu] size %llu should have a file extent hole",
5800 root->objectid, inode_id, isize);
5804 if (nbytes != extent_size) {
5806 ret = repair_inode_nbytes_lowmem(root, path,
5807 inode_id, extent_size);
5808 if (!repair || ret) {
5809 err |= NBYTES_ERROR;
5811 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5812 root->objectid, inode_id, nbytes,
/* With LAST_ITEM set the path is advanced once more before leaving. */
5818 if (err & LAST_ITEM)
5819 btrfs_next_item(root, path);
5824 * Insert the missing inode item and inode ref.
5826 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
5827 * Root dir should be handled specially because root dir is the root of fs.
5829 * returns err (>0 or 0) after repair
/*
 * Repair the inode with objectid BTRFS_FIRST_FREE_OBJECTID according to
 * the bits set in @err: INODE_REF_MISSING inserts a ".." INODE_REF that
 * points at the inode itself, INODE_ITEM_MISSING is delegated to
 * repair_inode_item_missing() with a directory file type.  The handled
 * bits are cleared from @err.
 */
5831 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5833 struct btrfs_trans_handle *trans;
5834 struct btrfs_key key;
5835 struct btrfs_path path;
5836 int filetype = BTRFS_FT_DIR;
5839 btrfs_init_path(&path);
5841 if (err & INODE_REF_MISSING) {
/* Both objectid and offset are the first inode: it is its own parent. */
5842 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5843 key.type = BTRFS_INODE_REF_KEY;
5844 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5846 trans = btrfs_start_transaction(root, 1);
5847 if (IS_ERR(trans)) {
5848 ret = PTR_ERR(trans);
5852 btrfs_release_path(&path);
5853 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5857 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5858 BTRFS_FIRST_FREE_OBJECTID,
5859 BTRFS_FIRST_FREE_OBJECTID, 0);
5863 printf("Add INODE_REF[%llu %llu] name %s\n",
5864 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5866 err &= ~INODE_REF_MISSING;
5869 error("fail to insert first inode's ref");
5870 btrfs_commit_transaction(trans, root);
5873 if (err & INODE_ITEM_MISSING) {
5874 ret = repair_inode_item_missing(root,
5875 BTRFS_FIRST_FREE_OBJECTID, filetype);
5878 err &= ~INODE_ITEM_MISSING;
5882 error("fail to repair first inode");
5883 btrfs_release_path(&path);
5888 * check first root dir's inode_item and inode_ref
5890 * returns 0 means no error
5891 * returns >0 means error
5892 * returns <0 means fatal error
/*
 * Look up the INODE_ITEM with objectid BTRFS_FIRST_FREE_OBJECTID, verify
 * that its mode describes a directory, and look for its ".." INODE_REF.
 * Problems are collected as bits in 'err'; repair_fs_first_inode() is
 * given that bitmap (the condition guarding the call is not visible in
 * this listing).  A negative 'ret' takes precedence over 'err' in the
 * return value.
 */
5894 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5896 struct btrfs_path path;
5897 struct btrfs_key key;
5898 struct btrfs_inode_item *ii;
5904 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5905 key.type = BTRFS_INODE_ITEM_KEY;
5908 /* For root being dropped, we don't need to check first inode */
5909 if (btrfs_root_refs(&root->root_item) == 0 &&
5910 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5911 BTRFS_FIRST_FREE_OBJECTID)
5914 btrfs_init_path(&path);
5915 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5920 err |= INODE_ITEM_MISSING;
/* The first inode is expected to be a directory. */
5922 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5923 struct btrfs_inode_item);
5924 mode = btrfs_inode_mode(path.nodes[0], ii);
5925 if (imode_to_type(mode) != BTRFS_FT_DIR)
5926 err |= INODE_ITEM_MISMATCH;
5929 /* lookup first inode ref */
5930 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5931 key.type = BTRFS_INODE_REF_KEY;
5932 /* special index value */
5935 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5941 btrfs_release_path(&path);
5944 err = repair_fs_first_inode(root, err);
/* Report whatever is still set in the bitmap. */
5946 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5947 error("root dir INODE_ITEM is %s",
5948 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5949 if (err & INODE_REF_MISSING)
5950 error("root dir INODE_REF is missing");
5952 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in rec->backref_tree.  A temporary
 * 'match' entry is filled in as the search key and compared with
 * compare_extent_backref(); 'back' starts out NULL and is set from the
 * node that rb_search() returns.
 * NOTE(review): a list based function with the same name appears later in
 * this listing - confirm which definition is the live one.
 */
5955 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5956 u64 parent, u64 root)
5958 struct rb_node *node;
5959 struct tree_backref *back = NULL;
5960 struct tree_backref match = {
/* Key built for a lookup by @parent (full backref). */
5967 match.parent = parent;
5968 match.node.full_backref = 1;
5973 node = rb_search(&rec->backref_tree, &match.node.node,
5974 (rb_compare_keys)compare_extent_backref, NULL);
5976 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.  A temporary
 * 'match' entry (carrying, among other fields, @found_ref and
 * @disk_bytenr) is used as the search key with compare_extent_backref();
 * 'back' starts out NULL and is set from the node rb_search() returns.
 * NOTE(review): a list based function with the same name appears later in
 * this listing - confirm which definition is the live one.
 */
5981 static struct data_backref *find_data_backref(struct extent_record *rec,
5982 u64 parent, u64 root,
5983 u64 owner, u64 offset,
5985 u64 disk_bytenr, u64 bytes)
5987 struct rb_node *node;
5988 struct data_backref *back = NULL;
5989 struct data_backref match = {
5996 .found_ref = found_ref,
5997 .disk_bytenr = disk_bytenr,
/* Key built for a lookup by @parent (full backref). */
6001 match.parent = parent;
6002 match.node.full_backref = 1;
6007 node = rb_search(&rec->backref_tree, &match.node.node,
6008 (rb_compare_keys)compare_extent_backref, NULL);
6010 back = to_data_backref(rb_node_to_extent_backref(node));
6015 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6016 * blocks and integrity of fs tree items.
6018 * @root: the root of the tree to be checked.
6019 * @ext_ref: whether the EXTENDED_IREF feature is enabled.
6020 * @account: if not 0, check the tree blocks of the given tree;
6021 * otherwise check the relationship of fs tree items, and
6022 * @root MUST be a fs tree root.
6023 * Returns 0 represents OK.
6024 * Returns not 0 represents error.
/*
 * Check one tree: first the first inode item via check_fs_first_inode(),
 * then a traversal of the tree using walk_down_tree_v2() and
 * walk_up_tree_v2().  The traversal starts at the root node, or at the
 * position recorded in drop_progress/drop_level of the root item when
 * the root has no refs and a non-zero drop_progress objectid.
 */
6026 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6027 struct btrfs_root *root, unsigned int ext_ref,
6031 struct btrfs_path path;
6032 struct node_refs nrefs;
6033 struct btrfs_root_item *root_item = &root->root_item;
6038 memset(&nrefs, 0, sizeof(nrefs));
6041 * We need to manually check the first inode item (256)
6042 * As the following traversal function will only start from
6043 * the first inode item in the leaf, if inode item (256) is
6044 * missing we will skip it forever.
6046 ret = check_fs_first_inode(root, ext_ref);
6052 level = btrfs_header_level(root->node);
6053 btrfs_init_path(&path);
/* Start from slot 0 of the root node, holding an extra reference on it. */
6055 if (btrfs_root_refs(root_item) > 0 ||
6056 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6057 path.nodes[level] = root->node;
6058 path.slots[level] = 0;
6059 extent_buffer_get(root->node);
6061 struct btrfs_key key;
/* Otherwise position the path at drop_progress, at level drop_level. */
6063 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6064 level = root_item->drop_level;
6065 path.lowest_level = level;
6066 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6073 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6074 ext_ref, check_all);
6078 /* if ret is negative, walk shall stop */
6084 ret = walk_up_tree_v2(root, &path, &level);
6086 /* Normal exit, reset ret to err */
6093 btrfs_release_path(&path);
6098 * Iterate all items in the tree and call check_inode_item() to check.
6100 * @root: the root of the tree to be checked.
6101 * @ext_ref: the EXTENDED_IREF feature
6103 * Return 0 if no error found.
6104 * Return <0 for error.
/*
 * Reset the cached block groups of the filesystem, then run
 * check_btrfs_root() on @root with a NULL transaction handle and 0 as
 * the last argument.
 */
6106 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6108 reset_cached_block_groups(root->fs_info);
6109 return check_btrfs_root(NULL, root, ext_ref, 0);
6113 * Find the relative ref for root_ref and root_backref.
6115 * @root: the root of the root tree.
6116 * @ref_key: the key of the root ref.
6118 * Return 0 if no error occurred.
/*
 * Verify that the ROOT_REF or ROOT_BACKREF item at @node/@slot has a
 * counterpart of the opposite type in @root, and that both agree on
 * dirid, sequence, name length and name.  A missing counterpart sets
 * ROOT_REF_MISSING, a disagreement sets ROOT_REF_MISMATCH in 'err'.
 */
6120 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6121 struct extent_buffer *node, int slot)
6123 struct btrfs_path path;
6124 struct btrfs_key key;
6125 struct btrfs_root_ref *ref;
6126 struct btrfs_root_ref *backref;
6127 char ref_name[BTRFS_NAME_LEN] = {0};
6128 char backref_name[BTRFS_NAME_LEN] = {0};
6134 u32 backref_namelen;
6139 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6140 ref_dirid = btrfs_root_ref_dirid(node, ref);
6141 ref_seq = btrfs_root_ref_sequence(node, ref);
6142 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are read truncated, with a warning. */
6144 if (ref_namelen <= BTRFS_NAME_LEN) {
6147 len = BTRFS_NAME_LEN;
6148 warning("%s[%llu %llu] ref_name too long",
6149 ref_key->type == BTRFS_ROOT_REF_KEY ?
6150 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name is stored directly behind the btrfs_root_ref structure. */
6153 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6155 /* Find relative root_ref */
/*
 * The counterpart has objectid and offset swapped; the type expression
 * maps ROOT_REF to ROOT_BACKREF and ROOT_BACKREF to ROOT_REF.
 */
6156 key.objectid = ref_key->offset;
6157 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6158 key.offset = ref_key->objectid;
6160 btrfs_init_path(&path);
6161 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6163 err |= ROOT_REF_MISSING;
6164 error("%s[%llu %llu] couldn't find relative ref",
6165 ref_key->type == BTRFS_ROOT_REF_KEY ?
6166 "ROOT_REF" : "ROOT_BACKREF",
6167 ref_key->objectid, ref_key->offset);
6171 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6172 struct btrfs_root_ref);
6173 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6174 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6175 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6177 if (backref_namelen <= BTRFS_NAME_LEN) {
6178 len = backref_namelen;
6180 len = BTRFS_NAME_LEN;
6181 warning("%s[%llu %llu] ref_name too long",
6182 key.type == BTRFS_ROOT_REF_KEY ?
6183 "ROOT_REF" : "ROOT_BACKREF",
6184 key.objectid, key.offset);
6186 read_extent_buffer(path.nodes[0], backref_name,
6187 (unsigned long)(backref + 1), len);
/* 'len' now holds the length used for the counterpart's name. */
6189 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6190 ref_namelen != backref_namelen ||
6191 strncmp(ref_name, backref_name, len)) {
6192 err |= ROOT_REF_MISMATCH;
6193 error("%s[%llu %llu] mismatch relative ref",
6194 ref_key->type == BTRFS_ROOT_REF_KEY ?
6195 "ROOT_REF" : "ROOT_BACKREF",
6196 ref_key->objectid, ref_key->offset);
6199 btrfs_release_path(&path);
6204 * Check all fs/file tree in low_memory mode.
6206 * 1. for fs tree root item, call check_fs_root_v2()
6207 * 2. for fs tree root ref/backref, call check_root_ref()
6209 * Return 0 if no error occurred.
/*
 * Walk the items of the tree root, starting the search at
 * BTRFS_FS_TREE_OBJECTID.  ROOT_ITEMs whose objectid passes
 * fs_root_objectid() are read and checked with check_fs_root_v2();
 * ROOT_REF and ROOT_BACKREF items are checked with check_root_ref().
 */
6211 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6213 struct btrfs_root *tree_root = fs_info->tree_root;
6214 struct btrfs_root *cur_root = NULL;
6215 struct btrfs_path path;
6216 struct btrfs_key key;
6217 struct extent_buffer *node;
6218 unsigned int ext_ref;
6223 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6225 btrfs_init_path(&path);
6226 key.objectid = BTRFS_FS_TREE_OBJECTID;
6228 key.type = BTRFS_ROOT_ITEM_KEY;
6230 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6234 } else if (ret > 0) {
6240 node = path.nodes[0];
6241 slot = path.slots[0];
6242 btrfs_item_key_to_cpu(node, &key, slot);
6243 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6245 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6246 fs_root_objectid(key.objectid)) {
/* The reloc tree is read without the root cache and freed again below. */
6247 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6248 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6251 key.offset = (u64)-1;
6252 cur_root = btrfs_read_fs_root(fs_info, &key);
6255 if (IS_ERR(cur_root)) {
6256 error("Fail to read fs/subvol tree: %lld",
6262 ret = check_fs_root_v2(cur_root, ext_ref);
6265 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6266 btrfs_free_fs_root(cur_root);
6267 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6268 key.type == BTRFS_ROOT_BACKREF_KEY) {
6269 ret = check_root_ref(tree_root, &key, node, slot);
6273 ret = btrfs_next_item(tree_root, &path);
6283 btrfs_release_path(&path);
/*
 * Dispatch the fs roots check by check mode: check_fs_roots_v2() for
 * CHECK_MODE_LOWMEM, check_fs_roots() with @root_cache otherwise.  A
 * status line goes to stderr unless progress reporting is enabled.
 */
6287 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6288 struct cache_tree *root_cache)
6292 if (!ctx.progress_enabled)
6293 fprintf(stderr, "checking fs roots\n");
6294 if (check_mode == CHECK_MODE_LOWMEM)
6295 ret = check_fs_roots_v2(fs_info);
6297 ret = check_fs_roots(fs_info, root_cache);
/*
 * Go through every backref in rec->backref_tree and compare it with the
 * extent record @rec: whether it was found in the extent tree, whether a
 * tree backref was referenced back, whether the per-backref data ref
 * counts, disk bytenr and byte counts agree, and finally whether the
 * references counted in 'found' equal rec->refs.  The messages below
 * describe each mismatch on stderr.
 */
6302 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6304 struct extent_backref *back, *tmp;
6305 struct tree_backref *tback;
6306 struct data_backref *dback;
6310 rbtree_postorder_for_each_entry_safe(back, tmp,
6311 &rec->backref_tree, node) {
/* Backref that has no counterpart in the extent tree. */
6312 if (!back->found_extent_tree) {
6316 if (back->is_data) {
6317 dback = to_data_backref(back);
6318 fprintf(stderr, "Data backref %llu %s %llu"
6319 " owner %llu offset %llu num_refs %lu"
6320 " not found in extent tree\n",
6321 (unsigned long long)rec->start,
6322 back->full_backref ?
6324 back->full_backref ?
6325 (unsigned long long)dback->parent:
6326 (unsigned long long)dback->root,
6327 (unsigned long long)dback->owner,
6328 (unsigned long long)dback->offset,
6329 (unsigned long)dback->num_refs);
6331 tback = to_tree_backref(back);
6332 fprintf(stderr, "Tree backref %llu parent %llu"
6333 " root %llu not found in extent tree\n",
6334 (unsigned long long)rec->start,
6335 (unsigned long long)tback->parent,
6336 (unsigned long long)tback->root);
/* Tree backref that nothing refers back to. */
6339 if (!back->is_data && !back->found_ref) {
6343 tback = to_tree_backref(back);
6344 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6345 (unsigned long long)rec->start,
6346 back->full_backref ? "parent" : "root",
6347 back->full_backref ?
6348 (unsigned long long)tback->parent :
6349 (unsigned long long)tback->root, back);
6351 if (back->is_data) {
6352 dback = to_data_backref(back);
/* Data backref: refs found must equal the refs it claims. */
6353 if (dback->found_ref != dback->num_refs) {
6357 fprintf(stderr, "Incorrect local backref count"
6358 " on %llu %s %llu owner %llu"
6359 " offset %llu found %u wanted %u back %p\n",
6360 (unsigned long long)rec->start,
6361 back->full_backref ?
6363 back->full_backref ?
6364 (unsigned long long)dback->parent:
6365 (unsigned long long)dback->root,
6366 (unsigned long long)dback->owner,
6367 (unsigned long long)dback->offset,
6368 dback->found_ref, dback->num_refs, back);
6370 if (dback->disk_bytenr != rec->start) {
6374 fprintf(stderr, "Backref disk bytenr does not"
6375 " match extent record, bytenr=%llu, "
6376 "ref bytenr=%llu\n",
6377 (unsigned long long)rec->start,
6378 (unsigned long long)dback->disk_bytenr);
6381 if (dback->bytes != rec->nr) {
6385 fprintf(stderr, "Backref bytes do not match "
6386 "extent backref, bytenr=%llu, ref "
6387 "bytes=%llu, backref bytes=%llu\n",
6388 (unsigned long long)rec->start,
6389 (unsigned long long)rec->nr,
6390 (unsigned long long)dback->bytes);
/* Accumulate the global reference count; data backrefs add found_ref. */
6393 if (!back->is_data) {
6396 dback = to_data_backref(back);
6397 found += dback->found_ref;
6400 if (found != rec->refs) {
6404 fprintf(stderr, "Incorrect global backref count "
6405 "on %llu found %llu wanted %llu\n",
6406 (unsigned long long)rec->start,
6407 (unsigned long long)found,
6408 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by
 * free_all_extent_backrefs(); converts the rb node into its
 * extent_backref.  What is done with 'back' is not visible in this
 * listing - presumably it is released, TODO confirm.
 */
6414 static void __free_one_backref(struct rb_node *node)
6416 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Run __free_one_backref() on every node of rec->backref_tree. */
6421 static void free_all_extent_backrefs(struct extent_record *rec)
6423 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empty @extent_cache: take the first cache extent, unlink it from the
 * cache and free the backrefs of the extent record that embeds it.  The
 * loop construct and the release of the record itself are not visible in
 * this listing.
 */
6426 static void free_extent_record_cache(struct cache_tree *extent_cache)
6428 struct cache_extent *cache;
6429 struct extent_record *rec;
6432 cache = first_cache_extent(extent_cache);
6435 rec = container_of(cache, struct extent_record, cache);
6436 remove_cache_extent(extent_cache, cache);
6437 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify: content
 * and owner ref are checked, the extent item ref count equals the
 * non-zero ref count, there are no duplicates, all_backpointers_checked()
 * reports no problem, and none of the bad_full_backref /
 * crossing_stripes / wrong_chunk_type flags is set.
 */
6442 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6443 struct extent_record *rec)
6445 if (rec->content_checked && rec->owner_ref_checked &&
6446 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6447 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6448 !rec->bad_full_backref && !rec->crossing_stripes &&
6449 !rec->wrong_chunk_type) {
/* Unlink from the cache, free the backrefs and leave any list. */
6450 remove_cache_extent(extent_cache, &rec->cache);
6451 free_all_extent_backrefs(rec);
6452 list_del_init(&rec->list);
/*
 * Check that the owner recorded in the header of tree block @buf really
 * references the block.  First the backrefs of @rec are scanned for a
 * tree backref whose root equals the header owner; failing that, the
 * owner's tree is searched for the first key of @buf, one level above
 * the block, and the block pointer found there is compared with
 * buf->start.
 *
 * Returns 0 when a reference was found, 1 otherwise (final return).
 */
6458 static int check_owner_ref(struct btrfs_root *root,
6459 struct extent_record *rec,
6460 struct extent_buffer *buf)
6462 struct extent_backref *node, *tmp;
6463 struct tree_backref *back;
6464 struct btrfs_root *ref_root;
6465 struct btrfs_key key;
6466 struct btrfs_path path;
6467 struct extent_buffer *parent;
6472 rbtree_postorder_for_each_entry_safe(node, tmp,
6473 &rec->backref_tree, node) {
6476 if (!node->found_ref)
6478 if (node->full_backref)
6480 back = to_tree_backref(node);
6481 if (btrfs_header_owner(buf) == back->root)
6484 BUG_ON(rec->is_root);
6486 /* try to find the block by search corresponding fs tree */
6487 key.objectid = btrfs_header_owner(buf);
6488 key.type = BTRFS_ROOT_ITEM_KEY;
6489 key.offset = (u64)-1;
6491 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6492 if (IS_ERR(ref_root))
6495 level = btrfs_header_level(buf);
/* Use the first key stored in @buf as the search key. */
6497 btrfs_item_key_to_cpu(buf, &key, 0);
6499 btrfs_node_key_to_cpu(buf, &key, 0);
6501 btrfs_init_path(&path);
/* Stop the search one level above @buf so its parent node is in the path. */
6502 path.lowest_level = level + 1;
6503 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6507 parent = path.nodes[level + 1];
6508 if (parent && buf->start == btrfs_node_blockptr(parent,
6509 path.slots[level + 1]))
6512 btrfs_release_path(&path);
6513 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec, looking at full_backref and at whether the
 * tree backref's root is BTRFS_EXTENT_TREE_OBJECTID.  The values returned
 * in each case are not visible in this listing.
 */
6516 static int is_extent_tree_record(struct extent_record *rec)
6518 struct extent_backref *node, *tmp;
6519 struct tree_backref *back;
6522 rbtree_postorder_for_each_entry_safe(node, tmp,
6523 &rec->backref_tree, node) {
6526 back = to_tree_backref(node);
6527 if (node->full_backref)
6529 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, start + len) in
 * @extent_cache, test it with is_extent_tree_record(), and register a
 * corrupt extent record for the range, keyed by the record's parent key,
 * through btrfs_add_corrupt_extent_record().
 */
6536 static int record_bad_block_io(struct btrfs_fs_info *info,
6537 struct cache_tree *extent_cache,
6540 struct extent_record *rec;
6541 struct cache_extent *cache;
6542 struct btrfs_key key;
6544 cache = lookup_cache_extent(extent_cache, start, len);
6548 rec = container_of(cache, struct extent_record, cache);
6549 if (!is_extent_tree_record(rec))
6552 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6553 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of tree block @buf.  In a node
 * (header level != 0) the two key pointers are exchanged; in a leaf the
 * item data, item offsets, item sizes and keys are exchanged.
 */
6556 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6557 struct extent_buffer *buf, int slot)
6559 if (btrfs_header_level(buf)) {
6560 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers, then write each one into the other's slot. */
6562 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6563 sizeof(struct btrfs_key_ptr));
6564 read_extent_buffer(buf, &ptr2,
6565 btrfs_node_key_ptr_offset(slot + 1),
6566 sizeof(struct btrfs_key_ptr));
6567 write_extent_buffer(buf, &ptr1,
6568 btrfs_node_key_ptr_offset(slot + 1),
6569 sizeof(struct btrfs_key_ptr));
6570 write_extent_buffer(buf, &ptr2,
6571 btrfs_node_key_ptr_offset(slot),
6572 sizeof(struct btrfs_key_ptr));
6574 struct btrfs_disk_key key;
/* Pass the first key of the node on to the levels above it. */
6575 btrfs_node_key(buf, &key, 0);
6576 btrfs_fixup_low_keys(root, path, &key,
6577 btrfs_header_level(buf) + 1);
6580 struct btrfs_item *item1, *item2;
6581 struct btrfs_key k1, k2;
6582 char *item1_data, *item2_data;
6583 u32 item1_offset, item2_offset, item1_size, item2_size;
6585 item1 = btrfs_item_nr(slot);
6586 item2 = btrfs_item_nr(slot + 1);
6587 btrfs_item_key_to_cpu(buf, &k1, slot);
6588 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6589 item1_offset = btrfs_item_offset(buf, item1);
6590 item2_offset = btrfs_item_offset(buf, item2);
6591 item1_size = btrfs_item_size(buf, item1);
6592 item2_size = btrfs_item_size(buf, item2);
6594 item1_data = malloc(item1_size);
6597 item2_data = malloc(item2_size);
6603 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6604 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but item2_size bytes
 * are written from it (and the reverse for item2_data).  If the two
 * sizes differ this reads past the smaller buffer - confirm.
 */
6606 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6607 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6611 btrfs_set_item_offset(buf, item1, item2_offset);
6612 btrfs_set_item_offset(buf, item2, item1_offset);
6613 btrfs_set_item_size(buf, item1, item2_size);
6614 btrfs_set_item_size(buf, item2, item1_size);
/* Exchange the keys; path->slots[0] selects the item whose key is set. */
6616 path->slots[0] = slot;
6617 btrfs_set_item_key_unsafe(root, path, &k2);
6618 path->slots[0] = slot + 1;
6619 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the adjacent key pairs of the block at path->lowest_level,
 * compare each pair with btrfs_comp_cpu_keys() and call swap_values() on
 * slot i; the block is marked dirty afterwards.  Keys are read with the
 * node or the item accessor (the selecting condition is not visible in
 * this listing).
 */
6624 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6626 struct extent_buffer *buf;
6627 struct btrfs_key k1, k2;
6629 int level = path->lowest_level;
6632 buf = path->nodes[level];
6633 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6635 btrfs_node_key_to_cpu(buf, &k1, i);
6636 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6638 btrfs_item_key_to_cpu(buf, &k1, i);
6639 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* k1 < k2 means this pair is already in order. */
6641 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6643 ret = swap_values(root, path, buf, i);
6646 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf by moving the following
 * item headers down by one and decrementing nritems.  Only the key types
 * listed below are handled.
 */
6652 static int delete_bogus_item(struct btrfs_root *root,
6653 struct btrfs_path *path,
6654 struct extent_buffer *buf, int slot)
6656 struct btrfs_key key;
6657 int nritems = btrfs_header_nritems(buf);
6659 btrfs_item_key_to_cpu(buf, &key, slot);
6661 /* These are all the keys we can deal with missing. */
6662 if (key.type != BTRFS_DIR_INDEX_KEY &&
6663 key.type != BTRFS_EXTENT_ITEM_KEY &&
6664 key.type != BTRFS_METADATA_ITEM_KEY &&
6665 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6666 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6669 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6670 (unsigned long long)key.objectid, key.type,
6671 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap: shift the headers of slots slot + 1 .. nritems - 1. */
6672 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6673 btrfs_item_nr_offset(slot + 1),
6674 sizeof(struct btrfs_item) *
6675 (nritems - slot - 1));
6676 btrfs_set_header_nritems(buf, nritems - 1);
6678 struct btrfs_disk_key disk_key;
/* Pass the (new) first key of the leaf on to the levels above it. */
6680 btrfs_item_key(buf, &disk_key, 0);
6681 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6683 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].  The first
 * item is expected to end at BTRFS_LEAF_DATA_SIZE, every later item at
 * the offset of its predecessor.  An item ending beyond the expected
 * position is passed to delete_bogus_item(); an item ending short of it
 * has its data moved up by 'shift' bytes and its offset adjusted.
 */
6687 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6689 struct extent_buffer *buf;
6693 /* We should only get this for leaves */
6694 BUG_ON(path->lowest_level);
6695 buf = path->nodes[0];
6697 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6698 unsigned int shift = 0, offset;
/* First item: its end is compared with the end of the leaf data area. */
6700 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6701 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6702 if (btrfs_item_end_nr(buf, i) >
6703 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6704 ret = delete_bogus_item(root, path, buf, i);
6707 fprintf(stderr, "item is off the end of the "
6708 "leaf, can't fix\n");
6712 shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6713 btrfs_item_end_nr(buf, i);
/* Later items: the end is compared with the previous item's offset. */
6714 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6715 btrfs_item_offset_nr(buf, i - 1)) {
6716 if (btrfs_item_end_nr(buf, i) >
6717 btrfs_item_offset_nr(buf, i - 1)) {
6718 ret = delete_bogus_item(root, path, buf, i);
6721 fprintf(stderr, "items overlap, can't fix\n");
6725 shift = btrfs_item_offset_nr(buf, i - 1) -
6726 btrfs_item_end_nr(buf, i);
6731 printf("Shifting item nr %d by %u bytes in block %llu\n",
6732 i, shift, (unsigned long long)buf->start);
/* Move the item data up by 'shift' bytes, then update its offset. */
6733 offset = btrfs_item_offset_nr(buf, i);
6734 memmove_extent_buffer(buf,
6735 btrfs_leaf_data(buf) + offset + shift,
6736 btrfs_leaf_data(buf) + offset,
6737 btrfs_item_size_nr(buf, i));
6738 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6740 btrfs_mark_buffer_dirty(buf);
6744 * We may have moved things, in which case we want to exit so we don't
6745 * write those changes out. Once we have proper abort functionality in
6746 * progs this can be changed to something nicer.
6753 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6754 * then just return -EIO.
/*
 * Try to repair tree block @buf for the two statuses handled here:
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER (fix_key_order) and
 * BTRFS_TREE_BLOCK_INVALID_OFFSETS (fix_item_offset).  Every root that
 * btrfs_find_all_roots() reports for the block is processed in its own
 * transaction: the block is located via its first key with block checks
 * skipped, then the matching fixer runs on the resulting path.
 */
6756 static int try_to_fix_bad_block(struct btrfs_root *root,
6757 struct extent_buffer *buf,
6758 enum btrfs_tree_block_status status)
6760 struct btrfs_trans_handle *trans;
6761 struct ulist *roots;
6762 struct ulist_node *node;
6763 struct btrfs_root *search_root;
6764 struct btrfs_path path;
6765 struct ulist_iterator iter;
6766 struct btrfs_key root_key, key;
6769 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6770 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6773 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6777 btrfs_init_path(&path);
6778 ULIST_ITER_INIT(&iter);
6779 while ((node = ulist_next(roots, &iter))) {
/* Each ulist entry's value is used as the objectid of a root to read. */
6780 root_key.objectid = node->val;
6781 root_key.type = BTRFS_ROOT_ITEM_KEY;
6782 root_key.offset = (u64)-1;
6784 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6791 trans = btrfs_start_transaction(search_root, 0);
6792 if (IS_ERR(trans)) {
6793 ret = PTR_ERR(trans);
/* Search down to the level of @buf without validating blocks on the way. */
6797 path.lowest_level = btrfs_header_level(buf);
6798 path.skip_check_block = 1;
6799 if (path.lowest_level)
6800 btrfs_node_key_to_cpu(buf, &key, 0);
6802 btrfs_item_key_to_cpu(buf, &key, 0);
6803 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6806 btrfs_commit_transaction(trans, search_root);
6809 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6810 ret = fix_key_order(search_root, &path);
6811 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6812 ret = fix_item_offset(search_root, &path);
6814 btrfs_commit_transaction(trans, search_root);
6817 btrfs_release_path(&path);
6818 btrfs_commit_transaction(trans, search_root);
6821 btrfs_release_path(&path);
/*
 * Validate tree block @buf against its extent record in @extent_cache.
 * The record is updated with the block's generation, level and first key
 * objectid, the block is checked with btrfs_check_leaf() or
 * btrfs_check_node(), and a block that is not clean is handed to
 * try_to_fix_bad_block().  Afterwards the content and owner-ref flags of
 * the record are set and maybe_free_extent_rec() is called.
 */
6825 static int check_block(struct btrfs_root *root,
6826 struct cache_tree *extent_cache,
6827 struct extent_buffer *buf, u64 flags)
6829 struct extent_record *rec;
6830 struct cache_extent *cache;
6831 struct btrfs_key key;
6832 enum btrfs_tree_block_status status;
6836 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6839 rec = container_of(cache, struct extent_record, cache);
6840 rec->generation = btrfs_header_generation(buf);
6842 level = btrfs_header_level(buf);
/* Record the objectid of the first key when the block has any items. */
6843 if (btrfs_header_nritems(buf) > 0) {
6846 btrfs_item_key_to_cpu(buf, &key, 0);
6848 btrfs_node_key_to_cpu(buf, &key, 0);
6850 rec->info_objectid = key.objectid;
6852 rec->info_level = level;
6854 if (btrfs_is_leaf(buf))
6855 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6857 status = btrfs_check_node(root, &rec->parent_key, buf);
6859 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6861 status = try_to_fix_bad_block(root, buf, status);
6862 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6864 fprintf(stderr, "bad block %llu\n",
6865 (unsigned long long)buf->start);
6868 * Signal to callers we need to start the scan over
6869 * again since we'll have cowed blocks.
6874 rec->content_checked = 1;
/* With the full-backref flag the owner ref counts as checked right away. */
6875 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6876 rec->owner_ref_checked = 1;
6878 ret = check_owner_ref(root, rec, buf);
6880 rec->owner_ref_checked = 1;
6884 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs list of @rec for a tree backref matching @parent or
 * @root.
 *
 * Each list entry is converted with to_extent_backref() and then
 * to_tree_backref().  The comparisons visible here test back->parent against
 * @parent and back->root against @root, each guarded by a test of
 * node->full_backref.
 */
6889 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6890 u64 parent, u64 root)
6892 struct list_head *cur = rec->backrefs.next;
6893 struct extent_backref *node;
6894 struct tree_backref *back;
/* Walk the circular list until we are back at its head. */
6896 while(cur != &rec->backrefs) {
6897 node = to_extent_backref(cur);
6901 back = to_tree_backref(node);
6903 if (!node->full_backref)
6905 if (parent == back->parent)
6908 if (node->full_backref)
6910 if (back->root == root)
/*
 * Allocate a tree_backref for @rec with malloc() and zero its embedded
 * extent_backref node.
 *
 * Two initialisations are visible: one stores @parent and sets
 * node.full_backref to 1, the other sets node.full_backref to 0
 * (presumably storing @root instead -- confirm against the full body).
 */
6918 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6919 u64 parent, u64 root)
6921 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared here, not the whole structure. */
6925 memset(&ref->node, 0, sizeof(ref->node));
6927 ref->parent = parent;
6928 ref->node.full_backref = 1;
6931 ref->node.full_backref = 0;
/*
 * Search the backrefs list of @rec for a data backref.
 *
 * Entries are matched either on back->parent == @parent, or on the
 * (@root, @owner, @offset) triple; both comparisons are guarded by tests of
 * node->full_backref.  For a triple match there is an additional test: when
 * both the caller's found_ref and node->found_ref are set and the entry's
 * bytes or disk_bytenr differ from @bytes / @disk_bytenr, the entry is
 * treated specially (presumably skipped -- confirm against the full body).
 */
6938 static struct data_backref *find_data_backref(struct extent_record *rec,
6939 u64 parent, u64 root,
6940 u64 owner, u64 offset,
6942 u64 disk_bytenr, u64 bytes)
6944 struct list_head *cur = rec->backrefs.next;
6945 struct extent_backref *node;
6946 struct data_backref *back;
/* Walk the circular list until we are back at its head. */
6948 while(cur != &rec->backrefs) {
6949 node = to_extent_backref(cur);
6953 back = to_data_backref(node);
6955 if (!node->full_backref)
6957 if (parent == back->parent)
6960 if (node->full_backref)
6962 if (back->root == root && back->owner == owner &&
6963 back->offset == offset) {
6964 if (found_ref && node->found_ref &&
6965 (back->bytes != bytes ||
6966 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref for @rec with malloc(), zero its embedded
 * extent_backref node and mark it as a data reference (node.is_data = 1).
 *
 * Two initialisations are visible: one stores @parent with
 * node.full_backref = 1, the other stores @offset with
 * node.full_backref = 0.  ref->bytes is set to max_size, and rec->max_size
 * is raised to max_size when the latter is larger.
 */
6976 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6977 u64 parent, u64 root,
6978 u64 owner, u64 offset,
6981 struct data_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared here, not the whole structure. */
6985 memset(&ref->node, 0, sizeof(ref->node));
6986 ref->node.is_data = 1;
6989 ref->parent = parent;
6992 ref->node.full_backref = 1;
6996 ref->offset = offset;
6997 ref->node.full_backref = 0;
6999 ref->bytes = max_size;
/* Keep the record's max_size as the largest size seen so far. */
7002 if (max_size > rec->max_size)
7003 rec->max_size = max_size;
7007 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the flags of the block group found by
 * btrfs_lookup_first_block_group() for rec->start do not fit the extent:
 *  - a non-metadata extent in a block group without BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent in a block group that has neither the SYSTEM nor the
 *    METADATA flag;
 *  - a metadata extent whose first backref in rec->backref_tree is a data
 *    backref;
 *  - a metadata extent whose first tree backref implies SYSTEM (root is
 *    BTRFS_CHUNK_TREE_OBJECTID) or METADATA (any other root) while the block
 *    group lacks that flag.
 */
7008 static void check_extent_type(struct extent_record *rec)
7010 struct btrfs_block_group_cache *bg_cache;
7012 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7016 /* data extent, check chunk directly*/
7017 if (!rec->metadata) {
7018 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7019 rec->wrong_chunk_type = 1;
7023 /* metadata extent, check the obvious case first */
7024 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7025 BTRFS_BLOCK_GROUP_METADATA))) {
7026 rec->wrong_chunk_type = 1;
7031 * Check SYSTEM extent, as it's also marked as metadata, we can only
7032 * make sure it's a SYSTEM extent by its backref
7034 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7035 struct extent_backref *node;
7036 struct tree_backref *tback;
/* Only the first backref in the rb tree is inspected. */
7039 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7040 if (node->is_data) {
7041 /* tree block shouldn't have data backref */
7042 rec->wrong_chunk_type = 1;
7045 tback = container_of(node, struct tree_backref, node);
/* Blocks owned by the chunk tree are expected in a SYSTEM block group. */
7047 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7048 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7050 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7051 if (!(bg_cache->flags & bg_type))
7052 rec->wrong_chunk_type = 1;
7057 * Allocate a new extent record, fill default values from @tmpl and insert into
7058 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7059 * the cache, otherwise it fails.
/*
 * @tmpl->max_size must be non-zero (enforced with BUG_ON).  Fields copied
 * from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key.  All other state (duplicate count, flag
 * bits, list heads, backref rb tree) starts out empty/zero.
 *
 * After a successful insert the global bytes_used counter grows by rec->nr,
 * and the record's crossing_stripes flag and chunk type are evaluated.
 */
7061 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7062 struct extent_record *tmpl)
7064 struct extent_record *rec;
7067 BUG_ON(tmpl->max_size == 0);
7068 rec = malloc(sizeof(*rec));
7071 rec->start = tmpl->start;
7072 rec->max_size = tmpl->max_size;
/* nr is never smaller than max_size. */
7073 rec->nr = max(tmpl->nr, tmpl->max_size);
7074 rec->found_rec = tmpl->found_rec;
7075 rec->content_checked = tmpl->content_checked;
7076 rec->owner_ref_checked = tmpl->owner_ref_checked;
7077 rec->num_duplicates = 0;
7078 rec->metadata = tmpl->metadata;
7079 rec->flag_block_full_backref = FLAG_UNSET;
7080 rec->bad_full_backref = 0;
7081 rec->crossing_stripes = 0;
7082 rec->wrong_chunk_type = 0;
7083 rec->is_root = tmpl->is_root;
7084 rec->refs = tmpl->refs;
7085 rec->extent_item_refs = tmpl->extent_item_refs;
7086 rec->parent_generation = tmpl->parent_generation;
7087 INIT_LIST_HEAD(&rec->backrefs);
7088 INIT_LIST_HEAD(&rec->dups);
7089 INIT_LIST_HEAD(&rec->list);
7090 rec->backref_tree = RB_ROOT;
7091 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7092 rec->cache.start = tmpl->start;
/*
 * NOTE(review): the cache entry is sized with tmpl->nr while rec->nr above is
 * max(tmpl->nr, tmpl->max_size), so the two can differ -- confirm intended.
 */
7093 rec->cache.size = tmpl->nr;
7094 ret = insert_cache_extent(extent_cache, &rec->cache);
7099 bytes_used += rec->nr;
/* The stripe-crossing check is evaluated for a nodesize-long range. */
7102 rec->crossing_stripes = check_crossing_stripes(global_info,
7103 rec->start, global_info->nodesize);
7104 check_extent_type(rec);
7109 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7111 * - refs - if found, increase refs
7112 * - is_root - if found, set
7113 * - content_checked - if found, set
7114 * - owner_ref_checked - if found, set
7116 * If not found, create a new one, initialize and insert.
/*
 * Looks up [tmpl->start, tmpl->start + tmpl->nr) in @extent_cache.
 *
 * When a record is found it is updated from @tmpl: nr, extent_item_refs
 * (a message is printed if the record already had a value), the
 * content_checked / owner_ref_checked flags, parent_key, parent_generation
 * and max_size (only ever raised).  If @tmpl carries found_rec and either
 * its start differs from the record's or the record already has found_rec,
 * a duplicate record is allocated, chained on rec->dups, and the record is
 * queued on the global duplicate_extents list.
 *
 * add_extent_rec_nolookup() is called to create a new record (presumably
 * when the lookup finds nothing -- confirm against the full body).
 */
7118 static int add_extent_rec(struct cache_tree *extent_cache,
7119 struct extent_record *tmpl)
7121 struct extent_record *rec;
7122 struct cache_extent *cache;
7126 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7128 rec = container_of(cache, struct extent_record, cache);
7132 rec->nr = max(tmpl->nr, tmpl->max_size);
7135 * We need to make sure to reset nr to whatever the extent
7136 * record says was the real size, this way we can compare it to
7139 if (tmpl->found_rec) {
7140 if (tmpl->start != rec->start || rec->found_rec) {
7141 struct extent_record *tmp;
/* Queue the record on duplicate_extents only once. */
7144 if (list_empty(&rec->list))
7145 list_add_tail(&rec->list,
7146 &duplicate_extents);
7149 * We have to do this song and dance in case we
7150 * find an extent record that falls inside of
7151 * our current extent record but does not have
7152 * the same objectid.
7154 tmp = malloc(sizeof(*tmp));
7157 tmp->start = tmpl->start;
7158 tmp->max_size = tmpl->max_size;
7161 tmp->metadata = tmpl->metadata;
7162 tmp->extent_item_refs = tmpl->extent_item_refs;
7163 INIT_LIST_HEAD(&tmp->list);
7164 list_add_tail(&tmp->list, &rec->dups);
7165 rec->num_duplicates++;
/* A duplicate must not overwrite the primary record's reference count. */
7172 if (tmpl->extent_item_refs && !dup) {
7173 if (rec->extent_item_refs) {
7174 fprintf(stderr, "block %llu rec "
7175 "extent_item_refs %llu, passed %llu\n",
7176 (unsigned long long)tmpl->start,
7177 (unsigned long long)
7178 rec->extent_item_refs,
7179 (unsigned long long)tmpl->extent_item_refs);
7181 rec->extent_item_refs = tmpl->extent_item_refs;
/* The checked flags are sticky: they are set here, never cleared. */
7185 if (tmpl->content_checked)
7186 rec->content_checked = 1;
7187 if (tmpl->owner_ref_checked)
7188 rec->owner_ref_checked = 1;
7189 memcpy(&rec->parent_key, &tmpl->parent_key,
7190 sizeof(tmpl->parent_key));
7191 if (tmpl->parent_generation)
7192 rec->parent_generation = tmpl->parent_generation;
7193 if (rec->max_size < tmpl->max_size)
7194 rec->max_size = tmpl->max_size;
7197 * A metadata extent can't cross stripe_len boundary, otherwise
7198 * kernel scrub won't be able to handle it.
7199 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7203 rec->crossing_stripes = check_crossing_stripes(
7204 global_info, rec->start,
7205 global_info->nodesize);
7206 check_extent_type(rec);
7207 maybe_free_extent_rec(extent_cache, rec);
7211 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block back reference for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache; a template record
 * starting at @bytenr can be inserted with add_extent_rec_nolookup() and the
 * lookup repeated (presumably when the first lookup fails).  The matching
 * tree_backref is located with find_tree_backref() or created with
 * alloc_tree_backref().
 *
 * Two flags are maintained on the backref: node.found_ref and
 * node.found_extent_tree.  For each, a message is printed to stderr if the
 * flag was already set before it is set again.  NOTE(review): which of the
 * two is updated presumably depends on @found_ref -- confirm.
 *
 * The backref is inserted into rec->backref_tree (WARN_ON if rb_insert()
 * returns non-zero), then the chunk type is re-evaluated.
 */
7216 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7217 u64 parent, u64 root, int found_ref)
7219 struct extent_record *rec;
7220 struct tree_backref *back;
7221 struct cache_extent *cache;
7223 bool insert = false;
7225 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7227 struct extent_record tmpl;
7229 memset(&tmpl, 0, sizeof(tmpl));
7230 tmpl.start = bytenr;
7235 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7239 /* really a bug in cache_extent implement now */
7240 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7245 rec = container_of(cache, struct extent_record, cache);
7246 if (rec->start != bytenr) {
7248 * Several cause, from unaligned bytenr to over lapping extents
7253 back = find_tree_backref(rec, parent, root);
7255 back = alloc_tree_backref(rec, parent, root);
7262 if (back->node.found_ref) {
7263 fprintf(stderr, "Extent back ref already exists "
7264 "for %llu parent %llu root %llu \n",
7265 (unsigned long long)bytenr,
7266 (unsigned long long)parent,
7267 (unsigned long long)root);
7269 back->node.found_ref = 1;
7271 if (back->node.found_extent_tree) {
7272 fprintf(stderr, "Extent back ref already exists "
7273 "for %llu parent %llu root %llu \n",
7274 (unsigned long long)bytenr,
7275 (unsigned long long)parent,
7276 (unsigned long long)root);
7278 back->node.found_extent_tree = 1;
7281 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7282 compare_extent_backref));
7283 check_extent_type(rec);
7284 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data back reference for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache; a template record with
 * @bytenr and @max_size can be inserted with add_extent_rec_nolookup() and
 * the lookup repeated (presumably when the first lookup fails).  The
 * record's max_size is raised to @max_size if that is larger.  The matching
 * data_backref is located with find_data_backref() or created with
 * alloc_data_backref().
 *
 * Two kinds of update are visible:
 *  - one asserts num_refs == 1, sets node.found_ref, increments
 *    back->found_ref, refreshes back->bytes / back->disk_bytenr when they
 *    differ from @max_size / @bytenr, and marks the record's content and
 *    owner ref as checked;
 *  - the other stores @num_refs in back->num_refs and sets
 *    node.found_extent_tree, printing a message if it was already set.
 * NOTE(review): which one runs presumably depends on @found_ref -- confirm.
 */
7288 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7289 u64 parent, u64 root, u64 owner, u64 offset,
7290 u32 num_refs, int found_ref, u64 max_size)
7292 struct extent_record *rec;
7293 struct data_backref *back;
7294 struct cache_extent *cache;
7296 bool insert = false;
7298 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7300 struct extent_record tmpl;
7302 memset(&tmpl, 0, sizeof(tmpl));
7303 tmpl.start = bytenr;
7305 tmpl.max_size = max_size;
7307 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7311 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7316 rec = container_of(cache, struct extent_record, cache);
7317 if (rec->max_size < max_size)
7318 rec->max_size = max_size;
7321 * If found_ref is set then max_size is the real size and must match the
7322 * existing refs. So if we have already found a ref then we need to
7323 * make sure that this ref matches the existing one, otherwise we need
7324 * to add a new backref so we can notice that the backrefs don't match
7325 * and we need to figure out who is telling the truth. This is to
7326 * account for that awful fsync bug I introduced where we'd end up with
7327 * a btrfs_file_extent_item that would have its length include multiple
7328 * prealloc extents or point inside of a prealloc extent.
7330 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7333 back = alloc_data_backref(rec, parent, root, owner, offset,
7340 BUG_ON(num_refs != 1);
7341 if (back->node.found_ref)
7342 BUG_ON(back->bytes != max_size);
7343 back->node.found_ref = 1;
7344 back->found_ref += 1;
7345 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7346 back->bytes = max_size;
7347 back->disk_bytenr = bytenr;
7349 /* Need to reinsert if not already in the tree */
7351 rb_erase(&back->node.node, &rec->backref_tree);
7356 rec->content_checked = 1;
7357 rec->owner_ref_checked = 1;
7359 if (back->node.found_extent_tree) {
7360 fprintf(stderr, "Extent back ref already exists "
7361 "for %llu parent %llu root %llu "
7362 "owner %llu offset %llu num_refs %lu\n",
7363 (unsigned long long)bytenr,
7364 (unsigned long long)parent,
7365 (unsigned long long)root,
7366 (unsigned long long)owner,
7367 (unsigned long long)offset,
7368 (unsigned long)num_refs);
7370 back->num_refs = num_refs;
7371 back->node.found_extent_tree = 1;
7374 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7375 compare_extent_backref));
7377 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register [@bytenr, @bytenr + @size) in the @seen tree and add the same
 * range to @pending.  The result of the @seen insertion is kept in ret; the
 * result of the @pending insertion is not examined.
 */
7381 static int add_pending(struct cache_tree *pending,
7382 struct cache_tree *seen, u64 bytenr, u32 size)
7385 ret = add_cache_extent(seen, bytenr, size);
7388 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next block ranges to process and store them in @bits
 * (at most @bits_nr entries).
 *
 * Sources are consulted in this order in the visible code: the first entry
 * of @reada; then @nodes, searched from a position up to 32768 bytes before
 * @last and, as a fallback, from 0; then @pending from 0.  Consecutive cache
 * entries are copied into @bits while entries remain and ret < bits_nr.
 *
 * When more than 8 slots are still free, further entries are taken from
 * @pending starting right after bits[0]; an entry more than 32768 bytes past
 * the previous one is tested for specially (presumably ending the scan).
 */
7392 static int pick_next_pending(struct cache_tree *pending,
7393 struct cache_tree *reada,
7394 struct cache_tree *nodes,
7395 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): @last is u64 but node_start is unsigned long, which narrows
 * the value where unsigned long is 32 bits wide -- confirm this is intended.
 */
7398 unsigned long node_start = last;
7399 struct cache_extent *cache;
7402 cache = search_cache_extent(reada, 0);
7404 bits[0].start = cache->start;
7405 bits[0].size = cache->size;
/* Start the node search a little before the last processed position. */
7410 if (node_start > 32768)
7411 node_start -= 32768;
7413 cache = search_cache_extent(nodes, node_start);
7415 cache = search_cache_extent(nodes, 0);
7418 cache = search_cache_extent(pending, 0);
7423 bits[ret].start = cache->start;
7424 bits[ret].size = cache->size;
7425 cache = next_cache_extent(cache);
7427 } while (cache && ret < bits_nr);
7433 bits[ret].start = cache->start;
7434 bits[ret].size = cache->size;
7435 cache = next_cache_extent(cache);
7437 } while (cache && ret < bits_nr);
7439 if (bits_nr - ret > 8) {
7440 u64 lookup = bits[0].start + bits[0].size;
7441 struct cache_extent *next;
7442 next = search_cache_extent(pending, lookup);
7444 if (next->start - lookup > 32768)
7446 bits[ret].start = next->start;
7447 bits[ret].size = next->size;
7448 lookup = next->start + next->size;
7452 next = next_cache_extent(next);
/*
 * Release callback for one chunk cache entry: recovers the enclosing
 * chunk_record from @cache and unlinks it from its 'list' and 'dextents'
 * lists.
 */
7460 static void free_chunk_record(struct cache_extent *cache)
7462 struct chunk_record *rec;
7464 rec = container_of(cache, struct chunk_record, cache);
7465 list_del_init(&rec->list);
7466 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache, using free_chunk_record() as the
 * per-entry callback.
 */
7470 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7472 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Release callback for one device cache node: recovers the enclosing
 * device_record from its rb tree @node.
 */
7475 static void free_device_record(struct rb_node *node)
7477 struct device_record *rec;
7479 rec = container_of(node, struct device_record, node);
/*
 * Instantiate the FREE_RB_BASED_TREE macro for "device_cache" with
 * free_device_record() as the per-node callback.
 * NOTE(review): presumably this expands to the function that frees the whole
 * device cache rb tree -- confirm against the macro definition.
 */
7483 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.  The result of insert_cache_extent() is kept in
 * ret.
 */
7485 int insert_block_group_record(struct block_group_tree *tree,
7486 struct block_group_record *bg_rec)
7490 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7494 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Release callback for one block group cache entry: recovers the enclosing
 * block_group_record from @cache and unlinks it from its list.
 */
7498 static void free_block_group_record(struct cache_extent *cache)
7500 struct block_group_record *rec;
7502 rec = container_of(cache, struct block_group_record, cache);
7503 list_del_init(&rec->list);
/*
 * Release every entry of the cache tree embedded in @tree, using
 * free_block_group_record() as the per-entry callback.
 */
7507 void free_block_group_tree(struct block_group_tree *tree)
7509 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent cache of @tree using
 * insert_cache_extent2(), and queue it on both orphan lists
 * (tree->no_chunk_orphans and tree->no_device_orphans).
 */
7512 int insert_device_extent_record(struct device_extent_tree *tree,
7513 struct device_extent_record *de_rec)
7518 * Device extent is a bit different from the other extents, because
7519 * the extents which belong to the different devices may have the
7520 * same start and size, so we need use the special extent cache
7521 * search/insert functions.
7523 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7527 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7528 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Release callback for one device extent cache entry: recovers the enclosing
 * device_extent_record from @cache and unlinks it from chunk_list and
 * device_list when those list heads are not empty.
 */
7532 static void free_device_extent_record(struct cache_extent *cache)
7534 struct device_extent_record *rec;
7536 rec = container_of(cache, struct device_extent_record, cache);
7537 if (!list_empty(&rec->chunk_list))
7538 list_del_init(&rec->chunk_list);
7539 if (!list_empty(&rec->device_list))
7540 list_del_init(&rec->device_list);
/*
 * Release every entry of the cache tree embedded in @tree, using
 * free_device_extent_record() as the per-entry callback.
 */
7544 void free_device_extent_tree(struct device_extent_tree *tree)
7546 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7549 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent reference item at @slot of @leaf into a backref in
 * @extent_cache (only built with BTRFS_COMPAT_EXTENT_TREE_V0).
 *
 * The item key supplies the extent bytenr (key.objectid) and the parent
 * (key.offset).  A reference whose objectid is below
 * BTRFS_FIRST_FREE_OBJECTID is added with add_tree_backref(); the other
 * visible call adds a data backref carrying the v0 reference count.
 */
7550 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7551 struct extent_buffer *leaf, int slot)
7553 struct btrfs_extent_ref_v0 *ref0;
7554 struct btrfs_key key;
7557 btrfs_item_key_to_cpu(leaf, &key, slot);
7558 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7559 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7560 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7563 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7564 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(), sized by
 * btrfs_chunk_record_size() for the item's stripe count; "memory allocation
 * failed" is printed to stderr on the allocation-failure path.  The cache
 * range is [key->offset, key->offset + chunk length); generation comes from
 * the leaf header; objectid/type/offset come from @key; the remaining fields
 * and every stripe's devid, offset and device UUID are read from the item.
 */
7570 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7571 struct btrfs_key *key,
7574 struct btrfs_chunk *ptr;
7575 struct chunk_record *rec;
7578 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7579 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* The allocation size depends on the number of stripes in the item. */
7581 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7583 fprintf(stderr, "memory allocation failed\n");
7587 INIT_LIST_HEAD(&rec->list);
7588 INIT_LIST_HEAD(&rec->dextents);
7591 rec->cache.start = key->offset;
7592 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7594 rec->generation = btrfs_header_generation(leaf);
7596 rec->objectid = key->objectid;
7597 rec->type = key->type;
7598 rec->offset = key->offset;
7600 rec->length = rec->cache.size;
7601 rec->owner = btrfs_chunk_owner(leaf, ptr);
7602 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7603 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7604 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7605 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7606 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7607 rec->num_stripes = num_stripes;
7608 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device UUID. */
7610 for (i = 0; i < rec->num_stripes; ++i) {
7611 rec->stripes[i].devid =
7612 btrfs_stripe_devid_nr(leaf, ptr, i);
7613 rec->stripes[i].offset =
7614 btrfs_stripe_offset_nr(leaf, ptr, i);
7615 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7616 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid chunk
 * is reported with error() as "is not valid, ignore it".  A chunk_record is
 * then built with btrfs_new_chunk_record() and inserted into the cache;
 * "Chunk[...] existed." is printed on the insert-failure path.
 */
7623 static int process_chunk_item(struct cache_tree *chunk_cache,
7624 struct btrfs_key *key, struct extent_buffer *eb,
7627 struct chunk_record *rec;
7628 struct btrfs_chunk *chunk;
7631 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7633 * Do extra check for this chunk item,
7635 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7636 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7637 * and owner<->key_type check.
7639 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7642 error("chunk(%llu, %llu) is not valid, ignore it",
7643 key->offset, btrfs_chunk_length(eb, chunk));
7646 rec = btrfs_new_chunk_record(eb, key, slot);
7647 ret = insert_cache_extent(chunk_cache, &rec->cache);
7649 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7650 rec->offset, rec->length);
/*
 * Build a device_record from the device item at @slot of @eb and insert it
 * into the @dev_cache rb tree, ordered by device_record_compare.
 *
 * The record is allocated with malloc(); "memory allocation failed" is
 * printed on the allocation-failure path.  "Device[...] existed." is printed
 * on the insert-failure path.
 */
7657 static int process_device_item(struct rb_root *dev_cache,
7658 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7660 struct btrfs_dev_item *ptr;
7661 struct device_record *rec;
7664 ptr = btrfs_item_ptr(eb,
7665 slot, struct btrfs_dev_item);
7667 rec = malloc(sizeof(*rec));
7669 fprintf(stderr, "memory allocation failed\n");
/*
 * NOTE(review): devid is set from key->offset here and overwritten below
 * with the id stored in the item -- the first assignment looks redundant.
 */
7673 rec->devid = key->offset;
7674 rec->generation = btrfs_header_generation(eb);
7676 rec->objectid = key->objectid;
7677 rec->type = key->type;
7678 rec->offset = key->offset;
7680 rec->devid = btrfs_device_id(eb, ptr);
7681 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7682 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7684 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7686 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(); "memory allocation failed"
 * is printed on the allocation-failure path.  The cache range is
 * [key->objectid, key->objectid + key->offset); generation comes from the
 * leaf header, objectid/type/offset from @key, and flags from the item.
 */
7693 struct block_group_record *
7694 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7697 struct btrfs_block_group_item *ptr;
7698 struct block_group_record *rec;
7700 rec = calloc(1, sizeof(*rec));
7702 fprintf(stderr, "memory allocation failed\n");
/* The key encodes the range: objectid is the start, offset is the length. */
7706 rec->cache.start = key->objectid;
7707 rec->cache.size = key->offset;
7709 rec->generation = btrfs_header_generation(leaf);
7711 rec->objectid = key->objectid;
7712 rec->type = key->type;
7713 rec->offset = key->offset;
7715 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7716 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7718 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it into
 * @block_group_cache.  "Block Group[...] existed." is printed on the
 * insert-failure path.
 */
7723 static int process_block_group_item(struct block_group_tree *block_group_cache,
7724 struct btrfs_key *key,
7725 struct extent_buffer *eb, int slot)
7727 struct block_group_record *rec;
7730 rec = btrfs_new_block_group_record(eb, key, slot);
7731 ret = insert_block_group_record(block_group_cache, rec);
7733 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7734 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the device extent item at @slot of
 * @leaf.
 *
 * The record is allocated zeroed with calloc(); "memory allocation failed"
 * is printed on the allocation-failure path.  The cache entry is keyed by
 * key->objectid (stored in cache.objectid) and key->offset (cache.start),
 * and sized by the extent length read from the item.  The owning chunk's
 * objectid and offset are also read from the item.
 */
7741 struct device_extent_record *
7742 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7743 struct btrfs_key *key, int slot)
7745 struct device_extent_record *rec;
7746 struct btrfs_dev_extent *ptr;
7748 rec = calloc(1, sizeof(*rec));
7750 fprintf(stderr, "memory allocation failed\n");
7754 rec->cache.objectid = key->objectid;
7755 rec->cache.start = key->offset;
7757 rec->generation = btrfs_header_generation(leaf);
7759 rec->objectid = key->objectid;
7760 rec->type = key->type;
7761 rec->offset = key->offset;
7763 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7764 rec->chunk_objecteid =
7765 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7767 btrfs_dev_extent_chunk_offset(leaf, ptr);
7768 rec->length = btrfs_dev_extent_length(leaf, ptr);
7769 rec->cache.size = rec->length;
7771 INIT_LIST_HEAD(&rec->chunk_list);
7772 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache.  "Device extent[...] existed." is printed on the
 * insert-failure path.
 */
7778 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7779 struct btrfs_key *key, struct extent_buffer *eb,
7782 struct device_extent_record *rec;
7785 rec = btrfs_new_device_extent_record(eb, key, slot);
7786 ret = insert_device_extent_record(dev_extent_cache, rec);
7789 "Device extent[%llu, %llu, %llu] existed.\n",
7790 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item at @slot of @eb and feed it into @extent_cache.
 *
 * The extent length is the filesystem nodesize for a
 * BTRFS_METADATA_ITEM_KEY and key.offset otherwise.  Items are rejected with
 * an error() message when the bytenr is not sectorsize aligned, when a
 * metadata extent's length is not nodesize, or when a data extent's length
 * is not sectorsize aligned.
 *
 * An item smaller than struct btrfs_extent_item is handled as a v0 item
 * when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.  Otherwise the reference
 * count and flags are read, an extent record is added with add_extent_rec(),
 * and every inline reference following the item header is turned into a tree
 * or data backref according to its type; an unknown type is reported as
 * "corrupt extent record".
 */
7797 static int process_extent_item(struct btrfs_root *root,
7798 struct cache_tree *extent_cache,
7799 struct extent_buffer *eb, int slot)
7801 struct btrfs_extent_item *ei;
7802 struct btrfs_extent_inline_ref *iref;
7803 struct btrfs_extent_data_ref *dref;
7804 struct btrfs_shared_data_ref *sref;
7805 struct btrfs_key key;
7806 struct extent_record tmpl;
7811 u32 item_size = btrfs_item_size_nr(eb, slot);
7817 btrfs_item_key_to_cpu(eb, &key, slot);
7819 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7821 num_bytes = root->fs_info->nodesize;
7823 num_bytes = key.offset;
7826 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7827 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7828 key.objectid, root->fs_info->sectorsize);
/* An item shorter than the current header can only be the v0 layout. */
7831 if (item_size < sizeof(*ei)) {
7832 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7833 struct btrfs_extent_item_v0 *ei0;
7834 if (item_size != sizeof(*ei0)) {
7836 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7837 key.objectid, key.type, key.offset,
7838 btrfs_header_bytenr(eb), slot);
7841 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7842 refs = btrfs_extent_refs_v0(eb, ei0);
7846 memset(&tmpl, 0, sizeof(tmpl));
7847 tmpl.start = key.objectid;
7848 tmpl.nr = num_bytes;
7849 tmpl.extent_item_refs = refs;
7850 tmpl.metadata = metadata;
7852 tmpl.max_size = num_bytes;
7854 return add_extent_rec(extent_cache, &tmpl);
7857 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7858 refs = btrfs_extent_refs(eb, ei);
7859 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7863 if (metadata && num_bytes != root->fs_info->nodesize) {
7864 error("ignore invalid metadata extent, length %llu does not equal to %u",
7865 num_bytes, root->fs_info->nodesize);
7868 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7869 error("ignore invalid data extent, length %llu is not aligned to %u",
7870 num_bytes, root->fs_info->sectorsize);
7874 memset(&tmpl, 0, sizeof(tmpl));
7875 tmpl.start = key.objectid;
7876 tmpl.nr = num_bytes;
7877 tmpl.extent_item_refs = refs;
7878 tmpl.metadata = metadata;
7880 tmpl.max_size = num_bytes;
/* NOTE(review): the result of add_extent_rec() is not examined here. */
7881 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline references start right after the item header; a tree block stored
 * under an EXTENT_ITEM key additionally carries a btrfs_tree_block_info
 * before them.
 */
7883 ptr = (unsigned long)(ei + 1);
7884 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7885 key.type == BTRFS_EXTENT_ITEM_KEY)
7886 ptr += sizeof(struct btrfs_tree_block_info);
7888 end = (unsigned long)ei + item_size;
7890 iref = (struct btrfs_extent_inline_ref *)ptr;
7891 type = btrfs_extent_inline_ref_type(eb, iref);
7892 offset = btrfs_extent_inline_ref_offset(eb, iref);
7894 case BTRFS_TREE_BLOCK_REF_KEY:
7895 ret = add_tree_backref(extent_cache, key.objectid,
7899 "add_tree_backref failed (extent items tree block): %s",
7902 case BTRFS_SHARED_BLOCK_REF_KEY:
7903 ret = add_tree_backref(extent_cache, key.objectid,
7907 "add_tree_backref failed (extent items shared block): %s",
7910 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref's offset field. */
7911 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7912 add_data_backref(extent_cache, key.objectid, 0,
7913 btrfs_extent_data_ref_root(eb, dref),
7914 btrfs_extent_data_ref_objectid(eb,
7916 btrfs_extent_data_ref_offset(eb, dref),
7917 btrfs_extent_data_ref_count(eb, dref),
7920 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref payload follows the inline ref. */
7921 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7922 add_data_backref(extent_cache, key.objectid, offset,
7924 btrfs_shared_data_ref_count(eb, sref),
7928 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7929 key.objectid, key.type, num_bytes);
/* Advance by the size of the reference just handled. */
7932 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that [@offset, @offset + @bytes) of block group @cache is described
 * by exactly one free space entry, ignoring superblock mirror stripes.
 *
 * For each of the BTRFS_SUPER_MIRROR_MAX superblock copies,
 * btrfs_rmap_block() maps the copy's byte offset to logical addresses inside
 * the block group.  Parts of the range overlapping such a stripe are trimmed
 * off the front or the end; when a stripe lands in the middle, the left part
 * is checked by a recursive call and the loop continues with the right part.
 *
 * The remaining range is looked up with btrfs_find_free_space().  A missing
 * entry, a different offset or a different size is reported on stderr.  A
 * matching entry is unlinked from cache->free_space_ctl.
 */
7939 static int check_cache_range(struct btrfs_root *root,
7940 struct btrfs_block_group_cache *cache,
7941 u64 offset, u64 bytes)
7943 struct btrfs_free_space *entry;
7949 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7950 bytenr = btrfs_sb_offset(i);
7951 ret = btrfs_rmap_block(root->fs_info,
7952 cache->key.objectid, bytenr, 0,
7953 &logical, &nr, &stripe_len);
/* Stripe lies entirely before the range. */
7958 if (logical[nr] + stripe_len <= offset)
/* Stripe lies entirely after the range. */
7960 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the range start: drop the front. */
7962 if (logical[nr] == offset) {
7963 if (stripe_len >= bytes) {
7967 bytes -= stripe_len;
7968 offset += stripe_len;
/* Stripe starts before the range: keep only what follows the stripe. */
7969 } else if (logical[nr] < offset) {
7970 if (logical[nr] + stripe_len >=
7975 bytes = (offset + bytes) -
7976 (logical[nr] + stripe_len);
7977 offset = logical[nr] + stripe_len;
7980 * Could be tricky, the super may land in the
7981 * middle of the area we're checking. First
7982 * check the easiest case, it's at the end.
7984 if (logical[nr] + stripe_len >=
7986 bytes = logical[nr] - offset;
7990 /* Check the left side */
7991 ret = check_cache_range(root, cache,
7993 logical[nr] - offset);
7999 /* Now we continue with the right side */
8000 bytes = (offset + bytes) -
8001 (logical[nr] + stripe_len);
8002 offset = logical[nr] + stripe_len;
8009 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8011 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8012 offset, offset+bytes);
8016 if (entry->offset != offset) {
8017 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8022 if (entry->bytes != bytes) {
8023 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8024 bytes, entry->bytes, offset);
/* Consume the verified entry so leftovers can be detected by the caller. */
8028 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Compare the loaded free space entries of block group @cache with the
 * extent tree.
 *
 * Starting at max(block group start, BTRFS_SUPER_INFO_OFFSET), the extent
 * tree is walked over EXTENT_ITEM and METADATA_ITEM keys inside the block
 * group.  'last' tracks the end of the previous extent (key.offset long for
 * an EXTENT_ITEM, nodesize long otherwise); each gap between 'last' and the
 * next extent, and the gap up to the end of the block group, is verified
 * with check_cache_range().  Finally a message is printed if entries are
 * still left in the free space rb tree.
 */
8033 static int verify_space_cache(struct btrfs_root *root,
8034 struct btrfs_block_group_cache *cache)
8036 struct btrfs_path path;
8037 struct extent_buffer *leaf;
8038 struct btrfs_key key;
/* All lookups below go through the extent tree, whatever root was passed. */
8042 root = root->fs_info->extent_root;
8044 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8046 btrfs_init_path(&path);
8047 key.objectid = last;
8049 key.type = BTRFS_EXTENT_ITEM_KEY;
8050 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Move on to the next leaf once the current one is exhausted. */
8055 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8056 ret = btrfs_next_leaf(root, &path);
8064 leaf = path.nodes[0];
8065 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the end of this block group. */
8066 if (key.objectid >= cache->key.offset + cache->key.objectid)
8068 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8069 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts right where the previous one ended: no gap to verify. */
8074 if (last == key.objectid) {
8075 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8076 last = key.objectid + key.offset;
8078 last = key.objectid + root->fs_info->nodesize;
8083 ret = check_cache_range(root, cache, last,
8084 key.objectid - last);
8087 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8088 last = key.objectid + key.offset;
8090 last = key.objectid + root->fs_info->nodesize;
/* Verify the tail between the last extent and the end of the block group. */
8094 if (last < cache->key.objectid + cache->key.offset)
8095 ret = check_cache_range(root, cache, last,
8096 cache->key.objectid +
8097 cache->key.offset - last);
8100 btrfs_release_path(&path);
8103 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8104 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * If the superblock's cache generation is not -1ULL and differs from the
 * superblock generation, a message saying the space cache will be
 * invalidated is printed.  Otherwise block groups are visited in address
 * order starting after the primary superblock.  For each one the free space
 * control structure is created if missing (any previously loaded entries are
 * removed otherwise), the free space is loaded -- from the free space tree
 * when the FREE_SPACE_TREE compat_ro feature is set, else from the space
 * cache -- and verified with verify_space_cache().
 *
 * Returns -EINVAL if the local error flag was set, 0 otherwise.
 */
8112 static int check_space_cache(struct btrfs_root *root)
8114 struct btrfs_block_group_cache *cache;
8115 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8119 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8120 btrfs_super_generation(root->fs_info->super_copy) !=
8121 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8122 printf("cache and super generation don't match, space cache "
8123 "will be invalidated\n");
8127 if (ctx.progress_enabled) {
8128 ctx.tp = TASK_FREE_SPACE;
8129 task_start(ctx.info);
8133 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup starts right after this block group. */
8137 start = cache->key.objectid + cache->key.offset;
8138 if (!cache->free_space_ctl) {
8139 if (btrfs_init_free_space_ctl(cache,
8140 root->fs_info->sectorsize)) {
8145 btrfs_remove_free_space_cache(cache);
8148 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
/* Super stripes are excluded only around the free space tree load. */
8149 ret = exclude_super_stripes(root, cache);
8151 fprintf(stderr, "could not exclude super stripes: %s\n",
8156 ret = load_free_space_tree(root->fs_info, cache);
8157 free_excluded_extents(root, cache);
8159 fprintf(stderr, "could not load free space tree: %s\n",
8166 ret = load_free_space_cache(root->fs_info, cache);
8171 ret = verify_space_cache(root, cache);
8173 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8174 cache->key.objectid);
8179 task_stop(ctx.info);
8181 return error ? -EINVAL : 0;
/*
 * Read the data of the extent range [@bytenr, @bytenr + @num_bytes) and
 * compare its checksums with those stored in the csum item.
 *
 * @num_bytes is tested for being a multiple of the sectorsize.  A buffer of
 * @num_bytes is allocated and filled with read_extent_data() from the
 * current mirror.  Every sectorsize-long piece is checksummed with
 * btrfs_csum_data()/btrfs_csum_final() and compared with the expected value
 * read from @eb at @leaf_offset plus the piece's index times the csum size.
 * A mismatch is printed to stderr, and the number of copies is queried to
 * decide whether another mirror is available.
 */
8184 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8185 u64 num_bytes, unsigned long leaf_offset,
8186 struct extent_buffer *eb) {
8188 struct btrfs_fs_info *fs_info = root->fs_info;
8190 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8192 unsigned long csum_offset;
8196 u64 data_checked = 0;
8202 if (num_bytes % fs_info->sectorsize)
8205 data = malloc(num_bytes);
8209 while (offset < num_bytes) {
8212 read_len = num_bytes - offset;
8213 /* read as much as possible in one call; read_len is passed by pointer */
8214 ret = read_extent_data(fs_info, data + offset,
8215 bytenr + offset, &read_len, mirror);
8219 /* verify the checksum of every sectorsize-long piece of data */
8220 while (data_checked < read_len) {
8222 tmp = offset + data_checked;
8224 csum = btrfs_csum_data((char *)data + tmp,
8225 csum, fs_info->sectorsize);
8226 btrfs_csum_final(csum, (u8 *)&csum);
/* One stored checksum of csum_size bytes per sector. */
8228 csum_offset = leaf_offset +
8229 tmp / fs_info->sectorsize * csum_size;
8230 read_extent_buffer(eb, (char *)&csum_expected,
8231 csum_offset, csum_size);
8232 /* try another mirror */
8233 if (csum != csum_expected) {
8234 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8235 mirror, bytenr + tmp,
8236 csum, csum_expected);
8237 num_copies = btrfs_num_copies(root->fs_info,
8239 if (mirror < num_copies - 1) {
8244 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range [@bytenr, @bytenr + @num_bytes) is covered by
 * extent items in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps backwards, also
 * past items whose key type sorts after EXTENT_ITEM.  Items are then walked
 * forwards; each EXTENT_ITEM overlapping the range removes the covered part
 * from it.  When an extent lies in the middle of the range, the part to its
 * right is checked by a recursive call and the walk continues with the part
 * to its left.  If bytes remain uncovered and no error occurred, "There are
 * no extents for csum range" is printed.
 */
8253 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8256 struct btrfs_path path;
8257 struct extent_buffer *leaf;
8258 struct btrfs_key key;
8261 btrfs_init_path(&path);
8262 key.objectid = bytenr;
8263 key.type = BTRFS_EXTENT_ITEM_KEY;
8264 key.offset = (u64)-1;
8267 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8270 fprintf(stderr, "Error looking up extent record %d\n", ret);
8271 btrfs_release_path(&path);
/* Step back one item, crossing into the previous leaf when at slot 0. */
8274 if (path.slots[0] > 0) {
8277 ret = btrfs_prev_leaf(root, &path);
8280 } else if (ret > 0) {
8287 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8290 * Block group items come before extent items if they have the same
8291 * bytenr, so walk back one more just in case. Dear future traveller,
8292 * first congrats on mastering time travel. Now if it's not too much
8293 * trouble could you go back to 2006 and tell Chris to make the
8294 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8295 * EXTENT_ITEM_KEY please?
8297 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8298 if (path.slots[0] > 0) {
8301 ret = btrfs_prev_leaf(root, &path);
8304 } else if (ret > 0) {
8309 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8313 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8314 ret = btrfs_next_leaf(root, &path);
8316 fprintf(stderr, "Error going to next leaf "
8318 btrfs_release_path(&path);
8324 leaf = path.nodes[0];
8325 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8326 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
8330 if (key.objectid + key.offset < bytenr) {
/* Extent starts after the range ends. */
8334 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the range start: drop the covered front. */
8337 if (key.objectid == bytenr) {
8338 if (key.offset >= num_bytes) {
8342 num_bytes -= key.offset;
8343 bytenr += key.offset;
/* Extent starts before the range: keep only what follows the extent. */
8344 } else if (key.objectid < bytenr) {
8345 if (key.objectid + key.offset >= bytenr + num_bytes) {
8349 num_bytes = (bytenr + num_bytes) -
8350 (key.objectid + key.offset);
8351 bytenr = key.objectid + key.offset;
8353 if (key.objectid + key.offset < bytenr + num_bytes) {
8354 u64 new_start = key.objectid + key.offset;
8355 u64 new_bytes = bytenr + num_bytes - new_start;
8358 * Weird case, the extent is in the middle of
8359 * our range, we'll have to search one side
8360 * and then the other. Not sure if this happens
8361 * in real life, but no harm in coding it up
8362 * anyway just in case.
8364 btrfs_release_path(&path);
8365 ret = check_extent_exists(root, new_start,
8368 fprintf(stderr, "Right section didn't "
8372 num_bytes = key.objectid - bytenr;
8375 num_bytes = key.objectid - bytenr;
8382 if (num_bytes && !ret) {
8383 fprintf(stderr, "There are no extents for csum range "
8384 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8388 btrfs_release_path(&path);
/*
 * check_csums - walk the EXTENT_CSUM items of the csum tree.
 *
 * The root argument is only used to reach fs_info; it is replaced by
 * fs_info->csum_root right away, and the function reports an error when
 * that root node is not uptodate.
 *
 * For each csum item the covered data length is
 * item_size / csum_size * sectorsize.  When the global check_data_csum is
 * zero the per-item data verification is skipped via skip_csum_check;
 * otherwise check_extent_csums() is called for the item.
 *
 * Consecutive csum items are accumulated in (offset, num_bytes).  When an
 * item does not start at offset + num_bytes, check_extent_exists() is
 * called for the accumulated range and a message may be printed to stderr
 * (presumably when that call fails; the test is not visible here).
 *
 * NOTE(review): braces, branch bodies and the return statements are
 * missing from this listing.
 */
8392 static int check_csums(struct btrfs_root *root)
8394 struct btrfs_path path;
8395 struct extent_buffer *leaf;
8396 struct btrfs_key key;
8397 u64 offset = 0, num_bytes = 0;
8398 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8402 unsigned long leaf_offset;
/* From here on root refers to the csum tree, not the root passed in. */
8404 root = root->fs_info->csum_root;
8405 if (!extent_buffer_uptodate(root->node)) {
8406 fprintf(stderr, "No valid csum tree found\n");
8410 btrfs_init_path(&path);
8411 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8412 key.type = BTRFS_EXTENT_CSUM_KEY;
8414 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8416 fprintf(stderr, "Error searching csum tree %d\n", ret);
8417 btrfs_release_path(&path);
8421 if (ret > 0 && path.slots[0])
8426 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8427 ret = btrfs_next_leaf(root, &path);
8429 fprintf(stderr, "Error going to next leaf "
8436 leaf = path.nodes[0];
8438 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8439 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* One checksum of csum_size bytes per sector of data. */
8444 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8445 csum_size) * root->fs_info->sectorsize;
8446 if (!check_data_csum)
8447 goto skip_csum_check;
8448 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8449 ret = check_extent_csums(root, key.offset, data_len,
8455 offset = key.offset;
8456 } else if (key.offset != offset + num_bytes) {
8457 ret = check_extent_exists(root, offset, num_bytes);
8459 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8460 "there is no extent record\n",
8461 offset, offset+num_bytes);
8464 offset = key.offset;
8467 num_bytes += data_len;
8471 btrfs_release_path(&path);
/*
 * is_dropped_key - compare key with drop_key field by field, in the order
 * objectid, type, offset.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Judging by the name and by the caller in run_next_block(), the function
 * presumably returns non-zero when key sorts before drop_key -- confirm
 * against the full file.
 */
8475 static int is_dropped_key(struct btrfs_key *key,
8476 struct btrfs_key *drop_key) {
8477 if (key->objectid < drop_key->objectid)
8479 else if (key->objectid == drop_key->objectid) {
8480 if (key->type < drop_key->type)
8482 else if (key->type == drop_key->type) {
8483 if (key->offset < drop_key->offset)
8491 * Here are the rules for FULL_BACKREF.
8493 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8494 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8496 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8497 * if it happened after the relocation occurred since we'll have dropped the
8498 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8499 * have no real way to know for sure.
8501 * We process the blocks one root at a time, and we start from the lowest root
8502 * objectid and go to the highest. So we can just lookup the owner backref for
8503 * the record and if we don't find it then we know it doesn't exist and we have
8506 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8507 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8508 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF belongs
 * in *flags for tree block buf, reached through root item record ri.
 *
 * The extent record for buf->start is looked up in extent_cache.  The
 * checks visible here, in order: ri->objectid below
 * BTRFS_FIRST_FREE_OBJECTID, buf->start equal to ri->bytenr, the
 * BTRFS_HEADER_FLAG_RELOC header flag, the header owner equal to
 * ri->objectid, and finally a lookup of the tree backref for that owner.
 *
 * When the computed answer disagrees with an already recorded
 * rec->flag_block_full_backref (any value other than FLAG_UNSET),
 * rec->bad_full_backref is set to 1.
 *
 * NOTE(review): the branch targets and labels are missing from this
 * listing, so which check leads to which outcome has to be confirmed
 * against the full file.
 */
8510 static int calc_extent_flag(struct cache_tree *extent_cache,
8511 struct extent_buffer *buf,
8512 struct root_item_record *ri,
8515 struct extent_record *rec;
8516 struct cache_extent *cache;
8517 struct tree_backref *tback;
8520 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8521 /* we have added this extent before */
8525 rec = container_of(cache, struct extent_record, cache);
8528 * Except file/reloc tree, we can not have
8531 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8536 if (buf->start == ri->bytenr)
8539 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8542 owner = btrfs_header_owner(buf);
8543 if (owner == ri->objectid)
8546 tback = find_tree_backref(rec, 0, owner);
8551 if (rec->flag_block_full_backref != FLAG_UNSET &&
8552 rec->flag_block_full_backref != 0)
8553 rec->bad_full_backref = 1;
8556 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8557 if (rec->flag_block_full_backref != FLAG_UNSET &&
8558 rec->flag_block_full_backref != 1)
8559 rec->bad_full_backref = 1;
/*
 * report_mismatch_key_root - print one diagnostic line to stderr naming a
 * key type and the root it was found in.
 *
 * key_type is rendered by print_key_type() and rootid by print_objectid();
 * the surrounding text is written with fprintf().
 */
8563 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8565 fprintf(stderr, "Invalid key type(");
8566 print_key_type(stderr, 0, key_type);
8567 fprintf(stderr, ") found in root(");
8568 print_objectid(stderr, rootid, 0);
8569 fprintf(stderr, ")\n");
8573 * Check if the key is valid with its extent buffer.
8575 * This is an early check in case invalid key exists in an extent buffer
8576 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * check_type_with_root - check that a key type may appear in tree rootid.
 *
 * Rules visible here: DEV_ITEM and CHUNK_ITEM are tied to the chunk tree;
 * EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM to the extent tree;
 * ROOT_ITEM to the root tree; DEV_EXTENT to the dev tree.
 * report_mismatch_key_root() is called at the end of the function,
 * presumably on the failure path only.
 *
 * NOTE(review): the label BTRFS_CSUM_TREE_OBJECTID below is a tree
 * objectid, while every other label is a key type and the value being
 * classified is key_type.  It looks like BTRFS_EXTENT_CSUM_KEY was
 * intended.  The second half of the condition under that label is not
 * visible in this listing; before changing the label, confirm that the set
 * of accepted roots includes the csum tree, otherwise every csum leaf
 * would be rejected.
 */
8579 static int check_type_with_root(u64 rootid, u8 key_type)
8582 /* Only valid in chunk tree */
8583 case BTRFS_DEV_ITEM_KEY:
8584 case BTRFS_CHUNK_ITEM_KEY:
8585 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8588 /* valid in csum and log tree */
8589 case BTRFS_CSUM_TREE_OBJECTID:
8590 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8594 case BTRFS_EXTENT_ITEM_KEY:
8595 case BTRFS_METADATA_ITEM_KEY:
8596 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8597 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8600 case BTRFS_ROOT_ITEM_KEY:
8601 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8604 case BTRFS_DEV_EXTENT_KEY:
8605 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8611 report_mismatch_key_root(key_type, rootid);
/*
 * run_next_block - pick the next pending tree block, read it and feed its
 * contents into the fsck caches.
 *
 * NOTE(review): this listing is missing braces, conditions and several
 * statements; the outline below is limited to what is visible.
 *
 *  - pick_next_pending() fills bits[].  A loop (under a condition that is
 *    not visible) adds the picked ranges to the reada cache and issues
 *    readahead_tree_block() for them.
 *  - bits[0] becomes the current block; matching entries are removed from
 *    the pending, reada and nodes caches.
 *  - The block is read with the parent generation taken from its extent
 *    record when one exists.  A block that is not uptodate is handed to
 *    record_bad_block_io().
 *  - Unless init_extent_tree is set, btrfs_lookup_extent_info() is asked
 *    for the block flags; calc_extent_flag() is called in both branches
 *    (presumably as fallback and as the only source respectively).  If it
 *    fails, FULL_BACKREF is assumed.
 *  - The FULL_BACKREF decision is cross-checked against ri and the block
 *    header; disagreements set rec->bad_full_backref.
 *  - Leaf: each item is dispatched on key.type to the matching process_*
 *    or add_*_backref helper.  ORPHAN_ITEM keys may be queued on the
 *    global delete_items list.  File extent items update
 *    data_bytes_allocated and data_bytes_referenced and add a data
 *    backref.
 *  - Node: every child pointer gets an extent record and a tree backref
 *    and is queued with add_pending() in either nodes or pending.
 *  - total_btree_bytes, total_fs_tree_bytes, total_extent_tree_bytes and
 *    btree_space_waste are updated and the buffer is freed.
 */
8615 static int run_next_block(struct btrfs_root *root,
8616 struct block_info *bits,
8619 struct cache_tree *pending,
8620 struct cache_tree *seen,
8621 struct cache_tree *reada,
8622 struct cache_tree *nodes,
8623 struct cache_tree *extent_cache,
8624 struct cache_tree *chunk_cache,
8625 struct rb_root *dev_cache,
8626 struct block_group_tree *block_group_cache,
8627 struct device_extent_tree *dev_extent_cache,
8628 struct root_item_record *ri)
8630 struct btrfs_fs_info *fs_info = root->fs_info;
8631 struct extent_buffer *buf;
8632 struct extent_record *rec = NULL;
8643 struct btrfs_key key;
8644 struct cache_extent *cache;
8647 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8648 bits_nr, &reada_bits);
8653 for(i = 0; i < nritems; i++) {
8654 ret = add_cache_extent(reada, bits[i].start,
8659 /* fixme, get the parent transid */
8660 readahead_tree_block(fs_info, bits[i].start, 0);
/* The first picked entry is the block handled by this call. */
8663 *last = bits[0].start;
8664 bytenr = bits[0].start;
8665 size = bits[0].size;
8667 cache = lookup_cache_extent(pending, bytenr, size);
8669 remove_cache_extent(pending, cache);
8672 cache = lookup_cache_extent(reada, bytenr, size);
8674 remove_cache_extent(reada, cache);
8677 cache = lookup_cache_extent(nodes, bytenr, size);
8679 remove_cache_extent(nodes, cache);
8682 cache = lookup_cache_extent(extent_cache, bytenr, size);
8684 rec = container_of(cache, struct extent_record, cache);
8685 gen = rec->parent_generation;
8688 /* fixme, get the real parent transid */
8689 buf = read_tree_block(root->fs_info, bytenr, gen);
8690 if (!extent_buffer_uptodate(buf)) {
8691 record_bad_block_io(root->fs_info,
8692 extent_cache, bytenr, size);
8696 nritems = btrfs_header_nritems(buf);
8699 if (!init_extent_tree) {
8700 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8701 btrfs_header_level(buf), 1, NULL,
8704 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8706 fprintf(stderr, "Couldn't calc extent flags\n");
8707 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8712 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8714 fprintf(stderr, "Couldn't calc extent flags\n");
8715 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8719 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8721 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8722 ri->objectid == btrfs_header_owner(buf)) {
8724 * Ok we got to this block from it's original owner and
8725 * we have FULL_BACKREF set. Relocation can leave
8726 * converted blocks over so this is altogether possible,
8727 * however it's not possible if the generation > the
8728 * last snapshot, so check for this case.
8730 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8731 btrfs_header_generation(buf) > ri->last_snapshot) {
8732 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8733 rec->bad_full_backref = 1;
8738 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8739 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8740 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8741 rec->bad_full_backref = 1;
8745 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8746 rec->flag_block_full_backref = 1;
8750 rec->flag_block_full_backref = 0;
8752 owner = btrfs_header_owner(buf);
8755 ret = check_block(root, extent_cache, buf, flags);
8759 if (btrfs_is_leaf(buf)) {
8760 btree_space_waste += btrfs_leaf_free_space(root, buf);
8761 for (i = 0; i < nritems; i++) {
8762 struct btrfs_file_extent_item *fi;
8763 btrfs_item_key_to_cpu(buf, &key, i);
8765 * Check key type against the leaf owner.
8766 * Could filter quite a lot of early error if
8769 if (check_type_with_root(btrfs_header_owner(buf),
8771 fprintf(stderr, "ignoring invalid key\n");
8774 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8775 process_extent_item(root, extent_cache, buf,
8779 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8780 process_extent_item(root, extent_cache, buf,
8784 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8786 btrfs_item_size_nr(buf, i);
8789 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8790 process_chunk_item(chunk_cache, &key, buf, i);
8793 if (key.type == BTRFS_DEV_ITEM_KEY) {
8794 process_device_item(dev_cache, &key, buf, i);
8797 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8798 process_block_group_item(block_group_cache,
8802 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8803 process_device_extent_item(dev_extent_cache,
8808 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8809 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8810 process_extent_ref_v0(extent_cache, buf, i);
8817 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8818 ret = add_tree_backref(extent_cache,
8819 key.objectid, 0, key.offset, 0);
8822 "add_tree_backref failed (leaf tree block): %s",
8826 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8827 ret = add_tree_backref(extent_cache,
8828 key.objectid, key.offset, 0, 0);
8831 "add_tree_backref failed (leaf shared block): %s",
8835 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8836 struct btrfs_extent_data_ref *ref;
8837 ref = btrfs_item_ptr(buf, i,
8838 struct btrfs_extent_data_ref);
8839 add_data_backref(extent_cache,
8841 btrfs_extent_data_ref_root(buf, ref),
8842 btrfs_extent_data_ref_objectid(buf,
8844 btrfs_extent_data_ref_offset(buf, ref),
8845 btrfs_extent_data_ref_count(buf, ref),
8846 0, root->fs_info->sectorsize);
8849 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8850 struct btrfs_shared_data_ref *ref;
8851 ref = btrfs_item_ptr(buf, i,
8852 struct btrfs_shared_data_ref);
8853 add_data_backref(extent_cache,
8854 key.objectid, key.offset, 0, 0, 0,
8855 btrfs_shared_data_ref_count(buf, ref),
8856 0, root->fs_info->sectorsize);
8859 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8860 struct bad_item *bad;
8862 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8866 bad = malloc(sizeof(struct bad_item));
8869 INIT_LIST_HEAD(&bad->list);
8870 memcpy(&bad->key, &key,
8871 sizeof(struct btrfs_key));
8872 bad->root_id = owner;
8873 list_add_tail(&bad->list, &delete_items);
8876 if (key.type != BTRFS_EXTENT_DATA_KEY)
8878 fi = btrfs_item_ptr(buf, i,
8879 struct btrfs_file_extent_item);
8880 if (btrfs_file_extent_type(buf, fi) ==
8881 BTRFS_FILE_EXTENT_INLINE)
8883 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8886 data_bytes_allocated +=
8887 btrfs_file_extent_disk_num_bytes(buf, fi);
8888 if (data_bytes_allocated < root->fs_info->sectorsize) {
8891 data_bytes_referenced +=
8892 btrfs_file_extent_num_bytes(buf, fi);
8893 add_data_backref(extent_cache,
8894 btrfs_file_extent_disk_bytenr(buf, fi),
8895 parent, owner, key.objectid, key.offset -
8896 btrfs_file_extent_offset(buf, fi), 1, 1,
8897 btrfs_file_extent_disk_num_bytes(buf, fi));
8901 struct btrfs_key first_key;
8903 first_key.objectid = 0;
8906 btrfs_item_key_to_cpu(buf, &first_key, 0);
8907 level = btrfs_header_level(buf);
8908 for (i = 0; i < nritems; i++) {
8909 struct extent_record tmpl;
8911 ptr = btrfs_node_blockptr(buf, i);
8912 size = root->fs_info->nodesize;
8913 btrfs_node_key_to_cpu(buf, &key, i);
8915 if ((level == ri->drop_level)
8916 && is_dropped_key(&key, &ri->drop_key)) {
8921 memset(&tmpl, 0, sizeof(tmpl));
8922 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8923 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8928 tmpl.max_size = size;
8929 ret = add_extent_rec(extent_cache, &tmpl);
8933 ret = add_tree_backref(extent_cache, ptr, parent,
8937 "add_tree_backref failed (non-leaf block): %s",
8943 add_pending(nodes, seen, ptr, size);
8945 add_pending(pending, seen, ptr, size);
8948 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
8949 nritems) * sizeof(struct btrfs_key_ptr);
8951 total_btree_bytes += buf->len;
8952 if (fs_root_objectid(btrfs_header_owner(buf)))
8953 total_fs_tree_bytes += buf->len;
8954 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8955 total_extent_tree_bytes += buf->len;
8957 free_extent_buffer(buf);
/*
 * add_root_to_pending - queue the root block buf of a tree for scanning.
 *
 * A block with header level > 0 is queued in nodes; the other
 * add_pending() call queues it in pending (presumably the else branch).
 * An extent record starting at buf->start with max_size buf->len is added
 * to extent_cache, followed by a tree backref.  For the reloc tree, or for
 * a header backref revision older than BTRFS_MIXED_BACKREF_REV, the
 * backref is added with buf->start as its third argument; otherwise with 0
 * there and objectid as the fourth argument.
 *
 * NOTE(review): part of the parameter list and the return statement are
 * not visible in this listing.
 */
8961 static int add_root_to_pending(struct extent_buffer *buf,
8962 struct cache_tree *extent_cache,
8963 struct cache_tree *pending,
8964 struct cache_tree *seen,
8965 struct cache_tree *nodes,
8968 struct extent_record tmpl;
8971 if (btrfs_header_level(buf) > 0)
8972 add_pending(nodes, seen, buf->start, buf->len);
8974 add_pending(pending, seen, buf->start, buf->len);
8976 memset(&tmpl, 0, sizeof(tmpl));
8977 tmpl.start = buf->start;
8982 tmpl.max_size = buf->len;
8983 add_extent_rec(extent_cache, &tmpl);
8985 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8986 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8987 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8990 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8995 /* as we fix the tree, we might be deleting blocks that
8996 * we're tracking for repair. This hook makes sure we
8997 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook - update the in-memory extent record when an extent
 * reference is dropped.
 *
 * owner >= BTRFS_FIRST_FREE_OBJECTID selects the data path.  The extent
 * record is looked up in root->fs_info->fsck_extent_cache by
 * (bytenr, num_bytes).
 *
 * Data path: the matching data backref has found_ref and num_refs reduced
 * by refs_to_drop, together with rec->refs and rec->extent_item_refs; the
 * node flags are cleared once the respective counter reaches zero.
 * Tree path: the matching tree backref has its found_ref and
 * found_extent_tree flags cleared, and rec->extent_item_refs is
 * decremented.
 * maybe_free_extent_rec() is called on the record at the end.
 *
 * NOTE(review): both rb_erase() calls are guarded by
 * (!found_extent_tree && found_ref).  Removing a backref that still has
 * found_ref set looks surprising; confirm whether found_ref was meant to
 * be negated as well.
 */
8999 static int free_extent_hook(struct btrfs_trans_handle *trans,
9000 struct btrfs_root *root,
9001 u64 bytenr, u64 num_bytes, u64 parent,
9002 u64 root_objectid, u64 owner, u64 offset,
9005 struct extent_record *rec;
9006 struct cache_extent *cache;
9008 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9010 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9011 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9015 rec = container_of(cache, struct extent_record, cache);
9017 struct data_backref *back;
9018 back = find_data_backref(rec, parent, root_objectid, owner,
9019 offset, 1, bytenr, num_bytes);
9022 if (back->node.found_ref) {
9023 back->found_ref -= refs_to_drop;
9025 rec->refs -= refs_to_drop;
9027 if (back->node.found_extent_tree) {
9028 back->num_refs -= refs_to_drop;
9029 if (rec->extent_item_refs)
9030 rec->extent_item_refs -= refs_to_drop;
9032 if (back->found_ref == 0)
9033 back->node.found_ref = 0;
9034 if (back->num_refs == 0)
9035 back->node.found_extent_tree = 0;
9037 if (!back->node.found_extent_tree && back->node.found_ref) {
9038 rb_erase(&back->node.node, &rec->backref_tree);
9042 struct tree_backref *back;
9043 back = find_tree_backref(rec, parent, root_objectid);
9046 if (back->node.found_ref) {
9049 back->node.found_ref = 0;
9051 if (back->node.found_extent_tree) {
9052 if (rec->extent_item_refs)
9053 rec->extent_item_refs--;
9054 back->node.found_extent_tree = 0;
9056 if (!back->node.found_extent_tree && back->node.found_ref) {
9057 rb_erase(&back->node.node, &rec->backref_tree);
9061 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records - remove the extent tree items that belong to
 * bytenr.
 *
 * The search key starts at objectid bytenr with offset -1 in
 * fs_info->extent_root.  An item whose objectid differs from bytenr is
 * tested for (presumably ending the scan; the branch body is not visible).
 * An item whose type is none of the extent or backref types listed below
 * is not deleted; the search key is lowered past it instead, either by
 * decrementing the offset (type 0) or by decrementing the type and
 * resetting the offset to -1.
 *
 * Every other item is deleted with btrfs_del_item() after a message on
 * stderr.  For EXTENT_ITEM and METADATA_ITEM the block group accounting is
 * updated with the extent size: the key offset for EXTENT_ITEM, nodesize
 * for METADATA_ITEM.
 *
 * NOTE(review): loop structure, error branches and the return statement
 * are missing from this listing.
 */
9066 static int delete_extent_records(struct btrfs_trans_handle *trans,
9067 struct btrfs_root *root,
9068 struct btrfs_path *path,
9071 struct btrfs_key key;
9072 struct btrfs_key found_key;
9073 struct extent_buffer *leaf;
9078 key.objectid = bytenr;
9080 key.offset = (u64)-1;
9083 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9090 if (path->slots[0] == 0)
9096 leaf = path->nodes[0];
9097 slot = path->slots[0];
9099 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9100 if (found_key.objectid != bytenr)
9103 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9104 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9105 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9106 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9107 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9108 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9109 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9110 btrfs_release_path(path);
9111 if (found_key.type == 0) {
9112 if (found_key.offset == 0)
9114 key.offset = found_key.offset - 1;
9115 key.type = found_key.type;
9117 key.type = found_key.type - 1;
9118 key.offset = (u64)-1;
9122 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9123 found_key.objectid, found_key.type, found_key.offset);
9125 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9128 btrfs_release_path(path);
9130 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9131 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9132 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9133 found_key.offset : root->fs_info->nodesize;
9135 ret = btrfs_update_block_group(root, bytenr,
9142 btrfs_release_path(path);
9147 * for a single backref, this will allocate a new extent
9148 * and add the backref to it.
/*
 * record_extent - write an extent item and its backref(s) for rec into the
 * extent tree.
 *
 * NOTE(review): this listing is missing braces, conditions and several
 * statements; only the visible statements are described.
 *
 * An EXTENT_ITEM keyed (rec->start, EXTENT_ITEM, rec->max_size) is
 * inserted (presumably only when allocated is zero).  Its refs start at 0
 * and its generation comes from rec->generation.  Data extents get
 * BTRFS_EXTENT_FLAG_DATA; tree blocks get a btrfs_tree_block_info filled
 * from rec->info_level and a disk key built from rec->info_objectid, plus
 * BTRFS_EXTENT_FLAG_TREE_BLOCK ored with the flags argument.  The block
 * group accounting is then updated for the new extent.
 *
 * Afterwards btrfs_inc_extent_ref() is called once per found reference for
 * a data backref, or once for a tree backref, and a message describing the
 * new backref is printed to stderr.
 *
 * Fix in this revision: the address-of expressions on copy_key had been
 * corrupted into a copyright sign by an HTML entity decoder, and the
 * declaration of copy_key carried a stray second semicolon.
 */
9150 static int record_extent(struct btrfs_trans_handle *trans,
9151 struct btrfs_fs_info *info,
9152 struct btrfs_path *path,
9153 struct extent_record *rec,
9154 struct extent_backref *back,
9155 int allocated, u64 flags)
9158 struct btrfs_root *extent_root = info->extent_root;
9159 struct extent_buffer *leaf;
9160 struct btrfs_key ins_key;
9161 struct btrfs_extent_item *ei;
9162 struct data_backref *dback;
9163 struct btrfs_tree_block_info *bi;
9166 rec->max_size = max_t(u64, rec->max_size,
9170 u32 item_size = sizeof(*ei);
9173 item_size += sizeof(*bi);
9175 ins_key.objectid = rec->start;
9176 ins_key.offset = rec->max_size;
9177 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9179 ret = btrfs_insert_empty_item(trans, extent_root, path,
9180 &ins_key, item_size);
9184 leaf = path->nodes[0];
9185 ei = btrfs_item_ptr(leaf, path->slots[0],
9186 struct btrfs_extent_item);
9188 btrfs_set_extent_refs(leaf, ei, 0);
9189 btrfs_set_extent_generation(leaf, ei, rec->generation);
9191 if (back->is_data) {
9192 btrfs_set_extent_flags(leaf, ei,
9193 BTRFS_EXTENT_FLAG_DATA);
9195 struct btrfs_disk_key copy_key;
9197 bi = (struct btrfs_tree_block_info *)(ei + 1);
9198 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9201 btrfs_set_disk_key_objectid(&copy_key,
9202 rec->info_objectid);
9203 btrfs_set_disk_key_type(&copy_key, 0);
9204 btrfs_set_disk_key_offset(&copy_key, 0);
9206 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9207 btrfs_set_tree_block_key(leaf, bi, &copy_key);
9209 btrfs_set_extent_flags(leaf, ei,
9210 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9213 btrfs_mark_buffer_dirty(leaf);
9214 ret = btrfs_update_block_group(extent_root, rec->start,
9215 rec->max_size, 1, 0);
9218 btrfs_release_path(path);
9221 if (back->is_data) {
9225 dback = to_data_backref(back);
9226 if (back->full_backref)
9227 parent = dback->parent;
9231 for (i = 0; i < dback->found_ref; i++) {
9232 /* if parent != 0, we're doing a full backref
9233 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9234 * just makes the backref allocator create a data
9237 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9238 rec->start, rec->max_size,
9242 BTRFS_FIRST_FREE_OBJECTID :
9248 fprintf(stderr, "adding new data backref"
9249 " on %llu %s %llu owner %llu"
9250 " offset %llu found %d\n",
9251 (unsigned long long)rec->start,
9252 back->full_backref ?
9254 back->full_backref ?
9255 (unsigned long long)parent :
9256 (unsigned long long)dback->root,
9257 (unsigned long long)dback->owner,
9258 (unsigned long long)dback->offset,
9262 struct tree_backref *tback;
9264 tback = to_tree_backref(back);
9265 if (back->full_backref)
9266 parent = tback->parent;
9270 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9271 rec->start, rec->max_size,
9272 parent, tback->root, 0, 0);
9273 fprintf(stderr, "adding new tree backref on "
9274 "start %llu len %llu parent %llu root %llu\n",
9275 rec->start, rec->max_size, parent, tback->root);
9278 btrfs_release_path(path);
/*
 * find_entry - walk the entries list looking for an extent_entry whose
 * bytenr and bytes both equal the arguments.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the matching entry is returned, or NULL when none matches.
 */
9282 static struct extent_entry *find_entry(struct list_head *entries,
9283 u64 bytenr, u64 bytes)
9285 struct extent_entry *entry = NULL;
9287 list_for_each_entry(entry, entries, list) {
9288 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - choose the entry from the list that most
 * backrefs agree on, judged by entry->count.
 *
 * Visible rules: an entry whose broken count equals its count is treated
 * specially (per the existing comment it is not trusted); an entry tying
 * with the current best on count means the search continues; otherwise the
 * larger count of prev and entry, or of best and entry, wins.
 *
 * NOTE(review): branch bodies and the return statement are missing from
 * this listing; presumably NULL is returned when no clear winner exists.
 */
9295 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9297 struct extent_entry *entry, *best = NULL, *prev = NULL;
9299 list_for_each_entry(entry, entries, list) {
9301 * If there are as many broken entries as entries then we know
9302 * not to trust this particular entry.
9304 if (entry->broken == entry->count)
9308 * Special case, when there are only two entries and 'best' is
9318 * If our current entry == best then we can't be sure our best
9319 * is really the best, so we need to keep searching.
9321 if (best && best->count == entry->count) {
9327 /* Prev == entry, not good enough, have to keep searching */
9328 if (!prev->broken && prev->count == entry->count)
9332 best = (prev->count > entry->count) ? prev : entry;
9333 else if (best->count < entry->count)
/*
 * repair_ref - rewrite one file extent item so that it agrees with entry.
 *
 * Steps visible here:
 *  - read the fs root identified by dback->root;
 *  - search from (dback->owner, EXTENT_DATA, dback->offset) and walk
 *    forward until a file extent item with dback->disk_bytenr and
 *    dback->bytes is found;
 *  - start a transaction and search for that key again with cow enabled;
 *  - set disk_bytenr to entry->bytenr, adjust the file extent offset when
 *    the old disk_bytenr lies above or below entry->bytenr, set
 *    disk_num_bytes to entry->bytes, and set ram_bytes to entry->bytes for
 *    extents without compression;
 *  - mark the leaf dirty and commit the transaction.
 *
 * Three situations only produce a message on stderr asking for a
 * btrfs-image: a compressed extent whose disk_bytenr differs from
 * entry->bytenr, a ref reaching past the end of entry, and a ref starting
 * before entry.
 *
 * Returns ret when it is non-zero, otherwise the result of the commit.
 *
 * NOTE(review): error branches, labels and some statements are missing
 * from this listing.
 */
9341 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9342 struct data_backref *dback, struct extent_entry *entry)
9344 struct btrfs_trans_handle *trans;
9345 struct btrfs_root *root;
9346 struct btrfs_file_extent_item *fi;
9347 struct extent_buffer *leaf;
9348 struct btrfs_key key;
9352 key.objectid = dback->root;
9353 key.type = BTRFS_ROOT_ITEM_KEY;
9354 key.offset = (u64)-1;
9355 root = btrfs_read_fs_root(info, &key);
9357 fprintf(stderr, "Couldn't find root for our ref\n");
9362 * The backref points to the original offset of the extent if it was
9363 * split, so we need to search down to the offset we have and then walk
9364 * forward until we find the backref we're looking for.
9366 key.objectid = dback->owner;
9367 key.type = BTRFS_EXTENT_DATA_KEY;
9368 key.offset = dback->offset;
9369 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9371 fprintf(stderr, "Error looking up ref %d\n", ret);
9376 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9377 ret = btrfs_next_leaf(root, path);
9379 fprintf(stderr, "Couldn't find our ref, next\n");
9383 leaf = path->nodes[0];
9384 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9385 if (key.objectid != dback->owner ||
9386 key.type != BTRFS_EXTENT_DATA_KEY) {
9387 fprintf(stderr, "Couldn't find our ref, search\n");
9390 fi = btrfs_item_ptr(leaf, path->slots[0],
9391 struct btrfs_file_extent_item);
9392 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9393 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9395 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9400 btrfs_release_path(path);
9402 trans = btrfs_start_transaction(root, 1);
9404 return PTR_ERR(trans);
9407 * Ok we have the key of the file extent we want to fix, now we can cow
9408 * down to the thing and fix it.
9410 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9412 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9413 key.objectid, key.type, key.offset, ret);
9417 fprintf(stderr, "Well that's odd, we just found this key "
9418 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9423 leaf = path->nodes[0];
9424 fi = btrfs_item_ptr(leaf, path->slots[0],
9425 struct btrfs_file_extent_item);
9427 if (btrfs_file_extent_compression(leaf, fi) &&
9428 dback->disk_bytenr != entry->bytenr) {
9429 fprintf(stderr, "Ref doesn't match the record start and is "
9430 "compressed, please take a btrfs-image of this file "
9431 "system and send it to a btrfs developer so they can "
9432 "complete this functionality for bytenr %Lu\n",
9433 dback->disk_bytenr);
9438 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9439 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9440 } else if (dback->disk_bytenr > entry->bytenr) {
9441 u64 off_diff, offset;
9443 off_diff = dback->disk_bytenr - entry->bytenr;
9444 offset = btrfs_file_extent_offset(leaf, fi);
9445 if (dback->disk_bytenr + offset +
9446 btrfs_file_extent_num_bytes(leaf, fi) >
9447 entry->bytenr + entry->bytes) {
9448 fprintf(stderr, "Ref is past the entry end, please "
9449 "take a btrfs-image of this file system and "
9450 "send it to a btrfs developer, ref %Lu\n",
9451 dback->disk_bytenr);
9456 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9457 btrfs_set_file_extent_offset(leaf, fi, offset);
9458 } else if (dback->disk_bytenr < entry->bytenr) {
9461 offset = btrfs_file_extent_offset(leaf, fi);
9462 if (dback->disk_bytenr + offset < entry->bytenr) {
9463 fprintf(stderr, "Ref is before the entry start, please"
9464 " take a btrfs-image of this file system and "
9465 "send it to a btrfs developer, ref %Lu\n",
9466 dback->disk_bytenr);
9471 offset += dback->disk_bytenr;
9472 offset -= entry->bytenr;
9473 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9474 btrfs_set_file_extent_offset(leaf, fi, offset);
9477 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9480 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9481 * only do this if we aren't using compression, otherwise it's a
9484 if (!btrfs_file_extent_compression(leaf, fi))
9485 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9487 printf("ram bytes may be wrong?\n");
9488 btrfs_mark_buffer_dirty(leaf);
9490 err = btrfs_commit_transaction(trans, root);
9491 btrfs_release_path(path);
9492 return ret ? ret : err;
/*
 * verify_backrefs - check that the data backrefs of rec agree on
 * (disk_bytenr, bytes) and repair the file extents that do not.
 *
 * First pass over rec->backref_tree: full backrefs, non-data backrefs and
 * data backrefs with found_ref == 0 are tested for (presumably skipped).
 * The rest are grouped into a list of extent_entry keyed by
 * (disk_bytenr, bytes).  A backref that differs from
 * (rec->start, rec->nr) or is marked broken is tested for separately
 * (presumably recorded as a mismatch).  With at most one entry and no
 * mismatch there is nothing to do.
 *
 * Otherwise find_most_right_entry() is asked for a winner.  The extent
 * record itself is looked up among the entries, and may be added as a new
 * entry, before the choice is retried.  Failure to decide is reported on
 * stderr.  A winner whose bytenr differs from rec->start is refused with a
 * message.
 *
 * Second pass: repair_ref() is called for every considered data backref
 * whose (disk_bytenr, bytes) differs from the winner.  Finally the entries
 * list is emptied.
 *
 * NOTE(review): branch bodies, labels and the return statement are missing
 * from this listing.
 */
9495 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9496 struct extent_record *rec)
9498 struct extent_backref *back, *tmp;
9499 struct data_backref *dback;
9500 struct extent_entry *entry, *best = NULL;
9503 int broken_entries = 0;
9508 * Metadata is easy and the backrefs should always agree on bytenr and
9509 * size, if not we've got bigger issues.
9514 rbtree_postorder_for_each_entry_safe(back, tmp,
9515 &rec->backref_tree, node) {
9516 if (back->full_backref || !back->is_data)
9519 dback = to_data_backref(back);
9522 * We only pay attention to backrefs that we found a real
9525 if (dback->found_ref == 0)
9529 * For now we only catch when the bytes don't match, not the
9530 * bytenr. We can easily do this at the same time, but I want
9531 * to have a fs image to test on before we just add repair
9532 * functionality willy-nilly so we know we won't screw up the
9536 entry = find_entry(&entries, dback->disk_bytenr,
9539 entry = malloc(sizeof(struct extent_entry));
9544 memset(entry, 0, sizeof(*entry));
9545 entry->bytenr = dback->disk_bytenr;
9546 entry->bytes = dback->bytes;
9547 list_add_tail(&entry->list, &entries);
9552 * If we only have on entry we may think the entries agree when
9553 * in reality they don't so we have to do some extra checking.
9555 if (dback->disk_bytenr != rec->start ||
9556 dback->bytes != rec->nr || back->broken)
9567 /* Yay all the backrefs agree, carry on good sir */
9568 if (nr_entries <= 1 && !mismatch)
9571 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9572 "%Lu\n", rec->start);
9575 * First we want to see if the backrefs can agree amongst themselves who
9576 * is right, so figure out which one of the entries has the highest
9579 best = find_most_right_entry(&entries);
9582 * Ok so we may have an even split between what the backrefs think, so
9583 * this is where we use the extent ref to see what it thinks.
9586 entry = find_entry(&entries, rec->start, rec->nr);
9587 if (!entry && (!broken_entries || !rec->found_rec)) {
9588 fprintf(stderr, "Backrefs don't agree with each other "
9589 "and extent record doesn't agree with anybody,"
9590 " so we can't fix bytenr %Lu bytes %Lu\n",
9591 rec->start, rec->nr);
9594 } else if (!entry) {
9596 * Ok our backrefs were broken, we'll assume this is the
9597 * correct value and add an entry for this range.
9599 entry = malloc(sizeof(struct extent_entry));
9604 memset(entry, 0, sizeof(*entry));
9605 entry->bytenr = rec->start;
9606 entry->bytes = rec->nr;
9607 list_add_tail(&entry->list, &entries);
9611 best = find_most_right_entry(&entries);
9613 fprintf(stderr, "Backrefs and extent record evenly "
9614 "split on who is right, this is going to "
9615 "require user input to fix bytenr %Lu bytes "
9616 "%Lu\n", rec->start, rec->nr);
9623 * I don't think this can happen currently as we'll abort() if we catch
9624 * this case higher up, but in case somebody removes that we still can't
9625 * deal with it properly here yet, so just bail out of that's the case.
9627 if (best->bytenr != rec->start) {
9628 fprintf(stderr, "Extent start and backref starts don't match, "
9629 "please use btrfs-image on this file system and send "
9630 "it to a btrfs developer so they can make fsck fix "
9631 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9632 rec->start, rec->nr);
9638 * Ok great we all agreed on an extent record, let's go find the real
9639 * references and fix up the ones that don't match.
9641 rbtree_postorder_for_each_entry_safe(back, tmp,
9642 &rec->backref_tree, node) {
9643 if (back->full_backref || !back->is_data)
9646 dback = to_data_backref(back);
9649 * Still ignoring backrefs that don't have a real ref attached
9652 if (dback->found_ref == 0)
9655 if (dback->bytes == best->bytes &&
9656 dback->disk_bytenr == best->bytenr)
9659 ret = repair_ref(info, path, dback, best);
9665 * Ok we messed with the actual refs, which means we need to drop our
9666 * entire cache and go back and rescan. I know this is a huge pain and
9667 * adds a lot of extra work, but it's the only way to be safe. Once all
9668 * the backrefs agree we may not need to do anything to the extent
9673 while (!list_empty(&entries)) {
9674 entry = list_entry(entries.next, struct extent_entry, list);
9675 list_del_init(&entry->list);
/*
 * process_duplicates - replace rec in extent_cache by its single duplicate
 * record.
 *
 * The function tests for rec->found_rec or more than one duplicate first
 * (presumably returning early), and asserts that at least one duplicate
 * exists.  rec is then removed from the cache and the first record on
 * rec->dups becomes the replacement, called good: its cache range is set
 * from good->start and good->nr, its check flags and duplicate count are
 * reset, and it takes over rec->refs and rec->backrefs.
 *
 * Cached records overlapping good are then absorbed: a record with
 * found_rec set or with duplicates of its own is moved onto good->dups
 * (and good is put on the global duplicate_extents list if it is not on a
 * list yet); any other record only contributes its refs and backrefs.
 * Each absorbed record is removed from the cache.  Finally good is
 * inserted into the cache.
 *
 * Returns 0 when good ended up with duplicates, 1 otherwise (the last
 * return statement; earlier returns are not visible in this listing).
 */
9681 static int process_duplicates(struct cache_tree *extent_cache,
9682 struct extent_record *rec)
9684 struct extent_record *good, *tmp;
9685 struct cache_extent *cache;
9689 * If we found a extent record for this extent then return, or if we
9690 * have more than one duplicate we are likely going to need to delete
9693 if (rec->found_rec || rec->num_duplicates > 1)
9696 /* Shouldn't happen but just in case */
9697 BUG_ON(!rec->num_duplicates);
9700 * So this happens if we end up with a backref that doesn't match the
9701 * actual extent entry. So either the backref is bad or the extent
9702 * entry is bad. Either way we want to have the extent_record actually
9703 * reflect what we found in the extent_tree, so we need to take the
9704 * duplicate out and use that as the extent_record since the only way we
9705 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9707 remove_cache_extent(extent_cache, &rec->cache);
9709 good = to_extent_record(rec->dups.next);
9710 list_del_init(&good->list);
9711 INIT_LIST_HEAD(&good->backrefs);
9712 INIT_LIST_HEAD(&good->dups);
9713 good->cache.start = good->start;
9714 good->cache.size = good->nr;
9715 good->content_checked = 0;
9716 good->owner_ref_checked = 0;
9717 good->num_duplicates = 0;
9718 good->refs = rec->refs;
9719 list_splice_init(&rec->backrefs, &good->backrefs);
9721 cache = lookup_cache_extent(extent_cache, good->start,
9725 tmp = container_of(cache, struct extent_record, cache);
9728 * If we find another overlapping extent and it's found_rec is
9729 * set then it's a duplicate and we need to try and delete
9732 if (tmp->found_rec || tmp->num_duplicates > 0) {
9733 if (list_empty(&good->list))
9734 list_add_tail(&good->list,
9735 &duplicate_extents);
9736 good->num_duplicates += tmp->num_duplicates + 1;
9737 list_splice_init(&tmp->dups, &good->dups);
9738 list_del_init(&tmp->list);
9739 list_add_tail(&tmp->list, &good->dups);
9740 remove_cache_extent(extent_cache, &tmp->cache);
9745 * Ok we have another non extent item backed extent rec, so lets
9746 * just add it to this extent and carry on like we did above.
9748 good->refs += tmp->refs;
9749 list_splice_init(&tmp->backrefs, &good->backrefs);
9750 remove_cache_extent(extent_cache, &tmp->cache);
9753 ret = insert_cache_extent(extent_cache, &good->cache);
9756 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - delete the extent items of duplicate records.
 *
 * @root: a root of the filesystem; it is reassigned to
 *        root->fs_info->extent_root before the transaction is started.
 * @rec:  record whose dups list holds the overlapping records.
 *
 * The dups list is scanned to pick the record to keep by comparing start and
 * nr of the candidates; extents that overlap without one covering the other
 * are reported on stderr.  @rec and entries of rec->dups are gathered on a
 * local delete_list.  Inside one transaction each gathered record is tested
 * for found_rec and metadata, its (start, EXTENT_ITEM_KEY, nr) key is
 * searched in the extent tree and the item is deleted.  Afterwards both
 * delete_list and rec->dups are emptied.
 *
 * Return: ret if it is non-zero, otherwise nr_del.  rec->num_duplicates is
 * reset to 0 when both ret and nr_del are 0.
 *
 * NOTE(review): the overlap message is formatted as "[%Lu-%Lu]" but is
 * passed start and nr (a length), not an end offset -- confirm intent.
 */
9759 static int delete_duplicate_records(struct btrfs_root *root,
9760 struct extent_record *rec)
9762 struct btrfs_trans_handle *trans;
9763 LIST_HEAD(delete_list);
9764 struct btrfs_path path;
9765 struct extent_record *tmp, *good, *n;
9768 struct btrfs_key key;
9770 btrfs_init_path(&path);
9773 /* Find the record that covers all of the duplicates. */
9774 list_for_each_entry(tmp, &rec->dups, list) {
9775 if (good->start < tmp->start)
9777 if (good->nr > tmp->nr)
9780 if (tmp->start + tmp->nr < good->start + good->nr) {
9781 fprintf(stderr, "Ok we have overlapping extents that "
9782 "aren't completely covered by each other, this "
9783 "is going to require more careful thought. "
9784 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9785 tmp->start, tmp->nr, good->start, good->nr);
9792 list_add_tail(&rec->list, &delete_list);
9794 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9797 list_move_tail(&tmp->list, &delete_list);
9800 root = root->fs_info->extent_root;
9801 trans = btrfs_start_transaction(root, 1);
9802 if (IS_ERR(trans)) {
9803 ret = PTR_ERR(trans);
9807 list_for_each_entry(tmp, &delete_list, list) {
9808 if (tmp->found_rec == 0)
9810 key.objectid = tmp->start;
9811 key.type = BTRFS_EXTENT_ITEM_KEY;
9812 key.offset = tmp->nr;
9814 /* Shouldn't happen but just in case */
9815 if (tmp->metadata) {
9816 fprintf(stderr, "Well this shouldn't happen, extent "
9817 "record overlaps but is metadata? "
9818 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9822 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9828 ret = btrfs_del_item(trans, root, &path);
9831 btrfs_release_path(&path);
9834 err = btrfs_commit_transaction(trans, root);
9838 while (!list_empty(&delete_list)) {
9839 tmp = to_extent_record(delete_list.next);
9840 list_del_init(&tmp->list);
9846 while (!list_empty(&rec->dups)) {
9847 tmp = to_extent_record(rec->dups.next);
9848 list_del_init(&tmp->list);
9852 btrfs_release_path(&path);
9854 if (!ret && !nr_del)
9855 rec->num_duplicates = 0;
9857 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - look up the file extents behind data backrefs of
 * @rec that have no real reference attached yet.
 *
 * @info:         filesystem being checked.
 * @path:         caller-provided path used for the fs-tree searches; it is
 *                released after each search.
 * @extent_cache: extent record cache consulted for the bytenr that is found.
 * @rec:          record whose backref_tree is walked.
 *
 * Full backrefs, non-data backrefs and backrefs that already have found_ref
 * set are filtered out.  For the others the owning root is read with
 * btrfs_read_fs_root() using (dback->root, ROOT_ITEM_KEY, -1) and the
 * (dback->owner, EXTENT_DATA_KEY, dback->offset) item is searched in it.
 * The disk bytenr and disk_num_bytes of the file extent item are read, the
 * bytenr is looked up in @extent_cache, and the backref gets found_ref
 * incremented and disk_bytenr/bytes filled in.
 *
 * Return: PTR_ERR(root) when reading the root fails with an error other
 * than -ENOENT (a missing root is treated as a bad ref and skipped).
 */
9860 static int find_possible_backrefs(struct btrfs_fs_info *info,
9861 struct btrfs_path *path,
9862 struct cache_tree *extent_cache,
9863 struct extent_record *rec)
9865 struct btrfs_root *root;
9866 struct extent_backref *back, *tmp;
9867 struct data_backref *dback;
9868 struct cache_extent *cache;
9869 struct btrfs_file_extent_item *fi;
9870 struct btrfs_key key;
9874 rbtree_postorder_for_each_entry_safe(back, tmp,
9875 &rec->backref_tree, node) {
9876 /* Don't care about full backrefs (poor unloved backrefs) */
9877 if (back->full_backref || !back->is_data)
9880 dback = to_data_backref(back);
9882 /* We found this one, we don't need to do a lookup */
9883 if (dback->found_ref)
9886 key.objectid = dback->root;
9887 key.type = BTRFS_ROOT_ITEM_KEY;
9888 key.offset = (u64)-1;
9890 root = btrfs_read_fs_root(info, &key);
9892 /* No root, definitely a bad ref, skip */
9893 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9895 /* Other err, exit */
9897 return PTR_ERR(root);
9899 key.objectid = dback->owner;
9900 key.type = BTRFS_EXTENT_DATA_KEY;
9901 key.offset = dback->offset;
9902 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9904 btrfs_release_path(path);
9907 /* Didn't find it, we can carry on */
9912 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9913 struct btrfs_file_extent_item);
9914 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9915 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9916 btrfs_release_path(path);
9917 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9919 struct extent_record *tmp;
9920 tmp = container_of(cache, struct extent_record, cache);
9923 * If we found an extent record for the bytenr for this
9924 * particular backref then we can't add it to our
9925 * current extent record. We only want to add backrefs
9926 * that don't have a corresponding extent item in the
9927 * extent tree since they likely belong to this record
9928 * and we need to fix it if it doesn't match bytenrs.
9934 dback->found_ref += 1;
9935 dback->disk_bytenr = bytenr;
9936 dback->bytes = bytes;
9939 * Set this so the verify backref code knows not to trust the
9940 * values in this backref.
9949 * Record orphan data ref into corresponding root.
9951 * Return 0 if the extent item contains data ref and recorded.
9952 * Return 1 if the extent item contains no useful data ref
9953 * In that case, it may contain only shared_dataref or metadata backref
9954 * or the file extent exists(this should be handled by the extent bytenr
9956 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents - remember data backrefs of @rec that have no
 * matching file extent, queued per owning root.
 *
 * @fs_info: filesystem being checked.
 * @rec:     extent record whose backref_tree is walked.
 *
 * Only data backrefs that are not full backrefs, were found in the extent
 * tree and have no found_ref are considered.  For each one the owning root
 * is read with btrfs_read_fs_root(); roots that cannot be read are skipped.
 * The (owner, EXTENT_DATA_KEY, offset) key is searched in that root, and an
 * orphan_data_extent carrying the backref's root/owner/offset plus the
 * rec->cache range is allocated and queued on
 * dest_root->orphan_data_extents.
 *
 * list_add() takes the new entry first and the list head second, so the new
 * entry is &orphan->list and the head is the per-root list.  Passing them
 * the other way round re-links the head behind the freshly initialised
 * entry and drops every orphan queued earlier for the same root.
 *
 * Return: !recorded_data_ref on the final return, i.e. 0 when at least one
 * orphan was recorded and 1 when none was.
 */
9958 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9959 struct extent_record *rec)
9961 struct btrfs_key key;
9962 struct btrfs_root *dest_root;
9963 struct extent_backref *back, *tmp;
9964 struct data_backref *dback;
9965 struct orphan_data_extent *orphan;
9966 struct btrfs_path path;
9967 int recorded_data_ref = 0;
9972 btrfs_init_path(&path);
9973 rbtree_postorder_for_each_entry_safe(back, tmp,
9974 &rec->backref_tree, node) {
9975 if (back->full_backref || !back->is_data ||
9976 !back->found_extent_tree)
9978 dback = to_data_backref(back);
9979 if (dback->found_ref)
9981 key.objectid = dback->root;
9982 key.type = BTRFS_ROOT_ITEM_KEY;
9983 key.offset = (u64)-1;
9985 dest_root = btrfs_read_fs_root(fs_info, &key);
9987 /* For non-exist root we just skip it */
9988 if (IS_ERR(dest_root) || !dest_root)
9991 key.objectid = dback->owner;
9992 key.type = BTRFS_EXTENT_DATA_KEY;
9993 key.offset = dback->offset;
9995 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9996 btrfs_release_path(&path);
9998 * For ret < 0, it's OK since the fs-tree may be corrupted,
9999 * we need to record it for inode/file extent rebuild.
10000 * For ret > 0, we record it only for file extent rebuild.
10001 * For ret == 0, the file extent exists but only bytenr
10002 * mismatch, let the original bytenr fix routine to handle,
10008 orphan = malloc(sizeof(*orphan));
10013 INIT_LIST_HEAD(&orphan->list);
10014 orphan->root = dback->root;
10015 orphan->objectid = dback->owner;
10016 orphan->offset = dback->offset;
10017 orphan->disk_bytenr = rec->cache.start;
10018 orphan->disk_len = rec->cache.size;
10019 list_add(&orphan->list, &dest_root->orphan_data_extents);
10020 recorded_data_ref = 1;
10023 btrfs_release_path(&path);
10025 return !recorded_data_ref;
10031 * when an incorrect extent item is found, this will delete
10032 * all of the existing entries for it and recreate them
10033 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree entries for @rec.
 *
 * @info:         filesystem being repaired; info->extent_root is the tree
 *                that is modified.
 * @extent_cache: extent record cache, handed to find_possible_backrefs().
 * @rec:          extent record to repair.
 *
 * Sequence in this function:
 *   - when rec->refs differs from rec->extent_item_refs on a non-metadata
 *     record, find_possible_backrefs() is called; verify_backrefs() follows
 *     as "step one";
 *   - a transaction is started on info->extent_root;
 *   - "step two": delete_extent_records() removes the existing records;
 *   - @rec is looked up in info->corrupt_blocks;
 *   - "step three": record_extent() is called for backrefs in
 *     rec->backref_tree that have found_ref set, with rec->bad_full_backref
 *     cleared beforehand;
 *   - the transaction is committed and "Repaired extent references for
 *     <start>" is printed to stderr.
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags given to
 * record_extent() when rec->flag_block_full_backref is set.
 */
10035 static int fixup_extent_refs(struct btrfs_fs_info *info,
10036 struct cache_tree *extent_cache,
10037 struct extent_record *rec)
10039 struct btrfs_trans_handle *trans = NULL;
10041 struct btrfs_path path;
10042 struct cache_extent *cache;
10043 struct extent_backref *back, *tmp;
10047 if (rec->flag_block_full_backref)
10048 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10050 btrfs_init_path(&path);
10051 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10053 * Sometimes the backrefs themselves are so broken they don't
10054 * get attached to any meaningful rec, so first go back and
10055 * check any of our backrefs that we couldn't find and throw
10056 * them into the list if we find the backref so that
10057 * verify_backrefs can figure out what to do.
10059 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10064 /* step one, make sure all of the backrefs agree */
10065 ret = verify_backrefs(info, &path, rec);
10069 trans = btrfs_start_transaction(info->extent_root, 1);
10070 if (IS_ERR(trans)) {
10071 ret = PTR_ERR(trans);
10075 /* step two, delete all the existing records */
10076 ret = delete_extent_records(trans, info->extent_root, &path,
10082 /* was this block corrupt? If so, don't add references to it */
10083 cache = lookup_cache_extent(info->corrupt_blocks,
10084 rec->start, rec->max_size);
10090 /* step three, recreate all the refs we did find */
10091 rbtree_postorder_for_each_entry_safe(back, tmp,
10092 &rec->backref_tree, node) {
10094 * if we didn't find any references, don't create a
10095 * new extent record
10097 if (!back->found_ref)
10100 rec->bad_full_backref = 0;
10101 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10109 int err = btrfs_commit_transaction(trans, info->extent_root);
10115 fprintf(stderr, "Repaired extent references for %llu\n",
10116 (unsigned long long)rec->start);
10118 btrfs_release_path(&path);
/*
 * fixup_extent_flags - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * extent item of @rec so that it matches rec->flag_block_full_backref.
 *
 * @fs_info: filesystem being repaired; fs_info->extent_root is searched.
 * @rec:     extent record whose on-disk flags are rewritten.
 *
 * The lookup key is (rec->start, METADATA_ITEM_KEY, rec->info_level) for
 * metadata records and (rec->start, EXTENT_ITEM_KEY, rec->max_size)
 * otherwise.  The item is searched inside a transaction, its flags are read,
 * the full-backref bit is set or cleared (each case is announced on stderr),
 * the new flags are stored, the leaf is marked dirty and the transaction is
 * committed.  On the two early exits after the search the path is released
 * and the transaction is committed as well; the missing-item case prints
 * "Didn't find extent for <start>".
 *
 * Return: PTR_ERR(trans) on the early return after starting the
 * transaction; on the main path ret holds the commit result.
 */
10122 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10123 struct extent_record *rec)
10125 struct btrfs_trans_handle *trans;
10126 struct btrfs_root *root = fs_info->extent_root;
10127 struct btrfs_path path;
10128 struct btrfs_extent_item *ei;
10129 struct btrfs_key key;
10133 key.objectid = rec->start;
10134 if (rec->metadata) {
10135 key.type = BTRFS_METADATA_ITEM_KEY;
10136 key.offset = rec->info_level;
10138 key.type = BTRFS_EXTENT_ITEM_KEY;
10139 key.offset = rec->max_size;
10142 trans = btrfs_start_transaction(root, 0);
10144 return PTR_ERR(trans);
10146 btrfs_init_path(&path);
10147 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10149 btrfs_release_path(&path);
10150 btrfs_commit_transaction(trans, root);
10153 fprintf(stderr, "Didn't find extent for %llu\n",
10154 (unsigned long long)rec->start);
10155 btrfs_release_path(&path);
10156 btrfs_commit_transaction(trans, root);
10160 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10161 struct btrfs_extent_item);
10162 flags = btrfs_extent_flags(path.nodes[0], ei);
10163 if (rec->flag_block_full_backref) {
10164 fprintf(stderr, "setting full backref on %llu\n",
10165 (unsigned long long)key.objectid);
10166 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10168 fprintf(stderr, "clearing full backref on %llu\n",
10169 (unsigned long long)key.objectid);
10170 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10172 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10173 btrfs_mark_buffer_dirty(path.nodes[0]);
10174 btrfs_release_path(&path);
10175 ret = btrfs_commit_transaction(trans, root);
10177 fprintf(stderr, "Repaired extent flags for %llu\n",
10178 (unsigned long long)rec->start);
10183 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - remove the pointer to a corrupt block from its parent
 * node in the extent tree.
 *
 * @trans:   running transaction.
 * @info:    filesystem info; the search and the deletion use
 *           info->extent_root.
 * @corrupt: corrupt block description (key, level, cache.start).
 *
 * The search is stopped one level above the corrupt block by setting
 * path.lowest_level to corrupt->level + 1.  The slot returned by the search
 * is tried first; after that every slot of the node is scanned for a block
 * pointer equal to corrupt->cache.start.  The pointer is removed with
 * btrfs_del_ptr() after "deleting pointer to block <bytenr>" is printed.
 * The case where the node is the extent root node is tested separately
 * when the pointer is not found.
 */
10184 static int prune_one_block(struct btrfs_trans_handle *trans,
10185 struct btrfs_fs_info *info,
10186 struct btrfs_corrupt_block *corrupt)
10189 struct btrfs_path path;
10190 struct extent_buffer *eb;
10194 int level = corrupt->level + 1;
10196 btrfs_init_path(&path);
10198 /* we want to stop at the parent to our busted block */
10199 path.lowest_level = level;
10201 ret = btrfs_search_slot(trans, info->extent_root,
10202 &corrupt->key, &path, -1, 1);
10207 eb = path.nodes[level];
10214 * hopefully the search gave us the block we want to prune,
10215 * lets try that first
10217 slot = path.slots[level];
10218 found = btrfs_node_blockptr(eb, slot);
10219 if (found == corrupt->cache.start)
10222 nritems = btrfs_header_nritems(eb);
10224 /* the search failed, lets scan this node and hope we find it */
10225 for (slot = 0; slot < nritems; slot++) {
10226 found = btrfs_node_blockptr(eb, slot);
10227 if (found == corrupt->cache.start)
10231 * we couldn't find the bad block. TODO, search all the nodes for pointers
10234 if (eb == info->extent_root->node) {
10239 btrfs_release_path(&path);
10244 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10245 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10248 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - run prune_one_block() on the entries of
 * info->corrupt_blocks.
 *
 * Entries are fetched with search_cache_extent(info->corrupt_blocks, 0) and
 * removed from the tree once processed.  The work runs in a transaction on
 * info->extent_root.
 *
 * Return: PTR_ERR(trans) from the early return after starting the
 * transaction; the result of btrfs_commit_transaction() on the commit path.
 *
 * NOTE(review): the return value of prune_one_block() is not checked.
 */
10252 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10254 struct btrfs_trans_handle *trans = NULL;
10255 struct cache_extent *cache;
10256 struct btrfs_corrupt_block *corrupt;
10259 cache = search_cache_extent(info->corrupt_blocks, 0);
10263 trans = btrfs_start_transaction(info->extent_root, 1);
10265 return PTR_ERR(trans);
10267 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10268 prune_one_block(trans, info, corrupt);
10269 remove_cache_extent(info->corrupt_blocks, cache);
10272 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear the dirty ranges of the free space cache
 * and walk the block groups.
 *
 * Ranges carrying EXTENT_DIRTY in fs_info->free_space_cache are located with
 * find_first_extent_bit() and cleared with clear_extent_dirty().  Block
 * groups are looked up with btrfs_lookup_first_block_group(), and start is
 * advanced to the end of each one (key.objectid + key.offset).
 */
10276 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10278 struct btrfs_block_group_cache *cache;
10283 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10284 &start, &end, EXTENT_DIRTY);
10287 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10292 cache = btrfs_lookup_first_block_group(fs_info, start);
10297 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and in repair mode fix, inconsistencies in the
 * records of @extent_cache.
 *
 * @root:         a root of the filesystem; root->fs_info is used throughout
 *                and root is switched to the extent root for the final
 *                transaction.
 * @extent_cache: extent records collected by the tree scan.
 *
 * Repair preparation: the range of every record and of every corrupt block
 * is marked dirty in fs_info->excluded_extents so nothing is allocated from
 * problem extents, corrupt blocks are pruned with prune_corrupt_blocks() and
 * reset_cached_block_groups() is called.
 *
 * Duplicates: while repairing, records queued on duplicate_extents are run
 * through process_duplicates() and delete_duplicate_records() before
 * anything else.
 *
 * Per-record checks, each reported on stderr: multiple extent items, ref
 * count differing from the extent item (orphan data extents are recorded via
 * record_orphan_data_extents()), backpointer mismatch, failed owner ref
 * check, bad full backref, metadata crossing a stripe boundary and chunk
 * type mismatch.  fixup_extent_refs() and fixup_extent_flags() are the
 * repair helpers called from here.  Each processed record is removed from
 * @extent_cache and its backrefs are freed; its excluded range is cleared
 * again when init_extent_tree is off, repair is on and either no error was
 * seen for it or a fix was attempted.
 *
 * Tail: a non-zero ret other than -EAGAIN prints "failed to repair damaged
 * filesystem, aborting"; block accounting is fixed with
 * btrfs_fix_block_accounting() inside a transaction on the extent root.
 */
10301 static int check_extent_refs(struct btrfs_root *root,
10302 struct cache_tree *extent_cache)
10304 struct extent_record *rec;
10305 struct cache_extent *cache;
10312 * if we're doing a repair, we have to make sure
10313 * we don't allocate from the problem extents.
10314 * In the worst case, this will be all the
10315 * extents in the FS
10317 cache = search_cache_extent(extent_cache, 0);
10319 rec = container_of(cache, struct extent_record, cache);
10320 set_extent_dirty(root->fs_info->excluded_extents,
10322 rec->start + rec->max_size - 1);
10323 cache = next_cache_extent(cache);
10326 /* pin down all the corrupted blocks too */
10327 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10329 set_extent_dirty(root->fs_info->excluded_extents,
10331 cache->start + cache->size - 1);
10332 cache = next_cache_extent(cache);
10334 prune_corrupt_blocks(root->fs_info);
10335 reset_cached_block_groups(root->fs_info);
10338 reset_cached_block_groups(root->fs_info);
10341 * We need to delete any duplicate entries we find first otherwise we
10342 * could mess up the extent tree when we have backrefs that actually
10343 * belong to a different extent item and not the weird duplicate one.
10345 while (repair && !list_empty(&duplicate_extents)) {
10346 rec = to_extent_record(duplicate_extents.next);
10347 list_del_init(&rec->list);
10349 /* Sometimes we can find a backref before we find an actual
10350 * extent, so we need to process it a little bit to see if there
10351 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10352 * if this is a backref screwup. If we need to delete stuff
10353 * process_duplicates() will return 0, otherwise it will return
10356 if (process_duplicates(extent_cache, rec))
10358 ret = delete_duplicate_records(root, rec);
10362 * delete_duplicate_records will return the number of entries
10363 * deleted, so if it's greater than 0 then we know we actually
10364 * did something and we need to remove.
10377 cache = search_cache_extent(extent_cache, 0);
10380 rec = container_of(cache, struct extent_record, cache);
10381 if (rec->num_duplicates) {
10382 fprintf(stderr, "extent item %llu has multiple extent "
10383 "items\n", (unsigned long long)rec->start);
10387 if (rec->refs != rec->extent_item_refs) {
10388 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10389 (unsigned long long)rec->start,
10390 (unsigned long long)rec->nr);
10391 fprintf(stderr, "extent item %llu, found %llu\n",
10392 (unsigned long long)rec->extent_item_refs,
10393 (unsigned long long)rec->refs);
10394 ret = record_orphan_data_extents(root->fs_info, rec);
10400 if (all_backpointers_checked(rec, 1)) {
10401 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10402 (unsigned long long)rec->start,
10403 (unsigned long long)rec->nr);
10407 if (!rec->owner_ref_checked) {
10408 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10409 (unsigned long long)rec->start,
10410 (unsigned long long)rec->nr);
10415 if (repair && fix) {
10416 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10422 if (rec->bad_full_backref) {
10423 fprintf(stderr, "bad full backref, on [%llu]\n",
10424 (unsigned long long)rec->start);
10426 ret = fixup_extent_flags(root->fs_info, rec);
10434 * Although it's not a extent ref's problem, we reuse this
10435 * routine for error reporting.
10436 * No repair function yet.
10438 if (rec->crossing_stripes) {
10440 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10441 rec->start, rec->start + rec->max_size);
10445 if (rec->wrong_chunk_type) {
10447 "bad extent [%llu, %llu), type mismatch with chunk\n",
10448 rec->start, rec->start + rec->max_size);
10453 remove_cache_extent(extent_cache, cache);
10454 free_all_extent_backrefs(rec);
10455 if (!init_extent_tree && repair && (!cur_err || fix))
10456 clear_extent_dirty(root->fs_info->excluded_extents,
10458 rec->start + rec->max_size - 1);
10463 if (ret && ret != -EAGAIN) {
10464 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10467 struct btrfs_trans_handle *trans;
10469 root = root->fs_info->extent_root;
10470 trans = btrfs_start_transaction(root, 1);
10471 if (IS_ERR(trans)) {
10472 ret = PTR_ERR(trans);
10476 ret = btrfs_fix_block_accounting(trans, root);
10479 ret = btrfs_commit_transaction(trans, root);
/*
 * calc_stripe_length - length of one stripe of a chunk.
 *
 * @type:        block group type flags (BTRFS_BLOCK_GROUP_*).
 * @length:      chunk length.
 * @num_stripes: number of stripes in the chunk.
 *
 * Result by profile:
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   other:  length
 *
 * NOTE(review): the divisors are not guarded here; num_stripes of 0 (RAID0,
 * RAID10), 1 (RAID5) or 2 (RAID6) divides by zero -- confirm that callers
 * validate the chunk before calling.
 */
10491 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10495 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10496 stripe_size = length;
10497 stripe_size /= num_stripes;
10498 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10499 stripe_size = length * 2;
10500 stripe_size /= num_stripes;
10501 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10502 stripe_size = length;
10503 stripe_size /= (num_stripes - 1);
10504 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10505 stripe_size = length;
10506 stripe_size /= (num_stripes - 2);
10508 stripe_size = length;
10510 return stripe_size;
10514 * Check the chunk with its block group/dev list ref:
10515 * Return 0 if all refs seems valid.
10516 * Return 1 if part of refs seems valid, need later check for rebuild ref
10517 * like missing block group and needs to search extent tree to rebuild them.
10518 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs - cross-check one chunk record against the block group
 * cache and the device extent cache.
 *
 * @chunk_rec:         chunk being verified.
 * @block_group_cache: block group records, looked up by the chunk range.
 * @dev_extent_cache:  device extent records, looked up per stripe.
 *
 * Block group: the chunk's length/offset/type_flags are compared with the
 * block group's offset/objectid/flags.  The block group is unlinked from its
 * list and stored in chunk_rec->bg_rec (presumably only when it matches --
 * confirm).  A mismatching or missing block group is reported.
 *
 * Device extents: for every stripe a device extent of calc_stripe_length()
 * bytes is looked up at (devid, offset) and its objectid, offset,
 * chunk_offset and length are compared with the stripe.  The device extent
 * is moved onto chunk_rec->dextents; a mismatching or missing device extent
 * is reported.
 */
10520 static int check_chunk_refs(struct chunk_record *chunk_rec,
10521 struct block_group_tree *block_group_cache,
10522 struct device_extent_tree *dev_extent_cache,
10525 struct cache_extent *block_group_item;
10526 struct block_group_record *block_group_rec;
10527 struct cache_extent *dev_extent_item;
10528 struct device_extent_record *dev_extent_rec;
10532 int metadump_v2 = 0;
10536 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10538 chunk_rec->length);
10539 if (block_group_item) {
10540 block_group_rec = container_of(block_group_item,
10541 struct block_group_record,
10543 if (chunk_rec->length != block_group_rec->offset ||
10544 chunk_rec->offset != block_group_rec->objectid ||
10546 chunk_rec->type_flags != block_group_rec->flags)) {
10549 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10550 chunk_rec->objectid,
10555 chunk_rec->type_flags,
10556 block_group_rec->objectid,
10557 block_group_rec->type,
10558 block_group_rec->offset,
10559 block_group_rec->offset,
10560 block_group_rec->objectid,
10561 block_group_rec->flags);
10564 list_del_init(&block_group_rec->list);
10565 chunk_rec->bg_rec = block_group_rec;
10570 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10571 chunk_rec->objectid,
10576 chunk_rec->type_flags);
10583 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10584 chunk_rec->num_stripes);
10585 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10586 devid = chunk_rec->stripes[i].devid;
10587 offset = chunk_rec->stripes[i].offset;
10588 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10589 devid, offset, length);
10590 if (dev_extent_item) {
10591 dev_extent_rec = container_of(dev_extent_item,
10592 struct device_extent_record,
10594 if (dev_extent_rec->objectid != devid ||
10595 dev_extent_rec->offset != offset ||
10596 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10597 dev_extent_rec->length != length) {
10600 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10601 chunk_rec->objectid,
10604 chunk_rec->stripes[i].devid,
10605 chunk_rec->stripes[i].offset,
10606 dev_extent_rec->objectid,
10607 dev_extent_rec->offset,
10608 dev_extent_rec->length);
10611 list_move(&dev_extent_rec->chunk_list,
10612 &chunk_rec->dextents);
10617 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10618 chunk_rec->objectid,
10621 chunk_rec->stripes[i].devid,
10622 chunk_rec->stripes[i].offset);
10629 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - run check_chunk_refs() on every chunk in @chunk_cache and
 * sort the chunks by result.
 *
 * @chunk_cache:       chunk records to verify.
 * @block_group_cache: passed to check_chunk_refs().
 * @dev_extent_cache:  passed to check_chunk_refs().
 * @good:              optional list; receives chunks whose check returned 0.
 * @bad:               optional list; receives chunks whose check returned < 0.
 * @rebuild:           optional list; receives chunks whose check returned > 0.
 * @silent:            passed through to check_chunk_refs().
 *
 * A NULL list pointer means the corresponding chunks are not collected.
 * After the pass, block groups still linked on
 * block_group_cache->block_groups and device extents still linked on
 * dev_extent_cache->no_chunk_orphans are reported as having no chunk.
 */
10630 int check_chunks(struct cache_tree *chunk_cache,
10631 struct block_group_tree *block_group_cache,
10632 struct device_extent_tree *dev_extent_cache,
10633 struct list_head *good, struct list_head *bad,
10634 struct list_head *rebuild, int silent)
10636 struct cache_extent *chunk_item;
10637 struct chunk_record *chunk_rec;
10638 struct block_group_record *bg_rec;
10639 struct device_extent_record *dext_rec;
10643 chunk_item = first_cache_extent(chunk_cache);
10644 while (chunk_item) {
10645 chunk_rec = container_of(chunk_item, struct chunk_record,
10647 err = check_chunk_refs(chunk_rec, block_group_cache,
10648 dev_extent_cache, silent);
10651 if (err == 0 && good)
10652 list_add_tail(&chunk_rec->list, good);
10653 if (err > 0 && rebuild)
10654 list_add_tail(&chunk_rec->list, rebuild);
10655 if (err < 0 && bad)
10656 list_add_tail(&chunk_rec->list, bad);
10657 chunk_item = next_cache_extent(chunk_item);
10660 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10663 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10671 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10675 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10676 dext_rec->objectid,
/*
 * check_device_used - compare the summed length of a device's extents with
 * dev_rec->byte_used.
 *
 * @dev_rec:    device record to verify.
 * @dext_cache: device extent tree, searched from (dev_rec->devid, 0).
 *
 * The device extents are walked in cache order, with each extent's objectid
 * tested against dev_rec->devid.  Every extent visited is unlinked from its
 * device_list and its length is added to the running total.  A total that
 * differs from dev_rec->byte_used is reported on stderr.
 */
10686 static int check_device_used(struct device_record *dev_rec,
10687 struct device_extent_tree *dext_cache)
10689 struct cache_extent *cache;
10690 struct device_extent_record *dev_extent_rec;
10691 u64 total_byte = 0;
10693 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10695 dev_extent_rec = container_of(cache,
10696 struct device_extent_record,
10698 if (dev_extent_rec->objectid != dev_rec->devid)
10701 list_del_init(&dev_extent_rec->device_list);
10702 total_byte += dev_extent_rec->length;
10703 cache = next_cache_extent(cache);
10706 if (total_byte != dev_rec->byte_used) {
10708 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10709 total_byte, dev_rec->byte_used, dev_rec->objectid,
10710 dev_rec->type, dev_rec->offset);
10718 * Unlike device size alignment check above, some super total_bytes check
10719 * failure can lead to mount failure for newer kernel.
10721 * So this function will return the error for a fatal super total_bytes problem.
/*
 * is_super_size_valid - compare the super block total_bytes with the sum of
 * dev->total_bytes over fs_info->fs_devices->devices.
 *
 * Fatal case: a super total smaller than the device total is reported with
 * error(), together with a hint to run 'btrfs rescue fix-device-size'.
 *
 * Optional case: the super block flags are tested for
 * BTRFS_SUPER_FLAG_METADUMP / BTRFS_SUPER_FLAG_METADUMP_V2 (a btrfs-image
 * restored filesystem does not need this check), and sizes that are not
 * aligned to fs_info->sectorsize or that differ from each other produce a
 * warning().
 */
10723 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10725 struct btrfs_device *dev;
10726 struct list_head *dev_list = &fs_info->fs_devices->devices;
10727 u64 total_bytes = 0;
10728 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10730 list_for_each_entry(dev, dev_list, dev_list)
10731 total_bytes += dev->total_bytes;
10733 /* Important check, which can cause unmountable fs */
10734 if (super_bytes < total_bytes) {
10735 error("super total bytes %llu smaller than real device(s) size %llu",
10736 super_bytes, total_bytes);
10737 error("mounting this fs may fail for newer kernels");
10738 error("this can be fixed by 'btrfs rescue fix-device-size'");
10743 * Optional check, just to make everything aligned and match with each
10746 * For a btrfs-image restored fs, we don't need to check it anyway.
10748 if (btrfs_super_flags(fs_info->super_copy) &
10749 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10751 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10752 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10753 super_bytes != total_bytes) {
10754 warning("minor unaligned/mismatch device size detected");
10756 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10761 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * check_devices - verify every device record in @dev_cache.
 *
 * @dev_cache:        rb-tree of device records, walked with rb_first() /
 *                    rb_next().
 * @dev_extent_cache: device extents, handed to check_device_used().
 *
 * Each device is run through check_device_used() and
 * check_dev_size_alignment() (using global_info->sectorsize).  Device
 * extents still linked on dev_extent_cache->no_device_orphans afterwards are
 * reported as having no device.
 */
10762 static int check_devices(struct rb_root *dev_cache,
10763 struct device_extent_tree *dev_extent_cache)
10765 struct rb_node *dev_node;
10766 struct device_record *dev_rec;
10767 struct device_extent_record *dext_rec;
10771 dev_node = rb_first(dev_cache);
10773 dev_rec = container_of(dev_node, struct device_record, node);
10774 err = check_device_used(dev_rec, dev_extent_cache);
10778 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10779 global_info->sectorsize);
10780 dev_node = rb_next(dev_node);
10782 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10785 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10786 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - allocate a root_item_record, fill it from the
 * arguments and append it to @head.
 *
 * @head:          list receiving the new record (appended at the tail).
 * @objectid:      stored in ri_rec->objectid.
 * @bytenr:        stored in ri_rec->bytenr.
 * @last_snapshot: stored in ri_rec->last_snapshot.
 * @level:         stored in ri_rec->level.
 * @drop_level:    stored in ri_rec->drop_level.
 * @drop_key:      copied into ri_rec->drop_key with memcpy().
 *
 * NOTE(review): callers in this file pass NULL as @drop_key for roots
 * without drop progress -- confirm the copy is guarded for that case.
 */
10793 static int add_root_item_to_list(struct list_head *head,
10794 u64 objectid, u64 bytenr, u64 last_snapshot,
10795 u8 level, u8 drop_level,
10796 struct btrfs_key *drop_key)
10799 struct root_item_record *ri_rec;
10800 ri_rec = malloc(sizeof(*ri_rec));
10803 ri_rec->bytenr = bytenr;
10804 ri_rec->objectid = objectid;
10805 ri_rec->level = level;
10806 ri_rec->drop_level = drop_level;
10807 ri_rec->last_snapshot = last_snapshot;
10809 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10810 list_add_tail(&ri_rec->list, head);
/*
 * free_root_item_list - empty a list of root_item_record entries.
 *
 * The first entry is taken and unlinked with list_del_init() until @list is
 * empty.
 */
10815 static void free_root_item_list(struct list_head *list)
10817 struct root_item_record *ri_rec;
10819 while (!list_empty(list)) {
10820 ri_rec = list_first_entry(list, struct root_item_record,
10822 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - feed the roots queued on @list to the block scanner.
 *
 * @list: root_item_record entries to process; each entry is unlinked once
 *        handled.
 * @root: used for root->fs_info when reading tree blocks and passed on to
 *        run_next_block().
 * @bits: block_info scratch array passed on to run_next_block().
 * The remaining cache/tree parameters are passed through unchanged to
 * add_root_to_pending() and run_next_block().
 *
 * For each record the tree block at rec->bytenr is read; a block that is not
 * uptodate is released.  The block is queued with add_root_to_pending()
 * under rec->objectid and run_next_block() is called with the record, after
 * which the buffer is released and the record removed from @list.  A
 * further run_next_block() call with a NULL record follows the list loop.
 */
10827 static int deal_root_from_list(struct list_head *list,
10828 struct btrfs_root *root,
10829 struct block_info *bits,
10831 struct cache_tree *pending,
10832 struct cache_tree *seen,
10833 struct cache_tree *reada,
10834 struct cache_tree *nodes,
10835 struct cache_tree *extent_cache,
10836 struct cache_tree *chunk_cache,
10837 struct rb_root *dev_cache,
10838 struct block_group_tree *block_group_cache,
10839 struct device_extent_tree *dev_extent_cache)
10844 while (!list_empty(list)) {
10845 struct root_item_record *rec;
10846 struct extent_buffer *buf;
10847 rec = list_entry(list->next,
10848 struct root_item_record, list);
10850 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10851 if (!extent_buffer_uptodate(buf)) {
10852 free_extent_buffer(buf);
10856 ret = add_root_to_pending(buf, extent_cache, pending,
10857 seen, nodes, rec->objectid);
10861 * To rebuild extent tree, we need deal with snapshot
10862 * one by one, otherwise we deal with node firstly which
10863 * can maximize readahead.
10866 ret = run_next_block(root, bits, bits_nr, &last,
10867 pending, seen, reada, nodes,
10868 extent_cache, chunk_cache,
10869 dev_cache, block_group_cache,
10870 dev_extent_cache, rec);
10874 free_extent_buffer(buf);
10875 list_del(&rec->list);
10881 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10882 reada, nodes, extent_cache, chunk_cache,
10883 dev_cache, block_group_cache,
10884 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents - scan all trees and verify chunks, extent
 * references and devices.
 *
 * @fs_info: filesystem being checked.
 *
 * Setup: the local caches and trees are initialised and hooked into
 * @fs_info (excluded_extents, fsck_extent_cache, free_extent_hook,
 * corrupt_blocks); a block_info array of bits_nr entries is allocated; the
 * progress task is started when ctx.progress_enabled is set.
 *
 * Root collection: the tree root and the chunk root are queued on
 * normal_trees, then the ROOT_ITEM_KEY items of fs_info->tree_root are
 * walked.  A root whose drop_progress objectid is 0 is queued on
 * normal_trees; any other root is queued on dropping_trees together with its
 * drop key and drop level.
 *
 * Checks, in order: deal_root_from_list() for normal_trees, then for
 * dropping_trees, check_chunks(), check_extent_refs() and check_devices().
 * The result of each of the first four is tested against -EAGAIN, which
 * check_block can return after fixing something.
 *
 * Cleanup: two teardown sequences release the caches and both root lists;
 * the first one also resets the fs_info hooks installed during setup.
 *
 * NOTE(review): the leaf walk calls btrfs_next_leaf(root, ...) with
 * root == fs_info->fs_root although the search ran on fs_info->tree_root --
 * confirm this is intended.
 */
10894 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10896 struct rb_root dev_cache;
10897 struct cache_tree chunk_cache;
10898 struct block_group_tree block_group_cache;
10899 struct device_extent_tree dev_extent_cache;
10900 struct cache_tree extent_cache;
10901 struct cache_tree seen;
10902 struct cache_tree pending;
10903 struct cache_tree reada;
10904 struct cache_tree nodes;
10905 struct extent_io_tree excluded_extents;
10906 struct cache_tree corrupt_blocks;
10907 struct btrfs_path path;
10908 struct btrfs_key key;
10909 struct btrfs_key found_key;
10911 struct block_info *bits;
10913 struct extent_buffer *leaf;
10915 struct btrfs_root_item ri;
10916 struct list_head dropping_trees;
10917 struct list_head normal_trees;
10918 struct btrfs_root *root1;
10919 struct btrfs_root *root;
10923 root = fs_info->fs_root;
10924 dev_cache = RB_ROOT;
10925 cache_tree_init(&chunk_cache);
10926 block_group_tree_init(&block_group_cache);
10927 device_extent_tree_init(&dev_extent_cache);
10929 cache_tree_init(&extent_cache);
10930 cache_tree_init(&seen);
10931 cache_tree_init(&pending);
10932 cache_tree_init(&nodes);
10933 cache_tree_init(&reada);
10934 cache_tree_init(&corrupt_blocks);
10935 extent_io_tree_init(&excluded_extents);
10936 INIT_LIST_HEAD(&dropping_trees);
10937 INIT_LIST_HEAD(&normal_trees);
10940 fs_info->excluded_extents = &excluded_extents;
10941 fs_info->fsck_extent_cache = &extent_cache;
10942 fs_info->free_extent_hook = free_extent_hook;
10943 fs_info->corrupt_blocks = &corrupt_blocks;
10947 bits = malloc(bits_nr * sizeof(struct block_info));
10953 if (ctx.progress_enabled) {
10954 ctx.tp = TASK_EXTENTS;
10955 task_start(ctx.info);
10959 root1 = fs_info->tree_root;
10960 level = btrfs_header_level(root1->node);
10961 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10962 root1->node->start, 0, level, 0, NULL);
10965 root1 = fs_info->chunk_root;
10966 level = btrfs_header_level(root1->node);
10967 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10968 root1->node->start, 0, level, 0, NULL);
10971 btrfs_init_path(&path);
10974 key.type = BTRFS_ROOT_ITEM_KEY;
10975 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10979 leaf = path.nodes[0];
10980 slot = path.slots[0];
10981 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10982 ret = btrfs_next_leaf(root, &path);
10985 leaf = path.nodes[0];
10986 slot = path.slots[0];
10988 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10989 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10990 unsigned long offset;
10993 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10994 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10995 last_snapshot = btrfs_root_last_snapshot(&ri);
10996 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10997 level = btrfs_root_level(&ri);
10998 ret = add_root_item_to_list(&normal_trees,
10999 found_key.objectid,
11000 btrfs_root_bytenr(&ri),
11001 last_snapshot, level,
11006 level = btrfs_root_level(&ri);
11007 objectid = found_key.objectid;
11008 btrfs_disk_key_to_cpu(&found_key,
11009 &ri.drop_progress);
11010 ret = add_root_item_to_list(&dropping_trees,
11012 btrfs_root_bytenr(&ri),
11013 last_snapshot, level,
11014 ri.drop_level, &found_key);
11021 btrfs_release_path(&path);
11024 * check_block can return -EAGAIN if it fixes something, please keep
11025 * this in mind when dealing with return values from these functions, if
11026 * we get -EAGAIN we want to fall through and restart the loop.
11028 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11029 &seen, &reada, &nodes, &extent_cache,
11030 &chunk_cache, &dev_cache, &block_group_cache,
11031 &dev_extent_cache);
11033 if (ret == -EAGAIN)
11037 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11038 &pending, &seen, &reada, &nodes,
11039 &extent_cache, &chunk_cache, &dev_cache,
11040 &block_group_cache, &dev_extent_cache);
11042 if (ret == -EAGAIN)
11047 ret = check_chunks(&chunk_cache, &block_group_cache,
11048 &dev_extent_cache, NULL, NULL, NULL, 0);
11050 if (ret == -EAGAIN)
11055 ret = check_extent_refs(root, &extent_cache);
11057 if (ret == -EAGAIN)
11062 ret = check_devices(&dev_cache, &dev_extent_cache);
11067 task_stop(ctx.info);
11069 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11070 extent_io_tree_cleanup(&excluded_extents);
11071 fs_info->fsck_extent_cache = NULL;
11072 fs_info->free_extent_hook = NULL;
11073 fs_info->corrupt_blocks = NULL;
11074 fs_info->excluded_extents = NULL;
11077 free_chunk_cache_tree(&chunk_cache);
11078 free_device_cache_tree(&dev_cache);
11079 free_block_group_tree(&block_group_cache);
11080 free_device_extent_tree(&dev_extent_cache);
11081 free_extent_cache_tree(&seen);
11082 free_extent_cache_tree(&pending);
11083 free_extent_cache_tree(&reada);
11084 free_extent_cache_tree(&nodes);
11085 free_root_item_list(&normal_trees);
11086 free_root_item_list(&dropping_trees);
11089 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11090 free_extent_cache_tree(&seen);
11091 free_extent_cache_tree(&pending);
11092 free_extent_cache_tree(&reada);
11093 free_extent_cache_tree(&nodes);
11094 free_chunk_cache_tree(&chunk_cache);
11095 free_block_group_tree(&block_group_cache);
11096 free_device_cache_tree(&dev_cache);
11097 free_device_extent_tree(&dev_extent_cache);
11098 free_extent_record_cache(&extent_cache);
11099 free_root_item_list(&normal_trees);
11100 free_root_item_list(&dropping_trees);
11101 extent_io_tree_cleanup(&excluded_extents);
/*
 * Validate the type field of a single inline backref.
 *
 * @eb:   extent buffer holding the extent item
 * @key:  key of the extent item, used only in the error message
 * @iref: inline ref whose type is inspected
 *
 * The four known ref key types are listed as consecutive case labels, so they
 * are all handled by one arm.  Any other value is reported through error()
 * and the result is set to UNKNOWN_TYPE.
 *
 * NOTE(review): the return statement and the value used for a known type are
 * not visible in this listing -- presumably 0; confirm against the full file.
 */
11105 static int check_extent_inline_ref(struct extent_buffer *eb,
11106 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11109 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11112 case BTRFS_TREE_BLOCK_REF_KEY:
11113 case BTRFS_EXTENT_DATA_REF_KEY:
11114 case BTRFS_SHARED_BLOCK_REF_KEY:
11115 case BTRFS_SHARED_DATA_REF_KEY:
11119 error("extent[%llu %u %llu] has unknown ref type: %d",
11120 key->objectid, key->type, key->offset, type);
11121 ret = UNKNOWN_TYPE;
11129 * Check backrefs of a tree block given by @bytenr or @eb.
11131 * @root: the root containing the @bytenr or @eb
11132 * @eb: tree block extent buffer, can be NULL
11133 * @bytenr: bytenr of the tree block to search
11134 * @level: tree level of the tree block
11135 * @owner: owner of the tree block
11137 * Return >0 for any error found and output error message
11138 * Return 0 for no error found
/*
 * Verify that the extent tree holds a backref from @root to the tree block at
 * @bytenr.
 *
 * Visible flow:
 *  1. Look up the METADATA_ITEM (when the SKINNY_METADATA incompat bit is set)
 *     or EXTENT_ITEM for @bytenr, searching with offset (u64)-1 and stepping
 *     back with btrfs_previous_extent_item().
 *  2. Take the recorded level from the key offset (metadata item) or from the
 *     btrfs_tree_block_info that follows the extent item.
 *  3. Compare extent flags, generation, level and ref count against the
 *     values taken from @eb, @level and @owner; each mismatch sets
 *     BACKREF_MISMATCH.
 *  4. Walk the inline refs: a TREE_BLOCK_REF matches on root->objectid (or on
 *     @owner when the check is not strict); a SHARED_BLOCK_REF is validated by
 *     recursing on the parent block at level + 1, except when it points back
 *     at @bytenr itself.
 *  5. Fall back to a keyed TREE_BLOCK_REF lookup and finally to a search for
 *     a SHARED_BLOCK_REF item.
 *
 * The final message for BACKREF_MISSING is printed only when @eb is non-NULL,
 * so the recursive calls made with a NULL @eb do not print it.  For that
 * message, parent is taken from nrefs->bytenr[level + 1] only when @nrefs is
 * set, the check is strict, @level is below the root level and
 * nrefs->full_backref[level + 1] is set; otherwise the root objectid is shown.
 *
 * NOTE(review): several branch conditions, gotos and the return statement are
 * not visible in this listing; the step boundaries above are inferred from
 * the statements that are visible -- confirm against the full file.
 */
11140 static int check_tree_block_ref(struct btrfs_root *root,
11141 struct extent_buffer *eb, u64 bytenr,
11142 int level, u64 owner, struct node_refs *nrefs)
11144 struct btrfs_key key;
11145 struct btrfs_root *extent_root = root->fs_info->extent_root;
11146 struct btrfs_path path;
11147 struct btrfs_extent_item *ei;
11148 struct btrfs_extent_inline_ref *iref;
11149 struct extent_buffer *leaf;
11154 int root_level = btrfs_header_level(root->node);
11156 u32 nodesize = root->fs_info->nodesize;
11165 btrfs_init_path(&path);
11166 key.objectid = bytenr;
11167 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11168 key.type = BTRFS_METADATA_ITEM_KEY;
11170 key.type = BTRFS_EXTENT_ITEM_KEY;
11171 key.offset = (u64)-1;
11173 /* Search for the backref in extent tree */
11174 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11176 err |= BACKREF_MISSING;
11179 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11181 err |= BACKREF_MISSING;
11185 leaf = path.nodes[0];
11186 slot = path.slots[0];
11187 btrfs_item_key_to_cpu(leaf, &key, slot);
11189 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * A METADATA_ITEM stores the level in key.offset and its inline refs start
 * right after the extent item; the other layout has a btrfs_tree_block_info
 * in between, which holds the level.
 */
11191 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11192 skinny_level = (int)key.offset;
11193 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11195 struct btrfs_tree_block_info *info;
11197 info = (struct btrfs_tree_block_info *)(ei + 1);
11198 skinny_level = btrfs_tree_block_level(leaf, info);
11199 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11208 * Due to the feature of shared tree blocks, if the upper node
11209 * is a fs root or shared node, the extent of checked node may
11210 * not be updated until the next CoW.
11213 strict = should_check_extent_strictly(root, nrefs,
11215 if (!(btrfs_extent_flags(leaf, ei) &
11216 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11218 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11219 key.objectid, nodesize,
11220 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11221 err = BACKREF_MISMATCH;
11223 header_gen = btrfs_header_generation(eb);
11224 extent_gen = btrfs_extent_generation(leaf, ei);
11225 if (header_gen != extent_gen) {
11227 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11228 key.objectid, nodesize, header_gen,
11230 err = BACKREF_MISMATCH;
11232 if (level != skinny_level) {
11234 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11235 key.objectid, nodesize, level, skinny_level);
11236 err = BACKREF_MISMATCH;
11238 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11240 "extent[%llu %u] is referred by other roots than %llu",
11241 key.objectid, nodesize, root->objectid);
11242 err = BACKREF_MISMATCH;
11247 * Iterate the extent/metadata item to find the exact backref
/* end is the first address past the item; it bounds the inline ref walk. */
11249 item_size = btrfs_item_size_nr(leaf, slot);
11250 ptr = (unsigned long)iref;
11251 end = (unsigned long)ei + item_size;
11253 while (ptr < end) {
11254 iref = (struct btrfs_extent_inline_ref *)ptr;
11255 type = btrfs_extent_inline_ref_type(leaf, iref);
11256 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11258 ret = check_extent_inline_ref(leaf, &key, iref);
11263 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11264 if (offset == root->objectid)
11266 if (!strict && owner == offset)
11268 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11270 * Backref of tree reloc root points to itself, no need
11271 * to check backref any more.
11273 * This may be an error of loop backref, but extent tree
11274 * checker should have already handled it.
11275 * Here we only need to avoid infinite iteration.
11277 if (offset == bytenr) {
11281 * Check if the backref points to valid
11284 found_ref = !check_tree_block_ref( root, NULL,
11285 offset, level + 1, owner,
11292 ptr += btrfs_extent_inline_ref_size(type);
11296 * Inlined extent item doesn't have what we need, check
11297 * TREE_BLOCK_REF_KEY
11300 btrfs_release_path(&path);
11301 key.objectid = bytenr;
11302 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11303 key.offset = root->objectid;
11305 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11310 * Finally check SHARED BLOCK REF, any found will be good
11311 * Here we're not doing comprehensive extent backref checking,
11312 * only need to ensure there is some extent referring to this
11316 btrfs_release_path(&path);
11317 key.objectid = bytenr;
11318 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11319 key.offset = (u64)-1;
11321 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11323 err |= BACKREF_MISSING;
11326 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11328 err |= BACKREF_MISSING;
11334 err |= BACKREF_MISSING;
11336 btrfs_release_path(&path);
11337 if (nrefs && strict &&
11338 level < root_level && nrefs->full_backref[level + 1])
11339 parent = nrefs->bytenr[level + 1];
11340 if (eb && (err & BACKREF_MISSING))
11342 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11343 bytenr, nodesize, owner, level,
11344 parent ? "parent" : "root",
11345 parent ? parent : root->objectid);
11350 * If @err contains BACKREF_MISSING then add extent of the
11351 * file_extent_data_item.
11353 * Returns error bits after repair.
/*
 * Re-create the extent tree backref for the file extent item selected by
 * @pathp when the incoming error bits contain BACKREF_MISSING.
 *
 * @trans: transaction handle used for the insert and the ref increment
 * @root:  tree that contains the file extent item
 * @pathp: path whose nodes[0]/slots[0] select the file extent item
 * @nrefs: full_backref[0] decides whether a parent bytenr is used
 *
 * Inline extents and extents with a zero disk bytenr are left alone.  The
 * extent item is looked up by (disk_bytenr, EXTENT_ITEM, disk_num_bytes); the
 * visible insert step creates an item with zero refs, the generation of the
 * file extent and BTRFS_EXTENT_FLAG_DATA, and then updates the block group.
 * btrfs_inc_extent_ref() is then called, and BACKREF_MISSING is cleared from
 * the error bits.
 *
 * NOTE(review): the last line of the signature, the branch conditions, the
 * gotos and the return are not visible in this listing -- confirm which steps
 * are conditional against the full file.
 */
11355 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11356 struct btrfs_root *root,
11357 struct btrfs_path *pathp,
11358 struct node_refs *nrefs,
11361 struct btrfs_file_extent_item *fi;
11362 struct btrfs_key fi_key;
11363 struct btrfs_key key;
11364 struct btrfs_extent_item *ei;
11365 struct btrfs_path path;
11366 struct btrfs_root *extent_root = root->fs_info->extent_root;
11367 struct extent_buffer *eb;
11379 eb = pathp->nodes[0];
11380 slot = pathp->slots[0];
11381 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11382 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11384 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11385 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11388 file_offset = fi_key.offset;
11389 generation = btrfs_file_extent_generation(eb, fi);
11390 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11391 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11392 extent_offset = btrfs_file_extent_offset(eb, fi);
11393 offset = file_offset - extent_offset;
11395 /* now repair only adds backref */
11396 if ((err & BACKREF_MISSING) == 0)
11399 /* search extent item */
11400 key.objectid = disk_bytenr;
11401 key.type = BTRFS_EXTENT_ITEM_KEY;
11402 key.offset = num_bytes;
11404 btrfs_init_path(&path);
11405 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11411 /* insert an extent item */
11413 key.objectid = disk_bytenr;
11414 key.type = BTRFS_EXTENT_ITEM_KEY;
11415 key.offset = num_bytes;
11416 size = sizeof(*ei);
11418 btrfs_release_path(&path);
11419 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11423 eb = path.nodes[0];
11424 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/*
 * The new item is written with a ref count of zero; presumably the count is
 * raised by the btrfs_inc_extent_ref() call further down -- confirm.
 */
11426 btrfs_set_extent_refs(eb, ei, 0);
11427 btrfs_set_extent_generation(eb, ei, generation);
11428 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11430 btrfs_mark_buffer_dirty(eb);
11431 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11433 btrfs_release_path(&path);
/*
 * NOTE(review): eb was reassigned to path.nodes[0] of the extent tree in the
 * insert step above and that path has been released since.  If the insert
 * step ran, the bytenr read here comes from that leaf rather than from
 * pathp->nodes[0] -- confirm this is intended.
 */
11436 if (nrefs->full_backref[0])
11437 parent = btrfs_header_bytenr(eb);
11441 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11443 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11447 "failed to increase extent data backref[%llu %llu] root %llu",
11448 disk_bytenr, num_bytes, root->objectid);
11451 printf("Add one extent data backref [%llu %llu]\n",
11452 disk_bytenr, num_bytes);
11455 err &= ~BACKREF_MISSING;
11458 error("can't repair root %llu extent data item[%llu %llu]",
11459 root->objectid, disk_bytenr, num_bytes);
11464 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11466 * Return >0 any error found and output error message
11467 * Return 0 for no error found
/*
 * Check one EXTENT_DATA item and look for its data backref in the extent tree.
 *
 * @root:          tree that contains the file extent item
 * @pathp:         path whose nodes[0]/slots[0] select the file extent item
 * @nrefs:         handed to should_check_extent_strictly()
 * @account_bytes: when non-zero, aligned sizes are added to
 *                 data_bytes_allocated and data_bytes_referenced
 *
 * Visible flow:
 *  1. Inline extents and extents with a zero disk bytenr are not checked.
 *  2. disk_num_bytes and num_bytes must be aligned to the sector size,
 *     otherwise BYTES_UNALIGNED is set; a size is accounted only when it is
 *     aligned.
 *  3. The extent item keyed (disk_bytenr, EXTENT_ITEM, disk_num_bytes) must
 *     carry BTRFS_EXTENT_FLAG_DATA, otherwise BACKREF_MISMATCH is set.
 *  4. Inline refs are walked.  An EXTENT_DATA_REF matches when objectid and
 *     offset (file offset minus extent offset) agree and its root equals
 *     root->objectid, or equals the leaf owner when the check is not strict.
 *     A SHARED_DATA_REF is accepted when check_tree_block_ref() on its offset
 *     returns 0.
 *  5. Without an inline match, a keyed EXTENT_DATA_REF (offset from
 *     hash_extent_data_ref()) is searched, then a SHARED_DATA_REF keyed by
 *     the start of the leaf.
 *  6. BACKREF_MISSING is set and reported when nothing matched.
 *
 * NOTE(review): the early returns, several branch conditions and the final
 * return are not visible in this listing -- confirm against the full file.
 */
11469 static int check_extent_data_item(struct btrfs_root *root,
11470 struct btrfs_path *pathp,
11471 struct node_refs *nrefs, int account_bytes)
11473 struct btrfs_file_extent_item *fi;
11474 struct extent_buffer *eb = pathp->nodes[0];
11475 struct btrfs_path path;
11476 struct btrfs_root *extent_root = root->fs_info->extent_root;
11477 struct btrfs_key fi_key;
11478 struct btrfs_key dbref_key;
11479 struct extent_buffer *leaf;
11480 struct btrfs_extent_item *ei;
11481 struct btrfs_extent_inline_ref *iref;
11482 struct btrfs_extent_data_ref *dref;
11485 u64 disk_num_bytes;
11486 u64 extent_num_bytes;
11493 int found_dbackref = 0;
11494 int slot = pathp->slots[0];
11499 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11500 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11502 /* Nothing to check for hole and inline data extents */
11503 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11504 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11507 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11508 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11509 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11510 offset = btrfs_file_extent_offset(eb, fi);
11512 /* Check unaligned disk_num_bytes and num_bytes */
11513 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11515 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11516 fi_key.objectid, fi_key.offset, disk_num_bytes,
11517 root->fs_info->sectorsize);
11518 err |= BYTES_UNALIGNED;
11519 } else if (account_bytes) {
11520 data_bytes_allocated += disk_num_bytes;
11522 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11524 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11525 fi_key.objectid, fi_key.offset, extent_num_bytes,
11526 root->fs_info->sectorsize);
11527 err |= BYTES_UNALIGNED;
11528 } else if (account_bytes) {
11529 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is the fallback root for non-strict matching. */
11531 owner = btrfs_header_owner(eb);
11533 /* Check the extent item of the file extent in extent tree */
11534 btrfs_init_path(&path);
11535 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11536 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11537 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11539 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11543 leaf = path.nodes[0];
11544 slot = path.slots[0];
11545 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11547 extent_flags = btrfs_extent_flags(leaf, ei);
11549 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11551 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11552 disk_bytenr, disk_num_bytes,
11553 BTRFS_EXTENT_FLAG_DATA);
11554 err |= BACKREF_MISMATCH;
11557 /* Check data backref inside that extent item */
11558 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11559 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11560 ptr = (unsigned long)iref;
11561 end = (unsigned long)ei + item_size;
11562 strict = should_check_extent_strictly(root, nrefs, -1);
11564 while (ptr < end) {
11568 bool match = false;
11570 iref = (struct btrfs_extent_inline_ref *)ptr;
11571 type = btrfs_extent_inline_ref_type(leaf, iref);
11572 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11574 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11579 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11580 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11581 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11582 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
/* Inode and backref offset must agree first; the root comparison follows. */
11584 if (ref_objectid == fi_key.objectid &&
11585 ref_offset == fi_key.offset - offset)
11587 if (ref_root == root->objectid && match)
11588 found_dbackref = 1;
11589 else if (!strict && owner == ref_root && match)
11590 found_dbackref = 1;
11591 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11592 found_dbackref = !check_tree_block_ref(root, NULL,
11593 btrfs_extent_inline_ref_offset(leaf, iref),
11597 if (found_dbackref)
11599 ptr += btrfs_extent_inline_ref_size(type);
11602 if (!found_dbackref) {
11603 btrfs_release_path(&path);
11605 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11606 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11607 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11608 dbref_key.offset = hash_extent_data_ref(root->objectid,
11609 fi_key.objectid, fi_key.offset - offset);
11611 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11612 &dbref_key, &path, 0, 0);
11614 found_dbackref = 1;
11618 btrfs_release_path(&path);
11621 * Neither inlined nor EXTENT_DATA_REF found, try
11622 * SHARED_DATA_REF as last chance.
11624 dbref_key.objectid = disk_bytenr;
11625 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11626 dbref_key.offset = eb->start;
11628 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11629 &dbref_key, &path, 0, 0);
11631 found_dbackref = 1;
11637 if (!found_dbackref)
11638 err |= BACKREF_MISSING;
11639 btrfs_release_path(&path);
11640 if (err & BACKREF_MISSING) {
11641 error("data extent[%llu %llu] backref lost",
11642 disk_bytenr, disk_num_bytes);
11648 * Get real tree block level for the case like shared block
11649 * Return >= 0 as tree level
11650 * Return <0 for error
/*
 * Resolve the level of the tree block at @bytenr from two sources.
 *
 * The extent tree is searched (METADATA_ITEM key with offset (u64)-1, then
 * btrfs_previous_extent_item()) and the flags of the item found are tested
 * for BTRFS_EXTENT_FLAG_TREE_BLOCK.  The backref level comes from the key
 * offset for a METADATA_ITEM or from the trailing btrfs_tree_block_info
 * otherwise.  The block itself is then read with the generation recorded in
 * the extent item, and its header level is compared with the backref level.
 *
 * The header level is the value returned on the visible return statement.
 *
 * NOTE(review): the error paths and the action taken on a level mismatch are
 * not visible in this listing; the trailing btrfs_release_path() presumably
 * belongs to a shared error exit -- confirm against the full file.
 */
11652 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11654 struct extent_buffer *eb;
11655 struct btrfs_path path;
11656 struct btrfs_key key;
11657 struct btrfs_extent_item *ei;
11664 /* Search extent tree for extent generation and level */
11665 key.objectid = bytenr;
11666 key.type = BTRFS_METADATA_ITEM_KEY;
11667 key.offset = (u64)-1;
11669 btrfs_init_path(&path);
11670 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11673 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11681 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11682 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11683 struct btrfs_extent_item);
11684 flags = btrfs_extent_flags(path.nodes[0], ei);
11685 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11690 /* Get transid for later read_tree_block() check */
11691 transid = btrfs_extent_generation(path.nodes[0], ei);
11693 /* Get backref level as one source */
11694 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11695 backref_level = key.offset;
11697 struct btrfs_tree_block_info *info;
11699 info = (struct btrfs_tree_block_info *)(ei + 1);
11700 backref_level = btrfs_tree_block_level(path.nodes[0], info);
/* The path is released before the block is read; only transid and backref_level are kept. */
11702 btrfs_release_path(&path);
11704 /* Get level from tree block as an alternative source */
11705 eb = read_tree_block(fs_info, bytenr, transid);
11706 if (!extent_buffer_uptodate(eb)) {
11707 free_extent_buffer(eb);
11710 header_level = btrfs_header_level(eb);
11711 free_extent_buffer(eb);
11713 if (header_level != backref_level)
11715 return header_level;
11718 btrfs_release_path(&path);
11723 * Check if a tree block backref is valid (points to a valid tree block)
11724 * if level == -1, level will be resolved
11725 * Return >0 for any error found and print error message
/*
 * Check that the tree identified by @root_id really references the tree block
 * at @bytenr.
 *
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  start of the referenced tree block
 * @level:   level of the block; resolved through query_tree_block_level()
 *           in the special case noted below
 *
 * Visible flow: read the root by (root_id, ROOT_ITEM, (u64)-1), read the
 * block, take its first key (node key or item key), and search the root for
 * that key with path.lowest_level set to @level.  The node found at that
 * level must have the same bytenr and level, otherwise REFERENCER_MISMATCH is
 * set.  Failures to read the root or the block, and the visible failure path
 * after the search, set REFERENCER_MISSING.  A block with no items whose
 * level is 0 is handled separately before any key is read.
 *
 * NOTE(review): the branch conditions, gotos and return statement are not
 * visible in this listing -- confirm against the full file.
 */
11727 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11728 u64 bytenr, int level)
11730 struct btrfs_root *root;
11731 struct btrfs_key key;
11732 struct btrfs_path path;
11733 struct extent_buffer *eb;
11734 struct extent_buffer *node;
11735 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11739 /* Query level for level == -1 special case */
11741 level = query_tree_block_level(fs_info, bytenr);
11743 err |= REFERENCER_MISSING;
11747 key.objectid = root_id;
11748 key.type = BTRFS_ROOT_ITEM_KEY;
11749 key.offset = (u64)-1;
11751 root = btrfs_read_fs_root(fs_info, &key);
11752 if (IS_ERR(root)) {
11753 err |= REFERENCER_MISSING;
11757 /* Read out the tree block to get item/node key */
11758 eb = read_tree_block(fs_info, bytenr, 0);
11759 if (!extent_buffer_uptodate(eb)) {
11760 err |= REFERENCER_MISSING;
11761 free_extent_buffer(eb);
11765 /* Empty tree, no need to check key */
11766 if (!btrfs_header_nritems(eb) && !level) {
11767 free_extent_buffer(eb);
11772 btrfs_node_key_to_cpu(eb, &key, 0);
11774 btrfs_item_key_to_cpu(eb, &key, 0);
/* Only the first key is needed from the block, so the buffer is dropped here. */
11776 free_extent_buffer(eb);
11778 btrfs_init_path(&path);
11779 path.lowest_level = level;
11780 /* Search with the first key, to ensure we can reach it */
11781 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11783 err |= REFERENCER_MISSING;
11787 node = path.nodes[level];
11788 if (btrfs_header_bytenr(node) != bytenr) {
11790 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11791 bytenr, nodesize, bytenr,
11792 btrfs_header_bytenr(node));
11793 err |= REFERENCER_MISMATCH;
11795 if (btrfs_header_level(node) != level) {
11797 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11798 bytenr, nodesize, level,
11799 btrfs_header_level(node));
11800 err |= REFERENCER_MISMATCH;
11804 btrfs_release_path(&path);
/*
 * Two message variants exist for a missing referencer: one without and one
 * with the level.  NOTE(review): the condition choosing between them is not
 * visible here -- presumably the one without is used when the level could
 * not be resolved; confirm.
 */
11806 if (err & REFERENCER_MISSING) {
11808 error("extent [%llu %d] lost referencer (owner: %llu)",
11809 bytenr, nodesize, root_id);
11812 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11813 bytenr, nodesize, root_id, level);
11820 * Check if tree block @eb is tree reloc root.
11821 * Return 0 if it's not or any problem happens
11822 * Return 1 if it's a tree reloc root
/*
 * Tell whether @eb is the root node of a tree reloc root.
 *
 * The reloc root is looked up with the key (BTRFS_TREE_RELOC_OBJECTID,
 * ROOT_ITEM, owner of @eb) through btrfs_read_fs_root_no_cache().  The bytenr
 * of @eb is then compared with the bytenr of that root node, and the root
 * obtained from the lookup is released with btrfs_free_fs_root().
 *
 * NOTE(review): the return statements are not visible in this listing; per
 * the comment preceding the function, 0 is returned when the lookup fails or
 * the bytenrs differ and 1 when they match -- confirm.
 */
11824 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11825 struct extent_buffer *eb)
11827 struct btrfs_root *tree_reloc_root;
11828 struct btrfs_key key;
11829 u64 bytenr = btrfs_header_bytenr(eb);
11830 u64 owner = btrfs_header_owner(eb);
11833 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11834 key.offset = owner;
11835 key.type = BTRFS_ROOT_ITEM_KEY;
11837 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11838 if (IS_ERR(tree_reloc_root))
11841 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11843 btrfs_free_fs_root(tree_reloc_root);
11848 * Check referencer for shared block backref
11849 * If level == -1, this function will resolve the level.
/*
 * Check that the tree block at @parent really points to the block at @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the node named by the shared block backref
 * @bytenr:  start of the referenced tree block
 * @level:   level of the referenced block; may be resolved through
 *           query_tree_block_level()
 *
 * The parent is read and, when @parent equals @bytenr, is_tree_reloc_root()
 * is consulted.  Otherwise the header level of the parent is compared with
 * level + 1 and its block pointers are scanned for @bytenr.  When no parent
 * is found an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the branch bodies (gotos, the found_parent assignments) and
 * the success return are not visible in this listing -- confirm against the
 * full file.
 */
11851 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11852 u64 parent, u64 bytenr, int level)
11854 struct extent_buffer *eb;
11856 int found_parent = 0;
11859 eb = read_tree_block(fs_info, parent, 0);
11860 if (!extent_buffer_uptodate(eb))
11864 level = query_tree_block_level(fs_info, bytenr);
11868 /* It's possible it's a tree reloc root */
11869 if (parent == bytenr) {
11870 if (is_tree_reloc_root(fs_info, eb))
11875 if (level + 1 != btrfs_header_level(eb))
11878 nr = btrfs_header_nritems(eb);
11879 for (i = 0; i < nr; i++) {
11880 if (bytenr == btrfs_node_blockptr(eb, i)) {
11886 free_extent_buffer(eb);
11887 if (!found_parent) {
11889 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11890 bytenr, fs_info->nodesize, parent, level);
11891 return REFERENCER_MISSING;
11897 * Check referencer for normal (inlined) data ref
11898 * If len == 0, it will be resolved by searching in extent tree
/*
 * Check that a keyed data backref is matched by real file extent items.
 *
 * @fs_info:  filesystem being checked
 * @root_id:  objectid of the root named by the backref
 * @objectid: inode number named by the backref
 * @offset:   backref offset (file offset minus file extent offset)
 * @bytenr:   start of the data extent
 * @len:      length of the data extent
 * @count:    reference count recorded in the backref
 *
 * The extent tree is first searched for the EXTENT_ITEM of @bytenr.  Then the
 * root is read and its EXTENT_DATA items for @objectid are walked with
 * btrfs_next_item().  An item is compared on disk bytenr, disk num bytes,
 * backref offset and leaf owner.  If the number of matches differs from
 * @count an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the statement that resolves @len from the extent item, the
 * loop construct, the found_count update and the success return are not
 * visible in this listing -- confirm against the full file.
 */
11900 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11901 u64 root_id, u64 objectid, u64 offset,
11902 u64 bytenr, u64 len, u32 count)
11904 struct btrfs_root *root;
11905 struct btrfs_root *extent_root = fs_info->extent_root;
11906 struct btrfs_key key;
11907 struct btrfs_path path;
11908 struct extent_buffer *leaf;
11909 struct btrfs_file_extent_item *fi;
11910 u32 found_count = 0;
11915 key.objectid = bytenr;
11916 key.type = BTRFS_EXTENT_ITEM_KEY;
11917 key.offset = (u64)-1;
11919 btrfs_init_path(&path);
11920 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11923 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11926 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11927 if (key.objectid != bytenr ||
11928 key.type != BTRFS_EXTENT_ITEM_KEY)
11931 btrfs_release_path(&path);
11933 key.objectid = root_id;
11934 key.type = BTRFS_ROOT_ITEM_KEY;
11935 key.offset = (u64)-1;
11936 btrfs_init_path(&path);
11938 root = btrfs_read_fs_root(fs_info, &key);
11942 key.objectid = objectid;
11943 key.type = BTRFS_EXTENT_DATA_KEY;
11945 * It can be nasty as data backref offset is
11946 * file offset - file extent offset, which is smaller or
11947 * equal to original backref offset. The only special case is
11948 * overflow. So we need to special check and do further search.
/* An offset with bit 63 set is treated as overflowed, so the search starts at 0. */
11950 key.offset = offset & (1ULL << 63) ? 0 : offset;
11952 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11957 * Search afterwards to get correct one
11958 * NOTE: As we must do a comprehensive check on the data backref to
11959 * make sure the dref count also matches, we must iterate all file
11960 * extents for that inode.
11963 leaf = path.nodes[0];
11964 slot = path.slots[0];
11966 if (slot >= btrfs_header_nritems(leaf) ||
11967 btrfs_header_owner(leaf) != root_id)
11969 btrfs_item_key_to_cpu(leaf, &key, slot);
11970 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11972 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11974 * Except normal disk bytenr and disk num bytes, we still
11975 * need to do extra check on dbackref offset as
11976 * dbackref offset = file_offset - file_extent_offset
11978 * Also, we must check the leaf owner.
11979 * In case of shared tree blocks (snapshots) we can inherit
11980 * leaves from source snapshot.
11981 * In that case, reference from source snapshot should not
11984 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11985 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11986 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11987 offset && btrfs_header_owner(leaf) == root_id)
11991 ret = btrfs_next_item(root, &path);
11996 btrfs_release_path(&path);
11997 if (found_count != count) {
11999 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12000 bytenr, len, root_id, objectid, offset, count, found_count);
12001 return REFERENCER_MISSING;
12007 * Check if the referencer of a shared data backref exists
/*
 * Check that the leaf at @parent holds a file extent pointing to @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the leaf named by the shared data backref
 * @bytenr:  start of the data extent
 *
 * The leaf is read and its items are scanned.  Only EXTENT_DATA items are of
 * interest, the inline extent type is tested separately, and the disk bytenr
 * of the remaining items is compared with @bytenr.  When no referencer is
 * found an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the loop branch bodies, the found_parent assignment and the
 * success return are not visible in this listing -- confirm against the full
 * file.
 */
12009 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12010 u64 parent, u64 bytenr)
12012 struct extent_buffer *eb;
12013 struct btrfs_key key;
12014 struct btrfs_file_extent_item *fi;
12016 int found_parent = 0;
12019 eb = read_tree_block(fs_info, parent, 0);
12020 if (!extent_buffer_uptodate(eb))
12023 nr = btrfs_header_nritems(eb);
12024 for (i = 0; i < nr; i++) {
12025 btrfs_item_key_to_cpu(eb, &key, i);
12026 if (key.type != BTRFS_EXTENT_DATA_KEY)
12029 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12030 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12033 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12040 free_extent_buffer(eb);
12041 if (!found_parent) {
12042 error("shared extent %llu referencer lost (parent: %llu)",
12044 return REFERENCER_MISSING;
12050 * Only delete backref if REFERENCER_MISSING now
12052 * Returns <0 the extent was deleted
12053 * Returns >0 the backref was deleted but extent still exists, returned value
12054 * means error after repair
12055 * Returns 0 nothing happened
/*
 * Drop a backref whose referencer is missing or mismatched.
 *
 * @trans:         transaction handle used for the deletion
 * @root:          tree searched again after the deletion
 * @path:          path positioned at the extent item; released and searched
 *                 again with the key it pointed to on entry
 * @bytenr:        start of the extent
 * @num_bytes:     length of the extent
 * @parent:        forwarded to btrfs_free_extent()
 * @root_objectid: forwarded to btrfs_free_extent()
 * @owner:         forwarded to btrfs_free_extent()
 * @offset:        forwarded to btrfs_free_extent()
 * @err:           error bits found for this backref
 *
 * The key under @path is saved first because, as the comment in the body
 * notes, btrfs_free_extent() may delete the extent.
 *
 * NOTE(review): the branch conditions around the messages and the return
 * statement are not visible in this listing -- confirm against the full file.
 */
12057 static int repair_extent_item(struct btrfs_trans_handle *trans,
12058 struct btrfs_root *root, struct btrfs_path *path,
12059 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12060 u64 owner, u64 offset, int err)
12062 struct btrfs_key old_key;
12066 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12068 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12069 /* delete the backref */
12070 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12071 num_bytes, parent, root_objectid, owner, offset);
/*
 * NOTE(review): the deletion is triggered by either REFERENCER_MISSING or
 * REFERENCER_MISMATCH, but only REFERENCER_MISSING is cleared here --
 * confirm this is intended.
 */
12074 err &= ~REFERENCER_MISSING;
12075 printf("Delete backref in extent [%llu %llu]\n",
12076 bytenr, num_bytes);
12078 error("fail to delete backref in extent [%llu %llu]",
12079 bytenr, num_bytes);
12083 /* btrfs_free_extent may delete the extent */
12084 btrfs_release_path(path);
12085 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12095 * This function will check a given extent item, including its backref and
12096 * itself (like crossing stripe boundary and type)
12098 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the extent item under @path and every inline backref it contains.
 *
 * @trans:   transaction handle forwarded to repair_extent_item()
 * @fs_info: filesystem being checked
 * @path:    path whose nodes[0]/slots[0] select the extent item
 *
 * Visible flow:
 *  1. The size is key.offset for an EXTENT_ITEM key, otherwise nodesize, and
 *     is added to bytes_used.
 *  2. An item smaller than struct btrfs_extent_item is reported as the
 *     unsupported COMPAT_EXTENT_TREE_V0 layout.
 *  3. For metadata, check_crossing_stripes() may set
 *     CROSSING_STRIPE_BOUNDARY.
 *  4. The level is read from the btrfs_tree_block_info that follows the
 *     extent item for metadata stored under an EXTENT_ITEM key, or from
 *     key.offset otherwise.
 *  5. Each inline ref is dispatched on its type to
 *     check_tree_block_backref(), check_shared_block_backref(),
 *     check_extent_data_backref() or check_shared_data_backref(); an unknown
 *     type yields UNKNOWN_TYPE.  Walking past the item end sets
 *     ITEM_SIZE_MISMATCH.
 *  6. Inside the branch guarded by err && repair, repair_extent_item() is
 *     called.
 *
 * NOTE(review): the repair guard tests err while the error argument handed to
 * repair_extent_item() is ret -- confirm this is intended.  bytes_used,
 * repair and global_info are not declared in the visible lines of this block.
 * The loop construct, gotos and return are not visible in this listing
 * either -- confirm against the full file.
 */
12100 static int check_extent_item(struct btrfs_trans_handle *trans,
12101 struct btrfs_fs_info *fs_info,
12102 struct btrfs_path *path)
12104 struct btrfs_extent_item *ei;
12105 struct btrfs_extent_inline_ref *iref;
12106 struct btrfs_extent_data_ref *dref;
12107 struct extent_buffer *eb = path->nodes[0];
12110 int slot = path->slots[0];
12112 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12113 u32 item_size = btrfs_item_size_nr(eb, slot);
12123 struct btrfs_key key;
12127 btrfs_item_key_to_cpu(eb, &key, slot);
12128 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12129 bytes_used += key.offset;
12130 num_bytes = key.offset;
12132 bytes_used += nodesize;
12133 num_bytes = nodesize;
12136 if (item_size < sizeof(*ei)) {
12138 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12139 * old thing when on disk format is still un-determined.
12140 * No need to care about it anymore
12142 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12146 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12147 flags = btrfs_extent_flags(eb, ei);
12149 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12151 if (metadata && check_crossing_stripes(global_info, key.objectid,
12153 error("bad metadata [%llu, %llu) crossing stripe boundary",
12154 key.objectid, key.objectid + nodesize);
12155 err |= CROSSING_STRIPE_BOUNDARY;
12158 ptr = (unsigned long)(ei + 1);
12160 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12161 /* Old EXTENT_ITEM metadata */
12162 struct btrfs_tree_block_info *info;
12164 info = (struct btrfs_tree_block_info *)ptr;
12165 level = btrfs_tree_block_level(eb, info);
12166 ptr += sizeof(struct btrfs_tree_block_info);
12168 /* New METADATA_ITEM */
12169 level = key.offset;
12171 end = (unsigned long)ei + item_size;
12174 /* Reached extent item end normally */
12178 /* Beyond extent item end, wrong item size */
12180 err |= ITEM_SIZE_MISMATCH;
12181 error("extent item at bytenr %llu slot %d has wrong size",
12190 /* Now check every backref in this extent item */
12191 iref = (struct btrfs_extent_inline_ref *)ptr;
12192 type = btrfs_extent_inline_ref_type(eb, iref);
12193 offset = btrfs_extent_inline_ref_offset(eb, iref);
12195 case BTRFS_TREE_BLOCK_REF_KEY:
12196 root_objectid = offset;
12198 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12202 case BTRFS_SHARED_BLOCK_REF_KEY:
12204 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12208 case BTRFS_EXTENT_DATA_REF_KEY:
12209 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12210 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12211 owner = btrfs_extent_data_ref_objectid(eb, dref);
12212 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12213 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12214 owner_offset, key.objectid, key.offset,
12215 btrfs_extent_data_ref_count(eb, dref));
12218 case BTRFS_SHARED_DATA_REF_KEY:
12220 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12224 error("extent[%llu %d %llu] has unknown ref type: %d",
12225 key.objectid, key.type, key.offset, type);
12226 ret = UNKNOWN_TYPE;
12231 if (err && repair) {
12232 ret = repair_extent_item(trans, fs_info->extent_root, path,
12233 key.objectid, num_bytes, parent, root_objectid,
12234 owner, owner_offset, ret);
12243 ptr += btrfs_extent_inline_ref_size(type);
12251 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the dev extent at @slot of @eb is covered by a stripe of the
 * chunk it names.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the dev extent item inside @eb
 *
 * The chunk is looked up in the chunk tree by (chunk objectid, CHUNK_ITEM,
 * chunk offset) as stored in the dev extent.  The chunk is validated with
 * btrfs_check_chunk_valid(), its stripe length is compared with the dev
 * extent length, and its stripes are scanned for one whose devid and offset
 * equal the objectid and offset of the dev extent key.  When no chunk is
 * found an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the assignment of l (presumably path.nodes[0]), the gotos,
 * the found_chunk assignment and the success return are not visible in this
 * listing -- confirm against the full file.
 */
12253 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12254 struct extent_buffer *eb, int slot)
12256 struct btrfs_root *chunk_root = fs_info->chunk_root;
12257 struct btrfs_dev_extent *ptr;
12258 struct btrfs_path path;
12259 struct btrfs_key chunk_key;
12260 struct btrfs_key devext_key;
12261 struct btrfs_chunk *chunk;
12262 struct extent_buffer *l;
12266 int found_chunk = 0;
12269 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12270 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12271 length = btrfs_dev_extent_length(eb, ptr);
12273 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12274 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12275 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12277 btrfs_init_path(&path);
12278 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12283 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12284 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12289 if (btrfs_stripe_length(fs_info, l, chunk) != length)
12292 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12293 for (i = 0; i < num_stripes; i++) {
12294 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12295 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12297 if (devid == devext_key.objectid &&
12298 offset == devext_key.offset) {
12304 btrfs_release_path(&path);
12305 if (!found_chunk) {
12307 "device extent[%llu, %llu, %llu] did not find the related chunk",
12308 devext_key.objectid, devext_key.offset, length);
12309 return REFERENCER_MISSING;
12315 * Check if the used space is correct with the dev item
/*
 * Check that bytes_used of the dev item at @slot of @eb equals the sum of the
 * lengths of its dev extents.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * The dev tree is searched for DEV_EXTENT keys whose objectid is the device
 * id, and the lengths of the matching items are accumulated while stepping
 * with btrfs_next_item().  If the search finds nothing an error is printed
 * and REFERENCER_MISSING is returned; if the sum differs from bytes_used an
 * error is printed and ACCOUNTING_MISMATCH is returned.  Otherwise
 * check_dev_size_alignment() is called with the total size of the device.
 *
 * NOTE(review): the initial key.offset, the loop construct, its exit
 * statements and the final return are not visible in this listing -- confirm
 * against the full file.
 */
12317 static int check_dev_item(struct btrfs_fs_info *fs_info,
12318 struct extent_buffer *eb, int slot)
12320 struct btrfs_root *dev_root = fs_info->dev_root;
12321 struct btrfs_dev_item *dev_item;
12322 struct btrfs_path path;
12323 struct btrfs_key key;
12324 struct btrfs_dev_extent *ptr;
12331 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12332 dev_id = btrfs_device_id(eb, dev_item);
12333 used = btrfs_device_bytes_used(eb, dev_item);
12334 total_bytes = btrfs_device_total_bytes(eb, dev_item);
12336 key.objectid = dev_id;
12337 key.type = BTRFS_DEV_EXTENT_KEY;
12340 btrfs_init_path(&path);
12341 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item so the message shows the dev item key. */
12343 btrfs_item_key_to_cpu(eb, &key, slot);
12344 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12345 key.objectid, key.type, key.offset);
12346 btrfs_release_path(&path);
12347 return REFERENCER_MISSING;
12350 /* Iterate dev_extents to calculate the used space of a device */
12352 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12355 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12356 if (key.objectid > dev_id)
12358 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12361 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12362 struct btrfs_dev_extent);
12363 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12365 ret = btrfs_next_item(dev_root, &path);
12369 btrfs_release_path(&path);
/*
 * NOTE(review): key is reloaded from the dev item here, yet the message below
 * prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id instead of
 * the key fields -- confirm which triple is meant to be shown.
 */
12371 if (used != total) {
12372 btrfs_item_key_to_cpu(eb, &key, slot);
12374 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12375 total, used, BTRFS_ROOT_TREE_OBJECTID,
12376 BTRFS_DEV_EXTENT_KEY, dev_id);
12377 return ACCOUNTING_MISMATCH;
12379 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12385 * Check a block group item with its referencer (chunk) and its used space
12386 * with extent/metadata item
/*
 * Cross-check one block group item against the chunk tree and the extent tree.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf containing the block group item
 * @slot:    slot of the item in @eb
 *
 * Visible steps:
 *  1. Look up the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 *     BTRFS_CHUNK_ITEM_KEY, bg_key.objectid) and compare its length.
 *  2. Walk extent tree items starting at the block group bytenr, sum their
 *     sizes into total, and check that data / tree block extents live in a
 *     block group whose flags match.
 *  3. Compare total with the used value stored in the block group item.
 *
 * Problems are accumulated in err as REFERENCER_MISSING, REFERENCER_MISMATCH,
 * CHUNK_TYPE_MISMATCH and BG_ACCOUNTING_ERROR bits.
 *
 * NOTE(review): this listing omits short lines (braces, some declarations,
 * branch and return statements); comments cover only what is shown.
 */
12388 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12389 struct extent_buffer *eb, int slot)
12391 struct btrfs_root *extent_root = fs_info->extent_root;
12392 struct btrfs_root *chunk_root = fs_info->chunk_root;
12393 struct btrfs_block_group_item *bi;
12394 struct btrfs_block_group_item bg_item;
12395 struct btrfs_path path;
12396 struct btrfs_key bg_key;
12397 struct btrfs_key chunk_key;
12398 struct btrfs_key extent_key;
12399 struct btrfs_chunk *chunk;
12400 struct extent_buffer *leaf;
12401 struct btrfs_extent_item *ei;
12402 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12410 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12411 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the leaf so the btrfs_block_group_*() accessors can read it */
12412 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12413 used = btrfs_block_group_used(&bg_item);
12414 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item offset is the block group start bytenr */
12416 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12417 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12418 chunk_key.offset = bg_key.objectid;
12420 btrfs_init_path(&path);
12421 /* Search for the referencer chunk */
12422 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12425 "block group[%llu %llu] did not find the related chunk item",
12426 bg_key.objectid, bg_key.offset);
12427 err |= REFERENCER_MISSING;
12429 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12430 struct btrfs_chunk);
/* NOTE(review): the right-hand side of this comparison is on a line not shown */
12431 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12434 "block group[%llu %llu] related chunk item length does not match",
12435 bg_key.objectid, bg_key.offset);
12436 err |= REFERENCER_MISMATCH;
12439 btrfs_release_path(&path);
12441 /* Search from the block group bytenr */
12442 extent_key.objectid = bg_key.objectid;
12443 extent_key.type = 0;
12444 extent_key.offset = 0;
12446 btrfs_init_path(&path);
12447 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12451 /* Iterate extent tree to account used space */
12453 leaf = path.nodes[0];
12455 /* Search slot can point to the last item beyond leaf nritems */
12456 if (path.slots[0] >= btrfs_header_nritems(leaf))
12459 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Items at or past the end of the block group are outside its range */
12460 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only EXTENT_ITEM and METADATA_ITEM keys take part in the accounting */
12463 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12464 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12466 if (extent_key.objectid < bg_key.objectid)
/*
 * NOTE(review): the statement taken for METADATA_ITEM keys is not visible in
 * this listing; presumably it adds nodesize rather than the key offset.
 * Confirm against the full file.
 */
12469 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12472 total += extent_key.offset;
12474 ei = btrfs_item_ptr(leaf, path.slots[0],
12475 struct btrfs_extent_item);
12476 flags = btrfs_extent_flags(leaf, ei);
/* A data extent must live in a block group that has the DATA flag */
12477 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12478 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12480 "bad extent[%llu, %llu) type mismatch with chunk",
12481 extent_key.objectid,
12482 extent_key.objectid + extent_key.offset);
12483 err |= CHUNK_TYPE_MISMATCH;
/* A tree block must live in a SYSTEM or METADATA block group */
12485 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12486 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12487 BTRFS_BLOCK_GROUP_METADATA))) {
12489 "bad extent[%llu, %llu) type mismatch with chunk",
12490 extent_key.objectid,
12491 extent_key.objectid + nodesize);
12492 err |= CHUNK_TYPE_MISMATCH;
12496 ret = btrfs_next_item(extent_root, &path);
12502 btrfs_release_path(&path);
/* The summed extent sizes must equal the used counter of the item */
12504 if (total != used) {
12506 "block group[%llu %llu] used %llu but extent items used %llu",
12507 bg_key.objectid, bg_key.offset, used, total);
12508 err |= BG_ACCOUNTING_ERROR;
12514 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12515 * FIXME: We still need to repair error of dev_item.
12517 * Returns error after repair.
12519 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12520 struct btrfs_root *chunk_root,
12521 struct btrfs_path *path, int err)
12523 struct btrfs_chunk *chunk;
12524 struct btrfs_key chunk_key;
12525 struct extent_buffer *eb = path->nodes[0];
12527 int slot = path->slots[0];
12531 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12532 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12534 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12535 type = btrfs_chunk_type(path->nodes[0], chunk);
12536 length = btrfs_chunk_length(eb, chunk);
12538 if (err & REFERENCER_MISSING) {
12539 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12540 type, chunk_key.offset, length);
12542 error("fail to add block group item[%llu %llu]",
12543 chunk_key.offset, length);
12546 err &= ~REFERENCER_MISSING;
12547 printf("Added block group item[%llu %llu]\n",
12548 chunk_key.offset, length);
12557 * Check a chunk item.
12558 * Including checking all referred dev_extents and block group
12560 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12561 struct extent_buffer *eb, int slot)
12563 struct btrfs_root *extent_root = fs_info->extent_root;
12564 struct btrfs_root *dev_root = fs_info->dev_root;
12565 struct btrfs_path path;
12566 struct btrfs_key chunk_key;
12567 struct btrfs_key bg_key;
12568 struct btrfs_key devext_key;
12569 struct btrfs_chunk *chunk;
12570 struct extent_buffer *leaf;
12571 struct btrfs_block_group_item *bi;
12572 struct btrfs_block_group_item bg_item;
12573 struct btrfs_dev_extent *ptr;
12585 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12586 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12587 length = btrfs_chunk_length(eb, chunk);
12588 chunk_end = chunk_key.offset + length;
12589 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12592 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12594 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12597 type = btrfs_chunk_type(eb, chunk);
12599 bg_key.objectid = chunk_key.offset;
12600 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12601 bg_key.offset = length;
12603 btrfs_init_path(&path);
12604 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12607 "chunk[%llu %llu) did not find the related block group item",
12608 chunk_key.offset, chunk_end);
12609 err |= REFERENCER_MISSING;
12611 leaf = path.nodes[0];
12612 bi = btrfs_item_ptr(leaf, path.slots[0],
12613 struct btrfs_block_group_item);
12614 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12616 if (btrfs_block_group_flags(&bg_item) != type) {
12618 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12619 chunk_key.offset, chunk_end, type,
12620 btrfs_block_group_flags(&bg_item));
12621 err |= REFERENCER_MISSING;
12625 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12626 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12627 for (i = 0; i < num_stripes; i++) {
12628 btrfs_release_path(&path);
12629 btrfs_init_path(&path);
12630 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12631 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12632 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12634 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12637 goto not_match_dev;
12639 leaf = path.nodes[0];
12640 ptr = btrfs_item_ptr(leaf, path.slots[0],
12641 struct btrfs_dev_extent);
12642 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12643 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12644 if (objectid != chunk_key.objectid ||
12645 offset != chunk_key.offset ||
12646 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12647 goto not_match_dev;
12650 err |= BACKREF_MISSING;
12652 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12653 chunk_key.objectid, chunk_end, i);
12656 btrfs_release_path(&path);
12661 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12662 struct btrfs_root *root,
12663 struct btrfs_path *path)
12665 struct btrfs_key key;
12668 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12669 btrfs_release_path(path);
12670 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12676 ret = btrfs_del_item(trans, root, path);
12680 if (path->slots[0] == 0)
12681 btrfs_prev_leaf(root, path);
12686 error("failed to delete root %llu item[%llu, %u, %llu]",
12687 root->objectid, key.objectid, key.type, key.offset);
12689 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12690 root->objectid, key.objectid, key.type, key.offset);
12695 * Main entry function to check known items and update related accounting info
/*
 * Dispatch every item of the leaf at path->nodes[0] to its type-specific
 * checker.
 *
 * @trans:         transaction handed to the repair helpers
 * @root:          tree that owns the leaf
 * @path:          path whose level 0 node and slot select the starting item
 * @nrefs:         passed through to the extent data checker and repairer
 * @account_bytes: passed through to check_extent_data_item()
 *
 * The switch below is keyed on the item key (the switch line itself is not
 * shown).  Visible cases: EXTENT_DATA, BLOCK_GROUP_ITEM, DEV_ITEM, CHUNK_ITEM,
 * DEV_EXTENT, EXTENT_ITEM / METADATA_ITEM, EXTENT_CSUM (only accounted into
 * total_csum_bytes), and the four backref key types.  For backref items a
 * result containing REFERENCER_MISMATCH or REFERENCER_MISSING leads to
 * delete_extent_tree_item(); the first half of each of those conditions is on
 * a line not shown.
 *
 * NOTE(review): this listing omits short lines (braces, break statements,
 * declarations, the return); comments cover only what is shown.
 */
12697 static int check_leaf_items(struct btrfs_trans_handle *trans,
12698 struct btrfs_root *root, struct btrfs_path *path,
12699 struct node_refs *nrefs, int account_bytes)
12701 struct btrfs_fs_info *fs_info = root->fs_info;
12702 struct btrfs_key key;
12703 struct extent_buffer *eb;
12706 struct btrfs_extent_data_ref *dref;
12711 eb = path->nodes[0];
12712 slot = path->slots[0];
/* A slot at or past nritems is reported with the empty leaf message below */
12713 if (slot >= btrfs_header_nritems(eb)) {
12715 error("empty leaf [%llu %u] root %llu", eb->start,
12716 root->fs_info->nodesize, root->objectid);
12722 btrfs_item_key_to_cpu(eb, &key, slot);
12726 case BTRFS_EXTENT_DATA_KEY:
12727 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12729 ret = repair_extent_data_item(trans, root, path, nrefs,
12733 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12734 ret = check_block_group_item(fs_info, eb, slot);
/* A block group whose chunk is missing is removed from the tree */
12736 ret & REFERENCER_MISSING)
12737 ret = delete_extent_tree_item(trans, root, path);
12740 case BTRFS_DEV_ITEM_KEY:
12741 ret = check_dev_item(fs_info, eb, slot);
12744 case BTRFS_CHUNK_ITEM_KEY:
12745 ret = check_chunk_item(fs_info, eb, slot);
12747 ret = repair_chunk_item(trans, root, path, ret);
12750 case BTRFS_DEV_EXTENT_KEY:
12751 ret = check_dev_extent_item(fs_info, eb, slot);
12754 case BTRFS_EXTENT_ITEM_KEY:
12755 case BTRFS_METADATA_ITEM_KEY:
12756 ret = check_extent_item(trans, fs_info, path);
/* Csum items are not verified here, only their size is accounted */
12759 case BTRFS_EXTENT_CSUM_KEY:
12760 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12763 case BTRFS_TREE_BLOCK_REF_KEY:
12764 ret = check_tree_block_backref(fs_info, key.offset,
12767 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12768 ret = delete_extent_tree_item(trans, root, path);
12771 case BTRFS_EXTENT_DATA_REF_KEY:
12772 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12773 ret = check_extent_data_backref(fs_info,
12774 btrfs_extent_data_ref_root(eb, dref),
12775 btrfs_extent_data_ref_objectid(eb, dref),
12776 btrfs_extent_data_ref_offset(eb, dref),
12778 btrfs_extent_data_ref_count(eb, dref));
12780 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12781 ret = delete_extent_tree_item(trans, root, path);
12784 case BTRFS_SHARED_BLOCK_REF_KEY:
12785 ret = check_shared_block_backref(fs_info, key.offset,
12788 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12789 ret = delete_extent_tree_item(trans, root, path);
12792 case BTRFS_SHARED_DATA_REF_KEY:
12793 ret = check_shared_data_backref(fs_info, key.offset,
12796 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12797 ret = delete_extent_tree_item(trans, root, path);
12811 * Low memory usage version check_chunks_and_extents.
/*
 * Low-memory chunk and extent check: run check_btrfs_root() over the chunk
 * tree, the tree root, and every root item found in the tree root starting at
 * BTRFS_EXTENT_TREE_OBJECTID.
 *
 * @fs_info: filesystem being checked
 *
 * A transaction is started on the extent root (the guarding condition is on a
 * line not shown) and committed at the end.  When the start fails the
 * PTR_ERR() of the handle is returned.  The visible tail calls
 * btrfs_fix_block_accounting() and can clear BG_ACCOUNTING_ERROR from err.
 *
 * NOTE(review): this listing omits short lines (braces, labels, gotos, err
 * updates, the final return); comments cover only what is shown.
 */
12813 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12815 struct btrfs_trans_handle *trans = NULL;
12816 struct btrfs_path path;
12817 struct btrfs_key old_key;
12818 struct btrfs_key key;
12819 struct btrfs_root *root1;
12820 struct btrfs_root *root;
12821 struct btrfs_root *cur_root;
12825 root = fs_info->fs_root;
12828 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12829 if (IS_ERR(trans)) {
12830 error("failed to start transaction before check");
12831 return PTR_ERR(trans);
/* The chunk tree and the tree root are checked first */
12835 root1 = root->fs_info->chunk_root;
12836 ret = check_btrfs_root(trans, root1, 0, 1);
12839 root1 = root->fs_info->tree_root;
12840 ret = check_btrfs_root(trans, root1, 0, 1);
/* Then iterate root items in the tree root, starting at the extent tree */
12843 btrfs_init_path(&path);
12844 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12846 key.type = BTRFS_ROOT_ITEM_KEY;
12848 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12850 error("cannot find extent tree in tree_root");
12855 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12856 if (key.type != BTRFS_ROOT_ITEM_KEY)
12859 key.offset = (u64)-1;
/* Reloc trees are read uncached and freed again after the check */
12861 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12862 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12865 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12866 if (IS_ERR(cur_root) || !cur_root) {
12867 error("failed to read tree: %lld", key.objectid);
12871 ret = check_btrfs_root(trans, cur_root, 0, 1);
12874 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12875 btrfs_free_fs_root(cur_root);
/*
 * The path is released and the tree root is searched again with old_key
 * before advancing.  NOTE(review): the assignment to old_key is on a line not
 * shown.
 */
12877 btrfs_release_path(&path);
12878 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12879 &old_key, &path, 0, 0);
12883 ret = btrfs_next_item(root1, &path);
12889 /* if repair, update block accounting */
12891 ret = btrfs_fix_block_accounting(trans, root);
12895 err &= ~BG_ACCOUNTING_ERROR;
/* NOTE(review): the return value of the commit is not used on this line */
12899 btrfs_commit_transaction(trans, root->fs_info->extent_root);
12901 btrfs_release_path(&path);
12906 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12910 if (!ctx.progress_enabled)
12911 fprintf(stderr, "checking extents\n");
12912 if (check_mode == CHECK_MODE_LOWMEM)
12913 ret = check_chunks_and_extents_v2(fs_info);
12915 ret = check_chunks_and_extents(fs_info);
12917 /* Also repair device size related problems */
12918 if (repair && !ret) {
12919 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * Give @root a freshly initialised root node.
 *
 * @trans:     running transaction; its transid becomes the header generation
 * @root:      root to re-initialise
 * @overwrite: selects how the new buffer c is obtained (the branch itself is
 *             on lines not shown; one visible path allocates a block with
 *             btrfs_alloc_free_block())
 *
 * The header of the buffer is zeroed and rewritten (level, bytenr,
 * generation, backref revision, owner, fsid, chunk tree uuid) and the buffer
 * is marked dirty.  When the new block has the same start as the old root
 * node, the root item is updated in the tree root directly; the visible tail
 * frees the old buffer and adds the root to the dirty list.
 *
 * NOTE(review): this listing omits short lines (braces, declarations, error
 * checks, returns); comments cover only what is shown.
 */
12926 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12927 struct btrfs_root *root, int overwrite)
12929 struct extent_buffer *c;
12930 struct extent_buffer *old = root->node;
12933 struct btrfs_disk_key disk_key = {0,0,0};
12939 extent_buffer_get(c);
12942 c = btrfs_alloc_free_block(trans, root,
12943 root->fs_info->nodesize,
12944 root->root_key.objectid,
12945 &disk_key, level, 0, 0);
12948 extent_buffer_get(c);
/* Wipe the header, then fill in every field of the new root node */
12952 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12953 btrfs_set_header_level(c, level);
12954 btrfs_set_header_bytenr(c, c->start);
12955 btrfs_set_header_generation(c, trans->transid);
12956 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12957 btrfs_set_header_owner(c, root->root_key.objectid);
12959 write_extent_buffer(c, root->fs_info->fsid,
12960 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12962 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12963 btrfs_header_chunk_tree_uuid(c),
12966 btrfs_mark_buffer_dirty(c);
12968 * this case can happen in the following case:
12970 * 1.overwrite previous root.
12972 * 2.reinit reloc data root, this is because we skip pin
12973 * down reloc data tree before which means we can allocate
12974 * same block bytenr here.
/* Same bytenr as before: write generation and level into the root item now */
12976 if (old->start == c->start) {
12977 btrfs_set_root_generation(&root->root_item,
12979 root->root_item.level = btrfs_header_level(root->node);
12980 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12981 &root->root_key, &root->root_item);
12983 free_extent_buffer(c);
12987 free_extent_buffer(old);
12989 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree blocks reachable from @eb so that later
 * allocations cannot reuse them.
 *
 * @fs_info:   filesystem whose pinned_extents set is updated
 * @eb:        tree block to start from
 * @tree_root: non-zero when @eb belongs to the tree root; it is passed down
 *             unchanged for child nodes and as 0 for the roots found in root
 *             items
 *
 * Visible behaviour: a block whose range already has EXTENT_DIRTY set in
 * pinned_extents is tested for first; @eb itself is pinned with
 * btrfs_pin_extent(); root items other than the extent root and the two reloc
 * trees have their root block read and walked recursively; for node pointers
 * the child is either pinned directly (level 1 and not the tree root) or read
 * and walked recursively.
 *
 * NOTE(review): this listing omits short lines (braces, declarations,
 * continue / return statements, the level test that separates leaf handling
 * from node handling); comments cover only what is shown.
 */
12993 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12994 struct extent_buffer *eb, int tree_root)
12996 struct extent_buffer *tmp;
12997 struct btrfs_root_item *ri;
12998 struct btrfs_key key;
13000 int level = btrfs_header_level(eb);
13006 * If we have pinned this block before, don't pin it again.
13007 * This can not only avoid forever loop with broken filesystem
13008 * but also give us some speedups.
13010 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13011 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13014 btrfs_pin_extent(fs_info, eb->start, eb->len);
13016 nritems = btrfs_header_nritems(eb);
13017 for (i = 0; i < nritems; i++) {
13019 btrfs_item_key_to_cpu(eb, &key, i);
13020 if (key.type != BTRFS_ROOT_ITEM_KEY)
13022 /* Skip the extent root and reloc roots */
13023 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13024 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13025 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13027 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13028 bytenr = btrfs_disk_root_bytenr(eb, ri);
13031 * If at any point we start needing the real root we
13032 * will have to build a stump root for the root we are
13033 * in, but for now this doesn't actually use the root so
13034 * just pass in extent_root.
/* Read the root block named by the root item and walk it as a non tree root */
13036 tmp = read_tree_block(fs_info, bytenr, 0);
13037 if (!extent_buffer_uptodate(tmp)) {
13038 fprintf(stderr, "Error reading root block\n");
13041 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13042 free_extent_buffer(tmp);
/* Node pointer handling: bytenr is the child block of slot i */
13046 bytenr = btrfs_node_blockptr(eb, i);
13048 /* If we aren't the tree root don't read the block */
13049 if (level == 1 && !tree_root) {
13050 btrfs_pin_extent(fs_info, bytenr,
13051 fs_info->nodesize);
13055 tmp = read_tree_block(fs_info, bytenr, 0);
13056 if (!extent_buffer_uptodate(tmp)) {
13057 fprintf(stderr, "Error reading tree block\n");
13060 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13061 free_extent_buffer(tmp);
13070 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13074 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13078 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * @fs_info: filesystem whose block group cache is rebuilt
 *
 * Visible behaviour: the chunk tree is searched for BTRFS_CHUNK_ITEM_KEY
 * items; the three avail_*_alloc_bits fields are cleared; for every chunk
 * item btrfs_add_block_group() is called with the chunk type, start
 * (key.offset) and length, and the same range is marked dirty in
 * fs_info->free_space_cache.  A second visible loop steps through the block
 * groups with btrfs_lookup_first_block_group(), advancing start past each
 * one; what it does with each cache entry is on lines not shown.
 *
 * NOTE(review): this listing omits short lines (braces, declarations, loop
 * headers, error checks, returns); comments cover only what is shown.
 */
13081 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13083 struct btrfs_block_group_cache *cache;
13084 struct btrfs_path path;
13085 struct extent_buffer *leaf;
13086 struct btrfs_chunk *chunk;
13087 struct btrfs_key key;
13091 btrfs_init_path(&path);
13093 key.type = BTRFS_CHUNK_ITEM_KEY;
13095 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13097 btrfs_release_path(&path);
13102 * We do this in case the block groups were screwed up and had alloc
13103 * bits that aren't actually set on the chunks. This happens with
13104 * restored images every time and could happen in real life I guess.
13106 fs_info->avail_data_alloc_bits = 0;
13107 fs_info->avail_metadata_alloc_bits = 0;
13108 fs_info->avail_system_alloc_bits = 0;
13110 /* First we need to create the in-memory block groups */
13112 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13113 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13115 btrfs_release_path(&path);
13123 leaf = path.nodes[0];
13124 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13125 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13130 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13131 btrfs_add_block_group(fs_info, 0,
13132 btrfs_chunk_type(leaf, chunk), key.offset,
13133 btrfs_chunk_length(leaf, chunk));
13134 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13135 key.offset + btrfs_chunk_length(leaf, chunk));
13140 cache = btrfs_lookup_first_block_group(fs_info, start);
13144 start = cache->key.objectid + cache->key.offset;
13147 btrfs_release_path(&path);
/*
 * Drop the state of a pending balance from the tree root.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Visible steps:
 *  1. Search for the (BTRFS_BALANCE_OBJECTID, BTRFS_BALANCE_ITEM_KEY) item
 *     with deletion intent and delete it; one path jumps straight to the
 *     reinit_data_reloc label.
 *  2. Search for BTRFS_TREE_RELOC_OBJECTID root items and delete them in
 *     batches with btrfs_del_items(), tracking the batch in del_slot and
 *     del_nr.
 *  3. Read the data reloc tree root, record it in the transaction,
 *     re-initialise it with btrfs_fsck_reinit_root() and create its root
 *     directory.
 *
 * NOTE(review): this listing omits short lines (braces, labels, loop headers,
 * error checks, the return); comments cover only what is shown.
 */
13151 static int reset_balance(struct btrfs_trans_handle *trans,
13152 struct btrfs_fs_info *fs_info)
13154 struct btrfs_root *root = fs_info->tree_root;
13155 struct btrfs_path path;
13156 struct extent_buffer *leaf;
13157 struct btrfs_key key;
13158 int del_slot, del_nr = 0;
13162 btrfs_init_path(&path);
13163 key.objectid = BTRFS_BALANCE_OBJECTID;
13164 key.type = BTRFS_BALANCE_ITEM_KEY;
/* ins_len of -1 and cow of 1 prepare the path for a deletion */
13166 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13171 goto reinit_data_reloc;
13176 ret = btrfs_del_item(trans, root, &path);
13179 btrfs_release_path(&path);
/* Next, remove the root items of the tree reloc roots */
13181 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13182 key.type = BTRFS_ROOT_ITEM_KEY;
13184 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13188 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13193 ret = btrfs_del_items(trans, root, &path,
13200 btrfs_release_path(&path);
13203 ret = btrfs_search_slot(trans, root, &key, &path,
13210 leaf = path.nodes[0];
13211 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13212 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13214 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13219 del_slot = path.slots[0];
13228 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13232 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, not the tree root */
13235 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13236 key.type = BTRFS_ROOT_ITEM_KEY;
13237 key.offset = (u64)-1;
13238 root = btrfs_read_fs_root(fs_info, &key);
13239 if (IS_ERR(root)) {
13240 fprintf(stderr, "Error reading data reloc tree\n");
13241 ret = PTR_ERR(root);
13244 record_root_in_trans(trans, root);
13245 ret = btrfs_fsck_reinit_root(trans, root, 0);
13248 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13250 btrfs_release_path(&path);
/*
 * Re-create the extent tree from scratch.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Visible steps, in order:
 *  1. Filesystems with the MIXED_GROUPS incompat flag get the unsupported
 *     message below.
 *  2. pin_metadata_blocks() pins the metadata currently in use.
 *  3. The block groups are freed and rebuilt with reset_block_groups().
 *  4. The extent root is re-initialised with btrfs_fsck_reinit_root().
 *  5. Every in-memory block group item is inserted into the new extent root,
 *     followed by btrfs_extent_post_op().
 *  6. reset_balance() clears any pending balance.
 * Each failing step prints its own message to stderr.
 *
 * NOTE(review): this listing omits short lines (braces, declarations, error
 * checks, loop headers, returns); comments cover only what is shown.
 */
13254 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13255 struct btrfs_fs_info *fs_info)
13261 * The only reason we don't do this is because right now we're just
13262 * walking the trees we find and pinning down their bytes, we don't look
13263 * at any of the leaves. In order to do mixed groups we'd have to check
13264 * the leaves of any fs roots and pin down the bytes for any file
13265 * extents we find. Not hard but why do it if we don't have to?
13267 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13268 fprintf(stderr, "We don't support re-initing the extent tree "
13269 "for mixed block groups yet, please notify a btrfs "
13270 "developer you want to do this so they can add this "
13271 "functionality.\n");
13276 * first we need to walk all of the trees except the extent tree and pin
13277 * down the bytes that are in use so we don't overwrite any existing
13280 ret = pin_metadata_blocks(fs_info);
13282 fprintf(stderr, "error pinning down used bytes\n");
13287 * Need to drop all the block groups since we're going to recreate all
13290 btrfs_free_block_groups(fs_info);
13291 ret = reset_block_groups(fs_info);
13293 fprintf(stderr, "error resetting the block groups\n");
13297 /* Ok we can allocate now, reinit the extent root */
13298 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13300 fprintf(stderr, "extent root initialization failed\n");
13302 * When the transaction code is updated we should end the
13303 * transaction, but for now progs only knows about commit so
13304 * just return an error.
13310 * Now we have all the in-memory block groups setup so we can make
13311 * allocations properly, and the metadata we care about is safe since we
13312 * pinned all of it above.
13315 struct btrfs_block_group_cache *cache;
13317 cache = btrfs_lookup_first_block_group(fs_info, start);
13320 start = cache->key.objectid + cache->key.offset;
13321 ret = btrfs_insert_item(trans, fs_info->extent_root,
13322 &cache->key, &cache->item,
13323 sizeof(cache->item));
13325 fprintf(stderr, "Error adding block group\n");
13328 btrfs_extent_post_op(trans, fs_info->extent_root);
13331 ret = reset_balance(trans, fs_info);
13333 fprintf(stderr, "error resetting the pending balance\n");
13338 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13340 struct btrfs_path path;
13341 struct btrfs_trans_handle *trans;
13342 struct btrfs_key key;
13345 printf("Recowing metadata block %llu\n", eb->start);
13346 key.objectid = btrfs_header_owner(eb);
13347 key.type = BTRFS_ROOT_ITEM_KEY;
13348 key.offset = (u64)-1;
13350 root = btrfs_read_fs_root(root->fs_info, &key);
13351 if (IS_ERR(root)) {
13352 fprintf(stderr, "Couldn't find owner root %llu\n",
13354 return PTR_ERR(root);
13357 trans = btrfs_start_transaction(root, 1);
13359 return PTR_ERR(trans);
13361 btrfs_init_path(&path);
13362 path.lowest_level = btrfs_header_level(eb);
13363 if (path.lowest_level)
13364 btrfs_node_key_to_cpu(eb, &key, 0);
13366 btrfs_item_key_to_cpu(eb, &key, 0);
13368 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13369 btrfs_commit_transaction(trans, root);
13370 btrfs_release_path(&path);
13374 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13376 struct btrfs_path path;
13377 struct btrfs_trans_handle *trans;
13378 struct btrfs_key key;
13381 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13382 bad->key.type, bad->key.offset);
13383 key.objectid = bad->root_id;
13384 key.type = BTRFS_ROOT_ITEM_KEY;
13385 key.offset = (u64)-1;
13387 root = btrfs_read_fs_root(root->fs_info, &key);
13388 if (IS_ERR(root)) {
13389 fprintf(stderr, "Couldn't find owner root %llu\n",
13391 return PTR_ERR(root);
13394 trans = btrfs_start_transaction(root, 1);
13396 return PTR_ERR(trans);
13398 btrfs_init_path(&path);
13399 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13405 ret = btrfs_del_item(trans, root, &path);
13407 btrfs_commit_transaction(trans, root);
13408 btrfs_release_path(&path);
13412 static int zero_log_tree(struct btrfs_root *root)
13414 struct btrfs_trans_handle *trans;
13417 trans = btrfs_start_transaction(root, 1);
13418 if (IS_ERR(trans)) {
13419 ret = PTR_ERR(trans);
13422 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13423 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13424 ret = btrfs_commit_transaction(trans, root);
13428 static int populate_csum(struct btrfs_trans_handle *trans,
13429 struct btrfs_root *csum_root, char *buf, u64 start,
13432 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13437 while (offset < len) {
13438 sectorsize = fs_info->sectorsize;
13439 ret = read_extent_data(fs_info, buf, start + offset,
13443 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13444 start + offset, buf, sectorsize);
13447 offset += sectorsize;
/*
 * Rebuild checksums for every regular file extent of one fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 * @cur_root:  fs or subvolume tree to scan
 *
 * A sector-sized scratch buffer is allocated, the tree is searched and walked
 * item by item, and for each BTRFS_EXTENT_DATA_KEY item of type
 * BTRFS_FILE_EXTENT_REG populate_csum() is called on the on-disk range
 * (disk_bytenr, disk_num_bytes).  A result of -EEXIST from populate_csum() is
 * tested explicitly; the statement taken in that case is not shown.
 *
 * NOTE(review): this listing omits short lines (braces, declarations, search
 * key setup, gotos, free of the buffer, the return); comments cover only what
 * is shown.
 */
13452 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13453 struct btrfs_root *csum_root,
13454 struct btrfs_root *cur_root)
13456 struct btrfs_path path;
13457 struct btrfs_key key;
13458 struct extent_buffer *node;
13459 struct btrfs_file_extent_item *fi;
13466 buf = malloc(cur_root->fs_info->sectorsize);
13470 btrfs_init_path(&path);
13474 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13477 /* Iterate all regular file extents and fill its csum */
13479 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13481 if (key.type != BTRFS_EXTENT_DATA_KEY)
13483 node = path.nodes[0];
13484 slot = path.slots[0];
13485 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
/* Only extents of type REG are checksummed */
13486 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13488 start = btrfs_file_extent_disk_bytenr(node, fi);
13489 len = btrfs_file_extent_disk_num_bytes(node, fi);
13491 ret = populate_csum(trans, csum_root, buf, start, len);
13492 if (ret == -EEXIST)
13498 * TODO: if next leaf is corrupted, jump to nearest next valid
13501 ret = btrfs_next_item(cur_root, &path);
13511 btrfs_release_path(&path);
/*
 * Rebuild the csum tree by scanning every fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 *
 * The tree root is searched from BTRFS_FS_TREE_OBJECTID onwards.  Root items
 * whose objectid passes is_fstree() are read with btrfs_read_fs_root() and
 * handed to fill_csum_tree_from_one_fs_root().  The walk tests for objectids
 * above BTRFS_LAST_FREE_OBJECTID; the statement taken in that case is not
 * shown.
 *
 * NOTE(review): this listing omits short lines (braces, declarations, loop
 * header, gotos, error checks, the return); comments cover only what is
 * shown.
 */
13516 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13517 struct btrfs_root *csum_root)
13519 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13520 struct btrfs_path path;
13521 struct btrfs_root *tree_root = fs_info->tree_root;
13522 struct btrfs_root *cur_root;
13523 struct extent_buffer *node;
13524 struct btrfs_key key;
13528 btrfs_init_path(&path);
13529 key.objectid = BTRFS_FS_TREE_OBJECTID;
13531 key.type = BTRFS_ROOT_ITEM_KEY;
13532 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13541 node = path.nodes[0];
13542 slot = path.slots[0];
13543 btrfs_item_key_to_cpu(node, &key, slot);
13544 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13546 if (key.type != BTRFS_ROOT_ITEM_KEY)
13548 if (!is_fstree(key.objectid))
/* The root is read with an offset of (u64)-1 in the lookup key */
13550 key.offset = (u64)-1;
13552 cur_root = btrfs_read_fs_root(fs_info, &key);
13553 if (IS_ERR(cur_root) || !cur_root) {
13554 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13558 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13563 ret = btrfs_next_item(tree_root, &path);
13573 btrfs_release_path(&path);
/*
 * Rebuild the csum tree by scanning the extent tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 *
 * The extent tree is searched for BTRFS_EXTENT_ITEM_KEY items.  A
 * sector-sized scratch buffer is allocated, and for every extent item whose
 * flags include BTRFS_EXTENT_FLAG_DATA populate_csum() is called starting at
 * the extent bytenr (key.objectid).  Leaves are advanced with
 * btrfs_next_leaf().
 *
 * NOTE(review): this listing omits short lines (braces, declarations, loop
 * header, slot increments, error checks, free of the buffer, the return);
 * comments cover only what is shown.
 */
13577 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13578 struct btrfs_root *csum_root)
13580 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13581 struct btrfs_path path;
13582 struct btrfs_extent_item *ei;
13583 struct extent_buffer *leaf;
13585 struct btrfs_key key;
13588 btrfs_init_path(&path);
13590 key.type = BTRFS_EXTENT_ITEM_KEY;
13592 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13594 btrfs_release_path(&path);
13598 buf = malloc(csum_root->fs_info->sectorsize);
13600 btrfs_release_path(&path);
13605 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13606 ret = btrfs_next_leaf(extent_root, &path);
13614 leaf = path.nodes[0];
13616 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13617 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13622 ei = btrfs_item_ptr(leaf, path.slots[0],
13623 struct btrfs_extent_item);
/* Extent items without the DATA flag are tested for and handled separately */
13624 if (!(btrfs_extent_flags(leaf, ei) &
13625 BTRFS_EXTENT_FLAG_DATA)) {
13630 ret = populate_csum(trans, csum_root, buf, key.objectid,
13637 btrfs_release_path(&path);
13643 * Recalculate the csum and put it into the csum tree.
13645 * Extent tree init will wipe out all the extent info, so in that case, we
13646 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13647 * will use fs/subvol trees to init the csum tree.
/*
 * Rebuild the csum tree from the selected source.
 *
 * @trans:          running transaction
 * @csum_root:      csum tree that receives the checksums
 * @search_fs_tree: non-zero to scan the fs/subvolume trees, zero to scan the
 *                  extent tree
 *
 * Returns the result of the selected helper.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
13659 static void free_roots_info_cache(void)
13661 if (!roots_info_cache)
13664 while (!cache_tree_empty(roots_info_cache)) {
13665 struct cache_extent *entry;
13666 struct root_item_info *rii;
13668 entry = first_cache_extent(roots_info_cache);
13671 remove_cache_extent(roots_info_cache, entry);
13672 rii = container_of(entry, struct root_item_info, cache_extent);
13676 free(roots_info_cache);
13677 roots_info_cache = NULL;
/*
 * Fill roots_info_cache with one entry per root id, derived from the tree
 * block extent items of the extent tree.
 *
 * @info: filesystem whose extent tree is scanned
 *
 * The cache tree is allocated on first use.  Every EXTENT_ITEM with the
 * TREE_BLOCK flag and every METADATA_ITEM is inspected; items whose first
 * inline ref is a BTRFS_TREE_BLOCK_REF_KEY contribute to the entry of the
 * root id stored in that ref.  An entry keeps the bytenr, generation and
 * node_count of the highest level seen so far for its root; the handling of
 * a second block at the same level is on lines not shown.
 *
 * NOTE(review): this listing omits short lines (braces, declarations, loop
 * header, gotos, error checks, the return); comments cover only what is
 * shown.
 */
13680 static int build_roots_info_cache(struct btrfs_fs_info *info)
13683 struct btrfs_key key;
13684 struct extent_buffer *leaf;
13685 struct btrfs_path path;
13687 if (!roots_info_cache) {
13688 roots_info_cache = malloc(sizeof(*roots_info_cache));
13689 if (!roots_info_cache)
13691 cache_tree_init(roots_info_cache);
13694 btrfs_init_path(&path);
13696 key.type = BTRFS_EXTENT_ITEM_KEY;
13698 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13701 leaf = path.nodes[0];
13704 struct btrfs_key found_key;
13705 struct btrfs_extent_item *ei;
13706 struct btrfs_extent_inline_ref *iref;
13707 int slot = path.slots[0];
13712 struct cache_extent *entry;
13713 struct root_item_info *rii;
/* Move to the next leaf once the slot runs past the last item */
13715 if (slot >= btrfs_header_nritems(leaf)) {
13716 ret = btrfs_next_leaf(info->extent_root, &path);
13723 leaf = path.nodes[0];
13724 slot = path.slots[0];
13727 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13729 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13730 found_key.type != BTRFS_METADATA_ITEM_KEY)
13733 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13734 flags = btrfs_extent_flags(leaf, ei);
13736 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13737 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * For METADATA_ITEM the level is the key offset and the inline ref follows the
 * extent item directly; otherwise a btrfs_tree_block_info sits in between and
 * carries the level.
 */
13740 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13741 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13742 level = found_key.offset;
13744 struct btrfs_tree_block_info *binfo;
13746 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13747 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13748 level = btrfs_tree_block_level(leaf, binfo);
13752 * For a root extent, it must be of the following type and the
13753 * first (and only one) iref in the item.
13755 type = btrfs_extent_inline_ref_type(leaf, iref);
13756 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* Cache entries are keyed by root id, with a size of 1 */
13759 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13760 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13762 rii = malloc(sizeof(struct root_item_info));
13767 rii->cache_extent.start = root_id;
13768 rii->cache_extent.size = 1;
/* A level of (u8)-1 marks an entry that has no block recorded yet */
13769 rii->level = (u8)-1;
13770 entry = &rii->cache_extent;
13771 ret = insert_cache_extent(roots_info_cache, entry);
13774 rii = container_of(entry, struct root_item_info,
13778 ASSERT(rii->cache_extent.start == root_id);
13779 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first block seen) replaces the recorded candidate */
13781 if (level > rii->level || rii->level == (u8)-1) {
13782 rii->level = level;
13783 rii->bytenr = found_key.objectid;
13784 rii->gen = btrfs_extent_generation(leaf, ei);
13785 rii->node_count = 1;
13786 } else if (level == rii->level) {
13794 btrfs_release_path(&path);
/*
 * Compare the root item at the current path position with the information
 * cached in roots_info_cache for the same root, and optionally rewrite it.
 *
 * @path:           path whose nodes[0]/slots[0] point at the root item
 * @root_key:       key of that root item; its objectid is the cache lookup key
 * @read_only_mode: when non-zero the item is only compared, never written
 *
 * The item is considered outdated when its bytenr, level or generation differ
 * from the cached values. With read_only_mode == 0 those three fields are
 * overwritten in the leaf with the cached values.
 */
13799 static int maybe_repair_root_item(struct btrfs_path *path,
13800 const struct btrfs_key *root_key,
13801 const int read_only_mode)
13803 const u64 root_id = root_key->objectid;
13804 struct cache_extent *entry;
13805 struct root_item_info *rii;
13806 struct btrfs_root_item ri;
13807 unsigned long offset;
/* Find the cached info keyed by this root's id. */
13809 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13812 "Error: could not find extent items for root %llu\n",
13813 root_key->objectid);
13817 rii = container_of(entry, struct root_item_info, cache_extent);
13818 ASSERT(rii->cache_extent.start == root_id);
13819 ASSERT(rii->cache_extent.size == 1);
/* Any node count other than exactly one is reported as an error. */
13821 if (rii->node_count != 1) {
13823 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item out of the leaf into the local 'ri'. */
13828 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13829 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13831 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13832 btrfs_root_level(&ri) != rii->level ||
13833 btrfs_root_generation(&ri) != rii->gen) {
13836 * If we're in repair mode but our caller told us to not update
13837 * the root item, i.e. just check if it needs to be updated, don't
13838 * print this message, since the caller will call us again shortly
13839 * for the same root item without read only mode (the caller will
13840 * open a transaction first).
13842 if (!(read_only_mode && repair))
13844 "%sroot item for root %llu,"
13845 " current bytenr %llu, current gen %llu, current level %u,"
13846 " new bytenr %llu, new gen %llu, new level %u\n",
13847 (read_only_mode ? "" : "fixing "),
13849 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13850 btrfs_root_level(&ri),
13851 rii->bytenr, rii->gen, rii->level);
/* Root item claims a newer generation than the root node that was found. */
13853 if (btrfs_root_generation(&ri) > rii->gen) {
13855 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13856 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write the cached bytenr/level/generation back into the leaf. */
13860 if (!read_only_mode) {
13861 btrfs_set_root_bytenr(&ri, rii->bytenr);
13862 btrfs_set_root_level(&ri, rii->level);
13863 btrfs_set_root_generation(&ri, rii->gen);
13864 write_extent_buffer(path->nodes[0], &ri,
13865 offset, sizeof(ri));
13875 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13876 * caused read-only snapshots to be corrupted if they were created at a moment
13877 * when the source subvolume/snapshot had orphan items. The issue was that the
13878 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13879 * node instead of the post orphan cleanup root node.
13880 * So this function, and its callees, just detects and fixes those cases. Even
13881 * though the regression was for read-only snapshots, this function applies to
13882 * any snapshot/subvolume root.
13883 * This must be run before any other repair code - not doing so makes other
13884 * repair code delete or modify backrefs in the extent tree for example, which
13885 * will result in an inconsistent fs after repairing the root items.
/*
 * Check, and in repair mode fix, the root items stored in info->tree_root.
 *
 * Builds roots_info_cache from the extent tree, then walks the tree root
 * starting at objectid BTRFS_FIRST_FREE_OBJECTID and calls
 * maybe_repair_root_item() on ROOT_ITEM keys. The cache is freed before
 * leaving.
 *
 * @info: filesystem info; info->tree_root is searched and is the root used
 *        for starting and committing transactions.
 */
13887 static int repair_root_items(struct btrfs_fs_info *info)
13889 struct btrfs_path path;
13890 struct btrfs_key key;
13891 struct extent_buffer *leaf;
13892 struct btrfs_trans_handle *trans = NULL;
13895 int need_trans = 0;
13897 btrfs_init_path(&path);
13899 ret = build_roots_info_cache(info);
/* First key to look at: the first ROOT_ITEM at or after the first free objectid. */
13903 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13904 key.type = BTRFS_ROOT_ITEM_KEY;
13909 * Avoid opening and committing transactions if a leaf doesn't have
13910 * any root items that need to be fixed, so that we avoid rotating
13911 * backup roots unnecessarily.
13914 trans = btrfs_start_transaction(info->tree_root, 1);
13915 if (IS_ERR(trans)) {
13916 ret = PTR_ERR(trans);
13921 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13925 leaf = path.nodes[0];
13928 struct btrfs_key found_key;
/* End of this leaf: work out the next key to search from and drop the path. */
13930 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13931 int no_more_keys = find_next_key(&path, &key);
13933 btrfs_release_path(&path);
13935 ret = btrfs_commit_transaction(trans,
13947 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13949 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13951 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* With no open transaction the helper runs in read-only (check) mode. */
13954 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/* Repair was requested but no transaction is open yet. */
13958 if (!trans && repair) {
13961 btrfs_release_path(&path);
13971 free_roots_info_cache();
13972 btrfs_release_path(&path);
13974 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of the block groups, then mark the cache as
 * invalid in the super block.
 *
 * Block groups are visited in address order: 'current' is used as the lookup
 * position and is advanced past each block group after
 * btrfs_clear_free_space_cache() has been called on it. Afterwards the super
 * block cache generation is set to (u64)-1 and a transaction is committed.
 *
 * @fs_info: filesystem to operate on
 */
13981 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13983 struct btrfs_trans_handle *trans;
13984 struct btrfs_block_group_cache *bg_cache;
13988 /* Clear all free space cache inodes and their extent data */
13990 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13993 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Next lookup starts right after this block group (start + length). */
13996 current = bg_cache->key.objectid + bg_cache->key.offset;
13999 /* Don't forget to set cache_generation to -1 */
14000 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14001 if (IS_ERR(trans)) {
14002 error("failed to update super block cache generation");
14003 return PTR_ERR(trans);
14005 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14006 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Front end for clearing the free space cache; the branch taken depends on
 * clear_version.
 *
 *   clear_version == 1: calls clear_free_space_cache(). The FREE_SPACE_TREE
 *                       compat_ro bit is tested first, and the message for
 *                       that case points the user at "--clear-space-cache v2".
 *   clear_version == 2: calls btrfs_clear_free_space_tree(). The absence of
 *                       the FREE_SPACE_TREE compat_ro bit is tested first and
 *                       reported as "no free space cache v2 to clear".
 *
 * Progress and failures are reported with printf()/error().
 */
14011 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14016 if (clear_version == 1) {
14017 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14019 "free space cache v2 detected, use --clear-space-cache v2");
14023 printf("Clearing free space cache\n");
14024 ret = clear_free_space_cache(fs_info);
14026 error("failed to clear free space cache");
14029 printf("Free space cache cleared\n");
14031 } else if (clear_version == 2) {
14032 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14033 printf("no free space cache v2 to clear\n");
14037 printf("Clear free space cache v2\n");
14038 ret = btrfs_clear_free_space_tree(fs_info);
14040 error("failed to clear free space cache v2: %d", ret);
14043 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check": a synopsis line, a one-line summary, a longer
 * description and one line per option. cmd_check() passes this array to
 * usage().
 */
14050 const char * const cmd_check_usage[] = {
14051 "btrfs check [options] <device>",
14052 "Check structural integrity of a filesystem (unmounted).",
14053 "Check structural integrity of an unmounted filesystem. Verify internal",
14054 "trees' consistency and item connectivity. In the repair mode try to",
14055 "fix the problems found. ",
14056 "WARNING: the repair mode is considered dangerous",
14058 "-s|--super <superblock> use this superblock copy",
14059 "-b|--backup use the first valid backup root copy",
14060 "--force skip mount checks, repair is not possible",
14061 "--repair try to repair the filesystem",
14062 "--readonly run in read-only mode (default)",
14063 "--init-csum-tree create a new CRC tree",
14064 "--init-extent-tree create a new extent tree",
14065 "--mode <MODE> allows choice of memory/IO trade-offs",
14066 " where MODE is one of:",
14067 " original - read inodes and extents to memory (requires",
14068 " more memory, does less IO)",
14069 " lowmem - try to use less memory but read blocks again",
14071 "--check-data-csum verify checksums of data blocks",
14072 "-Q|--qgroup-report print a report on qgroup consistency",
14073 "-E|--subvol-extents <subvolid>",
14074 " print subvolume extents and sharing state",
14075 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14076 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14077 "-p|--progress indicate progress",
14078 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
14082 int cmd_check(int argc, char **argv)
14084 struct cache_tree root_cache;
14085 struct btrfs_root *root;
14086 struct btrfs_fs_info *info;
14089 u64 tree_root_bytenr = 0;
14090 u64 chunk_root_bytenr = 0;
14091 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14095 int init_csum_tree = 0;
14097 int clear_space_cache = 0;
14098 int qgroup_report = 0;
14099 int qgroups_repaired = 0;
14100 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14105 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14106 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14107 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14108 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14109 GETOPT_VAL_FORCE };
14110 static const struct option long_options[] = {
14111 { "super", required_argument, NULL, 's' },
14112 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14113 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14114 { "init-csum-tree", no_argument, NULL,
14115 GETOPT_VAL_INIT_CSUM },
14116 { "init-extent-tree", no_argument, NULL,
14117 GETOPT_VAL_INIT_EXTENT },
14118 { "check-data-csum", no_argument, NULL,
14119 GETOPT_VAL_CHECK_CSUM },
14120 { "backup", no_argument, NULL, 'b' },
14121 { "subvol-extents", required_argument, NULL, 'E' },
14122 { "qgroup-report", no_argument, NULL, 'Q' },
14123 { "tree-root", required_argument, NULL, 'r' },
14124 { "chunk-root", required_argument, NULL,
14125 GETOPT_VAL_CHUNK_TREE },
14126 { "progress", no_argument, NULL, 'p' },
14127 { "mode", required_argument, NULL,
14129 { "clear-space-cache", required_argument, NULL,
14130 GETOPT_VAL_CLEAR_SPACE_CACHE},
14131 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14132 { NULL, 0, NULL, 0}
14135 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14139 case 'a': /* ignored */ break;
14141 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14144 num = arg_strtou64(optarg);
14145 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14147 "super mirror should be less than %d",
14148 BTRFS_SUPER_MIRROR_MAX);
14151 bytenr = btrfs_sb_offset(((int)num));
14152 printf("using SB copy %llu, bytenr %llu\n", num,
14153 (unsigned long long)bytenr);
14159 subvolid = arg_strtou64(optarg);
14162 tree_root_bytenr = arg_strtou64(optarg);
14164 case GETOPT_VAL_CHUNK_TREE:
14165 chunk_root_bytenr = arg_strtou64(optarg);
14168 ctx.progress_enabled = true;
14172 usage(cmd_check_usage);
14173 case GETOPT_VAL_REPAIR:
14174 printf("enabling repair mode\n");
14176 ctree_flags |= OPEN_CTREE_WRITES;
14178 case GETOPT_VAL_READONLY:
14181 case GETOPT_VAL_INIT_CSUM:
14182 printf("Creating a new CRC tree\n");
14183 init_csum_tree = 1;
14185 ctree_flags |= OPEN_CTREE_WRITES;
14187 case GETOPT_VAL_INIT_EXTENT:
14188 init_extent_tree = 1;
14189 ctree_flags |= (OPEN_CTREE_WRITES |
14190 OPEN_CTREE_NO_BLOCK_GROUPS);
14193 case GETOPT_VAL_CHECK_CSUM:
14194 check_data_csum = 1;
14196 case GETOPT_VAL_MODE:
14197 check_mode = parse_check_mode(optarg);
14198 if (check_mode == CHECK_MODE_UNKNOWN) {
14199 error("unknown mode: %s", optarg);
14203 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14204 if (strcmp(optarg, "v1") == 0) {
14205 clear_space_cache = 1;
14206 } else if (strcmp(optarg, "v2") == 0) {
14207 clear_space_cache = 2;
14208 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14211 "invalid argument to --clear-space-cache, must be v1 or v2");
14214 ctree_flags |= OPEN_CTREE_WRITES;
14216 case GETOPT_VAL_FORCE:
14222 if (check_argc_exact(argc - optind, 1))
14223 usage(cmd_check_usage);
14225 if (ctx.progress_enabled) {
14226 ctx.tp = TASK_NOTHING;
14227 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14230 /* This check is the only reason for --readonly to exist */
14231 if (readonly && repair) {
14232 error("repair options are not compatible with --readonly");
14237 * experimental and dangerous
14239 if (repair && check_mode == CHECK_MODE_LOWMEM)
14240 warning("low-memory mode repair support is only partial");
14243 cache_tree_init(&root_cache);
14245 ret = check_mounted(argv[optind]);
14248 error("could not check mount status: %s",
14254 "%s is currently mounted, use --force if you really intend to check the filesystem",
14262 error("repair and --force is not yet supported");
14269 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14273 "filesystem mounted, continuing because of --force");
14275 /* A block device is mounted in exclusive mode by kernel */
14276 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14279 /* only allow partial opening under repair mode */
14281 ctree_flags |= OPEN_CTREE_PARTIAL;
14283 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14284 chunk_root_bytenr, ctree_flags);
14286 error("cannot open file system");
14292 global_info = info;
14293 root = info->fs_root;
14294 uuid_unparse(info->super_copy->fsid, uuidbuf);
14296 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14299 * Check the bare minimum before starting anything else that could rely
14300 * on it, namely the tree roots, any local consistency checks
14302 if (!extent_buffer_uptodate(info->tree_root->node) ||
14303 !extent_buffer_uptodate(info->dev_root->node) ||
14304 !extent_buffer_uptodate(info->chunk_root->node)) {
14305 error("critical roots corrupted, unable to check the filesystem");
14311 if (clear_space_cache) {
14312 ret = do_clear_free_space_cache(info, clear_space_cache);
14318 * repair mode will force us to commit transaction which
14319 * will make us fail to load log tree when mounting.
14321 if (repair && btrfs_super_log_root(info->super_copy)) {
14322 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14328 ret = zero_log_tree(root);
14331 error("failed to zero log tree: %d", ret);
14336 if (qgroup_report) {
14337 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14339 ret = qgroup_verify_all(info);
14346 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14347 subvolid, argv[optind], uuidbuf);
14348 ret = print_extent_state(info, subvolid);
14353 if (init_extent_tree || init_csum_tree) {
14354 struct btrfs_trans_handle *trans;
14356 trans = btrfs_start_transaction(info->extent_root, 0);
14357 if (IS_ERR(trans)) {
14358 error("error starting transaction");
14359 ret = PTR_ERR(trans);
14364 if (init_extent_tree) {
14365 printf("Creating a new extent tree\n");
14366 ret = reinit_extent_tree(trans, info);
14372 if (init_csum_tree) {
14373 printf("Reinitialize checksum tree\n");
14374 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14376 error("checksum tree initialization failed: %d",
14383 ret = fill_csum_tree(trans, info->csum_root,
14387 error("checksum tree refilling failed: %d", ret);
14392 * Ok now we commit and run the normal fsck, which will add
14393 * extent entries for all of the items it finds.
14395 ret = btrfs_commit_transaction(trans, info->extent_root);
14400 if (!extent_buffer_uptodate(info->extent_root->node)) {
14401 error("critical: extent_root, unable to check the filesystem");
14406 if (!extent_buffer_uptodate(info->csum_root->node)) {
14407 error("critical: csum_root, unable to check the filesystem");
14413 if (!init_extent_tree) {
14414 ret = repair_root_items(info);
14417 error("failed to repair root items: %s", strerror(-ret));
14421 fprintf(stderr, "Fixed %d roots.\n", ret);
14423 } else if (ret > 0) {
14425 "Found %d roots with an outdated root item.\n",
14428 "Please run a filesystem check with the option --repair to fix them.\n");
14435 ret = do_check_chunks_and_extents(info);
14439 "errors found in extent allocation tree or chunk allocation");
14441 /* Only re-check super size after we checked and repaired the fs */
14442 err |= !is_super_size_valid(info);
14444 if (!ctx.progress_enabled) {
14445 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14446 fprintf(stderr, "checking free space tree\n");
14448 fprintf(stderr, "checking free space cache\n");
14450 ret = check_space_cache(root);
14453 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14454 error("errors found in free space tree");
14456 error("errors found in free space cache");
14461 * We used to have to have these hole extents in between our real
14462 * extents so if we don't have this flag set we need to make sure there
14463 * are no gaps in the file extents for inodes, otherwise we can just
14464 * ignore it when this happens.
14466 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14467 ret = do_check_fs_roots(info, &root_cache);
14470 error("errors found in fs roots");
14474 fprintf(stderr, "checking csums\n");
14475 ret = check_csums(root);
14478 error("errors found in csum tree");
14482 fprintf(stderr, "checking root refs\n");
14483 /* For low memory mode, check_fs_roots_v2 handles root refs */
14484 if (check_mode != CHECK_MODE_LOWMEM) {
14485 ret = check_root_refs(root, &root_cache);
14488 error("errors found in root refs");
14493 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14494 struct extent_buffer *eb;
14496 eb = list_first_entry(&root->fs_info->recow_ebs,
14497 struct extent_buffer, recow);
14498 list_del_init(&eb->recow);
14499 ret = recow_extent_buffer(root, eb);
14502 error("fails to fix transid errors");
14507 while (!list_empty(&delete_items)) {
14508 struct bad_item *bad;
14510 bad = list_first_entry(&delete_items, struct bad_item, list);
14511 list_del_init(&bad->list);
14513 ret = delete_bad_item(root, bad);
14519 if (info->quota_enabled) {
14520 fprintf(stderr, "checking quota groups\n");
14521 ret = qgroup_verify_all(info);
14524 error("failed to check quota groups");
14528 ret = repair_qgroups(info, &qgroups_repaired);
14531 error("failed to repair quota groups");
14537 if (!list_empty(&root->fs_info->recow_ebs)) {
14538 error("transid errors in file system");
14543 printf("found %llu bytes used, ",
14544 (unsigned long long)bytes_used);
14546 printf("error(s) found\n");
14548 printf("no error found\n");
14549 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14550 printf("total tree bytes: %llu\n",
14551 (unsigned long long)total_btree_bytes);
14552 printf("total fs tree bytes: %llu\n",
14553 (unsigned long long)total_fs_tree_bytes);
14554 printf("total extent tree bytes: %llu\n",
14555 (unsigned long long)total_extent_tree_bytes);
14556 printf("btree space waste bytes: %llu\n",
14557 (unsigned long long)btree_space_waste);
14558 printf("file data blocks allocated: %llu\n referenced %llu\n",
14559 (unsigned long long)data_bytes_allocated,
14560 (unsigned long long)data_bytes_referenced);
14562 free_qgroup_counts();
14563 free_root_recs_tree(&root_cache);
14567 if (ctx.progress_enabled)
14568 task_deinit(ctx.info);